diff options
author | Andrew Gallant <jamslam@gmail.com> | 2019-01-25 17:18:57 -0500 |
---|---|---|
committer | Andrew Gallant <jamslam@gmail.com> | 2019-01-25 17:18:57 -0500 |
commit | 276e2c9b9ab3dac7643033d8b9d6c272d5dac1d5 (patch) | |
tree | 781a5da81a73e541d869accfcdce15e5df518de3 | |
parent | 9a9f54d44ce45c6c3c5bfa31ad1a08fda57cdad7 (diff) |
searcher: always strip BOM
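This fixes a bug where a leading BOM was left in the content handed to the
searcher. While this was somewhat intentional in order to have a faithful
"UTF8 passthru" option, in practice, this causes problems such as breaking
patterns like `^` in a really non-obvious way: the BOM sits in front of the
first character of the first line, so `^test123` fails to match a file whose
first line visibly begins with `test123`.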
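The actual fix was to add a new API to encoding_rs_io (the `strip_bom`
option on its decode builder), which this commit brings in by upgrading
encoding_rs_io from 0.1.3 to 0.1.4 and enabling the option in the searcher.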
Fixes #1163
-rw-r--r-- | Cargo.lock | 6 | ||||
-rw-r--r-- | grep-searcher/Cargo.toml | 2 | ||||
-rw-r--r-- | grep-searcher/src/searcher/mod.rs | 1 | ||||
-rw-r--r-- | tests/regression.rs | 9 |
4 files changed, 14 insertions, 4 deletions
@@ -103,7 +103,7 @@ dependencies = [ [[package]] name = "encoding_rs_io" -version = "0.1.3" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", @@ -221,7 +221,7 @@ version = "0.1.1" dependencies = [ "bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", + "encoding_rs_io 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)", "grep-matcher 0.1.1", "grep-regex 0.1.1", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", @@ -757,7 +757,7 @@ dependencies = [ "checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b" "checksum crossbeam-utils 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "41ee4864f4797060e52044376f7d107429ce1fb43460021b126424b7180ee21a" "checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7" -"checksum encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "098f6a0ab73a9ba256b71344dc82c6d7e252736ad9db7f4e35345f3a1f8713f5" +"checksum encoding_rs_io 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6c89a56158243c7cde22fde70e452a40dded9d9d9100f71273df19af9be4d034" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = 
"3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" diff --git a/grep-searcher/Cargo.toml b/grep-searcher/Cargo.toml index 4cf5fde3..1d977b45 100644 --- a/grep-searcher/Cargo.toml +++ b/grep-searcher/Cargo.toml @@ -15,7 +15,7 @@ license = "Unlicense/MIT" [dependencies] bytecount = "0.5" encoding_rs = "0.8.14" -encoding_rs_io = "0.1.3" +encoding_rs_io = "0.1.4" grep-matcher = { version = "0.1.1", path = "../grep-matcher" } log = "0.4.5" memchr = "2.1" diff --git a/grep-searcher/src/searcher/mod.rs b/grep-searcher/src/searcher/mod.rs index bc428b68..c70b3a0e 100644 --- a/grep-searcher/src/searcher/mod.rs +++ b/grep-searcher/src/searcher/mod.rs @@ -307,6 +307,7 @@ impl SearcherBuilder { decode_builder .encoding(self.config.encoding.as_ref().map(|e| e.0)) .utf8_passthru(true) + .strip_bom(true) .bom_override(true); Searcher { config: config, diff --git a/tests/regression.rs b/tests/regression.rs index b8dc26d0..15dbcad7 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -592,6 +592,15 @@ rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| { ); }); +// See: https://github.com/BurntSushi/ripgrep/issues/1163 +rgtest!(r1163, |dir: Dir, mut cmd: TestCommand| { + dir.create("bom.txt", "\u{FEFF}test123\ntest123"); + eqnice!( + "bom.txt:test123\nbom.txt:test123\n", + cmd.arg("^test123").stdout() + ); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/1164 rgtest!(r1164, |dir: Dir, mut cmd: TestCommand| { dir.create_dir(".git"); |