summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Andrew Gallant <jamslam@gmail.com>, 2019-01-25 17:18:57 -0500
committer: Andrew Gallant <jamslam@gmail.com>, 2019-01-25 17:18:57 -0500
commit: 276e2c9b9ab3dac7643033d8b9d6c272d5dac1d5 (patch)
tree: 781a5da81a73e541d869accfcdce15e5df518de3
parent: 9a9f54d44ce45c6c3c5bfa31ad1a08fda57cdad7 (diff)
searcher: always strip BOM
This fixes a bug where a leading BOM was left in the content handed to the searcher. While this was somewhat intentional in order to have a faithful "UTF8 passthru" option, in practice it causes problems, such as breaking patterns like `^` in a really non-obvious way: a line that begins with a BOM no longer begins with the text the pattern expects. The actual fix was to add a new API to encoding_rs_io, which this commit brings in. Fixes #1163
-rw-r--r--  Cargo.lock  6
-rw-r--r--  grep-searcher/Cargo.toml  2
-rw-r--r--  grep-searcher/src/searcher/mod.rs  1
-rw-r--r--  tests/regression.rs  9
4 files changed, 14 insertions, 4 deletions
diff --git a/Cargo.lock b/Cargo.lock
index 6e479b12..9af1a04c 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -103,7 +103,7 @@ dependencies = [
[[package]]
name = "encoding_rs_io"
-version = "0.1.3"
+version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -221,7 +221,7 @@ version = "0.1.1"
dependencies = [
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
- "encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "encoding_rs_io 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.1.1",
"grep-regex 0.1.1",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -757,7 +757,7 @@ dependencies = [
"checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b"
"checksum crossbeam-utils 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "41ee4864f4797060e52044376f7d107429ce1fb43460021b126424b7180ee21a"
"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"
-"checksum encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "098f6a0ab73a9ba256b71344dc82c6d7e252736ad9db7f4e35345f3a1f8713f5"
+"checksum encoding_rs_io 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6c89a56158243c7cde22fde70e452a40dded9d9d9100f71273df19af9be4d034"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
diff --git a/grep-searcher/Cargo.toml b/grep-searcher/Cargo.toml
index 4cf5fde3..1d977b45 100644
--- a/grep-searcher/Cargo.toml
+++ b/grep-searcher/Cargo.toml
@@ -15,7 +15,7 @@ license = "Unlicense/MIT"
[dependencies]
bytecount = "0.5"
encoding_rs = "0.8.14"
-encoding_rs_io = "0.1.3"
+encoding_rs_io = "0.1.4"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
log = "0.4.5"
memchr = "2.1"
diff --git a/grep-searcher/src/searcher/mod.rs b/grep-searcher/src/searcher/mod.rs
index bc428b68..c70b3a0e 100644
--- a/grep-searcher/src/searcher/mod.rs
+++ b/grep-searcher/src/searcher/mod.rs
@@ -307,6 +307,7 @@ impl SearcherBuilder {
decode_builder
.encoding(self.config.encoding.as_ref().map(|e| e.0))
.utf8_passthru(true)
+ .strip_bom(true)
.bom_override(true);
Searcher {
config: config,
diff --git a/tests/regression.rs b/tests/regression.rs
index b8dc26d0..15dbcad7 100644
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -592,6 +592,15 @@ rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| {
);
});
+// See: https://github.com/BurntSushi/ripgrep/issues/1163
+rgtest!(r1163, |dir: Dir, mut cmd: TestCommand| {
+ dir.create("bom.txt", "\u{FEFF}test123\ntest123");
+ eqnice!(
+ "bom.txt:test123\nbom.txt:test123\n",
+ cmd.arg("^test123").stdout()
+ );
+});
+
// See: https://github.com/BurntSushi/ripgrep/issues/1164
rgtest!(r1164, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir(".git");