1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-02 20:45:38 +02:00

searcher: always strip BOM

This fixes a bug where a leading BOM was left in the searched content
instead of being stripped. While this was somewhat intentional in order to
have a faithful "UTF8 passthru" option, in practice it caused problems such
as breaking patterns like `^` in a really non-obvious way.

The actual fix was to add a new API to encoding_rs_io, which this commit
brings in.

Fixes #1163
This commit is contained in:
Andrew Gallant 2019-01-25 17:18:57 -05:00
parent 9a9f54d44c
commit 276e2c9b9a
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
4 changed files with 14 additions and 4 deletions

6
Cargo.lock generated
View File

@ -103,7 +103,7 @@ dependencies = [
[[package]]
name = "encoding_rs_io"
-version = "0.1.3"
+version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
@ -221,7 +221,7 @@ version = "0.1.1"
dependencies = [
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
-"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+"encoding_rs_io 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.1.1",
"grep-regex 0.1.1",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
@ -757,7 +757,7 @@ dependencies = [
"checksum crossbeam-channel 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "137bc235f622ffaa0428e3854e24acb53291fc0b3ff6fb2cb75a8be6fb02f06b"
"checksum crossbeam-utils 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "41ee4864f4797060e52044376f7d107429ce1fb43460021b126424b7180ee21a"
"checksum encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a69d152eaa438a291636c1971b0a370212165ca8a75759eb66818c5ce9b538f7"
-"checksum encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "098f6a0ab73a9ba256b71344dc82c6d7e252736ad9db7f4e35345f3a1f8713f5"
+"checksum encoding_rs_io 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "6c89a56158243c7cde22fde70e452a40dded9d9d9100f71273df19af9be4d034"
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"

View File

@ -15,7 +15,7 @@ license = "Unlicense/MIT"
[dependencies]
bytecount = "0.5"
encoding_rs = "0.8.14"
-encoding_rs_io = "0.1.3"
+encoding_rs_io = "0.1.4"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
log = "0.4.5"
memchr = "2.1"

View File

@ -307,6 +307,7 @@ impl SearcherBuilder {
decode_builder
.encoding(self.config.encoding.as_ref().map(|e| e.0))
.utf8_passthru(true)
.strip_bom(true)
.bom_override(true);
Searcher {
config: config,

View File

@ -592,6 +592,15 @@ rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| {
);
});
// See: https://github.com/BurntSushi/ripgrep/issues/1163
//
// Regression test: a file whose first line starts with a BOM (U+FEFF) must
// still match a pattern anchored with `^` on that first line.
rgtest!(r1163, |dir: Dir, mut cmd: TestCommand| {
// Two identical lines of text; only the first is preceded by the BOM.
dir.create("bom.txt", "\u{FEFF}test123\ntest123");
// Both lines are expected in the output, and the first one is expected to
// be printed without the BOM in front of it.
eqnice!(
"bom.txt:test123\nbom.txt:test123\n",
cmd.arg("^test123").stdout()
);
});
// See: https://github.com/BurntSushi/ripgrep/issues/1164
rgtest!(r1164, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir(".git");