1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-02-09 14:14:56 +02:00

Don't replace NUL bytes when searching binary files as text.

This was a result of misinterpreting a feature in grep where NUL bytes
are replaced with \n. The primary reason for doing this is to avoid
excessive memory usage on truly binary data. However, grep only does this
when searching binary files as if they were binary, a mode that only reports
whether the file matched or not. When grep is told to search binary data
as text (the -a/--text flag), then it doesn't do any replacement, so we
shouldn't either.

In general, this makes sense, because the user is essentially asserting
that a particular file that looks like binary is actually text. In that
case, we shouldn't try to replace any NUL bytes.

ripgrep doesn't support searching binary data merely for whether it
matches or not, so we don't actually need the replace_buf function.
However, it does seem like a potentially useful feature, so the function
is kept and marked with #[allow(dead_code)] rather than removed.
This commit is contained in:
Andrew Gallant 2016-09-25 21:26:49 -04:00
parent 278e1168bf
commit b034b77798
2 changed files with 9 additions and 8 deletions

View File

@ -532,10 +532,6 @@ impl InputBuffer {
if self.first && is_binary(&self.buf[self.end..self.end + n]) {
self.is_binary = true;
}
if self.is_binary {
replace_buf(
&mut self.buf[self.end..self.end + n], b'\x00', self.eol);
}
self.first = false;
// We assume that reading 0 bytes means we've hit EOF.
if n == 0 {
@ -658,6 +654,7 @@ pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
}
/// Replaces a with b in buf.
#[allow(dead_code)]
fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
if a == b {
return;
@ -999,7 +996,7 @@ fn main() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:Holmes\n");
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
}
#[test]

View File

@ -138,7 +138,11 @@ be, to a very large extent, the result of luck. Sherlock Holmes
foo
Sherlock Holmes lives on Baker Street.
";
assert!(lines == expected1 || lines == expected2);
if lines != expected1 {
assert_eq!(lines, expected2);
} else {
assert_eq!(lines, expected1);
}
});
sherlock!(inverted, |wd: WorkDir, mut cmd: Command| {
@ -587,7 +591,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-uuu");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file:foo\nfile:foo\n");
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
});
// On Windows, this test uses memory maps, so the NUL bytes don't get replaced.
@ -785,7 +789,7 @@ fn binary_search_no_mmap() {
let mut cmd = wd.command();
cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo\nfoo\n");
assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n");
}
#[test]