mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-02-09 14:14:56 +02:00
Don't replace NUL bytes when searching binary files as text.
This was a result of misinterpreting a feature in grep where NUL bytes are replaced with \n. The primary reason for doing this is to avoid excessive memory usage on truly binary data. However, grep only does this when searching binary files as if they were binary, and which only reports whether the file matched or not. When grep is told to search binary data as text (the -a/--text flag), then it doesn't do any replacement so we shouldn't either. In general, this makes sense, because the user is essentially asserting that a particular file that looks like binary is actually text. In that case, we shouldn't try to replace any NUL bytes. ripgrep doesn't actually support searching binary data for whether it matches or not, so we don't actually need the replace_buf function. However, it does seem like a potentially useful feature.
This commit is contained in:
parent
278e1168bf
commit
b034b77798
@ -532,10 +532,6 @@ impl InputBuffer {
|
||||
if self.first && is_binary(&self.buf[self.end..self.end + n]) {
|
||||
self.is_binary = true;
|
||||
}
|
||||
if self.is_binary {
|
||||
replace_buf(
|
||||
&mut self.buf[self.end..self.end + n], b'\x00', self.eol);
|
||||
}
|
||||
self.first = false;
|
||||
// We assume that reading 0 bytes means we've hit EOF.
|
||||
if n == 0 {
|
||||
@ -658,6 +654,7 @@ pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
|
||||
}
|
||||
|
||||
/// Replaces a with b in buf.
|
||||
#[allow(dead_code)]
|
||||
fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
|
||||
if a == b {
|
||||
return;
|
||||
@ -999,7 +996,7 @@ fn main() {
|
||||
let text = "Sherlock\n\x00Holmes\n";
|
||||
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:Holmes\n");
|
||||
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -138,7 +138,11 @@ be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
foo
|
||||
Sherlock Holmes lives on Baker Street.
|
||||
";
|
||||
assert!(lines == expected1 || lines == expected2);
|
||||
if lines != expected1 {
|
||||
assert_eq!(lines, expected2);
|
||||
} else {
|
||||
assert_eq!(lines, expected1);
|
||||
}
|
||||
});
|
||||
|
||||
sherlock!(inverted, |wd: WorkDir, mut cmd: Command| {
|
||||
@ -587,7 +591,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
cmd.arg("-uuu");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "file:foo\nfile:foo\n");
|
||||
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
|
||||
});
|
||||
|
||||
// On Windows, this test uses memory maps, so the NUL bytes don't get replaced.
|
||||
@ -785,7 +789,7 @@ fn binary_search_no_mmap() {
|
||||
let mut cmd = wd.command();
|
||||
cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file");
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "foo\nfoo\n");
|
||||
assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
Loading…
x
Reference in New Issue
Block a user