mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
a7d26c8f14
This commit attempts to surface binary filtering in a slightly more user-friendly way. Namely, before, ripgrep would silently stop searching a file if it detected a NUL byte, even if it had previously printed a match. This can lead to the user quite reasonably assuming that there are no more matches, since a partial search is fairly unintuitive. (ripgrep has this behavior by default because it really wants to NOT search binary files at all, just like it doesn't search gitignored or hidden files.) With this commit, if a match has already been printed and ripgrep detects a NUL byte, then it will print a warning message indicating that the search stopped prematurely. Moreover, this commit adds a new flag, --binary, which causes ripgrep to stop filtering binary files, but in a way that still avoids dumping binary data into terminals. That is, the --binary flag makes ripgrep behave more like grep's default behavior. For files explicitly specified in a search, e.g., `rg foo some-file`, then no binary filtering is applied (just like no gitignore and no hidden file filtering is applied). Instead, ripgrep behaves as if you gave the --binary flag for all explicitly given files. This was a fairly invasive change, and potentially increases the UX complexity of ripgrep around binary files. (Before, there were two binary modes, whereas now there are three.) However, ripgrep is now a bit louder with warning messages when binary file detection might otherwise be hiding potential matches, so hopefully this is a net improvement. Finally, the `-uuu` convenience now maps to `--no-ignore --hidden --binary`, since this is closer to the actual intent of the `--unrestricted` flag, i.e., to reduce ripgrep's smart filtering. As a consequence, `rg -uuu foo` should now search roughly the same number of bytes as `grep -r foo`, and `rg -uuua foo` should search roughly the same number of bytes as `grep -ra foo`. 
(The "roughly" weasel word is used because grep's and ripgrep's binary file detection might differ somewhat---perhaps based on buffer sizes---which can impact exactly what is and isn't searched.) See the numerous tests in tests/binary.rs for intended behavior. Fixes #306, Fixes #855
110 lines
3.7 KiB
Rust
110 lines
3.7 KiB
Rust
use crate::hay::SHERLOCK;
|
|
use crate::util::{Dir, TestCommand};
|
|
|
|
// This tests that multiline matches that span multiple lines, but where
|
|
// multiple matches may begin and end on the same line work correctly.
|
|
rgtest!(overlap1, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("test", "xxx\nabc\ndefxxxabc\ndefxxx\nxxx");
|
|
cmd.arg("-n").arg("-U").arg("abc\ndef").arg("test");
|
|
eqnice!("2:abc\n3:defxxxabc\n4:defxxx\n", cmd.stdout());
|
|
});
|
|
|
|
// Like overlap1, but tests the case where one match ends at precisely the same
|
|
// location at which the next match begins.
|
|
rgtest!(overlap2, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("test", "xxx\nabc\ndefabc\ndefxxx\nxxx");
|
|
cmd.arg("-n").arg("-U").arg("abc\ndef").arg("test");
|
|
eqnice!("2:abc\n3:defabc\n4:defxxx\n", cmd.stdout());
|
|
});
|
|
|
|
// Tests that even in a multiline search, a '.' does not match a newline.
|
|
rgtest!(dot_no_newline, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("sherlock", SHERLOCK);
|
|
cmd.args(&[
|
|
"-n", "-U", "of this world.+detective work", "sherlock",
|
|
]);
|
|
cmd.assert_err();
|
|
});
|
|
|
|
// Tests that the --multiline-dotall flag causes '.' to match a newline.
|
|
rgtest!(dot_all, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("sherlock", SHERLOCK);
|
|
cmd.args(&[
|
|
"-n", "-U", "--multiline-dotall",
|
|
"of this world.+detective work", "sherlock",
|
|
]);
|
|
|
|
let expected = "\
|
|
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
|
2:Holmeses, success in the province of detective work must always
|
|
";
|
|
eqnice!(expected, cmd.stdout());
|
|
});
|
|
|
|
// Tests that --only-matching works in multiline mode.
|
|
rgtest!(only_matching, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("sherlock", SHERLOCK);
|
|
cmd.args(&[
|
|
"-n", "-U", "--only-matching",
|
|
r"Watson|Sherlock\p{Any}+?Holmes", "sherlock",
|
|
]);
|
|
|
|
let expected = "\
|
|
1:Watson
|
|
1:Sherlock
|
|
2:Holmes
|
|
3:Sherlock Holmes
|
|
5:Watson
|
|
";
|
|
eqnice!(expected, cmd.stdout());
|
|
});
|
|
|
|
// Tests that --vimgrep works in multiline mode.
|
|
rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("sherlock", SHERLOCK);
|
|
cmd.args(&[
|
|
"-n", "-U", "--vimgrep",
|
|
r"Watson|Sherlock\p{Any}+?Holmes", "sherlock",
|
|
]);
|
|
|
|
let expected = "\
|
|
sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
|
sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
|
|
sherlock:2:57:Holmeses, success in the province of detective work must always
|
|
sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes
|
|
sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted,
|
|
";
|
|
eqnice!(expected, cmd.stdout());
|
|
});
|
|
|
|
// Tests that multiline search works when reading from stdin. This is an
|
|
// important test because multiline search must read the entire contents of
|
|
// what it is searching into memory before executing the search.
|
|
rgtest!(stdin, |_: Dir, mut cmd: TestCommand| {
|
|
cmd.args(&[
|
|
"-n", "-U", r"of this world\p{Any}+?detective work",
|
|
]);
|
|
let expected = "\
|
|
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
|
2:Holmeses, success in the province of detective work must always
|
|
";
|
|
eqnice!(expected, cmd.pipe(SHERLOCK.as_bytes()));
|
|
});
|
|
|
|
// Test that multiline search and contextual matches work.
|
|
rgtest!(context, |dir: Dir, mut cmd: TestCommand| {
|
|
dir.create("sherlock", SHERLOCK);
|
|
cmd.args(&[
|
|
"-n", "-U", "-C1",
|
|
r"detective work\p{Any}+?result of luck", "sherlock",
|
|
]);
|
|
|
|
let expected = "\
|
|
1-For the Doctor Watsons of this world, as opposed to the Sherlock
|
|
2:Holmeses, success in the province of detective work must always
|
|
3:be, to a very large extent, the result of luck. Sherlock Holmes
|
|
4-can extract a clew from a wisp of straw or a flake of cigar ash;
|
|
";
|
|
eqnice!(expected, cmd.stdout());
|
|
});
|