1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-14 00:58:43 +02:00

printer: vimgrep now only prints one line

It turns out that the vimgrep format really only wants one line per
match, even when that match spans multiple lines.

We continue to support the previous behavior (print all lines in a
match) in the `grep-printer` crate. We add a new option to enable the
"only print the first line" behavior, and unconditionally enable it in
ripgrep. We can do that because the option has no effect in single-line
mode, since, well, in that case matches are guaranteed to span one line
anyway.

Fixes #1866
This commit is contained in:
Andrew Gallant 2021-05-30 21:36:35 -04:00
parent 578e1992fa
commit fc31aedcf3
5 changed files with 140 additions and 12 deletions

View File

@ -4,6 +4,8 @@ Unreleased changes. Release notes have not yet been written.
**BREAKING CHANGES**:
**Binary detection output has changed slightly.**
In this release, a small tweak has been made to the output format when a binary
file is detected. Previously, it looked like this:
@ -17,6 +19,13 @@ Now it looks like this:
FOO: binary file matches (found "\0" byte around offset XXX)
```
**vimgrep output in multi-line now only prints the first line for each match.**
See [issue 1866](https://github.com/BurntSushi/ripgrep/issues/1866) for more
discussion on this. Previously, every line in a match was duplicated, even
when it spanned multiple lines. There are no changes to vimgrep output when
multi-line mode is disabled.
Security fixes:
* [CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013):

View File

@ -777,6 +777,7 @@ impl ArgMatches {
.path(self.with_filename(paths))
.only_matching(self.is_present("only-matching"))
.per_match(self.is_present("vimgrep"))
.per_match_one_line(true)
.replacement(self.replacement())
.max_columns(self.max_columns()?)
.max_columns_preview(self.max_columns_preview())

View File

@ -31,6 +31,7 @@ struct Config {
path: bool,
only_matching: bool,
per_match: bool,
per_match_one_line: bool,
replacement: Arc<Option<Vec<u8>>>,
max_columns: Option<u64>,
max_columns_preview: bool,
@ -55,6 +56,7 @@ impl Default for Config {
path: true,
only_matching: false,
per_match: false,
per_match_one_line: false,
replacement: Arc::new(None),
max_columns: None,
max_columns_preview: false,
@ -219,17 +221,36 @@ impl StandardBuilder {
/// the `column` option, which will show the starting column number for
/// every match on every line.
///
/// When multi-line mode is enabled, each match and its accompanying lines
/// are printed. As with single line matches, if a line contains multiple
/// matches (even if only partially), then that line is printed once for
/// each match it participates in. In multi-line mode, column numbers only
/// indicate the start of a match. If a line only continues a match, then
/// the column number printed is always `1`.
/// When multi-line mode is enabled, each match is printed, including every
/// line in the match. As with single line matches, if a line contains
/// multiple matches (even if only partially), then that line is printed
/// once for each match it participates in, assuming it's the first line in
/// that match. In multi-line mode, column numbers only indicate the start
/// of a match. Subsequent lines in a multi-line match always have a column
/// number of `1`.
///
/// When a match contains multiple lines, enabling `per_match_one_line`
/// will cause only the first line each in match to be printed.
pub fn per_match(&mut self, yes: bool) -> &mut StandardBuilder {
self.config.per_match = yes;
self
}
/// Print at most one line per match when `per_match` is enabled.
///
/// By default, every line in each match found is printed when `per_match`
/// is enabled. However, this is sometimes undesirable, e.g., when you
/// only ever want one line per match.
///
/// This is only applicable when multi-line matching is enabled, since
/// otherwise, matches are guaranteed to span one line.
///
/// This is disabled by default.
pub fn per_match_one_line(&mut self, yes: bool) -> &mut StandardBuilder {
self.config.per_match_one_line = yes;
self
}
/// Set the bytes that will be used to replace each occurrence of a match
/// found.
///
@ -294,9 +315,6 @@ impl StandardBuilder {
/// Column numbers are computed in terms of bytes from the start of the
/// line being printed.
///
/// For matches that span multiple lines, the column number for each
/// matching line is in terms of the first matching line.
///
/// This is disabled by default.
pub fn column(&mut self, yes: bool) -> &mut StandardBuilder {
self.config.column = yes;
@ -1139,6 +1157,15 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
}
}
self.write_line_term()?;
// It turns out that vimgrep really only wants one line per
// match, even when a match spans multiple lines. So when
// that option is enabled, we just quit after printing the
// first line.
//
// See: https://github.com/BurntSushi/ripgrep/issues/1866
if self.config().per_match_one_line {
break;
}
}
}
Ok(())
@ -3058,6 +3085,91 @@ Holmeses, success in the province of detective work must always
assert_eq_printed!(expected, got);
}
#[test]
fn per_match_multi_line1_only_first_line() {
let matcher =
RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap();
let mut printer = StandardBuilder::new()
.per_match(true)
.per_match_one_line(true)
.column(true)
.build(NoColor::new(vec![]));
SearcherBuilder::new()
.multi_line(true)
.line_number(true)
.build()
.search_reader(
&matcher,
SHERLOCK.as_bytes(),
printer.sink(&matcher),
)
.unwrap();
let got = printer_contents(&mut printer);
let expected = "\
1:9:For the Doctor Watsons of this world, as opposed to the Sherlock
1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
3:49:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq_printed!(expected, got);
}
#[test]
fn per_match_multi_line2_only_first_line() {
let matcher =
RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap();
let mut printer = StandardBuilder::new()
.per_match(true)
.per_match_one_line(true)
.column(true)
.build(NoColor::new(vec![]));
SearcherBuilder::new()
.multi_line(true)
.line_number(true)
.build()
.search_reader(
&matcher,
SHERLOCK.as_bytes(),
printer.sink(&matcher),
)
.unwrap();
let got = printer_contents(&mut printer);
let expected = "\
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
5:12:but Doctor Watson has to have it taken out for him and dusted,
";
assert_eq_printed!(expected, got);
}
#[test]
fn per_match_multi_line3_only_first_line() {
let matcher =
RegexMatcher::new(r"(?s)Watson.+?Holmeses|always.+?be").unwrap();
let mut printer = StandardBuilder::new()
.per_match(true)
.per_match_one_line(true)
.column(true)
.build(NoColor::new(vec![]));
SearcherBuilder::new()
.multi_line(true)
.line_number(true)
.build()
.search_reader(
&matcher,
SHERLOCK.as_bytes(),
printer.sink(&matcher),
)
.unwrap();
let got = printer_contents(&mut printer);
let expected = "\
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
2:58:Holmeses, success in the province of detective work must always
";
assert_eq_printed!(expected, got);
}
#[test]
fn replacement_passthru() {
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();

View File

@ -64,6 +64,11 @@ rgtest!(only_matching, |dir: Dir, mut cmd: TestCommand| {
});
// Tests that --vimgrep works in multiline mode.
//
// In particular, we test that only the first line of each match is printed,
// even when a match spans multiple lines.
//
// See: https://github.com/BurntSushi/ripgrep/issues/1866
rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
dir.create("sherlock", SHERLOCK);
cmd.args(&[
@ -77,7 +82,6 @@ rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
let expected = "\
sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:2:1:Holmeses, success in the province of detective work must always
sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted,
";

View File

@ -917,10 +917,12 @@ rgtest!(r1866, |dir: Dir, mut cmd: TestCommand| {
"test",
]);
// vimgrep only wants the first line of each match, even when a match
// spans multiple lines.
//
// See: https://github.com/BurntSushi/ripgrep/issues/1866
let expected = "\
test:1:1:foobar
test:2:1:foobar
test:3:1:foo quux
test:3:5:foo quux
";
eqnice!(expected, cmd.stdout());