1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-24 17:12:16 +02:00

cli: add --stop-on-nonmatch flag

This causes ripgrep to stop searching an individual file after it has
found a non-matching line. But this only occurs after it has found a
matching line.

Fixes #1790, Closes #1930
This commit is contained in:
Edoardo Pirovano 2021-07-07 17:50:23 +01:00 committed by Andrew Gallant
parent 4782ebd5e0
commit 6d95c130d5
7 changed files with 99 additions and 12 deletions

View File

@ -11,6 +11,8 @@ Unreleased changes. Release notes have not yet been written.
Feature enhancements: Feature enhancements:
* Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V * Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V
* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790):
Add new `--stop-on-nonmatch` flag.
* [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195): * [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195):
When `extra-verbose` mode is enabled in zsh, show extra file type info. When `extra-verbose` mode is enabled in zsh, show extra file type info.
* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409): * [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409):

View File

@ -319,6 +319,7 @@ _rg() {
'(-q --quiet)'{-q,--quiet}'[suppress normal output]' '(-q --quiet)'{-q,--quiet}'[suppress normal output]'
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)' '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
'*'{-u,--unrestricted}'[reduce level of "smart" searching]' '*'{-u,--unrestricted}'[reduce level of "smart" searching]'
'--stop-on-nonmatch[stop on first non-matching line after a matching one]'
+ operand # Operands + operand # Operands
'(--files --type-list file regexp)1: :_guard "^-*" pattern' '(--files --type-list file regexp)1: :_guard "^-*" pattern'

View File

@ -632,6 +632,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_sort(&mut args); flag_sort(&mut args);
flag_sortr(&mut args); flag_sortr(&mut args);
flag_stats(&mut args); flag_stats(&mut args);
flag_stop_on_nonmatch(&mut args);
flag_text(&mut args); flag_text(&mut args);
flag_threads(&mut args); flag_threads(&mut args);
flag_trim(&mut args); flag_trim(&mut args);
@ -1926,13 +1927,16 @@ Nevertheless, if you only care about matches spanning at most one line, then it
is always better to disable multiline mode. is always better to disable multiline mode.
This flag can be disabled with --no-multiline. This flag can be disabled with --no-multiline.
This overrides the --stop-on-nonmatch flag.
" "
); );
let arg = RGArg::switch("multiline") let arg = RGArg::switch("multiline")
.short("U") .short("U")
.help(SHORT) .help(SHORT)
.long_help(LONG) .long_help(LONG)
.overrides("no-multiline"); .overrides("no-multiline")
.overrides("stop-on-nonmatch");
args.push(arg); args.push(arg);
let arg = RGArg::switch("no-multiline").hidden().overrides("multiline"); let arg = RGArg::switch("no-multiline").hidden().overrides("multiline");
@ -2854,6 +2858,25 @@ This flag can be disabled with --no-stats.
args.push(arg); args.push(arg);
} }
/// Registers the `--stop-on-nonmatch` switch in the given argument list.
///
/// The switch is declared as overriding `multiline`.
fn flag_stop_on_nonmatch(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Stop searching after a non-match.";
    const LONG: &str = long!(
        "\
Enabling this option will cause ripgrep to stop reading a file once it
encounters a non-matching line after it has encountered a matching line.
This is useful if it is expected that all matches in a given file will be on
sequential lines, for example due to the lines being sorted.
This overrides the -U/--multiline flag.
"
    );
    // Build the switch and append it in one step; no intermediate binding is
    // needed since the argument is not used again.
    args.push(
        RGArg::switch("stop-on-nonmatch")
            .help(SHORT)
            .long_help(LONG)
            .overrides("multiline"),
    );
}
fn flag_text(args: &mut Vec<RGArg>) { fn flag_text(args: &mut Vec<RGArg>) {
const SHORT: &str = "Search binary files as if they were text."; const SHORT: &str = "Search binary files as if they were text.";
const LONG: &str = long!( const LONG: &str = long!(

View File

@ -821,7 +821,8 @@ impl ArgMatches {
.before_context(ctx_before) .before_context(ctx_before)
.after_context(ctx_after) .after_context(ctx_after)
.passthru(self.is_present("passthru")) .passthru(self.is_present("passthru"))
.memory_map(self.mmap_choice(paths)); .memory_map(self.mmap_choice(paths))
.stop_on_nonmatch(self.is_present("stop-on-nonmatch"));
match self.encoding()? { match self.encoding()? {
EncodingMode::Some(enc) => { EncodingMode::Some(enc) => {
builder.encoding(Some(enc)); builder.encoding(Some(enc));

View File

@ -10,6 +10,12 @@ use crate::sink::{
}; };
use grep_matcher::{LineMatchKind, Matcher}; use grep_matcher::{LineMatchKind, Matcher};
/// Outcome of one pass of the fast line-by-line search over a buffer.
///
/// `match_by_line` translates this into the `bool` it returns (`Continue`
/// becomes `true`, `Stop` becomes `false`) or, for `SwitchToSlow`, hands the
/// same buffer over to `match_by_line_slow`.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum FastMatchResult {
    /// The buffer was processed and the search should keep going.
    Continue,
    /// The search should stop.
    Stop,
    /// The fast path cannot continue and the slow line-by-line path must take
    /// over. This is returned once a match has been seen while
    /// `stop_on_nonmatch` is enabled.
    SwitchToSlow,
}
#[derive(Debug)] #[derive(Debug)]
pub struct Core<'s, M: 's, S> { pub struct Core<'s, M: 's, S> {
config: &'s Config, config: &'s Config,
@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> {
last_line_visited: usize, last_line_visited: usize,
after_context_left: usize, after_context_left: usize,
has_sunk: bool, has_sunk: bool,
has_matched: bool,
} }
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
last_line_visited: 0, last_line_visited: 0,
after_context_left: 0, after_context_left: 0,
has_sunk: false, has_sunk: false,
has_matched: false,
}; };
if !core.searcher.multi_line_with_matcher(&core.matcher) { if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() { if core.is_line_by_line_fast() {
@ -109,7 +117,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> { pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() { if self.is_line_by_line_fast() {
self.match_by_line_fast(buf) match self.match_by_line_fast(buf)? {
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
FastMatchResult::Continue => Ok(true),
FastMatchResult::Stop => Ok(false),
}
} else { } else {
self.match_by_line_slow(buf) self.match_by_line_slow(buf)
} }
@ -270,7 +282,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
} }
}; };
self.set_pos(line.end()); self.set_pos(line.end());
if matched != self.config.invert_match { let success = matched != self.config.invert_match;
if success {
self.has_matched = true;
if !self.before_context_by_line(buf, line.start())? { if !self.before_context_by_line(buf, line.start())? {
return Ok(false); return Ok(false);
} }
@ -286,40 +300,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
return Ok(false); return Ok(false);
} }
} }
if self.config.stop_on_nonmatch && !success && self.has_matched {
return Ok(false);
}
} }
Ok(true) Ok(true)
} }
fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> { fn match_by_line_fast(
debug_assert!(!self.config.passthru); &mut self,
buf: &[u8],
) -> Result<FastMatchResult, S::Error> {
use FastMatchResult::*;
debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() { while !buf[self.pos()..].is_empty() {
if self.config.stop_on_nonmatch && self.has_matched {
return Ok(SwitchToSlow);
}
if self.config.invert_match { if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? { if !self.match_by_line_fast_invert(buf)? {
return Ok(false); return Ok(Stop);
} }
} else if let Some(line) = self.find_by_line_fast(buf)? { } else if let Some(line) = self.find_by_line_fast(buf)? {
self.has_matched = true;
if self.config.max_context() > 0 { if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? { if !self.after_context_by_line(buf, line.start())? {
return Ok(false); return Ok(Stop);
} }
if !self.before_context_by_line(buf, line.start())? { if !self.before_context_by_line(buf, line.start())? {
return Ok(false); return Ok(Stop);
} }
} }
self.set_pos(line.end()); self.set_pos(line.end());
if !self.sink_matched(buf, &line)? { if !self.sink_matched(buf, &line)? {
return Ok(false); return Ok(Stop);
} }
} else { } else {
break; break;
} }
} }
if !self.after_context_by_line(buf, buf.len())? { if !self.after_context_by_line(buf, buf.len())? {
return Ok(false); return Ok(Stop);
} }
self.set_pos(buf.len()); self.set_pos(buf.len());
Ok(true) Ok(Continue)
} }
#[inline(always)] #[inline(always)]
@ -344,6 +369,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if invert_match.is_empty() { if invert_match.is_empty() {
return Ok(true); return Ok(true);
} }
self.has_matched = true;
if !self.after_context_by_line(buf, invert_match.start())? { if !self.after_context_by_line(buf, invert_match.start())? {
return Ok(false); return Ok(false);
} }
@ -577,6 +603,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.config.passthru { if self.config.passthru {
return false; return false;
} }
if self.config.stop_on_nonmatch && self.has_matched {
return false;
}
if let Some(line_term) = self.matcher.line_terminator() { if let Some(line_term) = self.matcher.line_terminator() {
if line_term == self.config.line_term { if line_term == self.config.line_term {
return true; return true;

View File

@ -173,6 +173,9 @@ pub struct Config {
encoding: Option<Encoding>, encoding: Option<Encoding>,
/// Whether to do automatic transcoding based on a BOM or not. /// Whether to do automatic transcoding based on a BOM or not.
bom_sniffing: bool, bom_sniffing: bool,
/// Whether to stop searching when a non-matching line is found after a
/// matching line.
stop_on_nonmatch: bool,
} }
impl Default for Config { impl Default for Config {
@ -190,6 +193,7 @@ impl Default for Config {
multi_line: false, multi_line: false,
encoding: None, encoding: None,
bom_sniffing: true, bom_sniffing: true,
stop_on_nonmatch: false,
} }
} }
} }
@ -555,6 +559,19 @@ impl SearcherBuilder {
self.config.bom_sniffing = yes; self.config.bom_sniffing = yes;
self self
} }
/// Stop searching a file when a non-matching line is found after a
/// matching line.
///
/// This is useful for searching sorted files where it is expected that all
/// the matches will be on adjacent lines.
///
/// This is disabled by default.
pub fn stop_on_nonmatch(
&mut self,
stop_on_nonmatch: bool,
) -> &mut SearcherBuilder {
self.config.stop_on_nonmatch = stop_on_nonmatch;
self
}
} }
/// A searcher executes searches over a haystack and writes results to a caller /// A searcher executes searches over a haystack and writes results to a caller
@ -838,6 +855,13 @@ impl Searcher {
self.config.multi_line self.config.multi_line
} }
/// Returns true if and only if this searcher is configured to stop when it
/// finds a non-matching line after a matching one.
#[inline]
pub fn stop_on_nonmatch(&self) -> bool {
self.config.stop_on_nonmatch
}
/// Returns true if and only if this searcher will choose a multi-line /// Returns true if and only if this searcher will choose a multi-line
/// strategy given the provided matcher. /// strategy given the provided matcher.
/// ///

View File

@ -992,3 +992,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "δ"); dir.create("test", "δ");
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err(); cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
}); });
// See: https://github.com/BurntSushi/ripgrep/issues/1790
//
// The pattern matches line2, line3 and line5. With --stop-on-nonmatch, the
// expected output contains only line2 and line3: line4 is the first
// non-matching line after a match, so line5 must not be reported.
rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "line1\nline2\nline3\nline4\nline5");
cmd.args(&["--stop-on-nonmatch", "[235]"]);
eqnice!("test:line2\ntest:line3\n", cmd.stdout());
});