1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-03-03 14:32:22 +02:00

search: add a --count-matches flag

This commit introduces a new flag, --count-matches, which causes ripgrep
to report, for each file, the number of individual matches instead of the
number of matching lines.

Closes #566, Closes #814
This commit is contained in:
Balaji Sivaraman 2018-02-20 21:03:07 +05:30 committed by Andrew Gallant
parent 96f73293c0
commit 27fc9f2fd3
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
7 changed files with 147 additions and 7 deletions

View File

@ -27,7 +27,8 @@ _rg() {
'(-A -B -C --after-context --before-context --context)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines'
'(-b --byte-offset)'{-b,--byte-offset}'[print the 0-based byte offset for each matching line]'
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
'(-c --count --passthrough --passthru)'{-c,--count}'[only show count of matches for each file]'
'(-c --count --count-matches --passthrough --passthru)'{-c,--count}'[only show count of matching lines for each file]'
'(--count-matches -c --count --passthrough --passthru)--count-matches[only show count of individual matches for each file]'
'--debug[show debug messages]'
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size'
'(-E --encoding)'{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'

View File

@ -517,6 +517,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_context(&mut args);
flag_context_separator(&mut args);
flag_count(&mut args);
flag_count_matches(&mut args);
flag_debug(&mut args);
flag_dfa_size_limit(&mut args);
flag_encoding(&mut args);
@ -636,7 +637,8 @@ This overrides the --context flag.
}
fn flag_byte_offset(args: &mut Vec<RGArg>) {
const SHORT: &str = "Print the 0-based byte offset for each matching line.";
const SHORT: &str =
"Print the 0-based byte offset for each matching line.";
const LONG: &str = long!("\
Print the 0-based byte offset within the input file
before each line of output. If -o (--only-matching) is
@ -771,7 +773,7 @@ sequences like \\x7F or \\t may be used. The default value is --.
}
fn flag_count(args: &mut Vec<RGArg>) {
const SHORT: &str = "Only show the count of matches for each file.";
const SHORT: &str = "Only show the count of matching lines for each file.";
const LONG: &str = long!("\
This flag suppresses normal output and shows the number of lines that match
the given patterns for each file searched. Each file containing a match has its
@ -781,9 +783,32 @@ that match and not the total number of matches.
If only one file is given to ripgrep, then only the count is printed if there
is a match. The --with-filename flag can be used to force printing the file
path in this case.
This overrides the --count-matches flag.
");
let arg = RGArg::switch("count").short("c")
.help(SHORT).long_help(LONG);
.help(SHORT).long_help(LONG).overrides("count-matches");
args.push(arg);
}
fn flag_count_matches(args: &mut Vec<RGArg>) {
    // Short help text, handed to `help` below.
    const SHORT: &str =
        "Only show the count of individual matches for each file.";
    // Long help text, handed to `long_help` below.
    const LONG: &str = long!("\
This flag suppresses normal output and shows the number of individual
matches of the given patterns for each file searched. Each file
containing matches has its path and match count printed on each line.
Note that this reports the total number of individual matches and not
the number of lines that match.
If only one file is given to ripgrep, then only the count is printed if there
is a match. The --with-filename flag can be used to force printing the file
path in this case.
This overrides the --count flag.
");
    // Registered as a long-only switch (no short form) that is declared to
    // override --count, matching the last sentence of the long help.
    args.push(
        RGArg::switch("count-matches")
            .help(SHORT)
            .long_help(LONG)
            .overrides("count"),
    );
}

View File

@ -41,6 +41,7 @@ pub struct Args {
column: bool,
context_separator: Vec<u8>,
count: bool,
count_matches: bool,
encoding: Option<&'static Encoding>,
files_with_matches: bool,
files_without_matches: bool,
@ -200,6 +201,7 @@ impl Args {
pub fn file_separator(&self) -> Option<Vec<u8>> {
let contextless =
self.count
|| self.count_matches
|| self.files_with_matches
|| self.files_without_matches;
let use_heading_sep = self.heading && !contextless;
@ -262,6 +264,7 @@ impl Args {
.before_context(self.before_context)
.byte_offset(self.byte_offset)
.count(self.count)
.count_matches(self.count_matches)
.encoding(self.encoding)
.files_with_matches(self.files_with_matches)
.files_without_matches(self.files_without_matches)
@ -358,6 +361,7 @@ impl<'a> ArgMatches<'a> {
let mmap = self.mmap(&paths)?;
let with_filename = self.with_filename(&paths);
let (before_context, after_context) = self.contexts()?;
let (count, count_matches) = self.counts();
let quiet = self.is_present("quiet");
let args = Args {
paths: paths,
@ -368,7 +372,8 @@ impl<'a> ArgMatches<'a> {
colors: self.color_specs()?,
column: self.column(),
context_separator: self.context_separator(),
count: self.is_present("count"),
count: count,
count_matches: count_matches,
encoding: self.encoding()?,
files_with_matches: self.is_present("files-with-matches"),
files_without_matches: self.is_present("files-without-match"),
@ -732,6 +737,22 @@ impl<'a> ArgMatches<'a> {
})
}
/// Resolves the two counting modes requested on the command line.
///
/// The result is `(count, count_matches)`, where `count` corresponds to
/// -c/--count and `count_matches` corresponds to --count-matches.
///
/// When --count-matches is combined with --invert-match, the pair
/// `(true, false)` is returned, i.e. the request is treated exactly as
/// if --count and --invert-match had been given, so inverted matches are
/// counted as per existing behavior.
fn counts(&self) -> (bool, bool) {
    let wants_line_count = self.is_present("count");
    let wants_match_count = self.is_present("count-matches");
    if wants_match_count && self.is_present("invert-match") {
        // Fall back to plain --count semantics for inverted searches.
        (true, false)
    } else {
        (wants_line_count, wants_match_count)
    }
}
/// Returns the user's color choice based on command line parameters and
/// environment.
fn color_choice(&self) -> termcolor::ColorChoice {

View File

@ -22,6 +22,7 @@ pub struct BufferSearcher<'a, W: 'a> {
path: &'a Path,
buf: &'a [u8],
match_line_count: u64,
match_count: Option<u64>,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_line: usize,
@ -41,6 +42,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
path: path,
buf: buf,
match_line_count: 0,
match_count: None,
line_count: None,
byte_offset: None,
last_line: 0,
@ -65,6 +67,15 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
self
}
/// If enabled, searching will print the count of individual matches
/// instead of each match.
///
/// This stores `yes` in `opts.count_matches` and returns the searcher by
/// value so that further builder calls can be chained. It counts
/// individual matches, whereas `count` reports the number of matching
/// lines.
///
/// Disabled by default.
pub fn count_matches(mut self, yes: bool) -> Self {
self.opts.count_matches = yes;
self
}
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
@ -135,6 +146,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
// The memory map searcher uses one contiguous block of bytes, so the
// offsets given the printer are sufficient to compute the byte offset.
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
self.match_count = if self.opts.count_matches { Some(0) } else { None };
let mut last_end = 0;
for m in self.grep.iter(self.buf) {
if self.opts.invert_match {
@ -153,6 +165,10 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
}
if self.opts.count && self.match_line_count > 0 {
self.printer.path_count(self.path, self.match_line_count);
} else if self.opts.count_matches
&& self.match_count.map_or(false, |c| c > 0)
{
self.printer.path_count(self.path, self.match_count.unwrap());
}
if self.opts.files_with_matches && self.match_line_count > 0 {
self.printer.path(self.path);
@ -163,9 +179,19 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
self.match_line_count
}
/// Adds the number of regex matches found in `buf[start..end]` to the
/// running count of individual matches.
///
/// This is a no-op when individual match counting is not active, i.e.
/// when `match_count` is `None`; in that case the range is not scanned.
#[inline(always)]
fn count_individual_matches(&mut self, start: usize, end: usize) {
    match self.match_count {
        Some(ref mut total) => {
            // Re-scan just this range so every occurrence inside it is
            // tallied, not only the one that caused it to be reported.
            let hits =
                self.grep.regex().find_iter(&self.buf[start..end]).count();
            *total += hits as u64;
        }
        None => {}
    }
}
#[inline(always)]
pub fn print_match(&mut self, start: usize, end: usize) {
self.match_line_count += 1;
self.count_individual_matches(start, end);
if self.opts.skip_matches() {
return;
}
@ -317,6 +343,13 @@ and exhibited clearly, with a label attached.\
assert_eq!(out, "/baz.rs:2\n");
}
// With individual match counting enabled, only the path and the total
// number of matches are printed.
#[test]
fn count_matches() {
    let (_, printed) = search("the", SHERLOCK, |searcher| {
        searcher.count_matches(true)
    });
    assert_eq!(printed, "/baz.rs:4\n");
}
#[test]
fn files_with_matches() {
let (count, out) = search(

View File

@ -68,6 +68,7 @@ pub struct Searcher<'a, R, W: 'a> {
path: &'a Path,
haystack: R,
match_line_count: u64,
match_count: Option<u64>,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_match: Match,
@ -83,6 +84,7 @@ pub struct Options {
pub before_context: usize,
pub byte_offset: bool,
pub count: bool,
pub count_matches: bool,
pub files_with_matches: bool,
pub files_without_matches: bool,
pub eol: u8,
@ -100,6 +102,7 @@ impl Default for Options {
before_context: 0,
byte_offset: false,
count: false,
count_matches: false,
files_with_matches: false,
files_without_matches: false,
eol: b'\n',
@ -114,11 +117,11 @@ impl Default for Options {
}
impl Options {
/// Several options (--quiet, --count, --files-with-matches,
/// Several options (--quiet, --count, --count-matches, --files-with-matches,
/// --files-without-match) imply that we shouldn't ever display matches.
pub fn skip_matches(&self) -> bool {
self.count || self.files_with_matches || self.files_without_matches
|| self.quiet
|| self.quiet || self.count_matches
}
/// Some options (--quiet, --files-with-matches, --files-without-match)
@ -167,6 +170,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
path: path,
haystack: haystack,
match_line_count: 0,
match_count: None,
line_count: None,
byte_offset: None,
last_match: Match::default(),
@ -208,6 +212,15 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self
}
/// If enabled, searching will print the count of individual matches
/// instead of each match.
///
/// This stores `yes` in `opts.count_matches` and returns the searcher by
/// value so that further builder calls can be chained. It counts
/// individual matches, whereas `count` reports the number of matching
/// lines.
///
/// Disabled by default.
pub fn count_matches(mut self, yes: bool) -> Self {
self.opts.count_matches = yes;
self
}
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
@ -274,6 +287,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
self.match_line_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
self.match_count = if self.opts.count_matches { Some(0) } else { None };
self.last_match = Match::default();
self.after_context_remaining = 0;
while !self.terminate() {
@ -326,6 +340,8 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
if self.match_line_count > 0 {
if self.opts.count {
self.printer.path_count(self.path, self.match_line_count);
} else if self.opts.count_matches {
self.printer.path_count(self.path, self.match_count.unwrap());
} else if self.opts.files_with_matches {
self.printer.path(self.path);
}
@ -428,6 +444,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
#[inline(always)]
fn print_match(&mut self, start: usize, end: usize) {
self.match_line_count += 1;
self.count_individual_matches(start, end);
if self.opts.skip_matches() {
return;
}
@ -472,6 +489,15 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
}
}
/// Adds the number of regex matches found in `inp.buf[start..end]` to the
/// running count of individual matches.
///
/// This is a no-op when individual match counting is not active, i.e.
/// when `match_count` is `None`; in that case the range is not scanned.
#[inline(always)]
fn count_individual_matches(&mut self, start: usize, end: usize) {
    match self.match_count {
        Some(ref mut total) => {
            // Re-scan just this range so every occurrence inside it is
            // tallied, not only the one that caused it to be reported.
            let hits = self
                .grep
                .regex()
                .find_iter(&self.inp.buf[start..end])
                .count();
            *total += hits as u64;
        }
        None => {}
    }
}
#[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
@ -1066,6 +1092,13 @@ fn main() {
");
}
// With individual match counting enabled, only the path and the total
// number of matches are printed.
#[test]
fn count_matches() {
    let (_, printed) = search_smallcap("the", SHERLOCK, |searcher| {
        searcher.count_matches(true)
    });
    assert_eq!(printed, "/baz.rs:4\n");
}
#[test]
fn files_with_matches() {
let (count, out) = search_smallcap(

View File

@ -35,6 +35,7 @@ struct Options {
before_context: usize,
byte_offset: bool,
count: bool,
count_matches: bool,
files_with_matches: bool,
files_without_matches: bool,
eol: u8,
@ -56,6 +57,7 @@ impl Default for Options {
before_context: 0,
byte_offset: false,
count: false,
count_matches: false,
files_with_matches: false,
files_without_matches: false,
eol: b'\n',
@ -126,6 +128,15 @@ impl WorkerBuilder {
self
}
/// If enabled, searching will print the count of individual matches
/// instead of each match.
///
/// This stores `yes` in the builder's `opts.count_matches` and returns
/// the builder by value so that further calls can be chained. The worker
/// later forwards this option to the searchers it constructs.
///
/// Disabled by default.
pub fn count_matches(mut self, yes: bool) -> Self {
self.opts.count_matches = yes;
self
}
/// Set the encoding to use to read each file.
///
/// If the encoding is `None` (the default), then the encoding is
@ -297,6 +308,7 @@ impl Worker {
.before_context(self.opts.before_context)
.byte_offset(self.opts.byte_offset)
.count(self.opts.count)
.count_matches(self.opts.count_matches)
.files_with_matches(self.opts.files_with_matches)
.files_without_matches(self.opts.files_without_matches)
.eol(self.opts.eol)
@ -337,6 +349,7 @@ impl Worker {
Ok(searcher
.byte_offset(self.opts.byte_offset)
.count(self.opts.count)
.count_matches(self.opts.count_matches)
.files_with_matches(self.opts.files_with_matches)
.files_without_matches(self.opts.files_without_matches)
.eol(self.opts.eol)

View File

@ -412,6 +412,20 @@ sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, expected);
});
// --count-matches prints the file path followed by the number of individual
// matches of the pattern.
sherlock!(count_matches, "the", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--count-matches");

    let stdout: String = wd.stdout(&mut cmd);
    assert_eq!(stdout, "sherlock:4\n");
});
// Combining --count-matches with --invert-match is expected to behave like
// --count with --invert-match.
sherlock!(count_matches_inverted, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.args(&["--count-matches", "--invert-match"]);

    let stdout: String = wd.stdout(&mut cmd);
    assert_eq!(stdout, "sherlock:4\n");
});
sherlock!(files_with_matches, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--files-with-matches");
let lines: String = wd.stdout(&mut cmd);