1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-02 20:45:38 +02:00

output: add --stats flag

This commit provides basic support for a --stats flag, which will print
various aggregate statistics about a search after all of the results
have been printed. This is mostly intended to support a similar feature
found in the Silver Searcher. Note though that we don't emit the total
bytes searched; this is a first pass at an implementation and we can
improve upon it later.

Closes #411, Closes #799
This commit is contained in:
Balaji Sivaraman 2018-02-12 22:47:22 +05:30 committed by Andrew Gallant
parent 11a8f0eaf0
commit 00520b30f5
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
5 changed files with 138 additions and 4 deletions

View File

@ -91,6 +91,7 @@ _rg() {
'(-e -f --file --files --regexp --type-list)1: :_rg_pattern'
'(--type-list)*:file:_files'
'(-z --search-zip)'{-z,--search-zip}'[search in compressed files]'
"(--stats)--stats[print stats about this search]"
)
[[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && {

View File

@ -559,6 +559,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_search_zip(&mut args);
flag_smart_case(&mut args);
flag_sort_files(&mut args);
flag_stats(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
flag_type(&mut args);
@ -1488,6 +1489,25 @@ This flag can be disabled with --no-sort-files.
args.push(arg);
}
/// Appends the `--stats` switch to the list of command line arguments.
///
/// The switch is built with `RGArg::switch("stats")` and carries a one-line
/// summary (`SHORT`) plus the longer description (`LONG`) defined below.
fn flag_stats(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Print statistics about this ripgrep search.";
    // The continuation lines stay at column zero: any leading whitespace
    // after the first line would become part of the help text.
    const LONG: &str = long!("\
Print aggregate statistics about this ripgrep search. When this flag is
present, ripgrep will print the following stats to stdout at the end of the
search: number of matched lines, number of files with matches, number of files
searched, and the time taken for the entire search to complete.
This set of aggregate statistics may expand over time.
Note that this flag has no effect if --files, --files-with-matches or
--files-without-match is passed.");

    args.push(RGArg::switch("stats").help(SHORT).long_help(LONG));
}
fn flag_text(args: &mut Vec<RGArg>) {
const SHORT: &str = "Search binary files as if they were text.";
const LONG: &str = long!("\

View File

@ -79,7 +79,8 @@ pub struct Args {
type_list: bool,
types: Types,
with_filename: bool,
search_zip_files: bool
search_zip_files: bool,
stats: bool
}
impl Args {
@ -221,6 +222,12 @@ impl Args {
self.max_count == Some(0)
}
/// Returns whether ripgrep should track stats for this run.
///
/// This reports the value of the `stats` field. The search routines consult
/// it to decide whether to count paths that contained matches and whether
/// to print the aggregate summary once the search has finished.
pub fn stats(&self) -> bool {
self.stats
}
/// Create a new writer for single-threaded searching with color support.
pub fn stdout(&self) -> termcolor::StandardStream {
termcolor::StandardStream::stdout(self.color_choice)
@ -411,7 +418,8 @@ impl<'a> ArgMatches<'a> {
type_list: self.is_present("type-list"),
types: self.types()?,
with_filename: with_filename,
search_zip_files: self.is_present("search-zip")
search_zip_files: self.is_present("search-zip"),
stats: self.stats()
};
if args.mmap {
debug!("will try to use memory maps");
@ -825,6 +833,19 @@ impl<'a> ArgMatches<'a> {
}
}
/// Returns whether stats should be tracked for this run of ripgrep.
///
/// Stats are tracked only when --stats was given and neither
/// --files-with-matches nor --files-without-match is present, i.e. when
/// we have not been asked to merely list files with or without matches.
///
/// NOTE(review): --files is not checked here; presumably that mode never
/// reaches the code that prints stats -- confirm against the caller.
fn stats(&self) -> bool {
    let listing_only =
        self.is_present("files-with-matches")
        || self.is_present("files-without-match");
    !listing_only && self.is_present("stats")
}
/// Returns the approximate number of threads that ripgrep should use.
fn threads(&self) -> Result<usize> {
if self.is_present("sort-files") {

View File

@ -27,6 +27,7 @@ use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc;
use std::thread;
use std::time::{Duration, Instant};
use args::Args;
use worker::Work;
@ -85,16 +86,19 @@ fn run(args: Arc<Args>) -> Result<u64> {
}
fn run_parallel(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let bufwtr = Arc::new(args.buffer_writer());
let quiet_matched = args.quiet_matched();
let paths_searched = Arc::new(AtomicUsize::new(0));
let match_line_count = Arc::new(AtomicUsize::new(0));
let paths_matched = Arc::new(AtomicUsize::new(0));
args.walker_parallel().run(|| {
let args = Arc::clone(args);
let quiet_matched = quiet_matched.clone();
let paths_searched = paths_searched.clone();
let match_line_count = match_line_count.clone();
let paths_matched = paths_matched.clone();
let bufwtr = Arc::clone(&bufwtr);
let mut buf = bufwtr.buffer();
let mut worker = args.worker();
@ -129,6 +133,9 @@ fn run_parallel(args: &Arc<Args>) -> Result<u64> {
if quiet_matched.set_match(count > 0) {
return Quit;
}
if args.stats() && count > 0 {
paths_matched.fetch_add(1, Ordering::SeqCst);
}
}
// BUG(burntsushi): We should handle this error instead of ignoring
// it. See: https://github.com/BurntSushi/ripgrep/issues/200
@ -141,15 +148,28 @@ fn run_parallel(args: &Arc<Args>) -> Result<u64> {
eprint_nothing_searched();
}
}
Ok(match_line_count.load(Ordering::SeqCst) as u64)
let match_line_count = match_line_count.load(Ordering::SeqCst) as u64;
let paths_searched = paths_searched.load(Ordering::SeqCst) as u64;
let paths_matched = paths_matched.load(Ordering::SeqCst) as u64;
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
}
Ok(match_line_count)
}
fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let stdout = args.stdout();
let mut stdout = stdout.lock();
let mut worker = args.worker();
let mut paths_searched: u64 = 0;
let mut match_line_count = 0;
let mut paths_matched: u64 = 0;
for result in args.walker() {
let dent = match get_or_log_dir_entry(
result,
@ -170,18 +190,30 @@ fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
}
}
paths_searched += 1;
match_line_count +=
let count =
if dent.is_stdin() {
worker.run(&mut printer, Work::Stdin)
} else {
worker.run(&mut printer, Work::DirEntry(dent))
};
match_line_count += count;
if args.stats() && count > 0 {
paths_matched += 1;
}
}
if !args.paths().is_empty() && paths_searched == 0 {
if !args.no_messages() {
eprint_nothing_searched();
}
}
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
}
Ok(match_line_count)
}
@ -373,6 +405,22 @@ fn eprint_nothing_searched() {
Try running again with --debug.");
}
/// Prints the aggregate search statistics to stdout.
///
/// The report starts with a blank line and then lists, one per line: the
/// number of matched lines, the number of files that contained matches, the
/// number of files searched, and the elapsed time in seconds formatted with
/// three decimal places.
fn print_stats(
    match_count: u64,
    paths_searched: u64,
    paths_matched: u64,
    time_elapsed: Duration,
) {
    // Fold the whole seconds and the nanosecond remainder into a single
    // fractional number of seconds.
    let whole_secs = time_elapsed.as_secs() as f64;
    let frac_secs = f64::from(time_elapsed.subsec_nanos()) * 1e-9;
    let seconds = whole_secs + frac_secs;

    println!(
        "\n{} matched lines\n\
         {} files contained matches\n\
         {} files searched\n\
         {:.3} seconds",
        match_count, paths_matched, paths_searched, seconds
    );
}
// The Rust standard library suppresses the default SIGPIPE behavior, so that
// writing to a closed pipe doesn't kill the process. The goal is to instead
// handle errors through the normal result mechanism. Ripgrep needs some

View File

@ -1811,6 +1811,50 @@ be, to a very large extent, the result of luck. Sherlock Holmes
assert_eq!(lines, expected);
});
// Checks that --stats appends the aggregate summary (matched lines, files
// with matches, files searched and elapsed time) when one file is searched.
sherlock!(feature_411_single_threaded_search_stats,
|wd: WorkDir, mut cmd: Command| {
    cmd.arg("--stats");

    let lines: String = wd.stdout(&mut cmd);
    let expected_fragments = [
        "2 matched lines",
        "1 files contained matches",
        "1 files searched",
        "seconds",
    ];
    for expected in expected_fragments.iter() {
        // Include the real output in the failure message; a bare boolean
        // comparison would only report "false != true".
        assert!(
            lines.contains(*expected),
            "expected {:?} in --stats output, got:\n{}",
            expected,
            lines
        );
    }
});
// Checks that --stats aggregates counts across files: two copies of the
// Sherlock text are searched, so every count is expected to double.
#[test]
fn feature_411_parallel_search_stats() {
    let wd = WorkDir::new("feature_411");
    wd.create("sherlock_1", hay::SHERLOCK);
    wd.create("sherlock_2", hay::SHERLOCK);

    let mut cmd = wd.command();
    cmd.arg("--stats");
    cmd.arg("Sherlock");

    let lines: String = wd.stdout(&mut cmd);
    let expected_fragments = [
        "4 matched lines",
        "2 files contained matches",
        "2 files searched",
        "seconds",
    ];
    for expected in expected_fragments.iter() {
        // Include the real output in the failure message; a bare boolean
        // comparison would only report "false != true".
        assert!(
            lines.contains(*expected),
            "expected {:?} in --stats output, got:\n{}",
            expected,
            lines
        );
    }
}
// Checks that --stats prints no summary when combined with
// --files-with-matches.
sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--files-with-matches");
    cmd.arg("--stats");

    let lines: String = wd.stdout(&mut cmd);
    // The elapsed-time line is the marker for the stats summary.
    assert!(
        !lines.contains("seconds"),
        "stats were printed despite --files-with-matches:\n{}",
        lines
    );
});
// Checks that --stats prints no summary when combined with
// --files-without-match.
sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--files-without-match");
    cmd.arg("--stats");

    let lines: String = wd.stdout(&mut cmd);
    // The elapsed-time line is the marker for the stats summary.
    assert!(
        !lines.contains("seconds"),
        "stats were printed despite --files-without-match:\n{}",
        lines
    );
});
#[test]
fn feature_740_passthru() {
let wd = WorkDir::new("feature_740");