1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2024-12-12 19:18:24 +02:00

Add --max-filesize option to cli

The --max-filesize option allows filtering files which are larger than
the specified limit. This is potentially useful if one is attempting to
search a number of large files without common file-types/suffixes.

See #369.
This commit is contained in:
tiehuis 2017-02-28 17:53:52 +13:00 committed by Andrew Gallant
parent 49fd668712
commit 714ae82241
7 changed files with 96 additions and 3 deletions

View File

@ -45,6 +45,7 @@ termcolor = { version = "0.3.0", path = "termcolor" }
[build-dependencies]
clap = "2.18"
lazy_static = "0.2"
regex = "0.2.1"
[features]
avx-accel = ["bytecount/avx-accel"]

View File

@ -2,6 +2,7 @@
extern crate clap;
#[macro_use]
extern crate lazy_static;
extern crate regex;
use std::env;
use std::fs;

View File

@ -1,4 +1,4 @@
.\" Automatically generated by Pandoc 1.19.1
.\" Automatically generated by Pandoc 1.19.2.1
.\"
.TH "rg" "1"
.hy
@ -275,11 +275,21 @@ Follow symlinks.
.RS
.RE
.TP
.B \-m, \-\-max\-count NUM
.B \-m, \-\-max\-count \f[I]NUM\f[]
Limit the number of matching lines per file searched to NUM.
.RS
.RE
.TP
.B \-\-max\-filesize \f[I]NUM\f[]+\f[I]SUFFIX\f[]?
Ignore files larger than \f[I]NUM\f[] in size.
Directories will never be ignored.
.RS
.PP
\f[I]SUFFIX\f[] is optional and may be one of K, M or G.
These correspond to kilobytes, megabytes and gigabytes respectively, each
a multiple of 1024 (K = 1024 bytes, M = 1024 K, G = 1024 M).
If omitted the input is treated as bytes.
.RE
.TP
.B \-\-maxdepth \f[I]NUM\f[]
Descend at most NUM directories below the command line arguments.
A value of zero searches only the starting\-points themselves.

View File

@ -187,9 +187,16 @@ Project home page: https://github.com/BurntSushi/ripgrep
-L, --follow
: Follow symlinks.
-m, --max-count NUM
-m, --max-count *NUM*
: Limit the number of matching lines per file searched to NUM.
--max-filesize *NUM*+*SUFFIX*?
: Ignore files larger than *NUM* in size. Directories will never be ignored.
*SUFFIX* is optional and may be one of K, M or G. These correspond to
kilobytes, megabytes and gigabytes respectively, each a multiple of 1024
(K = 1024 bytes, M = 1024 K, G = 1024 M). If omitted the input is
treated as bytes.
--maxdepth *NUM*
: Descend at most NUM directories below the command line arguments.
A value of zero searches only the starting-points themselves.

View File

@ -1,6 +1,7 @@
use std::collections::HashMap;
use clap::{App, AppSettings, Arg, ArgSettings};
use regex::Regex;
const ABOUT: &'static str = "
ripgrep (rg) recursively searches your current directory for a regex pattern.
@ -145,6 +146,9 @@ fn app<F>(next_line_help: bool, doc: F) -> App<'static, 'static>
.arg(flag("max-count")
.short("m").value_name("NUM").takes_value(true)
.validator(validate_number))
.arg(flag("max-filesize")
.value_name("NUM+SUFFIX?").takes_value(true)
.validator(validate_max_filesize))
.arg(flag("maxdepth")
.value_name("NUM").takes_value(true)
.validator(validate_number))
@ -371,6 +375,13 @@ lazy_static! {
doc!(h, "max-count",
"Limit the number of matches.",
"Limit the number of matching lines per file searched to NUM.");
doc!(h, "max-filesize",
"Ignore files larger than NUM in size.",
"Ignore files larger than NUM in size. Does not ignore directories. \
\n\nThe input format accepts suffixes of K, M or G which \
correspond to kilobytes, megabytes and gigabytes. If no suffix is \
provided the input is treated as bytes. \
\n\nExample: --max-filesize 50K or --max-filesize 80M");
doc!(h, "maxdepth",
"Descend at most NUM directories.",
"Limit the depth of directory traversal to NUM levels beyond \
@ -491,3 +502,24 @@ lazy_static! {
/// Validator for flags whose value must be a non-negative integer.
///
/// Succeeds when `s` parses as a `usize`; otherwise returns the parse
/// error rendered as a string.
fn validate_number(s: String) -> Result<(), String> {
    match s.parse::<usize>() {
        Ok(_) => Ok(()),
        Err(parse_err) => Err(parse_err.to_string()),
    }
}
/// Validator for the `--max-filesize` flag.
///
/// The accepted format is one or more ASCII digits, optionally followed by
/// exactly one of the suffixes `K`, `M` or `G`, which scale the number by
/// 1024, 1024^2 and 1024^3 respectively.
///
/// # Errors
///
/// Returns an error message when:
///
/// * the input does not have the shape described above,
/// * the digits do not fit in a `u64`, or
/// * the digits fit in a `u64` but the value scaled by the suffix does not.
///   Without this check such input would pass validation and then overflow
///   when the byte count is actually computed.
fn validate_max_filesize(s: String) -> Result<(), String> {
    // Split off the optional suffix. The suffixes are single ASCII bytes, so
    // slicing one byte off the end always lands on a char boundary.
    let (digits, multiplier): (&str, u64) = match s.as_bytes().last() {
        Some(&b'K') => (&s[..s.len() - 1], 1 << 10),
        Some(&b'M') => (&s[..s.len() - 1], 1 << 20),
        Some(&b'G') => (&s[..s.len() - 1], 1 << 30),
        _ => (&s[..], 1),
    };
    // Check the digits explicitly: `u64`'s parser on its own would also
    // accept a leading `+`, which is not part of the documented format.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(10)) {
        return Err("invalid format for max-filesize argument".to_string());
    }
    let value = match digits.parse::<u64>() {
        Ok(value) => value,
        Err(err) => return Err(err.to_string()),
    };
    match value.checked_mul(multiplier) {
        Some(_) => Ok(()),
        None => Err("max-filesize argument is too large".to_string()),
    }
}

View File

@ -55,6 +55,7 @@ pub struct Args {
line_number: bool,
line_per_match: bool,
max_count: Option<u64>,
max_filesize: Option<u64>,
maxdepth: Option<usize>,
mmap: bool,
no_ignore: bool,
@ -285,6 +286,7 @@ impl Args {
wd.follow_links(self.follow);
wd.hidden(!self.hidden);
wd.max_depth(self.maxdepth);
wd.max_filesize(self.max_filesize);
wd.overrides(self.glob_overrides.clone());
wd.types(self.types.clone());
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
@ -342,6 +344,7 @@ impl<'a> ArgMatches<'a> {
line_number: self.line_number(),
line_per_match: self.is_present("vimgrep"),
max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
max_filesize: try!(self.max_filesize()),
maxdepth: try!(self.usize_of("maxdepth")),
mmap: mmap,
no_ignore: self.no_ignore(),
@ -779,6 +782,33 @@ impl<'a> ArgMatches<'a> {
btypes.build().map_err(From::from)
}
/// Parses the max-filesize argument option into a byte count.
///
/// Returns `Ok(None)` when the flag was not given. Otherwise the value must
/// be a run of digits optionally followed by one of `K`, `M` or `G`, which
/// scale the number by 1024, 1024^2 and 1024^3 respectively.
///
/// # Errors
///
/// Returns an error if the value does not match the expected format, if
/// the digits do not fit in a `u64`, or if the scaled byte count would
/// overflow a `u64`.
fn max_filesize(&self) -> Result<Option<u64>> {
    use regex::Regex;

    let max_filesize = match self.value_of_lossy("max-filesize") {
        Some(x) => x,
        None => return Ok(None),
    };
    let re = Regex::new(r#"^(\d+)([KMG])?$"#).unwrap();
    let caps = try!(re.captures(&max_filesize)
        .ok_or("invalid format for max-filesize argument"));

    let value = match caps.get(1) {
        Some(digits) => try!(digits.as_str().parse::<u64>()),
        // Group 1 is mandatory in the pattern, so a successful match always
        // carries it; this arm only mirrors the previous behaviour.
        None => return Ok(None),
    };
    let multiplier: u64 = match caps.get(2).map(|x| x.as_str()) {
        None => 1,
        Some("K") => 1 << 10,
        Some("M") => 1 << 20,
        Some("G") => 1 << 30,
        _ => return Err(From::from("invalid suffix for max-filesize argument")),
    };
    // A plain `*` here would panic in debug builds and silently wrap in
    // release builds for inputs such as `18446744073709551615G`.
    match value.checked_mul(multiplier) {
        Some(bytes) => Ok(Some(bytes)),
        None => Err(From::from("max-filesize argument is too large")),
    }
}
/// Returns true if ignore files should be ignored.
fn no_ignore(&self) -> bool {
self.is_present("no-ignore")

View File

@ -432,6 +432,18 @@ sherlock!(context_line_numbers, "world|attached",
assert_eq!(lines, expected);
});
// --max-filesize with an all-digit value that is too large to be parsed
// as a u64. The test expects `assert_err` to hold for the command
// (presumably: rg exits unsuccessfully -- confirm against WorkDir).
sherlock!(max_filesize_parse_error_length, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
// Twenty 4s: larger than u64::MAX (18446744073709551615).
cmd.arg("--max-filesize").arg("44444444444444444444");
wd.assert_err(&mut cmd);
});
// --max-filesize with a suffix outside the accepted set. The test expects
// `assert_err` to hold for the command (presumably: rg exits
// unsuccessfully -- confirm against WorkDir).
sherlock!(max_filesize_parse_error_suffix, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
// Only upper-case K, M and G are accepted; lower-case "k" is not.
cmd.arg("--max-filesize").arg("45k");
wd.assert_err(&mut cmd);
});
sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".sherlock", hay::SHERLOCK);