1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2024-12-12 19:18:24 +02:00

Add dfa-size-limit and regex-size-limit arguments

Fixes #362.
This commit is contained in:
Marc Tiehuis 2017-04-02 09:55:58 +12:00 committed by Andrew Gallant
parent 1f2a9b0306
commit 66efbad871
3 changed files with 139 additions and 23 deletions

View File

@ -41,7 +41,9 @@ OPTIONS:
/// in a `build.rs` script to build shell completion files.
pub fn app() -> App<'static, 'static> {
let arg = |name| {
Arg::with_name(name).help(USAGES[name].short).long_help(USAGES[name].long)
Arg::with_name(name)
.help(USAGES[name].short)
.long_help(USAGES[name].long)
};
let flag = |name| arg(name).long(name);
@ -114,6 +116,8 @@ pub fn app() -> App<'static, 'static> {
.arg(flag("column"))
.arg(flag("context-separator")
.value_name("SEPARATOR").takes_value(true))
.arg(flag("dfa-size-limit")
.value_name("NUM+SUFFIX?").takes_value(true))
.arg(flag("debug"))
.arg(flag("file").short("f")
.value_name("FILE").takes_value(true)
@ -148,6 +152,8 @@ pub fn app() -> App<'static, 'static> {
.arg(flag("path-separator").value_name("SEPARATOR").takes_value(true))
.arg(flag("pretty").short("p"))
.arg(flag("replace").short("r").value_name("ARG").takes_value(true))
.arg(flag("regex-size-limit")
.value_name("NUM+SUFFIX?").takes_value(true))
.arg(flag("case-sensitive").short("s"))
.arg(flag("smart-case").short("S"))
.arg(flag("sort-files"))
@ -326,6 +332,13 @@ lazy_static! {
doc!(h, "debug",
"Show debug messages.",
"Show debug messages. Please use this when filing a bug report.");
doc!(h, "dfa-size-limit",
"The upper size limit of the generated dfa.",
"The upper size limit of the generated dfa. The default limit is \
10M. This should only be changed on very large regex inputs \
where the (slower) fallback regex engine may otherwise be used. \
\n\nThe argument accepts the same size suffixes as allowed in \
the 'max-filesize' argument.");
doc!(h, "file",
"Search for patterns from the given file.",
"Search for patterns from the given file, with one pattern per \
@ -444,6 +457,11 @@ lazy_static! {
Note that the replacement by default replaces each match, and \
NOT the entire line. To replace the entire line, you should \
match the entire line.");
doc!(h, "regex-size-limit",
"The upper size limit of the compiled regex.",
"The upper size limit of the compiled regex. The default limit \
is 10M. \n\nThe argument accepts the same size suffixes as \
allowed in the 'max-filesize' argument.");
doc!(h, "case-sensitive",
"Search case sensitively.",
"Search case sensitively. This overrides -i/--ignore-case and \

View File

@ -771,12 +771,18 @@ impl<'a> ArgMatches<'a> {
let casei =
self.is_present("ignore-case")
&& !self.is_present("case-sensitive");
GrepBuilder::new(&try!(self.pattern()))
let mut gb = GrepBuilder::new(&try!(self.pattern()))
.case_smart(smart)
.case_insensitive(casei)
.line_terminator(b'\n')
.build()
.map_err(From::from)
.line_terminator(b'\n');
if let Some(limit) = try!(self.dfa_size_limit()) {
gb = gb.dfa_size_limit(limit);
}
if let Some(limit) = try!(self.regex_size_limit()) {
gb = gb.size_limit(limit);
}
gb.build().map_err(From::from)
}
/// Builds the set of glob overrides from the command line flags.
@ -807,31 +813,64 @@ impl<'a> ArgMatches<'a> {
btypes.build().map_err(From::from)
}
/// Parses the max-filesize argument option into a byte count.
fn max_filesize(&self) -> Result<Option<u64>> {
use regex::Regex;
let max_filesize = match self.value_of_lossy("max-filesize") {
/// Parses an argument of the form `[0-9]+(KMG)?`.
///
/// This always returns the result as a type `u64`. This must be converted
/// to the appropriate type by the caller.
fn parse_human_readable_size_arg(
&self,
arg_name: &str,
) -> Result<Option<u64>> {
let arg_value = match self.value_of_lossy(arg_name) {
Some(x) => x,
None => return Ok(None)
};
let re = regex::Regex::new("^([0-9]+)([KMG])?$").unwrap();
let caps = try!(
re.captures(&arg_value).ok_or_else(|| {
format!("invalid format for {}", arg_name)
}));
let re = Regex::new("^([0-9]+)([KMG])?$").unwrap();
let caps = try!(re.captures(&max_filesize)
.ok_or("invalid format for max-filesize argument"));
let value = try!(caps[1].parse::<u64>().map_err(|err|err.to_string()));
let value = try!(caps[1].parse::<u64>());
let suffix = caps.get(2).map(|x| x.as_str());
let v_10 = value.checked_mul(1024);
let v_20 = v_10.and_then(|x| x.checked_mul(1024));
let v_30 = v_20.and_then(|x| x.checked_mul(1024));
let try_suffix = |x: Option<u64>| {
if x.is_some() {
Ok(x)
} else {
Err(From::from(format!("number too large for {}", arg_name)))
}
};
match suffix {
None => Ok(Some(value)),
Some("K") => Ok(Some(value * 1024)),
Some("M") => Ok(Some(value * 1024 * 1024)),
Some("G") => Ok(Some(value * 1024 * 1024 * 1024)),
_ => Err(From::from("invalid suffix for max-filesize argument"))
Some("K") => try_suffix(v_10),
Some("M") => try_suffix(v_20),
Some("G") => try_suffix(v_30),
_ => Err(From::from(format!("invalid suffix for {}", arg_name)))
}
}
/// Parse the dfa-size-limit argument option into a byte count.
fn dfa_size_limit(&self) -> Result<Option<usize>> {
let r = try!(self.parse_human_readable_size_arg("dfa-size-limit"));
human_readable_to_usize("dfa-size-limit", r)
}
/// Parse the regex-size-limit argument option into a byte count.
fn regex_size_limit(&self) -> Result<Option<usize>> {
let r = try!(self.parse_human_readable_size_arg("regex-size-limit"));
human_readable_to_usize("regex-size-limit", r)
}
/// Parses the max-filesize argument option into a byte count.
fn max_filesize(&self) -> Result<Option<u64>> {
self.parse_human_readable_size_arg("max-filesize")
}
/// Returns true if ignore files should be ignored.
fn no_ignore(&self) -> bool {
self.is_present("no-ignore")
@ -926,6 +965,27 @@ impl QuietMatched {
}
}
/// Convert the result of a `parse_human_readable_size_arg` call into
/// a `usize`, failing if the type does not fit.
fn human_readable_to_usize(
arg_name: &str,
value: Option<u64>,
) -> Result<Option<usize>> {
use std::usize;
match value {
None => Ok(None),
Some(v) => {
if v <= usize::MAX as u64 {
Ok(Some(v as usize))
} else {
let msg = format!("number too large for {}", arg_name);
Err(From::from(msg))
}
}
}
}
/// Returns true if and only if stdin is deemed searchable.
#[cfg(unix)]
fn stdin_is_readable() -> bool {

View File

@ -455,7 +455,6 @@ sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".",
let expected = "\
foo
";
assert_eq!(lines, expected);
});
@ -470,7 +469,6 @@ sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".",
let expected = "\
foo
";
assert_eq!(lines, expected);
});
@ -485,10 +483,19 @@ sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".",
let expected = "\
foo
";
assert_eq!(lines, expected);
});
sherlock!(max_filesize_suffix_overflow, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create_size("foo", 1000000);
// 2^35 * 2^30 would otherwise overflow
cmd.arg("--max-filesize").arg("34359738368G").arg("--files");
wd.assert_err(&mut cmd);
});
sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".sherlock", hay::SHERLOCK);
@ -1443,6 +1450,37 @@ clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "fooZbar:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/362
sherlock!(feature_362_dfa_size_limit, r"For\s",
|wd: WorkDir, mut cmd: Command| {
// This should fall back to the nfa engine but should still produce the
// expected result.
cmd.arg("--dfa-size-limit").arg("10");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
";
assert_eq!(lines, expected);
});
sherlock!(feature_362_exceeds_regex_size_limit, r"[0-9]\w+",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("--regex-size-limit").arg("10K");
wd.assert_err(&mut cmd);
});
#[cfg(target_pointer_width = "32")]
sherlock!(feature_362_u64_to_narrow_usize_suffix_overflow, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create_size("foo", 1000000);
// 2^35 * 2^20 is ok for u64, but not for usize
cmd.arg("--dfa-size-limit").arg("34359738368M").arg("--files");
wd.assert_err(&mut cmd);
});
// See: https://github.com/BurntSushi/ripgrep/issues/419
sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {