1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-03-03 14:32:22 +02:00

ripgrep: add --auto-hybrid-regex flag

This flag, when set, will automatically dispatch to PCRE2 if the given
regex cannot be compiled by Rust's regex engine. If both engines fail to
compile the regex, then both errors are surfaced.

Closes #1155
This commit is contained in:
Andrew Gallant 2019-04-14 17:39:37 -04:00
parent 9952ba2068
commit 967e7ad0de
5 changed files with 91 additions and 2 deletions

View File

@ -49,6 +49,8 @@ Feature enhancements:
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
ripgrep's exit status logic should now match GNU grep. See updated man page.
* [FEATURE #1170](https://github.com/BurntSushi/ripgrep/pull/1170):

View File

@ -112,6 +112,10 @@ _rg() {
'--hidden[search hidden files and directories]'
$no"--no-hidden[don't search hidden files and directories]"
+ '(hybrid)' # hybrid regex options
'--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
$no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
+ '(ignore)' # Ignore-file options
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'

View File

@ -547,6 +547,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
// flags are hidden and merely mentioned in the docs of the corresponding
// "positive" flag.
flag_after_context(&mut args);
flag_auto_hybrid_regex(&mut args);
flag_before_context(&mut args);
flag_binary(&mut args);
flag_block_buffered(&mut args);
@ -683,6 +684,50 @@ This overrides the --context flag.
args.push(arg);
}
/// Registers the `--auto-hybrid-regex` switch together with its hidden
/// negation, `--no-auto-hybrid-regex`, by appending both to `args`.
///
/// Each of the two switches is declared to override the other one as well
/// as both `--pcre2` and `--no-pcre2`.
fn flag_auto_hybrid_regex(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Dynamically use PCRE2 if necessary.";
    const LONG: &str = long!("\
When this flag is used, ripgrep will dynamically choose between supported regex
engines depending on the features used in a pattern. When ripgrep chooses a
regex engine, it applies that choice for every regex provided to ripgrep (e.g.,
via multiple -e/--regexp or -f/--file flags).
As an example of how this flag might behave, ripgrep will attempt to use
its default finite automata based regex engine whenever the pattern can be
successfully compiled with that regex engine. If PCRE2 is enabled and if the
pattern given could not be compiled with the default regex engine, then PCRE2
will be automatically used for searching. If PCRE2 isn't available, then this
flag has no effect because there is only one regex engine to choose from.
In the future, ripgrep may adjust its heuristics for how it decides which
regex engine to use. In general, the heuristics will be limited to a static
analysis of the patterns, and not to any specific runtime behavior observed
while searching files.
The primary downside of using this flag is that it may not always be obvious
which regex engine ripgrep uses, and thus, the match semantics or performance
profile of ripgrep may subtly and unexpectedly change. However, in many cases,
all regex engines will agree on what constitutes a match and it can be nice
to transparently support more advanced regex features like look-around and
backreferences without explicitly needing to enable them.
This flag can be disabled with --no-auto-hybrid-regex.
");

    // The documented, user-visible switch.
    let enable = RGArg::switch("auto-hybrid-regex")
        .help(SHORT)
        .long_help(LONG)
        .overrides("no-auto-hybrid-regex")
        .overrides("pcre2")
        .overrides("no-pcre2");

    // The negation is hidden; it is only mentioned in the help text above.
    let disable = RGArg::switch("no-auto-hybrid-regex")
        .hidden()
        .overrides("auto-hybrid-regex")
        .overrides("pcre2")
        .overrides("no-pcre2");

    args.push(enable);
    args.push(disable);
}
fn flag_before_context(args: &mut Vec<RGArg>) {
const SHORT: &str = "Show NUM lines before each match.";
const LONG: &str = long!("\
@ -1938,12 +1983,16 @@ This flag can be disabled with --no-pcre2.
");
let arg = RGArg::switch("pcre2").short("P")
.help(SHORT).long_help(LONG)
.overrides("no-pcre2");
.overrides("no-pcre2")
.overrides("auto-hybrid-regex")
.overrides("no-auto-hybrid-regex");
args.push(arg);
let arg = RGArg::switch("no-pcre2")
.hidden()
.overrides("pcre2");
.overrides("pcre2")
.overrides("auto-hybrid-regex")
.overrides("no-auto-hybrid-regex");
args.push(arg);
}

View File

@ -599,6 +599,25 @@ impl ArgMatches {
if self.is_present("pcre2") {
let matcher = self.matcher_pcre2(patterns)?;
Ok(PatternMatcher::PCRE2(matcher))
} else if self.is_present("auto-hybrid-regex") {
let rust_err = match self.matcher_rust(patterns) {
Ok(matcher) => return Ok(PatternMatcher::RustRegex(matcher)),
Err(err) => err,
};
log::debug!(
"error building Rust regex in hybrid mode:\n{}", rust_err,
);
let pcre_err = match self.matcher_pcre2(patterns) {
Ok(matcher) => return Ok(PatternMatcher::PCRE2(matcher)),
Err(err) => err,
};
Err(From::from(format!(
"regex could not be compiled with either the default regex \
engine or with PCRE2.\n\n\
default regex engine error:\n{}\n{}\n{}\n\n\
PCRE2 regex engine error:\n{}",
"~".repeat(79), rust_err, "~".repeat(79), pcre_err,
)))
} else {
let matcher = match self.matcher_rust(patterns) {
Ok(matcher) => matcher,

View File

@ -681,6 +681,21 @@ rgtest!(f1138_no_ignore_dot, |dir: Dir, mut cmd: TestCommand| {
eqnice!("bar\n", cmd.arg("--ignore-file").arg(".fzf-ignore").stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/1155
//
// Checks that --auto-hybrid-regex, given after --no-pcre2, still produces a
// match for a pattern containing a look-behind.
rgtest!(f1155_auto_hybrid_regex, |dir: Dir, mut cmd: TestCommand| {
    // With only one regex engine there is nothing to fall back to, so the
    // body runs only when `is_pcre2()` reports true.
    if dir.is_pcre2() {
        dir.create("sherlock", SHERLOCK);

        cmd.arg("--no-pcre2");
        cmd.arg("--auto-hybrid-regex");
        cmd.arg(r"(?<=the )Sherlock");

        let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
";
        eqnice!(expected, cmd.stdout());
    }
});
// See: https://github.com/BurntSushi/ripgrep/issues/1207
//