mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-19 09:02:15 +02:00
Don't union inner literals of repetitions.
If we do, this results in extracting `foofoofoo` from `(\wfoo){3}`, which is wrong. This does prevent us from extracting `foofoofoo` from `(foo){3}`, which is unfortunate, but we miss plenty of other stuff too. Literal extraction needs a good rethink (all the way down into the regex engine). Fixes #93
This commit is contained in:
parent
a13ac3e3d4
commit
6a8051b258
@ -8,7 +8,6 @@ Note that this implementation is incredibly suspicious. We need something more
|
|||||||
principled.
|
principled.
|
||||||
*/
|
*/
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::iter;
|
|
||||||
|
|
||||||
use regex::bytes::Regex;
|
use regex::bytes::Regex;
|
||||||
use syntax::{
|
use syntax::{
|
||||||
@ -181,8 +180,6 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
|
|||||||
lits: &mut Literals,
|
lits: &mut Literals,
|
||||||
mut f: F,
|
mut f: F,
|
||||||
) {
|
) {
|
||||||
use syntax::Expr::*;
|
|
||||||
|
|
||||||
if min == 0 {
|
if min == 0 {
|
||||||
// This is a bit conservative. If `max` is set, then we could
|
// This is a bit conservative. If `max` is set, then we could
|
||||||
// treat this as a finite set of alternations. For now, we
|
// treat this as a finite set of alternations. For now, we
|
||||||
@ -190,8 +187,12 @@ fn repeat_range_literals<F: FnMut(&Expr, &mut Literals)>(
|
|||||||
lits.cut();
|
lits.cut();
|
||||||
} else {
|
} else {
|
||||||
let n = cmp::min(lits.limit_size(), min as usize);
|
let n = cmp::min(lits.limit_size(), min as usize);
|
||||||
let es = iter::repeat(e.clone()).take(n).collect();
|
// We only extract literals from a single repetition, even though
|
||||||
f(&Concat(es), lits);
|
// we could do more. e.g., `a{3}` will have `a` extracted instead of
|
||||||
|
// `aaa`. The reason is that inner literal extraction can't be unioned
|
||||||
|
// across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}`
|
||||||
|
// is wrong.
|
||||||
|
f(e, lits);
|
||||||
if n < min as usize {
|
if n < min as usize {
|
||||||
lits.cut();
|
lits.cut();
|
||||||
}
|
}
|
||||||
|
@ -703,6 +703,15 @@ clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
|||||||
assert_eq!(lines, ".foo:test\n");
|
assert_eq!(lines, ".foo:test\n");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/93
|
||||||
|
clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".",
|
||||||
|
|wd: WorkDir, mut cmd: Command| {
|
||||||
|
wd.create("foo", "192.168.1.1");
|
||||||
|
|
||||||
|
let lines: String = wd.stdout(&mut cmd);
|
||||||
|
assert_eq!(lines, "foo:192.168.1.1\n");
|
||||||
|
});
|
||||||
|
|
||||||
// See: https://github.com/BurntSushi/ripgrep/issues/20
|
// See: https://github.com/BurntSushi/ripgrep/issues/20
|
||||||
sherlock!(feature_20, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
sherlock!(feature_20, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||||
cmd.arg("--no-filename");
|
cmd.arg("--no-filename");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user