1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-24 17:12:16 +02:00

regex: fix -w when regex can match empty string

This is a weird bug where our optimization for handling -w more quickly
than we would otherwise failed. In particular, if the original regex can
match the empty string, then our word boundary detection would produce
invalid indices at which to start the next search. We "fix" it by simply
bailing when the indices are known to be incorrect.

This wasn't a problem in previous releases; it only became one when
ripgrep 13 tweaked how word boundaries are detected in commit efd9cfb2.

Fixes #1891
This commit is contained in:
Andrew Gallant 2021-06-12 14:16:35 -04:00
parent 5e81c60b35
commit bc76a30c23
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
3 changed files with 19 additions and 4 deletions

View File

@ -116,7 +116,7 @@ impl Match {
/// This method panics if `start > self.end`.
#[inline]
pub fn with_start(&self, start: usize) -> Match {
assert!(start <= self.end);
assert!(start <= self.end, "{} is not <= {}", start, self.end);
Match { start, ..*self }
}
@ -128,7 +128,7 @@ impl Match {
/// This method panics if `self.start > end`.
#[inline]
pub fn with_end(&self, end: usize) -> Match {
assert!(self.start <= end);
assert!(self.start <= end, "{} is not <= {}", self.start, end);
Match { end, ..*self }
}

View File

@ -111,8 +111,15 @@ impl WordMatcher {
}
let (_, slen) = bstr::decode_utf8(&haystack[cand]);
let (_, elen) = bstr::decode_last_utf8(&haystack[cand]);
cand =
cand.with_start(cand.start() + slen).with_end(cand.end() - elen);
let new_start = cand.start() + slen;
let new_end = cand.end() - elen;
// This occurs when the original regex can match the empty string. In this
// case, just bail instead of trying to get it right here since it's
// likely a pathological case.
if new_start > new_end {
return Err(());
}
cand = cand.with_start(new_start).with_end(new_end);
if self.original.is_match(&haystack[cand]) {
Ok(Some(cand))
} else {

View File

@ -1029,3 +1029,11 @@ rgtest!(r1878, |dir: Dir, _: TestCommand| {
let args = &["-U", "--mmap", r"\Abaz", "test"];
dir.command().args(args).assert_err();
});
// See: https://github.com/BurntSushi/ripgrep/issues/1891
rgtest!(r1891, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "\n##\n");
// N.B. We use -o here to force the issue to occur, which seems to only
// happen when each match needs to be detected.
eqnice!("1:\n2:\n2:\n", cmd.args(&["-won", "", "test"]).stdout());
});