mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-24 17:12:16 +02:00
regex: fix -w when regex can match empty string
This is a weird bug where our optimization for handling -w more quickly than we otherwise would failed. In particular, if the original regex can match the empty string, then our word boundary detection would produce invalid indices at which to start the next search. We "fix" it by simply bailing when the indices are known to be incorrect. This wasn't a problem in previous releases, since the change to how word boundaries are detected only landed in ripgrep 13 (commit efd9cfb2). Fixes #1891
This commit is contained in:
parent
5e81c60b35
commit
bc76a30c23
@ -116,7 +116,7 @@ impl Match {
|
||||
/// This method panics if `start > self.end`.
|
||||
#[inline]
|
||||
pub fn with_start(&self, start: usize) -> Match {
|
||||
assert!(start <= self.end);
|
||||
assert!(start <= self.end, "{} is not <= {}", start, self.end);
|
||||
Match { start, ..*self }
|
||||
}
|
||||
|
||||
@ -128,7 +128,7 @@ impl Match {
|
||||
/// This method panics if `self.start > end`.
|
||||
#[inline]
|
||||
pub fn with_end(&self, end: usize) -> Match {
|
||||
assert!(self.start <= end);
|
||||
assert!(self.start <= end, "{} is not <= {}", self.start, end);
|
||||
Match { end, ..*self }
|
||||
}
|
||||
|
||||
|
@ -111,8 +111,15 @@ impl WordMatcher {
|
||||
}
|
||||
let (_, slen) = bstr::decode_utf8(&haystack[cand]);
|
||||
let (_, elen) = bstr::decode_last_utf8(&haystack[cand]);
|
||||
cand =
|
||||
cand.with_start(cand.start() + slen).with_end(cand.end() - elen);
|
||||
let new_start = cand.start() + slen;
|
||||
let new_end = cand.end() - elen;
|
||||
// This occurs when the original regex can match the empty string. In this
|
||||
// case, just bail instead of trying to get it right here since it's
|
||||
// likely a pathological case.
|
||||
if new_start > new_end {
|
||||
return Err(());
|
||||
}
|
||||
cand = cand.with_start(new_start).with_end(new_end);
|
||||
if self.original.is_match(&haystack[cand]) {
|
||||
Ok(Some(cand))
|
||||
} else {
|
||||
|
@ -1029,3 +1029,11 @@ rgtest!(r1878, |dir: Dir, _: TestCommand| {
|
||||
let args = &["-U", "--mmap", r"\Abaz", "test"];
|
||||
dir.command().args(args).assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1891
|
||||
rgtest!(r1891, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "\n##\n");
|
||||
// N.B. We use -o here to force the issue to occur, which seems to only
|
||||
// happen when each match needs to be detected.
|
||||
eqnice!("1:\n2:\n2:\n", cmd.args(&["-won", "", "test"]).stdout());
|
||||
});
|
||||
|
Loading…
x
Reference in New Issue
Block a user