1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-06-30 22:23:44 +02:00

printer: trim line terminator before finding submatches

This fixes a bug where PCRE2 look-around could change the result of a
match if it observed a line terminator in the printer. And in
particular, this is precisely how the searcher operates: the line is
considered unto itself *without* the line terminator.

Fixes #1401
This commit is contained in:
Andrew Gallant
2021-05-31 19:00:56 -04:00
parent efd9cfb2fc
commit ee23ab5173
4 changed files with 55 additions and 14 deletions

View File

@ -418,7 +418,12 @@ where
bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
}
} else {
bytes = &bytes[..range.end];
// When searching a single line, we should remove the line terminator.
// Otherwise, it's possible for the regex (via look-around) to observe
// the line terminator and not match because of it.
let mut m = Match::new(0, range.end);
trim_line_terminator(searcher, bytes, &mut m);
bytes = &bytes[..m.end()];
}
matcher
.find_iter_at(bytes, range.start, |m| {
@ -429,3 +434,21 @@ where
})
.map_err(io::Error::error_message)
}
/// Given a buf and some bounds, if there is a line terminator at the end of
/// the given bounds in buf, then the bounds are trimmed to remove the line
/// terminator.
///
/// When the searcher's line terminator is CRLF, a `\r` immediately preceding
/// the trailing terminator byte is trimmed as well, but only if that `\r`
/// lies within the given bounds. Bytes before `line.start()` are never
/// inspected, so the trimmed bounds never end before they start.
///
/// If the bounds do not end with a line terminator, `line` is left unchanged.
pub fn trim_line_terminator(
    searcher: &Searcher,
    buf: &[u8],
    line: &mut Match,
) {
    let lineterm = searcher.line_terminator();
    if !lineterm.is_suffix(&buf[*line]) {
        return;
    }
    // A matching suffix implies the bounds are non-empty, so dropping the
    // final terminator byte cannot underflow.
    let mut end = line.end() - 1;
    // Compare against `line.start()` rather than 0: a `\r` sitting just
    // before the bounds belongs to whatever precedes this line, and trimming
    // it would move `end` below `start`.
    if lineterm.is_crlf()
        && end > line.start()
        && buf.get(end - 1) == Some(&b'\r')
    {
        end -= 1;
    }
    *line = line.with_end(end);
}