1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-11-23 21:54:45 +02:00

printer: preserve line terminator when using --crlf and --replace

Ref #3097, Closes #3100
This commit is contained in:
Isaac
2025-07-09 22:46:40 +10:00
committed by Andrew Gallant
parent f596a5d875
commit 64174b8e68
3 changed files with 59 additions and 7 deletions

View File

@@ -3947,4 +3947,41 @@ e
let expected = "4:d\n5-e\n6:d\n";
assert_eq_printed!(expected, got);
}
#[test]
fn regression_crlf_preserve() {
let haystack = "hello\nworld\r\n";
let matcher =
RegexMatcherBuilder::new().crlf(true).build(r".").unwrap();
let mut printer = StandardBuilder::new().build(NoColor::new(vec![]));
let mut searcher = SearcherBuilder::new()
.line_number(false)
.line_terminator(LineTerminator::crlf())
.build();
searcher
.search_reader(
&matcher,
haystack.as_bytes(),
printer.sink(&matcher),
)
.unwrap();
let got = printer_contents(&mut printer);
let expected = "hello\nworld\r\n";
assert_eq_printed!(expected, got);
let mut printer = StandardBuilder::new()
.replacement(Some(b"$0".to_vec()))
.build(NoColor::new(vec![]));
searcher
.search_reader(
&matcher,
haystack.as_bytes(),
printer.sink(&matcher),
)
.unwrap();
let got = printer_contents(&mut printer);
let expected = "hello\nworld\r\n";
assert_eq_printed!(expected, got);
}
}

View File

@@ -59,19 +59,23 @@ impl<M: Matcher> Replacer<M> {
// See the giant comment in 'find_iter_at_in_context' below for why we
// do this dance.
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
if is_multi_line {
// Get the line_terminator that was removed (if any) so we can add it back
let line_terminator = if is_multi_line {
if haystack[range.end..].len() >= MAX_LOOK_AHEAD {
haystack = &haystack[..range.end + MAX_LOOK_AHEAD];
}
&[]
} else {
// When searching a single line, we should remove the line
// terminator. Otherwise, it's possible for the regex (via
// look-around) to observe the line terminator and not match
// because of it.
let mut m = Match::new(0, range.end);
trim_line_terminator(searcher, haystack, &mut m);
let line_terminator =
trim_line_terminator(searcher, haystack, &mut m);
haystack = &haystack[..m.end()];
}
line_terminator
};
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
@@ -81,6 +85,7 @@ impl<M: Matcher> Replacer<M> {
replace_with_captures_in_context(
matcher,
haystack,
line_terminator,
range.clone(),
caps,
dst,
@@ -508,6 +513,7 @@ where
// Otherwise, it's possible for the regex (via look-around) to observe
// the line terminator and not match because of it.
let mut m = Match::new(0, range.end);
// No need to rember the line terminator as we aren't doing a replace here
trim_line_terminator(searcher, bytes, &mut m);
bytes = &bytes[..m.end()];
}
@@ -523,19 +529,23 @@ where
/// Given a buf and some bounds, if there is a line terminator at the end of
/// the given bounds in buf, then the bounds are trimmed to remove the line
/// terminator.
pub(crate) fn trim_line_terminator(
/// terminator, returning the slice of the removed line terminator (if any).
pub(crate) fn trim_line_terminator<'b>(
searcher: &Searcher,
buf: &[u8],
buf: &'b [u8],
line: &mut Match,
) {
) -> &'b [u8] {
let lineterm = searcher.line_terminator();
if lineterm.is_suffix(&buf[*line]) {
let mut end = line.end() - 1;
if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
end -= 1;
}
let orig_end = line.end();
*line = line.with_end(end);
&buf[end..orig_end]
} else {
&[]
}
}
@@ -545,6 +555,7 @@ pub(crate) fn trim_line_terminator(
fn replace_with_captures_in_context<M, F>(
matcher: M,
bytes: &[u8],
line_terminator: &[u8],
range: std::ops::Range<usize>,
caps: &mut M::Captures,
dst: &mut Vec<u8>,
@@ -566,6 +577,8 @@ where
})?;
let end = std::cmp::min(bytes.len(), range.end);
dst.extend(&bytes[last_match..end]);
// Add back any line terminator
dst.extend(line_terminator);
Ok(())
}