diff --git a/CHANGELOG.md b/CHANGELOG.md index dcc23d02..928d0998 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,6 +27,8 @@ Bug fixes: Fix a bug where ripgrep would mishandle globs that ended with a `.`. * [BUG #3076](https://github.com/BurntSushi/ripgrep/issues/3076): Fix bug with `-m/--max-count` and `-U/--multiline` showing too many matches. +* [BUG #3100](https://github.com/BurntSushi/ripgrep/pull/3100): + Preserve line terminators when using `-r/--replace` flag. * [BUG #3108](https://github.com/BurntSushi/ripgrep/issues/3108): Fix a bug where `-q --files-without-match` inverted the exit code. * [BUG #3140](https://github.com/BurntSushi/ripgrep/issues/3140): diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index 494901a4..cf2565a5 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -3947,4 +3947,41 @@ e let expected = "4:d\n5-e\n6:d\n"; assert_eq_printed!(expected, got); } + + #[test] + fn regression_crlf_preserve() { + let haystack = "hello\nworld\r\n"; + let matcher = + RegexMatcherBuilder::new().crlf(true).build(r".").unwrap(); + let mut printer = StandardBuilder::new().build(NoColor::new(vec![])); + let mut searcher = SearcherBuilder::new() + .line_number(false) + .line_terminator(LineTerminator::crlf()) + .build(); + + searcher + .search_reader( + &matcher, + haystack.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + let got = printer_contents(&mut printer); + let expected = "hello\nworld\r\n"; + assert_eq_printed!(expected, got); + + let mut printer = StandardBuilder::new() + .replacement(Some(b"$0".to_vec())) + .build(NoColor::new(vec![])); + searcher + .search_reader( + &matcher, + haystack.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + let got = printer_contents(&mut printer); + let expected = "hello\nworld\r\n"; + assert_eq_printed!(expected, got); + } } diff --git a/crates/printer/src/util.rs b/crates/printer/src/util.rs index 825f81c1..4d94131e 100644 --- 
a/crates/printer/src/util.rs +++ b/crates/printer/src/util.rs @@ -59,19 +59,23 @@ impl Replacer { // See the giant comment in 'find_iter_at_in_context' below for why we // do this dance. let is_multi_line = searcher.multi_line_with_matcher(&matcher); - if is_multi_line { + // Get the line_terminator that was removed (if any) so we can add it back + let line_terminator = if is_multi_line { if haystack[range.end..].len() >= MAX_LOOK_AHEAD { haystack = &haystack[..range.end + MAX_LOOK_AHEAD]; } + &[] } else { // When searching a single line, we should remove the line // terminator. Otherwise, it's possible for the regex (via // look-around) to observe the line terminator and not match // because of it. let mut m = Match::new(0, range.end); - trim_line_terminator(searcher, haystack, &mut m); + let line_terminator = + trim_line_terminator(searcher, haystack, &mut m); haystack = &haystack[..m.end()]; - } + line_terminator + }; { let &mut Space { ref mut dst, ref mut caps, ref mut matches } = self.allocate(matcher)?; @@ -81,6 +85,7 @@ impl Replacer { replace_with_captures_in_context( matcher, haystack, + line_terminator, range.clone(), caps, dst, @@ -508,6 +513,7 @@ where // Otherwise, it's possible for the regex (via look-around) to observe // the line terminator and not match because of it. let mut m = Match::new(0, range.end); + // No need to remember the line terminator as we aren't doing a replace here trim_line_terminator(searcher, bytes, &mut m); bytes = &bytes[..m.end()]; } @@ -523,19 +529,23 @@ where /// Given a buf and some bounds, if there is a line terminator at the end of /// the given bounds in buf, then the bounds are trimmed to remove the line -/// terminator. -pub(crate) fn trim_line_terminator( +/// terminator, returning the slice of the removed line terminator (if any). 
+pub(crate) fn trim_line_terminator<'b>( searcher: &Searcher, - buf: &[u8], + buf: &'b [u8], line: &mut Match, -) { +) -> &'b [u8] { let lineterm = searcher.line_terminator(); if lineterm.is_suffix(&buf[*line]) { let mut end = line.end() - 1; if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') { end -= 1; } + let orig_end = line.end(); *line = line.with_end(end); + &buf[end..orig_end] + } else { + &[] } } @@ -545,6 +555,7 @@ pub(crate) fn trim_line_terminator( fn replace_with_captures_in_context( matcher: M, bytes: &[u8], + line_terminator: &[u8], range: std::ops::Range, caps: &mut M::Captures, dst: &mut Vec, @@ -566,6 +577,8 @@ where })?; let end = std::cmp::min(bytes.len(), range.end); dst.extend(&bytes[last_match..end]); + // Add back any line terminator + dst.extend(line_terminator); Ok(()) }