mirror of
				https://github.com/BurntSushi/ripgrep.git
				synced 2025-10-30 23:17:47 +02:00 
			
		
		
		
	printer: trim line terminator before finding submatches
This fixes a bug where PCRE2 look-around could change the result of a match if it observed a line terminator in the printer. And in particular, this is precisely how the searcher operates: the line is considered unto itself *without* the line terminator. Fixes #1401
This commit is contained in:
		| @@ -71,6 +71,8 @@ Bug fixes: | ||||
|   Document cygwin path translation behavior in the FAQ. | ||||
| * [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311): | ||||
|   Fix multi-line bug where a search & replace for `\n` didn't work as expected. | ||||
| * [BUG #1401](https://github.com/BurntSushi/ripgrep/issues/1401): | ||||
|   Fix buggy interaction between PCRE2 look-around and `-o/--only-matching`. | ||||
| * [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412): | ||||
|   Fix multi-line bug with searches using look-around past matching lines. | ||||
| * [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642): | ||||
|   | ||||
| @@ -17,7 +17,8 @@ use color::ColorSpecs; | ||||
| use counter::CounterWriter; | ||||
| use stats::Stats; | ||||
| use util::{ | ||||
|     find_iter_at_in_context, trim_ascii_prefix, PrinterPath, Replacer, Sunk, | ||||
|     find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator, | ||||
|     PrinterPath, Replacer, Sunk, | ||||
| }; | ||||
|  | ||||
| /// The configuration for the standard printer. | ||||
| @@ -1547,17 +1548,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> { | ||||
|     } | ||||
|  | ||||
|     fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) { | ||||
|         let lineterm = self.searcher.line_terminator(); | ||||
|         if lineterm.is_suffix(&buf[*line]) { | ||||
|             let mut end = line.end() - 1; | ||||
|             if lineterm.is_crlf() | ||||
|                 && end > 0 | ||||
|                 && buf.get(end - 1) == Some(&b'\r') | ||||
|             { | ||||
|                 end -= 1; | ||||
|             } | ||||
|             *line = line.with_end(end); | ||||
|         } | ||||
|         trim_line_terminator(&self.searcher, buf, line); | ||||
|     } | ||||
|  | ||||
|     fn has_line_terminator(&self, buf: &[u8]) -> bool { | ||||
|   | ||||
| @@ -418,7 +418,12 @@ where | ||||
|             bytes = &bytes[..range.end + MAX_LOOK_AHEAD]; | ||||
|         } | ||||
|     } else { | ||||
|         bytes = &bytes[..range.end]; | ||||
|         // When searching a single line, we should remove the line terminator. | ||||
|         // Otherwise, it's possible for the regex (via look-around) to observe | ||||
|         // the line terminator and not match because of it. | ||||
|         let mut m = Match::new(0, range.end); | ||||
|         trim_line_terminator(searcher, bytes, &mut m); | ||||
|         bytes = &bytes[..m.end()]; | ||||
|     } | ||||
|     matcher | ||||
|         .find_iter_at(bytes, range.start, |m| { | ||||
| @@ -429,3 +434,21 @@ where | ||||
|         }) | ||||
|         .map_err(io::Error::error_message) | ||||
| } | ||||
|  | ||||
| /// Given a buf and some bounds, if there is a line terminator at the end of | ||||
| /// the given bounds in buf, then the bounds are trimmed to remove the line | ||||
| /// terminator. | ||||
| pub fn trim_line_terminator( | ||||
|     searcher: &Searcher, | ||||
|     buf: &[u8], | ||||
|     line: &mut Match, | ||||
| ) { | ||||
|     let lineterm = searcher.line_terminator(); | ||||
|     if lineterm.is_suffix(&buf[*line]) { | ||||
|         let mut end = line.end() - 1; | ||||
|         if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') { | ||||
|             end -= 1; | ||||
|         } | ||||
|         *line = line.with_end(end); | ||||
|     } | ||||
| } | ||||
|   | ||||
| @@ -806,7 +806,32 @@ rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| { | ||||
|     eqnice!("mylink/file.txt:test\n", stdout); | ||||
| }); | ||||
|  | ||||
| // printf "foo\nbar\n" | rg -PU '(?<=foo\n)bar' -r quux | ||||
| // See: https://github.com/BurntSushi/ripgrep/issues/1401 | ||||
| rgtest!(r1401_look_ahead_only_matching_1, |dir: Dir, mut cmd: TestCommand| { | ||||
|     // Only PCRE2 supports look-around. | ||||
|     if !dir.is_pcre2() { | ||||
|         return; | ||||
|     } | ||||
|     dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\n"); | ||||
|     cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]); | ||||
|     eqnice!("xo\ncat\tdo\n", cmd.stdout()); | ||||
|  | ||||
|     let mut cmd = dir.command(); | ||||
|     cmd.args(&["-No", r".*o(?!.*[ \t])", "ip.txt"]); | ||||
|     eqnice!("xo\ncat\tdo\n", cmd.stdout()); | ||||
| }); | ||||
|  | ||||
| // See: https://github.com/BurntSushi/ripgrep/issues/1401 | ||||
| rgtest!(r1401_look_ahead_only_matching_2, |dir: Dir, mut cmd: TestCommand| { | ||||
|     // Only PCRE2 supports look-around. | ||||
|     if !dir.is_pcre2() { | ||||
|         return; | ||||
|     } | ||||
|     dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\nfoo"); | ||||
|     cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]); | ||||
|     eqnice!("xo\ncat\tdo\nfoo\n", cmd.stdout()); | ||||
| }); | ||||
|  | ||||
| // See: https://github.com/BurntSushi/ripgrep/issues/1412 | ||||
| rgtest!(r1412_look_behind_no_replacement, |dir: Dir, mut cmd: TestCommand| { | ||||
|     // Only PCRE2 supports look-around. | ||||
|   | ||||
		Reference in New Issue
	
	Block a user