From b006943c01561a4ae5b928081d1bb4087b599e19 Mon Sep 17 00:00:00 2001 From: Balaji Sivaraman Date: Wed, 21 Feb 2018 22:16:45 +0530 Subject: [PATCH] search: add -b/--byte-offset flag This commit adds support for printing 0-based byte offset before each line. We handle corner cases such as `-o/--only-matching` and `-C/--context` as well. Closes #812 --- complete/_rg | 1 + src/app.rs | 13 +++++++ src/args.rs | 3 ++ src/printer.rs | 26 ++++++++++++-- src/search_buffer.rs | 40 ++++++++++++++++++++- src/search_stream.rs | 86 +++++++++++++++++++++++++++++++++++++------- src/worker.rs | 14 ++++++++ tests/tests.rs | 10 ++++++ 8 files changed, 176 insertions(+), 17 deletions(-) diff --git a/complete/_rg b/complete/_rg index 6fecf6d8..0659c820 100644 --- a/complete/_rg +++ b/complete/_rg @@ -25,6 +25,7 @@ _rg() { '*--colors=[specify color settings and styles]: :->colorspec' '--column[show column numbers]' '(-A -B -C --after-context --before-context --context)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines' + '(-b --byte-offset)'{-b,--byte-offset}'[print the 0-based byte offset for each matching line]' '--context-separator=[specify string used to separate non-continuous context lines in output]:separator' '(-c --count --passthrough --passthru)'{-c,--count}'[only show count of matches for each file]' '--debug[show debug messages]' diff --git a/src/app.rs b/src/app.rs index e54ecc16..6be318b4 100644 --- a/src/app.rs +++ b/src/app.rs @@ -509,6 +509,7 @@ pub fn all_args_and_flags() -> Vec { // Flags can be defined in any order, but we do it alphabetically. flag_after_context(&mut args); flag_before_context(&mut args); + flag_byte_offset(&mut args); flag_case_sensitive(&mut args); flag_color(&mut args); flag_colors(&mut args); @@ -634,6 +635,18 @@ This overrides the --context flag. 
args.push(arg); } +fn flag_byte_offset(args: &mut Vec<RGArg>) { + const SHORT: &str = "Print the 0-based byte offset for each matching line."; + const LONG: &str = long!("\ +Print the 0-based byte offset within the input file +before each line of output. If -o (--only-matching) is +specified, print the offset of the matching part itself. +"); + let arg = RGArg::switch("byte-offset").short("b") + .help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_case_sensitive(args: &mut Vec<RGArg>) { const SHORT: &str = "Search case sensitively (default)."; const LONG: &str = long!("\ diff --git a/src/args.rs b/src/args.rs index 0461261b..b8714deb 100644 --- a/src/args.rs +++ b/src/args.rs @@ -35,6 +35,7 @@ pub struct Args { paths: Vec<PathBuf>, after_context: usize, before_context: usize, + byte_offset: bool, color_choice: termcolor::ColorChoice, colors: ColorSpecs, column: bool, @@ -259,6 +260,7 @@ impl Args { WorkerBuilder::new(self.grep()) .after_context(self.after_context) .before_context(self.before_context) + .byte_offset(self.byte_offset) .count(self.count) .encoding(self.encoding) .files_with_matches(self.files_with_matches) @@ -361,6 +363,7 @@ impl<'a> ArgMatches<'a> { paths: paths, after_context: after_context, before_context: before_context, + byte_offset: self.is_present("byte-offset"), color_choice: self.color_choice(), colors: self.color_specs()?, column: self.column(), diff --git a/src/printer.rs b/src/printer.rs index 38b8c2b2..840c7c9a 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -280,6 +280,7 @@ impl Printer { start: usize, end: usize, line_number: Option<u64>, + byte_offset: Option<u64> ) { if !self.line_per_match && !self.only_matching { let mat = re @@ -287,12 +288,13 @@ impl Printer { .map(|m| (m.start(), m.end())) .unwrap_or((0, 0)); return self.write_match( - re, path, buf, start, end, line_number, mat.0, mat.1); + re, path, buf, start, end, line_number, + byte_offset, mat.0, mat.1); } for m in re.find_iter(&buf[start..end]) { self.write_match( - re, path.as_ref(), buf, 
start, end, - line_number, m.start(), m.end()); + re, path.as_ref(), buf, start, end, line_number, + byte_offset, m.start(), m.end()); } } @@ -304,6 +306,7 @@ impl Printer { start: usize, end: usize, line_number: Option<u64>, + byte_offset: Option<u64>, match_start: usize, match_end: usize, ) { @@ -321,6 +324,14 @@ impl Printer { if self.column { self.column_number(match_start as u64 + 1, b':'); } + if let Some(byte_offset) = byte_offset { + if self.only_matching { + self.write_byte_offset( + byte_offset + ((start + match_start) as u64), b':'); + } else { + self.write_byte_offset(byte_offset + (start as u64), b':'); + } + } if self.replace.is_some() { let mut count = 0; let mut offsets = Vec::new(); @@ -395,6 +406,7 @@ impl Printer { start: usize, end: usize, line_number: Option<u64>, + byte_offset: Option<u64>, ) { if self.heading && self.with_filename && !self.has_printed { self.write_file_sep(); @@ -407,6 +419,9 @@ impl Printer { if let Some(line_number) = line_number { self.line_number(line_number, b'-'); } + if let Some(byte_offset) = byte_offset { + self.write_byte_offset(byte_offset + (start as u64), b'-'); + } if self.max_columns.map_or(false, |m| end - start > m) { self.write(b"[Omitted long context line]"); self.write_eol(); @@ -481,6 +496,11 @@ impl Printer { self.separator(&[sep]); } + fn write_byte_offset(&mut self, o: u64, sep: u8) { + self.write_colored(o.to_string().as_bytes(), |colors| colors.column()); + self.separator(&[sep]); + } + fn write(&mut self, buf: &[u8]) { self.has_printed = true; let _ = self.wtr.write_all(buf); diff --git a/src/search_buffer.rs b/src/search_buffer.rs index 11b561ea..fa446f78 100644 --- a/src/search_buffer.rs +++ b/src/search_buffer.rs @@ -23,6 +23,7 @@ pub struct BufferSearcher<'a, W: 'a> { buf: &'a [u8], match_count: u64, line_count: Option<u64>, + byte_offset: Option<u64>, last_line: usize, } @@ -41,10 +42,21 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { buf: buf, match_count: 0, line_count: None, + byte_offset: None, last_line: 0, } } + 
/// If enabled, searching will print a 0-based offset of the + /// matching line (or the actual match if -o is specified) before + /// printing the line itself. + /// + /// Disabled by default. + pub fn byte_offset(mut self, yes: bool) -> Self { + self.opts.byte_offset = yes; + self + } + /// If enabled, searching will print a count instead of each match. /// /// Disabled by default. @@ -120,6 +132,9 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { self.match_count = 0; self.line_count = if self.opts.line_number { Some(0) } else { None }; + // The memory map searcher uses one contiguous block of bytes, so the + // offsets given the printer are sufficient to compute the byte offset. + self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; let mut last_end = 0; for m in self.grep.iter(self.buf) { if self.opts.invert_match { @@ -158,7 +173,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> { self.add_line(end); self.printer.matched( self.grep.regex(), self.path, self.buf, - start, end, self.line_count); + start, end, self.line_count, self.byte_offset); } #[inline(always)] @@ -271,6 +286,29 @@ and exhibited clearly, with a label attached.\ "); } + #[test] + fn byte_offset() { + let (_, out) = search( + "Sherlock", SHERLOCK, |s| s.byte_offset(true)); + assert_eq!(out, "\ +/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock +/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes +"); + } + + #[test] + fn byte_offset_inverted() { + let (_, out) = search("Sherlock", SHERLOCK, |s| { + s.invert_match(true).byte_offset(true) + }); + assert_eq!(out, "\ +/baz.rs:65:Holmeses, success in the province of detective work must always +/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; +/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, +/baz.rs:321:and exhibited clearly, with a label attached. 
+"); } + #[test] fn count() { let (count, out) = search( diff --git a/src/search_stream.rs b/src/search_stream.rs index 3d8396cb..3021f515 100644 --- a/src/search_stream.rs +++ b/src/search_stream.rs @@ -69,6 +69,7 @@ pub struct Searcher<'a, R, W: 'a> { haystack: R, match_count: u64, line_count: Option<u64>, + byte_offset: Option<u64>, last_match: Match, last_printed: usize, last_line: usize, @@ -80,6 +81,7 @@ pub struct Searcher<'a, R, W: 'a> { pub struct Options { pub after_context: usize, pub before_context: usize, + pub byte_offset: bool, pub count: bool, pub files_with_matches: bool, pub files_without_matches: bool, @@ -96,6 +98,7 @@ impl Default for Options { Options { after_context: 0, before_context: 0, + byte_offset: false, count: false, files_with_matches: false, files_without_matches: false, @@ -165,6 +168,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { haystack: haystack, match_count: 0, line_count: None, + byte_offset: None, last_match: Match::default(), last_printed: 0, last_line: 0, @@ -186,6 +190,16 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self } + /// If enabled, searching will print a 0-based offset of the + /// matching line (or the actual match if -o is specified) before + /// printing the line itself. + /// + /// Disabled by default. + pub fn byte_offset(mut self, yes: bool) -> Self { + self.opts.byte_offset = yes; + self + } + /// If enabled, searching will print a count instead of each match. /// /// Disabled by default. 
@@ -259,6 +273,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self.inp.reset(); self.match_count = 0; self.line_count = if self.opts.line_number { Some(0) } else { None }; + self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; self.last_match = Match::default(); self.after_context_remaining = 0; while !self.terminate() { @@ -327,17 +342,18 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { #[inline(always)] fn fill(&mut self) -> Result<bool, Error> { - let keep = if self.opts.before_context > 0 || self.opts.after_context > 0 { - let lines = 1 + cmp::max( - self.opts.before_context, self.opts.after_context); - start_of_previous_lines( - self.opts.eol, - &self.inp.buf, - self.inp.lastnl.saturating_sub(1), - lines) - } else { - self.inp.lastnl - }; + let keep = + if self.opts.before_context > 0 || self.opts.after_context > 0 { + let lines = 1 + cmp::max( + self.opts.before_context, self.opts.after_context); + start_of_previous_lines( + self.opts.eol, + &self.inp.buf, + self.inp.lastnl.saturating_sub(1), + lines) + } else { + self.inp.lastnl + }; if keep < self.last_printed { self.last_printed -= keep; } else { @@ -349,6 +365,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self.count_lines(keep); self.last_line = 0; } + self.count_byte_offset(keep); let ok = self.inp.fill(&mut self.haystack, keep).map_err(|err| { Error::from_io(err, &self.path) })?; @@ -419,7 +436,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self.add_line(end); self.printer.matched( self.grep.regex(), self.path, - &self.inp.buf, start, end, self.line_count); + &self.inp.buf, start, end, self.line_count, self.byte_offset); self.last_printed = end; self.after_context_remaining = self.opts.after_context; } @@ -429,7 +446,8 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { self.count_lines(start); self.add_line(end); self.printer.context( - &self.path, &self.inp.buf, start, 
end, + self.line_count, self.byte_offset); self.last_printed = end; } @@ -447,6 +465,13 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { } } + #[inline(always)] + fn count_byte_offset(&mut self, buf_last_end: usize) { + if let Some(ref mut byte_offset) = self.byte_offset { + *byte_offset += buf_last_end as u64; + } + } + #[inline(always)] fn count_lines(&mut self, upto: usize) { if let Some(ref mut line_count) = self.line_count { @@ -1006,6 +1031,41 @@ fn main() { assert_eq!(out, "/baz.rs:2\n"); } + #[test] + fn byte_offset() { + let (_, out) = search_smallcap( + "Sherlock", SHERLOCK, |s| s.byte_offset(true)); + assert_eq!(out, "\ +/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock +/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes +"); + } + + #[test] + fn byte_offset_with_before_context() { + let (_, out) = search_smallcap("dusted", SHERLOCK, |s| { + s.line_number(true).byte_offset(true).before_context(2) + }); + assert_eq!(out, "\ +/baz.rs-3-129-be, to a very large extent, the result of luck. Sherlock Holmes +/baz.rs-4-193-can extract a clew from a wisp of straw or a flake of cigar ash; +/baz.rs:5:258:but Doctor Watson has to have it taken out for him and dusted, +"); + } + + #[test] + fn byte_offset_inverted() { + let (_, out) = search_smallcap("Sherlock", SHERLOCK, |s| { + s.invert_match(true).byte_offset(true) + }); + assert_eq!(out, "\ +/baz.rs:65:Holmeses, success in the province of detective work must always +/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; +/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, +/baz.rs:321:and exhibited clearly, with a label attached. 
+"); + } + #[test] fn files_with_matches() { let (count, out) = search_smallcap( diff --git a/src/worker.rs b/src/worker.rs index 952a334b..e5f7546a 100644 --- a/src/worker.rs +++ b/src/worker.rs @@ -33,6 +33,7 @@ struct Options { encoding: Option<&'static Encoding>, after_context: usize, before_context: usize, + byte_offset: bool, count: bool, files_with_matches: bool, files_without_matches: bool, @@ -53,6 +54,7 @@ impl Default for Options { encoding: None, after_context: 0, before_context: 0, + byte_offset: false, count: false, files_with_matches: false, files_without_matches: false, @@ -106,6 +108,16 @@ impl WorkerBuilder { self } + /// If enabled, searching will print a 0-based offset of the + /// matching line (or the actual match if -o is specified) before + /// printing the line itself. + /// + /// Disabled by default. + pub fn byte_offset(mut self, yes: bool) -> Self { + self.opts.byte_offset = yes; + self + } + /// If enabled, searching will print a count instead of each match. /// /// Disabled by default. 
@@ -283,6 +295,7 @@ impl Worker { searcher .after_context(self.opts.after_context) .before_context(self.opts.before_context) + .byte_offset(self.opts.byte_offset) .count(self.opts.count) .files_with_matches(self.opts.files_with_matches) .files_without_matches(self.opts.files_without_matches) @@ -322,6 +335,7 @@ impl Worker { } let searcher = BufferSearcher::new(printer, &self.grep, path, buf); Ok(searcher + .byte_offset(self.opts.byte_offset) .count(self.opts.count) .files_with_matches(self.opts.files_with_matches) .files_without_matches(self.opts.files_without_matches) diff --git a/tests/tests.rs b/tests/tests.rs index 6becfe0f..7ae23895 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -395,6 +395,16 @@ sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { assert_eq!(lines, "file2.html:Sherlock\n"); }); +sherlock!(byte_offset_only_matching, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { + cmd.arg("-b").arg("-o"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +sherlock:56:Sherlock +sherlock:177:Sherlock +"; + assert_eq!(lines, expected); +}); + sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count"); let lines: String = wd.stdout(&mut cmd);