mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-02 20:45:38 +02:00
search: add -b/--byte-offset flag
This commit adds support for printing the 0-based byte offset before each line of output. Corner cases such as `-o/--only-matching` and `-C/--context` are handled as well. Closes #812
This commit is contained in:
parent
91d0756f62
commit
b006943c01
@ -25,6 +25,7 @@ _rg() {
|
||||
'*--colors=[specify color settings and styles]: :->colorspec'
|
||||
'--column[show column numbers]'
|
||||
'(-A -B -C --after-context --before-context --context)'{-C+,--context=}'[specify lines to show before and after each match]:number of lines'
|
||||
'(-b --byte-offset)'{-b,--byte-offset}'[print the 0-based byte offset for each matching line]'
|
||||
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
|
||||
'(-c --count --passthrough --passthru)'{-c,--count}'[only show count of matches for each file]'
|
||||
'--debug[show debug messages]'
|
||||
|
13
src/app.rs
13
src/app.rs
@ -509,6 +509,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
// Flags can be defined in any order, but we do it alphabetically.
|
||||
flag_after_context(&mut args);
|
||||
flag_before_context(&mut args);
|
||||
flag_byte_offset(&mut args);
|
||||
flag_case_sensitive(&mut args);
|
||||
flag_color(&mut args);
|
||||
flag_colors(&mut args);
|
||||
@ -634,6 +635,18 @@ This overrides the --context flag.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
// Registers the -b/--byte-offset flag: builds its RGArg and appends it to
// `args`.
fn flag_byte_offset(args: &mut Vec<RGArg>) {
|
||||
// One-line summary, passed to .help() below.
const SHORT: &str = "Print the 0-based byte offset for each matching line.";
|
||||
// Extended description, passed to .long_help() below.
const LONG: &str = long!("\
|
||||
Print the 0-based byte offset within the input file
|
||||
before each line of output. If -o (--only-matching) is
|
||||
specified, print the offset of the matching part itself.
|
||||
");
|
||||
// Long name "byte-offset", short name "b".
let arg = RGArg::switch("byte-offset").short("b")
|
||||
.help(SHORT).long_help(LONG);
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_case_sensitive(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Search case sensitively (default).";
|
||||
const LONG: &str = long!("\
|
||||
|
@ -35,6 +35,7 @@ pub struct Args {
|
||||
paths: Vec<PathBuf>,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
byte_offset: bool,
|
||||
color_choice: termcolor::ColorChoice,
|
||||
colors: ColorSpecs,
|
||||
column: bool,
|
||||
@ -259,6 +260,7 @@ impl Args {
|
||||
WorkerBuilder::new(self.grep())
|
||||
.after_context(self.after_context)
|
||||
.before_context(self.before_context)
|
||||
.byte_offset(self.byte_offset)
|
||||
.count(self.count)
|
||||
.encoding(self.encoding)
|
||||
.files_with_matches(self.files_with_matches)
|
||||
@ -361,6 +363,7 @@ impl<'a> ArgMatches<'a> {
|
||||
paths: paths,
|
||||
after_context: after_context,
|
||||
before_context: before_context,
|
||||
byte_offset: self.is_present("byte-offset"),
|
||||
color_choice: self.color_choice(),
|
||||
colors: self.color_specs()?,
|
||||
column: self.column(),
|
||||
|
@ -280,6 +280,7 @@ impl<W: WriteColor> Printer<W> {
|
||||
start: usize,
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
byte_offset: Option<u64>
|
||||
) {
|
||||
if !self.line_per_match && !self.only_matching {
|
||||
let mat = re
|
||||
@ -287,12 +288,13 @@ impl<W: WriteColor> Printer<W> {
|
||||
.map(|m| (m.start(), m.end()))
|
||||
.unwrap_or((0, 0));
|
||||
return self.write_match(
|
||||
re, path, buf, start, end, line_number, mat.0, mat.1);
|
||||
re, path, buf, start, end, line_number,
|
||||
byte_offset, mat.0, mat.1);
|
||||
}
|
||||
for m in re.find_iter(&buf[start..end]) {
|
||||
self.write_match(
|
||||
re, path.as_ref(), buf, start, end,
|
||||
line_number, m.start(), m.end());
|
||||
re, path.as_ref(), buf, start, end, line_number,
|
||||
byte_offset, m.start(), m.end());
|
||||
}
|
||||
}
|
||||
|
||||
@ -304,6 +306,7 @@ impl<W: WriteColor> Printer<W> {
|
||||
start: usize,
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
match_start: usize,
|
||||
match_end: usize,
|
||||
) {
|
||||
@ -321,6 +324,14 @@ impl<W: WriteColor> Printer<W> {
|
||||
if self.column {
|
||||
self.column_number(match_start as u64 + 1, b':');
|
||||
}
|
||||
if let Some(byte_offset) = byte_offset {
|
||||
if self.only_matching {
|
||||
self.write_byte_offset(
|
||||
byte_offset + ((start + match_start) as u64), b':');
|
||||
} else {
|
||||
self.write_byte_offset(byte_offset + (start as u64), b':');
|
||||
}
|
||||
}
|
||||
if self.replace.is_some() {
|
||||
let mut count = 0;
|
||||
let mut offsets = Vec::new();
|
||||
@ -395,6 +406,7 @@ impl<W: WriteColor> Printer<W> {
|
||||
start: usize,
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
) {
|
||||
if self.heading && self.with_filename && !self.has_printed {
|
||||
self.write_file_sep();
|
||||
@ -407,6 +419,9 @@ impl<W: WriteColor> Printer<W> {
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b'-');
|
||||
}
|
||||
if let Some(byte_offset) = byte_offset {
|
||||
self.write_byte_offset(byte_offset + (start as u64), b'-');
|
||||
}
|
||||
if self.max_columns.map_or(false, |m| end - start > m) {
|
||||
self.write(b"[Omitted long context line]");
|
||||
self.write_eol();
|
||||
@ -481,6 +496,11 @@ impl<W: WriteColor> Printer<W> {
|
||||
self.separator(&[sep]);
|
||||
}
|
||||
|
||||
// Writes the byte offset `o` as decimal text, followed by the single-byte
// separator `sep`. Callers pass b':' for matched lines and b'-' for context
// lines.
fn write_byte_offset(&mut self, o: u64, sep: u8) {
|
||||
// The number is written with the color spec chosen by colors.column().
self.write_colored(o.to_string().as_bytes(), |colors| colors.column());
|
||||
self.separator(&[sep]);
|
||||
}
|
||||
|
||||
fn write(&mut self, buf: &[u8]) {
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(buf);
|
||||
|
@ -23,6 +23,7 @@ pub struct BufferSearcher<'a, W: 'a> {
|
||||
buf: &'a [u8],
|
||||
match_count: u64,
|
||||
line_count: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
last_line: usize,
|
||||
}
|
||||
|
||||
@ -41,10 +42,21 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
|
||||
buf: buf,
|
||||
match_count: 0,
|
||||
line_count: None,
|
||||
byte_offset: None,
|
||||
last_line: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the 0-based byte offset of the
|
||||
/// matching line (or of the match itself if -o is specified) before
|
||||
/// printing the line itself.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn byte_offset(mut self, yes: bool) -> Self {
|
||||
// Record the choice in the options, then return self so that further
// setters can be chained onto this call.
self.opts.byte_offset = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
@ -120,6 +132,9 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
|
||||
|
||||
self.match_count = 0;
|
||||
self.line_count = if self.opts.line_number { Some(0) } else { None };
|
||||
// The memory map searcher uses one contiguous block of bytes, so the
|
||||
// offsets given the printer are sufficient to compute the byte offset.
|
||||
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
|
||||
let mut last_end = 0;
|
||||
for m in self.grep.iter(self.buf) {
|
||||
if self.opts.invert_match {
|
||||
@ -158,7 +173,7 @@ impl<'a, W: WriteColor> BufferSearcher<'a, W> {
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), self.path, self.buf,
|
||||
start, end, self.line_count);
|
||||
start, end, self.line_count, self.byte_offset);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@ -271,6 +286,29 @@ and exhibited clearly, with a label attached.\
|
||||
");
|
||||
}
|
||||
|
||||
// With byte_offset enabled, the expected output carries the starting byte
// offset of each matching line (0 and 129) between the path and the line.
#[test]
|
||||
fn byte_offset() {
|
||||
let (_, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.byte_offset(true));
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
");
|
||||
}
|
||||
|
||||
// byte_offset combined with invert_match: the expected output lists the lines
// that do not contain "Sherlock", each prefixed with its own starting byte
// offset.
#[test]
|
||||
fn byte_offset_inverted() {
|
||||
let (_, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).byte_offset(true)
|
||||
});
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:65:Holmeses, success in the province of detective work must always
|
||||
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/baz.rs:321:and exhibited clearly, with a label attached.
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn count() {
|
||||
let (count, out) = search(
|
||||
|
@ -69,6 +69,7 @@ pub struct Searcher<'a, R, W: 'a> {
|
||||
haystack: R,
|
||||
match_count: u64,
|
||||
line_count: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
last_match: Match,
|
||||
last_printed: usize,
|
||||
last_line: usize,
|
||||
@ -80,6 +81,7 @@ pub struct Searcher<'a, R, W: 'a> {
|
||||
pub struct Options {
|
||||
pub after_context: usize,
|
||||
pub before_context: usize,
|
||||
pub byte_offset: bool,
|
||||
pub count: bool,
|
||||
pub files_with_matches: bool,
|
||||
pub files_without_matches: bool,
|
||||
@ -96,6 +98,7 @@ impl Default for Options {
|
||||
Options {
|
||||
after_context: 0,
|
||||
before_context: 0,
|
||||
byte_offset: false,
|
||||
count: false,
|
||||
files_with_matches: false,
|
||||
files_without_matches: false,
|
||||
@ -165,6 +168,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
haystack: haystack,
|
||||
match_count: 0,
|
||||
line_count: None,
|
||||
byte_offset: None,
|
||||
last_match: Match::default(),
|
||||
last_printed: 0,
|
||||
last_line: 0,
|
||||
@ -186,6 +190,16 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the 0-based byte offset of the
|
||||
/// matching line (or of the match itself if -o is specified) before
|
||||
/// printing the line itself.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn byte_offset(mut self, yes: bool) -> Self {
|
||||
// Record the choice in the options, then return self so that further
// setters can be chained onto this call.
self.opts.byte_offset = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
@ -259,6 +273,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
self.inp.reset();
|
||||
self.match_count = 0;
|
||||
self.line_count = if self.opts.line_number { Some(0) } else { None };
|
||||
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
|
||||
self.last_match = Match::default();
|
||||
self.after_context_remaining = 0;
|
||||
while !self.terminate() {
|
||||
@ -327,17 +342,18 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn fill(&mut self) -> Result<bool, Error> {
|
||||
let keep = if self.opts.before_context > 0 || self.opts.after_context > 0 {
|
||||
let lines = 1 + cmp::max(
|
||||
self.opts.before_context, self.opts.after_context);
|
||||
start_of_previous_lines(
|
||||
self.opts.eol,
|
||||
&self.inp.buf,
|
||||
self.inp.lastnl.saturating_sub(1),
|
||||
lines)
|
||||
} else {
|
||||
self.inp.lastnl
|
||||
};
|
||||
let keep =
|
||||
if self.opts.before_context > 0 || self.opts.after_context > 0 {
|
||||
let lines = 1 + cmp::max(
|
||||
self.opts.before_context, self.opts.after_context);
|
||||
start_of_previous_lines(
|
||||
self.opts.eol,
|
||||
&self.inp.buf,
|
||||
self.inp.lastnl.saturating_sub(1),
|
||||
lines)
|
||||
} else {
|
||||
self.inp.lastnl
|
||||
};
|
||||
if keep < self.last_printed {
|
||||
self.last_printed -= keep;
|
||||
} else {
|
||||
@ -349,6 +365,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
self.count_lines(keep);
|
||||
self.last_line = 0;
|
||||
}
|
||||
self.count_byte_offset(keep);
|
||||
let ok = self.inp.fill(&mut self.haystack, keep).map_err(|err| {
|
||||
Error::from_io(err, &self.path)
|
||||
})?;
|
||||
@ -419,7 +436,7 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), self.path,
|
||||
&self.inp.buf, start, end, self.line_count);
|
||||
&self.inp.buf, start, end, self.line_count, self.byte_offset);
|
||||
self.last_printed = end;
|
||||
self.after_context_remaining = self.opts.after_context;
|
||||
}
|
||||
@ -429,7 +446,8 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
self.count_lines(start);
|
||||
self.add_line(end);
|
||||
self.printer.context(
|
||||
&self.path, &self.inp.buf, start, end, self.line_count);
|
||||
&self.path, &self.inp.buf, start, end,
|
||||
self.line_count, self.byte_offset);
|
||||
self.last_printed = end;
|
||||
}
|
||||
|
||||
@ -447,6 +465,13 @@ impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> {
|
||||
}
|
||||
}
|
||||
|
||||
// Advances the running byte offset by `buf_last_end`. Does nothing when
// self.byte_offset is None, i.e. when byte offsets are not being tracked.
//
// fill() calls this with `keep` just before refilling the input buffer.
// NOTE(review): presumably `keep` is the number of bytes discarded from the
// front of the buffer, so the running value is the file offset of the start
// of the buffer -- confirm against the input buffer's fill implementation.
#[inline(always)]
|
||||
fn count_byte_offset(&mut self, buf_last_end: usize) {
|
||||
if let Some(ref mut byte_offset) = self.byte_offset {
|
||||
*byte_offset += buf_last_end as u64;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn count_lines(&mut self, upto: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
@ -1006,6 +1031,41 @@ fn main() {
|
||||
assert_eq!(out, "/baz.rs:2\n");
|
||||
}
|
||||
|
||||
// With byte_offset enabled, the expected output carries the starting byte
// offset of each matching line (0 and 129) between the path and the line.
// NOTE(review): search_smallcap presumably runs the search with a small
// buffer capacity so that the buffer is refilled mid-input -- confirm against
// the helper.
#[test]
|
||||
fn byte_offset() {
|
||||
let (_, out) = search_smallcap(
|
||||
"Sherlock", SHERLOCK, |s| s.byte_offset(true));
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
");
|
||||
}
|
||||
|
||||
// Line numbers, byte offsets and two lines of before-context together: in the
// expected output the byte offset follows the line number, context lines use
// '-' as the field separator and the matching line uses ':'.
#[test]
|
||||
fn byte_offset_with_before_context() {
|
||||
let (_, out) = search_smallcap("dusted", SHERLOCK, |s| {
|
||||
s.line_number(true).byte_offset(true).before_context(2)
|
||||
});
|
||||
assert_eq!(out, "\
|
||||
/baz.rs-3-129-be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
/baz.rs-4-193-can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:5:258:but Doctor Watson has to have it taken out for him and dusted,
|
||||
");
|
||||
}
|
||||
|
||||
// byte_offset combined with invert_match: the expected output lists the lines
// that do not contain "Sherlock", each prefixed with its own starting byte
// offset.
#[test]
|
||||
fn byte_offset_inverted() {
|
||||
let (_, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).byte_offset(true)
|
||||
});
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:65:Holmeses, success in the province of detective work must always
|
||||
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/baz.rs:321:and exhibited clearly, with a label attached.
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn files_with_matches() {
|
||||
let (count, out) = search_smallcap(
|
||||
|
@ -33,6 +33,7 @@ struct Options {
|
||||
encoding: Option<&'static Encoding>,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
byte_offset: bool,
|
||||
count: bool,
|
||||
files_with_matches: bool,
|
||||
files_without_matches: bool,
|
||||
@ -53,6 +54,7 @@ impl Default for Options {
|
||||
encoding: None,
|
||||
after_context: 0,
|
||||
before_context: 0,
|
||||
byte_offset: false,
|
||||
count: false,
|
||||
files_with_matches: false,
|
||||
files_without_matches: false,
|
||||
@ -106,6 +108,16 @@ impl WorkerBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the 0-based byte offset of the
|
||||
/// matching line (or of the match itself if -o is specified) before
|
||||
/// printing the line itself.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn byte_offset(mut self, yes: bool) -> Self {
|
||||
// Record the choice in the options, then return self so that further
// setters can be chained onto this call.
self.opts.byte_offset = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
@ -283,6 +295,7 @@ impl Worker {
|
||||
searcher
|
||||
.after_context(self.opts.after_context)
|
||||
.before_context(self.opts.before_context)
|
||||
.byte_offset(self.opts.byte_offset)
|
||||
.count(self.opts.count)
|
||||
.files_with_matches(self.opts.files_with_matches)
|
||||
.files_without_matches(self.opts.files_without_matches)
|
||||
@ -322,6 +335,7 @@ impl Worker {
|
||||
}
|
||||
let searcher = BufferSearcher::new(printer, &self.grep, path, buf);
|
||||
Ok(searcher
|
||||
.byte_offset(self.opts.byte_offset)
|
||||
.count(self.opts.count)
|
||||
.files_with_matches(self.opts.files_with_matches)
|
||||
.files_without_matches(self.opts.files_without_matches)
|
||||
|
@ -395,6 +395,16 @@ sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, "file2.html:Sherlock\n");
|
||||
});
|
||||
|
||||
// Runs the command with -b and -o together and checks that every printed
// match is prefixed with a byte offset (56 and 177 in the expected output).
// NOTE(review): these look like the offsets of the matched text rather than
// of the line starts -- confirm against the contents of the "sherlock"
// fixture file.
sherlock!(byte_offset_only_matching, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
cmd.arg("-b").arg("-o");
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = "\
|
||||
sherlock:56:Sherlock
|
||||
sherlock:177:Sherlock
|
||||
";
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
cmd.arg("--count");
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
|
Loading…
x
Reference in New Issue
Block a user