1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-01-03 05:10:12 +02:00

The search code is a mess, but...

... we now support inverted matches and line numbers!
This commit is contained in:
Andrew Gallant 2016-08-29 22:44:15 -04:00
parent c809679cf2
commit d011cea053
4 changed files with 196 additions and 27 deletions

View File

@ -150,6 +150,7 @@ impl GrepBuilder {
try!(syntax::ExprBuilder::new() try!(syntax::ExprBuilder::new()
.allow_bytes(true) .allow_bytes(true)
.unicode(true) .unicode(true)
.case_insensitive(self.opts.case_insensitive)
.parse(&self.pattern)); .parse(&self.pattern));
Ok(try!(nonl::remove(expr, self.opts.line_terminator))) Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
} }

View File

@ -69,11 +69,13 @@ Options:
--debug Show debug messages. --debug Show debug messages.
--files Print each file that would be searched --files Print each file that would be searched
(but don't search). (but don't search).
-L, --follow Follow symlinks.
--hidden Search hidden directories and files. --hidden Search hidden directories and files.
-i, --ignore-case Case insensitive search. -i, --ignore-case Case insensitive search.
-L, --follow Follow symlinks.
-n, --line-number Show line numbers (1-based).
-t, --threads ARG The number of threads to use. Defaults to the number -t, --threads ARG The number of threads to use. Defaults to the number
of logical CPUs. [default: 0] of logical CPUs. [default: 0]
-v, --invert-match Invert matching.
"; ";
#[derive(RustcDecodable)] #[derive(RustcDecodable)]
@ -86,6 +88,8 @@ struct Args {
flag_follow: bool, flag_follow: bool,
flag_hidden: bool, flag_hidden: bool,
flag_ignore_case: bool, flag_ignore_case: bool,
flag_invert_match: bool,
flag_line_number: bool,
flag_threads: usize, flag_threads: usize,
} }
@ -224,13 +228,16 @@ impl Worker {
outbuf.clear(); outbuf.clear();
let mut printer = self.args.printer(outbuf); let mut printer = self.args.printer(outbuf);
{ {
let searcher = Searcher { let mut searcher = Searcher::new(
grep: &self.grep, &mut self.inpbuf,
path: &path, &mut printer,
haystack: file, &self.grep,
inp: &mut self.inpbuf, &path,
printer: &mut printer, file,
}; );
searcher = searcher.count(self.args.flag_count);
searcher = searcher.line_number(self.args.flag_line_number);
searcher = searcher.invert_match(self.args.flag_invert_match);
if let Err(err) = searcher.run() { if let Err(err) = searcher.run() {
eprintln!("{}", err); eprintln!("{}", err);
} }

View File

@ -1,14 +1,18 @@
use std::io; use std::io;
use std::path::Path; use std::path::Path;
use grep::Match;
macro_rules! wln { macro_rules! wln {
($($tt:tt)*) => { ($($tt:tt)*) => {
let _ = writeln!($($tt)*); let _ = writeln!($($tt)*);
} }
} }
/// Same arguments as `write!`, but the `Result` it produces is thrown away,
/// so a failed write is silently ignored. No newline is appended.
macro_rules! w {
    ($($arg:tt)*) => {
        let _ = write!($($arg)*);
    }
}
pub struct Printer<W> { pub struct Printer<W> {
wtr: W, wtr: W,
} }
@ -40,15 +44,25 @@ impl<W: io::Write> Printer<W> {
&mut self, &mut self,
path: P, path: P,
buf: &[u8], buf: &[u8],
m: &Match, start: usize,
end: usize,
line_number: Option<u64>,
) { ) {
let _ = self.wtr.write(path.as_ref().to_string_lossy().as_bytes()); self.write(path.as_ref().to_string_lossy().as_bytes());
let _ = self.wtr.write(b":"); self.write(b":");
let _ = self.wtr.write(&buf[m.start()..m.end()]); if let Some(line_number) = line_number {
let _ = self.wtr.write(b"\n"); self.write(line_number.to_string().as_bytes());
self.write(b":");
}
self.write(&buf[start..end]);
self.write(b"\n");
} }
pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) { pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display()); wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display());
} }
/// Send all of `buf` to the wrapped writer via `write_all`.
///
/// The result of the write is dropped, so an I/O failure here is neither
/// reported nor propagated to the caller.
fn write(&mut self, buf: &[u8]) {
    drop(self.wtr.write_all(buf));
}
} }

View File

@ -14,6 +14,7 @@ use memchr::{memchr, memrchr};
use printer::Printer; use printer::Printer;
/// The default read size (capacity of input buffer).
const READ_SIZE: usize = 8 * (1<<10); const READ_SIZE: usize = 8 * (1<<10);
/// Error describes errors that can occur while searching. /// Error describes errors that can occur while searching.
@ -57,37 +58,165 @@ impl fmt::Display for Error {
} }
pub struct Searcher<'a, R, W: 'a> { pub struct Searcher<'a, R, W: 'a> {
pub grep: &'a Grep, inp: &'a mut InputBuffer,
pub path: &'a Path, printer: &'a mut Printer<W>,
pub haystack: R, grep: &'a Grep,
pub inp: &'a mut InputBuffer, path: &'a Path,
pub printer: &'a mut Printer<W>, haystack: R,
count: bool,
invert_match: bool,
line_number: bool,
} }
impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> { impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
/// Build a searcher with all options (`count`, `invert_match` and
/// `line_number`) switched off.
///
/// * `inp` - a reusable input buffer that this searcher uses as scratch
///   space.
/// * `printer` - receives all results of searching.
/// * `grep` - the actual matcher.
/// * `path` - the file path being searched.
/// * `haystack` - a reader of the text to search.
pub fn new(
    inp: &'a mut InputBuffer,
    printer: &'a mut Printer<W>,
    grep: &'a Grep,
    path: &'a Path,
    haystack: R,
) -> Searcher<'a, R, W> {
    Searcher {
        // Options stay off until one of the builder methods enables them.
        count: false,
        invert_match: false,
        line_number: false,
        // Inputs supplied by the caller.
        grep: grep,
        path: path,
        haystack: haystack,
        inp: inp,
        printer: printer,
    }
}
/// Choose between printing each match (`false`) and printing a count of
/// matches (`true`).
///
/// Disabled by default.
pub fn count(self, yes: bool) -> Self {
    let mut searcher = self;
    searcher.count = yes;
    searcher
}
/// Choose whether matching is inverted. When `yes` is true, the lines that
/// do *not* match the given pattern are the ones treated as matches.
///
/// Disabled by default.
pub fn invert_match(self, yes: bool) -> Self {
    let mut searcher = self;
    searcher.invert_match = yes;
    searcher
}
/// Choose whether line numbers are computed during the search and used as
/// a prefix on each line of output.
///
/// Disabled by default.
pub fn line_number(self, yes: bool) -> Self {
    let mut searcher = self;
    searcher.line_number = yes;
    searcher
}
/// Execute the search. Results are written to the printer and the total
/// number of matches is returned.
#[inline(never)] #[inline(never)]
pub fn run(mut self) -> Result<(), Error> { pub fn run(mut self) -> Result<u64, Error> {
self.inp.reset(); self.inp.reset();
let mut match_count = 0;
let mut line_count = if self.line_number { Some(0) } else { None };
let mut mat = Match::default(); let mut mat = Match::default();
loop { loop {
let ok = try!(self.inp.fill(&mut self.haystack).map_err(|err| { let ok = try!(self.inp.fill(&mut self.haystack).map_err(|err| {
Error::from_io(err, &self.path) Error::from_io(err, &self.path)
})); }));
if !ok { if !ok {
return Ok(()); break;
} }
loop { while self.inp.pos < self.inp.lastnl {
let ok = self.grep.read_match( let ok = self.grep.read_match(
&mut mat, &mut mat,
&mut self.inp.buf[..self.inp.lastnl], &mut self.inp.buf[..self.inp.lastnl],
self.inp.pos); self.inp.pos);
if !ok { if !ok {
if self.invert_match {
while let Some(pos) = memchr(b'\n', &self.inp.buf[self.inp.pos..self.inp.lastnl]) {
if let Some(ref mut line_count) = line_count {
*line_count += 1;
}
self.printer.matched(
&self.path,
&self.inp.buf,
self.inp.pos,
self.inp.pos + pos,
line_count,
);
self.inp.pos += pos + 1;
match_count += 1;
if self.inp.pos >= self.inp.lastnl {
break;
}
}
self.inp.pos = self.inp.lastnl;
} else if let Some(ref mut line_count) = line_count {
*line_count += count_lines(
&self.inp.buf[self.inp.pos..self.inp.lastnl]);
}
break; break;
} }
self.inp.pos = mat.end() + 1; if self.invert_match {
self.printer.matched(self.path, &self.inp.buf, &mat); while let Some(pos) = memchr(b'\n', &self.inp.buf[self.inp.pos..mat.start()]) {
if let Some(ref mut line_count) = line_count {
*line_count += 1;
}
self.printer.matched(
&self.path,
&self.inp.buf,
self.inp.pos,
self.inp.pos + pos,
line_count,
);
self.inp.pos += pos + 1;
match_count += 1;
}
if let Some(ref mut line_count) = line_count {
*line_count += 1;
}
self.inp.pos = mat.end() + 1;
} else {
if let Some(ref mut line_count) = line_count {
// mat.end() always points immediately after the end
// of a match, which could be *at* a nl or past our
// current search buffer. Either way, count it as one
// more line.
*line_count += 1 + count_lines(
&self.inp.buf[self.inp.pos..mat.end()]);
}
match_count += 1;
if !self.count {
self.printer.matched(
self.path,
&self.inp.buf,
mat.start(),
mat.end(),
line_count,
);
}
// Move the position one past the end of the match so that
// the next search starts after the nl. If we're at EOF,
// then pos will be past EOF.
self.inp.pos = mat.end() + 1;
}
} }
} }
if self.count && match_count > 0 {
self.printer.path_count(self.path, match_count);
}
Ok(match_count)
} }
} }
@ -102,9 +231,18 @@ pub struct InputBuffer {
} }
impl InputBuffer { impl InputBuffer {
/// Create a new buffer with a default capacity.
pub fn new() -> InputBuffer { pub fn new() -> InputBuffer {
InputBuffer::with_capacity(READ_SIZE)
}
/// Create a new buffer with the capacity given.
///
/// The capacity determines the size of each read from the underlying
/// reader.
pub fn with_capacity(cap: usize) -> InputBuffer {
InputBuffer { InputBuffer {
buf: vec![0; READ_SIZE], buf: vec![0; cap],
tmp: vec![], tmp: vec![],
pos: 0, pos: 0,
lastnl: 0, lastnl: 0,
@ -160,7 +298,7 @@ impl InputBuffer {
// 2) Subsequent iterations only occur if no nl could be found. // 2) Subsequent iterations only occur if no nl could be found.
self.lastnl = self.lastnl =
memrchr(b'\n', &self.buf[self.end..self.end + n]) memrchr(b'\n', &self.buf[self.end..self.end + n])
.map(|i| self.end + i) .map(|i| self.end + i + 1)
.unwrap_or(0); .unwrap_or(0);
self.end += n; self.end += n;
} }
@ -174,3 +312,12 @@ fn is_binary(buf: &[u8]) -> bool {
} }
memchr(b'\x00', &buf[0..cmp::min(1024, buf.len())]).is_some() memchr(b'\x00', &buf[0..cmp::min(1024, buf.len())]).is_some()
} }
/// Return how many `\n` bytes occur in `buf`.
///
/// A trailing run of bytes with no terminating `\n` is not counted.
fn count_lines(buf: &[u8]) -> u64 {
    let mut rest = buf;
    let mut total: u64 = 0;
    loop {
        match memchr(b'\n', rest) {
            Some(idx) => {
                total += 1;
                // Resume scanning just past the newline that was found.
                rest = &rest[idx + 1..];
            }
            None => return total,
        }
    }
}