1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-01-03 05:10:12 +02:00

The search code is a mess, but...

... we now support inverted matches and line numbers!
This commit is contained in:
Andrew Gallant 2016-08-29 22:44:15 -04:00
parent c809679cf2
commit d011cea053
4 changed files with 196 additions and 27 deletions

View File

@ -150,6 +150,7 @@ impl GrepBuilder {
try!(syntax::ExprBuilder::new()
.allow_bytes(true)
.unicode(true)
.case_insensitive(self.opts.case_insensitive)
.parse(&self.pattern));
Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
}

View File

@ -69,11 +69,13 @@ Options:
--debug Show debug messages.
--files Print each file that would be searched
(but don't search).
-L, --follow Follow symlinks.
--hidden Search hidden directories and files.
-i, --ignore-case Case insensitive search.
-L, --follow Follow symlinks.
-n, --line-number Show line numbers (1-based).
-t, --threads ARG The number of threads to use. Defaults to the number
of logical CPUs. [default: 0]
-v, --invert-match Invert matching.
";
#[derive(RustcDecodable)]
@ -86,6 +88,8 @@ struct Args {
flag_follow: bool,
flag_hidden: bool,
flag_ignore_case: bool,
flag_invert_match: bool,
flag_line_number: bool,
flag_threads: usize,
}
@ -224,13 +228,16 @@ impl Worker {
outbuf.clear();
let mut printer = self.args.printer(outbuf);
{
let searcher = Searcher {
grep: &self.grep,
path: &path,
haystack: file,
inp: &mut self.inpbuf,
printer: &mut printer,
};
let mut searcher = Searcher::new(
&mut self.inpbuf,
&mut printer,
&self.grep,
&path,
file,
);
searcher = searcher.count(self.args.flag_count);
searcher = searcher.line_number(self.args.flag_line_number);
searcher = searcher.invert_match(self.args.flag_invert_match);
if let Err(err) = searcher.run() {
eprintln!("{}", err);
}

View File

@ -1,14 +1,18 @@
use std::io;
use std::path::Path;
use grep::Match;
/// Like `writeln!`, but the resulting `Result` is thrown away, so a failed
/// write is silently ignored.
///
/// Expands to a `let` statement, so it must be used in statement position.
macro_rules! wln {
    ($($args:tt)*) => {
        let _ = writeln!($($args)*);
    };
}
/// Like `write!`, but the resulting `Result` is thrown away, so a failed
/// write is silently ignored.
///
/// Expands to a `let` statement, so it must be used in statement position.
macro_rules! w {
    ($($args:tt)*) => {
        let _ = write!($($args)*);
    };
}
pub struct Printer<W> {
wtr: W,
}
@ -40,15 +44,25 @@ impl<W: io::Write> Printer<W> {
&mut self,
path: P,
buf: &[u8],
m: &Match,
start: usize,
end: usize,
line_number: Option<u64>,
) {
let _ = self.wtr.write(path.as_ref().to_string_lossy().as_bytes());
let _ = self.wtr.write(b":");
let _ = self.wtr.write(&buf[m.start()..m.end()]);
let _ = self.wtr.write(b"\n");
self.write(path.as_ref().to_string_lossy().as_bytes());
self.write(b":");
if let Some(line_number) = line_number {
self.write(line_number.to_string().as_bytes());
self.write(b":");
}
self.write(&buf[start..end]);
self.write(b"\n");
}
/// Report that the binary file at `path` matches, as a single line of the
/// form `binary file <path> matches`.
///
/// The line is emitted through `wln!`, so a failed write is not reported.
pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
    let shown = path.as_ref().display();
    wln!(&mut self.wtr, "binary file {} matches", shown);
}
/// Write all of `buf` to the underlying writer.
///
/// Output is best-effort: if the writer reports an error it is discarded.
fn write(&mut self, buf: &[u8]) {
    self.wtr.write_all(buf).unwrap_or(());
}
}

View File

@ -14,6 +14,7 @@ use memchr::{memchr, memrchr};
use printer::Printer;
/// The default read size (capacity of input buffer).
const READ_SIZE: usize = 8 * (1<<10);
/// Error describes errors that can occur while searching.
@ -57,38 +58,166 @@ impl fmt::Display for Error {
}
pub struct Searcher<'a, R, W: 'a> {
pub grep: &'a Grep,
pub path: &'a Path,
pub haystack: R,
pub inp: &'a mut InputBuffer,
pub printer: &'a mut Printer<W>,
inp: &'a mut InputBuffer,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
haystack: R,
count: bool,
invert_match: bool,
line_number: bool,
}
impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
/// Create a new searcher.
///
/// `inp` is a reusable input buffer that is used as scratch space by this
/// searcher.
///
/// `printer` is used to output all results of searching.
///
/// `grep` is the actual matcher.
///
/// `path` is the file path being searched.
///
/// `haystack` is a reader of text to search.
pub fn new(
inp: &'a mut InputBuffer,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
haystack: R,
) -> Searcher<'a, R, W> {
Searcher {
inp: inp,
printer: printer,
grep: grep,
path: path,
haystack: haystack,
count: false,
invert_match: false,
line_number: false,
}
}
/// Toggle count mode: when `yes` is true, searching prints a count instead
/// of each match.
///
/// Disabled by default.
pub fn count(self, yes: bool) -> Self {
    let mut searcher = self;
    searcher.count = yes;
    searcher
}
/// Toggle inverted matching: when `yes` is true, lines that *don't* match
/// the given pattern are the ones treated as matches.
///
/// Disabled by default.
pub fn invert_match(self, yes: bool) -> Self {
    let mut searcher = self;
    searcher.invert_match = yes;
    searcher
}
/// Toggle line numbering: when `yes` is true, line numbers are computed
/// and each line of output is prefixed with its number.
///
/// Disabled by default.
pub fn line_number(self, yes: bool) -> Self {
    let mut searcher = self;
    searcher.line_number = yes;
    searcher
}
/// Execute the search. Results are written to the printer and the total
/// number of matches is returned.
#[inline(never)]
pub fn run(mut self) -> Result<(), Error> {
pub fn run(mut self) -> Result<u64, Error> {
self.inp.reset();
let mut match_count = 0;
let mut line_count = if self.line_number { Some(0) } else { None };
let mut mat = Match::default();
loop {
let ok = try!(self.inp.fill(&mut self.haystack).map_err(|err| {
Error::from_io(err, &self.path)
}));
if !ok {
return Ok(());
break;
}
loop {
while self.inp.pos < self.inp.lastnl {
let ok = self.grep.read_match(
&mut mat,
&mut self.inp.buf[..self.inp.lastnl],
self.inp.pos);
if !ok {
if self.invert_match {
while let Some(pos) = memchr(b'\n', &self.inp.buf[self.inp.pos..self.inp.lastnl]) {
if let Some(ref mut line_count) = line_count {
*line_count += 1;
}
self.printer.matched(
&self.path,
&self.inp.buf,
self.inp.pos,
self.inp.pos + pos,
line_count,
);
self.inp.pos += pos + 1;
match_count += 1;
if self.inp.pos >= self.inp.lastnl {
break;
}
}
self.inp.pos = self.inp.lastnl;
} else if let Some(ref mut line_count) = line_count {
*line_count += count_lines(
&self.inp.buf[self.inp.pos..self.inp.lastnl]);
}
break;
}
if self.invert_match {
while let Some(pos) = memchr(b'\n', &self.inp.buf[self.inp.pos..mat.start()]) {
if let Some(ref mut line_count) = line_count {
*line_count += 1;
}
self.printer.matched(
&self.path,
&self.inp.buf,
self.inp.pos,
self.inp.pos + pos,
line_count,
);
self.inp.pos += pos + 1;
match_count += 1;
}
if let Some(ref mut line_count) = line_count {
*line_count += 1;
}
self.inp.pos = mat.end() + 1;
} else {
if let Some(ref mut line_count) = line_count {
// mat.end() always points immediately after the end
// of a match, which could be *at* a nl or past our
// current search buffer. Either way, count it as one
// more line.
*line_count += 1 + count_lines(
&self.inp.buf[self.inp.pos..mat.end()]);
}
match_count += 1;
if !self.count {
self.printer.matched(
self.path,
&self.inp.buf,
mat.start(),
mat.end(),
line_count,
);
}
// Move the position one past the end of the match so that
// the next search starts after the nl. If we're at EOF,
// then pos will be past EOF.
self.inp.pos = mat.end() + 1;
self.printer.matched(self.path, &self.inp.buf, &mat);
}
}
}
if self.count && match_count > 0 {
self.printer.path_count(self.path, match_count);
}
Ok(match_count)
}
}
pub struct InputBuffer {
@ -102,9 +231,18 @@ pub struct InputBuffer {
}
impl InputBuffer {
/// Construct a buffer whose capacity is the default read size,
/// `READ_SIZE`.
pub fn new() -> InputBuffer {
    let default_capacity = READ_SIZE;
    InputBuffer::with_capacity(default_capacity)
}
/// Create a new buffer with the capacity given.
///
/// The capacity determines the size of each read from the underlying
/// reader.
pub fn with_capacity(cap: usize) -> InputBuffer {
InputBuffer {
buf: vec![0; READ_SIZE],
buf: vec![0; cap],
tmp: vec![],
pos: 0,
lastnl: 0,
@ -160,7 +298,7 @@ impl InputBuffer {
// 2) Subsequent iterations only occur if no nl could be found.
self.lastnl =
memrchr(b'\n', &self.buf[self.end..self.end + n])
.map(|i| self.end + i)
.map(|i| self.end + i + 1)
.unwrap_or(0);
self.end += n;
}
@ -174,3 +312,12 @@ fn is_binary(buf: &[u8]) -> bool {
}
memchr(b'\x00', &buf[0..cmp::min(1024, buf.len())]).is_some()
}
/// Return the number of `\n` bytes contained in `buf`.
///
/// A final line with no terminating newline is not counted.
fn count_lines(mut buf: &[u8]) -> u64 {
    let mut newlines = 0;
    loop {
        match memchr(b'\n', buf) {
            Some(at) => {
                newlines += 1;
                // Resume scanning just past the newline that was found.
                buf = &buf[at + 1..];
            }
            None => return newlines,
        }
    }
}