1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-06-09 14:07:45 +02:00
ripgrep/src/search.rs

514 lines
16 KiB
Rust
Raw Normal View History

/*!
The search module is responsible for searching a single file and printing
matches.
*/
use std::cmp;
use std::error::Error as StdError;
use std::fmt;
use std::io;
use std::path::{Path, PathBuf};
use grep::{Grep, Match};
use memchr::{memchr, memrchr};
use printer::Printer;
/// Default capacity of the input buffer, in bytes (8 KiB).
const READ_SIZE: usize = 8 << 10;
/// The errors that can occur while searching.
#[derive(Debug)]
pub enum Error {
    /// An ordinary IO (or mmap) error, paired with the path that
    /// originated it.
    Io {
        err: io::Error,
        path: PathBuf,
    }
}

impl Error {
    /// Build an `Error::Io` from an IO error and the path that was being
    /// processed when it occurred. The path is copied into an owned
    /// `PathBuf`.
    fn from_io<P: AsRef<Path>>(err: io::Error, path: P) -> Error {
        let owned = path.as_ref().to_path_buf();
        Error::Io { err: err, path: owned }
    }
}

impl StdError for Error {
    /// Forwards to the description of the underlying IO error.
    fn description(&self) -> &str {
        // `Io` is the only variant, so this pattern is irrefutable. Adding a
        // variant turns it into a compile error, just like the `match` it
        // replaces.
        let Error::Io { ref err, .. } = *self;
        err.description()
    }

    /// The underlying IO error is always reported as the cause.
    fn cause(&self) -> Option<&StdError> {
        let Error::Io { ref err, .. } = *self;
        Some(err)
    }
}

impl fmt::Display for Error {
    /// Formats the error as `<path>: <io error>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Error::Io { ref err, ref path } = *self;
        write!(f, "{}: {}", path.display(), err)
    }
}
pub struct Searcher<'a, R, W: 'a> {
inp: &'a mut InputBuffer,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
haystack: R,
2016-08-31 15:52:35 -04:00
match_count: u64,
line_count: Option<u64>,
last_match: Match,
count: bool,
invert_match: bool,
line_number: bool,
}
impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
/// Create a new searcher.
///
/// `inp` is a reusable input buffer that is used as scratch space by this
/// searcher.
///
/// `printer` is used to output all results of searching.
///
/// `grep` is the actual matcher.
///
/// `path` is the file path being searched.
///
/// `haystack` is a reader of text to search.
pub fn new(
inp: &'a mut InputBuffer,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
haystack: R,
) -> Searcher<'a, R, W> {
Searcher {
inp: inp,
printer: printer,
grep: grep,
path: path,
haystack: haystack,
2016-08-31 15:52:35 -04:00
match_count: 0,
line_count: None,
last_match: Match::default(),
count: false,
invert_match: false,
line_number: false,
}
}
/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
pub fn count(mut self, yes: bool) -> Self {
self.count = yes;
self
}
/// If enabled, matching is inverted so that lines that *don't* match the
/// given pattern are treated as matches.
pub fn invert_match(mut self, yes: bool) -> Self {
self.invert_match = yes;
self
}
/// If enabled, compute line numbers and prefix each line of output with
/// them.
pub fn line_number(mut self, yes: bool) -> Self {
self.line_number = yes;
self
}
/// Execute the search. Results are written to the printer and the total
/// number of matches is returned.
#[inline(never)]
pub fn run(mut self) -> Result<u64, Error> {
self.inp.reset();
2016-08-31 15:52:35 -04:00
self.match_count = 0;
self.line_count = if self.line_number { Some(0) } else { None };
self.last_match = Match::default();
loop {
let ok = try!(self.inp.fill(&mut self.haystack).map_err(|err| {
Error::from_io(err, &self.path)
}));
if !ok {
break;
}
while self.inp.pos < self.inp.lastnl {
let ok = self.grep.read_match(
2016-08-31 15:52:35 -04:00
&mut self.last_match,
&mut self.inp.buf[..self.inp.lastnl],
self.inp.pos);
if !ok {
2016-08-31 15:52:35 -04:00
let upto = self.inp.lastnl;
if self.invert_match {
2016-08-31 15:52:35 -04:00
self.find_inverted_matches(upto);
} else {
self.count_lines(upto);
}
2016-08-31 15:52:35 -04:00
self.inp.pos = upto;
break;
}
if self.invert_match {
2016-08-31 15:52:35 -04:00
let inverted_upto = self.last_match.start();
self.find_inverted_matches(inverted_upto);
// Add a line to account for the match...
if let Some(ref mut line_count) = self.line_count {
*line_count += 1;
}
2016-08-31 15:52:35 -04:00
// ... and skip over the match.
self.inp.pos = self.last_match.end() + 1;
} else {
2016-08-31 15:52:35 -04:00
self.match_count += 1;
if !self.count {
2016-08-31 15:52:35 -04:00
let upto = cmp::min(
self.inp.lastnl, self.last_match.end() + 1);
self.count_lines(upto);
self.printer.matched(
self.path,
&self.inp.buf,
2016-08-31 15:52:35 -04:00
self.last_match.start(),
self.last_match.end(),
self.line_count,
);
}
// Move the position one past the end of the match so that
// the next search starts after the nl. If we're at EOF,
// then pos will be past EOF.
2016-08-31 15:52:35 -04:00
self.inp.pos = self.last_match.end() + 1;
}
}
}
2016-08-31 15:52:35 -04:00
if self.count && self.match_count > 0 {
self.printer.path_count(self.path, self.match_count);
}
Ok(self.match_count)
}
#[inline(always)]
fn find_inverted_matches(&mut self, upto: usize) {
debug_assert!(self.invert_match);
while self.inp.pos < upto {
let pos = memchr(b'\n', &self.inp.buf[self.inp.pos..upto])
.unwrap_or(upto);
if !self.count {
if let Some(ref mut line_count) = self.line_count {
*line_count += 1;
}
self.printer.matched(
&self.path,
&self.inp.buf,
self.inp.pos,
self.inp.pos + pos,
self.line_count,
);
}
self.inp.pos += pos + 1;
self.match_count += 1;
}
}
#[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += count_lines(&self.inp.buf[self.inp.pos..upto]);
}
}
}
/// A reusable buffer that reads input in chunks and tracks how much of it
/// can currently be searched.
pub struct InputBuffer {
    /// Number of bytes requested from the reader on each read.
    read_size: usize,
    /// Backing storage. Valid data lives in `buf[..end]`.
    buf: Vec<u8>,
    /// Scratch space used to move leftover bytes to the front of `buf`.
    tmp: Vec<u8>,
    /// Current search position within `buf`.
    pos: usize,
    /// End of the searchable region: one past the last newline read so far,
    /// or `end` once the reader is exhausted.
    lastnl: usize,
    /// One past the last valid byte in `buf`.
    end: usize,
    /// True until the first read has completed. The binary check runs only
    /// while this is set.
    first: bool,
    /// NOTE(review): only ever reset to `false` in the code visible here;
    /// confirm whether anything else is meant to set or read it.
    is_binary: bool,
}
impl InputBuffer {
/// Create a new buffer with a default capacity.
pub fn new() -> InputBuffer {
InputBuffer::with_capacity(READ_SIZE)
}
/// Create a new buffer with the capacity given.
///
/// The capacity determines the size of each read from the underlying
/// reader.
2016-08-31 15:52:35 -04:00
pub fn with_capacity(mut cap: usize) -> InputBuffer {
if cap == 0 {
cap = 1;
}
InputBuffer {
2016-08-31 15:52:35 -04:00
read_size: cap,
buf: vec![0; cap],
tmp: vec![],
pos: 0,
lastnl: 0,
end: 0,
first: true,
is_binary: false,
}
}
fn reset(&mut self) {
self.pos = 0;
self.lastnl = 0;
self.end = 0;
self.first = true;
self.is_binary = false;
}
fn fill<R: io::Read>(&mut self, rdr: &mut R) -> Result<bool, io::Error> {
if self.lastnl < self.end {
self.tmp.clear();
self.tmp.extend_from_slice(&self.buf[self.lastnl..self.end]);
self.buf[0..self.tmp.len()].copy_from_slice(&self.tmp);
self.end = self.tmp.len();
} else {
self.end = 0;
}
self.pos = 0;
self.lastnl = 0;
while self.lastnl == 0 {
2016-08-31 15:52:35 -04:00
if self.buf.len() - self.end < self.read_size {
let min_len = self.read_size + self.buf.len() - self.end;
let new_len = cmp::max(min_len, self.buf.len() * 2);
self.buf.resize(new_len, 0);
}
let n = try!(rdr.read(
2016-08-31 15:52:35 -04:00
&mut self.buf[self.end..self.end + self.read_size]));
if self.first {
if is_binary(&self.buf[self.end..self.end + n]) {
return Ok(false);
}
}
self.first = false;
if n == 0 {
if self.end == 0 {
return Ok(false);
}
self.lastnl = self.end;
break;
}
// We know there is no nl between self.start..self.end since:
// 1) If this is the first iteration, then any bytes preceding
// self.end do not contain nl by construction.
// 2) Subsequent iterations only occur if no nl could be found.
self.lastnl =
memrchr(b'\n', &self.buf[self.end..self.end + n])
.map(|i| self.end + i + 1)
.unwrap_or(0);
self.end += n;
}
Ok(true)
}
}
2016-08-31 15:52:35 -04:00
/// Heuristically decide whether `buf` holds binary data.
///
/// A buffer is considered binary when it starts with the bytes `%PDF` or
/// when a NUL byte occurs within its first 1024 bytes.
#[inline(always)]
fn is_binary(buf: &[u8]) -> bool {
    let has_pdf_magic = buf.starts_with(b"%PDF");
    let window = &buf[..cmp::min(1024, buf.len())];
    has_pdf_magic || memchr(b'\x00', window).is_some()
}
2016-08-31 15:52:35 -04:00
/// Count the number of `\n` bytes in `buf`.
#[inline(always)]
fn count_lines(buf: &[u8]) -> u64 {
    let mut total = 0;
    let mut rest = buf;
    loop {
        match memchr(b'\n', rest) {
            Some(idx) => {
                total += 1;
                // Continue scanning just past the newline that was found.
                rest = &rest[idx + 1..];
            }
            None => return total,
        }
    }
}
2016-08-31 15:52:35 -04:00
#[cfg(test)]
mod tests {
    use std::io;
    use std::path::Path;

    use grep::{Grep, GrepBuilder};
    use printer::Printer;

    use super::{InputBuffer, Searcher};

    /// The haystack shared by every test. Note that the final line has no
    /// trailing newline.
    const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    /// The concrete searcher type used by these tests.
    type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, Vec<u8>>;

    fn hay(s: &str) -> io::Cursor<Vec<u8>> {
        io::Cursor::new(s.to_string().into_bytes())
    }

    fn matcher(pat: &str) -> Grep {
        GrepBuilder::new(pat).build().unwrap()
    }

    fn test_path() -> &'static Path {
        &Path::new("/baz.rs")
    }

    /// Search `SHERLOCK` for `pat` with a one-byte read size, letting
    /// `configure` adjust the searcher before it runs. Returns the match
    /// count together with everything that was printed.
    fn search<F: FnMut(TestSearcher) -> TestSearcher>(
        pat: &str,
        mut configure: F,
    ) -> (u64, String) {
        let text = hay(SHERLOCK);
        let mut inp = InputBuffer::with_capacity(1);
        let mut pp = Printer::new(vec![]);
        let grep = matcher(pat);
        let count = {
            let searcher = Searcher::new(
                &mut inp, &mut pp, &grep, test_path(), text);
            configure(searcher).run().unwrap()
        };
        let out = String::from_utf8(pp.into_inner()).unwrap();
        (count, out)
    }

    #[test]
    fn basic_search() {
        let (count, out) = search("Sherlock", |s| s);
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn line_numbers() {
        let (count, out) = search("Sherlock", |s| s.line_number(true));
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn count() {
        let (count, out) = search("Sherlock", |s| s.count(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:2\n");
    }

    #[test]
    fn invert_match() {
        let (count, out) = search("Sherlock", |s| s.invert_match(true));
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn invert_match_line_numbers() {
        let (count, out) = search("Sherlock", |s| {
            s.invert_match(true).line_number(true)
        });
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:2:Holmeses, success in the province of detective work must always
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:6:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn invert_match_count() {
        let (count, out) = search("Sherlock", |s| {
            s.invert_match(true).count(true)
        });
        assert_eq!(4, count);
        assert_eq!(out, "/baz.rs:4\n");
    }
}