diff --git a/src/args.rs b/src/args.rs index e5320110..de9ddcb8 100644 --- a/src/args.rs +++ b/src/args.rs @@ -16,6 +16,7 @@ use ignore::Ignore; use out::Out; use printer::Printer; use search::{InputBuffer, Searcher}; +use search_buffer::BufferSearcher; use sys; use types::{FileTypeDef, Types, TypesBuilder}; use walk; @@ -111,6 +112,14 @@ Less common options: The byte to use for a line terminator. Escape sequences may be used. [default: \\n] + --mmap + Search using memory maps when possible. This is enabled by default + when xrep thinks it will be faster. (Note that mmap searching doesn't + currently support the various context related options.) + + --no-mmap + Never use memory maps, even when they might be faster. + --no-ignore Don't respect ignore files (.gitignore, .xrepignore, etc.) @@ -166,10 +175,12 @@ pub struct RawArgs { flag_line_number: bool, flag_line_terminator: String, flag_literal: bool, + flag_mmap: bool, flag_no_heading: bool, flag_no_ignore: bool, flag_no_ignore_parent: bool, flag_no_line_number: bool, + flag_no_mmap: bool, flag_pretty: bool, flag_quiet: bool, flag_replace: Option, @@ -205,6 +216,7 @@ pub struct Args { ignore_case: bool, invert_match: bool, line_number: bool, + mmap: bool, no_ignore: bool, no_ignore_parent: bool, quiet: bool, @@ -251,6 +263,19 @@ impl RawArgs { } else { (self.flag_after_context, self.flag_before_context) }; + let mmap = + if before_context > 0 || after_context > 0 || self.flag_no_mmap { + false + } else if self.flag_mmap { + true + } else { + // If we're only searching a few paths and all of them are + // files, then memory maps are probably faster. 
+ paths.len() <= 10 && paths.iter().all(|p| p.is_file()) + }; + if mmap { + debug!("will try to use memory maps"); + } let eol = { let eol = unescape(&self.flag_line_terminator); if eol.is_empty() { @@ -316,6 +341,7 @@ impl RawArgs { ignore_case: self.flag_ignore_case, invert_match: self.flag_invert_match, line_number: !self.flag_no_line_number && self.flag_line_number, + mmap: mmap, no_ignore: self.flag_no_ignore, no_ignore_parent: self.flag_no_ignore_parent, quiet: self.flag_quiet, @@ -405,6 +431,11 @@ impl Args { inp } + /// Whether we should prefer memory maps for searching or not. + pub fn mmap(&self) -> bool { + self.mmap + } + /// Create a new printer of individual search results that writes to the /// writer given. pub fn printer(&self, wtr: W) -> Printer { @@ -459,6 +490,24 @@ impl Args { .text(self.text) } + /// Create a new line based searcher whose configuration is taken from the + /// command line. This search operates on an entire file all at once (which + /// may have been memory mapped). + pub fn searcher_buffer<'a, W: Send + io::Write>( + &self, + printer: &'a mut Printer<W>, + grep: &'a Grep, + path: &'a Path, + buf: &'a [u8], + ) -> BufferSearcher<'a, W> { + BufferSearcher::new(printer, grep, path, buf) + .count(self.count) + .eol(self.eol) + .line_number(self.line_number) + .invert_match(self.invert_match) + .text(self.text) + } + /// Returns the number of worker search threads that should be used. 
pub fn threads(&self) -> usize { self.threads diff --git a/src/main.rs b/src/main.rs index 10611907..15b250a6 100644 --- a/src/main.rs +++ b/src/main.rs @@ -34,6 +34,7 @@ use std::thread; use crossbeam::sync::chase_lev::{self, Steal, Stealer}; use grep::Grep; +use memmap::{Mmap, Protection}; use walkdir::DirEntry; use args::Args; @@ -61,6 +62,7 @@ mod ignore; mod out; mod printer; mod search; +mod search_buffer; mod sys; mod terminal; mod types; @@ -221,7 +223,11 @@ impl Worker { if let Ok(p) = path.strip_prefix("./") { path = p; } - self.search(printer, path, file) + if self.args.mmap() { + self.search_mmap(printer, path, &file) + } else { + self.search(printer, path, file) + } } }; match result { @@ -248,4 +254,23 @@ impl Worker { rdr, ).run().map_err(From::from) } + + fn search_mmap( + &mut self, + printer: &mut Printer, + path: &Path, + file: &File, + ) -> Result { + if try!(file.metadata()).len() == 0 { + // Opening a memory map with an empty file results in an error. + return Ok(0); + } + let mmap = try!(Mmap::open(file, Protection::Read)); + Ok(self.args.searcher_buffer( + printer, + &self.grep, + path, + unsafe { mmap.as_slice() }, + ).run()) + } } diff --git a/src/search.rs b/src/search.rs index 9739d51f..4f8ae10d 100644 --- a/src/search.rs +++ b/src/search.rs @@ -74,14 +74,14 @@ pub struct Searcher<'a, R, W: 'a> { /// Options for configuring search. 
#[derive(Clone)] -struct Options { - after_context: usize, - before_context: usize, - count: bool, - eol: u8, - invert_match: bool, - line_number: bool, - text: bool, +pub struct Options { + pub after_context: usize, + pub before_context: usize, + pub count: bool, + pub eol: u8, + pub invert_match: bool, + pub line_number: bool, + pub text: bool, } impl Default for Options { @@ -219,14 +219,11 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> { self.print_inverted_matches(upto); } } else if matched { - self.match_count += 1; - if !self.opts.count { - let start = self.last_match.start(); - let end = self.last_match.end(); - self.print_after_context(start); - self.print_before_context(start); - self.print_match(start, end); - } + let start = self.last_match.start(); + let end = self.last_match.end(); + self.print_after_context(start); + self.print_before_context(start); + self.print_match(start, end); } if matched { self.inp.pos = self.last_match.end(); @@ -275,11 +272,8 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> { debug_assert!(self.opts.invert_match); let mut it = IterLines::new(self.opts.eol, self.inp.pos); while let Some((start, end)) = it.next(&self.inp.buf[..upto]) { - if !self.opts.count { - self.print_match(start, end); - } + self.print_match(start, end); self.inp.pos = end; - self.match_count += 1; } } @@ -325,11 +319,15 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> { #[inline(always)] fn print_match(&mut self, start: usize, end: usize) { + self.match_count += 1; + if self.opts.count { + return; + } self.print_separator(start); self.count_lines(start); self.add_line(end); self.printer.matched( - self.grep.regex(), &self.path, + self.grep.regex(), self.path, &self.inp.buf, start, end, self.line_count); self.last_printed = end; self.after_context_remaining = self.opts.after_context; @@ -535,7 +533,7 @@ impl InputBuffer { /// /// Note that this may return both false positives and false negatives. 
#[inline(always)] -fn is_binary(buf: &[u8]) -> bool { +pub fn is_binary(buf: &[u8]) -> bool { if buf.len() >= 4 && &buf[0..4] == b"%PDF" { return true; } @@ -544,7 +542,7 @@ fn is_binary(buf: &[u8]) -> bool { /// Count the number of lines in the given buffer. #[inline(always)] -fn count_lines(mut buf: &[u8], eol: u8) -> u64 { +pub fn count_lines(mut buf: &[u8], eol: u8) -> u64 { let mut count = 0; while let Some(pos) = memchr(eol, buf) { count += 1; @@ -575,7 +573,7 @@ fn replace_buf(buf: &mut [u8], a: u8, b: u8) { /// advance over the positions of each line. We neglect that approach to avoid /// the borrow in the search code. (Because the borrow prevents composition /// through other mutable methods.) -struct IterLines { +pub struct IterLines { eol: u8, pos: usize, } @@ -585,7 +583,7 @@ impl IterLines { /// /// The buffer is passed to the `next` method. #[inline(always)] - fn new(eol: u8, start: usize) -> IterLines { + pub fn new(eol: u8, start: usize) -> IterLines { IterLines { eol: eol, pos: start, @@ -597,7 +595,7 @@ impl IterLines { /// /// The range returned includes the new line. 
#[inline(always)] - fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> { + pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> { match memchr(self.eol, &buf[self.pos..]) { None => { if self.pos < buf.len() { @@ -870,7 +868,7 @@ fn main() { } #[test] - fn basic_search() { + fn basic_search1() { let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s); assert_eq!(2, count); assert_eq!(out, "\ @@ -887,7 +885,6 @@ fn main() { assert_eq!(out, ""); } - #[test] fn binary_text() { let text = "Sherlock\n\x00Holmes\n"; diff --git a/src/search_buffer.rs b/src/search_buffer.rs new file mode 100644 index 00000000..48ff1ba0 --- /dev/null +++ b/src/search_buffer.rs @@ -0,0 +1,284 @@ +use std::cmp; +use std::io; +use std::path::Path; + +use grep::Grep; + +use printer::Printer; +use search::{IterLines, Options, count_lines, is_binary}; + +pub struct BufferSearcher<'a, W: 'a> { + opts: Options, + printer: &'a mut Printer<W>, + grep: &'a Grep, + path: &'a Path, + buf: &'a [u8], + match_count: u64, + line_count: Option<u64>, + last_line: usize, +} + +impl<'a, W: Send + io::Write> BufferSearcher<'a, W> { + pub fn new( + printer: &'a mut Printer<W>, + grep: &'a Grep, + path: &'a Path, + buf: &'a [u8], + ) -> BufferSearcher<'a, W> { + BufferSearcher { + opts: Options::default(), + printer: printer, + grep: grep, + path: path, + buf: buf, + match_count: 0, + line_count: None, + last_line: 0, + } + } + + /// If enabled, searching will print a count instead of each match. + /// + /// Disabled by default. + pub fn count(mut self, yes: bool) -> Self { + self.opts.count = yes; + self + } + + /// Set the end-of-line byte used by this searcher. + pub fn eol(mut self, eol: u8) -> Self { + self.opts.eol = eol; + self + } + + /// If enabled, matching is inverted so that lines that *don't* match the + /// given pattern are treated as matches. 
+ pub fn invert_match(mut self, yes: bool) -> Self { + self.opts.invert_match = yes; + self + } + + /// If enabled, compute line numbers and prefix each line of output with + /// them. + pub fn line_number(mut self, yes: bool) -> Self { + self.opts.line_number = yes; + self + } + + /// If enabled, search binary files as if they were text. + pub fn text(mut self, yes: bool) -> Self { + self.opts.text = yes; + self + } + + #[inline(never)] + pub fn run(mut self) -> u64 { + let binary_upto = cmp::min(4096, self.buf.len()); + if !self.opts.text && is_binary(&self.buf[..binary_upto]) { + return 0; + } + + self.match_count = 0; + self.line_count = if self.opts.line_number { Some(0) } else { None }; + let mut last_end = 0; + for m in self.grep.iter(self.buf) { + if self.opts.invert_match { + self.print_inverted_matches(last_end, m.start()); + } else { + self.print_match(m.start(), m.end()); + } + last_end = m.end(); + } + if self.opts.invert_match { + let upto = self.buf.len(); + self.print_inverted_matches(last_end, upto); + } + if self.opts.count && self.match_count > 0 { + self.printer.path_count(self.path, self.match_count); + } + self.match_count + } + + #[inline(always)] + pub fn print_match(&mut self, start: usize, end: usize) { + self.match_count += 1; + if self.opts.count { + return; + } + self.count_lines(start); + self.add_line(end); + self.printer.matched( + self.grep.regex(), self.path, self.buf, + start, end, self.line_count); + } + + #[inline(always)] + fn print_inverted_matches(&mut self, start: usize, end: usize) { + debug_assert!(self.opts.invert_match); + let mut it = IterLines::new(self.opts.eol, start); + while let Some((s, e)) = it.next(&self.buf[..end]) { + self.print_match(s, e); + } + } + + #[inline(always)] + fn count_lines(&mut self, upto: usize) { + if let Some(ref mut line_count) = self.line_count { + *line_count += count_lines( + &self.buf[self.last_line..upto], self.opts.eol); + self.last_line = upto; + } + } + + #[inline(always)] + fn 
add_line(&mut self, line_end: usize) { + if let Some(ref mut line_count) = self.line_count { + *line_count += 1; + self.last_line = line_end; + } + } +} + +#[cfg(test)] +mod tests { + use std::path::Path; + + use grep::{Grep, GrepBuilder}; + + use printer::Printer; + + use super::BufferSearcher; + + lazy_static! { + static ref SHERLOCK: &'static str = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +Holmeses, success in the province of detective work must always +be, to a very large extent, the result of luck. Sherlock Holmes +can extract a clew from a wisp of straw or a flake of cigar ash; +but Doctor Watson has to have it taken out for him and dusted, +and exhibited clearly, with a label attached.\ +"; + static ref CODE: &'static str = "\ +extern crate snap; + +use std::io; + +fn main() { + let stdin = io::stdin(); + let stdout = io::stdout(); + + // Wrap the stdin reader in a Snappy reader. + let mut rdr = snap::Reader::new(stdin.lock()); + let mut wtr = stdout.lock(); + io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); +} +"; + } + + fn matcher(pat: &str) -> Grep { + GrepBuilder::new(pat).build().unwrap() + } + + fn test_path() -> &'static Path { + &Path::new("/baz.rs") + } + + type TestSearcher<'a> = BufferSearcher<'a, Vec<u8>>; + + fn search<F: FnMut(TestSearcher) -> TestSearcher>( + pat: &str, + haystack: &str, + mut map: F, + ) -> (u64, String) { + let mut pp = Printer::new(vec![], false).with_filename(true); + let grep = GrepBuilder::new(pat).build().unwrap(); + let count = { + let searcher = BufferSearcher::new( + &mut pp, &grep, test_path(), haystack.as_bytes()); + map(searcher).run() + }; + (count, String::from_utf8(pp.into_inner()).unwrap()) + } + + #[test] + fn basic_search() { + let (count, out) = search("Sherlock", &*SHERLOCK, |s|s); + assert_eq!(2, count); + assert_eq!(out, "\ +/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock +/baz.rs:be, to a very large extent, the result of luck. 
Sherlock Holmes +"); + } + + #[test] + fn binary() { + let text = "Sherlock\n\x00Holmes\n"; + let (count, out) = search("Sherlock|Holmes", text, |s|s); + assert_eq!(0, count); + assert_eq!(out, ""); + } + + + #[test] + fn binary_text() { + let text = "Sherlock\n\x00Holmes\n"; + let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); + assert_eq!(2, count); + assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); + } + + #[test] + fn line_numbers() { + let (count, out) = search( + "Sherlock", &*SHERLOCK, |s| s.line_number(true)); + assert_eq!(2, count); + assert_eq!(out, "\ +/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock +/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes +"); + } + + #[test] + fn count() { + let (count, out) = search( + "Sherlock", &*SHERLOCK, |s| s.count(true)); + assert_eq!(2, count); + assert_eq!(out, "/baz.rs:2\n"); + } + + #[test] + fn invert_match() { + let (count, out) = search( + "Sherlock", &*SHERLOCK, |s| s.invert_match(true)); + assert_eq!(4, count); + assert_eq!(out, "\ +/baz.rs:Holmeses, success in the province of detective work must always +/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; +/baz.rs:but Doctor Watson has to have it taken out for him and dusted, +/baz.rs:and exhibited clearly, with a label attached. +"); + } + + #[test] + fn invert_match_line_numbers() { + let (count, out) = search("Sherlock", &*SHERLOCK, |s| { + s.invert_match(true).line_number(true) + }); + assert_eq!(4, count); + assert_eq!(out, "\ +/baz.rs:2:Holmeses, success in the province of detective work must always +/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; +/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, +/baz.rs:6:and exhibited clearly, with a label attached. 
+"); + } + + #[test] + fn invert_match_count() { + let (count, out) = search("Sherlock", &*SHERLOCK, |s| { + s.invert_match(true).count(true) + }); + assert_eq!(4, count); + assert_eq!(out, "/baz.rs:4\n"); + } +}