mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-17 20:28:03 +02:00
Lots of progress:
- Refactored interaction between CLI args and rest of xrep. - Filling in a lot more options, including file type filtering. - Fixing some bugs in globbing/ignoring. - More documentation.
This commit is contained in:
parent
0bf278e72f
commit
812cdb13c6
551
src/args.rs
Normal file
551
src/args.rs
Normal file
@ -0,0 +1,551 @@
|
||||
use std::cmp;
|
||||
use std::env;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use docopt::Docopt;
|
||||
use env_logger;
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use regex;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use gitignore::{Gitignore, GitignoreBuilder};
|
||||
use ignore::Ignore;
|
||||
use out::Out;
|
||||
use printer::Printer;
|
||||
use search::{InputBuffer, Searcher};
|
||||
use types::{FileTypeDef, Types, TypesBuilder};
|
||||
use walk;
|
||||
|
||||
use Result;
|
||||
|
||||
/// The Docopt usage string.
|
||||
///
|
||||
/// If you've never heard of Docopt before, see: http://docopt.org
|
||||
/// (TL;DR: The CLI parser is generated from the usage string below.)
|
||||
const USAGE: &'static str = "
|
||||
Usage: xrep [options] <pattern> [<path> ...]
|
||||
xrep [options] --files [<path> ...]
|
||||
xrep [options] --type-list
|
||||
xrep --help
|
||||
xrep --version
|
||||
|
||||
xrep is like the silver searcher and grep, but faster than both.
|
||||
|
||||
Common options:
|
||||
-a, --text Search binary files as if they were text.
|
||||
-c, --count Only show count of line matches for each file.
|
||||
-g, --glob GLOB ... Include or exclude files for searching that
|
||||
match the given glob. This always overrides any
|
||||
other ignore logic. Multiple glob flags may be
|
||||
used. Globbing rules match .gitignore globs.
|
||||
Precede a glob with a '!' to exclude it.
|
||||
-h, --help Show this usage message.
|
||||
-i, --ignore-case Case insensitive search.
|
||||
-n, --line-number Show line numbers (1-based).
|
||||
-q, --quiet Do not print anything to stdout.
|
||||
-t, --type TYPE ... Only search files matching TYPE. Multiple type
|
||||
flags may be provided. Use the --type-list flag
|
||||
to list all available types.
|
||||
-T, --type-not TYPE ... Do not search files matching TYPE. Multiple
|
||||
not-type flags may be provided.
|
||||
-v, --invert-match Invert matching.
|
||||
-w, --word-regexp Only show matches surrounded by word boundaries.
|
||||
This is equivalent to putting \\b before and
|
||||
after the search pattern.
|
||||
|
||||
Less common options:
|
||||
-A, --after-context NUM
|
||||
Show NUM lines after each match.
|
||||
|
||||
-B, --before-context NUM
|
||||
Show NUM lines before each match.
|
||||
|
||||
-C, --context NUM
|
||||
Show NUM lines before and after each match.
|
||||
|
||||
--context-separator ARG
|
||||
The string to use when separating non-continuous context lines. Escape
|
||||
sequences may be used. [default: --]
|
||||
|
||||
--debug
|
||||
Show debug messages.
|
||||
|
||||
--files
|
||||
Print each file that would be searched (but don't search).
|
||||
|
||||
-H, --with-filename
|
||||
Prefix each match with the file name that contains it. This is the
|
||||
default when more than one file is searched.
|
||||
|
||||
--hidden
|
||||
Search hidden directories and files.
|
||||
|
||||
-L, --follow
|
||||
Follow symlinks.
|
||||
|
||||
--line-terminator ARG
|
||||
The byte to use for a line terminator. Escape sequences may be used.
|
||||
[default: \\n]
|
||||
|
||||
--no-ignore
|
||||
Don't respect ignore files (.gitignore, .xrepignore, etc.)
|
||||
|
||||
-Q, --literal
|
||||
Treat the pattern as a literal string instead of a regular expression.
|
||||
|
||||
--threads ARG
|
||||
The number of threads to use. Defaults to the number of logical CPUs
|
||||
(capped at 6). [default: 0]
|
||||
|
||||
--version
|
||||
Show the version number of xrep and exit.
|
||||
|
||||
File type management options:
|
||||
--type-list
|
||||
Show all supported file types and their associated globs.
|
||||
|
||||
--type-add ARG ...
|
||||
Add a new glob for a particular file type.
|
||||
Example: --type-add html:*.html,*.htm
|
||||
|
||||
--type-clear TYPE ...
|
||||
Clear the file type globs for TYPE.
|
||||
";
|
||||
|
||||
/// RawArgs are the args as they are parsed from Docopt. They aren't used
|
||||
/// directly by the rest of xrep.
|
||||
#[derive(Debug, RustcDecodable)]
|
||||
pub struct RawArgs {
|
||||
arg_pattern: String,
|
||||
arg_path: Vec<String>,
|
||||
flag_after_context: usize,
|
||||
flag_before_context: usize,
|
||||
flag_context: usize,
|
||||
flag_context_separator: String,
|
||||
flag_count: bool,
|
||||
flag_debug: bool,
|
||||
flag_files: bool,
|
||||
flag_follow: bool,
|
||||
flag_glob: Vec<String>,
|
||||
flag_hidden: bool,
|
||||
flag_ignore_case: bool,
|
||||
flag_invert_match: bool,
|
||||
flag_line_number: bool,
|
||||
flag_line_terminator: String,
|
||||
flag_literal: bool,
|
||||
flag_no_ignore: bool,
|
||||
flag_quiet: bool,
|
||||
flag_text: bool,
|
||||
flag_threads: usize,
|
||||
flag_type: Vec<String>,
|
||||
flag_type_not: Vec<String>,
|
||||
flag_type_list: bool,
|
||||
flag_type_add: Vec<String>,
|
||||
flag_type_clear: Vec<String>,
|
||||
flag_with_filename: bool,
|
||||
flag_word_regexp: bool,
|
||||
}
|
||||
|
||||
/// Args are transformed/normalized from RawArgs.
|
||||
#[derive(Debug)]
|
||||
pub struct Args {
|
||||
pattern: String,
|
||||
paths: Vec<PathBuf>,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
context_separator: Vec<u8>,
|
||||
count: bool,
|
||||
eol: u8,
|
||||
files: bool,
|
||||
follow: bool,
|
||||
glob_overrides: Option<Gitignore>,
|
||||
hidden: bool,
|
||||
ignore_case: bool,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
no_ignore: bool,
|
||||
quiet: bool,
|
||||
text: bool,
|
||||
threads: usize,
|
||||
type_defs: Vec<FileTypeDef>,
|
||||
type_list: bool,
|
||||
types: Types,
|
||||
with_filename: bool,
|
||||
}
|
||||
|
||||
impl RawArgs {
|
||||
/// Convert arguments parsed into a configuration used by xrep.
|
||||
fn to_args(&self) -> Result<Args> {
|
||||
let pattern = {
|
||||
let pattern =
|
||||
if self.flag_literal {
|
||||
regex::quote(&self.arg_pattern)
|
||||
} else {
|
||||
self.arg_pattern.clone()
|
||||
};
|
||||
if self.flag_word_regexp {
|
||||
format!(r"\b{}\b", pattern)
|
||||
} else {
|
||||
pattern
|
||||
}
|
||||
};
|
||||
let paths =
|
||||
if self.arg_path.is_empty() {
|
||||
vec![Path::new("./").to_path_buf()]
|
||||
} else {
|
||||
self.arg_path.iter().map(|p| {
|
||||
Path::new(p).to_path_buf()
|
||||
}).collect()
|
||||
};
|
||||
let (after_context, before_context) =
|
||||
if self.flag_context > 0 {
|
||||
(self.flag_context, self.flag_context)
|
||||
} else {
|
||||
(self.flag_after_context, self.flag_before_context)
|
||||
};
|
||||
let eol = {
|
||||
let eol = unescape(&self.flag_line_terminator);
|
||||
if eol.is_empty() {
|
||||
errored!("Empty line terminator is not allowed.");
|
||||
} else if eol.len() > 1 {
|
||||
errored!("Line terminators are limited to exactly 1 byte.");
|
||||
}
|
||||
eol[0]
|
||||
};
|
||||
let glob_overrides =
|
||||
if self.flag_glob.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let cwd = try!(env::current_dir());
|
||||
let mut bgi = GitignoreBuilder::new(cwd);
|
||||
for pat in &self.flag_glob {
|
||||
try!(bgi.add("<argv>", pat));
|
||||
}
|
||||
Some(try!(bgi.build()))
|
||||
};
|
||||
let threads =
|
||||
if self.flag_threads == 0 {
|
||||
cmp::min(6, num_cpus::get())
|
||||
} else {
|
||||
self.flag_threads
|
||||
};
|
||||
let mut with_filename = self.flag_with_filename;
|
||||
if !with_filename {
|
||||
with_filename = paths.len() > 1 || paths[0].is_dir();
|
||||
}
|
||||
let mut btypes = TypesBuilder::new();
|
||||
btypes.add_defaults();
|
||||
try!(self.add_types(&mut btypes));
|
||||
let types = try!(btypes.build());
|
||||
Ok(Args {
|
||||
pattern: pattern,
|
||||
paths: paths,
|
||||
after_context: after_context,
|
||||
before_context: before_context,
|
||||
context_separator: unescape(&self.flag_context_separator),
|
||||
count: self.flag_count,
|
||||
eol: eol,
|
||||
files: self.flag_files,
|
||||
follow: self.flag_follow,
|
||||
glob_overrides: glob_overrides,
|
||||
hidden: self.flag_hidden,
|
||||
ignore_case: self.flag_ignore_case,
|
||||
invert_match: self.flag_invert_match,
|
||||
line_number: self.flag_line_number,
|
||||
no_ignore: self.flag_no_ignore,
|
||||
quiet: self.flag_quiet,
|
||||
text: self.flag_text,
|
||||
threads: threads,
|
||||
type_defs: btypes.definitions(),
|
||||
type_list: self.flag_type_list,
|
||||
types: types,
|
||||
with_filename: with_filename,
|
||||
})
|
||||
}
|
||||
|
||||
fn add_types(&self, types: &mut TypesBuilder) -> Result<()> {
|
||||
for ty in &self.flag_type_clear {
|
||||
types.clear(ty);
|
||||
}
|
||||
for def in &self.flag_type_add {
|
||||
try!(types.add_def(def));
|
||||
}
|
||||
for ty in &self.flag_type {
|
||||
types.select(ty);
|
||||
}
|
||||
for ty in &self.flag_type_not {
|
||||
types.select_not(ty);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Args {
|
||||
/// Parse the command line arguments for this process.
|
||||
///
|
||||
/// If a CLI usage error occurred, then exit the process and print a usage
|
||||
/// or error message. Similarly, if the user requested the version of
|
||||
/// xrep, then print the version and exit.
|
||||
///
|
||||
/// Also, initialize a global logger.
|
||||
pub fn parse() -> Result<Args> {
|
||||
let raw: RawArgs =
|
||||
Docopt::new(USAGE)
|
||||
.and_then(|d| d.version(Some(version())).decode())
|
||||
.unwrap_or_else(|e| e.exit());
|
||||
|
||||
let mut logb = env_logger::LogBuilder::new();
|
||||
if raw.flag_debug {
|
||||
logb.filter(None, log::LogLevelFilter::Debug);
|
||||
} else {
|
||||
logb.filter(None, log::LogLevelFilter::Warn);
|
||||
}
|
||||
if let Err(err) = logb.init() {
|
||||
errored!("failed to initialize logger: {}", err);
|
||||
}
|
||||
|
||||
raw.to_args().map_err(From::from)
|
||||
}
|
||||
|
||||
/// Returns true if xrep should print the files it will search and exit
|
||||
/// (but not do any actual searching).
|
||||
pub fn files(&self) -> bool {
|
||||
self.files
|
||||
}
|
||||
|
||||
/// Create a new line based matcher. The matcher returned can be used
|
||||
/// across multiple threads simultaneously. This matcher only supports
|
||||
/// basic searching of regular expressions in a single buffer.
|
||||
///
|
||||
/// The pattern and other flags are taken from the command line.
|
||||
pub fn grep(&self) -> Result<Grep> {
|
||||
GrepBuilder::new(&self.pattern)
|
||||
.case_insensitive(self.ignore_case)
|
||||
.line_terminator(self.eol)
|
||||
.build()
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
/// Creates a new input buffer that is used in searching.
|
||||
pub fn input_buffer(&self) -> InputBuffer {
|
||||
let mut inp = InputBuffer::new();
|
||||
inp.eol(self.eol);
|
||||
inp
|
||||
}
|
||||
|
||||
/// Create a new printer of individual search results that writes to the
|
||||
/// writer given.
|
||||
pub fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
|
||||
Printer::new(wtr)
|
||||
.context_separator(self.context_separator.clone())
|
||||
.eol(self.eol)
|
||||
.quiet(self.quiet)
|
||||
.with_filename(self.with_filename)
|
||||
}
|
||||
|
||||
/// Create a new printer of search results for an entire file that writes
|
||||
/// to the writer given.
|
||||
pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
|
||||
let mut out = Out::new(wtr);
|
||||
if self.before_context > 0 || self.after_context > 0 {
|
||||
out = out.file_separator(self.context_separator.clone());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Return the paths that should be searched.
|
||||
pub fn paths(&self) -> &[PathBuf] {
|
||||
&self.paths
|
||||
}
|
||||
|
||||
/// Create a new line based searcher whose configuration is taken from the
|
||||
/// command line. This searcher supports a dizzying array of features:
|
||||
/// inverted matching, line counting, context control and more.
|
||||
pub fn searcher<'a, R: io::Read, W: io::Write>(
|
||||
&self,
|
||||
inp: &'a mut InputBuffer,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
rdr: R,
|
||||
) -> Searcher<'a, R, W> {
|
||||
Searcher::new(inp, printer, grep, path, rdr)
|
||||
.after_context(self.after_context)
|
||||
.before_context(self.before_context)
|
||||
.count(self.count)
|
||||
.eol(self.eol)
|
||||
.line_number(self.line_number)
|
||||
.invert_match(self.invert_match)
|
||||
.text(self.text)
|
||||
}
|
||||
|
||||
/// Returns the number of worker search threads that should be used.
|
||||
pub fn threads(&self) -> usize {
|
||||
self.threads
|
||||
}
|
||||
|
||||
/// Returns a list of type definitions currently loaded.
|
||||
pub fn type_defs(&self) -> &[FileTypeDef] {
|
||||
&self.type_defs
|
||||
}
|
||||
|
||||
/// Returns true if xrep should print the type definitions currently loaded
|
||||
/// and then exit.
|
||||
pub fn type_list(&self) -> bool {
|
||||
self.type_list
|
||||
}
|
||||
|
||||
/// Create a new recursive directory iterator at the path given.
|
||||
pub fn walker(&self, path: &Path) -> walk::Iter {
|
||||
let wd = WalkDir::new(path).follow_links(self.follow);
|
||||
let mut ig = Ignore::new();
|
||||
ig.ignore_hidden(!self.hidden);
|
||||
ig.no_ignore(self.no_ignore);
|
||||
ig.add_types(self.types.clone());
|
||||
if let Some(ref overrides) = self.glob_overrides {
|
||||
ig.add_override(overrides.clone());
|
||||
}
|
||||
walk::Iter::new(ig, wd)
|
||||
}
|
||||
}
|
||||
|
||||
fn version() -> String {
|
||||
let (maj, min, pat) = (
|
||||
option_env!("CARGO_PKG_VERSION_MAJOR"),
|
||||
option_env!("CARGO_PKG_VERSION_MINOR"),
|
||||
option_env!("CARGO_PKG_VERSION_PATCH"),
|
||||
);
|
||||
match (maj, min, pat) {
|
||||
(Some(maj), Some(min), Some(pat)) =>
|
||||
format!("{}.{}.{}", maj, min, pat),
|
||||
_ => "".to_owned(),
|
||||
}
|
||||
}
|
||||
|
||||
/// A single state in the state machine used by `unescape`.
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
enum State {
|
||||
Escape,
|
||||
HexFirst,
|
||||
HexSecond(char),
|
||||
Literal,
|
||||
}
|
||||
|
||||
/// Unescapes a string given on the command line. It supports a limit set of
|
||||
/// escape sequences:
|
||||
///
|
||||
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
|
||||
/// * \xZZ hexadecimal escapes are mapped to their byte.
|
||||
fn unescape(s: &str) -> Vec<u8> {
|
||||
use self::State::*;
|
||||
|
||||
let mut bytes = vec![];
|
||||
let mut state = Literal;
|
||||
for c in s.chars() {
|
||||
match state {
|
||||
Escape => {
|
||||
match c {
|
||||
'n' => { bytes.push(b'\n'); state = Literal; }
|
||||
'r' => { bytes.push(b'\r'); state = Literal; }
|
||||
't' => { bytes.push(b'\t'); state = Literal; }
|
||||
'x' => { state = HexFirst; }
|
||||
c => {
|
||||
bytes.extend(&format!(r"\{}", c).into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
HexFirst => {
|
||||
match c {
|
||||
'0'...'9' | 'A'...'F' | 'a'...'f' => {
|
||||
state = HexSecond(c);
|
||||
}
|
||||
c => {
|
||||
bytes.extend(&format!(r"\x{}", c).into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
HexSecond(first) => {
|
||||
match c {
|
||||
'0'...'9' | 'A'...'F' | 'a'...'f' => {
|
||||
let ordinal = format!("{}{}", first, c);
|
||||
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
|
||||
bytes.push(byte);
|
||||
state = Literal;
|
||||
}
|
||||
c => {
|
||||
let original = format!(r"\x{}{}", first, c);
|
||||
bytes.extend(&original.into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
Literal => {
|
||||
match c {
|
||||
'\\' => { state = Escape; }
|
||||
c => { bytes.extend(c.to_string().as_bytes()); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
match state {
|
||||
Escape => bytes.push(b'\\'),
|
||||
HexFirst => bytes.extend(b"\\x"),
|
||||
HexSecond(c) => bytes.extend(&format!("\\x{}", c).into_bytes()),
|
||||
Literal => {}
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::unescape;
|
||||
|
||||
fn b(bytes: &'static [u8]) -> Vec<u8> {
|
||||
bytes.to_vec()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_nul() {
|
||||
assert_eq!(b(b"\x00"), unescape(r"\x00"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_nl() {
|
||||
assert_eq!(b(b"\n"), unescape(r"\n"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_tab() {
|
||||
assert_eq!(b(b"\t"), unescape(r"\t"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_carriage() {
|
||||
assert_eq!(b(b"\r"), unescape(r"\r"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_nothing_simple() {
|
||||
assert_eq!(b(b"\\a"), unescape(r"\a"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_nothing_hex0() {
|
||||
assert_eq!(b(b"\\x"), unescape(r"\x"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_nothing_hex1() {
|
||||
assert_eq!(b(b"\\xz"), unescape(r"\xz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unescape_nothing_hex2() {
|
||||
assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
|
||||
}
|
||||
}
|
@ -79,6 +79,7 @@ impl From<io::Error> for Error {
|
||||
}
|
||||
|
||||
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: glob::Set,
|
||||
root: PathBuf,
|
||||
@ -136,22 +137,26 @@ impl Gitignore {
|
||||
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
|
||||
// A single regex with a bunch of alternations of glob patterns is
|
||||
// unfortunately typically faster than a regex, so we use it as a
|
||||
// first pass filter. We still need to run the RegexSet to most
|
||||
// first pass filter. We still need to run the RegexSet to get the most
|
||||
// recently defined glob that matched.
|
||||
if !self.set.is_match(path) {
|
||||
return Match::None;
|
||||
}
|
||||
let pat = match self.set.matches(path).iter().last() {
|
||||
None => return Match::None,
|
||||
Some(i) => &self.patterns[i],
|
||||
};
|
||||
if pat.whitelist {
|
||||
Match::Whitelist(&pat)
|
||||
} else if !pat.only_dir || is_dir {
|
||||
Match::Ignored(&pat)
|
||||
} else {
|
||||
Match::None
|
||||
// The regex set can't actually pick the right glob that matched all
|
||||
// on its own. In particular, some globs require that only directories
|
||||
// can match. Thus, only accept a match from the regex set if the given
|
||||
// path satisfies the corresponding glob's directory criteria.
|
||||
for i in self.set.matches(path).iter().rev() {
|
||||
let pat = &self.patterns[i];
|
||||
if !pat.only_dir || is_dir {
|
||||
return if pat.whitelist {
|
||||
Match::Whitelist(pat)
|
||||
} else {
|
||||
Match::Ignored(pat)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
|
||||
@ -177,6 +182,24 @@ impl<'a> Match<'a> {
|
||||
Match::None | Match::Whitelist(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the match result didn't match any globs.
|
||||
pub fn is_none(&self) -> bool {
|
||||
match *self {
|
||||
Match::None => true,
|
||||
Match::Ignored(_) | Match::Whitelist(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Inverts the match so that Ignored becomes Whitelisted and Whitelisted
|
||||
/// becomes Ignored. A non-match remains the same.
|
||||
pub fn invert(self) -> Match<'a> {
|
||||
match self {
|
||||
Match::None => Match::None,
|
||||
Match::Ignored(pat) => Match::Whitelist(pat),
|
||||
Match::Whitelist(pat) => Match::Ignored(pat),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GitignoreBuilder constructs a matcher for a single set of globs from a
|
||||
@ -231,7 +254,6 @@ impl GitignoreBuilder {
|
||||
/// Add each pattern line from the file path given.
|
||||
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
let rdr = io::BufReader::new(try!(File::open(&path)));
|
||||
// println!("adding ignores from: {}", path.as_ref().display());
|
||||
for line in rdr.lines() {
|
||||
try!(self.add(&path, &try!(line)));
|
||||
}
|
||||
|
@ -77,6 +77,8 @@ impl Set {
|
||||
/// Returns every glob pattern (by sequence number) that matches the given
|
||||
/// path.
|
||||
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
|
||||
// TODO(burntsushi): If we split this out into a separate crate, don't
|
||||
// expose the regex::SetMatches type in the public API.
|
||||
self.set.matches(path.as_ref())
|
||||
}
|
||||
|
||||
|
@ -18,6 +18,7 @@ use std::fmt;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder, Match};
|
||||
use types::Types;
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
#[derive(Debug)]
|
||||
@ -56,7 +57,13 @@ pub struct Ignore {
|
||||
/// A stack of ignore patterns at each directory level of traversal.
|
||||
/// A directory that contributes no ignore patterns is `None`.
|
||||
stack: Vec<Option<IgnoreDir>>,
|
||||
/// A set of override globs that are always checked first. A match (whether
|
||||
/// it's whitelist or blacklist) trumps anything in stack.
|
||||
overrides: Option<Gitignore>,
|
||||
/// A file type matcher.
|
||||
types: Option<Types>,
|
||||
ignore_hidden: bool,
|
||||
no_ignore: bool,
|
||||
}
|
||||
|
||||
impl Ignore {
|
||||
@ -64,7 +71,10 @@ impl Ignore {
|
||||
pub fn new() -> Ignore {
|
||||
Ignore {
|
||||
stack: vec![],
|
||||
overrides: None,
|
||||
types: None,
|
||||
ignore_hidden: true,
|
||||
no_ignore: false,
|
||||
}
|
||||
}
|
||||
|
||||
@ -74,11 +84,34 @@ impl Ignore {
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, ignore files are ignored.
|
||||
pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.no_ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a set of globs that overrides all other match logic.
|
||||
pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore {
|
||||
self.overrides = Some(gi);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher. The file type matcher has the lowest
|
||||
/// precedence.
|
||||
pub fn add_types(&mut self, types: Types) -> &mut Ignore {
|
||||
self.types = Some(types);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a directory to the stack.
|
||||
///
|
||||
/// Note that even if this returns an error, the directory is added to the
|
||||
/// stack (and therefore should be popped).
|
||||
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
if self.no_ignore {
|
||||
self.stack.push(None);
|
||||
return Ok(());
|
||||
}
|
||||
match IgnoreDir::new(path) {
|
||||
Ok(id) => {
|
||||
self.stack.push(id);
|
||||
@ -102,24 +135,57 @@ impl Ignore {
|
||||
/// Returns true if and only if the given file path should be ignored.
|
||||
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
|
||||
let path = path.as_ref();
|
||||
if let Some(ref overrides) = self.overrides {
|
||||
let mat = overrides.matched(path, is_dir).invert();
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
return is_ignored;
|
||||
}
|
||||
}
|
||||
if self.ignore_hidden && is_hidden(&path) {
|
||||
debug!("{} ignored because it is hidden", path.display());
|
||||
return true;
|
||||
}
|
||||
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
|
||||
match id.matched(path, is_dir) {
|
||||
Match::Whitelist(ref pat) => {
|
||||
debug!("{} whitelisted by {:?}", path.display(), pat);
|
||||
return false;
|
||||
}
|
||||
Match::Ignored(ref pat) => {
|
||||
debug!("{} ignored by {:?}", path.display(), pat);
|
||||
let mat = id.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
Match::None => {}
|
||||
// If this path is whitelisted by an ignore, then fallthrough
|
||||
// and let the file type matcher have a say.
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(ref types) = self.types {
|
||||
let mat = types.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
return is_ignored;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true if the given match says the given pattern should be
|
||||
/// ignored or false if the given pattern should be explicitly whitelisted.
|
||||
/// Returns None otherwise.
|
||||
pub fn ignore_match<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
mat: Match,
|
||||
) -> Option<bool> {
|
||||
let path = path.as_ref();
|
||||
match mat {
|
||||
Match::Whitelist(ref pat) => {
|
||||
debug!("{} whitelisted by {:?}", path.display(), pat);
|
||||
Some(false)
|
||||
}
|
||||
Match::Ignored(ref pat) => {
|
||||
debug!("{} ignored by {:?}", path.display(), pat);
|
||||
Some(true)
|
||||
}
|
||||
Match::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// IgnoreDir represents a set of ignore patterns retrieved from a single
|
||||
|
286
src/main.rs
286
src/main.rs
@ -19,7 +19,6 @@ extern crate rustc_serialize;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::cmp;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Write};
|
||||
@ -30,14 +29,13 @@ use std::sync::Arc;
|
||||
use std::thread;
|
||||
|
||||
use crossbeam::sync::chase_lev::{self, Steal, Stealer};
|
||||
use docopt::Docopt;
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use grep::Grep;
|
||||
use parking_lot::Mutex;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use ignore::Ignore;
|
||||
use args::Args;
|
||||
use out::Out;
|
||||
use printer::Printer;
|
||||
use search::{InputBuffer, Searcher};
|
||||
use search::InputBuffer;
|
||||
|
||||
macro_rules! errored {
|
||||
($($tt:tt)*) => {
|
||||
@ -52,64 +50,22 @@ macro_rules! eprintln {
|
||||
}}
|
||||
}
|
||||
|
||||
mod args;
|
||||
mod gitignore;
|
||||
mod glob;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod printer;
|
||||
mod search;
|
||||
mod types;
|
||||
mod walk;
|
||||
|
||||
const USAGE: &'static str = "
|
||||
Usage: xrep [options] <pattern> [<path> ...]
|
||||
xrep --files [<path> ...]
|
||||
|
||||
xrep is like the silver searcher and grep, but faster than both.
|
||||
|
||||
WARNING: Searching stdin isn't yet supported.
|
||||
|
||||
Options:
|
||||
-c, --count Suppress normal output and show count of line
|
||||
matches.
|
||||
-A, --after-context NUM Show NUM lines after each match.
|
||||
-B, --before-context NUM Show NUM lines before each match.
|
||||
-C, --context NUM Show NUM lines before and after each match.
|
||||
--debug Show debug messages.
|
||||
--files Print each file that would be searched
|
||||
(but don't search).
|
||||
--hidden Search hidden directories and files.
|
||||
-i, --ignore-case Case insensitive search.
|
||||
-L, --follow Follow symlinks.
|
||||
-n, --line-number Show line numbers (1-based).
|
||||
-t, --threads ARG The number of threads to use. Defaults to the
|
||||
number of logical CPUs. [default: 0]
|
||||
-v, --invert-match Invert matching.
|
||||
";
|
||||
|
||||
#[derive(RustcDecodable)]
|
||||
struct Args {
|
||||
arg_pattern: String,
|
||||
arg_path: Vec<String>,
|
||||
flag_after_context: usize,
|
||||
flag_before_context: usize,
|
||||
flag_context: usize,
|
||||
flag_count: bool,
|
||||
flag_debug: bool,
|
||||
flag_files: bool,
|
||||
flag_follow: bool,
|
||||
flag_hidden: bool,
|
||||
flag_ignore_case: bool,
|
||||
flag_invert_match: bool,
|
||||
flag_line_number: bool,
|
||||
flag_threads: usize,
|
||||
}
|
||||
|
||||
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
||||
|
||||
fn main() {
|
||||
let args: Args = Docopt::new(USAGE).and_then(|d| d.decode())
|
||||
.unwrap_or_else(|e| e.exit());
|
||||
match run(args) {
|
||||
Ok(_) => process::exit(0),
|
||||
match Args::parse().and_then(run) {
|
||||
Ok(count) if count == 0 => process::exit(1),
|
||||
Ok(count) => process::exit(0),
|
||||
Err(err) => {
|
||||
let _ = writeln!(&mut io::stderr(), "{}", err);
|
||||
process::exit(1);
|
||||
@ -117,194 +73,158 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
fn run(mut args: Args) -> Result<()> {
|
||||
let mut logb = env_logger::LogBuilder::new();
|
||||
if args.flag_debug {
|
||||
logb.filter(None, log::LogLevelFilter::Debug);
|
||||
} else {
|
||||
logb.filter(None, log::LogLevelFilter::Warn);
|
||||
}
|
||||
if let Err(err) = logb.init() {
|
||||
errored!("failed to initialize logger: {}", err);
|
||||
}
|
||||
|
||||
if args.arg_path.is_empty() {
|
||||
args.arg_path.push("./".to_string());
|
||||
}
|
||||
if args.arg_path.iter().any(|p| p == "-") {
|
||||
errored!("searching <stdin> isn't yet supported");
|
||||
}
|
||||
if args.flag_files {
|
||||
fn run(args: Args) -> Result<u64> {
|
||||
if args.files() {
|
||||
return run_files(args);
|
||||
}
|
||||
if args.type_list() {
|
||||
return run_types(args);
|
||||
}
|
||||
let args = Arc::new(args);
|
||||
let out = Arc::new(Mutex::new(args.out(io::stdout())));
|
||||
let mut workers = vec![];
|
||||
let out = Arc::new(Mutex::new(Out::new(args.clone(), io::stdout())));
|
||||
|
||||
let mut chan_work_send = {
|
||||
let (worker, stealer) = chase_lev::deque();
|
||||
for _ in 0..args.num_workers() {
|
||||
let grepb =
|
||||
GrepBuilder::new(&args.arg_pattern)
|
||||
.case_insensitive(args.flag_ignore_case);
|
||||
let mut workq = {
|
||||
let (workq, stealer) = chase_lev::deque();
|
||||
for _ in 0..args.threads() {
|
||||
let worker = Worker {
|
||||
args: args.clone(),
|
||||
out: out.clone(),
|
||||
chan_work: stealer.clone(),
|
||||
inpbuf: InputBuffer::new(),
|
||||
inpbuf: args.input_buffer(),
|
||||
outbuf: Some(vec![]),
|
||||
grep: try!(grepb.build()),
|
||||
grep: try!(args.grep()),
|
||||
};
|
||||
workers.push(thread::spawn(move || worker.run()));
|
||||
}
|
||||
worker
|
||||
workq
|
||||
};
|
||||
|
||||
for p in &args.arg_path {
|
||||
for path in args.walker(p) {
|
||||
chan_work_send.push(Message::Some(path));
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
workq.push(Work::Stdin)
|
||||
} else {
|
||||
for path in args.walker(p) {
|
||||
workq.push(Work::File(path));
|
||||
}
|
||||
}
|
||||
}
|
||||
for _ in 0..workers.len() {
|
||||
chan_work_send.push(Message::Quit);
|
||||
workq.push(Work::Quit);
|
||||
}
|
||||
let mut match_count = 0;
|
||||
for worker in workers {
|
||||
worker.join().unwrap();
|
||||
match_count += worker.join().unwrap();
|
||||
}
|
||||
Ok(())
|
||||
Ok(match_count)
|
||||
}
|
||||
|
||||
fn run_files(args: Args) -> Result<()> {
|
||||
fn run_files(args: Args) -> Result<u64> {
|
||||
let mut printer = Printer::new(io::BufWriter::new(io::stdout()));
|
||||
for p in &args.arg_path {
|
||||
for path in args.walker(p) {
|
||||
printer.path(path);
|
||||
let mut file_count = 0;
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
printer.path(&Path::new("<stdin>"));
|
||||
file_count += 1;
|
||||
} else {
|
||||
for path in args.walker(p) {
|
||||
printer.path(path);
|
||||
file_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
Ok(file_count)
|
||||
}
|
||||
|
||||
impl Args {
|
||||
fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
|
||||
Printer::new(wtr)
|
||||
}
|
||||
|
||||
fn num_workers(&self) -> usize {
|
||||
let mut num = self.flag_threads;
|
||||
if num == 0 {
|
||||
num = cmp::min(8, num_cpus::get());
|
||||
}
|
||||
num
|
||||
}
|
||||
|
||||
fn walker<P: AsRef<Path>>(&self, path: P) -> walk::Iter {
|
||||
let wd = WalkDir::new(path).follow_links(self.flag_follow);
|
||||
let mut ig = Ignore::new();
|
||||
ig.ignore_hidden(!self.flag_hidden);
|
||||
walk::Iter::new(ig, wd)
|
||||
}
|
||||
|
||||
fn before_context(&self) -> usize {
|
||||
if self.flag_context > 0 {
|
||||
self.flag_context
|
||||
} else {
|
||||
self.flag_before_context
|
||||
}
|
||||
}
|
||||
|
||||
fn after_context(&self) -> usize {
|
||||
if self.flag_context > 0 {
|
||||
self.flag_context
|
||||
} else {
|
||||
self.flag_after_context
|
||||
}
|
||||
}
|
||||
|
||||
fn has_context(&self) -> bool {
|
||||
self.before_context() > 0 || self.after_context() > 0
|
||||
fn run_types(args: Args) -> Result<u64> {
|
||||
let mut printer = Printer::new(io::BufWriter::new(io::stdout()));
|
||||
let mut ty_count = 0;
|
||||
for def in args.type_defs() {
|
||||
printer.type_def(def);
|
||||
ty_count += 1;
|
||||
}
|
||||
Ok(ty_count)
|
||||
}
|
||||
|
||||
enum Message<T> {
|
||||
Some(T),
|
||||
enum Work {
|
||||
File(PathBuf),
|
||||
Stdin,
|
||||
Quit,
|
||||
}
|
||||
|
||||
struct Worker {
|
||||
args: Arc<Args>,
|
||||
out: Arc<Mutex<Out<io::Stdout>>>,
|
||||
chan_work: Stealer<Message<PathBuf>>,
|
||||
chan_work: Stealer<Work>,
|
||||
inpbuf: InputBuffer,
|
||||
outbuf: Option<Vec<u8>>,
|
||||
grep: Grep,
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
fn run(mut self) {
|
||||
fn run(mut self) -> u64 {
|
||||
let mut match_count = 0;
|
||||
loop {
|
||||
let path = match self.chan_work.steal() {
|
||||
let (path, file) = match self.chan_work.steal() {
|
||||
Steal::Empty | Steal::Abort => continue,
|
||||
Steal::Data(Message::Quit) => break,
|
||||
Steal::Data(Message::Some(path)) => path,
|
||||
};
|
||||
let file = match File::open(&path) {
|
||||
Ok(file) => file,
|
||||
Err(err) => {
|
||||
eprintln!("{}: {}", path.display(), err);
|
||||
continue;
|
||||
Steal::Data(Work::Quit) => break,
|
||||
Steal::Data(Work::File(path)) => {
|
||||
match File::open(&path) {
|
||||
Ok(file) => (path, Some(file)),
|
||||
Err(err) => {
|
||||
eprintln!("{}: {}", path.display(), err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
Steal::Data(Work::Stdin) => {
|
||||
(Path::new("<stdin>").to_path_buf(), None)
|
||||
}
|
||||
};
|
||||
let mut outbuf = self.outbuf.take().unwrap();
|
||||
outbuf.clear();
|
||||
let mut printer = self.args.printer(outbuf);
|
||||
{
|
||||
let mut searcher = Searcher::new(
|
||||
&mut self.inpbuf,
|
||||
&mut printer,
|
||||
&self.grep,
|
||||
&path,
|
||||
file,
|
||||
);
|
||||
searcher = searcher.count(self.args.flag_count);
|
||||
searcher = searcher.line_number(self.args.flag_line_number);
|
||||
searcher = searcher.invert_match(self.args.flag_invert_match);
|
||||
searcher = searcher.after_context(self.args.after_context());
|
||||
searcher = searcher.before_context(self.args.before_context());
|
||||
if let Err(err) = searcher.run() {
|
||||
eprintln!("{}", err);
|
||||
let result = match file {
|
||||
None => {
|
||||
let stdin = io::stdin();
|
||||
let stdin = stdin.lock();
|
||||
self.search(&mut printer, &path, stdin)
|
||||
}
|
||||
Some(file) => {
|
||||
self.search(&mut printer, &path, file)
|
||||
}
|
||||
};
|
||||
match result {
|
||||
Ok(count) => {
|
||||
match_count += count;
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
let outbuf = printer.into_inner();
|
||||
if !outbuf.is_empty() {
|
||||
let mut out = self.out.lock();
|
||||
out.write_file_matches(&outbuf);
|
||||
out.write(&outbuf);
|
||||
}
|
||||
self.outbuf = Some(outbuf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct Out<W: io::Write> {
|
||||
args: Arc<Args>,
|
||||
wtr: io::BufWriter<W>,
|
||||
printed: bool,
|
||||
}
|
||||
|
||||
impl<W: io::Write> Out<W> {
|
||||
fn new(args: Arc<Args>, wtr: W) -> Out<W> {
|
||||
Out {
|
||||
args: args,
|
||||
wtr: io::BufWriter::new(wtr),
|
||||
printed: false,
|
||||
}
|
||||
match_count
|
||||
}
|
||||
|
||||
fn write_file_matches(&mut self, buf: &[u8]) {
|
||||
if self.printed && self.args.has_context() {
|
||||
let _ = self.wtr.write_all(b"--\n");
|
||||
}
|
||||
let _ = self.wtr.write_all(buf);
|
||||
let _ = self.wtr.flush();
|
||||
self.printed = true;
|
||||
fn search<R: io::Read, W: io::Write>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
rdr: R,
|
||||
) -> Result<u64> {
|
||||
self.args.searcher(
|
||||
&mut self.inpbuf,
|
||||
printer,
|
||||
&self.grep,
|
||||
path,
|
||||
rdr,
|
||||
).run().map_err(From::from)
|
||||
}
|
||||
}
|
||||
|
45
src/out.rs
Normal file
45
src/out.rs
Normal file
@ -0,0 +1,45 @@
|
||||
use std::io::{self, Write};
|
||||
|
||||
/// Out controls the actual output of all search results for a particular file
|
||||
/// to the end user.
|
||||
///
|
||||
/// (The difference between Out and Printer is that a Printer works with
|
||||
/// individual search results where as Out works with search results for each
|
||||
/// file as a whole. For example, it knows when to print a file separator.)
|
||||
pub struct Out<W: io::Write> {
|
||||
wtr: io::BufWriter<W>,
|
||||
printed: bool,
|
||||
file_separator: Vec<u8>,
|
||||
}
|
||||
|
||||
impl<W: io::Write> Out<W> {
|
||||
/// Create a new Out that writes to the wtr given.
|
||||
pub fn new(wtr: W) -> Out<W> {
|
||||
Out {
|
||||
wtr: io::BufWriter::new(wtr),
|
||||
printed: false,
|
||||
file_separator: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// If set, the separator is printed between matches from different files.
|
||||
/// By default, no separator is printed.
|
||||
///
|
||||
/// If sep is empty, then no file separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
|
||||
self.file_separator = sep;
|
||||
self
|
||||
}
|
||||
|
||||
/// Write the search results of a single file to the underlying wtr and
|
||||
/// flush wtr.
|
||||
pub fn write(&mut self, buf: &[u8]) {
|
||||
if self.printed && !self.file_separator.is_empty() {
|
||||
let _ = self.wtr.write_all(&self.file_separator);
|
||||
let _ = self.wtr.write_all(b"\n");
|
||||
}
|
||||
let _ = self.wtr.write_all(buf);
|
||||
let _ = self.wtr.flush();
|
||||
self.printed = true;
|
||||
}
|
||||
}
|
140
src/printer.rs
140
src/printer.rs
@ -1,53 +1,121 @@
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
macro_rules! wln {
|
||||
($($tt:tt)*) => {
|
||||
let _ = writeln!($($tt)*);
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! w {
|
||||
($($tt:tt)*) => {
|
||||
let _ = write!($($tt)*);
|
||||
}
|
||||
}
|
||||
use types::FileTypeDef;
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
/// Note that we currently ignore all write errors. It's probably worthwhile
|
||||
/// to fix this, but printers are only ever used for writes to stdout or
|
||||
/// writes to memory, neither of which commonly fail.
|
||||
pub struct Printer<W> {
|
||||
/// The underlying writer.
|
||||
wtr: W,
|
||||
/// Whether anything has been printed to wtr yet.
|
||||
has_printed: bool,
|
||||
/// The string to use to separate non-contiguous runs of context lines.
|
||||
context_separator: Vec<u8>,
|
||||
/// The end-of-line terminator used by the printer. In general, eols are
|
||||
/// printed via the match directly, but occasionally we need to insert them
|
||||
/// ourselves (for example, to print a context separator).
|
||||
eol: u8,
|
||||
/// Whether to suppress all output.
|
||||
quiet: bool,
|
||||
/// Whether to prefix each match with the corresponding file name.
|
||||
with_filename: bool,
|
||||
}
|
||||
|
||||
impl<W: io::Write> Printer<W> {
|
||||
/// Create a new printer that writes to wtr.
|
||||
pub fn new(wtr: W) -> Printer<W> {
|
||||
Printer {
|
||||
wtr: wtr,
|
||||
has_printed: false,
|
||||
context_separator: "--".to_string().into_bytes(),
|
||||
eol: b'\n',
|
||||
quiet: false,
|
||||
with_filename: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the context separator. The default is `--`.
|
||||
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
|
||||
self.context_separator = sep;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line terminator. The default is `\n`.
|
||||
pub fn eol(mut self, eol: u8) -> Printer<W> {
|
||||
self.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, all output is suppressed.
|
||||
pub fn quiet(mut self, yes: bool) -> Printer<W> {
|
||||
self.quiet = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, each match is prefixed with the file name that it came from.
|
||||
pub fn with_filename(mut self, yes: bool) -> Printer<W> {
|
||||
self.with_filename = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns true if and only if something has been printed.
|
||||
pub fn has_printed(&self) -> bool {
|
||||
self.has_printed
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> W {
|
||||
/// Flushes the underlying writer and returns it.
|
||||
pub fn into_inner(mut self) -> W {
|
||||
let _ = self.wtr.flush();
|
||||
self.wtr
|
||||
}
|
||||
|
||||
/// Prints a type definition.
|
||||
pub fn type_def(&mut self, def: &FileTypeDef) {
|
||||
self.write(def.name().as_bytes());
|
||||
self.write(b": ");
|
||||
let mut first = true;
|
||||
for pat in def.patterns() {
|
||||
if !first {
|
||||
self.write(b", ");
|
||||
}
|
||||
self.write(pat.as_bytes());
|
||||
first = false;
|
||||
}
|
||||
self.write_eol();
|
||||
}
|
||||
|
||||
/// Prints the given path.
|
||||
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
|
||||
wln!(&mut self.wtr, "{}", path.as_ref().display());
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write_eol();
|
||||
}
|
||||
|
||||
/// Prints the given path and a count of the number of matches found.
|
||||
pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
|
||||
wln!(&mut self.wtr, "{}:{}", path.as_ref().display(), count);
|
||||
if self.with_filename {
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write(b":");
|
||||
}
|
||||
self.write(count.to_string().as_bytes());
|
||||
self.write_eol();
|
||||
}
|
||||
|
||||
pub fn count(&mut self, count: u64) {
|
||||
wln!(&mut self.wtr, "{}", count);
|
||||
}
|
||||
|
||||
pub fn context_separator(&mut self) {
|
||||
wln!(&mut self.wtr, "--");
|
||||
/// Prints the context separator.
|
||||
pub fn context_separate(&mut self) {
|
||||
// N.B. We can't use `write` here because of borrowing restrictions.
|
||||
if self.quiet {
|
||||
return;
|
||||
}
|
||||
if self.context_separator.is_empty() {
|
||||
return;
|
||||
}
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(&self.context_separator);
|
||||
let _ = self.wtr.write_all(&[self.eol]);
|
||||
}
|
||||
|
||||
pub fn matched<P: AsRef<Path>>(
|
||||
@ -58,15 +126,17 @@ impl<W: io::Write> Printer<W> {
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
) {
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write(b":");
|
||||
if self.with_filename {
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write(b":");
|
||||
}
|
||||
if let Some(line_number) = line_number {
|
||||
self.write(line_number.to_string().as_bytes());
|
||||
self.write(b":");
|
||||
}
|
||||
self.write(&buf[start..end]);
|
||||
if buf[start..end].last() != Some(&b'\n') {
|
||||
self.write(b"\n");
|
||||
if buf[start..end].last() != Some(&self.eol) {
|
||||
self.write_eol();
|
||||
}
|
||||
}
|
||||
|
||||
@ -78,24 +148,30 @@ impl<W: io::Write> Printer<W> {
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
) {
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write(b"-");
|
||||
if self.with_filename {
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write(b"-");
|
||||
}
|
||||
if let Some(line_number) = line_number {
|
||||
self.write(line_number.to_string().as_bytes());
|
||||
self.write(b"-");
|
||||
}
|
||||
self.write(&buf[start..end]);
|
||||
if buf[start..end].last() != Some(&b'\n') {
|
||||
self.write(b"\n");
|
||||
if buf[start..end].last() != Some(&self.eol) {
|
||||
self.write_eol();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
|
||||
wln!(&mut self.wtr, "Binary file {} matches", path.as_ref().display());
|
||||
}
|
||||
|
||||
fn write(&mut self, buf: &[u8]) {
|
||||
if self.quiet {
|
||||
return;
|
||||
}
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(buf);
|
||||
}
|
||||
|
||||
fn write_eol(&mut self) {
|
||||
let eol = self.eol;
|
||||
self.write(&[eol]);
|
||||
}
|
||||
}
|
||||
|
465
src/search.rs
465
src/search.rs
@ -20,6 +20,7 @@ const READ_SIZE: usize = 8 * (1<<10);
|
||||
/// Error describes errors that can occur while searching.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// A standard I/O error attached to a particular file path.
|
||||
Io {
|
||||
err: io::Error,
|
||||
path: PathBuf,
|
||||
@ -57,6 +58,7 @@ impl fmt::Display for Error {
|
||||
}
|
||||
|
||||
pub struct Searcher<'a, R, W: 'a> {
|
||||
opts: Options,
|
||||
inp: &'a mut InputBuffer,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
@ -68,11 +70,32 @@ pub struct Searcher<'a, R, W: 'a> {
|
||||
last_printed: usize,
|
||||
last_line: usize,
|
||||
after_context_remaining: usize,
|
||||
}
|
||||
|
||||
/// Options for configuring search.
|
||||
#[derive(Clone)]
|
||||
struct Options {
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
count: bool,
|
||||
eol: u8,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
before_context: usize,
|
||||
after_context: usize,
|
||||
text: bool,
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
fn default() -> Options {
|
||||
Options {
|
||||
after_context: 0,
|
||||
before_context: 0,
|
||||
count: false,
|
||||
eol: b'\n',
|
||||
invert_match: false,
|
||||
line_number: false,
|
||||
text: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
@ -96,6 +119,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
haystack: R,
|
||||
) -> Searcher<'a, R, W> {
|
||||
Searcher {
|
||||
opts: Options::default(),
|
||||
inp: inp,
|
||||
printer: printer,
|
||||
grep: grep,
|
||||
@ -107,47 +131,54 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
last_printed: 0,
|
||||
last_line: 0,
|
||||
after_context_remaining: 0,
|
||||
count: false,
|
||||
invert_match: false,
|
||||
line_number: false,
|
||||
before_context: 0,
|
||||
after_context: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.line_number = yes;
|
||||
/// The number of contextual lines to show after each match. The default
|
||||
/// is zero.
|
||||
pub fn after_context(mut self, count: usize) -> Self {
|
||||
self.opts.after_context = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// The number of contextual lines to show before each match. The default
|
||||
/// is zero.
|
||||
pub fn before_context(mut self, count: usize) -> Self {
|
||||
self.before_context = count;
|
||||
self.opts.before_context = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// The number of contextual lines to show after each match. The default
|
||||
/// is zero.
|
||||
pub fn after_context(mut self, count: usize) -> Self {
|
||||
self.after_context = count;
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.opts.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line byte used by this searcher.
|
||||
pub fn eol(mut self, eol: u8) -> Self {
|
||||
self.opts.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.opts.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.opts.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
self
|
||||
}
|
||||
|
||||
@ -157,16 +188,16 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
pub fn run(mut self) -> Result<u64, Error> {
|
||||
self.inp.reset();
|
||||
self.match_count = 0;
|
||||
self.line_count = if self.line_number { Some(0) } else { None };
|
||||
self.line_count = if self.opts.line_number { Some(0) } else { None };
|
||||
self.last_match = Match::default();
|
||||
self.after_context_remaining = 0;
|
||||
loop {
|
||||
let upto = self.inp.lastnl;
|
||||
self.print_after_context(upto);
|
||||
if !try!(self.fill()) {
|
||||
if self.inp.is_binary {
|
||||
self.printer.binary_matched(self.path);
|
||||
}
|
||||
break;
|
||||
}
|
||||
if !self.opts.text && self.inp.is_binary {
|
||||
break;
|
||||
}
|
||||
while self.inp.pos < self.inp.lastnl {
|
||||
@ -174,7 +205,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
&mut self.last_match,
|
||||
&mut self.inp.buf[..self.inp.lastnl],
|
||||
self.inp.pos);
|
||||
if self.invert_match {
|
||||
if self.opts.invert_match {
|
||||
let upto =
|
||||
if matched {
|
||||
self.last_match.start()
|
||||
@ -189,7 +220,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
}
|
||||
} else if matched {
|
||||
self.match_count += 1;
|
||||
if !self.count {
|
||||
if !self.opts.count {
|
||||
let start = self.last_match.start();
|
||||
let end = self.last_match.end();
|
||||
self.print_after_context(start);
|
||||
@ -204,32 +235,36 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.count && self.match_count > 0 {
|
||||
if self.opts.count && self.match_count > 0 {
|
||||
self.printer.path_count(self.path, self.match_count);
|
||||
}
|
||||
Ok(self.match_count)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn fill(&mut self) -> Result<bool, Error> {
|
||||
let mut keep_from = self.inp.lastnl;
|
||||
if self.before_context > 0 || self.after_context > 0 {
|
||||
keep_from = start_of_previous_lines(
|
||||
let mut keep = self.inp.lastnl;
|
||||
if self.opts.before_context > 0 || self.opts.after_context > 0 {
|
||||
let lines = 1 + cmp::max(
|
||||
self.opts.before_context, self.opts.after_context);
|
||||
keep = start_of_previous_lines(
|
||||
self.opts.eol,
|
||||
&self.inp.buf,
|
||||
self.inp.lastnl.saturating_sub(1),
|
||||
cmp::max(self.before_context, self.after_context) + 1);
|
||||
lines);
|
||||
}
|
||||
if keep_from < self.last_printed {
|
||||
self.last_printed = self.last_printed - keep_from;
|
||||
if keep < self.last_printed {
|
||||
self.last_printed = self.last_printed - keep;
|
||||
} else {
|
||||
self.last_printed = 0;
|
||||
}
|
||||
if keep_from <= self.last_line {
|
||||
self.last_line = self.last_line - keep_from;
|
||||
if keep <= self.last_line {
|
||||
self.last_line = self.last_line - keep;
|
||||
} else {
|
||||
self.count_lines(keep_from);
|
||||
self.count_lines(keep);
|
||||
self.last_line = 0;
|
||||
}
|
||||
let ok = try!(self.inp.fill(&mut self.haystack, keep_from).map_err(|err| {
|
||||
let ok = try!(self.inp.fill(&mut self.haystack, keep).map_err(|err| {
|
||||
Error::from_io(err, &self.path)
|
||||
}));
|
||||
Ok(ok)
|
||||
@ -237,10 +272,10 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn print_inverted_matches(&mut self, upto: usize) {
|
||||
debug_assert!(self.invert_match);
|
||||
let mut it = IterLines::new(self.inp.pos);
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, self.inp.pos);
|
||||
while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
|
||||
if !self.count {
|
||||
if !self.opts.count {
|
||||
self.print_match(start, end);
|
||||
}
|
||||
self.inp.pos = end;
|
||||
@ -250,7 +285,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn print_before_context(&mut self, upto: usize) {
|
||||
if self.count || self.before_context == 0 {
|
||||
if self.opts.count || self.opts.before_context == 0 {
|
||||
return;
|
||||
}
|
||||
let start = self.last_printed;
|
||||
@ -260,10 +295,11 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
}
|
||||
let before_context_start =
|
||||
start + start_of_previous_lines(
|
||||
self.opts.eol,
|
||||
&self.inp.buf[start..],
|
||||
end - start - 1,
|
||||
self.before_context);
|
||||
let mut it = IterLines::new(before_context_start);
|
||||
self.opts.before_context);
|
||||
let mut it = IterLines::new(self.opts.eol, before_context_start);
|
||||
while let Some((s, e)) = it.next(&self.inp.buf[..end]) {
|
||||
self.print_separator(s);
|
||||
self.print_context(s, e);
|
||||
@ -272,12 +308,12 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn print_after_context(&mut self, upto: usize) {
|
||||
if self.count || self.after_context_remaining == 0 {
|
||||
if self.opts.count || self.after_context_remaining == 0 {
|
||||
return;
|
||||
}
|
||||
let start = self.last_printed;
|
||||
let end = upto;
|
||||
let mut it = IterLines::new(start);
|
||||
let mut it = IterLines::new(self.opts.eol, start);
|
||||
while let Some((s, e)) = it.next(&self.inp.buf[..end]) {
|
||||
self.print_context(s, e);
|
||||
self.after_context_remaining -= 1;
|
||||
@ -295,7 +331,7 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
self.printer.matched(
|
||||
&self.path, &self.inp.buf, start, end, self.line_count);
|
||||
self.last_printed = end;
|
||||
self.after_context_remaining = self.after_context;
|
||||
self.after_context_remaining = self.opts.after_context;
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@ -309,21 +345,23 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn print_separator(&mut self, before: usize) {
|
||||
if self.before_context == 0 && self.after_context == 0 {
|
||||
if self.opts.before_context == 0 && self.opts.after_context == 0 {
|
||||
return;
|
||||
}
|
||||
if !self.printer.has_printed() {
|
||||
return;
|
||||
}
|
||||
if (self.last_printed == 0 && before > 0) || self.last_printed < before {
|
||||
self.printer.context_separator();
|
||||
if (self.last_printed == 0 && before > 0)
|
||||
|| self.last_printed < before {
|
||||
self.printer.context_separate();
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn count_lines(&mut self, upto: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += count_lines(&self.inp.buf[self.last_line..upto]);
|
||||
*line_count += count_lines(
|
||||
&self.inp.buf[self.last_line..upto], self.opts.eol);
|
||||
self.last_line = upto;
|
||||
}
|
||||
}
|
||||
@ -337,15 +375,53 @@ impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// InputBuffer encapsulates the logic of maintaining a ~fixed sized buffer
|
||||
/// on which to search. There are three key pieces of complexity:
|
||||
///
|
||||
/// 1. We must be able to handle lines that are longer than the size of the
|
||||
/// buffer. For this reason, the buffer is allowed to expand (and is
|
||||
/// therefore not technically fixed). Note that once a buffer expands, it
|
||||
/// will never contract.
|
||||
/// 2. The contents of the buffer may end with a partial line, so we must keep
|
||||
/// track of where the last complete line ends. Namely, the partial line
|
||||
/// is only completed on subsequent reads *after* searching up through
|
||||
/// the last complete line is done.
/// 3. When printing the context of a match, the last N lines of the buffer
/// may need to be rolled over into the next buffer. For example, a match
/// may occur at the beginning of a buffer, in which case, lines at the end
/// of the previous contents of the buffer need to be printed.
///
/// An InputBuffer is designed to be reused and isn't tied to any particular
/// reader.
pub struct InputBuffer {
/// The number of bytes to attempt to read at a time. Once set, this is
/// never changed.
read_size: usize,
/// The end-of-line terminator used in this buffer.
eol: u8,
/// A scratch buffer.
tmp: Vec<u8>,
/// A buffer to read bytes into. All searches are executed directly against
/// this buffer and pos/lastnl/end point into it.
buf: Vec<u8>,
tmp1: Vec<u8>,
tmp2: Vec<u8>,
/// The current position in buf. The current position represents where the
/// next search should start.
pos: usize,
/// The position immediately following the last line terminator in buf.
/// This may be equal to end.
///
/// Searching should never cross this boundary. In particular, the contents
/// of the buffer following this position may correspond to a *partial* line.
/// All contents before this position are complete lines.
lastnl: usize,
/// The end position of the buffer. Data after this position is not
/// specified.
end: usize,
/// Set to true if and only if no reads have occurred yet.
first: bool,
/// Set to true if and only if the contents of buf are determined to be
/// "binary" (i.e., not searchable text). Note that its value may be
/// a false negative *or* a false positive. It is only a heuristic.
is_binary: bool,
}

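// [Editorial illustration -- not part of this commit; the names below are
// hypothetical.] The comments above describe a rolling, line-oriented buffer:
// searching never crosses the last complete line, and the partial trailing
// line is carried over into the next fill. A minimal stand-alone sketch of
// that split for a single chunk:
#[cfg(test)]
mod rolling_buffer_sketch {
    /// Split a freshly read chunk into (complete lines, partial trailing line).
    fn split_complete(chunk: &[u8], eol: u8) -> (&[u8], &[u8]) {
        match chunk.iter().rposition(|&b| b == eol) {
            Some(i) => chunk.split_at(i + 1),
            None => (&chunk[..0], chunk),
        }
    }

    #[test]
    fn partial_tail_is_carried_over() {
        let (complete, partial) = split_complete(b"foo\nbar\nba", b'\n');
        assert_eq!(complete, &b"foo\nbar\n"[..]);
        assert_eq!(partial, &b"ba"[..]);
    }
}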
@ -367,9 +443,9 @@ impl InputBuffer {
}
InputBuffer {
read_size: cap,
eol: b'\n',
buf: vec![0; cap],
tmp1: vec![],
tmp2: vec![],
tmp: vec![],
pos: 0,
lastnl: 0,
end: 0,
@ -378,6 +454,12 @@ impl InputBuffer {
}
}

/// Set the end-of-line terminator used by this input buffer.
pub fn eol(&mut self, eol: u8) {
self.eol = eol;
}

/// Resets this buffer so that it may be reused with a new reader.
fn reset(&mut self) {
self.pos = 0;
self.lastnl = 0;
@ -386,36 +468,30 @@ impl InputBuffer {
self.is_binary = false;
}

/// Fill the contents of this buffer with the reader given. The reader
/// given should be the same in every call to fill unless reset has been
/// called.
///
/// The bytes in buf[keep_from..end] are rolled over into the beginning
/// of the buffer.
fn fill<R: io::Read>(
&mut self,
rdr: &mut R,
keep_from: usize,
) -> Result<bool, io::Error> {
self.pos = 0;
self.tmp1.clear();
self.tmp2.clear();

// Save the leftovers from the previous fill before anything else.
if self.lastnl < self.end {
self.tmp1.extend_from_slice(&self.buf[self.lastnl..self.end]);
}
// If we need to save lines to account for context, do that here.
// These context lines have already been searched, but make up the
// first bytes of this buffer.
if keep_from < self.lastnl {
self.tmp2.extend_from_slice(&self.buf[keep_from..self.lastnl]);
self.buf[0..self.tmp2.len()].copy_from_slice(&self.tmp2);
self.pos = self.tmp2.len();
}
if !self.tmp1.is_empty() {
let (start, end) = (self.pos, self.pos + self.tmp1.len());
self.buf[start..end].copy_from_slice(&self.tmp1);
self.end = end;
} else {
self.end = self.pos;
}
// Rollover bytes from buf[keep_from..end] and update our various
// pointers. N.B. This could be done with the unsafe ptr::copy, but
// I haven't been able to produce a benchmark that notices a difference
// in performance. (Invariably, ptr::copy is also clearer IMO.)
self.tmp.clear();
self.tmp.extend_from_slice(&self.buf[keep_from..self.end]);
self.buf[0..self.tmp.len()].copy_from_slice(&self.tmp);
self.pos = self.lastnl - keep_from;
self.lastnl = 0;
self.end = self.tmp.len();
while self.lastnl == 0 {
// If our buffer isn't big enough to hold the contents of a full
// read, expand it.
if self.buf.len() - self.end < self.read_size {
let min_len = self.read_size + self.buf.len() - self.end;
let new_len = cmp::max(min_len, self.buf.len() * 2);
@ -423,22 +499,28 @@ impl InputBuffer {
}
let n = try!(rdr.read(
&mut self.buf[self.end..self.end + self.read_size]));
if self.first {
if is_binary(&self.buf[self.end..self.end + n]) {
self.is_binary = true;
return Ok(false);
}
if self.first && is_binary(&self.buf[self.end..self.end + n]) {
self.is_binary = true;
}
if self.is_binary {
replace_buf(
&mut self.buf[self.end..self.end + n], b'\x00', self.eol);
}
self.first = false;
// We assume that reading 0 bytes means we've hit EOF.
if n == 0 {
// If we've searched everything up to the end of the buffer,
// then there's nothing left to do.
if self.end - self.pos == 0 {
return Ok(false);
}
// Even if we hit EOF, we might still have to search the
// last line if it didn't contain a trailing terminator.
self.lastnl = self.end;
break;
}
self.lastnl =
memrchr(b'\n', &self.buf[self.end..self.end + n])
memrchr(self.eol, &self.buf[self.end..self.end + n])
.map(|i| self.end + i + 1)
.unwrap_or(0);
self.end += n;
@ -450,7 +532,7 @@ impl InputBuffer {
/// Returns true if and only if the given buffer is determined to be "binary"
/// or otherwise not contain text data that is usefully searchable.
///
/// Note that this may return both false positives and false negatives!
/// Note that this may return both false positives and false negatives.
#[inline(always)]
fn is_binary(buf: &[u8]) -> bool {
if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
@ -461,15 +543,31 @@ fn is_binary(buf: &[u8]) -> bool {

/// Count the number of lines in the given buffer.
#[inline(always)]
fn count_lines(mut buf: &[u8]) -> u64 {
fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
let mut count = 0;
while let Some(pos) = memchr(b'\n', buf) {
while let Some(pos) = memchr(eol, buf) {
count += 1;
buf = &buf[pos + 1..];
}
count
}

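// [Editorial illustration -- not part of this commit.] A quick check of the
// count_lines behavior shown above: only occurrences of `eol` are counted, so
// a final line without a trailing terminator does not add to the count.
#[cfg(test)]
mod count_lines_example {
    use super::count_lines;

    #[test]
    fn counts_terminators_only() {
        assert_eq!(3, count_lines(b"a\nb\nc\n", b'\n'));
        assert_eq!(2, count_lines(b"a\nb\nc", b'\n'));
    }
}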
/// Replaces a with b in buf.
fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
if a == b {
return;
}
let mut pos = 0;
while let Some(i) = memchr(a, &buf[pos..]).map(|i| pos + i) {
buf[i] = b;
pos = i + 1;
while buf.get(pos) == Some(&a) {
buf[pos] = b;
pos += 1;
}
}
}

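// [Editorial illustration -- not part of this commit.] replace_buf above is
// used by fill to rewrite NUL bytes to the line terminator in place when the
// input looks binary.
#[cfg(test)]
mod replace_buf_example {
    use super::replace_buf;

    #[test]
    fn nul_becomes_line_terminator() {
        let mut buf = b"Sherlock\x00\x00Holmes".to_vec();
        replace_buf(&mut buf, b'\x00', b'\n');
        assert_eq!(&buf[..], &b"Sherlock\n\nHolmes"[..]);
    }
}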
/// An "iterator" over lines in a particular buffer.
|
||||
///
|
||||
/// Idiomatic Rust would borrow the buffer and use it as internal state to
|
||||
@ -477,6 +575,7 @@ fn count_lines(mut buf: &[u8]) -> u64 {
|
||||
/// the borrow in the search code. (Because the borrow prevents composition
|
||||
/// through other mutable methods.)
|
||||
struct IterLines {
|
||||
eol: u8,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
@ -485,8 +584,9 @@ impl IterLines {
|
||||
///
|
||||
/// The buffer is passed to the `next` method.
|
||||
#[inline(always)]
|
||||
fn new(start: usize) -> IterLines {
|
||||
fn new(eol: u8, start: usize) -> IterLines {
|
||||
IterLines {
|
||||
eol: eol,
|
||||
pos: start,
|
||||
}
|
||||
}
|
||||
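// [Editorial illustration -- not part of this commit.] IterLines carries only
// `eol` and a cursor, so the buffer is passed to each call of `next`; assuming
// the usual semantics of the `next` hunk below, each yielded (start, end)
// range includes the line terminator when one is present.
#[cfg(test)]
mod iter_lines_example {
    use super::IterLines;

    #[test]
    fn yields_terminated_ranges() {
        let buf = b"foo\nbar\n";
        let mut it = IterLines::new(b'\n', 0);
        assert_eq!(Some((0, 4)), it.next(buf));
        assert_eq!(Some((4, 8)), it.next(buf));
        assert_eq!(None, it.next(buf));
    }
}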
@ -497,7 +597,7 @@ impl IterLines {
/// The range returned includes the new line.
#[inline(always)]
fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
match memchr(b'\n', &buf[self.pos..]) {
match memchr(self.eol, &buf[self.pos..]) {
None => {
if self.pos < buf.len() {
let start = self.pos;
@ -528,10 +628,13 @@ impl IterLines {
/// The position returned corresponds to the first byte in the given line.
#[inline(always)]
fn start_of_previous_lines(
eol: u8,
buf: &[u8],
mut end: usize,
mut count: usize,
) -> usize {
// TODO(burntsushi): This function badly needs to be simplified. The case
// analysis is impossible to follow.
if buf[..end].is_empty() {
return 0;
}
@ -541,14 +644,14 @@ fn start_of_previous_lines(
if end == buf.len() {
end -= 1;
}
if buf[end] == b'\n' {
if buf[end] == eol {
if end == 0 {
return end + 1;
}
end -= 1;
}
while count > 0 {
if buf[end] == b'\n' {
if buf[end] == eol {
count -= 1;
if count == 0 {
return end + 1;
@ -559,7 +662,7 @@ fn start_of_previous_lines(
end -= 1;
continue;
}
match memrchr(b'\n', &buf[..end]) {
match memrchr(eol, &buf[..end]) {
None => {
return 0;
}
@ -567,7 +670,7 @@ fn start_of_previous_lines(
count -= 1;
end = i;
if end == 0 {
if buf[end] == b'\n' && count == 0 {
if buf[end] == eol && count == 0 {
end += 1;
}
return end;
@ -579,10 +682,6 @@ fn start_of_previous_lines(
end + 2
}

fn show(bytes: &[u8]) -> &str {
::std::str::from_utf8(bytes).unwrap()
}

#[cfg(test)]
mod tests {
use std::io;
@ -668,102 +767,105 @@ fn main() {

#[test]
fn previous_lines() {
let eol = b'\n';
let text = SHERLOCK.as_bytes();
assert_eq!(366, text.len());

assert_eq!(0, start_of_previous_lines(text, 366, 100));
assert_eq!(366, start_of_previous_lines(text, 366, 0));
assert_eq!(0, start_of_previous_lines(eol, text, 366, 100));
assert_eq!(366, start_of_previous_lines(eol, text, 366, 0));

assert_eq!(321, start_of_previous_lines(text, 366, 1));
assert_eq!(321, start_of_previous_lines(text, 365, 1));
assert_eq!(321, start_of_previous_lines(text, 364, 1));
assert_eq!(321, start_of_previous_lines(text, 322, 1));
assert_eq!(321, start_of_previous_lines(text, 321, 1));
assert_eq!(258, start_of_previous_lines(text, 320, 1));
assert_eq!(321, start_of_previous_lines(eol, text, 366, 1));
assert_eq!(321, start_of_previous_lines(eol, text, 365, 1));
assert_eq!(321, start_of_previous_lines(eol, text, 364, 1));
assert_eq!(321, start_of_previous_lines(eol, text, 322, 1));
assert_eq!(321, start_of_previous_lines(eol, text, 321, 1));
assert_eq!(258, start_of_previous_lines(eol, text, 320, 1));

assert_eq!(258, start_of_previous_lines(text, 366, 2));
assert_eq!(258, start_of_previous_lines(text, 365, 2));
assert_eq!(258, start_of_previous_lines(text, 364, 2));
assert_eq!(258, start_of_previous_lines(text, 322, 2));
assert_eq!(258, start_of_previous_lines(text, 321, 2));
assert_eq!(193, start_of_previous_lines(text, 320, 2));
assert_eq!(258, start_of_previous_lines(eol, text, 366, 2));
assert_eq!(258, start_of_previous_lines(eol, text, 365, 2));
assert_eq!(258, start_of_previous_lines(eol, text, 364, 2));
assert_eq!(258, start_of_previous_lines(eol, text, 322, 2));
assert_eq!(258, start_of_previous_lines(eol, text, 321, 2));
assert_eq!(193, start_of_previous_lines(eol, text, 320, 2));

assert_eq!(65, start_of_previous_lines(text, 66, 1));
assert_eq!(0, start_of_previous_lines(text, 66, 2));
assert_eq!(64, start_of_previous_lines(text, 64, 0));
assert_eq!(0, start_of_previous_lines(text, 64, 1));
assert_eq!(0, start_of_previous_lines(text, 64, 2));
assert_eq!(65, start_of_previous_lines(eol, text, 66, 1));
assert_eq!(0, start_of_previous_lines(eol, text, 66, 2));
assert_eq!(64, start_of_previous_lines(eol, text, 64, 0));
assert_eq!(0, start_of_previous_lines(eol, text, 64, 1));
assert_eq!(0, start_of_previous_lines(eol, text, 64, 2));

assert_eq!(0, start_of_previous_lines(text, 0, 2));
assert_eq!(0, start_of_previous_lines(text, 0, 1));
assert_eq!(0, start_of_previous_lines(eol, text, 0, 2));
assert_eq!(0, start_of_previous_lines(eol, text, 0, 1));
}

#[test]
fn previous_lines_short() {
let eol = b'\n';
let text = &b"a\nb\nc\nd\ne\nf\n"[..];
assert_eq!(12, text.len());

assert_eq!(10, start_of_previous_lines(text, 12, 1));
assert_eq!(8, start_of_previous_lines(text, 12, 2));
assert_eq!(6, start_of_previous_lines(text, 12, 3));
assert_eq!(4, start_of_previous_lines(text, 12, 4));
assert_eq!(2, start_of_previous_lines(text, 12, 5));
assert_eq!(0, start_of_previous_lines(text, 12, 6));
assert_eq!(0, start_of_previous_lines(text, 12, 7));
assert_eq!(10, start_of_previous_lines(text, 11, 1));
assert_eq!(8, start_of_previous_lines(text, 11, 2));
assert_eq!(6, start_of_previous_lines(text, 11, 3));
assert_eq!(4, start_of_previous_lines(text, 11, 4));
assert_eq!(2, start_of_previous_lines(text, 11, 5));
assert_eq!(0, start_of_previous_lines(text, 11, 6));
assert_eq!(0, start_of_previous_lines(text, 11, 7));
assert_eq!(10, start_of_previous_lines(text, 10, 1));
assert_eq!(8, start_of_previous_lines(text, 10, 2));
assert_eq!(6, start_of_previous_lines(text, 10, 3));
assert_eq!(4, start_of_previous_lines(text, 10, 4));
assert_eq!(2, start_of_previous_lines(text, 10, 5));
assert_eq!(0, start_of_previous_lines(text, 10, 6));
assert_eq!(0, start_of_previous_lines(text, 10, 7));
assert_eq!(10, start_of_previous_lines(eol, text, 12, 1));
assert_eq!(8, start_of_previous_lines(eol, text, 12, 2));
assert_eq!(6, start_of_previous_lines(eol, text, 12, 3));
assert_eq!(4, start_of_previous_lines(eol, text, 12, 4));
assert_eq!(2, start_of_previous_lines(eol, text, 12, 5));
assert_eq!(0, start_of_previous_lines(eol, text, 12, 6));
assert_eq!(0, start_of_previous_lines(eol, text, 12, 7));
assert_eq!(10, start_of_previous_lines(eol, text, 11, 1));
assert_eq!(8, start_of_previous_lines(eol, text, 11, 2));
assert_eq!(6, start_of_previous_lines(eol, text, 11, 3));
assert_eq!(4, start_of_previous_lines(eol, text, 11, 4));
assert_eq!(2, start_of_previous_lines(eol, text, 11, 5));
assert_eq!(0, start_of_previous_lines(eol, text, 11, 6));
assert_eq!(0, start_of_previous_lines(eol, text, 11, 7));
assert_eq!(10, start_of_previous_lines(eol, text, 10, 1));
assert_eq!(8, start_of_previous_lines(eol, text, 10, 2));
assert_eq!(6, start_of_previous_lines(eol, text, 10, 3));
assert_eq!(4, start_of_previous_lines(eol, text, 10, 4));
assert_eq!(2, start_of_previous_lines(eol, text, 10, 5));
assert_eq!(0, start_of_previous_lines(eol, text, 10, 6));
assert_eq!(0, start_of_previous_lines(eol, text, 10, 7));

assert_eq!(8, start_of_previous_lines(text, 9, 1));
assert_eq!(8, start_of_previous_lines(text, 8, 1));
assert_eq!(8, start_of_previous_lines(eol, text, 9, 1));
assert_eq!(8, start_of_previous_lines(eol, text, 8, 1));

assert_eq!(6, start_of_previous_lines(text, 7, 1));
assert_eq!(6, start_of_previous_lines(text, 6, 1));
assert_eq!(6, start_of_previous_lines(eol, text, 7, 1));
assert_eq!(6, start_of_previous_lines(eol, text, 6, 1));

assert_eq!(4, start_of_previous_lines(text, 5, 1));
assert_eq!(4, start_of_previous_lines(text, 4, 1));
assert_eq!(4, start_of_previous_lines(eol, text, 5, 1));
assert_eq!(4, start_of_previous_lines(eol, text, 4, 1));

assert_eq!(2, start_of_previous_lines(text, 3, 1));
assert_eq!(2, start_of_previous_lines(text, 2, 1));
assert_eq!(2, start_of_previous_lines(eol, text, 3, 1));
assert_eq!(2, start_of_previous_lines(eol, text, 2, 1));

assert_eq!(0, start_of_previous_lines(text, 1, 1));
assert_eq!(0, start_of_previous_lines(text, 0, 1));
assert_eq!(0, start_of_previous_lines(eol, text, 1, 1));
assert_eq!(0, start_of_previous_lines(eol, text, 0, 1));
}

#[test]
fn previous_lines_empty() {
let eol = b'\n';
let text = &b"\n\n\nd\ne\nf\n"[..];
assert_eq!(9, text.len());

assert_eq!(7, start_of_previous_lines(text, 9, 1));
assert_eq!(5, start_of_previous_lines(text, 9, 2));
assert_eq!(3, start_of_previous_lines(text, 9, 3));
assert_eq!(2, start_of_previous_lines(text, 9, 4));
assert_eq!(1, start_of_previous_lines(text, 9, 5));
assert_eq!(0, start_of_previous_lines(text, 9, 6));
assert_eq!(0, start_of_previous_lines(text, 9, 7));
assert_eq!(7, start_of_previous_lines(eol, text, 9, 1));
assert_eq!(5, start_of_previous_lines(eol, text, 9, 2));
assert_eq!(3, start_of_previous_lines(eol, text, 9, 3));
assert_eq!(2, start_of_previous_lines(eol, text, 9, 4));
assert_eq!(1, start_of_previous_lines(eol, text, 9, 5));
assert_eq!(0, start_of_previous_lines(eol, text, 9, 6));
assert_eq!(0, start_of_previous_lines(eol, text, 9, 7));

let text = &b"a\n\n\nd\ne\nf\n"[..];
assert_eq!(10, text.len());

assert_eq!(8, start_of_previous_lines(text, 10, 1));
assert_eq!(6, start_of_previous_lines(text, 10, 2));
assert_eq!(4, start_of_previous_lines(text, 10, 3));
assert_eq!(3, start_of_previous_lines(text, 10, 4));
assert_eq!(2, start_of_previous_lines(text, 10, 5));
assert_eq!(0, start_of_previous_lines(text, 10, 6));
assert_eq!(0, start_of_previous_lines(text, 10, 7));
assert_eq!(8, start_of_previous_lines(eol, text, 10, 1));
assert_eq!(6, start_of_previous_lines(eol, text, 10, 2));
assert_eq!(4, start_of_previous_lines(eol, text, 10, 3));
assert_eq!(3, start_of_previous_lines(eol, text, 10, 4));
assert_eq!(2, start_of_previous_lines(eol, text, 10, 5));
assert_eq!(0, start_of_previous_lines(eol, text, 10, 6));
assert_eq!(0, start_of_previous_lines(eol, text, 10, 7));
}

#[test]
@ -776,6 +878,23 @@ fn main() {
");
}

#[test]
fn binary() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s|s);
assert_eq!(0, count);
assert_eq!(out, "");
}


#[test]
fn binary_text() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:Holmes\n");
}

#[test]
fn line_numbers() {
let (count, out) = search_smallcap(
358
src/types.rs
Normal file
@ -0,0 +1,358 @@
/*!
The types module provides a way of associating glob patterns on file names
with file types.
*/

use std::collections::HashMap;
use std::error::Error as StdError;
use std::fmt;
use std::path::Path;

use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};

const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("asm", &["*.asm", "*.s", "*.S"]),
("awk", &["*.awk"]),
("c", &["*.c", "*.h", "*.H"]),
("cbor", &["*.cbor"]),
("clojure", &["*.clj", "*.cljs"]),
("cmake", &["CMakeLists.txt"]),
("coffeescript", &["*.coffee"]),
("cpp", &[
"*.C", "*.cc", "*.cpp", "*.cxx",
"*.h", "*.H", "*.hh", "*.hpp",
]),
("csharp", &["*.cs"]),
("css", &["*.css"]),
("cython", &["*.pyx"]),
("dart", &["*.dart"]),
("d", &["*.d"]),
("elisp", &["*.el"]),
("erlang", &["*.erl", "*.hrl"]),
("fortran", &[
"*.f", "*.F", "*.f77", "*.F77", "*.pfo",
"*.f90", "*.F90", "*.f95", "*.F95",
]),
("go", &["*.go"]),
("groovy", &["*.groovy"]),
("haskell", &["*.hs", "*.lhs"]),
("html", &["*.htm", "*.html"]),
("java", &["*.java"]),
("js", &["*.js"]),
("json", &["*.json"]),
("jsonl", &["*.jsonl"]),
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
("lua", &["*.lua"]),
("m4", &["*.ac", "*.m4"]),
("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]),
("markdown", &["*.md"]),
("matlab", &["*.m"]),
("mk", &["mkfile"]),
("ml", &["*.ml"]),
("objc", &["*.h", "*.m"]),
("objcpp", &["*.h", "*.mm"]),
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
("py", &["*.py"]),
("rr", &["*.R"]),
("rst", &["*.rst"]),
("ruby", &["*.rb"]),
("rust", &["*.rs"]),
("scala", &["*.scala"]),
("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
("sql", &["*.sql"]),
("tex", &["*.tex", "*.cls", "*.sty"]),
("txt", &["*.txt"]),
("toml", &["*.toml", "Cargo.lock"]),
("vala", &["*.vala"]),
("vimscript", &["*.vim"]),
("xml", &["*.xml"]),
("yacc", &["*.y"]),
("yaml", &["*.yaml", "*.yml"]),
];

/// Describes all the possible failure conditions for building a file type
/// matcher.
#[derive(Debug)]
pub enum Error {
/// We tried to select (or negate) a file type that is not defined.
UnrecognizedFileType(String),
/// A user specified file type definition could not be parsed.
InvalidDefinition,
/// There was an error building the matcher (probably a bad glob).
Gitignore(gitignore::Error),
}

impl StdError for Error {
fn description(&self) -> &str {
match *self {
Error::UnrecognizedFileType(_) => "unrecognized file type",
Error::InvalidDefinition => "invalid definition",
Error::Gitignore(ref err) => err.description(),
}
}
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::UnrecognizedFileType(ref ty) => {
write!(f, "unrecognized file type: {}", ty)
}
Error::InvalidDefinition => {
write!(f, "invalid definition (format is type:glob, e.g., \
html:*.html)")
}
Error::Gitignore(ref err) => err.fmt(f),
}
}
}

impl From<gitignore::Error> for Error {
fn from(err: gitignore::Error) -> Error {
Error::Gitignore(err)
}
}

/// A single file type definition.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
name: String,
pats: Vec<String>,
}

impl FileTypeDef {
/// Return the name of this file type.
pub fn name(&self) -> &str {
&self.name
}

/// Return the glob patterns used to recognize this file type.
pub fn patterns(&self) -> &[String] {
&self.pats
}
}

/// Types is a file type matcher.
#[derive(Clone, Debug)]
pub struct Types {
gi: Option<Gitignore>,
has_selected: bool,
unmatched_pat: Pattern,
}

impl Types {
/// Creates a new file type matcher from the given Gitignore matcher. If
/// no Gitignore matcher is provided, then the file type matcher has no
/// effect.
///
/// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored.
fn new(gi: Option<Gitignore>, has_selected: bool) -> Types {
Types {
gi: gi,
has_selected: has_selected,
unmatched_pat: Pattern {
from: Path::new("<filetype>").to_path_buf(),
original: "<none>".to_string(),
pat: "<none>".to_string(),
whitelist: false,
only_dir: false,
},
}
}

/// Returns a match for the given path against this file type matcher.
///
/// The path is considered whitelisted if it matches a selected file type.
/// The path is considered ignored if it matches a negated file type.
/// If at least one file type is selected and path doesn't match, then
/// the path is also considered ignored.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
// File types don't apply to directories.
if is_dir {
return Match::None;
}
let path = path.as_ref();
self.gi.as_ref()
.map(|gi| {
let path = &*path.to_string_lossy();
let mat = gi.matched_utf8(path, is_dir).invert();
if self.has_selected && mat.is_none() {
Match::Ignored(&self.unmatched_pat)
} else {
mat
}
})
.unwrap_or(Match::None)
}
}

/// TypesBuilder builds a type matcher from a set of file type definitions and
/// a set of file type selections.
pub struct TypesBuilder {
types: HashMap<String, Vec<String>>,
select: Vec<String>,
select_not: Vec<String>,
}

impl TypesBuilder {
/// Create a new builder for a file type matcher.
pub fn new() -> TypesBuilder {
TypesBuilder {
types: HashMap::new(),
select: vec![],
select_not: vec![],
}
}

/// Build the current set of file type definitions *and* selections into
/// a file type matcher.
pub fn build(&self) -> Result<Types, Error> {
if self.select.is_empty() && self.select_not.is_empty() {
return Ok(Types::new(None, false));
}
let mut bgi = GitignoreBuilder::new("/");
for name in &self.select {
let globs = match self.types.get(name) {
Some(globs) => globs,
None => {
return Err(Error::UnrecognizedFileType(name.to_string()));
}
};
for glob in globs {
try!(bgi.add("<filetype>", glob));
}
}
for name in &self.select_not {
let globs = match self.types.get(name) {
Some(globs) => globs,
None => {
return Err(Error::UnrecognizedFileType(name.to_string()));
}
};
for glob in globs {
try!(bgi.add("<filetype>", &format!("!{}", glob)));
}
}
Ok(Types::new(Some(try!(bgi.build())), !self.select.is_empty()))
}

/// Return the set of current file type definitions.
pub fn definitions(&self) -> Vec<FileTypeDef> {
let mut defs = vec![];
for (ref name, ref pats) in &self.types {
let mut pats = pats.to_vec();
pats.sort();
defs.push(FileTypeDef {
name: name.to_string(),
pats: pats,
});
}
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
defs
}

/// Select the file type given by `name`.
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
self.select.push(name.to_string());
self
}

/// Ignore the file type given by `name`.
pub fn select_not(&mut self, name: &str) -> &mut TypesBuilder {
self.select_not.push(name.to_string());
self
}

/// Clear any file type definitions for the type given.
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
self.types.remove(name);
self
}

/// Add a new file type definition. `name` can be arbitrary and `pat`
/// should be a glob recognizing file paths belonging to the `name` type.
pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder {
self.types.entry(name.to_string())
.or_insert(vec![]).push(pat.to_string());
self
}

/// Add a new file type definition specified in string form. The format
/// is `name:glob`. Names may not include a colon.
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
let name: String = def.chars().take_while(|&c| c != ':').collect();
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
if name.is_empty() || pat.is_empty() {
return Err(Error::InvalidDefinition);
}
self.add(&name, &pat);
Ok(())
}

/// Add a set of default file type definitions.
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
for &(name, exts) in TYPE_EXTENSIONS {
for ext in exts {
self.add(name, ext);
}
}
self
}
}

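// [Editorial illustration -- not part of this commit.] Typical use of the
// builder above, mirroring what the tests below exercise: select a type,
// build the matcher, and ask whether a path is ignored.
#[cfg(test)]
mod builder_example {
    use super::TypesBuilder;

    #[test]
    fn select_rust_only() {
        let mut btypes = TypesBuilder::new();
        btypes.add_defaults();
        btypes.select("rust");
        let types = btypes.build().unwrap();
        assert!(!types.matched("main.rs", false).is_ignored());
        assert!(types.matched("index.html", false).is_ignored());
    }
}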
#[cfg(test)]
mod tests {
use super::TypesBuilder;

macro_rules! matched {
($name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr) => {
matched!($name, $types, $sel, $selnot, $path, true);
};
(not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr) => {
matched!($name, $types, $sel, $selnot, $path, false);
};
($name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr, $matched:expr) => {
#[test]
fn $name() {
let mut btypes = TypesBuilder::new();
for tydef in $types {
btypes.add_def(tydef).unwrap();
}
for sel in $sel {
btypes.select(sel);
}
for selnot in $selnot {
btypes.select_not(selnot);
}
let types = btypes.build().unwrap();
let mat = types.matched($path, false);
assert_eq!($matched, !mat.is_ignored());
}
};
}

fn types() -> Vec<&'static str> {
vec![
"html:*.html",
"html:*.htm",
"rust:*.rs",
"js:*.js",
]
}

matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
matched!(match2, types(), vec!["html"], vec![], "index.html");
matched!(match3, types(), vec!["html"], vec![], "index.htm");
matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
matched!(match5, types(), vec![], vec![], "index.html");
matched!(match6, types(), vec![], vec!["rust"], "index.html");

matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
}