1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-06-25 14:22:54 +02:00

Move all gitignore matching to separate crate.

This PR introduces a new sub-crate, `ignore`, which primarily provides a
fast recursive directory iterator that respects ignore files like
gitignore and other configurable filtering rules based on globs or even
file types.

This results in a substantial source of complexity moved out of ripgrep's
core and into a reusable component that others can now (hopefully)
benefit from.

While much of the ignore code carried over from ripgrep's core, a
substantial portion of it was rewritten with the following goals in
mind:

1. Reuse matchers built from gitignore files across directory iteration.
2. Design the matcher data structure to be amenable for parallelizing
   directory iteration. (Indeed, writing the parallel iterator is the
   next step.)

Fixes #9, #44, #45
This commit is contained in:
Andrew Gallant
2016-10-11 19:57:09 -04:00
parent 12b2b1f624
commit d79add341b
30 changed files with 3765 additions and 1760 deletions

View File

@ -14,19 +14,17 @@ use term::Terminal;
use term;
#[cfg(windows)]
use term::WinConsole;
use walkdir::WalkDir;
use atty;
use gitignore::{Gitignore, GitignoreBuilder};
use ignore::Ignore;
use ignore::overrides::{Override, OverrideBuilder};
use ignore::types::{FileTypeDef, Types, TypesBuilder};
use ignore;
use out::{Out, ColoredTerminal};
use printer::Printer;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
#[cfg(windows)]
use terminal_win::WindowsBuffer;
use types::{FileTypeDef, Types, TypesBuilder};
use walk;
use Result;
@ -131,6 +129,13 @@ Less common options:
Search hidden directories and files. (Hidden directories and files are
skipped by default.)
--ignore-file FILE ...
Specify additional ignore files for filtering file paths. Ignore files
should be in the gitignore format and are matched relative to the
current working directory. These ignore files have lower precedence
than all other ignore file types. When specifying multiple ignore
files, earlier files have lower precedence than later files.
-L, --follow
Follow symlinks.
@ -234,6 +239,7 @@ pub struct RawArgs {
flag_heading: bool,
flag_hidden: bool,
flag_ignore_case: bool,
flag_ignore_file: Vec<String>,
flag_invert_match: bool,
flag_line_number: bool,
flag_fixed_strings: bool,
@ -279,11 +285,12 @@ pub struct Args {
eol: u8,
files: bool,
follow: bool,
glob_overrides: Option<Gitignore>,
glob_overrides: Override,
grep: Grep,
heading: bool,
hidden: bool,
ignore_case: bool,
ignore_files: Vec<PathBuf>,
invert_match: bool,
line_number: bool,
line_per_match: bool,
@ -347,14 +354,13 @@ impl RawArgs {
}
let glob_overrides =
if self.flag_glob.is_empty() {
None
Override::empty()
} else {
let cwd = try!(env::current_dir());
let mut bgi = GitignoreBuilder::new(cwd);
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
for pat in &self.flag_glob {
try!(bgi.add("<argv>", pat));
try!(ovr.add(pat));
}
Some(try!(bgi.build()))
try!(ovr.build())
};
let threads =
if self.flag_threads == 0 {
@ -382,6 +388,9 @@ impl RawArgs {
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
let text = self.flag_text || self.flag_unrestricted >= 3;
let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| {
Path::new(p).to_path_buf()
}).collect();
let mut args = Args {
paths: paths,
after_context: after_context,
@ -399,6 +408,7 @@ impl RawArgs {
heading: !self.flag_no_heading && self.flag_heading,
hidden: hidden,
ignore_case: self.flag_ignore_case,
ignore_files: ignore_files,
invert_match: self.flag_invert_match,
line_number: !self.flag_no_line_number && self.flag_line_number,
line_per_match: self.flag_vimgrep,
@ -711,31 +721,30 @@ impl Args {
self.type_list
}
/// Create a new recursive directory iterator at the path given.
pub fn walker(&self, path: &Path) -> Result<walk::Iter> {
// Always follow symlinks for explicitly specified files.
let mut wd = WalkDir::new(path).follow_links(
self.follow || path.is_file());
if let Some(maxdepth) = self.maxdepth {
wd = wd.max_depth(maxdepth);
/// Create a new recursive directory iterator over the paths in argv.
pub fn walker(&self) -> Walk {
let paths = self.paths();
let mut wd = ignore::WalkBuilder::new(&paths[0]);
for path in &paths[1..] {
wd.add(path);
}
let mut ig = Ignore::new();
// Only register ignore rules if this is a directory. If it's a file,
// then it was explicitly given by the end user, so we always search
// it.
if path.is_dir() {
ig.ignore_hidden(!self.hidden);
ig.no_ignore(self.no_ignore);
ig.no_ignore_vcs(self.no_ignore_vcs);
ig.add_types(self.types.clone());
if !self.no_ignore_parent {
try!(ig.push_parents(path));
}
if let Some(ref overrides) = self.glob_overrides {
ig.add_override(overrides.clone());
for path in &self.ignore_files {
if let Some(err) = wd.add_ignore(path) {
eprintln!("{}", err);
}
}
Ok(walk::Iter::new(ig, wd))
wd.follow_links(self.follow);
wd.hidden(!self.hidden);
wd.max_depth(self.maxdepth);
wd.overrides(self.glob_overrides.clone());
wd.types(self.types.clone());
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
wd.ignore(!self.no_ignore);
wd.parents(!self.no_ignore_parent);
Walk(wd.build())
}
}
@ -752,6 +761,34 @@ fn version() -> String {
}
}
/// A simple wrapper around the `ignore::Walk` iterator.
///
/// Iterating over this wrapper never yields an error: errors produced by the
/// underlying iterator (and errors attached to an entry) are printed with
/// `eprintln!` instead. Entries whose file type is a directory are skipped.
pub struct Walk(ignore::Walk);
impl Iterator for Walk {
    type Item = ignore::DirEntry;

    /// Returns the next entry that is not a directory.
    ///
    /// Errors from the underlying walker are printed and skipped. An error
    /// attached to an otherwise usable entry is printed, and the entry is
    /// still considered for yielding.
    fn next(&mut self) -> Option<ignore::DirEntry> {
        loop {
            let dent = match self.0.next() {
                None => return None,
                Some(Err(err)) => {
                    eprintln!("{}", err);
                    continue;
                }
                Some(Ok(dent)) => dent,
            };
            if let Some(err) = dent.error() {
                eprintln!("{}", err);
            }
            let is_dir = dent.file_type().map_or(false, |ft| ft.is_dir());
            if !is_dir {
                return Some(dent);
            }
        }
    }
}
/// A single state in the state machine used by `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
@ -761,7 +798,7 @@ enum State {
Literal,
}
/// Unescapes a string given on the command line. It supports a limit set of
/// Unescapes a string given on the command line. It supports a limited set of
/// escape sequences:
///
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.

View File

@ -1,455 +0,0 @@
/*!
The gitignore module provides a way of reading a gitignore file and applying
it to a particular file name to determine whether it should be ignored or not.
The motivation for this submodule is performance and portability:
1. There is a gitignore crate on crates.io, but it uses the standard `glob`
crate and checks patterns one-by-one. This is a reasonable implementation,
but not suitable for the performance we need here.
2. We could shell out to a `git` sub-command like ls-files or status, but it
seems better to not rely on the existence of external programs for a search
tool. Besides, we need to implement this logic anyway to support things like
an .ignore file.
The key implementation detail here is that a single gitignore file is compiled
into a single `GlobSet`, which can be used to report which globs match a
particular file name. We can then do a quick post-processing step to implement
additional rules such as whitelists (prefix of `!`) or directory-only globs
(suffix of `/`).
*/
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
// use this exact implementation because hgignore files are different.
use std::cell::RefCell;
use std::error::Error as StdError;
use std::fmt;
use std::fs::File;
use std::io::{self, BufRead};
use std::path::{Path, PathBuf};
use std::sync::Arc;
use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
use regex;
use thread_local::ThreadLocal;
use pathutil::{is_file_name, strip_prefix};
/// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)]
pub enum Error {
/// A glob error, wrapping `globset::Error`.
Glob(globset::Error),
/// A regex error, wrapping `regex::Error`.
Regex(regex::Error),
/// An I/O error, wrapping `io::Error` (e.g. a gitignore file that could not
/// be opened).
Io(io::Error),
}
impl StdError for Error {
fn description(&self) -> &str {
match *self {
Error::Glob(ref err) => err.description(),
Error::Regex(ref err) => err.description(),
Error::Io(ref err) => err.description(),
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Glob(ref err) => err.fmt(f),
Error::Regex(ref err) => err.fmt(f),
Error::Io(ref err) => err.fmt(f),
}
}
}
impl From<globset::Error> for Error {
fn from(err: globset::Error) -> Error {
Error::Glob(err)
}
}
impl From<regex::Error> for Error {
fn from(err: regex::Error) -> Error {
Error::Regex(err)
}
}
impl From<io::Error> for Error {
fn from(err: io::Error) -> Error {
Error::Io(err)
}
}
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
///
/// Values of this type are produced by `GitignoreBuilder::build`.
#[derive(Clone, Debug)]
pub struct Gitignore {
/// The compiled set of globs. Matching reports the indices of the globs
/// that matched.
set: GlobSet,
/// The path at which the globs are matched. `matched` strips it from
/// candidate paths that have directory components.
root: PathBuf,
/// Per-glob options. A pattern is pushed here each time a glob is added to
/// the set, so match indices are used to index into this vector.
patterns: Vec<Pattern>,
/// The number of patterns that are not whitelist patterns.
num_ignores: u64,
/// The number of whitelist patterns.
num_whitelist: u64,
/// Per-thread scratch buffer that receives the indices of matching globs.
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
}
impl Gitignore {
    /// Builds a matcher from a root directory and the textual contents of a
    /// gitignore file.
    ///
    /// An error is returned if any line fails to parse as a glob.
    #[allow(dead_code)]
    fn from_str<P: AsRef<Path>>(
        root: P,
        gitignore: &str,
    ) -> Result<Gitignore, Error> {
        let mut gb = GitignoreBuilder::new(root);
        match gb.add_str(gitignore) {
            Ok(()) => gb.build(),
            Err(err) => Err(err),
        }
    }

    /// Reports whether the given path is ignored, whitelisted or unmatched
    /// by the globs in this gitignore. `is_dir` must be true exactly when
    /// the path refers to a directory.
    ///
    /// The path is normalized before matching: a leading `./` is removed,
    /// then (only if the path is more than a bare file name) the root of
    /// this gitignore is removed, and finally a leading `/` is removed.
    pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
        let given = path.as_ref();
        let without_dot = strip_prefix("./", given).unwrap_or(given);
        // A bare file name has no directory components, so stripping the
        // root from it could only ever eat part of the file name itself.
        let relative = if is_file_name(without_dot) {
            without_dot
        } else {
            strip_prefix(&self.root, without_dot).unwrap_or(without_dot)
        };
        let candidate = strip_prefix("/", relative).unwrap_or(relative);
        self.matched_stripped(candidate, is_dir)
    }

    /// Same as `matched`, except that the path is used as given, without any
    /// prefix stripping.
    pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
        let cell = self.matches.get_default();
        let mut hits = cell.borrow_mut();
        self.set.matches_candidate_into(&Candidate::new(path), &mut *hits);
        // The last matching glob wins, except that directory-only globs are
        // skipped when the path is not a directory.
        let winner = hits.iter().rev()
            .map(|&i| &self.patterns[i])
            .find(|pat| !pat.only_dir || is_dir);
        match winner {
            None => Match::None,
            Some(pat) if pat.whitelist => Match::Whitelist(pat),
            Some(pat) => Match::Ignored(pat),
        }
    }

    /// Returns how many patterns in this matcher are ignore (that is,
    /// non-whitelist) patterns.
    pub fn num_ignores(&self) -> u64 {
        self.num_ignores
    }
}
/// The result of a glob match.
///
/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
/// a match (whether ignored or whitelisted).
#[derive(Clone, Debug)]
pub enum Match<'a> {
/// The path didn't match any glob in the gitignore file.
None,
/// The last glob matched indicates the path should be ignored. Carries the
/// pattern responsible for the decision.
Ignored(&'a Pattern),
/// The last glob matched indicates the path should be whitelisted. Carries
/// the pattern responsible for the decision.
Whitelist(&'a Pattern),
}
impl<'a> Match<'a> {
/// Returns true if the match result implies the path should be ignored.
#[allow(dead_code)]
pub fn is_ignored(&self) -> bool {
match *self {
Match::Ignored(_) => true,
Match::None | Match::Whitelist(_) => false,
}
}
/// Returns true if the match result didn't match any globs.
pub fn is_none(&self) -> bool {
match *self {
Match::None => true,
Match::Ignored(_) | Match::Whitelist(_) => false,
}
}
/// Inverts the match so that Ignored becomes Whitelisted and Whitelisted
/// becomes Ignored. A non-match remains the same.
pub fn invert(self) -> Match<'a> {
match self {
Match::None => Match::None,
Match::Ignored(pat) => Match::Whitelist(pat),
Match::Whitelist(pat) => Match::Ignored(pat),
}
}
}
/// GitignoreBuilder constructs a matcher for a single set of globs from a
/// .gitignore file.
pub struct GitignoreBuilder {
/// Accumulates the parsed globs.
builder: GlobSetBuilder,
/// The path at which the globs should be matched.
root: PathBuf,
/// The options for each glob added so far, in the order they were added.
patterns: Vec<Pattern>,
}
/// Pattern represents a single pattern in a gitignore file. It doesn't
/// know how to do glob matching directly, but it does store additional
/// options on a pattern, such as whether it's whitelisted.
#[derive(Clone, Debug)]
pub struct Pattern {
/// The file path that this pattern was extracted from (may be empty).
pub from: PathBuf,
/// The original glob pattern string, after trailing whitespace has been
/// trimmed but before any prefix or suffix has been removed.
pub original: String,
/// The actual glob pattern string that is compiled. This may differ from
/// `original`: markers such as `!`, a leading `/` and a trailing `/` are
/// removed, and a `**/` prefix or `/*` suffix may be added.
pub pat: String,
/// Whether this is a whitelisted pattern or not.
pub whitelist: bool,
/// Whether this pattern should only match directories or not.
pub only_dir: bool,
}
impl GitignoreBuilder {
/// Create a new builder for a gitignore file.
///
/// The path given should be the path at which the globs for this gitignore
/// file should be matched.
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
GitignoreBuilder {
builder: GlobSetBuilder::new(),
root: root.to_path_buf(),
patterns: vec![],
}
}
/// Builds a new matcher from the glob patterns added so far.
///
/// Once a matcher is built, no new glob patterns can be added to it.
pub fn build(self) -> Result<Gitignore, Error> {
let nignores = self.patterns.iter().filter(|p| !p.whitelist).count();
let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count();
Ok(Gitignore {
set: try!(self.builder.build()),
root: self.root,
patterns: self.patterns,
num_ignores: nignores as u64,
num_whitelist: nwhitelist as u64,
matches: Arc::new(ThreadLocal::default()),
})
}
/// Add each pattern line from the file path given.
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
let rdr = io::BufReader::new(try!(File::open(&path)));
debug!("gitignore: {}", path.as_ref().display());
for (i, line) in rdr.lines().enumerate() {
let line = match line {
Ok(line) => line,
Err(err) => {
debug!("error reading line {} in {}: {}",
i, path.as_ref().display(), err);
continue;
}
};
if let Err(err) = self.add(&path, &line) {
debug!("error adding gitignore pattern: '{}': {}", line, err);
}
}
Ok(())
}
/// Add each pattern line from the string given.
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
for line in gitignore.lines() {
try!(self.add("", line));
}
Ok(())
}
/// Add a line from a gitignore file to this builder.
///
/// If the line could not be parsed as a glob, then an error is returned.
pub fn add<P: AsRef<Path>>(
&mut self,
from: P,
mut line: &str,
) -> Result<(), Error> {
if line.starts_with("#") {
return Ok(());
}
if !line.ends_with("\\ ") {
line = line.trim_right();
}
if line.is_empty() {
return Ok(());
}
let mut pat = Pattern {
from: from.as_ref().to_path_buf(),
original: line.to_string(),
pat: String::new(),
whitelist: false,
only_dir: false,
};
let mut literal_separator = false;
let has_slash = line.chars().any(|c| c == '/');
let is_absolute = line.chars().nth(0).unwrap() == '/';
if line.starts_with("\\!") || line.starts_with("\\#") {
line = &line[1..];
} else {
if line.starts_with("!") {
pat.whitelist = true;
line = &line[1..];
}
if line.starts_with("/") {
// `man gitignore` says that if a glob starts with a slash,
// then the glob can only match the beginning of a path
// (relative to the location of gitignore). We achieve this by
// simply banning wildcards from matching /.
literal_separator = true;
line = &line[1..];
}
}
// If it ends with a slash, then this should only match directories,
// but the slash should otherwise not be used while globbing.
if let Some((i, c)) = line.char_indices().rev().nth(0) {
if c == '/' {
pat.only_dir = true;
line = &line[..i];
}
}
// If there is a literal slash, then we note that so that globbing
// doesn't let wildcards match slashes.
pat.pat = line.to_string();
if has_slash {
literal_separator = true;
}
// If there was a leading slash, then this is a pattern that must
// match the entire path name. Otherwise, we should let it match
// anywhere, so use a **/ prefix.
if !is_absolute {
// ... but only if we don't already have a **/ prefix.
if !pat.pat.starts_with("**/") {
pat.pat = format!("**/{}", pat.pat);
}
}
// If the pattern ends with `/**`, then we should only match everything
// inside a directory, but not the directory itself. Standard globs
// will match the directory. So we add `/*` to force the issue.
if pat.pat.ends_with("/**") {
pat.pat = format!("{}/*", pat.pat);
}
let parsed = try!(
GlobBuilder::new(&pat.pat)
.literal_separator(literal_separator)
.build());
self.builder.add(parsed);
self.patterns.push(pat);
Ok(())
}
}
#[cfg(test)]
mod tests {
use super::Gitignore;
// Defines a test named `$name` asserting that the gitignore contents `$gi`,
// rooted at `$root`, cause `$path` to be ignored. `$is_dir` defaults to
// false when omitted.
macro_rules! ignored {
($name:ident, $root:expr, $gi:expr, $path:expr) => {
ignored!($name, $root, $gi, $path, false);
};
($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
#[test]
fn $name() {
let gi = Gitignore::from_str($root, $gi).unwrap();
assert!(gi.matched($path, $is_dir).is_ignored());
}
};
}
// The counterpart of `ignored!`: asserts that `$path` is NOT ignored, which
// covers both whitelisted paths and paths that match nothing.
macro_rules! not_ignored {
($name:ident, $root:expr, $gi:expr, $path:expr) => {
not_ignored!($name, $root, $gi, $path, false);
};
($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
#[test]
fn $name() {
let gi = Gitignore::from_str($root, $gi).unwrap();
assert!(!gi.matched($path, $is_dir).is_ignored());
}
};
}
const ROOT: &'static str = "/home/foobar/rust/rg";
ignored!(ig1, ROOT, "months", "months");
ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
ignored!(ig3, ROOT, "*.rs", "src/main.rs");
ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
ignored!(ig5, ROOT, "/*.c", "cat-file.c");
ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
ignored!(ig8, ROOT, "foo/", "foo", true);
ignored!(ig9, ROOT, "**/foo", "foo");
ignored!(ig10, ROOT, "**/foo", "src/foo");
ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
ignored!(ig15, ROOT, "abc/**", "abc/x");
ignored!(ig16, ROOT, "abc/**", "abc/x/y");
ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
ignored!(ig18, ROOT, "a/**/b", "a/b");
ignored!(ig19, ROOT, "a/**/b", "a/x/b");
ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
ignored!(ig21, ROOT, r"\!xy", "!xy");
ignored!(ig22, ROOT, r"\#foo", "#foo");
ignored!(ig23, ROOT, "foo", "./foo");
ignored!(ig24, ROOT, "target", "grep/target");
ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);
not_ignored!(ignot1, ROOT, "amonths", "months");
not_ignored!(ignot2, ROOT, "monthsa", "months");
not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
// NOTE(review): ignot5 uses the same pattern and path as ignot3.
not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
not_ignored!(ignot7, ROOT, "foo/", "foo", false);
not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
not_ignored!(ignot11, ROOT, "#foo", "#foo");
not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
not_ignored!(
ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
"./third_party/protobuf/csharp/src/packages/repositories.config");
// See: https://github.com/BurntSushi/ripgrep/issues/106
// A line consisting only of whitespace must be accepted without panicking.
#[test]
fn regression_106() {
Gitignore::from_str("/", " ").unwrap();
}
}

View File

@ -1,493 +0,0 @@
/*!
The ignore module is responsible for managing the state required to determine
whether a *single* file path should be searched or not.
In general, there are two ways to ignore a particular file:
1. Specify an ignore rule in some "global" configuration, such as a
$HOME/.ignore or on the command line.
2. A specific ignore file (like .gitignore) found during directory traversal.
The `IgnoreDir` type handles ignore patterns for any one particular directory
(including "global" ignore patterns), while the `Ignore` type handles a stack
of `IgnoreDir`s for use during directory traversal.
*/
use std::error::Error as StdError;
use std::ffi::OsString;
use std::fmt;
use std::io;
use std::path::{Path, PathBuf};
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
use pathutil::{file_name, is_hidden, strip_prefix};
use types::Types;
/// The names of the ignore files that are looked for in a directory.
///
/// The order is meaningful: when the names are handed to
/// `IgnoreDir::with_ignore_names`, files later in the list take precedence
/// over files earlier in the list.
const IGNORE_NAMES: &'static [&'static str] = &[
".gitignore",
".ignore",
".rgignore",
];
/// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)]
pub enum Error {
/// An error reported by the gitignore matcher.
Gitignore(gitignore::Error),
/// An I/O error, together with the path it relates to. The path is
/// included when the error is displayed.
Io {
path: PathBuf,
err: io::Error,
},
}
impl Error {
fn from_io<P: AsRef<Path>>(path: P, err: io::Error) -> Error {
Error::Io { path: path.as_ref().to_path_buf(), err: err }
}
}
impl StdError for Error {
fn description(&self) -> &str {
match *self {
Error::Gitignore(ref err) => err.description(),
Error::Io { ref err, .. } => err.description(),
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Gitignore(ref err) => err.fmt(f),
Error::Io { ref path, ref err } => {
write!(f, "{}: {}", path.display(), err)
}
}
}
}
impl From<gitignore::Error> for Error {
fn from(err: gitignore::Error) -> Error {
Error::Gitignore(err)
}
}
/// Ignore represents a collection of ignore patterns organized by directory.
/// In particular, a stack is maintained, where the top of the stack
/// corresponds to the current directory being searched and the bottom of the
/// stack represents the root of a search. Ignore patterns at the top of the
/// stack take precedence over ignore patterns at the bottom of the stack.
pub struct Ignore {
/// A stack of ignore patterns at each directory level of traversal.
/// A directory that contributes no ignore patterns is still represented
/// by an entry, one that never matches anything.
stack: Vec<IgnoreDir>,
/// A stack of parent directories above the root of the current search.
parent_stack: Vec<IgnoreDir>,
/// A set of override globs that are always checked first. A match (whether
/// it's whitelist or blacklist) trumps anything in stack.
overrides: Overrides,
/// A file type matcher.
types: Types,
/// Whether to ignore hidden files or not.
ignore_hidden: bool,
/// When true, don't look at .gitignore or .ignore files for ignore
/// rules.
no_ignore: bool,
/// When true, don't look at .gitignore files for ignore rules.
no_ignore_vcs: bool,
}
impl Ignore {
/// Create an empty set of ignore patterns.
///
/// NOTE(review): `no_ignore_vcs` starts out as `true`, so a freshly created
/// `Ignore` does not read `.gitignore` files until `no_ignore_vcs(false)` is
/// called — confirm this default is intended.
pub fn new() -> Ignore {
Ignore {
stack: vec![],
parent_stack: vec![],
overrides: Overrides::new(None),
types: Types::empty(),
ignore_hidden: true,
no_ignore: false,
no_ignore_vcs: true,
}
}
/// Set whether hidden files/folders should be ignored (defaults to true).
pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
self.ignore_hidden = yes;
self
}
/// When set, ignore files are ignored.
pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore {
self.no_ignore = yes;
self
}
/// When set, VCS ignore files are ignored.
pub fn no_ignore_vcs(&mut self, yes: bool) -> &mut Ignore {
self.no_ignore_vcs = yes;
self
}
/// Add a set of globs that overrides all other match logic.
///
/// Any previously set overrides are replaced.
pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore {
self.overrides = Overrides::new(Some(gi));
self
}
/// Add a file type matcher. The file type matcher has the lowest
/// precedence.
///
/// Any previously set file type matcher is replaced.
pub fn add_types(&mut self, types: Types) -> &mut Ignore {
self.types = types;
self
}
/// Push parent directories of `path` on to the stack.
///
/// The path is canonicalized first, and an error is returned if that
/// fails. A matcher is then built for every ancestor directory. The
/// matchers are pushed so that the outermost ancestor ends up at the bottom
/// of the parent stack. If building one of them failed, the error is
/// returned and the matchers pushed before it remain on the parent stack.
pub fn push_parents<P: AsRef<Path>>(
&mut self,
path: P,
) -> Result<(), Error> {
let path = try!(path.as_ref().canonicalize().map_err(|err| {
Error::from_io(path.as_ref(), err)
}));
let mut path = &*path;
let mut saw_git = path.join(".git").is_dir();
let mut ignore_names = IGNORE_NAMES.to_vec();
if self.no_ignore_vcs {
ignore_names.retain(|&name| name != ".gitignore");
}
let mut ignore_dir_results = vec![];
while let Some(parent) = path.parent() {
if self.no_ignore {
ignore_dir_results.push(Ok(IgnoreDir::empty(parent)));
} else {
// Once a `.git` directory has been seen at a closer level,
// `.gitignore` files in directories further up are no longer read.
if saw_git {
ignore_names.retain(|&name| name != ".gitignore");
} else {
saw_git = parent.join(".git").is_dir();
}
let ignore_dir_result =
IgnoreDir::with_ignore_names(parent, ignore_names.iter());
ignore_dir_results.push(ignore_dir_result);
}
path = parent;
}
// The results were collected from the nearest ancestor outwards, so they
// are pushed in reverse.
for ignore_dir_result in ignore_dir_results.into_iter().rev() {
self.parent_stack.push(try!(ignore_dir_result));
}
Ok(())
}
/// Add a directory to the stack.
///
/// Note that even if this returns an error, the directory is added to the
/// stack (and therefore should be popped).
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
if self.no_ignore {
self.stack.push(IgnoreDir::empty(path));
Ok(())
} else if self.no_ignore_vcs {
self.push_ignore_dir(IgnoreDir::without_vcs(path))
} else {
self.push_ignore_dir(IgnoreDir::new(path))
}
}
/// Pushes the result of building a directory matcher on to the stack.
///
/// If the result given contains an error, then it is returned. In that
/// case a matcher that never matches anything is pushed in its place.
pub fn push_ignore_dir(
&mut self,
result: Result<IgnoreDir, Error>,
) -> Result<(), Error> {
match result {
Ok(id) => {
self.stack.push(id);
Ok(())
}
Err(err) => {
// Don't leave the stack in an inconsistent state.
self.stack.push(IgnoreDir::empty("error"));
Err(err)
}
}
}
/// Pop a directory from the stack.
///
/// This panics if the stack is empty.
pub fn pop(&mut self) {
self.stack.pop().expect("non-empty stack");
}
/// Returns true if and only if the given file path should be ignored.
///
/// A leading `./` is stripped from the path. The matchers are then
/// consulted in this order:
///
/// 1. The overrides, whose verdict (if any) is final.
/// 2. Unless `no_ignore` is set, the directory stack from the top down and
///    then the parent stack from the top down. The first verdict found
///    stops the search: "ignored" is final, while "whitelisted" still lets
///    the file type matcher have a say.
/// 3. The file type matcher.
/// 4. The hidden file check, which only applies when nothing above
///    whitelisted the path.
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
let mut path = path.as_ref();
if let Some(p) = strip_prefix("./", path) {
path = p;
}
let mat = self.overrides.matched(path, is_dir);
if let Some(is_ignored) = self.ignore_match(path, mat) {
return is_ignored;
}
let mut whitelisted = false;
if !self.no_ignore {
for id in self.stack.iter().rev() {
let mat = id.matched(path, is_dir);
if let Some(is_ignored) = self.ignore_match(path, mat) {
if is_ignored {
return true;
}
// If this path is whitelisted by an ignore, then
// fallthrough and let the file type matcher have a say.
whitelisted = true;
break;
}
}
// If the file has been whitelisted, then we have to stop checking
// parent directories. The only thing that can override a whitelist
// at this point is a type filter.
if !whitelisted {
let mut path = path.to_path_buf();
for id in self.parent_stack.iter().rev() {
// Each step up prepends the name of that directory, so the
// candidate path grows as the parent stack is walked.
if let Some(ref dirname) = id.name {
path = Path::new(dirname).join(path);
}
let mat = id.matched(&*path, is_dir);
if let Some(is_ignored) = self.ignore_match(&*path, mat) {
if is_ignored {
return true;
}
// If this path is whitelisted by an ignore, then
// fallthrough and let the file type matcher have a
// say.
whitelisted = true;
break;
}
}
}
}
let mat = self.types.matched(path, is_dir);
if let Some(is_ignored) = self.ignore_match(path, mat) {
if is_ignored {
return true;
}
whitelisted = true;
}
if !whitelisted && self.ignore_hidden && is_hidden(&path) {
debug!("{} ignored because it is hidden", path.display());
return true;
}
false
}
/// Returns true if the given match says the given pattern should be
/// ignored or false if the given pattern should be explicitly whitelisted.
/// Returns None otherwise.
///
/// The path is only used for the `debug!` message that records the
/// decision.
pub fn ignore_match<P: AsRef<Path>>(
&self,
path: P,
mat: Match,
) -> Option<bool> {
let path = path.as_ref();
match mat {
Match::Whitelist(ref pat) => {
debug!("{} whitelisted by {:?}", path.display(), pat);
Some(false)
}
Match::Ignored(ref pat) => {
debug!("{} ignored by {:?}", path.display(), pat);
Some(true)
}
Match::None => None,
}
}
}
/// IgnoreDir represents a set of ignore patterns retrieved from a single
/// directory.
#[derive(Debug)]
pub struct IgnoreDir {
/// The path to this directory as given.
path: PathBuf,
/// The directory name, if one exists.
name: Option<OsString>,
/// A single accumulation of glob patterns for this directory, matched
/// using gitignore semantics. This is `None` when the directory
/// contributes no patterns.
///
/// This will include patterns from rgignore as well. The patterns are
/// ordered so that precedence applies automatically (e.g., rgignore
/// patterns take precedence over gitignore patterns).
gi: Option<Gitignore>,
// TODO(burntsushi): Matching other types of glob patterns that don't
// conform to gitignore will probably require refactoring this approach.
}
impl IgnoreDir {
/// Create a new matcher for the given directory.
pub fn new<P: AsRef<Path>>(path: P) -> Result<IgnoreDir, Error> {
IgnoreDir::with_ignore_names(path, IGNORE_NAMES.iter())
}
/// Create a new matcher for the given directory.
///
/// Don't respect VCS ignore files.
pub fn without_vcs<P: AsRef<Path>>(path: P) -> Result<IgnoreDir, Error> {
let names = IGNORE_NAMES.iter().filter(|name| **name != ".gitignore");
IgnoreDir::with_ignore_names(path, names)
}
/// Create a new IgnoreDir that never matches anything with the given path.
pub fn empty<P: AsRef<Path>>(path: P) -> IgnoreDir {
IgnoreDir {
path: path.as_ref().to_path_buf(),
name: file_name(path.as_ref()).map(|s| s.to_os_string()),
gi: None,
}
}
/// Create a new matcher for the given directory using only the ignore
/// patterns found in the file names given.
///
/// If no ignore glob patterns could be found in the directory then `None`
/// is returned.
///
/// Note that the order of the names given is meaningful. Names appearing
/// later in the list have precedence over names appearing earlier in the
/// list.
pub fn with_ignore_names<P: AsRef<Path>, S, I>(
path: P,
names: I,
) -> Result<IgnoreDir, Error>
where P: AsRef<Path>, S: AsRef<str>, I: Iterator<Item=S> {
let mut id = IgnoreDir::empty(path);
let mut ok = false;
let mut builder = GitignoreBuilder::new(&id.path);
// The ordering here is important. Later globs have higher precedence.
for name in names {
ok = builder.add_path(id.path.join(name.as_ref())).is_ok() || ok;
}
if !ok {
return Ok(id);
}
id.gi = Some(try!(builder.build()));
Ok(id)
}
/// Returns true if and only if the given file path should be ignored
/// according to the globs in this directory. `is_dir` should be true if
/// the path refers to a directory and false otherwise.
///
/// Before matching path, its prefix (as determined by a common suffix
/// of this directory) is stripped. If there is
/// no common suffix/prefix overlap, then path is assumed to reside
/// directly in this directory.
///
/// If the given path has a `./` prefix then it is stripped before
/// matching.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
self.gi.as_ref()
.map(|gi| gi.matched(path, is_dir))
.unwrap_or(Match::None)
}
}
/// Manages a set of overrides provided explicitly by the end user.
struct Overrides {
/// The matcher built from the override globs, or `None` when there are no
/// overrides.
gi: Option<Gitignore>,
/// A placeholder pattern that is reported when a path is ignored because
/// it matched none of the override globs.
unmatched_pat: Pattern,
}
impl Overrides {
/// Creates a new set of overrides from the gitignore matcher provided.
/// If no matcher is provided, then the resulting overrides have no effect.
fn new(gi: Option<Gitignore>) -> Overrides {
Overrides {
gi: gi,
unmatched_pat: Pattern {
from: Path::new("<argv>").to_path_buf(),
original: "<none>".to_string(),
pat: "<none>".to_string(),
whitelist: false,
only_dir: false,
},
}
}
/// Returns a match for the given path against this set of overrides.
///
/// If there are no overrides, then this always returns Match::None.
///
/// If there is at least one positive override, then this never returns
/// Match::None (and interpreting non-matches as ignored) unless is_dir
/// is true.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
let path = path.as_ref();
self.gi.as_ref()
.map(|gi| {
let mat = gi.matched_stripped(path, is_dir).invert();
if mat.is_none() && !is_dir {
if gi.num_ignores() > 0 {
return Match::Ignored(&self.unmatched_pat);
}
}
mat
})
.unwrap_or(Match::None)
}
}
#[cfg(test)]
mod tests {
    use std::path::Path;

    use gitignore::GitignoreBuilder;
    use super::IgnoreDir;

    const ROOT: &'static str = "/home/foobar/rust/rg";

    /// Builds an `IgnoreDir` rooted at `root` whose matcher contains the
    /// globs of `gi` followed by the globs of `xi`. Later globs take
    /// precedence, so `xi` overrides `gi`.
    fn ignore_dir(root: &str, gi: &str, xi: &str) -> IgnoreDir {
        let mut builder = GitignoreBuilder::new(&root);
        builder.add_str(gi).unwrap();
        builder.add_str(xi).unwrap();
        let matcher = builder.build().unwrap();
        let root = Path::new(root);
        IgnoreDir {
            path: root.to_path_buf(),
            name: root.file_name().map(|s| s.to_os_string()),
            gi: Some(matcher),
        }
    }

    /// Defines a test asserting that `$path` (a file) is ignored.
    macro_rules! ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                let id = ignore_dir($root, $gi, $xi);
                assert!(id.matched($path, false).is_ignored());
            }
        };
    }

    /// Defines a test asserting that `$path` (a file) is NOT ignored.
    macro_rules! not_ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                let id = ignore_dir($root, $gi, $xi);
                assert!(!id.matched($path, false).is_ignored());
            }
        };
    }

    ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
    ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
    ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs");

    not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs");
}

View File

@ -1,8 +1,8 @@
extern crate deque;
extern crate docopt;
extern crate env_logger;
extern crate globset;
extern crate grep;
extern crate ignore;
#[cfg(windows)]
extern crate kernel32;
#[macro_use]
@ -16,8 +16,6 @@ extern crate num_cpus;
extern crate regex;
extern crate rustc_serialize;
extern crate term;
extern crate thread_local;
extern crate walkdir;
#[cfg(windows)]
extern crate winapi;
@ -36,7 +34,7 @@ use deque::{Stealer, Stolen};
use grep::Grep;
use memmap::{Mmap, Protection};
use term::Terminal;
use walkdir::DirEntry;
use ignore::DirEntry;
use args::Args;
use out::{ColoredTerminal, Out};
@ -61,8 +59,6 @@ macro_rules! eprintln {
mod args;
mod atty;
mod gitignore;
mod ignore;
mod out;
mod pathutil;
mod printer;
@ -70,8 +66,6 @@ mod search_buffer;
mod search_stream;
#[cfg(windows)]
mod terminal_win;
mod types;
mod walk;
/// Result alias whose error type is a boxed error trait object that is both
/// `Send` and `Sync`.
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
@ -101,7 +95,6 @@ fn run(args: Args) -> Result<u64> {
if threads == 1 || isone {
return run_one_thread(args.clone());
}
let out = Arc::new(Mutex::new(args.out()));
let quiet_matched = QuietMatched::new(args.quiet());
let mut workers = vec![];
@ -126,21 +119,15 @@ fn run(args: Args) -> Result<u64> {
workq
};
let mut paths_searched: u64 = 0;
for p in paths {
for dent in args.walker() {
if quiet_matched.has_match() {
break;
}
if p == Path::new("-") {
paths_searched += 1;
paths_searched += 1;
if dent.is_stdin() {
workq.push(Work::Stdin);
} else {
for ent in try!(args.walker(p)) {
if quiet_matched.has_match() {
break;
}
paths_searched += 1;
workq.push(Work::File(ent));
}
workq.push(Work::File(dent));
}
}
if !paths.is_empty() && paths_searched == 0 {
@ -165,47 +152,33 @@ fn run_one_thread(args: Arc<Args>) -> Result<u64> {
grep: args.grep(),
match_count: 0,
};
let paths = args.paths();
let mut term = args.stdout();
let mut paths_searched: u64 = 0;
for p in paths {
if args.quiet() && worker.match_count > 0 {
break;
}
if p == Path::new("-") {
paths_searched += 1;
let mut printer = args.printer(&mut term);
if worker.match_count > 0 {
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
}
for dent in args.walker() {
let mut printer = args.printer(&mut term);
if worker.match_count > 0 {
if args.quiet() {
break;
}
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
}
}
paths_searched += 1;
if dent.is_stdin() {
worker.do_work(&mut printer, WorkReady::Stdin);
} else {
for ent in try!(args.walker(p)) {
paths_searched += 1;
let mut printer = args.printer(&mut term);
if worker.match_count > 0 {
if args.quiet() {
break;
}
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
}
let file = match File::open(dent.path()) {
Ok(file) => file,
Err(err) => {
eprintln!("{}: {}", dent.path().display(), err);
continue;
}
let file = match File::open(ent.path()) {
Ok(file) => file,
Err(err) => {
eprintln!("{}: {}", ent.path().display(), err);
continue;
}
};
worker.do_work(&mut printer, WorkReady::DirFile(ent, file));
}
};
worker.do_work(&mut printer, WorkReady::DirFile(dent, file));
}
}
if !paths.is_empty() && paths_searched == 0 {
if !args.paths().is_empty() && paths_searched == 0 {
eprintln!("No files were searched, which means ripgrep probably \
applied a filter you didn't expect. \
Try running again with --debug.");
@ -217,16 +190,9 @@ fn run_files(args: Arc<Args>) -> Result<u64> {
let term = args.stdout();
let mut printer = args.printer(term);
let mut file_count = 0;
for p in args.paths() {
if p == Path::new("-") {
printer.path(&Path::new("<stdin>"));
file_count += 1;
} else {
for ent in try!(args.walker(p)) {
printer.path(ent.path());
file_count += 1;
}
}
for dent in args.walker() {
printer.path(dent.path());
file_count += 1;
}
Ok(file_count)
}

View File

@ -8,7 +8,6 @@ with the raw bytes directly.
On large repositories (like chromium), this can have a ~25% performance
improvement on just listing the files to search (!).
*/
use std::ffi::OsStr;
use std::path::Path;
/// Strip `prefix` from the `path` and return the remainder.
@ -19,6 +18,7 @@ pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
prefix: &'a P,
path: &'a Path,
) -> Option<&'a Path> {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let prefix = prefix.as_ref().as_os_str().as_bytes();
@ -40,79 +40,3 @@ pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
) -> Option<&'a Path> {
path.strip_prefix(prefix).ok()
}
/// The final component of the path, if it is a normal file.
///
/// Returns `None` when the path is empty or when its final component is
/// exactly `.` or `..`. A name that merely ends in a dot (e.g. `foo.` or
/// `.hidden.`) is a normal file name and is returned as is, which agrees
/// with `Path::file_name` and with the non-unix implementation.
///
/// This works directly on the raw bytes of the path and performs no
/// normalization: the name is everything after the last `/`, so a path with
/// a trailing `/` yields an empty name.
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    use std::os::unix::ffi::OsStrExt;
    use memchr::memrchr;

    let path = path.as_ref().as_os_str().as_bytes();
    if path.is_empty() {
        return None;
    }
    // The name starts right after the last `/`, or at the beginning of the
    // path when there is no `/` at all.
    let start = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
    let name = &path[start..];
    // Only the special components `.` and `..` are not file names.
    if name == &b"."[..] || name == &b".."[..] {
        return None;
    }
    Some(OsStr::from_bytes(name))
}
/// The final component of the path, if it is a normal file.
///
/// This simply defers to `Path::file_name`, so `None` is returned whenever
/// the standard library reports that the path has no file name.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let path: &'a Path = path.as_ref();
    path.file_name()
}
/// Returns true if and only if this file path is considered to be hidden,
/// i.e., its file name starts with a `.`. Paths without a file name are
/// never hidden.
#[cfg(unix)]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    use std::os::unix::ffi::OsStrExt;

    match file_name(path.as_ref()) {
        Some(name) => name.as_bytes().first() == Some(&b'.'),
        None => false,
    }
}
/// Returns true if and only if this file path is considered to be hidden,
/// i.e., its file name is valid UTF-8 and starts with a `.`. Paths without
/// a file name are never hidden.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    file_name(path.as_ref())
        .and_then(|name| name.to_str())
        .map(|s| s.starts_with("."))
        .unwrap_or(false)
}
/// Returns true if this file path is just a file name, i.e., the raw bytes
/// of the path contain no `/` separator.
#[cfg(unix)]
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
    use std::os::unix::ffi::OsStrExt;
    use memchr::memchr;

    let bytes = path.as_ref().as_os_str().as_bytes();
    match memchr(b'/', bytes) {
        Some(_) => false,
        None => true,
    }
}
/// Returns true if this file path is just a file name, i.e., it has a
/// parent and that parent is the empty string.
#[cfg(not(unix))]
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
    match path.as_ref().parent() {
        Some(parent) => parent.as_os_str().is_empty(),
        None => false,
    }
}

View File

@ -5,7 +5,7 @@ use term::{Attr, Terminal};
use term::color;
use pathutil::strip_prefix;
use types::FileTypeDef;
use ignore::types::FileTypeDef;
/// Printer encapsulates all output logic for searching.
///
@ -168,11 +168,11 @@ impl<W: Terminal + Send> Printer<W> {
self.write(def.name().as_bytes());
self.write(b": ");
let mut first = true;
for pat in def.patterns() {
for glob in def.globs() {
if !first {
self.write(b", ");
}
self.write(pat.as_bytes());
self.write(glob.as_bytes());
first = false;
}
self.write_eol();

View File

View File

@ -1,458 +0,0 @@
/*!
The types module provides a way of associating glob patterns on file names to
file types.
*/
use std::collections::HashMap;
use std::error::Error as StdError;
use std::fmt;
use std::path::Path;
use regex;
use gitignore::{Match, Pattern};
use globset::{self, GlobBuilder, GlobSet, GlobSetBuilder};
/// The default file type definitions.
///
/// Each entry pairs a file type name with the globs that recognize files of
/// that type. `TypesBuilder::add_defaults` registers every (name, glob) pair
/// from this table. The same glob may appear under several names (e.g.
/// `*.h`), and some names share their globs (e.g. `markdown` and `md`).
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("asm", &["*.asm", "*.s", "*.S"]),
("awk", &["*.awk"]),
("c", &["*.c", "*.h", "*.H"]),
("cbor", &["*.cbor"]),
("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
("cmake", &["*.cmake", "CMakeLists.txt"]),
("coffeescript", &["*.coffee"]),
("config", &["*.config"]),
("cpp", &[
"*.C", "*.cc", "*.cpp", "*.cxx",
"*.h", "*.H", "*.hh", "*.hpp",
]),
("csharp", &["*.cs"]),
("css", &["*.css"]),
("cython", &["*.pyx"]),
("dart", &["*.dart"]),
("d", &["*.d"]),
("elisp", &["*.el"]),
("erlang", &["*.erl", "*.hrl"]),
("fortran", &[
"*.f", "*.F", "*.f77", "*.F77", "*.pfo",
"*.f90", "*.F90", "*.f95", "*.F95",
]),
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
("go", &["*.go"]),
("groovy", &["*.groovy", "*.gradle"]),
("hbs", &["*.hbs"]),
("haskell", &["*.hs", "*.lhs"]),
("html", &["*.htm", "*.html"]),
("java", &["*.java"]),
("jinja", &["*.jinja", "*.jinja2"]),
("js", &[
"*.js", "*.jsx", "*.vue",
]),
("json", &["*.json"]),
("jsonl", &["*.jsonl"]),
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
("lua", &["*.lua"]),
("m4", &["*.ac", "*.m4"]),
("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]),
("markdown", &["*.md"]),
("md", &["*.md"]),
("matlab", &["*.m"]),
("mk", &["mkfile"]),
("ml", &["*.ml"]),
("nim", &["*.nim"]),
("objc", &["*.h", "*.m"]),
("objcpp", &["*.h", "*.mm"]),
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
("py", &["*.py", "*.pyx"]),
("readme", &["README*", "*README"]),
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
("rst", &["*.rst"]),
("ruby", &["*.rb"]),
("rust", &["*.rs"]),
("scala", &["*.scala"]),
("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
("spark", &["*.spark"]),
("sql", &["*.sql"]),
("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
("swift", &["*.swift"]),
("tcl", &["*.tcl"]),
("tex", &["*.tex", "*.cls", "*.sty"]),
("ts", &["*.ts", "*.tsx"]),
("txt", &["*.txt"]),
("toml", &["*.toml", "Cargo.lock"]),
("vala", &["*.vala"]),
("vb", &["*.vb"]),
("vimscript", &["*.vim"]),
("xml", &["*.xml"]),
("yacc", &["*.y"]),
("yaml", &["*.yaml", "*.yml"]),
("zsh", &["*.zsh", ".zshenv", ".zlogin", ".zprofile", ".zshrc"]),
];
/// Describes all the possible failure conditions for building a file type
/// matcher.
#[derive(Debug)]
pub enum Error {
/// We tried to select (or negate) a file type that is not defined. The
/// payload is the name of the unknown file type.
UnrecognizedFileType(String),
/// A user specified file type definition could not be parsed. The
/// expected format is `name:glob` (e.g., `html:*.html`) where neither the
/// name nor the glob is empty.
InvalidDefinition,
/// There was an error building the matcher (probably a bad glob).
Glob(globset::Error),
/// There was an error compiling a glob as a regex.
Regex(regex::Error),
}
impl StdError for Error {
fn description(&self) -> &str {
match *self {
Error::UnrecognizedFileType(_) => "unrecognized file type",
Error::InvalidDefinition => "invalid definition",
Error::Glob(ref err) => err.description(),
Error::Regex(ref err) => err.description(),
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::UnrecognizedFileType(ref ty) => {
write!(f, "unrecognized file type: {}", ty)
}
Error::InvalidDefinition => {
write!(f, "invalid definition (format is type:glob, e.g., \
html:*.html)")
}
Error::Glob(ref err) => err.fmt(f),
Error::Regex(ref err) => err.fmt(f),
}
}
}
impl From<globset::Error> for Error {
fn from(err: globset::Error) -> Error {
Error::Glob(err)
}
}
impl From<regex::Error> for Error {
fn from(err: regex::Error) -> Error {
Error::Regex(err)
}
}
/// A single file type definition.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
/// The name of the file type, e.g. `rust`.
name: String,
/// The glob patterns used to recognize files of this type.
pats: Vec<String>,
}
impl FileTypeDef {
    /// The name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// The glob patterns that recognize files of this type.
    pub fn patterns(&self) -> &[String] {
        self.pats.as_slice()
    }
}
/// Types is a file type matcher.
#[derive(Clone, Debug)]
pub struct Types {
/// All file type definitions known to this matcher.
defs: Vec<FileTypeDef>,
/// The globs of every selected file type, if any type was selected.
selected: Option<GlobSet>,
/// The globs of every negated file type, if any type was negated.
negated: Option<GlobSet>,
/// True when at least one file type was selected, in which case files
/// matching no selected type are ignored.
has_selected: bool,
/// Placeholder pattern attached to every whitelist/ignore match produced
/// by this matcher.
unmatched_pat: Pattern,
}
impl Types {
/// Creates a new file type matcher from the given Gitignore matcher. If
/// not Gitignore matcher is provided, then the file type matcher has no
/// effect.
///
/// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored.
fn new(
selected: Option<GlobSet>,
negated: Option<GlobSet>,
has_selected: bool,
defs: Vec<FileTypeDef>,
) -> Types {
Types {
defs: defs,
selected: selected,
negated: negated,
has_selected: has_selected,
unmatched_pat: Pattern {
from: Path::new("<filetype>").to_path_buf(),
original: "<N/A>".to_string(),
pat: "<N/A>".to_string(),
whitelist: false,
only_dir: false,
},
}
}
/// Creates a new file type matcher that never matches.
pub fn empty() -> Types {
Types::new(None, None, false, vec![])
}
/// Returns a match for the given path against this file type matcher.
///
/// The path is considered whitelisted if it matches a selected file type.
/// The path is considered ignored if it matched a negated file type.
/// If at least one file type is selected and path doesn't match, then
/// the path is also considered ignored.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
// If we don't have any matcher, then we can't do anything.
if self.negated.is_none() && self.selected.is_none() {
return Match::None;
}
// File types don't apply to directories.
if is_dir {
return Match::None;
}
let path = path.as_ref();
let name = match path.file_name() {
Some(name) => name.to_string_lossy(),
None if self.has_selected => {
return Match::Ignored(&self.unmatched_pat);
}
None => {
return Match::None;
}
};
if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
return Match::Ignored(&self.unmatched_pat);
}
if self.selected.as_ref().map(|s|s.is_match(&*name)).unwrap_or(false) {
return Match::Whitelist(&self.unmatched_pat);
}
if self.has_selected {
Match::Ignored(&self.unmatched_pat)
} else {
Match::None
}
}
/// Return the set of current file type definitions.
pub fn definitions(&self) -> &[FileTypeDef] {
&self.defs
}
}
/// TypesBuilder builds a type matcher from a set of file type definitions and
/// a set of file type selections.
pub struct TypesBuilder {
/// Maps a file type name to the globs registered for it.
types: HashMap<String, Vec<String>>,
/// Names of the file types that have been selected.
selected: Vec<String>,
/// Names of the file types that have been negated.
negated: Vec<String>,
}
impl TypesBuilder {
/// Create a new builder for a file type matcher.
pub fn new() -> TypesBuilder {
TypesBuilder {
types: HashMap::new(),
selected: vec![],
negated: vec![],
}
}
/// Build the current set of file type definitions *and* selections into
/// a file type matcher.
pub fn build(&self) -> Result<Types, Error> {
let selected_globs =
if self.selected.is_empty() {
None
} else {
let mut bset = GlobSetBuilder::new();
for name in &self.selected {
let globs = match self.types.get(name) {
Some(globs) => globs,
None => {
let msg = name.to_string();
return Err(Error::UnrecognizedFileType(msg));
}
};
for glob in globs {
let pat = try!(
GlobBuilder::new(glob)
.literal_separator(true).build());
bset.add(pat);
}
}
Some(try!(bset.build()))
};
let negated_globs =
if self.negated.is_empty() {
None
} else {
let mut bset = GlobSetBuilder::new();
for name in &self.negated {
let globs = match self.types.get(name) {
Some(globs) => globs,
None => {
let msg = name.to_string();
return Err(Error::UnrecognizedFileType(msg));
}
};
for glob in globs {
let pat = try!(
GlobBuilder::new(glob)
.literal_separator(true).build());
bset.add(pat);
}
}
Some(try!(bset.build()))
};
Ok(Types::new(
selected_globs,
negated_globs,
!self.selected.is_empty(),
self.definitions(),
))
}
/// Return the set of current file type definitions.
pub fn definitions(&self) -> Vec<FileTypeDef> {
let mut defs = vec![];
for (ref name, ref pats) in &self.types {
let mut pats = pats.to_vec();
pats.sort();
defs.push(FileTypeDef {
name: name.to_string(),
pats: pats,
});
}
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
defs
}
/// Select the file type given by `name`.
///
/// If `name` is `all`, then all file types are selected.
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
if name == "all" {
for name in self.types.keys() {
self.selected.push(name.to_string());
}
} else {
self.selected.push(name.to_string());
}
self
}
/// Ignore the file type given by `name`.
///
/// If `name` is `all`, then all file types are negated.
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
if name == "all" {
for name in self.types.keys() {
self.negated.push(name.to_string());
}
} else {
self.negated.push(name.to_string());
}
self
}
/// Clear any file type definitions for the type given.
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
self.types.remove(name);
self
}
/// Add a new file type definition. `name` can be arbitrary and `pat`
/// should be a glob recognizing file paths belonging to the `name` type.
pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder {
self.types.entry(name.to_string())
.or_insert(vec![]).push(pat.to_string());
self
}
/// Add a new file type definition specified in string form. The format
/// is `name:glob`. Names may not include a colon.
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
let name: String = def.chars().take_while(|&c| c != ':').collect();
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
if name.is_empty() || pat.is_empty() {
return Err(Error::InvalidDefinition);
}
self.add(&name, &pat);
Ok(())
}
/// Add a set of default file type definitions.
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
for &(name, exts) in TYPE_EXTENSIONS {
for ext in exts {
self.add(name, ext);
}
}
self
}
}
#[cfg(test)]
mod tests {
use super::TypesBuilder;
// Generates one `#[test]` function per invocation. Each test builds a
// matcher from the definitions in `$types`, selects every name in `$sel`,
// negates every name in `$selnot`, and then checks whether `$path` (as a
// file) is ignored.
//
// NOTE: the order of the arms matters. An invocation starting with `not`
// would also parse under the six argument arm (with `not` as the test
// name), so the `not` arm must come before it.
macro_rules! matched {
// `$path` is expected to NOT be ignored.
($name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr) => {
matched!($name, $types, $sel, $selnot, $path, true);
};
// `$path` is expected to be ignored.
(not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr) => {
matched!($name, $types, $sel, $selnot, $path, false);
};
// Shared implementation: `$matched` is true when the path must not be
// ignored.
($name:ident, $types:expr, $sel:expr, $selnot:expr,
$path:expr, $matched:expr) => {
#[test]
fn $name() {
let mut btypes = TypesBuilder::new();
for tydef in $types {
btypes.add_def(tydef).unwrap();
}
for sel in $sel {
btypes.select(sel);
}
for selnot in $selnot {
btypes.negate(selnot);
}
let types = btypes.build().unwrap();
let mat = types.matched($path, false);
assert_eq!($matched, !mat.is_ignored());
}
};
}
// The file type definitions (in `name:glob` form) shared by all tests.
fn types() -> Vec<&'static str> {
vec![
"html:*.html",
"html:*.htm",
"rust:*.rs",
"js:*.js",
]
}
matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
matched!(match2, types(), vec!["html"], vec![], "index.html");
matched!(match3, types(), vec!["html"], vec![], "index.htm");
matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
matched!(match5, types(), vec![], vec![], "index.html");
matched!(match6, types(), vec![], vec!["rust"], "index.html");
matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
}

View File

@ -1,140 +0,0 @@
/*!
The walk module implements a recursive directory iterator (using the `walkdir`
crate) that can efficiently skip and ignore files and directories specified in
a user's ignore patterns.
*/
use walkdir::{self, DirEntry, WalkDir, WalkDirIterator};
use ignore::Ignore;
/// Iter is a recursive directory iterator over file paths in a directory.
/// Only file paths that should be searched are yielded.
pub struct Iter {
/// The ignore rules. A directory is pushed when the walk enters it and
/// popped again when the walk leaves it.
ig: Ignore,
/// The underlying directory walker, wrapped so that leaving a directory
/// is reported as an explicit event.
it: WalkEventIter,
}
impl Iter {
    /// Create a new recursive directory iterator that walks `wd` and filters
    /// its entries through the ignore patterns in `ig`.
    pub fn new(ig: Ignore, wd: WalkDir) -> Iter {
        let events = WalkEventIter::from(wd);
        Iter { ig: ig, it: events }
    }

    /// Returns true if this entry should be skipped.
    ///
    /// The root of the walk (depth 0) is never skipped; every other entry is
    /// skipped exactly when the ignore rules say so.
    #[inline(always)]
    fn skip_entry(&self, ent: &DirEntry) -> bool {
        ent.depth() > 0
            && self.ig.ignored(ent.path(), ent.file_type().is_dir())
    }
}
impl Iterator for Iter {
    type Item = DirEntry;

    /// Advances the walk until the next regular file that is not ignored.
    ///
    /// Errors from the walker are reported with `eprintln!` and the walk
    /// continues. Ignored directories are not descended into.
    #[inline(always)]
    fn next(&mut self) -> Option<DirEntry> {
        loop {
            let event = match self.it.next() {
                None => return None,
                Some(Err(err)) => {
                    eprintln!("{}", err);
                    continue;
                }
                Some(Ok(event)) => event,
            };
            match event {
                WalkEvent::Exit => {
                    self.ig.pop();
                }
                WalkEvent::Dir(dir) => {
                    if self.skip_entry(&dir) {
                        self.it.it.skip_current_dir();
                        // An `Exit` event still follows for this directory,
                        // so it has to be pushed to keep the stack balanced.
                        // Whether the push fails is irrelevant here.
                        let _ = self.ig.push(dir.path());
                    } else if let Err(err) = self.ig.push(dir.path()) {
                        eprintln!("{}", err);
                        self.it.it.skip_current_dir();
                    }
                }
                WalkEvent::File(file) => {
                    // Entries that are not actually files (e.g., symlinks)
                    // are never yielded.
                    if !self.skip_entry(&file) && file.file_type().is_file() {
                        return Some(file);
                    }
                }
            }
        }
    }
}
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
/// The current depth of the walk. It is raised by one after a directory
/// has been emitted and lowered by one for every "exit" event.
depth: usize,
/// The underlying walkdir iterator.
it: walkdir::Iter,
/// An entry already read from `it` that is held back while "exit" events
/// are being emitted. It is handed out again on a later call.
next: Option<Result<DirEntry, walkdir::Error>>,
}
/// The events emitted by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
/// A directory was encountered.
Dir(DirEntry),
/// An entry that is not a directory was encountered.
File(DirEntry),
/// The contents of a directory have been completely enumerated.
Exit,
}
impl From<WalkDir> for WalkEventIter {
    /// Starts an event iterator over `wd` at depth 0 with no entry held
    /// back.
    fn from(wd: WalkDir) -> WalkEventIter {
        let entries = wd.into_iter();
        WalkEventIter { depth: 0, it: entries, next: None }
    }
}
impl Iterator for WalkEventIter {
    type Item = walkdir::Result<WalkEvent>;

    /// Produces the next event of the walk.
    ///
    /// Whenever the next entry is shallower than the current depth, one
    /// `Exit` event is emitted per level and the entry is held back until
    /// the depths agree. The end of the underlying iterator counts as depth
    /// 0, so every directory that is still open gets its `Exit` event.
    #[inline(always)]
    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
        // An entry held back by an earlier `Exit` event takes precedence.
        let pending = match self.next.take() {
            Some(held) => Some(held),
            None => self.it.next(),
        };
        let entry_depth = match pending {
            Some(Ok(ref dent)) => dent.depth(),
            Some(Err(ref err)) => err.depth(),
            None => 0,
        };
        if entry_depth < self.depth {
            self.next = pending;
            self.depth -= 1;
            return Some(Ok(WalkEvent::Exit));
        }
        self.depth = entry_depth;
        match pending {
            Some(Ok(dent)) => {
                let event = if dent.file_type().is_dir() {
                    // The contents of this directory live one level deeper.
                    self.depth += 1;
                    WalkEvent::Dir(dent)
                } else {
                    WalkEvent::File(dent)
                };
                Some(Ok(event))
            }
            Some(Err(err)) => Some(Err(err)),
            None => None,
        }
    }
}