diff --git a/Cargo.lock b/Cargo.lock index ba88e2cb..b10e0602 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,20 +5,18 @@ dependencies = [ "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", - "globset 0.1.0", "grep 0.1.3", + "ignore 0.1.0", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", - "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -44,7 +42,7 @@ version = "0.6.86" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "strsim 0.5.1 
(registry+https://github.com/rust-lang/crates.io-index)", ] @@ -55,7 +53,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -65,7 +63,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "fs2" -version = "0.2.5" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -82,7 +80,7 @@ dependencies = [ "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -91,9 +89,22 @@ version = "0.1.3" dependencies = [ "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "ignore" +version = "0.1.0" +dependencies = [ + "globset 0.1.0", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 
(registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -130,10 +141,10 @@ dependencies = [ [[package]] name = "memmap" -version = "0.2.3" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)", + "fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", @@ -157,12 +168,12 @@ dependencies = [ [[package]] name = "regex" -version = "0.1.77" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)", "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -170,7 +181,7 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -221,7 +232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "0.1.8" +version = "1.0.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -244,17 +255,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f" "checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" -"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef" +"checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" "checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8" "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" -"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752" +"checksum memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"065ce59af31c18ea2c419100bda6247dd4ec3099423202b12f0bd32e529fabd2" "checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" -"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" -"checksum regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "48f0573bcee95a48da786f8823465b5f2a1fae288a55407aca991e5b3e0eae11" +"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f" +"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957" "checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b" "checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" "checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e" @@ -262,6 +273,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" -"checksum walkdir 
0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780" +"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/Cargo.toml b/Cargo.toml index e0480c54..60db7c4b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,19 +27,17 @@ path = "tests/tests.rs" deque = "0.3" docopt = "0.6" env_logger = "0.3" -globset = { version = "0.1.0", path = "globset" } grep = { version = "0.1.3", path = "grep" } +ignore = { version = "0.1.0", path = "ignore" } lazy_static = "0.2" libc = "0.2" log = "0.3" memchr = "0.1" -memmap = "0.2" +memmap = "0.5" num_cpus = "1" regex = "0.1.77" rustc-serialize = "0.3" term = "0.4" -thread_local = "0.2.7" -walkdir = "0.1" [target.'cfg(windows)'.dependencies] kernel32-sys = "0.2" diff --git a/appveyor.yml b/appveyor.yml index 266812db..645a525d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -30,6 +30,7 @@ test_script: - cargo test --verbose - cargo test --verbose --manifest-path grep/Cargo.toml - cargo test --verbose --manifest-path globset/Cargo.toml + - cargo test --verbose --manifest-path ignore/Cargo.toml before_deploy: # Generate artifacts for release @@ -59,7 +60,8 @@ deploy: branches: only: - - appveyor - - /\d+\.\d+\.\d+/ - except: - master + # - appveyor + # - /\d+\.\d+\.\d+/ + # except: + # - master diff --git a/ci/script.sh b/ci/script.sh index eca6c0f6..bf0731a2 100644 --- a/ci/script.sh +++ b/ci/script.sh @@ -23,6 +23,8 @@ run_test_suite() { cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml cargo build --target $TARGET --verbose --manifest-path 
globset/Cargo.toml cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml + cargo build --target $TARGET --verbose --manifest-path ignore/Cargo.toml + cargo test --target $TARGET --verbose --manifest-path ignore/Cargo.toml # sanity check the file type file target/$TARGET/debug/rg diff --git a/globset/Cargo.toml b/globset/Cargo.toml index a885ea18..b302d9cd 100644 --- a/globset/Cargo.toml +++ b/globset/Cargo.toml @@ -28,3 +28,6 @@ regex = "0.1.77" [dev-dependencies] glob = "0.2" + +[features] +simd-accel = ["regex/simd-accel"] diff --git a/globset/benches/bench.rs b/globset/benches/bench.rs index a151645d..e142ed72 100644 --- a/globset/benches/bench.rs +++ b/globset/benches/bench.rs @@ -11,6 +11,9 @@ extern crate lazy_static; extern crate regex; extern crate test; +use std::ffi::OsStr; +use std::path::Path; + use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder}; const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt"; diff --git a/globset/src/lib.rs b/globset/src/lib.rs index 056118a3..b9a36d3a 100644 --- a/globset/src/lib.rs +++ b/globset/src/lib.rs @@ -226,10 +226,21 @@ type Fnv = hash::BuildHasherDefault; /// single pass. #[derive(Clone, Debug)] pub struct GlobSet { + len: usize, strats: Vec, } impl GlobSet { + /// Returns true if this set is empty, and therefore matches nothing. + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Returns the number of globs in this set. + pub fn len(&self) -> usize { + self.len + } + /// Returns true if any glob in this set matches the path given. pub fn is_match>(&self, path: P) -> bool { self.is_match_candidate(&Candidate::new(path.as_ref())) @@ -240,6 +251,9 @@ impl GlobSet { /// This takes a Candidate as input, which can be used to amortize the /// cost of preparing a path for matching. 
pub fn is_match_candidate(&self, path: &Candidate) -> bool { + if self.is_empty() { + return false; + } for strat in &self.strats { if strat.is_match(path) { return true; @@ -250,9 +264,6 @@ impl GlobSet { /// Returns the sequence number of every glob pattern that matches the /// given path. - /// - /// This takes a Candidate as input, which can be used to amortize the - /// cost of preparing a path for matching. pub fn matches>(&self, path: P) -> Vec { self.matches_candidate(&Candidate::new(path.as_ref())) } @@ -264,6 +275,9 @@ impl GlobSet { /// cost of preparing a path for matching. pub fn matches_candidate(&self, path: &Candidate) -> Vec { let mut into = vec![]; + if self.is_empty() { + return into; + } self.matches_candidate_into(path, &mut into); into } @@ -274,12 +288,32 @@ impl GlobSet { /// `into` is is cleared before matching begins, and contains the set of /// sequence numbers (in ascending order) after matching ends. If no globs /// were matched, then `into` will be empty. + pub fn matches_into>( + &self, + path: P, + into: &mut Vec, + ) { + self.matches_candidate_into(&Candidate::new(path.as_ref()), into); + } + + /// Adds the sequence number of every glob pattern that matches the given + /// path to the vec given. + /// + /// `into` is cleared before matching begins, and contains the set of + /// sequence numbers (in ascending order) after matching ends. If no globs + /// were matched, then `into` will be empty. + /// + /// This takes a Candidate as input, which can be used to amortize the + /// cost of preparing a path for matching. 
pub fn matches_candidate_into( &self, path: &Candidate, into: &mut Vec, ) { into.clear(); + if self.is_empty() { + return; + } for strat in &self.strats { strat.matches_into(path, into); } @@ -288,6 +322,9 @@ impl GlobSet { } fn new(pats: &[Glob]) -> Result { + if pats.is_empty() { + return Ok(GlobSet { len: 0, strats: vec![] }); + } let mut lits = LiteralStrategy::new(); let mut base_lits = BasenameLiteralStrategy::new(); let mut exts = ExtensionStrategy::new(); @@ -330,6 +367,7 @@ impl GlobSet { prefixes.literals.len(), suffixes.literals.len(), required_exts.0.len(), regexes.literals.len()); Ok(GlobSet { + len: pats.len(), strats: vec![ GlobSetMatchStrategy::Extension(exts), GlobSetMatchStrategy::BasenameLiteral(base_lits), @@ -750,4 +788,11 @@ mod tests { assert_eq!(0, matches[0]); assert_eq!(2, matches[1]); } + + #[test] + fn empty_set_works() { + let set = GlobSetBuilder::new().build().unwrap(); + assert!(!set.is_match("")); + assert!(!set.is_match("a")); + } } diff --git a/globset/src/pathutil.rs b/globset/src/pathutil.rs index 15a3283b..16bd16fc 100644 --- a/globset/src/pathutil.rs +++ b/globset/src/pathutil.rs @@ -89,16 +89,14 @@ pub fn path_bytes(path: &Path) -> Cow<[u8]> { os_str_bytes(path.as_os_str()) } -/// Return the raw bytes of the given OS string, transcoded to UTF-8 if -/// necessary. +/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8. #[cfg(unix)] pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> { use std::os::unix::ffi::OsStrExt; Cow::Borrowed(s.as_bytes()) } -/// Return the raw bytes of the given OS string, transcoded to UTF-8 if -/// necessary. +/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8. 
#[cfg(not(unix))] pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> { // TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset diff --git a/grep/Cargo.toml b/grep/Cargo.toml index d14ba886..8637f16b 100644 --- a/grep/Cargo.toml +++ b/grep/Cargo.toml @@ -15,6 +15,6 @@ license = "Unlicense/MIT" [dependencies] log = "0.3" memchr = "0.1" -memmap = "0.2" +memmap = "0.5" regex = "0.1.77" regex-syntax = "0.3.5" diff --git a/ignore/Cargo.lock b/ignore/Cargo.lock new file mode 100644 index 00000000..7046ecdd --- /dev/null +++ b/ignore/Cargo.lock @@ -0,0 +1,170 @@ +[root] +name = "ignore" +version = "0.1.0" +dependencies = [ + "globset 0.1.0", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)", + "tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "aho-corasick" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "fnv" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "globset" +version = "0.1.0" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", + "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex 0.1.77 
(registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "kernel32-sys" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "lazy_static" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "libc" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "log" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "memchr" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "rand" +version = "0.3.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex" +version = "0.1.77" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", + "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", + "regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", + "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", + "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)", + "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "regex-syntax" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "simd" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = 
"tempdir" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread-id" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "thread_local" +version = "0.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "utf8-ranges" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "walkdir" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "winapi" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "winapi-build" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[metadata] +"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" +"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344" +"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" +"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f" +"checksum 
libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d" +"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" +"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" +"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5" +"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665" +"checksum regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "48f0573bcee95a48da786f8823465b5f2a1fae288a55407aca991e5b3e0eae11" +"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" +"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" +"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" +"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5" +"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" +"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967" +"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" +"checksum winapi-build 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml new file mode 100644 index 00000000..520f9cf4 --- /dev/null +++ b/ignore/Cargo.toml @@ -0,0 +1,36 @@ +[package] +name = "ignore" +version = "0.1.0" #:version +authors = ["Andrew Gallant "] +description = """ +A fast library for efficiently matching ignore files such as `.gitignore` +against file paths. +""" +documentation = "https://docs.rs/ignore" +homepage = "https://github.com/BurntSushi/ripgrep/tree/master/ignore" +repository = "https://github.com/BurntSushi/ripgrep/tree/master/ignore" +readme = "README.md" +keywords = ["glob", "ignore", "gitignore", "pattern", "file"] +license = "Unlicense/MIT" + +[lib] +name = "ignore" +bench = false + +[dependencies] +globset = { version = "0.1.0", path = "../globset" } +lazy_static = "0.2" +log = "0.3" +memchr = "0.1" +regex = "0.1.77" +thread_local = "0.2.7" +walkdir = "1" + +[dev-dependencies] +tempdir = "0.3.5" + +[features] +simd-accel = ["globset/simd-accel"] + +[profile.release] +debug = true diff --git a/ignore/README.md b/ignore/README.md new file mode 100644 index 00000000..2d2907c8 --- /dev/null +++ b/ignore/README.md @@ -0,0 +1,66 @@ +ignore +====== +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. This crate +also provides lower level direct access to gitignore and file type matchers. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). 
+ +### Documentation + +[https://docs.rs/ignore](https://docs.rs/ignore) + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +ignore = "0.1" +``` + +and this to your crate root: + +```rust +extern crate ignore; +``` + +### Example + +This example shows the most basic usage of this crate. This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +### Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. 
diff --git a/ignore/examples/walk.rs b/ignore/examples/walk.rs new file mode 100644 index 00000000..0ce0a086 --- /dev/null +++ b/ignore/examples/walk.rs @@ -0,0 +1,28 @@ +/* +extern crate ignore; +extern crate walkdir; + +use std::env; +use std::io::{self, Write}; +use std::os::unix::ffi::OsStrExt; + +use ignore::ignore::IgnoreBuilder; +use walkdir::WalkDir; + +fn main() { + let path = env::args().nth(1).unwrap(); + let ig = IgnoreBuilder::new().build(); + let wd = WalkDir::new(path); + let walker = ignore::walk::Iter::new(ig, wd); + + let mut stdout = io::BufWriter::new(io::stdout()); + // let mut count = 0; + for dirent in walker { + // count += 1; + stdout.write(dirent.path().as_os_str().as_bytes()).unwrap(); + stdout.write(b"\n").unwrap(); + } + // println!("{}", count); +} +*/ +fn main() {} diff --git a/ignore/src/dir.rs b/ignore/src/dir.rs new file mode 100644 index 00000000..6ac00627 --- /dev/null +++ b/ignore/src/dir.rs @@ -0,0 +1,803 @@ +// This module provides a data structure, `Ignore`, that connects "directory +// traversal" with "ignore matchers." Specifically, it knows about gitignore +// semantics and precedence, and is organized based on directory hierarchy. +// Namely, every matcher logically corresponds to ignore rules from a single +// directory, and points to the matcher for its corresponding parent directory. +// In this sense, `Ignore` is a *persistent* data structure. +// +// This design was specifically chosen to make it possible to use this data +// structure in a parallel directory iterator. +// +// My initial intention was to expose this module as part of this crate's +// public API, but I think the data structure's public API is too complicated +// with non-obvious failure modes. Alas, such things haven't been documented +// well. 
+ +use std::collections::HashMap; +use std::ffi::OsString; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, RwLock}; + +use gitignore::{self, Gitignore, GitignoreBuilder}; +use pathutil::{is_hidden, strip_prefix}; +use overrides::{self, Override}; +use types::{self, Types}; +use {Error, Match, PartialErrorBuilder}; + +/// IgnoreMatch represents information about where a match came from when using +/// the `Ignore` matcher. +#[derive(Clone, Debug)] +pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>); + +/// IgnoreMatchInner describes precisely where the match information came from. +/// This is private to allow expansion to more matchers in the future. +#[derive(Clone, Debug)] +enum IgnoreMatchInner<'a> { + Override(overrides::Glob<'a>), + Gitignore(&'a gitignore::Glob), + Types(types::Glob<'a>), + Hidden, +} + +impl<'a> IgnoreMatch<'a> { + fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Override(x)) + } + + fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Gitignore(x)) + } + + fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> { + IgnoreMatch(IgnoreMatchInner::Types(x)) + } + + fn hidden() -> IgnoreMatch<'static> { + IgnoreMatch(IgnoreMatchInner::Hidden) + } +} + +/// Options for the ignore matcher, shared between the matcher itself and the +/// builder. +#[derive(Clone, Copy, Debug)] +struct IgnoreOptions { + /// Whether to ignore hidden file paths or not. + hidden: bool, + /// Whether to read .ignore files. + ignore: bool, + /// Whether to read git's global gitignore file. + git_global: bool, + /// Whether to read .gitignore files. + git_ignore: bool, + /// Whether to read .git/info/exclude files. + git_exclude: bool, +} + +impl IgnoreOptions { + /// Returns true if at least one type of ignore rules should be matched. 
+ fn should_ignores(&self) -> bool { + self.ignore || self.git_global || self.git_ignore || self.git_exclude + } +} + +/// Ignore is a matcher useful for recursively walking one or more directories. +#[derive(Clone, Debug)] +pub struct Ignore(Arc); + +#[derive(Clone, Debug)] +struct IgnoreInner { + /// A map of all existing directories that have already been + /// compiled into matchers. + /// + /// Note that this is never used during matching, only when adding new + /// parent directory matchers. This avoids needing to rebuild glob sets for + /// parent directories if many paths are being searched. + compiled: Arc>>, + /// The path to the directory that this matcher was built from. + dir: PathBuf, + /// An override matcher (default is empty). + overrides: Arc, + /// A file type matcher. + types: Arc, + /// The parent directory to match next. + /// + /// If this is the root directory or there are otherwise no more + /// directories to match, then `parent` is `None`. + parent: Option, + /// Whether this is an absolute parent matcher, as added by add_parent. + is_absolute_parent: bool, + /// The absolute base path of this matcher. Populated only if parent + /// directories are added. + absolute_base: Option>, + /// Explicit ignore matchers specified by the caller. + explicit_ignores: Arc>, + /// The matcher for .ignore files. + ignore_matcher: Gitignore, + /// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore. + git_global_matcher: Arc, + /// The matcher for .gitignore files. + git_ignore_matcher: Gitignore, + /// Special matcher for `.git/info/exclude` files. + git_exclude_matcher: Gitignore, + /// Whether this directory contains a .git sub-directory. + has_git: bool, + /// Ignore config. + opts: IgnoreOptions, +} + +impl Ignore { + /// Return the directory path of this matcher. + #[allow(dead_code)] + pub fn path(&self) -> &Path { + &self.0.dir + } + + /// Return true if this matcher has no parent. 
+ pub fn is_root(&self) -> bool { + self.0.parent.is_none() + } + + /// Return this matcher's parent, if one exists. + pub fn parent(&self) -> Option { + self.0.parent.clone() + } + + /// Create a new `Ignore` matcher with the parent directories of `dir`. + /// + /// Note that this can only be called on an `Ignore` matcher with no + /// parents (i.e., `is_root` returns `true`). This will panic otherwise. + pub fn add_parents>( + &self, + path: P, + ) -> (Ignore, Option) { + if !self.is_root() { + panic!("Ignore::add_parents called on non-root matcher"); + } + let absolute_base = match path.as_ref().canonicalize() { + Ok(path) => Arc::new(path), + Err(_) => { + // There's not much we can do here, so just return our + // existing matcher. We drop the error to be consistent + // with our general pattern of ignoring I/O errors when + // processing ignore files. + return (self.clone(), None); + } + }; + // List of parents, from child to root. + let mut parents = vec![]; + let mut path = &**absolute_base; + while let Some(parent) = path.parent() { + parents.push(parent); + path = parent; + } + let mut errs = PartialErrorBuilder::default(); + let mut ig = self.clone(); + for parent in parents.into_iter().rev() { + let mut compiled = self.0.compiled.write().unwrap(); + if let Some(prebuilt) = compiled.get(parent.as_os_str()) { + ig = prebuilt.clone(); + continue; + } + let (mut igtmp, err) = ig.add_child_path(parent); + errs.maybe_push(err); + igtmp.is_absolute_parent = true; + igtmp.absolute_base = Some(absolute_base.clone()); + ig = Ignore(Arc::new(igtmp)); + compiled.insert(parent.as_os_str().to_os_string(), ig.clone()); + } + (ig, errs.into_error_option()) + } + + /// Create a new `Ignore` matcher for the given child directory. + /// + /// Since building the matcher may require reading from multiple + /// files, it's possible that this method partially succeeds. 
Therefore, + /// a matcher is always returned (which may match nothing) and an error is + /// returned if it exists. + /// + /// Note that all I/O errors are completely ignored. + pub fn add_child>( + &self, + dir: P, + ) -> (Ignore, Option) { + let (ig, err) = self.add_child_path(dir.as_ref()); + (Ignore(Arc::new(ig)), err) + } + + /// Like add_child, but takes a full path and returns an IgnoreInner. + fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option) { + static IG_NAMES: &'static [&'static str] = &[".rgignore", ".ignore"]; + + let mut errs = PartialErrorBuilder::default(); + let ig_matcher = + if !self.0.opts.ignore { + Gitignore::empty() + } else { + let (m, err) = create_gitignore(&dir, IG_NAMES); + errs.maybe_push(err); + m + }; + let gi_matcher = + if !self.0.opts.git_ignore { + Gitignore::empty() + } else { + let (m, err) = create_gitignore(&dir, &[".gitignore"]); + errs.maybe_push(err); + m + }; + let gi_exclude_matcher = + if !self.0.opts.git_exclude { + Gitignore::empty() + } else { + let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]); + errs.maybe_push(err); + m + }; + let ig = IgnoreInner { + compiled: self.0.compiled.clone(), + dir: dir.to_path_buf(), + overrides: self.0.overrides.clone(), + types: self.0.types.clone(), + parent: Some(self.clone()), + is_absolute_parent: false, + absolute_base: self.0.absolute_base.clone(), + explicit_ignores: self.0.explicit_ignores.clone(), + ignore_matcher: ig_matcher, + git_global_matcher: self.0.git_global_matcher.clone(), + git_ignore_matcher: gi_matcher, + git_exclude_matcher: gi_exclude_matcher, + has_git: dir.join(".git").is_dir(), + opts: self.0.opts, + }; + (ig, errs.into_error_option()) + } + + /// Returns a match indicating whether the given file path should be + /// ignored or not. + /// + /// The match contains information about its origin. + pub fn matched<'a, P: AsRef>( + &'a self, + path: P, + is_dir: bool, + ) -> Match> { + // We need to be careful with our path. 
If it has a leading ./, then + // strip it because it causes nothing but trouble. + let mut path = path.as_ref(); + if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Match against the override patterns. If an override matches + // regardless of whether it's whitelist/ignore, then we quit and + // return that result immediately. Overrides have the highest + // precedence. + if !self.0.overrides.is_empty() { + let mat = + self.0.overrides.matched(path, is_dir) + .map(IgnoreMatch::overrides); + if !mat.is_none() { + return mat; + } + } + let mut whitelisted = Match::None; + if self.0.opts.should_ignores() { + let mat = self.matched_ignore(path, is_dir); + if mat.is_ignore() { + return mat; + } else if mat.is_whitelist() { + whitelisted = mat; + } + } + if !self.0.types.is_empty() { + let mat = + self.0.types.matched(path, is_dir).map(IgnoreMatch::types); + if mat.is_ignore() { + return mat; + } else if mat.is_whitelist() { + whitelisted = mat; + } + } + if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) { + return Match::Ignore(IgnoreMatch::hidden()); + } + whitelisted + } + + /// Performs matching only on the ignore files for this directory and + /// all parent directories. 
+ fn matched_ignore<'a>( + &'a self, + path: &Path, + is_dir: bool, + ) -> Match> { + let (mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = + (Match::None, Match::None, Match::None, Match::None); + let mut saw_git = false; + for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) { + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher.matched(path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; + } + if let Some(abs_parent_path) = self.absolute_base() { + let path = abs_parent_path.join(path); + for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) { + if m_ignore.is_none() { + m_ignore = + ig.0.ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi.is_none() { + m_gi = + ig.0.git_ignore_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + if !saw_git && m_gi_exclude.is_none() { + m_gi_exclude = + ig.0.git_exclude_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + } + saw_git = saw_git || ig.0.has_git; + } + } + for gi in self.0.explicit_ignores.iter().rev() { + if !m_explicit.is_none() { + break; + } + m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore); + } + let m_global = self.0.git_global_matcher.matched(&path, is_dir) + .map(IgnoreMatch::gitignore); + if !m_ignore.is_none() { + m_ignore + } else if !m_gi.is_none() { + m_gi + } else if !m_gi_exclude.is_none() { + m_gi_exclude + } else if !m_global.is_none() { + m_global + } else if !m_explicit.is_none() { + m_explicit + } else { + Match::None + } + } + + /// Returns an iterator over parent ignore matchers, including this one. 
+ fn parents(&self) -> Parents { + Parents(Some(self)) + } + + /// Returns the first absolute path of the first absolute parent, if + /// one exists. + fn absolute_base(&self) -> Option<&Path> { + self.0.absolute_base.as_ref().map(|p| &***p) + } +} + +struct Parents<'a>(Option<&'a Ignore>); + +impl<'a> Iterator for Parents<'a> { + type Item = &'a Ignore; + + fn next(&mut self) -> Option<&'a Ignore> { + match self.0.take() { + None => None, + Some(ig) => { + self.0 = ig.0.parent.as_ref(); + Some(ig) + } + } + } +} + +/// A builder for creating an Ignore matcher. +#[derive(Clone, Debug)] +pub struct IgnoreBuilder { + /// The root directory path for this ignore matcher. + dir: PathBuf, + /// An override matcher (default is empty). + overrides: Arc, + /// A type matcher (default is empty). + types: Arc, + /// Explicit ignore matchers. + explicit_ignores: Vec, + /// Ignore config. + opts: IgnoreOptions, +} + +impl IgnoreBuilder { + /// Create a new builder for an `Ignore` matcher. + /// + /// All relative file paths are resolved with respect to the current + /// working directory. + pub fn new() -> IgnoreBuilder { + IgnoreBuilder { + dir: Path::new("").to_path_buf(), + overrides: Arc::new(Override::empty()), + types: Arc::new(Types::empty()), + explicit_ignores: vec![], + opts: IgnoreOptions { + hidden: true, + ignore: true, + git_global: true, + git_ignore: true, + git_exclude: true, + }, + } + } + + /// Builds a new `Ignore` matcher. + /// + /// The matcher returned won't match anything until ignore rules from + /// directories are added to it. 
+ pub fn build(&self) -> Ignore { + let git_global_matcher = + if !self.opts.git_global { + Gitignore::empty() + } else { + let (gi, err) = Gitignore::global(); + if let Some(err) = err { + debug!("{}", err); + } + gi + }; + Ignore(Arc::new(IgnoreInner { + compiled: Arc::new(RwLock::new(HashMap::new())), + dir: self.dir.clone(), + overrides: self.overrides.clone(), + types: self.types.clone(), + parent: None, + is_absolute_parent: true, + absolute_base: None, + explicit_ignores: Arc::new(self.explicit_ignores.clone()), + ignore_matcher: Gitignore::empty(), + git_global_matcher: Arc::new(git_global_matcher), + git_ignore_matcher: Gitignore::empty(), + git_exclude_matcher: Gitignore::empty(), + has_git: false, + opts: self.opts, + })) + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder { + self.overrides = Arc::new(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder { + self.types = Arc::new(types); + self + } + + /// Adds a new global ignore matcher from the ignore file path given. + pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder { + self.explicit_ignores.push(ig); + self + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.hidden = yes; + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.ignore = yes; + self + } + + /// Add a global gitignore matcher. 
+ /// + /// Its precedence is lower than both normal `.gitignore` files and + /// `.git/info/exclude` files. + /// + /// This overwrites any previous global gitignore setting. + /// + /// This is enabled by default. + pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_global = yes; + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_ignore = yes; + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder { + self.opts.git_exclude = yes; + self + } +} + +/// Creates a new gitignore matcher for the directory given. +/// +/// Ignore globs are extracted from each of the file names in `dir` in the +/// order given (earlier names have lower precedence than later names). +/// +/// I/O errors are ignored. 
pub fn create_gitignore(
    dir: &Path,
    names: &[&str],
) -> (Gitignore, Option<Error>) {
    let mut builder = GitignoreBuilder::new(dir);
    let mut errs = PartialErrorBuilder::default();
    for name in names {
        let gipath = dir.join(name);
        // I/O errors (e.g. the file not existing) are dropped here; other
        // errors are collected.
        errs.maybe_push_ignore_io(builder.add(gipath));
    }
    let gi = match builder.build() {
        Ok(gi) => gi,
        Err(err) => {
            // Fall back to an empty matcher rooted at the same directory.
            errs.push(err);
            GitignoreBuilder::new(dir).build().unwrap()
        }
    };
    (gi, errs.into_error_option())
}

#[cfg(test)]
mod tests {
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;

    use tempdir::TempDir;

    use dir::IgnoreBuilder;
    use gitignore::Gitignore;
    use Error;

    /// Writes `contents` to a freshly created file at `path`.
    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
        let mut file = File::create(path).unwrap();
        file.write_all(contents.as_bytes()).unwrap();
    }

    /// Creates `path` and any missing ancestors.
    fn mkdirp<P: AsRef<Path>>(path: P) {
        fs::create_dir_all(path).unwrap();
    }

    /// Unwraps a partial error into its component errors; panics otherwise.
    fn partial(err: Error) -> Vec<Error> {
        match err {
            Error::Partial(errs) => errs,
            _ => panic!("expected partial error but got {:?}", err),
        }
    }

    #[test]
    fn explicit_ignore() {
        let td = TempDir::new("ignore-test-").unwrap();
        wfile(td.path().join("not-an-ignore"), "foo\n!bar");

        let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
        assert!(err.is_none());
        let (ig, err) = IgnoreBuilder::new()
            .add_ignore(gi).build().add_child(td.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn git_exclude() {
        let td = TempDir::new("ignore-test-").unwrap();
        mkdirp(td.path().join(".git/info"));
        wfile(td.path().join(".git/info/exclude"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn gitignore() {
        let td =
TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + #[test] + fn ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".ignore"), "foo\n!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_ignore()); + assert!(ig.matched("bar", false).is_whitelist()); + assert!(ig.matched("baz", false).is_none()); + } + + // Tests that an .ignore will override a .gitignore. + #[test] + fn ignore_over_gitignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "!foo"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("foo", false).is_whitelist()); + } + + // Tests that exclude has lower precedent than both .ignore and .gitignore. 
+ #[test] + fn exclude_lowest() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "!foo"); + wfile(td.path().join(".ignore"), "!bar"); + mkdirp(td.path().join(".git/info")); + wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + assert!(ig.matched("baz", false).is_ignore()); + assert!(ig.matched("foo", false).is_whitelist()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn errored() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + } + + #[test] + fn errored_both() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo"); + wfile(td.path().join(".ignore"), "fo**o"); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert_eq!(2, partial(err.expect("an error")).len()); + } + + #[test] + fn errored_partial() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo\nbar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_ignore()); + } + + #[test] + fn errored_partial_and_ignore() { + let td = TempDir::new("ignore-test-").unwrap(); + wfile(td.path().join(".gitignore"), "f**oo\nbar"); + wfile(td.path().join(".ignore"), "!bar"); + + let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_some()); + assert!(ig.matched("bar", false).is_whitelist()); + } + + #[test] + fn not_present_empty() { + let td = TempDir::new("ignore-test-").unwrap(); + + let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); + assert!(err.is_none()); + } + + #[test] + fn stops_at_git_dir() { + // This tests that .gitignore files beyond a .git barrier aren't + 
// matched, but .ignore files are. + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo/.git")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join(".ignore"), "bar"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path()); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(ig1.path().join("foo")); + assert!(err.is_none()); + + assert!(ig1.matched("foo", false).is_ignore()); + assert!(ig2.matched("foo", false).is_none()); + + assert!(ig1.matched("bar", false).is_ignore()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent() { + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("foo")); + wfile(td.path().join(".gitignore"), "bar"); + + // First, check that the parent gitignore file isn't detected if the + // parent isn't added. This establishes a baseline. + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig1.matched("bar", false).is_none()); + + // Second, check that adding a parent directory actually works. 
+ let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("foo")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child(td.path().join("foo")); + assert!(err.is_none()); + assert!(ig2.matched("bar", false).is_ignore()); + } + + #[test] + fn absolute_parent_anchored() { + let td = TempDir::new("ignore-test-").unwrap(); + mkdirp(td.path().join(".git")); + mkdirp(td.path().join("src/llvm")); + wfile(td.path().join(".gitignore"), "/llvm/\nfoo"); + + let ig0 = IgnoreBuilder::new().build(); + let (ig1, err) = ig0.add_parents(td.path().join("src")); + assert!(err.is_none()); + let (ig2, err) = ig1.add_child("src"); + assert!(err.is_none()); + + assert!(ig1.matched("llvm", true).is_none()); + assert!(ig2.matched("llvm", true).is_none()); + assert!(ig2.matched("src/llvm", true).is_none()); + assert!(ig2.matched("foo", false).is_ignore()); + assert!(ig2.matched("src/foo", false).is_ignore()); + } +} diff --git a/ignore/src/gitignore.rs b/ignore/src/gitignore.rs new file mode 100644 index 00000000..c44910ff --- /dev/null +++ b/ignore/src/gitignore.rs @@ -0,0 +1,607 @@ +/*! +The gitignore module provides a way to match globs from a gitignore file +against file paths. + +Note that this module implements the specification as described in the +`gitignore` man page from scratch. That is, this module does *not* shell out to +the `git` command line tool. +*/ + +use std::cell::RefCell; +use std::env; +use std::fs::File; +use std::io::{self, BufRead, Read}; +use std::path::{Path, PathBuf}; +use std::str; +use std::sync::Arc; + +use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; +use regex::bytes::Regex; +use thread_local::ThreadLocal; + +use pathutil::{is_file_name, strip_prefix}; +use {Error, Match, PartialErrorBuilder}; + +/// Glob represents a single glob in a gitignore file. +/// +/// This is used to report information about the highest precedent glob that +/// matched in one or more gitignore files. 
#[derive(Clone, Debug)]
pub struct Glob {
    /// The file path that this glob was extracted from.
    from: Option<PathBuf>,
    /// The original glob string.
    original: String,
    /// The actual glob string used to convert to a regex.
    actual: String,
    /// Whether this is a whitelisted glob or not.
    is_whitelist: bool,
    /// Whether this glob should only match directories or not.
    is_only_dir: bool,
}

impl Glob {
    /// Returns the file path that defined this glob.
    pub fn from(&self) -> Option<&Path> {
        self.from.as_ref().map(|p| &**p)
    }

    /// The original glob as it was defined in a gitignore file.
    pub fn original(&self) -> &str {
        &self.original
    }

    /// The actual glob that was compiled to respect gitignore
    /// semantics.
    pub fn actual(&self) -> &str {
        &self.actual
    }

    /// Whether this was a whitelisted glob or not.
    pub fn is_whitelist(&self) -> bool {
        self.is_whitelist
    }

    /// Whether this glob must match a directory or not.
    pub fn is_only_dir(&self) -> bool {
        self.is_only_dir
    }
}

/// Gitignore is a matcher for the globs in one or more gitignore files
/// in the same directory.
#[derive(Clone, Debug)]
pub struct Gitignore {
    /// The compiled set of every glob in `globs`.
    set: GlobSet,
    /// The directory that candidate paths are matched relative to.
    root: PathBuf,
    /// Per-glob metadata, indexed by the positions that `set` reports.
    globs: Vec<Glob>,
    /// The number of non-whitelist globs.
    num_ignores: u64,
    /// The number of whitelist globs.
    num_whitelists: u64,
    /// Per-thread scratch buffer that receives the indices of matching globs.
    matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
}

impl Gitignore {
    /// Creates a new gitignore matcher from the gitignore file path given.
    ///
    /// If it's desirable to include multiple gitignore files in a single
    /// matcher, or read gitignore globs from a different source, then
    /// use `GitignoreBuilder`.
    ///
    /// This always returns a valid matcher, even if it's empty. In particular,
    /// a Gitignore file can be partially valid, e.g., when one glob is invalid
    /// but the rest aren't.
    ///
    /// Note that I/O errors are ignored. For more granular control over
    /// errors, use `GitignoreBuilder`.
+ pub fn new>( + gitignore_path: P, + ) -> (Gitignore, Option) { + let path = gitignore_path.as_ref(); + let parent = path.parent().unwrap_or(Path::new("/")); + let mut builder = GitignoreBuilder::new(parent); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(builder.add(path)); + match builder.build() { + Ok(gi) => (gi, errs.into_error_option()), + Err(err) => { + errs.push(err); + (Gitignore::empty(), errs.into_error_option()) + } + } + } + + /// Creates a new gitignore matcher from the global ignore file, if one + /// exists. + /// + /// The global config file path is specified by git's `core.excludesFile` + /// config option. + /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + pub fn global() -> (Gitignore, Option) { + match gitconfig_excludes_path() { + None => (Gitignore::empty(), None), + Some(path) => { + if !path.is_file() { + (Gitignore::empty(), None) + } else { + Gitignore::new(path) + } + } + } + } + + /// Creates a new empty gitignore matcher that never matches anything. + /// + /// Its path is empty. + pub fn empty() -> Gitignore { + GitignoreBuilder::new("").build().unwrap() + } + + /// Returns the directory containing this gitignore matcher. + /// + /// All matches are done relative to this path. + pub fn path(&self) -> &Path { + &*self.root + } + + /// Returns true if and only if this gitignore has zero globs, and + /// therefore never matches any file path. + pub fn is_empty(&self) -> bool { + self.set.is_empty() + } + + /// Returns the total number of globs, which should be equivalent to + /// `num_ignores + num_whitelists`. + pub fn len(&self) -> usize { + self.set.len() + } + + /// Returns the total number of ignore globs. 
+ pub fn num_ignores(&self) -> u64 { + self.num_ignores + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.num_whitelists + } + + /// Returns whether the given file path matched a pattern in this gitignore + /// matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// The given path is matched relative to the path given when building + /// the matcher. Specifically, before matching `path`, its prefix (as + /// determined by a common suffix of the directory containing this + /// gitignore) is stripped. If there is no common suffix/prefix overlap, + /// then `path` is assumed to be relative to this matcher. + pub fn matched>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + self.matched_stripped(self.strip(path.as_ref()), is_dir) + } + + /// Like matched, but takes a path that has already been stripped. + fn matched_stripped>( + &self, + path: P, + is_dir: bool, + ) -> Match<&Glob> { + if self.is_empty() { + return Match::None; + } + let path = path.as_ref(); + let _matches = self.matches.get_default(); + let mut matches = _matches.borrow_mut(); + let candidate = Candidate::new(path); + self.set.matches_candidate_into(&candidate, &mut *matches); + for &i in matches.iter().rev() { + let glob = &self.globs[i]; + if !glob.is_only_dir() || is_dir { + return if glob.is_whitelist() { + Match::Whitelist(glob) + } else { + Match::Ignore(glob) + }; + } + } + Match::None + } + + /// Strips the given path such that it's suitable for matching with this + /// gitignore matcher. + fn strip<'a, P: 'a + AsRef + ?Sized>( + &'a self, + path: &'a P, + ) -> &'a Path { + let mut path = path.as_ref(); + // A leading ./ is completely superfluous. We also strip it from + // our gitignore root path, so we need to strip it from our candidate + // path too. 
+ if let Some(p) = strip_prefix("./", path) { + path = p; + } + // Strip any common prefix between the candidate path and the root + // of the gitignore, to make sure we get relative matching right. + // BUT, a file name might not have any directory components to it, + // in which case, we don't want to accidentally strip any part of the + // file name. + if !is_file_name(path) { + if let Some(p) = strip_prefix(&self.root, path) { + path = p; + // If we're left with a leading slash, get rid of it. + if let Some(p) = strip_prefix("/", path) { + path = p; + } + } + } + path + } +} + +/// Builds a matcher for a single set of globs from a .gitignore file. +pub struct GitignoreBuilder { + builder: GlobSetBuilder, + root: PathBuf, + globs: Vec, +} + +impl GitignoreBuilder { + /// Create a new builder for a gitignore file. + /// + /// The path given should be the path at which the globs for this gitignore + /// file should be matched. Note that paths are always matched relative + /// to the root path given here. Generally, the root path should correspond + /// to the *directory* containing a `.gitignore` file. + pub fn new>(root: P) -> GitignoreBuilder { + let root = root.as_ref(); + GitignoreBuilder { + builder: GlobSetBuilder::new(), + root: strip_prefix("./", root).unwrap_or(root).to_path_buf(), + globs: vec![], + } + } + + /// Builds a new matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result { + let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count(); + let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count(); + let set = try!( + self.builder.build().map_err(|err| Error::Glob(err.to_string()))); + Ok(Gitignore { + set: set, + root: self.root.clone(), + globs: self.globs.clone(), + num_ignores: nignore as u64, + num_whitelists: nwhite as u64, + matches: Arc::new(ThreadLocal::default()), + }) + } + + /// Add each glob from the file path given. 
+ /// + /// The file given should be formatted as a `gitignore` file. + /// + /// Note that partial errors can be returned. For example, if there was + /// a problem adding one glob, an error for that will be returned, but + /// all other valid globs will still be added. + pub fn add>(&mut self, path: P) -> Option { + let path = path.as_ref(); + let file = match File::open(path) { + Err(err) => return Some(Error::Io(err).with_path(path)), + Ok(file) => file, + }; + let rdr = io::BufReader::new(file); + let mut errs = PartialErrorBuilder::default(); + for (i, line) in rdr.lines().enumerate() { + let lineno = (i + 1) as u64; + let line = match line { + Ok(line) => line, + Err(err) => { + errs.push(Error::Io(err).tagged(path, lineno)); + continue; + } + }; + if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) { + errs.push(err.tagged(path, lineno)); + } + } + errs.into_error_option() + } + + /// Add each glob line from the string given. + /// + /// If this string came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// The string given should be formatted as a `gitignore` file. + #[cfg(test)] + fn add_str( + &mut self, + from: Option, + gitignore: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + for line in gitignore.lines() { + try!(self.add_line(from.clone(), line)); + } + Ok(self) + } + + /// Add a line from a gitignore file to this builder. + /// + /// If this line came from a particular `gitignore` file, then its path + /// should be provided here. + /// + /// If the line could not be parsed as a glob, then an error is returned. 
+ pub fn add_line( + &mut self, + from: Option, + mut line: &str, + ) -> Result<&mut GitignoreBuilder, Error> { + if line.starts_with("#") { + return Ok(self); + } + if !line.ends_with("\\ ") { + line = line.trim_right(); + } + if line.is_empty() { + return Ok(self); + } + let mut glob = Glob { + from: from, + original: line.to_string(), + actual: String::new(), + is_whitelist: false, + is_only_dir: false, + }; + let mut literal_separator = false; + let has_slash = line.chars().any(|c| c == '/'); + let is_absolute = line.chars().nth(0).unwrap() == '/'; + if line.starts_with("\\!") || line.starts_with("\\#") { + line = &line[1..]; + } else { + if line.starts_with("!") { + glob.is_whitelist = true; + line = &line[1..]; + } + if line.starts_with("/") { + // `man gitignore` says that if a glob starts with a slash, + // then the glob can only match the beginning of a path + // (relative to the location of gitignore). We achieve this by + // simply banning wildcards from matching /. + literal_separator = true; + line = &line[1..]; + } + } + // If it ends with a slash, then this should only match directories, + // but the slash should otherwise not be used while globbing. + if let Some((i, c)) = line.char_indices().rev().nth(0) { + if c == '/' { + glob.is_only_dir = true; + line = &line[..i]; + } + } + // If there is a literal slash, then we note that so that globbing + // doesn't let wildcards match slashes. + glob.actual = line.to_string(); + if has_slash { + literal_separator = true; + } + // If there was a leading slash, then this is a glob that must + // match the entire path name. Otherwise, we should let it match + // anywhere, so use a **/ prefix. + if !is_absolute { + // ... but only if we don't already have a **/ prefix. + if !glob.actual.starts_with("**/") { + glob.actual = format!("**/{}", glob.actual); + } + } + // If the glob ends with `/**`, then we should only match everything + // inside a directory, but not the directory itself. 
Standard globs + // will match the directory. So we add `/*` to force the issue. + if glob.actual.ends_with("/**") { + glob.actual = format!("{}/*", glob.actual); + } + let parsed = try!( + GlobBuilder::new(&glob.actual) + .literal_separator(literal_separator) + .build() + .map_err(|err| Error::Glob(err.to_string()))); + self.builder.add(parsed); + self.globs.push(glob); + Ok(self) + } +} + +/// Return the file path of the current environment's global gitignore file. +/// +/// Note that the file path returned may not exist. +fn gitconfig_excludes_path() -> Option { + gitconfig_contents() + .and_then(|data| parse_excludes_file(&data)) + .or_else(excludes_file_default) +} + +/// Returns the file contents of git's global config file, if one exists. +fn gitconfig_contents() -> Option> { + let home = match env::var_os("HOME") { + None => return None, + Some(home) => PathBuf::from(home), + }; + let mut file = match File::open(home.join(".gitconfig")) { + Err(_) => return None, + Ok(file) => io::BufReader::new(file), + }; + let mut contents = vec![]; + file.read_to_end(&mut contents).ok().map(|_| contents) +} + +/// Returns the default file path for a global .gitignore file. +/// +/// Specifically, this respects XDG_CONFIG_HOME. +fn excludes_file_default() -> Option { + env::var_os("XDG_CONFIG_HOME") + .and_then(|x| if x.is_empty() { None } else { Some(x) }) + .or_else(|| env::var_os("HOME")) + .map(|x| PathBuf::from(x).join("git/ignore")) +} + +/// Extract git's `core.excludesfile` config setting from the raw file contents +/// given. +fn parse_excludes_file(data: &[u8]) -> Option { + // N.B. This is the lazy approach, and isn't technically correct, but + // probably works in more circumstances. I guess we would ideally have + // a full INI parser. Yuck. + lazy_static! 
/// Expands a leading `~` in a file path to the value of `$HOME`.
///
/// Only a `~` that is the whole path, or that is immediately followed by
/// `/`, is expanded. Any other `~` (for example in `/backup~/ignore` or
/// `~user/ignore`) is left untouched, since replacing it with the current
/// user's home directory would produce a path that was never asked for.
///
/// If `$HOME` is not set (or is not valid Unicode), the path is returned
/// unchanged.
fn expand_tilde(path: &str) -> String {
    let home = match env::var("HOME") {
        Err(_) => return path.to_string(),
        Ok(home) => home,
    };
    if path == "~" {
        home
    } else if path.starts_with("~/") {
        // Keep the `/` and everything after it; only the `~` is replaced.
        format!("{}{}", home, &path[1..])
    } else {
        path.to_string()
    }
}
not_ignored { + ($name:ident, $root:expr, $gi:expr, $path:expr) => { + not_ignored!($name, $root, $gi, $path, false); + }; + ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { + #[test] + fn $name() { + let gi = gi_from_str($root, $gi); + assert!(!gi.matched($path, $is_dir).is_ignore()); + } + }; + } + + const ROOT: &'static str = "/home/foobar/rust/rg"; + + ignored!(ig1, ROOT, "months", "months"); + ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); + ignored!(ig3, ROOT, "*.rs", "src/main.rs"); + ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); + ignored!(ig5, ROOT, "/*.c", "cat-file.c"); + ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); + ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); + ignored!(ig8, ROOT, "foo/", "foo", true); + ignored!(ig9, ROOT, "**/foo", "foo"); + ignored!(ig10, ROOT, "**/foo", "src/foo"); + ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); + ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); + ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); + ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar"); + ignored!(ig15, ROOT, "abc/**", "abc/x"); + ignored!(ig16, ROOT, "abc/**", "abc/x/y"); + ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); + ignored!(ig18, ROOT, "a/**/b", "a/b"); + ignored!(ig19, ROOT, "a/**/b", "a/x/b"); + ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); + ignored!(ig21, ROOT, r"\!xy", "!xy"); + ignored!(ig22, ROOT, r"\#foo", "#foo"); + ignored!(ig23, ROOT, "foo", "./foo"); + ignored!(ig24, ROOT, "target", "grep/target"); + ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); + ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); + ignored!(ig27, ROOT, "foo/", "xyz/foo", true); + ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs"); + ignored!(ig29, "./src", "/llvm/", "./src/llvm", true); + ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true); + + not_ignored!(ignot1, ROOT, "amonths", "months"); + not_ignored!(ignot2, ROOT, "monthsa", "months"); + not_ignored!(ignot3, ROOT, "/src/*.rs", 
"src/grep/src/main.rs"); + not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); + not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); + not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); + not_ignored!(ignot7, ROOT, "foo/", "foo", false); + not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); + not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); + not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); + not_ignored!(ignot11, ROOT, "#foo", "#foo"); + not_ignored!(ignot12, ROOT, "\n\n\n", "foo"); + not_ignored!(ignot13, ROOT, "foo/**", "foo", true); + not_ignored!( + ignot14, "./third_party/protobuf", "m4/ltoptions.m4", + "./third_party/protobuf/csharp/src/packages/repositories.config"); + + fn bytes(s: &str) -> Vec { + s.to_string().into_bytes() + } + + fn path_string>(path: P) -> String { + path.as_ref().to_str().unwrap().to_string() + } + + #[test] + fn parse_excludes_file1() { + let data = bytes("[core]\nexcludesFile = /foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), "/foo/bar"); + } + + #[test] + fn parse_excludes_file2() { + let data = bytes("[core]\nexcludesFile = ~/foo/bar"); + let got = super::parse_excludes_file(&data).unwrap(); + assert_eq!(path_string(got), super::expand_tilde("~/foo/bar")); + } + + #[test] + fn parse_excludes_file3() { + let data = bytes("[core]\nexcludeFile = /foo/bar"); + assert!(super::parse_excludes_file(&data).is_none()); + } + + // See: https://github.com/BurntSushi/ripgrep/issues/106 + #[test] + fn regression_106() { + gi_from_str("/", " "); + } +} diff --git a/ignore/src/lib.rs b/ignore/src/lib.rs new file mode 100644 index 00000000..a3aa0c8f --- /dev/null +++ b/ignore/src/lib.rs @@ -0,0 +1,300 @@ +/*! +The ignore crate provides a fast recursive directory iterator that respects +various filters such as globs, file types and `.gitignore` files. 
The precise +matching rules and precedence is explained in the documentation for +`WalkBuilder`. + +Secondarily, this crate exposes gitignore and file type matchers for use cases +that demand more fine-grained control. + +# Example + +This example shows the most basic usage of this crate. This code will +recursively traverse the current directory while automatically filtering out +files and directories according to ignore globs found in files like +`.ignore` and `.gitignore`: + + +```rust,no_run +use ignore::Walk; + +for result in Walk::new("./") { + // Each item yielded by the iterator is either a directory entry or an + // error, so either print the path or the error. + match result { + Ok(entry) => println!("{}", entry.path().display()), + Err(err) => println!("ERROR: {}", err), + } +} +``` + +# Example: advanced + +By default, the recursive directory iterator will ignore hidden files and +directories. This can be disabled by building the iterator with `WalkBuilder`: + +```rust,no_run +use ignore::WalkBuilder; + +for result in WalkBuilder::new("./").hidden(false).build() { + println!("{:?}", result); +} +``` + +See the documentation for `WalkBuilder` for many other options. +*/ + +extern crate globset; +#[macro_use] +extern crate lazy_static; +#[macro_use] +extern crate log; +extern crate memchr; +extern crate regex; +#[cfg(test)] +extern crate tempdir; +extern crate thread_local; +extern crate walkdir; + +use std::error; +use std::fmt; +use std::io; +use std::path::{Path, PathBuf}; + +pub use walk::{DirEntry, Walk, WalkBuilder}; + +mod dir; +pub mod gitignore; +mod pathutil; +pub mod overrides; +pub mod types; +mod walk; + +/// Represents an error that can occur when parsing a gitignore file. +#[derive(Debug)] +pub enum Error { + /// A collection of "soft" errors. These occur when adding an ignore + /// file partially succeeded. + Partial(Vec), + /// An error associated with a specific line number. 
+ WithLineNumber { line: u64, err: Box }, + /// An error associated with a particular file path. + WithPath { path: PathBuf, err: Box }, + /// An error that occurs when doing I/O, such as reading an ignore file. + Io(io::Error), + /// An error that occurs when trying to parse a glob. + Glob(String), + /// A type selection for a file type that is not defined. + UnrecognizedFileType(String), + /// A user specified file type definition could not be parsed. + InvalidDefinition, +} + +impl Error { + /// Returns true if this is a partial error. + /// + /// A partial error occurs when only some operations failed while others + /// may have succeeded. For example, an ignore file may contain an invalid + /// glob among otherwise valid globs. + pub fn is_partial(&self) -> bool { + match *self { + Error::Partial(_) => true, + Error::WithLineNumber { ref err, .. } => err.is_partial(), + Error::WithPath { ref err, .. } => err.is_partial(), + _ => false, + } + } + + /// Returns true if this error is exclusively an I/O error. + pub fn is_io(&self) -> bool { + match *self { + Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(), + Error::WithLineNumber { ref err, .. } => err.is_io(), + Error::WithPath { ref err, .. } => err.is_io(), + Error::Io(_) => true, + Error::Glob(_) => false, + Error::UnrecognizedFileType(_) => false, + Error::InvalidDefinition => false, + } + } + + /// Turn an error into a tagged error with the given file path. + fn with_path>(self, path: P) -> Error { + Error::WithPath { + path: path.as_ref().to_path_buf(), + err: Box::new(self), + } + } + + /// Turn an error into a tagged error with the given file path and line + /// number. If path is empty, then it is omitted from the error. 
+ fn tagged>(self, path: P, lineno: u64) -> Error { + let errline = Error::WithLineNumber { + line: lineno, + err: Box::new(self), + }; + if path.as_ref().as_os_str().is_empty() { + return errline; + } + errline.with_path(path) + } +} + +impl error::Error for Error { + fn description(&self) -> &str { + match *self { + Error::Partial(_) => "partial error", + Error::WithLineNumber { ref err, .. } => err.description(), + Error::WithPath { ref err, .. } => err.description(), + Error::Io(ref err) => err.description(), + Error::Glob(ref msg) => msg, + Error::UnrecognizedFileType(_) => "unrecognized file type", + Error::InvalidDefinition => "invalid definition", + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Error::Partial(ref errs) => { + let msgs: Vec = + errs.iter().map(|err| err.to_string()).collect(); + write!(f, "{}", msgs.join("\n")) + } + Error::WithLineNumber { line, ref err } => { + write!(f, "line {}: {}", line, err) + } + Error::WithPath { ref path, ref err } => { + write!(f, "{}: {}", path.display(), err) + } + Error::Io(ref err) => err.fmt(f), + Error::Glob(ref msg) => write!(f, "{}", msg), + Error::UnrecognizedFileType(ref ty) => { + write!(f, "unrecognized file type: {}", ty) + } + Error::InvalidDefinition => { + write!(f, "invalid definition (format is type:glob, e.g., \ + html:*.html)") + } + } + } +} + +impl From for Error { + fn from(err: io::Error) -> Error { + Error::Io(err) + } +} + +#[derive(Debug, Default)] +struct PartialErrorBuilder(Vec); + +impl PartialErrorBuilder { + fn push(&mut self, err: Error) { + self.0.push(err); + } + + fn push_ignore_io(&mut self, err: Error) { + if !err.is_io() { + self.push(err); + } + } + + fn maybe_push(&mut self, err: Option) { + if let Some(err) = err { + self.push(err); + } + } + + fn maybe_push_ignore_io(&mut self, err: Option) { + if let Some(err) = err { + self.push_ignore_io(err); + } + } + + fn into_error_option(mut self) -> Option { + 
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` typically carries extra information about the
/// match, such as which gitignore file and which glob pattern were
/// responsible for it.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// No glob matched the path.
    None,
    /// The highest precedence glob that matched says the path should be
    /// ignored.
    Ignore(T),
    /// The highest precedence glob that matched says the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        self.inner().is_none()
    }

    /// Returns true if the match says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        match *self {
            Match::Ignore(_) => true,
            Match::Whitelist(_) | Match::None => false,
        }
    }

    /// Returns true if the match says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        match *self {
            Match::Whitelist(_) => true,
            Match::Ignore(_) | Match::None => false,
        }
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the carried value. A
    /// non-match stays a non-match.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Ignore(value) => Match::Whitelist(value),
            Match::Whitelist(value) => Match::Ignore(value),
            Match::None => Match::None,
        }
    }

    /// Borrows the value carried by this match, if there is one.
    pub fn inner(&self) -> Option<&T> {
        match *self {
            Match::Ignore(ref value) | Match::Whitelist(ref value) => {
                Some(value)
            }
            Match::None => None,
        }
    }

    /// Transforms the carried value with `f`, keeping the kind of match.
    ///
    /// A non-match carries no value and is returned as a non-match.
    pub fn map<U, F>(self, f: F) -> Match<U>
        where F: FnOnce(T) -> U
    {
        match self {
            Match::Ignore(value) => Match::Ignore(f(value)),
            Match::Whitelist(value) => Match::Whitelist(f(value)),
            Match::None => Match::None,
        }
    }
}
+ pub fn path(&self) -> &Path { + self.0.path() + } + + /// Returns true if and only if this matcher is empty. + /// + /// When a matcher is empty, it will never match any file path. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Returns the total number of ignore globs. + pub fn num_ignores(&self) -> u64 { + self.0.num_whitelists() + } + + /// Returns the total number of whitelisted globs. + pub fn num_whitelists(&self) -> u64 { + self.0.num_ignores() + } + + /// Returns whether the given file path matched a pattern in this override + /// matcher. + /// + /// `is_dir` should be true if the path refers to a directory and false + /// otherwise. + /// + /// If there are no overrides, then this always returns `Match::None`. + /// + /// If there is at least one whitelist override, then this never returns + /// `Match::None`, since non-matches are interpreted as ignored. + /// + /// The given path is matched to the globs relative to the path given + /// when building the override matcher. Specifically, before matching + /// `path`, its prefix (as determined by a common suffix of the directory + /// given) is stripped. If there is no common suffix/prefix overlap, then + /// `path` is assumed to reside in the same directory as the root path for + /// this set of overrides. + pub fn matched<'a, P: AsRef>( + &'a self, + path: P, + is_dir: bool, + ) -> Match> { + if self.is_empty() { + return Match::None; + } + let mat = self.0.matched(path, is_dir).invert(); + if mat.is_none() && self.num_whitelists() > 0 { + return Match::Ignore(Glob::unmatched()); + } + mat.map(move |giglob| Glob(GlobInner::Matched(giglob))) + } +} + +/// Builds a matcher for a set of glob overrides. +pub struct OverrideBuilder { + builder: GitignoreBuilder, +} + +impl OverrideBuilder { + /// Create a new override builder. + /// + /// Matching is done relative to the directory path provided. 
+ pub fn new>(path: P) -> OverrideBuilder { + OverrideBuilder { + builder: GitignoreBuilder::new(path), + } + } + + /// Builds a new override matcher from the globs added so far. + /// + /// Once a matcher is built, no new globs can be added to it. + pub fn build(&self) -> Result { + Ok(Override(try!(self.builder.build()))) + } + + /// Add a glob to the set of overrides. + /// + /// Globs provided here have precisely the same semantics as a single + /// line in a `gitignore` file, where the meaning of `!` is inverted: + /// namely, `!` at the beginning of a glob will ignore a file. Without `!`, + /// all matches of the glob provided are treated as whitelist matches. + pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> { + try!(self.builder.add_line(None, glob)); + Ok(self) + } +} + +#[cfg(test)] +mod tests { + use super::{Override, OverrideBuilder}; + + const ROOT: &'static str = "/home/andrew/foo"; + + fn ov(globs: &[&str]) -> Override { + let mut builder = OverrideBuilder::new(ROOT); + for glob in globs { + builder.add(glob).unwrap(); + } + builder.build().unwrap() + } + + #[test] + fn empty() { + let ov = ov(&[]); + assert!(ov.matched("a.foo", false).is_none()); + assert!(ov.matched("a", false).is_none()); + assert!(ov.matched("", false).is_none()); + } + + #[test] + fn simple() { + let ov = ov(&["*.foo", "!*.bar"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.foo", true).is_whitelist()); + assert!(ov.matched("a.rs", false).is_ignore()); + assert!(ov.matched("a.rs", true).is_ignore()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn only_ignores() { + let ov = ov(&["!*.bar"]); + assert!(ov.matched("a.rs", false).is_none()); + assert!(ov.matched("a.rs", true).is_none()); + assert!(ov.matched("a.bar", false).is_ignore()); + assert!(ov.matched("a.bar", true).is_ignore()); + } + + #[test] + fn precedence() { + let ov = ov(&["*.foo", 
"!*.bar.foo"]); + assert!(ov.matched("a.foo", false).is_whitelist()); + assert!(ov.matched("a.baz", false).is_ignore()); + assert!(ov.matched("a.bar.foo", false).is_ignore()); + } + + #[test] + fn gitignore() { + let ov = ov(&["/foo", "bar/*.rs", "baz/**"]); + assert!(ov.matched("bar/wat/lib.rs", false).is_ignore()); + assert!(ov.matched("wat/bar/lib.rs", false).is_whitelist()); + assert!(ov.matched("foo", false).is_whitelist()); + assert!(ov.matched("wat/foo", false).is_ignore()); + assert!(ov.matched("baz", false).is_ignore()); + assert!(ov.matched("baz/a", false).is_whitelist()); + assert!(ov.matched("baz/a/b", false).is_whitelist()); + } +} diff --git a/ignore/src/pathutil.rs b/ignore/src/pathutil.rs new file mode 100644 index 00000000..bfd43de3 --- /dev/null +++ b/ignore/src/pathutil.rs @@ -0,0 +1,108 @@ +use std::ffi::OsStr; +use std::path::Path; + +/// Returns true if and only if this file path is considered to be hidden. +#[cfg(unix)] +pub fn is_hidden>(path: P) -> bool { + use std::os::unix::ffi::OsStrExt; + + if let Some(name) = file_name(path.as_ref()) { + name.as_bytes().get(0) == Some(&b'.') + } else { + false + } +} + +/// Returns true if and only if this file path is considered to be hidden. +#[cfg(not(unix))] +pub fn is_hidden>(path: P) -> bool { + if let Some(name) = file_name(path.as_ref()) { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. 
+#[cfg(unix)] +pub fn strip_prefix<'a, P: AsRef + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + use std::os::unix::ffi::OsStrExt; + + let prefix = prefix.as_ref().as_os_str().as_bytes(); + let path = path.as_os_str().as_bytes(); + if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { + None + } else { + Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..]))) + } +} + +/// Strip `prefix` from the `path` and return the remainder. +/// +/// If `path` doesn't have a prefix `prefix`, then return `None`. +#[cfg(not(unix))] +pub fn strip_prefix<'a, P: AsRef + ?Sized>( + prefix: &'a P, + path: &'a Path, +) -> Option<&'a Path> { + path.strip_prefix(prefix).ok() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(unix)] +pub fn is_file_name>(path: P) -> bool { + use std::os::unix::ffi::OsStrExt; + use memchr::memchr; + + let path = path.as_ref().as_os_str().as_bytes(); + memchr(b'/', path).is_none() +} + +/// Returns true if this file path is just a file name. i.e., Its parent is +/// the empty string. +#[cfg(not(unix))] +pub fn is_file_name>(path: P) -> bool { + path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(unix)] +pub fn file_name<'a, P: AsRef + ?Sized>( + path: &'a P, +) -> Option<&'a OsStr> { + use std::os::unix::ffi::OsStrExt; + use memchr::memrchr; + + let path = path.as_ref().as_os_str().as_bytes(); + if path.is_empty() { + return None; + } else if path.len() == 1 && path[0] == b'.' { + return None; + } else if path.last() == Some(&b'.') { + return None; + } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] 
{ + return None; + } + let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); + Some(OsStr::from_bytes(&path[last_slash..])) +} + +/// The final component of the path, if it is a normal file. +/// +/// If the path terminates in ., .., or consists solely of a root of prefix, +/// file_name will return None. +#[cfg(not(unix))] +pub fn file_name<'a, P: AsRef + ?Sized>( + path: &'a P, +) -> Option<&'a OsStr> { + path.as_ref().file_name() +} diff --git a/ignore/src/types.rs b/ignore/src/types.rs new file mode 100644 index 00000000..8f4cd171 --- /dev/null +++ b/ignore/src/types.rs @@ -0,0 +1,568 @@ +/*! +The types module provides a way of associating globs on file names to file +types. + +This can be used to match specific types of files. For example, among +the default file types provided, the Rust file type is defined to be `*.rs` +with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with +name `c`. + +Note that the set of default types may change over time. + +# Example + +This shows how to create and use a simple file type matcher using the default +file types defined in this crate. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.select("rust"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_whitelist()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: negation + +This is like the previous example, but shows how negating a file type works. +That is, this will let us match file paths that *don't* correspond to a +particular file type. 
+ +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.negate("c"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("foo.rs", false).is_none()); +assert!(matcher.matched("foo.c", false).is_ignore()); +``` + +# Example: custom file type definitions + +This shows how to extend this library default file type definitions with +your own. + +``` +use ignore::types::TypesBuilder; + +let mut builder = TypesBuilder::new(); +builder.add_defaults(); +builder.add("foo", "*.foo"); +// Another way of adding a file type definition. +// This is useful when accepting input from an end user. +builder.add_def("bar:*.bar"); +// Note: we only select `foo`, not `bar`. +builder.select("foo"); +let matcher = builder.build().unwrap(); + +assert!(matcher.matched("x.foo", false).is_whitelist()); +// This is ignored because we only selected the `foo` file type. +assert!(matcher.matched("x.bar", false).is_ignore()); +``` +*/ + +use std::cell::RefCell; +use std::collections::HashMap; +use std::path::Path; +use std::sync::Arc; + +use globset::{GlobBuilder, GlobSet, GlobSetBuilder}; +use thread_local::ThreadLocal; + +use pathutil::file_name; +use {Error, Match}; + +const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ + ("asm", &["*.asm", "*.s", "*.S"]), + ("awk", &["*.awk"]), + ("c", &["*.c", "*.h", "*.H"]), + ("cbor", &["*.cbor"]), + ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), + ("cmake", &["*.cmake", "CMakeLists.txt"]), + ("coffeescript", &["*.coffee"]), + ("config", &["*.config"]), + ("cpp", &[ + "*.C", "*.cc", "*.cpp", "*.cxx", + "*.h", "*.H", "*.hh", "*.hpp", + ]), + ("csharp", &["*.cs"]), + ("css", &["*.css"]), + ("cython", &["*.pyx"]), + ("dart", &["*.dart"]), + ("d", &["*.d"]), + ("elisp", &["*.el"]), + ("erlang", &["*.erl", "*.hrl"]), + ("fortran", &[ + "*.f", "*.F", "*.f77", "*.F77", "*.pfo", + "*.f90", "*.F90", "*.f95", "*.F95", + ]), + ("fsharp", &["*.fs", "*.fsx", 
"*.fsi"]), + ("go", &["*.go"]), + ("groovy", &["*.groovy", "*.gradle"]), + ("hbs", &["*.hbs"]), + ("haskell", &["*.hs", "*.lhs"]), + ("html", &["*.htm", "*.html"]), + ("java", &["*.java"]), + ("jinja", &["*.jinja", "*.jinja2"]), + ("js", &[ + "*.js", "*.jsx", "*.vue", + ]), + ("json", &["*.json"]), + ("jsonl", &["*.jsonl"]), + ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), + ("lua", &["*.lua"]), + ("m4", &["*.ac", "*.m4"]), + ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]), + ("markdown", &["*.md"]), + ("md", &["*.md"]), + ("matlab", &["*.m"]), + ("mk", &["mkfile"]), + ("ml", &["*.ml"]), + ("nim", &["*.nim"]), + ("objc", &["*.h", "*.m"]), + ("objcpp", &["*.h", "*.mm"]), + ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), + ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]), + ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), + ("py", &["*.py"]), + ("readme", &["README*", "*README"]), + ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]), + ("rst", &["*.rst"]), + ("ruby", &["*.rb"]), + ("rust", &["*.rs"]), + ("scala", &["*.scala"]), + ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]), + ("spark", &["*.spark"]), + ("sql", &["*.sql"]), + ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), + ("swift", &["*.swift"]), + ("tcl", &["*.tcl"]), + ("tex", &["*.tex", "*.cls", "*.sty"]), + ("ts", &["*.ts", "*.tsx"]), + ("txt", &["*.txt"]), + ("toml", &["*.toml", "Cargo.lock"]), + ("vala", &["*.vala"]), + ("vb", &["*.vb"]), + ("vimscript", &["*.vim"]), + ("xml", &["*.xml"]), + ("yacc", &["*.y"]), + ("yaml", &["*.yaml", "*.yml"]), +]; + +/// Glob represents a single glob in a set of file type definitions. +/// +/// There may be more than one glob for a particular file type. +/// +/// This is used to report information about the highest precedent glob +/// that matched. +/// +/// Note that not all matches necessarily correspond to a specific glob. 
+/// For example, if there are one or more selections and a file path doesn't +/// match any of those selections, then the file path is considered to be +/// ignored. +/// +/// The lifetime `'a` refers to the lifetime of the underlying file type +/// definition, which corresponds to the lifetime of the file type matcher. +#[derive(Clone, Debug)] +pub struct Glob<'a>(GlobInner<'a>); + +#[derive(Clone, Debug)] +enum GlobInner<'a> { + /// No glob matched, but the file path should still be ignored. + UnmatchedIgnore, + /// A glob matched. + Matched { + /// The file type definition which provided the glob. + def: &'a FileTypeDef, + /// The index of the glob that matched inside the file type definition. + which: usize, + /// Whether the selection was negated or not. + negated: bool, + } +} + +impl<'a> Glob<'a> { + fn unmatched() -> Glob<'a> { + Glob(GlobInner::UnmatchedIgnore) + } +} + +/// A single file type definition. +/// +/// File type definitions can be retrieved in aggregate from a file type +/// matcher. File type definitions are also reported when its responsible +/// for a match. +#[derive(Clone, Debug)] +pub struct FileTypeDef { + name: String, + globs: Vec, +} + +impl FileTypeDef { + /// Return the name of this file type. + pub fn name(&self) -> &str { + &self.name + } + + /// Return the globs used to recognize this file type. + pub fn globs(&self) -> &[String] { + &self.globs + } +} + +/// Types is a file type matcher. +#[derive(Clone, Debug)] +pub struct Types { + /// All of the file type definitions, sorted lexicographically by name. + defs: Vec, + /// All of the selections made by the user. + selections: Vec>, + /// Whether there is at least one Selection::Select in our selections. + /// When this is true, a Match::None is converted to Match::Ignore. + has_selected: bool, + /// A mapping from glob index in the set to two indices. 
/// Records how a particular file type was selected: positively or negated.
///
/// Each variant carries the name of the file type together with a value of
/// type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    Select(String, T),
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true only for a negated selection.
    fn is_negated(&self) -> bool {
        match *self {
            Selection::Negate(..) => true,
            Selection::Select(..) => false,
        }
    }

    /// The file type name this selection refers to.
    fn name(&self) -> &str {
        match *self {
            Selection::Select(ref name, _)
            | Selection::Negate(ref name, _) => name,
        }
    }

    /// Transforms the carried value with `f`, keeping the name and the kind
    /// of selection.
    fn map<U, F>(self, f: F) -> Selection<U>
        where F: FnOnce(T) -> U
    {
        match self {
            Selection::Select(name, value) => {
                Selection::Select(name, f(value))
            }
            Selection::Negate(name, value) => {
                Selection::Negate(name, f(value))
            }
        }
    }

    /// Borrows the value carried by this selection.
    fn inner(&self) -> &T {
        match *self {
            Selection::Select(_, ref value)
            | Selection::Negate(_, ref value) => value,
        }
    }
}
    /// Create a new builder for a file type matcher.
    ///
    /// The builder contains *no* type definitions and *no* selections to
    /// start with. A set of default type definitions can be added with
    /// `add_defaults`, and additional type definitions can be added with
    /// `add` and `add_def`. Which of the defined types take part in matching
    /// is chosen separately with `select` and `negate`.
    pub fn new() -> TypesBuilder {
        TypesBuilder {
            types: HashMap::new(),
            selections: vec![],
        }
    }
+ pub fn select(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.selections.push(Selection::Select(name.to_string(), ())); + } + } else { + self.selections.push(Selection::Select(name.to_string(), ())); + } + self + } + + /// Ignore the file type given by `name`. + /// + /// If `name` is `all`, then all file types currently defined are negated. + pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { + if name == "all" { + for name in self.types.keys() { + self.selections.push(Selection::Negate(name.to_string(), ())); + } + } else { + self.selections.push(Selection::Negate(name.to_string(), ())); + } + self + } + + /// Clear any file type definitions for the type name given. + pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { + self.types.remove(name); + self + } + + /// Add a new file type definition. `name` can be arbitrary and `pat` + /// should be a glob recognizing file paths belonging to the `name` type. + /// + /// If `name` is `all` or otherwise contains a `:`, then an error is + /// returned. + pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> { + if name == "all" || name.contains(':') { + return Err(Error::InvalidDefinition); + } + let (key, glob) = (name.to_string(), glob.to_string()); + self.types.entry(key).or_insert_with(|| { + FileTypeDef { name: name.to_string(), globs: vec![] } + }).globs.push(glob); + Ok(()) + } + + /// Add a new file type definition specified in string form. The format + /// is `name:glob`. Names may not include a colon. + pub fn add_def(&mut self, def: &str) -> Result<(), Error> { + let name: String = def.chars().take_while(|&c| c != ':').collect(); + let pat: String = def.chars().skip(name.chars().count() + 1).collect(); + if name.is_empty() || pat.is_empty() { + return Err(Error::InvalidDefinition); + } + self.add(&name, &pat) + } + + /// Add a set of default file type definitions. 
+ pub fn add_defaults(&mut self) -> &mut TypesBuilder { + static MSG: &'static str = "adding a default type should never fail"; + for &(name, exts) in DEFAULT_TYPES { + for ext in exts { + self.add(name, ext).expect(MSG); + } + } + self + } +} + +#[cfg(test)] +mod tests { + use super::TypesBuilder; + + macro_rules! matched { + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, true); + }; + (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr) => { + matched!($name, $types, $sel, $selnot, $path, false); + }; + ($name:ident, $types:expr, $sel:expr, $selnot:expr, + $path:expr, $matched:expr) => { + #[test] + fn $name() { + let mut btypes = TypesBuilder::new(); + for tydef in $types { + btypes.add_def(tydef).unwrap(); + } + for sel in $sel { + btypes.select(sel); + } + for selnot in $selnot { + btypes.negate(selnot); + } + let types = btypes.build().unwrap(); + let mat = types.matched($path, false); + assert_eq!($matched, !mat.is_ignore()); + } + }; + } + + fn types() -> Vec<&'static str> { + vec![ + "html:*.html", + "html:*.htm", + "rust:*.rs", + "js:*.js", + "foo:*.{rs,foo}", + ] + } + + matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); + matched!(match2, types(), vec!["html"], vec![], "index.html"); + matched!(match3, types(), vec!["html"], vec![], "index.htm"); + matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); + matched!(match5, types(), vec![], vec![], "index.html"); + matched!(match6, types(), vec![], vec!["rust"], "index.html"); + matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo"); + + matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); + matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); + matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs"); + matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs"); + matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], 
"main.foo"); +} diff --git a/ignore/src/walk.rs b/ignore/src/walk.rs new file mode 100644 index 00000000..0bcc6136 --- /dev/null +++ b/ignore/src/walk.rs @@ -0,0 +1,592 @@ +use std::ffi::OsStr; +use std::fs::{FileType, Metadata}; +use std::io; +use std::path::{Path, PathBuf}; +use std::vec; + +use walkdir::{self, WalkDir, WalkDirIterator}; + +use dir::{Ignore, IgnoreBuilder}; +use gitignore::GitignoreBuilder; +use overrides::Override; +use types::Types; +use {Error, PartialErrorBuilder}; + +/// WalkBuilder builds a recursive directory iterator. +/// +/// The builder supports a large number of configurable options. This includes +/// specific glob overrides, file type matching, toggling whether hidden +/// files are ignored or not, and of course, support for respecting gitignore +/// files. +/// +/// By default, all ignore files found are respected. This includes `.ignore`, +/// `.gitignore`, `.git/info/exclude` and even your global gitignore +/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`. +/// +/// Some standard recursive directory options are also supported, such as +/// limiting the recursive depth or whether to follow symbolic links (disabled +/// by default). +/// +/// # Ignore rules +/// +/// There are many rules that influence whether a particular file or directory +/// is skipped by this iterator. Those rules are documented here. Note that +/// the rules assume a default configuration. +/// +/// * First, glob overrides are checked. If a path matches a glob override, +/// then matching stops. The path is then only skipped if the glob that matched +/// the path is an ignore glob. (An override glob is a whitelist glob unless it +/// starts with a `!`, in which case it is an ignore glob.) +/// * Second, ignore files are checked. 
Ignore files currently only come from +/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured +/// global gitignore file), plain `.ignore` files, which have the same format +/// as gitignore files, or explicitly added ignore files. The precedence order +/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and +/// finally explicitly added ignore files. Note that precedence between +/// different types of ignore files is not impacted by the directory hierarchy; +/// any `.ignore` file overrides all `.gitignore` files. Within each +/// precedence level, more nested ignore files have a higher precedence over +/// less nested ignore files. +/// * Third, if the previous step yields an ignore match, than all matching +/// is stopped and the path is skipped.. If it yields a whitelist match, then +/// process continues. A whitelist match can be overridden by a later matcher. +/// * Fourth, unless the path is a directory, the file type matcher is run on +/// the path. As above, if it's an ignore match, then all matching is stopped +/// and the path is skipped. If it's a whitelist match, then matching +/// continues. +/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the +/// path is skipped. +/// * Sixth, if the path has made it this far then it is yielded in the +/// iterator. +pub struct WalkBuilder { + paths: Vec, + ig_builder: IgnoreBuilder, + parents: bool, + max_depth: Option, + follow_links: bool, +} + +impl WalkBuilder { + /// Create a new builder for a recursive directory iterator for the + /// directory given. + /// + /// Note that if you want to traverse multiple different directories, it + /// is better to call `add` on this builder than to create multiple + /// `Walk` values. 
+ pub fn new>(path: P) -> WalkBuilder { + WalkBuilder { + paths: vec![path.as_ref().to_path_buf()], + ig_builder: IgnoreBuilder::new(), + parents: true, + max_depth: None, + follow_links: false, + } + } + + /// Build a new `Walk` iterator. + pub fn build(&self) -> Walk { + let follow_links = self.follow_links; + let max_depth = self.max_depth; + let its = self.paths.iter().map(move |p| { + if p == Path::new("-") { + (p.to_path_buf(), None) + } else { + let mut wd = WalkDir::new(p); + wd = wd.follow_links(follow_links || p.is_file()); + if let Some(max_depth) = max_depth { + wd = wd.max_depth(max_depth); + } + (p.to_path_buf(), Some(WalkEventIter::from(wd))) + } + }).collect::>().into_iter(); + let ig_root = self.ig_builder.build(); + Walk { + its: its, + it: None, + ig_root: ig_root.clone(), + ig: ig_root.clone(), + parents: self.parents, + } + } + + /// Add a file path to the iterator. + /// + /// Each additional file path added is traversed recursively. This should + /// be preferred over building multiple `Walk` iterators since this + /// enables reusing resources across iteration. + pub fn add>(&mut self, path: P) -> &mut WalkBuilder { + self.paths.push(path.as_ref().to_path_buf()); + self + } + + /// The maximum depth to recurse. + /// + /// The default, `None`, imposes no depth restriction. + pub fn max_depth(&mut self, depth: Option) -> &mut WalkBuilder { + self.max_depth = depth; + self + } + + /// Whether to follow symbolic links or not. + pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder { + self.follow_links = yes; + self + } + + /// Add an ignore file to the matcher. + /// + /// This has lower precedence than all other sources of ignore rules. + /// + /// If there was a problem adding the ignore file, then an error is + /// returned. Note that the error may indicate *partial* failure. For + /// example, if an ignore file contains an invalid glob, all other globs + /// are still applied. 
+ pub fn add_ignore>(&mut self, path: P) -> Option { + let mut builder = GitignoreBuilder::new(""); + let mut errs = PartialErrorBuilder::default(); + errs.maybe_push_ignore_io(builder.add(path)); + match builder.build() { + Ok(gi) => { self.ig_builder.add_ignore(gi); } + Err(err) => { errs.push(err); } + } + errs.into_error_option() + } + + /// Add an override matcher. + /// + /// By default, no override matcher is used. + /// + /// This overrides any previous setting. + pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder { + self.ig_builder.overrides(overrides); + self + } + + /// Add a file type matcher. + /// + /// By default, no file type matcher is used. + /// + /// This overrides any previous setting. + pub fn types(&mut self, types: Types) -> &mut WalkBuilder { + self.ig_builder.types(types); + self + } + + /// Enables ignoring hidden files. + /// + /// This is enabled by default. + pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.hidden(yes); + self + } + + /// Enables reading ignore files from parent directories. + /// + /// If this is enabled, then the parent directories of each file path given + /// are traversed for ignore files (subject to the ignore settings on + /// this builder). Note that file paths are canonicalized with respect to + /// the current working directory in order to determine parent directories. + /// + /// This is enabled by default. + pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder { + self.parents = yes; + self + } + + /// Enables reading `.ignore` files. + /// + /// `.ignore` files have the same semantics as `gitignore` files and are + /// supported by search tools such as ripgrep and The Silver Searcher. + /// + /// This is enabled by default. + pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.ignore(yes); + self + } + + /// Enables reading a global gitignore file, whose path is specified in + /// git's `core.excludesFile` config option. 
+ /// + /// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig` + /// does not exist or does not specify `core.excludesFile`, then + /// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not + /// set or is empty, then `$HOME/.config/git/ignore` is used instead. + pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_global(yes); + self + } + + /// Enables reading `.gitignore` files. + /// + /// `.gitignore` files have match semantics as described in the `gitignore` + /// man page. + /// + /// This is enabled by default. + pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_ignore(yes); + self + } + + /// Enables reading `.git/info/exclude` files. + /// + /// `.git/info/exclude` files have match semantics as described in the + /// `gitignore` man page. + /// + /// This is enabled by default. + pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder { + self.ig_builder.git_exclude(yes); + self + } +} + +/// Walk is a recursive directory iterator over file paths in a directory. +/// +/// Only file and directory paths matching the rules are returned. By default, +/// ignore files like `.gitignore` are respected. The precise matching rules +/// and precedence is explained in the documentation for `WalkBuilder`. +pub struct Walk { + its: vec::IntoIter<(PathBuf, Option)>, + it: Option, + ig_root: Ignore, + ig: Ignore, + parents: bool, +} + +impl Walk { + /// Creates a new recursive directory iterator for the file path given. + /// + /// Note that this uses default settings, which include respecting + /// `.gitignore` files. To configure the iterator, use `WalkBuilder` + /// instead. + pub fn new>(path: P) -> Walk { + WalkBuilder::new(path).build() + } + + fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool { + if ent.depth() == 0 { + // Never skip the root directory. 
+ return false; + } + let m = self.ig.matched(ent.path(), ent.file_type().is_dir()); + if m.is_ignore() { + debug!("ignoring {}: {:?}", ent.path().display(), m); + return true; + } else if m.is_whitelist() { + debug!("whitelisting {}: {:?}", ent.path().display(), m); + } + false + } +} + +impl Iterator for Walk { + type Item = Result; + + #[inline(always)] + fn next(&mut self) -> Option> { + loop { + let ev = match self.it.as_mut().and_then(|it| it.next()) { + Some(ev) => ev, + None => { + match self.its.next() { + None => return None, + Some((_, None)) => { + return Some(Ok(DirEntry { + dent: None, + err: None, + })); + } + Some((path, Some(it))) => { + self.it = Some(it); + if self.parents && path.is_dir() { + let (ig, err) = self.ig_root.add_parents(path); + self.ig = ig; + if let Some(err) = err { + return Some(Err(err)); + } + } else { + self.ig = self.ig_root.clone(); + } + } + } + continue; + } + }; + match ev { + Err(err) => { + let path = err.path().map(|p| p.to_path_buf()); + let mut ig_err = Error::Io(io::Error::from(err)); + if let Some(path) = path { + ig_err = Error::WithPath { + path: path.to_path_buf(), + err: Box::new(ig_err), + }; + } + return Some(Err(ig_err)); + } + Ok(WalkEvent::Exit) => { + self.ig = self.ig.parent().unwrap(); + } + Ok(WalkEvent::Dir(ent)) => { + if self.skip_entry(&ent) { + self.it.as_mut().unwrap().it.skip_current_dir(); + // Still need to push this on the stack because + // we'll get a WalkEvent::Exit event for this dir. + // We don't care if it errors though. + let (igtmp, _) = self.ig.add_child(ent.path()); + self.ig = igtmp; + continue; + } + let (igtmp, err) = self.ig.add_child(ent.path()); + self.ig = igtmp; + return Some(Ok(DirEntry { dent: Some(ent), err: err })); + } + Ok(WalkEvent::File(ent)) => { + if self.skip_entry(&ent) { + continue; + } + // If this isn't actually a file (e.g., a symlink), + // then skip it. 
+ if !ent.file_type().is_file() { + continue; + } + return Some(Ok(DirEntry { dent: Some(ent), err: None })); + } + } + } + } +} + +/// A directory entry with a possible error attached. +/// +/// The error typically refers to a problem parsing ignore files in a +/// particular directory. +#[derive(Debug)] +pub struct DirEntry { + dent: Option, + err: Option, +} + +impl DirEntry { + /// The full path that this entry represents. + pub fn path(&self) -> &Path { + self.dent.as_ref().map_or(Path::new(""), |x| x.path()) + } + + /// Whether this entry corresponds to a symbolic link or not. + pub fn path_is_symbolic_link(&self) -> bool { + self.dent.as_ref().map_or(false, |x| x.path_is_symbolic_link()) + } + + /// Returns true if and only if this entry corresponds to stdin. + /// + /// i.e., The entry has depth 0 and its file name is `-`. + pub fn is_stdin(&self) -> bool { + self.dent.is_none() + } + + /// Return the metadata for the file that this entry points to. + pub fn metadata(&self) -> Result { + if let Some(dent) = self.dent.as_ref() { + dent.metadata().map_err(|err| Error::WithPath { + path: self.path().to_path_buf(), + err: Box::new(Error::Io(io::Error::from(err))), + }) + } else { + let ioerr = io::Error::new( + io::ErrorKind::Other, "stdin has no metadata"); + Err(Error::WithPath { + path: Path::new("").to_path_buf(), + err: Box::new(Error::Io(ioerr)), + }) + } + } + + /// Return the file type for the file that this entry points to. + /// + /// This entry doesn't have a file type if it corresponds to stdin. + pub fn file_type(&self) -> Option { + self.dent.as_ref().map(|x| x.file_type()) + } + + /// Return the file name of this entry. + /// + /// If this entry has no file name (e.g., `/`), then the full path is + /// returned. + pub fn file_name(&self) -> &OsStr { + self.dent.as_ref().map_or(OsStr::new(""), |x| x.file_name()) + } + + /// Returns the depth at which this entry was created relative to the root. 
+ pub fn depth(&self) -> usize { + self.dent.as_ref().map_or(0, |x| x.depth()) + } + + /// Returns an error, if one exists, associated with processing this entry. + /// + /// An example of an error is one that occurred while parsing an ignore + /// file. + pub fn error(&self) -> Option<&Error> { + self.err.as_ref() + } +} + +/// WalkEventIter transforms a WalkDir iterator into an iterator that more +/// accurately describes the directory tree. Namely, it emits events that are +/// one of three types: directory, file or "exit." An "exit" event means that +/// the entire contents of a directory have been enumerated. +struct WalkEventIter { + depth: usize, + it: walkdir::Iter, + next: Option>, +} + +#[derive(Debug)] +enum WalkEvent { + Dir(walkdir::DirEntry), + File(walkdir::DirEntry), + Exit, +} + +impl From for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { + WalkEventIter { depth: 0, it: it.into_iter(), next: None } + } +} + +impl Iterator for WalkEventIter { + type Item = walkdir::Result; + + #[inline(always)] + fn next(&mut self) -> Option> { + let dent = self.next.take().or_else(|| self.it.next()); + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { + self.depth -= 1; + self.next = dent; + return Some(Ok(WalkEvent::Exit)); + } + self.depth = depth; + match dent { + None => None, + Some(Err(err)) => Some(Err(err)), + Some(Ok(dent)) => { + if dent.file_type().is_dir() { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) + } + } + } + } +} + +#[cfg(test)] +mod tests { + use std::fs::{self, File}; + use std::io::Write; + use std::path::Path; + + use tempdir::TempDir; + + use super::{Walk, WalkBuilder}; + + fn wfile>(path: P, contents: &str) { + let mut file = File::create(path).unwrap(); + file.write_all(contents.as_bytes()).unwrap(); + } + + fn mkdirp>(path: P) { + fs::create_dir_all(path).unwrap(); + } + + fn 
normal_path(unix: &str) -> String { + if cfg!(windows) { + unix.replace("\\", "/") + } else { + unix.to_string() + } + } + + fn walk_collect(prefix: &Path, walk: Walk) -> Vec { + let mut paths = vec![]; + for dent in walk { + let dent = dent.unwrap(); + let path = dent.path().strip_prefix(prefix).unwrap(); + if path.as_os_str().is_empty() { + continue; + } + paths.push(normal_path(path.to_str().unwrap())); + } + paths.sort(); + paths + } + + fn mkpaths(paths: &[&str]) -> Vec { + let mut paths: Vec<_> = paths.iter().map(|s| s.to_string()).collect(); + paths.sort(); + paths + } + + #[test] + fn no_ignores() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a/b/c")); + mkdirp(td.path().join("x/y")); + wfile(td.path().join("a/b/foo"), ""); + wfile(td.path().join("x/y/foo"), ""); + + let got = walk_collect(td.path(), Walk::new(td.path())); + assert_eq!(got, mkpaths(&[ + "x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c", + ])); + } + + #[test] + fn gitignore() { + let td = TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let got = walk_collect(td.path(), Walk::new(td.path())); + assert_eq!(got, mkpaths(&["bar", "a", "a/bar"])); + } + + #[test] + fn explicit_ignore() { + let td = TempDir::new("walk-test-").unwrap(); + let igpath = td.path().join(".not-an-ignore"); + mkdirp(td.path().join("a")); + wfile(&igpath, "foo"); + wfile(td.path().join("foo"), ""); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("bar"), ""); + wfile(td.path().join("a/bar"), ""); + + let mut builder = WalkBuilder::new(td.path()); + assert!(builder.add_ignore(&igpath).is_none()); + let got = walk_collect(td.path(), builder.build()); + assert_eq!(got, mkpaths(&["bar", "a", "a/bar"])); + } + + #[test] + fn gitignore_parent() { + let td = 
TempDir::new("walk-test-").unwrap(); + mkdirp(td.path().join("a")); + wfile(td.path().join(".gitignore"), "foo"); + wfile(td.path().join("a/foo"), ""); + wfile(td.path().join("a/bar"), ""); + + let root = td.path().join("a"); + let got = walk_collect(&root, Walk::new(&root)); + assert_eq!(got, mkpaths(&["bar"])); + } +} diff --git a/src/args.rs b/src/args.rs index 012d9150..9d2923b8 100644 --- a/src/args.rs +++ b/src/args.rs @@ -14,19 +14,17 @@ use term::Terminal; use term; #[cfg(windows)] use term::WinConsole; -use walkdir::WalkDir; use atty; -use gitignore::{Gitignore, GitignoreBuilder}; -use ignore::Ignore; +use ignore::overrides::{Override, OverrideBuilder}; +use ignore::types::{FileTypeDef, Types, TypesBuilder}; +use ignore; use out::{Out, ColoredTerminal}; use printer::Printer; use search_buffer::BufferSearcher; use search_stream::{InputBuffer, Searcher}; #[cfg(windows)] use terminal_win::WindowsBuffer; -use types::{FileTypeDef, Types, TypesBuilder}; -use walk; use Result; @@ -131,6 +129,13 @@ Less common options: Search hidden directories and files. (Hidden directories and files are skipped by default.) + --ignore-file FILE ... + Specify additional ignore files for filtering file paths. Ignore files + should be in the gitignore format and are matched relative to the + current working directory. These ignore files have lower precedence + than all other ignore file types. When specifying multiple ignore + files, earlier files have lower precedence than later files. + -L, --follow Follow symlinks. 
@@ -234,6 +239,7 @@ pub struct RawArgs { flag_heading: bool, flag_hidden: bool, flag_ignore_case: bool, + flag_ignore_file: Vec, flag_invert_match: bool, flag_line_number: bool, flag_fixed_strings: bool, @@ -279,11 +285,12 @@ pub struct Args { eol: u8, files: bool, follow: bool, - glob_overrides: Option, + glob_overrides: Override, grep: Grep, heading: bool, hidden: bool, ignore_case: bool, + ignore_files: Vec, invert_match: bool, line_number: bool, line_per_match: bool, @@ -347,14 +354,13 @@ impl RawArgs { } let glob_overrides = if self.flag_glob.is_empty() { - None + Override::empty() } else { - let cwd = try!(env::current_dir()); - let mut bgi = GitignoreBuilder::new(cwd); + let mut ovr = OverrideBuilder::new(try!(env::current_dir())); for pat in &self.flag_glob { - try!(bgi.add("", pat)); + try!(ovr.add(pat)); } - Some(try!(bgi.build())) + try!(ovr.build()) }; let threads = if self.flag_threads == 0 { @@ -382,6 +388,9 @@ impl RawArgs { let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1; let hidden = self.flag_hidden || self.flag_unrestricted >= 2; let text = self.flag_text || self.flag_unrestricted >= 3; + let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| { + Path::new(p).to_path_buf() + }).collect(); let mut args = Args { paths: paths, after_context: after_context, @@ -399,6 +408,7 @@ impl RawArgs { heading: !self.flag_no_heading && self.flag_heading, hidden: hidden, ignore_case: self.flag_ignore_case, + ignore_files: ignore_files, invert_match: self.flag_invert_match, line_number: !self.flag_no_line_number && self.flag_line_number, line_per_match: self.flag_vimgrep, @@ -711,31 +721,30 @@ impl Args { self.type_list } - /// Create a new recursive directory iterator at the path given. - pub fn walker(&self, path: &Path) -> Result { - // Always follow symlinks for explicitly specified files. 
- let mut wd = WalkDir::new(path).follow_links( - self.follow || path.is_file()); - if let Some(maxdepth) = self.maxdepth { - wd = wd.max_depth(maxdepth); + /// Create a new recursive directory iterator over the paths in argv. + pub fn walker(&self) -> Walk { + let paths = self.paths(); + let mut wd = ignore::WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + wd.add(path); } - let mut ig = Ignore::new(); - // Only register ignore rules if this is a directory. If it's a file, - // then it was explicitly given by the end user, so we always search - // it. - if path.is_dir() { - ig.ignore_hidden(!self.hidden); - ig.no_ignore(self.no_ignore); - ig.no_ignore_vcs(self.no_ignore_vcs); - ig.add_types(self.types.clone()); - if !self.no_ignore_parent { - try!(ig.push_parents(path)); - } - if let Some(ref overrides) = self.glob_overrides { - ig.add_override(overrides.clone()); + for path in &self.ignore_files { + if let Some(err) = wd.add_ignore(path) { + eprintln!("{}", err); } } - Ok(walk::Iter::new(ig, wd)) + + wd.follow_links(self.follow); + wd.hidden(!self.hidden); + wd.max_depth(self.maxdepth); + wd.overrides(self.glob_overrides.clone()); + wd.types(self.types.clone()); + wd.git_global(!self.no_ignore && !self.no_ignore_vcs); + wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs); + wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs); + wd.ignore(!self.no_ignore); + wd.parents(!self.no_ignore_parent); + Walk(wd.build()) } } @@ -752,6 +761,34 @@ fn version() -> String { } } +/// A simple wrapper around the ignore::Walk iterator. This will +/// automatically emit error messages to stderr and will skip directories. 
+pub struct Walk(ignore::Walk); + +impl Iterator for Walk { + type Item = ignore::DirEntry; + + fn next(&mut self) -> Option { + while let Some(result) = self.0.next() { + match result { + Ok(dent) => { + if let Some(err) = dent.error() { + eprintln!("{}", err); + } + if dent.file_type().map_or(false, |x| x.is_dir()) { + continue; + } + return Some(dent); + } + Err(err) => { + eprintln!("{}", err); + } + } + } + None + } +} + /// A single state in the state machine used by `unescape`. #[derive(Clone, Copy, Eq, PartialEq)] enum State { @@ -761,7 +798,7 @@ enum State { Literal, } -/// Unescapes a string given on the command line. It supports a limit set of +/// Unescapes a string given on the command line. It supports a limited set of /// escape sequences: /// /// * \t, \r and \n are mapped to their corresponding ASCII bytes. diff --git a/src/gitignore.rs b/src/gitignore.rs deleted file mode 100644 index 9daeb3cb..00000000 --- a/src/gitignore.rs +++ /dev/null @@ -1,455 +0,0 @@ -/*! -The gitignore module provides a way of reading a gitignore file and applying -it to a particular file name to determine whether it should be ignore or not. -The motivation for this submodule is performance and portability: - -1. There is a gitignore crate on crates.io, but it uses the standard `glob` - crate and checks patterns one-by-one. This is a reasonable implementation, - but not suitable for the performance we need here. -2. We could shell out to a `git` sub-command like ls-files or status, but it - seems better to not rely on the existence of external programs for a search - tool. Besides, we need to implement this logic anyway to support things like - an .ignore file. - -The key implementation detail here is that a single gitignore file is compiled -into a single RegexSet, which can be used to report which globs match a -particular file name. 
We can then do a quick post-processing step to implement -additional rules such as whitelists (prefix of `!`) or directory-only globs -(suffix of `/`). -*/ - -// TODO(burntsushi): Implement something similar, but for Mercurial. We can't -// use this exact implementation because hgignore files are different. - -use std::cell::RefCell; -use std::error::Error as StdError; -use std::fmt; -use std::fs::File; -use std::io::{self, BufRead}; -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder}; -use regex; -use thread_local::ThreadLocal; - -use pathutil::{is_file_name, strip_prefix}; - -/// Represents an error that can occur when parsing a gitignore file. -#[derive(Debug)] -pub enum Error { - Glob(globset::Error), - Regex(regex::Error), - Io(io::Error), -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Glob(ref err) => err.description(), - Error::Regex(ref err) => err.description(), - Error::Io(ref err) => err.description(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Glob(ref err) => err.fmt(f), - Error::Regex(ref err) => err.fmt(f), - Error::Io(ref err) => err.fmt(f), - } - } -} - -impl From for Error { - fn from(err: globset::Error) -> Error { - Error::Glob(err) - } -} - -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} - -impl From for Error { - fn from(err: io::Error) -> Error { - Error::Io(err) - } -} - -/// Gitignore is a matcher for the glob patterns in a single gitignore file. -#[derive(Clone, Debug)] -pub struct Gitignore { - set: GlobSet, - root: PathBuf, - patterns: Vec, - num_ignores: u64, - num_whitelist: u64, - matches: Arc>>>, -} - -impl Gitignore { - /// Create a new gitignore glob matcher from the given root directory and - /// string containing the contents of a gitignore file. 
- #[allow(dead_code)] - fn from_str>( - root: P, - gitignore: &str, - ) -> Result { - let mut builder = GitignoreBuilder::new(root); - try!(builder.add_str(gitignore)); - builder.build() - } - - /// Returns true if and only if the given file path should be ignored - /// according to the globs in this gitignore. `is_dir` should be true if - /// the path refers to a directory and false otherwise. - /// - /// Before matching path, its prefix (as determined by a common suffix - /// of the directory containing this gitignore) is stripped. If there is - /// no common suffix/prefix overlap, then path is assumed to reside in the - /// same directory as this gitignore file. - pub fn matched>(&self, path: P, is_dir: bool) -> Match { - let mut path = path.as_ref(); - if let Some(p) = strip_prefix("./", path) { - path = p; - } - // Strip any common prefix between the candidate path and the root - // of the gitignore, to make sure we get relative matching right. - // BUT, a file name might not have any directory components to it, - // in which case, we don't want to accidentally strip any part of the - // file name. - if !is_file_name(path) { - if let Some(p) = strip_prefix(&self.root, path) { - path = p; - } - } - if let Some(p) = strip_prefix("/", path) { - path = p; - } - self.matched_stripped(path, is_dir) - } - - /// Like matched, but takes a path that has already been stripped. - pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match { - let _matches = self.matches.get_default(); - let mut matches = _matches.borrow_mut(); - let candidate = Candidate::new(path); - self.set.matches_candidate_into(&candidate, &mut *matches); - for &i in matches.iter().rev() { - let pat = &self.patterns[i]; - if !pat.only_dir || is_dir { - return if pat.whitelist { - Match::Whitelist(pat) - } else { - Match::Ignored(pat) - }; - } - } - Match::None - } - - /// Returns the total number of ignore patterns. 
- pub fn num_ignores(&self) -> u64 { - self.num_ignores - } -} - -/// The result of a glob match. -/// -/// The lifetime `'a` refers to the lifetime of the pattern that resulted in -/// a match (whether ignored or whitelisted). -#[derive(Clone, Debug)] -pub enum Match<'a> { - /// The path didn't match any glob in the gitignore file. - None, - /// The last glob matched indicates the path should be ignored. - Ignored(&'a Pattern), - /// The last glob matched indicates the path should be whitelisted. - Whitelist(&'a Pattern), -} - -impl<'a> Match<'a> { - /// Returns true if the match result implies the path should be ignored. - #[allow(dead_code)] - pub fn is_ignored(&self) -> bool { - match *self { - Match::Ignored(_) => true, - Match::None | Match::Whitelist(_) => false, - } - } - - /// Returns true if the match result didn't match any globs. - pub fn is_none(&self) -> bool { - match *self { - Match::None => true, - Match::Ignored(_) | Match::Whitelist(_) => false, - } - } - - /// Inverts the match so that Ignored becomes Whitelisted and Whitelisted - /// becomes Ignored. A non-match remains the same. - pub fn invert(self) -> Match<'a> { - match self { - Match::None => Match::None, - Match::Ignored(pat) => Match::Whitelist(pat), - Match::Whitelist(pat) => Match::Ignored(pat), - } - } -} - -/// GitignoreBuilder constructs a matcher for a single set of globs from a -/// .gitignore file. -pub struct GitignoreBuilder { - builder: GlobSetBuilder, - root: PathBuf, - patterns: Vec, -} - -/// Pattern represents a single pattern in a gitignore file. It doesn't -/// know how to do glob matching directly, but it does store additional -/// options on a pattern, such as whether it's whitelisted. -#[derive(Clone, Debug)] -pub struct Pattern { - /// The file path that this pattern was extracted from (may be empty). - pub from: PathBuf, - /// The original glob pattern string. - pub original: String, - /// The actual glob pattern string used to convert to a regex. 
- pub pat: String, - /// Whether this is a whitelisted pattern or not. - pub whitelist: bool, - /// Whether this pattern should only match directories or not. - pub only_dir: bool, -} - -impl GitignoreBuilder { - /// Create a new builder for a gitignore file. - /// - /// The path given should be the path at which the globs for this gitignore - /// file should be matched. - pub fn new>(root: P) -> GitignoreBuilder { - let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref()); - GitignoreBuilder { - builder: GlobSetBuilder::new(), - root: root.to_path_buf(), - patterns: vec![], - } - } - - /// Builds a new matcher from the glob patterns added so far. - /// - /// Once a matcher is built, no new glob patterns can be added to it. - pub fn build(self) -> Result { - let nignores = self.patterns.iter().filter(|p| !p.whitelist).count(); - let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count(); - Ok(Gitignore { - set: try!(self.builder.build()), - root: self.root, - patterns: self.patterns, - num_ignores: nignores as u64, - num_whitelist: nwhitelist as u64, - matches: Arc::new(ThreadLocal::default()), - }) - } - - /// Add each pattern line from the file path given. - pub fn add_path>(&mut self, path: P) -> Result<(), Error> { - let rdr = io::BufReader::new(try!(File::open(&path))); - debug!("gitignore: {}", path.as_ref().display()); - for (i, line) in rdr.lines().enumerate() { - let line = match line { - Ok(line) => line, - Err(err) => { - debug!("error reading line {} in {}: {}", - i, path.as_ref().display(), err); - continue; - } - }; - if let Err(err) = self.add(&path, &line) { - debug!("error adding gitignore pattern: '{}': {}", line, err); - } - } - Ok(()) - } - - /// Add each pattern line from the string given. - pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> { - for line in gitignore.lines() { - try!(self.add("", line)); - } - Ok(()) - } - - /// Add a line from a gitignore file to this builder. 
- /// - /// If the line could not be parsed as a glob, then an error is returned. - pub fn add>( - &mut self, - from: P, - mut line: &str, - ) -> Result<(), Error> { - if line.starts_with("#") { - return Ok(()); - } - if !line.ends_with("\\ ") { - line = line.trim_right(); - } - if line.is_empty() { - return Ok(()); - } - let mut pat = Pattern { - from: from.as_ref().to_path_buf(), - original: line.to_string(), - pat: String::new(), - whitelist: false, - only_dir: false, - }; - let mut literal_separator = false; - let has_slash = line.chars().any(|c| c == '/'); - let is_absolute = line.chars().nth(0).unwrap() == '/'; - if line.starts_with("\\!") || line.starts_with("\\#") { - line = &line[1..]; - } else { - if line.starts_with("!") { - pat.whitelist = true; - line = &line[1..]; - } - if line.starts_with("/") { - // `man gitignore` says that if a glob starts with a slash, - // then the glob can only match the beginning of a path - // (relative to the location of gitignore). We achieve this by - // simply banning wildcards from matching /. - literal_separator = true; - line = &line[1..]; - } - } - // If it ends with a slash, then this should only match directories, - // but the slash should otherwise not be used while globbing. - if let Some((i, c)) = line.char_indices().rev().nth(0) { - if c == '/' { - pat.only_dir = true; - line = &line[..i]; - } - } - // If there is a literal slash, then we note that so that globbing - // doesn't let wildcards match slashes. - pat.pat = line.to_string(); - if has_slash { - literal_separator = true; - } - // If there was a leading slash, then this is a pattern that must - // match the entire path name. Otherwise, we should let it match - // anywhere, so use a **/ prefix. - if !is_absolute { - // ... but only if we don't already have a **/ prefix. 
- if !pat.pat.starts_with("**/") { - pat.pat = format!("**/{}", pat.pat); - } - } - // If the pattern ends with `/**`, then we should only match everything - // inside a directory, but not the directory itself. Standard globs - // will match the directory. So we add `/*` to force the issue. - if pat.pat.ends_with("/**") { - pat.pat = format!("{}/*", pat.pat); - } - let parsed = try!( - GlobBuilder::new(&pat.pat) - .literal_separator(literal_separator) - .build()); - self.builder.add(parsed); - self.patterns.push(pat); - Ok(()) - } -} - -#[cfg(test)] -mod tests { - use super::Gitignore; - - macro_rules! ignored { - ($name:ident, $root:expr, $gi:expr, $path:expr) => { - ignored!($name, $root, $gi, $path, false); - }; - ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { - #[test] - fn $name() { - let gi = Gitignore::from_str($root, $gi).unwrap(); - assert!(gi.matched($path, $is_dir).is_ignored()); - } - }; - } - - macro_rules! not_ignored { - ($name:ident, $root:expr, $gi:expr, $path:expr) => { - not_ignored!($name, $root, $gi, $path, false); - }; - ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => { - #[test] - fn $name() { - let gi = Gitignore::from_str($root, $gi).unwrap(); - assert!(!gi.matched($path, $is_dir).is_ignored()); - } - }; - } - - const ROOT: &'static str = "/home/foobar/rust/rg"; - - ignored!(ig1, ROOT, "months", "months"); - ignored!(ig2, ROOT, "*.lock", "Cargo.lock"); - ignored!(ig3, ROOT, "*.rs", "src/main.rs"); - ignored!(ig4, ROOT, "src/*.rs", "src/main.rs"); - ignored!(ig5, ROOT, "/*.c", "cat-file.c"); - ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs"); - ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs"); - ignored!(ig8, ROOT, "foo/", "foo", true); - ignored!(ig9, ROOT, "**/foo", "foo"); - ignored!(ig10, ROOT, "**/foo", "src/foo"); - ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar"); - ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz"); - ignored!(ig13, ROOT, "**/foo/bar", "foo/bar"); - ignored!(ig14, 
ROOT, "**/foo/bar", "src/foo/bar"); - ignored!(ig15, ROOT, "abc/**", "abc/x"); - ignored!(ig16, ROOT, "abc/**", "abc/x/y"); - ignored!(ig17, ROOT, "abc/**", "abc/x/y/z"); - ignored!(ig18, ROOT, "a/**/b", "a/b"); - ignored!(ig19, ROOT, "a/**/b", "a/x/b"); - ignored!(ig20, ROOT, "a/**/b", "a/x/y/b"); - ignored!(ig21, ROOT, r"\!xy", "!xy"); - ignored!(ig22, ROOT, r"\#foo", "#foo"); - ignored!(ig23, ROOT, "foo", "./foo"); - ignored!(ig24, ROOT, "target", "grep/target"); - ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); - ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz"); - ignored!(ig27, ROOT, "foo/", "xyz/foo", true); - ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs"); - ignored!(ig29, "./src", "/llvm/", "./src/llvm", true); - ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true); - - not_ignored!(ignot1, ROOT, "amonths", "months"); - not_ignored!(ignot2, ROOT, "monthsa", "months"); - not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs"); - not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c"); - not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs"); - not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs"); - not_ignored!(ignot7, ROOT, "foo/", "foo", false); - not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz"); - not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz"); - not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar"); - not_ignored!(ignot11, ROOT, "#foo", "#foo"); - not_ignored!(ignot12, ROOT, "\n\n\n", "foo"); - not_ignored!(ignot13, ROOT, "foo/**", "foo", true); - not_ignored!( - ignot14, "./third_party/protobuf", "m4/ltoptions.m4", - "./third_party/protobuf/csharp/src/packages/repositories.config"); - - // See: https://github.com/BurntSushi/ripgrep/issues/106 - #[test] - fn regression_106() { - Gitignore::from_str("/", " ").unwrap(); - } -} diff --git a/src/ignore.rs b/src/ignore.rs deleted file mode 100644 index a8cbac1a..00000000 --- a/src/ignore.rs +++ /dev/null 
@@ -1,493 +0,0 @@ -/*! -The ignore module is responsible for managing the state required to determine -whether a *single* file path should be searched or not. - -In general, there are two ways to ignore a particular file: - -1. Specify an ignore rule in some "global" configuration, such as a - $HOME/.ignore or on the command line. -2. A specific ignore file (like .gitignore) found during directory traversal. - -The `IgnoreDir` type handles ignore patterns for any one particular directory -(including "global" ignore patterns), while the `Ignore` type handles a stack -of `IgnoreDir`s for use during directory traversal. -*/ - -use std::error::Error as StdError; -use std::ffi::OsString; -use std::fmt; -use std::io; -use std::path::{Path, PathBuf}; - -use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern}; -use pathutil::{file_name, is_hidden, strip_prefix}; -use types::Types; - -const IGNORE_NAMES: &'static [&'static str] = &[ - ".gitignore", - ".ignore", - ".rgignore", -]; - -/// Represents an error that can occur when parsing a gitignore file. -#[derive(Debug)] -pub enum Error { - Gitignore(gitignore::Error), - Io { - path: PathBuf, - err: io::Error, - }, -} - -impl Error { - fn from_io>(path: P, err: io::Error) -> Error { - Error::Io { path: path.as_ref().to_path_buf(), err: err } - } -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Gitignore(ref err) => err.description(), - Error::Io { ref err, .. } => err.description(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Gitignore(ref err) => err.fmt(f), - Error::Io { ref path, ref err } => { - write!(f, "{}: {}", path.display(), err) - } - } - } -} - -impl From for Error { - fn from(err: gitignore::Error) -> Error { - Error::Gitignore(err) - } -} - -/// Ignore represents a collection of ignore patterns organized by directory. 
-/// In particular, a stack is maintained, where the top of the stack -/// corresponds to the current directory being searched and the bottom of the -/// stack represents the root of a search. Ignore patterns at the top of the -/// stack take precedence over ignore patterns at the bottom of the stack. -pub struct Ignore { - /// A stack of ignore patterns at each directory level of traversal. - /// A directory that contributes no ignore patterns is `None`. - stack: Vec, - /// A stack of parent directories above the root of the current search. - parent_stack: Vec, - /// A set of override globs that are always checked first. A match (whether - /// it's whitelist or blacklist) trumps anything in stack. - overrides: Overrides, - /// A file type matcher. - types: Types, - /// Whether to ignore hidden files or not. - ignore_hidden: bool, - /// When true, don't look at .gitignore or .ignore files for ignore - /// rules. - no_ignore: bool, - /// When true, don't look at .gitignore files for ignore rules. - no_ignore_vcs: bool, -} - -impl Ignore { - /// Create an empty set of ignore patterns. - pub fn new() -> Ignore { - Ignore { - stack: vec![], - parent_stack: vec![], - overrides: Overrides::new(None), - types: Types::empty(), - ignore_hidden: true, - no_ignore: false, - no_ignore_vcs: true, - } - } - - /// Set whether hidden files/folders should be ignored (defaults to true). - pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore { - self.ignore_hidden = yes; - self - } - - /// When set, ignore files are ignored. - pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore { - self.no_ignore = yes; - self - } - - /// When set, VCS ignore files are ignored. - pub fn no_ignore_vcs(&mut self, yes: bool) -> &mut Ignore { - self.no_ignore_vcs = yes; - self - } - - /// Add a set of globs that overrides all other match logic. - pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore { - self.overrides = Overrides::new(Some(gi)); - self - } - - /// Add a file type matcher. 
The file type matcher has the lowest - /// precedence. - pub fn add_types(&mut self, types: Types) -> &mut Ignore { - self.types = types; - self - } - - /// Push parent directories of `path` on to the stack. - pub fn push_parents>( - &mut self, - path: P, - ) -> Result<(), Error> { - let path = try!(path.as_ref().canonicalize().map_err(|err| { - Error::from_io(path.as_ref(), err) - })); - let mut path = &*path; - let mut saw_git = path.join(".git").is_dir(); - let mut ignore_names = IGNORE_NAMES.to_vec(); - if self.no_ignore_vcs { - ignore_names.retain(|&name| name != ".gitignore"); - } - let mut ignore_dir_results = vec![]; - while let Some(parent) = path.parent() { - if self.no_ignore { - ignore_dir_results.push(Ok(IgnoreDir::empty(parent))); - } else { - if saw_git { - ignore_names.retain(|&name| name != ".gitignore"); - } else { - saw_git = parent.join(".git").is_dir(); - } - let ignore_dir_result = - IgnoreDir::with_ignore_names(parent, ignore_names.iter()); - ignore_dir_results.push(ignore_dir_result); - } - path = parent; - } - - for ignore_dir_result in ignore_dir_results.into_iter().rev() { - self.parent_stack.push(try!(ignore_dir_result)); - } - Ok(()) - } - - /// Add a directory to the stack. - /// - /// Note that even if this returns an error, the directory is added to the - /// stack (and therefore should be popped). - pub fn push>(&mut self, path: P) -> Result<(), Error> { - if self.no_ignore { - self.stack.push(IgnoreDir::empty(path)); - Ok(()) - } else if self.no_ignore_vcs { - self.push_ignore_dir(IgnoreDir::without_vcs(path)) - } else { - self.push_ignore_dir(IgnoreDir::new(path)) - } - } - - /// Pushes the result of building a directory matcher on to the stack. - /// - /// If the result given contains an error, then it is returned. 
- pub fn push_ignore_dir( - &mut self, - result: Result, - ) -> Result<(), Error> { - match result { - Ok(id) => { - self.stack.push(id); - Ok(()) - } - Err(err) => { - // Don't leave the stack in an inconsistent state. - self.stack.push(IgnoreDir::empty("error")); - Err(err) - } - } - } - - /// Pop a directory from the stack. - /// - /// This panics if the stack is empty. - pub fn pop(&mut self) { - self.stack.pop().expect("non-empty stack"); - } - - /// Returns true if and only if the given file path should be ignored. - pub fn ignored>(&self, path: P, is_dir: bool) -> bool { - let mut path = path.as_ref(); - if let Some(p) = strip_prefix("./", path) { - path = p; - } - let mat = self.overrides.matched(path, is_dir); - if let Some(is_ignored) = self.ignore_match(path, mat) { - return is_ignored; - } - let mut whitelisted = false; - if !self.no_ignore { - for id in self.stack.iter().rev() { - let mat = id.matched(path, is_dir); - if let Some(is_ignored) = self.ignore_match(path, mat) { - if is_ignored { - return true; - } - // If this path is whitelisted by an ignore, then - // fallthrough and let the file type matcher have a say. - whitelisted = true; - break; - } - } - // If the file has been whitelisted, then we have to stop checking - // parent directories. The only thing that can override a whitelist - // at this point is a type filter. - if !whitelisted { - let mut path = path.to_path_buf(); - for id in self.parent_stack.iter().rev() { - if let Some(ref dirname) = id.name { - path = Path::new(dirname).join(path); - } - let mat = id.matched(&*path, is_dir); - if let Some(is_ignored) = self.ignore_match(&*path, mat) { - if is_ignored { - return true; - } - // If this path is whitelisted by an ignore, then - // fallthrough and let the file type matcher have a - // say. 
- whitelisted = true; - break; - } - } - } - } - let mat = self.types.matched(path, is_dir); - if let Some(is_ignored) = self.ignore_match(path, mat) { - if is_ignored { - return true; - } - whitelisted = true; - } - if !whitelisted && self.ignore_hidden && is_hidden(&path) { - debug!("{} ignored because it is hidden", path.display()); - return true; - } - false - } - - /// Returns true if the given match says the given pattern should be - /// ignored or false if the given pattern should be explicitly whitelisted. - /// Returns None otherwise. - pub fn ignore_match>( - &self, - path: P, - mat: Match, - ) -> Option { - let path = path.as_ref(); - match mat { - Match::Whitelist(ref pat) => { - debug!("{} whitelisted by {:?}", path.display(), pat); - Some(false) - } - Match::Ignored(ref pat) => { - debug!("{} ignored by {:?}", path.display(), pat); - Some(true) - } - Match::None => None, - } - } -} - -/// IgnoreDir represents a set of ignore patterns retrieved from a single -/// directory. -#[derive(Debug)] -pub struct IgnoreDir { - /// The path to this directory as given. - path: PathBuf, - /// The directory name, if one exists. - name: Option, - /// A single accumulation of glob patterns for this directory, matched - /// using gitignore semantics. - /// - /// This will include patterns from rgignore as well. The patterns are - /// ordered so that precedence applies automatically (e.g., rgignore - /// patterns procede gitignore patterns). - gi: Option, - // TODO(burntsushi): Matching other types of glob patterns that don't - // conform to gitignore will probably require refactoring this approach. -} - -impl IgnoreDir { - /// Create a new matcher for the given directory. - pub fn new>(path: P) -> Result { - IgnoreDir::with_ignore_names(path, IGNORE_NAMES.iter()) - } - - /// Create a new matcher for the given directory. - /// - /// Don't respect VCS ignore files. 
- pub fn without_vcs>(path: P) -> Result { - let names = IGNORE_NAMES.iter().filter(|name| **name != ".gitignore"); - IgnoreDir::with_ignore_names(path, names) - } - - /// Create a new IgnoreDir that never matches anything with the given path. - pub fn empty>(path: P) -> IgnoreDir { - IgnoreDir { - path: path.as_ref().to_path_buf(), - name: file_name(path.as_ref()).map(|s| s.to_os_string()), - gi: None, - } - } - - /// Create a new matcher for the given directory using only the ignore - /// patterns found in the file names given. - /// - /// If no ignore glob patterns could be found in the directory then `None` - /// is returned. - /// - /// Note that the order of the names given is meaningful. Names appearing - /// later in the list have precedence over names appearing earlier in the - /// list. - pub fn with_ignore_names, S, I>( - path: P, - names: I, - ) -> Result - where P: AsRef, S: AsRef, I: Iterator { - let mut id = IgnoreDir::empty(path); - let mut ok = false; - let mut builder = GitignoreBuilder::new(&id.path); - // The ordering here is important. Later globs have higher precedence. - for name in names { - ok = builder.add_path(id.path.join(name.as_ref())).is_ok() || ok; - } - if !ok { - return Ok(id); - } - id.gi = Some(try!(builder.build())); - Ok(id) - } - - /// Returns true if and only if the given file path should be ignored - /// according to the globs in this directory. `is_dir` should be true if - /// the path refers to a directory and false otherwise. - /// - /// Before matching path, its prefix (as determined by a common suffix - /// of this directory) is stripped. If there is - /// no common suffix/prefix overlap, then path is assumed to reside - /// directly in this directory. - /// - /// If the given path has a `./` prefix then it is stripped before - /// matching. 
- pub fn matched>(&self, path: P, is_dir: bool) -> Match { - self.gi.as_ref() - .map(|gi| gi.matched(path, is_dir)) - .unwrap_or(Match::None) - } -} - -/// Manages a set of overrides provided explicitly by the end user. -struct Overrides { - gi: Option, - unmatched_pat: Pattern, -} - -impl Overrides { - /// Creates a new set of overrides from the gitignore matcher provided. - /// If no matcher is provided, then the resulting overrides have no effect. - fn new(gi: Option) -> Overrides { - Overrides { - gi: gi, - unmatched_pat: Pattern { - from: Path::new("").to_path_buf(), - original: "".to_string(), - pat: "".to_string(), - whitelist: false, - only_dir: false, - }, - } - } - - /// Returns a match for the given path against this set of overrides. - /// - /// If there are no overrides, then this always returns Match::None. - /// - /// If there is at least one positive override, then this never returns - /// Match::None (and interpreting non-matches as ignored) unless is_dir - /// is true. - pub fn matched>(&self, path: P, is_dir: bool) -> Match { - let path = path.as_ref(); - self.gi.as_ref() - .map(|gi| { - let mat = gi.matched_stripped(path, is_dir).invert(); - if mat.is_none() && !is_dir { - if gi.num_ignores() > 0 { - return Match::Ignored(&self.unmatched_pat); - } - } - mat - }) - .unwrap_or(Match::None) - } -} - -#[cfg(test)] -mod tests { - use std::path::Path; - use gitignore::GitignoreBuilder; - use super::IgnoreDir; - - macro_rules! ignored_dir { - ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => { - #[test] - fn $name() { - let mut builder = GitignoreBuilder::new(&$root); - builder.add_str($gi).unwrap(); - builder.add_str($xi).unwrap(); - let gi = builder.build().unwrap(); - let id = IgnoreDir { - path: Path::new($root).to_path_buf(), - name: Path::new($root).file_name().map(|s| { - s.to_os_string() - }), - gi: Some(gi), - }; - assert!(id.matched($path, false).is_ignored()); - } - }; - } - - macro_rules! 
not_ignored_dir { - ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => { - #[test] - fn $name() { - let mut builder = GitignoreBuilder::new(&$root); - builder.add_str($gi).unwrap(); - builder.add_str($xi).unwrap(); - let gi = builder.build().unwrap(); - let id = IgnoreDir { - path: Path::new($root).to_path_buf(), - name: Path::new($root).file_name().map(|s| { - s.to_os_string() - }), - gi: Some(gi), - }; - assert!(!id.matched($path, false).is_ignored()); - } - }; - } - - const ROOT: &'static str = "/home/foobar/rust/rg"; - - ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs"); - ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs"); - ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs"); - - not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs"); -} diff --git a/src/main.rs b/src/main.rs index 7a6ac021..e64ddf9c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,8 +1,8 @@ extern crate deque; extern crate docopt; extern crate env_logger; -extern crate globset; extern crate grep; +extern crate ignore; #[cfg(windows)] extern crate kernel32; #[macro_use] @@ -16,8 +16,6 @@ extern crate num_cpus; extern crate regex; extern crate rustc_serialize; extern crate term; -extern crate thread_local; -extern crate walkdir; #[cfg(windows)] extern crate winapi; @@ -36,7 +34,7 @@ use deque::{Stealer, Stolen}; use grep::Grep; use memmap::{Mmap, Protection}; use term::Terminal; -use walkdir::DirEntry; +use ignore::DirEntry; use args::Args; use out::{ColoredTerminal, Out}; @@ -61,8 +59,6 @@ macro_rules! 
eprintln { mod args; mod atty; -mod gitignore; -mod ignore; mod out; mod pathutil; mod printer; @@ -70,8 +66,6 @@ mod search_buffer; mod search_stream; #[cfg(windows)] mod terminal_win; -mod types; -mod walk; pub type Result = result::Result>; @@ -101,7 +95,6 @@ fn run(args: Args) -> Result { if threads == 1 || isone { return run_one_thread(args.clone()); } - let out = Arc::new(Mutex::new(args.out())); let quiet_matched = QuietMatched::new(args.quiet()); let mut workers = vec![]; @@ -126,21 +119,15 @@ fn run(args: Args) -> Result { workq }; let mut paths_searched: u64 = 0; - for p in paths { + for dent in args.walker() { if quiet_matched.has_match() { break; } - if p == Path::new("-") { - paths_searched += 1; + paths_searched += 1; + if dent.is_stdin() { workq.push(Work::Stdin); } else { - for ent in try!(args.walker(p)) { - if quiet_matched.has_match() { - break; - } - paths_searched += 1; - workq.push(Work::File(ent)); - } + workq.push(Work::File(dent)); } } if !paths.is_empty() && paths_searched == 0 { @@ -165,47 +152,33 @@ fn run_one_thread(args: Arc) -> Result { grep: args.grep(), match_count: 0, }; - let paths = args.paths(); let mut term = args.stdout(); - let mut paths_searched: u64 = 0; - for p in paths { - if args.quiet() && worker.match_count > 0 { - break; - } - if p == Path::new("-") { - paths_searched += 1; - let mut printer = args.printer(&mut term); - if worker.match_count > 0 { - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } + for dent in args.walker() { + let mut printer = args.printer(&mut term); + if worker.match_count > 0 { + if args.quiet() { + break; } + if let Some(sep) = args.file_separator() { + printer = printer.file_separator(sep); + } + } + paths_searched += 1; + if dent.is_stdin() { worker.do_work(&mut printer, WorkReady::Stdin); } else { - for ent in try!(args.walker(p)) { - paths_searched += 1; - let mut printer = args.printer(&mut term); - if worker.match_count > 0 { - if args.quiet() { - 
break; - } - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } + let file = match File::open(dent.path()) { + Ok(file) => file, + Err(err) => { + eprintln!("{}: {}", dent.path().display(), err); + continue; } - let file = match File::open(ent.path()) { - Ok(file) => file, - Err(err) => { - eprintln!("{}: {}", ent.path().display(), err); - continue; - } - }; - worker.do_work(&mut printer, WorkReady::DirFile(ent, file)); - } + }; + worker.do_work(&mut printer, WorkReady::DirFile(dent, file)); } } - if !paths.is_empty() && paths_searched == 0 { + if !args.paths().is_empty() && paths_searched == 0 { eprintln!("No files were searched, which means ripgrep probably \ applied a filter you didn't expect. \ Try running again with --debug."); @@ -217,16 +190,9 @@ fn run_files(args: Arc) -> Result { let term = args.stdout(); let mut printer = args.printer(term); let mut file_count = 0; - for p in args.paths() { - if p == Path::new("-") { - printer.path(&Path::new("")); - file_count += 1; - } else { - for ent in try!(args.walker(p)) { - printer.path(ent.path()); - file_count += 1; - } - } + for dent in args.walker() { + printer.path(dent.path()); + file_count += 1; } Ok(file_count) } diff --git a/src/pathutil.rs b/src/pathutil.rs index 073010be..085c9dbc 100644 --- a/src/pathutil.rs +++ b/src/pathutil.rs @@ -8,7 +8,6 @@ with the raw bytes directly. On large repositories (like chromium), this can have a ~25% performance improvement on just listing the files to search (!). */ -use std::ffi::OsStr; use std::path::Path; /// Strip `prefix` from the `path` and return the remainder. 
@@ -19,6 +18,7 @@ pub fn strip_prefix<'a, P: AsRef + ?Sized>( prefix: &'a P, path: &'a Path, ) -> Option<&'a Path> { + use std::ffi::OsStr; use std::os::unix::ffi::OsStrExt; let prefix = prefix.as_ref().as_os_str().as_bytes(); @@ -40,79 +40,3 @@ pub fn strip_prefix<'a, P: AsRef + ?Sized>( ) -> Option<&'a Path> { path.strip_prefix(prefix).ok() } - -/// The final component of the path, if it is a normal file. -/// -/// If the path terminates in ., .., or consists solely of a root of prefix, -/// file_name will return None. -#[cfg(unix)] -pub fn file_name<'a, P: AsRef + ?Sized>( - path: &'a P, -) -> Option<&'a OsStr> { - use std::os::unix::ffi::OsStrExt; - use memchr::memrchr; - - let path = path.as_ref().as_os_str().as_bytes(); - if path.is_empty() { - return None; - } else if path.len() == 1 && path[0] == b'.' { - return None; - } else if path.last() == Some(&b'.') { - return None; - } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] { - return None; - } - let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0); - Some(OsStr::from_bytes(&path[last_slash..])) -} - -/// The final component of the path, if it is a normal file. -/// -/// If the path terminates in ., .., or consists solely of a root of prefix, -/// file_name will return None. -#[cfg(not(unix))] -pub fn file_name<'a, P: AsRef + ?Sized>( - path: &'a P, -) -> Option<&'a OsStr> { - path.as_ref().file_name() -} - -/// Returns true if and only if this file path is considered to be hidden. -#[cfg(unix)] -pub fn is_hidden>(path: P) -> bool { - use std::os::unix::ffi::OsStrExt; - - if let Some(name) = file_name(path.as_ref()) { - name.as_bytes().get(0) == Some(&b'.') - } else { - false - } -} - -/// Returns true if and only if this file path is considered to be hidden. 
-#[cfg(not(unix))] -pub fn is_hidden>(path: P) -> bool { - if let Some(name) = file_name(path.as_ref()) { - name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) - } else { - false - } -} - -/// Returns true if this file path is just a file name. i.e., Its parent is -/// the empty string. -#[cfg(unix)] -pub fn is_file_name>(path: P) -> bool { - use std::os::unix::ffi::OsStrExt; - use memchr::memchr; - - let path = path.as_ref().as_os_str().as_bytes(); - memchr(b'/', path).is_none() -} - -/// Returns true if this file path is just a file name. i.e., Its parent is -/// the empty string. -#[cfg(not(unix))] -pub fn is_file_name>(path: P) -> bool { - path.as_ref().parent().map(|p| p.as_os_str().is_empty()).unwrap_or(false) -} diff --git a/src/printer.rs b/src/printer.rs index 9a5c649d..e7373bce 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -5,7 +5,7 @@ use term::{Attr, Terminal}; use term::color; use pathutil::strip_prefix; -use types::FileTypeDef; +use ignore::types::FileTypeDef; /// Printer encapsulates all output logic for searching. /// @@ -168,11 +168,11 @@ impl Printer { self.write(def.name().as_bytes()); self.write(b": "); let mut first = true; - for pat in def.patterns() { + for glob in def.globs() { if !first { self.write(b", "); } - self.write(pat.as_bytes()); + self.write(glob.as_bytes()); first = false; } self.write_eol(); diff --git a/src/terminal.rs b/src/terminal.rs deleted file mode 100644 index e69de29b..00000000 diff --git a/src/types.rs b/src/types.rs deleted file mode 100644 index 7c33a48c..00000000 --- a/src/types.rs +++ /dev/null @@ -1,458 +0,0 @@ -/*! -The types module provides a way of associating glob patterns on file names to -file types. 
-*/ - -use std::collections::HashMap; -use std::error::Error as StdError; -use std::fmt; -use std::path::Path; - -use regex; - -use gitignore::{Match, Pattern}; -use globset::{self, GlobBuilder, GlobSet, GlobSetBuilder}; - -const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[ - ("asm", &["*.asm", "*.s", "*.S"]), - ("awk", &["*.awk"]), - ("c", &["*.c", "*.h", "*.H"]), - ("cbor", &["*.cbor"]), - ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]), - ("cmake", &["*.cmake", "CMakeLists.txt"]), - ("coffeescript", &["*.coffee"]), - ("config", &["*.config"]), - ("cpp", &[ - "*.C", "*.cc", "*.cpp", "*.cxx", - "*.h", "*.H", "*.hh", "*.hpp", - ]), - ("csharp", &["*.cs"]), - ("css", &["*.css"]), - ("cython", &["*.pyx"]), - ("dart", &["*.dart"]), - ("d", &["*.d"]), - ("elisp", &["*.el"]), - ("erlang", &["*.erl", "*.hrl"]), - ("fortran", &[ - "*.f", "*.F", "*.f77", "*.F77", "*.pfo", - "*.f90", "*.F90", "*.f95", "*.F95", - ]), - ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), - ("go", &["*.go"]), - ("groovy", &["*.groovy", "*.gradle"]), - ("hbs", &["*.hbs"]), - ("haskell", &["*.hs", "*.lhs"]), - ("html", &["*.htm", "*.html"]), - ("java", &["*.java"]), - ("jinja", &["*.jinja", "*.jinja2"]), - ("js", &[ - "*.js", "*.jsx", "*.vue", - ]), - ("json", &["*.json"]), - ("jsonl", &["*.jsonl"]), - ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), - ("lua", &["*.lua"]), - ("m4", &["*.ac", "*.m4"]), - ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]), - ("markdown", &["*.md"]), - ("md", &["*.md"]), - ("matlab", &["*.m"]), - ("mk", &["mkfile"]), - ("ml", &["*.ml"]), - ("nim", &["*.nim"]), - ("objc", &["*.h", "*.m"]), - ("objcpp", &["*.h", "*.mm"]), - ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]), - ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]), - ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), - ("py", &["*.py", "*.pyx"]), - ("readme", &["README*", "*README"]), - ("r", &["*.R", "*.r", "*.Rmd", 
"*.Rnw"]), - ("rst", &["*.rst"]), - ("ruby", &["*.rb"]), - ("rust", &["*.rs"]), - ("scala", &["*.scala"]), - ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]), - ("spark", &["*.spark"]), - ("sql", &["*.sql"]), - ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]), - ("swift", &["*.swift"]), - ("tcl", &["*.tcl"]), - ("tex", &["*.tex", "*.cls", "*.sty"]), - ("ts", &["*.ts", "*.tsx"]), - ("txt", &["*.txt"]), - ("toml", &["*.toml", "Cargo.lock"]), - ("vala", &["*.vala"]), - ("vb", &["*.vb"]), - ("vimscript", &["*.vim"]), - ("xml", &["*.xml"]), - ("yacc", &["*.y"]), - ("yaml", &["*.yaml", "*.yml"]), - ("zsh", &["*.zsh", ".zshenv", ".zlogin", ".zprofile", ".zshrc"]), -]; - -/// Describes all the possible failure conditions for building a file type -/// matcher. -#[derive(Debug)] -pub enum Error { - /// We tried to select (or negate) a file type that is not defined. - UnrecognizedFileType(String), - /// A user specified file type definition could not be parsed. - InvalidDefinition, - /// There was an error building the matcher (probably a bad glob). - Glob(globset::Error), - /// There was an error compiling a glob as a regex. 
- Regex(regex::Error), -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::UnrecognizedFileType(_) => "unrecognized file type", - Error::InvalidDefinition => "invalid definition", - Error::Glob(ref err) => err.description(), - Error::Regex(ref err) => err.description(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::UnrecognizedFileType(ref ty) => { - write!(f, "unrecognized file type: {}", ty) - } - Error::InvalidDefinition => { - write!(f, "invalid definition (format is type:glob, e.g., \ - html:*.html)") - } - Error::Glob(ref err) => err.fmt(f), - Error::Regex(ref err) => err.fmt(f), - } - } -} - -impl From for Error { - fn from(err: globset::Error) -> Error { - Error::Glob(err) - } -} - -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} - -/// A single file type definition. -#[derive(Clone, Debug)] -pub struct FileTypeDef { - name: String, - pats: Vec, -} - -impl FileTypeDef { - /// Return the name of this file type. - pub fn name(&self) -> &str { - &self.name - } - - /// Return the glob patterns used to recognize this file type. - pub fn patterns(&self) -> &[String] { - &self.pats - } -} - -/// Types is a file type matcher. -#[derive(Clone, Debug)] -pub struct Types { - defs: Vec, - selected: Option, - negated: Option, - has_selected: bool, - unmatched_pat: Pattern, -} - -impl Types { - /// Creates a new file type matcher from the given Gitignore matcher. If - /// not Gitignore matcher is provided, then the file type matcher has no - /// effect. - /// - /// If has_selected is true, then at least one file type was selected. - /// Therefore, any non-matches should be ignored. 
- fn new( - selected: Option, - negated: Option, - has_selected: bool, - defs: Vec, - ) -> Types { - Types { - defs: defs, - selected: selected, - negated: negated, - has_selected: has_selected, - unmatched_pat: Pattern { - from: Path::new("").to_path_buf(), - original: "".to_string(), - pat: "".to_string(), - whitelist: false, - only_dir: false, - }, - } - } - - /// Creates a new file type matcher that never matches. - pub fn empty() -> Types { - Types::new(None, None, false, vec![]) - } - - /// Returns a match for the given path against this file type matcher. - /// - /// The path is considered whitelisted if it matches a selected file type. - /// The path is considered ignored if it matched a negated file type. - /// If at least one file type is selected and path doesn't match, then - /// the path is also considered ignored. - pub fn matched>(&self, path: P, is_dir: bool) -> Match { - // If we don't have any matcher, then we can't do anything. - if self.negated.is_none() && self.selected.is_none() { - return Match::None; - } - // File types don't apply to directories. - if is_dir { - return Match::None; - } - let path = path.as_ref(); - let name = match path.file_name() { - Some(name) => name.to_string_lossy(), - None if self.has_selected => { - return Match::Ignored(&self.unmatched_pat); - } - None => { - return Match::None; - } - }; - if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) { - return Match::Ignored(&self.unmatched_pat); - } - if self.selected.as_ref().map(|s|s.is_match(&*name)).unwrap_or(false) { - return Match::Whitelist(&self.unmatched_pat); - } - if self.has_selected { - Match::Ignored(&self.unmatched_pat) - } else { - Match::None - } - } - - /// Return the set of current file type definitions. - pub fn definitions(&self) -> &[FileTypeDef] { - &self.defs - } -} - -/// TypesBuilder builds a type matcher from a set of file type definitions and -/// a set of file type selections. 
-pub struct TypesBuilder { - types: HashMap>, - selected: Vec, - negated: Vec, -} - -impl TypesBuilder { - /// Create a new builder for a file type matcher. - pub fn new() -> TypesBuilder { - TypesBuilder { - types: HashMap::new(), - selected: vec![], - negated: vec![], - } - } - - /// Build the current set of file type definitions *and* selections into - /// a file type matcher. - pub fn build(&self) -> Result { - let selected_globs = - if self.selected.is_empty() { - None - } else { - let mut bset = GlobSetBuilder::new(); - for name in &self.selected { - let globs = match self.types.get(name) { - Some(globs) => globs, - None => { - let msg = name.to_string(); - return Err(Error::UnrecognizedFileType(msg)); - } - }; - for glob in globs { - let pat = try!( - GlobBuilder::new(glob) - .literal_separator(true).build()); - bset.add(pat); - } - } - Some(try!(bset.build())) - }; - let negated_globs = - if self.negated.is_empty() { - None - } else { - let mut bset = GlobSetBuilder::new(); - for name in &self.negated { - let globs = match self.types.get(name) { - Some(globs) => globs, - None => { - let msg = name.to_string(); - return Err(Error::UnrecognizedFileType(msg)); - } - }; - for glob in globs { - let pat = try!( - GlobBuilder::new(glob) - .literal_separator(true).build()); - bset.add(pat); - } - } - Some(try!(bset.build())) - }; - Ok(Types::new( - selected_globs, - negated_globs, - !self.selected.is_empty(), - self.definitions(), - )) - } - - /// Return the set of current file type definitions. - pub fn definitions(&self) -> Vec { - let mut defs = vec![]; - for (ref name, ref pats) in &self.types { - let mut pats = pats.to_vec(); - pats.sort(); - defs.push(FileTypeDef { - name: name.to_string(), - pats: pats, - }); - } - defs.sort_by(|def1, def2| def1.name().cmp(def2.name())); - defs - } - - /// Select the file type given by `name`. - /// - /// If `name` is `all`, then all file types are selected. 
- pub fn select(&mut self, name: &str) -> &mut TypesBuilder { - if name == "all" { - for name in self.types.keys() { - self.selected.push(name.to_string()); - } - } else { - self.selected.push(name.to_string()); - } - self - } - - /// Ignore the file type given by `name`. - /// - /// If `name` is `all`, then all file types are negated. - pub fn negate(&mut self, name: &str) -> &mut TypesBuilder { - if name == "all" { - for name in self.types.keys() { - self.negated.push(name.to_string()); - } - } else { - self.negated.push(name.to_string()); - } - self - } - - /// Clear any file type definitions for the type given. - pub fn clear(&mut self, name: &str) -> &mut TypesBuilder { - self.types.remove(name); - self - } - - /// Add a new file type definition. `name` can be arbitrary and `pat` - /// should be a glob recognizing file paths belonging to the `name` type. - pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder { - self.types.entry(name.to_string()) - .or_insert(vec![]).push(pat.to_string()); - self - } - - /// Add a new file type definition specified in string form. The format - /// is `name:glob`. Names may not include a colon. - pub fn add_def(&mut self, def: &str) -> Result<(), Error> { - let name: String = def.chars().take_while(|&c| c != ':').collect(); - let pat: String = def.chars().skip(name.chars().count() + 1).collect(); - if name.is_empty() || pat.is_empty() { - return Err(Error::InvalidDefinition); - } - self.add(&name, &pat); - Ok(()) - } - - /// Add a set of default file type definitions. - pub fn add_defaults(&mut self) -> &mut TypesBuilder { - for &(name, exts) in TYPE_EXTENSIONS { - for ext in exts { - self.add(name, ext); - } - } - self - } -} - -#[cfg(test)] -mod tests { - use super::TypesBuilder; - - macro_rules! 
matched { - ($name:ident, $types:expr, $sel:expr, $selnot:expr, - $path:expr) => { - matched!($name, $types, $sel, $selnot, $path, true); - }; - (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, - $path:expr) => { - matched!($name, $types, $sel, $selnot, $path, false); - }; - ($name:ident, $types:expr, $sel:expr, $selnot:expr, - $path:expr, $matched:expr) => { - #[test] - fn $name() { - let mut btypes = TypesBuilder::new(); - for tydef in $types { - btypes.add_def(tydef).unwrap(); - } - for sel in $sel { - btypes.select(sel); - } - for selnot in $selnot { - btypes.negate(selnot); - } - let types = btypes.build().unwrap(); - let mat = types.matched($path, false); - assert_eq!($matched, !mat.is_ignored()); - } - }; - } - - fn types() -> Vec<&'static str> { - vec![ - "html:*.html", - "html:*.htm", - "rust:*.rs", - "js:*.js", - ] - } - - matched!(match1, types(), vec!["rust"], vec![], "lib.rs"); - matched!(match2, types(), vec!["html"], vec![], "index.html"); - matched!(match3, types(), vec!["html"], vec![], "index.htm"); - matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs"); - matched!(match5, types(), vec![], vec![], "index.html"); - matched!(match6, types(), vec![], vec!["rust"], "index.html"); - - matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html"); - matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs"); -} diff --git a/src/walk.rs b/src/walk.rs deleted file mode 100644 index f661c4cf..00000000 --- a/src/walk.rs +++ /dev/null @@ -1,140 +0,0 @@ -/*! -The walk module implements a recursive directory iterator (using the `walkdir`) -crate that can efficiently skip and ignore files and directories specified in -a user's ignore patterns. -*/ - -use walkdir::{self, DirEntry, WalkDir, WalkDirIterator}; - -use ignore::Ignore; - -/// Iter is a recursive directory iterator over file paths in a directory. -/// Only file paths should be searched are yielded. 
-pub struct Iter { - ig: Ignore, - it: WalkEventIter, -} - -impl Iter { - /// Create a new recursive directory iterator using the ignore patterns - /// and walkdir iterator given. - pub fn new(ig: Ignore, wd: WalkDir) -> Iter { - Iter { - ig: ig, - it: WalkEventIter::from(wd), - } - } - - /// Returns true if this entry should be skipped. - #[inline(always)] - fn skip_entry(&self, ent: &DirEntry) -> bool { - if ent.depth() == 0 { - // Never skip the root directory. - return false; - } - if self.ig.ignored(ent.path(), ent.file_type().is_dir()) { - return true; - } - false - } -} - -impl Iterator for Iter { - type Item = DirEntry; - - #[inline(always)] - fn next(&mut self) -> Option { - while let Some(ev) = self.it.next() { - match ev { - Err(err) => { - eprintln!("{}", err); - } - Ok(WalkEvent::Exit) => { - self.ig.pop(); - } - Ok(WalkEvent::Dir(ent)) => { - if self.skip_entry(&ent) { - self.it.it.skip_current_dir(); - // Still need to push this on the stack because we'll - // get a WalkEvent::Exit event for this dir. We don't - // care if it errors though. - let _ = self.ig.push(ent.path()); - continue; - } - if let Err(err) = self.ig.push(ent.path()) { - eprintln!("{}", err); - self.it.it.skip_current_dir(); - continue; - } - } - Ok(WalkEvent::File(ent)) => { - if self.skip_entry(&ent) { - continue; - } - // If this isn't actually a file (e.g., a symlink), then - // skip it. - if !ent.file_type().is_file() { - continue; - } - return Some(ent); - } - } - } - None - } -} - -/// WalkEventIter transforms a WalkDir iterator into an iterator that more -/// accurately describes the directory tree. Namely, it emits events that are -/// one of three types: directory, file or "exit." An "exit" event means that -/// the entire contents of a directory have been enumerated. 
-struct WalkEventIter { - depth: usize, - it: walkdir::Iter, - next: Option>, -} - -#[derive(Debug)] -enum WalkEvent { - Dir(DirEntry), - File(DirEntry), - Exit, -} - -impl From for WalkEventIter { - fn from(it: WalkDir) -> WalkEventIter { - WalkEventIter { depth: 0, it: it.into_iter(), next: None } - } -} - -impl Iterator for WalkEventIter { - type Item = walkdir::Result; - - #[inline(always)] - fn next(&mut self) -> Option> { - let dent = self.next.take().or_else(|| self.it.next()); - let depth = match dent { - None => 0, - Some(Ok(ref dent)) => dent.depth(), - Some(Err(ref err)) => err.depth(), - }; - if depth < self.depth { - self.depth -= 1; - self.next = dent; - return Some(Ok(WalkEvent::Exit)); - } - self.depth = depth; - match dent { - None => None, - Some(Err(err)) => Some(Err(err)), - Some(Ok(dent)) => { - if dent.file_type().is_dir() { - self.depth += 1; - Some(Ok(WalkEvent::Dir(dent))) - } else { - Some(Ok(WalkEvent::File(dent))) - } - } - } - } -} diff --git a/tests/tests.rs b/tests/tests.rs index a559045c..795c0996 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -54,6 +54,20 @@ fn path(unix: &str) -> String { } } +fn paths(unix: &[&str]) -> Vec { + let mut xs: Vec<_> = unix.iter().map(|s| path(s)).collect(); + xs.sort(); + xs +} + +fn paths_from_stdout(stdout: String) -> Vec { + let mut paths: Vec<_> = stdout.lines().map(|s| { + s.split(":").next().unwrap().to_string() + }).collect(); + paths.sort(); + paths +} + fn sort_lines(lines: &str) -> String { let mut lines: Vec = lines.trim().lines().map(|s| s.to_owned()).collect(); @@ -864,6 +878,74 @@ be, to a very large extent, the result of luck. 
Sherlock Holmes assert_eq!(lines, expected); }); +// See: https://github.com/BurntSushi/ripgrep/issues/45 +sherlock!(feature_45_relative_cwd, "test", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create(".not-an-ignore", "foo\n/bar"); + wd.create_dir("bar"); + wd.create_dir("baz/bar"); + wd.create_dir("baz/baz/bar"); + wd.create("bar/test", "test"); + wd.create("baz/bar/test", "test"); + wd.create("baz/baz/bar/test", "test"); + wd.create("baz/foo", "test"); + wd.create("baz/test", "test"); + wd.create("foo", "test"); + wd.create("test", "test"); + + // First, get a baseline without applying ignore rules. + let lines = paths_from_stdout(wd.stdout(&mut cmd)); + assert_eq!(lines, paths(&[ + "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo", + "baz/test", "foo", "test", + ])); + + // Now try again with the ignore file activated. + cmd.arg("--ignore-file").arg(".not-an-ignore"); + let lines = paths_from_stdout(wd.stdout(&mut cmd)); + assert_eq!(lines, paths(&[ + "baz/bar/test", "baz/baz/bar/test", "baz/test", "test", + ])); + + // Now do it again, but inside the baz directory. + // Since the ignore file is interpreted relative to the CWD, this will + // cause the /bar anchored pattern to filter out baz/bar, which is a + // subtle difference between true parent ignore files and manually + // specified ignore files. 
+ let mut cmd = wd.command(); + cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore"); + cmd.current_dir(wd.path().join("baz")); + let lines = paths_from_stdout(wd.stdout(&mut cmd)); + assert_eq!(lines, paths(&["baz/bar/test", "test"])); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +sherlock!(feature_45_precedence_with_others, "test", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create(".not-an-ignore", "*.log"); + wd.create(".ignore", "!imp.log"); + wd.create("imp.log", "test"); + wd.create("wat.log", "test"); + + cmd.arg("--ignore-file").arg(".not-an-ignore"); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, "imp.log:test\n"); +}); + +// See: https://github.com/BurntSushi/ripgrep/issues/45 +sherlock!(feature_45_precedence_internal, "test", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create(".not-an-ignore1", "*.log"); + wd.create(".not-an-ignore2", "!imp.log"); + wd.create("imp.log", "test"); + wd.create("wat.log", "test"); + + cmd.arg("--ignore-file").arg(".not-an-ignore1"); + cmd.arg("--ignore-file").arg(".not-an-ignore2"); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, "imp.log:test\n"); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/68 clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create(".gitignore", "foo");