mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-11 14:30:24 +02:00
Merge pull request #202 from BurntSushi/ignore
Move all gitignore matching to separate crate.
This commit is contained in:
57
Cargo.lock
generated
57
Cargo.lock
generated
@ -5,20 +5,18 @@ dependencies = [
|
||||
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.1.0",
|
||||
"grep 0.1.3",
|
||||
"ignore 0.1.0",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@ -44,7 +42,7 @@ version = "0.6.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
@ -55,7 +53,7 @@ version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -65,7 +63,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fs2"
|
||||
version = "0.2.5"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -82,7 +80,7 @@ dependencies = [
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -91,9 +89,22 @@ version = "0.1.3"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"globset 0.1.0",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -130,10 +141,10 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "memmap"
|
||||
version = "0.2.3"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -157,12 +168,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "0.1.77"
|
||||
version = "0.1.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -170,7 +181,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.3.7"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
@ -221,7 +232,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "0.1.8"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -244,17 +255,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
|
||||
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
|
||||
"checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
|
||||
"checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8"
|
||||
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
||||
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
||||
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
|
||||
"checksum memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "065ce59af31c18ea2c419100bda6247dd4ec3099423202b12f0bd32e529fabd2"
|
||||
"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
|
||||
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
||||
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
|
||||
"checksum regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "48f0573bcee95a48da786f8823465b5f2a1fae288a55407aca991e5b3e0eae11"
|
||||
"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
|
||||
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
|
||||
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
|
||||
"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
|
||||
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
|
||||
@ -262,6 +273,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
||||
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
|
||||
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||
"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
|
||||
"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
|
@ -27,19 +27,17 @@ path = "tests/tests.rs"
|
||||
deque = "0.3"
|
||||
docopt = "0.6"
|
||||
env_logger = "0.3"
|
||||
globset = { version = "0.1.0", path = "globset" }
|
||||
grep = { version = "0.1.3", path = "grep" }
|
||||
ignore = { version = "0.1.0", path = "ignore" }
|
||||
lazy_static = "0.2"
|
||||
libc = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
memmap = "0.2"
|
||||
memmap = "0.5"
|
||||
num_cpus = "1"
|
||||
regex = "0.1.77"
|
||||
rustc-serialize = "0.3"
|
||||
term = "0.4"
|
||||
thread_local = "0.2.7"
|
||||
walkdir = "0.1"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
kernel32-sys = "0.2"
|
||||
|
@ -30,6 +30,7 @@ test_script:
|
||||
- cargo test --verbose
|
||||
- cargo test --verbose --manifest-path grep/Cargo.toml
|
||||
- cargo test --verbose --manifest-path globset/Cargo.toml
|
||||
- cargo test --verbose --manifest-path ignore/Cargo.toml
|
||||
|
||||
before_deploy:
|
||||
# Generate artifacts for release
|
||||
@ -59,7 +60,8 @@ deploy:
|
||||
|
||||
branches:
|
||||
only:
|
||||
- appveyor
|
||||
- /\d+\.\d+\.\d+/
|
||||
except:
|
||||
- master
|
||||
# - appveyor
|
||||
# - /\d+\.\d+\.\d+/
|
||||
# except:
|
||||
# - master
|
||||
|
@ -23,6 +23,8 @@ run_test_suite() {
|
||||
cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path ignore/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path ignore/Cargo.toml
|
||||
|
||||
# sanity check the file type
|
||||
file target/$TARGET/debug/rg
|
||||
|
@ -28,3 +28,6 @@ regex = "0.1.77"
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2"
|
||||
|
||||
[features]
|
||||
simd-accel = ["regex/simd-accel"]
|
||||
|
@ -11,6 +11,9 @@ extern crate lazy_static;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
|
||||
|
||||
const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
|
||||
|
@ -226,10 +226,21 @@ type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
|
||||
/// single pass.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct GlobSet {
|
||||
len: usize,
|
||||
strats: Vec<GlobSetMatchStrategy>,
|
||||
}
|
||||
|
||||
impl GlobSet {
|
||||
/// Returns true if this set is empty, and therefore matches nothing.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len == 0
|
||||
}
|
||||
|
||||
/// Returns the number of globs in this set.
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
|
||||
/// Returns true if any glob in this set matches the path given.
|
||||
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.is_match_candidate(&Candidate::new(path.as_ref()))
|
||||
@ -240,6 +251,9 @@ impl GlobSet {
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
if self.is_empty() {
|
||||
return false;
|
||||
}
|
||||
for strat in &self.strats {
|
||||
if strat.is_match(path) {
|
||||
return true;
|
||||
@ -250,9 +264,6 @@ impl GlobSet {
|
||||
|
||||
/// Returns the sequence number of every glob pattern that matches the
|
||||
/// given path.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
|
||||
self.matches_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
@ -264,6 +275,9 @@ impl GlobSet {
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
|
||||
let mut into = vec![];
|
||||
if self.is_empty() {
|
||||
return into;
|
||||
}
|
||||
self.matches_candidate_into(path, &mut into);
|
||||
into
|
||||
}
|
||||
@ -274,12 +288,32 @@ impl GlobSet {
|
||||
/// `into` is cleared before matching begins, and contains the set of
|
||||
/// sequence numbers (in ascending order) after matching ends. If no globs
|
||||
/// were matched, then `into` will be empty.
|
||||
pub fn matches_into<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
|
||||
}
|
||||
|
||||
/// Adds the sequence number of every glob pattern that matches the given
|
||||
/// path to the vec given.
|
||||
///
|
||||
/// `into` is cleared before matching begins, and contains the set of
|
||||
/// sequence numbers (in ascending order) after matching ends. If no globs
|
||||
/// were matched, then `into` will be empty.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate_into(
|
||||
&self,
|
||||
path: &Candidate,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
into.clear();
|
||||
if self.is_empty() {
|
||||
return;
|
||||
}
|
||||
for strat in &self.strats {
|
||||
strat.matches_into(path, into);
|
||||
}
|
||||
@ -288,6 +322,9 @@ impl GlobSet {
|
||||
}
|
||||
|
||||
fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
|
||||
if pats.is_empty() {
|
||||
return Ok(GlobSet { len: 0, strats: vec![] });
|
||||
}
|
||||
let mut lits = LiteralStrategy::new();
|
||||
let mut base_lits = BasenameLiteralStrategy::new();
|
||||
let mut exts = ExtensionStrategy::new();
|
||||
@ -330,6 +367,7 @@ impl GlobSet {
|
||||
prefixes.literals.len(), suffixes.literals.len(),
|
||||
required_exts.0.len(), regexes.literals.len());
|
||||
Ok(GlobSet {
|
||||
len: pats.len(),
|
||||
strats: vec![
|
||||
GlobSetMatchStrategy::Extension(exts),
|
||||
GlobSetMatchStrategy::BasenameLiteral(base_lits),
|
||||
@ -750,4 +788,11 @@ mod tests {
|
||||
assert_eq!(0, matches[0]);
|
||||
assert_eq!(2, matches[1]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty_set_works() {
|
||||
let set = GlobSetBuilder::new().build().unwrap();
|
||||
assert!(!set.is_match(""));
|
||||
assert!(!set.is_match("a"));
|
||||
}
|
||||
}
|
||||
|
@ -89,16 +89,14 @@ pub fn path_bytes(path: &Path) -> Cow<[u8]> {
|
||||
os_str_bytes(path.as_os_str())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, transcoded to UTF-8 if
|
||||
/// necessary.
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(unix)]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
Cow::Borrowed(s.as_bytes())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, transcoded to UTF-8 if
|
||||
/// necessary.
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
|
||||
|
@ -15,6 +15,6 @@ license = "Unlicense/MIT"
|
||||
[dependencies]
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
memmap = "0.2"
|
||||
memmap = "0.5"
|
||||
regex = "0.1.77"
|
||||
regex-syntax = "0.3.5"
|
||||
|
170
ignore/Cargo.lock
generated
Normal file
170
ignore/Cargo.lock
generated
Normal file
@ -0,0 +1,170 @@
|
||||
[root]
|
||||
name = "ignore"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"globset 0.1.0",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "kernel32-sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "0.1.77"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "simd"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "tempdir"
|
||||
version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread-id"
|
||||
version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-build"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
|
||||
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
|
||||
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
|
||||
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
||||
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
||||
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
||||
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
|
||||
"checksum regex-syntax 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "48f0573bcee95a48da786f8823465b5f2a1fae288a55407aca991e5b3e0eae11"
|
||||
"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
|
||||
"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
|
||||
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
||||
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
|
||||
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||
"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
36
ignore/Cargo.toml
Normal file
36
ignore/Cargo.toml
Normal file
@ -0,0 +1,36 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.1.0" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
against file paths.
|
||||
"""
|
||||
documentation = "https://docs.rs/ignore"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/ignore"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/ignore"
|
||||
readme = "README.md"
|
||||
keywords = ["glob", "ignore", "gitignore", "pattern", "file"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[lib]
|
||||
name = "ignore"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
globset = { version = "0.1.0", path = "../globset" }
|
||||
lazy_static = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
regex = "0.1.77"
|
||||
thread_local = "0.2.7"
|
||||
walkdir = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tempdir = "0.3.5"
|
||||
|
||||
[features]
|
||||
simd-accel = ["globset/simd-accel"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
66
ignore/README.md
Normal file
66
ignore/README.md
Normal file
@ -0,0 +1,66 @@
|
||||
ignore
|
||||
======
|
||||
The ignore crate provides a fast recursive directory iterator that respects
|
||||
various filters such as globs, file types and `.gitignore` files. This crate
|
||||
also provides lower level direct access to gitignore and file type matchers.
|
||||
|
||||
[Linux build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[ignore on crates.io](https://crates.io/crates/ignore)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/ignore](https://docs.rs/ignore)
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
ignore = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate ignore;
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
recursively traverse the current directory while automatically filtering out
|
||||
files and directories according to ignore globs found in files like
|
||||
`.ignore` and `.gitignore`:
|
||||
|
||||
|
||||
```rust,no_run
|
||||
use ignore::Walk;
|
||||
|
||||
for result in Walk::new("./") {
|
||||
// Each item yielded by the iterator is either a directory entry or an
|
||||
// error, so either print the path or the error.
|
||||
match result {
|
||||
Ok(entry) => println!("{}", entry.path().display()),
|
||||
Err(err) => println!("ERROR: {}", err),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Example: advanced
|
||||
|
||||
By default, the recursive directory iterator will ignore hidden files and
|
||||
directories. This can be disabled by building the iterator with `WalkBuilder`:
|
||||
|
||||
```rust,no_run
|
||||
use ignore::WalkBuilder;
|
||||
|
||||
for result in WalkBuilder::new("./").hidden(false).build() {
|
||||
println!("{:?}", result);
|
||||
}
|
||||
```
|
||||
|
||||
See the documentation for `WalkBuilder` for many other options.
|
28
ignore/examples/walk.rs
Normal file
28
ignore/examples/walk.rs
Normal file
@ -0,0 +1,28 @@
|
||||
/*
|
||||
extern crate ignore;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::io::{self, Write};
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
use ignore::ignore::IgnoreBuilder;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
fn main() {
|
||||
let path = env::args().nth(1).unwrap();
|
||||
let ig = IgnoreBuilder::new().build();
|
||||
let wd = WalkDir::new(path);
|
||||
let walker = ignore::walk::Iter::new(ig, wd);
|
||||
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
// let mut count = 0;
|
||||
for dirent in walker {
|
||||
// count += 1;
|
||||
stdout.write(dirent.path().as_os_str().as_bytes()).unwrap();
|
||||
stdout.write(b"\n").unwrap();
|
||||
}
|
||||
// println!("{}", count);
|
||||
}
|
||||
*/
|
||||
fn main() {}
|
803
ignore/src/dir.rs
Normal file
803
ignore/src/dir.rs
Normal file
@ -0,0 +1,803 @@
|
||||
// This module provides a data structure, `Ignore`, that connects "directory
|
||||
// traversal" with "ignore matchers." Specifically, it knows about gitignore
|
||||
// semantics and precedence, and is organized based on directory hierarchy.
|
||||
// Namely, every matcher logically corresponds to ignore rules from a single
|
||||
// directory, and points to the matcher for its corresponding parent directory.
|
||||
// In this sense, `Ignore` is a *persistent* data structure.
|
||||
//
|
||||
// This design was specifically chosen to make it possible to use this data
|
||||
// structure in a parallel directory iterator.
|
||||
//
|
||||
// My initial intention was to expose this module as part of this crate's
|
||||
// public API, but I think the data structure's public API is too complicated
|
||||
// with non-obvious failure modes. Alas, such things haven't been documented
|
||||
// well.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use overrides::{self, Override};
|
||||
use types::{self, Types};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
/// the `Ignore` matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>);
|
||||
|
||||
/// IgnoreMatchInner describes precisely where the match information came from.
|
||||
/// This is private to allow expansion to more matchers in the future.
|
||||
#[derive(Clone, Debug)]
|
||||
enum IgnoreMatchInner<'a> {
|
||||
Override(overrides::Glob<'a>),
|
||||
Gitignore(&'a gitignore::Glob),
|
||||
Types(types::Glob<'a>),
|
||||
Hidden,
|
||||
}
|
||||
|
||||
impl<'a> IgnoreMatch<'a> {
|
||||
fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Override(x))
|
||||
}
|
||||
|
||||
fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Gitignore(x))
|
||||
}
|
||||
|
||||
fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Types(x))
|
||||
}
|
||||
|
||||
fn hidden() -> IgnoreMatch<'static> {
|
||||
IgnoreMatch(IgnoreMatchInner::Hidden)
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration flags shared by the ignore matcher and its builder.
#[derive(Clone, Copy, Debug)]
struct IgnoreOptions {
    /// Whether hidden file paths are ignored.
    hidden: bool,
    /// Whether `.ignore` files are read.
    ignore: bool,
    /// Whether git's global gitignore file is read.
    git_global: bool,
    /// Whether `.gitignore` files are read.
    git_ignore: bool,
    /// Whether `.git/info/exclude` files are read.
    git_exclude: bool,
}

impl IgnoreOptions {
    /// Returns true if at least one source of ignore rules is enabled.
    ///
    /// The `hidden` flag is not a source of ignore rules and is not
    /// consulted here.
    fn should_ignores(&self) -> bool {
        let rule_sources = [
            self.ignore,
            self.git_global,
            self.git_ignore,
            self.git_exclude,
        ];
        rule_sources.iter().any(|&enabled| enabled)
    }
}
|
||||
|
||||
/// Ignore is a matcher useful for recursively walking one or more directories.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Ignore(Arc<IgnoreInner>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct IgnoreInner {
|
||||
/// A map of all existing directories that have already been
|
||||
/// compiled into matchers.
|
||||
///
|
||||
/// Note that this is never used during matching, only when adding new
|
||||
/// parent directory matchers. This avoids needing to rebuild glob sets for
|
||||
/// parent directories if many paths are being searched.
|
||||
compiled: Arc<RwLock<HashMap<OsString, Ignore>>>,
|
||||
/// The path to the directory that this matcher was built from.
|
||||
dir: PathBuf,
|
||||
/// An override matcher (default is empty).
|
||||
overrides: Arc<Override>,
|
||||
/// A file type matcher.
|
||||
types: Arc<Types>,
|
||||
/// The parent directory to match next.
|
||||
///
|
||||
/// If this is the root directory or there are otherwise no more
|
||||
/// directories to match, then `parent` is `None`.
|
||||
parent: Option<Ignore>,
|
||||
/// Whether this is an absolute parent matcher, as added by add_parent.
|
||||
is_absolute_parent: bool,
|
||||
/// The absolute base path of this matcher. Populated only if parent
|
||||
/// directories are added.
|
||||
absolute_base: Option<Arc<PathBuf>>,
|
||||
/// Explicit ignore matchers specified by the caller.
|
||||
explicit_ignores: Arc<Vec<Gitignore>>,
|
||||
/// The matcher for .ignore files.
|
||||
ignore_matcher: Gitignore,
|
||||
/// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore.
|
||||
git_global_matcher: Arc<Gitignore>,
|
||||
/// The matcher for .gitignore files.
|
||||
git_ignore_matcher: Gitignore,
|
||||
/// Special matcher for `.git/info/exclude` files.
|
||||
git_exclude_matcher: Gitignore,
|
||||
/// Whether this directory contains a .git sub-directory.
|
||||
has_git: bool,
|
||||
/// Ignore config.
|
||||
opts: IgnoreOptions,
|
||||
}
|
||||
|
||||
impl Ignore {
|
||||
/// Return the directory path of this matcher.
|
||||
#[allow(dead_code)]
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.0.dir
|
||||
}
|
||||
|
||||
/// Return true if this matcher has no parent.
|
||||
pub fn is_root(&self) -> bool {
|
||||
self.0.parent.is_none()
|
||||
}
|
||||
|
||||
/// Return this matcher's parent, if one exists.
|
||||
pub fn parent(&self) -> Option<Ignore> {
|
||||
self.0.parent.clone()
|
||||
}
|
||||
|
||||
/// Create a new `Ignore` matcher with the parent directories of `dir`.
|
||||
///
|
||||
/// Note that this can only be called on an `Ignore` matcher with no
|
||||
/// parents (i.e., `is_root` returns `true`). This will panic otherwise.
|
||||
pub fn add_parents<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
if !self.is_root() {
|
||||
panic!("Ignore::add_parents called on non-root matcher");
|
||||
}
|
||||
let absolute_base = match path.as_ref().canonicalize() {
|
||||
Ok(path) => Arc::new(path),
|
||||
Err(_) => {
|
||||
// There's not much we can do here, so just return our
|
||||
// existing matcher. We drop the error to be consistent
|
||||
// with our general pattern of ignoring I/O errors when
|
||||
// processing ignore files.
|
||||
return (self.clone(), None);
|
||||
}
|
||||
};
|
||||
// List of parents, from child to root.
|
||||
let mut parents = vec![];
|
||||
let mut path = &**absolute_base;
|
||||
while let Some(parent) = path.parent() {
|
||||
parents.push(parent);
|
||||
path = parent;
|
||||
}
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let mut ig = self.clone();
|
||||
for parent in parents.into_iter().rev() {
|
||||
let mut compiled = self.0.compiled.write().unwrap();
|
||||
if let Some(prebuilt) = compiled.get(parent.as_os_str()) {
|
||||
ig = prebuilt.clone();
|
||||
continue;
|
||||
}
|
||||
let (mut igtmp, err) = ig.add_child_path(parent);
|
||||
errs.maybe_push(err);
|
||||
igtmp.is_absolute_parent = true;
|
||||
igtmp.absolute_base = Some(absolute_base.clone());
|
||||
ig = Ignore(Arc::new(igtmp));
|
||||
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
||||
}
|
||||
(ig, errs.into_error_option())
|
||||
}
|
||||
|
||||
/// Create a new `Ignore` matcher for the given child directory.
|
||||
///
|
||||
/// Since building the matcher may require reading from multiple
|
||||
/// files, it's possible that this method partially succeeds. Therefore,
|
||||
/// a matcher is always returned (which may match nothing) and an error is
|
||||
/// returned if it exists.
|
||||
///
|
||||
/// Note that all I/O errors are completely ignored.
|
||||
pub fn add_child<P: AsRef<Path>>(
|
||||
&self,
|
||||
dir: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
let (ig, err) = self.add_child_path(dir.as_ref());
|
||||
(Ignore(Arc::new(ig)), err)
|
||||
}
|
||||
|
||||
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
||||
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
||||
static IG_NAMES: &'static [&'static str] = &[".rgignore", ".ignore"];
|
||||
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let ig_matcher =
|
||||
if !self.0.opts.ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, IG_NAMES);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_matcher =
|
||||
if !self.0.opts.git_ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".gitignore"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_exclude_matcher =
|
||||
if !self.0.opts.git_exclude {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let ig = IgnoreInner {
|
||||
compiled: self.0.compiled.clone(),
|
||||
dir: dir.to_path_buf(),
|
||||
overrides: self.0.overrides.clone(),
|
||||
types: self.0.types.clone(),
|
||||
parent: Some(self.clone()),
|
||||
is_absolute_parent: false,
|
||||
absolute_base: self.0.absolute_base.clone(),
|
||||
explicit_ignores: self.0.explicit_ignores.clone(),
|
||||
ignore_matcher: ig_matcher,
|
||||
git_global_matcher: self.0.git_global_matcher.clone(),
|
||||
git_ignore_matcher: gi_matcher,
|
||||
git_exclude_matcher: gi_exclude_matcher,
|
||||
has_git: dir.join(".git").is_dir(),
|
||||
opts: self.0.opts,
|
||||
};
|
||||
(ig, errs.into_error_option())
|
||||
}
|
||||
|
||||
/// Returns a match indicating whether the given file path should be
|
||||
/// ignored or not.
|
||||
///
|
||||
/// The match contains information about its origin.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
// We need to be careful with our path. If it has a leading ./, then
|
||||
// strip it because it causes nothing but trouble.
|
||||
let mut path = path.as_ref();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Match against the override patterns. If an override matches
|
||||
// regardless of whether it's whitelist/ignore, then we quit and
|
||||
// return that result immediately. Overrides have the highest
|
||||
// precedence.
|
||||
if !self.0.overrides.is_empty() {
|
||||
let mat =
|
||||
self.0.overrides.matched(path, is_dir)
|
||||
.map(IgnoreMatch::overrides);
|
||||
if !mat.is_none() {
|
||||
return mat;
|
||||
}
|
||||
}
|
||||
let mut whitelisted = Match::None;
|
||||
if self.0.opts.should_ignores() {
|
||||
let mat = self.matched_ignore(path, is_dir);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if !self.0.types.is_empty() {
|
||||
let mat =
|
||||
self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
whitelisted
|
||||
}
|
||||
|
||||
/// Performs matching only on the ignore files for this directory and
|
||||
/// all parent directories.
|
||||
fn matched_ignore<'a>(
|
||||
&'a self,
|
||||
path: &Path,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
let (mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) =
|
||||
(Match::None, Match::None, Match::None, Match::None);
|
||||
let mut saw_git = false;
|
||||
for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
if let Some(abs_parent_path) = self.absolute_base() {
|
||||
let path = abs_parent_path.join(path);
|
||||
for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) {
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
}
|
||||
for gi in self.0.explicit_ignores.iter().rev() {
|
||||
if !m_explicit.is_none() {
|
||||
break;
|
||||
}
|
||||
m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
|
||||
}
|
||||
let m_global = self.0.git_global_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
if !m_ignore.is_none() {
|
||||
m_ignore
|
||||
} else if !m_gi.is_none() {
|
||||
m_gi
|
||||
} else if !m_gi_exclude.is_none() {
|
||||
m_gi_exclude
|
||||
} else if !m_global.is_none() {
|
||||
m_global
|
||||
} else if !m_explicit.is_none() {
|
||||
m_explicit
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over parent ignore matchers, including this one.
|
||||
fn parents(&self) -> Parents {
|
||||
Parents(Some(self))
|
||||
}
|
||||
|
||||
/// Returns the first absolute path of the first absolute parent, if
|
||||
/// one exists.
|
||||
fn absolute_base(&self) -> Option<&Path> {
|
||||
self.0.absolute_base.as_ref().map(|p| &***p)
|
||||
}
|
||||
}
|
||||
|
||||
struct Parents<'a>(Option<&'a Ignore>);
|
||||
|
||||
impl<'a> Iterator for Parents<'a> {
|
||||
type Item = &'a Ignore;
|
||||
|
||||
fn next(&mut self) -> Option<&'a Ignore> {
|
||||
match self.0.take() {
|
||||
None => None,
|
||||
Some(ig) => {
|
||||
self.0 = ig.0.parent.as_ref();
|
||||
Some(ig)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for creating an Ignore matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IgnoreBuilder {
|
||||
/// The root directory path for this ignore matcher.
|
||||
dir: PathBuf,
|
||||
/// An override matcher (default is empty).
|
||||
overrides: Arc<Override>,
|
||||
/// A type matcher (default is empty).
|
||||
types: Arc<Types>,
|
||||
/// Explicit ignore matchers.
|
||||
explicit_ignores: Vec<Gitignore>,
|
||||
/// Ignore config.
|
||||
opts: IgnoreOptions,
|
||||
}
|
||||
|
||||
impl IgnoreBuilder {
|
||||
/// Create a new builder for an `Ignore` matcher.
|
||||
///
|
||||
/// All relative file paths are resolved with respect to the current
|
||||
/// working directory.
|
||||
pub fn new() -> IgnoreBuilder {
|
||||
IgnoreBuilder {
|
||||
dir: Path::new("").to_path_buf(),
|
||||
overrides: Arc::new(Override::empty()),
|
||||
types: Arc::new(Types::empty()),
|
||||
explicit_ignores: vec![],
|
||||
opts: IgnoreOptions {
|
||||
hidden: true,
|
||||
ignore: true,
|
||||
git_global: true,
|
||||
git_ignore: true,
|
||||
git_exclude: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new `Ignore` matcher.
|
||||
///
|
||||
/// The matcher returned won't match anything until ignore rules from
|
||||
/// directories are added to it.
|
||||
pub fn build(&self) -> Ignore {
|
||||
let git_global_matcher =
|
||||
if !self.opts.git_global {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (gi, err) = Gitignore::global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
Ignore(Arc::new(IgnoreInner {
|
||||
compiled: Arc::new(RwLock::new(HashMap::new())),
|
||||
dir: self.dir.clone(),
|
||||
overrides: self.overrides.clone(),
|
||||
types: self.types.clone(),
|
||||
parent: None,
|
||||
is_absolute_parent: true,
|
||||
absolute_base: None,
|
||||
explicit_ignores: Arc::new(self.explicit_ignores.clone()),
|
||||
ignore_matcher: Gitignore::empty(),
|
||||
git_global_matcher: Arc::new(git_global_matcher),
|
||||
git_ignore_matcher: Gitignore::empty(),
|
||||
git_exclude_matcher: Gitignore::empty(),
|
||||
has_git: false,
|
||||
opts: self.opts,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Add an override matcher.
|
||||
///
|
||||
/// By default, no override matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder {
|
||||
self.overrides = Arc::new(overrides);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher.
|
||||
///
|
||||
/// By default, no file type matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder {
|
||||
self.types = Arc::new(types);
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a new global ignore matcher from the ignore file path given.
|
||||
pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder {
|
||||
self.explicit_ignores.push(ig);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables ignoring hidden files.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.hidden = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.ignore` files.
|
||||
///
|
||||
/// `.ignore` files have the same semantics as `gitignore` files and are
|
||||
/// supported by search tools such as ripgrep and The Silver Searcher.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a global gitignore matcher.
|
||||
///
|
||||
/// Its precedence is lower than both normal `.gitignore` files and
|
||||
/// `.git/info/exclude` files.
|
||||
///
|
||||
/// This overwrites any previous global gitignore setting.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_global = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.gitignore` files.
|
||||
///
|
||||
/// `.gitignore` files have match semantics as described in the `gitignore`
|
||||
/// man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.git/info/exclude` files.
|
||||
///
|
||||
/// `.git/info/exclude` files have match semantics as described in the
|
||||
/// `gitignore` man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_exclude = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher for the directory given.
|
||||
///
|
||||
/// Ignore globs are extracted from each of the file names in `dir` in the
|
||||
/// order given (earlier names have lower precedence than later names).
|
||||
///
|
||||
/// I/O errors are ignored.
|
||||
pub fn create_gitignore(
|
||||
dir: &Path,
|
||||
names: &[&str],
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let mut builder = GitignoreBuilder::new(dir);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
for name in names {
|
||||
let gipath = dir.join(name);
|
||||
errs.maybe_push_ignore_io(builder.add(gipath));
|
||||
}
|
||||
let gi = match builder.build() {
|
||||
Ok(gi) => gi,
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
GitignoreBuilder::new(dir).build().unwrap()
|
||||
}
|
||||
};
|
||||
(gi, errs.into_error_option())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;

    use tempdir::TempDir;

    use dir::IgnoreBuilder;
    use gitignore::Gitignore;
    use Error;

    /// Creates the file at `path` and fills it with `contents`.
    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
        File::create(path).unwrap().write_all(contents.as_bytes()).unwrap();
    }

    /// Creates `path` along with any missing parent directories.
    fn mkdirp<P: AsRef<Path>>(path: P) {
        fs::create_dir_all(path).unwrap();
    }

    /// Unwraps a partial error into its parts, panicking on any other kind.
    fn partial(err: Error) -> Vec<Error> {
        match err {
            Error::Partial(errs) => errs,
            _ => panic!("expected partial error but got {:?}", err),
        }
    }

    /// Creates a fresh temporary directory for a single test.
    fn tmpdir() -> TempDir {
        TempDir::new("ignore-test-").unwrap()
    }

    #[test]
    fn explicit_ignore() {
        let tmp = tmpdir();
        let ignore_file = tmp.path().join("not-an-ignore");
        wfile(&ignore_file, "foo\n!bar");

        let (gi, err) = Gitignore::new(&ignore_file);
        assert!(err.is_none());
        let (ig, err) = IgnoreBuilder::new()
            .add_ignore(gi).build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn git_exclude() {
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git/info"));
        wfile(tmp.path().join(".git/info/exclude"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn gitignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    #[test]
    fn ignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".ignore"), "foo\n!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_ignore());
        assert!(ig.matched("bar", false).is_whitelist());
        assert!(ig.matched("baz", false).is_none());
    }

    // Tests that an .ignore will override a .gitignore.
    #[test]
    fn ignore_over_gitignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "foo");
        wfile(tmp.path().join(".ignore"), "!foo");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("foo", false).is_whitelist());
    }

    // Tests that exclude has lower precedence than both .ignore and
    // .gitignore.
    #[test]
    fn exclude_lowest() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "!foo");
        wfile(tmp.path().join(".ignore"), "!bar");
        mkdirp(tmp.path().join(".git/info"));
        wfile(tmp.path().join(".git/info/exclude"), "foo\nbar\nbaz");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
        assert!(ig.matched("baz", false).is_ignore());
        assert!(ig.matched("foo", false).is_whitelist());
        assert!(ig.matched("bar", false).is_whitelist());
    }

    #[test]
    fn errored() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo");

        let (_, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_some());
    }

    #[test]
    fn errored_both() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo");
        wfile(tmp.path().join(".ignore"), "fo**o");

        let (_, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert_eq!(2, partial(err.expect("an error")).len());
    }

    #[test]
    fn errored_partial() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo\nbar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_some());
        assert!(ig.matched("bar", false).is_ignore());
    }

    #[test]
    fn errored_partial_and_ignore() {
        let tmp = tmpdir();
        wfile(tmp.path().join(".gitignore"), "f**oo\nbar");
        wfile(tmp.path().join(".ignore"), "!bar");

        let (ig, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_some());
        assert!(ig.matched("bar", false).is_whitelist());
    }

    #[test]
    fn not_present_empty() {
        let tmp = tmpdir();

        let (_, err) = IgnoreBuilder::new().build().add_child(tmp.path());
        assert!(err.is_none());
    }

    #[test]
    fn stops_at_git_dir() {
        // This tests that .gitignore files beyond a .git barrier aren't
        // matched, but .ignore files are.
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git"));
        mkdirp(tmp.path().join("foo/.git"));
        wfile(tmp.path().join(".gitignore"), "foo");
        wfile(tmp.path().join(".ignore"), "bar");

        let root = IgnoreBuilder::new().build();
        let (outer, err) = root.add_child(tmp.path());
        assert!(err.is_none());
        let (inner, err) = outer.add_child(outer.path().join("foo"));
        assert!(err.is_none());

        assert!(outer.matched("foo", false).is_ignore());
        assert!(inner.matched("foo", false).is_none());

        assert!(outer.matched("bar", false).is_ignore());
        assert!(inner.matched("bar", false).is_ignore());
    }

    #[test]
    fn absolute_parent() {
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git"));
        mkdirp(tmp.path().join("foo"));
        wfile(tmp.path().join(".gitignore"), "bar");

        // First, check that the parent gitignore file isn't detected if the
        // parent isn't added. This establishes a baseline.
        let root = IgnoreBuilder::new().build();
        let (child, err) = root.add_child(tmp.path().join("foo"));
        assert!(err.is_none());
        assert!(child.matched("bar", false).is_none());

        // Second, check that adding a parent directory actually works.
        let root = IgnoreBuilder::new().build();
        let (with_parents, err) = root.add_parents(tmp.path().join("foo"));
        assert!(err.is_none());
        let (child, err) = with_parents.add_child(tmp.path().join("foo"));
        assert!(err.is_none());
        assert!(child.matched("bar", false).is_ignore());
    }

    #[test]
    fn absolute_parent_anchored() {
        let tmp = tmpdir();
        mkdirp(tmp.path().join(".git"));
        mkdirp(tmp.path().join("src/llvm"));
        wfile(tmp.path().join(".gitignore"), "/llvm/\nfoo");

        let root = IgnoreBuilder::new().build();
        let (with_parents, err) = root.add_parents(tmp.path().join("src"));
        assert!(err.is_none());
        let (child, err) = with_parents.add_child("src");
        assert!(err.is_none());

        assert!(with_parents.matched("llvm", true).is_none());
        assert!(child.matched("llvm", true).is_none());
        assert!(child.matched("src/llvm", true).is_none());
        assert!(child.matched("foo", false).is_ignore());
        assert!(child.matched("src/foo", false).is_ignore());
    }
}
|
607
ignore/src/gitignore.rs
Normal file
607
ignore/src/gitignore.rs
Normal file
@ -0,0 +1,607 @@
|
||||
/*!
|
||||
The gitignore module provides a way to match globs from a gitignore file
|
||||
against file paths.
|
||||
|
||||
Note that this module implements the specification as described in the
|
||||
`gitignore` man page from scratch. That is, this module does *not* shell out to
|
||||
the `git` command line tool.
|
||||
*/
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str;
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex::bytes::Regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// A single glob taken from a gitignore file.
///
/// This is used to report which glob had the highest precedence among
/// those that matched in one or more gitignore files.
#[derive(Clone, Debug)]
pub struct Glob {
    /// The file path that this glob was extracted from, if any.
    from: Option<PathBuf>,
    /// The glob string exactly as it was written.
    original: String,
    /// The glob string that is actually converted to a regex.
    actual: String,
    /// Whether this glob whitelists paths rather than ignoring them.
    is_whitelist: bool,
    /// Whether this glob may only match directories.
    is_only_dir: bool,
}

impl Glob {
    /// Returns the path of the file that defined this glob, if there is
    /// one.
    pub fn from(&self) -> Option<&Path> {
        match self.from {
            Some(ref source) => Some(source.as_path()),
            None => None,
        }
    }

    /// Returns the glob as it was originally defined in a gitignore file.
    pub fn original(&self) -> &str {
        self.original.as_str()
    }

    /// Returns the glob that was actually compiled in order to respect
    /// gitignore semantics.
    pub fn actual(&self) -> &str {
        self.actual.as_str()
    }

    /// Returns true if this is a whitelisted glob.
    pub fn is_whitelist(&self) -> bool {
        self.is_whitelist
    }

    /// Returns true if this glob must match a directory.
    pub fn is_only_dir(&self) -> bool {
        self.is_only_dir
    }
}
|
||||
|
||||
/// Gitignore is a matcher for the globs in one or more gitignore files
|
||||
/// in the same directory.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: GlobSet,
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
num_ignores: u64,
|
||||
num_whitelists: u64,
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
impl Gitignore {
|
||||
/// Creates a new gitignore matcher from the gitignore file path given.
|
||||
///
|
||||
/// If it's desirable to include multiple gitignore files in a single
|
||||
/// matcher, or read gitignore globs from a different source, then
|
||||
/// use `GitignoreBuilder`.
|
||||
///
|
||||
/// This always returns a valid matcher, even if it's empty. In particular,
|
||||
/// a Gitignore file can be partially valid, e.g., when one glob is invalid
|
||||
/// but the rest aren't.
|
||||
///
|
||||
/// Note that I/O errors are ignored. For more granular control over
|
||||
/// errors, use `GitignoreBuilder`.
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
gitignore_path: P,
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let path = gitignore_path.as_ref();
|
||||
let parent = path.parent().unwrap_or(Path::new("/"));
|
||||
let mut builder = GitignoreBuilder::new(parent);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(builder.add(path));
|
||||
match builder.build() {
|
||||
Ok(gi) => (gi, errs.into_error_option()),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
(Gitignore::empty(), errs.into_error_option())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher from the global ignore file, if one
|
||||
/// exists.
|
||||
///
|
||||
/// The global config file path is specified by git's `core.excludesFile`
|
||||
/// config option.
|
||||
///
|
||||
/// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
|
||||
/// does not exist or does not specify `core.excludesFile`, then
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn global() -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
Gitignore::new(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new empty gitignore matcher that never matches anything.
|
||||
///
|
||||
/// Its path is empty.
|
||||
pub fn empty() -> Gitignore {
|
||||
GitignoreBuilder::new("").build().unwrap()
|
||||
}
|
||||
|
||||
/// Returns the directory containing this gitignore matcher.
|
||||
///
|
||||
/// All matches are done relative to this path.
|
||||
pub fn path(&self) -> &Path {
|
||||
&*self.root
|
||||
}
|
||||
|
||||
/// Returns true if and only if this gitignore has zero globs, and
|
||||
/// therefore never matches any file path.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.set.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the total number of globs, which should be equivalent to
|
||||
/// `num_ignores + num_whitelists`.
|
||||
pub fn len(&self) -> usize {
|
||||
self.set.len()
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore globs.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.num_ignores
|
||||
}
|
||||
|
||||
/// Returns the total number of whitelisted globs.
|
||||
pub fn num_whitelists(&self) -> u64 {
|
||||
self.num_whitelists
|
||||
}
|
||||
|
||||
/// Returns whether the given file path matched a pattern in this gitignore
|
||||
/// matcher.
|
||||
///
|
||||
/// `is_dir` should be true if the path refers to a directory and false
|
||||
/// otherwise.
|
||||
///
|
||||
/// The given path is matched relative to the path given when building
|
||||
/// the matcher. Specifically, before matching `path`, its prefix (as
|
||||
/// determined by a common suffix of the directory containing this
|
||||
/// gitignore) is stripped. If there is no common suffix/prefix overlap,
|
||||
/// then `path` is assumed to be relative to this matcher.
|
||||
pub fn matched<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<&Glob> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
self.matched_stripped(self.strip(path.as_ref()), is_dir)
|
||||
}
|
||||
|
||||
/// Like matched, but takes a path that has already been stripped.
|
||||
fn matched_stripped<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<&Glob> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
let path = path.as_ref();
|
||||
let _matches = self.matches.get_default();
|
||||
let mut matches = _matches.borrow_mut();
|
||||
let candidate = Candidate::new(path);
|
||||
self.set.matches_candidate_into(&candidate, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let glob = &self.globs[i];
|
||||
if !glob.is_only_dir() || is_dir {
|
||||
return if glob.is_whitelist() {
|
||||
Match::Whitelist(glob)
|
||||
} else {
|
||||
Match::Ignore(glob)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
}
|
||||
|
||||
/// Strips the given path such that it's suitable for matching with this
|
||||
/// gitignore matcher.
|
||||
fn strip<'a, P: 'a + AsRef<Path> + ?Sized>(
|
||||
&'a self,
|
||||
path: &'a P,
|
||||
) -> &'a Path {
|
||||
let mut path = path.as_ref();
|
||||
// A leading ./ is completely superfluous. We also strip it from
|
||||
// our gitignore root path, so we need to strip it from our candidate
|
||||
// path too.
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Strip any common prefix between the candidate path and the root
|
||||
// of the gitignore, to make sure we get relative matching right.
|
||||
// BUT, a file name might not have any directory components to it,
|
||||
// in which case, we don't want to accidentally strip any part of the
|
||||
// file name.
|
||||
if !is_file_name(path) {
|
||||
if let Some(p) = strip_prefix(&self.root, path) {
|
||||
path = p;
|
||||
// If we're left with a leading slash, get rid of it.
|
||||
if let Some(p) = strip_prefix("/", path) {
|
||||
path = p;
|
||||
}
|
||||
}
|
||||
}
|
||||
path
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a matcher for a single set of globs from a .gitignore file.
|
||||
pub struct GitignoreBuilder {
|
||||
builder: GlobSetBuilder,
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
}
|
||||
|
||||
impl GitignoreBuilder {
|
||||
/// Create a new builder for a gitignore file.
|
||||
///
|
||||
/// The path given should be the path at which the globs for this gitignore
|
||||
/// file should be matched. Note that paths are always matched relative
|
||||
/// to the root path given here. Generally, the root path should correspond
|
||||
/// to the *directory* containing a `.gitignore` file.
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||
let root = root.as_ref();
|
||||
GitignoreBuilder {
|
||||
builder: GlobSetBuilder::new(),
|
||||
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
|
||||
globs: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new matcher from the globs added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new globs can be added to it.
|
||||
pub fn build(&self) -> Result<Gitignore, Error> {
|
||||
let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count();
|
||||
let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count();
|
||||
let set = try!(
|
||||
self.builder.build().map_err(|err| Error::Glob(err.to_string())));
|
||||
Ok(Gitignore {
|
||||
set: set,
|
||||
root: self.root.clone(),
|
||||
globs: self.globs.clone(),
|
||||
num_ignores: nignore as u64,
|
||||
num_whitelists: nwhite as u64,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Add each glob from the file path given.
|
||||
///
|
||||
/// The file given should be formatted as a `gitignore` file.
|
||||
///
|
||||
/// Note that partial errors can be returned. For example, if there was
|
||||
/// a problem adding one glob, an error for that will be returned, but
|
||||
/// all other valid globs will still be added.
|
||||
pub fn add<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
|
||||
let path = path.as_ref();
|
||||
let file = match File::open(path) {
|
||||
Err(err) => return Some(Error::Io(err).with_path(path)),
|
||||
Ok(file) => file,
|
||||
};
|
||||
let rdr = io::BufReader::new(file);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
for (i, line) in rdr.lines().enumerate() {
|
||||
let lineno = (i + 1) as u64;
|
||||
let line = match line {
|
||||
Ok(line) => line,
|
||||
Err(err) => {
|
||||
errs.push(Error::Io(err).tagged(path, lineno));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) {
|
||||
errs.push(err.tagged(path, lineno));
|
||||
}
|
||||
}
|
||||
errs.into_error_option()
|
||||
}
|
||||
|
||||
/// Add each glob line from the string given.
|
||||
///
|
||||
/// If this string came from a particular `gitignore` file, then its path
|
||||
/// should be provided here.
|
||||
///
|
||||
/// The string given should be formatted as a `gitignore` file.
|
||||
#[cfg(test)]
|
||||
fn add_str(
|
||||
&mut self,
|
||||
from: Option<PathBuf>,
|
||||
gitignore: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
for line in gitignore.lines() {
|
||||
try!(self.add_line(from.clone(), line));
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Add a line from a gitignore file to this builder.
|
||||
///
|
||||
/// If this line came from a particular `gitignore` file, then its path
|
||||
/// should be provided here.
|
||||
///
|
||||
/// If the line could not be parsed as a glob, then an error is returned.
|
||||
pub fn add_line(
|
||||
&mut self,
|
||||
from: Option<PathBuf>,
|
||||
mut line: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
if line.starts_with("#") {
|
||||
return Ok(self);
|
||||
}
|
||||
if !line.ends_with("\\ ") {
|
||||
line = line.trim_right();
|
||||
}
|
||||
if line.is_empty() {
|
||||
return Ok(self);
|
||||
}
|
||||
let mut glob = Glob {
|
||||
from: from,
|
||||
original: line.to_string(),
|
||||
actual: String::new(),
|
||||
is_whitelist: false,
|
||||
is_only_dir: false,
|
||||
};
|
||||
let mut literal_separator = false;
|
||||
let has_slash = line.chars().any(|c| c == '/');
|
||||
let is_absolute = line.chars().nth(0).unwrap() == '/';
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
} else {
|
||||
if line.starts_with("!") {
|
||||
glob.is_whitelist = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
if line.starts_with("/") {
|
||||
// `man gitignore` says that if a glob starts with a slash,
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
literal_separator = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
glob.actual = line.to_string();
|
||||
if has_slash {
|
||||
literal_separator = true;
|
||||
}
|
||||
// If there was a leading slash, then this is a glob that must
|
||||
// match the entire path name. Otherwise, we should let it match
|
||||
// anywhere, so use a **/ prefix.
|
||||
if !is_absolute {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !glob.actual.starts_with("**/") {
|
||||
glob.actual = format!("**/{}", glob.actual);
|
||||
}
|
||||
}
|
||||
// If the glob ends with `/**`, then we should only match everything
|
||||
// inside a directory, but not the directory itself. Standard globs
|
||||
// will match the directory. So we add `/*` to force the issue.
|
||||
if glob.actual.ends_with("/**") {
|
||||
glob.actual = format!("{}/*", glob.actual);
|
||||
}
|
||||
let parsed = try!(
|
||||
GlobBuilder::new(&glob.actual)
|
||||
.literal_separator(literal_separator)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob(err.to_string())));
|
||||
self.builder.add(parsed);
|
||||
self.globs.push(glob);
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the file path of the current environment's global gitignore file.
|
||||
///
|
||||
/// Note that the file path returned may not exist.
|
||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||
gitconfig_contents()
|
||||
.and_then(|data| parse_excludes_file(&data))
|
||||
.or_else(excludes_file_default)
|
||||
}
|
||||
|
||||
/// Reads the raw bytes of `$HOME/.gitconfig`.
///
/// Returns `None` if `HOME` is not set, or if the file cannot be opened or
/// read.
fn gitconfig_contents() -> Option<Vec<u8>> {
    env::var_os("HOME")
        .map(|home| PathBuf::from(home).join(".gitconfig"))
        .and_then(|config_path| File::open(config_path).ok())
        .and_then(|file| {
            let mut bytes = vec![];
            let mut rdr = io::BufReader::new(file);
            rdr.read_to_end(&mut bytes).ok().map(|_| bytes)
        })
}
|
||||
|
||||
/// Returns the default file path for a global .gitignore file.
///
/// If `XDG_CONFIG_HOME` is set and non-empty, this is
/// `$XDG_CONFIG_HOME/git/ignore`. Otherwise it is
/// `$HOME/.config/git/ignore`. Returns `None` if neither variable yields a
/// base directory.
fn excludes_file_default() -> Option<PathBuf> {
    env::var_os("XDG_CONFIG_HOME")
        .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
        // Without XDG_CONFIG_HOME the config directory defaults to
        // `$HOME/.config`, not `$HOME` itself.
        .or_else(|| {
            env::var_os("HOME").map(|home| PathBuf::from(home).join(".config"))
        })
        .map(|config_dir| config_dir.join("git/ignore"))
}
|
||||
|
||||
/// Extract git's `core.excludesfile` config setting from the raw file contents
|
||||
/// given.
|
||||
fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
||||
// N.B. This is the lazy approach, and isn't technically correct, but
|
||||
// probably works in more circumstances. I guess we would ideally have
|
||||
// a full INI parser. Yuck.
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(
|
||||
r"(?ium)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
||||
};
|
||||
let caps = match RE.captures(data) {
|
||||
None => return None,
|
||||
Some(caps) => caps,
|
||||
};
|
||||
str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s)))
|
||||
}
|
||||
|
||||
/// Expands a leading `~` in a file path to the value of `$HOME`.
///
/// Only a path that is exactly `~` or that starts with `~/` is expanded.
/// A tilde anywhere else is an ordinary character and is left alone, as
/// is the whole path when `HOME` is unset or not valid Unicode.
fn expand_tilde(path: &str) -> String {
    let home = match env::var("HOME") {
        Err(_) => return path.to_string(),
        Ok(home) => home,
    };
    if path == "~" {
        home
    } else if path.starts_with("~/") {
        // Keep the slash and everything after it; replace only the tilde.
        format!("{}{}", home, &path[1..])
    } else {
        path.to_string()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;
    use super::{Gitignore, GitignoreBuilder};

    /// Builds a matcher rooted at `root` from in-memory gitignore text,
    /// panicking if any line fails to parse.
    fn gi_from_str<P: AsRef<Path>>(root: P, s: &str) -> Gitignore {
        let mut builder = GitignoreBuilder::new(root);
        builder.add_str(None, s).unwrap();
        builder.build().unwrap()
    }

    /// Reports whether `path` is ignored by the gitignore text `gi` when
    /// rooted at `root`.
    fn is_ignored<R: AsRef<Path>, P: AsRef<Path>>(
        root: R,
        gi: &str,
        path: P,
        is_dir: bool,
    ) -> bool {
        gi_from_str(root, gi).matched(path, is_dir).is_ignore()
    }

    /// Defines a test asserting that a path IS ignored. `is_dir` defaults
    /// to false when omitted.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    /// Defines a test asserting that a path is NOT ignored. `is_dir`
    /// defaults to false when omitted.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(!is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
    ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
    ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
    ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
    ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
    not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
    not_ignored!(
        ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
        "./third_party/protobuf/csharp/src/packages/repositories.config");

    /// Copies a string into an owned byte vector.
    fn bytes(s: &str) -> Vec<u8> {
        s.as_bytes().to_vec()
    }

    /// Renders a path as an owned string, panicking if it isn't UTF-8.
    fn path_string<P: AsRef<Path>>(path: P) -> String {
        path.as_ref().to_str().unwrap().to_owned()
    }

    #[test]
    fn parse_excludes_file1() {
        let data = bytes("[core]\nexcludesFile = /foo/bar");
        let got = super::parse_excludes_file(&data).unwrap();
        assert_eq!(path_string(got), "/foo/bar");
    }

    #[test]
    fn parse_excludes_file2() {
        let data = bytes("[core]\nexcludesFile = ~/foo/bar");
        let got = super::parse_excludes_file(&data).unwrap();
        assert_eq!(path_string(got), super::expand_tilde("~/foo/bar"));
    }

    #[test]
    fn parse_excludes_file3() {
        // `excludeFile` (no `s`) is not the setting we look for.
        let data = bytes("[core]\nexcludeFile = /foo/bar");
        assert!(super::parse_excludes_file(&data).is_none());
    }

    // See: https://github.com/BurntSushi/ripgrep/issues/106
    #[test]
    fn regression_106() {
        // A whitespace-only line must not panic.
        gi_from_str("/", " ");
    }
}
|
300
ignore/src/lib.rs
Normal file
300
ignore/src/lib.rs
Normal file
@ -0,0 +1,300 @@
|
||||
/*!
|
||||
The ignore crate provides a fast recursive directory iterator that respects
|
||||
various filters such as globs, file types and `.gitignore` files. The precise
|
||||
matching rules and precedence are explained in the documentation for
|
||||
`WalkBuilder`.
|
||||
|
||||
Secondarily, this crate exposes gitignore and file type matchers for use cases
|
||||
that demand more fine-grained control.
|
||||
|
||||
# Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
recursively traverse the current directory while automatically filtering out
|
||||
files and directories according to ignore globs found in files like
|
||||
`.ignore` and `.gitignore`:
|
||||
|
||||
|
||||
```rust,no_run
|
||||
use ignore::Walk;
|
||||
|
||||
for result in Walk::new("./") {
|
||||
// Each item yielded by the iterator is either a directory entry or an
|
||||
// error, so either print the path or the error.
|
||||
match result {
|
||||
Ok(entry) => println!("{}", entry.path().display()),
|
||||
Err(err) => println!("ERROR: {}", err),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# Example: advanced
|
||||
|
||||
By default, the recursive directory iterator will ignore hidden files and
|
||||
directories. This can be disabled by building the iterator with `WalkBuilder`:
|
||||
|
||||
```rust,no_run
|
||||
use ignore::WalkBuilder;
|
||||
|
||||
for result in WalkBuilder::new("./").hidden(false).build() {
|
||||
println!("{:?}", result);
|
||||
}
|
||||
```
|
||||
|
||||
See the documentation for `WalkBuilder` for many other options.
|
||||
*/
|
||||
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
#[cfg(test)]
|
||||
extern crate tempdir;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub use walk::{DirEntry, Walk, WalkBuilder};
|
||||
|
||||
mod dir;
|
||||
pub mod gitignore;
|
||||
mod pathutil;
|
||||
pub mod overrides;
|
||||
pub mod types;
|
||||
mod walk;
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)]
pub enum Error {
    /// A collection of "soft" errors. These occur when adding an ignore
    /// file partially succeeded.
    Partial(Vec<Error>),
    /// An error associated with a specific line number.
    WithLineNumber { line: u64, err: Box<Error> },
    /// An error associated with a particular file path.
    WithPath { path: PathBuf, err: Box<Error> },
    /// An error that occurs when doing I/O, such as reading an ignore file.
    Io(io::Error),
    /// An error that occurs when trying to parse a glob.
    Glob(String),
    /// A type selection for a file type that is not defined.
    UnrecognizedFileType(String),
    /// A user specified file type definition could not be parsed.
    InvalidDefinition,
}

impl Error {
    /// Returns true if this is a partial error.
    ///
    /// A partial error occurs when only some operations failed while others
    /// may have succeeded. For example, an ignore file may contain an
    /// invalid glob among otherwise valid globs.
    ///
    /// Line-number and path wrappers are looked through.
    pub fn is_partial(&self) -> bool {
        match *self {
            Error::Partial(_) => true,
            Error::WithLineNumber { ref err, .. }
            | Error::WithPath { ref err, .. } => err.is_partial(),
            _ => false,
        }
    }

    /// Returns true if this error is exclusively an I/O error.
    ///
    /// Line-number and path wrappers are looked through. A partial error
    /// only counts when it holds exactly one error and that error is an
    /// I/O error.
    pub fn is_io(&self) -> bool {
        match *self {
            Error::Io(_) => true,
            Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
            Error::WithLineNumber { ref err, .. }
            | Error::WithPath { ref err, .. } => err.is_io(),
            Error::Glob(_)
            | Error::UnrecognizedFileType(_)
            | Error::InvalidDefinition => false,
        }
    }

    /// Wraps this error so that it is tagged with the given file path.
    fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
        let path = path.as_ref().to_path_buf();
        Error::WithPath { path: path, err: Box::new(self) }
    }

    /// Wraps this error so that it is tagged with the given line number
    /// and, unless `path` is empty, also with the given file path.
    fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
        let has_path = !path.as_ref().as_os_str().is_empty();
        let with_line = Error::WithLineNumber {
            line: lineno,
            err: Box::new(self),
        };
        if has_path {
            with_line.with_path(path)
        } else {
            with_line
        }
    }
}

impl error::Error for Error {
    /// Returns a short description; wrappers defer to the wrapped error.
    fn description(&self) -> &str {
        match *self {
            Error::Partial(_) => "partial error",
            Error::WithLineNumber { ref err, .. }
            | Error::WithPath { ref err, .. } => {
                error::Error::description(&**err)
            }
            Error::Io(ref err) => error::Error::description(err),
            Error::Glob(ref msg) => msg,
            Error::UnrecognizedFileType(_) => "unrecognized file type",
            Error::InvalidDefinition => "invalid definition",
        }
    }
}

impl fmt::Display for Error {
    /// Formats the error for end users. Partial errors print one wrapped
    /// error per line; line/path wrappers prefix the wrapped message.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::Partial(ref errs) => {
                let rendered = errs
                    .iter()
                    .map(|err| err.to_string())
                    .collect::<Vec<String>>()
                    .join("\n");
                write!(f, "{}", rendered)
            }
            Error::WithLineNumber { line, ref err } => {
                write!(f, "line {}: {}", line, err)
            }
            Error::WithPath { ref path, ref err } => {
                write!(f, "{}: {}", path.display(), err)
            }
            Error::Io(ref err) => fmt::Display::fmt(err, f),
            Error::Glob(ref msg) => write!(f, "{}", msg),
            Error::UnrecognizedFileType(ref ty) => {
                write!(f, "unrecognized file type: {}", ty)
            }
            Error::InvalidDefinition => {
                write!(f, "invalid definition (format is type:glob, e.g., \
                           html:*.html)")
            }
        }
    }
}

impl From<io::Error> for Error {
    /// Wraps an I/O error as `Error::Io`.
    fn from(err: io::Error) -> Error {
        Error::Io(err)
    }
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct PartialErrorBuilder(Vec<Error>);
|
||||
|
||||
impl PartialErrorBuilder {
|
||||
fn push(&mut self, err: Error) {
|
||||
self.0.push(err);
|
||||
}
|
||||
|
||||
fn push_ignore_io(&mut self, err: Error) {
|
||||
if !err.is_io() {
|
||||
self.push(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_push(&mut self, err: Option<Error>) {
|
||||
if let Some(err) = err {
|
||||
self.push(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
|
||||
if let Some(err) = err {
|
||||
self.push_ignore_io(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn into_error_option(mut self) -> Option<Error> {
|
||||
if self.0.is_empty() {
|
||||
None
|
||||
} else if self.0.len() == 1 {
|
||||
Some(self.0.pop().unwrap())
|
||||
} else {
|
||||
Some(Error::Partial(self.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The result of a glob match.
///
/// The type parameter `T` typically refers to a type that provides more
/// information about a particular match. For example, it might identify
/// the specific gitignore file and the specific glob pattern that caused
/// the match.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// The path didn't match any glob.
    None,
    /// The highest precedence glob that matched indicates the path should
    /// be ignored.
    Ignore(T),
    /// The highest precedence glob that matched indicates the path should
    /// be whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        if let Match::None = *self {
            true
        } else {
            false
        }
    }

    /// Returns true if the match says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        if let Match::Ignore(_) = *self {
            true
        } else {
            false
        }
    }

    /// Returns true if the match says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        if let Match::Whitelist(_) = *self {
            true
        } else {
            false
        }
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the inner value. A
    /// non-match stays a non-match.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Whitelist(inner) => Match::Ignore(inner),
            Match::Ignore(inner) => Match::Whitelist(inner),
            Match::None => Match::None,
        }
    }

    /// Returns a reference to the value inside this match, if there is
    /// one.
    pub fn inner(&self) -> Option<&T> {
        match *self {
            Match::Ignore(ref inner) | Match::Whitelist(ref inner) => {
                Some(inner)
            }
            Match::None => None,
        }
    }

    /// Applies `f` to the value inside this match, keeping the kind of
    /// match.
    ///
    /// A non-match has no value, so `f` is not called and `Match::None` is
    /// returned.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Ignore(inner) => Match::Ignore(f(inner)),
            Match::Whitelist(inner) => Match::Whitelist(f(inner)),
            Match::None => Match::None,
        }
    }
}
|
202
ignore/src/overrides.rs
Normal file
202
ignore/src/overrides.rs
Normal file
@ -0,0 +1,202 @@
|
||||
/*!
|
||||
The overrides module provides a way to specify a set of override globs.
|
||||
This provides functionality similar to `--include` or `--exclude` in command
|
||||
line tools.
|
||||
*/
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use {Error, Match};
|
||||
|
||||
/// Glob represents a single glob in an override matcher.
|
||||
///
|
||||
/// This is used to report information about the highest precedent glob
|
||||
/// that matched.
|
||||
///
|
||||
/// Note that not all matches necessarily correspond to a specific glob. For
|
||||
/// example, if there are one or more whitelist globs and a file path doesn't
|
||||
/// match any glob in the set, then the file path is considered to be ignored.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the matcher that produced
|
||||
/// this glob.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Glob<'a>(GlobInner<'a>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobInner<'a> {
|
||||
/// No glob matched, but the file path should still be ignored.
|
||||
UnmatchedIgnore,
|
||||
/// A glob matched.
|
||||
Matched(&'a gitignore::Glob),
|
||||
}
|
||||
|
||||
impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a set of overrides provided explicitly by the end user.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Override(Gitignore);
|
||||
|
||||
impl Override {
|
||||
/// Returns an empty matcher that never matches any file path.
|
||||
pub fn empty() -> Override {
|
||||
Override(Gitignore::empty())
|
||||
}
|
||||
|
||||
/// Returns the directory of this override set.
|
||||
///
|
||||
/// All matches are done relative to this path.
|
||||
pub fn path(&self) -> &Path {
|
||||
self.0.path()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this matcher is empty.
|
||||
///
|
||||
/// When a matcher is empty, it will never match any file path.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore globs.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.0.num_whitelists()
|
||||
}
|
||||
|
||||
/// Returns the total number of whitelisted globs.
|
||||
pub fn num_whitelists(&self) -> u64 {
|
||||
self.0.num_ignores()
|
||||
}
|
||||
|
||||
/// Returns whether the given file path matched a pattern in this override
|
||||
/// matcher.
|
||||
///
|
||||
/// `is_dir` should be true if the path refers to a directory and false
|
||||
/// otherwise.
|
||||
///
|
||||
/// If there are no overrides, then this always returns `Match::None`.
|
||||
///
|
||||
/// If there is at least one whitelist override, then this never returns
|
||||
/// `Match::None`, since non-matches are interpreted as ignored.
|
||||
///
|
||||
/// The given path is matched to the globs relative to the path given
|
||||
/// when building the override matcher. Specifically, before matching
|
||||
/// `path`, its prefix (as determined by a common suffix of the directory
|
||||
/// given) is stripped. If there is no common suffix/prefix overlap, then
|
||||
/// `path` is assumed to reside in the same directory as the root path for
|
||||
/// this set of overrides.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<Glob<'a>> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
let mat = self.0.matched(path, is_dir).invert();
|
||||
if mat.is_none() && self.num_whitelists() > 0 {
|
||||
return Match::Ignore(Glob::unmatched());
|
||||
}
|
||||
mat.map(move |giglob| Glob(GlobInner::Matched(giglob)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a matcher for a set of glob overrides.
|
||||
pub struct OverrideBuilder {
|
||||
builder: GitignoreBuilder,
|
||||
}
|
||||
|
||||
impl OverrideBuilder {
|
||||
/// Create a new override builder.
|
||||
///
|
||||
/// Matching is done relative to the directory path provided.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
|
||||
OverrideBuilder {
|
||||
builder: GitignoreBuilder::new(path),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new override matcher from the globs added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new globs can be added to it.
|
||||
pub fn build(&self) -> Result<Override, Error> {
|
||||
Ok(Override(try!(self.builder.build())))
|
||||
}
|
||||
|
||||
/// Add a glob to the set of overrides.
|
||||
///
|
||||
/// Globs provided here have precisely the same semantics as a single
|
||||
/// line in a `gitignore` file, where the meaning of `!` is inverted:
|
||||
/// namely, `!` at the beginning of a glob will ignore a file. Without `!`,
|
||||
/// all matches of the glob provided are treated as whitelist matches.
|
||||
pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> {
|
||||
try!(self.builder.add_line(None, glob));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{Override, OverrideBuilder};

    /// Directory that all test overrides are rooted at.
    const ROOT: &'static str = "/home/andrew/foo";

    /// Builds an override matcher rooted at `ROOT` from the given globs.
    fn ov(globs: &[&str]) -> Override {
        let mut builder = OverrideBuilder::new(ROOT);
        for &glob in globs {
            builder.add(glob).unwrap();
        }
        builder.build().unwrap()
    }

    // No globs at all: nothing ever matches.
    #[test]
    fn empty() {
        let m = ov(&[]);
        for path in &["a.foo", "a", ""] {
            assert!(m.matched(path, false).is_none());
        }
    }

    // One whitelist glob and one ignore glob; non-matches become ignores.
    #[test]
    fn simple() {
        let m = ov(&["*.foo", "!*.bar"]);
        for &is_dir in &[false, true] {
            assert!(m.matched("a.foo", is_dir).is_whitelist());
        }
        for &is_dir in &[false, true] {
            assert!(m.matched("a.rs", is_dir).is_ignore());
        }
        for &is_dir in &[false, true] {
            assert!(m.matched("a.bar", is_dir).is_ignore());
        }
    }

    // Only ignore globs: non-matches stay unmatched.
    #[test]
    fn only_ignores() {
        let m = ov(&["!*.bar"]);
        for &is_dir in &[false, true] {
            assert!(m.matched("a.rs", is_dir).is_none());
        }
        for &is_dir in &[false, true] {
            assert!(m.matched("a.bar", is_dir).is_ignore());
        }
    }

    // A later ignore glob wins over an earlier whitelist glob.
    #[test]
    fn precedence() {
        let m = ov(&["*.foo", "!*.bar.foo"]);
        assert!(m.matched("a.foo", false).is_whitelist());
        assert!(m.matched("a.baz", false).is_ignore());
        assert!(m.matched("a.bar.foo", false).is_ignore());
    }

    // Anchoring, directory components and `**` behave as in gitignore.
    #[test]
    fn gitignore() {
        let m = ov(&["/foo", "bar/*.rs", "baz/**"]);
        assert!(m.matched("bar/wat/lib.rs", false).is_ignore());
        assert!(m.matched("wat/bar/lib.rs", false).is_whitelist());
        assert!(m.matched("foo", false).is_whitelist());
        assert!(m.matched("wat/foo", false).is_ignore());
        assert!(m.matched("baz", false).is_ignore());
        assert!(m.matched("baz/a", false).is_whitelist());
        assert!(m.matched("baz/a/b", false).is_whitelist());
    }
}
|
108
ignore/src/pathutil.rs
Normal file
108
ignore/src/pathutil.rs
Normal file
@ -0,0 +1,108 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether this file path is considered hidden, i.e. whether its
/// file name begins with a `.`.
///
/// Paths without a file name, or whose file name is not valid UTF-8, are
/// never hidden.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    match file_name(path.as_ref()).and_then(|name| name.to_str()) {
        Some(name) => name.starts_with("."),
        None => false,
    }
}
|
||||
|
||||
/// Removes `prefix` from the front of `path` and returns what is left.
///
/// The comparison is done on raw bytes, not on path components, so the
/// remainder keeps any separator that followed the prefix.
///
/// Returns `None` if `path` does not begin with `prefix`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    use std::os::unix::ffi::OsStrExt;

    let needle = prefix.as_ref().as_os_str().as_bytes();
    let haystack = path.as_os_str().as_bytes();
    if haystack.starts_with(needle) {
        let rest = &haystack[needle.len()..];
        Some(Path::new(OsStr::from_bytes(rest)))
    } else {
        None
    }
}
|
||||
|
||||
/// Removes `prefix` from the front of `path` and returns what is left.
///
/// This delegates to `Path::strip_prefix`.
///
/// Returns `None` if `path` does not begin with `prefix`.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    match path.strip_prefix(prefix) {
        Ok(rest) => Some(rest),
        Err(_) => None,
    }
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
|
||||
/// the empty string.
|
||||
#[cfg(unix)]
|
||||
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
memchr(b'/', path).is_none()
|
||||
}
|
||||
|
||||
/// Reports whether this file path is just a file name, i.e. its parent is
/// the empty string.
///
/// A path with no parent at all is not considered a file name.
#[cfg(not(unix))]
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
    match path.as_ref().parent() {
        Some(parent) => parent.as_os_str().is_empty(),
        None => false,
    }
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
///
/// This delegates to `std::path::Path::file_name`, so `None` is returned
/// whenever that method returns `None`.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let as_path: &'a Path = path.as_ref();
    as_path.file_name()
}
|
568
ignore/src/types.rs
Normal file
568
ignore/src/types.rs
Normal file
@ -0,0 +1,568 @@
|
||||
/*!
|
||||
The types module provides a way of associating globs on file names to file
|
||||
types.
|
||||
|
||||
This can be used to match specific types of files. For example, among
|
||||
the default file types provided, the Rust file type is defined to be `*.rs`
|
||||
with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
|
||||
name `c`.
|
||||
|
||||
Note that the set of default types may change over time.
|
||||
|
||||
# Example
|
||||
|
||||
This shows how to create and use a simple file type matcher using the default
|
||||
file types defined in this crate.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.select("rust");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("foo.rs", false).is_whitelist());
|
||||
assert!(matcher.matched("foo.c", false).is_ignore());
|
||||
```
|
||||
|
||||
# Example: negation
|
||||
|
||||
This is like the previous example, but shows how negating a file type works.
|
||||
That is, this will let us match file paths that *don't* correspond to a
|
||||
particular file type.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.negate("c");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("foo.rs", false).is_none());
|
||||
assert!(matcher.matched("foo.c", false).is_ignore());
|
||||
```
|
||||
|
||||
# Example: custom file type definitions
|
||||
|
||||
This shows how to extend this library's default file type definitions with
|
||||
your own.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.add("foo", "*.foo");
|
||||
// Another way of adding a file type definition.
|
||||
// This is useful when accepting input from an end user.
|
||||
builder.add_def("bar:*.bar");
|
||||
// Note: we only select `foo`, not `bar`.
|
||||
builder.select("foo");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("x.foo", false).is_whitelist());
|
||||
// This is ignored because we only selected the `foo` file type.
|
||||
assert!(matcher.matched("x.bar", false).is_ignore());
|
||||
```
|
||||
*/
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::file_name;
|
||||
use {Error, Match};
|
||||
|
||||
/// The built-in file type definitions.
///
/// Each entry pairs a file type name with the globs that recognize files
/// of that type. The same glob may appear under several names, and some
/// names are aliases for the same set of globs (e.g. `markdown` and `md`).
const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("asm", &["*.asm", "*.s", "*.S"]),
    ("awk", &["*.awk"]),
    ("c", &["*.c", "*.h", "*.H"]),
    ("cbor", &["*.cbor"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("config", &["*.config"]),
    ("cpp", &["*.C", "*.cc", "*.cpp", "*.cxx", "*.h", "*.H", "*.hh", "*.hpp"]),
    ("csharp", &["*.cs"]),
    ("css", &["*.css"]),
    ("cython", &["*.pyx"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("elisp", &["*.el"]),
    ("erlang", &["*.erl", "*.hrl"]),
    (
        "fortran",
        &[
            "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
            "*.f90", "*.F90", "*.f95", "*.F95",
        ],
    ),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("go", &["*.go"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html"]),
    ("java", &["*.java"]),
    ("jinja", &["*.jinja", "*.jinja2"]),
    ("js", &["*.js", "*.jsx", "*.vue"]),
    ("json", &["*.json"]),
    ("jsonl", &["*.jsonl"]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lua", &["*.lua"]),
    ("m4", &["*.ac", "*.m4"]),
    ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]),
    ("markdown", &["*.md"]),
    ("md", &["*.md"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("nim", &["*.nim"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("py", &["*.py"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rst", &["*.rst"]),
    ("ruby", &["*.rb"]),
    ("rust", &["*.rs"]),
    ("scala", &["*.scala"]),
    ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("swift", &["*.swift"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.cls", "*.sty"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("vimscript", &["*.vim"]),
    ("xml", &["*.xml"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
];
|
||||
|
||||
/// Glob represents a single glob in a set of file type definitions.
|
||||
///
|
||||
/// There may be more than one glob for a particular file type.
|
||||
///
|
||||
/// This is used to report information about the highest precedent glob
|
||||
/// that matched.
|
||||
///
|
||||
/// Note that not all matches necessarily correspond to a specific glob.
|
||||
/// For example, if there are one or more selections and a file path doesn't
|
||||
/// match any of those selections, then the file path is considered to be
|
||||
/// ignored.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the underlying file type
|
||||
/// definition, which corresponds to the lifetime of the file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Glob<'a>(GlobInner<'a>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobInner<'a> {
|
||||
/// No glob matched, but the file path should still be ignored.
|
||||
UnmatchedIgnore,
|
||||
/// A glob matched.
|
||||
Matched {
|
||||
/// The file type definition which provided the glob.
|
||||
def: &'a FileTypeDef,
|
||||
/// The index of the glob that matched inside the file type definition.
|
||||
which: usize,
|
||||
/// Whether the selection was negated or not.
|
||||
negated: bool,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
}
|
||||
|
||||
/// The definition of one file type: a name plus the globs recognizing it.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A file type definition is also reported when it is responsible
/// for a match.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
    /// The name of the file type, e.g. `rust`.
    name: String,
    /// The globs used to recognize files of this type, e.g. `*.rs`.
    globs: Vec<String>,
}
|
||||
|
||||
impl FileTypeDef {
|
||||
/// Return the name of this file type.
|
||||
pub fn name(&self) -> &str {
|
||||
&self.name
|
||||
}
|
||||
|
||||
/// Return the globs used to recognize this file type.
|
||||
pub fn globs(&self) -> &[String] {
|
||||
&self.globs
|
||||
}
|
||||
}
|
||||
|
||||
/// Types is a file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
/// All of the file type definitions, sorted lexicographically by name.
|
||||
defs: Vec<FileTypeDef>,
|
||||
/// All of the selections made by the user.
|
||||
selections: Vec<Selection<FileTypeDef>>,
|
||||
/// Whether there is at least one Selection::Select in our selections.
|
||||
/// When this is true, a Match::None is converted to Match::Ignore.
|
||||
has_selected: bool,
|
||||
/// A mapping from glob index in the set to two indices. The first is an
|
||||
/// index into `selections` and the second is an index into the
|
||||
/// corresponding file type definition's list of globs.
|
||||
glob_to_selection: Vec<(usize, usize)>,
|
||||
/// The set of all glob selections, used for actual matching.
|
||||
set: GlobSet,
|
||||
/// Temporary storage for globs that match.
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
/// One selection of a file type, either positive or negated.
///
/// Both variants carry the name of the file type together with a payload
/// of type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// The file type was selected.
    Select(String, T),
    /// The file type was negated.
    Negate(String, T),
}
|
||||
|
||||
impl<T> Selection<T> {
|
||||
fn is_negated(&self) -> bool {
|
||||
match *self {
|
||||
Selection::Select(..) => false,
|
||||
Selection::Negate(..) => true,
|
||||
}
|
||||
}
|
||||
|
||||
fn name(&self) -> &str {
|
||||
match *self {
|
||||
Selection::Select(ref name, _) => name,
|
||||
Selection::Negate(ref name, _) => name,
|
||||
}
|
||||
}
|
||||
|
||||
fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
|
||||
match self {
|
||||
Selection::Select(name, inner) => {
|
||||
Selection::Select(name, f(inner))
|
||||
}
|
||||
Selection::Negate(name, inner) => {
|
||||
Selection::Negate(name, f(inner))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn inner(&self) -> &T {
|
||||
match *self {
|
||||
Selection::Select(_, ref inner) => inner,
|
||||
Selection::Negate(_, ref inner) => inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Types {
|
||||
/// Creates a new file type matcher that never matches any path and
|
||||
/// contains no file type definitions.
|
||||
pub fn empty() -> Types {
|
||||
Types {
|
||||
defs: vec![],
|
||||
selections: vec![],
|
||||
has_selected: false,
|
||||
glob_to_selection: vec![],
|
||||
set: GlobSetBuilder::new().build().unwrap(),
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this matcher has zero selections.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.selections.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the number of selections used in this matcher.
|
||||
pub fn len(&self) -> usize {
|
||||
self.selections.len()
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
///
|
||||
/// Definitions and globs are sorted.
|
||||
pub fn definitions(&self) -> &[FileTypeDef] {
|
||||
&self.defs
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this file type matcher.
|
||||
///
|
||||
/// The path is considered whitelisted if it matches a selected file type.
|
||||
/// The path is considered ignored if it matches a negated file type.
|
||||
/// If at least one file type is selected and `path` doesn't match, then
|
||||
/// the path is also considered ignored.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<Glob<'a>> {
|
||||
// File types don't apply to directories, and we can't do anything
|
||||
// if our glob set is empty.
|
||||
if is_dir || self.set.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
// We only want to match against the file name, so extract it.
|
||||
// If one doesn't exist, then we can't match it.
|
||||
let name = match file_name(path.as_ref()) {
|
||||
Some(name) => name,
|
||||
None if self.has_selected => {
|
||||
return Match::Ignore(Glob::unmatched());
|
||||
}
|
||||
None => {
|
||||
return Match::None;
|
||||
}
|
||||
};
|
||||
let mut matches = self.matches.get_default().borrow_mut();
|
||||
self.set.matches_into(name, &mut *matches);
|
||||
// The highest precedent match is the last one.
|
||||
if let Some(&i) = matches.last() {
|
||||
let (isel, iglob) = self.glob_to_selection[i];
|
||||
let sel = &self.selections[isel];
|
||||
let glob = Glob(GlobInner::Matched {
|
||||
def: sel.inner(),
|
||||
which: iglob,
|
||||
negated: sel.is_negated(),
|
||||
});
|
||||
return if sel.is_negated() {
|
||||
Match::Ignore(glob)
|
||||
} else {
|
||||
Match::Whitelist(glob)
|
||||
};
|
||||
}
|
||||
if self.has_selected {
|
||||
Match::Ignore(Glob::unmatched())
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// TypesBuilder builds a type matcher from a set of file type definitions and
|
||||
/// a set of file type selections.
|
||||
pub struct TypesBuilder {
|
||||
types: HashMap<String, FileTypeDef>,
|
||||
selections: Vec<Selection<()>>,
|
||||
}
|
||||
|
||||
impl TypesBuilder {
|
||||
/// Create a new builder for a file type matcher.
|
||||
///
|
||||
/// The builder contains *no* type definitions to start with. A set
|
||||
/// of default type definitions can be added with `add_defaults`, and
|
||||
/// additional type definitions can be added with `select` and `negate`.
|
||||
pub fn new() -> TypesBuilder {
|
||||
TypesBuilder {
|
||||
types: HashMap::new(),
|
||||
selections: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the current set of file type definitions *and* selections into
|
||||
/// a file type matcher.
|
||||
pub fn build(&self) -> Result<Types, Error> {
|
||||
let defs = self.definitions();
|
||||
let has_selected = self.selections.iter().any(|s| !s.is_negated());
|
||||
|
||||
let mut selections = vec![];
|
||||
let mut glob_to_selection = vec![];
|
||||
let mut build_set = GlobSetBuilder::new();
|
||||
for (isel, selection) in self.selections.iter().enumerate() {
|
||||
let def = match self.types.get(selection.name()) {
|
||||
Some(def) => def.clone(),
|
||||
None => {
|
||||
let name = selection.name().to_string();
|
||||
return Err(Error::UnrecognizedFileType(name));
|
||||
}
|
||||
};
|
||||
for (iglob, glob) in def.globs.iter().enumerate() {
|
||||
build_set.add(try!(
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob(err.to_string()))));
|
||||
glob_to_selection.push((isel, iglob));
|
||||
}
|
||||
selections.push(selection.clone().map(move |_| def));
|
||||
}
|
||||
let set = try!(build_set.build().map_err(|err| {
|
||||
Error::Glob(err.to_string())
|
||||
}));
|
||||
Ok(Types {
|
||||
defs: defs,
|
||||
selections: selections,
|
||||
has_selected: has_selected,
|
||||
glob_to_selection: glob_to_selection,
|
||||
set: set,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
///
|
||||
/// Definitions and globs are sorted.
|
||||
pub fn definitions(&self) -> Vec<FileTypeDef> {
|
||||
let mut defs = vec![];
|
||||
for def in self.types.values() {
|
||||
let mut def = def.clone();
|
||||
def.globs.sort();
|
||||
defs.push(def);
|
||||
}
|
||||
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
|
||||
defs
|
||||
}
|
||||
|
||||
/// Select the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types currently defined are selected.
|
||||
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selections.push(Selection::Select(name.to_string(), ()));
|
||||
}
|
||||
} else {
|
||||
self.selections.push(Selection::Select(name.to_string(), ()));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Ignore the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types currently defined are negated.
|
||||
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selections.push(Selection::Negate(name.to_string(), ()));
|
||||
}
|
||||
} else {
|
||||
self.selections.push(Selection::Negate(name.to_string(), ()));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Clear any file type definitions for the type name given.
|
||||
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
self.types.remove(name);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition. `name` can be arbitrary and `pat`
|
||||
/// should be a glob recognizing file paths belonging to the `name` type.
|
||||
///
|
||||
/// If `name` is `all` or otherwise contains a `:`, then an error is
|
||||
/// returned.
|
||||
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
|
||||
if name == "all" || name.contains(':') {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
let (key, glob) = (name.to_string(), glob.to_string());
|
||||
self.types.entry(key).or_insert_with(|| {
|
||||
FileTypeDef { name: name.to_string(), globs: vec![] }
|
||||
}).globs.push(glob);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a new file type definition specified in string form. The format
|
||||
/// is `name:glob`. Names may not include a colon.
|
||||
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
|
||||
let name: String = def.chars().take_while(|&c| c != ':').collect();
|
||||
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
|
||||
if name.is_empty() || pat.is_empty() {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
self.add(&name, &pat)
|
||||
}
|
||||
|
||||
/// Add a set of default file type definitions.
|
||||
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
|
||||
static MSG: &'static str = "adding a default type should never fail";
|
||||
for &(name, exts) in DEFAULT_TYPES {
|
||||
for ext in exts {
|
||||
self.add(name, ext).expect(MSG);
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    /// Generates one `#[test]` function named `$name`.
    ///
    /// The test builds a matcher from the definitions in `$types`, selects
    /// every type in `$sel`, negates every type in `$selnot` and then checks
    /// that `$path` is not ignored. With a leading `not`, it checks that
    /// `$path` is ignored instead.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for definition in $types {
                    builder.add_def(definition).unwrap();
                }
                for selected in $sel {
                    builder.select(selected);
                }
                for negated in $selnot {
                    builder.negate(negated);
                }
                let matcher = builder.build().unwrap();
                let outcome = matcher.matched($path, false);
                assert_eq!($matched, !outcome.is_ignore());
            }
        };
    }

    /// The file type definitions shared by all tests, in `name:glob` form.
    fn types() -> Vec<&'static str> {
        let defs = vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
        ];
        defs
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
}
|
592
ignore/src/walk.rs
Normal file
592
ignore/src/walk.rs
Normal file
@ -0,0 +1,592 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::fs::{FileType, Metadata};
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::vec;
|
||||
|
||||
use walkdir::{self, WalkDir, WalkDirIterator};
|
||||
|
||||
use dir::{Ignore, IgnoreBuilder};
|
||||
use gitignore::GitignoreBuilder;
|
||||
use overrides::Override;
|
||||
use types::Types;
|
||||
use {Error, PartialErrorBuilder};
|
||||
|
||||
/// WalkBuilder builds a recursive directory iterator.
|
||||
///
|
||||
/// The builder supports a large number of configurable options. This includes
|
||||
/// specific glob overrides, file type matching, toggling whether hidden
|
||||
/// files are ignored or not, and of course, support for respecting gitignore
|
||||
/// files.
|
||||
///
|
||||
/// By default, all ignore files found are respected. This includes `.ignore`,
|
||||
/// `.gitignore`, `.git/info/exclude` and even your global gitignore
|
||||
/// globs, usually found in `$XDG_CONFIG_HOME/git/ignore`.
|
||||
///
|
||||
/// Some standard recursive directory options are also supported, such as
|
||||
/// limiting the recursive depth or whether to follow symbolic links (disabled
|
||||
/// by default).
|
||||
///
|
||||
/// # Ignore rules
|
||||
///
|
||||
/// There are many rules that influence whether a particular file or directory
|
||||
/// is skipped by this iterator. Those rules are documented here. Note that
|
||||
/// the rules assume a default configuration.
|
||||
///
|
||||
/// * First, glob overrides are checked. If a path matches a glob override,
|
||||
/// then matching stops. The path is then only skipped if the glob that matched
|
||||
/// the path is an ignore glob. (An override glob is a whitelist glob unless it
|
||||
/// starts with a `!`, in which case it is an ignore glob.)
|
||||
/// * Second, ignore files are checked. Ignore files currently only come from
|
||||
/// git ignore files (`.gitignore`, `.git/info/exclude` and the configured
|
||||
/// global gitignore file), plain `.ignore` files, which have the same format
|
||||
/// as gitignore files, or explicitly added ignore files. The precedence order
|
||||
/// is: `.ignore`, `.gitignore`, `.git/info/exclude`, global gitignore and
|
||||
/// finally explicitly added ignore files. Note that precedence between
|
||||
/// different types of ignore files is not impacted by the directory hierarchy;
|
||||
/// any `.ignore` file overrides all `.gitignore` files. Within each
|
||||
/// precedence level, more nested ignore files have a higher precedence over
|
||||
/// less nested ignore files.
|
||||
/// * Third, if the previous step yields an ignore match, than all matching
|
||||
/// is stopped and the path is skipped.. If it yields a whitelist match, then
|
||||
/// process continues. A whitelist match can be overridden by a later matcher.
|
||||
/// * Fourth, unless the path is a directory, the file type matcher is run on
|
||||
/// the path. As above, if it's an ignore match, then all matching is stopped
|
||||
/// and the path is skipped. If it's a whitelist match, then matching
|
||||
/// continues.
|
||||
/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
|
||||
/// path is skipped.
|
||||
/// * Sixth, if the path has made it this far then it is yielded in the
|
||||
/// iterator.
|
||||
pub struct WalkBuilder {
|
||||
paths: Vec<PathBuf>,
|
||||
ig_builder: IgnoreBuilder,
|
||||
parents: bool,
|
||||
max_depth: Option<usize>,
|
||||
follow_links: bool,
|
||||
}
|
||||
|
||||
impl WalkBuilder {
|
||||
/// Create a new builder for a recursive directory iterator for the
|
||||
/// directory given.
|
||||
///
|
||||
/// Note that if you want to traverse multiple different directories, it
|
||||
/// is better to call `add` on this builder than to create multiple
|
||||
/// `Walk` values.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> WalkBuilder {
|
||||
WalkBuilder {
|
||||
paths: vec![path.as_ref().to_path_buf()],
|
||||
ig_builder: IgnoreBuilder::new(),
|
||||
parents: true,
|
||||
max_depth: None,
|
||||
follow_links: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a new `Walk` iterator.
|
||||
pub fn build(&self) -> Walk {
|
||||
let follow_links = self.follow_links;
|
||||
let max_depth = self.max_depth;
|
||||
let its = self.paths.iter().map(move |p| {
|
||||
if p == Path::new("-") {
|
||||
(p.to_path_buf(), None)
|
||||
} else {
|
||||
let mut wd = WalkDir::new(p);
|
||||
wd = wd.follow_links(follow_links || p.is_file());
|
||||
if let Some(max_depth) = max_depth {
|
||||
wd = wd.max_depth(max_depth);
|
||||
}
|
||||
(p.to_path_buf(), Some(WalkEventIter::from(wd)))
|
||||
}
|
||||
}).collect::<Vec<_>>().into_iter();
|
||||
let ig_root = self.ig_builder.build();
|
||||
Walk {
|
||||
its: its,
|
||||
it: None,
|
||||
ig_root: ig_root.clone(),
|
||||
ig: ig_root.clone(),
|
||||
parents: self.parents,
|
||||
}
|
||||
}
|
||||
|
||||
/// Add a file path to the iterator.
|
||||
///
|
||||
/// Each additional file path added is traversed recursively. This should
|
||||
/// be preferred over building multiple `Walk` iterators since this
|
||||
/// enables reusing resources across iteration.
|
||||
pub fn add<P: AsRef<Path>>(&mut self, path: P) -> &mut WalkBuilder {
|
||||
self.paths.push(path.as_ref().to_path_buf());
|
||||
self
|
||||
}
|
||||
|
||||
/// The maximum depth to recurse.
|
||||
///
|
||||
/// The default, `None`, imposes no depth restriction.
|
||||
pub fn max_depth(&mut self, depth: Option<usize>) -> &mut WalkBuilder {
|
||||
self.max_depth = depth;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to follow symbolic links or not.
|
||||
pub fn follow_links(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.follow_links = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add an ignore file to the matcher.
|
||||
///
|
||||
/// This has lower precedence than all other sources of ignore rules.
|
||||
///
|
||||
/// If there was a problem adding the ignore file, then an error is
|
||||
/// returned. Note that the error may indicate *partial* failure. For
|
||||
/// example, if an ignore file contains an invalid glob, all other globs
|
||||
/// are still applied.
|
||||
pub fn add_ignore<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
|
||||
let mut builder = GitignoreBuilder::new("");
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(builder.add(path));
|
||||
match builder.build() {
|
||||
Ok(gi) => { self.ig_builder.add_ignore(gi); }
|
||||
Err(err) => { errs.push(err); }
|
||||
}
|
||||
errs.into_error_option()
|
||||
}
|
||||
|
||||
/// Add an override matcher.
|
||||
///
|
||||
/// By default, no override matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn overrides(&mut self, overrides: Override) -> &mut WalkBuilder {
|
||||
self.ig_builder.overrides(overrides);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher.
|
||||
///
|
||||
/// By default, no file type matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn types(&mut self, types: Types) -> &mut WalkBuilder {
|
||||
self.ig_builder.types(types);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables ignoring hidden files.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn hidden(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.hidden(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading ignore files from parent directories.
|
||||
///
|
||||
/// If this is enabled, then the parent directories of each file path given
|
||||
/// are traversed for ignore files (subject to the ignore settings on
|
||||
/// this builder). Note that file paths are canonicalized with respect to
|
||||
/// the current working directory in order to determine parent directories.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn parents(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.parents = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.ignore` files.
|
||||
///
|
||||
/// `.ignore` files have the same semantics as `gitignore` files and are
|
||||
/// supported by search tools such as ripgrep and The Silver Searcher.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn ignore(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.ignore(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading a global gitignore file, whose path is specified in
|
||||
/// git's `core.excludesFile` config option.
|
||||
///
|
||||
/// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
|
||||
/// does not exist or does not specify `core.excludesFile`, then
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn git_global(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.git_global(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.gitignore` files.
|
||||
///
|
||||
/// `.gitignore` files have match semantics as described in the `gitignore`
|
||||
/// man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_ignore(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.git_ignore(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.git/info/exclude` files.
|
||||
///
|
||||
/// `.git/info/exclude` files have match semantics as described in the
|
||||
/// `gitignore` man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_exclude(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.git_exclude(yes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Walk is a recursive directory iterator over file paths in a directory.
|
||||
///
|
||||
/// Only file and directory paths matching the rules are returned. By default,
|
||||
/// ignore files like `.gitignore` are respected. The precise matching rules
|
||||
/// and precedence is explained in the documentation for `WalkBuilder`.
|
||||
pub struct Walk {
|
||||
its: vec::IntoIter<(PathBuf, Option<WalkEventIter>)>,
|
||||
it: Option<WalkEventIter>,
|
||||
ig_root: Ignore,
|
||||
ig: Ignore,
|
||||
parents: bool,
|
||||
}
|
||||
|
||||
impl Walk {
|
||||
/// Creates a new recursive directory iterator for the file path given.
|
||||
///
|
||||
/// Note that this uses default settings, which include respecting
|
||||
/// `.gitignore` files. To configure the iterator, use `WalkBuilder`
|
||||
/// instead.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Walk {
|
||||
WalkBuilder::new(path).build()
|
||||
}
|
||||
|
||||
fn skip_entry(&self, ent: &walkdir::DirEntry) -> bool {
|
||||
if ent.depth() == 0 {
|
||||
// Never skip the root directory.
|
||||
return false;
|
||||
}
|
||||
let m = self.ig.matched(ent.path(), ent.file_type().is_dir());
|
||||
if m.is_ignore() {
|
||||
debug!("ignoring {}: {:?}", ent.path().display(), m);
|
||||
return true;
|
||||
} else if m.is_whitelist() {
|
||||
debug!("whitelisting {}: {:?}", ent.path().display(), m);
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Walk {
|
||||
type Item = Result<DirEntry, Error>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<Result<DirEntry, Error>> {
|
||||
loop {
|
||||
let ev = match self.it.as_mut().and_then(|it| it.next()) {
|
||||
Some(ev) => ev,
|
||||
None => {
|
||||
match self.its.next() {
|
||||
None => return None,
|
||||
Some((_, None)) => {
|
||||
return Some(Ok(DirEntry {
|
||||
dent: None,
|
||||
err: None,
|
||||
}));
|
||||
}
|
||||
Some((path, Some(it))) => {
|
||||
self.it = Some(it);
|
||||
if self.parents && path.is_dir() {
|
||||
let (ig, err) = self.ig_root.add_parents(path);
|
||||
self.ig = ig;
|
||||
if let Some(err) = err {
|
||||
return Some(Err(err));
|
||||
}
|
||||
} else {
|
||||
self.ig = self.ig_root.clone();
|
||||
}
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
};
|
||||
match ev {
|
||||
Err(err) => {
|
||||
let path = err.path().map(|p| p.to_path_buf());
|
||||
let mut ig_err = Error::Io(io::Error::from(err));
|
||||
if let Some(path) = path {
|
||||
ig_err = Error::WithPath {
|
||||
path: path.to_path_buf(),
|
||||
err: Box::new(ig_err),
|
||||
};
|
||||
}
|
||||
return Some(Err(ig_err));
|
||||
}
|
||||
Ok(WalkEvent::Exit) => {
|
||||
self.ig = self.ig.parent().unwrap();
|
||||
}
|
||||
Ok(WalkEvent::Dir(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
self.it.as_mut().unwrap().it.skip_current_dir();
|
||||
// Still need to push this on the stack because
|
||||
// we'll get a WalkEvent::Exit event for this dir.
|
||||
// We don't care if it errors though.
|
||||
let (igtmp, _) = self.ig.add_child(ent.path());
|
||||
self.ig = igtmp;
|
||||
continue;
|
||||
}
|
||||
let (igtmp, err) = self.ig.add_child(ent.path());
|
||||
self.ig = igtmp;
|
||||
return Some(Ok(DirEntry { dent: Some(ent), err: err }));
|
||||
}
|
||||
Ok(WalkEvent::File(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
continue;
|
||||
}
|
||||
// If this isn't actually a file (e.g., a symlink),
|
||||
// then skip it.
|
||||
if !ent.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
return Some(Ok(DirEntry { dent: Some(ent), err: None }));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A directory entry with a possible error attached.
|
||||
///
|
||||
/// The error typically refers to a problem parsing ignore files in a
|
||||
/// particular directory.
|
||||
#[derive(Debug)]
|
||||
pub struct DirEntry {
|
||||
dent: Option<walkdir::DirEntry>,
|
||||
err: Option<Error>,
|
||||
}
|
||||
|
||||
impl DirEntry {
|
||||
/// The full path that this entry represents.
|
||||
pub fn path(&self) -> &Path {
|
||||
self.dent.as_ref().map_or(Path::new("<stdin>"), |x| x.path())
|
||||
}
|
||||
|
||||
/// Whether this entry corresponds to a symbolic link or not.
|
||||
pub fn path_is_symbolic_link(&self) -> bool {
|
||||
self.dent.as_ref().map_or(false, |x| x.path_is_symbolic_link())
|
||||
}
|
||||
|
||||
/// Returns true if and only if this entry corresponds to stdin.
|
||||
///
|
||||
/// i.e., The entry has depth 0 and its file name is `-`.
|
||||
pub fn is_stdin(&self) -> bool {
|
||||
self.dent.is_none()
|
||||
}
|
||||
|
||||
/// Return the metadata for the file that this entry points to.
|
||||
pub fn metadata(&self) -> Result<Metadata, Error> {
|
||||
if let Some(dent) = self.dent.as_ref() {
|
||||
dent.metadata().map_err(|err| Error::WithPath {
|
||||
path: self.path().to_path_buf(),
|
||||
err: Box::new(Error::Io(io::Error::from(err))),
|
||||
})
|
||||
} else {
|
||||
let ioerr = io::Error::new(
|
||||
io::ErrorKind::Other, "stdin has no metadata");
|
||||
Err(Error::WithPath {
|
||||
path: Path::new("<stdin>").to_path_buf(),
|
||||
err: Box::new(Error::Io(ioerr)),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the file type for the file that this entry points to.
|
||||
///
|
||||
/// This entry doesn't have a file type if it corresponds to stdin.
|
||||
pub fn file_type(&self) -> Option<FileType> {
|
||||
self.dent.as_ref().map(|x| x.file_type())
|
||||
}
|
||||
|
||||
/// Return the file name of this entry.
|
||||
///
|
||||
/// If this entry has no file name (e.g., `/`), then the full path is
|
||||
/// returned.
|
||||
pub fn file_name(&self) -> &OsStr {
|
||||
self.dent.as_ref().map_or(OsStr::new("<stdin>"), |x| x.file_name())
|
||||
}
|
||||
|
||||
/// Returns the depth at which this entry was created relative to the root.
|
||||
pub fn depth(&self) -> usize {
|
||||
self.dent.as_ref().map_or(0, |x| x.depth())
|
||||
}
|
||||
|
||||
/// Returns an error, if one exists, associated with processing this entry.
|
||||
///
|
||||
/// An example of an error is one that occurred while parsing an ignore
|
||||
/// file.
|
||||
pub fn error(&self) -> Option<&Error> {
|
||||
self.err.as_ref()
|
||||
}
|
||||
}
|
||||
|
||||
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
||||
/// accurately describes the directory tree. Namely, it emits events that are
|
||||
/// one of three types: directory, file or "exit." An "exit" event means that
|
||||
/// the entire contents of a directory have been enumerated.
|
||||
struct WalkEventIter {
|
||||
depth: usize,
|
||||
it: walkdir::Iter,
|
||||
next: Option<Result<walkdir::DirEntry, walkdir::Error>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum WalkEvent {
|
||||
Dir(walkdir::DirEntry),
|
||||
File(walkdir::DirEntry),
|
||||
Exit,
|
||||
}
|
||||
|
||||
impl From<WalkDir> for WalkEventIter {
|
||||
fn from(it: WalkDir) -> WalkEventIter {
|
||||
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for WalkEventIter {
|
||||
type Item = walkdir::Result<WalkEvent>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||
let dent = self.next.take().or_else(|| self.it.next());
|
||||
let depth = match dent {
|
||||
None => 0,
|
||||
Some(Ok(ref dent)) => dent.depth(),
|
||||
Some(Err(ref err)) => err.depth(),
|
||||
};
|
||||
if depth < self.depth {
|
||||
self.depth -= 1;
|
||||
self.next = dent;
|
||||
return Some(Ok(WalkEvent::Exit));
|
||||
}
|
||||
self.depth = depth;
|
||||
match dent {
|
||||
None => None,
|
||||
Some(Err(err)) => Some(Err(err)),
|
||||
Some(Ok(dent)) => {
|
||||
if dent.file_type().is_dir() {
|
||||
self.depth += 1;
|
||||
Some(Ok(WalkEvent::Dir(dent)))
|
||||
} else {
|
||||
Some(Ok(WalkEvent::File(dent)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::Path;

    use tempdir::TempDir;

    use super::{Walk, WalkBuilder};

    /// Creates the file at `path` and writes `contents` into it.
    fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
        let mut out = File::create(path).unwrap();
        out.write_all(contents.as_bytes()).unwrap();
    }

    /// Creates the directory at `path`, including missing parents.
    fn mkdirp<P: AsRef<Path>>(path: P) {
        fs::create_dir_all(path).unwrap();
    }

    /// Converts backslashes to forward slashes when compiled for Windows;
    /// returns the input unchanged elsewhere.
    fn normal_path(unix: &str) -> String {
        if !cfg!(windows) {
            return unix.to_string();
        }
        unix.replace("\\", "/")
    }

    /// Runs `walk` to completion and returns the sorted, normalized paths
    /// relative to `prefix`, leaving out the root itself.
    fn walk_collect(prefix: &Path, walk: Walk) -> Vec<String> {
        let mut found: Vec<String> = walk
            .filter_map(|dent| {
                let dent = dent.unwrap();
                let rel = dent.path().strip_prefix(prefix).unwrap();
                if rel.as_os_str().is_empty() {
                    None
                } else {
                    Some(normal_path(rel.to_str().unwrap()))
                }
            })
            .collect();
        found.sort();
        found
    }

    /// Returns the given paths as sorted owned strings.
    fn mkpaths(paths: &[&str]) -> Vec<String> {
        let mut sorted = Vec::with_capacity(paths.len());
        for p in paths {
            sorted.push(p.to_string());
        }
        sorted.sort();
        sorted
    }

    #[test]
    fn no_ignores() {
        let td = TempDir::new("walk-test-").unwrap();
        let base = td.path();
        mkdirp(base.join("a/b/c"));
        mkdirp(base.join("x/y"));
        wfile(base.join("a/b/foo"), "");
        wfile(base.join("x/y/foo"), "");

        let got = walk_collect(base, Walk::new(base));
        let expected = mkpaths(&[
            "x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c",
        ]);
        assert_eq!(got, expected);
    }

    #[test]
    fn gitignore() {
        let td = TempDir::new("walk-test-").unwrap();
        let base = td.path();
        mkdirp(base.join("a"));
        wfile(base.join(".gitignore"), "foo");
        wfile(base.join("foo"), "");
        wfile(base.join("a/foo"), "");
        wfile(base.join("bar"), "");
        wfile(base.join("a/bar"), "");

        let got = walk_collect(base, Walk::new(base));
        let expected = mkpaths(&["bar", "a", "a/bar"]);
        assert_eq!(got, expected);
    }

    #[test]
    fn explicit_ignore() {
        let td = TempDir::new("walk-test-").unwrap();
        let base = td.path();
        let igpath = base.join(".not-an-ignore");
        mkdirp(base.join("a"));
        wfile(&igpath, "foo");
        wfile(base.join("foo"), "");
        wfile(base.join("a/foo"), "");
        wfile(base.join("bar"), "");
        wfile(base.join("a/bar"), "");

        let mut builder = WalkBuilder::new(base);
        assert!(builder.add_ignore(&igpath).is_none());
        let got = walk_collect(base, builder.build());
        let expected = mkpaths(&["bar", "a", "a/bar"]);
        assert_eq!(got, expected);
    }

    #[test]
    fn gitignore_parent() {
        let td = TempDir::new("walk-test-").unwrap();
        let base = td.path();
        mkdirp(base.join("a"));
        wfile(base.join(".gitignore"), "foo");
        wfile(base.join("a/foo"), "");
        wfile(base.join("a/bar"), "");

        // Walk the subdirectory only; the rule lives in its parent.
        let root = base.join("a");
        let got = walk_collect(&root, Walk::new(&root));
        let expected = mkpaths(&["bar"]);
        assert_eq!(got, expected);
    }
}
|
105
src/args.rs
105
src/args.rs
@ -14,19 +14,17 @@ use term::Terminal;
|
||||
use term;
|
||||
#[cfg(windows)]
|
||||
use term::WinConsole;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use atty;
|
||||
use gitignore::{Gitignore, GitignoreBuilder};
|
||||
use ignore::Ignore;
|
||||
use ignore::overrides::{Override, OverrideBuilder};
|
||||
use ignore::types::{FileTypeDef, Types, TypesBuilder};
|
||||
use ignore;
|
||||
use out::{Out, ColoredTerminal};
|
||||
use printer::Printer;
|
||||
use search_buffer::BufferSearcher;
|
||||
use search_stream::{InputBuffer, Searcher};
|
||||
#[cfg(windows)]
|
||||
use terminal_win::WindowsBuffer;
|
||||
use types::{FileTypeDef, Types, TypesBuilder};
|
||||
use walk;
|
||||
|
||||
use Result;
|
||||
|
||||
@ -131,6 +129,13 @@ Less common options:
|
||||
Search hidden directories and files. (Hidden directories and files are
|
||||
skipped by default.)
|
||||
|
||||
--ignore-file FILE ...
|
||||
Specify additional ignore files for filtering file paths. Ignore files
|
||||
should be in the gitignore format and are matched relative to the
|
||||
current working directory. These ignore files have lower precedence
|
||||
than all other ignore file types. When specifying multiple ignore
|
||||
files, earlier files have lower precedence than later files.
|
||||
|
||||
-L, --follow
|
||||
Follow symlinks.
|
||||
|
||||
@ -234,6 +239,7 @@ pub struct RawArgs {
|
||||
flag_heading: bool,
|
||||
flag_hidden: bool,
|
||||
flag_ignore_case: bool,
|
||||
flag_ignore_file: Vec<String>,
|
||||
flag_invert_match: bool,
|
||||
flag_line_number: bool,
|
||||
flag_fixed_strings: bool,
|
||||
@ -279,11 +285,12 @@ pub struct Args {
|
||||
eol: u8,
|
||||
files: bool,
|
||||
follow: bool,
|
||||
glob_overrides: Option<Gitignore>,
|
||||
glob_overrides: Override,
|
||||
grep: Grep,
|
||||
heading: bool,
|
||||
hidden: bool,
|
||||
ignore_case: bool,
|
||||
ignore_files: Vec<PathBuf>,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
line_per_match: bool,
|
||||
@ -347,14 +354,13 @@ impl RawArgs {
|
||||
}
|
||||
let glob_overrides =
|
||||
if self.flag_glob.is_empty() {
|
||||
None
|
||||
Override::empty()
|
||||
} else {
|
||||
let cwd = try!(env::current_dir());
|
||||
let mut bgi = GitignoreBuilder::new(cwd);
|
||||
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
|
||||
for pat in &self.flag_glob {
|
||||
try!(bgi.add("<argv>", pat));
|
||||
try!(ovr.add(pat));
|
||||
}
|
||||
Some(try!(bgi.build()))
|
||||
try!(ovr.build())
|
||||
};
|
||||
let threads =
|
||||
if self.flag_threads == 0 {
|
||||
@ -382,6 +388,9 @@ impl RawArgs {
|
||||
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
|
||||
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
|
||||
let text = self.flag_text || self.flag_unrestricted >= 3;
|
||||
let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| {
|
||||
Path::new(p).to_path_buf()
|
||||
}).collect();
|
||||
let mut args = Args {
|
||||
paths: paths,
|
||||
after_context: after_context,
|
||||
@ -399,6 +408,7 @@ impl RawArgs {
|
||||
heading: !self.flag_no_heading && self.flag_heading,
|
||||
hidden: hidden,
|
||||
ignore_case: self.flag_ignore_case,
|
||||
ignore_files: ignore_files,
|
||||
invert_match: self.flag_invert_match,
|
||||
line_number: !self.flag_no_line_number && self.flag_line_number,
|
||||
line_per_match: self.flag_vimgrep,
|
||||
@ -711,31 +721,30 @@ impl Args {
|
||||
self.type_list
|
||||
}
|
||||
|
||||
/// Create a new recursive directory iterator at the path given.
|
||||
pub fn walker(&self, path: &Path) -> Result<walk::Iter> {
|
||||
// Always follow symlinks for explicitly specified files.
|
||||
let mut wd = WalkDir::new(path).follow_links(
|
||||
self.follow || path.is_file());
|
||||
if let Some(maxdepth) = self.maxdepth {
|
||||
wd = wd.max_depth(maxdepth);
|
||||
/// Create a new recursive directory iterator over the paths in argv.
|
||||
pub fn walker(&self) -> Walk {
|
||||
let paths = self.paths();
|
||||
let mut wd = ignore::WalkBuilder::new(&paths[0]);
|
||||
for path in &paths[1..] {
|
||||
wd.add(path);
|
||||
}
|
||||
let mut ig = Ignore::new();
|
||||
// Only register ignore rules if this is a directory. If it's a file,
|
||||
// then it was explicitly given by the end user, so we always search
|
||||
// it.
|
||||
if path.is_dir() {
|
||||
ig.ignore_hidden(!self.hidden);
|
||||
ig.no_ignore(self.no_ignore);
|
||||
ig.no_ignore_vcs(self.no_ignore_vcs);
|
||||
ig.add_types(self.types.clone());
|
||||
if !self.no_ignore_parent {
|
||||
try!(ig.push_parents(path));
|
||||
}
|
||||
if let Some(ref overrides) = self.glob_overrides {
|
||||
ig.add_override(overrides.clone());
|
||||
for path in &self.ignore_files {
|
||||
if let Some(err) = wd.add_ignore(path) {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
}
|
||||
Ok(walk::Iter::new(ig, wd))
|
||||
|
||||
wd.follow_links(self.follow);
|
||||
wd.hidden(!self.hidden);
|
||||
wd.max_depth(self.maxdepth);
|
||||
wd.overrides(self.glob_overrides.clone());
|
||||
wd.types(self.types.clone());
|
||||
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
|
||||
wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
|
||||
wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
|
||||
wd.ignore(!self.no_ignore);
|
||||
wd.parents(!self.no_ignore_parent);
|
||||
Walk(wd.build())
|
||||
}
|
||||
}
|
||||
|
||||
@ -752,6 +761,34 @@ fn version() -> String {
|
||||
}
|
||||
}
|
||||
|
||||
/// A simple wrapper around the ignore::Walk iterator. This will
|
||||
/// automatically emit error messages to stderr and will skip directories.
|
||||
pub struct Walk(ignore::Walk);
|
||||
|
||||
impl Iterator for Walk {
|
||||
type Item = ignore::DirEntry;
|
||||
|
||||
fn next(&mut self) -> Option<ignore::DirEntry> {
|
||||
while let Some(result) = self.0.next() {
|
||||
match result {
|
||||
Ok(dent) => {
|
||||
if let Some(err) = dent.error() {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
if dent.file_type().map_or(false, |x| x.is_dir()) {
|
||||
continue;
|
||||
}
|
||||
return Some(dent);
|
||||
}
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// A single state in the state machine used by `unescape`.
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
enum State {
|
||||
@ -761,7 +798,7 @@ enum State {
|
||||
Literal,
|
||||
}
|
||||
|
||||
/// Unescapes a string given on the command line. It supports a limit set of
|
||||
/// Unescapes a string given on the command line. It supports a limited set of
|
||||
/// escape sequences:
|
||||
///
|
||||
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
|
||||
|
455
src/gitignore.rs
455
src/gitignore.rs
@ -1,455 +0,0 @@
|
||||
/*!
|
||||
The gitignore module provides a way of reading a gitignore file and applying
|
||||
it to a particular file name to determine whether it should be ignored or not.
|
||||
The motivation for this submodule is performance and portability:
|
||||
|
||||
1. There is a gitignore crate on crates.io, but it uses the standard `glob`
|
||||
crate and checks patterns one-by-one. This is a reasonable implementation,
|
||||
but not suitable for the performance we need here.
|
||||
2. We could shell out to a `git` sub-command like ls-files or status, but it
|
||||
seems better to not rely on the existence of external programs for a search
|
||||
tool. Besides, we need to implement this logic anyway to support things like
|
||||
an .ignore file.
|
||||
|
||||
The key implementation detail here is that a single gitignore file is compiled
|
||||
into a single RegexSet, which can be used to report which globs match a
|
||||
particular file name. We can then do a quick post-processing step to implement
|
||||
additional rules such as whitelists (prefix of `!`) or directory-only globs
|
||||
(suffix of `/`).
|
||||
*/
|
||||
|
||||
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
|
||||
// use this exact implementation because hgignore files are different.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Glob(globset::Error),
|
||||
Regex(regex::Error),
|
||||
Io(io::Error),
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::Glob(ref err) => err.description(),
|
||||
Error::Regex(ref err) => err.description(),
|
||||
Error::Io(ref err) => err.description(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Glob(ref err) => err.fmt(f),
|
||||
Error::Regex(ref err) => err.fmt(f),
|
||||
Error::Io(ref err) => err.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<globset::Error> for Error {
|
||||
fn from(err: globset::Error) -> Error {
|
||||
Error::Glob(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<regex::Error> for Error {
|
||||
fn from(err: regex::Error) -> Error {
|
||||
Error::Regex(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(err: io::Error) -> Error {
|
||||
Error::Io(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: GlobSet,
|
||||
root: PathBuf,
|
||||
patterns: Vec<Pattern>,
|
||||
num_ignores: u64,
|
||||
num_whitelist: u64,
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
impl Gitignore {
|
||||
/// Create a new gitignore glob matcher from the given root directory and
|
||||
/// string containing the contents of a gitignore file.
|
||||
#[allow(dead_code)]
|
||||
fn from_str<P: AsRef<Path>>(
|
||||
root: P,
|
||||
gitignore: &str,
|
||||
) -> Result<Gitignore, Error> {
|
||||
let mut builder = GitignoreBuilder::new(root);
|
||||
try!(builder.add_str(gitignore));
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be ignored
|
||||
/// according to the globs in this gitignore. `is_dir` should be true if
|
||||
/// the path refers to a directory and false otherwise.
|
||||
///
|
||||
/// Before matching path, its prefix (as determined by a common suffix
|
||||
/// of the directory containing this gitignore) is stripped. If there is
|
||||
/// no common suffix/prefix overlap, then path is assumed to reside in the
|
||||
/// same directory as this gitignore file.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
let mut path = path.as_ref();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Strip any common prefix between the candidate path and the root
|
||||
// of the gitignore, to make sure we get relative matching right.
|
||||
// BUT, a file name might not have any directory components to it,
|
||||
// in which case, we don't want to accidentally strip any part of the
|
||||
// file name.
|
||||
if !is_file_name(path) {
|
||||
if let Some(p) = strip_prefix(&self.root, path) {
|
||||
path = p;
|
||||
}
|
||||
}
|
||||
if let Some(p) = strip_prefix("/", path) {
|
||||
path = p;
|
||||
}
|
||||
self.matched_stripped(path, is_dir)
|
||||
}
|
||||
|
||||
/// Like matched, but takes a path that has already been stripped.
|
||||
pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
|
||||
let _matches = self.matches.get_default();
|
||||
let mut matches = _matches.borrow_mut();
|
||||
let candidate = Candidate::new(path);
|
||||
self.set.matches_candidate_into(&candidate, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let pat = &self.patterns[i];
|
||||
if !pat.only_dir || is_dir {
|
||||
return if pat.whitelist {
|
||||
Match::Whitelist(pat)
|
||||
} else {
|
||||
Match::Ignored(pat)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore patterns.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.num_ignores
|
||||
}
|
||||
}
|
||||
|
||||
/// The result of a glob match.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
|
||||
/// a match (whether ignored or whitelisted).
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Match<'a> {
|
||||
/// The path didn't match any glob in the gitignore file.
|
||||
None,
|
||||
/// The last glob matched indicates the path should be ignored.
|
||||
Ignored(&'a Pattern),
|
||||
/// The last glob matched indicates the path should be whitelisted.
|
||||
Whitelist(&'a Pattern),
|
||||
}
|
||||
|
||||
impl<'a> Match<'a> {
|
||||
/// Returns true if the match result implies the path should be ignored.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_ignored(&self) -> bool {
|
||||
match *self {
|
||||
Match::Ignored(_) => true,
|
||||
Match::None | Match::Whitelist(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the match result didn't match any globs.
|
||||
pub fn is_none(&self) -> bool {
|
||||
match *self {
|
||||
Match::None => true,
|
||||
Match::Ignored(_) | Match::Whitelist(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Inverts the match so that Ignored becomes Whitelisted and Whitelisted
|
||||
/// becomes Ignored. A non-match remains the same.
|
||||
pub fn invert(self) -> Match<'a> {
|
||||
match self {
|
||||
Match::None => Match::None,
|
||||
Match::Ignored(pat) => Match::Whitelist(pat),
|
||||
Match::Whitelist(pat) => Match::Ignored(pat),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GitignoreBuilder constructs a matcher for a single set of globs from a
|
||||
/// .gitignore file.
|
||||
pub struct GitignoreBuilder {
|
||||
builder: GlobSetBuilder,
|
||||
root: PathBuf,
|
||||
patterns: Vec<Pattern>,
|
||||
}
|
||||
|
||||
/// A single pattern from a gitignore file.
///
/// A `Pattern` cannot perform glob matching itself; it records the extra
/// options attached to a pattern, such as whether it is a whitelist rule.
#[derive(Clone, Debug)]
pub struct Pattern {
    /// The file this pattern was read from (may be empty).
    pub from: PathBuf,
    /// The glob pattern string as originally written.
    pub original: String,
    /// The glob pattern string actually used for conversion to a regex.
    pub pat: String,
    /// True if this is a whitelist pattern.
    pub whitelist: bool,
    /// True if this pattern may only match directories.
    pub only_dir: bool,
}
|
||||
|
||||
impl GitignoreBuilder {
|
||||
/// Create a new builder for a gitignore file.
|
||||
///
|
||||
/// The path given should be the path at which the globs for this gitignore
|
||||
/// file should be matched.
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
|
||||
GitignoreBuilder {
|
||||
builder: GlobSetBuilder::new(),
|
||||
root: root.to_path_buf(),
|
||||
patterns: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new matcher from the glob patterns added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new glob patterns can be added to it.
|
||||
pub fn build(self) -> Result<Gitignore, Error> {
|
||||
let nignores = self.patterns.iter().filter(|p| !p.whitelist).count();
|
||||
let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count();
|
||||
Ok(Gitignore {
|
||||
set: try!(self.builder.build()),
|
||||
root: self.root,
|
||||
patterns: self.patterns,
|
||||
num_ignores: nignores as u64,
|
||||
num_whitelist: nwhitelist as u64,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Add each pattern line from the file path given.
|
||||
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
let rdr = io::BufReader::new(try!(File::open(&path)));
|
||||
debug!("gitignore: {}", path.as_ref().display());
|
||||
for (i, line) in rdr.lines().enumerate() {
|
||||
let line = match line {
|
||||
Ok(line) => line,
|
||||
Err(err) => {
|
||||
debug!("error reading line {} in {}: {}",
|
||||
i, path.as_ref().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(err) = self.add(&path, &line) {
|
||||
debug!("error adding gitignore pattern: '{}': {}", line, err);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add each pattern line from the string given.
|
||||
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
|
||||
for line in gitignore.lines() {
|
||||
try!(self.add("", line));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a line from a gitignore file to this builder.
|
||||
///
|
||||
/// If the line could not be parsed as a glob, then an error is returned.
|
||||
pub fn add<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
from: P,
|
||||
mut line: &str,
|
||||
) -> Result<(), Error> {
|
||||
if line.starts_with("#") {
|
||||
return Ok(());
|
||||
}
|
||||
if !line.ends_with("\\ ") {
|
||||
line = line.trim_right();
|
||||
}
|
||||
if line.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
let mut pat = Pattern {
|
||||
from: from.as_ref().to_path_buf(),
|
||||
original: line.to_string(),
|
||||
pat: String::new(),
|
||||
whitelist: false,
|
||||
only_dir: false,
|
||||
};
|
||||
let mut literal_separator = false;
|
||||
let has_slash = line.chars().any(|c| c == '/');
|
||||
let is_absolute = line.chars().nth(0).unwrap() == '/';
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
} else {
|
||||
if line.starts_with("!") {
|
||||
pat.whitelist = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
if line.starts_with("/") {
|
||||
// `man gitignore` says that if a glob starts with a slash,
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
literal_separator = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
pat.only_dir = true;
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
pat.pat = line.to_string();
|
||||
if has_slash {
|
||||
literal_separator = true;
|
||||
}
|
||||
// If there was a leading slash, then this is a pattern that must
|
||||
// match the entire path name. Otherwise, we should let it match
|
||||
// anywhere, so use a **/ prefix.
|
||||
if !is_absolute {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !pat.pat.starts_with("**/") {
|
||||
pat.pat = format!("**/{}", pat.pat);
|
||||
}
|
||||
}
|
||||
// If the pattern ends with `/**`, then we should only match everything
|
||||
// inside a directory, but not the directory itself. Standard globs
|
||||
// will match the directory. So we add `/*` to force the issue.
|
||||
if pat.pat.ends_with("/**") {
|
||||
pat.pat = format!("{}/*", pat.pat);
|
||||
}
|
||||
let parsed = try!(
|
||||
GlobBuilder::new(&pat.pat)
|
||||
.literal_separator(literal_separator)
|
||||
.build());
|
||||
self.builder.add(parsed);
|
||||
self.patterns.push(pat);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Gitignore;

    /// Parses `gitignore` as if it lived at `root` and reports whether it
    /// causes `path` to be ignored.
    fn is_ignored(root: &str, gitignore: &str, path: &str, is_dir: bool) -> bool {
        let matcher = Gitignore::from_str(root, gitignore).unwrap();
        let verdict = matcher.matched(path, is_dir).is_ignored();
        verdict
    }

    /// Declares a test asserting that a path is ignored. The four argument
    /// form treats the path as a file.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    /// Declares a test asserting that a path is not ignored. The four
    /// argument form treats the path as a file.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(!is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
    ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
    ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
    ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
    ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
    not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
    not_ignored!(
        ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
        "./third_party/protobuf/csharp/src/packages/repositories.config");

    // A gitignore consisting of nothing but whitespace must parse cleanly.
    // See: https://github.com/BurntSushi/ripgrep/issues/106
    #[test]
    fn regression_106() {
        Gitignore::from_str("/", " ").unwrap();
    }
}
|
493
src/ignore.rs
493
src/ignore.rs
@ -1,493 +0,0 @@
|
||||
/*!
|
||||
The ignore module is responsible for managing the state required to determine
|
||||
whether a *single* file path should be searched or not.
|
||||
|
||||
In general, there are two ways to ignore a particular file:
|
||||
|
||||
1. Specify an ignore rule in some "global" configuration, such as a
|
||||
$HOME/.ignore or on the command line.
|
||||
2. A specific ignore file (like .gitignore) found during directory traversal.
|
||||
|
||||
The `IgnoreDir` type handles ignore patterns for any one particular directory
|
||||
(including "global" ignore patterns), while the `Ignore` type handles a stack
|
||||
of `IgnoreDir`s for use during directory traversal.
|
||||
*/
|
||||
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::OsString;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
|
||||
use pathutil::{file_name, is_hidden, strip_prefix};
|
||||
use types::Types;
|
||||
|
||||
/// The names of the ignore files that are looked for in each directory.
///
/// The order is meaningful: globs read from names later in this list take
/// precedence over globs read from names earlier in it.
const IGNORE_NAMES: &'static [&'static str] = &[
|
||||
".gitignore",
|
||||
".ignore",
|
||||
".rgignore",
|
||||
];
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file or when
/// resolving the directories that ignore files are read from.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// An error produced by the gitignore matcher/builder.
Gitignore(gitignore::Error),
|
||||
/// An I/O error, together with the path that was being accessed.
Io {
|
||||
/// The path associated with the failed operation.
path: PathBuf,
|
||||
/// The underlying I/O error.
err: io::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl Error {
|
||||
fn from_io<P: AsRef<Path>>(path: P, err: io::Error) -> Error {
|
||||
Error::Io { path: path.as_ref().to_path_buf(), err: err }
|
||||
}
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::Gitignore(ref err) => err.description(),
|
||||
Error::Io { ref err, .. } => err.description(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Gitignore(ref err) => err.fmt(f),
|
||||
Error::Io { ref path, ref err } => {
|
||||
write!(f, "{}: {}", path.display(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<gitignore::Error> for Error {
|
||||
fn from(err: gitignore::Error) -> Error {
|
||||
Error::Gitignore(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Ignore represents a collection of ignore patterns organized by directory.
|
||||
/// In particular, a stack is maintained, where the top of the stack
|
||||
/// corresponds to the current directory being searched and the bottom of the
|
||||
/// stack represents the root of a search. Ignore patterns at the top of the
|
||||
/// stack take precedence over ignore patterns at the bottom of the stack.
|
||||
pub struct Ignore {
|
||||
/// A stack of ignore patterns at each directory level of traversal.
|
||||
/// A directory that contributes no ignore patterns is still present, as
/// an `IgnoreDir` that has no matcher.
|
||||
stack: Vec<IgnoreDir>,
|
||||
/// A stack of parent directories above the root of the current search,
/// ordered from the outermost ancestor to the direct parent.
|
||||
parent_stack: Vec<IgnoreDir>,
|
||||
/// A set of override globs that are always checked first. A match (whether
|
||||
/// it's whitelist or blacklist) trumps anything in stack.
|
||||
overrides: Overrides,
|
||||
/// A file type matcher. It is consulted after the ignore files.
|
||||
types: Types,
|
||||
/// Whether to ignore hidden files or not.
|
||||
ignore_hidden: bool,
|
||||
/// When true, don't look at any ignore files (.gitignore, .ignore or
|
||||
/// .rgignore) for ignore rules.
|
||||
no_ignore: bool,
|
||||
/// When true, don't look at .gitignore files for ignore rules.
|
||||
no_ignore_vcs: bool,
|
||||
}
|
||||
|
||||
impl Ignore {
|
||||
/// Create an empty set of ignore patterns.
|
||||
pub fn new() -> Ignore {
|
||||
Ignore {
|
||||
stack: vec![],
|
||||
parent_stack: vec![],
|
||||
overrides: Overrides::new(None),
|
||||
types: Types::empty(),
|
||||
ignore_hidden: true,
|
||||
no_ignore: false,
|
||||
no_ignore_vcs: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set whether hidden files/folders should be ignored (defaults to true).
|
||||
pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.ignore_hidden = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, ignore files are ignored.
|
||||
pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.no_ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, VCS ignore files are ignored.
|
||||
pub fn no_ignore_vcs(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.no_ignore_vcs = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a set of globs that overrides all other match logic.
|
||||
pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore {
|
||||
self.overrides = Overrides::new(Some(gi));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher. The file type matcher has the lowest
|
||||
/// precedence.
|
||||
pub fn add_types(&mut self, types: Types) -> &mut Ignore {
|
||||
self.types = types;
|
||||
self
|
||||
}
|
||||
|
||||
/// Push parent directories of `path` on to the stack.
|
||||
pub fn push_parents<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
path: P,
|
||||
) -> Result<(), Error> {
|
||||
let path = try!(path.as_ref().canonicalize().map_err(|err| {
|
||||
Error::from_io(path.as_ref(), err)
|
||||
}));
|
||||
let mut path = &*path;
|
||||
let mut saw_git = path.join(".git").is_dir();
|
||||
let mut ignore_names = IGNORE_NAMES.to_vec();
|
||||
if self.no_ignore_vcs {
|
||||
ignore_names.retain(|&name| name != ".gitignore");
|
||||
}
|
||||
let mut ignore_dir_results = vec![];
|
||||
while let Some(parent) = path.parent() {
|
||||
if self.no_ignore {
|
||||
ignore_dir_results.push(Ok(IgnoreDir::empty(parent)));
|
||||
} else {
|
||||
if saw_git {
|
||||
ignore_names.retain(|&name| name != ".gitignore");
|
||||
} else {
|
||||
saw_git = parent.join(".git").is_dir();
|
||||
}
|
||||
let ignore_dir_result =
|
||||
IgnoreDir::with_ignore_names(parent, ignore_names.iter());
|
||||
ignore_dir_results.push(ignore_dir_result);
|
||||
}
|
||||
path = parent;
|
||||
}
|
||||
|
||||
for ignore_dir_result in ignore_dir_results.into_iter().rev() {
|
||||
self.parent_stack.push(try!(ignore_dir_result));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a directory to the stack.
|
||||
///
|
||||
/// Note that even if this returns an error, the directory is added to the
|
||||
/// stack (and therefore should be popped).
|
||||
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
if self.no_ignore {
|
||||
self.stack.push(IgnoreDir::empty(path));
|
||||
Ok(())
|
||||
} else if self.no_ignore_vcs {
|
||||
self.push_ignore_dir(IgnoreDir::without_vcs(path))
|
||||
} else {
|
||||
self.push_ignore_dir(IgnoreDir::new(path))
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes the result of building a directory matcher on to the stack.
|
||||
///
|
||||
/// If the result given contains an error, then it is returned.
|
||||
pub fn push_ignore_dir(
|
||||
&mut self,
|
||||
result: Result<IgnoreDir, Error>,
|
||||
) -> Result<(), Error> {
|
||||
match result {
|
||||
Ok(id) => {
|
||||
self.stack.push(id);
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
// Don't leave the stack in an inconsistent state.
|
||||
self.stack.push(IgnoreDir::empty("error"));
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pop a directory from the stack.
|
||||
///
|
||||
/// This panics if the stack is empty.
|
||||
pub fn pop(&mut self) {
|
||||
self.stack.pop().expect("non-empty stack");
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be ignored.
|
||||
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
|
||||
let mut path = path.as_ref();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
let mat = self.overrides.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
return is_ignored;
|
||||
}
|
||||
let mut whitelisted = false;
|
||||
if !self.no_ignore {
|
||||
for id in self.stack.iter().rev() {
|
||||
let mat = id.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
// If this path is whitelisted by an ignore, then
|
||||
// fallthrough and let the file type matcher have a say.
|
||||
whitelisted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If the file has been whitelisted, then we have to stop checking
|
||||
// parent directories. The only thing that can override a whitelist
|
||||
// at this point is a type filter.
|
||||
if !whitelisted {
|
||||
let mut path = path.to_path_buf();
|
||||
for id in self.parent_stack.iter().rev() {
|
||||
if let Some(ref dirname) = id.name {
|
||||
path = Path::new(dirname).join(path);
|
||||
}
|
||||
let mat = id.matched(&*path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(&*path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
// If this path is whitelisted by an ignore, then
|
||||
// fallthrough and let the file type matcher have a
|
||||
// say.
|
||||
whitelisted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mat = self.types.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
whitelisted = true;
|
||||
}
|
||||
if !whitelisted && self.ignore_hidden && is_hidden(&path) {
|
||||
debug!("{} ignored because it is hidden", path.display());
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true if the given match says the given pattern should be
|
||||
/// ignored or false if the given pattern should be explicitly whitelisted.
|
||||
/// Returns None otherwise.
|
||||
pub fn ignore_match<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
mat: Match,
|
||||
) -> Option<bool> {
|
||||
let path = path.as_ref();
|
||||
match mat {
|
||||
Match::Whitelist(ref pat) => {
|
||||
debug!("{} whitelisted by {:?}", path.display(), pat);
|
||||
Some(false)
|
||||
}
|
||||
Match::Ignored(ref pat) => {
|
||||
debug!("{} ignored by {:?}", path.display(), pat);
|
||||
Some(true)
|
||||
}
|
||||
Match::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// IgnoreDir represents a set of ignore patterns retrieved from a single
|
||||
/// directory.
|
||||
#[derive(Debug)]
|
||||
pub struct IgnoreDir {
|
||||
/// The path to this directory as given.
|
||||
path: PathBuf,
|
||||
/// The directory name (the final component of `path`), if one exists.
|
||||
name: Option<OsString>,
|
||||
/// A single accumulation of glob patterns for this directory, matched
|
||||
/// using gitignore semantics. This is `None` when no ignore file was read.
|
||||
///
|
||||
/// This will include patterns from rgignore as well. The patterns are
|
||||
/// ordered so that precedence applies automatically (e.g., rgignore
|
||||
/// patterns take precedence over gitignore patterns).
|
||||
gi: Option<Gitignore>,
|
||||
// TODO(burntsushi): Matching other types of glob patterns that don't
|
||||
// conform to gitignore will probably require refactoring this approach.
|
||||
}
|
||||
|
||||
impl IgnoreDir {
|
||||
/// Create a new matcher for the given directory.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Result<IgnoreDir, Error> {
|
||||
IgnoreDir::with_ignore_names(path, IGNORE_NAMES.iter())
|
||||
}
|
||||
|
||||
/// Create a new matcher for the given directory.
|
||||
///
|
||||
/// Don't respect VCS ignore files.
|
||||
pub fn without_vcs<P: AsRef<Path>>(path: P) -> Result<IgnoreDir, Error> {
|
||||
let names = IGNORE_NAMES.iter().filter(|name| **name != ".gitignore");
|
||||
IgnoreDir::with_ignore_names(path, names)
|
||||
}
|
||||
|
||||
/// Create a new IgnoreDir that never matches anything with the given path.
|
||||
pub fn empty<P: AsRef<Path>>(path: P) -> IgnoreDir {
|
||||
IgnoreDir {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
name: file_name(path.as_ref()).map(|s| s.to_os_string()),
|
||||
gi: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new matcher for the given directory using only the ignore
|
||||
/// patterns found in the file names given.
|
||||
///
|
||||
/// If no ignore glob patterns could be found in the directory then `None`
|
||||
/// is returned.
|
||||
///
|
||||
/// Note that the order of the names given is meaningful. Names appearing
|
||||
/// later in the list have precedence over names appearing earlier in the
|
||||
/// list.
|
||||
pub fn with_ignore_names<P: AsRef<Path>, S, I>(
|
||||
path: P,
|
||||
names: I,
|
||||
) -> Result<IgnoreDir, Error>
|
||||
where P: AsRef<Path>, S: AsRef<str>, I: Iterator<Item=S> {
|
||||
let mut id = IgnoreDir::empty(path);
|
||||
let mut ok = false;
|
||||
let mut builder = GitignoreBuilder::new(&id.path);
|
||||
// The ordering here is important. Later globs have higher precedence.
|
||||
for name in names {
|
||||
ok = builder.add_path(id.path.join(name.as_ref())).is_ok() || ok;
|
||||
}
|
||||
if !ok {
|
||||
return Ok(id);
|
||||
}
|
||||
id.gi = Some(try!(builder.build()));
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be ignored
|
||||
/// according to the globs in this directory. `is_dir` should be true if
|
||||
/// the path refers to a directory and false otherwise.
|
||||
///
|
||||
/// Before matching path, its prefix (as determined by a common suffix
|
||||
/// of this directory) is stripped. If there is
|
||||
/// no common suffix/prefix overlap, then path is assumed to reside
|
||||
/// directly in this directory.
|
||||
///
|
||||
/// If the given path has a `./` prefix then it is stripped before
|
||||
/// matching.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
self.gi.as_ref()
|
||||
.map(|gi| gi.matched(path, is_dir))
|
||||
.unwrap_or(Match::None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a set of overrides provided explicitly by the end user.
|
||||
struct Overrides {
|
||||
/// The override globs, if any were given.
gi: Option<Gitignore>,
|
||||
/// A placeholder pattern reported when a file is ignored because it
/// matched none of the overrides.
unmatched_pat: Pattern,
|
||||
}
|
||||
|
||||
impl Overrides {
|
||||
/// Creates a new set of overrides from the gitignore matcher provided.
|
||||
/// If no matcher is provided, then the resulting overrides have no effect.
|
||||
fn new(gi: Option<Gitignore>) -> Overrides {
|
||||
Overrides {
|
||||
gi: gi,
|
||||
unmatched_pat: Pattern {
|
||||
from: Path::new("<argv>").to_path_buf(),
|
||||
original: "<none>".to_string(),
|
||||
pat: "<none>".to_string(),
|
||||
whitelist: false,
|
||||
only_dir: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this set of overrides.
|
||||
///
|
||||
/// If there are no overrides, then this always returns Match::None.
|
||||
///
|
||||
/// If there is at least one positive override, then this never returns
|
||||
/// Match::None (and interpreting non-matches as ignored) unless is_dir
|
||||
/// is true.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
let path = path.as_ref();
|
||||
self.gi.as_ref()
|
||||
.map(|gi| {
|
||||
let mat = gi.matched_stripped(path, is_dir).invert();
|
||||
if mat.is_none() && !is_dir {
|
||||
if gi.num_ignores() > 0 {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
}
|
||||
}
|
||||
mat
|
||||
})
|
||||
.unwrap_or(Match::None)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;
    use gitignore::GitignoreBuilder;
    use super::IgnoreDir;

    /// Builds an `IgnoreDir` for `root` whose matcher holds the globs of
    /// `gi` followed by the globs of `xi`.
    fn ignore_dir(root: &str, gi: &str, xi: &str) -> IgnoreDir {
        let mut builder = GitignoreBuilder::new(&root);
        builder.add_str(gi).unwrap();
        builder.add_str(xi).unwrap();
        let matcher = builder.build().unwrap();
        let root = Path::new(root);
        IgnoreDir {
            path: root.to_path_buf(),
            name: root.file_name().map(|s| s.to_os_string()),
            gi: Some(matcher),
        }
    }

    /// Declares a test asserting that a file path is ignored.
    macro_rules! ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                let id = ignore_dir($root, $gi, $xi);
                assert!(id.matched($path, false).is_ignored());
            }
        };
    }

    /// Declares a test asserting that a file path is not ignored.
    macro_rules! not_ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                let id = ignore_dir($root, $gi, $xi);
                assert!(!id.matched($path, false).is_ignored());
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
    ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
    ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs");

    not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs");
}
|
66
src/main.rs
66
src/main.rs
@ -1,8 +1,8 @@
|
||||
extern crate deque;
|
||||
extern crate docopt;
|
||||
extern crate env_logger;
|
||||
extern crate globset;
|
||||
extern crate grep;
|
||||
extern crate ignore;
|
||||
#[cfg(windows)]
|
||||
extern crate kernel32;
|
||||
#[macro_use]
|
||||
@ -16,8 +16,6 @@ extern crate num_cpus;
|
||||
extern crate regex;
|
||||
extern crate rustc_serialize;
|
||||
extern crate term;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi;
|
||||
|
||||
@ -36,7 +34,7 @@ use deque::{Stealer, Stolen};
|
||||
use grep::Grep;
|
||||
use memmap::{Mmap, Protection};
|
||||
use term::Terminal;
|
||||
use walkdir::DirEntry;
|
||||
use ignore::DirEntry;
|
||||
|
||||
use args::Args;
|
||||
use out::{ColoredTerminal, Out};
|
||||
@ -61,8 +59,6 @@ macro_rules! eprintln {
|
||||
|
||||
mod args;
|
||||
mod atty;
|
||||
mod gitignore;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod pathutil;
|
||||
mod printer;
|
||||
@ -70,8 +66,6 @@ mod search_buffer;
|
||||
mod search_stream;
|
||||
#[cfg(windows)]
|
||||
mod terminal_win;
|
||||
mod types;
|
||||
mod walk;
|
||||
|
||||
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
||||
|
||||
@ -101,7 +95,6 @@ fn run(args: Args) -> Result<u64> {
|
||||
if threads == 1 || isone {
|
||||
return run_one_thread(args.clone());
|
||||
}
|
||||
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let quiet_matched = QuietMatched::new(args.quiet());
|
||||
let mut workers = vec![];
|
||||
@ -126,21 +119,15 @@ fn run(args: Args) -> Result<u64> {
|
||||
workq
|
||||
};
|
||||
let mut paths_searched: u64 = 0;
|
||||
for p in paths {
|
||||
for dent in args.walker() {
|
||||
if quiet_matched.has_match() {
|
||||
break;
|
||||
}
|
||||
if p == Path::new("-") {
|
||||
paths_searched += 1;
|
||||
if dent.is_stdin() {
|
||||
workq.push(Work::Stdin);
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
if quiet_matched.has_match() {
|
||||
break;
|
||||
}
|
||||
paths_searched += 1;
|
||||
workq.push(Work::File(ent));
|
||||
}
|
||||
workq.push(Work::File(dent));
|
||||
}
|
||||
}
|
||||
if !paths.is_empty() && paths_searched == 0 {
|
||||
@ -165,26 +152,9 @@ fn run_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
};
|
||||
let paths = args.paths();
|
||||
let mut term = args.stdout();
|
||||
|
||||
let mut paths_searched: u64 = 0;
|
||||
for p in paths {
|
||||
if args.quiet() && worker.match_count > 0 {
|
||||
break;
|
||||
}
|
||||
if p == Path::new("-") {
|
||||
paths_searched += 1;
|
||||
let mut printer = args.printer(&mut term);
|
||||
if worker.match_count > 0 {
|
||||
if let Some(sep) = args.file_separator() {
|
||||
printer = printer.file_separator(sep);
|
||||
}
|
||||
}
|
||||
worker.do_work(&mut printer, WorkReady::Stdin);
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
paths_searched += 1;
|
||||
for dent in args.walker() {
|
||||
let mut printer = args.printer(&mut term);
|
||||
if worker.match_count > 0 {
|
||||
if args.quiet() {
|
||||
@ -194,18 +164,21 @@ fn run_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
printer = printer.file_separator(sep);
|
||||
}
|
||||
}
|
||||
let file = match File::open(ent.path()) {
|
||||
paths_searched += 1;
|
||||
if dent.is_stdin() {
|
||||
worker.do_work(&mut printer, WorkReady::Stdin);
|
||||
} else {
|
||||
let file = match File::open(dent.path()) {
|
||||
Ok(file) => file,
|
||||
Err(err) => {
|
||||
eprintln!("{}: {}", ent.path().display(), err);
|
||||
eprintln!("{}: {}", dent.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
worker.do_work(&mut printer, WorkReady::DirFile(ent, file));
|
||||
worker.do_work(&mut printer, WorkReady::DirFile(dent, file));
|
||||
}
|
||||
}
|
||||
}
|
||||
if !paths.is_empty() && paths_searched == 0 {
|
||||
if !args.paths().is_empty() && paths_searched == 0 {
|
||||
eprintln!("No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect. \
|
||||
Try running again with --debug.");
|
||||
@ -217,16 +190,9 @@ fn run_files(args: Arc<Args>) -> Result<u64> {
|
||||
let term = args.stdout();
|
||||
let mut printer = args.printer(term);
|
||||
let mut file_count = 0;
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
printer.path(&Path::new("<stdin>"));
|
||||
for dent in args.walker() {
|
||||
printer.path(dent.path());
|
||||
file_count += 1;
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
printer.path(ent.path());
|
||||
file_count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(file_count)
|
||||
}
|
||||
|
@ -8,7 +8,6 @@ with the raw bytes directly.
|
||||
On large repositories (like chromium), this can have a ~25% performance
|
||||
improvement on just listing the files to search (!).
|
||||
*/
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Strip `prefix` from the `path` and return the remainder.
|
||||
@ -19,6 +18,7 @@ pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
|
||||
prefix: &'a P,
|
||||
path: &'a Path,
|
||||
) -> Option<&'a Path> {
|
||||
use std::ffi::OsStr;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
let prefix = prefix.as_ref().as_os_str().as_bytes();
|
||||
@ -40,79 +40,3 @@ pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
|
||||
) -> Option<&'a Path> {
|
||||
path.strip_prefix(prefix).ok()
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
///
/// This defers to the standard library: if the path terminates in `..` or
/// consists solely of a root or prefix, `None` is returned.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let path: &'a Path = path.as_ref();
    path.file_name()
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden,
/// i.e. it has a file name that is valid UTF-8 and starts with `.`.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    file_name(path.as_ref())
        .and_then(|name| name.to_str())
        .map(|name| name.starts_with("."))
        .unwrap_or(false)
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
|
||||
/// the empty string.
|
||||
#[cfg(unix)]
|
||||
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
memchr(b'/', path).is_none()
|
||||
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
/// the empty string.
///
/// A path for which `Path::parent` returns `None` is not a file name.
#[cfg(not(unix))]
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
    match path.as_ref().parent() {
        Some(parent) => parent.as_os_str().is_empty(),
        None => false,
    }
}
|
||||
|
@ -5,7 +5,7 @@ use term::{Attr, Terminal};
|
||||
use term::color;
|
||||
|
||||
use pathutil::strip_prefix;
|
||||
use types::FileTypeDef;
|
||||
use ignore::types::FileTypeDef;
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
@ -168,11 +168,11 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
self.write(def.name().as_bytes());
|
||||
self.write(b": ");
|
||||
let mut first = true;
|
||||
for pat in def.patterns() {
|
||||
for glob in def.globs() {
|
||||
if !first {
|
||||
self.write(b", ");
|
||||
}
|
||||
self.write(pat.as_bytes());
|
||||
self.write(glob.as_bytes());
|
||||
first = false;
|
||||
}
|
||||
self.write_eol();
|
||||
|
458
src/types.rs
458
src/types.rs
@ -1,458 +0,0 @@
|
||||
/*!
|
||||
The types module provides a way of associating glob patterns on file names to
|
||||
file types.
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
use regex;
|
||||
|
||||
use gitignore::{Match, Pattern};
|
||||
use globset::{self, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
|
||||
/// The built-in file type definitions.
///
/// Each entry pairs a file type name with the globs that recognize it. The
/// table is what `TypesBuilder::add_defaults` feeds into the builder.
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
    ("asm", &["*.asm", "*.s", "*.S"]),
    ("awk", &["*.awk"]),
    ("c", &["*.c", "*.h", "*.H"]),
    ("cbor", &["*.cbor"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("config", &["*.config"]),
    (
        "cpp",
        &["*.C", "*.cc", "*.cpp", "*.cxx", "*.h", "*.H", "*.hh", "*.hpp"],
    ),
    ("csharp", &["*.cs"]),
    ("css", &["*.css"]),
    ("cython", &["*.pyx"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("elisp", &["*.el"]),
    ("erlang", &["*.erl", "*.hrl"]),
    (
        "fortran",
        &[
            "*.f", "*.F", "*.f77", "*.F77", "*.pfo", "*.f90", "*.F90",
            "*.f95", "*.F95",
        ],
    ),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("go", &["*.go"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html"]),
    ("java", &["*.java"]),
    ("jinja", &["*.jinja", "*.jinja2"]),
    ("js", &["*.js", "*.jsx", "*.vue"]),
    ("json", &["*.json"]),
    ("jsonl", &["*.jsonl"]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lua", &["*.lua"]),
    ("m4", &["*.ac", "*.m4"]),
    (
        "make",
        &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"],
    ),
    ("markdown", &["*.md"]),
    ("md", &["*.md"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("nim", &["*.nim"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("py", &["*.py", "*.pyx"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rst", &["*.rst"]),
    ("ruby", &["*.rb"]),
    ("rust", &["*.rs"]),
    ("scala", &["*.scala"]),
    ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("swift", &["*.swift"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.cls", "*.sty"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("vimscript", &["*.vim"]),
    ("xml", &["*.xml"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
    ("zsh", &["*.zsh", ".zshenv", ".zlogin", ".zprofile", ".zshrc"]),
];
|
||||
|
||||
/// Describes all the possible failure conditions for building a file type
|
||||
/// matcher.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// We tried to select (or negate) a file type that is not defined.
|
||||
UnrecognizedFileType(String),
|
||||
/// A user specified file type definition could not be parsed.
|
||||
InvalidDefinition,
|
||||
/// There was an error building the matcher (probably a bad glob).
|
||||
Glob(globset::Error),
|
||||
/// There was an error compiling a glob as a regex.
|
||||
Regex(regex::Error),
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::UnrecognizedFileType(_) => "unrecognized file type",
|
||||
Error::InvalidDefinition => "invalid definition",
|
||||
Error::Glob(ref err) => err.description(),
|
||||
Error::Regex(ref err) => err.description(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::UnrecognizedFileType(ref ty) => {
|
||||
write!(f, "unrecognized file type: {}", ty)
|
||||
}
|
||||
Error::InvalidDefinition => {
|
||||
write!(f, "invalid definition (format is type:glob, e.g., \
|
||||
html:*.html)")
|
||||
}
|
||||
Error::Glob(ref err) => err.fmt(f),
|
||||
Error::Regex(ref err) => err.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<globset::Error> for Error {
|
||||
fn from(err: globset::Error) -> Error {
|
||||
Error::Glob(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<regex::Error> for Error {
|
||||
fn from(err: regex::Error) -> Error {
|
||||
Error::Regex(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// A single file type definition: a name plus the globs that recognize it.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
    name: String,
    pats: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the glob patterns used to recognize this file type.
    pub fn patterns(&self) -> &[String] {
        self.pats.as_slice()
    }
}
|
||||
|
||||
/// Types is a file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
defs: Vec<FileTypeDef>,
|
||||
selected: Option<GlobSet>,
|
||||
negated: Option<GlobSet>,
|
||||
has_selected: bool,
|
||||
unmatched_pat: Pattern,
|
||||
}
|
||||
|
||||
impl Types {
|
||||
/// Creates a new file type matcher from the given Gitignore matcher. If
|
||||
/// not Gitignore matcher is provided, then the file type matcher has no
|
||||
/// effect.
|
||||
///
|
||||
/// If has_selected is true, then at least one file type was selected.
|
||||
/// Therefore, any non-matches should be ignored.
|
||||
fn new(
|
||||
selected: Option<GlobSet>,
|
||||
negated: Option<GlobSet>,
|
||||
has_selected: bool,
|
||||
defs: Vec<FileTypeDef>,
|
||||
) -> Types {
|
||||
Types {
|
||||
defs: defs,
|
||||
selected: selected,
|
||||
negated: negated,
|
||||
has_selected: has_selected,
|
||||
unmatched_pat: Pattern {
|
||||
from: Path::new("<filetype>").to_path_buf(),
|
||||
original: "<N/A>".to_string(),
|
||||
pat: "<N/A>".to_string(),
|
||||
whitelist: false,
|
||||
only_dir: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new file type matcher that never matches.
|
||||
pub fn empty() -> Types {
|
||||
Types::new(None, None, false, vec![])
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this file type matcher.
|
||||
///
|
||||
/// The path is considered whitelisted if it matches a selected file type.
|
||||
/// The path is considered ignored if it matched a negated file type.
|
||||
/// If at least one file type is selected and path doesn't match, then
|
||||
/// the path is also considered ignored.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
// If we don't have any matcher, then we can't do anything.
|
||||
if self.negated.is_none() && self.selected.is_none() {
|
||||
return Match::None;
|
||||
}
|
||||
// File types don't apply to directories.
|
||||
if is_dir {
|
||||
return Match::None;
|
||||
}
|
||||
let path = path.as_ref();
|
||||
let name = match path.file_name() {
|
||||
Some(name) => name.to_string_lossy(),
|
||||
None if self.has_selected => {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
}
|
||||
None => {
|
||||
return Match::None;
|
||||
}
|
||||
};
|
||||
if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
}
|
||||
if self.selected.as_ref().map(|s|s.is_match(&*name)).unwrap_or(false) {
|
||||
return Match::Whitelist(&self.unmatched_pat);
|
||||
}
|
||||
if self.has_selected {
|
||||
Match::Ignored(&self.unmatched_pat)
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
pub fn definitions(&self) -> &[FileTypeDef] {
|
||||
&self.defs
|
||||
}
|
||||
}
|
||||
|
||||
/// TypesBuilder builds a type matcher from a set of file type definitions and
|
||||
/// a set of file type selections.
|
||||
pub struct TypesBuilder {
|
||||
types: HashMap<String, Vec<String>>,
|
||||
selected: Vec<String>,
|
||||
negated: Vec<String>,
|
||||
}
|
||||
|
||||
impl TypesBuilder {
|
||||
/// Create a new builder for a file type matcher.
|
||||
pub fn new() -> TypesBuilder {
|
||||
TypesBuilder {
|
||||
types: HashMap::new(),
|
||||
selected: vec![],
|
||||
negated: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the current set of file type definitions *and* selections into
|
||||
/// a file type matcher.
|
||||
pub fn build(&self) -> Result<Types, Error> {
|
||||
let selected_globs =
|
||||
if self.selected.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut bset = GlobSetBuilder::new();
|
||||
for name in &self.selected {
|
||||
let globs = match self.types.get(name) {
|
||||
Some(globs) => globs,
|
||||
None => {
|
||||
let msg = name.to_string();
|
||||
return Err(Error::UnrecognizedFileType(msg));
|
||||
}
|
||||
};
|
||||
for glob in globs {
|
||||
let pat = try!(
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true).build());
|
||||
bset.add(pat);
|
||||
}
|
||||
}
|
||||
Some(try!(bset.build()))
|
||||
};
|
||||
let negated_globs =
|
||||
if self.negated.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut bset = GlobSetBuilder::new();
|
||||
for name in &self.negated {
|
||||
let globs = match self.types.get(name) {
|
||||
Some(globs) => globs,
|
||||
None => {
|
||||
let msg = name.to_string();
|
||||
return Err(Error::UnrecognizedFileType(msg));
|
||||
}
|
||||
};
|
||||
for glob in globs {
|
||||
let pat = try!(
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true).build());
|
||||
bset.add(pat);
|
||||
}
|
||||
}
|
||||
Some(try!(bset.build()))
|
||||
};
|
||||
Ok(Types::new(
|
||||
selected_globs,
|
||||
negated_globs,
|
||||
!self.selected.is_empty(),
|
||||
self.definitions(),
|
||||
))
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
pub fn definitions(&self) -> Vec<FileTypeDef> {
|
||||
let mut defs = vec![];
|
||||
for (ref name, ref pats) in &self.types {
|
||||
let mut pats = pats.to_vec();
|
||||
pats.sort();
|
||||
defs.push(FileTypeDef {
|
||||
name: name.to_string(),
|
||||
pats: pats,
|
||||
});
|
||||
}
|
||||
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
|
||||
defs
|
||||
}
|
||||
|
||||
/// Select the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types are selected.
|
||||
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selected.push(name.to_string());
|
||||
}
|
||||
} else {
|
||||
self.selected.push(name.to_string());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Ignore the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types are negated.
|
||||
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.negated.push(name.to_string());
|
||||
}
|
||||
} else {
|
||||
self.negated.push(name.to_string());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Clear any file type definitions for the type given.
|
||||
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
self.types.remove(name);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition. `name` can be arbitrary and `pat`
|
||||
/// should be a glob recognizing file paths belonging to the `name` type.
|
||||
pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder {
|
||||
self.types.entry(name.to_string())
|
||||
.or_insert(vec![]).push(pat.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition specified in string form. The format
|
||||
/// is `name:glob`. Names may not include a colon.
|
||||
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
|
||||
let name: String = def.chars().take_while(|&c| c != ':').collect();
|
||||
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
|
||||
if name.is_empty() || pat.is_empty() {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
self.add(&name, &pat);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a set of default file type definitions.
|
||||
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
|
||||
for &(name, exts) in TYPE_EXTENSIONS {
|
||||
for ext in exts {
|
||||
self.add(name, ext);
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` per invocation. Each test builds a matcher
    // from the given definitions, selections and negations, and checks
    // whether the path is ignored. A leading `not` expects it to be ignored.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for definition in $types {
                    builder.add_def(definition).unwrap();
                }
                for selected in $sel {
                    builder.select(selected);
                }
                for negated in $selnot {
                    builder.negate(negated);
                }
                let matcher = builder.build().unwrap();
                let result = matcher.matched($path, false);
                assert_eq!($matched, !result.is_ignored());
            }
        };
    }

    /// The file type definitions shared by every test below.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
        ]
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
}
|
140
src/walk.rs
140
src/walk.rs
@ -1,140 +0,0 @@
|
||||
/*!
|
||||
The walk module implements a recursive directory iterator (using the `walkdir`
|
||||
crate) that can efficiently skip and ignore files and directories specified in
|
||||
a user's ignore patterns.
|
||||
*/
|
||||
|
||||
use walkdir::{self, DirEntry, WalkDir, WalkDirIterator};
|
||||
|
||||
use ignore::Ignore;
|
||||
|
||||
/// Iter is a recursive directory iterator over file paths in a directory.
|
||||
/// Only file paths should be searched are yielded.
|
||||
pub struct Iter {
|
||||
ig: Ignore,
|
||||
it: WalkEventIter,
|
||||
}
|
||||
|
||||
impl Iter {
|
||||
/// Create a new recursive directory iterator using the ignore patterns
|
||||
/// and walkdir iterator given.
|
||||
pub fn new(ig: Ignore, wd: WalkDir) -> Iter {
|
||||
Iter {
|
||||
ig: ig,
|
||||
it: WalkEventIter::from(wd),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this entry should be skipped.
|
||||
#[inline(always)]
|
||||
fn skip_entry(&self, ent: &DirEntry) -> bool {
|
||||
if ent.depth() == 0 {
|
||||
// Never skip the root directory.
|
||||
return false;
|
||||
}
|
||||
if self.ig.ignored(ent.path(), ent.file_type().is_dir()) {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Iter {
|
||||
type Item = DirEntry;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<DirEntry> {
|
||||
while let Some(ev) = self.it.next() {
|
||||
match ev {
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
Ok(WalkEvent::Exit) => {
|
||||
self.ig.pop();
|
||||
}
|
||||
Ok(WalkEvent::Dir(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
self.it.it.skip_current_dir();
|
||||
// Still need to push this on the stack because we'll
|
||||
// get a WalkEvent::Exit event for this dir. We don't
|
||||
// care if it errors though.
|
||||
let _ = self.ig.push(ent.path());
|
||||
continue;
|
||||
}
|
||||
if let Err(err) = self.ig.push(ent.path()) {
|
||||
eprintln!("{}", err);
|
||||
self.it.it.skip_current_dir();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Ok(WalkEvent::File(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
continue;
|
||||
}
|
||||
// If this isn't actually a file (e.g., a symlink), then
|
||||
// skip it.
|
||||
if !ent.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
return Some(ent);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
||||
/// accurately describes the directory tree. Namely, it emits events that are
|
||||
/// one of three types: directory, file or "exit." An "exit" event means that
|
||||
/// the entire contents of a directory have been enumerated.
|
||||
struct WalkEventIter {
|
||||
depth: usize,
|
||||
it: walkdir::Iter,
|
||||
next: Option<Result<DirEntry, walkdir::Error>>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum WalkEvent {
|
||||
Dir(DirEntry),
|
||||
File(DirEntry),
|
||||
Exit,
|
||||
}
|
||||
|
||||
impl From<WalkDir> for WalkEventIter {
|
||||
fn from(it: WalkDir) -> WalkEventIter {
|
||||
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for WalkEventIter {
|
||||
type Item = walkdir::Result<WalkEvent>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||
let dent = self.next.take().or_else(|| self.it.next());
|
||||
let depth = match dent {
|
||||
None => 0,
|
||||
Some(Ok(ref dent)) => dent.depth(),
|
||||
Some(Err(ref err)) => err.depth(),
|
||||
};
|
||||
if depth < self.depth {
|
||||
self.depth -= 1;
|
||||
self.next = dent;
|
||||
return Some(Ok(WalkEvent::Exit));
|
||||
}
|
||||
self.depth = depth;
|
||||
match dent {
|
||||
None => None,
|
||||
Some(Err(err)) => Some(Err(err)),
|
||||
Some(Ok(dent)) => {
|
||||
if dent.file_type().is_dir() {
|
||||
self.depth += 1;
|
||||
Some(Ok(WalkEvent::Dir(dent)))
|
||||
} else {
|
||||
Some(Ok(WalkEvent::File(dent)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
@ -54,6 +54,20 @@ fn path(unix: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn paths(unix: &[&str]) -> Vec<String> {
|
||||
let mut xs: Vec<_> = unix.iter().map(|s| path(s)).collect();
|
||||
xs.sort();
|
||||
xs
|
||||
}
|
||||
|
||||
// Extracts the text before the first `:` of every output line (the whole
// line when it has no `:`) and returns the results sorted.
fn paths_from_stdout(stdout: String) -> Vec<String> {
    let mut found = Vec::new();
    for line in stdout.lines() {
        let prefix = match line.find(':') {
            Some(colon) => &line[..colon],
            None => line,
        };
        found.push(prefix.to_string());
    }
    found.sort();
    found
}
|
||||
|
||||
fn sort_lines(lines: &str) -> String {
|
||||
let mut lines: Vec<String> =
|
||||
lines.trim().lines().map(|s| s.to_owned()).collect();
|
||||
@ -864,6 +878,74 @@ be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/45
|
||||
sherlock!(feature_45_relative_cwd, "test", ".",
|
||||
|wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".not-an-ignore", "foo\n/bar");
|
||||
wd.create_dir("bar");
|
||||
wd.create_dir("baz/bar");
|
||||
wd.create_dir("baz/baz/bar");
|
||||
wd.create("bar/test", "test");
|
||||
wd.create("baz/bar/test", "test");
|
||||
wd.create("baz/baz/bar/test", "test");
|
||||
wd.create("baz/foo", "test");
|
||||
wd.create("baz/test", "test");
|
||||
wd.create("foo", "test");
|
||||
wd.create("test", "test");
|
||||
|
||||
// First, get a baseline without applying ignore rules.
|
||||
let lines = paths_from_stdout(wd.stdout(&mut cmd));
|
||||
assert_eq!(lines, paths(&[
|
||||
"bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo",
|
||||
"baz/test", "foo", "test",
|
||||
]));
|
||||
|
||||
// Now try again with the ignore file activated.
|
||||
cmd.arg("--ignore-file").arg(".not-an-ignore");
|
||||
let lines = paths_from_stdout(wd.stdout(&mut cmd));
|
||||
assert_eq!(lines, paths(&[
|
||||
"baz/bar/test", "baz/baz/bar/test", "baz/test", "test",
|
||||
]));
|
||||
|
||||
// Now do it again, but inside the baz directory.
|
||||
// Since the ignore file is interpreted relative to the CWD, this will
|
||||
// cause the /bar anchored pattern to filter out baz/bar, which is a
|
||||
// subtle difference between true parent ignore files and manually
|
||||
// specified ignore files.
|
||||
let mut cmd = wd.command();
|
||||
cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore");
|
||||
cmd.current_dir(wd.path().join("baz"));
|
||||
let lines = paths_from_stdout(wd.stdout(&mut cmd));
|
||||
assert_eq!(lines, paths(&["baz/bar/test", "test"]));
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/45
|
||||
sherlock!(feature_45_precedence_with_others, "test", ".",
|
||||
|wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".not-an-ignore", "*.log");
|
||||
wd.create(".ignore", "!imp.log");
|
||||
wd.create("imp.log", "test");
|
||||
wd.create("wat.log", "test");
|
||||
|
||||
cmd.arg("--ignore-file").arg(".not-an-ignore");
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "imp.log:test\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/45
|
||||
sherlock!(feature_45_precedence_internal, "test", ".",
|
||||
|wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".not-an-ignore1", "*.log");
|
||||
wd.create(".not-an-ignore2", "!imp.log");
|
||||
wd.create("imp.log", "test");
|
||||
wd.create("wat.log", "test");
|
||||
|
||||
cmd.arg("--ignore-file").arg(".not-an-ignore1");
|
||||
cmd.arg("--ignore-file").arg(".not-an-ignore2");
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "imp.log:test\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/68
|
||||
clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".gitignore", "foo");
|
||||
|
Reference in New Issue
Block a user