diff --git a/Cargo.toml b/Cargo.toml index 15b268c9..9a19a660 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,8 +19,11 @@ path = "src/main.rs" name = "xrep" [dependencies] +crossbeam = "0.2" docopt = "0.6" +env_logger = "0.3" grep = { version = "0.1", path = "grep" } +log = "0.3" memchr = "0.1" memmap = "0.2" num_cpus = "1" diff --git a/grep/src/lib.rs b/grep/src/lib.rs index 2f225c61..d57b8fb0 100644 --- a/grep/src/lib.rs +++ b/grep/src/lib.rs @@ -12,7 +12,7 @@ use std::error; use std::fmt; use std::result; -pub use search::{Grep, GrepBuilder}; +pub use search::{Grep, GrepBuilder, Iter, Match}; mod literals; mod nonl; diff --git a/src/gitignore.rs b/src/gitignore.rs index f39cfc48..ffaf5bb3 100644 --- a/src/gitignore.rs +++ b/src/gitignore.rs @@ -136,7 +136,8 @@ impl Gitignore { pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match { // A single regex with a bunch of alternations of glob patterns is // unfortunately typically faster than a regex, so we use it as a - // first pass filter. + // first pass filter. We still need to run the RegexSet to most + // recently defined glob that matched. if !self.set.is_match(path) { return Match::None; } @@ -145,9 +146,9 @@ impl Gitignore { Some(i) => &self.patterns[i], }; if pat.whitelist { - Match::Whitelist + Match::Whitelist(&pat) } else if !pat.only_dir || is_dir { - Match::Ignored + Match::Ignored(&pat) } else { Match::None } @@ -155,22 +156,25 @@ impl Gitignore { } /// The result of a glob match. +/// +/// The lifetime `'a` refers to the lifetime of the pattern that resulted in +/// a match (whether ignored or whitelisted). #[derive(Clone, Debug)] -pub enum Match { +pub enum Match<'a> { /// The path didn't match any glob in the gitignore file. None, /// The last glob matched indicates the path should be ignored. - Ignored, + Ignored(&'a Pattern), /// The last glob matched indicates the path should be whitelisted. - Whitelist, + Whitelist(&'a Pattern), } -impl Match { +impl<'a> Match<'a> { /// Returns true if the match result implies the path should be ignored. pub fn is_ignored(&self) -> bool { match *self { - Match::Ignored => true, - Match::None | Match::Whitelist => false, + Match::Ignored(_) => true, + Match::None | Match::Whitelist(_) => false, } } } @@ -186,11 +190,18 @@ pub struct GitignoreBuilder { /// Pattern represents a single pattern in a gitignore file. It doesn't /// know how to do glob matching directly, but it does store additional /// options on a pattern, such as whether it's whitelisted. -#[derive(Clone, Debug, Default)] -struct Pattern { - pat: String, - whitelist: bool, // prefix of '!' - only_dir: bool, // suffix of '/' +#[derive(Clone, Debug)] +pub struct Pattern { + /// The file path that this pattern was extracted from (may be empty). + pub from: PathBuf, + /// The original glob pattern string. + pub original: String, + /// The actual glob pattern string used to convert to a regex. + pub pat: String, + /// Whether this is a whitelisted pattern or not. + pub whitelist: bool, + /// Whether this pattern should only match directories or not. + pub only_dir: bool, } impl GitignoreBuilder { @@ -222,7 +233,7 @@ impl GitignoreBuilder { let rdr = io::BufReader::new(try!(File::open(&path))); // println!("adding ignores from: {}", path.as_ref().display()); for line in rdr.lines() { - try!(self.add(&try!(line))); + try!(self.add(&path, &try!(line))); } Ok(()) } @@ -230,7 +241,7 @@ impl GitignoreBuilder { /// Add each pattern line from the string given. pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> { for line in gitignore.lines() { - try!(self.add(line)); + try!(self.add("", line)); } Ok(()) } @@ -238,11 +249,21 @@ impl GitignoreBuilder { /// Add a line from a gitignore file to this builder. /// /// If the line could not be parsed as a glob, then an error is returned. - pub fn add(&mut self, mut line: &str) -> Result<(), Error> { + pub fn add>( + &mut self, + from: P, + mut line: &str, + ) -> Result<(), Error> { if line.is_empty() { return Ok(()); } - let mut pat = Pattern::default(); + let mut pat = Pattern { + from: from.as_ref().to_path_buf(), + original: line.to_string(), + pat: String::new(), + whitelist: false, + only_dir: false, + }; let mut opts = glob::MatchOptions::default(); let has_slash = line.chars().any(|c| c == '/'); // If the line starts with an escaped '!', then remove the escape. @@ -352,6 +373,7 @@ mod tests { ignored!(ig22, ROOT, r"\#foo", "#foo"); ignored!(ig23, ROOT, "foo", "./foo"); ignored!(ig24, ROOT, "target", "grep/target"); + ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); not_ignored!(ignot1, ROOT, "amonths", "months"); not_ignored!(ignot2, ROOT, "monthsa", "months"); diff --git a/src/glob.rs b/src/glob.rs index c1d28809..be0d3fd8 100644 --- a/src/glob.rs +++ b/src/glob.rs @@ -1,7 +1,7 @@ /*! -The glob submodule provides standard shell globbing, but is specifically -implemented by converting glob syntax to regular expressions. The reasoning -is two fold: +The glob module provides standard shell globbing, but is specifically +implemented by converting glob syntax to regular expressions. The reasoning is +two fold: 1. The regex library is *really* fast. Regaining performance in a distinct implementation of globbing is non-trivial. diff --git a/src/ignore.rs b/src/ignore.rs index 08b4541d..a91b75a2 100644 --- a/src/ignore.rs +++ b/src/ignore.rs @@ -56,19 +56,40 @@ pub struct Ignore { /// A stack of ignore patterns at each directory level of traversal. /// A directory that contributes no ignore patterns is `None`. stack: Vec>, - // TODO(burntsushi): Add other patterns from the command line here. + ignore_hidden: bool, } impl Ignore { /// Create an empty set of ignore patterns. pub fn new() -> Ignore { - Ignore { stack: vec![] } + Ignore { + stack: vec![], + ignore_hidden: true, + } + } + + /// Set whether hidden files/folders should be ignored (defaults to true). + pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore { + self.ignore_hidden = yes; + self } /// Add a directory to the stack. + /// + /// Note that even if this returns an error, the directory is added to the + /// stack (and therefore should be popped). pub fn push>(&mut self, path: P) -> Result<(), Error> { - self.stack.push(try!(IgnoreDir::new(path))); - Ok(()) + match IgnoreDir::new(path) { + Ok(id) => { + self.stack.push(id); + Ok(()) + } + Err(err) => { + // Don't leave the stack in an inconsistent state. + self.stack.push(None); + Err(err) + } + } } /// Pop a directory from the stack. @@ -81,10 +102,19 @@ impl Ignore { /// Returns true if and only if the given file path should be ignored. pub fn ignored>(&self, path: P, is_dir: bool) -> bool { let path = path.as_ref(); + if self.ignore_hidden && is_hidden(&path) { + return true; + } for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) { match id.matched(path, is_dir) { - Match::Whitelist => return false, - Match::Ignored => return true, + Match::Whitelist(ref pat) => { + debug!("{} whitelisted by {:?}", path.display(), pat); + return false; + } + Match::Ignored(ref pat) => { + debug!("{} ignored by {:?}", path.display(), pat); + return true; + } Match::None => {} } } @@ -150,6 +180,14 @@ impl IgnoreDir { } } +fn is_hidden>(path: P) -> bool { + if let Some(name) = path.as_ref().file_name() { + name.to_str().map(|s| s.starts_with(".")).unwrap_or(false) + } else { + false + } +} + #[cfg(test)] mod tests { use std::path::Path; diff --git a/src/main.rs b/src/main.rs index da0d8efc..31da883c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,7 +1,11 @@ #![allow(dead_code, unused_variables)] +extern crate crossbeam; extern crate docopt; +extern crate env_logger; extern crate grep; +#[macro_use] +extern crate log; extern crate memchr; extern crate memmap; extern crate num_cpus; @@ -10,27 +14,22 @@ extern crate regex_syntax as syntax; extern crate rustc_serialize; extern crate walkdir; -const USAGE: &'static str = " -Usage: xrep [options] ... - -xrep is like the silver searcher, but faster than it and grep. - -At least one path is required. Searching stdin isn't yet supported. - -Options: - -c, --count Suppress normal output and show count of line matches. -"; - use std::error::Error; use std::io::{self, Write}; +use std::path::PathBuf; use std::process; use std::result; +use std::sync::Arc; +use std::thread; +use crossbeam::sync::{MsQueue, TreiberStack}; use docopt::Docopt; -use grep::Grep; -use walkdir::{WalkDir, WalkDirIterator}; +use grep::{Grep, GrepBuilder}; +use walkdir::WalkDir; use ignore::Ignore; +use printer::Printer; +use search::Searcher; macro_rules! errored { ($($tt:tt)*) => { @@ -48,21 +47,54 @@ macro_rules! eprintln { mod gitignore; mod glob; mod ignore; +mod printer; +mod search; +mod walk; -pub type Result = result::Result>; +const USAGE: &'static str = " +Usage: xrep [options] [ ...] + +xrep is like the silver searcher and grep, but faster than both. + +WARNING: Searching stdin isn't yet supported. + +Options: + -c, --count Suppress normal output and show count of line matches. + --debug Show debug messages. + --files Print each file that would be searched + (but don't search). + -L, --follow Follow symlinks. + --hidden Search hidden directories and files. + -i, --ignore-case Case insensitive search. + --threads ARG The number of threads to use. Defaults to the number + of logical CPUs. [default: 0] +"; #[derive(RustcDecodable)] struct Args { arg_pattern: String, arg_path: Vec, flag_count: bool, + flag_debug: bool, + flag_files: bool, + flag_follow: bool, + flag_hidden: bool, + flag_ignore_case: bool, + flag_threads: usize, } +impl Args { + fn printer(&self, wtr: W) -> Printer { + Printer::new(wtr) + } +} + +pub type Result = result::Result>; + fn main() { let args: Args = Docopt::new(USAGE).and_then(|d| d.decode()) .unwrap_or_else(|e| e.exit()); - match args.run() { - Ok(count) if count == 0 => process::exit(1), + match real_main(args) { Ok(_) => process::exit(0), Err(err) => { let _ = writeln!(&mut io::stderr(), "{}", err); @@ -71,118 +103,193 @@ fn main() { } } -impl Args { - fn run(&self) -> Result { - if self.arg_path.is_empty() { - return errored!("Searching stdin is not currently supported."); +fn real_main(args: Args) -> Result<()> { + let mut logb = env_logger::LogBuilder::new(); + if args.flag_debug { + logb.filter(None, log::LogLevelFilter::Debug); + } else { + logb.filter(None, log::LogLevelFilter::Warn); + } + if let Err(err) = logb.init() { + return errored!("failed to initialize logger: {}", err); + } + + let mut main = Main::new(args); + try!(main.run_workers()); + let writer = main.run_writer(); + main.scan(); + main.finish_workers(); + main.chan_results.push(Message::Quit); + writer.join().unwrap(); + Ok(()) +} + +type ChanWork = Arc>>; + +type ChanResults = Arc>>>; + +enum Message { + Some(T), + Quit, +} + +struct Main { + args: Arc, + chan_work: ChanWork, + chan_results: ChanResults, + bufs: Arc, + workers: Vec>, +} + +impl Main { + fn new(mut args: Args) -> Main { + if args.arg_path.is_empty() { + args.arg_path.push("./".to_string()); } - let mut stdout = io::BufWriter::new(io::stdout()); - let mut ig = Ignore::new(); - for p in &self.arg_path { - let mut it = WalkEventIter::from(WalkDir::new(p)); - loop { - let ev = match it.next() { - None => break, - Some(Ok(ev)) => ev, - Some(Err(err)) => { - eprintln!("{}", err); - continue; - } - }; - match ev { - WalkEvent::Exit => { - ig.pop(); - } - WalkEvent::Dir(ent) => { - try!(ig.push(ent.path())); - if is_hidden(&ent) || ig.ignored(ent.path(), true) { - // if is_hidden(&ent) { - it.it.skip_current_dir(); - continue; - } - } - WalkEvent::File(ent) => { - if is_hidden(&ent) || ig.ignored(ent.path(), false) { - // if is_hidden(&ent) { - continue; - } - let _ = writeln!( - &mut stdout, "{}", ent.path().display()); - } + Main { + args: Arc::new(args), + chan_work: Arc::new(MsQueue::new()), + chan_results: Arc::new(MsQueue::new()), + bufs: Arc::new(Bufs::new()), + workers: vec![], + } + } + + fn scan(&mut self) { + for p in &self.args.arg_path { + if p == "-" { + eprintln!("searching isn't yet supported"); + continue; + } + let wd = WalkDir::new(p).follow_links(self.args.flag_follow); + let mut ig = Ignore::new(); + ig.ignore_hidden(!self.args.flag_hidden); + + for ent in walk::Iter::new(ig, wd) { + let mut path = ent.path(); + if let Ok(p) = path.strip_prefix("./") { + path = p; } + self.chan_work.push(Message::Some(Work { + path: path.to_path_buf(), + out: self.bufs.pop(), + })); } } - Ok(0) } - fn run_mmap_count_only(&self, searcher: &Grep) -> Result { - use memmap::{Mmap, Protection}; - - assert!(self.arg_path.len() == 1); - let mut wtr = io::BufWriter::new(io::stdout()); - let mmap = try!(Mmap::open_path(&self.arg_path[0], Protection::Read)); - let text = unsafe { mmap.as_slice() }; - let count = searcher.iter(text).count() as u64; - try!(writeln!(wtr, "{}", count)); - Ok(count) - } -} - -/// WalkEventIter transforms a WalkDir iterator into an iterator that more -/// accurately describes the directory tree. Namely, it emits events that are -/// one of three types: directory, file or "exit." An "exit" event means that -/// the entire contents of a directory have been enumerated. -struct WalkEventIter { - depth: usize, - it: walkdir::Iter, - next: Option>, -} - -#[derive(Debug)] -enum WalkEvent { - Dir(walkdir::DirEntry), - File(walkdir::DirEntry), - Exit, -} - -impl From for WalkEventIter { - fn from(it: walkdir::WalkDir) -> WalkEventIter { - WalkEventIter { depth: 0, it: it.into_iter(), next: None } - } -} - -impl Iterator for WalkEventIter { - type Item = io::Result; - - fn next(&mut self) -> Option> { - let dent = self.next.take().or_else(|| self.it.next()); - let depth = match dent { - None => 0, - Some(Ok(ref dent)) => dent.depth(), - Some(Err(ref err)) => err.depth(), + fn run_writer(&self) -> thread::JoinHandle<()> { + let wtr = Writer { + args: self.args.clone(), + chan_results: self.chan_results.clone(), + bufs: self.bufs.clone(), }; - if depth < self.depth { - self.depth -= 1; - self.next = dent; - return Some(Ok(WalkEvent::Exit)); + thread::spawn(move || wtr.run()) + } + + fn run_workers(&mut self) -> Result<()> { + let mut num = self.args.flag_threads; + if num == 0 { + num = num_cpus::get(); } - self.depth = depth; - match dent { - None => None, - Some(Err(err)) => Some(Err(From::from(err))), - Some(Ok(dent)) => { - if dent.file_type().is_dir() { - self.depth += 1; - Some(Ok(WalkEvent::Dir(dent))) - } else { - Some(Ok(WalkEvent::File(dent))) - } - } + if num < 4 { + num = 1; + } else { + num -= 2; + } + println!("running {} workers", num); + for _ in 0..num { + try!(self.run_worker()); + } + Ok(()) + } + + fn run_worker(&mut self) -> Result<()> { + let grepb = + GrepBuilder::new(&self.args.arg_pattern) + .case_insensitive(self.args.flag_ignore_case); + let worker = Worker { + args: self.args.clone(), + chan_work: self.chan_work.clone(), + chan_results: self.chan_results.clone(), + grep: try!(grepb.build()), + }; + self.workers.push(thread::spawn(move || worker.run())); + Ok(()) + } + + fn finish_workers(&mut self) { + // We can stop all of the works by sending a quit message. + // Each worker is guaranteed to receive the quit message exactly + // once, so we only need to send `self.workers.len()` of them + for _ in 0..self.workers.len() { + self.chan_work.push(Message::Quit); + } + // Now wait for each to finish. + while let Some(thread) = self.workers.pop() { + thread.join().unwrap(); } } } -fn is_hidden(ent: &walkdir::DirEntry) -> bool { - ent.depth() > 0 && - ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false) +struct Writer { + args: Arc, + chan_results: ChanResults, + bufs: Arc, +} + +impl Writer { + fn run(self) { + let mut stdout = io::BufWriter::new(io::stdout()); + while let Message::Some(res) = self.chan_results.pop() { + let _ = stdout.write_all(&res); + self.bufs.push(res); + } + } +} + +struct Work { + path: PathBuf, + out: Vec, +} + +struct Worker { + args: Arc, + chan_work: ChanWork, + chan_results: ChanResults, + grep: Grep, +} + +impl Worker { + fn run(self) { + while let Message::Some(mut work) = self.chan_work.pop() { + work.out.clear(); + let printer = self.args.printer(work.out); + let searcher = Searcher::new(&self.grep, work.path).unwrap(); + let buf = searcher.search(printer); + self.chan_results.push(Message::Some(buf)); + } + } +} + +/// A pool of buffers used by each worker thread to write matches. +struct Bufs { + bufs: TreiberStack>, +} + +impl Bufs { + pub fn new() -> Bufs { + Bufs { bufs: TreiberStack::new() } + } + + pub fn pop(&self) -> Vec { + match self.bufs.pop() { + None => vec![], + Some(buf) => buf, + } + } + + pub fn push(&self, buf: Vec) { + self.bufs.push(buf); + } } diff --git a/src/printer.rs b/src/printer.rs new file mode 100644 index 00000000..229fe151 --- /dev/null +++ b/src/printer.rs @@ -0,0 +1,50 @@ +use std::io; +use std::path::Path; + +use grep::Match; + +macro_rules! wln { + ($($tt:tt)*) => { + let _ = writeln!($($tt)*); + } +} + +pub struct Printer { + wtr: W, +} + +impl Printer { + pub fn new(wtr: W) -> Printer { + Printer { + wtr: wtr, + } + } + + pub fn into_inner(self) -> W { + self.wtr + } + + pub fn path>(&mut self, path: P) { + wln!(&mut self.wtr, "{}", path.as_ref().display()); + } + + pub fn count(&mut self, count: u64) { + wln!(&mut self.wtr, "{}", count); + } + + pub fn matched>( + &mut self, + path: P, + buf: &[u8], + m: &Match, + ) { + let _ = self.wtr.write(path.as_ref().to_string_lossy().as_bytes()); + let _ = self.wtr.write(b":"); + let _ = self.wtr.write(&buf[m.start()..m.end()]); + let _ = self.wtr.write(b"\n"); + } + + pub fn binary_matched>(&mut self, path: P) { + wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display()); + } +} diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 00000000..f0e297ab --- /dev/null +++ b/src/search.rs @@ -0,0 +1,144 @@ +/*! +The search module is responsible for searching a single file and printing +matches. +*/ + +use std::cmp; +use std::error::Error as StdError; +use std::fmt; +use std::fs::File; +use std::io; +use std::path::{Path, PathBuf}; + +use grep::Grep; +use memchr::memchr; +use memmap::{Mmap, Protection}; + +use printer::Printer; + +/// Error describes errors that can occur while searching. +#[derive(Debug)] +pub enum Error { + /// Normal IO or Mmap errors suck. Include the path the originated them. + Io { + err: io::Error, + path: PathBuf, + } +} + +impl Error { + fn from_io>(err: io::Error, path: P) -> Error { + Error::Io { err: err, path: path.as_ref().to_path_buf() } + } +} + +impl StdError for Error { + fn description(&self) -> &str { + match *self { + Error::Io { ref err, .. } => err.description(), + } + } + + fn cause(&self) -> Option<&StdError> { + match *self { + Error::Io { ref err, .. } => Some(err), + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match *self { + Error::Io { ref err, ref path } => { + write!(f, "{}: {}", path.display(), err) + } + } + } +} + +/// Searcher searches a memory mapped buffer. +/// +/// The `'g` lifetime refers to the lifetime of the underlying matcher. +pub struct Searcher<'g> { + grep: &'g Grep, + path: PathBuf, + mmap: Option, +} + +impl<'g> Searcher<'g> { + /// Create a new memory map based searcher using the given matcher for the + /// file path given. + pub fn new>( + grep: &'g Grep, + path: P, + ) -> Result, Error> { + let file = try!(File::open(&path).map_err(|err| { + Error::from_io(err, &path) + })); + let md = try!(file.metadata().map_err(|err| { + Error::from_io(err, &path) + })); + let mmap = + if md.len() == 0 { + None + } else { + Some(try!(Mmap::open(&file, Protection::Read).map_err(|err| { + Error::from_io(err, &path) + }))) + }; + Ok(Searcher { + grep: grep, + path: path.as_ref().to_path_buf(), + mmap: mmap, + }) + } + + /// Execute the search, writing the results to the printer given and + /// returning the underlying buffer. + pub fn search(&self, printer: Printer) -> W { + Search { + grep: &self.grep, + path: &*self.path, + buf: self.buf(), + printer: printer, + }.run() + } + + /// Execute the search, returning a count of the number of hits. + pub fn count(&self) -> u64 { + self.grep.iter(self.buf()).count() as u64 + } + + fn buf(&self) -> &[u8] { + self.mmap.as_ref().map(|m| unsafe { m.as_slice() }).unwrap_or(&[]) + } +} + +struct Search<'a, W> { + grep: &'a Grep, + path: &'a Path, + buf: &'a [u8], + printer: Printer, +} + +impl<'a, W: io::Write> Search<'a, W> { + fn run(mut self) -> W { + let is_binary = self.is_binary(); + let mut it = self.grep.iter(self.buf).peekable(); + if is_binary && it.peek().is_some() { + self.printer.binary_matched(self.path); + return self.printer.into_inner(); + } + for m in it { + self.printer.matched(self.path, self.buf, &m); + } + self.printer.into_inner() + } + + fn is_binary(&self) -> bool { + if self.buf.len() >= 4 && &self.buf[0..4] == b"%PDF" { + return true; + } + memchr(b'\x00', &self.buf[0..cmp::min(1024, self.buf.len())]).is_some() + } +} diff --git a/src/walk.rs b/src/walk.rs new file mode 100644 index 00000000..e60e2605 --- /dev/null +++ b/src/walk.rs @@ -0,0 +1,142 @@ +/*! +The walk module implements a recursive directory iterator (using the `walkdir`) +crate that can efficiently skip and ignore files and directories specified in +a user's ignore patterns. +*/ + +use walkdir::{self, DirEntry, WalkDir, WalkDirIterator}; + +use ignore::Ignore; + +/// Iter is a recursive directory iterator over file paths in a directory. +/// Only file paths should be searched are yielded. +pub struct Iter { + ig: Ignore, + it: WalkEventIter, +} + +impl Iter { + /// Create a new recursive directory iterator using the ignore patterns + /// and walkdir iterator given. + pub fn new(ig: Ignore, wd: WalkDir) -> Iter { + Iter { + ig: ig, + it: WalkEventIter::from(wd), + } + } + + /// Returns true if this entry should be skipped. + fn skip_entry(&self, ent: &DirEntry) -> bool { + if ent.depth() == 0 { + // Never skip the root directory. + return false; + } + if self.ig.ignored(ent.path(), ent.file_type().is_dir()) { + return true; + } + false + } +} + +impl Iterator for Iter { + type Item = DirEntry; + + fn next(&mut self) -> Option { + while let Some(ev) = self.it.next() { + match ev { + Err(err) => { + eprintln!("{}", err); + } + Ok(WalkEvent::Exit) => { + self.ig.pop(); + } + Ok(WalkEvent::Dir(ent)) => { + if self.skip_entry(&ent) { + self.it.it.skip_current_dir(); + // Still need to push this on the stack because we'll + // get a WalkEvent::Exit event for this dir. We don't + // care if it errors though. + let _ = self.ig.push(ent.path()); + continue; + } + if let Err(err) = self.ig.push(ent.path()) { + eprintln!("{}", err); + self.it.it.skip_current_dir(); + continue; + } + } + Ok(WalkEvent::File(ent)) => { + if self.skip_entry(&ent) { + continue; + } + // If this isn't actually a file (e.g., a symlink), then + // skip it. + if !ent.file_type().is_file() { + continue; + } + return Some(ent); + } + } + } + None + } +} + +/// WalkEventIter transforms a WalkDir iterator into an iterator that more +/// accurately describes the directory tree. Namely, it emits events that are +/// one of three types: directory, file or "exit." An "exit" event means that +/// the entire contents of a directory have been enumerated. +struct WalkEventIter { + depth: usize, + it: walkdir::Iter, + next: Option>, +} + +#[derive(Debug)] +enum WalkEvent { + Dir(DirEntry), + File(DirEntry), + Exit, +} + +impl From for WalkEventIter { + fn from(it: WalkDir) -> WalkEventIter { + WalkEventIter { depth: 0, it: it.into_iter(), next: None } + } +} + +impl Iterator for WalkEventIter { + type Item = walkdir::Result; + + fn next(&mut self) -> Option> { + let dent = self.next.take().or_else(|| self.it.next()); + let depth = match dent { + None => 0, + Some(Ok(ref dent)) => dent.depth(), + Some(Err(ref err)) => err.depth(), + }; + if depth < self.depth { + self.depth -= 1; + self.next = dent; + return Some(Ok(WalkEvent::Exit)); + } + self.depth = depth; + match dent { + None => None, + Some(Err(err)) => Some(Err(err)), + Some(Ok(dent)) => { + if dent.file_type().is_dir() { + self.depth += 1; + Some(Ok(WalkEvent::Dir(dent))) + } else { + Some(Ok(WalkEvent::File(dent))) + } + } + } + } +} + +fn is_hidden(ent: &DirEntry) -> bool { + ent.depth() > 0 && + ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false) +}