1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-06-30 22:23:44 +02:00

File path filtering works and is pretty fast.

I'm pretty disappointed by the performance of regex sets. They are
apparently spending a lot of their time in construction of the DFA,
which probably means that the DFA is just too big.

It turns out that it's actually faster to build an *additional* normal
regex with the alternation of every glob and use it as a first-pass
filter over every file path. If there's a match, only then do we try the
more expensive RegexSet.
This commit is contained in:
Andrew Gallant
2016-08-27 01:01:06 -04:00
parent b55ecf34c7
commit 065c449980
5 changed files with 673 additions and 26 deletions

View File

@ -30,6 +30,8 @@ use docopt::Docopt;
use grep::Grep;
use walkdir::{WalkDir, WalkDirIterator};
use ignore::Ignore;
macro_rules! errored {
($($tt:tt)*) => {
return Err(From::from(format!($($tt)*)));
@ -43,7 +45,9 @@ macro_rules! eprintln {
}}
}
mod gitignore;
mod glob;
mod ignore;
/// Crate-wide result alias: a `result::Result` whose error side is a boxed
/// `Error + Send + Sync` trait object.
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
@ -72,24 +76,40 @@ impl Args {
if self.arg_path.is_empty() {
return errored!("Searching stdin is not currently supported.");
}
let mut stdout = io::BufWriter::new(io::stdout());
let mut ig = Ignore::new();
for p in &self.arg_path {
let mut it = WalkDir::new(p).into_iter();
let mut it = WalkEventIter::from(WalkDir::new(p));
loop {
let ent = match it.next() {
let ev = match it.next() {
None => break,
Some(Ok(ev)) => ev,
Some(Err(err)) => {
eprintln!("{}", err);
continue;
}
Some(Ok(ent)) => ent,
};
if is_hidden(&ent) {
if ent.file_type().is_dir() {
it.skip_current_dir();
match ev {
WalkEvent::Exit => {
ig.pop();
}
WalkEvent::Dir(ent) => {
try!(ig.push(ent.path()));
if is_hidden(&ent) || ig.ignored(ent.path(), true) {
// if is_hidden(&ent) {
it.it.skip_current_dir();
continue;
}
}
WalkEvent::File(ent) => {
if is_hidden(&ent) || ig.ignored(ent.path(), false) {
// if is_hidden(&ent) {
continue;
}
let _ = writeln!(
&mut stdout, "{}", ent.path().display());
}
continue;
}
println!("{}", ent.path().display());
}
}
Ok(0)
@ -108,6 +128,60 @@ impl Args {
}
}
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
// Depth the iterator currently considers itself to be at. Starts at 0, is
// bumped by one after a directory is yielded, and is decremented by one for
// every "exit" event emitted.
depth: usize,
// The underlying walkdir iterator that supplies the raw entries.
it: walkdir::Iter,
// An item already pulled from `it` but held back because an "exit" event had
// to be emitted first; it is consumed before `it` is polled again.
next: Option<result::Result<walkdir::DirEntry, walkdir::Error>>,
}
/// A single event produced by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
// An entry whose file type is a directory.
Dir(walkdir::DirEntry),
// Any entry whose file type is not a directory.
File(walkdir::DirEntry),
// Emitted once per level when the next item is shallower than the depth
// being tracked, including when the underlying iterator runs out while
// the tracked depth is still above zero.
Exit,
}
impl From<walkdir::WalkDir> for WalkEventIter {
    /// Wraps a `WalkDir` builder, starting at depth zero with no item held
    /// back.
    fn from(walker: walkdir::WalkDir) -> WalkEventIter {
        let entries = walker.into_iter();
        WalkEventIter {
            depth: 0,
            it: entries,
            next: None,
        }
    }
}
impl Iterator for WalkEventIter {
    type Item = io::Result<WalkEvent>;

    /// Produces the next event of the walk.
    ///
    /// An item held back by a previous call is used first; otherwise a fresh
    /// one is pulled from the underlying iterator. If that item is shallower
    /// than the tracked depth, it is stashed again and a single `Exit` is
    /// returned, so one `Exit` is emitted per level that is left. Exhaustion
    /// of the underlying iterator counts as depth zero. Errors from the
    /// underlying iterator are converted with `From::from`.
    fn next(&mut self) -> Option<io::Result<WalkEvent>> {
        // Replay the stashed item, if any, before polling the walker again.
        let pending = match self.next.take() {
            Some(stashed) => Some(stashed),
            None => self.it.next(),
        };
        let pending_depth = match pending {
            Some(Ok(ref entry)) => entry.depth(),
            Some(Err(ref err)) => err.depth(),
            None => 0,
        };

        // Moving to a shallower level: leave one directory per call and keep
        // the item around until the tracked depth has caught up with it.
        if pending_depth < self.depth {
            self.next = pending;
            self.depth -= 1;
            return Some(Ok(WalkEvent::Exit));
        }
        self.depth = pending_depth;

        let entry = match pending {
            None => return None,
            Some(Err(err)) => return Some(Err(From::from(err))),
            Some(Ok(entry)) => entry,
        };
        if !entry.file_type().is_dir() {
            return Some(Ok(WalkEvent::File(entry)));
        }
        // Children of this directory live one level deeper.
        self.depth += 1;
        Some(Ok(WalkEvent::Dir(entry)))
    }
}
fn is_hidden(ent: &walkdir::DirEntry) -> bool {
ent.depth() > 0 &&
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)