1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-06-09 14:07:45 +02:00

Make file type filtering a lot faster.

We do this by avoiding the use of a RegexSet (*sigh*). In particular, file
type matching has much simpler semantics than gitignore files, so we don't
actually need to care which file type matched. Therefore, we can get away
with a single regex with a giant alternation.
This commit is contained in:
Andrew Gallant 2016-09-11 13:26:53 -04:00
parent 37544c092f
commit 2b943eda47
3 changed files with 140 additions and 56 deletions

View File

@ -367,7 +367,7 @@ impl RawArgs {
types.select(ty); types.select(ty);
} }
for ty in &self.flag_type_not { for ty in &self.flag_type_not {
types.select_not(ty); types.negate(ty);
} }
Ok(()) Ok(())
} }

View File

@ -8,7 +8,10 @@ use std::error::Error as StdError;
use std::fmt; use std::fmt;
use std::path::Path; use std::path::Path;
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern}; use regex;
use gitignore::{Match, Pattern};
use glob::{self, MatchOptions};
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("asm", &["*.asm", "*.s", "*.S"]), ("asm", &["*.asm", "*.s", "*.S"]),
@ -55,6 +58,7 @@ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]), ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]), ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
("py", &["*.py"]), ("py", &["*.py"]),
("readme", &["README*", "*README"]),
("rr", &["*.R"]), ("rr", &["*.R"]),
("rst", &["*.rst"]), ("rst", &["*.rst"]),
("ruby", &["*.rb"]), ("ruby", &["*.rb"]),
@ -81,7 +85,9 @@ pub enum Error {
/// A user specified file type definition could not be parsed. /// A user specified file type definition could not be parsed.
InvalidDefinition, InvalidDefinition,
/// There was an error building the matcher (probably a bad glob). /// There was an error building the matcher (probably a bad glob).
Gitignore(gitignore::Error), Glob(glob::Error),
/// There was an error compiling a glob as a regex.
Regex(regex::Error),
} }
impl StdError for Error { impl StdError for Error {
@ -89,7 +95,8 @@ impl StdError for Error {
match *self { match *self {
Error::UnrecognizedFileType(_) => "unrecognized file type", Error::UnrecognizedFileType(_) => "unrecognized file type",
Error::InvalidDefinition => "invalid definition", Error::InvalidDefinition => "invalid definition",
Error::Gitignore(ref err) => err.description(), Error::Glob(ref err) => err.description(),
Error::Regex(ref err) => err.description(),
} }
} }
} }
@ -104,14 +111,21 @@ impl fmt::Display for Error {
write!(f, "invalid definition (format is type:glob, e.g., \ write!(f, "invalid definition (format is type:glob, e.g., \
html:*.html)") html:*.html)")
} }
Error::Gitignore(ref err) => err.fmt(f), Error::Glob(ref err) => err.fmt(f),
Error::Regex(ref err) => err.fmt(f),
} }
} }
} }
impl From<gitignore::Error> for Error { impl From<glob::Error> for Error {
fn from(err: gitignore::Error) -> Error { fn from(err: glob::Error) -> Error {
Error::Gitignore(err) Error::Glob(err)
}
}
impl From<regex::Error> for Error {
fn from(err: regex::Error) -> Error {
Error::Regex(err)
} }
} }
@ -137,7 +151,8 @@ impl FileTypeDef {
/// Types is a file type matcher. /// Types is a file type matcher.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Types { pub struct Types {
gi: Option<Gitignore>, selected: Option<glob::Set>,
negated: Option<glob::Set>,
has_selected: bool, has_selected: bool,
unmatched_pat: Pattern, unmatched_pat: Pattern,
} }
@ -149,14 +164,19 @@ impl Types {
/// ///
/// If has_selected is true, then at least one file type was selected. /// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored. /// Therefore, any non-matches should be ignored.
fn new(gi: Option<Gitignore>, has_selected: bool) -> Types { fn new(
selected: Option<glob::Set>,
negated: Option<glob::Set>,
has_selected: bool,
) -> Types {
Types { Types {
gi: gi, selected: selected,
negated: negated,
has_selected: has_selected, has_selected: has_selected,
unmatched_pat: Pattern { unmatched_pat: Pattern {
from: Path::new("<filetype>").to_path_buf(), from: Path::new("<filetype>").to_path_buf(),
original: "<none>".to_string(), original: "<N/A>".to_string(),
pat: "<none>".to_string(), pat: "<N/A>".to_string(),
whitelist: false, whitelist: false,
only_dir: false, only_dir: false,
}, },
@ -165,7 +185,7 @@ impl Types {
/// Creates a new file type matcher that never matches. /// Creates a new file type matcher that never matches.
pub fn empty() -> Types { pub fn empty() -> Types {
Types::new(None, false) Types::new(None, None, false)
} }
/// Returns a match for the given path against this file type matcher. /// Returns a match for the given path against this file type matcher.
@ -175,22 +195,35 @@ impl Types {
/// If at least one file type is selected and path doesn't match, then /// If at least one file type is selected and path doesn't match, then
/// the path is also considered ignored. /// the path is also considered ignored.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match { pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
// If we don't have any matcher, then we can't do anything.
if self.negated.is_none() && self.selected.is_none() {
return Match::None;
}
// File types don't apply to directories. // File types don't apply to directories.
if is_dir { if is_dir {
return Match::None; return Match::None;
} }
let path = path.as_ref(); let path = path.as_ref();
self.gi.as_ref() let name = match path.file_name() {
.map(|gi| { Some(name) => name.to_string_lossy(),
let path = &*path.to_string_lossy(); None if self.has_selected => {
let mat = gi.matched_utf8(path, is_dir).invert(); return Match::Ignored(&self.unmatched_pat);
if self.has_selected && mat.is_none() { }
None => {
return Match::None;
}
};
if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
return Match::Ignored(&self.unmatched_pat);
}
if self.selected.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
return Match::Whitelist(&self.unmatched_pat);
}
if self.has_selected {
Match::Ignored(&self.unmatched_pat) Match::Ignored(&self.unmatched_pat)
} else { } else {
mat Match::None
} }
})
.unwrap_or(Match::None)
} }
} }
@ -198,8 +231,8 @@ impl Types {
/// a set of file type selections. /// a set of file type selections.
pub struct TypesBuilder { pub struct TypesBuilder {
types: HashMap<String, Vec<String>>, types: HashMap<String, Vec<String>>,
select: Vec<String>, selected: Vec<String>,
select_not: Vec<String>, negated: Vec<String>,
} }
impl TypesBuilder { impl TypesBuilder {
@ -207,41 +240,57 @@ impl TypesBuilder {
pub fn new() -> TypesBuilder { pub fn new() -> TypesBuilder {
TypesBuilder { TypesBuilder {
types: HashMap::new(), types: HashMap::new(),
select: vec![], selected: vec![],
select_not: vec![], negated: vec![],
} }
} }
/// Build the current set of file type definitions *and* selections into /// Build the current set of file type definitions *and* selections into
/// a file type matcher. /// a file type matcher.
pub fn build(&self) -> Result<Types, Error> { pub fn build(&self) -> Result<Types, Error> {
if self.select.is_empty() && self.select_not.is_empty() { let opts = MatchOptions {
return Ok(Types::new(None, false)); require_literal_separator: true, ..MatchOptions::default()
} };
let mut bgi = GitignoreBuilder::new("/"); let selected_globs =
for name in &self.select { if self.selected.is_empty() {
None
} else {
let mut bset = glob::SetBuilder::new();
for name in &self.selected {
let globs = match self.types.get(name) { let globs = match self.types.get(name) {
Some(globs) => globs, Some(globs) => globs,
None => { None => {
return Err(Error::UnrecognizedFileType(name.to_string())); let msg = name.to_string();
return Err(Error::UnrecognizedFileType(msg));
} }
}; };
for glob in globs { for glob in globs {
try!(bgi.add("<filetype>", glob)); try!(bset.add_with(glob, &opts));
} }
} }
for name in &self.select_not { Some(try!(bset.build()))
};
let negated_globs =
if self.negated.is_empty() {
None
} else {
let mut bset = glob::SetBuilder::new();
for name in &self.negated {
let globs = match self.types.get(name) { let globs = match self.types.get(name) {
Some(globs) => globs, Some(globs) => globs,
None => { None => {
return Err(Error::UnrecognizedFileType(name.to_string())); let msg = name.to_string();
return Err(Error::UnrecognizedFileType(msg));
} }
}; };
for glob in globs { for glob in globs {
try!(bgi.add("<filetype>", &format!("!{}", glob))); try!(bset.add_with(glob, &opts));
} }
} }
Ok(Types::new(Some(try!(bgi.build())), !self.select.is_empty())) Some(try!(bset.build()))
};
Ok(Types::new(
selected_globs, negated_globs, !self.selected.is_empty()))
} }
/// Return the set of current file type definitions. /// Return the set of current file type definitions.
@ -260,14 +309,30 @@ impl TypesBuilder {
} }
/// Select the file type given by `name`. /// Select the file type given by `name`.
///
/// If `name` is `all`, then all file types are selected.
pub fn select(&mut self, name: &str) -> &mut TypesBuilder { pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
self.select.push(name.to_string()); if name == "all" {
for name in self.types.keys() {
self.selected.push(name.to_string());
}
} else {
self.selected.push(name.to_string());
}
self self
} }
/// Ignore the file type given by `name`. /// Ignore the file type given by `name`.
pub fn select_not(&mut self, name: &str) -> &mut TypesBuilder { ///
self.select_not.push(name.to_string()); /// If `name` is `all`, then all file types are negated.
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
if name == "all" {
for name in self.types.keys() {
self.negated.push(name.to_string());
}
} else {
self.negated.push(name.to_string());
}
self self
} }
@ -333,7 +398,7 @@ mod tests {
btypes.select(sel); btypes.select(sel);
} }
for selnot in $selnot { for selnot in $selnot {
btypes.select_not(selnot); btypes.negate(selnot);
} }
let types = btypes.build().unwrap(); let types = btypes.build().unwrap();
let mat = types.matched($path, false); let mat = types.matched($path, false);

View File

@ -219,6 +219,13 @@ sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "file.rs:Sherlock\n"); assert_eq!(lines, "file.rs:Sherlock\n");
}); });
sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
cmd.arg("-t").arg("all");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py:Sherlock\n");
});
sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock"); wd.remove("sherlock");
wd.create("file.py", "Sherlock"); wd.create("file.py", "Sherlock");
@ -228,6 +235,18 @@ sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "file.py:Sherlock\n"); assert_eq!(lines, "file.py:Sherlock\n");
}); });
sherlock!(file_types_negate_all, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
cmd.arg("-T").arg("all");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
");
});
sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock"); wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock"); wd.create("file.rs", "Sherlock");