mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-06-09 14:07:45 +02:00
Make file type filtering a lot faster.
We do this by avoiding a RegexSet (*sigh*). In particular, file type matching has much simpler semantics than gitignore matching: we never need to know which file type matched, only whether any of them did. Therefore, we can get away with a single regex containing one giant alternation.
This commit is contained in:
parent
37544c092f
commit
2b943eda47
@ -367,7 +367,7 @@ impl RawArgs {
|
|||||||
types.select(ty);
|
types.select(ty);
|
||||||
}
|
}
|
||||||
for ty in &self.flag_type_not {
|
for ty in &self.flag_type_not {
|
||||||
types.select_not(ty);
|
types.negate(ty);
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
145
src/types.rs
145
src/types.rs
@ -8,7 +8,10 @@ use std::error::Error as StdError;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
|
use regex;
|
||||||
|
|
||||||
|
use gitignore::{Match, Pattern};
|
||||||
|
use glob::{self, MatchOptions};
|
||||||
|
|
||||||
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
|
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
|
||||||
("asm", &["*.asm", "*.s", "*.S"]),
|
("asm", &["*.asm", "*.s", "*.S"]),
|
||||||
@ -55,6 +58,7 @@ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
|
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
|
||||||
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
|
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
|
||||||
("py", &["*.py"]),
|
("py", &["*.py"]),
|
||||||
|
("readme", &["README*", "*README"]),
|
||||||
("rr", &["*.R"]),
|
("rr", &["*.R"]),
|
||||||
("rst", &["*.rst"]),
|
("rst", &["*.rst"]),
|
||||||
("ruby", &["*.rb"]),
|
("ruby", &["*.rb"]),
|
||||||
@ -81,7 +85,9 @@ pub enum Error {
|
|||||||
/// A user specified file type definition could not be parsed.
|
/// A user specified file type definition could not be parsed.
|
||||||
InvalidDefinition,
|
InvalidDefinition,
|
||||||
/// There was an error building the matcher (probably a bad glob).
|
/// There was an error building the matcher (probably a bad glob).
|
||||||
Gitignore(gitignore::Error),
|
Glob(glob::Error),
|
||||||
|
/// There was an error compiling a glob as a regex.
|
||||||
|
Regex(regex::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl StdError for Error {
|
impl StdError for Error {
|
||||||
@ -89,7 +95,8 @@ impl StdError for Error {
|
|||||||
match *self {
|
match *self {
|
||||||
Error::UnrecognizedFileType(_) => "unrecognized file type",
|
Error::UnrecognizedFileType(_) => "unrecognized file type",
|
||||||
Error::InvalidDefinition => "invalid definition",
|
Error::InvalidDefinition => "invalid definition",
|
||||||
Error::Gitignore(ref err) => err.description(),
|
Error::Glob(ref err) => err.description(),
|
||||||
|
Error::Regex(ref err) => err.description(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -104,14 +111,21 @@ impl fmt::Display for Error {
|
|||||||
write!(f, "invalid definition (format is type:glob, e.g., \
|
write!(f, "invalid definition (format is type:glob, e.g., \
|
||||||
html:*.html)")
|
html:*.html)")
|
||||||
}
|
}
|
||||||
Error::Gitignore(ref err) => err.fmt(f),
|
Error::Glob(ref err) => err.fmt(f),
|
||||||
|
Error::Regex(ref err) => err.fmt(f),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<gitignore::Error> for Error {
|
impl From<glob::Error> for Error {
|
||||||
fn from(err: gitignore::Error) -> Error {
|
fn from(err: glob::Error) -> Error {
|
||||||
Error::Gitignore(err)
|
Error::Glob(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<regex::Error> for Error {
|
||||||
|
fn from(err: regex::Error) -> Error {
|
||||||
|
Error::Regex(err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -137,7 +151,8 @@ impl FileTypeDef {
|
|||||||
/// Types is a file type matcher.
|
/// Types is a file type matcher.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Types {
|
pub struct Types {
|
||||||
gi: Option<Gitignore>,
|
selected: Option<glob::Set>,
|
||||||
|
negated: Option<glob::Set>,
|
||||||
has_selected: bool,
|
has_selected: bool,
|
||||||
unmatched_pat: Pattern,
|
unmatched_pat: Pattern,
|
||||||
}
|
}
|
||||||
@ -149,14 +164,19 @@ impl Types {
|
|||||||
///
|
///
|
||||||
/// If has_selected is true, then at least one file type was selected.
|
/// If has_selected is true, then at least one file type was selected.
|
||||||
/// Therefore, any non-matches should be ignored.
|
/// Therefore, any non-matches should be ignored.
|
||||||
fn new(gi: Option<Gitignore>, has_selected: bool) -> Types {
|
fn new(
|
||||||
|
selected: Option<glob::Set>,
|
||||||
|
negated: Option<glob::Set>,
|
||||||
|
has_selected: bool,
|
||||||
|
) -> Types {
|
||||||
Types {
|
Types {
|
||||||
gi: gi,
|
selected: selected,
|
||||||
|
negated: negated,
|
||||||
has_selected: has_selected,
|
has_selected: has_selected,
|
||||||
unmatched_pat: Pattern {
|
unmatched_pat: Pattern {
|
||||||
from: Path::new("<filetype>").to_path_buf(),
|
from: Path::new("<filetype>").to_path_buf(),
|
||||||
original: "<none>".to_string(),
|
original: "<N/A>".to_string(),
|
||||||
pat: "<none>".to_string(),
|
pat: "<N/A>".to_string(),
|
||||||
whitelist: false,
|
whitelist: false,
|
||||||
only_dir: false,
|
only_dir: false,
|
||||||
},
|
},
|
||||||
@ -165,7 +185,7 @@ impl Types {
|
|||||||
|
|
||||||
/// Creates a new file type matcher that never matches.
|
/// Creates a new file type matcher that never matches.
|
||||||
pub fn empty() -> Types {
|
pub fn empty() -> Types {
|
||||||
Types::new(None, false)
|
Types::new(None, None, false)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a match for the given path against this file type matcher.
|
/// Returns a match for the given path against this file type matcher.
|
||||||
@ -175,22 +195,35 @@ impl Types {
|
|||||||
/// If at least one file type is selected and path doesn't match, then
|
/// If at least one file type is selected and path doesn't match, then
|
||||||
/// the path is also considered ignored.
|
/// the path is also considered ignored.
|
||||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||||
|
// If we don't have any matcher, then we can't do anything.
|
||||||
|
if self.negated.is_none() && self.selected.is_none() {
|
||||||
|
return Match::None;
|
||||||
|
}
|
||||||
// File types don't apply to directories.
|
// File types don't apply to directories.
|
||||||
if is_dir {
|
if is_dir {
|
||||||
return Match::None;
|
return Match::None;
|
||||||
}
|
}
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
self.gi.as_ref()
|
let name = match path.file_name() {
|
||||||
.map(|gi| {
|
Some(name) => name.to_string_lossy(),
|
||||||
let path = &*path.to_string_lossy();
|
None if self.has_selected => {
|
||||||
let mat = gi.matched_utf8(path, is_dir).invert();
|
return Match::Ignored(&self.unmatched_pat);
|
||||||
if self.has_selected && mat.is_none() {
|
}
|
||||||
|
None => {
|
||||||
|
return Match::None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
|
||||||
|
return Match::Ignored(&self.unmatched_pat);
|
||||||
|
}
|
||||||
|
if self.selected.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
|
||||||
|
return Match::Whitelist(&self.unmatched_pat);
|
||||||
|
}
|
||||||
|
if self.has_selected {
|
||||||
Match::Ignored(&self.unmatched_pat)
|
Match::Ignored(&self.unmatched_pat)
|
||||||
} else {
|
} else {
|
||||||
mat
|
Match::None
|
||||||
}
|
}
|
||||||
})
|
|
||||||
.unwrap_or(Match::None)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,8 +231,8 @@ impl Types {
|
|||||||
/// a set of file type selections.
|
/// a set of file type selections.
|
||||||
pub struct TypesBuilder {
|
pub struct TypesBuilder {
|
||||||
types: HashMap<String, Vec<String>>,
|
types: HashMap<String, Vec<String>>,
|
||||||
select: Vec<String>,
|
selected: Vec<String>,
|
||||||
select_not: Vec<String>,
|
negated: Vec<String>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TypesBuilder {
|
impl TypesBuilder {
|
||||||
@ -207,41 +240,57 @@ impl TypesBuilder {
|
|||||||
pub fn new() -> TypesBuilder {
|
pub fn new() -> TypesBuilder {
|
||||||
TypesBuilder {
|
TypesBuilder {
|
||||||
types: HashMap::new(),
|
types: HashMap::new(),
|
||||||
select: vec![],
|
selected: vec![],
|
||||||
select_not: vec![],
|
negated: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Build the current set of file type definitions *and* selections into
|
/// Build the current set of file type definitions *and* selections into
|
||||||
/// a file type matcher.
|
/// a file type matcher.
|
||||||
pub fn build(&self) -> Result<Types, Error> {
|
pub fn build(&self) -> Result<Types, Error> {
|
||||||
if self.select.is_empty() && self.select_not.is_empty() {
|
let opts = MatchOptions {
|
||||||
return Ok(Types::new(None, false));
|
require_literal_separator: true, ..MatchOptions::default()
|
||||||
}
|
};
|
||||||
let mut bgi = GitignoreBuilder::new("/");
|
let selected_globs =
|
||||||
for name in &self.select {
|
if self.selected.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let mut bset = glob::SetBuilder::new();
|
||||||
|
for name in &self.selected {
|
||||||
let globs = match self.types.get(name) {
|
let globs = match self.types.get(name) {
|
||||||
Some(globs) => globs,
|
Some(globs) => globs,
|
||||||
None => {
|
None => {
|
||||||
return Err(Error::UnrecognizedFileType(name.to_string()));
|
let msg = name.to_string();
|
||||||
|
return Err(Error::UnrecognizedFileType(msg));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
for glob in globs {
|
for glob in globs {
|
||||||
try!(bgi.add("<filetype>", glob));
|
try!(bset.add_with(glob, &opts));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for name in &self.select_not {
|
Some(try!(bset.build()))
|
||||||
|
};
|
||||||
|
let negated_globs =
|
||||||
|
if self.negated.is_empty() {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
let mut bset = glob::SetBuilder::new();
|
||||||
|
for name in &self.negated {
|
||||||
let globs = match self.types.get(name) {
|
let globs = match self.types.get(name) {
|
||||||
Some(globs) => globs,
|
Some(globs) => globs,
|
||||||
None => {
|
None => {
|
||||||
return Err(Error::UnrecognizedFileType(name.to_string()));
|
let msg = name.to_string();
|
||||||
|
return Err(Error::UnrecognizedFileType(msg));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
for glob in globs {
|
for glob in globs {
|
||||||
try!(bgi.add("<filetype>", &format!("!{}", glob)));
|
try!(bset.add_with(glob, &opts));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(Types::new(Some(try!(bgi.build())), !self.select.is_empty()))
|
Some(try!(bset.build()))
|
||||||
|
};
|
||||||
|
Ok(Types::new(
|
||||||
|
selected_globs, negated_globs, !self.selected.is_empty()))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the set of current file type definitions.
|
/// Return the set of current file type definitions.
|
||||||
@ -260,14 +309,30 @@ impl TypesBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Select the file type given by `name`.
|
/// Select the file type given by `name`.
|
||||||
|
///
|
||||||
|
/// If `name` is `all`, then all file types are selected.
|
||||||
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
||||||
self.select.push(name.to_string());
|
if name == "all" {
|
||||||
|
for name in self.types.keys() {
|
||||||
|
self.selected.push(name.to_string());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.selected.push(name.to_string());
|
||||||
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Ignore the file type given by `name`.
|
/// Ignore the file type given by `name`.
|
||||||
pub fn select_not(&mut self, name: &str) -> &mut TypesBuilder {
|
///
|
||||||
self.select_not.push(name.to_string());
|
/// If `name` is `all`, then all file types are negated.
|
||||||
|
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
|
||||||
|
if name == "all" {
|
||||||
|
for name in self.types.keys() {
|
||||||
|
self.negated.push(name.to_string());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.negated.push(name.to_string());
|
||||||
|
}
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -333,7 +398,7 @@ mod tests {
|
|||||||
btypes.select(sel);
|
btypes.select(sel);
|
||||||
}
|
}
|
||||||
for selnot in $selnot {
|
for selnot in $selnot {
|
||||||
btypes.select_not(selnot);
|
btypes.negate(selnot);
|
||||||
}
|
}
|
||||||
let types = btypes.build().unwrap();
|
let types = btypes.build().unwrap();
|
||||||
let mat = types.matched($path, false);
|
let mat = types.matched($path, false);
|
||||||
|
@ -219,6 +219,13 @@ sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
|||||||
assert_eq!(lines, "file.rs:Sherlock\n");
|
assert_eq!(lines, "file.rs:Sherlock\n");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||||
|
wd.create("file.py", "Sherlock");
|
||||||
|
cmd.arg("-t").arg("all");
|
||||||
|
let lines: String = wd.stdout(&mut cmd);
|
||||||
|
assert_eq!(lines, "file.py:Sherlock\n");
|
||||||
|
});
|
||||||
|
|
||||||
sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||||
wd.remove("sherlock");
|
wd.remove("sherlock");
|
||||||
wd.create("file.py", "Sherlock");
|
wd.create("file.py", "Sherlock");
|
||||||
@ -228,6 +235,18 @@ sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
|||||||
assert_eq!(lines, "file.py:Sherlock\n");
|
assert_eq!(lines, "file.py:Sherlock\n");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
sherlock!(file_types_negate_all, "Sherlock", ".",
|
||||||
|
|wd: WorkDir, mut cmd: Command| {
|
||||||
|
wd.create("file.py", "Sherlock");
|
||||||
|
cmd.arg("-T").arg("all");
|
||||||
|
|
||||||
|
let lines: String = wd.stdout(&mut cmd);
|
||||||
|
assert_eq!(lines, "\
|
||||||
|
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||||
|
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||||
|
");
|
||||||
|
});
|
||||||
|
|
||||||
sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||||
wd.create("file.py", "Sherlock");
|
wd.create("file.py", "Sherlock");
|
||||||
wd.create("file.rs", "Sherlock");
|
wd.create("file.rs", "Sherlock");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user