mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-19 09:02:15 +02:00
Some minor performance tweaks.
This includes moving basename-only globs into separate regexes. The hope is that if the regex processes less input, it will be faster.
This commit is contained in:
parent
1c5884b2f9
commit
0d14c74e63
12
Cargo.lock
generated
12
Cargo.lock
generated
@ -14,11 +14,11 @@ dependencies = [
|
|||||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"walkdir 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -134,7 +134,7 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "num_cpus"
|
name = "num_cpus"
|
||||||
version = "1.0.0"
|
version = "1.1.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -214,7 +214,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "walkdir"
|
name = "walkdir"
|
||||||
version = "0.1.6"
|
version = "0.1.7"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@ -245,7 +245,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
||||||
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
||||||
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
|
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
|
||||||
"checksum num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a859041cbf7a70ea1ece4b87d1a2c6ef364dcb68749c88db1f97304b9ec09d5f"
|
"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
|
||||||
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
||||||
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
|
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
|
||||||
"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
|
"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
|
||||||
@ -256,6 +256,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
||||||
"checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d"
|
"checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d"
|
||||||
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||||
"checksum walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d42144c31c9909882ce76e696b306b88a5b091721251137d5d522d1ef3da7cf9"
|
"checksum walkdir 0.1.7 (registry+https://github.com/rust-lang/crates.io-index)" = "5e415f89803a053390d21ecb49244deb5b30fb34aeec4a38badb747c83a4c668"
|
||||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||||
|
37
src/glob.rs
37
src/glob.rs
@ -124,6 +124,8 @@ pub struct Set {
|
|||||||
base_prefixes_map: Vec<usize>,
|
base_prefixes_map: Vec<usize>,
|
||||||
base_suffixes: Vec<Vec<u8>>,
|
base_suffixes: Vec<Vec<u8>>,
|
||||||
base_suffixes_map: Vec<usize>,
|
base_suffixes_map: Vec<usize>,
|
||||||
|
base_regexes: RegexSet,
|
||||||
|
base_regexes_map: Vec<usize>,
|
||||||
regexes: RegexSet,
|
regexes: RegexSet,
|
||||||
regexes_map: Vec<usize>,
|
regexes_map: Vec<usize>,
|
||||||
}
|
}
|
||||||
@ -195,7 +197,14 @@ impl Set {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
into.extend(self.regexes.matches(path_bytes));
|
if let Some(ref basename) = basename {
|
||||||
|
for i in self.base_regexes.matches(&**basename) {
|
||||||
|
into.push(self.base_regexes_map[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for i in self.regexes.matches(path_bytes) {
|
||||||
|
into.push(self.regexes_map[i]);
|
||||||
|
}
|
||||||
into.sort();
|
into.sort();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -207,6 +216,7 @@ impl Set {
|
|||||||
let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
|
let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
|
||||||
let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
|
let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
|
||||||
let (mut regexes, mut regexes_map) = (vec![], vec![]);
|
let (mut regexes, mut regexes_map) = (vec![], vec![]);
|
||||||
|
let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
|
||||||
for (i, &(ref p, ref o)) in pats.iter().enumerate() {
|
for (i, &(ref p, ref o)) in pats.iter().enumerate() {
|
||||||
if let Some(ext) = p.ext() {
|
if let Some(ext) = p.ext() {
|
||||||
exts.entry(ext).or_insert(vec![]).push(i);
|
exts.entry(ext).or_insert(vec![]).push(i);
|
||||||
@ -221,6 +231,10 @@ impl Set {
|
|||||||
} else if let Some(literal) = p.base_literal_suffix() {
|
} else if let Some(literal) = p.base_literal_suffix() {
|
||||||
base_suffixes.push(literal.into_bytes());
|
base_suffixes.push(literal.into_bytes());
|
||||||
base_suffixes_map.push(i);
|
base_suffixes_map.push(i);
|
||||||
|
} else if p.is_only_basename() {
|
||||||
|
let part = format!("(?:{})", p.to_regex_with(o));
|
||||||
|
base_regexes.push(part);
|
||||||
|
base_regexes_map.push(i);
|
||||||
} else {
|
} else {
|
||||||
let part = format!("(?:{})", p.to_regex_with(o));
|
let part = format!("(?:{})", p.to_regex_with(o));
|
||||||
regexes.push(part);
|
regexes.push(part);
|
||||||
@ -236,6 +250,8 @@ impl Set {
|
|||||||
base_prefixes_map: base_prefixes_map,
|
base_prefixes_map: base_prefixes_map,
|
||||||
base_suffixes: base_suffixes,
|
base_suffixes: base_suffixes,
|
||||||
base_suffixes_map: base_suffixes_map,
|
base_suffixes_map: base_suffixes_map,
|
||||||
|
base_regexes: try!(RegexSet::new(base_regexes)),
|
||||||
|
base_regexes_map: base_regexes_map,
|
||||||
regexes: try!(RegexSet::new(regexes)),
|
regexes: try!(RegexSet::new(regexes)),
|
||||||
regexes_map: regexes_map,
|
regexes_map: regexes_map,
|
||||||
})
|
})
|
||||||
@ -402,6 +418,25 @@ impl Pattern {
|
|||||||
Some(lit)
|
Some(lit)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this pattern only inspects the basename
|
||||||
|
/// of a path.
|
||||||
|
pub fn is_only_basename(&self) -> bool {
|
||||||
|
match self.tokens.get(0) {
|
||||||
|
Some(&Token::RecursivePrefix) => {}
|
||||||
|
_ => return false,
|
||||||
|
}
|
||||||
|
for t in &self.tokens[1..] {
|
||||||
|
match *t {
|
||||||
|
Token::Literal(c) if c == '/' || c == '\\' => return false,
|
||||||
|
Token::RecursivePrefix
|
||||||
|
| Token::RecursiveSuffix
|
||||||
|
| Token::RecursiveZeroOrMore => return false,
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
true
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns the pattern as a literal if and only if the pattern must match
|
/// Returns the pattern as a literal if and only if the pattern must match
|
||||||
/// an entire path exactly.
|
/// an entire path exactly.
|
||||||
///
|
///
|
||||||
|
@ -19,6 +19,7 @@ use std::io;
|
|||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
|
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
|
||||||
|
use pathutil::is_hidden;
|
||||||
use types::Types;
|
use types::Types;
|
||||||
|
|
||||||
const IGNORE_NAMES: &'static [&'static str] = &[
|
const IGNORE_NAMES: &'static [&'static str] = &[
|
||||||
@ -377,14 +378,6 @@ impl Overrides {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if and only if the final component of `path` starts with `.`.
///
/// Paths that have no final file name component (as reported by
/// `Path::file_name`) are never considered hidden.
fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    path.as_ref()
        .file_name()
        // A strict `to_str()` yields `None` for names that are not valid
        // Unicode, which would report such dot-files as visible. The lossy
        // conversion only substitutes the invalid parts, so a leading `.`
        // is still seen.
        .map(|name| name.to_string_lossy().starts_with('.'))
        .unwrap_or(false)
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
@ -11,6 +11,8 @@ improvement on just listing the files to search (!).
|
|||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
|
use memchr::memrchr;
|
||||||
|
|
||||||
/// Strip `prefix` from the `path` and return the remainder.
|
/// Strip `prefix` from the `path` and return the remainder.
|
||||||
///
|
///
|
||||||
/// If `path` doesn't have a prefix `prefix`, then return `None`.
|
/// If `path` doesn't have a prefix `prefix`, then return `None`.
|
||||||
@ -58,13 +60,7 @@ pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
|||||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let mut last_slash = 0;
|
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||||
for (i, &b) in path.iter().enumerate().rev() {
|
|
||||||
if b == b'/' {
|
|
||||||
last_slash = i + 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -78,3 +74,25 @@ pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
|||||||
) -> Option<&'a OsStr> {
|
) -> Option<&'a OsStr> {
|
||||||
path.as_ref().file_name()
|
path.as_ref().file_name()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this file path is considered to be hidden.
|
||||||
|
#[cfg(unix)]
|
||||||
|
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||||
|
use std::os::unix::ffi::OsStrExt;
|
||||||
|
|
||||||
|
if let Some(name) = file_name(path.as_ref()) {
|
||||||
|
name.as_bytes().get(0) == Some(&b'.')
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this file path is considered to be hidden.
///
/// A path is hidden when the final component reported by `file_name` starts
/// with `.`. If `file_name` reports no final component, the result is
/// `false`.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    // Borrow the path as `&Path` before handing it to `file_name`, exactly as
    // the Unix variant does; passing the owned `P` does not type-check
    // against a helper that is generic over `?Sized` paths.
    file_name(path.as_ref())
        // Lossy conversion keeps a leading `.` visible even when the rest of
        // the name is not valid Unicode, where `to_str()` would give `None`.
        .map(|name| name.to_string_lossy().starts_with('.'))
        .unwrap_or(false)
}
|
||||||
|
@ -26,6 +26,7 @@ impl Iter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if this entry should be skipped.
|
/// Returns true if this entry should be skipped.
|
||||||
|
#[inline(always)]
|
||||||
fn skip_entry(&self, ent: &DirEntry) -> bool {
|
fn skip_entry(&self, ent: &DirEntry) -> bool {
|
||||||
if ent.depth() == 0 {
|
if ent.depth() == 0 {
|
||||||
// Never skip the root directory.
|
// Never skip the root directory.
|
||||||
@ -41,6 +42,7 @@ impl Iter {
|
|||||||
impl Iterator for Iter {
|
impl Iterator for Iter {
|
||||||
type Item = DirEntry;
|
type Item = DirEntry;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
fn next(&mut self) -> Option<DirEntry> {
|
fn next(&mut self) -> Option<DirEntry> {
|
||||||
while let Some(ev) = self.it.next() {
|
while let Some(ev) = self.it.next() {
|
||||||
match ev {
|
match ev {
|
||||||
@ -108,6 +110,7 @@ impl From<WalkDir> for WalkEventIter {
|
|||||||
impl Iterator for WalkEventIter {
|
impl Iterator for WalkEventIter {
|
||||||
type Item = walkdir::Result<WalkEvent>;
|
type Item = walkdir::Result<WalkEvent>;
|
||||||
|
|
||||||
|
#[inline(always)]
|
||||||
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||||
let dent = self.next.take().or_else(|| self.it.next());
|
let dent = self.next.take().or_else(|| self.it.next());
|
||||||
let depth = match dent {
|
let depth = match dent {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user