mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-28 12:42:13 +02:00
1023 lines
31 KiB
Rust
1023 lines
31 KiB
Rust
/*!
|
|
The globset crate provides cross platform single glob and glob set matching.
|
|
|
|
Glob set matching is the process of matching one or more glob patterns against
|
|
a single candidate path simultaneously, and returning all of the globs that
|
|
matched. For example, given this set of globs:
|
|
|
|
* `*.rs`
|
|
* `src/lib.rs`
|
|
* `src/**/foo.rs`
|
|
|
|
and a path `src/bar/baz/foo.rs`, then the set would report the first and third
|
|
globs as matching.
|
|
|
|
# Example: one glob
|
|
|
|
This example shows how to match a single glob against a single file path.
|
|
|
|
```
|
|
use globset::Glob;
|
|
|
|
let glob = Glob::new("*.rs")?.compile_matcher();
|
|
|
|
assert!(glob.is_match("foo.rs"));
|
|
assert!(glob.is_match("foo/bar.rs"));
|
|
assert!(!glob.is_match("Cargo.toml"));
|
|
# Ok::<(), Box<dyn std::error::Error>>(())
|
|
```
|
|
|
|
# Example: configuring a glob matcher
|
|
|
|
This example shows how to use a `GlobBuilder` to configure aspects of match
|
|
semantics. In this example, we prevent wildcards from matching path separators.
|
|
|
|
```
|
|
use globset::GlobBuilder;
|
|
|
|
let glob = GlobBuilder::new("*.rs")
|
|
.literal_separator(true).build()?.compile_matcher();
|
|
|
|
assert!(glob.is_match("foo.rs"));
|
|
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
|
assert!(!glob.is_match("Cargo.toml"));
|
|
# Ok::<(), Box<dyn std::error::Error>>(())
|
|
```
|
|
|
|
# Example: match multiple globs at once
|
|
|
|
This example shows how to match multiple glob patterns at once.
|
|
|
|
```
|
|
use globset::{Glob, GlobSetBuilder};
|
|
|
|
let mut builder = GlobSetBuilder::new();
|
|
// A GlobBuilder can be used to configure each glob's match semantics
|
|
// independently.
|
|
builder.add(Glob::new("*.rs")?);
|
|
builder.add(Glob::new("src/lib.rs")?);
|
|
builder.add(Glob::new("src/**/foo.rs")?);
|
|
let set = builder.build()?;
|
|
|
|
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
|
# Ok::<(), Box<dyn std::error::Error>>(())
|
|
```
|
|
|
|
# Syntax
|
|
|
|
Standard Unix-style glob syntax is supported:
|
|
|
|
* `?` matches any single character. (If the `literal_separator` option is
|
|
enabled, then `?` can never match a path separator.)
|
|
* `*` matches zero or more characters. (If the `literal_separator` option is
|
|
enabled, then `*` can never match a path separator.)
|
|
* `**` recursively matches directories but is only legal in three situations.
  First, if the glob starts with <code>\*\*/</code>, then it matches
  all directories. For example, <code>\*\*/foo</code> matches `foo`
  and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
  <code>/\*\*</code>, then it matches all sub-entries. For example,
  <code>foo/\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
  Thirdly, if the glob contains <code>/\*\*/</code> anywhere within
  the pattern, then it matches zero or more directories. Using `**` anywhere
  else is illegal (N.B. the glob `**` is allowed and means "match everything").
|
|
* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
|
|
(N.B. Nesting `{...}` is not currently allowed.)
|
|
* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
|
|
`[!ab]` to match any character except for `a` and `b`.
|
|
* Metacharacters such as `*` and `?` can be escaped with character class
|
|
notation. e.g., `[*]` matches `*`.
|
|
* When backslash escapes are enabled, a backslash (`\`) will escape all meta
|
|
characters in a glob. If it precedes a non-meta character, then the slash is
|
|
ignored. A `\\` will match a literal `\\`. Note that this mode is only
|
|
enabled on Unix platforms by default, but can be enabled on any platform
|
|
via the `backslash_escape` setting on `Glob`.
|
|
|
|
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
|
|
or to enable case insensitive matching.
|
|
*/
|
|
|
|
#![deny(missing_docs)]
|
|
|
|
use std::{
|
|
borrow::Cow,
|
|
panic::{RefUnwindSafe, UnwindSafe},
|
|
path::Path,
|
|
sync::Arc,
|
|
};
|
|
|
|
use {
|
|
aho_corasick::AhoCorasick,
|
|
bstr::{ByteSlice, ByteVec, B},
|
|
regex_automata::{
|
|
meta::Regex,
|
|
util::pool::{Pool, PoolGuard},
|
|
PatternSet,
|
|
},
|
|
};
|
|
|
|
use crate::{
|
|
glob::MatchStrategy,
|
|
pathutil::{file_name, file_name_ext, normalize_path},
|
|
};
|
|
|
|
pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
|
|
|
|
mod fnv;
|
|
mod glob;
|
|
mod pathutil;
|
|
|
|
#[cfg(feature = "serde1")]
|
|
mod serde_impl;
|
|
|
|
// With the `log` feature enabled, `debug!` forwards its arguments to
// `log::debug!`.
#[cfg(feature = "log")]
macro_rules! debug {
    ($($arg:tt)*) => {
        ::log::debug!($($arg)*);
    };
}

// Without the `log` feature, `debug!` expands to nothing, so its arguments
// are never evaluated.
#[cfg(not(feature = "log"))]
macro_rules! debug {
    ($($arg:tt)*) => {};
}
|
|
|
|
/// Represents an error that can occur when parsing a glob pattern.
|
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
|
pub struct Error {
|
|
/// The original glob provided by the caller.
|
|
glob: Option<String>,
|
|
/// The kind of error.
|
|
kind: ErrorKind,
|
|
}
|
|
|
|
/// The different ways in which parsing a glob pattern can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
    /// **DEPRECATED**.
    ///
    /// This error was once reported for consistency with git's glob
    /// specification, but that specification now accepts every use of `**`.
    /// A `**` that is neither adjacent to a path separator nor at the
    /// beginning/end of a glob is now treated as two consecutive `*`
    /// patterns, so this error is no longer used.
    InvalidRecursive,
    /// A character class (e.g., `[abc]`) was never closed.
    UnclosedClass,
    /// A range inside a character class (e.g., `[a-z]`) is invalid, for
    /// example because it starts with a lexicographically larger character
    /// than it ends with.
    InvalidRange(char, char),
    /// A `}` was found without a matching `{`.
    UnopenedAlternates,
    /// A `{` was found without a matching `}`.
    UnclosedAlternates,
    /// An alternating group appears inside another alternating group, e.g.,
    /// `{{a,b},{c,d}}`.
    NestedAlternates,
    /// An unescaped '\' was found at the end of a glob.
    DanglingEscape,
    /// An error raised while parsing or compiling a regex.
    Regex(String),
    /// Hints that destructuring should not be exhaustive.
    ///
    /// This enum may gain variants in the future; this variant keeps clients
    /// from relying on exhaustive matching, which would otherwise break when
    /// a variant is added.
    #[doc(hidden)]
    __Nonexhaustive,
}
|
|
|
|
impl std::error::Error for Error {
|
|
fn description(&self) -> &str {
|
|
self.kind.description()
|
|
}
|
|
}
|
|
|
|
impl Error {
|
|
/// Return the glob that caused this error, if one exists.
|
|
pub fn glob(&self) -> Option<&str> {
|
|
self.glob.as_ref().map(|s| &**s)
|
|
}
|
|
|
|
/// Return the kind of this error.
|
|
pub fn kind(&self) -> &ErrorKind {
|
|
&self.kind
|
|
}
|
|
}
|
|
|
|
impl ErrorKind {
|
|
fn description(&self) -> &str {
|
|
match *self {
|
|
ErrorKind::InvalidRecursive => {
|
|
"invalid use of **; must be one path component"
|
|
}
|
|
ErrorKind::UnclosedClass => {
|
|
"unclosed character class; missing ']'"
|
|
}
|
|
ErrorKind::InvalidRange(_, _) => "invalid character range",
|
|
ErrorKind::UnopenedAlternates => {
|
|
"unopened alternate group; missing '{' \
|
|
(maybe escape '}' with '[}]'?)"
|
|
}
|
|
ErrorKind::UnclosedAlternates => {
|
|
"unclosed alternate group; missing '}' \
|
|
(maybe escape '{' with '[{]'?)"
|
|
}
|
|
ErrorKind::NestedAlternates => {
|
|
"nested alternate groups are not allowed"
|
|
}
|
|
ErrorKind::DanglingEscape => "dangling '\\'",
|
|
ErrorKind::Regex(ref err) => err,
|
|
ErrorKind::__Nonexhaustive => unreachable!(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for Error {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match self.glob {
|
|
None => self.kind.fmt(f),
|
|
Some(ref glob) => {
|
|
write!(f, "error parsing glob '{}': {}", glob, self.kind)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
impl std::fmt::Display for ErrorKind {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
match *self {
|
|
ErrorKind::InvalidRecursive
|
|
| ErrorKind::UnclosedClass
|
|
| ErrorKind::UnopenedAlternates
|
|
| ErrorKind::UnclosedAlternates
|
|
| ErrorKind::NestedAlternates
|
|
| ErrorKind::DanglingEscape
|
|
| ErrorKind::Regex(_) => write!(f, "{}", self.description()),
|
|
ErrorKind::InvalidRange(s, e) => {
|
|
write!(f, "invalid range; '{}' > '{}'", s, e)
|
|
}
|
|
ErrorKind::__Nonexhaustive => unreachable!(),
|
|
}
|
|
}
|
|
}
|
|
|
|
fn new_regex(pat: &str) -> Result<Regex, Error> {
|
|
let syntax = regex_automata::util::syntax::Config::new()
|
|
.utf8(false)
|
|
.dot_matches_new_line(true);
|
|
let config = Regex::config()
|
|
.utf8_empty(false)
|
|
.nfa_size_limit(Some(10 * (1 << 20)))
|
|
.hybrid_cache_capacity(10 * (1 << 20));
|
|
Regex::builder().syntax(syntax).configure(config).build(pat).map_err(
|
|
|err| Error {
|
|
glob: Some(pat.to_string()),
|
|
kind: ErrorKind::Regex(err.to_string()),
|
|
},
|
|
)
|
|
}
|
|
|
|
fn new_regex_set(pats: Vec<String>) -> Result<Regex, Error> {
|
|
let syntax = regex_automata::util::syntax::Config::new()
|
|
.utf8(false)
|
|
.dot_matches_new_line(true);
|
|
let config = Regex::config()
|
|
.match_kind(regex_automata::MatchKind::All)
|
|
.utf8_empty(false)
|
|
.nfa_size_limit(Some(10 * (1 << 20)))
|
|
.hybrid_cache_capacity(10 * (1 << 20));
|
|
Regex::builder()
|
|
.syntax(syntax)
|
|
.configure(config)
|
|
.build_many(&pats)
|
|
.map_err(|err| Error {
|
|
glob: None,
|
|
kind: ErrorKind::Regex(err.to_string()),
|
|
})
|
|
}
|
|
|
|
/// GlobSet represents a group of globs that can be matched together in a
|
|
/// single pass.
|
|
#[derive(Clone, Debug)]
|
|
pub struct GlobSet {
|
|
len: usize,
|
|
strats: Vec<GlobSetMatchStrategy>,
|
|
}
|
|
|
|
impl GlobSet {
|
|
/// Create a new [`GlobSetBuilder`]. A `GlobSetBuilder` can be used to add
|
|
/// new patterns. Once all patterns have been added, `build` should be
|
|
/// called to produce a `GlobSet`, which can then be used for matching.
|
|
#[inline]
|
|
pub fn builder() -> GlobSetBuilder {
|
|
GlobSetBuilder::new()
|
|
}
|
|
|
|
/// Create an empty `GlobSet`. An empty set matches nothing.
|
|
#[inline]
|
|
pub fn empty() -> GlobSet {
|
|
GlobSet { len: 0, strats: vec![] }
|
|
}
|
|
|
|
/// Returns true if this set is empty, and therefore matches nothing.
|
|
#[inline]
|
|
pub fn is_empty(&self) -> bool {
|
|
self.len == 0
|
|
}
|
|
|
|
/// Returns the number of globs in this set.
|
|
#[inline]
|
|
pub fn len(&self) -> usize {
|
|
self.len
|
|
}
|
|
|
|
/// Returns true if any glob in this set matches the path given.
|
|
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
|
self.is_match_candidate(&Candidate::new(path.as_ref()))
|
|
}
|
|
|
|
/// Returns true if any glob in this set matches the path given.
|
|
///
|
|
/// This takes a Candidate as input, which can be used to amortize the
|
|
/// cost of preparing a path for matching.
|
|
pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
|
|
if self.is_empty() {
|
|
return false;
|
|
}
|
|
for strat in &self.strats {
|
|
if strat.is_match(path) {
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
/// Returns the sequence number of every glob pattern that matches the
|
|
/// given path.
|
|
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
|
|
self.matches_candidate(&Candidate::new(path.as_ref()))
|
|
}
|
|
|
|
/// Returns the sequence number of every glob pattern that matches the
|
|
/// given path.
|
|
///
|
|
/// This takes a Candidate as input, which can be used to amortize the
|
|
/// cost of preparing a path for matching.
|
|
pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
|
|
let mut into = vec![];
|
|
if self.is_empty() {
|
|
return into;
|
|
}
|
|
self.matches_candidate_into(path, &mut into);
|
|
into
|
|
}
|
|
|
|
/// Adds the sequence number of every glob pattern that matches the given
|
|
/// path to the vec given.
|
|
///
|
|
/// `into` is cleared before matching begins, and contains the set of
|
|
/// sequence numbers (in ascending order) after matching ends. If no globs
|
|
/// were matched, then `into` will be empty.
|
|
pub fn matches_into<P: AsRef<Path>>(
|
|
&self,
|
|
path: P,
|
|
into: &mut Vec<usize>,
|
|
) {
|
|
self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
|
|
}
|
|
|
|
/// Adds the sequence number of every glob pattern that matches the given
|
|
/// path to the vec given.
|
|
///
|
|
/// `into` is cleared before matching begins, and contains the set of
|
|
/// sequence numbers (in ascending order) after matching ends. If no globs
|
|
/// were matched, then `into` will be empty.
|
|
///
|
|
/// This takes a Candidate as input, which can be used to amortize the
|
|
/// cost of preparing a path for matching.
|
|
pub fn matches_candidate_into(
|
|
&self,
|
|
path: &Candidate<'_>,
|
|
into: &mut Vec<usize>,
|
|
) {
|
|
into.clear();
|
|
if self.is_empty() {
|
|
return;
|
|
}
|
|
for strat in &self.strats {
|
|
strat.matches_into(path, into);
|
|
}
|
|
into.sort();
|
|
into.dedup();
|
|
}
|
|
|
|
fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
|
|
if pats.is_empty() {
|
|
return Ok(GlobSet { len: 0, strats: vec![] });
|
|
}
|
|
let mut lits = LiteralStrategy::new();
|
|
let mut base_lits = BasenameLiteralStrategy::new();
|
|
let mut exts = ExtensionStrategy::new();
|
|
let mut prefixes = MultiStrategyBuilder::new();
|
|
let mut suffixes = MultiStrategyBuilder::new();
|
|
let mut required_exts = RequiredExtensionStrategyBuilder::new();
|
|
let mut regexes = MultiStrategyBuilder::new();
|
|
for (i, p) in pats.iter().enumerate() {
|
|
match MatchStrategy::new(p) {
|
|
MatchStrategy::Literal(lit) => {
|
|
lits.add(i, lit);
|
|
}
|
|
MatchStrategy::BasenameLiteral(lit) => {
|
|
base_lits.add(i, lit);
|
|
}
|
|
MatchStrategy::Extension(ext) => {
|
|
exts.add(i, ext);
|
|
}
|
|
MatchStrategy::Prefix(prefix) => {
|
|
prefixes.add(i, prefix);
|
|
}
|
|
MatchStrategy::Suffix { suffix, component } => {
|
|
if component {
|
|
lits.add(i, suffix[1..].to_string());
|
|
}
|
|
suffixes.add(i, suffix);
|
|
}
|
|
MatchStrategy::RequiredExtension(ext) => {
|
|
required_exts.add(i, ext, p.regex().to_owned());
|
|
}
|
|
MatchStrategy::Regex => {
|
|
debug!("glob converted to regex: {:?}", p);
|
|
regexes.add(i, p.regex().to_owned());
|
|
}
|
|
}
|
|
}
|
|
debug!(
|
|
"built glob set; {} literals, {} basenames, {} extensions, \
|
|
{} prefixes, {} suffixes, {} required extensions, {} regexes",
|
|
lits.0.len(),
|
|
base_lits.0.len(),
|
|
exts.0.len(),
|
|
prefixes.literals.len(),
|
|
suffixes.literals.len(),
|
|
required_exts.0.len(),
|
|
regexes.literals.len()
|
|
);
|
|
Ok(GlobSet {
|
|
len: pats.len(),
|
|
strats: vec![
|
|
GlobSetMatchStrategy::Extension(exts),
|
|
GlobSetMatchStrategy::BasenameLiteral(base_lits),
|
|
GlobSetMatchStrategy::Literal(lits),
|
|
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
|
|
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
|
|
GlobSetMatchStrategy::RequiredExtension(
|
|
required_exts.build()?,
|
|
),
|
|
GlobSetMatchStrategy::Regex(regexes.regex_set()?),
|
|
],
|
|
})
|
|
}
|
|
}
|
|
|
|
impl Default for GlobSet {
|
|
/// Create a default empty GlobSet.
|
|
fn default() -> Self {
|
|
GlobSet::empty()
|
|
}
|
|
}
|
|
|
|
/// GlobSetBuilder builds a group of patterns that can be used to
|
|
/// simultaneously match a file path.
|
|
#[derive(Clone, Debug)]
|
|
pub struct GlobSetBuilder {
|
|
pats: Vec<Glob>,
|
|
}
|
|
|
|
impl GlobSetBuilder {
|
|
/// Create a new `GlobSetBuilder`. A `GlobSetBuilder` can be used to add new
|
|
/// patterns. Once all patterns have been added, `build` should be called
|
|
/// to produce a [`GlobSet`], which can then be used for matching.
|
|
pub fn new() -> GlobSetBuilder {
|
|
GlobSetBuilder { pats: vec![] }
|
|
}
|
|
|
|
/// Builds a new matcher from all of the glob patterns added so far.
|
|
///
|
|
/// Once a matcher is built, no new patterns can be added to it.
|
|
pub fn build(&self) -> Result<GlobSet, Error> {
|
|
GlobSet::new(&self.pats)
|
|
}
|
|
|
|
/// Add a new pattern to this set.
|
|
pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
|
|
self.pats.push(pat);
|
|
self
|
|
}
|
|
}
|
|
|
|
/// A candidate path for matching.
///
/// All glob matching in this crate operates on `Candidate` values.
/// Constructing a candidate has a very small cost, so callers may find it
/// beneficial to build one once and reuse it when matching a single path
/// against multiple globs or sets of globs.
#[derive(Clone)]
pub struct Candidate<'a> {
    // The path bytes after normalization.
    path: Cow<'a, [u8]>,
    // The file name portion of `path`; empty when there is none.
    basename: Cow<'a, [u8]>,
    // The extension derived from `basename`; empty when there is none.
    ext: Cow<'a, [u8]>,
}
|
|
|
|
impl<'a> std::fmt::Debug for Candidate<'a> {
|
|
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
|
f.debug_struct("Candidate")
|
|
.field("path", &self.path.as_bstr())
|
|
.field("basename", &self.basename.as_bstr())
|
|
.field("ext", &self.ext.as_bstr())
|
|
.finish()
|
|
}
|
|
}
|
|
|
|
impl<'a> Candidate<'a> {
|
|
/// Create a new candidate for matching from the given path.
|
|
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
|
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
|
|
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
|
|
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
|
|
Candidate { path, basename, ext }
|
|
}
|
|
|
|
fn path_prefix(&self, max: usize) -> &[u8] {
|
|
if self.path.len() <= max {
|
|
&*self.path
|
|
} else {
|
|
&self.path[..max]
|
|
}
|
|
}
|
|
|
|
fn path_suffix(&self, max: usize) -> &[u8] {
|
|
if self.path.len() <= max {
|
|
&*self.path
|
|
} else {
|
|
&self.path[self.path.len() - max..]
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
enum GlobSetMatchStrategy {
|
|
Literal(LiteralStrategy),
|
|
BasenameLiteral(BasenameLiteralStrategy),
|
|
Extension(ExtensionStrategy),
|
|
Prefix(PrefixStrategy),
|
|
Suffix(SuffixStrategy),
|
|
RequiredExtension(RequiredExtensionStrategy),
|
|
Regex(RegexSetStrategy),
|
|
}
|
|
|
|
impl GlobSetMatchStrategy {
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
use self::GlobSetMatchStrategy::*;
|
|
match *self {
|
|
Literal(ref s) => s.is_match(candidate),
|
|
BasenameLiteral(ref s) => s.is_match(candidate),
|
|
Extension(ref s) => s.is_match(candidate),
|
|
Prefix(ref s) => s.is_match(candidate),
|
|
Suffix(ref s) => s.is_match(candidate),
|
|
RequiredExtension(ref s) => s.is_match(candidate),
|
|
Regex(ref s) => s.is_match(candidate),
|
|
}
|
|
}
|
|
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
use self::GlobSetMatchStrategy::*;
|
|
match *self {
|
|
Literal(ref s) => s.matches_into(candidate, matches),
|
|
BasenameLiteral(ref s) => s.matches_into(candidate, matches),
|
|
Extension(ref s) => s.matches_into(candidate, matches),
|
|
Prefix(ref s) => s.matches_into(candidate, matches),
|
|
Suffix(ref s) => s.matches_into(candidate, matches),
|
|
RequiredExtension(ref s) => s.matches_into(candidate, matches),
|
|
Regex(ref s) => s.matches_into(candidate, matches),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct LiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
|
|
|
|
impl LiteralStrategy {
|
|
fn new() -> LiteralStrategy {
|
|
LiteralStrategy(fnv::HashMap::default())
|
|
}
|
|
|
|
fn add(&mut self, global_index: usize, lit: String) {
|
|
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
|
}
|
|
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
self.0.contains_key(candidate.path.as_bytes())
|
|
}
|
|
|
|
#[inline(never)]
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
|
|
matches.extend(hits);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct BasenameLiteralStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
|
|
|
|
impl BasenameLiteralStrategy {
|
|
fn new() -> BasenameLiteralStrategy {
|
|
BasenameLiteralStrategy(fnv::HashMap::default())
|
|
}
|
|
|
|
fn add(&mut self, global_index: usize, lit: String) {
|
|
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
|
}
|
|
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
if candidate.basename.is_empty() {
|
|
return false;
|
|
}
|
|
self.0.contains_key(candidate.basename.as_bytes())
|
|
}
|
|
|
|
#[inline(never)]
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
if candidate.basename.is_empty() {
|
|
return;
|
|
}
|
|
if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
|
|
matches.extend(hits);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct ExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<usize>>);
|
|
|
|
impl ExtensionStrategy {
|
|
fn new() -> ExtensionStrategy {
|
|
ExtensionStrategy(fnv::HashMap::default())
|
|
}
|
|
|
|
fn add(&mut self, global_index: usize, ext: String) {
|
|
self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
|
|
}
|
|
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
if candidate.ext.is_empty() {
|
|
return false;
|
|
}
|
|
self.0.contains_key(candidate.ext.as_bytes())
|
|
}
|
|
|
|
#[inline(never)]
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
if candidate.ext.is_empty() {
|
|
return;
|
|
}
|
|
if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
|
|
matches.extend(hits);
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct PrefixStrategy {
|
|
matcher: AhoCorasick,
|
|
map: Vec<usize>,
|
|
longest: usize,
|
|
}
|
|
|
|
impl PrefixStrategy {
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
let path = candidate.path_prefix(self.longest);
|
|
for m in self.matcher.find_overlapping_iter(path) {
|
|
if m.start() == 0 {
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
let path = candidate.path_prefix(self.longest);
|
|
for m in self.matcher.find_overlapping_iter(path) {
|
|
if m.start() == 0 {
|
|
matches.push(self.map[m.pattern()]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct SuffixStrategy {
|
|
matcher: AhoCorasick,
|
|
map: Vec<usize>,
|
|
longest: usize,
|
|
}
|
|
|
|
impl SuffixStrategy {
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
let path = candidate.path_suffix(self.longest);
|
|
for m in self.matcher.find_overlapping_iter(path) {
|
|
if m.end() == path.len() {
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
let path = candidate.path_suffix(self.longest);
|
|
for m in self.matcher.find_overlapping_iter(path) {
|
|
if m.end() == path.len() {
|
|
matches.push(self.map[m.pattern()]);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct RequiredExtensionStrategy(fnv::HashMap<Vec<u8>, Vec<(usize, Regex)>>);
|
|
|
|
impl RequiredExtensionStrategy {
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
if candidate.ext.is_empty() {
|
|
return false;
|
|
}
|
|
match self.0.get(candidate.ext.as_bytes()) {
|
|
None => false,
|
|
Some(regexes) => {
|
|
for &(_, ref re) in regexes {
|
|
if re.is_match(candidate.path.as_bytes()) {
|
|
return true;
|
|
}
|
|
}
|
|
false
|
|
}
|
|
}
|
|
}
|
|
|
|
#[inline(never)]
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
if candidate.ext.is_empty() {
|
|
return;
|
|
}
|
|
if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
|
|
for &(global_index, ref re) in regexes {
|
|
if re.is_match(candidate.path.as_bytes()) {
|
|
matches.push(global_index);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct RegexSetStrategy {
|
|
matcher: Regex,
|
|
map: Vec<usize>,
|
|
// We use a pool of PatternSets to hopefully allocating a fresh one on each
|
|
// call.
|
|
//
|
|
// TODO: In the next semver breaking release, we should drop this pool and
|
|
// expose an opaque type that wraps PatternSet. Then callers can provide
|
|
// it to `matches_into` directly. Callers might still want to use a pool
|
|
// or similar to amortize allocation, but that matches the status quo and
|
|
// absolves us of needing to do it here.
|
|
patset: Arc<Pool<PatternSet, PatternSetPoolFn>>,
|
|
}
|
|
|
|
type PatternSetPoolFn =
|
|
Box<dyn Fn() -> PatternSet + Send + Sync + UnwindSafe + RefUnwindSafe>;
|
|
|
|
impl RegexSetStrategy {
|
|
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
|
self.matcher.is_match(candidate.path.as_bytes())
|
|
}
|
|
|
|
fn matches_into(
|
|
&self,
|
|
candidate: &Candidate<'_>,
|
|
matches: &mut Vec<usize>,
|
|
) {
|
|
let input = regex_automata::Input::new(candidate.path.as_bytes());
|
|
let mut patset = self.patset.get();
|
|
patset.clear();
|
|
self.matcher.which_overlapping_matches(&input, &mut patset);
|
|
for i in patset.iter() {
|
|
matches.push(self.map[i]);
|
|
}
|
|
PoolGuard::put(patset);
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct MultiStrategyBuilder {
|
|
literals: Vec<String>,
|
|
map: Vec<usize>,
|
|
longest: usize,
|
|
}
|
|
|
|
impl MultiStrategyBuilder {
|
|
fn new() -> MultiStrategyBuilder {
|
|
MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
|
|
}
|
|
|
|
fn add(&mut self, global_index: usize, literal: String) {
|
|
if literal.len() > self.longest {
|
|
self.longest = literal.len();
|
|
}
|
|
self.map.push(global_index);
|
|
self.literals.push(literal);
|
|
}
|
|
|
|
fn prefix(self) -> PrefixStrategy {
|
|
PrefixStrategy {
|
|
matcher: AhoCorasick::new(&self.literals).unwrap(),
|
|
map: self.map,
|
|
longest: self.longest,
|
|
}
|
|
}
|
|
|
|
fn suffix(self) -> SuffixStrategy {
|
|
SuffixStrategy {
|
|
matcher: AhoCorasick::new(&self.literals).unwrap(),
|
|
map: self.map,
|
|
longest: self.longest,
|
|
}
|
|
}
|
|
|
|
fn regex_set(self) -> Result<RegexSetStrategy, Error> {
|
|
let matcher = new_regex_set(self.literals)?;
|
|
let pattern_len = matcher.pattern_len();
|
|
let create: PatternSetPoolFn =
|
|
Box::new(move || PatternSet::new(pattern_len));
|
|
Ok(RegexSetStrategy {
|
|
matcher,
|
|
map: self.map,
|
|
patset: Arc::new(Pool::new(create)),
|
|
})
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
struct RequiredExtensionStrategyBuilder(
|
|
fnv::HashMap<Vec<u8>, Vec<(usize, String)>>,
|
|
);
|
|
|
|
impl RequiredExtensionStrategyBuilder {
|
|
fn new() -> RequiredExtensionStrategyBuilder {
|
|
RequiredExtensionStrategyBuilder(fnv::HashMap::default())
|
|
}
|
|
|
|
fn add(&mut self, global_index: usize, ext: String, regex: String) {
|
|
self.0
|
|
.entry(ext.into_bytes())
|
|
.or_insert(vec![])
|
|
.push((global_index, regex));
|
|
}
|
|
|
|
fn build(self) -> Result<RequiredExtensionStrategy, Error> {
|
|
let mut exts = fnv::HashMap::default();
|
|
for (ext, regexes) in self.0.into_iter() {
|
|
exts.insert(ext.clone(), vec![]);
|
|
for (global_index, regex) in regexes {
|
|
let compiled = new_regex(®ex)?;
|
|
exts.get_mut(&ext).unwrap().push((global_index, compiled));
|
|
}
|
|
}
|
|
Ok(RequiredExtensionStrategy(exts))
|
|
}
|
|
}
|
|
|
|
/// Escape meta-characters within the given glob pattern.
///
/// The escaping works by surrounding meta-characters with brackets. For
/// example, `*` becomes `[*]`.
///
/// The characters escaped are `?`, `*`, `[`, `]`, `{` and `}`. Every other
/// character, including `!` and `\`, is copied through unchanged.
pub fn escape(s: &str) -> String {
    let mut escaped = String::with_capacity(s.len());
    for c in s.chars() {
        match c {
            // `!` does not need escaping because it is only special inside
            // brackets. `{` and `}` must be escaped because they delimit
            // alternate groups such as `{a,b}`.
            '?' | '*' | '[' | ']' | '{' | '}' => {
                escaped.push('[');
                escaped.push(c);
                escaped.push(']');
            }
            _ => escaped.push(c),
        }
    }
    escaped
}
|
|
|
|
#[cfg(test)]
mod tests {
    use crate::glob::Glob;

    use super::{GlobSet, GlobSetBuilder};

    // Builds a set from the given patterns, panicking on any invalid glob.
    fn set_of(patterns: &[&str]) -> GlobSet {
        let mut builder = GlobSetBuilder::new();
        for pat in patterns {
            builder.add(Glob::new(pat).unwrap());
        }
        builder.build().unwrap()
    }

    #[test]
    fn set_works() {
        let set = set_of(&["src/**/*.rs", "*.c", "src/lib.rs"]);

        assert!(set.is_match("foo.c"));
        assert!(set.is_match("src/foo.c"));
        assert!(!set.is_match("foo.rs"));
        assert!(!set.is_match("tests/foo.rs"));
        assert!(set.is_match("src/foo.rs"));
        assert!(set.is_match("src/grep/src/main.rs"));

        assert_eq!(vec![0, 2], set.matches("src/lib.rs"));
    }

    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        for path in ["", "a"] {
            assert!(!set.is_match(path));
        }
    }

    #[test]
    fn default_set_is_empty_works() {
        let set = GlobSet::default();
        for path in ["", "a"] {
            assert!(!set.is_match(path));
        }
    }

    #[test]
    fn escape() {
        use super::escape;

        let cases = [
            ("foo", "foo"),
            ("foo*", "foo[*]"),
            ("[]", "[[][]]"),
            ("*?", "[*][?]"),
            ("src/**/*.rs", "src/[*][*]/[*].rs"),
            ("bar[ab]baz", "bar[[]ab[]]baz"),
            ("bar[!!]!baz", "bar[[]!![]]!baz"),
        ];
        for (input, expected) in cases {
            assert_eq!(expected, escape(input));
        }
    }

    // Regex matching must not "remember" the results of previous searches:
    // any memory reused from an earlier search has to be cleared first.
    #[test]
    fn set_does_not_remember() {
        let set = set_of(&["*foo*", "*bar*", "*quux*"]);

        assert_eq!(vec![0, 2], set.matches("ZfooZquuxZ"));
        assert!(set.matches("nada").is_empty());
    }
}
|