From 7f456404010005142a6493fb48bd9fd06aca2731 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 26 Sep 2023 15:01:20 -0400 Subject: [PATCH] globset: polishing This brings the code in line with my current style. It also inlines the dozen or so lines of code for FNV hashing instead of bringing in a micro-crate for it. Finally, it drops the dependency on regex in favor of using regex-syntax and regex-automata directly. --- Cargo.lock | 10 +-- crates/globset/Cargo.toml | 26 ++++--- crates/globset/src/fnv.rs | 30 ++++++++ crates/globset/src/glob.rs | 115 +++++++++++++---------------- crates/globset/src/lib.rs | 130 ++++++++++++++++++--------------- crates/globset/src/pathutil.rs | 20 ++--- 6 files changed, 179 insertions(+), 152 deletions(-) create mode 100644 crates/globset/src/fnv.rs diff --git a/Cargo.lock b/Cargo.lock index 9d8c9109..7be0552b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -130,12 +130,6 @@ dependencies = [ "encoding_rs", ] -[[package]] -name = "fnv" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" - [[package]] name = "glob" version = "0.3.1" @@ -148,11 +142,11 @@ version = "0.4.13" dependencies = [ "aho-corasick", "bstr", - "fnv", "glob", "lazy_static", "log", - "regex", + "regex-automata", + "regex-syntax", "serde", "serde_json", ] diff --git a/crates/globset/Cargo.toml b/crates/globset/Cargo.toml index 799224e8..75486ddb 100644 --- a/crates/globset/Cargo.toml +++ b/crates/globset/Cargo.toml @@ -13,24 +13,32 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset" readme = "README.md" keywords = ["regex", "glob", "multiple", "set", "pattern"] license = "Unlicense OR MIT" -edition = "2018" +edition = "2021" [lib] name = "globset" bench = false [dependencies] -aho-corasick = "1.0.2" -bstr = { version = "1.6.0", default-features = false, features = ["std"] } -fnv = "1.0.6" -log = { version = "0.4.5", 
optional = true } -regex = { version = "1.8.3", default-features = false, features = ["perf", "std"] } -serde = { version = "1.0.104", optional = true } +aho-corasick = "1.1.1" +bstr = { version = "1.6.2", default-features = false, features = ["std"] } +log = { version = "0.4.20", optional = true } +serde = { version = "1.0.188", optional = true } + +[dependencies.regex-syntax] +version = "0.7.5" +default-features = false +features = ["std"] + +[dependencies.regex-automata] +version = "0.3.8" +default-features = false +features = ["std", "perf", "syntax", "meta", "nfa", "hybrid"] [dev-dependencies] -glob = "0.3.0" +glob = "0.3.1" lazy_static = "1" -serde_json = "1.0.45" +serde_json = "1.0.107" [features] default = ["log"] diff --git a/crates/globset/src/fnv.rs b/crates/globset/src/fnv.rs new file mode 100644 index 00000000..91174e20 --- /dev/null +++ b/crates/globset/src/fnv.rs @@ -0,0 +1,30 @@ +/// A convenience alias for creating a hash map with an FNV hasher. +pub(crate) type HashMap<K, V> = + std::collections::HashMap<K, V, std::hash::BuildHasherDefault<Hasher>>; + +/// A hasher that implements the Fowler–Noll–Vo (FNV) hash.
+pub(crate) struct Hasher(u64); + +impl Hasher { + const OFFSET_BASIS: u64 = 0xcbf29ce484222325; + const PRIME: u64 = 0x100000001b3; +} + +impl Default for Hasher { + fn default() -> Hasher { + Hasher(Hasher::OFFSET_BASIS) + } +} + +impl std::hash::Hasher for Hasher { + fn finish(&self) -> u64 { + self.0 + } + + fn write(&mut self, bytes: &[u8]) { + for &byte in bytes.iter() { + self.0 = self.0 ^ u64::from(byte); + self.0 = self.0.wrapping_mul(Hasher::PRIME); + } + } +} diff --git a/crates/globset/src/glob.rs b/crates/globset/src/glob.rs index d19c70ed..83c08344 100644 --- a/crates/globset/src/glob.rs +++ b/crates/globset/src/glob.rs @@ -1,12 +1,6 @@ -use std::fmt; -use std::hash; -use std::iter; -use std::ops::{Deref, DerefMut}; use std::path::{is_separator, Path}; -use std::str; -use regex; -use regex::bytes::Regex; +use regex_automata::meta::Regex; use crate::{new_regex, Candidate, Error, ErrorKind}; @@ -18,7 +12,7 @@ use crate::{new_regex, Candidate, Error, ErrorKind}; /// possible to test whether any of those patterns matches by looking up a /// file path's extension in a hash table. #[derive(Clone, Debug, Eq, PartialEq)] -pub enum MatchStrategy { +pub(crate) enum MatchStrategy { /// A pattern matches if and only if the entire file path matches this /// literal string. Literal(String), @@ -53,7 +47,7 @@ pub enum MatchStrategy { impl MatchStrategy { /// Returns a matching strategy for the given pattern. 
- pub fn new(pat: &Glob) -> MatchStrategy { + pub(crate) fn new(pat: &Glob) -> MatchStrategy { if let Some(lit) = pat.basename_literal() { MatchStrategy::BasenameLiteral(lit) } else if let Some(lit) = pat.literal() { @@ -63,7 +57,7 @@ impl MatchStrategy { } else if let Some(prefix) = pat.prefix() { MatchStrategy::Prefix(prefix) } else if let Some((suffix, component)) = pat.suffix() { - MatchStrategy::Suffix { suffix: suffix, component: component } + MatchStrategy::Suffix { suffix, component } } else if let Some(ext) = pat.required_ext() { MatchStrategy::RequiredExtension(ext) } else { @@ -90,20 +84,20 @@ impl PartialEq for Glob { } } -impl hash::Hash for Glob { - fn hash(&self, state: &mut H) { +impl std::hash::Hash for Glob { + fn hash(&self, state: &mut H) { self.glob.hash(state); self.opts.hash(state); } } -impl fmt::Display for Glob { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for Glob { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { self.glob.fmt(f) } } -impl str::FromStr for Glob { +impl std::str::FromStr for Glob { type Err = Error; fn from_str(glob: &str) -> Result { @@ -227,14 +221,14 @@ impl GlobOptions { #[derive(Clone, Debug, Default, Eq, PartialEq)] struct Tokens(Vec); -impl Deref for Tokens { +impl std::ops::Deref for Tokens { type Target = Vec; fn deref(&self) -> &Vec { &self.0 } } -impl DerefMut for Tokens { +impl std::ops::DerefMut for Tokens { fn deref_mut(&mut self) -> &mut Vec { &mut self.0 } @@ -262,7 +256,7 @@ impl Glob { pub fn compile_matcher(&self) -> GlobMatcher { let re = new_regex(&self.re).expect("regex compilation shouldn't fail"); - GlobMatcher { pat: self.clone(), re: re } + GlobMatcher { pat: self.clone(), re } } /// Returns a strategic matcher. 
@@ -275,7 +269,7 @@ impl Glob { let strategy = MatchStrategy::new(self); let re = new_regex(&self.re).expect("regex compilation shouldn't fail"); - GlobStrategic { strategy: strategy, re: re } + GlobStrategic { strategy, re } } /// Returns the original glob pattern used to build this pattern. @@ -311,10 +305,8 @@ impl Glob { } let mut lit = String::new(); for t in &*self.tokens { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } if lit.is_empty() { None @@ -334,13 +326,12 @@ impl Glob { if self.opts.case_insensitive { return None; } - let start = match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => 1, - Some(_) => 0, - _ => return None, + let start = match *self.tokens.get(0)? { + Token::RecursivePrefix => 1, + _ => 0, }; - match self.tokens.get(start) { - Some(&Token::ZeroOrMore) => { + match *self.tokens.get(start)? { + Token::ZeroOrMore => { // If there was no recursive prefix, then we only permit // `*` if `*` can match a `/`. For example, if `*` can't // match `/`, then `*.c` doesn't match `foo/bar.c`. @@ -350,8 +341,8 @@ impl Glob { } _ => return None, } - match self.tokens.get(start + 1) { - Some(&Token::Literal('.')) => {} + match *self.tokens.get(start + 1)? { + Token::Literal('.') => {} _ => return None, } let mut lit = ".".to_string(); @@ -405,8 +396,8 @@ impl Glob { if self.opts.case_insensitive { return None; } - let (end, need_sep) = match self.tokens.last() { - Some(&Token::ZeroOrMore) => { + let (end, need_sep) = match *self.tokens.last()? 
{ + Token::ZeroOrMore => { if self.opts.literal_separator { // If a trailing `*` can't match a `/`, then we can't // assume a match of the prefix corresponds to a match @@ -418,15 +409,13 @@ impl Glob { } (self.tokens.len() - 1, false) } - Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true), + Token::RecursiveSuffix => (self.tokens.len() - 1, true), _ => (self.tokens.len(), false), }; let mut lit = String::new(); for t in &self.tokens[0..end] { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } if need_sep { lit.push('/'); @@ -455,8 +444,8 @@ impl Glob { return None; } let mut lit = String::new(); - let (start, entire) = match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => { + let (start, entire) = match *self.tokens.get(0)? { + Token::RecursivePrefix => { // We only care if this follows a path component if the next // token is a literal. if let Some(&Token::Literal(_)) = self.tokens.get(1) { @@ -468,8 +457,8 @@ impl Glob { } _ => (0, false), }; - let start = match self.tokens.get(start) { - Some(&Token::ZeroOrMore) => { + let start = match *self.tokens.get(start)? { + Token::ZeroOrMore => { // If literal_separator is enabled, then a `*` can't // necessarily match everything, so reporting a suffix match // as a match of the pattern would be a false positive. @@ -481,10 +470,8 @@ impl Glob { _ => start, }; for t in &self.tokens[start..] { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } if lit.is_empty() || lit == "/" { None @@ -508,8 +495,8 @@ impl Glob { if self.opts.case_insensitive { return None; } - let start = match self.tokens.get(0) { - Some(&Token::RecursivePrefix) => 1, + let start = match *self.tokens.get(0)? 
{ + Token::RecursivePrefix => 1, _ => { // With nothing to gobble up the parent portion of a path, // we can't assume that matching on only the basename is @@ -520,7 +507,7 @@ impl Glob { if self.tokens[start..].is_empty() { return None; } - for t in &self.tokens[start..] { + for t in self.tokens[start..].iter() { match *t { Token::Literal('/') => return None, Token::Literal(_) => {} // OK @@ -554,16 +541,11 @@ impl Glob { /// The basic format of these patterns is `**/{literal}`, where `{literal}` /// does not contain a path separator. fn basename_literal(&self) -> Option { - let tokens = match self.basename_tokens() { - None => return None, - Some(tokens) => tokens, - }; + let tokens = self.basename_tokens()?; let mut lit = String::new(); for t in tokens { - match *t { - Token::Literal(c) => lit.push(c), - _ => return None, - } + let Token::Literal(c) = *t else { return None }; + lit.push(c); } Some(lit) } @@ -574,7 +556,7 @@ impl<'a> GlobBuilder<'a> { /// /// The pattern is not compiled until `build` is called. pub fn new(glob: &'a str) -> GlobBuilder<'a> { - GlobBuilder { glob: glob, opts: GlobOptions::default() } + GlobBuilder { glob, opts: GlobOptions::default() } } /// Parses and builds the pattern. @@ -604,7 +586,7 @@ impl<'a> GlobBuilder<'a> { glob: self.glob.to_string(), re: tokens.to_regex_with(&self.opts), opts: self.opts, - tokens: tokens, + tokens, }) } } @@ -640,7 +622,8 @@ impl<'a> GlobBuilder<'a> { /// Toggle whether an empty pattern in a list of alternates is accepted. /// - /// For example, if this is set then the glob `foo{,.txt}` will match both `foo` and `foo.txt`. + /// For example, if this is set then the glob `foo{,.txt}` will match both + /// `foo` and `foo.txt`. /// /// By default this is false. 
pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> { @@ -678,7 +661,7 @@ impl Tokens { tokens: &[Token], re: &mut String, ) { - for tok in tokens { + for tok in tokens.iter() { match *tok { Token::Literal(c) => { re.push_str(&char_to_escaped_literal(c)); @@ -758,7 +741,9 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String { let mut s = String::with_capacity(bs.len()); for &b in bs { if b <= 0x7F { - s.push_str(&regex::escape(&(b as char).to_string())); + s.push_str(&regex_syntax::escape( + char::from(b).encode_utf8(&mut [0; 4]), + )); } else { s.push_str(&format!("\\x{:02x}", b)); } @@ -769,7 +754,7 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String { struct Parser<'a> { glob: &'a str, stack: Vec, - chars: iter::Peekable>, + chars: std::iter::Peekable>, prev: Option, cur: Option, opts: &'a GlobOptions, @@ -777,7 +762,7 @@ struct Parser<'a> { impl<'a> Parser<'a> { fn error(&self, kind: ErrorKind) -> Error { - Error { glob: Some(self.glob.to_string()), kind: kind } + Error { glob: Some(self.glob.to_string()), kind } } fn parse(&mut self) -> Result<(), Error> { @@ -996,7 +981,7 @@ impl<'a> Parser<'a> { // it as a literal. ranges.push(('-', '-')); } - self.push_token(Token::Class { negated: negated, ranges: ranges }) + self.push_token(Token::Class { negated, ranges }) } fn bump(&mut self) -> Option { diff --git a/crates/globset/src/lib.rs b/crates/globset/src/lib.rs index 7a357489..15eeefbb 100644 --- a/crates/globset/src/lib.rs +++ b/crates/globset/src/lib.rs @@ -5,11 +5,9 @@ Glob set matching is the process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched. For example, given this set of globs: -```ignore -*.rs -src/lib.rs -src/**/foo.rs -``` +* `*.rs` +* `src/lib.rs` +* `src/**/foo.rs` and a path `src/bar/baz/foo.rs`, then the set would report the first and third globs as matching. @@ -19,7 +17,6 @@ globs as matching.
This example shows how to match a single glob against a single file path. ``` -# fn example() -> Result<(), globset::Error> { use globset::Glob; let glob = Glob::new("*.rs")?.compile_matcher(); @@ -27,7 +24,7 @@ let glob = Glob::new("*.rs")?.compile_matcher(); assert!(glob.is_match("foo.rs")); assert!(glob.is_match("foo/bar.rs")); assert!(!glob.is_match("Cargo.toml")); -# Ok(()) } example().unwrap(); +# Ok::<(), Box>(()) ``` # Example: configuring a glob matcher @@ -36,7 +33,6 @@ This example shows how to use a `GlobBuilder` to configure aspects of match semantics. In this example, we prevent wildcards from matching path separators. ``` -# fn example() -> Result<(), globset::Error> { use globset::GlobBuilder; let glob = GlobBuilder::new("*.rs") @@ -45,7 +41,7 @@ let glob = GlobBuilder::new("*.rs") assert!(glob.is_match("foo.rs")); assert!(!glob.is_match("foo/bar.rs")); // no longer matches assert!(!glob.is_match("Cargo.toml")); -# Ok(()) } example().unwrap(); +# Ok::<(), Box>(()) ``` # Example: match multiple globs at once @@ -53,7 +49,6 @@ assert!(!glob.is_match("Cargo.toml")); This example shows how to match multiple glob patterns at once. ``` -# fn example() -> Result<(), globset::Error> { use globset::{Glob, GlobSetBuilder}; let mut builder = GlobSetBuilder::new(); @@ -65,7 +60,7 @@ builder.add(Glob::new("src/**/foo.rs")?); let set = builder.build()?; assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]); -# Ok(()) } example().unwrap(); +# Ok::<(), Box>(()) ``` # Syntax @@ -103,22 +98,22 @@ or to enable case insensitive matching. 
#![deny(missing_docs)] -use std::borrow::Cow; -use std::collections::{BTreeMap, HashMap}; -use std::error::Error as StdError; -use std::fmt; -use std::hash; -use std::path::Path; -use std::str; +use std::{borrow::Cow, path::Path}; -use aho_corasick::AhoCorasick; -use bstr::{ByteSlice, ByteVec, B}; -use regex::bytes::{Regex, RegexBuilder, RegexSet}; +use { + aho_corasick::AhoCorasick, + bstr::{ByteSlice, ByteVec, B}, + regex_automata::meta::Regex, +}; + +use crate::{ + glob::MatchStrategy, + pathutil::{file_name, file_name_ext, normalize_path}, +}; -use crate::glob::MatchStrategy; pub use crate::glob::{Glob, GlobBuilder, GlobMatcher}; -use crate::pathutil::{file_name, file_name_ext, normalize_path}; +mod fnv; mod glob; mod pathutil; @@ -181,7 +176,7 @@ pub enum ErrorKind { __Nonexhaustive, } -impl StdError for Error { +impl std::error::Error for Error { fn description(&self) -> &str { self.kind.description() } @@ -227,8 +222,8 @@ impl ErrorKind { } } -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self.glob { None => self.kind.fmt(f), Some(ref glob) => { @@ -238,8 +233,8 @@ impl fmt::Display for Error { } } -impl fmt::Display for ErrorKind { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for ErrorKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match *self { ErrorKind::InvalidRecursive | ErrorKind::UnclosedClass @@ -257,30 +252,40 @@ impl fmt::Display for ErrorKind { } fn new_regex(pat: &str) -> Result { - RegexBuilder::new(pat) - .dot_matches_new_line(true) - .size_limit(10 * (1 << 20)) - .dfa_size_limit(10 * (1 << 20)) - .build() - .map_err(|err| Error { + let syntax = regex_automata::util::syntax::Config::new() + .utf8(false) + .dot_matches_new_line(true); + let config = Regex::config() + .utf8_empty(false) + .nfa_size_limit(Some(10 * (1 << 
20))) + .hybrid_cache_capacity(10 * (1 << 20)); + Regex::builder().syntax(syntax).configure(config).build(pat).map_err( + |err| Error { glob: Some(pat.to_string()), kind: ErrorKind::Regex(err.to_string()), + }, + ) +} + +fn new_regex_set(pats: Vec) -> Result { + let syntax = regex_automata::util::syntax::Config::new() + .utf8(false) + .dot_matches_new_line(true); + let config = Regex::config() + .match_kind(regex_automata::MatchKind::All) + .utf8_empty(false) + .nfa_size_limit(Some(10 * (1 << 20))) + .hybrid_cache_capacity(10 * (1 << 20)); + Regex::builder() + .syntax(syntax) + .configure(config) + .build_many(&pats) + .map_err(|err| Error { + glob: None, + kind: ErrorKind::Regex(err.to_string()), }) } -fn new_regex_set(pats: I) -> Result -where - S: AsRef, - I: IntoIterator, -{ - RegexSet::new(pats).map_err(|err| Error { - glob: None, - kind: ErrorKind::Regex(err.to_string()), - }) -} - -type Fnv = hash::BuildHasherDefault; - /// GlobSet represents a group of globs that can be matched together in a /// single pass. 
#[derive(Clone, Debug)] @@ -521,7 +526,7 @@ impl<'a> Candidate<'a> { let path = normalize_path(Vec::from_path_lossy(path.as_ref())); let basename = file_name(&path).unwrap_or(Cow::Borrowed(B(""))); let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B(""))); - Candidate { path: path, basename: basename, ext: ext } + Candidate { path, basename, ext } } fn path_prefix(&self, max: usize) -> &[u8] { @@ -585,11 +590,11 @@ impl GlobSetMatchStrategy { } #[derive(Clone, Debug)] -struct LiteralStrategy(BTreeMap, Vec>); +struct LiteralStrategy(fnv::HashMap, Vec>); impl LiteralStrategy { fn new() -> LiteralStrategy { - LiteralStrategy(BTreeMap::new()) + LiteralStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, lit: String) { @@ -613,11 +618,11 @@ impl LiteralStrategy { } #[derive(Clone, Debug)] -struct BasenameLiteralStrategy(BTreeMap, Vec>); +struct BasenameLiteralStrategy(fnv::HashMap, Vec>); impl BasenameLiteralStrategy { fn new() -> BasenameLiteralStrategy { - BasenameLiteralStrategy(BTreeMap::new()) + BasenameLiteralStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, lit: String) { @@ -647,11 +652,11 @@ impl BasenameLiteralStrategy { } #[derive(Clone, Debug)] -struct ExtensionStrategy(HashMap, Vec, Fnv>); +struct ExtensionStrategy(fnv::HashMap, Vec>); impl ExtensionStrategy { fn new() -> ExtensionStrategy { - ExtensionStrategy(HashMap::with_hasher(Fnv::default())) + ExtensionStrategy(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, ext: String) { @@ -745,7 +750,7 @@ impl SuffixStrategy { } #[derive(Clone, Debug)] -struct RequiredExtensionStrategy(HashMap, Vec<(usize, Regex)>, Fnv>); +struct RequiredExtensionStrategy(fnv::HashMap, Vec<(usize, Regex)>>); impl RequiredExtensionStrategy { fn is_match(&self, candidate: &Candidate<'_>) -> bool { @@ -786,8 +791,9 @@ impl RequiredExtensionStrategy { #[derive(Clone, Debug)] struct RegexSetStrategy { - matcher: RegexSet, + matcher: Regex, map: Vec, + // patset: 
regex_automata::PatternSet, } impl RegexSetStrategy { @@ -800,7 +806,11 @@ impl RegexSetStrategy { candidate: &Candidate<'_>, matches: &mut Vec, ) { - for i in self.matcher.matches(candidate.path.as_bytes()) { + let input = regex_automata::Input::new(candidate.path.as_bytes()); + let mut patset = + regex_automata::PatternSet::new(self.matcher.pattern_len()); + self.matcher.which_overlapping_matches(&input, &mut patset); + for i in patset.iter() { matches.push(self.map[i]); } } @@ -852,12 +862,12 @@ impl MultiStrategyBuilder { #[derive(Clone, Debug)] struct RequiredExtensionStrategyBuilder( - HashMap, Vec<(usize, String)>>, + fnv::HashMap, Vec<(usize, String)>>, ); impl RequiredExtensionStrategyBuilder { fn new() -> RequiredExtensionStrategyBuilder { - RequiredExtensionStrategyBuilder(HashMap::new()) + RequiredExtensionStrategyBuilder(fnv::HashMap::default()) } fn add(&mut self, global_index: usize, ext: String, regex: String) { @@ -868,7 +878,7 @@ impl RequiredExtensionStrategyBuilder { } fn build(self) -> Result { - let mut exts = HashMap::with_hasher(Fnv::default()); + let mut exts = fnv::HashMap::default(); for (ext, regexes) in self.0.into_iter() { exts.insert(ext.clone(), vec![]); for (global_index, regex) in regexes { diff --git a/crates/globset/src/pathutil.rs b/crates/globset/src/pathutil.rs index 522df340..8488e74f 100644 --- a/crates/globset/src/pathutil.rs +++ b/crates/globset/src/pathutil.rs @@ -4,12 +4,10 @@ use bstr::{ByteSlice, ByteVec}; /// The final component of the path, if it is a normal file. /// -/// If the path terminates in ., .., or consists solely of a root of prefix, -/// file_name will return None. -pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option> { - if path.is_empty() { - return None; - } else if path.last_byte() == Some(b'.') { +/// If the path terminates in `.`, `..`, or consists solely of a root or +/// prefix, file_name will return None.
+pub(crate) fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option> { + if path.last_byte().map_or(true, |b| b == b'.') { return None; } let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0); @@ -39,7 +37,9 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option> { /// a pattern like `*.rs` is obviously trying to match files with a `rs` /// extension, but it also matches files like `.rs`, which doesn't have an /// extension according to std::path::Path::extension. -pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option> { +pub(crate) fn file_name_ext<'a>( + name: &Cow<'a, [u8]>, +) -> Option> { if name.is_empty() { return None; } @@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option> { /// Normalizes a path to use `/` as a separator everywhere, even on platforms /// that recognize other characters as separators. #[cfg(unix)] -pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { +pub(crate) fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { // UNIX only uses /, so we're good. path } @@ -68,11 +68,11 @@ pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> { /// Normalizes a path to use `/` as a separator everywhere, even on platforms /// that recognize other characters as separators. #[cfg(not(unix))] -pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> { +pub(crate) fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> { use std::path::is_separator; for i in 0..path.len() { - if path[i] == b'/' || !is_separator(path[i] as char) { + if path[i] == b'/' || !is_separator(char::from(path[i])) { continue; } path.to_mut()[i] = b'/';