1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-07-11 14:30:24 +02:00

Move glob implementation to new crate.

It is isolated and complex enough that it deserves attention all on its
own. It's also eminently reusable.
This commit is contained in:
Andrew Gallant
2016-09-30 19:42:41 -04:00
parent b9d5f22a4d
commit fdf24317ac
9 changed files with 88 additions and 24 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
tags tags
target target
/grep/Cargo.lock /grep/Cargo.lock
/globset/Cargo.lock

12
Cargo.lock generated
View File

@ -5,8 +5,8 @@ dependencies = [
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)", "docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.1.0",
"grep 0.1.3", "grep 0.1.3",
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -78,6 +78,16 @@ name = "glob"
version = "0.2.11" version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "globset"
version = "0.1.0"
dependencies = [
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "grep" name = "grep"
version = "0.1.3" version = "0.1.3"

View File

@ -26,7 +26,7 @@ path = "tests/tests.rs"
deque = "0.3" deque = "0.3"
docopt = "0.6" docopt = "0.6"
env_logger = "0.3" env_logger = "0.3"
fnv = "1.0" globset = { version = "0.1.0", path = "globset" }
grep = { version = "0.1.3", path = "grep" } grep = { version = "0.1.3", path = "grep" }
lazy_static = "0.2" lazy_static = "0.2"
libc = "0.2" libc = "0.2"

10
globset/Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
[package]
name = "globset"
version = "0.1.0"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
[dependencies]
fnv = "1.0"
lazy_static = "0.2"
memchr = "0.1"
regex = "0.1.77"

View File

@ -26,6 +26,12 @@ to make its way into `glob` proper.
// at the .gitignore for the chromium repo---just about every pattern satisfies // at the .gitignore for the chromium repo---just about every pattern satisfies
// that assumption.) // that assumption.)
extern crate fnv;
#[macro_use]
extern crate lazy_static;
extern crate memchr;
extern crate regex;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::error::Error as StdError; use std::error::Error as StdError;
@ -36,12 +42,12 @@ use std::iter;
use std::path::Path; use std::path::Path;
use std::str; use std::str;
use fnv;
use regex;
use regex::bytes::Regex; use regex::bytes::Regex;
use pathutil::file_name; use pathutil::file_name;
mod pathutil;
lazy_static! { lazy_static! {
static ref FILE_SEPARATORS: String = regex::quote(r"/\"); static ref FILE_SEPARATORS: String = regex::quote(r"/\");
} }

38
globset/src/pathutil.rs Normal file
View File

@ -0,0 +1,38 @@
use std::ffi::OsStr;
use std::path::Path;
/// Extracts the final component of `path` by scanning its raw bytes.
///
/// Returns `None` when the path is empty or when its last byte is `.`; the
/// latter covers paths that terminate in `.` or `..` (and, as written, any
/// other path whose final byte is a dot). Otherwise returns everything after
/// the last `/`, or the whole path when it contains no `/`. A path whose last
/// byte is `/` therefore yields an empty name rather than `None`.
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    use std::os::unix::ffi::OsStrExt;
    use memchr::memrchr;

    let bytes = path.as_ref().as_os_str().as_bytes();
    match bytes.last() {
        // Nothing to inspect in an empty path.
        None => return None,
        // A trailing dot means `.`, `..`, or a name ending in `.`; all of
        // these are rejected.
        Some(&b'.') => return None,
        Some(_) => {}
    }
    // The name starts just past the final separator, or at the beginning
    // when there is no separator at all.
    let start = match memrchr(b'/', bytes) {
        Some(slash) => slash + 1,
        None => 0,
    };
    Some(OsStr::from_bytes(&bytes[start..]))
}
/// Extracts the final component of `path` on non-unix targets.
///
/// This is a thin wrapper that delegates to the standard library's
/// `Path::file_name`, so that method's rules for when `None` is returned
/// apply unchanged.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let as_path: &'a Path = path.as_ref();
    as_path.file_name()
}

View File

@ -28,15 +28,15 @@ use std::fs::File;
use std::io::{self, BufRead}; use std::io::{self, BufRead};
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use globset;
use regex; use regex;
use glob;
use pathutil::{is_file_name, strip_prefix}; use pathutil::{is_file_name, strip_prefix};
/// Represents an error that can occur when parsing a gitignore file. /// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
Glob(glob::Error), Glob(globset::Error),
Regex(regex::Error), Regex(regex::Error),
Io(io::Error), Io(io::Error),
} }
@ -61,8 +61,8 @@ impl fmt::Display for Error {
} }
} }
impl From<glob::Error> for Error { impl From<globset::Error> for Error {
fn from(err: glob::Error) -> Error { fn from(err: globset::Error) -> Error {
Error::Glob(err) Error::Glob(err)
} }
} }
@ -82,7 +82,7 @@ impl From<io::Error> for Error {
/// Gitignore is a matcher for the glob patterns in a single gitignore file. /// Gitignore is a matcher for the glob patterns in a single gitignore file.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Gitignore { pub struct Gitignore {
set: glob::Set, set: globset::Set,
root: PathBuf, root: PathBuf,
patterns: Vec<Pattern>, patterns: Vec<Pattern>,
num_ignores: u64, num_ignores: u64,
@ -207,7 +207,7 @@ impl<'a> Match<'a> {
/// GitignoreBuilder constructs a matcher for a single set of globs from a /// GitignoreBuilder constructs a matcher for a single set of globs from a
/// .gitignore file. /// .gitignore file.
pub struct GitignoreBuilder { pub struct GitignoreBuilder {
builder: glob::SetBuilder, builder: globset::SetBuilder,
root: PathBuf, root: PathBuf,
patterns: Vec<Pattern>, patterns: Vec<Pattern>,
} }
@ -237,7 +237,7 @@ impl GitignoreBuilder {
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder { pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref()); let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
GitignoreBuilder { GitignoreBuilder {
builder: glob::SetBuilder::new(), builder: globset::SetBuilder::new(),
root: root.to_path_buf(), root: root.to_path_buf(),
patterns: vec![], patterns: vec![],
} }
@ -299,7 +299,7 @@ impl GitignoreBuilder {
whitelist: false, whitelist: false,
only_dir: false, only_dir: false,
}; };
let mut opts = glob::MatchOptions::default(); let mut opts = globset::MatchOptions::default();
let has_slash = line.chars().any(|c| c == '/'); let has_slash = line.chars().any(|c| c == '/');
let is_absolute = line.chars().nth(0).unwrap() == '/'; let is_absolute = line.chars().nth(0).unwrap() == '/';
if line.starts_with("\\!") || line.starts_with("\\#") { if line.starts_with("\\!") || line.starts_with("\\#") {

View File

@ -1,7 +1,7 @@
extern crate deque; extern crate deque;
extern crate docopt; extern crate docopt;
extern crate env_logger; extern crate env_logger;
extern crate fnv; extern crate globset;
extern crate grep; extern crate grep;
#[cfg(windows)] #[cfg(windows)]
extern crate kernel32; extern crate kernel32;
@ -61,7 +61,6 @@ macro_rules! eprintln {
mod args; mod args;
mod atty; mod atty;
mod gitignore; mod gitignore;
mod glob;
mod ignore; mod ignore;
mod out; mod out;
mod pathutil; mod pathutil;

View File

@ -11,7 +11,7 @@ use std::path::Path;
use regex; use regex;
use gitignore::{Match, Pattern}; use gitignore::{Match, Pattern};
use glob::{self, MatchOptions}; use globset::{self, MatchOptions};
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("asm", &["*.asm", "*.s", "*.S"]), ("asm", &["*.asm", "*.s", "*.S"]),
@ -93,7 +93,7 @@ pub enum Error {
/// A user specified file type definition could not be parsed. /// A user specified file type definition could not be parsed.
InvalidDefinition, InvalidDefinition,
/// There was an error building the matcher (probably a bad glob). /// There was an error building the matcher (probably a bad glob).
Glob(glob::Error), Glob(globset::Error),
/// There was an error compiling a glob as a regex. /// There was an error compiling a glob as a regex.
Regex(regex::Error), Regex(regex::Error),
} }
@ -125,8 +125,8 @@ impl fmt::Display for Error {
} }
} }
impl From<glob::Error> for Error { impl From<globset::Error> for Error {
fn from(err: glob::Error) -> Error { fn from(err: globset::Error) -> Error {
Error::Glob(err) Error::Glob(err)
} }
} }
@ -160,8 +160,8 @@ impl FileTypeDef {
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Types { pub struct Types {
defs: Vec<FileTypeDef>, defs: Vec<FileTypeDef>,
selected: Option<glob::SetYesNo>, selected: Option<globset::SetYesNo>,
negated: Option<glob::SetYesNo>, negated: Option<globset::SetYesNo>,
has_selected: bool, has_selected: bool,
unmatched_pat: Pattern, unmatched_pat: Pattern,
} }
@ -174,8 +174,8 @@ impl Types {
/// If has_selected is true, then at least one file type was selected. /// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored. /// Therefore, any non-matches should be ignored.
fn new( fn new(
selected: Option<glob::SetYesNo>, selected: Option<globset::SetYesNo>,
negated: Option<glob::SetYesNo>, negated: Option<globset::SetYesNo>,
has_selected: bool, has_selected: bool,
defs: Vec<FileTypeDef>, defs: Vec<FileTypeDef>,
) -> Types { ) -> Types {
@ -271,7 +271,7 @@ impl TypesBuilder {
if self.selected.is_empty() { if self.selected.is_empty() {
None None
} else { } else {
let mut bset = glob::SetBuilder::new(); let mut bset = globset::SetBuilder::new();
for name in &self.selected { for name in &self.selected {
let globs = match self.types.get(name) { let globs = match self.types.get(name) {
Some(globs) => globs, Some(globs) => globs,
@ -290,7 +290,7 @@ impl TypesBuilder {
if self.negated.is_empty() { if self.negated.is_empty() {
None None
} else { } else {
let mut bset = glob::SetBuilder::new(); let mut bset = globset::SetBuilder::new();
for name in &self.negated { for name in &self.negated {
let globs = match self.types.get(name) { let globs = match self.types.get(name) {
Some(globs) => globs, Some(globs) => globs,