1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-01-19 05:49:14 +02:00

Move glob implementation to new crate.

It is isolated and complex enough that it deserves attention all on its
own. It's also eminently reusable.
This commit is contained in:
Andrew Gallant 2016-09-30 19:42:41 -04:00
parent b9d5f22a4d
commit fdf24317ac
9 changed files with 88 additions and 24 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
tags
target
/grep/Cargo.lock
/globset/Cargo.lock

12
Cargo.lock generated
View File

@ -5,8 +5,8 @@ dependencies = [
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.1.0",
"grep 0.1.3",
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -78,6 +78,16 @@ name = "glob"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "globset"
version = "0.1.0"
dependencies = [
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "grep"
version = "0.1.3"

View File

@ -26,7 +26,7 @@ path = "tests/tests.rs"
deque = "0.3"
docopt = "0.6"
env_logger = "0.3"
fnv = "1.0"
globset = { version = "0.1.0", path = "globset" }
grep = { version = "0.1.3", path = "grep" }
lazy_static = "0.2"
libc = "0.2"

10
globset/Cargo.toml Normal file
View File

@ -0,0 +1,10 @@
[package]
name = "globset"
version = "0.1.0"
authors = ["Andrew Gallant <jamslam@gmail.com>"]
[dependencies]
fnv = "1.0"
lazy_static = "0.2"
memchr = "0.1"
regex = "0.1.77"

View File

@ -26,6 +26,12 @@ to make its way into `glob` proper.
// at the .gitignore for the chromium repo---just about every pattern satisfies
// that assumption.)
extern crate fnv;
#[macro_use]
extern crate lazy_static;
extern crate memchr;
extern crate regex;
use std::borrow::Cow;
use std::collections::HashMap;
use std::error::Error as StdError;
@ -36,12 +42,12 @@ use std::iter;
use std::path::Path;
use std::str;
use fnv;
use regex;
use regex::bytes::Regex;
use pathutil::file_name;
mod pathutil;
// Regex-escaped form of the two path separator characters `/` and `\`,
// computed once on first access via `lazy_static!`.
// NOTE(review): presumably spliced into the regexes generated from glob
// patterns -- confirm against the uses elsewhere in this file.
lazy_static! {
static ref FILE_SEPARATORS: String = regex::quote(r"/\");
}

38
globset/src/pathutil.rs Normal file
View File

@ -0,0 +1,38 @@
use std::ffi::OsStr;
use std::path::Path;
/// The final component of the path, taken as the bytes after the last `/`.
///
/// Returns `None` when the path is empty or when its last byte is `.`. That
/// covers `.`, `..` and any path ending in `/.` or `/..`, but also any other
/// name that merely ends in a dot (e.g. `foo.`).
///
/// Otherwise the result is everything following the final `/`, or the whole
/// path when it contains no `/`. Note that a path ending in `/` (including a
/// bare `/`) therefore yields an empty name rather than `None`.
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    use std::os::unix::ffi::OsStrExt;
    use memchr::memrchr;

    let path = path.as_ref().as_os_str().as_bytes();
    // One "ends in `.`" test subsumes the separate checks for `.` and `..`;
    // `last()` is `None` for the empty path, which is rejected as well.
    match path.last() {
        None | Some(&b'.') => return None,
        Some(_) => {}
    }
    // Start just past the last separator, or at the beginning if there is none.
    let last_slash = memrchr(b'/', path).map_or(0, |i| i + 1);
    Some(OsStr::from_bytes(&path[last_slash..]))
}
/// The final component of the path, if it is a normal file.
///
/// On non-Unix targets this simply defers to the standard library's
/// `Path::file_name`, so its rules decide when `None` is returned (for
/// example, when the path terminates in `..`).
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let as_path: &'a Path = path.as_ref();
    as_path.file_name()
}

View File

@ -28,15 +28,15 @@ use std::fs::File;
use std::io::{self, BufRead};
use std::path::{Path, PathBuf};
use globset;
use regex;
use glob;
use pathutil::{is_file_name, strip_prefix};
/// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)]
pub enum Error {
Glob(glob::Error),
Glob(globset::Error),
Regex(regex::Error),
Io(io::Error),
}
@ -61,8 +61,8 @@ impl fmt::Display for Error {
}
}
impl From<glob::Error> for Error {
fn from(err: glob::Error) -> Error {
impl From<globset::Error> for Error {
fn from(err: globset::Error) -> Error {
Error::Glob(err)
}
}
@ -82,7 +82,7 @@ impl From<io::Error> for Error {
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
#[derive(Clone, Debug)]
pub struct Gitignore {
set: glob::Set,
set: globset::Set,
root: PathBuf,
patterns: Vec<Pattern>,
num_ignores: u64,
@ -207,7 +207,7 @@ impl<'a> Match<'a> {
/// GitignoreBuilder constructs a matcher for a single set of globs from a
/// .gitignore file.
pub struct GitignoreBuilder {
builder: glob::SetBuilder,
builder: globset::SetBuilder,
root: PathBuf,
patterns: Vec<Pattern>,
}
@ -237,7 +237,7 @@ impl GitignoreBuilder {
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
GitignoreBuilder {
builder: glob::SetBuilder::new(),
builder: globset::SetBuilder::new(),
root: root.to_path_buf(),
patterns: vec![],
}
@ -299,7 +299,7 @@ impl GitignoreBuilder {
whitelist: false,
only_dir: false,
};
let mut opts = glob::MatchOptions::default();
let mut opts = globset::MatchOptions::default();
let has_slash = line.chars().any(|c| c == '/');
let is_absolute = line.chars().nth(0).unwrap() == '/';
if line.starts_with("\\!") || line.starts_with("\\#") {

View File

@ -1,7 +1,7 @@
extern crate deque;
extern crate docopt;
extern crate env_logger;
extern crate fnv;
extern crate globset;
extern crate grep;
#[cfg(windows)]
extern crate kernel32;
@ -61,7 +61,6 @@ macro_rules! eprintln {
mod args;
mod atty;
mod gitignore;
mod glob;
mod ignore;
mod out;
mod pathutil;

View File

@ -11,7 +11,7 @@ use std::path::Path;
use regex;
use gitignore::{Match, Pattern};
use glob::{self, MatchOptions};
use globset::{self, MatchOptions};
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("asm", &["*.asm", "*.s", "*.S"]),
@ -93,7 +93,7 @@ pub enum Error {
/// A user specified file type definition could not be parsed.
InvalidDefinition,
/// There was an error building the matcher (probably a bad glob).
Glob(glob::Error),
Glob(globset::Error),
/// There was an error compiling a glob as a regex.
Regex(regex::Error),
}
@ -125,8 +125,8 @@ impl fmt::Display for Error {
}
}
impl From<glob::Error> for Error {
fn from(err: glob::Error) -> Error {
impl From<globset::Error> for Error {
fn from(err: globset::Error) -> Error {
Error::Glob(err)
}
}
@ -160,8 +160,8 @@ impl FileTypeDef {
#[derive(Clone, Debug)]
pub struct Types {
defs: Vec<FileTypeDef>,
selected: Option<glob::SetYesNo>,
negated: Option<glob::SetYesNo>,
selected: Option<globset::SetYesNo>,
negated: Option<globset::SetYesNo>,
has_selected: bool,
unmatched_pat: Pattern,
}
@ -174,8 +174,8 @@ impl Types {
/// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored.
fn new(
selected: Option<glob::SetYesNo>,
negated: Option<glob::SetYesNo>,
selected: Option<globset::SetYesNo>,
negated: Option<globset::SetYesNo>,
has_selected: bool,
defs: Vec<FileTypeDef>,
) -> Types {
@ -271,7 +271,7 @@ impl TypesBuilder {
if self.selected.is_empty() {
None
} else {
let mut bset = glob::SetBuilder::new();
let mut bset = globset::SetBuilder::new();
for name in &self.selected {
let globs = match self.types.get(name) {
Some(globs) => globs,
@ -290,7 +290,7 @@ impl TypesBuilder {
if self.negated.is_empty() {
None
} else {
let mut bset = glob::SetBuilder::new();
let mut bset = globset::SetBuilder::new();
for name in &self.negated {
let globs = match self.types.get(name) {
Some(globs) => globs,