mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-02 20:45:38 +02:00
Finish overhaul of glob matching.
This commit completes the initial move of glob matching to an external crate, including fixing up cross platform support, polishing the external crate for others to use and fixing a number of bugs in the process. Fixes #87, #127, #131
This commit is contained in:
parent
bc5accc035
commit
e96d93034a
7
Cargo.lock
generated
7
Cargo.lock
generated
@ -5,7 +5,6 @@ dependencies = [
|
||||
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.1.0",
|
||||
"grep 0.1.3",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -73,11 +72,6 @@ dependencies = [
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.1.0"
|
||||
@ -250,7 +244,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
|
||||
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
|
||||
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
|
||||
|
@ -46,8 +46,5 @@ winapi = "0.2"
|
||||
[features]
|
||||
simd-accel = ["regex/simd-accel"]
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2"
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
@ -1,5 +0,0 @@
|
||||
These are internal microbenchmarks for tracking the performance of individual
|
||||
components inside of ripgrep. At the moment, they aren't heavily used.
|
||||
|
||||
For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
|
||||
directory.
|
@ -19,6 +19,10 @@ run_test_suite() {
|
||||
cargo clean --target $TARGET --verbose
|
||||
cargo build --target $TARGET --verbose
|
||||
cargo test --target $TARGET --verbose
|
||||
cargo build --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
|
||||
# sanity check the file type
|
||||
file target/$TARGET/debug/rg
|
||||
|
@ -3,6 +3,10 @@ name = "globset"
|
||||
version = "0.1.0"
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
|
||||
[lib]
|
||||
name = "globset"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.5.3"
|
||||
fnv = "1.0"
|
||||
@ -10,3 +14,6 @@ lazy_static = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
regex = "0.1.77"
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2"
|
||||
|
122
globset/README.md
Normal file
122
globset/README.md
Normal file
@ -0,0 +1,122 @@
|
||||
globset
|
||||
=======
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
process of matching one or more glob patterns against a single candidate path
|
||||
simultaneously, and returning all of the globs that matched.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/globset)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/globset](https://docs.rs/globset)
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
globset = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate globset;
|
||||
```
|
||||
|
||||
### Example: one glob
|
||||
|
||||
This example shows how to match a single glob against a single file path.
|
||||
|
||||
```rust
|
||||
use globset::Glob;
|
||||
|
||||
let glob = try!(Glob::new("*.rs")).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(glob.is_match("foo/bar.rs"));
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
```
|
||||
|
||||
### Example: configuring a glob matcher
|
||||
|
||||
This example shows how to use a `GlobBuilder` to configure aspects of match
|
||||
semantics. In this example, we prevent wildcards from matching path separators.
|
||||
|
||||
```rust
|
||||
use globset::GlobBuilder;
|
||||
|
||||
let glob = try!(GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
```
|
||||
|
||||
### Example: match multiple globs at once
|
||||
|
||||
This example shows how to match multiple glob patterns at once.
|
||||
|
||||
```rust
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
// A GlobBuilder can be used to configure each glob's match semantics
|
||||
// independently.
|
||||
builder.add(try!(Glob::new("*.rs")));
|
||||
builder.add(try!(Glob::new("src/lib.rs")));
|
||||
builder.add(try!(Glob::new("src/**/foo.rs")));
|
||||
let set = try!(builder.build());
|
||||
|
||||
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
```
|
||||
|
||||
### Performance
|
||||
|
||||
This crate implements globs by converting them to regular expressions, and
|
||||
executing them with the
|
||||
[`regex`](https://github.com/rust-lang-nursery/regex)
|
||||
crate.
|
||||
|
||||
For single glob matching, performance of this crate should be roughly on par
|
||||
with the performance of the
|
||||
[`glob`](https://github.com/rust-lang-nursery/glob)
|
||||
crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
|
||||
correspond to benchmarks for the `glob` library.)
|
||||
Optimizations in the `regex` crate may propel this library past `glob`,
|
||||
particularly when matching longer paths.
|
||||
|
||||
```
|
||||
test ext_glob ... bench: 425 ns/iter (+/- 21)
|
||||
test ext_regex ... bench: 175 ns/iter (+/- 10)
|
||||
test long_glob ... bench: 182 ns/iter (+/- 11)
|
||||
test long_regex ... bench: 173 ns/iter (+/- 10)
|
||||
test short_glob ... bench: 69 ns/iter (+/- 4)
|
||||
test short_regex ... bench: 83 ns/iter (+/- 2)
|
||||
```
|
||||
|
||||
The primary performance advantage of this crate is when matching multiple
|
||||
globs against a single path. With the `glob` crate, one must match each glob
|
||||
synchronously, one after the other. In this crate, many can be matched
|
||||
simultaneously. For example:
|
||||
|
||||
```
|
||||
test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
|
||||
test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
|
||||
```
|
||||
|
||||
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
|
||||
|
||||
* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
|
||||
* Can match non-UTF-8 file paths correctly.
|
||||
* Supports matching multiple globs at once.
|
||||
* Doesn't provide a recursive directory iterator of matching file paths,
|
||||
although I believe this crate should grow one eventually.
|
||||
* Supports case insensitive and require-literal-separator match options, but
|
||||
**doesn't** support the require-literal-leading-dot option.
|
@ -5,37 +5,50 @@ tool itself, see the benchsuite directory.
|
||||
#![feature(test)]
|
||||
|
||||
extern crate glob;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
|
||||
|
||||
const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
|
||||
const EXT_PAT: &'static str = "*.txt";
|
||||
|
||||
const SHORT: &'static str = "some/needle.txt";
|
||||
const SHORT_PAT: &'static str = "some/**/needle.txt";
|
||||
|
||||
const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
|
||||
const LONG_PAT: &'static str = "some/**/needle.txt";
|
||||
|
||||
#[allow(dead_code, unused_variables)]
|
||||
#[path = "../src/glob.rs"]
|
||||
mod reglob;
|
||||
|
||||
fn new_glob(pat: &str) -> glob::Pattern {
|
||||
glob::Pattern::new(pat).unwrap()
|
||||
}
|
||||
|
||||
fn new_reglob(pat: &str) -> reglob::Set {
|
||||
let mut builder = reglob::SetBuilder::new();
|
||||
builder.add(pat).unwrap();
|
||||
fn new_reglob(pat: &str) -> GlobMatcher {
|
||||
Glob::new(pat).unwrap().compile_matcher()
|
||||
}
|
||||
|
||||
fn new_reglob_many(pats: &[&str]) -> GlobSet {
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
for pat in pats {
|
||||
builder.add(Glob::new(pat).unwrap());
|
||||
}
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
fn new_reglob_many(pats: &[&str]) -> reglob::Set {
|
||||
let mut builder = reglob::SetBuilder::new();
|
||||
for pat in pats {
|
||||
builder.add(pat).unwrap();
|
||||
}
|
||||
builder.build().unwrap()
|
||||
#[bench]
|
||||
fn ext_glob(b: &mut test::Bencher) {
|
||||
let pat = new_glob(EXT_PAT);
|
||||
b.iter(|| assert!(pat.matches(EXT)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn ext_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(EXT_PAT);
|
||||
let cand = Candidate::new(EXT);
|
||||
b.iter(|| assert!(set.is_match_candidate(&cand)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
@ -47,7 +60,8 @@ fn short_glob(b: &mut test::Bencher) {
|
||||
#[bench]
|
||||
fn short_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(SHORT_PAT);
|
||||
b.iter(|| assert!(set.is_match(SHORT)));
|
||||
let cand = Candidate::new(SHORT);
|
||||
b.iter(|| assert!(set.is_match_candidate(&cand)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
@ -59,7 +73,8 @@ fn long_glob(b: &mut test::Bencher) {
|
||||
#[bench]
|
||||
fn long_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(LONG_PAT);
|
||||
b.iter(|| assert!(set.is_match(LONG)));
|
||||
let cand = Candidate::new(LONG);
|
||||
b.iter(|| assert!(set.is_match_candidate(&cand)));
|
||||
}
|
||||
|
||||
const MANY_SHORT_GLOBS: &'static [&'static str] = &[
|
||||
@ -101,26 +116,3 @@ fn many_short_regex_set(b: &mut test::Bencher) {
|
||||
let set = new_reglob_many(MANY_SHORT_GLOBS);
|
||||
b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
|
||||
}
|
||||
|
||||
// This is the fastest on my system (beating many_glob by about 2x). This
|
||||
// suggests that a RegexSet needs quite a few regexes (or a larger haystack)
|
||||
// in order for it to scale.
|
||||
//
|
||||
// TODO(burntsushi): come up with a benchmark that uses more complex patterns
|
||||
// or a longer haystack.
|
||||
#[bench]
|
||||
fn many_short_regex_pattern(b: &mut test::Bencher) {
|
||||
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| {
|
||||
let pat = reglob::Pattern::new(s).unwrap();
|
||||
regex::Regex::new(&pat.to_regex()).unwrap()
|
||||
}).collect();
|
||||
b.iter(|| {
|
||||
let mut count = 0;
|
||||
for pat in &pats {
|
||||
if pat.is_match(MANY_SHORT_SEARCH) {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
assert_eq!(2, count);
|
||||
})
|
||||
}
|
@ -2,14 +2,13 @@ use std::ffi::{OsStr, OsString};
|
||||
use std::fmt;
|
||||
use std::iter;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::path::Path;
|
||||
use std::path::{Path, is_separator};
|
||||
use std::str;
|
||||
|
||||
use regex;
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use {Error, FILE_SEPARATORS, new_regex};
|
||||
use pathutil::path_bytes;
|
||||
use {Candidate, Error, new_regex};
|
||||
|
||||
/// Describes a matching strategy for a particular pattern.
|
||||
///
|
||||
@ -54,7 +53,7 @@ pub enum MatchStrategy {
|
||||
|
||||
impl MatchStrategy {
|
||||
/// Returns a matching strategy for the given pattern.
|
||||
pub fn new(pat: &Pattern) -> MatchStrategy {
|
||||
pub fn new(pat: &Glob) -> MatchStrategy {
|
||||
if let Some(lit) = pat.basename_literal() {
|
||||
MatchStrategy::BasenameLiteral(lit)
|
||||
} else if let Some(lit) = pat.literal() {
|
||||
@ -73,19 +72,19 @@ impl MatchStrategy {
|
||||
}
|
||||
}
|
||||
|
||||
/// Pattern represents a successfully parsed shell glob pattern.
|
||||
/// Glob represents a successfully parsed shell glob pattern.
|
||||
///
|
||||
/// It cannot be used directly to match file paths, but it can be converted
|
||||
/// to a regular expression string.
|
||||
/// to a regular expression string or a matcher.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct Pattern {
|
||||
pub struct Glob {
|
||||
glob: String,
|
||||
re: String,
|
||||
opts: PatternOptions,
|
||||
opts: GlobOptions,
|
||||
tokens: Tokens,
|
||||
}
|
||||
|
||||
impl fmt::Display for Pattern {
|
||||
impl fmt::Display for Glob {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.glob.fmt(f)
|
||||
}
|
||||
@ -93,52 +92,55 @@ impl fmt::Display for Pattern {
|
||||
|
||||
/// A matcher for a single pattern.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PatternMatcher {
|
||||
pub struct GlobMatcher {
|
||||
/// The underlying pattern.
|
||||
pat: Pattern,
|
||||
pat: Glob,
|
||||
/// The pattern, as a compiled regex.
|
||||
re: Regex,
|
||||
}
|
||||
|
||||
impl PatternMatcher {
|
||||
impl GlobMatcher {
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.re.is_match(&*path_bytes(path.as_ref()))
|
||||
self.is_match_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
self.re.is_match(&path.path)
|
||||
}
|
||||
}
|
||||
|
||||
/// A strategic matcher for a single pattern.
|
||||
#[cfg(test)]
|
||||
#[derive(Clone, Debug)]
|
||||
struct PatternStrategic {
|
||||
struct GlobStrategic {
|
||||
/// The match strategy to use.
|
||||
strategy: MatchStrategy,
|
||||
/// The underlying pattern.
|
||||
pat: Pattern,
|
||||
pat: Glob,
|
||||
/// The pattern, as a compiled regex.
|
||||
re: Regex,
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl PatternStrategic {
|
||||
impl GlobStrategic {
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
use pathutil::file_name_ext;
|
||||
fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.is_match_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
|
||||
let cow_path = path_bytes(path.as_ref());
|
||||
let byte_path = &*cow_path;
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
||||
let byte_path = &*candidate.path;
|
||||
|
||||
match self.strategy {
|
||||
MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
|
||||
MatchStrategy::BasenameLiteral(ref lit) => {
|
||||
let lit = OsStr::new(lit);
|
||||
path.as_ref().file_name().map(|n| n == lit).unwrap_or(false)
|
||||
lit.as_bytes() == &*candidate.basename
|
||||
}
|
||||
MatchStrategy::Extension(ref ext) => {
|
||||
path.as_ref().file_name()
|
||||
.and_then(file_name_ext)
|
||||
.map(|got| got == ext)
|
||||
.unwrap_or(false)
|
||||
candidate.ext == ext
|
||||
}
|
||||
MatchStrategy::Prefix(ref pre) => {
|
||||
starts_with(pre.as_bytes(), byte_path)
|
||||
@ -150,10 +152,7 @@ impl PatternStrategic {
|
||||
ends_with(suffix.as_bytes(), byte_path)
|
||||
}
|
||||
MatchStrategy::RequiredExtension(ref ext) => {
|
||||
path.as_ref().file_name()
|
||||
.and_then(file_name_ext)
|
||||
.map(|got| got == ext && self.re.is_match(byte_path))
|
||||
.unwrap_or(false)
|
||||
candidate.ext == ext && self.re.is_match(byte_path)
|
||||
}
|
||||
MatchStrategy::Regex => self.re.is_match(byte_path),
|
||||
}
|
||||
@ -167,15 +166,15 @@ impl PatternStrategic {
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the pattern string.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PatternBuilder<'a> {
|
||||
pub struct GlobBuilder<'a> {
|
||||
/// The glob pattern to compile.
|
||||
glob: &'a str,
|
||||
/// Options for the pattern.
|
||||
opts: PatternOptions,
|
||||
opts: GlobOptions,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||
struct PatternOptions {
|
||||
struct GlobOptions {
|
||||
/// Whether to match case insensitively.
|
||||
case_insensitive: bool,
|
||||
/// Whether to require a literal separator to match a separator in a file
|
||||
@ -210,17 +209,17 @@ enum Token {
|
||||
Alternates(Vec<Tokens>),
|
||||
}
|
||||
|
||||
impl Pattern {
|
||||
impl Glob {
|
||||
/// Builds a new pattern with default options.
|
||||
pub fn new(glob: &str) -> Result<Pattern, Error> {
|
||||
PatternBuilder::new(glob).build()
|
||||
pub fn new(glob: &str) -> Result<Glob, Error> {
|
||||
GlobBuilder::new(glob).build()
|
||||
}
|
||||
|
||||
/// Returns a matcher for this pattern.
|
||||
pub fn compile_matcher(&self) -> PatternMatcher {
|
||||
pub fn compile_matcher(&self) -> GlobMatcher {
|
||||
let re = new_regex(&self.re)
|
||||
.expect("regex compilation shouldn't fail");
|
||||
PatternMatcher {
|
||||
GlobMatcher {
|
||||
pat: self.clone(),
|
||||
re: re,
|
||||
}
|
||||
@ -230,13 +229,13 @@ impl Pattern {
|
||||
///
|
||||
/// This isn't exposed because it's not clear whether it's actually
|
||||
/// faster than just running a regex for a *single* pattern. If it
|
||||
/// is faster, then PatternMatcher should do it automatically.
|
||||
/// is faster, then GlobMatcher should do it automatically.
|
||||
#[cfg(test)]
|
||||
fn compile_strategic_matcher(&self) -> PatternStrategic {
|
||||
fn compile_strategic_matcher(&self) -> GlobStrategic {
|
||||
let strategy = MatchStrategy::new(self);
|
||||
let re = new_regex(&self.re)
|
||||
.expect("regex compilation shouldn't fail");
|
||||
PatternStrategic {
|
||||
GlobStrategic {
|
||||
strategy: strategy,
|
||||
pat: self.clone(),
|
||||
re: re,
|
||||
@ -253,30 +252,11 @@ impl Pattern {
|
||||
&self.re
|
||||
}
|
||||
|
||||
/// Returns true if and only if this pattern only inspects the basename
|
||||
/// of a path.
|
||||
pub fn is_only_basename(&self) -> bool {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return false,
|
||||
}
|
||||
for t in &self.tokens[1..] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return false,
|
||||
Token::RecursivePrefix
|
||||
| Token::RecursiveSuffix
|
||||
| Token::RecursiveZeroOrMore => return false,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns the pattern as a literal if and only if the pattern must match
|
||||
/// an entire path exactly.
|
||||
///
|
||||
/// The basic format of these patterns is `{literal}`.
|
||||
pub fn literal(&self) -> Option<String> {
|
||||
fn literal(&self) -> Option<String> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@ -301,7 +281,7 @@ impl Pattern {
|
||||
/// std::path::Path::extension returns. Namely, this extension includes
|
||||
/// the '.'. Also, paths like `.rs` are considered to have an extension
|
||||
/// of `.rs`.
|
||||
pub fn ext(&self) -> Option<OsString> {
|
||||
fn ext(&self) -> Option<OsString> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@ -343,7 +323,7 @@ impl Pattern {
|
||||
/// This is like `ext`, but returns an extension even if it isn't sufficient
|
||||
/// to imply a match. Namely, if an extension is returned, then it is
|
||||
/// necessary but not sufficient for a match.
|
||||
pub fn required_ext(&self) -> Option<OsString> {
|
||||
fn required_ext(&self) -> Option<OsString> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@ -372,7 +352,7 @@ impl Pattern {
|
||||
|
||||
/// Returns a literal prefix of this pattern if the entire pattern matches
|
||||
/// if the literal prefix matches.
|
||||
pub fn prefix(&self) -> Option<String> {
|
||||
fn prefix(&self) -> Option<String> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@ -417,7 +397,7 @@ impl Pattern {
|
||||
///
|
||||
/// When this returns true, the suffix literal is guaranteed to start with
|
||||
/// a `/`.
|
||||
pub fn suffix(&self) -> Option<(String, bool)> {
|
||||
fn suffix(&self) -> Option<(String, bool)> {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
@ -520,16 +500,7 @@ impl Pattern {
|
||||
///
|
||||
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
|
||||
/// does not contain a path separator.
|
||||
pub fn basename_literal(&self) -> Option<String> {
|
||||
self.base_literal()
|
||||
}
|
||||
|
||||
/// Returns the pattern as a literal if and only if the pattern exclusively
|
||||
/// matches the basename of a file path *and* is a literal.
|
||||
///
|
||||
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
|
||||
/// does not contain a path separator.
|
||||
pub fn base_literal(&self) -> Option<String> {
|
||||
fn basename_literal(&self) -> Option<String> {
|
||||
let tokens = match self.basename_tokens() {
|
||||
None => return None,
|
||||
Some(tokens) => tokens,
|
||||
@ -543,102 +514,21 @@ impl Pattern {
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns a literal prefix of this pattern if and only if the entire
|
||||
/// pattern matches if the literal prefix matches.
|
||||
pub fn literal_prefix(&self) -> Option<String> {
|
||||
match self.tokens.last() {
|
||||
Some(&Token::ZeroOrMore) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[0..self.tokens.len()-1] {
|
||||
match *t {
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns a literal suffix of this pattern if and only if the entire
|
||||
/// pattern matches if the literal suffix matches.
|
||||
pub fn literal_suffix(&self) -> Option<String> {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let start =
|
||||
match self.tokens.get(1) {
|
||||
Some(&Token::ZeroOrMore) => 2,
|
||||
_ => 1,
|
||||
};
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[start..] {
|
||||
match *t {
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns a basename literal prefix of this pattern.
|
||||
pub fn base_literal_prefix(&self) -> Option<String> {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
match self.tokens.last() {
|
||||
Some(&Token::ZeroOrMore) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[1..self.tokens.len()-1] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return None,
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns a basename literal suffix of this pattern.
|
||||
pub fn base_literal_suffix(&self) -> Option<String> {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
match self.tokens.get(1) {
|
||||
Some(&Token::ZeroOrMore) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[2..] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return None,
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PatternBuilder<'a> {
|
||||
impl<'a> GlobBuilder<'a> {
|
||||
/// Create a new builder for the pattern given.
|
||||
///
|
||||
/// The pattern is not compiled until `build` is called.
|
||||
pub fn new(glob: &'a str) -> PatternBuilder<'a> {
|
||||
PatternBuilder {
|
||||
pub fn new(glob: &'a str) -> GlobBuilder<'a> {
|
||||
GlobBuilder {
|
||||
glob: glob,
|
||||
opts: PatternOptions::default(),
|
||||
opts: GlobOptions::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses and builds the pattern.
|
||||
pub fn build(&self) -> Result<Pattern, Error> {
|
||||
pub fn build(&self) -> Result<Glob, Error> {
|
||||
let mut p = Parser {
|
||||
stack: vec![Tokens::default()],
|
||||
chars: self.glob.chars().peekable(),
|
||||
@ -652,7 +542,7 @@ impl<'a> PatternBuilder<'a> {
|
||||
Err(Error::UnclosedAlternates)
|
||||
} else {
|
||||
let tokens = p.stack.pop().unwrap();
|
||||
Ok(Pattern {
|
||||
Ok(Glob {
|
||||
glob: self.glob.to_string(),
|
||||
re: tokens.to_regex_with(&self.opts),
|
||||
opts: self.opts,
|
||||
@ -664,13 +554,13 @@ impl<'a> PatternBuilder<'a> {
|
||||
/// Toggle whether the pattern matches case insensitively or not.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn case_insensitive(&mut self, yes: bool) -> &mut PatternBuilder<'a> {
|
||||
pub fn case_insensitive(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.case_insensitive = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Toggle whether a literal `/` is required to match a path separator.
|
||||
pub fn literal_separator(&mut self, yes: bool) -> &mut PatternBuilder<'a> {
|
||||
pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.literal_separator = yes;
|
||||
self
|
||||
}
|
||||
@ -680,7 +570,7 @@ impl Tokens {
|
||||
/// Convert this pattern to a string that is guaranteed to be a valid
|
||||
/// regular expression and will represent the matching semantics of this
|
||||
/// glob pattern and the options given.
|
||||
fn to_regex_with(&self, options: &PatternOptions) -> String {
|
||||
fn to_regex_with(&self, options: &GlobOptions) -> String {
|
||||
let mut re = String::new();
|
||||
re.push_str("(?-u)");
|
||||
if options.case_insensitive {
|
||||
@ -699,43 +589,39 @@ impl Tokens {
|
||||
re
|
||||
}
|
||||
|
||||
|
||||
fn tokens_to_regex(
|
||||
&self,
|
||||
options: &PatternOptions,
|
||||
options: &GlobOptions,
|
||||
tokens: &[Token],
|
||||
re: &mut String,
|
||||
) {
|
||||
let seps = &*FILE_SEPARATORS;
|
||||
|
||||
for tok in tokens {
|
||||
match *tok {
|
||||
Token::Literal(c) => {
|
||||
re.push_str(®ex::quote(&c.to_string()));
|
||||
re.push_str(&char_to_escaped_literal(c));
|
||||
}
|
||||
Token::Any => {
|
||||
if options.literal_separator {
|
||||
re.push_str(&format!("[^{}]", seps));
|
||||
re.push_str("[^/]");
|
||||
} else {
|
||||
re.push_str(".");
|
||||
}
|
||||
}
|
||||
Token::ZeroOrMore => {
|
||||
if options.literal_separator {
|
||||
re.push_str(&format!("[^{}]*", seps));
|
||||
re.push_str("[^/]*");
|
||||
} else {
|
||||
re.push_str(".*");
|
||||
}
|
||||
}
|
||||
Token::RecursivePrefix => {
|
||||
re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
|
||||
re.push_str("(?:/?|.*/)");
|
||||
}
|
||||
Token::RecursiveSuffix => {
|
||||
re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
|
||||
re.push_str("(?:/?|/.*)");
|
||||
}
|
||||
Token::RecursiveZeroOrMore => {
|
||||
re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
|
||||
sep=seps));
|
||||
re.push_str("(?:/|/.*/)");
|
||||
}
|
||||
Token::Class { negated, ref ranges } => {
|
||||
re.push('[');
|
||||
@ -745,11 +631,11 @@ impl Tokens {
|
||||
for r in ranges {
|
||||
if r.0 == r.1 {
|
||||
// Not strictly necessary, but nicer to look at.
|
||||
re.push_str(®ex::quote(&r.0.to_string()));
|
||||
re.push_str(&char_to_escaped_literal(r.0));
|
||||
} else {
|
||||
re.push_str(®ex::quote(&r.0.to_string()));
|
||||
re.push_str(&char_to_escaped_literal(r.0));
|
||||
re.push('-');
|
||||
re.push_str(®ex::quote(&r.1.to_string()));
|
||||
re.push_str(&char_to_escaped_literal(r.1));
|
||||
}
|
||||
}
|
||||
re.push(']');
|
||||
@ -768,6 +654,26 @@ impl Tokens {
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert a Unicode scalar value to an escaped string suitable for use as
|
||||
/// a literal in a non-Unicode regex.
|
||||
fn char_to_escaped_literal(c: char) -> String {
|
||||
bytes_to_escaped_literal(&c.to_string().into_bytes())
|
||||
}
|
||||
|
||||
/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
|
||||
/// code units are converted to their escaped form.
|
||||
fn bytes_to_escaped_literal(bs: &[u8]) -> String {
|
||||
let mut s = String::with_capacity(bs.len());
|
||||
for &b in bs {
|
||||
if b <= 0x7F {
|
||||
s.push_str(®ex::quote(&(b as char).to_string()));
|
||||
} else {
|
||||
s.push_str(&format!("\\x{:02x}", b));
|
||||
}
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
struct Parser<'a> {
|
||||
stack: Vec<Tokens>,
|
||||
chars: iter::Peekable<str::Chars<'a>>,
|
||||
@ -785,7 +691,14 @@ impl<'a> Parser<'a> {
|
||||
'{' => try!(self.push_alternate()),
|
||||
'}' => try!(self.pop_alternate()),
|
||||
',' => try!(self.parse_comma()),
|
||||
c => try!(self.push_token(Token::Literal(c))),
|
||||
c => {
|
||||
if is_separator(c) {
|
||||
// Normalize all patterns to use / as a separator.
|
||||
try!(self.push_token(Token::Literal('/')))
|
||||
} else {
|
||||
try!(self.push_token(Token::Literal(c)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@ -848,13 +761,13 @@ impl<'a> Parser<'a> {
|
||||
if !try!(self.have_tokens()) {
|
||||
try!(self.push_token(Token::RecursivePrefix));
|
||||
let next = self.bump();
|
||||
if !next.is_none() && next != Some('/') {
|
||||
if !next.map(is_separator).unwrap_or(true) {
|
||||
return Err(Error::InvalidRecursive);
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
try!(self.pop_token());
|
||||
if prev != Some('/') {
|
||||
if !prev.map(is_separator).unwrap_or(false) {
|
||||
if self.stack.len() <= 1
|
||||
|| (prev != Some(',') && prev != Some('{')) {
|
||||
return Err(Error::InvalidRecursive);
|
||||
@ -868,8 +781,8 @@ impl<'a> Parser<'a> {
|
||||
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
}
|
||||
Some(&'/') => {
|
||||
assert!(self.bump() == Some('/'));
|
||||
Some(&c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
self.push_token(Token::RecursiveZeroOrMore)
|
||||
}
|
||||
_ => Err(Error::InvalidRecursive),
|
||||
@ -973,8 +886,8 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
|
||||
mod tests {
|
||||
use std::ffi::{OsStr, OsString};
|
||||
|
||||
use {SetBuilder, Error};
|
||||
use super::{Pattern, PatternBuilder, Token};
|
||||
use {GlobSetBuilder, Error};
|
||||
use super::{Glob, GlobBuilder, Token};
|
||||
use super::Token::*;
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
@ -987,7 +900,7 @@ mod tests {
|
||||
($name:ident, $pat:expr, $tokens:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
let pat = Glob::new($pat).unwrap();
|
||||
assert_eq!($tokens, pat.tokens.0);
|
||||
}
|
||||
}
|
||||
@ -997,7 +910,7 @@ mod tests {
|
||||
($name:ident, $pat:expr, $err:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let err = Pattern::new($pat).unwrap_err();
|
||||
let err = Glob::new($pat).unwrap_err();
|
||||
assert_eq!($err, err);
|
||||
}
|
||||
}
|
||||
@ -1010,7 +923,7 @@ mod tests {
|
||||
($name:ident, $pat:expr, $re:expr, $options:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = PatternBuilder::new($pat)
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($options.casei)
|
||||
.literal_separator($options.litsep)
|
||||
.build()
|
||||
@ -1027,14 +940,14 @@ mod tests {
|
||||
($name:ident, $pat:expr, $path:expr, $options:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = PatternBuilder::new($pat)
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($options.casei)
|
||||
.literal_separator($options.litsep)
|
||||
.build()
|
||||
.unwrap();
|
||||
let matcher = pat.compile_matcher();
|
||||
let strategic = pat.compile_strategic_matcher();
|
||||
let set = SetBuilder::new().add(pat).build().unwrap();
|
||||
let set = GlobSetBuilder::new().add(pat).build().unwrap();
|
||||
assert!(matcher.is_match($path));
|
||||
assert!(strategic.is_match($path));
|
||||
assert!(set.is_match($path));
|
||||
@ -1049,14 +962,14 @@ mod tests {
|
||||
($name:ident, $pat:expr, $path:expr, $options:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = PatternBuilder::new($pat)
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($options.casei)
|
||||
.literal_separator($options.litsep)
|
||||
.build()
|
||||
.unwrap();
|
||||
let matcher = pat.compile_matcher();
|
||||
let strategic = pat.compile_strategic_matcher();
|
||||
let set = SetBuilder::new().add(pat).build().unwrap();
|
||||
let set = GlobSetBuilder::new().add(pat).build().unwrap();
|
||||
assert!(!matcher.is_match($path));
|
||||
assert!(!strategic.is_match($path));
|
||||
assert!(!set.is_match($path));
|
||||
@ -1146,8 +1059,8 @@ mod tests {
|
||||
|
||||
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
|
||||
|
||||
toregex!(re_slash1, "?", r"^[^/\\]$", SLASHLIT);
|
||||
toregex!(re_slash2, "*", r"^[^/\\]*$", SLASHLIT);
|
||||
toregex!(re_slash1, "?", r"^[^/]$", SLASHLIT);
|
||||
toregex!(re_slash2, "*", r"^[^/]*$", SLASHLIT);
|
||||
|
||||
toregex!(re1, "a", "^a$");
|
||||
toregex!(re2, "?", "^.$");
|
||||
@ -1160,6 +1073,7 @@ mod tests {
|
||||
toregex!(re9, "[+]", r"^[\+]$");
|
||||
toregex!(re10, "+", r"^\+$");
|
||||
toregex!(re11, "**", r"^.*$");
|
||||
toregex!(re12, "☃", r"^\xe2\x98\x83$");
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
@ -1170,6 +1084,7 @@ mod tests {
|
||||
matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
||||
matches!(match8, "a*b[xyz]c*d", "abxcdbxcddd");
|
||||
matches!(match9, "*.rs", ".rs");
|
||||
matches!(match10, "☃", "☃");
|
||||
|
||||
matches!(matchrec1, "some/**/needle.txt", "some/needle.txt");
|
||||
matches!(matchrec2, "some/**/needle.txt", "some/one/needle.txt");
|
||||
@ -1239,10 +1154,16 @@ mod tests {
|
||||
matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
|
||||
|
||||
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
||||
#[cfg(unix)]
|
||||
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
|
||||
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);
|
||||
#[cfg(not(unix))]
|
||||
nmatches!(matchslash2, "abc?def", "abc\\def", SLASHLIT);
|
||||
nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
|
||||
matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
|
||||
#[cfg(unix)]
|
||||
nmatches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
|
||||
#[cfg(not(unix))]
|
||||
matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
|
||||
|
||||
nmatches!(matchnot1, "a*b*c", "abcd");
|
||||
nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
|
||||
@ -1281,7 +1202,7 @@ mod tests {
|
||||
($which:ident, $name:ident, $pat:expr, $expect:expr, $opts:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = PatternBuilder::new($pat)
|
||||
let pat = GlobBuilder::new($pat)
|
||||
.case_insensitive($opts.casei)
|
||||
.literal_separator($opts.litsep)
|
||||
.build().unwrap();
|
@ -1,16 +1,101 @@
|
||||
/*!
|
||||
The glob module provides standard shell globbing, but is specifically
|
||||
implemented by converting glob syntax to regular expressions. The reasoning is
|
||||
two fold:
|
||||
The globset crate provides cross platform single glob and glob set matching.
|
||||
|
||||
1. The regex library is *really* fast. Regaining performance in a distinct
|
||||
implementation of globbing is non-trivial.
|
||||
2. Most crucially, a `RegexSet` can be used to match many globs simultaneously.
|
||||
Glob set matching is the process of matching one or more glob patterns against
|
||||
a single candidate path simultaneously, and returning all of the globs that
|
||||
matched. For example, given this set of globs:
|
||||
|
||||
This module is written with some amount of intention of eventually splitting it
|
||||
out into its own separate crate, but I didn't quite have the energy for all
|
||||
that rigamorole when I wrote this. In particular, it could be fast/good enough
|
||||
to make its way into `glob` proper.
|
||||
```ignore
|
||||
*.rs
|
||||
src/lib.rs
|
||||
src/**/foo.rs
|
||||
```
|
||||
|
||||
and a path `src/bar/baz/foo.rs`, then the set would report the first and third
|
||||
globs as matching.
|
||||
|
||||
Single glob matching is also provided and is done by converting globs to
|
||||
|
||||
# Example: one glob
|
||||
|
||||
This example shows how to match a single glob against a single file path.
|
||||
|
||||
```
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::Glob;
|
||||
|
||||
let glob = try!(Glob::new("*.rs")).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(glob.is_match("foo/bar.rs"));
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
# Ok(()) } example().unwrap();
|
||||
```
|
||||
|
||||
# Example: configuring a glob matcher
|
||||
|
||||
This example shows how to use a `GlobBuilder` to configure aspects of match
|
||||
semantics. In this example, we prevent wildcards from matching path separators.
|
||||
|
||||
```
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::GlobBuilder;
|
||||
|
||||
let glob = try!(GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
# Ok(()) } example().unwrap();
|
||||
```
|
||||
|
||||
# Example: match multiple globs at once
|
||||
|
||||
This example shows how to match multiple glob patterns at once.
|
||||
|
||||
```
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
// A GlobBuilder can be used to configure each glob's match semantics
|
||||
// independently.
|
||||
builder.add(try!(Glob::new("*.rs")));
|
||||
builder.add(try!(Glob::new("src/lib.rs")));
|
||||
builder.add(try!(Glob::new("src/**/foo.rs")));
|
||||
let set = try!(builder.build());
|
||||
|
||||
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
# Ok(()) } example().unwrap();
|
||||
```
|
||||
|
||||
# Syntax
|
||||
|
||||
Standard Unix-style glob syntax is supported:
|
||||
|
||||
* `?` matches any single character. (If the `literal_separator` option is
|
||||
enabled, then `?` can never match a path separator.)
|
||||
* `*` matches zero or more characters. (If the `literal_separator` option is
|
||||
enabled, then `*` can never match a path separator.)
|
||||
* `**` recursively matches directories but are only legal in three situations.
|
||||
First, if the glob starts with <code>\*\*/</code>, then it matches
|
||||
all directories. For example, <code>\*\*/foo</code> matches `foo`
|
||||
and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
|
||||
<code>/\*\*</code>, then it matches all sub-entries. For example,
|
||||
<code>foo/\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
|
||||
Thirdly, if the glob contains <code>/\*\*/</code> anywhere within
|
||||
the pattern, then it matches zero or more directories. Using `**` anywhere
|
||||
else is illegal (N.B. the glob `**` is allowed and means "match everything").
|
||||
* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
|
||||
(N.B. Nesting `{...}` is not currently allowed.)
|
||||
* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
|
||||
`[!ab]` to match any character except for `a` and `b`.
|
||||
* Metacharacters such as `*` and `?` can be escaped with character class
|
||||
notation. e.g., `[*]` matches `*`.
|
||||
|
||||
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
|
||||
or to enable case insensitive matching.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
@ -36,12 +121,14 @@ use std::str;
|
||||
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use pathutil::{file_name, file_name_ext, os_str_bytes, path_bytes};
|
||||
use pattern::MatchStrategy;
|
||||
pub use pattern::{Pattern, PatternBuilder, PatternMatcher};
|
||||
use pathutil::{
|
||||
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
|
||||
};
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
|
||||
mod glob;
|
||||
mod pathutil;
|
||||
mod pattern;
|
||||
|
||||
macro_rules! eprintln {
|
||||
($($tt:tt)*) => {{
|
||||
@ -50,10 +137,6 @@ macro_rules! eprintln {
|
||||
}}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref FILE_SEPARATORS: String = regex::quote(r"/\");
|
||||
}
|
||||
|
||||
/// Represents an error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum Error {
|
||||
@ -139,19 +222,26 @@ fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
|
||||
|
||||
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
|
||||
|
||||
/// Set represents a group of globs that can be matched together in a single
|
||||
/// pass.
|
||||
/// GlobSet represents a group of globs that can be matched together in a
|
||||
/// single pass.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Set {
|
||||
strats: Vec<SetMatchStrategy>,
|
||||
pub struct GlobSet {
|
||||
strats: Vec<GlobSetMatchStrategy>,
|
||||
}
|
||||
|
||||
impl Set {
|
||||
impl GlobSet {
|
||||
/// Returns true if any glob in this set matches the path given.
|
||||
pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
|
||||
let candidate = Candidate::new(path.as_ref());
|
||||
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.is_match_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
|
||||
/// Returns true if any glob in this set matches the path given.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
for strat in &self.strats {
|
||||
if strat.is_match(&candidate) {
|
||||
if strat.is_match(path) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -160,30 +250,44 @@ impl Set {
|
||||
|
||||
/// Returns the sequence number of every glob pattern that matches the
|
||||
/// given path.
|
||||
#[allow(dead_code)]
|
||||
pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
|
||||
self.matches_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
|
||||
/// Returns the sequence number of every glob pattern that matches the
|
||||
/// given path.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
|
||||
let mut into = vec![];
|
||||
self.matches_into(path, &mut into);
|
||||
self.matches_candidate_into(path, &mut into);
|
||||
into
|
||||
}
|
||||
|
||||
/// Adds the sequence number of every glob pattern that matches the given
|
||||
/// path to the vec given.
|
||||
pub fn matches_into<T: AsRef<Path>>(
|
||||
///
|
||||
/// `into` is is cleared before matching begins, and contains the set of
|
||||
/// sequence numbers (in ascending order) after matching ends. If no globs
|
||||
/// were matched, then `into` will be empty.
|
||||
pub fn matches_candidate_into(
|
||||
&self,
|
||||
path: T,
|
||||
path: &Candidate,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
into.clear();
|
||||
let candidate = Candidate::new(path.as_ref());
|
||||
for strat in &self.strats {
|
||||
strat.matches_into(&candidate, into);
|
||||
strat.matches_into(path, into);
|
||||
}
|
||||
into.sort();
|
||||
into.dedup();
|
||||
}
|
||||
|
||||
fn new(pats: &[Pattern]) -> Result<Set, Error> {
|
||||
fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
|
||||
let mut lits = LiteralStrategy::new();
|
||||
let mut base_lits = BasenameLiteralStrategy::new();
|
||||
let mut exts = ExtensionStrategy::new();
|
||||
@ -225,63 +329,70 @@ impl Set {
|
||||
lits.0.len(), base_lits.0.len(), exts.0.len(),
|
||||
prefixes.literals.len(), suffixes.literals.len(),
|
||||
required_exts.0.len(), regexes.literals.len());
|
||||
Ok(Set {
|
||||
Ok(GlobSet {
|
||||
strats: vec![
|
||||
SetMatchStrategy::Extension(exts),
|
||||
SetMatchStrategy::BasenameLiteral(base_lits),
|
||||
SetMatchStrategy::Literal(lits),
|
||||
SetMatchStrategy::Suffix(suffixes.suffix()),
|
||||
SetMatchStrategy::Prefix(prefixes.prefix()),
|
||||
SetMatchStrategy::RequiredExtension(
|
||||
GlobSetMatchStrategy::Extension(exts),
|
||||
GlobSetMatchStrategy::BasenameLiteral(base_lits),
|
||||
GlobSetMatchStrategy::Literal(lits),
|
||||
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
|
||||
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
|
||||
GlobSetMatchStrategy::RequiredExtension(
|
||||
try!(required_exts.build())),
|
||||
SetMatchStrategy::Regex(try!(regexes.regex_set())),
|
||||
GlobSetMatchStrategy::Regex(try!(regexes.regex_set())),
|
||||
],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// SetBuilder builds a group of patterns that can be used to simultaneously
|
||||
/// match a file path.
|
||||
pub struct SetBuilder {
|
||||
pats: Vec<Pattern>,
|
||||
/// GlobSetBuilder builds a group of patterns that can be used to
|
||||
/// simultaneously match a file path.
|
||||
pub struct GlobSetBuilder {
|
||||
pats: Vec<Glob>,
|
||||
}
|
||||
|
||||
impl SetBuilder {
|
||||
/// Create a new SetBuilder. A SetBuilder can be used to add new patterns.
|
||||
/// Once all patterns have been added, `build` should be called to produce
|
||||
/// a `Set`, which can then be used for matching.
|
||||
pub fn new() -> SetBuilder {
|
||||
SetBuilder { pats: vec![] }
|
||||
impl GlobSetBuilder {
|
||||
/// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
|
||||
/// patterns. Once all patterns have been added, `build` should be called
|
||||
/// to produce a `GlobSet`, which can then be used for matching.
|
||||
pub fn new() -> GlobSetBuilder {
|
||||
GlobSetBuilder { pats: vec![] }
|
||||
}
|
||||
|
||||
/// Builds a new matcher from all of the glob patterns added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new patterns can be added to it.
|
||||
pub fn build(&self) -> Result<Set, Error> {
|
||||
Set::new(&self.pats)
|
||||
pub fn build(&self) -> Result<GlobSet, Error> {
|
||||
GlobSet::new(&self.pats)
|
||||
}
|
||||
|
||||
/// Add a new pattern to this set.
|
||||
#[allow(dead_code)]
|
||||
pub fn add(&mut self, pat: Pattern) -> &mut SetBuilder {
|
||||
pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
|
||||
self.pats.push(pat);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A candidate path for matching.
|
||||
///
|
||||
/// All glob matching in this crate operates on `Candidate` values.
|
||||
/// Constructing candidates has a very small cost associated with it, so
|
||||
/// callers may find it beneficial to amortize that cost when matching a single
|
||||
/// path against multiple globs or sets of globs.
|
||||
#[derive(Clone, Debug)]
|
||||
struct Candidate<'a> {
|
||||
pub struct Candidate<'a> {
|
||||
path: Cow<'a, [u8]>,
|
||||
basename: Cow<'a, [u8]>,
|
||||
ext: &'a OsStr,
|
||||
}
|
||||
|
||||
impl<'a> Candidate<'a> {
|
||||
fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
/// Create a new candidate for matching from the given path.
|
||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
let path = path.as_ref();
|
||||
let basename = file_name(path).unwrap_or(OsStr::new(""));
|
||||
Candidate {
|
||||
path: path_bytes(path),
|
||||
path: normalize_path(path_bytes(path)),
|
||||
basename: os_str_bytes(basename),
|
||||
ext: file_name_ext(basename).unwrap_or(OsStr::new("")),
|
||||
}
|
||||
@ -305,7 +416,7 @@ impl<'a> Candidate<'a> {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum SetMatchStrategy {
|
||||
enum GlobSetMatchStrategy {
|
||||
Literal(LiteralStrategy),
|
||||
BasenameLiteral(BasenameLiteralStrategy),
|
||||
Extension(ExtensionStrategy),
|
||||
@ -315,9 +426,9 @@ enum SetMatchStrategy {
|
||||
Regex(RegexSetStrategy),
|
||||
}
|
||||
|
||||
impl SetMatchStrategy {
|
||||
impl GlobSetMatchStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
use self::SetMatchStrategy::*;
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.is_match(candidate),
|
||||
BasenameLiteral(ref s) => s.is_match(candidate),
|
||||
@ -330,7 +441,7 @@ impl SetMatchStrategy {
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
use self::SetMatchStrategy::*;
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.matches_into(candidate, matches),
|
||||
BasenameLiteral(ref s) => s.matches_into(candidate, matches),
|
||||
@ -616,29 +727,23 @@ impl RequiredExtensionStrategyBuilder {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{Set, SetBuilder};
|
||||
use pattern::Pattern;
|
||||
use super::GlobSetBuilder;
|
||||
use glob::Glob;
|
||||
|
||||
#[test]
|
||||
fn set_works() {
|
||||
let mut builder = SetBuilder::new();
|
||||
builder.add(Pattern::new("src/**/*.rs").unwrap());
|
||||
builder.add(Pattern::new("*.c").unwrap());
|
||||
builder.add(Pattern::new("src/lib.rs").unwrap());
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
builder.add(Glob::new("src/**/*.rs").unwrap());
|
||||
builder.add(Glob::new("*.c").unwrap());
|
||||
builder.add(Glob::new("src/lib.rs").unwrap());
|
||||
let set = builder.build().unwrap();
|
||||
|
||||
fn is_match(set: &Set, s: &str) -> bool {
|
||||
let mut matches = vec![];
|
||||
set.matches_into(s, &mut matches);
|
||||
!matches.is_empty()
|
||||
}
|
||||
|
||||
assert!(is_match(&set, "foo.c"));
|
||||
assert!(is_match(&set, "src/foo.c"));
|
||||
assert!(!is_match(&set, "foo.rs"));
|
||||
assert!(!is_match(&set, "tests/foo.rs"));
|
||||
assert!(is_match(&set, "src/foo.rs"));
|
||||
assert!(is_match(&set, "src/grep/src/main.rs"));
|
||||
assert!(set.is_match("foo.c"));
|
||||
assert!(set.is_match("src/foo.c"));
|
||||
assert!(!set.is_match("foo.rs"));
|
||||
assert!(!set.is_match("tests/foo.rs"));
|
||||
assert!(set.is_match("src/foo.rs"));
|
||||
assert!(set.is_match("src/grep/src/main.rs"));
|
||||
|
||||
let matches = set.matches("src/lib.rs");
|
||||
assert_eq!(2, matches.len());
|
||||
|
@ -101,20 +101,45 @@ pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
/// necessary.
|
||||
#[cfg(not(unix))]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
// TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even
|
||||
// if we could get at the raw bytes, they wouldn't be useful. We *must*
|
||||
// convert to UTF-8 before doing path matching. Unfortunate, but necessary.
|
||||
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
|
||||
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
|
||||
// be useful. We *must* convert to UTF-8 before doing path matching.
|
||||
// Unfortunate, but necessary.
|
||||
match s.to_string_lossy() {
|
||||
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
|
||||
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(unix)]
|
||||
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
// UNIX only uses /, so we're good.
|
||||
path
|
||||
}
|
||||
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(not(unix))]
|
||||
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
use std::path::is_separator;
|
||||
|
||||
for i in 0..path.len() {
|
||||
if path[i] == b'/' || !is_separator(path[i] as char) {
|
||||
continue;
|
||||
}
|
||||
path.to_mut()[i] = b'/';
|
||||
}
|
||||
path
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
use super::file_name_ext;
|
||||
use super::{file_name_ext, normalize_path};
|
||||
|
||||
macro_rules! ext {
|
||||
($name:ident, $file_name:expr, $ext:expr) => {
|
||||
@ -131,4 +156,25 @@ mod tests {
|
||||
ext!(ext3, "..rs", Some(".rs"));
|
||||
ext!(ext4, "", None::<&str>);
|
||||
ext!(ext5, "foo", None::<&str>);
|
||||
|
||||
macro_rules! normalize {
|
||||
($name:ident, $path:expr, $expected:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let got = normalize_path(Cow::Owned($path.to_vec()));
|
||||
assert_eq!($expected.to_vec(), got.into_owned());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
normalize!(normal1, b"foo", b"foo");
|
||||
normalize!(normal2, b"foo/bar", b"foo/bar");
|
||||
#[cfg(unix)]
|
||||
normalize!(normal3, b"foo\\bar", b"foo\\bar");
|
||||
#[cfg(not(unix))]
|
||||
normalize!(normal3, b"foo\\bar", b"foo/bar");
|
||||
#[cfg(unix)]
|
||||
normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz");
|
||||
#[cfg(not(unix))]
|
||||
normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz");
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use globset::{self, PatternBuilder, Set, SetBuilder};
|
||||
use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex;
|
||||
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
@ -82,7 +82,7 @@ impl From<io::Error> for Error {
|
||||
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: Set,
|
||||
set: GlobSet,
|
||||
root: PathBuf,
|
||||
patterns: Vec<Pattern>,
|
||||
num_ignores: u64,
|
||||
@ -140,7 +140,8 @@ impl Gitignore {
|
||||
};
|
||||
MATCHES.with(|matches| {
|
||||
let mut matches = matches.borrow_mut();
|
||||
self.set.matches_into(path, &mut *matches);
|
||||
let candidate = Candidate::new(path);
|
||||
self.set.matches_candidate_into(&candidate, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let pat = &self.patterns[i];
|
||||
if !pat.only_dir || is_dir {
|
||||
@ -207,7 +208,7 @@ impl<'a> Match<'a> {
|
||||
/// GitignoreBuilder constructs a matcher for a single set of globs from a
|
||||
/// .gitignore file.
|
||||
pub struct GitignoreBuilder {
|
||||
builder: SetBuilder,
|
||||
builder: GlobSetBuilder,
|
||||
root: PathBuf,
|
||||
patterns: Vec<Pattern>,
|
||||
}
|
||||
@ -237,7 +238,7 @@ impl GitignoreBuilder {
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
|
||||
GitignoreBuilder {
|
||||
builder: SetBuilder::new(),
|
||||
builder: GlobSetBuilder::new(),
|
||||
root: root.to_path_buf(),
|
||||
patterns: vec![],
|
||||
}
|
||||
@ -262,8 +263,18 @@ impl GitignoreBuilder {
|
||||
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
let rdr = io::BufReader::new(try!(File::open(&path)));
|
||||
debug!("gitignore: {}", path.as_ref().display());
|
||||
for line in rdr.lines() {
|
||||
try!(self.add(&path, &try!(line)));
|
||||
for (i, line) in rdr.lines().enumerate() {
|
||||
let line = match line {
|
||||
Ok(line) => line,
|
||||
Err(err) => {
|
||||
debug!("error reading line {} in {}: {}",
|
||||
i, path.as_ref().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(err) = self.add(&path, &line) {
|
||||
debug!("error adding gitignore pattern: '{}': {}", line, err);
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@ -349,7 +360,7 @@ impl GitignoreBuilder {
|
||||
pat.pat = format!("{}/*", pat.pat);
|
||||
}
|
||||
let parsed = try!(
|
||||
PatternBuilder::new(&pat.pat)
|
||||
GlobBuilder::new(&pat.pat)
|
||||
.literal_separator(literal_separator)
|
||||
.build());
|
||||
self.builder.add(parsed);
|
||||
|
18
src/types.rs
18
src/types.rs
@ -11,7 +11,7 @@ use std::path::Path;
|
||||
use regex;
|
||||
|
||||
use gitignore::{Match, Pattern};
|
||||
use globset::{self, PatternBuilder, Set, SetBuilder};
|
||||
use globset::{self, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
|
||||
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("asm", &["*.asm", "*.s", "*.S"]),
|
||||
@ -164,8 +164,8 @@ impl FileTypeDef {
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
defs: Vec<FileTypeDef>,
|
||||
selected: Option<Set>,
|
||||
negated: Option<Set>,
|
||||
selected: Option<GlobSet>,
|
||||
negated: Option<GlobSet>,
|
||||
has_selected: bool,
|
||||
unmatched_pat: Pattern,
|
||||
}
|
||||
@ -178,8 +178,8 @@ impl Types {
|
||||
/// If has_selected is true, then at least one file type was selected.
|
||||
/// Therefore, any non-matches should be ignored.
|
||||
fn new(
|
||||
selected: Option<Set>,
|
||||
negated: Option<Set>,
|
||||
selected: Option<GlobSet>,
|
||||
negated: Option<GlobSet>,
|
||||
has_selected: bool,
|
||||
defs: Vec<FileTypeDef>,
|
||||
) -> Types {
|
||||
@ -272,7 +272,7 @@ impl TypesBuilder {
|
||||
if self.selected.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut bset = SetBuilder::new();
|
||||
let mut bset = GlobSetBuilder::new();
|
||||
for name in &self.selected {
|
||||
let globs = match self.types.get(name) {
|
||||
Some(globs) => globs,
|
||||
@ -283,7 +283,7 @@ impl TypesBuilder {
|
||||
};
|
||||
for glob in globs {
|
||||
let pat = try!(
|
||||
PatternBuilder::new(glob)
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true).build());
|
||||
bset.add(pat);
|
||||
}
|
||||
@ -294,7 +294,7 @@ impl TypesBuilder {
|
||||
if self.negated.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut bset = SetBuilder::new();
|
||||
let mut bset = GlobSetBuilder::new();
|
||||
for name in &self.negated {
|
||||
let globs = match self.types.get(name) {
|
||||
Some(globs) => globs,
|
||||
@ -305,7 +305,7 @@ impl TypesBuilder {
|
||||
};
|
||||
for glob in globs {
|
||||
let pat = try!(
|
||||
PatternBuilder::new(glob)
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true).build());
|
||||
bset.add(pat);
|
||||
}
|
||||
|
@ -611,17 +611,6 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
#[cfg(not(windows))]
|
||||
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
|
||||
cmd.arg("-uuu");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
|
||||
});
|
||||
|
||||
// On Windows, this test uses memory maps, so the NUL bytes don't get replaced.
|
||||
#[cfg(windows)]
|
||||
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
|
||||
cmd.arg("-uuu");
|
||||
@ -723,6 +712,13 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, path("dir/bar:test\n"));
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/87
|
||||
clean!(regression_87, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".gitignore", "foo\n**no-vcs**");
|
||||
wd.create("foo", "test");
|
||||
wd.assert_err(&mut cmd);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/90
|
||||
clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".gitignore", "!.foo");
|
||||
@ -771,6 +767,40 @@ clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, "foo:3:zztest\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/127
|
||||
clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
// Set up a directory hierarchy like this:
|
||||
//
|
||||
// .gitignore
|
||||
// foo/
|
||||
// sherlock
|
||||
// watson
|
||||
//
|
||||
// Where `.gitignore` contains `foo/sherlock`.
|
||||
//
|
||||
// ripgrep should ignore 'foo/sherlock' giving us results only from
|
||||
// 'foo/watson' but on Windows ripgrep will include both 'foo/sherlock' and
|
||||
// 'foo/watson' in the search results.
|
||||
wd.create(".gitignore", "foo/sherlock\n");
|
||||
wd.create_dir("foo");
|
||||
wd.create("foo/sherlock", hay::SHERLOCK);
|
||||
wd.create("foo/watson", hay::SHERLOCK);
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = format!("\
|
||||
{path}:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
{path}:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
", path=path("foo/watson"));
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/131
|
||||
clean!(regression_131, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".gitignore", "TopÑapa");
|
||||
wd.create("TopÑapa", "test");
|
||||
wd.assert_err(&mut cmd);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/20
|
||||
sherlock!(feature_20_no_filename, "Sherlock", ".",
|
||||
|wd: WorkDir, mut cmd: Command| {
|
||||
|
Loading…
x
Reference in New Issue
Block a user