1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-24 17:12:16 +02:00

globset: support backslash escaping

From `man 7 glob`:

    One can remove the special meaning of '?', '*' and '[' by preceding
    them by a backslash, or, in case this is part of a shell command
    line, enclosing them in quotes.

Conform to glob / fnmatch / git implementations by making `\` escape the
following character - for example `\?` will match a literal `?`.

However, only enable this by default on Unix platforms. Windows builds
will continue to use `\` as a path separator, but can still get the new
behavior by calling `GlobBuilder::backslash_escape(true)`.

Adding tests for the `GlobBuilder::backslash_escape` option was a bit
involved, since the default value of this option is platform-dependent.

Extend the test `Options` struct to hold an `Option<T>` for each
knob, where `None` means "default" and `Some(v)` means "override with
`v`". This way we only have to specify the default values once in
`GlobOptions::default()` rather than replicated in both code and tests.

Finally, write a few behavioral tests, and some tests to confirm that
the default behavior varies by platform.
This commit is contained in:
Brian Malehorn 2018-02-22 23:13:36 -08:00 committed by Andrew Gallant
parent c0c80e0209
commit e2516ed095
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
2 changed files with 131 additions and 35 deletions

View File

@ -187,13 +187,26 @@ pub struct GlobBuilder<'a> {
opts: GlobOptions, opts: GlobOptions,
} }
#[derive(Clone, Copy, Debug, Default, Eq, Hash, PartialEq)] #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
struct GlobOptions { struct GlobOptions {
/// Whether to match case insensitively. /// Whether to match case insensitively.
case_insensitive: bool, case_insensitive: bool,
/// Whether to require a literal separator to match a separator in a file /// Whether to require a literal separator to match a separator in a file
/// path. e.g., when enabled, `*` won't match `/`. /// path. e.g., when enabled, `*` won't match `/`.
literal_separator: bool, literal_separator: bool,
/// Whether or not to use `\` to escape special characters.
/// e.g., when enabled, `\*` will match a literal `*`.
backslash_escape: bool,
}
impl GlobOptions {
fn default() -> GlobOptions {
GlobOptions {
case_insensitive: false,
literal_separator: false,
backslash_escape: !is_separator('\\'),
}
}
} }
#[derive(Clone, Debug, Default, Eq, PartialEq)] #[derive(Clone, Debug, Default, Eq, PartialEq)]
@ -549,6 +562,7 @@ impl<'a> GlobBuilder<'a> {
chars: self.glob.chars().peekable(), chars: self.glob.chars().peekable(),
prev: None, prev: None,
cur: None, cur: None,
opts: &self.opts,
}; };
p.parse()?; p.parse()?;
if p.stack.is_empty() { if p.stack.is_empty() {
@ -585,6 +599,19 @@ impl<'a> GlobBuilder<'a> {
self.opts.literal_separator = yes; self.opts.literal_separator = yes;
self self
} }
/// When enabled, a back slash (`\`) may be used to escape
/// special characters in a glob pattern. Additionally, this will
/// prevent `\` from being interpreted as a path separator on all
/// platforms.
///
/// This is enabled by default on platforms where `\` is not a
/// path separator and disabled by default on platforms where `\`
/// is a path separator.
pub fn backslash_escape(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
self.opts.backslash_escape = yes;
self
}
} }
impl Tokens { impl Tokens {
@ -710,6 +737,7 @@ struct Parser<'a> {
chars: iter::Peekable<str::Chars<'a>>, chars: iter::Peekable<str::Chars<'a>>,
prev: Option<char>, prev: Option<char>,
cur: Option<char>, cur: Option<char>,
opts: &'a GlobOptions,
} }
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
@ -726,14 +754,8 @@ impl<'a> Parser<'a> {
'{' => self.push_alternate()?, '{' => self.push_alternate()?,
'}' => self.pop_alternate()?, '}' => self.pop_alternate()?,
',' => self.parse_comma()?, ',' => self.parse_comma()?,
c => { '\\' => self.parse_backslash()?,
if is_separator(c) { c => self.push_token(Token::Literal(c))?,
// Normalize all patterns to use / as a separator.
self.push_token(Token::Literal('/'))?
} else {
self.push_token(Token::Literal(c))?
}
}
} }
} }
Ok(()) Ok(())
@ -786,6 +808,20 @@ impl<'a> Parser<'a> {
} }
} }
/// Handle a `\` that was just consumed from the glob pattern.
///
/// With backslash escaping enabled, the next character is consumed and
/// pushed as a literal token; a `\` at the very end of the pattern is
/// reported as `ErrorKind::DanglingEscape`.
///
/// With escaping disabled, the `\` itself becomes a literal token: `/`
/// on platforms where `\` is a path separator (so that patterns are
/// normalized to use `/`), and `\` everywhere else.
fn parse_backslash(&mut self) -> Result<(), Error> {
    if !self.opts.backslash_escape {
        // Normalize all patterns to use / as a separator.
        let literal = if is_separator('\\') { '/' } else { '\\' };
        return self.push_token(Token::Literal(literal));
    }
    match self.bump() {
        Some(escaped) => self.push_token(Token::Literal(escaped)),
        None => Err(self.error(ErrorKind::DanglingEscape)),
    }
}
fn parse_star(&mut self) -> Result<(), Error> { fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev; let prev = self.prev;
if self.chars.peek() != Some(&'*') { if self.chars.peek() != Some(&'*') {
@ -933,8 +969,9 @@ mod tests {
#[derive(Clone, Copy, Debug, Default)] #[derive(Clone, Copy, Debug, Default)]
struct Options { struct Options {
casei: bool, casei: Option<bool>,
litsep: bool, litsep: Option<bool>,
bsesc: Option<bool>,
} }
macro_rules! syntax { macro_rules! syntax {
@ -964,11 +1001,17 @@ mod tests {
($name:ident, $pat:expr, $re:expr, $options:expr) => { ($name:ident, $pat:expr, $re:expr, $options:expr) => {
#[test] #[test]
fn $name() { fn $name() {
let pat = GlobBuilder::new($pat) let mut builder = GlobBuilder::new($pat);
.case_insensitive($options.casei) if let Some(casei) = $options.casei {
.literal_separator($options.litsep) builder.case_insensitive(casei);
.build() }
.unwrap(); if let Some(litsep) = $options.litsep {
builder.literal_separator(litsep);
}
if let Some(bsesc) = $options.bsesc {
builder.backslash_escape(bsesc);
}
let pat = builder.build().unwrap();
assert_eq!(format!("(?-u){}", $re), pat.regex()); assert_eq!(format!("(?-u){}", $re), pat.regex());
} }
}; };
@ -981,11 +1024,17 @@ mod tests {
($name:ident, $pat:expr, $path:expr, $options:expr) => { ($name:ident, $pat:expr, $path:expr, $options:expr) => {
#[test] #[test]
fn $name() { fn $name() {
let pat = GlobBuilder::new($pat) let mut builder = GlobBuilder::new($pat);
.case_insensitive($options.casei) if let Some(casei) = $options.casei {
.literal_separator($options.litsep) builder.case_insensitive(casei);
.build() }
.unwrap(); if let Some(litsep) = $options.litsep {
builder.literal_separator(litsep);
}
if let Some(bsesc) = $options.bsesc {
builder.backslash_escape(bsesc);
}
let pat = builder.build().unwrap();
let matcher = pat.compile_matcher(); let matcher = pat.compile_matcher();
let strategic = pat.compile_strategic_matcher(); let strategic = pat.compile_strategic_matcher();
let set = GlobSetBuilder::new().add(pat).build().unwrap(); let set = GlobSetBuilder::new().add(pat).build().unwrap();
@ -1003,11 +1052,17 @@ mod tests {
($name:ident, $pat:expr, $path:expr, $options:expr) => { ($name:ident, $pat:expr, $path:expr, $options:expr) => {
#[test] #[test]
fn $name() { fn $name() {
let pat = GlobBuilder::new($pat) let mut builder = GlobBuilder::new($pat);
.case_insensitive($options.casei) if let Some(casei) = $options.casei {
.literal_separator($options.litsep) builder.case_insensitive(casei);
.build() }
.unwrap(); if let Some(litsep) = $options.litsep {
builder.literal_separator(litsep);
}
if let Some(bsesc) = $options.bsesc {
builder.backslash_escape(bsesc);
}
let pat = builder.build().unwrap();
let matcher = pat.compile_matcher(); let matcher = pat.compile_matcher();
let strategic = pat.compile_strategic_matcher(); let strategic = pat.compile_strategic_matcher();
let set = GlobSetBuilder::new().add(pat).build().unwrap(); let set = GlobSetBuilder::new().add(pat).build().unwrap();
@ -1091,12 +1146,24 @@ mod tests {
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-')); syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
const CASEI: Options = Options { const CASEI: Options = Options {
casei: true, casei: Some(true),
litsep: false, litsep: None,
bsesc: None,
}; };
const SLASHLIT: Options = Options { const SLASHLIT: Options = Options {
casei: false, casei: None,
litsep: true, litsep: Some(true),
bsesc: None,
};
const NOBSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(false),
};
const BSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
}; };
toregex!(re_casei, "a", "(?i)^a$", &CASEI); toregex!(re_casei, "a", "(?i)^a$", &CASEI);
@ -1209,6 +1276,17 @@ mod tests {
#[cfg(not(unix))] #[cfg(not(unix))]
matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT); matches!(matchslash5, "abc\\def", "abc/def", SLASHLIT);
matches!(matchbackslash1, "\\[", "[", BSESC);
matches!(matchbackslash2, "\\?", "?", BSESC);
matches!(matchbackslash3, "\\*", "*", BSESC);
matches!(matchbackslash4, "\\[a-z]", "\\a", NOBSESC);
matches!(matchbackslash5, "\\?", "\\a", NOBSESC);
matches!(matchbackslash6, "\\*", "\\\\", NOBSESC);
#[cfg(unix)]
matches!(matchbackslash7, "\\a", "a");
#[cfg(not(unix))]
matches!(matchbackslash8, "\\a", "/a");
nmatches!(matchnot1, "a*b*c", "abcd"); nmatches!(matchnot1, "a*b*c", "abcd");
nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca"); nmatches!(matchnot2, "abc*abc*abc", "abcabcabcabcabcabcabca");
nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt"); nmatches!(matchnot3, "some/**/needle.txt", "some/other/notthis.txt");
@ -1253,13 +1331,20 @@ mod tests {
($which:ident, $name:ident, $pat:expr, $expect:expr) => { ($which:ident, $name:ident, $pat:expr, $expect:expr) => {
extract!($which, $name, $pat, $expect, Options::default()); extract!($which, $name, $pat, $expect, Options::default());
}; };
($which:ident, $name:ident, $pat:expr, $expect:expr, $opts:expr) => { ($which:ident, $name:ident, $pat:expr, $expect:expr, $options:expr) => {
#[test] #[test]
fn $name() { fn $name() {
let pat = GlobBuilder::new($pat) let mut builder = GlobBuilder::new($pat);
.case_insensitive($opts.casei) if let Some(casei) = $options.casei {
.literal_separator($opts.litsep) builder.case_insensitive(casei);
.build().unwrap(); }
if let Some(litsep) = $options.litsep {
builder.literal_separator(litsep);
}
if let Some(bsesc) = $options.bsesc {
builder.backslash_escape(bsesc);
}
let pat = builder.build().unwrap();
assert_eq!($expect, pat.$which()); assert_eq!($expect, pat.$which());
} }
}; };

View File

@ -91,6 +91,11 @@ Standard Unix-style glob syntax is supported:
`[!ab]` to match any character except for `a` and `b`. `[!ab]` to match any character except for `a` and `b`.
* Metacharacters such as `*` and `?` can be escaped with character class * Metacharacters such as `*` and `?` can be escaped with character class
notation. e.g., `[*]` matches `*`. notation. e.g., `[*]` matches `*`.
* When backslash escapes are enabled, a backslash (`\`) will escape all meta
characters in a glob. If it precedes a non-meta character, then the backslash
is ignored. A `\\` will match a literal `\`. Note that this mode is only
enabled on Unix platforms by default, but can be enabled on any platform
via the `backslash_escape` setting on `GlobBuilder`.
A `GlobBuilder` can be used to prevent wildcards from matching path separators, A `GlobBuilder` can be used to prevent wildcards from matching path separators,
or to enable case insensitive matching. or to enable case insensitive matching.
@ -154,6 +159,8 @@ pub enum ErrorKind {
/// Occurs when an alternating group is nested inside another alternating /// Occurs when an alternating group is nested inside another alternating
/// group, e.g., `{{a,b},{c,d}}`. /// group, e.g., `{{a,b},{c,d}}`.
NestedAlternates, NestedAlternates,
/// Occurs when an unescaped '\' is found at the end of a glob.
DanglingEscape,
/// An error associated with parsing or compiling a regex. /// An error associated with parsing or compiling a regex.
Regex(String), Regex(String),
} }
@ -199,6 +206,9 @@ impl ErrorKind {
ErrorKind::NestedAlternates => { ErrorKind::NestedAlternates => {
"nested alternate groups are not allowed" "nested alternate groups are not allowed"
} }
ErrorKind::DanglingEscape => {
"dangling '\\'"
}
ErrorKind::Regex(ref err) => err, ErrorKind::Regex(ref err) => err,
} }
} }
@ -223,6 +233,7 @@ impl fmt::Display for ErrorKind {
| ErrorKind::UnopenedAlternates | ErrorKind::UnopenedAlternates
| ErrorKind::UnclosedAlternates | ErrorKind::UnclosedAlternates
| ErrorKind::NestedAlternates | ErrorKind::NestedAlternates
| ErrorKind::DanglingEscape
| ErrorKind::Regex(_) => { | ErrorKind::Regex(_) => {
write!(f, "{}", self.description()) write!(f, "{}", self.description())
} }