mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-11-23 21:54:45 +02:00
globset: add allow_unclosed_class toggle
When enabled, patterns like `[abc`, `[]`, `[!]` are treated as if the opening `[` is just a literal. This is in contrast the default behavior, which prioritizes better error messages, of returning a parse error. Fixes #3127, Closes #3145
This commit is contained in:
@@ -229,6 +229,11 @@ struct GlobOptions {
|
||||
/// Whether or not an empty case in an alternate will be removed.
|
||||
/// e.g., when enabled, `{,a}` will match "" and "a".
|
||||
empty_alternates: bool,
|
||||
/// Whether or not an unclosed character class is allowed. When an unclosed
|
||||
/// character class is found, the opening `[` is treated as a literal `[`.
|
||||
/// When this isn't enabled, an opening `[` without a corresponding `]` is
|
||||
/// treated as an error.
|
||||
allow_unclosed_class: bool,
|
||||
}
|
||||
|
||||
impl GlobOptions {
|
||||
@@ -238,6 +243,7 @@ impl GlobOptions {
|
||||
literal_separator: false,
|
||||
backslash_escape: !is_separator('\\'),
|
||||
empty_alternates: false,
|
||||
allow_unclosed_class: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -594,6 +600,7 @@ impl<'a> GlobBuilder<'a> {
|
||||
chars: self.glob.chars().peekable(),
|
||||
prev: None,
|
||||
cur: None,
|
||||
found_unclosed_class: false,
|
||||
opts: &self.opts,
|
||||
};
|
||||
p.parse()?;
|
||||
@@ -657,6 +664,22 @@ impl<'a> GlobBuilder<'a> {
|
||||
self.opts.empty_alternates = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Toggle whether unclosed character classes are allowed. When allowed,
|
||||
/// a `[` without a matching `]` is treated literally instead of resulting
|
||||
/// in a parse error.
|
||||
///
|
||||
/// For example, if this is set then the glob `[abc` will be treated as the
|
||||
/// literal string `[abc` instead of returning an error.
|
||||
///
|
||||
/// By default, this is false. Generally speaking, enabling this leads to
|
||||
/// worse failure modes since the glob parser becomes more permissive. You
|
||||
/// might want to enable this when compatibility (e.g., with POSIX glob
|
||||
/// implementations) is more important than good error messages.
|
||||
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.allow_unclosed_class = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Tokens {
|
||||
@@ -782,15 +805,29 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
|
||||
}
|
||||
|
||||
struct Parser<'a> {
|
||||
/// The glob to parse.
|
||||
glob: &'a str,
|
||||
// Marks the index in `stack` where the alternation started.
|
||||
/// Marks the index in `stack` where the alternation started.
|
||||
alternates_stack: Vec<usize>,
|
||||
// The set of active alternation branches being parsed.
|
||||
// Tokens are added to the end of the last one.
|
||||
/// The set of active alternation branches being parsed.
|
||||
/// Tokens are added to the end of the last one.
|
||||
branches: Vec<Tokens>,
|
||||
/// A character iterator over the glob pattern to parse.
|
||||
chars: std::iter::Peekable<std::str::Chars<'a>>,
|
||||
/// The previous character seen.
|
||||
prev: Option<char>,
|
||||
/// The current character.
|
||||
cur: Option<char>,
|
||||
/// Whether we failed to find a closing `]` for a character
|
||||
/// class. This can only be true when `GlobOptions::allow_unclosed_class`
|
||||
/// is enabled. When enabled, it is impossible to ever parse another
|
||||
/// character class with this glob. That's because classes cannot be
|
||||
/// nested *and* the only way this happens is when there is never a `]`.
|
||||
///
|
||||
/// We track this state so that we don't end up spending quadratic time
|
||||
/// trying to parse something like `[[[[[[[[[[[[[[[[[[[[[[[...`.
|
||||
found_unclosed_class: bool,
|
||||
/// Glob options, which may influence parsing.
|
||||
opts: &'a GlobOptions,
|
||||
}
|
||||
|
||||
@@ -804,7 +841,7 @@ impl<'a> Parser<'a> {
|
||||
match c {
|
||||
'?' => self.push_token(Token::Any)?,
|
||||
'*' => self.parse_star()?,
|
||||
'[' => self.parse_class()?,
|
||||
'[' if !self.found_unclosed_class => self.parse_class()?,
|
||||
'{' => self.push_alternate()?,
|
||||
'}' => self.pop_alternate()?,
|
||||
',' => self.parse_comma()?,
|
||||
@@ -939,6 +976,11 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
|
||||
fn parse_class(&mut self) -> Result<(), Error> {
|
||||
// Save parser state for potential rollback to literal '[' parsing.
|
||||
let saved_chars = self.chars.clone();
|
||||
let saved_prev = self.prev;
|
||||
let saved_cur = self.cur;
|
||||
|
||||
fn add_to_last_range(
|
||||
glob: &str,
|
||||
r: &mut (char, char),
|
||||
@@ -966,11 +1008,17 @@ impl<'a> Parser<'a> {
|
||||
let mut first = true;
|
||||
let mut in_range = false;
|
||||
loop {
|
||||
let c = match self.bump() {
|
||||
Some(c) => c,
|
||||
// The only way to successfully break this loop is to observe
|
||||
// a ']'.
|
||||
None => return Err(self.error(ErrorKind::UnclosedClass)),
|
||||
let Some(c) = self.bump() else {
|
||||
return if self.opts.allow_unclosed_class == true {
|
||||
self.chars = saved_chars;
|
||||
self.cur = saved_cur;
|
||||
self.prev = saved_prev;
|
||||
self.found_unclosed_class = true;
|
||||
|
||||
self.push_token(Token::Literal('['))
|
||||
} else {
|
||||
Err(self.error(ErrorKind::UnclosedClass))
|
||||
};
|
||||
};
|
||||
match c {
|
||||
']' => {
|
||||
@@ -1055,6 +1103,7 @@ mod tests {
|
||||
litsep: Option<bool>,
|
||||
bsesc: Option<bool>,
|
||||
ealtre: Option<bool>,
|
||||
unccls: Option<bool>,
|
||||
}
|
||||
|
||||
macro_rules! syntax {
|
||||
@@ -1097,6 +1146,10 @@ mod tests {
|
||||
if let Some(ealtre) = $options.ealtre {
|
||||
builder.empty_alternates(ealtre);
|
||||
}
|
||||
if let Some(unccls) = $options.unccls {
|
||||
builder.allow_unclosed_class(unccls);
|
||||
}
|
||||
|
||||
let pat = builder.build().unwrap();
|
||||
assert_eq!(format!("(?-u){}", $re), pat.regex());
|
||||
}
|
||||
@@ -1242,24 +1295,73 @@ mod tests {
|
||||
syntaxerr!(err_alt3, "a,b}", ErrorKind::UnopenedAlternates);
|
||||
syntaxerr!(err_alt4, "{a,b}}", ErrorKind::UnopenedAlternates);
|
||||
|
||||
const CASEI: Options =
|
||||
Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
|
||||
const SLASHLIT: Options =
|
||||
Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
|
||||
const CASEI: Options = Options {
|
||||
casei: Some(true),
|
||||
litsep: None,
|
||||
bsesc: None,
|
||||
ealtre: None,
|
||||
unccls: None,
|
||||
};
|
||||
const SLASHLIT: Options = Options {
|
||||
casei: None,
|
||||
litsep: Some(true),
|
||||
bsesc: None,
|
||||
ealtre: None,
|
||||
unccls: None,
|
||||
};
|
||||
const NOBSESC: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(false),
|
||||
ealtre: None,
|
||||
unccls: None,
|
||||
};
|
||||
const BSESC: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(true),
|
||||
ealtre: None,
|
||||
unccls: None,
|
||||
};
|
||||
const BSESC: Options =
|
||||
Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
|
||||
const EALTRE: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(true),
|
||||
ealtre: Some(true),
|
||||
unccls: None,
|
||||
};
|
||||
const UNCCLS: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: None,
|
||||
ealtre: None,
|
||||
unccls: Some(true),
|
||||
};
|
||||
|
||||
toregex!(allow_unclosed_class_single, r"[", r"^\[$", &UNCCLS);
|
||||
toregex!(allow_unclosed_class_many, r"[abc", r"^\[abc$", &UNCCLS);
|
||||
toregex!(allow_unclosed_class_empty1, r"[]", r"^\[\]$", &UNCCLS);
|
||||
toregex!(allow_unclosed_class_empty2, r"[][", r"^\[\]\[$", &UNCCLS);
|
||||
toregex!(allow_unclosed_class_negated_unclosed, r"[!", r"^\[!$", &UNCCLS);
|
||||
toregex!(allow_unclosed_class_negated_empty, r"[!]", r"^\[!\]$", &UNCCLS);
|
||||
toregex!(
|
||||
allow_unclosed_class_brace1,
|
||||
r"{[abc,xyz}",
|
||||
r"^(?:\[abc|xyz)$",
|
||||
&UNCCLS
|
||||
);
|
||||
toregex!(
|
||||
allow_unclosed_class_brace2,
|
||||
r"{[abc,[xyz}",
|
||||
r"^(?:\[abc|\[xyz)$",
|
||||
&UNCCLS
|
||||
);
|
||||
toregex!(
|
||||
allow_unclosed_class_brace3,
|
||||
r"{[abc],[xyz}",
|
||||
r"^(?:[abc]|\[xyz)$",
|
||||
&UNCCLS
|
||||
);
|
||||
|
||||
toregex!(re_empty, "", "^$");
|
||||
|
||||
|
||||
@@ -308,6 +308,7 @@ pub struct GitignoreBuilder {
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
case_insensitive: bool,
|
||||
allow_unclosed_class: bool,
|
||||
}
|
||||
|
||||
impl GitignoreBuilder {
|
||||
@@ -324,6 +325,7 @@ impl GitignoreBuilder {
|
||||
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
|
||||
globs: vec![],
|
||||
case_insensitive: false,
|
||||
allow_unclosed_class: true,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -511,6 +513,7 @@ impl GitignoreBuilder {
|
||||
.literal_separator(true)
|
||||
.case_insensitive(self.case_insensitive)
|
||||
.backslash_escape(true)
|
||||
.allow_unclosed_class(self.allow_unclosed_class)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob {
|
||||
glob: Some(glob.original.clone()),
|
||||
@@ -536,6 +539,26 @@ impl GitignoreBuilder {
|
||||
self.case_insensitive = yes;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Toggle whether unclosed character classes are allowed. When allowed,
|
||||
/// a `[` without a matching `]` is treated literally instead of resulting
|
||||
/// in a parse error.
|
||||
///
|
||||
/// For example, if this is set then the glob `[abc` will be treated as the
|
||||
/// literal string `[abc` instead of returning an error.
|
||||
///
|
||||
/// By default, this is true in order to match established `gitignore`
|
||||
/// semantics. Generally speaking, enabling this leads to worse failure
|
||||
/// modes since the glob parser becomes more permissive. You might want to
|
||||
/// enable this when compatibility (e.g., with POSIX glob implementations)
|
||||
/// is more important than good error messages.
|
||||
pub fn allow_unclosed_class(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut GitignoreBuilder {
|
||||
self.allow_unclosed_class = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the file path of the current environment's global gitignore file.
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
/*!
|
||||
The overrides module provides a way to specify a set of override globs.
|
||||
|
||||
This provides functionality similar to `--include` or `--exclude` in command
|
||||
line tools.
|
||||
*/
|
||||
@@ -120,7 +121,9 @@ impl OverrideBuilder {
|
||||
///
|
||||
/// Matching is done relative to the directory path provided.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
|
||||
OverrideBuilder { builder: GitignoreBuilder::new(path) }
|
||||
let mut builder = GitignoreBuilder::new(path);
|
||||
builder.allow_unclosed_class(false);
|
||||
OverrideBuilder { builder }
|
||||
}
|
||||
|
||||
/// Builds a new override matcher from the globs added so far.
|
||||
@@ -143,7 +146,8 @@ impl OverrideBuilder {
|
||||
|
||||
/// Toggle whether the globs should be matched case insensitively or not.
|
||||
///
|
||||
/// When this option is changed, only globs added after the change will be affected.
|
||||
/// When this option is changed, only globs added after the change will be
|
||||
/// affected.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn case_insensitive(
|
||||
@@ -155,6 +159,28 @@ impl OverrideBuilder {
|
||||
self.builder.case_insensitive(yes)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Toggle whether unclosed character classes are allowed. When allowed,
|
||||
/// a `[` without a matching `]` is treated literally instead of resulting
|
||||
/// in a parse error.
|
||||
///
|
||||
/// For example, if this is set then the glob `[abc` will be treated as the
|
||||
/// literal string `[abc` instead of returning an error.
|
||||
///
|
||||
/// By default, this is false. Generally speaking, enabling this leads to
|
||||
/// worse failure modes since the glob parser becomes more permissive. You
|
||||
/// might want to enable this when compatibility (e.g., with POSIX glob
|
||||
/// implementations) is more important than good error messages.
|
||||
///
|
||||
/// This default is different from the default for [`Gitignore`]. Namely,
|
||||
/// [`Gitignore`] is intended to match git's behavior as-is. But this
|
||||
/// abstraction for "override" globs does not necessarily conform to any
|
||||
/// other known specification and instead prioritizes better error
|
||||
/// messages.
|
||||
pub fn allow_unclosed_class(&mut self, yes: bool) -> &mut OverrideBuilder {
|
||||
self.builder.allow_unclosed_class(yes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
||||
Reference in New Issue
Block a user