mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-06-14 22:15:13 +02:00
globset: permit ** to appear anywhere
Previously, `man gitignore` specified that `**` was invalid unless it
was used in one of a few specific circumstances, i.e., `**`, `a/**`,
`**/b` or `a/**/b`. That is, `**` always had to be surrounded by either
a path separator or the beginning/end of the pattern.
It turns out that git itself has treated `**` outside the above contexts
as valid for quite a while, so there was an inconsistency between the
spec `man gitignore` and the implementation, and it wasn't clear which
was actually correct.
@okdana filed a bug against git[1] and got this fixed. The spec was wrong,
which has now been fixed [2] and updated[2].
This commit brings ripgrep in line with git and treats `**` outside of
the above contexts as two consecutive `*` patterns. We deprecate the
`InvalidRecursive` error since it is no longer used.
Fixes #373, Fixes #1098
[1] - https://public-inbox.org/git/C16A9F17-0375-42F9-90A9-A92C9F3D8BBA@dana.is
[2] - 627186d020
[3] - https://git-scm.com/docs/gitignore
This commit is contained in:
@ -837,40 +837,49 @@ impl<'a> Parser<'a> {
|
||||
|
||||
fn parse_star(&mut self) -> Result<(), Error> {
|
||||
let prev = self.prev;
|
||||
if self.chars.peek() != Some(&'*') {
|
||||
if self.peek() != Some('*') {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
assert!(self.bump() == Some('*'));
|
||||
if !self.have_tokens()? {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
let next = self.bump();
|
||||
if !next.map(is_separator).unwrap_or(true) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
if !self.peek().map_or(true, is_separator) {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
assert!(self.bump().map_or(true, is_separator));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
if !prev.map(is_separator).unwrap_or(false) {
|
||||
if self.stack.len() <= 1
|
||||
|| (prev != Some(',') && prev != Some('{')) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
|| (prev != Some(',') && prev != Some('{'))
|
||||
{
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
let is_suffix =
|
||||
match self.chars.peek() {
|
||||
match self.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
true
|
||||
}
|
||||
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
|
||||
Some(',') | Some('}') if self.stack.len() >= 2 => {
|
||||
true
|
||||
}
|
||||
Some(&c) if is_separator(c) => {
|
||||
Some(c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
false
|
||||
}
|
||||
_ => return Err(self.error(ErrorKind::InvalidRecursive)),
|
||||
_ => {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
match self.pop_token()? {
|
||||
Token::RecursivePrefix => {
|
||||
@ -976,6 +985,10 @@ impl<'a> Parser<'a> {
|
||||
self.cur = self.chars.next();
|
||||
self.cur
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().map(|&ch| ch)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@ -1161,13 +1174,6 @@ mod tests {
|
||||
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
|
||||
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
|
||||
|
||||
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
|
||||
@ -1228,6 +1234,13 @@ mod tests {
|
||||
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re28, "a**", r"^a.*.*$");
|
||||
toregex!(re29, "**a", r"^.*.*a$");
|
||||
toregex!(re30, "a**b", r"^a.*.*b$");
|
||||
toregex!(re31, "***", r"^.*.*.*$");
|
||||
toregex!(re32, "/a**", r"^/a.*.*$");
|
||||
toregex!(re33, "/**a", r"^/.*.*a$");
|
||||
toregex!(re34, "/a**b", r"^/a.*.*b$");
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
|
@ -143,8 +143,13 @@ pub struct Error {
|
||||
/// The kind of error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum ErrorKind {
|
||||
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
|
||||
/// adjacent to a path separator, or the beginning/end of a glob.
|
||||
/// **DEPRECATED**.
|
||||
///
|
||||
/// This error used to occur for consistency with git's glob specification,
|
||||
/// but the specification now accepts all uses of `**`. When `**` does not
|
||||
/// appear adjacent to a path separator or at the beginning/end of a glob,
|
||||
/// it is now treated as two consecutive `*` patterns. As such, this error
|
||||
/// is no longer used.
|
||||
InvalidRecursive,
|
||||
/// Occurs when a character class (e.g., `[abc]`) is not closed.
|
||||
UnclosedClass,
|
||||
|
Reference in New Issue
Block a user