1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-24 17:12:16 +02:00

globset: permit ** to appear anywhere

Previously, `man gitignore` specified that `**` was invalid unless it
was used in one of a few specific circumstances, i.e., `**`, `a/**`,
`**/b` or `a/**/b`. That is, `**` always had to be surrounded by either
a path separator or the beginning/end of the pattern.

It turns out that git itself has treated `**` outside the above contexts
as valid for quite a while, so there was an inconsistency between the
spec `man gitignore` and the implementation, and it wasn't clear which
was actually correct.

@okdana filed a bug against git[1] and got this fixed. The spec was wrong,
which has now been fixed [2] and updated[2].

This commit brings ripgrep in line with git and treats `**` outside of
the above contexts as two consecutive `*` patterns. We deprecate the
`InvalidRecursive` error since it is no longer used.

Fixes #373, Fixes #1098

[1] - https://public-inbox.org/git/C16A9F17-0375-42F9-90A9-A92C9F3D8BBA@dana.is
[2] - 627186d020
[3] - https://git-scm.com/docs/gitignore
This commit is contained in:
Andrew Gallant 2019-01-23 19:46:15 -05:00
parent 0a167021c3
commit 9c940b45f4
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
5 changed files with 54 additions and 25 deletions

View File

@ -11,6 +11,9 @@ Feature enhancements:
Bug fixes: Bug fixes:
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
`**` is now accepted as valid syntax anywhere in a glob.
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106): * [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
`--files-with-matches` and `--files-without-match` work with one file. `--files-with-matches` and `--files-without-match` work with one file.
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093): * [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):

View File

@ -837,40 +837,49 @@ impl<'a> Parser<'a> {
fn parse_star(&mut self) -> Result<(), Error> { fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev; let prev = self.prev;
if self.chars.peek() != Some(&'*') { if self.peek() != Some('*') {
self.push_token(Token::ZeroOrMore)?; self.push_token(Token::ZeroOrMore)?;
return Ok(()); return Ok(());
} }
assert!(self.bump() == Some('*')); assert!(self.bump() == Some('*'));
if !self.have_tokens()? { if !self.have_tokens()? {
self.push_token(Token::RecursivePrefix)?; if !self.peek().map_or(true, is_separator) {
let next = self.bump(); self.push_token(Token::ZeroOrMore)?;
if !next.map(is_separator).unwrap_or(true) { self.push_token(Token::ZeroOrMore)?;
return Err(self.error(ErrorKind::InvalidRecursive)); } else {
self.push_token(Token::RecursivePrefix)?;
assert!(self.bump().map_or(true, is_separator));
} }
return Ok(()); return Ok(());
} }
if !prev.map(is_separator).unwrap_or(false) { if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1 if self.stack.len() <= 1
|| (prev != Some(',') && prev != Some('{')) { || (prev != Some(',') && prev != Some('{'))
return Err(self.error(ErrorKind::InvalidRecursive)); {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
} }
} }
let is_suffix = let is_suffix =
match self.chars.peek() { match self.peek() {
None => { None => {
assert!(self.bump().is_none()); assert!(self.bump().is_none());
true true
} }
Some(&',') | Some(&'}') if self.stack.len() >= 2 => { Some(',') | Some('}') if self.stack.len() >= 2 => {
true true
} }
Some(&c) if is_separator(c) => { Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false)); assert!(self.bump().map(is_separator).unwrap_or(false));
false false
} }
_ => return Err(self.error(ErrorKind::InvalidRecursive)), _ => {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
}; };
match self.pop_token()? { match self.pop_token()? {
Token::RecursivePrefix => { Token::RecursivePrefix => {
@ -976,6 +985,10 @@ impl<'a> Parser<'a> {
self.cur = self.chars.next(); self.cur = self.chars.next();
self.cur self.cur
} }
fn peek(&mut self) -> Option<char> {
self.chars.peek().map(|&ch| ch)
}
} }
#[cfg(test)] #[cfg(test)]
@ -1161,13 +1174,6 @@ mod tests {
syntax!(cls20, "[^a]", vec![classn('a', 'a')]); syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]); syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass); syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass); syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass); syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
@ -1228,6 +1234,13 @@ mod tests {
toregex!(re25, "**/b", r"^(?:/?|.*/)b$"); toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$"); toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$"); toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
toregex!(re28, "a**", r"^a.*.*$");
toregex!(re29, "**a", r"^.*.*a$");
toregex!(re30, "a**b", r"^a.*.*b$");
toregex!(re31, "***", r"^.*.*.*$");
toregex!(re32, "/a**", r"^/a.*.*$");
toregex!(re33, "/**a", r"^/.*.*a$");
toregex!(re34, "/a**b", r"^/a.*.*b$");
matches!(match1, "a", "a"); matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b"); matches!(match2, "a*b", "a_b");

View File

@ -143,8 +143,13 @@ pub struct Error {
/// The kind of error that can occur when parsing a glob pattern. /// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind { pub enum ErrorKind {
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear /// **DEPRECATED**.
/// adjacent to a path separator, or the beginning/end of a glob. ///
/// This error used to occur for consistency with git's glob specification,
/// but the specification now accepts all uses of `**`. When `**` does not
/// appear adjacent to a path separator or at the beginning/end of a glob,
/// it is now treated as two consecutive `*` patterns. As such, this error
/// is no longer used.
InvalidRecursive, InvalidRecursive,
/// Occurs when a character class (e.g., `[abc]`) is not closed. /// Occurs when a character class (e.g., `[abc]`) is not closed.
UnclosedClass, UnclosedClass,

View File

@ -869,7 +869,7 @@ mod tests {
#[test] #[test]
fn errored() { fn errored() {
let td = tmpdir("ignore-test-"); let td = tmpdir("ignore-test-");
wfile(td.path().join(".gitignore"), "f**oo"); wfile(td.path().join(".gitignore"), "{foo");
let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
assert!(err.is_some()); assert!(err.is_some());
@ -878,8 +878,8 @@ mod tests {
#[test] #[test]
fn errored_both() { fn errored_both() {
let td = tmpdir("ignore-test-"); let td = tmpdir("ignore-test-");
wfile(td.path().join(".gitignore"), "f**oo"); wfile(td.path().join(".gitignore"), "{foo");
wfile(td.path().join(".ignore"), "fo**o"); wfile(td.path().join(".ignore"), "{bar");
let (_, err) = IgnoreBuilder::new().build().add_child(td.path()); let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
assert_eq!(2, partial(err.expect("an error")).len()); assert_eq!(2, partial(err.expect("an error")).len());
@ -889,7 +889,7 @@ mod tests {
fn errored_partial() { fn errored_partial() {
let td = tmpdir("ignore-test-"); let td = tmpdir("ignore-test-");
mkdirp(td.path().join(".git")); mkdirp(td.path().join(".git"));
wfile(td.path().join(".gitignore"), "f**oo\nbar"); wfile(td.path().join(".gitignore"), "{foo\nbar");
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
assert!(err.is_some()); assert!(err.is_some());
@ -899,7 +899,7 @@ mod tests {
#[test] #[test]
fn errored_partial_and_ignore() { fn errored_partial_and_ignore() {
let td = tmpdir("ignore-test-"); let td = tmpdir("ignore-test-");
wfile(td.path().join(".gitignore"), "f**oo\nbar"); wfile(td.path().join(".gitignore"), "{foo\nbar");
wfile(td.path().join(".ignore"), "!bar"); wfile(td.path().join(".ignore"), "!bar");
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path()); let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());

View File

@ -569,6 +569,14 @@ rgtest!(r1064, |dir: Dir, mut cmd: TestCommand| {
eqnice!("input:abc\n", cmd.arg("a(.*c)").stdout()); eqnice!("input:abc\n", cmd.arg("a(.*c)").stdout());
}); });
// See: https://github.com/BurntSushi/ripgrep/issues/1174
rgtest!(r1098, |dir: Dir, mut cmd: TestCommand| {
dir.create_dir(".git");
dir.create(".gitignore", "a**b");
dir.create("afoob", "test");
cmd.arg("test").assert_err();
});
// See: https://github.com/BurntSushi/ripgrep/issues/1130 // See: https://github.com/BurntSushi/ripgrep/issues/1130
rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| { rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| {
dir.create("foo", "test"); dir.create("foo", "test");