1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-03-17 20:28:03 +02:00

Fixes a bug with --smart-case.

This was a subtle bug, but the big picture was that the smart case
information wasn't being carried through to the literal extraction in
some cases. When this happened, it was possible to get back an incomplete
set of literals, which would therefore miss some valid matches.

The fix to this is to actually parse the regex and determine whether
smart case applies before doing anything else. It's a little extra work,
but parsing is pretty fast.

Fixes #199
This commit is contained in:
Andrew Gallant 2016-11-06 12:07:47 -05:00
parent 5bd0edbbe1
commit 0222e024fe
3 changed files with 51 additions and 20 deletions

View File

@ -9,7 +9,7 @@ principled.
*/ */
use std::cmp; use std::cmp;
use regex::bytes::Regex; use regex::bytes::RegexBuilder;
use syntax::{ use syntax::{
Expr, Literals, Lit, Expr, Literals, Lit,
ByteClass, ByteRange, CharClass, ClassRange, Repeater, ByteClass, ByteRange, CharClass, ClassRange, Repeater,
@ -33,7 +33,7 @@ impl LiteralSets {
} }
} }
pub fn to_regex(&self) -> Option<Regex> { pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
if self.prefixes.all_complete() && !self.prefixes.is_empty() { if self.prefixes.all_complete() && !self.prefixes.is_empty() {
debug!("literal prefixes detected: {:?}", self.prefixes); debug!("literal prefixes detected: {:?}", self.prefixes);
// When this is true, the regex engine will do a literal scan. // When this is true, the regex engine will do a literal scan.
@ -79,14 +79,12 @@ impl LiteralSets {
debug!("required literals found: {:?}", req_lits); debug!("required literals found: {:?}", req_lits);
let alts: Vec<String> = let alts: Vec<String> =
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect(); req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
// Literals always compile. Some(RegexBuilder::new(&alts.join("|")))
Some(Regex::new(&alts.join("|")).unwrap())
} else if lit.is_empty() { } else if lit.is_empty() {
None None
} else { } else {
// Literals always compile.
debug!("required literal found: {:?}", show(lit)); debug!("required literal found: {:?}", show(lit));
Some(Regex::new(&bytes_to_regex(lit)).unwrap()) Some(RegexBuilder::new(&bytes_to_regex(lit)))
} }
} }
} }

View File

@ -144,14 +144,19 @@ impl GrepBuilder {
let expr = try!(self.parse()); let expr = try!(self.parse());
let literals = LiteralSets::create(&expr); let literals = LiteralSets::create(&expr);
let re = try!(self.regex(&expr)); let re = try!(self.regex(&expr));
let required = literals.to_regex().or_else(|| { let required = match literals.to_regex_builder() {
let expr = match strip_unicode_word_boundaries(&expr) { Some(builder) => Some(try!(self.regex_build(builder))),
None => return None, None => {
Some(expr) => expr, match strip_unicode_word_boundaries(&expr) {
}; None => None,
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr); Some(expr) => {
debug!("Stripped Unicode word boundaries. \
New AST:\n{:?}", expr);
self.regex(&expr).ok() self.regex(&expr).ok()
}); }
}
}
};
Ok(Grep { Ok(Grep {
re: re, re: re,
required: required, required: required,
@ -162,11 +167,12 @@ impl GrepBuilder {
/// Creates a new regex from the given expression with the current /// Creates a new regex from the given expression with the current
/// configuration. /// configuration.
fn regex(&self, expr: &Expr) -> Result<Regex> { fn regex(&self, expr: &Expr) -> Result<Regex> {
let casei = self.regex_build(RegexBuilder::new(&expr.to_string()))
self.opts.case_insensitive }
|| (self.opts.case_smart && !has_uppercase_literal(expr));
RegexBuilder::new(&expr.to_string()) /// Builds a new regex from the given builder using the caller's settings.
.case_insensitive(casei) fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
builder
.multi_line(true) .multi_line(true)
.unicode(true) .unicode(true)
.size_limit(self.opts.size_limit) .size_limit(self.opts.size_limit)
@ -182,12 +188,30 @@ impl GrepBuilder {
try!(syntax::ExprBuilder::new() try!(syntax::ExprBuilder::new()
.allow_bytes(true) .allow_bytes(true)
.unicode(true) .unicode(true)
.case_insensitive(self.opts.case_insensitive) .case_insensitive(try!(self.is_case_insensitive()))
.parse(&self.pattern)); .parse(&self.pattern));
let expr = try!(nonl::remove(expr, self.opts.line_terminator)); let expr = try!(nonl::remove(expr, self.opts.line_terminator));
debug!("regex ast:\n{:#?}", expr); debug!("regex ast:\n{:#?}", expr);
Ok(expr) Ok(expr)
} }
/// Determines whether the case insensitive flag should be enabled or not.
///
/// An error is returned if the regex could not be parsed.
fn is_case_insensitive(&self) -> Result<bool> {
if self.opts.case_insensitive {
return Ok(true);
}
if !self.opts.case_smart {
return Ok(false);
}
let expr =
try!(syntax::ExprBuilder::new()
.allow_bytes(true)
.unicode(true)
.parse(&self.pattern));
Ok(!has_uppercase_literal(&expr))
}
} }
impl Grep { impl Grep {

View File

@ -874,6 +874,15 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, "baz:test\n"); assert_eq!(lines, "baz:test\n");
}); });
// See: https://github.com/BurntSushi/ripgrep/issues/199
clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("foo", "tEsT");
cmd.arg("--smart-case");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:tEsT\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/206 // See: https://github.com/BurntSushi/ripgrep/issues/206
clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| { clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create_dir("foo"); wd.create_dir("foo");