mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-17 20:28:03 +02:00
Fixes a bug with --smart-case.
This was a subtle bug, but the big picture was that the smart case information wasn't being carried through to the literal extraction in some cases. When this happened, it was possible to get back an incomplete set of literals, which would therefore miss some valid matches. The fix to this is to actually parse the regex and determine whether smart case applies before doing anything else. It's a little extra work, but parsing is pretty fast. Fixes #199
This commit is contained in:
parent
5bd0edbbe1
commit
0222e024fe
@ -9,7 +9,7 @@ principled.
|
||||
*/
|
||||
use std::cmp;
|
||||
|
||||
use regex::bytes::Regex;
|
||||
use regex::bytes::RegexBuilder;
|
||||
use syntax::{
|
||||
Expr, Literals, Lit,
|
||||
ByteClass, ByteRange, CharClass, ClassRange, Repeater,
|
||||
@ -33,7 +33,7 @@ impl LiteralSets {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_regex(&self) -> Option<Regex> {
|
||||
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan.
|
||||
@ -79,14 +79,12 @@ impl LiteralSets {
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> =
|
||||
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
|
||||
// Literals always compile.
|
||||
Some(Regex::new(&alts.join("|")).unwrap())
|
||||
Some(RegexBuilder::new(&alts.join("|")))
|
||||
} else if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
// Literals always compile.
|
||||
debug!("required literal found: {:?}", show(lit));
|
||||
Some(Regex::new(&bytes_to_regex(lit)).unwrap())
|
||||
Some(RegexBuilder::new(&bytes_to_regex(lit)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -144,14 +144,19 @@ impl GrepBuilder {
|
||||
let expr = try!(self.parse());
|
||||
let literals = LiteralSets::create(&expr);
|
||||
let re = try!(self.regex(&expr));
|
||||
let required = literals.to_regex().or_else(|| {
|
||||
let expr = match strip_unicode_word_boundaries(&expr) {
|
||||
None => return None,
|
||||
Some(expr) => expr,
|
||||
};
|
||||
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
|
||||
self.regex(&expr).ok()
|
||||
});
|
||||
let required = match literals.to_regex_builder() {
|
||||
Some(builder) => Some(try!(self.regex_build(builder))),
|
||||
None => {
|
||||
match strip_unicode_word_boundaries(&expr) {
|
||||
None => None,
|
||||
Some(expr) => {
|
||||
debug!("Stripped Unicode word boundaries. \
|
||||
New AST:\n{:?}", expr);
|
||||
self.regex(&expr).ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(Grep {
|
||||
re: re,
|
||||
required: required,
|
||||
@ -162,11 +167,12 @@ impl GrepBuilder {
|
||||
/// Creates a new regex from the given expression with the current
|
||||
/// configuration.
|
||||
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
||||
let casei =
|
||||
self.opts.case_insensitive
|
||||
|| (self.opts.case_smart && !has_uppercase_literal(expr));
|
||||
RegexBuilder::new(&expr.to_string())
|
||||
.case_insensitive(casei)
|
||||
self.regex_build(RegexBuilder::new(&expr.to_string()))
|
||||
}
|
||||
|
||||
/// Builds a new regex from the given builder using the caller's settings.
|
||||
fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
|
||||
builder
|
||||
.multi_line(true)
|
||||
.unicode(true)
|
||||
.size_limit(self.opts.size_limit)
|
||||
@ -182,12 +188,30 @@ impl GrepBuilder {
|
||||
try!(syntax::ExprBuilder::new()
|
||||
.allow_bytes(true)
|
||||
.unicode(true)
|
||||
.case_insensitive(self.opts.case_insensitive)
|
||||
.case_insensitive(try!(self.is_case_insensitive()))
|
||||
.parse(&self.pattern));
|
||||
let expr = try!(nonl::remove(expr, self.opts.line_terminator));
|
||||
debug!("regex ast:\n{:#?}", expr);
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// Determines whether the case insensitive flag should be enabled or not.
|
||||
///
|
||||
/// An error is returned if the regex could not be parsed.
|
||||
fn is_case_insensitive(&self) -> Result<bool> {
|
||||
if self.opts.case_insensitive {
|
||||
return Ok(true);
|
||||
}
|
||||
if !self.opts.case_smart {
|
||||
return Ok(false);
|
||||
}
|
||||
let expr =
|
||||
try!(syntax::ExprBuilder::new()
|
||||
.allow_bytes(true)
|
||||
.unicode(true)
|
||||
.parse(&self.pattern));
|
||||
Ok(!has_uppercase_literal(&expr))
|
||||
}
|
||||
}
|
||||
|
||||
impl Grep {
|
||||
|
@ -874,6 +874,15 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, "baz:test\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/199
|
||||
clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("foo", "tEsT");
|
||||
cmd.arg("--smart-case");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "foo:tEsT\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/206
|
||||
clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create_dir("foo");
|
||||
|
Loading…
x
Reference in New Issue
Block a user