mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-17 20:28:03 +02:00
Fixes a bug with --smart-case.
This was a subtle bug, but the big picture was that the smart case information wasn't being carried through to the literal extraction in some cases. When this happened, it was possible to get back an incomplete set of literals, which would therefore miss some valid matches. The fix to this is to actually parse the regex and determine whether smart case applies before doing anything else. It's a little extra work, but parsing is pretty fast. Fixes #199
This commit is contained in:
parent
5bd0edbbe1
commit
0222e024fe
@ -9,7 +9,7 @@ principled.
|
|||||||
*/
|
*/
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
|
|
||||||
use regex::bytes::Regex;
|
use regex::bytes::RegexBuilder;
|
||||||
use syntax::{
|
use syntax::{
|
||||||
Expr, Literals, Lit,
|
Expr, Literals, Lit,
|
||||||
ByteClass, ByteRange, CharClass, ClassRange, Repeater,
|
ByteClass, ByteRange, CharClass, ClassRange, Repeater,
|
||||||
@ -33,7 +33,7 @@ impl LiteralSets {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_regex(&self) -> Option<Regex> {
|
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
|
||||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||||
// When this is true, the regex engine will do a literal scan.
|
// When this is true, the regex engine will do a literal scan.
|
||||||
@ -79,14 +79,12 @@ impl LiteralSets {
|
|||||||
debug!("required literals found: {:?}", req_lits);
|
debug!("required literals found: {:?}", req_lits);
|
||||||
let alts: Vec<String> =
|
let alts: Vec<String> =
|
||||||
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
|
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
|
||||||
// Literals always compile.
|
Some(RegexBuilder::new(&alts.join("|")))
|
||||||
Some(Regex::new(&alts.join("|")).unwrap())
|
|
||||||
} else if lit.is_empty() {
|
} else if lit.is_empty() {
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
// Literals always compile.
|
|
||||||
debug!("required literal found: {:?}", show(lit));
|
debug!("required literal found: {:?}", show(lit));
|
||||||
Some(Regex::new(&bytes_to_regex(lit)).unwrap())
|
Some(RegexBuilder::new(&bytes_to_regex(lit)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -144,14 +144,19 @@ impl GrepBuilder {
|
|||||||
let expr = try!(self.parse());
|
let expr = try!(self.parse());
|
||||||
let literals = LiteralSets::create(&expr);
|
let literals = LiteralSets::create(&expr);
|
||||||
let re = try!(self.regex(&expr));
|
let re = try!(self.regex(&expr));
|
||||||
let required = literals.to_regex().or_else(|| {
|
let required = match literals.to_regex_builder() {
|
||||||
let expr = match strip_unicode_word_boundaries(&expr) {
|
Some(builder) => Some(try!(self.regex_build(builder))),
|
||||||
None => return None,
|
None => {
|
||||||
Some(expr) => expr,
|
match strip_unicode_word_boundaries(&expr) {
|
||||||
};
|
None => None,
|
||||||
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
|
Some(expr) => {
|
||||||
|
debug!("Stripped Unicode word boundaries. \
|
||||||
|
New AST:\n{:?}", expr);
|
||||||
self.regex(&expr).ok()
|
self.regex(&expr).ok()
|
||||||
});
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
Ok(Grep {
|
Ok(Grep {
|
||||||
re: re,
|
re: re,
|
||||||
required: required,
|
required: required,
|
||||||
@ -162,11 +167,12 @@ impl GrepBuilder {
|
|||||||
/// Creates a new regex from the given expression with the current
|
/// Creates a new regex from the given expression with the current
|
||||||
/// configuration.
|
/// configuration.
|
||||||
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
||||||
let casei =
|
self.regex_build(RegexBuilder::new(&expr.to_string()))
|
||||||
self.opts.case_insensitive
|
}
|
||||||
|| (self.opts.case_smart && !has_uppercase_literal(expr));
|
|
||||||
RegexBuilder::new(&expr.to_string())
|
/// Builds a new regex from the given builder using the caller's settings.
|
||||||
.case_insensitive(casei)
|
fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
|
||||||
|
builder
|
||||||
.multi_line(true)
|
.multi_line(true)
|
||||||
.unicode(true)
|
.unicode(true)
|
||||||
.size_limit(self.opts.size_limit)
|
.size_limit(self.opts.size_limit)
|
||||||
@ -182,12 +188,30 @@ impl GrepBuilder {
|
|||||||
try!(syntax::ExprBuilder::new()
|
try!(syntax::ExprBuilder::new()
|
||||||
.allow_bytes(true)
|
.allow_bytes(true)
|
||||||
.unicode(true)
|
.unicode(true)
|
||||||
.case_insensitive(self.opts.case_insensitive)
|
.case_insensitive(try!(self.is_case_insensitive()))
|
||||||
.parse(&self.pattern));
|
.parse(&self.pattern));
|
||||||
let expr = try!(nonl::remove(expr, self.opts.line_terminator));
|
let expr = try!(nonl::remove(expr, self.opts.line_terminator));
|
||||||
debug!("regex ast:\n{:#?}", expr);
|
debug!("regex ast:\n{:#?}", expr);
|
||||||
Ok(expr)
|
Ok(expr)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Determines whether the case insensitive flag should be enabled or not.
|
||||||
|
///
|
||||||
|
/// An error is returned if the regex could not be parsed.
|
||||||
|
fn is_case_insensitive(&self) -> Result<bool> {
|
||||||
|
if self.opts.case_insensitive {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
if !self.opts.case_smart {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
let expr =
|
||||||
|
try!(syntax::ExprBuilder::new()
|
||||||
|
.allow_bytes(true)
|
||||||
|
.unicode(true)
|
||||||
|
.parse(&self.pattern));
|
||||||
|
Ok(!has_uppercase_literal(&expr))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Grep {
|
impl Grep {
|
||||||
|
@ -874,6 +874,15 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
|||||||
assert_eq!(lines, "baz:test\n");
|
assert_eq!(lines, "baz:test\n");
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/199
|
||||||
|
clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
|
||||||
|
wd.create("foo", "tEsT");
|
||||||
|
cmd.arg("--smart-case");
|
||||||
|
|
||||||
|
let lines: String = wd.stdout(&mut cmd);
|
||||||
|
assert_eq!(lines, "foo:tEsT\n");
|
||||||
|
});
|
||||||
|
|
||||||
// See: https://github.com/BurntSushi/ripgrep/issues/206
|
// See: https://github.com/BurntSushi/ripgrep/issues/206
|
||||||
clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||||
wd.create_dir("foo");
|
wd.create_dir("foo");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user