diff --git a/grep/src/literals.rs b/grep/src/literals.rs index 4cd34a87..d931f135 100644 --- a/grep/src/literals.rs +++ b/grep/src/literals.rs @@ -9,7 +9,7 @@ principled. */ use std::cmp; -use regex::bytes::Regex; +use regex::bytes::RegexBuilder; use syntax::{ Expr, Literals, Lit, ByteClass, ByteRange, CharClass, ClassRange, Repeater, @@ -33,7 +33,7 @@ impl LiteralSets { } } - pub fn to_regex(&self) -> Option { + pub fn to_regex_builder(&self) -> Option { if self.prefixes.all_complete() && !self.prefixes.is_empty() { debug!("literal prefixes detected: {:?}", self.prefixes); // When this is true, the regex engine will do a literal scan. @@ -79,14 +79,12 @@ impl LiteralSets { debug!("required literals found: {:?}", req_lits); let alts: Vec = req_lits.into_iter().map(|x| bytes_to_regex(x)).collect(); - // Literals always compile. - Some(Regex::new(&alts.join("|")).unwrap()) + Some(RegexBuilder::new(&alts.join("|"))) } else if lit.is_empty() { None } else { - // Literals always compile. debug!("required literal found: {:?}", show(lit)); - Some(Regex::new(&bytes_to_regex(lit)).unwrap()) + Some(RegexBuilder::new(&bytes_to_regex(lit))) } } } diff --git a/grep/src/search.rs b/grep/src/search.rs index c7639937..4e4c48e9 100644 --- a/grep/src/search.rs +++ b/grep/src/search.rs @@ -144,14 +144,19 @@ impl GrepBuilder { let expr = try!(self.parse()); let literals = LiteralSets::create(&expr); let re = try!(self.regex(&expr)); - let required = literals.to_regex().or_else(|| { - let expr = match strip_unicode_word_boundaries(&expr) { - None => return None, - Some(expr) => expr, - }; - debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr); - self.regex(&expr).ok() - }); + let required = match literals.to_regex_builder() { + Some(builder) => Some(try!(self.regex_build(builder))), + None => { + match strip_unicode_word_boundaries(&expr) { + None => None, + Some(expr) => { + debug!("Stripped Unicode word boundaries. \ + New AST:\n{:?}", expr); + self.regex(&expr).ok() + } + } + } + }; Ok(Grep { re: re, required: required, @@ -162,11 +167,12 @@ impl GrepBuilder { /// Creates a new regex from the given expression with the current /// configuration. fn regex(&self, expr: &Expr) -> Result { - let casei = - self.opts.case_insensitive - || (self.opts.case_smart && !has_uppercase_literal(expr)); - RegexBuilder::new(&expr.to_string()) - .case_insensitive(casei) + self.regex_build(RegexBuilder::new(&expr.to_string())) + } + + /// Builds a new regex from the given builder using the caller's settings. + fn regex_build(&self, builder: RegexBuilder) -> Result { + builder .multi_line(true) .unicode(true) .size_limit(self.opts.size_limit) @@ -182,12 +188,30 @@ impl GrepBuilder { try!(syntax::ExprBuilder::new() .allow_bytes(true) .unicode(true) - .case_insensitive(self.opts.case_insensitive) + .case_insensitive(try!(self.is_case_insensitive())) .parse(&self.pattern)); let expr = try!(nonl::remove(expr, self.opts.line_terminator)); debug!("regex ast:\n{:#?}", expr); Ok(expr) } + + /// Determines whether the case insensitive flag should be enabled or not. + /// + /// An error is returned if the regex could not be parsed. + fn is_case_insensitive(&self) -> Result { + if self.opts.case_insensitive { + return Ok(true); + } + if !self.opts.case_smart { + return Ok(false); + } + let expr = + try!(syntax::ExprBuilder::new() + .allow_bytes(true) + .unicode(true) + .parse(&self.pattern)); + Ok(!has_uppercase_literal(&expr)) + } } impl Grep { diff --git a/tests/tests.rs b/tests/tests.rs index 48fb8185..bf6f471d 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -874,6 +874,15 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| { assert_eq!(lines, "baz:test\n"); }); +// See: https://github.com/BurntSushi/ripgrep/issues/199 +clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| { + wd.create("foo", "tEsT"); + cmd.arg("--smart-case"); + + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, "foo:tEsT\n"); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/206 clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create_dir("foo");