mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
Disable Unicode mode for literal regex.
When ripgrep detects a literal, it emits them as raw hex escaped byte sequences to Regex::new. This permits literal optimizations for arbitrary byte sequences (i.e., possibly invalid UTF-8). The problem is that Regex::new interprets hex escaped byte sequences as *Unicode codepoints* by default, but we want them to actually stand for their raw byte values. Therefore, disable Unicode mode. This is OK, since the regex is composed entirely of literals and literal extraction does Unicode case folding. Fixes #251
This commit is contained in:
parent
301a3fd71d
commit
0473df1ef5
@ -79,12 +79,12 @@ impl LiteralSets {
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> =
|
||||
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
|
||||
Some(RegexBuilder::new(&alts.join("|")))
|
||||
Some(RegexBuilder::new(&alts.join("|")).unicode(false))
|
||||
} else if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
debug!("required literal found: {:?}", show(lit));
|
||||
Some(RegexBuilder::new(&bytes_to_regex(lit)))
|
||||
Some(RegexBuilder::new(&bytes_to_regex(&lit)).unicode(false))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -167,14 +167,13 @@ impl GrepBuilder {
|
||||
/// Creates a new regex from the given expression with the current
|
||||
/// configuration.
|
||||
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
||||
self.regex_build(RegexBuilder::new(&expr.to_string()))
|
||||
self.regex_build(RegexBuilder::new(&expr.to_string()).unicode(true))
|
||||
}
|
||||
|
||||
/// Builds a new regex from the given builder using the caller's settings.
|
||||
fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
|
||||
builder
|
||||
.multi_line(true)
|
||||
.unicode(true)
|
||||
.size_limit(self.opts.size_limit)
|
||||
.dfa_size_limit(self.opts.dfa_size_limit)
|
||||
.compile()
|
||||
|
@ -936,6 +936,15 @@ clean!(regression_229, "[E]conomie", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.assert_err(&mut cmd);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/251
|
||||
clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("foo", "привет\nПривет\nПрИвЕт");
|
||||
cmd.arg("-i");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/7
|
||||
sherlock!(feature_7, "-fpat", "sherlock", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("pat", "Sherlock\nHolmes");
|
||||
|
Loading…
Reference in New Issue
Block a user