mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-13 21:26:27 +02:00
regex: fix a perf bug when using -w flag
When looking for an inner literal to speed up searches, if only a prefix is found, we generally give up on inner literal optimizations, since the regex engine will usually handle that case for us. Unfortunately, this decision was being made *before* we wrap the regex in (^|\W)...($|\W) when using the -w/--word-regexp flag, and that wrapping then defeats the literal optimizations inside the regex engine. We fix this with a bit of a hack that says, "if we're doing a word regexp, then give me back any literal you find, even if it's a prefix."
This commit is contained in:
parent
254b8b67bb
commit
9f15e3b671
@ -207,7 +207,7 @@ impl ConfiguredHIR {
|
|||||||
if self.config.line_terminator.is_none() {
|
if self.config.line_terminator.is_none() {
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
match LiteralSets::new(&self.expr).one_regex() {
|
match LiteralSets::new(&self.expr).one_regex(self.config.word) {
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
|
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
|
||||||
}
|
}
|
||||||
|
@ -47,19 +47,24 @@ impl LiteralSets {
|
|||||||
/// generated these literal sets. The idea here is that the pattern
|
/// generated these literal sets. The idea here is that the pattern
|
||||||
/// returned by this method is much cheaper to search for. i.e., It is
|
/// returned by this method is much cheaper to search for. i.e., It is
|
||||||
/// usually a single literal or an alternation of literals.
|
/// usually a single literal or an alternation of literals.
|
||||||
pub fn one_regex(&self) -> Option<String> {
|
pub fn one_regex(&self, word: bool) -> Option<String> {
|
||||||
// TODO: The logic in this function is basically inscrutable. It grew
|
// TODO: The logic in this function is basically inscrutable. It grew
|
||||||
// organically in the old grep 0.1 crate. Ideally, it would be
|
// organically in the old grep 0.1 crate. Ideally, it would be
|
||||||
// re-worked. In fact, the entire inner literal extraction should be
|
// re-worked. In fact, the entire inner literal extraction should be
|
||||||
// re-worked. Actually, most of regex-syntax's literal extraction
|
// re-worked. Actually, most of regex-syntax's literal extraction
|
||||||
// should also be re-worked. Alas... only so much time in the day.
|
// should also be re-worked. Alas... only so much time in the day.
|
||||||
|
|
||||||
|
if !word {
|
||||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||||
// When this is true, the regex engine will do a literal scan,
|
// When this is true, the regex engine will do a literal scan,
|
||||||
// so we don't need to return anything.
|
// so we don't need to return anything. But we only do this
|
||||||
|
// if we aren't doing a word regex, since a word regex adds
|
||||||
|
// a `(?:\W|^)` to the beginning of the regex, thereby
|
||||||
|
// defeating the regex engine's literal detection.
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Out of inner required literals, prefixes and suffixes, which one
|
// Out of inner required literals, prefixes and suffixes, which one
|
||||||
// is the longest? We pick the longest to do fast literal scan under
|
// is the longest? We pick the longest to do fast literal scan under
|
||||||
@ -285,7 +290,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn one_regex(pattern: &str) -> Option<String> {
|
fn one_regex(pattern: &str) -> Option<String> {
|
||||||
sets(pattern).one_regex()
|
sets(pattern).one_regex(false)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Put a pattern into the same format as the one returned by `one_regex`.
|
// Put a pattern into the same format as the one returned by `one_regex`.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user