mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-08 16:53:58 +02:00
regex: simplify AST analysis a bit
The verbatim literal stuff hasn't been used for a while and I don't foresee it being used. If it's really needed, it would probably better to just implement it by looking at the pattern string itself, which avoids parsing it into an AST altogether.
This commit is contained in:
parent
d9bd261be8
commit
51480d57a6
@ -1,17 +1,13 @@
|
||||
use regex_syntax::ast::parse::Parser;
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
|
||||
/// The results of analyzing AST of a regular expression (e.g., for supporting
|
||||
/// smart case).
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct AstAnalysis {
|
||||
pub(crate) struct AstAnalysis {
|
||||
/// True if and only if a literal uppercase character occurs in the regex.
|
||||
any_uppercase: bool,
|
||||
/// True if and only if the regex contains any literal at all.
|
||||
any_literal: bool,
|
||||
/// True if and only if the regex consists entirely of a literal and no
|
||||
/// other special regex characters.
|
||||
all_verbatim_literal: bool,
|
||||
}
|
||||
|
||||
impl AstAnalysis {
|
||||
@ -19,16 +15,16 @@ impl AstAnalysis {
|
||||
///
|
||||
/// If `pattern` is not a valid regular expression, then `None` is
|
||||
/// returned.
|
||||
#[allow(dead_code)]
|
||||
pub fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
|
||||
Parser::new()
|
||||
#[cfg(test)]
|
||||
pub(crate) fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
|
||||
regex_syntax::ast::parse::Parser::new()
|
||||
.parse(pattern)
|
||||
.map(|ast| AstAnalysis::from_ast(&ast))
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Perform an AST analysis given the AST.
|
||||
pub fn from_ast(ast: &Ast) -> AstAnalysis {
|
||||
pub(crate) fn from_ast(ast: &Ast) -> AstAnalysis {
|
||||
let mut analysis = AstAnalysis::new();
|
||||
analysis.from_ast_impl(ast);
|
||||
analysis
|
||||
@ -40,7 +36,7 @@ impl AstAnalysis {
|
||||
/// For example, a pattern like `\pL` contains no uppercase literals,
|
||||
/// even though `L` is uppercase and the `\pL` class contains uppercase
|
||||
/// characters.
|
||||
pub fn any_uppercase(&self) -> bool {
|
||||
pub(crate) fn any_uppercase(&self) -> bool {
|
||||
self.any_uppercase
|
||||
}
|
||||
|
||||
@ -48,32 +44,13 @@ impl AstAnalysis {
|
||||
///
|
||||
/// For example, a pattern like `\pL` reports `false`, but a pattern like
|
||||
/// `\pLfoo` reports `true`.
|
||||
pub fn any_literal(&self) -> bool {
|
||||
pub(crate) fn any_literal(&self) -> bool {
|
||||
self.any_literal
|
||||
}
|
||||
|
||||
/// Returns true if and only if the entire pattern is a verbatim literal
|
||||
/// with no special meta characters.
|
||||
///
|
||||
/// When this is true, then the pattern satisfies the following law:
|
||||
/// `escape(pattern) == pattern`. Notable examples where this returns
|
||||
/// `false` include patterns like `a\u0061` even though `\u0061` is just
|
||||
/// a literal `a`.
|
||||
///
|
||||
/// The purpose of this flag is to determine whether the patterns can be
|
||||
/// given to non-regex substring search algorithms as-is.
|
||||
#[allow(dead_code)]
|
||||
pub fn all_verbatim_literal(&self) -> bool {
|
||||
self.all_verbatim_literal
|
||||
}
|
||||
|
||||
/// Creates a new `AstAnalysis` value with an initial configuration.
|
||||
fn new() -> AstAnalysis {
|
||||
AstAnalysis {
|
||||
any_uppercase: false,
|
||||
any_literal: false,
|
||||
all_verbatim_literal: true,
|
||||
}
|
||||
AstAnalysis { any_uppercase: false, any_literal: false }
|
||||
}
|
||||
|
||||
fn from_ast_impl(&mut self, ast: &Ast) {
|
||||
@ -86,26 +63,20 @@ impl AstAnalysis {
|
||||
| Ast::Dot(_)
|
||||
| Ast::Assertion(_)
|
||||
| Ast::Class(ast::Class::Unicode(_))
|
||||
| Ast::Class(ast::Class::Perl(_)) => {
|
||||
self.all_verbatim_literal = false;
|
||||
}
|
||||
| Ast::Class(ast::Class::Perl(_)) => {}
|
||||
Ast::Literal(ref x) => {
|
||||
self.from_ast_literal(x);
|
||||
}
|
||||
Ast::Class(ast::Class::Bracketed(ref x)) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_class_set(&x.kind);
|
||||
}
|
||||
Ast::Repetition(ref x) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Group(ref x) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Alternation(ref alt) => {
|
||||
self.all_verbatim_literal = false;
|
||||
for x in &alt.asts {
|
||||
self.from_ast_impl(x);
|
||||
}
|
||||
@ -161,9 +132,6 @@ impl AstAnalysis {
|
||||
}
|
||||
|
||||
fn from_ast_literal(&mut self, ast: &ast::Literal) {
|
||||
if ast.kind != ast::LiteralKind::Verbatim {
|
||||
self.all_verbatim_literal = false;
|
||||
}
|
||||
self.any_literal = true;
|
||||
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
|
||||
}
|
||||
@ -171,7 +139,7 @@ impl AstAnalysis {
|
||||
/// Returns true if and only if the attributes can never change no matter
|
||||
/// what other AST it might see.
|
||||
fn done(&self) -> bool {
|
||||
self.any_uppercase && self.any_literal && !self.all_verbatim_literal
|
||||
self.any_uppercase && self.any_literal
|
||||
}
|
||||
}
|
||||
|
||||
@ -188,76 +156,61 @@ mod tests {
|
||||
let x = analysis("");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(!x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis("foo");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis("Foo");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis("foO");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\\");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\w");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\S");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\p{Ll}");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo[a-z]");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo[A-Z]");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo[\S\t]");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\\S");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"\p{Ll}");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(!x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"aBc\w");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"a\u0061");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user