From 51480d57a67c229d624e5d1edb4aa3782e883696 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sat, 17 Jun 2023 10:00:07 -0400 Subject: [PATCH] regex: simplify AST analysis a bit The verbatim literal stuff hasn't been used for a while and I don't foresee it being used. If it's really needed, it would probably better to just implement it by looking at the pattern string itself, which avoids parsing it into an AST altogether. --- crates/regex/src/ast.rs | 67 ++++++----------------------------------- 1 file changed, 10 insertions(+), 57 deletions(-) diff --git a/crates/regex/src/ast.rs b/crates/regex/src/ast.rs index a4979ded..4d170565 100644 --- a/crates/regex/src/ast.rs +++ b/crates/regex/src/ast.rs @@ -1,17 +1,13 @@ -use regex_syntax::ast::parse::Parser; use regex_syntax::ast::{self, Ast}; /// The results of analyzing AST of a regular expression (e.g., for supporting /// smart case). #[derive(Clone, Debug)] -pub struct AstAnalysis { +pub(crate) struct AstAnalysis { /// True if and only if a literal uppercase character occurs in the regex. any_uppercase: bool, /// True if and only if the regex contains any literal at all. any_literal: bool, - /// True if and only if the regex consists entirely of a literal and no - /// other special regex characters. - all_verbatim_literal: bool, } impl AstAnalysis { @@ -19,16 +15,16 @@ impl AstAnalysis { /// /// If `pattern` is not a valid regular expression, then `None` is /// returned. - #[allow(dead_code)] - pub fn from_pattern(pattern: &str) -> Option { - Parser::new() + #[cfg(test)] + pub(crate) fn from_pattern(pattern: &str) -> Option { + regex_syntax::ast::parse::Parser::new() .parse(pattern) .map(|ast| AstAnalysis::from_ast(&ast)) .ok() } /// Perform an AST analysis given the AST. - pub fn from_ast(ast: &Ast) -> AstAnalysis { + pub(crate) fn from_ast(ast: &Ast) -> AstAnalysis { let mut analysis = AstAnalysis::new(); analysis.from_ast_impl(ast); analysis @@ -40,7 +36,7 @@ impl AstAnalysis { /// For example, a pattern like `\pL` contains no uppercase literals, /// even though `L` is uppercase and the `\pL` class contains uppercase /// characters. - pub fn any_uppercase(&self) -> bool { + pub(crate) fn any_uppercase(&self) -> bool { self.any_uppercase } @@ -48,32 +44,13 @@ impl AstAnalysis { /// /// For example, a pattern like `\pL` reports `false`, but a pattern like /// `\pLfoo` reports `true`. - pub fn any_literal(&self) -> bool { + pub(crate) fn any_literal(&self) -> bool { self.any_literal } - /// Returns true if and only if the entire pattern is a verbatim literal - /// with no special meta characters. - /// - /// When this is true, then the pattern satisfies the following law: - /// `escape(pattern) == pattern`. Notable examples where this returns - /// `false` include patterns like `a\u0061` even though `\u0061` is just - /// a literal `a`. - /// - /// The purpose of this flag is to determine whether the patterns can be - /// given to non-regex substring search algorithms as-is. - #[allow(dead_code)] - pub fn all_verbatim_literal(&self) -> bool { - self.all_verbatim_literal - } - /// Creates a new `AstAnalysis` value with an initial configuration. fn new() -> AstAnalysis { - AstAnalysis { - any_uppercase: false, - any_literal: false, - all_verbatim_literal: true, - } + AstAnalysis { any_uppercase: false, any_literal: false } } fn from_ast_impl(&mut self, ast: &Ast) { @@ -86,26 +63,20 @@ impl AstAnalysis { | Ast::Dot(_) | Ast::Assertion(_) | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => { - self.all_verbatim_literal = false; - } + | Ast::Class(ast::Class::Perl(_)) => {} Ast::Literal(ref x) => { self.from_ast_literal(x); } Ast::Class(ast::Class::Bracketed(ref x)) => { - self.all_verbatim_literal = false; self.from_ast_class_set(&x.kind); } Ast::Repetition(ref x) => { - self.all_verbatim_literal = false; self.from_ast_impl(&x.ast); } Ast::Group(ref x) => { - self.all_verbatim_literal = false; self.from_ast_impl(&x.ast); } Ast::Alternation(ref alt) => { - self.all_verbatim_literal = false; for x in &alt.asts { self.from_ast_impl(x); } @@ -161,9 +132,6 @@ impl AstAnalysis { } fn from_ast_literal(&mut self, ast: &ast::Literal) { - if ast.kind != ast::LiteralKind::Verbatim { - self.all_verbatim_literal = false; - } self.any_literal = true; self.any_uppercase = self.any_uppercase || ast.c.is_uppercase(); } @@ -171,7 +139,7 @@ impl AstAnalysis { /// Returns true if and only if the attributes can never change no matter /// what other AST it might see. fn done(&self) -> bool { - self.any_uppercase && self.any_literal && !self.all_verbatim_literal + self.any_uppercase && self.any_literal } } @@ -188,76 +156,61 @@ mod tests { let x = analysis(""); assert!(!x.any_uppercase); assert!(!x.any_literal); - assert!(x.all_verbatim_literal); let x = analysis("foo"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(x.all_verbatim_literal); let x = analysis("Foo"); assert!(x.any_uppercase); assert!(x.any_literal); - assert!(x.all_verbatim_literal); let x = analysis("foO"); assert!(x.any_uppercase); assert!(x.any_literal); - assert!(x.all_verbatim_literal); let x = analysis(r"foo\\"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo\w"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo\S"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo\p{Ll}"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo[a-z]"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo[A-Z]"); assert!(x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo[\S\t]"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"foo\\S"); assert!(x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"\p{Ll}"); assert!(!x.any_uppercase); assert!(!x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"aBc\w"); assert!(x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); let x = analysis(r"a\u0061"); assert!(!x.any_uppercase); assert!(x.any_literal); - assert!(!x.all_verbatim_literal); } }