mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 09:40:22 -07:00
libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation.
264 lines
8.0 KiB
Rust
264 lines
8.0 KiB
Rust
use regex_syntax::ast::{self, Ast};
|
|
use regex_syntax::ast::parse::Parser;
|
|
|
|
/// Summary flags gathered by walking the AST of a regular expression (used,
/// for example, to implement smart case).
#[derive(Debug, Clone)]
pub struct AstAnalysis {
    /// Set when at least one literal in the regex is an uppercase character.
    any_uppercase: bool,
    /// Set when the regex contains at least one literal of any kind.
    any_literal: bool,
    /// Set while the regex is made up solely of verbatim literals, i.e. no
    /// special regex syntax has been seen.
    all_verbatim_literal: bool,
}
|
|
|
|
impl AstAnalysis {
|
|
/// Returns a `AstAnalysis` value by doing analysis on the AST of `pattern`.
|
|
///
|
|
/// If `pattern` is not a valid regular expression, then `None` is
|
|
/// returned.
|
|
#[allow(dead_code)]
|
|
pub fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
|
|
Parser::new()
|
|
.parse(pattern)
|
|
.map(|ast| AstAnalysis::from_ast(&ast))
|
|
.ok()
|
|
}
|
|
|
|
/// Perform an AST analysis given the AST.
|
|
pub fn from_ast(ast: &Ast) -> AstAnalysis {
|
|
let mut analysis = AstAnalysis::new();
|
|
analysis.from_ast_impl(ast);
|
|
analysis
|
|
}
|
|
|
|
/// Returns true if and only if a literal uppercase character occurs in
|
|
/// the pattern.
|
|
///
|
|
/// For example, a pattern like `\pL` contains no uppercase literals,
|
|
/// even though `L` is uppercase and the `\pL` class contains uppercase
|
|
/// characters.
|
|
pub fn any_uppercase(&self) -> bool {
|
|
self.any_uppercase
|
|
}
|
|
|
|
/// Returns true if and only if the regex contains any literal at all.
|
|
///
|
|
/// For example, a pattern like `\pL` reports `false`, but a pattern like
|
|
/// `\pLfoo` reports `true`.
|
|
pub fn any_literal(&self) -> bool {
|
|
self.any_literal
|
|
}
|
|
|
|
/// Returns true if and only if the entire pattern is a verbatim literal
|
|
/// with no special meta characters.
|
|
///
|
|
/// When this is true, then the pattern satisfies the following law:
|
|
/// `escape(pattern) == pattern`. Notable examples where this returns
|
|
/// `false` include patterns like `a\u0061` even though `\u0061` is just
|
|
/// a literal `a`.
|
|
///
|
|
/// The purpose of this flag is to determine whether the patterns can be
|
|
/// given to non-regex substring search algorithms as-is.
|
|
#[allow(dead_code)]
|
|
pub fn all_verbatim_literal(&self) -> bool {
|
|
self.all_verbatim_literal
|
|
}
|
|
|
|
/// Creates a new `AstAnalysis` value with an initial configuration.
|
|
fn new() -> AstAnalysis {
|
|
AstAnalysis {
|
|
any_uppercase: false,
|
|
any_literal: false,
|
|
all_verbatim_literal: true,
|
|
}
|
|
}
|
|
|
|
fn from_ast_impl(&mut self, ast: &Ast) {
|
|
if self.done() {
|
|
return;
|
|
}
|
|
match *ast {
|
|
Ast::Empty(_) => {}
|
|
Ast::Flags(_)
|
|
| Ast::Dot(_)
|
|
| Ast::Assertion(_)
|
|
| Ast::Class(ast::Class::Unicode(_))
|
|
| Ast::Class(ast::Class::Perl(_)) => {
|
|
self.all_verbatim_literal = false;
|
|
}
|
|
Ast::Literal(ref x) => {
|
|
self.from_ast_literal(x);
|
|
}
|
|
Ast::Class(ast::Class::Bracketed(ref x)) => {
|
|
self.all_verbatim_literal = false;
|
|
self.from_ast_class_set(&x.kind);
|
|
}
|
|
Ast::Repetition(ref x) => {
|
|
self.all_verbatim_literal = false;
|
|
self.from_ast_impl(&x.ast);
|
|
}
|
|
Ast::Group(ref x) => {
|
|
self.all_verbatim_literal = false;
|
|
self.from_ast_impl(&x.ast);
|
|
}
|
|
Ast::Alternation(ref alt) => {
|
|
self.all_verbatim_literal = false;
|
|
for x in &alt.asts {
|
|
self.from_ast_impl(x);
|
|
}
|
|
}
|
|
Ast::Concat(ref alt) => {
|
|
for x in &alt.asts {
|
|
self.from_ast_impl(x);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn from_ast_class_set(&mut self, ast: &ast::ClassSet) {
|
|
if self.done() {
|
|
return;
|
|
}
|
|
match *ast {
|
|
ast::ClassSet::Item(ref item) => {
|
|
self.from_ast_class_set_item(item);
|
|
}
|
|
ast::ClassSet::BinaryOp(ref x) => {
|
|
self.from_ast_class_set(&x.lhs);
|
|
self.from_ast_class_set(&x.rhs);
|
|
}
|
|
}
|
|
}
|
|
|
|
fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) {
|
|
if self.done() {
|
|
return;
|
|
}
|
|
match *ast {
|
|
ast::ClassSetItem::Empty(_)
|
|
| ast::ClassSetItem::Ascii(_)
|
|
| ast::ClassSetItem::Unicode(_)
|
|
| ast::ClassSetItem::Perl(_) => {}
|
|
ast::ClassSetItem::Literal(ref x) => {
|
|
self.from_ast_literal(x);
|
|
}
|
|
ast::ClassSetItem::Range(ref x) => {
|
|
self.from_ast_literal(&x.start);
|
|
self.from_ast_literal(&x.end);
|
|
}
|
|
ast::ClassSetItem::Bracketed(ref x) => {
|
|
self.from_ast_class_set(&x.kind);
|
|
}
|
|
ast::ClassSetItem::Union(ref union) => {
|
|
for x in &union.items {
|
|
self.from_ast_class_set_item(x);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn from_ast_literal(&mut self, ast: &ast::Literal) {
|
|
if ast.kind != ast::LiteralKind::Verbatim {
|
|
self.all_verbatim_literal = false;
|
|
}
|
|
self.any_literal = true;
|
|
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
|
|
}
|
|
|
|
/// Returns true if and only if the attributes can never change no matter
|
|
/// what other AST it might see.
|
|
fn done(&self) -> bool {
|
|
self.any_uppercase && self.any_literal && !self.all_verbatim_literal
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Analyzes `pattern`, panicking if it is not a valid regex.
    fn analysis(pattern: &str) -> AstAnalysis {
        AstAnalysis::from_pattern(pattern).unwrap()
    }

    #[test]
    fn various() {
        // Each row: (pattern, any_uppercase, any_literal, all_verbatim_literal).
        let cases: &[(&str, bool, bool, bool)] = &[
            ("", false, false, true),
            ("foo", false, true, true),
            ("Foo", true, true, true),
            ("foO", true, true, true),
            (r"foo\\", false, true, false),
            (r"foo\w", false, true, false),
            (r"foo\S", false, true, false),
            (r"foo\p{Ll}", false, true, false),
            (r"foo[a-z]", false, true, false),
            (r"foo[A-Z]", true, true, false),
            (r"foo[\S\t]", false, true, false),
            (r"foo\\S", true, true, false),
            (r"\p{Ll}", false, false, false),
            (r"aBc\w", true, true, false),
            (r"a\u0061", false, true, false),
        ];

        for &(pattern, uppercase, literal, verbatim) in cases {
            let x = analysis(pattern);
            assert_eq!(
                x.any_uppercase, uppercase,
                "any_uppercase for {:?}",
                pattern
            );
            assert_eq!(x.any_literal, literal, "any_literal for {:?}", pattern);
            assert_eq!(
                x.all_verbatim_literal, verbatim,
                "all_verbatim_literal for {:?}",
                pattern
            );
        }
    }
}
|