From 04dde9a4eb5bbf6856e0a4ad16a6a9e3f1dd38ab Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 20 Jun 2023 08:09:23 -0400 Subject: [PATCH] regex: tweak DFA settings This increases the limits a bit for when the regex engine will build and use a fully compiled DFA. They can be faster in some circumstances. For example, '(?-u)^\w{30,}$' gets a nice speed boost from state acceleration. We are also able to remove `regex` proper as a dependency. Wow. --- Cargo.lock | 1 - crates/regex/Cargo.toml | 3 +-- crates/regex/src/config.rs | 9 +++++++++ 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa01606f..cd03344e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -200,7 +200,6 @@ dependencies = [ "bstr", "grep-matcher", "log", - "regex", "regex-automata 0.3.0", "regex-syntax", ] diff --git a/crates/regex/Cargo.toml b/crates/regex/Cargo.toml index 8aece183..039f0b09 100644 --- a/crates/regex/Cargo.toml +++ b/crates/regex/Cargo.toml @@ -18,6 +18,5 @@ aho-corasick = "1.0.2" bstr = "1.5.0" grep-matcher = { version = "0.1.6", path = "../matcher" } log = "0.4.19" -regex = "1.8.3" -regex-automata = { version = "0.3.0" } +regex-automata = "0.3.0" regex-syntax = "0.7.2" diff --git a/crates/regex/src/config.rs b/crates/regex/src/config.rs index 732cda70..d767def6 100644 --- a/crates/regex/src/config.rs +++ b/crates/regex/src/config.rs @@ -233,6 +233,15 @@ impl ConfiguredHIR { let meta = Regex::config() .utf8_empty(false) .nfa_size_limit(Some(self.config.size_limit)) + // We don't expose a knob for this because the one-pass DFA is + // usually not a perf bottleneck for ripgrep. But we give it some + // more room than the default. + .onepass_size_limit(Some(10 * (1 << 20))) + // Same deal here. The default limit for full DFAs is VERY small, + // but with ripgrep we can afford to spend a bit more time on + // building them I think. 
+ .dfa_size_limit(Some(10 * (1 << 20))) + .dfa_state_limit(Some(10_000)) .hybrid_cache_capacity(self.config.dfa_size_limit); Regex::builder() .configure(meta)