mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-05 14:42:07 -07:00
regex: refactor matcher construction
This does a little bit of refactoring so that we can pass both a ConfiguredHIR and a Regex to the inner literal extraction routine. One downside of this approach is that a regex object hangs on to a ConfiguredHIR. But the extra memory usage is probably negligible. A benefit though is that converting the HIR to its concrete syntax is now lazy and only happens when logging is enabled.
This commit is contained in:
@@ -8,8 +8,8 @@ use {
|
||||
};
|
||||
|
||||
use crate::{
|
||||
ast::AstAnalysis, error::Error, literal::LiteralSets,
|
||||
non_matching::non_matching_bytes, strip::strip_from_match,
|
||||
ast::AstAnalysis, error::Error, non_matching::non_matching_bytes,
|
||||
strip::strip_from_match,
|
||||
};
|
||||
|
||||
/// Config represents the configuration of a regex matcher in this crate.
|
||||
@@ -228,6 +228,11 @@ impl ConfiguredHIR {
|
||||
&self.config
|
||||
}
|
||||
|
||||
/// Return a reference to the underlying HIR.
|
||||
pub(crate) fn hir(&self) -> &Hir {
|
||||
&self.hir
|
||||
}
|
||||
|
||||
/// Convert this HIR to a regex that can be used for matching.
|
||||
pub(crate) fn to_regex(&self) -> Result<Regex, Error> {
|
||||
let meta = Regex::config()
|
||||
@@ -240,8 +245,8 @@ impl ConfiguredHIR {
|
||||
// Same deal here. The default limit for full DFAs is VERY small,
|
||||
// but with ripgrep we can afford to spend a bit more time on
|
||||
// building them I think.
|
||||
.dfa_size_limit(Some(10 * (1 << 20)))
|
||||
.dfa_state_limit(Some(10_000))
|
||||
.dfa_size_limit(Some(1 * (1 << 20)))
|
||||
.dfa_state_limit(Some(1_000))
|
||||
.hybrid_cache_capacity(self.config.dfa_size_limit);
|
||||
Regex::builder()
|
||||
.configure(meta)
|
||||
@@ -249,31 +254,6 @@ impl ConfiguredHIR {
|
||||
.map_err(Error::regex)
|
||||
}
|
||||
|
||||
/// Convert this HIR to its concrete syntax.
|
||||
pub(crate) fn to_pattern(&self) -> String {
|
||||
self.hir.to_string()
|
||||
}
|
||||
|
||||
/// Attempt to extract a "fast" regex that can be used for quickly finding
|
||||
/// candidate lines for a match.
|
||||
///
|
||||
/// If no line terminator was configured, then this always returns
|
||||
/// `Ok(None)`. If a line terminator is configured, then this may return a
|
||||
/// regex.
|
||||
pub(crate) fn to_fast_line_regex(&self) -> Result<Option<Regex>, Error> {
|
||||
if self.config.line_terminator.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
match LiteralSets::new(&self.hir).one_regex(self.config.word) {
|
||||
None => Ok(None),
|
||||
Some(pattern) => {
|
||||
let config = self.config.clone();
|
||||
let chir = ConfiguredHIR::new(config, &[pattern])?;
|
||||
Ok(Some(chir.to_regex()?))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the set of non-matching bytes for this HIR expression.
|
||||
pub(crate) fn non_matching_bytes(&self) -> ByteSet {
|
||||
non_matching_bytes(&self.hir)
|
||||
|
Reference in New Issue
Block a user