regex: s/locations/captures

Now that we use regex-automata, we no longer use any type with
"locations" in its name. That terminology is mostly a legacy of the
top-level regex crate, so use "captures" instead.
This commit is contained in:
Andrew Gallant 2023-06-17 10:04:54 -04:00
parent 51480d57a6
commit a6dbff502f
3 changed files with 23 additions and 24 deletions

View File

@ -14,7 +14,6 @@ impl Error {
} }
pub(crate) fn regex(err: regex_automata::meta::BuildError) -> Error { pub(crate) fn regex(err: regex_automata::meta::BuildError) -> Error {
// Error { kind: ErrorKind::Regex(err.to_string()) }
if let Some(size_limit) = err.size_limit() { if let Some(size_limit) = err.size_limit() {
let kind = ErrorKind::Regex(format!( let kind = ErrorKind::Regex(format!(
"compiled regex exceeds size limit of {size_limit}", "compiled regex exceeds size limit of {size_limit}",
@ -66,7 +65,7 @@ pub enum ErrorKind {
impl std::error::Error for Error {} impl std::error::Error for Error {}
impl std::fmt::Display for Error { impl std::fmt::Display for Error {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
use bstr::ByteSlice; use bstr::ByteSlice;
match self.kind { match self.kind {

View File

@ -791,7 +791,7 @@ impl Matcher for StandardMatcher {
caps: &mut RegexCaptures, caps: &mut RegexCaptures,
) -> Result<bool, NoError> { ) -> Result<bool, NoError> {
let input = Input::new(haystack).span(at..haystack.len()); let input = Input::new(haystack).span(at..haystack.len());
let caps = caps.locations_mut(); let caps = caps.captures_mut();
self.regex.search_captures(&input, caps); self.regex.search_captures(&input, caps);
Ok(caps.is_match()) Ok(caps.is_match())
} }
@ -830,8 +830,8 @@ enum RegexCapturesImp {
mat: Option<Match>, mat: Option<Match>,
}, },
Regex { Regex {
/// Where the locations are stored. /// Where the captures are stored.
locs: AutomataCaptures, caps: AutomataCaptures,
/// These captures behave as if the capturing groups begin at the given /// These captures behave as if the capturing groups begin at the given
/// offset. When set to `0`, this has no effect and capture groups are /// offset. When set to `0`, this has no effect and capture groups are
/// indexed like normal. /// indexed like normal.
@ -850,8 +850,8 @@ impl Captures for RegexCaptures {
fn len(&self) -> usize { fn len(&self) -> usize {
match self.0 { match self.0 {
RegexCapturesImp::AhoCorasick { .. } => 1, RegexCapturesImp::AhoCorasick { .. } => 1,
RegexCapturesImp::Regex { ref locs, offset, .. } => { RegexCapturesImp::Regex { ref caps, offset, .. } => {
locs.group_info().all_group_len().checked_sub(offset).unwrap() caps.group_info().all_group_len().checked_sub(offset).unwrap()
} }
} }
} }
@ -865,9 +865,9 @@ impl Captures for RegexCaptures {
None None
} }
} }
RegexCapturesImp::Regex { ref locs, offset } => { RegexCapturesImp::Regex { ref caps, offset } => {
let actual = i.checked_add(offset).unwrap(); let actual = i.checked_add(offset).unwrap();
locs.get_group(actual).map(|sp| Match::new(sp.start, sp.end)) caps.get_group(actual).map(|sp| Match::new(sp.start, sp.end))
} }
} }
} }
@ -878,23 +878,23 @@ impl RegexCaptures {
RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None }) RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None })
} }
pub(crate) fn new(locs: AutomataCaptures) -> RegexCaptures { pub(crate) fn new(caps: AutomataCaptures) -> RegexCaptures {
RegexCaptures::with_offset(locs, 0) RegexCaptures::with_offset(caps, 0)
} }
pub(crate) fn with_offset( pub(crate) fn with_offset(
locs: AutomataCaptures, caps: AutomataCaptures,
offset: usize, offset: usize,
) -> RegexCaptures { ) -> RegexCaptures {
RegexCaptures(RegexCapturesImp::Regex { locs, offset }) RegexCaptures(RegexCapturesImp::Regex { caps, offset })
} }
pub(crate) fn locations_mut(&mut self) -> &mut AutomataCaptures { pub(crate) fn captures_mut(&mut self) -> &mut AutomataCaptures {
match self.0 { match self.0 {
RegexCapturesImp::AhoCorasick { .. } => { RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid") panic!("getting captures for multi-literal matcher is invalid")
} }
RegexCapturesImp::Regex { ref mut locs, .. } => locs, RegexCapturesImp::Regex { ref mut caps, .. } => caps,
} }
} }

View File

@ -22,21 +22,21 @@ pub struct WordMatcher {
original: Regex, original: Regex,
/// A map from capture group name to capture group index. /// A map from capture group name to capture group index.
names: HashMap<String, usize>, names: HashMap<String, usize>,
/// A reusable buffer for finding the match location of the inner group. /// A reusable buffer for finding the match offset of the inner group.
locs: Arc<ThreadLocal<RefCell<Captures>>>, caps: Arc<ThreadLocal<RefCell<Captures>>>,
} }
impl Clone for WordMatcher { impl Clone for WordMatcher {
fn clone(&self) -> WordMatcher { fn clone(&self) -> WordMatcher {
// We implement Clone manually so that we get a fresh ThreadLocal such // We implement Clone manually so that we get a fresh ThreadLocal such
// that it can set its own thread owner. This permits each thread // that it can set its own thread owner. This permits each thread
// using `locs` to hit the fast path. // using `caps` to hit the fast path.
WordMatcher { WordMatcher {
regex: self.regex.clone(), regex: self.regex.clone(),
pattern: self.pattern.clone(), pattern: self.pattern.clone(),
original: self.original.clone(), original: self.original.clone(),
names: self.names.clone(), names: self.names.clone(),
locs: Arc::new(ThreadLocal::new()), caps: Arc::new(ThreadLocal::new()),
} }
} }
} }
@ -57,7 +57,7 @@ impl WordMatcher {
})?; })?;
let regex = word_expr.regex()?; let regex = word_expr.regex()?;
let pattern = word_expr.pattern(); let pattern = word_expr.pattern();
let locs = Arc::new(ThreadLocal::new()); let caps = Arc::new(ThreadLocal::new());
let mut names = HashMap::new(); let mut names = HashMap::new();
let it = regex.group_info().pattern_names(PatternID::ZERO); let it = regex.group_info().pattern_names(PatternID::ZERO);
@ -66,7 +66,7 @@ impl WordMatcher {
names.insert(name.to_string(), i.checked_sub(1).unwrap()); names.insert(name.to_string(), i.checked_sub(1).unwrap());
} }
} }
Ok(WordMatcher { regex, pattern, original, names, locs }) Ok(WordMatcher { regex, pattern, original, names, caps })
} }
/// Return the underlying pattern string for the regex used by this /// Return the underlying pattern string for the regex used by this
@ -161,7 +161,7 @@ impl Matcher for WordMatcher {
} }
let cell = let cell =
self.locs.get_or(|| RefCell::new(self.regex.create_captures())); self.caps.get_or(|| RefCell::new(self.regex.create_captures()));
let input = Input::new(haystack).span(at..haystack.len()); let input = Input::new(haystack).span(at..haystack.len());
let mut caps = cell.borrow_mut(); let mut caps = cell.borrow_mut();
self.regex.search_captures(&input, &mut caps); self.regex.search_captures(&input, &mut caps);
@ -187,7 +187,7 @@ impl Matcher for WordMatcher {
caps: &mut RegexCaptures, caps: &mut RegexCaptures,
) -> Result<bool, NoError> { ) -> Result<bool, NoError> {
let input = Input::new(haystack).span(at..haystack.len()); let input = Input::new(haystack).span(at..haystack.len());
let caps = caps.locations_mut(); let caps = caps.captures_mut();
self.regex.search_captures(&input, caps); self.regex.search_captures(&input, caps);
Ok(caps.is_match()) Ok(caps.is_match())
} }