/*! An interface for regular expressions, with a focus on line oriented search. */ #![deny(missing_docs)] extern crate memchr; use std::fmt; use std::io; use std::ops; use std::u64; use interpolate::interpolate; mod interpolate; /// The type of a match. /// /// The type of a match is a possibly empty range pointing to a contiguous /// block of addressable memory. /// /// Every `Match` is guaranteed to satisfy the invariant that `start <= end`. /// /// # Indexing /// /// This type is structurally identical to `std::ops::Range<usize>`, but /// is a bit more ergonomic for dealing with match indices. In particular, /// this type implements `Copy` and provides methods for building new `Match` /// values based on old `Match` values. Finally, the invariant that `start` /// is always less than or equal to `end` is enforced. /// /// A `Match` can be used to slice a `&[u8]`, `&mut [u8]` or `&str` using /// range notation. e.g., /// /// ``` /// use grep_matcher::Match; /// /// let m = Match::new(2, 5); /// let bytes = b"abcdefghi"; /// assert_eq!(b"cde", &bytes[m]); /// ``` #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct Match { start: usize, end: usize, } impl Match { /// Create a new match. /// /// # Panics /// /// This function panics if `start > end`. #[inline] pub fn new(start: usize, end: usize) -> Match { assert!(start <= end); Match { start, end } } /// Creates a zero width match at the given offset. #[inline] pub fn zero(offset: usize) -> Match { Match { start: offset, end: offset } } /// Return the start offset of this match. #[inline] pub fn start(&self) -> usize { self.start } /// Return the end offset of this match. #[inline] pub fn end(&self) -> usize { self.end } /// Return a new match with the start offset replaced with the given /// value. /// /// # Panics /// /// This method panics if `start > self.end`. #[inline] pub fn with_start(&self, start: usize) -> Match { assert!(start <= self.end); Match { start, ..*self } } /// Return a new match with the end offset replaced with the given /// value. /// /// # Panics /// /// This method panics if `self.start > end`. #[inline] pub fn with_end(&self, end: usize) -> Match { assert!(self.start <= end); Match { end, ..*self } } /// Offset this match by the given amount and return a new match. /// /// This adds the given offset to the start and end of this match, and /// returns the resulting match. /// /// # Panics /// /// This panics if adding the given amount to either the start or end /// offset would result in an overflow. #[inline] pub fn offset(&self, amount: usize) -> Match { Match { start: self.start.checked_add(amount).unwrap(), end: self.end.checked_add(amount).unwrap(), } } /// Returns the number of bytes in this match. #[inline] pub fn len(&self) -> usize { self.end - self.start } /// Returns true if and only if this match is empty. #[inline] pub fn is_empty(&self) -> bool { self.len() == 0 } } impl ops::Index<Match> for [u8] { type Output = [u8]; #[inline] fn index(&self, index: Match) -> &[u8] { &self[index.start..index.end] } } impl ops::IndexMut<Match> for [u8] { #[inline] fn index_mut(&mut self, index: Match) -> &mut [u8] { &mut self[index.start..index.end] } } impl ops::Index<Match> for str { type Output = str; #[inline] fn index(&self, index: Match) -> &str { &self[index.start..index.end] } } /// A line terminator. /// /// A line terminator represents the end of a line. Generally, every line is /// either "terminated" by the end of a stream or a specific byte (or sequence /// of bytes). /// /// Generally, a line terminator is a single byte, specifically, `\n`, on /// Unix-like systems. On Windows, a line terminator is `\r\n` (referred to /// as `CRLF` for `Carriage Return; Line Feed`). /// /// The default line terminator is `\n` on all platforms. #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] pub struct LineTerminator(LineTerminatorImp); #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] enum LineTerminatorImp { /// Any single byte representing a line terminator. /// /// We represent this as an array so we can safely convert it to a slice /// for convenient access. At some point, we can use `std::slice::from_ref` /// instead. Byte([u8; 1]), /// A line terminator represented by `\r\n`. /// /// When this option is used, consumers may generally treat a lone `\n` as /// a line terminator in addition to `\r\n`. CRLF, } impl LineTerminator { /// Return a new single-byte line terminator. Any byte is valid. pub fn byte(byte: u8) -> LineTerminator { LineTerminator(LineTerminatorImp::Byte([byte])) } /// Return a new line terminator represented by `\r\n`. /// /// When this option is used, consumers may generally treat a lone `\n` as /// a line terminator in addition to `\r\n`. pub fn crlf() -> LineTerminator { LineTerminator(LineTerminatorImp::CRLF) } /// Returns true if and only if this line terminator is CRLF. pub fn is_crlf(&self) -> bool { self.0 == LineTerminatorImp::CRLF } /// Returns this line terminator as a single byte. /// /// If the line terminator is CRLF, then this returns `\n`. This is /// useful for routines that, for example, find line boundaries by treating /// `\n` as a line terminator even when it isn't preceded by `\r`. pub fn as_byte(&self) -> u8 { match self.0 { LineTerminatorImp::Byte(array) => array[0], LineTerminatorImp::CRLF => b'\n', } } /// Returns this line terminator as a sequence of bytes. /// /// This returns a singleton sequence for all line terminators except for /// `CRLF`, in which case, it returns `\r\n`. /// /// The slice returned is guaranteed to have length at least `1`. pub fn as_bytes(&self) -> &[u8] { match self.0 { LineTerminatorImp::Byte(ref array) => array, LineTerminatorImp::CRLF => &[b'\r', b'\n'], } } } impl Default for LineTerminator { fn default() -> LineTerminator { LineTerminator::byte(b'\n') } } /// A set of bytes. /// /// In this crate, byte sets are used to express bytes that can never appear /// anywhere in a match for a particular implementation of the `Matcher` trait. /// Specifically, if such a set can be determined, then it's possible for /// callers to perform additional operations on the basis that certain bytes /// may never match. /// /// For example, if a search is configured to possibly produce results that /// span multiple lines but a caller provided pattern can never match across /// multiple lines, then it may make sense to divert to more optimized line /// oriented routines that don't need to handle the multi-line match case. #[derive(Clone, Debug)] pub struct ByteSet(BitSet); #[derive(Clone, Copy)] struct BitSet([u64; 4]); impl fmt::Debug for BitSet { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { let mut fmtd = f.debug_set(); for b in (0..256).map(|b| b as u8) { if ByteSet(*self).contains(b) { fmtd.entry(&b); } } fmtd.finish() } } impl ByteSet { /// Create an empty set of bytes. pub fn empty() -> ByteSet { ByteSet(BitSet([0; 4])) } /// Create a full set of bytes such that every possible byte is in the set /// returned. pub fn full() -> ByteSet { ByteSet(BitSet([u64::MAX; 4])) } /// Add a byte to this set. /// /// If the given byte already belongs to this set, then this is a no-op. pub fn add(&mut self, byte: u8) { let bucket = byte / 64; let bit = byte % 64; (self.0).0[bucket as usize] |= 1 << bit; } /// Add an inclusive range of bytes. pub fn add_all(&mut self, start: u8, end: u8) { for b in (start as u64..end as u64 + 1).map(|b| b as u8) { self.add(b); } } /// Remove a byte from this set. /// /// If the given byte is not in this set, then this is a no-op. pub fn remove(&mut self, byte: u8) { let bucket = byte / 64; let bit = byte % 64; (self.0).0[bucket as usize] &= !(1 << bit); } /// Remove an inclusive range of bytes. pub fn remove_all(&mut self, start: u8, end: u8) { for b in (start as u64..end as u64 + 1).map(|b| b as u8) { self.remove(b); } } /// Return true if and only if the given byte is in this set. pub fn contains(&self, byte: u8) -> bool { let bucket = byte / 64; let bit = byte % 64; (self.0).0[bucket as usize] & (1 << bit) > 0 } } /// A trait that describes implementations of capturing groups. /// /// When a matcher supports capturing group extraction, then it is the /// matcher's responsibility to provide an implementation of this trait. /// /// Principally, this trait provides a way to access capturing groups /// in a uniform way that does not require any specific representation. /// Namely, differ matcher implementations may require different in-memory /// representations of capturing groups. This trait permits matchers to /// maintain their specific in-memory representation. /// /// Note that this trait explicitly does not provide a way to construct a new /// captures value. Instead, it is the responsibility of a `Matcher` to build /// one, which might require knowledge of the matcher's internal implementation /// details. pub trait Captures { /// Return the total number of capturing groups. This includes capturing /// groups that have not matched anything. fn len(&self) -> usize; /// Return the capturing group match at the given index. If no match of /// that capturing group exists, then this returns `None`. /// /// When a matcher reports a match with capturing groups, then the first /// capturing group (at index `0`) must always correspond to the offsets /// for the overall match. fn get(&self, i: usize) -> Option<Match>; /// Returns true if and only if these captures are empty. This occurs /// when `len` is `0`. /// /// Note that capturing groups that have non-zero length but otherwise /// contain no matching groups are *not* empty. fn is_empty(&self) -> bool { self.len() == 0 } /// Expands all instances of `$name` in `replacement` to the corresponding /// capture group `name`, and writes them to the `dst` buffer given. /// /// (Note: If you're looking for a convenient way to perform replacements /// with interpolation, then you'll want to use the `replace_with_captures` /// method on the `Matcher` trait.) /// /// `name` may be an integer corresponding to the index of the /// capture group (counted by order of opening parenthesis where `0` is the /// entire match) or it can be a name (consisting of letters, digits or /// underscores) corresponding to a named capture group. /// /// A `name` is translated to a capture group index via the given /// `name_to_index` function. If `name` isn't a valid capture group /// (whether the name doesn't exist or isn't a valid index), then it is /// replaced with the empty string. /// /// The longest possible name is used. e.g., `$1a` looks up the capture /// group named `1a` and not the capture group at index `1`. To exert /// more precise control over the name, use braces, e.g., `${1}a`. In all /// cases, capture group names are limited to ASCII letters, numbers and /// underscores. /// /// To write a literal `$` use `$$`. /// /// Note that the capture group match indices are resolved by slicing /// the given `haystack`. Generally, this means that `haystack` should be /// the same slice that was searched to get the current capture group /// matches. fn interpolate<F>( &self, name_to_index: F, haystack: &[u8], replacement: &[u8], dst: &mut Vec<u8>, ) where F: FnMut(&str) -> Option<usize> { interpolate( replacement, |i, dst| { if let Some(range) = self.get(i) { dst.extend(&haystack[range]); } }, name_to_index, dst, ) } } /// NoCaptures provides an always-empty implementation of the `Captures` trait. /// /// This type is useful for implementations of `Matcher` that don't support /// capturing groups. #[derive(Clone, Debug)] pub struct NoCaptures(()); impl NoCaptures { /// Create an empty set of capturing groups. pub fn new() -> NoCaptures { NoCaptures(()) } } impl Captures for NoCaptures { fn len(&self) -> usize { 0 } fn get(&self, _: usize) -> Option<Match> { None } } /// NoError provides an error type for matchers that never produce errors. /// /// This error type implements the `std::error::Error` and `fmt::Display` /// traits for use in matcher implementations that can never produce errors. /// /// The `fmt::Display` impl for this type panics. #[derive(Debug, Eq, PartialEq)] pub struct NoError(()); impl ::std::error::Error for NoError { fn description(&self) -> &str { "no error" } } impl fmt::Display for NoError { fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result { panic!("BUG for NoError: an impossible error occurred") } } impl From<NoError> for io::Error { fn from(_: NoError) -> io::Error { panic!("BUG for NoError: an impossible error occurred") } } /// The type of match for a line oriented matcher. #[derive(Clone, Copy, Debug)] pub enum LineMatchKind { /// A position inside a line that is known to contain a match. /// /// This position can be anywhere in the line. It does not need to point /// at the location of the match. Confirmed(usize), /// A position inside a line that may contain a match, and must be searched /// for verification. /// /// This position can be anywhere in the line. It does not need to point /// at the location of the match. Candidate(usize), } /// A matcher defines an interface for regular expression implementations. pub trait Matcher { /// The concrete type of capturing groups used for this matcher. /// /// If this implementation does not support capturing groups, then set /// this to `NoCaptures`. type Captures: Captures; /// The error type used by this matcher. /// /// For matchers in which an error is not possible, they are encouraged to /// use the `NoError` type in this crate. In the future, when the "never" /// (spelled `!`) type is stabilized, then it should probably be used /// instead. type Error: fmt::Display; /// Returns the start and end byte range of the first match in `haystack` /// after `at`, where the byte offsets are relative to that start of /// `haystack` (and not `at`). If no match exists, then `None` is returned. /// /// The text encoding of `haystack` is not strictly specified. Matchers are /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. /// /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. fn find_at( &self, haystack: &[u8], at: usize, ) -> Result<Option<Match>, Self::Error>; /// Creates an empty group of captures suitable for use with the capturing /// APIs of this trait. /// /// Implementations that don't support capturing groups should use /// the `NoCaptures` type and implement this method by calling /// `NoCaptures::new()`. fn new_captures(&self) -> Result<Self::Captures, Self::Error>; /// Returns the total number of capturing groups in this matcher. /// /// If a matcher supports capturing groups, then this value must always be /// at least 1, where the first capturing group always corresponds to the /// overall match. /// /// If a matcher does not support capturing groups, then this should /// always return 0. /// /// By default, capturing groups are not supported, so this always /// returns 0. fn capture_count(&self) -> usize { 0 } /// Maps the given capture group name to its corresponding capture group /// index, if one exists. If one does not exist, then `None` is returned. /// /// If the given capture group name maps to multiple indices, then it is /// not specified which one is returned. However, it is guaranteed that /// one of them is returned. /// /// By default, capturing groups are not supported, so this always returns /// `None`. fn capture_index(&self, _name: &str) -> Option<usize> { None } /// Returns the start and end byte range of the first match in `haystack`. /// If no match exists, then `None` is returned. /// /// The text encoding of `haystack` is not strictly specified. Matchers are /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. fn find( &self, haystack: &[u8], ) -> Result<Option<Match>, Self::Error> { self.find_at(haystack, 0) } /// Executes the given function over successive non-overlapping matches /// in `haystack`. If no match exists, then the given function is never /// called. If the function returns `false`, then iteration stops. fn find_iter<F>( &self, haystack: &[u8], mut matched: F, ) -> Result<(), Self::Error> where F: FnMut(Match) -> bool { self.try_find_iter(haystack, |m| Ok(matched(m))) .map(|r: Result<(), ()>| r.unwrap()) } /// Executes the given function over successive non-overlapping matches /// in `haystack`. If no match exists, then the given function is never /// called. If the function returns `false`, then iteration stops. /// Similarly, if the function returns an error then iteration stops and /// the error is yielded. If an error occurs while executing the search, /// then it is converted to /// `E`. fn try_find_iter<F, E>( &self, haystack: &[u8], mut matched: F, ) -> Result<Result<(), E>, Self::Error> where F: FnMut(Match) -> Result<bool, E> { let mut last_end = 0; let mut last_match = None; loop { if last_end > haystack.len() { return Ok(Ok(())); } let m = match self.find_at(haystack, last_end)? { None => return Ok(Ok(())), Some(m) => m, }; if m.start == m.end { // This is an empty match. To ensure we make progress, start // the next search at the smallest possible starting position // of the next match following this one. last_end = m.end + 1; // Don't accept empty matches immediately following a match. // Just move on to the next match. if Some(m.end) == last_match { continue; } } else { last_end = m.end; } last_match = Some(m.end); match matched(m) { Ok(true) => continue, Ok(false) => return Ok(Ok(())), Err(err) => return Ok(Err(err)), } } } /// Populates the first set of capture group matches from `haystack` into /// `caps`. If no match exists, then `false` is returned. /// /// The text encoding of `haystack` is not strictly specified. Matchers are /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. fn captures( &self, haystack: &[u8], caps: &mut Self::Captures, ) -> Result<bool, Self::Error> { self.captures_at(haystack, 0, caps) } /// Executes the given function over successive non-overlapping matches /// in `haystack` with capture groups extracted from each match. If no /// match exists, then the given function is never called. If the function /// returns `false`, then iteration stops. fn captures_iter<F>( &self, haystack: &[u8], caps: &mut Self::Captures, mut matched: F, ) -> Result<(), Self::Error> where F: FnMut(&Self::Captures) -> bool { self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps))) .map(|r: Result<(), ()>| r.unwrap()) } /// Executes the given function over successive non-overlapping matches /// in `haystack` with capture groups extracted from each match. If no /// match exists, then the given function is never called. If the function /// returns `false`, then iteration stops. Similarly, if the function /// returns an error then iteration stops and the error is yielded. If /// an error occurs while executing the search, then it is converted to /// `E`. fn try_captures_iter<F, E>( &self, haystack: &[u8], caps: &mut Self::Captures, mut matched: F, ) -> Result<Result<(), E>, Self::Error> where F: FnMut(&Self::Captures) -> Result<bool, E> { let mut last_end = 0; let mut last_match = None; loop { if last_end > haystack.len() { return Ok(Ok(())); } if !self.captures_at(haystack, last_end, caps)? { return Ok(Ok(())); } let m = caps.get(0).unwrap(); if m.start == m.end { // This is an empty match. To ensure we make progress, start // the next search at the smallest possible starting position // of the next match following this one. last_end = m.end + 1; // Don't accept empty matches immediately following a match. // Just move on to the next match. if Some(m.end) == last_match { continue; } } else { last_end = m.end; } last_match = Some(m.end); match matched(caps) { Ok(true) => continue, Ok(false) => return Ok(Ok(())), Err(err) => return Ok(Err(err)), } } } /// Populates the first set of capture group matches from `haystack` /// into `matches` after `at`, where the byte offsets in each capturing /// group are relative to the start of `haystack` (and not `at`). If no /// match exists, then `false` is returned and the contents of the given /// capturing groups are unspecified. /// /// The text encoding of `haystack` is not strictly specified. Matchers are /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. /// /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. /// /// By default, capturing groups aren't supported, and this implementation /// will always behave as if a match were impossible. /// /// Implementors that provide support for capturing groups must guarantee /// that when a match occurs, the first capture match (at index `0`) is /// always set to the overall match offsets. /// /// Note that if implementors seek to support capturing groups, then they /// should implement this method. Other methods that match based on /// captures will then work automatically. fn captures_at( &self, _haystack: &[u8], _at: usize, _caps: &mut Self::Captures, ) -> Result<bool, Self::Error> { Ok(false) } /// Replaces every match in the given haystack with the result of calling /// `append`. `append` is given the start and end of a match, along with /// a handle to the `dst` buffer provided. /// /// If the given `append` function returns `false`, then replacement stops. fn replace<F>( &self, haystack: &[u8], dst: &mut Vec<u8>, mut append: F, ) -> Result<(), Self::Error> where F: FnMut(Match, &mut Vec<u8>) -> bool { let mut last_match = 0; self.find_iter(haystack, |m| { dst.extend(&haystack[last_match..m.start]); last_match = m.end; append(m, dst) })?; dst.extend(&haystack[last_match..]); Ok(()) } /// Replaces every match in the given haystack with the result of calling /// `append` with the matching capture groups. /// /// If the given `append` function returns `false`, then replacement stops. fn replace_with_captures<F>( &self, haystack: &[u8], caps: &mut Self::Captures, dst: &mut Vec<u8>, mut append: F, ) -> Result<(), Self::Error> where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool { let mut last_match = 0; self.captures_iter(haystack, caps, |caps| { let m = caps.get(0).unwrap(); dst.extend(&haystack[last_match..m.start]); last_match = m.end; append(caps, dst) })?; dst.extend(&haystack[last_match..]); Ok(()) } /// Returns true if and only if the matcher matches the given haystack. /// /// By default, this method is implemented by calling `shortest_match`. fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> { self.is_match_at(haystack, 0) } /// Returns true if and only if the matcher matches the given haystack /// starting at the given position. /// /// By default, this method is implemented by calling `shortest_match_at`. /// /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. fn is_match_at( &self, haystack: &[u8], at: usize, ) -> Result<bool, Self::Error> { Ok(self.shortest_match_at(haystack, at)?.is_some()) } /// Returns an end location of the first match in `haystack`. If no match /// exists, then `None` is returned. /// /// Note that the end location reported by this method may be less than the /// same end location reported by `find`. For example, running `find` with /// the pattern `a+` on the haystack `aaa` should report a range of `[0, /// 3)`, but `shortest_match` may report `1` as the ending location since /// that is the place at which a match is guaranteed to occur. /// /// This method should never report false positives or false negatives. The /// point of this method is that some implementors may be able to provide /// a faster implementation of this than what `find` does. /// /// By default, this method is implemented by calling `find`. fn shortest_match( &self, haystack: &[u8], ) -> Result<Option<usize>, Self::Error> { self.shortest_match_at(haystack, 0) } /// Returns an end location of the first match in `haystack` starting at /// the given position. If no match exists, then `None` is returned. /// /// Note that the end location reported by this method may be less than the /// same end location reported by `find`. For example, running `find` with /// the pattern `a+` on the haystack `aaa` should report a range of `[0, /// 3)`, but `shortest_match` may report `1` as the ending location since /// that is the place at which a match is guaranteed to occur. /// /// This method should never report false positives or false negatives. The /// point of this method is that some implementors may be able to provide /// a faster implementation of this than what `find` does. /// /// By default, this method is implemented by calling `find_at`. /// /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. fn shortest_match_at( &self, haystack: &[u8], at: usize, ) -> Result<Option<usize>, Self::Error> { Ok(self.find_at(haystack, at)?.map(|m| m.end)) } /// If available, return a set of bytes that will never appear in a match /// produced by an implementation. /// /// Specifically, if such a set can be determined, then it's possible for /// callers to perform additional operations on the basis that certain /// bytes may never match. /// /// For example, if a search is configured to possibly produce results /// that span multiple lines but a caller provided pattern can never /// match across multiple lines, then it may make sense to divert to /// more optimized line oriented routines that don't need to handle the /// multi-line match case. /// /// Implementations that produce this set must never report false /// positives, but may produce false negatives. That is, is a byte is in /// this set then it must be guaranteed that it is never in a match. But, /// if a byte is not in this set, then callers cannot assume that a match /// exists with that byte. /// /// By default, this returns `None`. fn non_matching_bytes(&self) -> Option<&ByteSet> { None } /// If this matcher was compiled as a line oriented matcher, then this /// method returns the line terminator if and only if the line terminator /// never appears in any match produced by this matcher. If this wasn't /// compiled as a line oriented matcher, or if the aforementioned guarantee /// cannot be made, then this must return `None`, which is the default. /// It is **never wrong** to return `None`, but returning a line terminator /// when it can appear in a match results in unspecified behavior. /// /// The line terminator is typically `b'\n'`, but can be any single byte or /// `CRLF`. /// /// By default, this returns `None`. fn line_terminator(&self) -> Option<LineTerminator> { None } /// Return one of the following: a confirmed line match, a candidate line /// match (which may be a false positive) or no match at all (which **must /// not** be a false negative). When reporting a confirmed or candidate /// match, the position returned can be any position in the line. /// /// By default, this never returns a candidate match, and always either /// returns a confirmed match or no match at all. /// /// When a matcher can match spans over multiple lines, then the behavior /// of this method is unspecified. Namely, use of this method only /// makes sense in a context where the caller is looking for the next /// matching line. That is, callers should only use this method when /// `line_terminator` does not return `None`. /// /// # Design rationale /// /// A line matcher is, fundamentally, a normal matcher with the addition /// of one optional method: finding a line. By default, this routine /// is implemented via the matcher's `shortest_match` method, which /// always yields either no match or a `LineMatchKind::Confirmed`. However, /// implementors may provide a routine for this that can return candidate /// lines that need subsequent verification to be confirmed as a match. /// This can be useful in cases where it may be quicker to find candidate /// lines via some other means instead of relying on the more general /// implementations for `find` and `shortest_match`. /// /// For example, consider the regex `\w+foo\s+`. Both `find` and /// `shortest_match` must consider the entire regex, including the `\w+` /// and `\s+`, while searching. However, this method could look for lines /// containing `foo` and return them as candidates. Finding `foo` might /// be implemented as a highly optimized substring search routine (like /// `memmem`), which is likely to be faster than whatever more generalized /// routine is required for resolving `\w+foo\s+`. The caller is then /// responsible for confirming whether a match exists or not. /// /// Note that while this method may report false positives, it must never /// report false negatives. That is, it can never skip over lines that /// contain a match. fn find_candidate_line( &self, haystack: &[u8], ) -> Result<Option<LineMatchKind>, Self::Error> { Ok(self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)) } } impl<'a, M: Matcher> Matcher for &'a M { type Captures = M::Captures; type Error = M::Error; fn find_at( &self, haystack: &[u8], at: usize, ) -> Result<Option<Match>, Self::Error> { (*self).find_at(haystack, at) } fn new_captures(&self) -> Result<Self::Captures, Self::Error> { (*self).new_captures() } fn captures_at( &self, haystack: &[u8], at: usize, caps: &mut Self::Captures, ) -> Result<bool, Self::Error> { (*self).captures_at(haystack, at, caps) } fn capture_index(&self, name: &str) -> Option<usize> { (*self).capture_index(name) } fn capture_count(&self) -> usize { (*self).capture_count() } fn find( &self, haystack: &[u8] ) -> Result<Option<Match>, Self::Error> { (*self).find(haystack) } fn find_iter<F>( &self, haystack: &[u8], matched: F, ) -> Result<(), Self::Error> where F: FnMut(Match) -> bool { (*self).find_iter(haystack, matched) } fn try_find_iter<F, E>( &self, haystack: &[u8], matched: F, ) -> Result<Result<(), E>, Self::Error> where F: FnMut(Match) -> Result<bool, E> { (*self).try_find_iter(haystack, matched) } fn captures( &self, haystack: &[u8], caps: &mut Self::Captures, ) -> Result<bool, Self::Error> { (*self).captures(haystack, caps) } fn captures_iter<F>( &self, haystack: &[u8], caps: &mut Self::Captures, matched: F, ) -> Result<(), Self::Error> where F: FnMut(&Self::Captures) -> bool { (*self).captures_iter(haystack, caps, matched) } fn try_captures_iter<F, E>( &self, haystack: &[u8], caps: &mut Self::Captures, matched: F, ) -> Result<Result<(), E>, Self::Error> where F: FnMut(&Self::Captures) -> Result<bool, E> { (*self).try_captures_iter(haystack, caps, matched) } fn replace<F>( &self, haystack: &[u8], dst: &mut Vec<u8>, append: F, ) -> Result<(), Self::Error> where F: FnMut(Match, &mut Vec<u8>) -> bool { (*self).replace(haystack, dst, append) } fn replace_with_captures<F>( &self, haystack: &[u8], caps: &mut Self::Captures, dst: &mut Vec<u8>, append: F, ) -> Result<(), Self::Error> where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool { (*self).replace_with_captures(haystack, caps, dst, append) } fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> { (*self).is_match(haystack) } fn is_match_at( &self, haystack: &[u8], at: usize ) -> Result<bool, Self::Error> { (*self).is_match_at(haystack, at) } fn shortest_match( &self, haystack: &[u8], ) -> Result<Option<usize>, Self::Error> { (*self).shortest_match(haystack) } fn shortest_match_at( &self, haystack: &[u8], at: usize, ) -> Result<Option<usize>, Self::Error> { (*self).shortest_match_at(haystack, at) } fn non_matching_bytes(&self) -> Option<&ByteSet> { (*self).non_matching_bytes() } fn line_terminator(&self) -> Option<LineTerminator> { (*self).line_terminator() } fn find_candidate_line( &self, haystack: &[u8], ) -> Result<Option<LineMatchKind>, Self::Error> { (*self).find_candidate_line(haystack) } }