ripgrep/grep-matcher/src/lib.rs

/*!
An interface for regular expressions, with a focus on line oriented search.
*/

#![deny(missing_docs)]

extern crate memchr;

use std::fmt;
use std::io;
use std::ops;
use std::u64;

use interpolate::interpolate;

mod interpolate;

/// A span of bytes reported by a matcher.
///
/// A `Match` is a half-open, possibly empty range `[start, end)` of offsets
/// into a contiguous block of addressable memory.
///
/// Every `Match` satisfies the invariant `start <= end`: the fields are
/// private and every constructor either checks the invariant or preserves it.
///
/// # Indexing
///
/// Structurally this is the same as `std::ops::Range<usize>`, but it is more
/// convenient for working with match offsets: it implements `Copy`, it offers
/// methods that derive new `Match` values from existing ones, and it enforces
/// the `start <= end` invariant.
///
/// A `Match` can be used directly to slice a `&[u8]`, `&mut [u8]` or `&str`.
/// e.g.,
///
/// ```
/// use grep_matcher::Match;
///
/// let m = Match::new(2, 5);
/// let bytes = b"abcdefghi";
/// assert_eq!(b"cde", &bytes[m]);
/// ```
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct Match {
    start: usize,
    end: usize,
}

impl Match {
    /// Build a match spanning `[start, end)`.
    ///
    /// # Panics
    ///
    /// Panics when `start > end`.
    #[inline]
    pub fn new(start: usize, end: usize) -> Match {
        assert!(start <= end);
        Match { start, end }
    }

    /// Build an empty match whose start and end are both `offset`.
    #[inline]
    pub fn zero(offset: usize) -> Match {
        // `offset <= offset` always holds, so the check in `new` never fires.
        Match::new(offset, offset)
    }

    /// The offset at which this match begins.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// The offset at which this match ends (exclusive).
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }

    /// Return a copy of this match whose start offset is `start`; the end
    /// offset is left untouched.
    ///
    /// # Panics
    ///
    /// Panics when `start > self.end`.
    #[inline]
    pub fn with_start(&self, start: usize) -> Match {
        assert!(start <= self.end);
        Match { start, end: self.end }
    }

    /// Return a copy of this match whose end offset is `end`; the start
    /// offset is left untouched.
    ///
    /// # Panics
    ///
    /// Panics when `self.start > end`.
    #[inline]
    pub fn with_end(&self, end: usize) -> Match {
        assert!(self.start <= end);
        Match { start: self.start, end }
    }

    /// Return a copy of this match shifted forward by `amount`.
    ///
    /// Both the start and the end offsets have `amount` added to them, so
    /// the length of the match is unchanged.
    ///
    /// # Panics
    ///
    /// Panics if adding `amount` to either offset overflows `usize`.
    #[inline]
    pub fn offset(&self, amount: usize) -> Match {
        let start = self.start.checked_add(amount).unwrap();
        let end = self.end.checked_add(amount).unwrap();
        Match { start, end }
    }

    /// The number of bytes covered by this match.
    #[inline]
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// Returns true if and only if this match covers zero bytes.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.start == self.end
    }
}

impl ops::Index<Match> for [u8] {
    type Output = [u8];

    #[inline]
    fn index(&self, index: Match) -> &[u8] {
        let Match { start, end } = index;
        &self[start..end]
    }
}

impl ops::IndexMut<Match> for [u8] {
    #[inline]
    fn index_mut(&mut self, index: Match) -> &mut [u8] {
        let Match { start, end } = index;
        &mut self[start..end]
    }
}

impl ops::Index<Match> for str {
    type Output = str;

    #[inline]
    fn index(&self, index: Match) -> &str {
        let Match { start, end } = index;
        &self[start..end]
    }
}

/// A line terminator.
///
/// A line terminator marks the end of a line. A line is generally either
/// "terminated" by the end of a stream or by a specific byte (or sequence of
/// bytes).
///
/// On Unix-like systems the terminator is usually the single byte `\n`. On
/// Windows it is usually `\r\n` (called `CRLF`, for `Carriage Return; Line
/// Feed`).
///
/// The default line terminator is `\n` on all platforms.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub struct LineTerminator(LineTerminatorImp);

#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum LineTerminatorImp {
    /// An arbitrary single byte acting as the line terminator.
    ///
    /// The byte is stored in a one-element array so that it can be handed
    /// out as a slice without any unsafe code. `std::slice::from_ref` could
    /// replace this trick at some point.
    Byte([u8; 1]),
    /// The two byte sequence `\r\n`.
    ///
    /// When this variant is in use, consumers may generally treat a lone
    /// `\n` as a line terminator in addition to `\r\n`.
    CRLF,
}

impl LineTerminator {
    /// Build a line terminator consisting of exactly one byte. Every byte
    /// value is accepted.
    pub fn byte(byte: u8) -> LineTerminator {
        let imp = LineTerminatorImp::Byte([byte]);
        LineTerminator(imp)
    }

    /// Build the `\r\n` line terminator.
    ///
    /// When this terminator is in use, consumers may generally treat a lone
    /// `\n` as a line terminator in addition to `\r\n`.
    pub fn crlf() -> LineTerminator {
        let imp = LineTerminatorImp::CRLF;
        LineTerminator(imp)
    }

    /// Returns true if and only if this line terminator is CRLF.
    pub fn is_crlf(&self) -> bool {
        match self.0 {
            LineTerminatorImp::CRLF => true,
            LineTerminatorImp::Byte(_) => false,
        }
    }

    /// Collapse this line terminator to a single byte.
    ///
    /// For CRLF the byte returned is `\n`. That is what routines want when
    /// they locate line boundaries by looking for `\n` regardless of whether
    /// it is preceded by `\r`.
    pub fn as_byte(&self) -> u8 {
        match self.0 {
            LineTerminatorImp::CRLF => b'\n',
            LineTerminatorImp::Byte(single) => single[0],
        }
    }

    /// View this line terminator as a byte sequence.
    ///
    /// Single-byte terminators yield a one-element slice; `CRLF` yields
    /// `\r\n`.
    ///
    /// The returned slice always has length at least `1`.
    pub fn as_bytes(&self) -> &[u8] {
        match self.0 {
            LineTerminatorImp::CRLF => b"\r\n",
            LineTerminatorImp::Byte(ref single) => &single[..],
        }
    }
}

impl Default for LineTerminator {
    /// The default terminator is the single byte `\n`.
    fn default() -> LineTerminator {
        LineTerminator::byte(b'\n')
    }
}

/// A set of bytes.
///
/// Within this crate, a byte set describes the bytes that can never occur
/// anywhere in a match produced by a particular `Matcher` implementation.
/// When such a set is known, callers can perform additional operations on
/// the basis that those bytes never participate in a match.
///
/// For example, a search may be configured to permit results spanning
/// multiple lines while the caller's pattern can never actually match across
/// a line boundary. In that case it may make sense to switch to faster line
/// oriented routines that do not need to handle multi-line matches.
#[derive(Clone, Debug)]
pub struct ByteSet(BitSet);

/// A 256-bit bitmap, one bit per possible byte value, stored as four 64-bit
/// words.
#[derive(Clone, Copy)]
struct BitSet([u64; 4]);

impl fmt::Debug for BitSet {
    /// Formats the bitmap as the set of byte values whose bit is set, in
    /// ascending order.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let set = ByteSet(*self);
        let members = (0..256).map(|b| b as u8).filter(|&b| set.contains(b));
        f.debug_set().entries(members).finish()
    }
}

impl ByteSet {
    /// Create a set that contains no bytes.
    pub fn empty() -> ByteSet {
        ByteSet(BitSet([0; 4]))
    }

    /// Create a set that contains every possible byte.
    pub fn full() -> ByteSet {
        ByteSet(BitSet([u64::MAX; 4]))
    }

    /// Map a byte to the index of the word holding its bit and a mask that
    /// selects that bit within the word.
    fn slot(byte: u8) -> (usize, u64) {
        (usize::from(byte / 64), 1u64 << (byte % 64))
    }

    /// Insert `byte` into this set.
    ///
    /// Inserting a byte that is already present is a no-op.
    pub fn add(&mut self, byte: u8) {
        let (word, mask) = ByteSet::slot(byte);
        (self.0).0[word] |= mask;
    }

    /// Insert every byte in the inclusive range `start..=end`.
    ///
    /// Nothing is inserted when `start > end`.
    pub fn add_all(&mut self, start: u8, end: u8) {
        // Iterate in a wider integer type so that `end + 1` cannot overflow
        // when `end` is 255.
        let (lo, hi) = (u16::from(start), u16::from(end));
        for wide in lo..hi + 1 {
            self.add(wide as u8);
        }
    }

    /// Delete `byte` from this set.
    ///
    /// Deleting a byte that is not present is a no-op.
    pub fn remove(&mut self, byte: u8) {
        let (word, mask) = ByteSet::slot(byte);
        (self.0).0[word] &= !mask;
    }

    /// Delete every byte in the inclusive range `start..=end`.
    ///
    /// Nothing is deleted when `start > end`.
    pub fn remove_all(&mut self, start: u8, end: u8) {
        // See `add_all` for why a wider integer type is used here.
        let (lo, hi) = (u16::from(start), u16::from(end));
        for wide in lo..hi + 1 {
            self.remove(wide as u8);
        }
    }

    /// Return true if and only if `byte` is a member of this set.
    pub fn contains(&self, byte: u8) -> bool {
        let (word, mask) = ByteSet::slot(byte);
        (self.0).0[word] & mask != 0
    }
}

/// A uniform view over a matcher's capturing groups.
///
/// A matcher that supports capturing group extraction is responsible for
/// providing an implementation of this trait.
///
/// The point of the trait is to expose capturing groups without dictating
/// how they are stored. Different matcher implementations may need different
/// in-memory representations of capturing groups, and this trait lets each
/// of them keep its own.
///
/// This trait deliberately offers no way to construct a new captures value.
/// Building one is the job of a `Matcher`, since doing so may require
/// knowledge of the matcher's internal implementation details.
pub trait Captures {
    /// The total number of capturing groups, including groups that did not
    /// participate in the match.
    fn len(&self) -> usize;

    /// The match for the capturing group at index `i`, or `None` if that
    /// group has no match.
    ///
    /// Whenever a matcher reports a match with capturing groups, the group
    /// at index `0` must correspond to the offsets of the overall match.
    fn get(&self, i: usize) -> Option<Match>;

    /// Returns true if and only if there are no capturing groups at all,
    /// i.e., `len` is `0`.
    ///
    /// Captures with a non-zero number of groups are *not* empty, even when
    /// none of those groups matched anything.
    fn is_empty(&self) -> bool {
        0 == self.len()
    }

    /// Expands every `$name` in `replacement` to the text of the capture
    /// group `name` and appends the result to `dst`.
    ///
    /// (Note: for a convenient way to perform replacements with
    /// interpolation, see the `replace_with_captures` method on the
    /// `Matcher` trait.)
    ///
    /// `name` is either an integer giving the index of the capture group
    /// (groups are numbered by the order of their opening parenthesis, with
    /// `0` being the entire match) or a name (made of letters, digits or
    /// underscores) referring to a named capture group.
    ///
    /// Names are translated to capture group indices by the `name_to_index`
    /// function given. When `name` does not refer to a valid capture group
    /// (because the name doesn't exist or the index isn't valid), it is
    /// replaced with the empty string.
    ///
    /// The longest possible name is always used. e.g., `$1a` refers to the
    /// capture group named `1a`, not to the group at index `1`. Use braces
    /// for precise control over where the name ends, e.g., `${1}a`. In all
    /// cases, capture group names are limited to ASCII letters, numbers and
    /// underscores.
    ///
    /// A literal `$` is written as `$$`.
    ///
    /// The text of each capture group is obtained by slicing `haystack` with
    /// the group's match offsets. Generally, this means `haystack` should be
    /// the same slice that was searched to produce these captures.
    fn interpolate<F>(
        &self,
        name_to_index: F,
        haystack: &[u8],
        replacement: &[u8],
        dst: &mut Vec<u8>,
    ) where F: FnMut(&str) -> Option<usize>
    {
        interpolate(
            replacement,
            |group, out| {
                // Groups without a match contribute nothing.
                if let Some(m) = self.get(group) {
                    out.extend_from_slice(&haystack[m]);
                }
            },
            name_to_index,
            dst,
        )
    }
}

/// An implementation of the `Captures` trait that never holds any groups.
///
/// Implementations of `Matcher` that don't support capturing groups can use
/// this as their `Captures` type.
#[derive(Clone, Debug)]
pub struct NoCaptures(());

impl NoCaptures {
    /// Create an empty set of capturing groups.
    pub fn new() -> NoCaptures {
        NoCaptures(())
    }
}

impl Captures for NoCaptures {
    /// There are never any groups, so the length is always `0`.
    fn len(&self) -> usize {
        0
    }

    /// No group exists at any index, so this always returns `None`.
    fn get(&self, _index: usize) -> Option<Match> {
        None
    }
}

/// An error type for matchers that can never fail.
///
/// `NoError` implements the `std::error::Error` and `fmt::Display` traits so
/// that it can stand in wherever a matcher's error type is required even
/// though no error can actually be produced.
///
/// Because a value of this type is never expected to exist, both the
/// `fmt::Display` impl and the conversion into `io::Error` panic.
#[derive(Debug, Eq, PartialEq)]
pub struct NoError(());

impl ::std::error::Error for NoError {
    fn description(&self) -> &str {
        "no error"
    }
}

impl fmt::Display for NoError {
    /// Always panics: displaying a `NoError` means an impossible error
    /// occurred.
    fn fmt(&self, _f: &mut fmt::Formatter) -> fmt::Result {
        panic!("BUG for NoError: an impossible error occurred")
    }
}

impl From<NoError> for io::Error {
    /// Always panics: converting a `NoError` means an impossible error
    /// occurred.
    fn from(_err: NoError) -> io::Error {
        panic!("BUG for NoError: an impossible error occurred")
    }
}

/// The type of match for a line oriented matcher.
///
/// This is the value produced by `Matcher::find_candidate_line`. Both
/// variants carry a position that identifies a line; they differ only in
/// whether the line is already known to match or still needs verification.
#[derive(Clone, Copy, Debug)]
pub enum LineMatchKind {
    /// A position inside a line that is known to contain a match.
    ///
    /// This position can be anywhere in the line. It does not need to point
    /// at the location of the match.
    Confirmed(usize),
    /// A position inside a line that may contain a match, and must be searched
    /// for verification.
    ///
    /// This position can be anywhere in the line. It does not need to point
    /// at the location of the match.
    Candidate(usize),
}

/// A matcher defines an interface for regular expression implementations.
///
/// Only `find_at` and `new_captures` are required. Every other method has a
/// default implementation. The defaults for `captures_at`, `capture_count`
/// and `capture_index` behave as if capturing groups were unsupported, so
/// matchers that do support them should override those methods.
pub trait Matcher {
    /// The concrete type of capturing groups used for this matcher.
    ///
    /// If this implementation does not support capturing groups, then set
    /// this to `NoCaptures`.
    type Captures: Captures;

    /// The error type used by this matcher.
    ///
    /// For matchers in which an error is not possible, they are encouraged to
    /// use the `NoError` type in this crate. In the future, when the "never"
    /// (spelled `!`) type is stabilized, then it should probably be used
    /// instead.
    type Error: fmt::Display;

    /// Returns the start and end byte range of the first match in `haystack`
    /// after `at`, where the byte offsets are relative to the start of
    /// `haystack` (and not `at`). If no match exists, then `None` is returned.
    ///
    /// The text encoding of `haystack` is not strictly specified. Matchers are
    /// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
    ///
    /// The significance of the starting point is that it takes the surrounding
    /// context into consideration. For example, the `\A` anchor can only
    /// match when `at == 0`.
    fn find_at(
        &self,
        haystack: &[u8],
        at: usize,
    ) -> Result<Option<Match>, Self::Error>;

    /// Creates an empty group of captures suitable for use with the capturing
    /// APIs of this trait.
    ///
    /// Implementations that don't support capturing groups should use
    /// the `NoCaptures` type and implement this method by calling
    /// `NoCaptures::new()`.
    fn new_captures(&self) -> Result<Self::Captures, Self::Error>;

    /// Returns the total number of capturing groups in this matcher.
    ///
    /// If a matcher supports capturing groups, then this value must always be
    /// at least 1, where the first capturing group always corresponds to the
    /// overall match.
    ///
    /// If a matcher does not support capturing groups, then this should
    /// always return 0.
    ///
    /// By default, capturing groups are not supported, so this always
    /// returns 0.
    fn capture_count(&self) -> usize {
        0
    }

    /// Maps the given capture group name to its corresponding capture group
    /// index, if one exists. If one does not exist, then `None` is returned.
    ///
    /// If the given capture group name maps to multiple indices, then it is
    /// not specified which one is returned. However, it is guaranteed that
    /// one of them is returned.
    ///
    /// By default, capturing groups are not supported, so this always returns
    /// `None`.
    fn capture_index(&self, _name: &str) -> Option<usize> {
        None
    }

    /// Returns the start and end byte range of the first match in `haystack`.
    /// If no match exists, then `None` is returned.
    ///
    /// The text encoding of `haystack` is not strictly specified. Matchers are
    /// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
    ///
    /// By default, this method is implemented by calling `find_at` with a
    /// starting position of `0`.
    fn find(
        &self,
        haystack: &[u8],
    ) -> Result<Option<Match>, Self::Error> {
        self.find_at(haystack, 0)
    }

    /// Executes the given function over successive non-overlapping matches
    /// in `haystack`. If no match exists, then the given function is never
    /// called. If the function returns `false`, then iteration stops.
    ///
    /// This is a convenience wrapper around `try_find_iter` for callbacks
    /// that cannot fail.
    fn find_iter<F>(
        &self,
        haystack: &[u8],
        mut matched: F,
    ) -> Result<(), Self::Error>
    where F: FnMut(Match) -> bool
    {
        // The wrapped callback only ever yields `Ok`, so the inner result is
        // never an error and the `unwrap` cannot panic.
        self.try_find_iter(haystack, |m| Ok(matched(m)))
            .map(|r: Result<(), ()>| r.unwrap())
    }

    /// Executes the given function over successive non-overlapping matches
    /// in `haystack`. If no match exists, then the given function is never
    /// called. If the function returns `false`, then iteration stops.
    /// Similarly, if the function returns an error then iteration stops and
    /// the error is yielded as the inner `Err(E)`. If an error occurs while
    /// executing the search, then it is returned as the outer
    /// `Err(Self::Error)`.
    fn try_find_iter<F, E>(
        &self,
        haystack: &[u8],
        mut matched: F,
    ) -> Result<Result<(), E>, Self::Error>
    where F: FnMut(Match) -> Result<bool, E>
    {
        // Offset at which the next search starts.
        let mut last_end = 0;
        // End offset of the most recently reported match, if any.
        let mut last_match = None;

        loop {
            // An empty match moves `last_end` one past its end, which can
            // put it beyond the end of the haystack. Nothing is left to
            // search in that case.
            if last_end > haystack.len() {
                return Ok(Ok(()));
            }
            let m = match self.find_at(haystack, last_end)? {
                None => return Ok(Ok(())),
                Some(m) => m,
            };
            if m.start == m.end {
                // This is an empty match. To ensure we make progress, start
                // the next search at the smallest possible starting position
                // of the next match following this one.
                last_end = m.end + 1;
                // Don't accept empty matches immediately following a match.
                // Just move on to the next match.
                if Some(m.end) == last_match {
                    continue;
                }
            } else {
                last_end = m.end;
            }
            last_match = Some(m.end);
            match matched(m) {
                Ok(true) => continue,
                Ok(false) => return Ok(Ok(())),
                Err(err) => return Ok(Err(err)),
            }
        }
    }

    /// Populates the first set of capture group matches from `haystack` into
    /// `caps`. If no match exists, then `false` is returned.
    ///
    /// The text encoding of `haystack` is not strictly specified. Matchers are
    /// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
    ///
    /// By default, this method is implemented by calling `captures_at` with
    /// a starting position of `0`.
    fn captures(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
    ) -> Result<bool, Self::Error> {
        self.captures_at(haystack, 0, caps)
    }

    /// Executes the given function over successive non-overlapping matches
    /// in `haystack` with capture groups extracted from each match. If no
    /// match exists, then the given function is never called. If the function
    /// returns `false`, then iteration stops.
    ///
    /// This is a convenience wrapper around `try_captures_iter` for
    /// callbacks that cannot fail.
    fn captures_iter<F>(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
        mut matched: F,
    ) -> Result<(), Self::Error>
    where F: FnMut(&Self::Captures) -> bool
    {
        // The wrapped callback only ever yields `Ok`, so the inner result is
        // never an error and the `unwrap` cannot panic.
        self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps)))
            .map(|r: Result<(), ()>| r.unwrap())
    }

    /// Executes the given function over successive non-overlapping matches
    /// in `haystack` with capture groups extracted from each match. If no
    /// match exists, then the given function is never called. If the function
    /// returns `false`, then iteration stops. Similarly, if the function
    /// returns an error then iteration stops and the error is yielded as the
    /// inner `Err(E)`. If an error occurs while executing the search, then
    /// it is returned as the outer `Err(Self::Error)`.
    ///
    /// # Panics
    ///
    /// This panics if `captures_at` reports a match but the capture group at
    /// index `0` is not set.
    fn try_captures_iter<F, E>(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
        mut matched: F,
    ) -> Result<Result<(), E>, Self::Error>
    where F: FnMut(&Self::Captures) -> Result<bool, E>
    {
        // Offset at which the next search starts.
        let mut last_end = 0;
        // End offset of the most recently reported match, if any.
        let mut last_match = None;

        loop {
            // An empty match moves `last_end` one past its end, which can
            // put it beyond the end of the haystack. Nothing is left to
            // search in that case.
            if last_end > haystack.len() {
                return Ok(Ok(()));
            }
            if !self.captures_at(haystack, last_end, caps)? {
                return Ok(Ok(()));
            }
            // Group 0 is the overall match.
            let m = caps.get(0).unwrap();
            if m.start == m.end {
                // This is an empty match. To ensure we make progress, start
                // the next search at the smallest possible starting position
                // of the next match following this one.
                last_end = m.end + 1;
                // Don't accept empty matches immediately following a match.
                // Just move on to the next match.
                if Some(m.end) == last_match {
                    continue;
                }
            } else {
                last_end = m.end;
            }
            last_match = Some(m.end);
            match matched(caps) {
                Ok(true) => continue,
                Ok(false) => return Ok(Ok(())),
                Err(err) => return Ok(Err(err)),
            }
        }
    }

    /// Populates the first set of capture group matches from `haystack`
    /// into `caps` after `at`, where the byte offsets in each capturing
    /// group are relative to the start of `haystack` (and not `at`). If no
    /// match exists, then `false` is returned and the contents of the given
    /// capturing groups are unspecified.
    ///
    /// The text encoding of `haystack` is not strictly specified. Matchers are
    /// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
    ///
    /// The significance of the starting point is that it takes the surrounding
    /// context into consideration. For example, the `\A` anchor can only
    /// match when `at == 0`.
    ///
    /// By default, capturing groups aren't supported, and this implementation
    /// will always behave as if a match were impossible.
    ///
    /// Implementors that provide support for capturing groups must guarantee
    /// that when a match occurs, the first capture match (at index `0`) is
    /// always set to the overall match offsets.
    ///
    /// Note that if implementors seek to support capturing groups, then they
    /// should implement this method. Other methods that match based on
    /// captures will then work automatically.
    fn captures_at(
        &self,
        _haystack: &[u8],
        _at: usize,
        _caps: &mut Self::Captures,
    ) -> Result<bool, Self::Error> {
        Ok(false)
    }

    /// Replaces every match in the given haystack with the result of calling
    /// `append`. `append` is given the start and end of a match, along with
    /// a handle to the `dst` buffer provided.
    ///
    /// The text between matches is copied from `haystack` to `dst` as is.
    ///
    /// If the given `append` function returns `false`, then replacement stops.
    /// The portion of `haystack` following the last match visited is still
    /// copied to `dst` unchanged.
    fn replace<F>(
        &self,
        haystack: &[u8],
        dst: &mut Vec<u8>,
        mut append: F,
    ) -> Result<(), Self::Error>
    where F: FnMut(Match, &mut Vec<u8>) -> bool
    {
        // End offset of the previous match; everything from here up to the
        // next match is copied verbatim.
        let mut last_match = 0;
        self.find_iter(haystack, |m| {
            dst.extend(&haystack[last_match..m.start]);
            last_match = m.end;
            append(m, dst)
        })?;
        // Copy whatever follows the final match.
        dst.extend(&haystack[last_match..]);
        Ok(())
    }

    /// Replaces every match in the given haystack with the result of calling
    /// `append` with the matching capture groups.
    ///
    /// The text between matches is copied from `haystack` to `dst` as is.
    ///
    /// If the given `append` function returns `false`, then replacement stops.
    /// The portion of `haystack` following the last match visited is still
    /// copied to `dst` unchanged.
    fn replace_with_captures<F>(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
        dst: &mut Vec<u8>,
        mut append: F,
    ) -> Result<(), Self::Error>
    where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
    {
        // End offset of the previous match; everything from here up to the
        // next match is copied verbatim.
        let mut last_match = 0;
        self.captures_iter(haystack, caps, |caps| {
            // Group 0 is the overall match.
            let m = caps.get(0).unwrap();
            dst.extend(&haystack[last_match..m.start]);
            last_match = m.end;
            append(caps, dst)
        })?;
        // Copy whatever follows the final match.
        dst.extend(&haystack[last_match..]);
        Ok(())
    }

    /// Returns true if and only if the matcher matches the given haystack.
    ///
    /// By default, this method is implemented by calling `is_match_at` with
    /// a starting position of `0`.
    fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
        self.is_match_at(haystack, 0)
    }

    /// Returns true if and only if the matcher matches the given haystack
    /// starting at the given position.
    ///
    /// By default, this method is implemented by calling `shortest_match_at`.
    ///
    /// The significance of the starting point is that it takes the surrounding
    /// context into consideration. For example, the `\A` anchor can only
    /// match when `at == 0`.
    fn is_match_at(
        &self,
        haystack: &[u8],
        at: usize,
    ) -> Result<bool, Self::Error> {
        Ok(self.shortest_match_at(haystack, at)?.is_some())
    }

    /// Returns an end location of the first match in `haystack`. If no match
    /// exists, then `None` is returned.
    ///
    /// Note that the end location reported by this method may be less than the
    /// same end location reported by `find`. For example, running `find` with
    /// the pattern `a+` on the haystack `aaa` should report a range of `[0,
    /// 3)`, but `shortest_match` may report `1` as the ending location since
    /// that is the place at which a match is guaranteed to occur.
    ///
    /// This method should never report false positives or false negatives. The
    /// point of this method is that some implementors may be able to provide
    /// a faster implementation of this than what `find` does.
    ///
    /// By default, this method is implemented by calling `shortest_match_at`
    /// with a starting position of `0`.
    fn shortest_match(
        &self,
        haystack: &[u8],
    ) -> Result<Option<usize>, Self::Error> {
        self.shortest_match_at(haystack, 0)
    }

    /// Returns an end location of the first match in `haystack` starting at
    /// the given position. If no match exists, then `None` is returned.
    ///
    /// Note that the end location reported by this method may be less than the
    /// same end location reported by `find`. For example, running `find` with
    /// the pattern `a+` on the haystack `aaa` should report a range of `[0,
    /// 3)`, but `shortest_match` may report `1` as the ending location since
    /// that is the place at which a match is guaranteed to occur.
    ///
    /// This method should never report false positives or false negatives. The
    /// point of this method is that some implementors may be able to provide
    /// a faster implementation of this than what `find` does.
    ///
    /// By default, this method is implemented by calling `find_at` and
    /// returning the end offset of the match it reports.
    ///
    /// The significance of the starting point is that it takes the surrounding
    /// context into consideration. For example, the `\A` anchor can only
    /// match when `at == 0`.
    fn shortest_match_at(
        &self,
        haystack: &[u8],
        at: usize,
    ) -> Result<Option<usize>, Self::Error> {
        Ok(self.find_at(haystack, at)?.map(|m| m.end))
    }

    /// If available, return a set of bytes that will never appear in a match
    /// produced by an implementation.
    ///
    /// Specifically, if such a set can be determined, then it's possible for
    /// callers to perform additional operations on the basis that certain
    /// bytes may never match.
    ///
    /// For example, if a search is configured to possibly produce results
    /// that span multiple lines but a caller provided pattern can never
    /// match across multiple lines, then it may make sense to divert to
    /// more optimized line oriented routines that don't need to handle the
    /// multi-line match case.
    ///
    /// Implementations that produce this set must never report false
    /// positives, but may produce false negatives. That is, if a byte is in
    /// this set then it must be guaranteed that it is never in a match. But,
    /// if a byte is not in this set, then callers cannot assume that a match
    /// exists with that byte.
    ///
    /// By default, this returns `None`.
    fn non_matching_bytes(&self) -> Option<&ByteSet> {
        None
    }

    /// If this matcher was compiled as a line oriented matcher, then this
    /// method returns the line terminator if and only if the line terminator
    /// never appears in any match produced by this matcher. If this wasn't
    /// compiled as a line oriented matcher, or if the aforementioned guarantee
    /// cannot be made, then this must return `None`, which is the default.
    /// It is **never wrong** to return `None`, but returning a line terminator
    /// when it can appear in a match results in unspecified behavior.
    ///
    /// The line terminator is typically `b'\n'`, but can be any single byte or
    /// `CRLF`.
    ///
    /// By default, this returns `None`.
    fn line_terminator(&self) -> Option<LineTerminator> {
        None
    }

    /// Return one of the following: a confirmed line match, a candidate line
    /// match (which may be a false positive) or no match at all (which **must
    /// not** be a false negative). When reporting a confirmed or candidate
    /// match, the position returned can be any position in the line.
    ///
    /// By default, this never returns a candidate match, and always either
    /// returns a confirmed match or no match at all.
    ///
    /// When a matcher can match spans over multiple lines, then the behavior
    /// of this method is unspecified. Namely, use of this method only
    /// makes sense in a context where the caller is looking for the next
    /// matching line. That is, callers should only use this method when
    /// `line_terminator` does not return `None`.
    ///
    /// # Design rationale
    ///
    /// A line matcher is, fundamentally, a normal matcher with the addition
    /// of one optional method: finding a line. By default, this routine
    /// is implemented via the matcher's `shortest_match` method, which
    /// always yields either no match or a `LineMatchKind::Confirmed`. However,
    /// implementors may provide a routine for this that can return candidate
    /// lines that need subsequent verification to be confirmed as a match.
    /// This can be useful in cases where it may be quicker to find candidate
    /// lines via some other means instead of relying on the more general
    /// implementations for `find` and `shortest_match`.
    ///
    /// For example, consider the regex `\w+foo\s+`. Both `find` and
    /// `shortest_match` must consider the entire regex, including the `\w+`
    /// and `\s+`, while searching. However, this method could look for lines
    /// containing `foo` and return them as candidates. Finding `foo` might
    /// be implemented as a highly optimized substring search routine (like
    /// `memmem`), which is likely to be faster than whatever more generalized
    /// routine is required for resolving `\w+foo\s+`. The caller is then
    /// responsible for confirming whether a match exists or not.
    ///
    /// Note that while this method may report false positives, it must never
    /// report false negatives. That is, it can never skip over lines that
    /// contain a match.
    fn find_candidate_line(
        &self,
        haystack: &[u8],
    ) -> Result<Option<LineMatchKind>, Self::Error> {
        Ok(self.shortest_match(haystack)?.map(LineMatchKind::Confirmed))
    }
}

/// A shared reference to a matcher is itself a matcher.
///
/// Every method is forwarded to the underlying `M`, including methods for
/// which the trait supplies a default implementation (such as
/// `line_terminator` and `find_candidate_line`). Forwarding them explicitly
/// means that any override provided by `M` is the one that runs, rather than
/// the trait's default.
impl<'a, M: Matcher> Matcher for &'a M {
    type Captures = M::Captures;
    type Error = M::Error;

    // Core search and capture routines.

    fn find_at(
        &self,
        haystack: &[u8],
        at: usize,
    ) -> Result<Option<Match>, Self::Error> {
        M::find_at(*self, haystack, at)
    }

    fn new_captures(&self) -> Result<Self::Captures, Self::Error> {
        M::new_captures(*self)
    }

    fn captures_at(
        &self,
        haystack: &[u8],
        at: usize,
        caps: &mut Self::Captures,
    ) -> Result<bool, Self::Error> {
        M::captures_at(*self, haystack, at, caps)
    }

    fn capture_index(&self, name: &str) -> Option<usize> {
        M::capture_index(*self, name)
    }

    fn capture_count(&self) -> usize {
        M::capture_count(*self)
    }

    // Whole-haystack searching and iteration.

    fn find(
        &self,
        haystack: &[u8],
    ) -> Result<Option<Match>, Self::Error> {
        M::find(*self, haystack)
    }

    fn find_iter<F>(
        &self,
        haystack: &[u8],
        matched: F,
    ) -> Result<(), Self::Error>
    where
        F: FnMut(Match) -> bool,
    {
        M::find_iter(*self, haystack, matched)
    }

    fn try_find_iter<F, E>(
        &self,
        haystack: &[u8],
        matched: F,
    ) -> Result<Result<(), E>, Self::Error>
    where
        F: FnMut(Match) -> Result<bool, E>,
    {
        M::try_find_iter(*self, haystack, matched)
    }

    fn captures(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
    ) -> Result<bool, Self::Error> {
        M::captures(*self, haystack, caps)
    }

    fn captures_iter<F>(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
        matched: F,
    ) -> Result<(), Self::Error>
    where
        F: FnMut(&Self::Captures) -> bool,
    {
        M::captures_iter(*self, haystack, caps, matched)
    }

    fn try_captures_iter<F, E>(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
        matched: F,
    ) -> Result<Result<(), E>, Self::Error>
    where
        F: FnMut(&Self::Captures) -> Result<bool, E>,
    {
        M::try_captures_iter(*self, haystack, caps, matched)
    }

    // Replacement.

    fn replace<F>(
        &self,
        haystack: &[u8],
        dst: &mut Vec<u8>,
        append: F,
    ) -> Result<(), Self::Error>
    where
        F: FnMut(Match, &mut Vec<u8>) -> bool,
    {
        M::replace(*self, haystack, dst, append)
    }

    fn replace_with_captures<F>(
        &self,
        haystack: &[u8],
        caps: &mut Self::Captures,
        dst: &mut Vec<u8>,
        append: F,
    ) -> Result<(), Self::Error>
    where
        F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
    {
        M::replace_with_captures(*self, haystack, caps, dst, append)
    }

    // Match tests that do not need to report full match spans.

    fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
        M::is_match(*self, haystack)
    }

    fn is_match_at(
        &self,
        haystack: &[u8],
        at: usize,
    ) -> Result<bool, Self::Error> {
        M::is_match_at(*self, haystack, at)
    }

    fn shortest_match(
        &self,
        haystack: &[u8],
    ) -> Result<Option<usize>, Self::Error> {
        M::shortest_match(*self, haystack)
    }

    fn shortest_match_at(
        &self,
        haystack: &[u8],
        at: usize,
    ) -> Result<Option<usize>, Self::Error> {
        M::shortest_match_at(*self, haystack, at)
    }

    // Optional hints used for line oriented searching.

    fn non_matching_bytes(&self) -> Option<&ByteSet> {
        M::non_matching_bytes(*self)
    }

    fn line_terminator(&self) -> Option<LineTerminator> {
        M::line_terminator(*self)
    }

    fn find_candidate_line(
        &self,
        haystack: &[u8],
    ) -> Result<Option<LineMatchKind>, Self::Error> {
        M::find_candidate_line(*self, haystack)
    }
}