From b9de003f8125b7257e70dd72183ad6250facd3da Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 8 Oct 2023 14:48:29 -0400 Subject: [PATCH] matcher: add a bunch of inline annotations Many of these functions should be inlineable, but I'm not 100% sure that they can be inlined without these annotations. We don't want to force things, but we do try and nudge the compiler in the right direction. --- crates/matcher/src/interpolate.rs | 5 +++ crates/matcher/src/lib.rs | 61 +++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/crates/matcher/src/interpolate.rs b/crates/matcher/src/interpolate.rs index 1a9471d4..934e814d 100644 --- a/crates/matcher/src/interpolate.rs +++ b/crates/matcher/src/interpolate.rs @@ -10,6 +10,7 @@ use memchr::memchr; /// of a capture group reference and is expected to resolve the index to its /// corresponding matched text. If no such match exists, then `append` should /// not write anything to its given buffer. +#[inline] pub fn interpolate( mut replacement: &[u8], mut append: A, @@ -75,12 +76,14 @@ enum Ref<'a> { } impl<'a> From<&'a str> for Ref<'a> { + #[inline] fn from(x: &'a str) -> Ref<'a> { Ref::Named(x) } } impl From for Ref<'static> { + #[inline] fn from(x: usize) -> Ref<'static> { Ref::Number(x) } @@ -90,6 +93,7 @@ impl From for Ref<'static> { /// starting at the beginning of `replacement`. /// /// If no such valid reference could be found, None is returned. +#[inline] fn find_cap_ref(replacement: &[u8]) -> Option> { let mut i = 0; if replacement.len() <= 1 || replacement[0] != b'$' { @@ -130,6 +134,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option> { } /// Returns true if and only if the given byte is allowed in a capture name. +#[inline] fn is_valid_cap_letter(b: &u8) -> bool { match *b { b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true, diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs index 306fed53..40cbff5d 100644 --- a/crates/matcher/src/lib.rs +++ b/crates/matcher/src/lib.rs @@ -304,12 +304,14 @@ impl std::fmt::Debug for BitSet { impl ByteSet { /// Create an empty set of bytes. + #[inline] pub fn empty() -> ByteSet { ByteSet(BitSet([0; 4])) } /// Create a full set of bytes such that every possible byte is in the set /// returned. + #[inline] pub fn full() -> ByteSet { ByteSet(BitSet([u64::MAX; 4])) } @@ -317,6 +319,7 @@ impl ByteSet { /// Add a byte to this set. /// /// If the given byte already belongs to this set, then this is a no-op. + #[inline] pub fn add(&mut self, byte: u8) { let bucket = byte / 64; let bit = byte % 64; @@ -324,6 +327,7 @@ impl ByteSet { } /// Add an inclusive range of bytes. + #[inline] pub fn add_all(&mut self, start: u8, end: u8) { for b in start..=end { self.add(b); @@ -333,6 +337,7 @@ impl ByteSet { /// Remove a byte from this set. /// /// If the given byte is not in this set, then this is a no-op. + #[inline] pub fn remove(&mut self, byte: u8) { let bucket = byte / 64; let bit = byte % 64; @@ -340,6 +345,7 @@ impl ByteSet { } /// Remove an inclusive range of bytes. + #[inline] pub fn remove_all(&mut self, start: u8, end: u8) { for b in start..=end { self.remove(b); @@ -347,6 +353,7 @@ impl ByteSet { } /// Return true if and only if the given byte is in this set. + #[inline] pub fn contains(&self, byte: u8) -> bool { let bucket = byte / 64; let bit = byte % 64; @@ -387,6 +394,7 @@ pub trait Captures { /// /// Note that capturing groups that have non-zero length but otherwise /// contain no matching groups are *not* empty. + #[inline] fn is_empty(&self) -> bool { self.len() == 0 } @@ -420,6 +428,7 @@ pub trait Captures { /// the given `haystack`. Generally, this means that `haystack` should be /// the same slice that was searched to get the current capture group /// matches. + #[inline] fn interpolate( &self, name_to_index: F, @@ -451,15 +460,19 @@ pub struct NoCaptures(()); impl NoCaptures { /// Create an empty set of capturing groups. + #[inline] pub fn new() -> NoCaptures { NoCaptures(()) } } impl Captures for NoCaptures { + #[inline] fn len(&self) -> usize { 0 } + + #[inline] fn get(&self, _: usize) -> Option { None } @@ -571,6 +584,7 @@ pub trait Matcher { /// /// By default, capturing groups are not supported, so this always /// returns 0. + #[inline] fn capture_count(&self) -> usize { 0 } @@ -584,6 +598,7 @@ pub trait Matcher { /// /// By default, capturing groups are not supported, so this always returns /// `None`. + #[inline] fn capture_index(&self, _name: &str) -> Option { None } @@ -593,6 +608,7 @@ pub trait Matcher { /// /// The text encoding of `haystack` is not strictly specified. Matchers are /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. + #[inline] fn find(&self, haystack: &[u8]) -> Result, Self::Error> { self.find_at(haystack, 0) } @@ -600,6 +616,7 @@ pub trait Matcher { /// Executes the given function over successive non-overlapping matches /// in `haystack`. If no match exists, then the given function is never /// called. If the function returns `false`, then iteration stops. + #[inline] fn find_iter( &self, haystack: &[u8], @@ -618,6 +635,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn find_iter_at( &self, haystack: &[u8], @@ -638,6 +656,7 @@ pub trait Matcher { /// the error is yielded. If an error occurs while executing the search, /// then it is converted to /// `E`. + #[inline] fn try_find_iter( &self, haystack: &[u8], @@ -660,6 +679,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn try_find_iter_at( &self, haystack: &[u8], @@ -707,6 +727,7 @@ pub trait Matcher { /// /// The text encoding of `haystack` is not strictly specified. Matchers are /// advised to assume UTF-8, or at worst, some ASCII compatible encoding. + #[inline] fn captures( &self, haystack: &[u8], @@ -719,6 +740,7 @@ pub trait Matcher { /// in `haystack` with capture groups extracted from each match. If no /// match exists, then the given function is never called. If the function /// returns `false`, then iteration stops. + #[inline] fn captures_iter( &self, haystack: &[u8], @@ -739,6 +761,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn captures_iter_at( &self, haystack: &[u8], @@ -760,6 +783,7 @@ pub trait Matcher { /// returns an error then iteration stops and the error is yielded. If /// an error occurs while executing the search, then it is converted to /// `E`. + #[inline] fn try_captures_iter( &self, haystack: &[u8], @@ -783,6 +807,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn try_captures_iter_at( &self, haystack: &[u8], @@ -849,6 +874,7 @@ pub trait Matcher { /// Note that if implementors seek to support capturing groups, then they /// should implement this method. Other methods that match based on /// captures will then work automatically. + #[inline] fn captures_at( &self, _haystack: &[u8], @@ -863,6 +889,7 @@ pub trait Matcher { /// a handle to the `dst` buffer provided. /// /// If the given `append` function returns `false`, then replacement stops. + #[inline] fn replace( &self, haystack: &[u8], @@ -886,6 +913,7 @@ pub trait Matcher { /// `append` with the matching capture groups. /// /// If the given `append` function returns `false`, then replacement stops. + #[inline] fn replace_with_captures( &self, haystack: &[u8], @@ -907,6 +935,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn replace_with_captures_at( &self, haystack: &[u8], @@ -932,6 +961,7 @@ pub trait Matcher { /// Returns true if and only if the matcher matches the given haystack. /// /// By default, this method is implemented by calling `shortest_match`. + #[inline] fn is_match(&self, haystack: &[u8]) -> Result { self.is_match_at(haystack, 0) } @@ -944,6 +974,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn is_match_at( &self, haystack: &[u8], @@ -966,6 +997,7 @@ pub trait Matcher { /// a faster implementation of this than what `find` does. /// /// By default, this method is implemented by calling `find`. + #[inline] fn shortest_match( &self, haystack: &[u8], @@ -991,6 +1023,7 @@ pub trait Matcher { /// The significance of the starting point is that it takes the surrounding /// context into consideration. For example, the `\A` anchor can only /// match when `at == 0`. + #[inline] fn shortest_match_at( &self, haystack: &[u8], @@ -1019,6 +1052,7 @@ pub trait Matcher { /// exists with that byte. /// /// By default, this returns `None`. + #[inline] fn non_matching_bytes(&self) -> Option<&ByteSet> { None } @@ -1035,6 +1069,7 @@ pub trait Matcher { /// `CRLF`. /// /// By default, this returns `None`. + #[inline] fn line_terminator(&self) -> Option { None } @@ -1077,6 +1112,7 @@ pub trait Matcher { /// Note that while this method may report false positives, it must never /// report false negatives. That is, it can never skip over lines that /// contain a match. + #[inline] fn find_candidate_line( &self, haystack: &[u8], @@ -1089,6 +1125,7 @@ impl<'a, M: Matcher> Matcher for &'a M { type Captures = M::Captures; type Error = M::Error; + #[inline] fn find_at( &self, haystack: &[u8], @@ -1097,10 +1134,12 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).find_at(haystack, at) } + #[inline] fn new_captures(&self) -> Result { (*self).new_captures() } + #[inline] fn captures_at( &self, haystack: &[u8], @@ -1110,18 +1149,22 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).captures_at(haystack, at, caps) } + #[inline] fn capture_index(&self, name: &str) -> Option { (*self).capture_index(name) } + #[inline] fn capture_count(&self) -> usize { (*self).capture_count() } + #[inline] fn find(&self, haystack: &[u8]) -> Result, Self::Error> { (*self).find(haystack) } + #[inline] fn find_iter( &self, haystack: &[u8], @@ -1133,6 +1176,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).find_iter(haystack, matched) } + #[inline] fn find_iter_at( &self, haystack: &[u8], @@ -1145,6 +1189,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).find_iter_at(haystack, at, matched) } + #[inline] fn try_find_iter( &self, haystack: &[u8], @@ -1156,6 +1201,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).try_find_iter(haystack, matched) } + #[inline] fn try_find_iter_at( &self, haystack: &[u8], @@ -1168,6 +1214,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).try_find_iter_at(haystack, at, matched) } + #[inline] fn captures( &self, haystack: &[u8], @@ -1176,6 +1223,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).captures(haystack, caps) } + #[inline] fn captures_iter( &self, haystack: &[u8], @@ -1188,6 +1236,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).captures_iter(haystack, caps, matched) } + #[inline] fn captures_iter_at( &self, haystack: &[u8], @@ -1201,6 +1250,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).captures_iter_at(haystack, at, caps, matched) } + #[inline] fn try_captures_iter( &self, haystack: &[u8], @@ -1213,6 +1263,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).try_captures_iter(haystack, caps, matched) } + #[inline] fn try_captures_iter_at( &self, haystack: &[u8], @@ -1226,6 +1277,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).try_captures_iter_at(haystack, at, caps, matched) } + #[inline] fn replace( &self, haystack: &[u8], @@ -1238,6 +1290,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).replace(haystack, dst, append) } + #[inline] fn replace_with_captures( &self, haystack: &[u8], @@ -1251,6 +1304,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).replace_with_captures(haystack, caps, dst, append) } + #[inline] fn replace_with_captures_at( &self, haystack: &[u8], @@ -1265,10 +1319,12 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).replace_with_captures_at(haystack, at, caps, dst, append) } + #[inline] fn is_match(&self, haystack: &[u8]) -> Result { (*self).is_match(haystack) } + #[inline] fn is_match_at( &self, haystack: &[u8], @@ -1277,6 +1333,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).is_match_at(haystack, at) } + #[inline] fn shortest_match( &self, haystack: &[u8], @@ -1284,6 +1341,7 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).shortest_match(haystack) } + #[inline] fn shortest_match_at( &self, haystack: &[u8], @@ -1292,14 +1350,17 @@ impl<'a, M: Matcher> Matcher for &'a M { (*self).shortest_match_at(haystack, at) } + #[inline] fn non_matching_bytes(&self) -> Option<&ByteSet> { (*self).non_matching_bytes() } + #[inline] fn line_terminator(&self) -> Option { (*self).line_terminator() } + #[inline] fn find_candidate_line( &self, haystack: &[u8],