From 96f01b92a082cc3497e8c8bc3a63257afd07ea3a Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 25 Sep 2023 16:48:25 -0400 Subject: [PATCH] matcher: polish the grep-matcher crate Not much here. Just updating to reflect my current style and bringing the crate to the 2021 edition. --- crates/matcher/Cargo.toml | 6 +-- crates/matcher/src/interpolate.rs | 4 +- crates/matcher/src/lib.rs | 55 ++++++++++++---------------- crates/matcher/tests/test_matcher.rs | 6 ++- crates/matcher/tests/util.rs | 19 +++++----- 5 files changed, 41 insertions(+), 49 deletions(-) diff --git a/crates/matcher/Cargo.toml b/crates/matcher/Cargo.toml index 43ab76fa..297e8d7a 100644 --- a/crates/matcher/Cargo.toml +++ b/crates/matcher/Cargo.toml @@ -12,13 +12,13 @@ readme = "README.md" keywords = ["regex", "pattern", "trait"] license = "Unlicense OR MIT" autotests = false -edition = "2018" +edition = "2021" [dependencies] -memchr = "2.1" +memchr = "2.6.3" [dev-dependencies] -regex = "1.1" +regex = "1.9.5" [[test]] name = "integration" diff --git a/crates/matcher/src/interpolate.rs b/crates/matcher/src/interpolate.rs index 42482ad3..1a9471d4 100644 --- a/crates/matcher/src/interpolate.rs +++ b/crates/matcher/src/interpolate.rs @@ -1,5 +1,3 @@ -use std::str; - use memchr::memchr; /// Interpolate capture references in `replacement` and write the interpolation @@ -114,7 +112,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option> { // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 // check with an unchecked conversion or by parsing the number straight // from &[u8]. 
- let cap = str::from_utf8(&replacement[i..cap_end]) + let cap = std::str::from_utf8(&replacement[i..cap_end]) .expect("valid UTF-8 capture name"); if brace { if !replacement.get(cap_end).map_or(false, |&b| b == b'}') { diff --git a/crates/matcher/src/lib.rs b/crates/matcher/src/lib.rs index 5b43b0d8..b9c1b8a8 100644 --- a/crates/matcher/src/lib.rs +++ b/crates/matcher/src/lib.rs @@ -38,11 +38,6 @@ implementations. #![deny(missing_docs)] -use std::fmt; -use std::io; -use std::ops; -use std::u64; - use crate::interpolate::interpolate; mod interpolate; @@ -162,7 +157,7 @@ impl Match { } } -impl ops::Index for [u8] { +impl std::ops::Index for [u8] { type Output = [u8]; #[inline] @@ -171,14 +166,14 @@ impl ops::Index for [u8] { } } -impl ops::IndexMut for [u8] { +impl std::ops::IndexMut for [u8] { #[inline] fn index_mut(&mut self, index: Match) -> &mut [u8] { &mut self[index.start..index.end] } } -impl ops::Index for str { +impl std::ops::Index for str { type Output = str; #[inline] @@ -204,11 +199,7 @@ pub struct LineTerminator(LineTerminatorImp); #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] enum LineTerminatorImp { /// Any single byte representing a line terminator. - /// - /// We represent this as an array so we can safely convert it to a slice - /// for convenient access. At some point, we can use `std::slice::from_ref` - /// instead. - Byte([u8; 1]), + Byte(u8), /// A line terminator represented by `\r\n`. /// /// When this option is used, consumers may generally treat a lone `\n` as @@ -220,7 +211,7 @@ impl LineTerminator { /// Return a new single-byte line terminator. Any byte is valid. #[inline] pub fn byte(byte: u8) -> LineTerminator { - LineTerminator(LineTerminatorImp::Byte([byte])) + LineTerminator(LineTerminatorImp::Byte(byte)) } /// Return a new line terminator represented by `\r\n`. 
@@ -246,7 +237,7 @@ impl LineTerminator { #[inline] pub fn as_byte(&self) -> u8 { match self.0 { - LineTerminatorImp::Byte(array) => array[0], + LineTerminatorImp::Byte(byte) => byte, LineTerminatorImp::CRLF => b'\n', } } @@ -260,7 +251,7 @@ impl LineTerminator { #[inline] pub fn as_bytes(&self) -> &[u8] { match self.0 { - LineTerminatorImp::Byte(ref array) => array, + LineTerminatorImp::Byte(ref byte) => std::slice::from_ref(byte), LineTerminatorImp::CRLF => &[b'\r', b'\n'], } } @@ -301,10 +292,10 @@ pub struct ByteSet(BitSet); #[derive(Clone, Copy)] struct BitSet([u64; 4]); -impl fmt::Debug for BitSet { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Debug for BitSet { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let mut fmtd = f.debug_set(); - for b in (0..256).map(|b| b as u8) { + for b in 0..=255 { if ByteSet(*self).contains(b) { fmtd.entry(&b); } @@ -331,12 +322,12 @@ impl ByteSet { pub fn add(&mut self, byte: u8) { let bucket = byte / 64; let bit = byte % 64; - (self.0).0[bucket as usize] |= 1 << bit; + (self.0).0[usize::from(bucket)] |= 1 << bit; } /// Add an inclusive range of bytes. pub fn add_all(&mut self, start: u8, end: u8) { - for b in (start as u64..end as u64 + 1).map(|b| b as u8) { + for b in start..=end { self.add(b); } } @@ -347,12 +338,12 @@ impl ByteSet { pub fn remove(&mut self, byte: u8) { let bucket = byte / 64; let bit = byte % 64; - (self.0).0[bucket as usize] &= !(1 << bit); + (self.0).0[usize::from(bucket)] &= !(1 << bit); } /// Remove an inclusive range of bytes. 
pub fn remove_all(&mut self, start: u8, end: u8) { - for b in (start as u64..end as u64 + 1).map(|b| b as u8) { + for b in start..=end { self.remove(b); } } @@ -361,7 +352,7 @@ impl ByteSet { pub fn contains(&self, byte: u8) -> bool { let bucket = byte / 64; let bit = byte % 64; - (self.0).0[bucket as usize] & (1 << bit) > 0 + (self.0).0[usize::from(bucket)] & (1 << bit) > 0 } } @@ -478,27 +469,27 @@ impl Captures for NoCaptures { /// NoError provides an error type for matchers that never produce errors. /// -/// This error type implements the `std::error::Error` and `fmt::Display` +/// This error type implements the `std::error::Error` and `std::fmt::Display` /// traits for use in matcher implementations that can never produce errors. /// -/// The `fmt::Debug` and `fmt::Display` impls for this type panics. +/// The `std::fmt::Debug` and `std::fmt::Display` impls for this type panic. #[derive(Debug, Eq, PartialEq)] pub struct NoError(()); -impl ::std::error::Error for NoError { +impl std::error::Error for NoError { fn description(&self) -> &str { "no error" } } -impl fmt::Display for NoError { - fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for NoError { + fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { panic!("BUG for NoError: an impossible error occurred") } } -impl From for io::Error { - fn from(_: NoError) -> io::Error { +impl From for std::io::Error { + fn from(_: NoError) -> std::io::Error { panic!("BUG for NoError: an impossible error occurred") } } @@ -547,7 +538,7 @@ pub trait Matcher { /// use the `NoError` type in this crate. In the future, when the "never" /// (spelled `!`) type is stabilized, then it should probably be used /// instead.
- type Error: fmt::Display; + type Error: std::fmt::Display; /// Returns the start and end byte range of the first match in `haystack` /// after `at`, where the byte offsets are relative to that start of diff --git a/crates/matcher/tests/test_matcher.rs b/crates/matcher/tests/test_matcher.rs index ebfb8be2..14ab49b3 100644 --- a/crates/matcher/tests/test_matcher.rs +++ b/crates/matcher/tests/test_matcher.rs @@ -1,5 +1,7 @@ -use grep_matcher::{Captures, Match, Matcher}; -use regex::bytes::Regex; +use { + grep_matcher::{Captures, Match, Matcher}, + regex::bytes::Regex, +}; use crate::util::{RegexMatcher, RegexMatcherNoCaps}; diff --git a/crates/matcher/tests/util.rs b/crates/matcher/tests/util.rs index c99d55c7..38954ead 100644 --- a/crates/matcher/tests/util.rs +++ b/crates/matcher/tests/util.rs @@ -1,28 +1,29 @@ use std::collections::HashMap; -use std::result; -use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError}; -use regex::bytes::{CaptureLocations, Regex}; +use { + grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError}, + regex::bytes::{CaptureLocations, Regex}, +}; #[derive(Debug)] -pub struct RegexMatcher { +pub(crate) struct RegexMatcher { pub re: Regex, pub names: HashMap, } impl RegexMatcher { - pub fn new(re: Regex) -> RegexMatcher { + pub(crate) fn new(re: Regex) -> RegexMatcher { let mut names = HashMap::new(); for (i, optional_name) in re.capture_names().enumerate() { if let Some(name) = optional_name { names.insert(name.to_string(), i); } } - RegexMatcher { re: re, names: names } + RegexMatcher { re, names } } } -type Result = result::Result; +type Result = std::result::Result; impl Matcher for RegexMatcher { type Captures = RegexCaptures; @@ -63,7 +64,7 @@ impl Matcher for RegexMatcher { } #[derive(Debug)] -pub struct RegexMatcherNoCaps(pub Regex); +pub(crate) struct RegexMatcherNoCaps(pub(crate) Regex); impl Matcher for RegexMatcherNoCaps { type Captures = NoCaptures; @@ -82,7 +83,7 @@ impl Matcher for RegexMatcherNoCaps { } 
#[derive(Clone, Debug)] -pub struct RegexCaptures(CaptureLocations); +pub(crate) struct RegexCaptures(CaptureLocations); impl Captures for RegexCaptures { fn len(&self) -> usize {