diff --git a/Cargo.lock b/Cargo.lock index 7be0552b..8e45d316 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -319,9 +319,9 @@ checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" [[package]] name = "memmap2" -version = "0.5.10" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +checksum = "43a5a03cefb0d953ec0be133036f14e109412fa594edc2f77227249db66cc3ed" dependencies = [ "libc", ] diff --git a/crates/searcher/Cargo.toml b/crates/searcher/Cargo.toml index 579d1370..5508bb0d 100644 --- a/crates/searcher/Cargo.toml +++ b/crates/searcher/Cargo.toml @@ -11,16 +11,16 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher" readme = "README.md" keywords = ["regex", "grep", "egrep", "search", "pattern"] license = "Unlicense OR MIT" -edition = "2018" +edition = "2021" [dependencies] -bstr = { version = "1.6.0", default-features = false, features = ["std"] } -encoding_rs = "0.8.14" -encoding_rs_io = "0.1.6" +bstr = { version = "1.6.2", default-features = false, features = ["std"] } +encoding_rs = "0.8.33" +encoding_rs_io = "0.1.7" grep-matcher = { version = "0.1.6", path = "../matcher" } -log = "0.4.5" -memchr = "2.6.2" -memmap = { package = "memmap2", version = "0.5.3" } +log = "0.4.20" +memchr = "2.6.3" +memmap = { package = "memmap2", version = "0.8.0" } [dev-dependencies] grep-regex = { version = "0.1.11", path = "../regex" } diff --git a/crates/searcher/src/lib.rs b/crates/searcher/src/lib.rs index 20d38ffe..4f5bd159 100644 --- a/crates/searcher/src/lib.rs +++ b/crates/searcher/src/lib.rs @@ -38,12 +38,12 @@ This example shows how to execute the searcher and read the search results using the [`UTF8`](sinks::UTF8) implementation of `Sink`. 
``` -use std::error::Error; - -use grep_matcher::Matcher; -use grep_regex::RegexMatcher; -use grep_searcher::Searcher; -use grep_searcher::sinks::UTF8; +use { + grep_matcher::Matcher, + grep_regex::RegexMatcher, + grep_searcher::Searcher, + grep_searcher::sinks::UTF8, +}; const SHERLOCK: &'static [u8] = b"\ For the Doctor Watsons of this world, as opposed to the Sherlock @@ -54,28 +54,26 @@ but Doctor Watson has to have it taken out for him and dusted, and exhibited clearly, with a label attached. "; -# fn main() { example().unwrap() } -fn example() -> Result<(), Box> { - let matcher = RegexMatcher::new(r"Doctor \w+")?; - let mut matches: Vec<(u64, String)> = vec![]; - Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| { - // We are guaranteed to find a match, so the unwrap is OK. - let mymatch = matcher.find(line.as_bytes())?.unwrap(); - matches.push((lnum, line[mymatch].to_string())); - Ok(true) - }))?; +let matcher = RegexMatcher::new(r"Doctor \w+")?; +let mut matches: Vec<(u64, String)> = vec![]; +Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| { + // We are guaranteed to find a match, so the unwrap is OK. + let mymatch = matcher.find(line.as_bytes())?.unwrap(); + matches.push((lnum, line[mymatch].to_string())); + Ok(true) +}))?; - assert_eq!(matches.len(), 2); - assert_eq!( - matches[0], - (1, "Doctor Watsons".to_string()) - ); - assert_eq!( - matches[1], - (5, "Doctor Watson".to_string()) - ); - Ok(()) -} +assert_eq!(matches.len(), 2); +assert_eq!( + matches[0], + (1, "Doctor Watsons".to_string()) +); +assert_eq!( + matches[1], + (5, "Doctor Watson".to_string()) +); + +# Ok::<(), Box>(()) ``` See also `examples/search-stdin.rs` from the root of this crate's directory @@ -85,14 +83,16 @@ searches stdin. 
#![deny(missing_docs)] -pub use crate::lines::{LineIter, LineStep}; -pub use crate::searcher::{ - BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher, - SearcherBuilder, -}; -pub use crate::sink::sinks; -pub use crate::sink::{ - Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch, +pub use crate::{ + lines::{LineIter, LineStep}, + searcher::{ + BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher, + SearcherBuilder, + }, + sink::{ + sinks, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, + SinkMatch, + }, }; #[macro_use] diff --git a/crates/searcher/src/line_buffer.rs b/crates/searcher/src/line_buffer.rs index aaa81d26..a09d4e17 100644 --- a/crates/searcher/src/line_buffer.rs +++ b/crates/searcher/src/line_buffer.rs @@ -1,4 +1,3 @@ -use std::cmp; use std::io; use bstr::ByteSlice; @@ -15,7 +14,7 @@ pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB /// /// The default is to eagerly allocate without a limit. #[derive(Clone, Copy, Debug)] -pub enum BufferAllocation { +pub(crate) enum BufferAllocation { /// Attempt to expand the size of the buffer until either at least the next /// line fits into memory or until all available memory is exhausted. /// @@ -35,7 +34,7 @@ impl Default for BufferAllocation { /// Create a new error to be used when a configured allocation limit has been /// reached. -pub fn alloc_error(limit: usize) -> io::Error { +pub(crate) fn alloc_error(limit: usize) -> io::Error { let msg = format!("configured allocation limit ({}) exceeded", limit); io::Error::new(io::ErrorKind::Other, msg) } @@ -49,7 +48,7 @@ pub fn alloc_error(limit: usize) -> io::Error { /// using textual patterns. Of course, there are many cases in which this isn't /// true, which is why binary detection is disabled by default. #[derive(Clone, Copy, Debug)] -pub enum BinaryDetection { +pub(crate) enum BinaryDetection { /// No binary detection is performed. 
Data reported by the line buffer may /// contain arbitrary bytes. None, @@ -108,18 +107,18 @@ impl Default for Config { /// A builder for constructing line buffers. #[derive(Clone, Debug, Default)] -pub struct LineBufferBuilder { +pub(crate) struct LineBufferBuilder { config: Config, } impl LineBufferBuilder { /// Create a new builder for a buffer. - pub fn new() -> LineBufferBuilder { + pub(crate) fn new() -> LineBufferBuilder { LineBufferBuilder { config: Config::default() } } /// Create a new line buffer from this builder's configuration. - pub fn build(&self) -> LineBuffer { + pub(crate) fn build(&self) -> LineBuffer { LineBuffer { config: self.config, buf: vec![0; self.config.capacity], @@ -139,7 +138,10 @@ impl LineBufferBuilder { /// /// This is set to a reasonable default and probably shouldn't be changed /// unless there's a specific reason to do so. - pub fn capacity(&mut self, capacity: usize) -> &mut LineBufferBuilder { + pub(crate) fn capacity( + &mut self, + capacity: usize, + ) -> &mut LineBufferBuilder { self.config.capacity = capacity; self } @@ -155,7 +157,10 @@ impl LineBufferBuilder { /// is incomplete. /// /// By default, this is set to `b'\n'`. - pub fn line_terminator(&mut self, lineterm: u8) -> &mut LineBufferBuilder { + pub(crate) fn line_terminator( + &mut self, + lineterm: u8, + ) -> &mut LineBufferBuilder { self.config.lineterm = lineterm; self } @@ -174,7 +179,7 @@ impl LineBufferBuilder { /// a value of `0` is sensible, and in particular, will guarantee that a /// line buffer will never allocate additional memory beyond its initial /// capacity. - pub fn buffer_alloc( + pub(crate) fn buffer_alloc( &mut self, behavior: BufferAllocation, ) -> &mut LineBufferBuilder { @@ -188,7 +193,7 @@ impl LineBufferBuilder { /// /// By default, this is disabled. In general, binary detection should be /// viewed as an imperfect heuristic. 
- pub fn binary_detection( + pub(crate) fn binary_detection( &mut self, detection: BinaryDetection, ) -> &mut LineBufferBuilder { @@ -200,7 +205,7 @@ impl LineBufferBuilder { /// A line buffer reader efficiently reads a line oriented buffer from an /// arbitrary reader. #[derive(Debug)] -pub struct LineBufferReader<'b, R> { +pub(crate) struct LineBufferReader<'b, R> { rdr: R, line_buffer: &'b mut LineBuffer, } @@ -211,7 +216,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> { /// /// This does not change the binary detection behavior of the given line /// buffer. - pub fn new( + pub(crate) fn new( rdr: R, line_buffer: &'b mut LineBuffer, ) -> LineBufferReader<'b, R> { @@ -225,13 +230,13 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> { /// correspond to an offset in memory. It is typically used for reporting /// purposes. It can also be used for counting the number of bytes that /// have been searched. - pub fn absolute_byte_offset(&self) -> u64 { + pub(crate) fn absolute_byte_offset(&self) -> u64 { self.line_buffer.absolute_byte_offset() } /// If binary data was detected, then this returns the absolute byte offset /// at which binary data was initially found. - pub fn binary_byte_offset(&self) -> Option { + pub(crate) fn binary_byte_offset(&self) -> Option { self.line_buffer.binary_byte_offset() } @@ -248,25 +253,25 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> { /// This forwards any errors returned by the underlying reader, and will /// also return an error if the buffer must be expanded past its allocation /// limit, as governed by the buffer allocation strategy. - pub fn fill(&mut self) -> Result { + pub(crate) fn fill(&mut self) -> Result { self.line_buffer.fill(&mut self.rdr) } /// Return the contents of this buffer. - pub fn buffer(&self) -> &[u8] { + pub(crate) fn buffer(&self) -> &[u8] { self.line_buffer.buffer() } /// Return the buffer as a BStr, used for convenient equality checking /// in tests only. 
#[cfg(test)] - fn bstr(&self) -> &::bstr::BStr { + fn bstr(&self) -> &bstr::BStr { self.buffer().as_bstr() } /// Consume the number of bytes provided. This must be less than or equal /// to the number of bytes returned by `buffer`. - pub fn consume(&mut self, amt: usize) { + pub(crate) fn consume(&mut self, amt: usize) { self.line_buffer.consume(amt); } @@ -286,7 +291,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> { /// Line buffers cannot be used directly, but instead must be used via the /// LineBufferReader. #[derive(Clone, Debug)] -pub struct LineBuffer { +pub(crate) struct LineBuffer { /// The configuration of this buffer. config: Config, /// The primary buffer with which to hold data. @@ -322,7 +327,7 @@ impl LineBuffer { /// /// This permits dynamically changing the binary detection strategy on /// an existing line buffer without needing to create a new one. - pub fn set_binary_detection(&mut self, binary: BinaryDetection) { + pub(crate) fn set_binary_detection(&mut self, binary: BinaryDetection) { self.config.binary = binary; } @@ -497,12 +502,12 @@ impl LineBuffer { } // `len` is used for computing the next allocation size. The capacity // is permitted to start at `0`, so we make sure it's at least `1`. 
- let len = cmp::max(1, self.buf.len()); + let len = std::cmp::max(1, self.buf.len()); let additional = match self.config.buffer_alloc { BufferAllocation::Eager => len * 2, BufferAllocation::Error(limit) => { let used = self.buf.len() - self.config.capacity; - let n = cmp::min(len * 2, limit - used); + let n = std::cmp::min(len * 2, limit - used); if n == 0 { return Err(alloc_error(self.config.capacity + limit)); } @@ -541,9 +546,9 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option { #[cfg(test)] mod tests { - use super::*; use bstr::{ByteSlice, ByteVec}; - use std::str; + + use super::*; const SHERLOCK: &'static str = "\ For the Doctor Watsons of this world, as opposed to the Sherlock diff --git a/crates/searcher/src/lines.rs b/crates/searcher/src/lines.rs index 5e47c9b3..98f54fa4 100644 --- a/crates/searcher/src/lines.rs +++ b/crates/searcher/src/lines.rs @@ -2,8 +2,10 @@ A collection of routines for performing operations on lines. */ -use bstr::ByteSlice; -use grep_matcher::{LineTerminator, Match}; +use { + bstr::ByteSlice, + grep_matcher::{LineTerminator, Match}, +}; /// An iterator over lines in a particular slice of bytes. /// @@ -21,10 +23,8 @@ impl<'b> LineIter<'b> { /// Create a new line iterator that yields lines in the given bytes that /// are terminated by `line_term`. pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> { - LineIter { - bytes: bytes, - stepper: LineStep::new(line_term, 0, bytes.len()), - } + let stepper = LineStep::new(line_term, 0, bytes.len()); + LineIter { bytes, stepper } } } @@ -61,7 +61,7 @@ impl LineStep { /// /// This panics if `start` is not less than or equal to `end`. pub fn new(line_term: u8, start: usize, end: usize) -> LineStep { - LineStep { line_term, pos: start, end: end } + LineStep { line_term, pos: start, end } } /// Return the start and end position of the next line in the given bytes. @@ -108,14 +108,17 @@ impl LineStep { } /// Count the number of occurrences of `line_term` in `bytes`. 
-pub fn count(bytes: &[u8], line_term: u8) -> u64 { +pub(crate) fn count(bytes: &[u8], line_term: u8) -> u64 { memchr::memchr_iter(line_term, bytes).count() as u64 } /// Given a line that possibly ends with a terminator, return that line without /// the terminator. #[inline(always)] -pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] { +pub(crate) fn without_terminator( + bytes: &[u8], + line_term: LineTerminator, +) -> &[u8] { let line_term = line_term.as_bytes(); let start = bytes.len().saturating_sub(line_term.len()); if bytes.get(start..) == Some(line_term) { @@ -129,7 +132,7 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] { /// /// Line terminators are considered part of the line they terminate. #[inline(always)] -pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match { +pub(crate) fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match { let line_start = bytes[..range.start()].rfind_byte(line_term).map_or(0, |i| i + 1); let line_end = @@ -151,7 +154,7 @@ pub fn locate(bytes: &[u8], line_term: u8, range: Match) -> Match { /// /// If `bytes` ends with a line terminator, then the terminator itself is /// considered part of the last line. 
-pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize { +pub(crate) fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize { preceding_by_pos(bytes, bytes.len(), line_term, count) } @@ -195,10 +198,9 @@ fn preceding_by_pos( #[cfg(test)] mod tests { - use super::*; use grep_matcher::Match; - use std::ops::Range; - use std::str; + + use super::*; const SHERLOCK: &'static str = "\ For the Doctor Watsons of this world, as opposed to the Sherlock @@ -222,7 +224,7 @@ and exhibited clearly, with a label attached.\ results } - fn line_ranges(text: &str) -> Vec> { + fn line_ranges(text: &str) -> Vec> { let mut results = vec![]; let mut it = LineStep::new(b'\n', 0, text.len()); while let Some(m) = it.next_match(text.as_bytes()) { diff --git a/crates/searcher/src/searcher/core.rs b/crates/searcher/src/searcher/core.rs index 95b4ba6a..e6836e6a 100644 --- a/crates/searcher/src/searcher/core.rs +++ b/crates/searcher/src/searcher/core.rs @@ -1,15 +1,16 @@ -use std::cmp; - use bstr::ByteSlice; -use crate::line_buffer::BinaryDetection; -use crate::lines::{self, LineStep}; -use crate::searcher::{Config, Range, Searcher}; -use crate::sink::{ - Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch, -}; use grep_matcher::{LineMatchKind, Matcher}; +use crate::{ + line_buffer::BinaryDetection, + lines::{self, LineStep}, + searcher::{Config, Range, Searcher}, + sink::{ + Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch, + }, +}; + enum FastMatchResult { Continue, Stop, @@ -17,7 +18,7 @@ enum FastMatchResult { } #[derive(Debug)] -pub struct Core<'s, M: 's, S> { +pub(crate) struct Core<'s, M: 's, S> { config: &'s Config, matcher: M, searcher: &'s Searcher, @@ -35,7 +36,7 @@ pub struct Core<'s, M: 's, S> { } impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { - pub fn new( + pub(crate) fn new( searcher: &'s Searcher, matcher: M, sink: S, @@ -45,14 +46,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { if searcher.config.line_number { 
Some(1) } else { None }; let core = Core { config: &searcher.config, - matcher: matcher, - searcher: searcher, - sink: sink, - binary: binary, + matcher, + searcher, + sink, + binary, pos: 0, absolute_byte_offset: 0, binary_byte_offset: None, - line_number: line_number, + line_number, last_line_counted: 0, last_line_visited: 0, after_context_left: 0, @@ -69,23 +70,23 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { core } - pub fn pos(&self) -> usize { + pub(crate) fn pos(&self) -> usize { self.pos } - pub fn set_pos(&mut self, pos: usize) { + pub(crate) fn set_pos(&mut self, pos: usize) { self.pos = pos; } - pub fn binary_byte_offset(&self) -> Option { + pub(crate) fn binary_byte_offset(&self) -> Option { self.binary_byte_offset.map(|offset| offset as u64) } - pub fn matcher(&self) -> &M { + pub(crate) fn matcher(&self) -> &M { &self.matcher } - pub fn matched( + pub(crate) fn matched( &mut self, buf: &[u8], range: &Range, @@ -93,18 +94,18 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.sink_matched(buf, range) } - pub fn binary_data( + pub(crate) fn binary_data( &mut self, binary_byte_offset: u64, ) -> Result { self.sink.binary_data(&self.searcher, binary_byte_offset) } - pub fn begin(&mut self) -> Result { + pub(crate) fn begin(&mut self) -> Result { self.sink.begin(&self.searcher) } - pub fn finish( + pub(crate) fn finish( &mut self, byte_count: u64, binary_byte_offset: Option, @@ -115,7 +116,10 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { ) } - pub fn match_by_line(&mut self, buf: &[u8]) -> Result { + pub(crate) fn match_by_line( + &mut self, + buf: &[u8], + ) -> Result { if self.is_line_by_line_fast() { match self.match_by_line_fast(buf)? 
{ FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf), @@ -127,7 +131,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } } - pub fn roll(&mut self, buf: &[u8]) -> usize { + pub(crate) fn roll(&mut self, buf: &[u8]) -> usize { let consumed = if self.config.max_context() == 0 { buf.len() } else { @@ -141,7 +145,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { self.config.line_term.as_byte(), self.config.max_context(), ); - let consumed = cmp::max(context_start, self.last_line_visited); + let consumed = + std::cmp::max(context_start, self.last_line_visited); consumed }; self.count_lines(buf, consumed); @@ -152,7 +157,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { consumed } - pub fn detect_binary( + pub(crate) fn detect_binary( &mut self, buf: &[u8], range: &Range, @@ -177,7 +182,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { } } - pub fn before_context_by_line( + pub(crate) fn before_context_by_line( &mut self, buf: &[u8], upto: usize, @@ -213,7 +218,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { Ok(true) } - pub fn after_context_by_line( + pub(crate) fn after_context_by_line( &mut self, buf: &[u8], upto: usize, @@ -238,7 +243,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> { Ok(true) } - pub fn other_context_by_line( + pub(crate) fn other_context_by_line( &mut self, buf: &[u8], upto: usize, diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs index 217c70e4..006afad3 100644 --- a/crates/searcher/src/searcher/glue.rs +++ b/crates/searcher/src/searcher/glue.rs @@ -1,16 +1,14 @@ -use std::cmp; -use std::io; - -use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY}; -use crate::lines::{self, LineStep}; -use crate::sink::{Sink, SinkError}; use grep_matcher::Matcher; -use crate::searcher::core::Core; -use crate::searcher::{Config, Range, Searcher}; +use crate::{ + line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY}, + lines::{self, LineStep}, + searcher::{core::Core, Config, Range, 
Searcher}, + sink::{Sink, SinkError}, +}; #[derive(Debug)] -pub struct ReadByLine<'s, M, R, S> { +pub(crate) struct ReadByLine<'s, M, R, S> { config: &'s Config, core: Core<'s, M, S>, rdr: LineBufferReader<'s, R>, @@ -19,10 +17,10 @@ pub struct ReadByLine<'s, M, R, S> { impl<'s, M, R, S> ReadByLine<'s, M, R, S> where M: Matcher, - R: io::Read, + R: std::io::Read, S: Sink, { - pub fn new( + pub(crate) fn new( searcher: &'s Searcher, matcher: M, read_from: LineBufferReader<'s, R>, @@ -37,7 +35,7 @@ where } } - pub fn run(mut self) -> Result<(), S::Error> { + pub(crate) fn run(mut self) -> Result<(), S::Error> { if self.core.begin()? { while self.fill()? && self.core.match_by_line(self.rdr.buffer())? { } @@ -87,13 +85,13 @@ where } #[derive(Debug)] -pub struct SliceByLine<'s, M, S> { +pub(crate) struct SliceByLine<'s, M, S> { core: Core<'s, M, S>, slice: &'s [u8], } impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { - pub fn new( + pub(crate) fn new( searcher: &'s Searcher, matcher: M, slice: &'s [u8], @@ -103,14 +101,14 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { SliceByLine { core: Core::new(searcher, matcher, write_to, true), - slice: slice, + slice, } } - pub fn run(mut self) -> Result<(), S::Error> { + pub(crate) fn run(mut self) -> Result<(), S::Error> { if self.core.begin()? { let binary_upto = - cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY); + std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY); let binary_range = Range::new(0, binary_upto); if !self.core.detect_binary(self.slice, &binary_range)? 
{ while !self.slice[self.core.pos()..].is_empty() @@ -132,7 +130,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { } #[derive(Debug)] -pub struct MultiLine<'s, M, S> { +pub(crate) struct MultiLine<'s, M, S> { config: &'s Config, core: Core<'s, M, S>, slice: &'s [u8], @@ -140,7 +138,7 @@ pub struct MultiLine<'s, M, S> { } impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { - pub fn new( + pub(crate) fn new( searcher: &'s Searcher, matcher: M, slice: &'s [u8], @@ -151,15 +149,15 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { MultiLine { config: &searcher.config, core: Core::new(searcher, matcher, write_to, true), - slice: slice, + slice, last_match: None, } } - pub fn run(mut self) -> Result<(), S::Error> { + pub(crate) fn run(mut self) -> Result<(), S::Error> { if self.core.begin()? { let binary_upto = - cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY); + std::cmp::min(self.slice.len(), DEFAULT_BUFFER_CAPACITY); let binary_range = Range::new(0, binary_upto); if !self.core.detect_binary(self.slice, &binary_range)? 
{ let mut keepgoing = true; @@ -347,8 +345,10 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { #[cfg(test)] mod tests { - use crate::searcher::{BinaryDetection, SearcherBuilder}; - use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester}; + use crate::{ + searcher::{BinaryDetection, SearcherBuilder}, + testutil::{KitchenSink, RegexMatcher, SearcherTester}, + }; use super::*; diff --git a/crates/searcher/src/searcher/mmap.rs b/crates/searcher/src/searcher/mmap.rs index 0ab2d53f..3774a621 100644 --- a/crates/searcher/src/searcher/mmap.rs +++ b/crates/searcher/src/searcher/mmap.rs @@ -1,5 +1,4 @@ -use std::fs::File; -use std::path::Path; +use std::{fs::File, path::Path}; use memmap::Mmap; diff --git a/crates/searcher/src/searcher/mod.rs b/crates/searcher/src/searcher/mod.rs index b6b8f38f..abbc0209 100644 --- a/crates/searcher/src/searcher/mod.rs +++ b/crates/searcher/src/searcher/mod.rs @@ -1,19 +1,25 @@ -use std::cell::RefCell; -use std::cmp; -use std::fmt; -use std::fs::File; -use std::io::{self, Read}; -use std::path::Path; - -use crate::line_buffer::{ - self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder, - LineBufferReader, DEFAULT_BUFFER_CAPACITY, +use std::{ + cell::RefCell, + cmp, + fs::File, + io::{self, Read}, + path::Path, +}; + +use { + encoding_rs, + encoding_rs_io::DecodeReaderBytesBuilder, + grep_matcher::{LineTerminator, Match, Matcher}, +}; + +use crate::{ + line_buffer::{ + self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder, + LineBufferReader, DEFAULT_BUFFER_CAPACITY, + }, + searcher::glue::{MultiLine, ReadByLine, SliceByLine}, + sink::{Sink, SinkError}, }; -use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine}; -use crate::sink::{Sink, SinkError}; -use encoding_rs; -use encoding_rs_io::DecodeReaderBytesBuilder; -use grep_matcher::{LineTerminator, Match, Matcher}; pub use self::mmap::MmapChoice; @@ -232,6 +238,7 @@ impl Config { /// This error occurs when a non-sensical configuration is present 
when trying /// to construct a `Searcher` from a `SearcherBuilder`. #[derive(Clone, Debug, Eq, PartialEq)] +#[non_exhaustive] pub enum ConfigError { /// Indicates that the heap limit configuration prevents all possible /// search strategies from being used. For example, if the heap limit is @@ -250,23 +257,12 @@ pub enum ConfigError { /// The provided encoding label that could not be found. label: Vec, }, - /// Hints that destructuring should not be exhaustive. - /// - /// This enum may grow additional variants, so this makes sure clients - /// don't count on exhaustive matching. (Otherwise, adding a new variant - /// could break existing code.) - #[doc(hidden)] - __Nonexhaustive, } -impl ::std::error::Error for ConfigError { - fn description(&self) -> &str { - "grep-searcher configuration error" - } -} +impl std::error::Error for ConfigError {} -impl fmt::Display for ConfigError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { +impl std::fmt::Display for ConfigError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match *self { ConfigError::SearchUnavailable => { write!(f, "grep config error: no available searchers") @@ -284,7 +280,6 @@ impl fmt::Display for ConfigError { "grep config error: unknown encoding: {}", String::from_utf8_lossy(label), ), - _ => panic!("BUG: unexpected variant found"), } } } @@ -331,8 +326,8 @@ impl SearcherBuilder { .bom_sniffing(self.config.bom_sniffing); Searcher { - config: config, - decode_builder: decode_builder, + config, + decode_builder, decode_buffer: RefCell::new(vec![0; 8 * (1 << 10)]), line_buffer: RefCell::new(self.config.line_buffer()), multi_line_buffer: RefCell::new(vec![]), @@ -676,9 +671,9 @@ impl Searcher { log::trace!("{:?}: searching via memory map", path); return self.search_slice(matcher, &mmap, write_to); } - // Fast path for multi-line searches of files when memory maps are - // not enabled. 
This pre-allocates a buffer roughly the size of the - // file, which isn't possible when searching an arbitrary io::Read. + // Fast path for multi-line searches of files when memory maps are not + // enabled. This pre-allocates a buffer roughly the size of the file, + // which isn't possible when searching an arbitrary std::io::Read. if self.multi_line_with_matcher(&matcher) { log::trace!( "{:?}: reading entire file on to heap for mulitline", @@ -699,8 +694,8 @@ impl Searcher { } } - /// Execute a search over any implementation of `io::Read` and write the - /// results to the given sink. + /// Execute a search over any implementation of `std::io::Read` and write + /// the results to the given sink. /// /// When possible, this implementation will search the reader incrementally /// without reading it into memory. In some cases---for example, if multi @@ -1016,9 +1011,10 @@ fn slice_has_bom(slice: &[u8]) -> bool { #[cfg(test)] mod tests { - use super::*; use crate::testutil::{KitchenSink, RegexMatcher}; + use super::*; + #[test] fn config_error_heap_limit() { let matcher = RegexMatcher::new(""); diff --git a/crates/searcher/src/sink.rs b/crates/searcher/src/sink.rs index 8621e73a..67d68987 100644 --- a/crates/searcher/src/sink.rs +++ b/crates/searcher/src/sink.rs @@ -1,23 +1,24 @@ -use std::error; -use std::fmt; use std::io; use grep_matcher::LineTerminator; -use crate::lines::LineIter; -use crate::searcher::{ConfigError, Searcher}; +use crate::{ + lines::LineIter, + searcher::{ConfigError, Searcher}, +}; /// A trait that describes errors that can be reported by searchers and /// implementations of `Sink`. /// /// Unless you have a specialized use case, you probably don't need to -/// implement this trait explicitly. It's likely that using `io::Error` (which -/// implements this trait) for your error type is good enough, largely because -/// most errors that occur during search will likely be an `io::Error`. +/// implement this trait explicitly. 
It's likely that using `std::io::Error` +/// (which implements this trait) for your error type is good enough, +/// largely because most errors that occur during search will likely be an +/// `std::io::Error`. pub trait SinkError: Sized { /// A constructor for converting any value that satisfies the - /// `fmt::Display` trait into an error. - fn error_message(message: T) -> Self; + /// `std::fmt::Display` trait into an error. + fn error_message(message: T) -> Self; /// A constructor for converting I/O errors that occur while searching into /// an error of this type. @@ -36,10 +37,10 @@ pub trait SinkError: Sized { } } -/// An `io::Error` can be used as an error for `Sink` implementations out of -/// the box. +/// An `std::io::Error` can be used as an error for `Sink` implementations out +/// of the box. impl SinkError for io::Error { - fn error_message(message: T) -> io::Error { + fn error_message(message: T) -> io::Error { io::Error::new(io::ErrorKind::Other, message.to_string()) } @@ -48,11 +49,13 @@ impl SinkError for io::Error { } } -/// A `Box` can be used as an error for `Sink` +/// A `Box` can be used as an error for `Sink` /// implementations out of the box. -impl SinkError for Box { - fn error_message(message: T) -> Box { - Box::::from(message.to_string()) +impl SinkError for Box { + fn error_message( + message: T, + ) -> Box { + Box::::from(message.to_string()) } } @@ -74,7 +77,7 @@ impl SinkError for Box { /// /// * What to do when a match is found. Callers must provide this. /// * What to do when an error occurs. Callers must provide this via the -/// [`SinkError`] trait. Generally, callers can just use `io::Error` for +/// [`SinkError`] trait. Generally, callers can just use `std::io::Error` for /// this, which already implements `SinkError`. /// * What to do when a contextual line is found. By default, these are /// ignored. 
@@ -408,13 +411,14 @@ impl<'b> SinkMatch<'b> { self.line_number } - /// TODO + /// Exposes as much of the underlying buffer that was searched as possible. #[inline] pub fn buffer(&self) -> &'b [u8] { self.buffer } - /// TODO + /// Returns a range that corresponds to where [`SinkMatch::bytes`] appears + /// in [`SinkMatch::buffer`]. #[inline] pub fn bytes_range_in_buffer(&self) -> std::ops::Range { self.bytes_range_in_buffer.clone() @@ -506,16 +510,16 @@ impl<'b> SinkContext<'b> { /// an error is reported at the first match and searching stops. /// * Context lines, context breaks and summary data reported at the end of /// a search are all ignored. -/// * Implementors are forced to use `io::Error` as their error type. +/// * Implementors are forced to use `std::io::Error` as their error type. /// /// If you need more flexibility, then you're advised to implement the `Sink` /// trait directly. pub mod sinks { use std::io; - use std::str; + + use crate::searcher::Searcher; use super::{Sink, SinkError, SinkMatch}; - use crate::searcher::Searcher; /// A sink that provides line numbers and matches as strings while ignoring /// everything else. @@ -527,8 +531,8 @@ pub mod sinks { /// /// The closure accepts two parameters: a line number and a UTF-8 string /// containing the matched data. The closure returns a - /// `Result`. If the `bool` is `false`, then the search - /// stops immediately. Otherwise, searching continues. + /// `Result`. If the `bool` is `false`, then the + /// search stops immediately. Otherwise, searching continues. /// /// If multi line mode was enabled, the line number refers to the line /// number of the first line in the match. 
@@ -548,7 +552,7 @@ pub mod sinks { _searcher: &Searcher, mat: &SinkMatch<'_>, ) -> Result { - let matched = match str::from_utf8(mat.bytes()) { + let matched = match std::str::from_utf8(mat.bytes()) { Ok(matched) => matched, Err(err) => return Err(io::Error::error_message(err)), }; @@ -575,8 +579,8 @@ pub mod sinks { /// /// The closure accepts two parameters: a line number and a UTF-8 string /// containing the matched data. The closure returns a - /// `Result`. If the `bool` is `false`, then the search - /// stops immediately. Otherwise, searching continues. + /// `Result`. If the `bool` is `false`, then the + /// search stops immediately. Otherwise, searching continues. /// /// If multi line mode was enabled, the line number refers to the line /// number of the first line in the match. @@ -598,7 +602,7 @@ pub mod sinks { ) -> Result { use std::borrow::Cow; - let matched = match str::from_utf8(mat.bytes()) { + let matched = match std::str::from_utf8(mat.bytes()) { Ok(matched) => Cow::Borrowed(matched), // TODO: In theory, it should be possible to amortize // allocation here, but `std` doesn't provide such an API. @@ -624,9 +628,9 @@ pub mod sinks { /// searcher was not configured to count lines. /// /// The closure accepts two parameters: a line number and a raw byte string - /// containing the matched data. The closure returns a `Result`. If the `bool` is `false`, then the search stops - /// immediately. Otherwise, searching continues. + /// containing the matched data. The closure returns a + /// `Result`. If the `bool` is `false`, then the + /// search stops immediately. Otherwise, searching continues. /// /// If multi line mode was enabled, the line number refers to the line /// number of the first line in the match. 
diff --git a/crates/searcher/src/testutil.rs b/crates/searcher/src/testutil.rs index 659f9cd4..49d28e5e 100644 --- a/crates/searcher/src/testutil.rs +++ b/crates/searcher/src/testutil.rs @@ -1,14 +1,17 @@ use std::io::{self, Write}; -use std::str; -use bstr::ByteSlice; -use grep_matcher::{ - LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError, +use { + bstr::ByteSlice, + grep_matcher::{ + LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError, + }, + regex::bytes::{Regex, RegexBuilder}, }; -use regex::bytes::{Regex, RegexBuilder}; -use crate::searcher::{BinaryDetection, Searcher, SearcherBuilder}; -use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch}; +use crate::{ + searcher::{BinaryDetection, Searcher, SearcherBuilder}, + sink::{Sink, SinkContext, SinkFinish, SinkMatch}, +}; /// A simple regex matcher. /// @@ -18,7 +21,7 @@ use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch}; /// this optimization is detected automatically by inspecting and possibly /// modifying the regex itself.) #[derive(Clone, Debug)] -pub struct RegexMatcher { +pub(crate) struct RegexMatcher { regex: Regex, line_term: Option, every_line_is_candidate: bool, @@ -26,22 +29,18 @@ pub struct RegexMatcher { impl RegexMatcher { /// Create a new regex matcher. - pub fn new(pattern: &str) -> RegexMatcher { + pub(crate) fn new(pattern: &str) -> RegexMatcher { let regex = RegexBuilder::new(pattern) .multi_line(true) // permits ^ and $ to match at \n boundaries .build() .unwrap(); - RegexMatcher { - regex: regex, - line_term: None, - every_line_is_candidate: false, - } + RegexMatcher { regex, line_term: None, every_line_is_candidate: false } } /// Forcefully set the line terminator of this matcher. /// /// By default, this matcher has no line terminator set. - pub fn set_line_term( + pub(crate) fn set_line_term( &mut self, line_term: Option, ) -> &mut RegexMatcher { @@ -52,7 +51,10 @@ impl RegexMatcher { /// Whether to return every line as a candidate or not. 
/// /// This forces searchers to handle the case of reporting a false positive. - pub fn every_line_is_candidate(&mut self, yes: bool) -> &mut RegexMatcher { + pub(crate) fn every_line_is_candidate( + &mut self, + yes: bool, + ) -> &mut RegexMatcher { self.every_line_is_candidate = yes; self } @@ -108,17 +110,17 @@ impl Matcher for RegexMatcher { /// This is useful for tests because it lets us easily confirm whether data /// is being passed to Sink correctly. #[derive(Clone, Debug)] -pub struct KitchenSink(Vec); +pub(crate) struct KitchenSink(Vec); impl KitchenSink { /// Create a new implementation of Sink that includes everything in the /// kitchen. - pub fn new() -> KitchenSink { + pub(crate) fn new() -> KitchenSink { KitchenSink(vec![]) } /// Return the data written to this sink. - pub fn as_bytes(&self) -> &[u8] { + pub(crate) fn as_bytes(&self) -> &[u8] { &self.0 } } @@ -199,7 +201,7 @@ impl Sink for KitchenSink { /// The tester works by assuming you want to test all pertinent code paths. /// These can be trimmed down as necessary via the various builder methods. #[derive(Debug)] -pub struct SearcherTester { +pub(crate) struct SearcherTester { haystack: String, pattern: String, filter: Option<::regex::Regex>, @@ -221,7 +223,7 @@ pub struct SearcherTester { impl SearcherTester { /// Create a new tester for testing searchers. - pub fn new(haystack: &str, pattern: &str) -> SearcherTester { + pub(crate) fn new(haystack: &str, pattern: &str) -> SearcherTester { SearcherTester { haystack: haystack.to_string(), pattern: pattern.to_string(), @@ -245,7 +247,7 @@ impl SearcherTester { /// Execute the test. If the test succeeds, then this returns successfully. /// If the test fails, then it panics with an informative message. - pub fn test(&self) { + pub(crate) fn test(&self) { // Check for configuration errors. 
if self.expected_no_line_number.is_none() { panic!("an 'expected' string with NO line numbers must be given"); @@ -300,7 +302,7 @@ impl SearcherTester { /// printf debugging and only want one particular test configuration to /// execute. #[allow(dead_code)] - pub fn filter(&mut self, pattern: &str) -> &mut SearcherTester { + pub(crate) fn filter(&mut self, pattern: &str) -> &mut SearcherTester { self.filter = Some(::regex::Regex::new(pattern).unwrap()); self } @@ -311,13 +313,13 @@ impl SearcherTester { /// Note that in order to see these in tests that aren't failing, you'll /// want to use `cargo test -- --nocapture`. #[allow(dead_code)] - pub fn print_labels(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn print_labels(&mut self, yes: bool) -> &mut SearcherTester { self.print_labels = yes; self } /// Set the expected search results, without line numbers. - pub fn expected_no_line_number( + pub(crate) fn expected_no_line_number( &mut self, exp: &str, ) -> &mut SearcherTester { @@ -326,7 +328,7 @@ impl SearcherTester { } /// Set the expected search results, with line numbers. - pub fn expected_with_line_number( + pub(crate) fn expected_with_line_number( &mut self, exp: &str, ) -> &mut SearcherTester { @@ -337,7 +339,7 @@ impl SearcherTester { /// Set the expected search results, without line numbers, when performing /// a search on a slice. When not present, `expected_no_line_number` is /// used instead. - pub fn expected_slice_no_line_number( + pub(crate) fn expected_slice_no_line_number( &mut self, exp: &str, ) -> &mut SearcherTester { @@ -349,7 +351,7 @@ impl SearcherTester { /// search on a slice. When not present, `expected_with_line_number` is /// used instead. #[allow(dead_code)] - pub fn expected_slice_with_line_number( + pub(crate) fn expected_slice_with_line_number( &mut self, exp: &str, ) -> &mut SearcherTester { @@ -362,7 +364,7 @@ impl SearcherTester { /// This is enabled by default. 
When enabled, the string that is expected /// when line numbers are present must be provided. Otherwise, the expected /// string isn't required. - pub fn line_number(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn line_number(&mut self, yes: bool) -> &mut SearcherTester { self.line_number = yes; self } @@ -370,7 +372,7 @@ impl SearcherTester { /// Whether to test search using the line-by-line searcher or not. /// /// By default, this is enabled. - pub fn by_line(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn by_line(&mut self, yes: bool) -> &mut SearcherTester { self.by_line = yes; self } @@ -379,7 +381,7 @@ impl SearcherTester { /// /// By default, this is enabled. #[allow(dead_code)] - pub fn multi_line(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn multi_line(&mut self, yes: bool) -> &mut SearcherTester { self.multi_line = yes; self } @@ -387,7 +389,7 @@ impl SearcherTester { /// Whether to perform an inverted search or not. /// /// By default, this is disabled. - pub fn invert_match(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn invert_match(&mut self, yes: bool) -> &mut SearcherTester { self.invert_match = yes; self } @@ -395,7 +397,7 @@ impl SearcherTester { /// Whether to enable binary detection on all searches. /// /// By default, this is disabled. - pub fn binary_detection( + pub(crate) fn binary_detection( &mut self, detection: BinaryDetection, ) -> &mut SearcherTester { @@ -412,7 +414,10 @@ impl SearcherTester { /// impact the number of bytes searched when performing binary detection. /// For convenience, it can be useful to disable the automatic heap limit /// test. - pub fn auto_heap_limit(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn auto_heap_limit( + &mut self, + yes: bool, + ) -> &mut SearcherTester { self.auto_heap_limit = yes; self } @@ -420,7 +425,10 @@ impl SearcherTester { /// Set the number of lines to include in the "after" context. 
/// /// The default is `0`, which is equivalent to not printing any context. - pub fn after_context(&mut self, lines: usize) -> &mut SearcherTester { + pub(crate) fn after_context( + &mut self, + lines: usize, + ) -> &mut SearcherTester { self.after_context = lines; self } @@ -428,7 +436,10 @@ impl SearcherTester { /// Set the number of lines to include in the "before" context. /// /// The default is `0`, which is equivalent to not printing any context. - pub fn before_context(&mut self, lines: usize) -> &mut SearcherTester { + pub(crate) fn before_context( + &mut self, + lines: usize, + ) -> &mut SearcherTester { self.before_context = lines; self } @@ -440,7 +451,7 @@ impl SearcherTester { /// requesting an unbounded number of before and after contextual lines. /// /// This is disabled by default. - pub fn passthru(&mut self, yes: bool) -> &mut SearcherTester { + pub(crate) fn passthru(&mut self, yes: bool) -> &mut SearcherTester { self.passthru = yes; self }