From d011cea0534308ec62b2e04d3533ecd94f3341b4 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Mon, 29 Aug 2016 22:44:15 -0400 Subject: [PATCH] The search code is a mess, but... ... we now support inverted matches and line numbers! --- grep/src/search.rs | 1 + src/main.rs | 23 +++--- src/printer.rs | 28 ++++++-- src/search.rs | 171 +++++++++++++++++++++++++++++++++++++++++---- 4 files changed, 196 insertions(+), 27 deletions(-) diff --git a/grep/src/search.rs b/grep/src/search.rs index 44089391..6e35ec9e 100644 --- a/grep/src/search.rs +++ b/grep/src/search.rs @@ -150,6 +150,7 @@ impl GrepBuilder { try!(syntax::ExprBuilder::new() .allow_bytes(true) .unicode(true) + .case_insensitive(self.opts.case_insensitive) .parse(&self.pattern)); Ok(try!(nonl::remove(expr, self.opts.line_terminator))) } diff --git a/src/main.rs b/src/main.rs index 067b0847..ee5f5cf2 100644 --- a/src/main.rs +++ b/src/main.rs @@ -69,11 +69,13 @@ Options: --debug Show debug messages. --files Print each file that would be searched (but don't search). - -L, --follow Follow symlinks. --hidden Search hidden directories and files. -i, --ignore-case Case insensitive search. + -L, --follow Follow symlinks. + -n, --line-number Show line numbers (1-based). -t, --threads ARG The number of threads to use. Defaults to the number of logical CPUs. [default: 0] + -v, --invert-match Invert matching. "; #[derive(RustcDecodable)] @@ -86,6 +88,8 @@ struct Args { flag_follow: bool, flag_hidden: bool, flag_ignore_case: bool, + flag_invert_match: bool, + flag_line_number: bool, flag_threads: usize, } @@ -224,13 +228,16 @@ impl Worker { outbuf.clear(); let mut printer = self.args.printer(outbuf); { - let searcher = Searcher { - grep: &self.grep, - path: &path, - haystack: file, - inp: &mut self.inpbuf, - printer: &mut printer, - }; + let mut searcher = Searcher::new( + &mut self.inpbuf, + &mut printer, + &self.grep, + &path, + file, + ); + searcher = searcher.count(self.args.flag_count); + searcher = searcher.line_number(self.args.flag_line_number); + searcher = searcher.invert_match(self.args.flag_invert_match); if let Err(err) = searcher.run() { eprintln!("{}", err); } diff --git a/src/printer.rs b/src/printer.rs index e0885047..88fad900 100644 --- a/src/printer.rs +++ b/src/printer.rs @@ -1,14 +1,18 @@ use std::io; use std::path::Path; -use grep::Match; - macro_rules! wln { ($($tt:tt)*) => { let _ = writeln!($($tt)*); } } +macro_rules! w { + ($($tt:tt)*) => { + let _ = write!($($tt)*); + } +} + pub struct Printer { wtr: W, } @@ -40,15 +44,25 @@ impl Printer { &mut self, path: P, buf: &[u8], - m: &Match, + start: usize, + end: usize, + line_number: Option, ) { - let _ = self.wtr.write(path.as_ref().to_string_lossy().as_bytes()); - let _ = self.wtr.write(b":"); - let _ = self.wtr.write(&buf[m.start()..m.end()]); - let _ = self.wtr.write(b"\n"); + self.write(path.as_ref().to_string_lossy().as_bytes()); + self.write(b":"); + if let Some(line_number) = line_number { + self.write(line_number.to_string().as_bytes()); + self.write(b":"); + } + self.write(&buf[start..end]); + self.write(b"\n"); } pub fn binary_matched>(&mut self, path: P) { wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display()); } + + fn write(&mut self, buf: &[u8]) { + let _ = self.wtr.write_all(buf); + } } diff --git a/src/search.rs b/src/search.rs index 7d1fc523..89b5c4a1 100644 --- a/src/search.rs +++ b/src/search.rs @@ -14,6 +14,7 @@ use memchr::{memchr, memrchr}; use printer::Printer; +/// The default read size (capacity of input buffer). const READ_SIZE: usize = 8 * (1<<10); /// Error describes errors that can occur while searching. @@ -57,37 +58,165 @@ impl fmt::Display for Error { } pub struct Searcher<'a, R, W: 'a> { - pub grep: &'a Grep, - pub path: &'a Path, - pub haystack: R, - pub inp: &'a mut InputBuffer, - pub printer: &'a mut Printer, + inp: &'a mut InputBuffer, + printer: &'a mut Printer, + grep: &'a Grep, + path: &'a Path, + haystack: R, + count: bool, + invert_match: bool, + line_number: bool, } impl<'a, R: io::Read, W: io::Write> Searcher<'a, R, W> { + /// Create a new searcher. + /// + /// `inp` is a reusable input buffer that is used as scratch space by this + /// searcher. + /// + /// `printer` is used to output all results of searching. + /// + /// `grep` is the actual matcher. + /// + /// `path` is the file path being searched. + /// + /// `haystack` is a reader of text to search. + pub fn new( + inp: &'a mut InputBuffer, + printer: &'a mut Printer, + grep: &'a Grep, + path: &'a Path, + haystack: R, + ) -> Searcher<'a, R, W> { + Searcher { + inp: inp, + printer: printer, + grep: grep, + path: path, + haystack: haystack, + count: false, + invert_match: false, + line_number: false, + } + } + + /// If enabled, searching will print a count instead of each match. + /// + /// Disabled by default. + pub fn count(mut self, yes: bool) -> Self { + self.count = yes; + self + } + + /// If enabled, matching is inverted so that lines that *don't* match the + /// given pattern are treated as matches. + pub fn invert_match(mut self, yes: bool) -> Self { + self.invert_match = yes; + self + } + + /// If enabled, compute line numbers and prefix each line of output with + /// them. + pub fn line_number(mut self, yes: bool) -> Self { + self.line_number = yes; + self + } + + /// Execute the search. Results are written to the printer and the total + /// number of matches is returned. #[inline(never)] - pub fn run(mut self) -> Result<(), Error> { + pub fn run(mut self) -> Result { self.inp.reset(); + let mut match_count = 0; + let mut line_count = if self.line_number { Some(0) } else { None }; let mut mat = Match::default(); loop { let ok = try!(self.inp.fill(&mut self.haystack).map_err(|err| { Error::from_io(err, &self.path) })); if !ok { - return Ok(()); + break; } - loop { + while self.inp.pos < self.inp.lastnl { let ok = self.grep.read_match( &mut mat, &mut self.inp.buf[..self.inp.lastnl], self.inp.pos); if !ok { + if self.invert_match { + while let Some(pos) = memchr(b'\n', &self.inp.buf[self.inp.pos..self.inp.lastnl]) { + if let Some(ref mut line_count) = line_count { + *line_count += 1; + } + self.printer.matched( + &self.path, + &self.inp.buf, + self.inp.pos, + self.inp.pos + pos, + line_count, + ); + self.inp.pos += pos + 1; + match_count += 1; + if self.inp.pos >= self.inp.lastnl { + break; + } + } + self.inp.pos = self.inp.lastnl; + } else if let Some(ref mut line_count) = line_count { + *line_count += count_lines( + &self.inp.buf[self.inp.pos..self.inp.lastnl]); + } break; } - self.inp.pos = mat.end() + 1; - self.printer.matched(self.path, &self.inp.buf, &mat); + if self.invert_match { + while let Some(pos) = memchr(b'\n', &self.inp.buf[self.inp.pos..mat.start()]) { + if let Some(ref mut line_count) = line_count { + *line_count += 1; + } + self.printer.matched( + &self.path, + &self.inp.buf, + self.inp.pos, + self.inp.pos + pos, + line_count, + ); + self.inp.pos += pos + 1; + match_count += 1; + } + if let Some(ref mut line_count) = line_count { + *line_count += 1; + } + self.inp.pos = mat.end() + 1; + } else { + if let Some(ref mut line_count) = line_count { + // mat.end() always points immediately after the end + // of a match, which could be *at* a nl or past our + // current search buffer. Either way, count it as one + // more line. + *line_count += 1 + count_lines( + &self.inp.buf[self.inp.pos..mat.end()]); + } + match_count += 1; + if !self.count { + self.printer.matched( + self.path, + &self.inp.buf, + mat.start(), + mat.end(), + line_count, + ); + } + // Move the position one past the end of the match so that + // the next search starts after the nl. If we're at EOF, + // then pos will be past EOF. + self.inp.pos = mat.end() + 1; + } } } + if self.count && match_count > 0 { + self.printer.path_count(self.path, match_count); + } + Ok(match_count) } } @@ -102,9 +231,18 @@ pub struct InputBuffer { } impl InputBuffer { + /// Create a new buffer with a default capacity. pub fn new() -> InputBuffer { + InputBuffer::with_capacity(READ_SIZE) + } + + /// Create a new buffer with the capacity given. + /// + /// The capacity determines the size of each read from the underlying + /// reader. + pub fn with_capacity(cap: usize) -> InputBuffer { InputBuffer { - buf: vec![0; READ_SIZE], + buf: vec![0; cap], tmp: vec![], pos: 0, lastnl: 0, @@ -160,7 +298,7 @@ impl InputBuffer { // 2) Subsequent iterations only occur if no nl could be found. self.lastnl = memrchr(b'\n', &self.buf[self.end..self.end + n]) - .map(|i| self.end + i) + .map(|i| self.end + i + 1) .unwrap_or(0); self.end += n; } @@ -174,3 +312,12 @@ fn is_binary(buf: &[u8]) -> bool { } memchr(b'\x00', &buf[0..cmp::min(1024, buf.len())]).is_some() } + +fn count_lines(mut buf: &[u8]) -> u64 { + let mut count = 0; + while let Some(pos) = memchr(b'\n', buf) { + count += 1; + buf = &buf[pos + 1..]; + } + count +}