Refactor buffered test.

This commit is contained in:
Andrew Gallant 2016-08-08 19:17:25 -04:00
parent 076eeff3ea
commit e97d75c024

View File

@ -105,7 +105,7 @@ impl GrepBuilder {
/// Sets whether line numbers are reported for each match. /// Sets whether line numbers are reported for each match.
/// ///
/// When enabled (disabled by default), every matching line is tagged with /// When enabled (disabled by default), every matching line is tagged with
/// its corresponding line number accoring to the line terminator that is /// its corresponding line number according to the line terminator that is
/// set. Note that this requires extra processing which can slow down /// set. Note that this requires extra processing which can slow down
/// search. /// search.
pub fn line_numbers(mut self, yes: bool) -> GrepBuilder { pub fn line_numbers(mut self, yes: bool) -> GrepBuilder {
@ -326,6 +326,8 @@ impl<'g, R: io::Read> GrepBuffered<'g, R> {
mat: &mut Match, mat: &mut Match,
) -> Result<bool> { ) -> Result<bool> {
loop { loop {
// If the starting position is equal to the end of the last search,
// then it's time to refill the buffer for more searching.
if self.start == self.lastnl { if self.start == self.lastnl {
if !try!(self.fill()) { if !try!(self.fill()) {
return Ok(false); return Ok(false);
@ -334,6 +336,8 @@ impl<'g, R: io::Read> GrepBuffered<'g, R> {
let ok = self.grep.read_match( let ok = self.grep.read_match(
mat, &self.b.buf[..self.lastnl], self.start); mat, &self.b.buf[..self.lastnl], self.start);
if !ok { if !ok {
// This causes the next iteration to refill the buffer with
// more bytes to search.
self.start = self.lastnl; self.start = self.lastnl;
continue; continue;
} }
@ -364,7 +368,7 @@ impl<'g, R: io::Read> GrepBuffered<'g, R> {
self.b.buf[0..self.b.tmp.len()].copy_from_slice(&self.b.tmp); self.b.buf[0..self.b.tmp.len()].copy_from_slice(&self.b.tmp);
// Fill the rest with fresh bytes. // Fill the rest with fresh bytes.
let nread = try!(self.rdr.read(&mut self.b.buf[self.b.tmp.len()..])); let nread = try!(self.rdr.read(&mut self.b.buf[self.b.tmp.len()..]));
// Now update our various positions. // Now update our position in all of the bytes searched.
self.pos += self.start; self.pos += self.start;
self.start = 0; self.start = 0;
// The end is the total number of bytes read plus whatever we had for // The end is the total number of bytes read plus whatever we had for
@ -374,7 +378,7 @@ impl<'g, R: io::Read> GrepBuffered<'g, R> {
// at this position since any proceeding bytes may correspond to a // at this position since any proceeding bytes may correspond to a
// partial line. // partial line.
// //
// This is a little complicated because must handle the case where // This is a little complicated because we must handle the case where
// the buffer is not full and no new line character could be found. // the buffer is not full and no new line character could be found.
// We detect this case because this could potentially be a partial // We detect this case because this could potentially be a partial
// line. If we fill our buffer and still can't find a `\n`, then we // line. If we fill our buffer and still can't find a `\n`, then we
@ -397,7 +401,7 @@ impl<'g, R: io::Read> GrepBuffered<'g, R> {
// Otherwise we try to ask for more bytes and look again. // Otherwise we try to ask for more bytes and look again.
let nread = try!( let nread = try!(
self.rdr.read(&mut self.b.buf[self.end..])); self.rdr.read(&mut self.b.buf[self.end..]));
// If we got nothing than we're at EOF and we no longer // If we got nothing then we're at EOF and we no longer
// need to care about leftovers. // need to care about leftovers.
if nread == 0 { if nread == 0 {
self.lastnl = self.end; self.lastnl = self.end;
@ -466,24 +470,30 @@ mod tests {
static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt"); static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");
#[test] fn find_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
fn buffered() { let re = Regex::new(pat).unwrap();
// Find the expected number of matches and the position of the last let mut lines = vec![];
// match. for (s, e) in re.find_iter(haystack) {
let re = Regex::new("Sherlock Holmes").unwrap(); let start = memrchr(b'\n', &haystack[..s])
let ms: Vec<_> = re.find_iter(SHERLOCK).collect(); .map_or(0, |i| i + 1);
let expected_count = ms.len(); let end = memchr(b'\n', &haystack[e..])
let (start, end) = *ms.last().unwrap(); .map_or(haystack.len(), |i| e + i);
let start = memrchr(b'\n', &SHERLOCK[..start]).unwrap() + 1; lines.push((start, end));
let end = memchr(b'\n', &SHERLOCK[end..]).unwrap() + end; }
lines
}
// Now compare it with what Grep finds. fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<(usize, usize)> {
let g = GrepBuilder::new("Sherlock Holmes").create().unwrap(); let g = GrepBuilder::new(pat).create().unwrap();
let mut bg = g.buffered_reader(Buffer::new(), SHERLOCK); let mut bg = g.buffered_reader(Buffer::new(), haystack);
let ms: Vec<_> = bg.iter().map(|r| r.unwrap()).collect(); bg.iter().map(|r| r.unwrap()).map(|m| (m.start(), m.end())).collect()
let m = ms.last().unwrap(); }
assert_eq!(expected_count, ms.len());
assert_eq!(start, m.start()); #[test]
assert_eq!(end, m.end()); fn buffered_literal() {
let expected = find_lines("Sherlock Holmes", SHERLOCK);
let got = grep_lines("Sherlock Holmes", SHERLOCK);
assert_eq!(expected.len(), got.len());
assert_eq!(expected, got);
} }
} }