search: migrate to bstr

This is an initial attempt at migrating grep-searcher to use the new
bstr crate (not yet published).

This is mostly an improvement, although a significant problem is that
the grep-matcher crate controls the `Index` impls for the `Match` type,
which we use quite heavily. Thus, in order to impl `Index` for `BStr`,
we need to add bstr as a public dependency to grep-matcher. This is really
bad news because grep-matcher is supposed to be a light-weight core
crate that defines a matcher interface, which is itself intended to be a
public dependency. Thus, a semver bump on bstr will have very
undesirable ripple effects throughout ripgrep's library crates.

This would be something we could stomach if bstr was solid at 1.0 and
committed to avoiding breaking changes. But it's not there yet.
This commit is contained in:
Andrew Gallant 2019-01-20 12:32:09 -05:00
parent 7cbc535d70
commit 4b88e08f41
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
12 changed files with 169 additions and 158 deletions

11
Cargo.lock generated
View File

@ -34,6 +34,13 @@ name = "bitflags"
version = "1.0.4" version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bstr"
version = "0.0.1"
dependencies = [
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "bytecount" name = "bytecount"
version = "0.5.0" version = "0.5.0"
@ -180,7 +187,7 @@ dependencies = [
name = "grep-matcher" name = "grep-matcher"
version = "0.1.1" version = "0.1.1"
dependencies = [ dependencies = [
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "bstr 0.0.1",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -222,13 +229,13 @@ dependencies = [
name = "grep-searcher" name = "grep-searcher"
version = "0.1.1" version = "0.1.1"
dependencies = [ dependencies = [
"bstr 0.0.1",
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)", "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.1.1", "grep-matcher 0.1.1",
"grep-regex 0.1.1", "grep-regex 0.1.1",
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
] ]

View File

@ -13,8 +13,11 @@ keywords = ["regex", "pattern", "trait"]
license = "Unlicense/MIT" license = "Unlicense/MIT"
autotests = false autotests = false
[dependencies] [dependencies.bstr]
memchr = "2.1" version = "*"
path = "/home/andrew/rust/bstr"
default-features = false
features = ["std"]
[dev-dependencies] [dev-dependencies]
regex = "1.1" regex = "1.1"

View File

@ -1,6 +1,6 @@
use std::str; use std::str;
use memchr::memchr; use bstr::B;
/// Interpolate capture references in `replacement` and write the interpolation /// Interpolate capture references in `replacement` and write the interpolation
/// result to `dst`. References in `replacement` take the form of $N or $name, /// result to `dst`. References in `replacement` take the form of $N or $name,
@ -22,7 +22,7 @@ pub fn interpolate<A, N>(
N: FnMut(&str) -> Option<usize> N: FnMut(&str) -> Option<usize>
{ {
while !replacement.is_empty() { while !replacement.is_empty() {
match memchr(b'$', replacement) { match B(replacement).find_byte(b'$') {
None => break, None => break,
Some(i) => { Some(i) => {
dst.extend(&replacement[..i]); dst.extend(&replacement[..i]);

View File

@ -38,13 +38,15 @@ implementations.
#![deny(missing_docs)] #![deny(missing_docs)]
extern crate memchr; extern crate bstr;
use std::fmt; use std::fmt;
use std::io; use std::io;
use std::ops; use std::ops;
use std::u64; use std::u64;
use bstr::BStr;
use interpolate::interpolate; use interpolate::interpolate;
mod interpolate; mod interpolate;
@ -180,6 +182,22 @@ impl ops::IndexMut<Match> for [u8] {
} }
} }
impl ops::Index<Match> for BStr {
type Output = BStr;
#[inline]
fn index(&self, index: Match) -> &BStr {
&self[index.start..index.end]
}
}
impl ops::IndexMut<Match> for BStr {
#[inline]
fn index_mut(&mut self, index: Match) -> &mut BStr {
&mut self[index.start..index.end]
}
}
impl ops::Index<Match> for str { impl ops::Index<Match> for str {
type Output = str; type Output = str;

View File

@ -18,9 +18,14 @@ encoding_rs = "0.8.14"
encoding_rs_io = "0.1.3" encoding_rs_io = "0.1.3"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" } grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
log = "0.4.5" log = "0.4.5"
memchr = "2.1"
memmap = "0.7" memmap = "0.7"
[dependencies.bstr]
version = "*"
path = "/home/andrew/rust/bstr"
default-features = false
features = ["std"]
[dev-dependencies] [dev-dependencies]
grep-regex = { version = "0.1.1", path = "../grep-regex" } grep-regex = { version = "0.1.1", path = "../grep-regex" }
regex = "1.1" regex = "1.1"

View File

@ -99,13 +99,13 @@ searches stdin.
#![deny(missing_docs)] #![deny(missing_docs)]
extern crate bstr;
extern crate bytecount; extern crate bytecount;
extern crate encoding_rs; extern crate encoding_rs;
extern crate encoding_rs_io; extern crate encoding_rs_io;
extern crate grep_matcher; extern crate grep_matcher;
#[macro_use] #[macro_use]
extern crate log; extern crate log;
extern crate memchr;
extern crate memmap; extern crate memmap;
#[cfg(test)] #[cfg(test)]
extern crate regex; extern crate regex;

View File

@ -1,8 +1,7 @@
use std::cmp; use std::cmp;
use std::io; use std::io;
use std::ptr;
use memchr::{memchr, memrchr}; use bstr::{BStr, BString};
/// The default buffer capacity that we use for the line buffer. /// The default buffer capacity that we use for the line buffer.
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
@ -123,7 +122,7 @@ impl LineBufferBuilder {
pub fn build(&self) -> LineBuffer { pub fn build(&self) -> LineBuffer {
LineBuffer { LineBuffer {
config: self.config, config: self.config,
buf: vec![0; self.config.capacity], buf: BString::from(vec![0; self.config.capacity]),
pos: 0, pos: 0,
last_lineterm: 0, last_lineterm: 0,
end: 0, end: 0,
@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
} }
/// Return the contents of this buffer. /// Return the contents of this buffer.
pub fn buffer(&self) -> &[u8] { pub fn buffer(&self) -> &BStr {
self.line_buffer.buffer() self.line_buffer.buffer()
} }
@ -284,7 +283,7 @@ pub struct LineBuffer {
/// The configuration of this buffer. /// The configuration of this buffer.
config: Config, config: Config,
/// The primary buffer with which to hold data. /// The primary buffer with which to hold data.
buf: Vec<u8>, buf: BString,
/// The current position of this buffer. This is always a valid sliceable /// The current position of this buffer. This is always a valid sliceable
/// index into `buf`, and its maximum value is the length of `buf`. /// index into `buf`, and its maximum value is the length of `buf`.
pos: usize, pos: usize,
@ -339,13 +338,13 @@ impl LineBuffer {
} }
/// Return the contents of this buffer. /// Return the contents of this buffer.
fn buffer(&self) -> &[u8] { fn buffer(&self) -> &BStr {
&self.buf[self.pos..self.last_lineterm] &self.buf[self.pos..self.last_lineterm]
} }
/// Return the contents of the free space beyond the end of the buffer as /// Return the contents of the free space beyond the end of the buffer as
/// a mutable slice. /// a mutable slice.
fn free_buffer(&mut self) -> &mut [u8] { fn free_buffer(&mut self) -> &mut BStr {
&mut self.buf[self.end..] &mut self.buf[self.end..]
} }
@ -396,7 +395,7 @@ impl LineBuffer {
assert_eq!(self.pos, 0); assert_eq!(self.pos, 0);
loop { loop {
self.ensure_capacity()?; self.ensure_capacity()?;
let readlen = rdr.read(self.free_buffer())?; let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
if readlen == 0 { if readlen == 0 {
// We're only done reading for good once the caller has // We're only done reading for good once the caller has
// consumed everything. // consumed everything.
@ -416,7 +415,7 @@ impl LineBuffer {
match self.config.binary { match self.config.binary {
BinaryDetection::None => {} // nothing to do BinaryDetection::None => {} // nothing to do
BinaryDetection::Quit(byte) => { BinaryDetection::Quit(byte) => {
if let Some(i) = memchr(byte, newbytes) { if let Some(i) = newbytes.find_byte(byte) {
self.end = oldend + i; self.end = oldend + i;
self.last_lineterm = self.end; self.last_lineterm = self.end;
self.binary_byte_offset = self.binary_byte_offset =
@ -444,7 +443,7 @@ impl LineBuffer {
} }
// Update our `last_lineterm` positions if we read one. // Update our `last_lineterm` positions if we read one.
if let Some(i) = memrchr(self.config.lineterm, newbytes) { if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
self.last_lineterm = oldend + i + 1; self.last_lineterm = oldend + i + 1;
return Ok(true); return Ok(true);
} }
@ -467,40 +466,8 @@ impl LineBuffer {
return; return;
} }
assert!(self.pos < self.end && self.end <= self.buf.len());
let roll_len = self.end - self.pos; let roll_len = self.end - self.pos;
unsafe { self.buf.copy_within(self.pos.., 0);
// SAFETY: A buffer contains Copy data, so there's no problem
// moving it around. Safety also depends on our indices being
// in bounds, which they should always be, and we enforce with
// an assert above.
//
// It seems like it should be possible to do this in safe code that
// results in the same codegen. I tried the obvious:
//
// for (src, dst) in (self.pos..self.end).zip(0..) {
// self.buf[dst] = self.buf[src];
// }
//
// But the above does not work, and in fact compiles down to a slow
// byte-by-byte loop. I tried a few other minor variations, but
// alas, better minds might prevail.
//
// Overall, this doesn't save us *too* much. It mostly matters when
// the number of bytes we're copying is large, which can happen
// if the searcher is asked to produce a lot of context. We could
// decide this isn't worth it, but it does make an appreciable
// impact at or around the context=30 range on my machine.
//
// We could also use a temporary buffer that compiles down to two
// memcpys and is faster than the byte-at-a-time loop, but it
// complicates our options for limiting memory allocation a bit.
ptr::copy(
self.buf[self.pos..].as_ptr(),
self.buf.as_mut_ptr(),
roll_len,
);
}
self.pos = 0; self.pos = 0;
self.last_lineterm = roll_len; self.last_lineterm = roll_len;
self.end = roll_len; self.end = roll_len;
@ -536,14 +503,15 @@ impl LineBuffer {
} }
} }
/// Replaces `src` with `replacement` in bytes. /// Replaces `src` with `replacement` in bytes, and return the offset of the
fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> { /// first replacement, if one exists.
fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
if src == replacement { if src == replacement {
return None; return None;
} }
let mut first_pos = None; let mut first_pos = None;
let mut pos = 0; let mut pos = 0;
while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) { while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
if first_pos.is_none() { if first_pos.is_none() {
first_pos = Some(i); first_pos = Some(i);
} }
@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::str; use std::str;
use bstr::BString;
use super::*; use super::*;
const SHERLOCK: &'static str = "\ const SHERLOCK: &'static str = "\
@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
slice.to_string() slice.to_string()
} }
fn btos(slice: &[u8]) -> &str {
str::from_utf8(slice).unwrap()
}
fn replace_str( fn replace_str(
slice: &str, slice: &str,
src: u8, src: u8,
replacement: u8, replacement: u8,
) -> (String, Option<usize>) { ) -> (String, Option<usize>) {
let mut dst = slice.to_string().into_bytes(); let mut dst = BString::from(slice);
let result = replace_bytes(&mut dst, src, replacement); let result = replace_bytes(&mut dst, src, replacement);
(String::from_utf8(dst).unwrap(), result) (dst.into_string().unwrap(), result)
} }
#[test] #[test]
@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nlisa\n"); assert_eq!(rdr.buffer(), "homer\nlisa\n");
assert_eq!(rdr.absolute_byte_offset(), 0); assert_eq!(rdr.absolute_byte_offset(), 0);
rdr.consume(5); rdr.consume(5);
assert_eq!(rdr.absolute_byte_offset(), 5); assert_eq!(rdr.absolute_byte_offset(), 5);
@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
assert_eq!(rdr.absolute_byte_offset(), 11); assert_eq!(rdr.absolute_byte_offset(), 11);
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "maggie"); assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n"); assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "\n"); assert_eq!(rdr.buffer(), "\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "\n\n"); assert_eq!(rdr.buffer(), "\n\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
let mut linebuf = LineBufferBuilder::new().capacity(1).build(); let mut linebuf = LineBufferBuilder::new().capacity(1).build();
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
let mut got = vec![]; let mut got = BString::new();
while rdr.fill().unwrap() { while rdr.fill().unwrap() {
got.extend(rdr.buffer()); got.push(rdr.buffer());
rdr.consume_all(); rdr.consume_all();
} }
assert_eq!(bytes, btos(&got)); assert_eq!(bytes, got);
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64); assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
assert_eq!(rdr.binary_byte_offset(), None); assert_eq!(rdr.binary_byte_offset(), None);
} }
@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\n"); assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all(); rdr.consume_all();
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "lisa\n"); assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all(); rdr.consume_all();
// This returns an error because while we have just enough room to // This returns an error because while we have just enough room to
@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
assert!(rdr.fill().is_err()); assert!(rdr.fill().is_err());
// We can mush on though! // We can mush on though!
assert_eq!(btos(rdr.buffer()), "m"); assert_eq!(rdr.buffer(), "m");
rdr.consume_all(); rdr.consume_all();
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "aggie"); assert_eq!(rdr.buffer(), "aggie");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\n"); assert_eq!(rdr.buffer(), "homer\n");
rdr.consume_all(); rdr.consume_all();
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "lisa\n"); assert_eq!(rdr.buffer(), "lisa\n");
rdr.consume_all(); rdr.consume_all();
// We have just enough space. // We have just enough space.
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "maggie"); assert_eq!(rdr.buffer(), "maggie");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(rdr.fill().is_err()); assert!(rdr.fill().is_err());
assert_eq!(btos(rdr.buffer()), ""); assert_eq!(rdr.buffer(), "");
} }
#[test] #[test]
@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n"); assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nli"); assert_eq!(rdr.buffer(), "homer\nli");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf); let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), ""); assert_eq!(rdr.buffer(), "");
assert_eq!(rdr.absolute_byte_offset(), 0); assert_eq!(rdr.absolute_byte_offset(), 0);
assert_eq!(rdr.binary_byte_offset(), Some(0)); assert_eq!(rdr.binary_byte_offset(), Some(0));
} }
@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n"); assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie"); assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "\ assert_eq!(rdr.buffer(), "\
For the Doctor Watsons of this world, as opposed to the Sherlock For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, s\ Holmeses, s\
"); ");
@ -901,7 +866,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n"); assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -920,7 +885,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n"); assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -939,7 +904,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n"); assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());
@ -958,7 +923,7 @@ Holmeses, s\
assert!(rdr.buffer().is_empty()); assert!(rdr.buffer().is_empty());
assert!(rdr.fill().unwrap()); assert!(rdr.fill().unwrap());
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n"); assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
rdr.consume_all(); rdr.consume_all();
assert!(!rdr.fill().unwrap()); assert!(!rdr.fill().unwrap());

View File

@ -2,8 +2,8 @@
A collection of routines for performing operations on lines. A collection of routines for performing operations on lines.
*/ */
use bstr::{B, BStr};
use bytecount; use bytecount;
use memchr::{memchr, memrchr};
use grep_matcher::{LineTerminator, Match}; use grep_matcher::{LineTerminator, Match};
/// An iterator over lines in a particular slice of bytes. /// An iterator over lines in a particular slice of bytes.
@ -14,7 +14,7 @@ use grep_matcher::{LineTerminator, Match};
/// `'b` refers to the lifetime of the underlying bytes. /// `'b` refers to the lifetime of the underlying bytes.
#[derive(Debug)] #[derive(Debug)]
pub struct LineIter<'b> { pub struct LineIter<'b> {
bytes: &'b [u8], bytes: &'b BStr,
stepper: LineStep, stepper: LineStep,
} }
@ -23,7 +23,7 @@ impl<'b> LineIter<'b> {
/// are terminated by `line_term`. /// are terminated by `line_term`.
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> { pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
LineIter { LineIter {
bytes: bytes, bytes: B(bytes),
stepper: LineStep::new(line_term, 0, bytes.len()), stepper: LineStep::new(line_term, 0, bytes.len()),
} }
} }
@ -33,7 +33,7 @@ impl<'b> Iterator for LineIter<'b> {
type Item = &'b [u8]; type Item = &'b [u8];
fn next(&mut self) -> Option<&'b [u8]> { fn next(&mut self) -> Option<&'b [u8]> {
self.stepper.next_match(self.bytes).map(|m| &self.bytes[m]) self.stepper.next_match(self.bytes).map(|m| self.bytes[m].as_bytes())
} }
} }
@ -73,19 +73,19 @@ impl LineStep {
/// The range returned includes the line terminator. Ranges are always /// The range returned includes the line terminator. Ranges are always
/// non-empty. /// non-empty.
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> { pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
self.next_impl(bytes) self.next_impl(B(bytes))
} }
/// Like next, but returns a `Match` instead of a tuple. /// Like next, but returns a `Match` instead of a tuple.
#[inline(always)] #[inline(always)]
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> { pub(crate) fn next_match(&mut self, bytes: &BStr) -> Option<Match> {
self.next_impl(bytes).map(|(s, e)| Match::new(s, e)) self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
} }
#[inline(always)] #[inline(always)]
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> { fn next_impl(&mut self, mut bytes: &BStr) -> Option<(usize, usize)> {
bytes = &bytes[..self.end]; bytes = &bytes[..self.end];
match memchr(self.line_term, &bytes[self.pos..]) { match bytes[self.pos..].find_byte(self.line_term) {
None => { None => {
if self.pos < bytes.len() { if self.pos < bytes.len() {
let m = (self.pos, bytes.len()); let m = (self.pos, bytes.len());
@ -109,15 +109,15 @@ impl LineStep {
} }
/// Count the number of occurrences of `line_term` in `bytes`. /// Count the number of occurrences of `line_term` in `bytes`.
pub fn count(bytes: &[u8], line_term: u8) -> u64 { pub fn count(bytes: &BStr, line_term: u8) -> u64 {
bytecount::count(bytes, line_term) as u64 bytecount::count(bytes.as_bytes(), line_term) as u64
} }
/// Given a line that possibly ends with a terminator, return that line without /// Given a line that possibly ends with a terminator, return that line without
/// the terminator. /// the terminator.
#[inline(always)] #[inline(always)]
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] { pub fn without_terminator(bytes: &BStr, line_term: LineTerminator) -> &BStr {
let line_term = line_term.as_bytes(); let line_term = BStr::new(line_term.as_bytes());
let start = bytes.len().saturating_sub(line_term.len()); let start = bytes.len().saturating_sub(line_term.len());
if bytes.get(start..) == Some(line_term) { if bytes.get(start..) == Some(line_term) {
return &bytes[..bytes.len() - line_term.len()]; return &bytes[..bytes.len() - line_term.len()];
@ -131,18 +131,20 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
/// Line terminators are considered part of the line they terminate. /// Line terminators are considered part of the line they terminate.
#[inline(always)] #[inline(always)]
pub fn locate( pub fn locate(
bytes: &[u8], bytes: &BStr,
line_term: u8, line_term: u8,
range: Match, range: Match,
) -> Match { ) -> Match {
let line_start = memrchr(line_term, &bytes[0..range.start()]) let line_start = bytes[..range.start()]
.rfind_byte(line_term)
.map_or(0, |i| i + 1); .map_or(0, |i| i + 1);
let line_end = let line_end =
if range.end() > line_start && bytes[range.end() - 1] == line_term { if range.end() > line_start && bytes[range.end() - 1] == line_term {
range.end() range.end()
} else { } else {
memchr(line_term, &bytes[range.end()..]) bytes[range.end()..]
.map_or(bytes.len(), |i| range.end() + i + 1) .find_byte(line_term)
.map_or(bytes.len(), |i| range.end() + i + 1)
}; };
Match::new(line_start, line_end) Match::new(line_start, line_end)
} }
@ -155,7 +157,7 @@ pub fn locate(
/// ///
/// If `bytes` ends with a line terminator, then the terminator itself is /// If `bytes` ends with a line terminator, then the terminator itself is
/// considered part of the last line. /// considered part of the last line.
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize { pub fn preceding(bytes: &BStr, line_term: u8, count: usize) -> usize {
preceding_by_pos(bytes, bytes.len(), line_term, count) preceding_by_pos(bytes, bytes.len(), line_term, count)
} }
@ -169,7 +171,7 @@ pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos /// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`. /// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
fn preceding_by_pos( fn preceding_by_pos(
bytes: &[u8], bytes: &BStr,
mut pos: usize, mut pos: usize,
line_term: u8, line_term: u8,
mut count: usize, mut count: usize,
@ -180,7 +182,7 @@ fn preceding_by_pos(
pos -= 1; pos -= 1;
} }
loop { loop {
match memrchr(line_term, &bytes[..pos]) { match bytes[..pos].rfind_byte(line_term) {
None => { None => {
return 0; return 0;
} }
@ -201,7 +203,10 @@ fn preceding_by_pos(
mod tests { mod tests {
use std::ops::Range; use std::ops::Range;
use std::str; use std::str;
use bstr::B;
use grep_matcher::Match; use grep_matcher::Match;
use super::*; use super::*;
const SHERLOCK: &'static str = "\ const SHERLOCK: &'static str = "\
@ -220,7 +225,7 @@ and exhibited clearly, with a label attached.\
fn lines(text: &str) -> Vec<&str> { fn lines(text: &str) -> Vec<&str> {
let mut results = vec![]; let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len()); let mut it = LineStep::new(b'\n', 0, text.len());
while let Some(m) = it.next_match(text.as_bytes()) { while let Some(m) = it.next_match(B(text)) {
results.push(&text[m]); results.push(&text[m]);
} }
results results
@ -229,26 +234,26 @@ and exhibited clearly, with a label attached.\
fn line_ranges(text: &str) -> Vec<Range<usize>> { fn line_ranges(text: &str) -> Vec<Range<usize>> {
let mut results = vec![]; let mut results = vec![];
let mut it = LineStep::new(b'\n', 0, text.len()); let mut it = LineStep::new(b'\n', 0, text.len());
while let Some(m) = it.next_match(text.as_bytes()) { while let Some(m) = it.next_match(B(text)) {
results.push(m.start()..m.end()); results.push(m.start()..m.end());
} }
results results
} }
fn prev(text: &str, pos: usize, count: usize) -> usize { fn prev(text: &str, pos: usize, count: usize) -> usize {
preceding_by_pos(text.as_bytes(), pos, b'\n', count) preceding_by_pos(B(text), pos, b'\n', count)
} }
fn loc(text: &str, start: usize, end: usize) -> Match { fn loc(text: &str, start: usize, end: usize) -> Match {
locate(text.as_bytes(), b'\n', Match::new(start, end)) locate(B(text), b'\n', Match::new(start, end))
} }
#[test] #[test]
fn line_count() { fn line_count() {
assert_eq!(0, count(b"", b'\n')); assert_eq!(0, count(B(""), b'\n'));
assert_eq!(1, count(b"\n", b'\n')); assert_eq!(1, count(B("\n"), b'\n'));
assert_eq!(2, count(b"\n\n", b'\n')); assert_eq!(2, count(B("\n\n"), b'\n'));
assert_eq!(2, count(b"a\nb\nc", b'\n')); assert_eq!(2, count(B("a\nb\nc"), b'\n'));
} }
#[test] #[test]
@ -331,7 +336,7 @@ and exhibited clearly, with a label attached.\
#[test] #[test]
fn preceding_lines_doc() { fn preceding_lines_doc() {
// These are the examples mentions in the documentation of `preceding`. // These are the examples mentions in the documentation of `preceding`.
let bytes = b"abc\nxyz\n"; let bytes = B("abc\nxyz\n");
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0)); assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0)); assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1)); assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));

View File

@ -1,6 +1,6 @@
use std::cmp; use std::cmp;
use memchr::memchr; use bstr::BStr;
use grep_matcher::{LineMatchKind, Matcher}; use grep_matcher::{LineMatchKind, Matcher};
use lines::{self, LineStep}; use lines::{self, LineStep};
@ -84,7 +84,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn matched( pub fn matched(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
range: &Range, range: &Range,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
self.sink_matched(buf, range) self.sink_matched(buf, range)
@ -107,7 +107,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}) })
} }
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> { pub fn match_by_line(&mut self, buf: &BStr) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() { if self.is_line_by_line_fast() {
self.match_by_line_fast(buf) self.match_by_line_fast(buf)
} else { } else {
@ -115,7 +115,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
} }
} }
pub fn roll(&mut self, buf: &[u8]) -> usize { pub fn roll(&mut self, buf: &BStr) -> usize {
let consumed = let consumed =
if self.config.max_context() == 0 { if self.config.max_context() == 0 {
buf.len() buf.len()
@ -141,7 +141,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
consumed consumed
} }
pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool { pub fn detect_binary(&mut self, buf: &BStr, range: &Range) -> bool {
if self.binary_byte_offset.is_some() { if self.binary_byte_offset.is_some() {
return true; return true;
} }
@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
BinaryDetection::Quit(b) => b, BinaryDetection::Quit(b) => b,
_ => return false, _ => return false,
}; };
if let Some(i) = memchr(binary_byte, &buf[*range]) { if let Some(i) = buf[*range].find_byte(binary_byte) {
self.binary_byte_offset = Some(range.start() + i); self.binary_byte_offset = Some(range.start() + i);
true true
} else { } else {
@ -159,7 +159,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn before_context_by_line( pub fn before_context_by_line(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
upto: usize, upto: usize,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
if self.config.before_context == 0 { if self.config.before_context == 0 {
@ -194,7 +194,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn after_context_by_line( pub fn after_context_by_line(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
upto: usize, upto: usize,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
if self.after_context_left == 0 { if self.after_context_left == 0 {
@ -219,7 +219,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
pub fn other_context_by_line( pub fn other_context_by_line(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
upto: usize, upto: usize,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
let range = Range::new(self.last_line_visited, upto); let range = Range::new(self.last_line_visited, upto);
@ -236,7 +236,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true) Ok(true)
} }
fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> { fn match_by_line_slow(&mut self, buf: &BStr) -> Result<bool, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
let range = Range::new(self.pos(), buf.len()); let range = Range::new(self.pos(), buf.len());
@ -255,7 +255,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line], &buf[line],
self.config.line_term, self.config.line_term,
); );
match self.matcher.shortest_match(slice) { match self.matcher.shortest_match(slice.as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)), Err(err) => return Err(S::Error::error_message(err)),
Ok(result) => result.is_some(), Ok(result) => result.is_some(),
} }
@ -281,7 +281,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
Ok(true) Ok(true)
} }
fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> { fn match_by_line_fast(&mut self, buf: &BStr) -> Result<bool, S::Error> {
debug_assert!(!self.config.passthru); debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() { while !buf[self.pos()..].is_empty() {
@ -316,7 +316,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)] #[inline(always)]
fn match_by_line_fast_invert( fn match_by_line_fast_invert(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
assert!(self.config.invert_match); assert!(self.config.invert_match);
@ -357,14 +357,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)] #[inline(always)]
fn find_by_line_fast( fn find_by_line_fast(
&self, &self,
buf: &[u8], buf: &BStr,
) -> Result<Option<Range>, S::Error> { ) -> Result<Option<Range>, S::Error> {
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher)); debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
debug_assert!(self.is_line_by_line_fast()); debug_assert!(self.is_line_by_line_fast());
let mut pos = self.pos(); let mut pos = self.pos();
while !buf[pos..].is_empty() { while !buf[pos..].is_empty() {
match self.matcher.find_candidate_line(&buf[pos..]) { match self.matcher.find_candidate_line(buf[pos..].as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)), Err(err) => return Err(S::Error::error_message(err)),
Ok(None) => return Ok(None), Ok(None) => return Ok(None),
Ok(Some(LineMatchKind::Confirmed(i))) => { Ok(Some(LineMatchKind::Confirmed(i))) => {
@ -396,7 +396,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&buf[line], &buf[line],
self.config.line_term, self.config.line_term,
); );
match self.matcher.is_match(slice) { match self.matcher.is_match(slice.as_bytes()) {
Err(err) => return Err(S::Error::error_message(err)), Err(err) => return Err(S::Error::error_message(err)),
Ok(true) => return Ok(Some(line)), Ok(true) => return Ok(Some(line)),
Ok(false) => { Ok(false) => {
@ -413,7 +413,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
#[inline(always)] #[inline(always)]
fn sink_matched( fn sink_matched(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
range: &Range, range: &Range,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
if self.binary && self.detect_binary(buf, range) { if self.binary && self.detect_binary(buf, range) {
@ -438,7 +438,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&self.searcher, &self.searcher,
&SinkMatch { &SinkMatch {
line_term: self.config.line_term, line_term: self.config.line_term,
bytes: linebuf, bytes: linebuf.as_bytes(),
absolute_byte_offset: offset, absolute_byte_offset: offset,
line_number: self.line_number, line_number: self.line_number,
}, },
@ -454,7 +454,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
fn sink_before_context( fn sink_before_context(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
range: &Range, range: &Range,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
if self.binary && self.detect_binary(buf, range) { if self.binary && self.detect_binary(buf, range) {
@ -466,7 +466,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&self.searcher, &self.searcher,
&SinkContext { &SinkContext {
line_term: self.config.line_term, line_term: self.config.line_term,
bytes: &buf[*range], bytes: buf[*range].as_bytes(),
kind: SinkContextKind::Before, kind: SinkContextKind::Before,
absolute_byte_offset: offset, absolute_byte_offset: offset,
line_number: self.line_number, line_number: self.line_number,
@ -482,7 +482,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
fn sink_after_context( fn sink_after_context(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
range: &Range, range: &Range,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
assert!(self.after_context_left >= 1); assert!(self.after_context_left >= 1);
@ -496,7 +496,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&self.searcher, &self.searcher,
&SinkContext { &SinkContext {
line_term: self.config.line_term, line_term: self.config.line_term,
bytes: &buf[*range], bytes: buf[*range].as_bytes(),
kind: SinkContextKind::After, kind: SinkContextKind::After,
absolute_byte_offset: offset, absolute_byte_offset: offset,
line_number: self.line_number, line_number: self.line_number,
@ -513,7 +513,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
fn sink_other_context( fn sink_other_context(
&mut self, &mut self,
buf: &[u8], buf: &BStr,
range: &Range, range: &Range,
) -> Result<bool, S::Error> { ) -> Result<bool, S::Error> {
if self.binary && self.detect_binary(buf, range) { if self.binary && self.detect_binary(buf, range) {
@ -525,7 +525,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
&self.searcher, &self.searcher,
&SinkContext { &SinkContext {
line_term: self.config.line_term, line_term: self.config.line_term,
bytes: &buf[*range], bytes: buf[*range].as_bytes(),
kind: SinkContextKind::Other, kind: SinkContextKind::Other,
absolute_byte_offset: offset, absolute_byte_offset: offset,
line_number: self.line_number, line_number: self.line_number,
@ -555,7 +555,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
} }
} }
fn count_lines(&mut self, buf: &[u8], upto: usize) { fn count_lines(&mut self, buf: &BStr, upto: usize) {
if let Some(ref mut line_number) = self.line_number { if let Some(ref mut line_number) = self.line_number {
if self.last_line_counted >= upto { if self.last_line_counted >= upto {
return; return;

View File

@ -1,7 +1,9 @@
use std::cmp; use std::cmp;
use std::io; use std::io;
use bstr::BStr;
use grep_matcher::Matcher; use grep_matcher::Matcher;
use lines::{self, LineStep}; use lines::{self, LineStep};
use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader}; use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader};
use sink::{Sink, SinkError}; use sink::{Sink, SinkError};
@ -77,14 +79,14 @@ where M: Matcher,
pub struct SliceByLine<'s, M: 's, S> { pub struct SliceByLine<'s, M: 's, S> {
config: &'s Config, config: &'s Config,
core: Core<'s, M, S>, core: Core<'s, M, S>,
slice: &'s [u8], slice: &'s BStr,
} }
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> { impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
pub fn new( pub fn new(
searcher: &'s Searcher, searcher: &'s Searcher,
matcher: M, matcher: M,
slice: &'s [u8], slice: &'s BStr,
write_to: S, write_to: S,
) -> SliceByLine<'s, M, S> { ) -> SliceByLine<'s, M, S> {
debug_assert!(!searcher.multi_line_with_matcher(&matcher)); debug_assert!(!searcher.multi_line_with_matcher(&matcher));
@ -127,7 +129,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
pub struct MultiLine<'s, M: 's, S> { pub struct MultiLine<'s, M: 's, S> {
config: &'s Config, config: &'s Config,
core: Core<'s, M, S>, core: Core<'s, M, S>,
slice: &'s [u8], slice: &'s BStr,
last_match: Option<Range>, last_match: Option<Range>,
} }
@ -135,7 +137,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
pub fn new( pub fn new(
searcher: &'s Searcher, searcher: &'s Searcher,
matcher: M, matcher: M,
slice: &'s [u8], slice: &'s BStr,
write_to: S, write_to: S,
) -> MultiLine<'s, M, S> { ) -> MultiLine<'s, M, S> {
debug_assert!(searcher.multi_line_with_matcher(&matcher)); debug_assert!(searcher.multi_line_with_matcher(&matcher));
@ -306,7 +308,8 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
} }
fn find(&mut self) -> Result<Option<Range>, S::Error> { fn find(&mut self) -> Result<Option<Range>, S::Error> {
match self.core.matcher().find(&self.slice[self.core.pos()..]) { let haystack = &self.slice[self.core.pos()..];
match self.core.matcher().find(haystack.as_bytes()) {
Err(err) => Err(S::Error::error_message(err)), Err(err) => Err(S::Error::error_message(err)),
Ok(None) => Ok(None), Ok(None) => Ok(None),
Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))), Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),

View File

@ -5,6 +5,7 @@ use std::fs::File;
use std::io::{self, Read}; use std::io::{self, Read};
use std::path::Path; use std::path::Path;
use bstr::{B, BStr, BString};
use encoding_rs; use encoding_rs;
use encoding_rs_io::DecodeReaderBytesBuilder; use encoding_rs_io::DecodeReaderBytesBuilder;
use grep_matcher::{LineTerminator, Match, Matcher}; use grep_matcher::{LineTerminator, Match, Matcher};
@ -311,9 +312,9 @@ impl SearcherBuilder {
Searcher { Searcher {
config: config, config: config,
decode_builder: decode_builder, decode_builder: decode_builder,
decode_buffer: RefCell::new(vec![0; 8 * (1<<10)]), decode_buffer: RefCell::new(BString::from(vec![0; 8 * (1<<10)])),
line_buffer: RefCell::new(self.config.line_buffer()), line_buffer: RefCell::new(self.config.line_buffer()),
multi_line_buffer: RefCell::new(vec![]), multi_line_buffer: RefCell::new(BString::new()),
} }
} }
@ -543,7 +544,7 @@ pub struct Searcher {
/// through the underlying bytes with no additional overhead. /// through the underlying bytes with no additional overhead.
decode_builder: DecodeReaderBytesBuilder, decode_builder: DecodeReaderBytesBuilder,
/// A buffer that is used for transcoding scratch space. /// A buffer that is used for transcoding scratch space.
decode_buffer: RefCell<Vec<u8>>, decode_buffer: RefCell<BString>,
/// A line buffer for use in line oriented searching. /// A line buffer for use in line oriented searching.
/// ///
/// We wrap it in a RefCell to permit lending out borrows of `Searcher` /// We wrap it in a RefCell to permit lending out borrows of `Searcher`
@ -555,7 +556,7 @@ pub struct Searcher {
/// multi line search. In particular, multi line searches cannot be /// multi line search. In particular, multi line searches cannot be
/// performed incrementally, and need the entire haystack in memory at /// performed incrementally, and need the entire haystack in memory at
/// once. /// once.
multi_line_buffer: RefCell<Vec<u8>>, multi_line_buffer: RefCell<BString>,
} }
impl Searcher { impl Searcher {
@ -666,7 +667,7 @@ impl Searcher {
let mut decode_buffer = self.decode_buffer.borrow_mut(); let mut decode_buffer = self.decode_buffer.borrow_mut();
let read_from = self.decode_builder let read_from = self.decode_builder
.build_with_buffer(read_from, &mut *decode_buffer) .build_with_buffer(read_from, decode_buffer.as_mut_vec())
.map_err(S::Error::error_io)?; .map_err(S::Error::error_io)?;
if self.multi_line_with_matcher(&matcher) { if self.multi_line_with_matcher(&matcher) {
@ -698,12 +699,13 @@ impl Searcher {
where M: Matcher, where M: Matcher,
S: Sink, S: Sink,
{ {
let slice = B(slice);
self.check_config(&matcher).map_err(S::Error::error_config)?; self.check_config(&matcher).map_err(S::Error::error_config)?;
// We can search the slice directly, unless we need to do transcoding. // We can search the slice directly, unless we need to do transcoding.
if self.slice_needs_transcoding(slice) { if self.slice_needs_transcoding(slice) {
trace!("slice reader: needs transcoding, using generic reader"); trace!("slice reader: needs transcoding, using generic reader");
return self.search_reader(matcher, slice, write_to); return self.search_reader(matcher, slice.as_bytes(), write_to);
} }
if self.multi_line_with_matcher(&matcher) { if self.multi_line_with_matcher(&matcher) {
trace!("slice reader: searching via multiline strategy"); trace!("slice reader: searching via multiline strategy");
@ -736,7 +738,7 @@ impl Searcher {
} }
/// Returns true if and only if the given slice needs to be transcoded. /// Returns true if and only if the given slice needs to be transcoded.
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool { fn slice_needs_transcoding(&self, slice: &BStr) -> bool {
self.config.encoding.is_some() || slice_has_utf16_bom(slice) self.config.encoding.is_some() || slice_has_utf16_bom(slice)
} }
} }
@ -851,7 +853,9 @@ impl Searcher {
.map(|m| m.len() as usize + 1) .map(|m| m.len() as usize + 1)
.unwrap_or(0); .unwrap_or(0);
buf.reserve(cap); buf.reserve(cap);
read_from.read_to_end(&mut *buf).map_err(S::Error::error_io)?; read_from
.read_to_end(buf.as_mut_vec())
.map_err(S::Error::error_io)?;
return Ok(()); return Ok(());
} }
self.fill_multi_line_buffer_from_reader::<_, S>(read_from) self.fill_multi_line_buffer_from_reader::<_, S>(read_from)
@ -868,6 +872,7 @@ impl Searcher {
assert!(self.config.multi_line); assert!(self.config.multi_line);
let mut buf = self.multi_line_buffer.borrow_mut(); let mut buf = self.multi_line_buffer.borrow_mut();
let buf = buf.as_mut_vec();
buf.clear(); buf.clear();
// If we don't have a heap limit, then we can defer to std's // If we don't have a heap limit, then we can defer to std's
@ -919,8 +924,8 @@ impl Searcher {
/// ///
/// This is used by the searcher to determine if a transcoder is necessary. /// This is used by the searcher to determine if a transcoder is necessary.
/// Otherwise, it is advantageous to search the slice directly. /// Otherwise, it is advantageous to search the slice directly.
fn slice_has_utf16_bom(slice: &[u8]) -> bool { fn slice_has_utf16_bom(slice: &BStr) -> bool {
let enc = match encoding_rs::Encoding::for_bom(slice) { let enc = match encoding_rs::Encoding::for_bom(slice.as_bytes()) {
None => return false, None => return false,
Some((enc, _)) => enc, Some((enc, _)) => enc,
}; };

View File

@ -1,10 +1,10 @@
use std::io::{self, Write}; use std::io::{self, Write};
use std::str; use std::str;
use bstr::B;
use grep_matcher::{ use grep_matcher::{
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError, LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
}; };
use memchr::memchr;
use regex::bytes::{Regex, RegexBuilder}; use regex::bytes::{Regex, RegexBuilder};
use searcher::{BinaryDetection, Searcher, SearcherBuilder}; use searcher::{BinaryDetection, Searcher, SearcherBuilder};
@ -94,8 +94,8 @@ impl Matcher for RegexMatcher {
} }
// Make it interesting and return the last byte in the current // Make it interesting and return the last byte in the current
// line. // line.
let i = memchr(self.line_term.unwrap().as_byte(), haystack) let i = B(haystack)
.map(|i| i) .find_byte(self.line_term.unwrap().as_byte())
.unwrap_or(haystack.len() - 1); .unwrap_or(haystack.len() - 1);
Ok(Some(LineMatchKind::Candidate(i))) Ok(Some(LineMatchKind::Candidate(i)))
} else { } else {