search: migrate to bstr

This is an initial attempt at migrating grep-searcher to use the new bstr crate (not yet published). This is mostly an improvement, although a significant problem is that the grep-matcher crate controls the `Index` impls for the `Match` type, which we use quite heavily. Thus, in order to impl `Index` for `BStr`, we need to add bstr as a public dependency to grep-matcher. This is really bad news because grep-matcher is supposed to be a light-weight core crate that defines a matcher interface, which is itself intended to be a public dependency. Thus, a semver bump on bstr will have very undesirable ripple effects throughout ripgrep's library crates. This would be something we could stomach if bstr was solid at 1.0 and committed to avoiding breaking changes. But it's not there yet.
2025-05-19 01:30:21 -07:00 · 2019-01-20 12:32:09 -05:00 · 2019-01-20 12:32:09 -05:00 · 4b88e08f41
commit 4b88e08f41
parent 7cbc535d70
12 changed files with 169 additions and 158 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -34,6 +34,13 @@ name = "bitflags"
 version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 [[package]]
 name = "bstr"
 version = "0.0.1"
 dependencies = [
 "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
 [[package]]
 name = "bytecount"
 version = "0.5.0"
@ -180,7 +187,7 @@ dependencies = [
 name = "grep-matcher"
 version = "0.1.1"
 dependencies = [
- "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bstr 0.0.1",
 "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@ -222,13 +229,13 @@ dependencies = [
 name = "grep-searcher"
 version = "0.1.1"
 dependencies = [
 "bstr 0.0.1",
 "bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
 "encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "grep-matcher 0.1.1",
 "grep-regex 0.1.1",
 "log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
--- a/grep-matcher/Cargo.toml
+++ b/grep-matcher/Cargo.toml
@ -13,8 +13,11 @@ keywords = ["regex", "pattern", "trait"]
 license = "Unlicense/MIT"
 autotests = false
-[dependencies]
+[dependencies.bstr]
-memchr = "2.1"
+version = "*"
 path = "/home/andrew/rust/bstr"
 default-features = false
 features = ["std"]
 [dev-dependencies]
 regex = "1.1"
--- a/grep-matcher/src/interpolate.rs
+++ b/grep-matcher/src/interpolate.rs
@ -1,6 +1,6 @@
 use std::str;
-use memchr::memchr;
+use bstr::B;
 /// Interpolate capture references in `replacement` and write the interpolation
 /// result to `dst`. References in `replacement` take the form of $N or $name,
@ -22,7 +22,7 @@ pub fn interpolate<A, N>(
    N: FnMut(&str) -> Option<usize>
 {
    while !replacement.is_empty() {
-        match memchr(b'$', replacement) {
+        match B(replacement).find_byte(b'$') {
            None => break,
            Some(i) => {
                dst.extend(&replacement[..i]);
--- a/grep-matcher/src/lib.rs
+++ b/grep-matcher/src/lib.rs
@ -38,13 +38,15 @@ implementations.
 #![deny(missing_docs)]
-extern crate memchr;
+extern crate bstr;
 use std::fmt;
 use std::io;
 use std::ops;
 use std::u64;
 use bstr::BStr;
 use interpolate::interpolate;
 mod interpolate;
@ -180,6 +182,22 @@ impl ops::IndexMut<Match> for [u8] {
    }
 }
 /// Allows a `BStr` to be indexed directly by a `Match`.
 ///
 /// The result is the sub-slice spanning the match's `start..end` offsets.
 impl ops::Index<Match> for BStr {
    type Output = BStr;
    /// Returns the portion of `self` covered by `index`, delegating to
    /// `BStr`'s own range indexing.
    #[inline]
    fn index(&self, index: Match) -> &BStr {
        // NOTE(review): presumably panics when the range is out of bounds,
        // like slice range indexing — confirm against bstr's `Index` impl.
        &self[index.start..index.end]
    }
 }
 /// Allows a `BStr` to be mutably indexed directly by a `Match`.
 ///
 /// The result is the mutable sub-slice spanning the match's `start..end`
 /// offsets.
 impl ops::IndexMut<Match> for BStr {
    /// Returns the portion of `self` covered by `index` as a mutable
    /// reference, delegating to `BStr`'s own range indexing.
    #[inline]
    fn index_mut(&mut self, index: Match) -> &mut BStr {
        // NOTE(review): presumably panics when the range is out of bounds,
        // like slice range indexing — confirm against bstr's `IndexMut` impl.
        &mut self[index.start..index.end]
    }
 }
 impl ops::Index<Match> for str {
    type Output = str;
--- a/grep-searcher/Cargo.toml
+++ b/grep-searcher/Cargo.toml
@ -18,9 +18,14 @@ encoding_rs = "0.8.14"
 encoding_rs_io = "0.1.3"
 grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
 log = "0.4.5"
 memchr = "2.1"
 memmap = "0.7"
 [dependencies.bstr]
 version = "*"
 path = "/home/andrew/rust/bstr"
 default-features = false
 features = ["std"]
 [dev-dependencies]
 grep-regex = { version = "0.1.1", path = "../grep-regex" }
 regex = "1.1"
--- a/grep-searcher/src/lib.rs
+++ b/grep-searcher/src/lib.rs
@ -99,13 +99,13 @@ searches stdin.
 #![deny(missing_docs)]
 extern crate bstr;
 extern crate bytecount;
 extern crate encoding_rs;
 extern crate encoding_rs_io;
 extern crate grep_matcher;
 #[macro_use]
 extern crate log;
 extern crate memchr;
 extern crate memmap;
 #[cfg(test)]
 extern crate regex;
--- a/grep-searcher/src/line_buffer.rs
+++ b/grep-searcher/src/line_buffer.rs
@ -1,8 +1,7 @@
 use std::cmp;
 use std::io;
 use std::ptr;
-use memchr::{memchr, memrchr};
+use bstr::{BStr, BString};
 /// The default buffer capacity that we use for the line buffer.
 pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
@ -123,7 +122,7 @@ impl LineBufferBuilder {
    pub fn build(&self) -> LineBuffer {
        LineBuffer {
            config: self.config,
-            buf: vec![0; self.config.capacity],
+            buf: BString::from(vec![0; self.config.capacity]),
            pos: 0,
            last_lineterm: 0,
            end: 0,
@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
    }
    /// Return the contents of this buffer.
-    pub fn buffer(&self) -> &[u8] {
+    pub fn buffer(&self) -> &BStr {
        self.line_buffer.buffer()
    }
@ -284,7 +283,7 @@ pub struct LineBuffer {
    /// The configuration of this buffer.
    config: Config,
    /// The primary buffer with which to hold data.
-    buf: Vec<u8>,
+    buf: BString,
    /// The current position of this buffer. This is always a valid sliceable
    /// index into `buf`, and its maximum value is the length of `buf`.
    pos: usize,
@ -339,13 +338,13 @@ impl LineBuffer {
    }
    /// Return the contents of this buffer.
-    fn buffer(&self) -> &[u8] {
+    fn buffer(&self) -> &BStr {
        &self.buf[self.pos..self.last_lineterm]
    }
    /// Return the contents of the free space beyond the end of the buffer as
    /// a mutable slice.
-    fn free_buffer(&mut self) -> &mut [u8] {
+    fn free_buffer(&mut self) -> &mut BStr {
        &mut self.buf[self.end..]
    }
@ -396,7 +395,7 @@ impl LineBuffer {
        assert_eq!(self.pos, 0);
        loop {
            self.ensure_capacity()?;
-            let readlen = rdr.read(self.free_buffer())?;
+            let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
            if readlen == 0 {
                // We're only done reading for good once the caller has
                // consumed everything.
@ -416,7 +415,7 @@ impl LineBuffer {
            match self.config.binary {
                BinaryDetection::None => {} // nothing to do
                BinaryDetection::Quit(byte) => {
-                    if let Some(i) = memchr(byte, newbytes) {
+                    if let Some(i) = newbytes.find_byte(byte) {
                        self.end = oldend + i;
                        self.last_lineterm = self.end;
                        self.binary_byte_offset =
@ -444,7 +443,7 @@ impl LineBuffer {
            }
            // Update our `last_lineterm` positions if we read one.
-            if let Some(i) = memrchr(self.config.lineterm, newbytes) {
+            if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
                self.last_lineterm = oldend + i + 1;
                return Ok(true);
            }
@ -467,40 +466,8 @@ impl LineBuffer {
            return;
        }
        assert!(self.pos < self.end && self.end <= self.buf.len());
        let roll_len = self.end - self.pos;
-        unsafe {
+        self.buf.copy_within(self.pos.., 0);
            // SAFETY: A buffer contains Copy data, so there's no problem
            // moving it around. Safety also depends on our indices being
            // in bounds, which they should always be, and we enforce with
            // an assert above.
            //
            // It seems like it should be possible to do this in safe code that
            // results in the same codegen. I tried the obvious:
            //
            //   for (src, dst) in (self.pos..self.end).zip(0..) {
            //     self.buf[dst] = self.buf[src];
            //   }
            //
            // But the above does not work, and in fact compiles down to a slow
            // byte-by-byte loop. I tried a few other minor variations, but
            // alas, better minds might prevail.
            //
            // Overall, this doesn't save us *too* much. It mostly matters when
            // the number of bytes we're copying is large, which can happen
            // if the searcher is asked to produce a lot of context. We could
            // decide this isn't worth it, but it does make an appreciable
            // impact at or around the context=30 range on my machine.
            //
            // We could also use a temporary buffer that compiles down to two
            // memcpys and is faster than the byte-at-a-time loop, but it
            // complicates our options for limiting memory allocation a bit.
            ptr::copy(
                self.buf[self.pos..].as_ptr(),
                self.buf.as_mut_ptr(),
                roll_len,
            );
        }
        self.pos = 0;
        self.last_lineterm = roll_len;
        self.end = roll_len;
@ -536,14 +503,15 @@ impl LineBuffer {
    }
 }
-/// Replaces `src` with `replacement` in bytes.
+/// Replaces `src` with `replacement` in bytes, and return the offset of the
-fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
+/// first replacement, if one exists.
 fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
    if src == replacement {
        return None;
    }
    let mut first_pos = None;
    let mut pos = 0;
-    while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
+    while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
        if first_pos.is_none() {
            first_pos = Some(i);
        }
@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
 #[cfg(test)]
 mod tests {
    use std::str;
    use bstr::BString;
    use super::*;
    const SHERLOCK: &'static str = "\
@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
        slice.to_string()
    }
    /// Helper: interprets `slice` as UTF-8 and returns it as a `&str`.
    ///
    /// Panics (via `unwrap`) if `slice` is not valid UTF-8.
    fn btos(slice: &[u8]) -> &str {
        str::from_utf8(slice).unwrap()
    }
    fn replace_str(
        slice: &str,
        src: u8,
        replacement: u8,
    ) -> (String, Option<usize>) {
-        let mut dst = slice.to_string().into_bytes();
+        let mut dst = BString::from(slice);
        let result = replace_bytes(&mut dst, src, replacement);
-        (String::from_utf8(dst).unwrap(), result)
+        (dst.into_string().unwrap(), result)
    }
    #[test]
@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
+        assert_eq!(rdr.buffer(), "homer\nlisa\n");
        assert_eq!(rdr.absolute_byte_offset(), 0);
        rdr.consume(5);
        assert_eq!(rdr.absolute_byte_offset(), 5);
@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
        assert_eq!(rdr.absolute_byte_offset(), 11);
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "maggie");
+        assert_eq!(rdr.buffer(), "maggie");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+        assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "\n");
+        assert_eq!(rdr.buffer(), "\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "\n\n");
+        assert_eq!(rdr.buffer(), "\n\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
        let mut linebuf = LineBufferBuilder::new().capacity(1).build();
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
-        let mut got = vec![];
+        let mut got = BString::new();
        while rdr.fill().unwrap() {
-            got.extend(rdr.buffer());
+            got.push(rdr.buffer());
            rdr.consume_all();
        }
-        assert_eq!(bytes, btos(&got));
+        assert_eq!(bytes, got);
        assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
        assert_eq!(rdr.binary_byte_offset(), None);
    }
@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\n");
+        assert_eq!(rdr.buffer(), "homer\n");
        rdr.consume_all();
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "lisa\n");
+        assert_eq!(rdr.buffer(), "lisa\n");
        rdr.consume_all();
        // This returns an error because while we have just enough room to
@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.fill().is_err());
        // We can mush on though!
-        assert_eq!(btos(rdr.buffer()), "m");
+        assert_eq!(rdr.buffer(), "m");
        rdr.consume_all();
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "aggie");
+        assert_eq!(rdr.buffer(), "aggie");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\n");
+        assert_eq!(rdr.buffer(), "homer\n");
        rdr.consume_all();
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "lisa\n");
+        assert_eq!(rdr.buffer(), "lisa\n");
        rdr.consume_all();
        // We have just enough space.
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "maggie");
+        assert_eq!(rdr.buffer(), "maggie");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(rdr.fill().is_err());
-        assert_eq!(btos(rdr.buffer()), "");
+        assert_eq!(rdr.buffer(), "");
    }
    #[test]
@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
+        assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nli");
+        assert_eq!(rdr.buffer(), "homer\nli");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
        let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
        assert!(!rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "");
+        assert_eq!(rdr.buffer(), "");
        assert_eq!(rdr.absolute_byte_offset(), 0);
        assert_eq!(rdr.binary_byte_offset(), Some(0));
    }
@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
+        assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
+        assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "\
+        assert_eq!(rdr.buffer(), "\
 For the Doctor Watsons of this world, as opposed to the Sherlock
 Holmeses, s\
 ");
@ -901,7 +866,7 @@ Holmeses, s\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
+        assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -920,7 +885,7 @@ Holmeses, s\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
+        assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -939,7 +904,7 @@ Holmeses, s\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+        assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
@ -958,7 +923,7 @@ Holmeses, s\
        assert!(rdr.buffer().is_empty());
        assert!(rdr.fill().unwrap());
-        assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
+        assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
        rdr.consume_all();
        assert!(!rdr.fill().unwrap());
--- a/grep-searcher/src/lines.rs
+++ b/grep-searcher/src/lines.rs
@ -2,8 +2,8 @@
 A collection of routines for performing operations on lines.
 */
 use bstr::{B, BStr};
 use bytecount;
 use memchr::{memchr, memrchr};
 use grep_matcher::{LineTerminator, Match};
 /// An iterator over lines in a particular slice of bytes.
@ -14,7 +14,7 @@ use grep_matcher::{LineTerminator, Match};
 /// `'b` refers to the lifetime of the underlying bytes.
 #[derive(Debug)]
 pub struct LineIter<'b> {
-    bytes: &'b [u8],
+    bytes: &'b BStr,
    stepper: LineStep,
 }
@ -23,7 +23,7 @@ impl<'b> LineIter<'b> {
    /// are terminated by `line_term`.
    pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
        LineIter {
-            bytes: bytes,
+            bytes: B(bytes),
            stepper: LineStep::new(line_term, 0, bytes.len()),
        }
    }
@ -33,7 +33,7 @@ impl<'b> Iterator for LineIter<'b> {
    type Item = &'b [u8];
    fn next(&mut self) -> Option<&'b [u8]> {
-        self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
+        self.stepper.next_match(self.bytes).map(|m| self.bytes[m].as_bytes())
    }
 }
@ -73,19 +73,19 @@ impl LineStep {
    /// The range returned includes the line terminator. Ranges are always
    /// non-empty.
    pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
-        self.next_impl(bytes)
+        self.next_impl(B(bytes))
    }
    /// Like next, but returns a `Match` instead of a tuple.
    #[inline(always)]
-    pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
+    pub(crate) fn next_match(&mut self, bytes: &BStr) -> Option<Match> {
        self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
    }
    #[inline(always)]
-    fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
+    fn next_impl(&mut self, mut bytes: &BStr) -> Option<(usize, usize)> {
        bytes = &bytes[..self.end];
-        match memchr(self.line_term, &bytes[self.pos..]) {
+        match bytes[self.pos..].find_byte(self.line_term) {
            None => {
                if self.pos < bytes.len() {
                    let m = (self.pos, bytes.len());
@ -109,15 +109,15 @@ impl LineStep {
 }
 /// Count the number of occurrences of `line_term` in `bytes`.
-pub fn count(bytes: &[u8], line_term: u8) -> u64 {
+pub fn count(bytes: &BStr, line_term: u8) -> u64 {
-    bytecount::count(bytes, line_term) as u64
+    bytecount::count(bytes.as_bytes(), line_term) as u64
 }
 /// Given a line that possibly ends with a terminator, return that line without
 /// the terminator.
 #[inline(always)]
-pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
+pub fn without_terminator(bytes: &BStr, line_term: LineTerminator) -> &BStr {
-    let line_term = line_term.as_bytes();
+    let line_term = BStr::new(line_term.as_bytes());
    let start = bytes.len().saturating_sub(line_term.len());
    if bytes.get(start..) == Some(line_term) {
        return &bytes[..bytes.len() - line_term.len()];
@ -131,18 +131,20 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
 /// Line terminators are considered part of the line they terminate.
 #[inline(always)]
 pub fn locate(
-    bytes: &[u8],
+    bytes: &BStr,
    line_term: u8,
    range: Match,
 ) -> Match {
-    let line_start = memrchr(line_term, &bytes[0..range.start()])
+    let line_start = bytes[..range.start()]
        .rfind_byte(line_term)
        .map_or(0, |i| i + 1);
    let line_end =
        if range.end() > line_start && bytes[range.end() - 1] == line_term {
            range.end()
        } else {
-            memchr(line_term, &bytes[range.end()..])
+            bytes[range.end()..]
-            .map_or(bytes.len(), |i| range.end() + i + 1)
+                .find_byte(line_term)
                .map_or(bytes.len(), |i| range.end() + i + 1)
        };
    Match::new(line_start, line_end)
 }
@ -155,7 +157,7 @@ pub fn locate(
 ///
 /// If `bytes` ends with a line terminator, then the terminator itself is
 /// considered part of the last line.
-pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
+pub fn preceding(bytes: &BStr, line_term: u8, count: usize) -> usize {
    preceding_by_pos(bytes, bytes.len(), line_term, count)
 }
@ -169,7 +171,7 @@ pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
 /// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
 /// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
 fn preceding_by_pos(
-    bytes: &[u8],
+    bytes: &BStr,
    mut pos: usize,
    line_term: u8,
    mut count: usize,
@ -180,7 +182,7 @@ fn preceding_by_pos(
        pos -= 1;
    }
    loop {
-        match memrchr(line_term, &bytes[..pos]) {
+        match bytes[..pos].rfind_byte(line_term) {
            None => {
                return 0;
            }
@ -201,7 +203,10 @@ fn preceding_by_pos(
 mod tests {
    use std::ops::Range;
    use std::str;
    use bstr::B;
    use grep_matcher::Match;
    use super::*;
    const SHERLOCK: &'static str = "\
@ -220,7 +225,7 @@ and exhibited clearly, with a label attached.\
    fn lines(text: &str) -> Vec<&str> {
        let mut results = vec![];
        let mut it = LineStep::new(b'\n', 0, text.len());
-        while let Some(m) = it.next_match(text.as_bytes()) {
+        while let Some(m) = it.next_match(B(text)) {
            results.push(&text[m]);
        }
        results
@ -229,26 +234,26 @@ and exhibited clearly, with a label attached.\
    fn line_ranges(text: &str) -> Vec<Range<usize>> {
        let mut results = vec![];
        let mut it = LineStep::new(b'\n', 0, text.len());
-        while let Some(m) = it.next_match(text.as_bytes()) {
+        while let Some(m) = it.next_match(B(text)) {
            results.push(m.start()..m.end());
        }
        results
    }
    fn prev(text: &str, pos: usize, count: usize) -> usize {
-        preceding_by_pos(text.as_bytes(), pos, b'\n', count)
+        preceding_by_pos(B(text), pos, b'\n', count)
    }
    fn loc(text: &str, start: usize, end: usize) -> Match {
-        locate(text.as_bytes(), b'\n', Match::new(start, end))
+        locate(B(text), b'\n', Match::new(start, end))
    }
    #[test]
    fn line_count() {
-        assert_eq!(0, count(b"", b'\n'));
+        assert_eq!(0, count(B(""), b'\n'));
-        assert_eq!(1, count(b"\n", b'\n'));
+        assert_eq!(1, count(B("\n"), b'\n'));
-        assert_eq!(2, count(b"\n\n", b'\n'));
+        assert_eq!(2, count(B("\n\n"), b'\n'));
-        assert_eq!(2, count(b"a\nb\nc", b'\n'));
+        assert_eq!(2, count(B("a\nb\nc"), b'\n'));
    }
    #[test]
@ -331,7 +336,7 @@ and exhibited clearly, with a label attached.\
    #[test]
    fn preceding_lines_doc() {
        // These are the examples mentions in the documentation of `preceding`.
-        let bytes = b"abc\nxyz\n";
+        let bytes = B("abc\nxyz\n");
        assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
        assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
        assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
--- a/grep-searcher/src/searcher/core.rs
+++ b/grep-searcher/src/searcher/core.rs
@ -1,6 +1,6 @@
 use std::cmp;
-use memchr::memchr;
+use bstr::BStr;
 use grep_matcher::{LineMatchKind, Matcher};
 use lines::{self, LineStep};
@ -84,7 +84,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    pub fn matched(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        range: &Range,
    ) -> Result<bool, S::Error> {
        self.sink_matched(buf, range)
@ -107,7 +107,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
            })
    }
-    pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+    pub fn match_by_line(&mut self, buf: &BStr) -> Result<bool, S::Error> {
        if self.is_line_by_line_fast() {
            self.match_by_line_fast(buf)
        } else {
@ -115,7 +115,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
        }
    }
-    pub fn roll(&mut self, buf: &[u8]) -> usize {
+    pub fn roll(&mut self, buf: &BStr) -> usize {
        let consumed =
            if self.config.max_context() == 0 {
                buf.len()
@ -141,7 +141,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
        consumed
    }
-    pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
+    pub fn detect_binary(&mut self, buf: &BStr, range: &Range) -> bool {
        if self.binary_byte_offset.is_some() {
            return true;
        }
@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
            BinaryDetection::Quit(b) => b,
            _ => return false,
        };
-        if let Some(i) = memchr(binary_byte, &buf[*range]) {
+        if let Some(i) = buf[*range].find_byte(binary_byte) {
            self.binary_byte_offset = Some(range.start() + i);
            true
        } else {
@ -159,7 +159,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    pub fn before_context_by_line(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        upto: usize,
    ) -> Result<bool, S::Error> {
        if self.config.before_context == 0 {
@ -194,7 +194,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    pub fn after_context_by_line(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        upto: usize,
    ) -> Result<bool, S::Error> {
        if self.after_context_left == 0 {
@ -219,7 +219,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    pub fn other_context_by_line(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        upto: usize,
    ) -> Result<bool, S::Error> {
        let range = Range::new(self.last_line_visited, upto);
@ -236,7 +236,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
        Ok(true)
    }
-    fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+    fn match_by_line_slow(&mut self, buf: &BStr) -> Result<bool, S::Error> {
        debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
        let range = Range::new(self.pos(), buf.len());
@ -255,7 +255,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
                    &buf[line],
                    self.config.line_term,
                );
-                match self.matcher.shortest_match(slice) {
+                match self.matcher.shortest_match(slice.as_bytes()) {
                    Err(err) => return Err(S::Error::error_message(err)),
                    Ok(result) => result.is_some(),
                }
@ -281,7 +281,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
        Ok(true)
    }
-    fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
+    fn match_by_line_fast(&mut self, buf: &BStr) -> Result<bool, S::Error> {
        debug_assert!(!self.config.passthru);
        while !buf[self.pos()..].is_empty() {
@ -316,7 +316,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    #[inline(always)]
    fn match_by_line_fast_invert(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
    ) -> Result<bool, S::Error> {
        assert!(self.config.invert_match);
@ -357,14 +357,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    #[inline(always)]
    fn find_by_line_fast(
        &self,
-        buf: &[u8],
+        buf: &BStr,
    ) -> Result<Option<Range>, S::Error> {
        debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
        debug_assert!(self.is_line_by_line_fast());
        let mut pos = self.pos();
        while !buf[pos..].is_empty() {
-            match self.matcher.find_candidate_line(&buf[pos..]) {
+            match self.matcher.find_candidate_line(buf[pos..].as_bytes()) {
                Err(err) => return Err(S::Error::error_message(err)),
                Ok(None) => return Ok(None),
                Ok(Some(LineMatchKind::Confirmed(i))) => {
@ -396,7 +396,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
                        &buf[line],
                        self.config.line_term,
                    );
-                    match self.matcher.is_match(slice) {
+                    match self.matcher.is_match(slice.as_bytes()) {
                        Err(err) => return Err(S::Error::error_message(err)),
                        Ok(true) => return Ok(Some(line)),
                        Ok(false) => {
@ -413,7 +413,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    #[inline(always)]
    fn sink_matched(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        range: &Range,
    ) -> Result<bool, S::Error> {
        if self.binary && self.detect_binary(buf, range) {
@ -438,7 +438,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
            &self.searcher,
            &SinkMatch {
                line_term: self.config.line_term,
-                bytes: linebuf,
+                bytes: linebuf.as_bytes(),
                absolute_byte_offset: offset,
                line_number: self.line_number,
            },
@ -454,7 +454,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    fn sink_before_context(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        range: &Range,
    ) -> Result<bool, S::Error> {
        if self.binary && self.detect_binary(buf, range) {
@ -466,7 +466,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
            &self.searcher,
            &SinkContext {
                line_term: self.config.line_term,
-                bytes: &buf[*range],
+                bytes: buf[*range].as_bytes(),
                kind: SinkContextKind::Before,
                absolute_byte_offset: offset,
                line_number: self.line_number,
@ -482,7 +482,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    fn sink_after_context(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        range: &Range,
    ) -> Result<bool, S::Error> {
        assert!(self.after_context_left >= 1);
@ -496,7 +496,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
            &self.searcher,
            &SinkContext {
                line_term: self.config.line_term,
-                bytes: &buf[*range],
+                bytes: buf[*range].as_bytes(),
                kind: SinkContextKind::After,
                absolute_byte_offset: offset,
                line_number: self.line_number,
@ -513,7 +513,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
    fn sink_other_context(
        &mut self,
-        buf: &[u8],
+        buf: &BStr,
        range: &Range,
    ) -> Result<bool, S::Error> {
        if self.binary && self.detect_binary(buf, range) {
@ -525,7 +525,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
            &self.searcher,
            &SinkContext {
                line_term: self.config.line_term,
-                bytes: &buf[*range],
+                bytes: buf[*range].as_bytes(),
                kind: SinkContextKind::Other,
                absolute_byte_offset: offset,
                line_number: self.line_number,
@ -555,7 +555,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
        }
    }
-    fn count_lines(&mut self, buf: &[u8], upto: usize) {
+    fn count_lines(&mut self, buf: &BStr, upto: usize) {
        if let Some(ref mut line_number) = self.line_number {
            if self.last_line_counted >= upto {
                return;
--- a/grep-searcher/src/searcher/glue.rs
+++ b/grep-searcher/src/searcher/glue.rs
@ -1,7 +1,9 @@
 use std::cmp;
 use std::io;
+use bstr::BStr;
 use grep_matcher::Matcher;
 use lines::{self, LineStep};
 use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader};
 use sink::{Sink, SinkError};
@ -77,14 +79,14 @@ where M: Matcher,
 pub struct SliceByLine<'s, M: 's, S> {
    config: &'s Config,
    core: Core<'s, M, S>,
-    slice: &'s [u8],
+    slice: &'s BStr,
 }
 impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
    pub fn new(
        searcher: &'s Searcher,
        matcher: M,
-        slice: &'s [u8],
+        slice: &'s BStr,
        write_to: S,
    ) -> SliceByLine<'s, M, S> {
        debug_assert!(!searcher.multi_line_with_matcher(&matcher));
@ -127,7 +129,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
 pub struct MultiLine<'s, M: 's, S> {
    config: &'s Config,
    core: Core<'s, M, S>,
-    slice: &'s [u8],
+    slice: &'s BStr,
    last_match: Option<Range>,
 }
@ -135,7 +137,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
    pub fn new(
        searcher: &'s Searcher,
        matcher: M,
-        slice: &'s [u8],
+        slice: &'s BStr,
        write_to: S,
    ) -> MultiLine<'s, M, S> {
        debug_assert!(searcher.multi_line_with_matcher(&matcher));
@ -306,7 +308,8 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
    }
    fn find(&mut self) -> Result<Option<Range>, S::Error> {
-        match self.core.matcher().find(&self.slice[self.core.pos()..]) {
+        let haystack = &self.slice[self.core.pos()..];
+        match self.core.matcher().find(haystack.as_bytes()) {
            Err(err) => Err(S::Error::error_message(err)),
            Ok(None) => Ok(None),
            Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),
--- a/grep-searcher/src/searcher/mod.rs
+++ b/grep-searcher/src/searcher/mod.rs
@ -5,6 +5,7 @@ use std::fs::File;
 use std::io::{self, Read};
 use std::path::Path;
+use bstr::{B, BStr, BString};
 use encoding_rs;
 use encoding_rs_io::DecodeReaderBytesBuilder;
 use grep_matcher::{LineTerminator, Match, Matcher};
@ -311,9 +312,9 @@ impl SearcherBuilder {
        Searcher {
            config: config,
            decode_builder: decode_builder,
-            decode_buffer: RefCell::new(vec![0; 8 * (1<<10)]),
+            decode_buffer: RefCell::new(BString::from(vec![0; 8 * (1<<10)])),
            line_buffer: RefCell::new(self.config.line_buffer()),
-            multi_line_buffer: RefCell::new(vec![]),
+            multi_line_buffer: RefCell::new(BString::new()),
        }
    }
@ -543,7 +544,7 @@ pub struct Searcher {
    /// through the underlying bytes with no additional overhead.
    decode_builder: DecodeReaderBytesBuilder,
    /// A buffer that is used for transcoding scratch space.
-    decode_buffer: RefCell<Vec<u8>>,
+    decode_buffer: RefCell<BString>,
    /// A line buffer for use in line oriented searching.
    ///
    /// We wrap it in a RefCell to permit lending out borrows of `Searcher`
@ -555,7 +556,7 @@ pub struct Searcher {
    /// multi line search. In particular, multi line searches cannot be
    /// performed incrementally, and need the entire haystack in memory at
    /// once.
-    multi_line_buffer: RefCell<Vec<u8>>,
+    multi_line_buffer: RefCell<BString>,
 }
 impl Searcher {
@ -666,7 +667,7 @@ impl Searcher {
        let mut decode_buffer = self.decode_buffer.borrow_mut();
        let read_from = self.decode_builder
-            .build_with_buffer(read_from, &mut *decode_buffer)
+            .build_with_buffer(read_from, decode_buffer.as_mut_vec())
            .map_err(S::Error::error_io)?;
        if self.multi_line_with_matcher(&matcher) {
@ -698,12 +699,13 @@ impl Searcher {
    where M: Matcher,
          S: Sink,
    {
+        let slice = B(slice);
        self.check_config(&matcher).map_err(S::Error::error_config)?;
        // We can search the slice directly, unless we need to do transcoding.
        if self.slice_needs_transcoding(slice) {
            trace!("slice reader: needs transcoding, using generic reader");
-            return self.search_reader(matcher, slice, write_to);
+            return self.search_reader(matcher, slice.as_bytes(), write_to);
        }
        if self.multi_line_with_matcher(&matcher) {
            trace!("slice reader: searching via multiline strategy");
@ -736,7 +738,7 @@ impl Searcher {
    }
    /// Returns true if and only if the given slice needs to be transcoded.
-    fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
+    fn slice_needs_transcoding(&self, slice: &BStr) -> bool {
        self.config.encoding.is_some() || slice_has_utf16_bom(slice)
    }
 }
@ -851,7 +853,9 @@ impl Searcher {
                .map(|m| m.len() as usize + 1)
                .unwrap_or(0);
            buf.reserve(cap);
-            read_from.read_to_end(&mut *buf).map_err(S::Error::error_io)?;
+            read_from
+                .read_to_end(buf.as_mut_vec())
+                .map_err(S::Error::error_io)?;
            return Ok(());
        }
        self.fill_multi_line_buffer_from_reader::<_, S>(read_from)
@ -868,6 +872,7 @@ impl Searcher {
        assert!(self.config.multi_line);
        let mut buf = self.multi_line_buffer.borrow_mut();
+        let buf = buf.as_mut_vec();
        buf.clear();
        // If we don't have a heap limit, then we can defer to std's
@ -919,8 +924,8 @@ impl Searcher {
 ///
 /// This is used by the searcher to determine if a transcoder is necessary.
 /// Otherwise, it is advantageous to search the slice directly.
-fn slice_has_utf16_bom(slice: &[u8]) -> bool {
+fn slice_has_utf16_bom(slice: &BStr) -> bool {
-    let enc = match encoding_rs::Encoding::for_bom(slice) {
+    let enc = match encoding_rs::Encoding::for_bom(slice.as_bytes()) {
        None => return false,
        Some((enc, _)) => enc,
    };
--- a/grep-searcher/src/testutil.rs
+++ b/grep-searcher/src/testutil.rs
@ -1,10 +1,10 @@
 use std::io::{self, Write};
 use std::str;
+use bstr::B;
 use grep_matcher::{
    LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
 };
-use memchr::memchr;
 use regex::bytes::{Regex, RegexBuilder};
 use searcher::{BinaryDetection, Searcher, SearcherBuilder};
@ -94,8 +94,8 @@ impl Matcher for RegexMatcher {
            }
            // Make it interesting and return the last byte in the current
            // line.
-            let i = memchr(self.line_term.unwrap().as_byte(), haystack)
+            let i = B(haystack)
-                .map(|i| i)
+                .find_byte(self.line_term.unwrap().as_byte())
                .unwrap_or(haystack.len() - 1);
            Ok(Some(LineMatchKind::Candidate(i)))
        } else {