mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-25 17:21:57 -07:00
Compare commits
1 Commits
grep-print
...
ag/bstr-mi
Author | SHA1 | Date | |
---|---|---|---|
|
4b88e08f41 |
11
Cargo.lock
generated
11
Cargo.lock
generated
@@ -34,6 +34,13 @@ name = "bitflags"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.5.0"
|
||||
@@ -180,7 +187,7 @@ dependencies = [
|
||||
name = "grep-matcher"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.0.1",
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@@ -222,13 +229,13 @@ dependencies = [
|
||||
name = "grep-searcher"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"bstr 0.0.1",
|
||||
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.1",
|
||||
"grep-regex 0.1.1",
|
||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
@@ -13,8 +13,11 @@ keywords = ["regex", "pattern", "trait"]
|
||||
license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.1"
|
||||
[dependencies.bstr]
|
||||
version = "*"
|
||||
path = "/home/andrew/rust/bstr"
|
||||
default-features = false
|
||||
features = ["std"]
|
||||
|
||||
[dev-dependencies]
|
||||
regex = "1.1"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
use std::str;
|
||||
|
||||
use memchr::memchr;
|
||||
use bstr::B;
|
||||
|
||||
/// Interpolate capture references in `replacement` and write the interpolation
|
||||
/// result to `dst`. References in `replacement` take the form of $N or $name,
|
||||
@@ -22,7 +22,7 @@ pub fn interpolate<A, N>(
|
||||
N: FnMut(&str) -> Option<usize>
|
||||
{
|
||||
while !replacement.is_empty() {
|
||||
match memchr(b'$', replacement) {
|
||||
match B(replacement).find_byte(b'$') {
|
||||
None => break,
|
||||
Some(i) => {
|
||||
dst.extend(&replacement[..i]);
|
||||
|
@@ -38,13 +38,15 @@ implementations.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate memchr;
|
||||
extern crate bstr;
|
||||
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::ops;
|
||||
use std::u64;
|
||||
|
||||
use bstr::BStr;
|
||||
|
||||
use interpolate::interpolate;
|
||||
|
||||
mod interpolate;
|
||||
@@ -180,6 +182,22 @@ impl ops::IndexMut<Match> for [u8] {
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Index<Match> for BStr {
|
||||
type Output = BStr;
|
||||
|
||||
#[inline]
|
||||
fn index(&self, index: Match) -> &BStr {
|
||||
&self[index.start..index.end]
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::IndexMut<Match> for BStr {
|
||||
#[inline]
|
||||
fn index_mut(&mut self, index: Match) -> &mut BStr {
|
||||
&mut self[index.start..index.end]
|
||||
}
|
||||
}
|
||||
|
||||
impl ops::Index<Match> for str {
|
||||
type Output = str;
|
||||
|
||||
|
@@ -18,9 +18,14 @@ encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.3"
|
||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||
log = "0.4.5"
|
||||
memchr = "2.1"
|
||||
memmap = "0.7"
|
||||
|
||||
[dependencies.bstr]
|
||||
version = "*"
|
||||
path = "/home/andrew/rust/bstr"
|
||||
default-features = false
|
||||
features = ["std"]
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
||||
regex = "1.1"
|
||||
|
@@ -99,13 +99,13 @@ searches stdin.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate bstr;
|
||||
extern crate bytecount;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate memmap;
|
||||
#[cfg(test)]
|
||||
extern crate regex;
|
||||
|
@@ -1,8 +1,7 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
use std::ptr;
|
||||
|
||||
use memchr::{memchr, memrchr};
|
||||
use bstr::{BStr, BString};
|
||||
|
||||
/// The default buffer capacity that we use for the line buffer.
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||
@@ -123,7 +122,7 @@ impl LineBufferBuilder {
|
||||
pub fn build(&self) -> LineBuffer {
|
||||
LineBuffer {
|
||||
config: self.config,
|
||||
buf: vec![0; self.config.capacity],
|
||||
buf: BString::from(vec![0; self.config.capacity]),
|
||||
pos: 0,
|
||||
last_lineterm: 0,
|
||||
end: 0,
|
||||
@@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
pub fn buffer(&self) -> &[u8] {
|
||||
pub fn buffer(&self) -> &BStr {
|
||||
self.line_buffer.buffer()
|
||||
}
|
||||
|
||||
@@ -284,7 +283,7 @@ pub struct LineBuffer {
|
||||
/// The configuration of this buffer.
|
||||
config: Config,
|
||||
/// The primary buffer with which to hold data.
|
||||
buf: Vec<u8>,
|
||||
buf: BString,
|
||||
/// The current position of this buffer. This is always a valid sliceable
|
||||
/// index into `buf`, and its maximum value is the length of `buf`.
|
||||
pos: usize,
|
||||
@@ -339,13 +338,13 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
fn buffer(&self) -> &[u8] {
|
||||
fn buffer(&self) -> &BStr {
|
||||
&self.buf[self.pos..self.last_lineterm]
|
||||
}
|
||||
|
||||
/// Return the contents of the free space beyond the end of the buffer as
|
||||
/// a mutable slice.
|
||||
fn free_buffer(&mut self) -> &mut [u8] {
|
||||
fn free_buffer(&mut self) -> &mut BStr {
|
||||
&mut self.buf[self.end..]
|
||||
}
|
||||
|
||||
@@ -396,7 +395,7 @@ impl LineBuffer {
|
||||
assert_eq!(self.pos, 0);
|
||||
loop {
|
||||
self.ensure_capacity()?;
|
||||
let readlen = rdr.read(self.free_buffer())?;
|
||||
let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
|
||||
if readlen == 0 {
|
||||
// We're only done reading for good once the caller has
|
||||
// consumed everything.
|
||||
@@ -416,7 +415,7 @@ impl LineBuffer {
|
||||
match self.config.binary {
|
||||
BinaryDetection::None => {} // nothing to do
|
||||
BinaryDetection::Quit(byte) => {
|
||||
if let Some(i) = memchr(byte, newbytes) {
|
||||
if let Some(i) = newbytes.find_byte(byte) {
|
||||
self.end = oldend + i;
|
||||
self.last_lineterm = self.end;
|
||||
self.binary_byte_offset =
|
||||
@@ -444,7 +443,7 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
// Update our `last_lineterm` positions if we read one.
|
||||
if let Some(i) = memrchr(self.config.lineterm, newbytes) {
|
||||
if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
|
||||
self.last_lineterm = oldend + i + 1;
|
||||
return Ok(true);
|
||||
}
|
||||
@@ -467,40 +466,8 @@ impl LineBuffer {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(self.pos < self.end && self.end <= self.buf.len());
|
||||
let roll_len = self.end - self.pos;
|
||||
unsafe {
|
||||
// SAFETY: A buffer contains Copy data, so there's no problem
|
||||
// moving it around. Safety also depends on our indices being
|
||||
// in bounds, which they should always be, and we enforce with
|
||||
// an assert above.
|
||||
//
|
||||
// It seems like it should be possible to do this in safe code that
|
||||
// results in the same codegen. I tried the obvious:
|
||||
//
|
||||
// for (src, dst) in (self.pos..self.end).zip(0..) {
|
||||
// self.buf[dst] = self.buf[src];
|
||||
// }
|
||||
//
|
||||
// But the above does not work, and in fact compiles down to a slow
|
||||
// byte-by-byte loop. I tried a few other minor variations, but
|
||||
// alas, better minds might prevail.
|
||||
//
|
||||
// Overall, this doesn't save us *too* much. It mostly matters when
|
||||
// the number of bytes we're copying is large, which can happen
|
||||
// if the searcher is asked to produce a lot of context. We could
|
||||
// decide this isn't worth it, but it does make an appreciable
|
||||
// impact at or around the context=30 range on my machine.
|
||||
//
|
||||
// We could also use a temporary buffer that compiles down to two
|
||||
// memcpys and is faster than the byte-at-a-time loop, but it
|
||||
// complicates our options for limiting memory allocation a bit.
|
||||
ptr::copy(
|
||||
self.buf[self.pos..].as_ptr(),
|
||||
self.buf.as_mut_ptr(),
|
||||
roll_len,
|
||||
);
|
||||
}
|
||||
self.buf.copy_within(self.pos.., 0);
|
||||
self.pos = 0;
|
||||
self.last_lineterm = roll_len;
|
||||
self.end = roll_len;
|
||||
@@ -536,14 +503,15 @@ impl LineBuffer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces `src` with `replacement` in bytes.
|
||||
fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
/// Replaces `src` with `replacement` in bytes, and returns the offset of the
|
||||
/// first replacement, if one exists.
|
||||
fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
|
||||
if src == replacement {
|
||||
return None;
|
||||
}
|
||||
let mut first_pos = None;
|
||||
let mut pos = 0;
|
||||
while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
|
||||
while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
|
||||
if first_pos.is_none() {
|
||||
first_pos = Some(i);
|
||||
}
|
||||
@@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::str;
|
||||
use bstr::BString;
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
@@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
|
||||
slice.to_string()
|
||||
}
|
||||
|
||||
fn btos(slice: &[u8]) -> &str {
|
||||
str::from_utf8(slice).unwrap()
|
||||
}
|
||||
|
||||
fn replace_str(
|
||||
slice: &str,
|
||||
src: u8,
|
||||
replacement: u8,
|
||||
) -> (String, Option<usize>) {
|
||||
let mut dst = slice.to_string().into_bytes();
|
||||
let mut dst = BString::from(slice);
|
||||
let result = replace_bytes(&mut dst, src, replacement);
|
||||
(String::from_utf8(dst).unwrap(), result)
|
||||
(dst.into_string().unwrap(), result)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nlisa\n");
|
||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||
rdr.consume(5);
|
||||
assert_eq!(rdr.absolute_byte_offset(), 5);
|
||||
@@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(rdr.absolute_byte_offset(), 11);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
||||
assert_eq!(rdr.buffer(), "maggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\n");
|
||||
assert_eq!(rdr.buffer(), "\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\n\n");
|
||||
assert_eq!(rdr.buffer(), "\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
|
||||
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
let mut got = vec![];
|
||||
let mut got = BString::new();
|
||||
while rdr.fill().unwrap() {
|
||||
got.extend(rdr.buffer());
|
||||
got.push(rdr.buffer());
|
||||
rdr.consume_all();
|
||||
}
|
||||
assert_eq!(bytes, btos(&got));
|
||||
assert_eq!(bytes, got);
|
||||
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
||||
assert_eq!(rdr.binary_byte_offset(), None);
|
||||
}
|
||||
@@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
||||
assert_eq!(rdr.buffer(), "homer\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
||||
assert_eq!(rdr.buffer(), "lisa\n");
|
||||
rdr.consume_all();
|
||||
|
||||
// This returns an error because while we have just enough room to
|
||||
@@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.fill().is_err());
|
||||
|
||||
// We can mush on though!
|
||||
assert_eq!(btos(rdr.buffer()), "m");
|
||||
assert_eq!(rdr.buffer(), "m");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "aggie");
|
||||
assert_eq!(rdr.buffer(), "aggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
||||
assert_eq!(rdr.buffer(), "homer\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
||||
assert_eq!(rdr.buffer(), "lisa\n");
|
||||
rdr.consume_all();
|
||||
|
||||
// We have just enough space.
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
||||
assert_eq!(rdr.buffer(), "maggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().is_err());
|
||||
assert_eq!(btos(rdr.buffer()), "");
|
||||
assert_eq!(rdr.buffer(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli");
|
||||
assert_eq!(rdr.buffer(), "homer\nli");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "");
|
||||
assert_eq!(rdr.buffer(), "");
|
||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||
assert_eq!(rdr.binary_byte_offset(), Some(0));
|
||||
}
|
||||
@@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
|
||||
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\
|
||||
assert_eq!(rdr.buffer(), "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, s\
|
||||
");
|
||||
@@ -901,7 +866,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -920,7 +885,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -939,7 +904,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -958,7 +923,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
||||
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
|
@@ -2,8 +2,8 @@
|
||||
A collection of routines for performing operations on lines.
|
||||
*/
|
||||
|
||||
use bstr::{B, BStr};
|
||||
use bytecount;
|
||||
use memchr::{memchr, memrchr};
|
||||
use grep_matcher::{LineTerminator, Match};
|
||||
|
||||
/// An iterator over lines in a particular slice of bytes.
|
||||
@@ -14,7 +14,7 @@ use grep_matcher::{LineTerminator, Match};
|
||||
/// `'b` refers to the lifetime of the underlying bytes.
|
||||
#[derive(Debug)]
|
||||
pub struct LineIter<'b> {
|
||||
bytes: &'b [u8],
|
||||
bytes: &'b BStr,
|
||||
stepper: LineStep,
|
||||
}
|
||||
|
||||
@@ -23,7 +23,7 @@ impl<'b> LineIter<'b> {
|
||||
/// are terminated by `line_term`.
|
||||
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
|
||||
LineIter {
|
||||
bytes: bytes,
|
||||
bytes: B(bytes),
|
||||
stepper: LineStep::new(line_term, 0, bytes.len()),
|
||||
}
|
||||
}
|
||||
@@ -33,7 +33,7 @@ impl<'b> Iterator for LineIter<'b> {
|
||||
type Item = &'b [u8];
|
||||
|
||||
fn next(&mut self) -> Option<&'b [u8]> {
|
||||
self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
|
||||
self.stepper.next_match(self.bytes).map(|m| self.bytes[m].as_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -73,19 +73,19 @@ impl LineStep {
|
||||
/// The range returned includes the line terminator. Ranges are always
|
||||
/// non-empty.
|
||||
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
self.next_impl(bytes)
|
||||
self.next_impl(B(bytes))
|
||||
}
|
||||
|
||||
/// Like next, but returns a `Match` instead of a tuple.
|
||||
#[inline(always)]
|
||||
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
|
||||
pub(crate) fn next_match(&mut self, bytes: &BStr) -> Option<Match> {
|
||||
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
fn next_impl(&mut self, mut bytes: &BStr) -> Option<(usize, usize)> {
|
||||
bytes = &bytes[..self.end];
|
||||
match memchr(self.line_term, &bytes[self.pos..]) {
|
||||
match bytes[self.pos..].find_byte(self.line_term) {
|
||||
None => {
|
||||
if self.pos < bytes.len() {
|
||||
let m = (self.pos, bytes.len());
|
||||
@@ -109,15 +109,15 @@ impl LineStep {
|
||||
}
|
||||
|
||||
/// Count the number of occurrences of `line_term` in `bytes`.
|
||||
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
||||
bytecount::count(bytes, line_term) as u64
|
||||
pub fn count(bytes: &BStr, line_term: u8) -> u64 {
|
||||
bytecount::count(bytes.as_bytes(), line_term) as u64
|
||||
}
|
||||
|
||||
/// Given a line that possibly ends with a terminator, return that line without
|
||||
/// the terminator.
|
||||
#[inline(always)]
|
||||
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
let line_term = line_term.as_bytes();
|
||||
pub fn without_terminator(bytes: &BStr, line_term: LineTerminator) -> &BStr {
|
||||
let line_term = BStr::new(line_term.as_bytes());
|
||||
let start = bytes.len().saturating_sub(line_term.len());
|
||||
if bytes.get(start..) == Some(line_term) {
|
||||
return &bytes[..bytes.len() - line_term.len()];
|
||||
@@ -131,18 +131,20 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
/// Line terminators are considered part of the line they terminate.
|
||||
#[inline(always)]
|
||||
pub fn locate(
|
||||
bytes: &[u8],
|
||||
bytes: &BStr,
|
||||
line_term: u8,
|
||||
range: Match,
|
||||
) -> Match {
|
||||
let line_start = memrchr(line_term, &bytes[0..range.start()])
|
||||
let line_start = bytes[..range.start()]
|
||||
.rfind_byte(line_term)
|
||||
.map_or(0, |i| i + 1);
|
||||
let line_end =
|
||||
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
||||
range.end()
|
||||
} else {
|
||||
memchr(line_term, &bytes[range.end()..])
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
bytes[range.end()..]
|
||||
.find_byte(line_term)
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
};
|
||||
Match::new(line_start, line_end)
|
||||
}
|
||||
@@ -155,7 +157,7 @@ pub fn locate(
|
||||
///
|
||||
/// If `bytes` ends with a line terminator, then the terminator itself is
|
||||
/// considered part of the last line.
|
||||
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
||||
pub fn preceding(bytes: &BStr, line_term: u8, count: usize) -> usize {
|
||||
preceding_by_pos(bytes, bytes.len(), line_term, count)
|
||||
}
|
||||
|
||||
@@ -169,7 +171,7 @@ pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
||||
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
|
||||
/// = 8`) and `preceding(bytes, pos, b'\n', 1)` returns `0`.
|
||||
fn preceding_by_pos(
|
||||
bytes: &[u8],
|
||||
bytes: &BStr,
|
||||
mut pos: usize,
|
||||
line_term: u8,
|
||||
mut count: usize,
|
||||
@@ -180,7 +182,7 @@ fn preceding_by_pos(
|
||||
pos -= 1;
|
||||
}
|
||||
loop {
|
||||
match memrchr(line_term, &bytes[..pos]) {
|
||||
match bytes[..pos].rfind_byte(line_term) {
|
||||
None => {
|
||||
return 0;
|
||||
}
|
||||
@@ -201,7 +203,10 @@ fn preceding_by_pos(
|
||||
mod tests {
|
||||
use std::ops::Range;
|
||||
use std::str;
|
||||
|
||||
use bstr::B;
|
||||
use grep_matcher::Match;
|
||||
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
@@ -220,7 +225,7 @@ and exhibited clearly, with a label attached.\
|
||||
fn lines(text: &str) -> Vec<&str> {
|
||||
let mut results = vec![];
|
||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
||||
while let Some(m) = it.next_match(B(text)) {
|
||||
results.push(&text[m]);
|
||||
}
|
||||
results
|
||||
@@ -229,26 +234,26 @@ and exhibited clearly, with a label attached.\
|
||||
fn line_ranges(text: &str) -> Vec<Range<usize>> {
|
||||
let mut results = vec![];
|
||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
||||
while let Some(m) = it.next_match(B(text)) {
|
||||
results.push(m.start()..m.end());
|
||||
}
|
||||
results
|
||||
}
|
||||
|
||||
fn prev(text: &str, pos: usize, count: usize) -> usize {
|
||||
preceding_by_pos(text.as_bytes(), pos, b'\n', count)
|
||||
preceding_by_pos(B(text), pos, b'\n', count)
|
||||
}
|
||||
|
||||
fn loc(text: &str, start: usize, end: usize) -> Match {
|
||||
locate(text.as_bytes(), b'\n', Match::new(start, end))
|
||||
locate(B(text), b'\n', Match::new(start, end))
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_count() {
|
||||
assert_eq!(0, count(b"", b'\n'));
|
||||
assert_eq!(1, count(b"\n", b'\n'));
|
||||
assert_eq!(2, count(b"\n\n", b'\n'));
|
||||
assert_eq!(2, count(b"a\nb\nc", b'\n'));
|
||||
assert_eq!(0, count(B(""), b'\n'));
|
||||
assert_eq!(1, count(B("\n"), b'\n'));
|
||||
assert_eq!(2, count(B("\n\n"), b'\n'));
|
||||
assert_eq!(2, count(B("a\nb\nc"), b'\n'));
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -331,7 +336,7 @@ and exhibited clearly, with a label attached.\
|
||||
#[test]
|
||||
fn preceding_lines_doc() {
|
||||
// These are the examples mentioned in the documentation of `preceding`.
|
||||
let bytes = b"abc\nxyz\n";
|
||||
let bytes = B("abc\nxyz\n");
|
||||
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
|
||||
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
|
||||
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
|
||||
|
@@ -1,6 +1,6 @@
|
||||
use std::cmp;
|
||||
|
||||
use memchr::memchr;
|
||||
use bstr::BStr;
|
||||
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use lines::{self, LineStep};
|
||||
@@ -84,7 +84,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
pub fn matched(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
self.sink_matched(buf, range)
|
||||
@@ -107,7 +107,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
})
|
||||
}
|
||||
|
||||
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
pub fn match_by_line(&mut self, buf: &BStr) -> Result<bool, S::Error> {
|
||||
if self.is_line_by_line_fast() {
|
||||
self.match_by_line_fast(buf)
|
||||
} else {
|
||||
@@ -115,7 +115,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn roll(&mut self, buf: &[u8]) -> usize {
|
||||
pub fn roll(&mut self, buf: &BStr) -> usize {
|
||||
let consumed =
|
||||
if self.config.max_context() == 0 {
|
||||
buf.len()
|
||||
@@ -141,7 +141,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
consumed
|
||||
}
|
||||
|
||||
pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
|
||||
pub fn detect_binary(&mut self, buf: &BStr, range: &Range) -> bool {
|
||||
if self.binary_byte_offset.is_some() {
|
||||
return true;
|
||||
}
|
||||
@@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
BinaryDetection::Quit(b) => b,
|
||||
_ => return false,
|
||||
};
|
||||
if let Some(i) = memchr(binary_byte, &buf[*range]) {
|
||||
if let Some(i) = buf[*range].find_byte(binary_byte) {
|
||||
self.binary_byte_offset = Some(range.start() + i);
|
||||
true
|
||||
} else {
|
||||
@@ -159,7 +159,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
pub fn before_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
upto: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.config.before_context == 0 {
|
||||
@@ -194,7 +194,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
pub fn after_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
upto: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.after_context_left == 0 {
|
||||
@@ -219,7 +219,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
pub fn other_context_by_line(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
upto: usize,
|
||||
) -> Result<bool, S::Error> {
|
||||
let range = Range::new(self.last_line_visited, upto);
|
||||
@@ -236,7 +236,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
fn match_by_line_slow(&mut self, buf: &BStr) -> Result<bool, S::Error> {
|
||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||
|
||||
let range = Range::new(self.pos(), buf.len());
|
||||
@@ -255,7 +255,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&buf[line],
|
||||
self.config.line_term,
|
||||
);
|
||||
match self.matcher.shortest_match(slice) {
|
||||
match self.matcher.shortest_match(slice.as_bytes()) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(result) => result.is_some(),
|
||||
}
|
||||
@@ -281,7 +281,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
fn match_by_line_fast(&mut self, buf: &BStr) -> Result<bool, S::Error> {
|
||||
debug_assert!(!self.config.passthru);
|
||||
|
||||
while !buf[self.pos()..].is_empty() {
|
||||
@@ -316,7 +316,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
#[inline(always)]
|
||||
fn match_by_line_fast_invert(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
) -> Result<bool, S::Error> {
|
||||
assert!(self.config.invert_match);
|
||||
|
||||
@@ -357,14 +357,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
#[inline(always)]
|
||||
fn find_by_line_fast(
|
||||
&self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
) -> Result<Option<Range>, S::Error> {
|
||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||
debug_assert!(self.is_line_by_line_fast());
|
||||
|
||||
let mut pos = self.pos();
|
||||
while !buf[pos..].is_empty() {
|
||||
match self.matcher.find_candidate_line(&buf[pos..]) {
|
||||
match self.matcher.find_candidate_line(buf[pos..].as_bytes()) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(None) => return Ok(None),
|
||||
Ok(Some(LineMatchKind::Confirmed(i))) => {
|
||||
@@ -396,7 +396,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&buf[line],
|
||||
self.config.line_term,
|
||||
);
|
||||
match self.matcher.is_match(slice) {
|
||||
match self.matcher.is_match(slice.as_bytes()) {
|
||||
Err(err) => return Err(S::Error::error_message(err)),
|
||||
Ok(true) => return Ok(Some(line)),
|
||||
Ok(false) => {
|
||||
@@ -413,7 +413,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
#[inline(always)]
|
||||
fn sink_matched(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
@@ -438,7 +438,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&self.searcher,
|
||||
&SinkMatch {
|
||||
line_term: self.config.line_term,
|
||||
bytes: linebuf,
|
||||
bytes: linebuf.as_bytes(),
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
},
|
||||
@@ -454,7 +454,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
fn sink_before_context(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
@@ -466,7 +466,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
bytes: buf[*range].as_bytes(),
|
||||
kind: SinkContextKind::Before,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
@@ -482,7 +482,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
fn sink_after_context(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
assert!(self.after_context_left >= 1);
|
||||
@@ -496,7 +496,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
bytes: buf[*range].as_bytes(),
|
||||
kind: SinkContextKind::After,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
@@ -513,7 +513,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
fn sink_other_context(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
buf: &BStr,
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
@@ -525,7 +525,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
bytes: buf[*range].as_bytes(),
|
||||
kind: SinkContextKind::Other,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
@@ -555,7 +555,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
}
|
||||
|
||||
fn count_lines(&mut self, buf: &[u8], upto: usize) {
|
||||
fn count_lines(&mut self, buf: &BStr, upto: usize) {
|
||||
if let Some(ref mut line_number) = self.line_number {
|
||||
if self.last_line_counted >= upto {
|
||||
return;
|
||||
|
@@ -1,7 +1,9 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
|
||||
use bstr::BStr;
|
||||
use grep_matcher::Matcher;
|
||||
|
||||
use lines::{self, LineStep};
|
||||
use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader};
|
||||
use sink::{Sink, SinkError};
|
||||
@@ -77,14 +79,14 @@ where M: Matcher,
|
||||
pub struct SliceByLine<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
slice: &'s BStr,
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
pub fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
slice: &'s [u8],
|
||||
slice: &'s BStr,
|
||||
write_to: S,
|
||||
) -> SliceByLine<'s, M, S> {
|
||||
debug_assert!(!searcher.multi_line_with_matcher(&matcher));
|
||||
@@ -127,7 +129,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
pub struct MultiLine<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
slice: &'s BStr,
|
||||
last_match: Option<Range>,
|
||||
}
|
||||
|
||||
@@ -135,7 +137,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
pub fn new(
|
||||
searcher: &'s Searcher,
|
||||
matcher: M,
|
||||
slice: &'s [u8],
|
||||
slice: &'s BStr,
|
||||
write_to: S,
|
||||
) -> MultiLine<'s, M, S> {
|
||||
debug_assert!(searcher.multi_line_with_matcher(&matcher));
|
||||
@@ -306,7 +308,8 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
}
|
||||
|
||||
fn find(&mut self) -> Result<Option<Range>, S::Error> {
|
||||
match self.core.matcher().find(&self.slice[self.core.pos()..]) {
|
||||
let haystack = &self.slice[self.core.pos()..];
|
||||
match self.core.matcher().find(haystack.as_bytes()) {
|
||||
Err(err) => Err(S::Error::error_message(err)),
|
||||
Ok(None) => Ok(None),
|
||||
Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),
|
||||
|
@@ -5,6 +5,7 @@ use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
|
||||
use bstr::{B, BStr, BString};
|
||||
use encoding_rs;
|
||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||
use grep_matcher::{LineTerminator, Match, Matcher};
|
||||
@@ -311,9 +312,9 @@ impl SearcherBuilder {
|
||||
Searcher {
|
||||
config: config,
|
||||
decode_builder: decode_builder,
|
||||
decode_buffer: RefCell::new(vec![0; 8 * (1<<10)]),
|
||||
decode_buffer: RefCell::new(BString::from(vec![0; 8 * (1<<10)])),
|
||||
line_buffer: RefCell::new(self.config.line_buffer()),
|
||||
multi_line_buffer: RefCell::new(vec![]),
|
||||
multi_line_buffer: RefCell::new(BString::new()),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -543,7 +544,7 @@ pub struct Searcher {
|
||||
/// through the underlying bytes with no additional overhead.
|
||||
decode_builder: DecodeReaderBytesBuilder,
|
||||
/// A buffer that is used for transcoding scratch space.
|
||||
decode_buffer: RefCell<Vec<u8>>,
|
||||
decode_buffer: RefCell<BString>,
|
||||
/// A line buffer for use in line oriented searching.
|
||||
///
|
||||
/// We wrap it in a RefCell to permit lending out borrows of `Searcher`
|
||||
@@ -555,7 +556,7 @@ pub struct Searcher {
|
||||
/// multi line search. In particular, multi line searches cannot be
|
||||
/// performed incrementally, and need the entire haystack in memory at
|
||||
/// once.
|
||||
multi_line_buffer: RefCell<Vec<u8>>,
|
||||
multi_line_buffer: RefCell<BString>,
|
||||
}
|
||||
|
||||
impl Searcher {
|
||||
@@ -666,7 +667,7 @@ impl Searcher {
|
||||
|
||||
let mut decode_buffer = self.decode_buffer.borrow_mut();
|
||||
let read_from = self.decode_builder
|
||||
.build_with_buffer(read_from, &mut *decode_buffer)
|
||||
.build_with_buffer(read_from, decode_buffer.as_mut_vec())
|
||||
.map_err(S::Error::error_io)?;
|
||||
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
@@ -698,12 +699,13 @@ impl Searcher {
|
||||
where M: Matcher,
|
||||
S: Sink,
|
||||
{
|
||||
let slice = B(slice);
|
||||
self.check_config(&matcher).map_err(S::Error::error_config)?;
|
||||
|
||||
// We can search the slice directly, unless we need to do transcoding.
|
||||
if self.slice_needs_transcoding(slice) {
|
||||
trace!("slice reader: needs transcoding, using generic reader");
|
||||
return self.search_reader(matcher, slice, write_to);
|
||||
return self.search_reader(matcher, slice.as_bytes(), write_to);
|
||||
}
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
trace!("slice reader: searching via multiline strategy");
|
||||
@@ -736,7 +738,7 @@ impl Searcher {
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given slice needs to be transcoded.
|
||||
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
||||
fn slice_needs_transcoding(&self, slice: &BStr) -> bool {
|
||||
self.config.encoding.is_some() || slice_has_utf16_bom(slice)
|
||||
}
|
||||
}
|
||||
@@ -851,7 +853,9 @@ impl Searcher {
|
||||
.map(|m| m.len() as usize + 1)
|
||||
.unwrap_or(0);
|
||||
buf.reserve(cap);
|
||||
read_from.read_to_end(&mut *buf).map_err(S::Error::error_io)?;
|
||||
read_from
|
||||
.read_to_end(buf.as_mut_vec())
|
||||
.map_err(S::Error::error_io)?;
|
||||
return Ok(());
|
||||
}
|
||||
self.fill_multi_line_buffer_from_reader::<_, S>(read_from)
|
||||
@@ -868,6 +872,7 @@ impl Searcher {
|
||||
assert!(self.config.multi_line);
|
||||
|
||||
let mut buf = self.multi_line_buffer.borrow_mut();
|
||||
let buf = buf.as_mut_vec();
|
||||
buf.clear();
|
||||
|
||||
// If we don't have a heap limit, then we can defer to std's
|
||||
@@ -919,8 +924,8 @@ impl Searcher {
|
||||
///
|
||||
/// This is used by the searcher to determine if a transcoder is necessary.
|
||||
/// Otherwise, it is advantageous to search the slice directly.
|
||||
fn slice_has_utf16_bom(slice: &[u8]) -> bool {
|
||||
let enc = match encoding_rs::Encoding::for_bom(slice) {
|
||||
fn slice_has_utf16_bom(slice: &BStr) -> bool {
|
||||
let enc = match encoding_rs::Encoding::for_bom(slice.as_bytes()) {
|
||||
None => return false,
|
||||
Some((enc, _)) => enc,
|
||||
};
|
||||
|
@@ -1,10 +1,10 @@
|
||||
use std::io::{self, Write};
|
||||
use std::str;
|
||||
|
||||
use bstr::B;
|
||||
use grep_matcher::{
|
||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||
};
|
||||
use memchr::memchr;
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
@@ -94,8 +94,8 @@ impl Matcher for RegexMatcher {
|
||||
}
|
||||
// Make it interesting and return the last byte in the current
|
||||
// line.
|
||||
let i = memchr(self.line_term.unwrap().as_byte(), haystack)
|
||||
.map(|i| i)
|
||||
let i = B(haystack)
|
||||
.find_byte(self.line_term.unwrap().as_byte())
|
||||
.unwrap_or(haystack.len() - 1);
|
||||
Ok(Some(LineMatchKind::Candidate(i)))
|
||||
} else {
|
||||
|
Reference in New Issue
Block a user