mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-26 09:42:00 -07:00
Compare commits
1 Commits
ignore-0.4
...
ag/bstr-mi
Author | SHA1 | Date | |
---|---|---|---|
|
4b88e08f41 |
11
Cargo.lock
generated
11
Cargo.lock
generated
@@ -34,6 +34,13 @@ name = "bitflags"
|
|||||||
version = "1.0.4"
|
version = "1.0.4"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "bstr"
|
||||||
|
version = "0.0.1"
|
||||||
|
dependencies = [
|
||||||
|
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "bytecount"
|
name = "bytecount"
|
||||||
version = "0.5.0"
|
version = "0.5.0"
|
||||||
@@ -180,7 +187,7 @@ dependencies = [
|
|||||||
name = "grep-matcher"
|
name = "grep-matcher"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"bstr 0.0.1",
|
||||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
@@ -222,13 +229,13 @@ dependencies = [
|
|||||||
name = "grep-searcher"
|
name = "grep-searcher"
|
||||||
version = "0.1.1"
|
version = "0.1.1"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"bstr 0.0.1",
|
||||||
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"bytecount 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
"encoding_rs 0.8.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"encoding_rs_io 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"grep-matcher 0.1.1",
|
"grep-matcher 0.1.1",
|
||||||
"grep-regex 0.1.1",
|
"grep-regex 0.1.1",
|
||||||
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"memchr 2.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
@@ -13,8 +13,11 @@ keywords = ["regex", "pattern", "trait"]
|
|||||||
license = "Unlicense/MIT"
|
license = "Unlicense/MIT"
|
||||||
autotests = false
|
autotests = false
|
||||||
|
|
||||||
[dependencies]
|
[dependencies.bstr]
|
||||||
memchr = "2.1"
|
version = "*"
|
||||||
|
path = "/home/andrew/rust/bstr"
|
||||||
|
default-features = false
|
||||||
|
features = ["std"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use memchr::memchr;
|
use bstr::B;
|
||||||
|
|
||||||
/// Interpolate capture references in `replacement` and write the interpolation
|
/// Interpolate capture references in `replacement` and write the interpolation
|
||||||
/// result to `dst`. References in `replacement` take the form of $N or $name,
|
/// result to `dst`. References in `replacement` take the form of $N or $name,
|
||||||
@@ -22,7 +22,7 @@ pub fn interpolate<A, N>(
|
|||||||
N: FnMut(&str) -> Option<usize>
|
N: FnMut(&str) -> Option<usize>
|
||||||
{
|
{
|
||||||
while !replacement.is_empty() {
|
while !replacement.is_empty() {
|
||||||
match memchr(b'$', replacement) {
|
match B(replacement).find_byte(b'$') {
|
||||||
None => break,
|
None => break,
|
||||||
Some(i) => {
|
Some(i) => {
|
||||||
dst.extend(&replacement[..i]);
|
dst.extend(&replacement[..i]);
|
||||||
|
@@ -38,13 +38,15 @@ implementations.
|
|||||||
|
|
||||||
#![deny(missing_docs)]
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
extern crate memchr;
|
extern crate bstr;
|
||||||
|
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::ops;
|
use std::ops;
|
||||||
use std::u64;
|
use std::u64;
|
||||||
|
|
||||||
|
use bstr::BStr;
|
||||||
|
|
||||||
use interpolate::interpolate;
|
use interpolate::interpolate;
|
||||||
|
|
||||||
mod interpolate;
|
mod interpolate;
|
||||||
@@ -180,6 +182,22 @@ impl ops::IndexMut<Match> for [u8] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl ops::Index<Match> for BStr {
|
||||||
|
type Output = BStr;
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn index(&self, index: Match) -> &BStr {
|
||||||
|
&self[index.start..index.end]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ops::IndexMut<Match> for BStr {
|
||||||
|
#[inline]
|
||||||
|
fn index_mut(&mut self, index: Match) -> &mut BStr {
|
||||||
|
&mut self[index.start..index.end]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ops::Index<Match> for str {
|
impl ops::Index<Match> for str {
|
||||||
type Output = str;
|
type Output = str;
|
||||||
|
|
||||||
|
@@ -18,9 +18,14 @@ encoding_rs = "0.8.14"
|
|||||||
encoding_rs_io = "0.1.3"
|
encoding_rs_io = "0.1.3"
|
||||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
memchr = "2.1"
|
|
||||||
memmap = "0.7"
|
memmap = "0.7"
|
||||||
|
|
||||||
|
[dependencies.bstr]
|
||||||
|
version = "*"
|
||||||
|
path = "/home/andrew/rust/bstr"
|
||||||
|
default-features = false
|
||||||
|
features = ["std"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
|
@@ -99,13 +99,13 @@ searches stdin.
|
|||||||
|
|
||||||
#![deny(missing_docs)]
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
|
extern crate bstr;
|
||||||
extern crate bytecount;
|
extern crate bytecount;
|
||||||
extern crate encoding_rs;
|
extern crate encoding_rs;
|
||||||
extern crate encoding_rs_io;
|
extern crate encoding_rs_io;
|
||||||
extern crate grep_matcher;
|
extern crate grep_matcher;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate log;
|
extern crate log;
|
||||||
extern crate memchr;
|
|
||||||
extern crate memmap;
|
extern crate memmap;
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
extern crate regex;
|
extern crate regex;
|
||||||
|
@@ -1,8 +1,7 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::ptr;
|
|
||||||
|
|
||||||
use memchr::{memchr, memrchr};
|
use bstr::{BStr, BString};
|
||||||
|
|
||||||
/// The default buffer capacity that we use for the line buffer.
|
/// The default buffer capacity that we use for the line buffer.
|
||||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||||
@@ -123,7 +122,7 @@ impl LineBufferBuilder {
|
|||||||
pub fn build(&self) -> LineBuffer {
|
pub fn build(&self) -> LineBuffer {
|
||||||
LineBuffer {
|
LineBuffer {
|
||||||
config: self.config,
|
config: self.config,
|
||||||
buf: vec![0; self.config.capacity],
|
buf: BString::from(vec![0; self.config.capacity]),
|
||||||
pos: 0,
|
pos: 0,
|
||||||
last_lineterm: 0,
|
last_lineterm: 0,
|
||||||
end: 0,
|
end: 0,
|
||||||
@@ -254,7 +253,7 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return the contents of this buffer.
|
/// Return the contents of this buffer.
|
||||||
pub fn buffer(&self) -> &[u8] {
|
pub fn buffer(&self) -> &BStr {
|
||||||
self.line_buffer.buffer()
|
self.line_buffer.buffer()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -284,7 +283,7 @@ pub struct LineBuffer {
|
|||||||
/// The configuration of this buffer.
|
/// The configuration of this buffer.
|
||||||
config: Config,
|
config: Config,
|
||||||
/// The primary buffer with which to hold data.
|
/// The primary buffer with which to hold data.
|
||||||
buf: Vec<u8>,
|
buf: BString,
|
||||||
/// The current position of this buffer. This is always a valid sliceable
|
/// The current position of this buffer. This is always a valid sliceable
|
||||||
/// index into `buf`, and its maximum value is the length of `buf`.
|
/// index into `buf`, and its maximum value is the length of `buf`.
|
||||||
pos: usize,
|
pos: usize,
|
||||||
@@ -339,13 +338,13 @@ impl LineBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return the contents of this buffer.
|
/// Return the contents of this buffer.
|
||||||
fn buffer(&self) -> &[u8] {
|
fn buffer(&self) -> &BStr {
|
||||||
&self.buf[self.pos..self.last_lineterm]
|
&self.buf[self.pos..self.last_lineterm]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the contents of the free space beyond the end of the buffer as
|
/// Return the contents of the free space beyond the end of the buffer as
|
||||||
/// a mutable slice.
|
/// a mutable slice.
|
||||||
fn free_buffer(&mut self) -> &mut [u8] {
|
fn free_buffer(&mut self) -> &mut BStr {
|
||||||
&mut self.buf[self.end..]
|
&mut self.buf[self.end..]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -396,7 +395,7 @@ impl LineBuffer {
|
|||||||
assert_eq!(self.pos, 0);
|
assert_eq!(self.pos, 0);
|
||||||
loop {
|
loop {
|
||||||
self.ensure_capacity()?;
|
self.ensure_capacity()?;
|
||||||
let readlen = rdr.read(self.free_buffer())?;
|
let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
|
||||||
if readlen == 0 {
|
if readlen == 0 {
|
||||||
// We're only done reading for good once the caller has
|
// We're only done reading for good once the caller has
|
||||||
// consumed everything.
|
// consumed everything.
|
||||||
@@ -416,7 +415,7 @@ impl LineBuffer {
|
|||||||
match self.config.binary {
|
match self.config.binary {
|
||||||
BinaryDetection::None => {} // nothing to do
|
BinaryDetection::None => {} // nothing to do
|
||||||
BinaryDetection::Quit(byte) => {
|
BinaryDetection::Quit(byte) => {
|
||||||
if let Some(i) = memchr(byte, newbytes) {
|
if let Some(i) = newbytes.find_byte(byte) {
|
||||||
self.end = oldend + i;
|
self.end = oldend + i;
|
||||||
self.last_lineterm = self.end;
|
self.last_lineterm = self.end;
|
||||||
self.binary_byte_offset =
|
self.binary_byte_offset =
|
||||||
@@ -444,7 +443,7 @@ impl LineBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Update our `last_lineterm` positions if we read one.
|
// Update our `last_lineterm` positions if we read one.
|
||||||
if let Some(i) = memrchr(self.config.lineterm, newbytes) {
|
if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
|
||||||
self.last_lineterm = oldend + i + 1;
|
self.last_lineterm = oldend + i + 1;
|
||||||
return Ok(true);
|
return Ok(true);
|
||||||
}
|
}
|
||||||
@@ -467,40 +466,8 @@ impl LineBuffer {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert!(self.pos < self.end && self.end <= self.buf.len());
|
|
||||||
let roll_len = self.end - self.pos;
|
let roll_len = self.end - self.pos;
|
||||||
unsafe {
|
self.buf.copy_within(self.pos.., 0);
|
||||||
// SAFETY: A buffer contains Copy data, so there's no problem
|
|
||||||
// moving it around. Safety also depends on our indices being
|
|
||||||
// in bounds, which they should always be, and we enforce with
|
|
||||||
// an assert above.
|
|
||||||
//
|
|
||||||
// It seems like it should be possible to do this in safe code that
|
|
||||||
// results in the same codegen. I tried the obvious:
|
|
||||||
//
|
|
||||||
// for (src, dst) in (self.pos..self.end).zip(0..) {
|
|
||||||
// self.buf[dst] = self.buf[src];
|
|
||||||
// }
|
|
||||||
//
|
|
||||||
// But the above does not work, and in fact compiles down to a slow
|
|
||||||
// byte-by-byte loop. I tried a few other minor variations, but
|
|
||||||
// alas, better minds might prevail.
|
|
||||||
//
|
|
||||||
// Overall, this doesn't save us *too* much. It mostly matters when
|
|
||||||
// the number of bytes we're copying is large, which can happen
|
|
||||||
// if the searcher is asked to produce a lot of context. We could
|
|
||||||
// decide this isn't worth it, but it does make an appreciable
|
|
||||||
// impact at or around the context=30 range on my machine.
|
|
||||||
//
|
|
||||||
// We could also use a temporary buffer that compiles down to two
|
|
||||||
// memcpys and is faster than the byte-at-a-time loop, but it
|
|
||||||
// complicates our options for limiting memory allocation a bit.
|
|
||||||
ptr::copy(
|
|
||||||
self.buf[self.pos..].as_ptr(),
|
|
||||||
self.buf.as_mut_ptr(),
|
|
||||||
roll_len,
|
|
||||||
);
|
|
||||||
}
|
|
||||||
self.pos = 0;
|
self.pos = 0;
|
||||||
self.last_lineterm = roll_len;
|
self.last_lineterm = roll_len;
|
||||||
self.end = roll_len;
|
self.end = roll_len;
|
||||||
@@ -536,14 +503,15 @@ impl LineBuffer {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Replaces `src` with `replacement` in bytes.
|
/// Replaces `src` with `replacement` in bytes, and return the offset of the
|
||||||
fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
/// first replacement, if one exists.
|
||||||
|
fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
|
||||||
if src == replacement {
|
if src == replacement {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let mut first_pos = None;
|
let mut first_pos = None;
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
|
while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
|
||||||
if first_pos.is_none() {
|
if first_pos.is_none() {
|
||||||
first_pos = Some(i);
|
first_pos = Some(i);
|
||||||
}
|
}
|
||||||
@@ -560,6 +528,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::str;
|
use std::str;
|
||||||
|
use bstr::BString;
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
const SHERLOCK: &'static str = "\
|
const SHERLOCK: &'static str = "\
|
||||||
@@ -575,18 +544,14 @@ and exhibited clearly, with a label attached.\
|
|||||||
slice.to_string()
|
slice.to_string()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn btos(slice: &[u8]) -> &str {
|
|
||||||
str::from_utf8(slice).unwrap()
|
|
||||||
}
|
|
||||||
|
|
||||||
fn replace_str(
|
fn replace_str(
|
||||||
slice: &str,
|
slice: &str,
|
||||||
src: u8,
|
src: u8,
|
||||||
replacement: u8,
|
replacement: u8,
|
||||||
) -> (String, Option<usize>) {
|
) -> (String, Option<usize>) {
|
||||||
let mut dst = slice.to_string().into_bytes();
|
let mut dst = BString::from(slice);
|
||||||
let result = replace_bytes(&mut dst, src, replacement);
|
let result = replace_bytes(&mut dst, src, replacement);
|
||||||
(String::from_utf8(dst).unwrap(), result)
|
(dst.into_string().unwrap(), result)
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -607,7 +572,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
|
assert_eq!(rdr.buffer(), "homer\nlisa\n");
|
||||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||||
rdr.consume(5);
|
rdr.consume(5);
|
||||||
assert_eq!(rdr.absolute_byte_offset(), 5);
|
assert_eq!(rdr.absolute_byte_offset(), 5);
|
||||||
@@ -615,7 +580,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert_eq!(rdr.absolute_byte_offset(), 11);
|
assert_eq!(rdr.absolute_byte_offset(), 11);
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
assert_eq!(rdr.buffer(), "maggie");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -630,7 +595,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -645,7 +610,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "\n");
|
assert_eq!(rdr.buffer(), "\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -660,7 +625,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "\n\n");
|
assert_eq!(rdr.buffer(), "\n\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -698,12 +663,12 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
|
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
|
||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
let mut got = vec![];
|
let mut got = BString::new();
|
||||||
while rdr.fill().unwrap() {
|
while rdr.fill().unwrap() {
|
||||||
got.extend(rdr.buffer());
|
got.push(rdr.buffer());
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
}
|
}
|
||||||
assert_eq!(bytes, btos(&got));
|
assert_eq!(bytes, got);
|
||||||
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
||||||
assert_eq!(rdr.binary_byte_offset(), None);
|
assert_eq!(rdr.binary_byte_offset(), None);
|
||||||
}
|
}
|
||||||
@@ -718,11 +683,11 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
assert_eq!(rdr.buffer(), "homer\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
assert_eq!(rdr.buffer(), "lisa\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
// This returns an error because while we have just enough room to
|
// This returns an error because while we have just enough room to
|
||||||
@@ -732,11 +697,11 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.fill().is_err());
|
assert!(rdr.fill().is_err());
|
||||||
|
|
||||||
// We can mush on though!
|
// We can mush on though!
|
||||||
assert_eq!(btos(rdr.buffer()), "m");
|
assert_eq!(rdr.buffer(), "m");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "aggie");
|
assert_eq!(rdr.buffer(), "aggie");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -752,16 +717,16 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
assert_eq!(rdr.buffer(), "homer\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
assert_eq!(rdr.buffer(), "lisa\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
// We have just enough space.
|
// We have just enough space.
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
assert_eq!(rdr.buffer(), "maggie");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -777,7 +742,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(rdr.fill().is_err());
|
assert!(rdr.fill().is_err());
|
||||||
assert_eq!(btos(rdr.buffer()), "");
|
assert_eq!(rdr.buffer(), "");
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -789,7 +754,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
|
assert_eq!(rdr.buffer(), "homer\nli\x00sa\nmaggie\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -808,7 +773,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nli");
|
assert_eq!(rdr.buffer(), "homer\nli");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -825,7 +790,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "");
|
assert_eq!(rdr.buffer(), "");
|
||||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||||
assert_eq!(rdr.binary_byte_offset(), Some(0));
|
assert_eq!(rdr.binary_byte_offset(), Some(0));
|
||||||
}
|
}
|
||||||
@@ -841,7 +806,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -860,7 +825,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
|
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -878,7 +843,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "\
|
assert_eq!(rdr.buffer(), "\
|
||||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||||
Holmeses, s\
|
Holmeses, s\
|
||||||
");
|
");
|
||||||
@@ -901,7 +866,7 @@ Holmeses, s\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
|
assert_eq!(rdr.buffer(), "homer\nli\nsa\nmaggie\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -920,7 +885,7 @@ Holmeses, s\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
|
assert_eq!(rdr.buffer(), "\nhomer\nlisa\nmaggie\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -939,7 +904,7 @@ Holmeses, s\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
@@ -958,7 +923,7 @@ Holmeses, s\
|
|||||||
assert!(rdr.buffer().is_empty());
|
assert!(rdr.buffer().is_empty());
|
||||||
|
|
||||||
assert!(rdr.fill().unwrap());
|
assert!(rdr.fill().unwrap());
|
||||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
assert_eq!(rdr.buffer(), "homer\nlisa\nmaggie\n\n");
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
|
|
||||||
assert!(!rdr.fill().unwrap());
|
assert!(!rdr.fill().unwrap());
|
||||||
|
@@ -2,8 +2,8 @@
|
|||||||
A collection of routines for performing operations on lines.
|
A collection of routines for performing operations on lines.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
use bstr::{B, BStr};
|
||||||
use bytecount;
|
use bytecount;
|
||||||
use memchr::{memchr, memrchr};
|
|
||||||
use grep_matcher::{LineTerminator, Match};
|
use grep_matcher::{LineTerminator, Match};
|
||||||
|
|
||||||
/// An iterator over lines in a particular slice of bytes.
|
/// An iterator over lines in a particular slice of bytes.
|
||||||
@@ -14,7 +14,7 @@ use grep_matcher::{LineTerminator, Match};
|
|||||||
/// `'b` refers to the lifetime of the underlying bytes.
|
/// `'b` refers to the lifetime of the underlying bytes.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct LineIter<'b> {
|
pub struct LineIter<'b> {
|
||||||
bytes: &'b [u8],
|
bytes: &'b BStr,
|
||||||
stepper: LineStep,
|
stepper: LineStep,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -23,7 +23,7 @@ impl<'b> LineIter<'b> {
|
|||||||
/// are terminated by `line_term`.
|
/// are terminated by `line_term`.
|
||||||
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
|
pub fn new(line_term: u8, bytes: &'b [u8]) -> LineIter<'b> {
|
||||||
LineIter {
|
LineIter {
|
||||||
bytes: bytes,
|
bytes: B(bytes),
|
||||||
stepper: LineStep::new(line_term, 0, bytes.len()),
|
stepper: LineStep::new(line_term, 0, bytes.len()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -33,7 +33,7 @@ impl<'b> Iterator for LineIter<'b> {
|
|||||||
type Item = &'b [u8];
|
type Item = &'b [u8];
|
||||||
|
|
||||||
fn next(&mut self) -> Option<&'b [u8]> {
|
fn next(&mut self) -> Option<&'b [u8]> {
|
||||||
self.stepper.next_match(self.bytes).map(|m| &self.bytes[m])
|
self.stepper.next_match(self.bytes).map(|m| self.bytes[m].as_bytes())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -73,19 +73,19 @@ impl LineStep {
|
|||||||
/// The range returned includes the line terminator. Ranges are always
|
/// The range returned includes the line terminator. Ranges are always
|
||||||
/// non-empty.
|
/// non-empty.
|
||||||
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
|
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
|
||||||
self.next_impl(bytes)
|
self.next_impl(B(bytes))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Like next, but returns a `Match` instead of a tuple.
|
/// Like next, but returns a `Match` instead of a tuple.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
|
pub(crate) fn next_match(&mut self, bytes: &BStr) -> Option<Match> {
|
||||||
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
|
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
fn next_impl(&mut self, mut bytes: &BStr) -> Option<(usize, usize)> {
|
||||||
bytes = &bytes[..self.end];
|
bytes = &bytes[..self.end];
|
||||||
match memchr(self.line_term, &bytes[self.pos..]) {
|
match bytes[self.pos..].find_byte(self.line_term) {
|
||||||
None => {
|
None => {
|
||||||
if self.pos < bytes.len() {
|
if self.pos < bytes.len() {
|
||||||
let m = (self.pos, bytes.len());
|
let m = (self.pos, bytes.len());
|
||||||
@@ -109,15 +109,15 @@ impl LineStep {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Count the number of occurrences of `line_term` in `bytes`.
|
/// Count the number of occurrences of `line_term` in `bytes`.
|
||||||
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
pub fn count(bytes: &BStr, line_term: u8) -> u64 {
|
||||||
bytecount::count(bytes, line_term) as u64
|
bytecount::count(bytes.as_bytes(), line_term) as u64
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a line that possibly ends with a terminator, return that line without
|
/// Given a line that possibly ends with a terminator, return that line without
|
||||||
/// the terminator.
|
/// the terminator.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
pub fn without_terminator(bytes: &BStr, line_term: LineTerminator) -> &BStr {
|
||||||
let line_term = line_term.as_bytes();
|
let line_term = BStr::new(line_term.as_bytes());
|
||||||
let start = bytes.len().saturating_sub(line_term.len());
|
let start = bytes.len().saturating_sub(line_term.len());
|
||||||
if bytes.get(start..) == Some(line_term) {
|
if bytes.get(start..) == Some(line_term) {
|
||||||
return &bytes[..bytes.len() - line_term.len()];
|
return &bytes[..bytes.len() - line_term.len()];
|
||||||
@@ -131,18 +131,20 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
|||||||
/// Line terminators are considered part of the line they terminate.
|
/// Line terminators are considered part of the line they terminate.
|
||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
pub fn locate(
|
pub fn locate(
|
||||||
bytes: &[u8],
|
bytes: &BStr,
|
||||||
line_term: u8,
|
line_term: u8,
|
||||||
range: Match,
|
range: Match,
|
||||||
) -> Match {
|
) -> Match {
|
||||||
let line_start = memrchr(line_term, &bytes[0..range.start()])
|
let line_start = bytes[..range.start()]
|
||||||
|
.rfind_byte(line_term)
|
||||||
.map_or(0, |i| i + 1);
|
.map_or(0, |i| i + 1);
|
||||||
let line_end =
|
let line_end =
|
||||||
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
||||||
range.end()
|
range.end()
|
||||||
} else {
|
} else {
|
||||||
memchr(line_term, &bytes[range.end()..])
|
bytes[range.end()..]
|
||||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
.find_byte(line_term)
|
||||||
|
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||||
};
|
};
|
||||||
Match::new(line_start, line_end)
|
Match::new(line_start, line_end)
|
||||||
}
|
}
|
||||||
@@ -155,7 +157,7 @@ pub fn locate(
|
|||||||
///
|
///
|
||||||
/// If `bytes` ends with a line terminator, then the terminator itself is
|
/// If `bytes` ends with a line terminator, then the terminator itself is
|
||||||
/// considered part of the last line.
|
/// considered part of the last line.
|
||||||
pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
pub fn preceding(bytes: &BStr, line_term: u8, count: usize) -> usize {
|
||||||
preceding_by_pos(bytes, bytes.len(), line_term, count)
|
preceding_by_pos(bytes, bytes.len(), line_term, count)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -169,7 +171,7 @@ pub fn preceding(bytes: &[u8], line_term: u8, count: usize) -> usize {
|
|||||||
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
|
/// and `pos = 7`, `preceding(bytes, pos, b'\n', 0)` returns `4` (as does `pos
|
||||||
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
|
/// = 8`) and `preceding(bytes, pos, `b'\n', 1)` returns `0`.
|
||||||
fn preceding_by_pos(
|
fn preceding_by_pos(
|
||||||
bytes: &[u8],
|
bytes: &BStr,
|
||||||
mut pos: usize,
|
mut pos: usize,
|
||||||
line_term: u8,
|
line_term: u8,
|
||||||
mut count: usize,
|
mut count: usize,
|
||||||
@@ -180,7 +182,7 @@ fn preceding_by_pos(
|
|||||||
pos -= 1;
|
pos -= 1;
|
||||||
}
|
}
|
||||||
loop {
|
loop {
|
||||||
match memrchr(line_term, &bytes[..pos]) {
|
match bytes[..pos].rfind_byte(line_term) {
|
||||||
None => {
|
None => {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@@ -201,7 +203,10 @@ fn preceding_by_pos(
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::ops::Range;
|
use std::ops::Range;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
|
use bstr::B;
|
||||||
use grep_matcher::Match;
|
use grep_matcher::Match;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
const SHERLOCK: &'static str = "\
|
const SHERLOCK: &'static str = "\
|
||||||
@@ -220,7 +225,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
fn lines(text: &str) -> Vec<&str> {
|
fn lines(text: &str) -> Vec<&str> {
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
while let Some(m) = it.next_match(B(text)) {
|
||||||
results.push(&text[m]);
|
results.push(&text[m]);
|
||||||
}
|
}
|
||||||
results
|
results
|
||||||
@@ -229,26 +234,26 @@ and exhibited clearly, with a label attached.\
|
|||||||
fn line_ranges(text: &str) -> Vec<Range<usize>> {
|
fn line_ranges(text: &str) -> Vec<Range<usize>> {
|
||||||
let mut results = vec![];
|
let mut results = vec![];
|
||||||
let mut it = LineStep::new(b'\n', 0, text.len());
|
let mut it = LineStep::new(b'\n', 0, text.len());
|
||||||
while let Some(m) = it.next_match(text.as_bytes()) {
|
while let Some(m) = it.next_match(B(text)) {
|
||||||
results.push(m.start()..m.end());
|
results.push(m.start()..m.end());
|
||||||
}
|
}
|
||||||
results
|
results
|
||||||
}
|
}
|
||||||
|
|
||||||
fn prev(text: &str, pos: usize, count: usize) -> usize {
|
fn prev(text: &str, pos: usize, count: usize) -> usize {
|
||||||
preceding_by_pos(text.as_bytes(), pos, b'\n', count)
|
preceding_by_pos(B(text), pos, b'\n', count)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn loc(text: &str, start: usize, end: usize) -> Match {
|
fn loc(text: &str, start: usize, end: usize) -> Match {
|
||||||
locate(text.as_bytes(), b'\n', Match::new(start, end))
|
locate(B(text), b'\n', Match::new(start, end))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn line_count() {
|
fn line_count() {
|
||||||
assert_eq!(0, count(b"", b'\n'));
|
assert_eq!(0, count(B(""), b'\n'));
|
||||||
assert_eq!(1, count(b"\n", b'\n'));
|
assert_eq!(1, count(B("\n"), b'\n'));
|
||||||
assert_eq!(2, count(b"\n\n", b'\n'));
|
assert_eq!(2, count(B("\n\n"), b'\n'));
|
||||||
assert_eq!(2, count(b"a\nb\nc", b'\n'));
|
assert_eq!(2, count(B("a\nb\nc"), b'\n'));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@@ -331,7 +336,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
#[test]
|
#[test]
|
||||||
fn preceding_lines_doc() {
|
fn preceding_lines_doc() {
|
||||||
// These are the examples mentions in the documentation of `preceding`.
|
// These are the examples mentions in the documentation of `preceding`.
|
||||||
let bytes = b"abc\nxyz\n";
|
let bytes = B("abc\nxyz\n");
|
||||||
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
|
assert_eq!(4, preceding_by_pos(bytes, 7, b'\n', 0));
|
||||||
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
|
assert_eq!(4, preceding_by_pos(bytes, 8, b'\n', 0));
|
||||||
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
|
assert_eq!(0, preceding_by_pos(bytes, 7, b'\n', 1));
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
|
|
||||||
use memchr::memchr;
|
use bstr::BStr;
|
||||||
|
|
||||||
use grep_matcher::{LineMatchKind, Matcher};
|
use grep_matcher::{LineMatchKind, Matcher};
|
||||||
use lines::{self, LineStep};
|
use lines::{self, LineStep};
|
||||||
@@ -84,7 +84,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
pub fn matched(
|
pub fn matched(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
self.sink_matched(buf, range)
|
self.sink_matched(buf, range)
|
||||||
@@ -107,7 +107,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
pub fn match_by_line(&mut self, buf: &BStr) -> Result<bool, S::Error> {
|
||||||
if self.is_line_by_line_fast() {
|
if self.is_line_by_line_fast() {
|
||||||
self.match_by_line_fast(buf)
|
self.match_by_line_fast(buf)
|
||||||
} else {
|
} else {
|
||||||
@@ -115,7 +115,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn roll(&mut self, buf: &[u8]) -> usize {
|
pub fn roll(&mut self, buf: &BStr) -> usize {
|
||||||
let consumed =
|
let consumed =
|
||||||
if self.config.max_context() == 0 {
|
if self.config.max_context() == 0 {
|
||||||
buf.len()
|
buf.len()
|
||||||
@@ -141,7 +141,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
consumed
|
consumed
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
|
pub fn detect_binary(&mut self, buf: &BStr, range: &Range) -> bool {
|
||||||
if self.binary_byte_offset.is_some() {
|
if self.binary_byte_offset.is_some() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
BinaryDetection::Quit(b) => b,
|
BinaryDetection::Quit(b) => b,
|
||||||
_ => return false,
|
_ => return false,
|
||||||
};
|
};
|
||||||
if let Some(i) = memchr(binary_byte, &buf[*range]) {
|
if let Some(i) = buf[*range].find_byte(binary_byte) {
|
||||||
self.binary_byte_offset = Some(range.start() + i);
|
self.binary_byte_offset = Some(range.start() + i);
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
@@ -159,7 +159,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
pub fn before_context_by_line(
|
pub fn before_context_by_line(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
upto: usize,
|
upto: usize,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.config.before_context == 0 {
|
if self.config.before_context == 0 {
|
||||||
@@ -194,7 +194,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
pub fn after_context_by_line(
|
pub fn after_context_by_line(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
upto: usize,
|
upto: usize,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.after_context_left == 0 {
|
if self.after_context_left == 0 {
|
||||||
@@ -219,7 +219,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
pub fn other_context_by_line(
|
pub fn other_context_by_line(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
upto: usize,
|
upto: usize,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
let range = Range::new(self.last_line_visited, upto);
|
let range = Range::new(self.last_line_visited, upto);
|
||||||
@@ -236,7 +236,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match_by_line_slow(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
fn match_by_line_slow(&mut self, buf: &BStr) -> Result<bool, S::Error> {
|
||||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||||
|
|
||||||
let range = Range::new(self.pos(), buf.len());
|
let range = Range::new(self.pos(), buf.len());
|
||||||
@@ -255,7 +255,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
&buf[line],
|
&buf[line],
|
||||||
self.config.line_term,
|
self.config.line_term,
|
||||||
);
|
);
|
||||||
match self.matcher.shortest_match(slice) {
|
match self.matcher.shortest_match(slice.as_bytes()) {
|
||||||
Err(err) => return Err(S::Error::error_message(err)),
|
Err(err) => return Err(S::Error::error_message(err)),
|
||||||
Ok(result) => result.is_some(),
|
Ok(result) => result.is_some(),
|
||||||
}
|
}
|
||||||
@@ -281,7 +281,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
fn match_by_line_fast(&mut self, buf: &BStr) -> Result<bool, S::Error> {
|
||||||
debug_assert!(!self.config.passthru);
|
debug_assert!(!self.config.passthru);
|
||||||
|
|
||||||
while !buf[self.pos()..].is_empty() {
|
while !buf[self.pos()..].is_empty() {
|
||||||
@@ -316,7 +316,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn match_by_line_fast_invert(
|
fn match_by_line_fast_invert(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
assert!(self.config.invert_match);
|
assert!(self.config.invert_match);
|
||||||
|
|
||||||
@@ -357,14 +357,14 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn find_by_line_fast(
|
fn find_by_line_fast(
|
||||||
&self,
|
&self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
) -> Result<Option<Range>, S::Error> {
|
) -> Result<Option<Range>, S::Error> {
|
||||||
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
debug_assert!(!self.searcher.multi_line_with_matcher(&self.matcher));
|
||||||
debug_assert!(self.is_line_by_line_fast());
|
debug_assert!(self.is_line_by_line_fast());
|
||||||
|
|
||||||
let mut pos = self.pos();
|
let mut pos = self.pos();
|
||||||
while !buf[pos..].is_empty() {
|
while !buf[pos..].is_empty() {
|
||||||
match self.matcher.find_candidate_line(&buf[pos..]) {
|
match self.matcher.find_candidate_line(buf[pos..].as_bytes()) {
|
||||||
Err(err) => return Err(S::Error::error_message(err)),
|
Err(err) => return Err(S::Error::error_message(err)),
|
||||||
Ok(None) => return Ok(None),
|
Ok(None) => return Ok(None),
|
||||||
Ok(Some(LineMatchKind::Confirmed(i))) => {
|
Ok(Some(LineMatchKind::Confirmed(i))) => {
|
||||||
@@ -396,7 +396,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
&buf[line],
|
&buf[line],
|
||||||
self.config.line_term,
|
self.config.line_term,
|
||||||
);
|
);
|
||||||
match self.matcher.is_match(slice) {
|
match self.matcher.is_match(slice.as_bytes()) {
|
||||||
Err(err) => return Err(S::Error::error_message(err)),
|
Err(err) => return Err(S::Error::error_message(err)),
|
||||||
Ok(true) => return Ok(Some(line)),
|
Ok(true) => return Ok(Some(line)),
|
||||||
Ok(false) => {
|
Ok(false) => {
|
||||||
@@ -413,7 +413,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn sink_matched(
|
fn sink_matched(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range) {
|
||||||
@@ -438,7 +438,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
&self.searcher,
|
&self.searcher,
|
||||||
&SinkMatch {
|
&SinkMatch {
|
||||||
line_term: self.config.line_term,
|
line_term: self.config.line_term,
|
||||||
bytes: linebuf,
|
bytes: linebuf.as_bytes(),
|
||||||
absolute_byte_offset: offset,
|
absolute_byte_offset: offset,
|
||||||
line_number: self.line_number,
|
line_number: self.line_number,
|
||||||
},
|
},
|
||||||
@@ -454,7 +454,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
fn sink_before_context(
|
fn sink_before_context(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range) {
|
||||||
@@ -466,7 +466,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
&self.searcher,
|
&self.searcher,
|
||||||
&SinkContext {
|
&SinkContext {
|
||||||
line_term: self.config.line_term,
|
line_term: self.config.line_term,
|
||||||
bytes: &buf[*range],
|
bytes: buf[*range].as_bytes(),
|
||||||
kind: SinkContextKind::Before,
|
kind: SinkContextKind::Before,
|
||||||
absolute_byte_offset: offset,
|
absolute_byte_offset: offset,
|
||||||
line_number: self.line_number,
|
line_number: self.line_number,
|
||||||
@@ -482,7 +482,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
fn sink_after_context(
|
fn sink_after_context(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
assert!(self.after_context_left >= 1);
|
assert!(self.after_context_left >= 1);
|
||||||
@@ -496,7 +496,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
&self.searcher,
|
&self.searcher,
|
||||||
&SinkContext {
|
&SinkContext {
|
||||||
line_term: self.config.line_term,
|
line_term: self.config.line_term,
|
||||||
bytes: &buf[*range],
|
bytes: buf[*range].as_bytes(),
|
||||||
kind: SinkContextKind::After,
|
kind: SinkContextKind::After,
|
||||||
absolute_byte_offset: offset,
|
absolute_byte_offset: offset,
|
||||||
line_number: self.line_number,
|
line_number: self.line_number,
|
||||||
@@ -513,7 +513,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
|
|
||||||
fn sink_other_context(
|
fn sink_other_context(
|
||||||
&mut self,
|
&mut self,
|
||||||
buf: &[u8],
|
buf: &BStr,
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range) {
|
||||||
@@ -525,7 +525,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
&self.searcher,
|
&self.searcher,
|
||||||
&SinkContext {
|
&SinkContext {
|
||||||
line_term: self.config.line_term,
|
line_term: self.config.line_term,
|
||||||
bytes: &buf[*range],
|
bytes: buf[*range].as_bytes(),
|
||||||
kind: SinkContextKind::Other,
|
kind: SinkContextKind::Other,
|
||||||
absolute_byte_offset: offset,
|
absolute_byte_offset: offset,
|
||||||
line_number: self.line_number,
|
line_number: self.line_number,
|
||||||
@@ -555,7 +555,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn count_lines(&mut self, buf: &[u8], upto: usize) {
|
fn count_lines(&mut self, buf: &BStr, upto: usize) {
|
||||||
if let Some(ref mut line_number) = self.line_number {
|
if let Some(ref mut line_number) = self.line_number {
|
||||||
if self.last_line_counted >= upto {
|
if self.last_line_counted >= upto {
|
||||||
return;
|
return;
|
||||||
|
@@ -1,7 +1,9 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
|
use bstr::BStr;
|
||||||
use grep_matcher::Matcher;
|
use grep_matcher::Matcher;
|
||||||
|
|
||||||
use lines::{self, LineStep};
|
use lines::{self, LineStep};
|
||||||
use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader};
|
use line_buffer::{DEFAULT_BUFFER_CAPACITY, LineBufferReader};
|
||||||
use sink::{Sink, SinkError};
|
use sink::{Sink, SinkError};
|
||||||
@@ -77,14 +79,14 @@ where M: Matcher,
|
|||||||
pub struct SliceByLine<'s, M: 's, S> {
|
pub struct SliceByLine<'s, M: 's, S> {
|
||||||
config: &'s Config,
|
config: &'s Config,
|
||||||
core: Core<'s, M, S>,
|
core: Core<'s, M, S>,
|
||||||
slice: &'s [u8],
|
slice: &'s BStr,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
searcher: &'s Searcher,
|
searcher: &'s Searcher,
|
||||||
matcher: M,
|
matcher: M,
|
||||||
slice: &'s [u8],
|
slice: &'s BStr,
|
||||||
write_to: S,
|
write_to: S,
|
||||||
) -> SliceByLine<'s, M, S> {
|
) -> SliceByLine<'s, M, S> {
|
||||||
debug_assert!(!searcher.multi_line_with_matcher(&matcher));
|
debug_assert!(!searcher.multi_line_with_matcher(&matcher));
|
||||||
@@ -127,7 +129,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
|||||||
pub struct MultiLine<'s, M: 's, S> {
|
pub struct MultiLine<'s, M: 's, S> {
|
||||||
config: &'s Config,
|
config: &'s Config,
|
||||||
core: Core<'s, M, S>,
|
core: Core<'s, M, S>,
|
||||||
slice: &'s [u8],
|
slice: &'s BStr,
|
||||||
last_match: Option<Range>,
|
last_match: Option<Range>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,7 +137,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
|||||||
pub fn new(
|
pub fn new(
|
||||||
searcher: &'s Searcher,
|
searcher: &'s Searcher,
|
||||||
matcher: M,
|
matcher: M,
|
||||||
slice: &'s [u8],
|
slice: &'s BStr,
|
||||||
write_to: S,
|
write_to: S,
|
||||||
) -> MultiLine<'s, M, S> {
|
) -> MultiLine<'s, M, S> {
|
||||||
debug_assert!(searcher.multi_line_with_matcher(&matcher));
|
debug_assert!(searcher.multi_line_with_matcher(&matcher));
|
||||||
@@ -306,7 +308,8 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn find(&mut self) -> Result<Option<Range>, S::Error> {
|
fn find(&mut self) -> Result<Option<Range>, S::Error> {
|
||||||
match self.core.matcher().find(&self.slice[self.core.pos()..]) {
|
let haystack = &self.slice[self.core.pos()..];
|
||||||
|
match self.core.matcher().find(haystack.as_bytes()) {
|
||||||
Err(err) => Err(S::Error::error_message(err)),
|
Err(err) => Err(S::Error::error_message(err)),
|
||||||
Ok(None) => Ok(None),
|
Ok(None) => Ok(None),
|
||||||
Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),
|
Ok(Some(m)) => Ok(Some(m.offset(self.core.pos()))),
|
||||||
|
@@ -5,6 +5,7 @@ use std::fs::File;
|
|||||||
use std::io::{self, Read};
|
use std::io::{self, Read};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
|
use bstr::{B, BStr, BString};
|
||||||
use encoding_rs;
|
use encoding_rs;
|
||||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||||
use grep_matcher::{LineTerminator, Match, Matcher};
|
use grep_matcher::{LineTerminator, Match, Matcher};
|
||||||
@@ -311,9 +312,9 @@ impl SearcherBuilder {
|
|||||||
Searcher {
|
Searcher {
|
||||||
config: config,
|
config: config,
|
||||||
decode_builder: decode_builder,
|
decode_builder: decode_builder,
|
||||||
decode_buffer: RefCell::new(vec![0; 8 * (1<<10)]),
|
decode_buffer: RefCell::new(BString::from(vec![0; 8 * (1<<10)])),
|
||||||
line_buffer: RefCell::new(self.config.line_buffer()),
|
line_buffer: RefCell::new(self.config.line_buffer()),
|
||||||
multi_line_buffer: RefCell::new(vec![]),
|
multi_line_buffer: RefCell::new(BString::new()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -543,7 +544,7 @@ pub struct Searcher {
|
|||||||
/// through the underlying bytes with no additional overhead.
|
/// through the underlying bytes with no additional overhead.
|
||||||
decode_builder: DecodeReaderBytesBuilder,
|
decode_builder: DecodeReaderBytesBuilder,
|
||||||
/// A buffer that is used for transcoding scratch space.
|
/// A buffer that is used for transcoding scratch space.
|
||||||
decode_buffer: RefCell<Vec<u8>>,
|
decode_buffer: RefCell<BString>,
|
||||||
/// A line buffer for use in line oriented searching.
|
/// A line buffer for use in line oriented searching.
|
||||||
///
|
///
|
||||||
/// We wrap it in a RefCell to permit lending out borrows of `Searcher`
|
/// We wrap it in a RefCell to permit lending out borrows of `Searcher`
|
||||||
@@ -555,7 +556,7 @@ pub struct Searcher {
|
|||||||
/// multi line search. In particular, multi line searches cannot be
|
/// multi line search. In particular, multi line searches cannot be
|
||||||
/// performed incrementally, and need the entire haystack in memory at
|
/// performed incrementally, and need the entire haystack in memory at
|
||||||
/// once.
|
/// once.
|
||||||
multi_line_buffer: RefCell<Vec<u8>>,
|
multi_line_buffer: RefCell<BString>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Searcher {
|
impl Searcher {
|
||||||
@@ -666,7 +667,7 @@ impl Searcher {
|
|||||||
|
|
||||||
let mut decode_buffer = self.decode_buffer.borrow_mut();
|
let mut decode_buffer = self.decode_buffer.borrow_mut();
|
||||||
let read_from = self.decode_builder
|
let read_from = self.decode_builder
|
||||||
.build_with_buffer(read_from, &mut *decode_buffer)
|
.build_with_buffer(read_from, decode_buffer.as_mut_vec())
|
||||||
.map_err(S::Error::error_io)?;
|
.map_err(S::Error::error_io)?;
|
||||||
|
|
||||||
if self.multi_line_with_matcher(&matcher) {
|
if self.multi_line_with_matcher(&matcher) {
|
||||||
@@ -698,12 +699,13 @@ impl Searcher {
|
|||||||
where M: Matcher,
|
where M: Matcher,
|
||||||
S: Sink,
|
S: Sink,
|
||||||
{
|
{
|
||||||
|
let slice = B(slice);
|
||||||
self.check_config(&matcher).map_err(S::Error::error_config)?;
|
self.check_config(&matcher).map_err(S::Error::error_config)?;
|
||||||
|
|
||||||
// We can search the slice directly, unless we need to do transcoding.
|
// We can search the slice directly, unless we need to do transcoding.
|
||||||
if self.slice_needs_transcoding(slice) {
|
if self.slice_needs_transcoding(slice) {
|
||||||
trace!("slice reader: needs transcoding, using generic reader");
|
trace!("slice reader: needs transcoding, using generic reader");
|
||||||
return self.search_reader(matcher, slice, write_to);
|
return self.search_reader(matcher, slice.as_bytes(), write_to);
|
||||||
}
|
}
|
||||||
if self.multi_line_with_matcher(&matcher) {
|
if self.multi_line_with_matcher(&matcher) {
|
||||||
trace!("slice reader: searching via multiline strategy");
|
trace!("slice reader: searching via multiline strategy");
|
||||||
@@ -736,7 +738,7 @@ impl Searcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if and only if the given slice needs to be transcoded.
|
/// Returns true if and only if the given slice needs to be transcoded.
|
||||||
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
fn slice_needs_transcoding(&self, slice: &BStr) -> bool {
|
||||||
self.config.encoding.is_some() || slice_has_utf16_bom(slice)
|
self.config.encoding.is_some() || slice_has_utf16_bom(slice)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -851,7 +853,9 @@ impl Searcher {
|
|||||||
.map(|m| m.len() as usize + 1)
|
.map(|m| m.len() as usize + 1)
|
||||||
.unwrap_or(0);
|
.unwrap_or(0);
|
||||||
buf.reserve(cap);
|
buf.reserve(cap);
|
||||||
read_from.read_to_end(&mut *buf).map_err(S::Error::error_io)?;
|
read_from
|
||||||
|
.read_to_end(buf.as_mut_vec())
|
||||||
|
.map_err(S::Error::error_io)?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
self.fill_multi_line_buffer_from_reader::<_, S>(read_from)
|
self.fill_multi_line_buffer_from_reader::<_, S>(read_from)
|
||||||
@@ -868,6 +872,7 @@ impl Searcher {
|
|||||||
assert!(self.config.multi_line);
|
assert!(self.config.multi_line);
|
||||||
|
|
||||||
let mut buf = self.multi_line_buffer.borrow_mut();
|
let mut buf = self.multi_line_buffer.borrow_mut();
|
||||||
|
let buf = buf.as_mut_vec();
|
||||||
buf.clear();
|
buf.clear();
|
||||||
|
|
||||||
// If we don't have a heap limit, then we can defer to std's
|
// If we don't have a heap limit, then we can defer to std's
|
||||||
@@ -919,8 +924,8 @@ impl Searcher {
|
|||||||
///
|
///
|
||||||
/// This is used by the searcher to determine if a transcoder is necessary.
|
/// This is used by the searcher to determine if a transcoder is necessary.
|
||||||
/// Otherwise, it is advantageous to search the slice directly.
|
/// Otherwise, it is advantageous to search the slice directly.
|
||||||
fn slice_has_utf16_bom(slice: &[u8]) -> bool {
|
fn slice_has_utf16_bom(slice: &BStr) -> bool {
|
||||||
let enc = match encoding_rs::Encoding::for_bom(slice) {
|
let enc = match encoding_rs::Encoding::for_bom(slice.as_bytes()) {
|
||||||
None => return false,
|
None => return false,
|
||||||
Some((enc, _)) => enc,
|
Some((enc, _)) => enc,
|
||||||
};
|
};
|
||||||
|
@@ -1,10 +1,10 @@
|
|||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
|
use bstr::B;
|
||||||
use grep_matcher::{
|
use grep_matcher::{
|
||||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||||
};
|
};
|
||||||
use memchr::memchr;
|
|
||||||
use regex::bytes::{Regex, RegexBuilder};
|
use regex::bytes::{Regex, RegexBuilder};
|
||||||
|
|
||||||
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||||
@@ -94,8 +94,8 @@ impl Matcher for RegexMatcher {
|
|||||||
}
|
}
|
||||||
// Make it interesting and return the last byte in the current
|
// Make it interesting and return the last byte in the current
|
||||||
// line.
|
// line.
|
||||||
let i = memchr(self.line_term.unwrap().as_byte(), haystack)
|
let i = B(haystack)
|
||||||
.map(|i| i)
|
.find_byte(self.line_term.unwrap().as_byte())
|
||||||
.unwrap_or(haystack.len() - 1);
|
.unwrap_or(haystack.len() - 1);
|
||||||
Ok(Some(LineMatchKind::Candidate(i)))
|
Ok(Some(LineMatchKind::Candidate(i)))
|
||||||
} else {
|
} else {
|
||||||
|
Reference in New Issue
Block a user