mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 01:30:21 -07:00
deps: drop bytecount in favor of memchr_iter(..).count()
As of the memchr 2.6 release, its Iterator::count method is specialized to only count the number of occurrences instead of finding the offset of each occurrence. This replaces ripgrep's use of the bytecount crate.

While micro-benchmarks suggest that memchr's method has better throughput than bytecount, it turned out to be an illusion. Namely, on a ~13GB haystack prior to this change:

    $ time rg-bytecount 'You killed my friend, my best friend, my lifelong friend!' OpenSubtitles2018.raw.en --line-number
    441450441:- You killed my friend, my best friend, my lifelong friend!

    real    1.473
    user    1.186
    sys     0.286
    maxmem  12512 MB
    faults  0

And then after:

    $ time rg 'You killed my friend, my best friend, my lifelong friend!' OpenSubtitles2018.raw.en --line-number
    441450441:- You killed my friend, my best friend, my lifelong friend!

    real    1.532
    user    1.280
    sys     0.250
    maxmem  12512 MB
    faults  0

But perf is just about in the same ballpark. That's good enough for me at the moment in order to drop the extra dependency. I did this because the marginal cost of adding the Iterator::count() specialization to memchr was extremely small.
This commit is contained in:
parent
551ad3bada
commit
6cdb99ea61
8
Cargo.lock
generated
8
Cargo.lock
generated
@ -40,12 +40,6 @@ dependencies = [
|
|||||||
"serde",
|
"serde",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "bytecount"
|
|
||||||
version = "0.6.3"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "cc"
|
name = "cc"
|
||||||
version = "1.0.83"
|
version = "1.0.83"
|
||||||
@ -215,12 +209,12 @@ name = "grep-searcher"
|
|||||||
version = "0.1.11"
|
version = "0.1.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"bstr",
|
"bstr",
|
||||||
"bytecount",
|
|
||||||
"encoding_rs",
|
"encoding_rs",
|
||||||
"encoding_rs_io",
|
"encoding_rs_io",
|
||||||
"grep-matcher",
|
"grep-matcher",
|
||||||
"grep-regex",
|
"grep-regex",
|
||||||
"log",
|
"log",
|
||||||
|
"memchr",
|
||||||
"memmap2",
|
"memmap2",
|
||||||
"regex",
|
"regex",
|
||||||
]
|
]
|
||||||
|
@ -15,19 +15,18 @@ edition = "2018"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
|
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
|
||||||
bytecount = "0.6"
|
|
||||||
encoding_rs = "0.8.14"
|
encoding_rs = "0.8.14"
|
||||||
encoding_rs_io = "0.1.6"
|
encoding_rs_io = "0.1.6"
|
||||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
|
memchr = "2.6.2"
|
||||||
memmap = { package = "memmap2", version = "0.5.3" }
|
memmap = { package = "memmap2", version = "0.5.3" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||||
regex = "1.1"
|
regex = "1.9.5"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = ["bytecount/runtime-dispatch-simd"]
|
|
||||||
simd-accel = ["encoding_rs/simd-accel"]
|
simd-accel = ["encoding_rs/simd-accel"]
|
||||||
|
|
||||||
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
||||||
|
@ -3,7 +3,6 @@ A collection of routines for performing operations on lines.
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
use bstr::ByteSlice;
|
use bstr::ByteSlice;
|
||||||
use bytecount;
|
|
||||||
use grep_matcher::{LineTerminator, Match};
|
use grep_matcher::{LineTerminator, Match};
|
||||||
|
|
||||||
/// An iterator over lines in a particular slice of bytes.
|
/// An iterator over lines in a particular slice of bytes.
|
||||||
@ -110,7 +109,7 @@ impl LineStep {
|
|||||||
|
|
||||||
/// Count the number of occurrences of `line_term` in `bytes`.
|
/// Count the number of occurrences of `line_term` in `bytes`.
|
||||||
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
||||||
bytecount::count(bytes, line_term) as u64
|
memchr::memchr_iter(line_term, bytes).count() as u64
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Given a line that possibly ends with a terminator, return that line without
|
/// Given a line that possibly ends with a terminator, return that line without
|
||||||
|
Loading…
x
Reference in New Issue
Block a user