From 48bf39458511b38f185a24d52dfef61e0a8796ae Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 17 Aug 2025 10:44:02 -0400 Subject: [PATCH] stats: fix case where "bytes searched" could be wrong Specifically, if the search was instructed to quit early, we might not have correctly marked the number of bytes consumed. I don't think this bug occurs when memory maps are used to read the haystack. Closes #2944 --- CHANGELOG.md | 2 ++ crates/searcher/src/searcher/glue.rs | 11 ++++++++++- tests/regression.rs | 7 +++++++ 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5aff37e7..e0159bb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,8 @@ Bug fixes: Make `rg -vf file` where `file` is empty match everything. * [BUG #2177](https://github.com/BurntSushi/ripgrep/issues/2177): Ignore a UTF-8 BOM marker at the start of `.gitignore` (and similar files). +* [BUG #2944](https://github.com/BurntSushi/ripgrep/pull/2944): + Fix a bug where the "bytes searched" in `--stats` output could be incorrect. Feature enhancements: diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs index 006afad3..5db57019 100644 --- a/crates/searcher/src/searcher/glue.rs +++ b/crates/searcher/src/searcher/glue.rs @@ -37,7 +37,11 @@ where pub(crate) fn run(mut self) -> Result<(), S::Error> { if self.core.begin()? { - while self.fill()? && self.core.match_by_line(self.rdr.buffer())? { + while self.fill()? { + if !self.core.match_by_line(self.rdr.buffer())? 
{ + self.consume_remaining(); + break; + } } } self.core.finish( @@ -46,6 +50,11 @@ where ) } + fn consume_remaining(&mut self) { + let consumed = self.core.pos(); + self.rdr.consume(consumed); + } + fn fill(&mut self) -> Result<bool, S::Error> { assert!(self.rdr.buffer()[self.core.pos()..].is_empty()); diff --git a/tests/regression.rs b/tests/regression.rs index bd845905..4fd3c0f8 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -1454,3 +1454,10 @@ rgtest!(r2658_null_data_line_regexp, |dir: Dir, mut cmd: TestCommand| { let got = cmd.args(&["--null-data", "--line-regexp", r"bar"]).stdout(); eqnice!("haystack:bar\0", got); }); + +// See: https://github.com/BurntSushi/ripgrep/pull/2944 +rgtest!(r2944_incorrect_bytes_searched, |dir: Dir, mut cmd: TestCommand| { + dir.create("haystack", "foo1\nfoo2\nfoo3\nfoo4\nfoo5\n"); + let got = cmd.args(&["--stats", "-m2", "foo", "."]).stdout(); + assert!(got.contains("10 bytes searched\n")); +});