diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a987858..e98fc5e9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,8 @@ Bug fixes: * [BUG #1277](https://github.com/BurntSushi/ripgrep/issues/1277): Document cygwin path translation behavior in the FAQ. +* [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642): + Fixes a bug where using `-m` and `-A` printed more matches than the limit. * [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703): Clarify the function of `-u/--unrestricted`. * [BUG #1708](https://github.com/BurntSushi/ripgrep/issues/1708): diff --git a/crates/printer/src/json.rs b/crates/printer/src/json.rs index 5fcfe93e..b1a6fa2e 100644 --- a/crates/printer/src/json.rs +++ b/crates/printer/src/json.rs @@ -644,6 +644,16 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> { self.after_context_remaining == 0 } + /// Returns whether the current match count exceeds the configured limit. + /// If there is no limit, then this always returns false. + fn match_more_than_limit(&self) -> bool { + let limit = match self.json.config.max_matches { + None => return false, + Some(limit) => limit, + }; + self.match_count > limit + } + /// Write the "begin" message. fn write_begin_message(&mut self) -> io::Result<()> { if self.begin_printed { @@ -667,7 +677,20 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> { self.write_begin_message()?; self.match_count += 1; - self.after_context_remaining = searcher.after_context() as u64; + // When we've exceeded our match count, then the remaining context + // lines should not be reset, but instead, decremented. This avoids a + // bug where we display more matches than a configured limit. The main + // idea here is that 'matched' might be called again while printing + // an after-context line. In that case, we should treat this as a + // contextual line rather than a matching line for the purposes of + // termination. + if self.match_more_than_limit() { + self.after_context_remaining = + self.after_context_remaining.saturating_sub(1); + } else { + self.after_context_remaining = searcher.after_context() as u64; + } + self.record_matches(mat.bytes())?; self.stats.add_matches(self.json.matches.len() as u64); self.stats.add_matched_lines(mat.lines().count() as u64); @@ -871,6 +894,38 @@ and exhibited clearly, with a label attached.\ assert_eq!(got.lines().count(), 3); } + #[test] + fn max_matches_after_context() { + let haystack = "\ +a +b +c +d +e +d +e +d +e +d +e +"; + let matcher = RegexMatcher::new(r"d").unwrap(); + let mut printer = + JSONBuilder::new().max_matches(Some(1)).build(vec![]); + SearcherBuilder::new() + .after_context(2) + .build() + .search_reader( + &matcher, + haystack.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + let got = printer_contents(&mut printer); + + assert_eq!(got.lines().count(), 5); + } + #[test] fn no_match() { let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap(); diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index 27f91c0c..31dc1ad8 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -724,6 +724,16 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> { } self.after_context_remaining == 0 } + + /// Returns whether the current match count exceeds the configured limit. + /// If there is no limit, then this always returns false. + fn match_more_than_limit(&self) -> bool { + let limit = match self.standard.config.max_matches { + None => return false, + Some(limit) => limit, + }; + self.match_count > limit + } } impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { @@ -735,7 +745,19 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> { mat: &SinkMatch, ) -> Result { self.match_count += 1; - self.after_context_remaining = searcher.after_context() as u64; + // When we've exceeded our match count, then the remaining context + // lines should not be reset, but instead, decremented. This avoids a + // bug where we display more matches than a configured limit. The main + // idea here is that 'matched' might be called again while printing + // an after-context line. In that case, we should treat this as a + // contextual line rather than a matching line for the purposes of + // termination. + if self.match_more_than_limit() { + self.after_context_remaining = + self.after_context_remaining.saturating_sub(1); + } else { + self.after_context_remaining = searcher.after_context() as u64; + } self.record_matches(mat.bytes())?; self.replace(mat.bytes())?; @@ -3413,4 +3435,40 @@ and xxx clearly, with a label attached. let got = printer_contents_ansi(&mut printer); assert!(!got.is_empty()); } + + #[test] + fn regression_after_context_with_match() { + let haystack = "\ +a +b +c +d +e +d +e +d +e +d +e +"; + + let matcher = RegexMatcherBuilder::new().build(r"d").unwrap(); + let mut printer = StandardBuilder::new() + .max_matches(Some(1)) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .after_context(2) + .build() + .search_reader( + &matcher, + haystack.as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "4:d\n5-e\n6:d\n"; + assert_eq_printed!(expected, got); + } } diff --git a/tests/regression.rs b/tests/regression.rs index 1bf239b8..8c132795 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -763,6 +763,28 @@ rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| { ); }); +// See: https://github.com/BurntSushi/ripgrep/issues/1380 +rgtest!(r1380, |dir: Dir, mut cmd: TestCommand| { + dir.create( + "foo", + "\ +a +b +c +d +e +d +e +d +e +d +e +", + ); + + eqnice!("d\ne\nd\n", cmd.args(&["-A2", "-m1", "d", "foo"]).stdout()); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/1389 rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| { dir.create_dir("mydir");