diff --git a/CHANGELOG.md b/CHANGELOG.md index 3d147752..900162bf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,8 @@ Bug fixes: * [BUG #1277](https://github.com/BurntSushi/ripgrep/issues/1277): Document cygwin path translation behavior in the FAQ. +* [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311): + Fix multi-line bug where a search & replace for `\n` didn't work as expected. * [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642): Fixes a bug where using `-m` and `-A` printed more matches than the limit. * [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703): diff --git a/crates/printer/src/standard.rs b/crates/printer/src/standard.rs index 7657e41d..e7fe7c38 100644 --- a/crates/printer/src/standard.rs +++ b/crates/printer/src/standard.rs @@ -3224,6 +3224,80 @@ Holmeses, success in the province of detective work must always assert_eq_printed!(expected, got); } + // This is a somewhat weird test that checks the behavior of attempting + // to replace a line terminator with something else. + // + // See: https://github.com/BurntSushi/ripgrep/issues/1311 + #[test] + fn replacement_multi_line() { + let matcher = RegexMatcher::new(r"\n").unwrap(); + let mut printer = StandardBuilder::new() + .replacement(Some(b"?".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .multi_line(true) + .build() + .search_reader( + &matcher, + "hello\nworld\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "1:hello?world?\n"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement_multi_line_diff_line_term() { + let matcher = RegexMatcherBuilder::new() + .line_terminator(Some(b'\x00')) + .build(r"\n") + .unwrap(); + let mut printer = StandardBuilder::new() + .replacement(Some(b"?".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_terminator(LineTerminator::byte(b'\x00')) + .line_number(true) + .multi_line(true) + .build() + .search_reader( + &matcher, + "hello\nworld\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "1:hello?world?\x00"; + assert_eq_printed!(expected, got); + } + + #[test] + fn replacement_multi_line_combine_lines() { + let matcher = RegexMatcher::new(r"\n(.)?").unwrap(); + let mut printer = StandardBuilder::new() + .replacement(Some(b"?$1".to_vec())) + .build(NoColor::new(vec![])); + SearcherBuilder::new() + .line_number(true) + .multi_line(true) + .build() + .search_reader( + &matcher, + "hello\nworld\n".as_bytes(), + printer.sink(&matcher), + ) + .unwrap(); + + let got = printer_contents(&mut printer); + let expected = "1:hello?world?\n"; + assert_eq_printed!(expected, got); + } + #[test] fn replacement_max_columns() { let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap(); diff --git a/crates/searcher/src/searcher/glue.rs b/crates/searcher/src/searcher/glue.rs index e73376a8..7d678d70 100644 --- a/crates/searcher/src/searcher/glue.rs +++ b/crates/searcher/src/searcher/glue.rs @@ -226,10 +226,19 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> { } Some(last_match) => { // If the lines in the previous match overlap with the lines - // in this match, then simply grow the match and move on. - // This happens when the next match begins on the same line - // that the last match ends on. - if last_match.end() > line.start() { + // in this match, then simply grow the match and move on. This + // happens when the next match begins on the same line that the + // last match ends on. + // + // Note that we do not technically require strict overlap here. + // Instead, we only require that the lines are adjacent. This + // provides larger blocks of lines to the printer, and results + // in overall better behavior with respect to how replacements + // are handled. + // + // See: https://github.com/BurntSushi/ripgrep/issues/1311 + // And also the associated commit fixing #1311. + if last_match.end() >= line.start() { self.last_match = Some(last_match.with_end(line.end())); Ok(true) } else { @@ -714,21 +723,23 @@ d haystack.push_str("zzz\n"); } haystack.push_str("a\n"); + haystack.push_str("zzz\n"); haystack.push_str("a\x00a\n"); + haystack.push_str("zzz\n"); haystack.push_str("a\n"); // The line buffered searcher has slightly different semantics here. // Namely, it will *always* detect binary data in the current buffer // before searching it. Thus, the total number of bytes searched is // smaller than below. - let exp = "0:a\n\nbyte count:262146\nbinary offset:262149\n"; + let exp = "0:a\n\nbyte count:262146\nbinary offset:262153\n"; // In contrast, the slice readers (for multi line as well) will only // look for binary data in the initial chunk of bytes. After that // point, it only looks for binary data in matches. Note though that // the binary offset remains the same. (See the binary4 test for a case // where the offset is explicitly different.) let exp_slice = - "0:a\n262146:a\n\nbyte count:262149\nbinary offset:262149\n"; + "0:a\n262146:a\n\nbyte count:262153\nbinary offset:262153\n"; SearcherTester::new(&haystack, "a") .binary_detection(BinaryDetection::quit(0)) diff --git a/tests/json.rs b/tests/json.rs index 477b36df..97d8e719 100644 --- a/tests/json.rs +++ b/tests/json.rs @@ -323,24 +323,19 @@ rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| { // Check without --crlf flag. let msgs = json_decode(&cmd.arg("-U").arg("--json").arg("\n").stdout()); - assert_eq!(msgs.len(), 5); + assert_eq!(msgs.len(), 4); let m = msgs[1].unwrap_match(); - assert_eq!(m.lines, Data::text("test\r\n")); - assert_eq!(m.submatches[0].m, Data::text("\n")); - - let m = msgs[2].unwrap_match(); - assert_eq!(m.lines, Data::text("\n")); + assert_eq!(m.lines, Data::text("test\r\n\n")); assert_eq!(m.submatches[0].m, Data::text("\n")); + assert_eq!(m.submatches[1].m, Data::text("\n")); // Now check with --crlf flag. let msgs = json_decode(&cmd.arg("--crlf").stdout()); + assert_eq!(msgs.len(), 4); let m = msgs[1].unwrap_match(); - assert_eq!(m.lines, Data::text("test\r\n")); - assert_eq!(m.submatches[0].m, Data::text("\n")); - - let m = msgs[2].unwrap_match(); - assert_eq!(m.lines, Data::text("\n")); + assert_eq!(m.lines, Data::text("test\r\n\n")); assert_eq!(m.submatches[0].m, Data::text("\n")); + assert_eq!(m.submatches[1].m, Data::text("\n")); }); diff --git a/tests/regression.rs b/tests/regression.rs index 203ac140..94e62969 100644 --- a/tests/regression.rs +++ b/tests/regression.rs @@ -744,6 +744,15 @@ rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| { eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout()); }); +// See: https://github.com/BurntSushi/ripgrep/issues/1311 +rgtest!(r1311_multi_line_term_replace, |dir: Dir, mut cmd: TestCommand| { + dir.create("input", "hello\nworld\n"); + eqnice!( + "1:hello?world?\n", + cmd.args(&["-U", "-r?", "-n", "\n", "input"]).stdout() + ); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/1319 rgtest!(r1319, |dir: Dir, mut cmd: TestCommand| { dir.create("input", "CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC");