From b034b777984ab0eeeafd82119e75c60a80254b5d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 25 Sep 2016 21:26:49 -0400 Subject: [PATCH] Don't replace NUL bytes when searching binary files as text. This was a result of misinterpreting a feature in grep where NUL bytes are replaced with \n. The primary reason for doing this is to avoid excessive memory usage on truly binary data. However, grep only does this when searching binary files as if they were binary, and which only reports whether the file matched or not. When grep is told to search binary data as text (the -a/--text flag), then it doesn't do any replacement so we shouldn't either. In general, this makes sense, because the user is essentially asserting that a particular file that looks like binary is actually text. In that case, we shouldn't try to replace any NUL bytes. ripgrep doesn't actually support searching binary data for whether it matches or not, so we don't actually need the replace_buf function. However, it does seem like a potentially useful feature. --- src/search_stream.rs | 7 ++----- tests/tests.rs | 10 +++++++--- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/search_stream.rs b/src/search_stream.rs index 0b5bfaf6..3407366e 100644 --- a/src/search_stream.rs +++ b/src/search_stream.rs @@ -532,10 +532,6 @@ impl InputBuffer { if self.first && is_binary(&self.buf[self.end..self.end + n]) { self.is_binary = true; } - if self.is_binary { - replace_buf( - &mut self.buf[self.end..self.end + n], b'\x00', self.eol); - } self.first = false; // We assume that reading 0 bytes means we've hit EOF. if n == 0 { @@ -658,6 +654,7 @@ pub fn count_lines(buf: &[u8], eol: u8) -> u64 { } /// Replaces a with b in buf. +#[allow(dead_code)] fn replace_buf(buf: &mut [u8], a: u8, b: u8) { if a == b { return; @@ -999,7 +996,7 @@ fn main() { let text = "Sherlock\n\x00Holmes\n"; let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:Holmes\n"); + assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); } #[test] diff --git a/tests/tests.rs b/tests/tests.rs index a0b01c6a..3ae568e7 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -138,7 +138,11 @@ be, to a very large extent, the result of luck. Sherlock Holmes foo Sherlock Holmes lives on Baker Street. "; - assert!(lines == expected1 || lines == expected2); + if lines != expected1 { + assert_eq!(lines, expected2); + } else { + assert_eq!(lines, expected1); + } }); sherlock!(inverted, |wd: WorkDir, mut cmd: Command| { @@ -587,7 +591,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-uuu"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file:foo\nfile:foo\n"); + assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n"); }); // On Windows, this test uses memory maps, so the NUL bytes don't get replaced. @@ -785,7 +789,7 @@ fn binary_search_no_mmap() { let mut cmd = wd.command(); cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo\nfoo\n"); + assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n"); } #[test]