mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-22 23:53:48 -07:00
This resolves a TODO comment I wrote a while back. Memory maps behave a little differently in terms of detecting binary data, so the tests have somewhat different results than the tests that disable memory maps. Closes #3002
421 lines
16 KiB
Rust
421 lines
16 KiB
Rust
use crate::util::{Dir, TestCommand};
|
|
|
|
// This file contains a smattering of tests specifically for checking ripgrep's
|
|
// handling of binary files. There's quite a bit of discussion on this in this
|
|
// bug report: https://github.com/BurntSushi/ripgrep/issues/306
|
|
|
|
// Our haystack is the first 2,133 lines of Gutenberg's copy of "A Study in
|
|
// Scarlet," with a NUL byte at line 1870: `abcdef\x00`.
|
|
//
|
|
// The position and size of the haystack is, unfortunately, significant. In
|
|
// particular, the NUL byte is specifically inserted at some point *after* the
|
|
// first 65,536 bytes, which corresponds to the initial capacity of the buffer
|
|
// that ripgrep uses to read files. (grep for DEFAULT_BUFFER_CAPACITY.) The
|
|
// position of the NUL byte ensures that we can execute some search on the
|
|
// initial buffer contents without ever detecting any binary data. Moreover,
|
|
// when using a memory map for searching, only the first 65,536 bytes are
|
|
// scanned for a NUL byte, so no binary bytes are detected at all when using
|
|
// a memory map (unless our query matches line 1870).
|
|
//
|
|
// One last note: in the tests below, we use --no-mmap heavily because binary
|
|
// detection with memory maps is a bit different. Namely, NUL bytes are only
|
|
// searched for in the first few KB of the file and in a match. Normally, NUL
|
|
// bytes are searched for everywhere.
|
|
// Raw bytes of the NUL-containing haystack, embedded at compile time. The
// `'static` lifetime is implied for references in `const` items.
const HAY: &[u8] = include_bytes!("./data/sherlock-nul.txt");
|
|
|
|
// Tests for binary file detection when using memory maps.
|
|
// As noted in the original comments, with memory maps binary detection
|
|
// works differently - NUL bytes are only searched for in the first few KB
|
|
// of the file and in matches.
|
|
|
|
// Test that matches in a binary file with memory maps work as expected
|
|
// with implicit file search (via glob pattern).
|
|
rgtest!(mmap_match_implicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `-g hay` makes the search implicit: the file is selected through a
    // glob instead of being named on the command line.
    let argv = ["--mmap", "-n", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // Only the matching line is expected on stdout. Unlike the --no-mmap
    // variant of this test (after_match1_implicit), no binary file warning
    // follows the match.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test with an explicit file argument when using memory maps.
|
|
rgtest!(mmap_match_explicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is named directly, so it is searched explicitly.
    let argv = ["--mmap", "-n", "Project Gutenberg EBook", "hay"];
    cmd.args(&argv);

    // The expected output is the single matching line, without a path prefix
    // and without any binary file notice.
    let want = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test specifically with a pattern that matches near the NUL byte which should
|
|
// trigger binary detection with memory maps.
|
|
rgtest!(mmap_match_near_nul, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `abcdef` is the text that sits directly in front of the NUL byte on
    // line 1870 of the haystack, so the matching line contains the NUL byte.
    // The file is named explicitly rather than selected through a glob.
    let argv = ["--mmap", "-n", "abcdef", "hay"];
    cmd.args(&argv);

    // The matching line itself is not printed; only the binary file notice
    // is expected.
    let want = "\
binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test with --count option to ensure full file scanning works with mmap.
|
|
rgtest!(mmap_match_count, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    let argv = ["--mmap", "-c", "Project Gutenberg EBook|Heaven", "hay"];
    cmd.args(&argv);

    // A count of two is expected even though the file contains a NUL byte:
    // with a memory map the count is still reported for this explicitly
    // named file.
    let want = "2\n";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test binary detection with mmap when pattern would match before and after NUL
|
|
// byte.
|
|
rgtest!(mmap_match_multiple, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is given as an explicit path.
    let argv = ["--mmap", "-n", "Project Gutenberg EBook|Heaven", "hay"];
    cmd.args(&argv);

    // Both the match ahead of the NUL byte (line 1) and the one behind it
    // (line 1871) are expected in the output.
    let want = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test that --binary flag can have odd results when searching with a memory
|
|
// map.
|
|
rgtest!(mmap_binary_flag, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is selected through a glob, with --binary set.
    let argv = ["--mmap", "-n", "--binary", "Heaven", "-g", "hay"];
    cmd.args(&argv);

    // The line after the NUL byte is printed as an ordinary match; contrast
    // with before_match1_implicit_binary, the --no-mmap counterpart, which
    // expects a binary file notice instead.
    let want = "\
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test that using -a/--text flag works as expected with mmap.
|
|
rgtest!(mmap_text_flag, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    let argv = ["--mmap", "-n", "--text", "Heaven", "-g", "hay"];
    cmd.args(&argv);

    // --text should turn binary detection off, so the match located after
    // the NUL byte is printed like any other line.
    let want = "\
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Test pattern that matches before and after the NUL byte with memory maps.
|
|
rgtest!(mmap_after_nul_match, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is given as an explicit path.
    let argv = ["--mmap", "-n", "medical student", "hay"];
    cmd.args(&argv);

    // The expected output lists the matching lines up to line 1867 and
    // contains no binary file notice.
    let want = "\
176:\"A medical student, I suppose?\" said I.
409:\"A medical student, I suppose?\" said I.
642:\"A medical student, I suppose?\" said I.
875:\"A medical student, I suppose?\" said I.
1108:\"A medical student, I suppose?\" said I.
1341:\"A medical student, I suppose?\" said I.
1574:\"A medical student, I suppose?\" said I.
1807:\"A medical student, I suppose?\" said I.
1867:\"And yet you say he is not a medical student?\"
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// This tests that ripgrep prints a warning message if it finds and prints a
|
|
// match in a binary file before detecting that it is a binary file. The point
|
|
// here is to notify the user that the search of the file is only partially
|
|
// complete.
|
|
//
|
|
// This applies to files that are *implicitly* searched via a recursive
|
|
// directory traversal. In particular, this results in a WARNING message being
|
|
// printed. We make our file "implicit" by doing a recursive search with a glob
|
|
// that matches our file.
|
|
rgtest!(after_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `-g hay` selects the file through a glob, making the search implicit.
    let argv = ["--no-mmap", "-n", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // The match on line 1 is printed first, followed by the WARNING line
    // reporting that the search stopped at the NUL byte.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match1_implicit, except we provide a file to search
|
|
// explicitly. This results in identical behavior, but a different message.
|
|
rgtest!(after_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is named directly on the command line.
    let argv = ["--no-mmap", "-n", "Project Gutenberg EBook", "hay"];
    cmd.args(&argv);

    // The match is followed by a "binary file matches" notice rather than
    // the WARNING emitted for implicitly searched files.
    let want = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match1_explicit, except we feed our content on stdin.
|
|
rgtest!(after_match1_stdin, |_: Dir, mut cmd: TestCommand| {
    // No file is created and no path is passed: the haystack is supplied
    // through `cmd.pipe` below.
    let argv = ["--no-mmap", "-n", "Project Gutenberg EBook"];
    cmd.args(&argv);

    // Same expected output as when the file is named explicitly.
    let want = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.pipe(HAY));
});
|
|
|
|
// Like after_match1_implicit, but provides the --binary flag, which
|
|
// disables binary filtering. Thus, this matches the behavior of ripgrep as
|
|
// if the file were given explicitly.
|
|
rgtest!(after_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Implicit (glob) search, but with --binary to disable binary filtering.
    let argv =
        ["--no-mmap", "-n", "--binary", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // The notice is the "binary file matches" form used for explicit files,
    // prefixed with the file path.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
hay: binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match1_implicit, but enables -a/--text, so no binary
|
|
// detection should be performed.
|
|
rgtest!(after_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Implicit (glob) search with --text set.
    let argv =
        ["--no-mmap", "-n", "--text", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // With binary detection off, only the matching line is expected; no
    // warning or notice follows it.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match1_explicit, but enables -a/--text, so no binary
|
|
// detection should be performed.
|
|
rgtest!(after_match1_explicit_text, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Explicit file path with --text set.
    let argv = ["--no-mmap", "-n", "--text", "Project Gutenberg EBook", "hay"];
    cmd.args(&argv);

    // Only the matching line is expected; no binary file notice follows.
    let want = "\
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match1_implicit, except this asks ripgrep to print all matching
|
|
// files.
|
|
//
|
|
// This is an interesting corner case that one might consider a bug, however,
|
|
// it's unlikely to be fixed. Namely, ripgrep probably shouldn't print `hay`
|
|
// as a matching file since it is in fact a binary file, and thus should be
|
|
// filtered out by default. However, the --files-with-matches flag will print
|
|
// out the path of a matching file as soon as a match is seen and then stop
|
|
// searching completely. Therefore, the NUL byte is never actually detected.
|
|
//
|
|
// The only way to fix this would be to kill ripgrep's performance in this case
|
|
// and continue searching the entire file for a NUL byte. (Similarly if the
|
|
// --quiet flag is set. See the next test.)
|
|
rgtest!(after_match1_implicit_path, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `-l` asks only for the paths of matching files.
    let argv = ["--no-mmap", "-l", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // The path is reported even though the file contains a NUL byte.
    let want = "hay\n";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match1_implicit_path, except this indicates that a match was
|
|
// found with no other output. (This is the same bug described above, but
|
|
// manifest as an exit code with no output.)
|
|
rgtest!(after_match1_implicit_quiet, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `-q` suppresses all output.
    let argv = ["--no-mmap", "-q", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // Nothing is expected on stdout.
    let want = "";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// This sets up the same test as after_match1_implicit_path, but instead of
|
|
// just printing the matching files, this includes the full count of matches.
|
|
// In this case, we need to search the entire file, so ripgrep correctly
|
|
// detects the binary data and suppresses output.
|
|
rgtest!(after_match1_implicit_count, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `-c` needs the whole file to be searched, so the NUL byte is seen.
    let argv = ["--no-mmap", "-c", "Project Gutenberg EBook", "-g", "hay"];
    cmd.args(&argv);

    // Binary data is detected and output is suppressed, so the command is
    // expected to report an error.
    cmd.assert_err();
});
|
|
|
|
// Like after_match1_implicit_count, except the --binary flag is provided,
|
|
// which makes ripgrep disable binary data filtering even for implicit files.
|
|
rgtest!(
    after_match1_implicit_count_binary,
    |dir: Dir, mut cmd: TestCommand| {
        dir.create_bytes("hay", HAY);
        // Implicit (glob) search with --binary, which disables binary data
        // filtering.
        let argv = [
            "--no-mmap",
            "-c",
            "--binary",
            "Project Gutenberg EBook",
            "-g",
            "hay",
        ];
        cmd.args(&argv);

        // The count is reported, prefixed with the file path.
        let want = "hay:1\n";
        eqnice!(want, cmd.stdout());
    }
);
|
|
|
|
// Like after_match1_implicit_count, except the file path is provided
|
|
// explicitly, so binary filtering is disabled and a count is correctly
|
|
// reported.
|
|
rgtest!(after_match1_explicit_count, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is named explicitly, so binary filtering does not apply.
    let argv = ["--no-mmap", "-c", "Project Gutenberg EBook", "hay"];
    cmd.args(&argv);

    // A bare count (no path prefix) is expected.
    let want = "1\n";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// This tests that a match way before the NUL byte is shown, but a match after
|
|
// the NUL byte is not.
|
|
rgtest!(after_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Two alternatives; the expected output below contains a match for the
    // first one only.
    let argv = [
        "--no-mmap",
        "-n",
        "Project Gutenberg EBook|a medical student",
        "-g",
        "hay",
    ];
    cmd.args(&argv);

    // The line-1 match is followed by the WARNING line; the
    // `a medical student` match on line 1867 is not printed.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like after_match2_implicit, but enables -a/--text, so no binary
|
|
// detection should be performed.
|
|
rgtest!(after_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Same alternation as after_match2_implicit, with --text added.
    let argv = [
        "--no-mmap",
        "-n",
        "--text",
        "Project Gutenberg EBook|a medical student",
        "-g",
        "hay",
    ];
    cmd.args(&argv);

    // With binary detection off, both matching lines are printed and no
    // warning is emitted.
    let want = "\
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
hay:1867:\"And yet you say he is not a medical student?\"
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// This tests that ripgrep *silently* quits before finding a match that occurs
|
|
// after a NUL byte.
|
|
rgtest!(before_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `Heaven` only matches on line 1871, after the NUL byte.
    let argv = ["--no-mmap", "-n", "Heaven", "-g", "hay"];
    cmd.args(&argv);

    // The search quits silently before reaching the match, so the command is
    // expected to report an error.
    cmd.assert_err();
});
|
|
|
|
// This tests that ripgrep *does not* silently quit before finding a match that
|
|
// occurs after a NUL byte when a file is explicitly searched.
|
|
rgtest!(before_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is named explicitly, so the search does not quit silently.
    let argv = ["--no-mmap", "-n", "Heaven", "hay"];
    cmd.args(&argv);

    // The matching line is not printed; only the binary file notice is.
    let want = "\
binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like before_match1_implicit, but enables the --binary flag, which
|
|
// disables binary filtering. Thus, this matches the behavior of ripgrep as if
|
|
// the file were given explicitly.
|
|
rgtest!(before_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Implicit (glob) search with --binary, which disables binary filtering.
    let argv = ["--no-mmap", "-n", "--binary", "Heaven", "-g", "hay"];
    cmd.args(&argv);

    // Same notice as for an explicit file, prefixed with the file path.
    let want = "\
hay: binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like before_match1_implicit, but enables -a/--text, so no binary
|
|
// detection should be performed.
|
|
rgtest!(before_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Implicit (glob) search with --text set.
    let argv = ["--no-mmap", "-n", "--text", "Heaven", "-g", "hay"];
    cmd.args(&argv);

    // With binary detection off, the match after the NUL byte is printed.
    let want = "\
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// This tests that ripgrep *silently* quits before finding a match that occurs
|
|
// before a NUL byte, but within the same buffer as the NUL byte.
|
|
rgtest!(before_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // `a medical student` matches on line 1867: before the NUL byte, but in
    // the same buffer as it.
    let argv = ["--no-mmap", "-n", "a medical student", "-g", "hay"];
    cmd.args(&argv);

    // The search quits silently, so the command is expected to report an
    // error.
    cmd.assert_err();
});
|
|
|
|
// This tests that ripgrep *does not* silently quit before finding a match that
|
|
// occurs before a NUL byte, but within the same buffer as the NUL byte. Even
|
|
// though the match occurs before the NUL byte, ripgrep still doesn't print it
|
|
// because it has already scanned ahead to detect the NUL byte. (This matches
|
|
// the behavior of GNU grep.)
|
|
rgtest!(before_match2_explicit, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // The file is named explicitly on the command line.
    let argv = ["--no-mmap", "-n", "a medical student", "hay"];
    cmd.args(&argv);

    // The match on line 1867 is not printed even though it precedes the NUL
    // byte; only the binary file notice is expected.
    let want = "\
binary file matches (found \"\\0\" byte around offset 77041)
";
    eqnice!(want, cmd.stdout());
});
|
|
|
|
// Like before_match2_implicit, but enables -a/--text, so no binary
|
|
// detection should be performed.
|
|
rgtest!(before_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
    dir.create_bytes("hay", HAY);
    // Implicit (glob) search with --text set.
    let argv =
        ["--no-mmap", "-n", "--text", "a medical student", "-g", "hay"];
    cmd.args(&argv);

    // With binary detection off, the match on line 1867 is printed.
    let want = "\
hay:1867:\"And yet you say he is not a medical student?\"
";
    eqnice!(want, cmd.stdout());
});
|