mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-18 05:33:49 -07:00
test: check binary file detection when using memory maps
This resolves a TODO comment I wrote a while back. Memory maps behave a little differently in terms of detecting binary data, so the tests have somewhat different results than the tests that disable memory maps. Closes #3002
This commit is contained in:
122
tests/binary.rs
122
tests/binary.rs
@@ -4,8 +4,8 @@ use crate::util::{Dir, TestCommand};
|
||||
// handling of binary files. There's quite a bit of discussion on this in this
|
||||
// bug report: https://github.com/BurntSushi/ripgrep/issues/306
|
||||
|
||||
// Our haystack is the first 500 lines of Gutenberg's copy of "A Study in
|
||||
// Scarlet," with a NUL byte at line 1898: `abcdef\x00`.
|
||||
// Our haystack is the first 2,133 lines of Gutenberg's copy of "A Study in
|
||||
// Scarlet," with a NUL byte at line 1870: `abcdef\x00`.
|
||||
//
|
||||
// The position and size of the haystack is, unfortunately, significant. In
|
||||
// particular, the NUL byte is specifically inserted at some point *after* the
|
||||
@@ -21,10 +21,124 @@ use crate::util::{Dir, TestCommand};
|
||||
// detection with memory maps is a bit different. Namely, NUL bytes are only
|
||||
// searched for in the first few KB of the file and in a match. Normally, NUL
|
||||
// bytes are searched for everywhere.
|
||||
//
|
||||
// TODO: Add tests for binary file detection when using memory maps.
|
||||
const HAY: &'static [u8] = include_bytes!("./data/sherlock-nul.txt");
|
||||
|
||||
// Tests for binary file detection when using memory maps.
|
||||
// As noted in the original comments, with memory maps binary detection
|
||||
// works differently - NUL bytes are only searched for in the first few KB
|
||||
// of the file and in matches.
|
||||
|
||||
// Test that matches in a binary file with memory maps work as expected
|
||||
// with implicit file search (via glob pattern).
|
||||
rgtest!(mmap_match_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
cmd.args(&["--mmap", "-n", "Project Gutenberg EBook", "-g", "hay"]);
|
||||
|
||||
// With mmap, we get a match and a warning about binary content
|
||||
let expected = "\
|
||||
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Test with an explicit file argument when using memory maps.
|
||||
rgtest!(mmap_match_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
cmd.args(&["--mmap", "-n", "Project Gutenberg EBook", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Test specifically with a pattern that matches near the NUL byte which should
|
||||
// trigger binary detection with memory maps.
|
||||
rgtest!(mmap_match_near_nul, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
// Pattern that matches around line 1898 where the NUL byte is.
|
||||
// Note: Using direct file path instead of glob.
|
||||
cmd.args(&["--mmap", "-n", "abcdef", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
binary file matches (found \"\\0\" byte around offset 77041)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Test with --count option to ensure full file scanning works with mmap.
|
||||
rgtest!(mmap_match_count, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
cmd.args(&["--mmap", "-c", "Project Gutenberg EBook|Heaven", "hay"]);
|
||||
|
||||
// With mmap, since we're counting all matches and might not
|
||||
// encounter the NUL byte during initial detection, the count
|
||||
// should still be reported.
|
||||
eqnice!("2\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// Test binary detection with mmap when pattern would match before and after NUL
|
||||
// byte.
|
||||
rgtest!(mmap_match_multiple, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
// Use explicit file path.
|
||||
cmd.args(&["--mmap", "-n", "Project Gutenberg EBook|Heaven", "hay"]);
|
||||
|
||||
// With explicit file and memory maps, matches before and after NUL byte
|
||||
// are shown.
|
||||
let expected = "\
|
||||
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
1871:\"No. Heaven knows what the objects of his studies are. But here we
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Test that --binary flag can have odd results when searching with a memory
|
||||
// map.
|
||||
rgtest!(mmap_binary_flag, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
// Use glob pattern.
|
||||
cmd.args(&["--mmap", "-n", "--binary", "Heaven", "-g", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Test that using -a/--text flag works as expected with mmap.
|
||||
rgtest!(mmap_text_flag, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
cmd.args(&["--mmap", "-n", "--text", "Heaven", "-g", "hay"]);
|
||||
|
||||
// With --text flag, binary detection should be disabled.
|
||||
let expected = "\
|
||||
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Test pattern that matches before and after the NUL byte with memory maps.
|
||||
rgtest!(mmap_after_nul_match, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("hay", HAY);
|
||||
// Use explicit file path.
|
||||
cmd.args(&["--mmap", "-n", "medical student", "hay"]);
|
||||
|
||||
// With explicit file and memory maps, all matches are shown
|
||||
let expected = "\
|
||||
176:\"A medical student, I suppose?\" said I.
|
||||
409:\"A medical student, I suppose?\" said I.
|
||||
642:\"A medical student, I suppose?\" said I.
|
||||
875:\"A medical student, I suppose?\" said I.
|
||||
1108:\"A medical student, I suppose?\" said I.
|
||||
1341:\"A medical student, I suppose?\" said I.
|
||||
1574:\"A medical student, I suppose?\" said I.
|
||||
1807:\"A medical student, I suppose?\" said I.
|
||||
1867:\"And yet you say he is not a medical student?\"
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// This tests that ripgrep prints a warning message if it finds and prints a
|
||||
// match in a binary file before detecting that it is a binary file. The point
|
||||
// here is to notify that user that the search of the file is only partially
|
||||
|
Reference in New Issue
Block a user