From f007f940c53a4818ead58f2fe2e0fac95cc3a40a Mon Sep 17 00:00:00 2001 From: Balaji Sivaraman Date: Sun, 7 Jan 2018 21:35:58 +0530 Subject: [PATCH] search: add support for searching compressed files This commit adds opt-in support for searching compressed files during recursive search. This behavior is only enabled when the `-z/--search-zip` flag is passed to ripgrep. When enabled, a limited set of common compression formats are recognized via file extension, and a new process is spawned to perform the decompression. ripgrep then searches the stdout of that spawned process. Closes #539 --- .travis.yml | 4 +- Cargo.lock | 1 + Cargo.toml | 1 + README.md | 8 +- complete/_rg | 1 + doc/rg.1 | 13 ++- doc/rg.1.md | 7 ++ ignore/src/types.rs | 4 + src/app.rs | 14 ++- src/args.rs | 3 + src/decompressor.rs | 191 +++++++++++++++++++++++++++++++++++++++ src/main.rs | 2 + src/worker.rs | 46 +++++++--- tests/data/sherlock.bz2 | Bin 0 -> 272 bytes tests/data/sherlock.gz | Bin 0 -> 263 bytes tests/data/sherlock.lzma | Bin 0 -> 286 bytes tests/data/sherlock.xz | Bin 0 -> 332 bytes tests/tests.rs | 102 +++++++++++++++++++++ 18 files changed, 373 insertions(+), 24 deletions(-) create mode 100644 src/decompressor.rs create mode 100644 tests/data/sherlock.bz2 create mode 100644 tests/data/sherlock.gz create mode 100644 tests/data/sherlock.lzma create mode 100644 tests/data/sherlock.xz diff --git a/.travis.yml b/.travis.yml index a1b43c58..ee46cd7c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -9,8 +9,10 @@ env: addons: apt: packages: - # Needed for completion-function test + # Needed for completion-function test. - zsh + # Needed for testing decompression search. 
+ - xz-utils matrix: fast_finish: true diff --git a/Cargo.lock b/Cargo.lock index d2c30bd1..9b80e479 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -237,6 +237,7 @@ dependencies = [ "clap 2.29.0 (registry+https://github.com/rust-lang/crates.io-index)", "encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "globset 0.2.1", "grep 0.1.7", "ignore 0.3.1", "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index 924a2397..6a250fa0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -49,6 +49,7 @@ num_cpus = "1" regex = "0.2.4" same-file = "1" termcolor = { version = "0.3.3", path = "termcolor" } +globset = { version = "0.2.1", path = "globset" } [build-dependencies] clap = "2.26" diff --git a/README.md b/README.md index 27aead57..ac676863 100644 --- a/README.md +++ b/README.md @@ -91,6 +91,8 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep. as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for automatically detecting UTF-16 is provided. Other text encodings must be specifically specified with the `-E/--encoding` flag.) +* `ripgrep` supports searching files compressed in a common format (gzip, xz, + lzma or bzip2 currently) with the `-z/--search-zip` flag. In other words, use `ripgrep` if you like speed, filtering by default, fewer bugs, and Unicode support. @@ -109,12 +111,10 @@ give you a glimpse at some important downsides or missing features of support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be supported.) -* `ripgrep` doesn't yet support searching compressed files. (Likely to be - supported in the future.) * `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.) 
-In other words, if you like fancy regexes, searching compressed files or -multiline search, then `ripgrep` may not quite meet your needs (yet). +In other words, if you like fancy regexes or multiline search, then `ripgrep` +may not quite meet your needs (yet). ### Feature comparison diff --git a/complete/_rg b/complete/_rg index 8455c804..6b62c169 100644 --- a/complete/_rg +++ b/complete/_rg @@ -87,6 +87,7 @@ _rg() { '(-w -x --line-regexp --word-regexp)'{-w,--word-regexp}'[only show matches surrounded by word boundaries]' '(-e -f --file --files --regexp --type-list)1: :_rg_pattern' '(--type-list)*:file:_files' + '(-z --search-zip)'{-z,--search-zip}'[search in compressed files]' ) [[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && { diff --git a/doc/rg.1 b/doc/rg.1 index d8272e74..fd562e81 100644 --- a/doc/rg.1 +++ b/doc/rg.1 @@ -184,6 +184,15 @@ Only show matches surrounded by line boundaries. This is equivalent to putting ^...$ around the search pattern. .RS .RE +.TP +.B \-z, \-\-search\-zip +Search in compressed files. +Currently gz, bz2, xz and lzma formats are supported. +.RS +.PP +Note that ripgrep expects to find the decompression binaries for the +respective formats in your system\[aq]s PATH for use with this flag. +.RE .SH LESS COMMON OPTIONS .TP .B \-A, \-\-after\-context \f[I]NUM\f[] @@ -437,9 +446,7 @@ such part on a separate output line. .TP .B \-\-passthru, \-\-passthrough Show both matching and non\-matching lines. -This is equivalent to adding ^ to the list of search patterns. -This option overrides \-\-count and cannot be used with -\-\-only\-matching or \-\-replace. +This option cannot be used with \-\-only\-matching or \-\-replace. .RS .RE .TP diff --git a/doc/rg.1.md b/doc/rg.1.md index 93b401ab..6b054286 100644 --- a/doc/rg.1.md +++ b/doc/rg.1.md @@ -125,6 +125,13 @@ Project home page: https://github.com/BurntSushi/ripgrep : Only show matches surrounded by line boundaries. This is equivalent to putting ^...$ around the search pattern. 
+-z, --search-zip +: Search in compressed files. Currently gz, bz2, xz and lzma + formats are supported. + + Note that ripgrep expects to find the decompression binaries for the + respective formats in your system's PATH for use with this flag. + # LESS COMMON OPTIONS -A, --after-context *NUM* diff --git a/ignore/src/types.rs b/ignore/src/types.rs index d231b577..a9a9687e 100644 --- a/ignore/src/types.rs +++ b/ignore/src/types.rs @@ -103,6 +103,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ ("avro", &["*.avdl", "*.avpr", "*.avsc"]), ("awk", &["*.awk"]), ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]), + ("bzip2", &["*.bz2"]), ("c", &["*.c", "*.h", "*.H"]), ("cabal", &["*.cabal"]), ("cbor", &["*.cbor"]), @@ -137,6 +138,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ ("fsharp", &["*.fs", "*.fsx", "*.fsi"]), ("gn", &["*.gn", "*.gni"]), ("go", &["*.go"]), + ("gzip", &["*.gz"]), ("groovy", &["*.groovy", "*.gradle"]), ("h", &["*.h", "*.hpp"]), ("hbs", &["*.hbs"]), @@ -184,6 +186,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]), ("log", &["*.log"]), ("lua", &["*.lua"]), + ("lzma", &["*.lzma"]), ("m4", &["*.ac", "*.m4"]), ("make", &[ "gnumakefile", "Gnumakefile", "GNUmakefile", @@ -276,6 +279,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[ ("wiki", &["*.mediawiki", "*.wiki"]), ("webidl", &["*.idl", "*.webidl", "*.widl"]), ("xml", &["*.xml", "*.xml.dist"]), + ("xz", &["*.xz"]), ("yacc", &["*.y"]), ("yaml", &["*.yaml", "*.yml"]), ("zsh", &[ diff --git a/src/app.rs b/src/app.rs index 3133e1e2..f2f084e3 100644 --- a/src/app.rs +++ b/src/app.rs @@ -191,6 +191,7 @@ pub fn app() -> App<'static, 'static> { .arg(flag("type-clear") .value_name("TYPE").takes_value(true) .multiple(true).number_of_values(1)) + .arg(flag("search-zip").short("z")) } struct Usage { @@ 
-450,7 +451,8 @@ lazy_static! { can be specified by using the --ignore-file flag several times. \ When specifying multiple ignore files, earlier files have lower \ precedence than later files. If you are looking for a way to \ - include or exclude files and directories directly used -g instead."); + include or exclude files and directories directly used -g \ + instead."); doc!(h, "follow", "Follow symbolic links."); doc!(h, "max-count", @@ -592,6 +594,11 @@ lazy_static! { only clears the default type definitions that are found inside \ of ripgrep.\n\nNote that this MUST be passed to every \ invocation of ripgrep. Type settings are NOT persisted."); + doc!(h, "search-zip", + "Search in compressed files.", + "Search in compressed files. Currently gz, bz2, xz, and \ + lzma files are supported. This option expects the decompression \ + binaries to be available in the system PATH."); h }; @@ -599,8 +606,9 @@ lazy_static! { fn validate_line_number_width(s: String) -> Result<(), String> { if s.starts_with("0") { - Err(String::from("Custom padding characters are currently not supported. \ - Please enter only a numeric value.")) + Err(String::from( + "Custom padding characters are currently not supported. 
\ + Please enter only a numeric value.")) } else { validate_number(s) } diff --git a/src/args.rs b/src/args.rs index 56dacc97..030adf0f 100644 --- a/src/args.rs +++ b/src/args.rs @@ -77,6 +77,7 @@ pub struct Args { type_list: bool, types: Types, with_filename: bool, + search_zip_files: bool } impl Args { @@ -229,6 +230,7 @@ impl Args { .no_messages(self.no_messages) .quiet(self.quiet) .text(self.text) + .search_zip_files(self.search_zip_files) .build() } @@ -365,6 +367,7 @@ impl<'a> ArgMatches<'a> { type_list: self.is_present("type-list"), types: self.types()?, with_filename: with_filename, + search_zip_files: self.is_present("search-zip") }; if args.mmap { debug!("will try to use memory maps"); diff --git a/src/decompressor.rs b/src/decompressor.rs new file mode 100644 index 00000000..a94948af --- /dev/null +++ b/src/decompressor.rs @@ -0,0 +1,191 @@ +use std::collections::HashMap; +use std::ffi::OsStr; +use std::fmt; +use std::io::{self, Read}; +use std::path::Path; +use std::process::{self, Stdio}; + +use globset::{Glob, GlobSet, GlobSetBuilder}; + +/// A decompression command, contains the command to be spawned as well as any +/// necessary CLI args. +#[derive(Clone, Copy, Debug)] +struct DecompressionCommand { + cmd: &'static str, + args: &'static [&'static str], +} + +impl DecompressionCommand { + /// Create a new decompress command + fn new( + cmd: &'static str, + args: &'static [&'static str], + ) -> DecompressionCommand { + DecompressionCommand { + cmd, args + } + } +} + +impl fmt::Display for DecompressionCommand { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{} {}", self.cmd, self.args.join(" ")) + } +} + +lazy_static! 
{ + static ref DECOMPRESSION_COMMANDS: HashMap< + &'static str, + DecompressionCommand, + > = { + let mut m = HashMap::new(); + + const ARGS: &[&str] = &["-d", "-c"]; + m.insert("gz", DecompressionCommand::new("gzip", ARGS)); + m.insert("bz2", DecompressionCommand::new("bzip2", ARGS)); + m.insert("xz", DecompressionCommand::new("xz", ARGS)); + + const LZMA_ARGS: &[&str] = &["--format=lzma", "-d", "-c"]; + m.insert("lzma", DecompressionCommand::new("xz", LZMA_ARGS)); + + m + }; + static ref SUPPORTED_COMPRESSION_FORMATS: GlobSet = { + let mut builder = GlobSetBuilder::new(); + builder.add(Glob::new("*.gz").unwrap()); + builder.add(Glob::new("*.bz2").unwrap()); + builder.add(Glob::new("*.xz").unwrap()); + builder.add(Glob::new("*.lzma").unwrap()); + builder.build().unwrap() + }; + static ref TAR_ARCHIVE_FORMATS: GlobSet = { + let mut builder = GlobSetBuilder::new(); + builder.add(Glob::new("*.tar.gz").unwrap()); + builder.add(Glob::new("*.tar.xz").unwrap()); + builder.add(Glob::new("*.tar.bz2").unwrap()); + builder.add(Glob::new("*.tgz").unwrap()); + builder.add(Glob::new("*.txz").unwrap()); + builder.add(Glob::new("*.tbz2").unwrap()); + builder.build().unwrap() + }; +} + +/// DecompressionReader provides an `io::Read` implementation for a limited +/// set of compression formats. +#[derive(Debug)] +pub struct DecompressionReader { + cmd: DecompressionCommand, + child: process::Child, + done: bool, +} + +impl DecompressionReader { + /// Returns a handle to the stdout of the spawned decompression process for + /// `path`, which can be directly searched in the worker. When the returned + /// value is exhausted, the underlying process is reaped. If the underlying + /// process fails, then its stderr is read and converted into a normal + /// io::Error. + /// + /// If there is any error in spawning the decompression command, then + /// return `None`, after outputting any necessary debug or error messages. 
+ pub fn from_path(path: &Path) -> Option { + if is_tar_archive(path) { + debug!("{}: skipping tar archive", path.display()); + return None; + } + let extension = match path.extension().and_then(OsStr::to_str) { + Some(extension) => extension, + None => { + debug!( + "{}: failed to get compresson extension", path.display()); + return None; + } + }; + let decompression_cmd = match DECOMPRESSION_COMMANDS.get(extension) { + Some(cmd) => cmd, + None => { + debug!( + "{}: failed to get decompression command", path.display()); + return None; + } + }; + let cmd = process::Command::new(decompression_cmd.cmd) + .args(decompression_cmd.args) + .arg(path) + .stdout(Stdio::piped()) + .stderr(Stdio::piped()) + .spawn(); + let child = match cmd { + Ok(process) => process, + Err(_) => { + debug!( + "{}: decompression command '{}' not found", + path.display(), decompression_cmd.cmd); + return None; + } + }; + Some(DecompressionReader::new(*decompression_cmd, child)) + } + + fn new( + cmd: DecompressionCommand, + child: process::Child, + ) -> DecompressionReader { + DecompressionReader { + cmd: cmd, + child: child, + done: false, + } + } + + fn read_error(&mut self) -> io::Result { + let mut errbytes = vec![]; + self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?; + let errstr = String::from_utf8_lossy(&errbytes); + let errstr = errstr.trim(); + + Ok(if errstr.is_empty() { + let msg = format!("decompression command failed: '{}'", self.cmd); + io::Error::new(io::ErrorKind::Other, msg) + } else { + let msg = format!( + "decompression command '{}' failed: {}", self.cmd, errstr); + io::Error::new(io::ErrorKind::Other, msg) + }) + } +} + +impl io::Read for DecompressionReader { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + if self.done { + return Ok(0); + } + let nread = self.child.stdout.as_mut().unwrap().read(buf)?; + if nread == 0 { + self.done = true; + // Reap the child now that we're done reading. + // If the command failed, report stderr as an error. 
+ if !self.child.wait()?.success() { + return Err(self.read_error()?); + } + } + Ok(nread) + } +} + +/// Returns true if the given path contains a supported compression format or +/// is a TAR archive. +pub fn is_compressed(path: &Path) -> bool { + is_supported_compression_format(path) || is_tar_archive(path) +} + +/// Returns true if the given path matches any one of the supported compression +/// formats +fn is_supported_compression_format(path: &Path) -> bool { + SUPPORTED_COMPRESSION_FORMATS.is_match(path) +} + +/// Returns true if the given path matches any of the known TAR file formats. +fn is_tar_archive(path: &Path) -> bool { + TAR_ARCHIVE_FORMATS.is_match(path) +} diff --git a/src/main.rs b/src/main.rs index 1b35bb82..ebcc401c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,6 +4,7 @@ extern crate bytecount; extern crate clap; extern crate encoding_rs; extern crate env_logger; +extern crate globset; extern crate grep; extern crate ignore; #[macro_use] @@ -44,6 +45,7 @@ macro_rules! 
eprintln { mod app; mod args; mod decoder; +mod decompressor; mod pathutil; mod printer; mod search_buffer; diff --git a/src/worker.rs b/src/worker.rs index b487c7d5..3c00cc07 100644 --- a/src/worker.rs +++ b/src/worker.rs @@ -9,6 +9,7 @@ use memmap::Mmap; use termcolor::WriteColor; use decoder::DecodeReader; +use decompressor::{self, DecompressionReader}; use pathutil::strip_prefix; use printer::Printer; use search_buffer::BufferSearcher; @@ -42,6 +43,7 @@ struct Options { no_messages: bool, quiet: bool, text: bool, + search_zip_files: bool } impl Default for Options { @@ -61,6 +63,7 @@ impl Default for Options { no_messages: false, quiet: false, text: false, + search_zip_files: false, } } } @@ -190,6 +193,12 @@ impl WorkerBuilder { self.opts.text = yes; self } + + /// If enabled, search through compressed files as well + pub fn search_zip_files(mut self, yes: bool) -> Self { + self.opts.search_zip_files = yes; + self + } } /// Worker is responsible for executing searches on file paths, while choosing @@ -218,22 +227,33 @@ impl Worker { } Work::DirEntry(dent) => { let mut path = dent.path(); - let file = match File::open(path) { - Ok(file) => file, - Err(err) => { - if !self.opts.no_messages { - eprintln!("{}: {}", path.display(), err); + if self.opts.search_zip_files + && decompressor::is_compressed(path) + { + match DecompressionReader::from_path(path) { + Some(reader) => self.search(printer, path, reader), + None => { + return 0; } - return 0; } - }; - if let Some(p) = strip_prefix("./", path) { - path = p; - } - if self.opts.mmap { - self.search_mmap(printer, path, &file) } else { - self.search(printer, path, file) + let file = match File::open(path) { + Ok(file) => file, + Err(err) => { + if !self.opts.no_messages { + eprintln!("{}: {}", path.display(), err); + } + return 0; + } + }; + if let Some(p) = strip_prefix("./", path) { + path = p; + } + if self.opts.mmap { + self.search_mmap(printer, path, &file) + } else { + self.search(printer, path, file) + } } } 
}; diff --git a/tests/data/sherlock.bz2 b/tests/data/sherlock.bz2 new file mode 100644 index 0000000000000000000000000000000000000000..e4a6454e9f3a6003cfbaf4cabdf8454fb47b91ad GIT binary patch literal 272 zcmV+r0q_1oT4*^jL0KkKSw8H7J^%nHUw{A*Km`B@1waUZKkwh*FaYcVDd^Fuw166E zv_LV0#L0k|00yU^fSLg{9;cy{Zse~T>fwDrhD$;ebtgG7ffXot*uyhFlR^aS!ZvX% zWo#nOXM2UCFf?1W^W1JpfErIEMd-h`vrO~~Q)%b%2i@-U^c@1=z{C-9;{lI)S%UXH zN22W|Q66Gvx2oX`pbR#9QGx+J$BfrtOiwFq1dSqGv19NC)a%^v7Yc6znbO2S4L2kt$3`F;w!UsVA0^L?s_g!@D z80UlHK!ly-=k_(JRH-{-#&2dmS_jJv?~TmIZ!~RHq0PQ63TNw*BA{=!ttlt4cI&?~ zcWJbQ&n7D?YXseiSQSdW6n=7RveWYTGlpN|Kv;*qBRLXC_=LH))#tP5Hty6vPw;XGv*2Q3jO;}|``UB5a58=2`h47LM78DJ;cP}*akjkDR-NX_3~%a6;3H-U=5C}zbz;?Y z`w?Vk3);}Fg)Yf_e!e6T*}b8O6e)o?{`Mvt?=KuxIAf?%$0CaWgJ9RGjC~ko=%*WN znHGZ)N30;D3rK?Q8o|l6PDhClZYx>^4hj_<8DPM)Wihg3T!DWCsJdG~w#CO^Lv8+%0dG@;k9VHhB@`n zYR9zu0lx%^g_s277T5+|>*zw*C8hT=!U0DV&AycCun8a5!D%-6+&tgEp)THmoZBu) ztCp6Ko*fFekml`V{&H=ZaWv5YQjF|HKKt5px^Oam;re{sYeco>-QjFTTXD9$(pH`3 z$qaAmO5h`A3FdC3Lv>=+bo&uxXA9cUt%WYhe15(p5!t<=i4-Y;IR5q~8}Bb1RXAg) zQpX~T0E1xHsf>LXW$33HYnc{<5l5^bqYFra?i#_#wN6Kg5N<141r7=o92sE1v}G}} zWL$xN1gN@OK(@ulUqlBbS$X16%i)u<*rUEmr`D@JFCHyYAsGELU@+Ys00000D8O7Y e)y-i50jC1*0ssK(BNQ;P#Ao{g000001X)_CGnBvp literal 0 HcmV?d00001 diff --git a/tests/tests.rs b/tests/tests.rs index 5f8fa2ec..dc19350c 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -75,6 +75,10 @@ fn sort_lines(lines: &str) -> String { format!("{}\n", lines.join("\n")) } +fn cmd_exists(name: &str) -> bool { + Command::new(name).arg("--help").output().is_ok() +} + sherlock!(single_file, |wd: WorkDir, mut cmd| { let lines: String = wd.stdout(&mut cmd); let expected = "\ @@ -1609,6 +1613,104 @@ clean!(suggest_fixed_strings_for_invalid_regex, "foo(", ".", assert_eq!(err.contains("--fixed-strings"), true); }); +#[test] +fn compressed_gzip() { + if !cmd_exists("gzip") { + return; + } + let gzip_file = include_bytes!("./data/sherlock.gz"); + + let wd = WorkDir::new("feature_search_compressed"); + 
wd.create_bytes("sherlock.gz", gzip_file); + + let mut cmd = wd.command(); + cmd.arg("-z").arg("Sherlock").arg("sherlock.gz"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq!(lines, expected); +} + +#[test] +fn compressed_bzip2() { + if !cmd_exists("bzip2") { + return; + } + let bzip2_file = include_bytes!("./data/sherlock.bz2"); + + let wd = WorkDir::new("feature_search_compressed"); + wd.create_bytes("sherlock.bz2", bzip2_file); + + let mut cmd = wd.command(); + cmd.arg("-z").arg("Sherlock").arg("sherlock.bz2"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq!(lines, expected); +} + +#[test] +fn compressed_xz() { + if !cmd_exists("xz") { + return; + } + let xz_file = include_bytes!("./data/sherlock.xz"); + + let wd = WorkDir::new("feature_search_compressed"); + wd.create_bytes("sherlock.xz", xz_file); + + let mut cmd = wd.command(); + cmd.arg("-z").arg("Sherlock").arg("sherlock.xz"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. Sherlock Holmes +"; + assert_eq!(lines, expected); +} + +#[test] +fn compressed_lzma() { + if !cmd_exists("xz") { + return; + } + let lzma_file = include_bytes!("./data/sherlock.lzma"); + + let wd = WorkDir::new("feature_search_compressed"); + wd.create_bytes("sherlock.lzma", lzma_file); + + let mut cmd = wd.command(); + cmd.arg("-z").arg("Sherlock").arg("sherlock.lzma"); + let lines: String = wd.stdout(&mut cmd); + let expected = "\ +For the Doctor Watsons of this world, as opposed to the Sherlock +be, to a very large extent, the result of luck. 
Sherlock Holmes +"; + assert_eq!(lines, expected); +} + +#[test] +fn compressed_failing_gzip() { + if !cmd_exists("gzip") { + return; + } + let wd = WorkDir::new("feature_search_compressed"); + wd.create("sherlock.gz", hay::SHERLOCK); + + let mut cmd = wd.command(); + cmd.arg("-z").arg("Sherlock").arg("sherlock.gz"); + + wd.assert_non_empty_stderr(&mut cmd); + + let output = cmd.output().unwrap(); + let err = String::from_utf8_lossy(&output.stderr); + assert_eq!(err.contains("not in gzip format"), true); +} + #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740");