From f007f940c53a4818ead58f2fe2e0fac95cc3a40a Mon Sep 17 00:00:00 2001
From: Balaji Sivaraman <balaji@balajisivaraman.com>
Date: Sun, 7 Jan 2018 21:35:58 +0530
Subject: [PATCH] search: add support for searching compressed files

This commit adds opt-in support for searching compressed files during
recursive search. This behavior is only enabled when the
`-z/--search-zip` flag is passed to ripgrep. When enabled, a limited set
of common compression formats are recognized via file extension, and a
new process is spawned to perform the decompression. ripgrep then
searches the stdout of that spawned process.

Closes #539
---
 .travis.yml              |   4 +-
 Cargo.lock               |   1 +
 Cargo.toml               |   1 +
 README.md                |   8 +-
 complete/_rg             |   1 +
 doc/rg.1                 |  13 ++-
 doc/rg.1.md              |   7 ++
 ignore/src/types.rs      |   4 +
 src/app.rs               |  14 ++-
 src/args.rs              |   3 +
 src/decompressor.rs      | 191 +++++++++++++++++++++++++++++++++++++++
 src/main.rs              |   2 +
 src/worker.rs            |  46 +++++++---
 tests/data/sherlock.bz2  | Bin 0 -> 272 bytes
 tests/data/sherlock.gz   | Bin 0 -> 263 bytes
 tests/data/sherlock.lzma | Bin 0 -> 286 bytes
 tests/data/sherlock.xz   | Bin 0 -> 332 bytes
 tests/tests.rs           | 102 +++++++++++++++++++++
 18 files changed, 373 insertions(+), 24 deletions(-)
 create mode 100644 src/decompressor.rs
 create mode 100644 tests/data/sherlock.bz2
 create mode 100644 tests/data/sherlock.gz
 create mode 100644 tests/data/sherlock.lzma
 create mode 100644 tests/data/sherlock.xz

diff --git a/.travis.yml b/.travis.yml
index a1b43c58..ee46cd7c 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,8 +9,10 @@ env:
 addons:
   apt:
     packages:
-      # Needed for completion-function test
+      # Needed for completion-function test.
       - zsh
+      # Needed for testing decompression search.
+      - xz-utils
 
 matrix:
   fast_finish: true
diff --git a/Cargo.lock b/Cargo.lock
index d2c30bd1..9b80e479 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -237,6 +237,7 @@ dependencies = [
  "clap 2.29.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "globset 0.2.1",
  "grep 0.1.7",
  "ignore 0.3.1",
  "lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/Cargo.toml b/Cargo.toml
index 924a2397..6a250fa0 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -49,6 +49,7 @@ num_cpus = "1"
 regex = "0.2.4"
 same-file = "1"
 termcolor = { version = "0.3.3", path = "termcolor" }
+globset = { version = "0.2.1", path = "globset" }
 
 [build-dependencies]
 clap = "2.26"
diff --git a/README.md b/README.md
index 27aead57..ac676863 100644
--- a/README.md
+++ b/README.md
@@ -91,6 +91,8 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
   as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
   automatically detecting UTF-16 is provided. Other text encodings must be
   specifically specified with the `-E/--encoding` flag.)
+* `ripgrep` supports searching files compressed in a common format (currently
+  gzip, xz, lzma or bzip2) with the `-z/--search-zip` flag.
 
 In other words, use `ripgrep` if you like speed, filtering by default, fewer
 bugs, and Unicode support.
@@ -109,12 +111,10 @@ give you a glimpse at some important downsides or missing features of
   support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
   `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
   supported.)
-* `ripgrep` doesn't yet support searching compressed files. (Likely to be
-  supported in the future.)
 * `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.)
 
-In other words, if you like fancy regexes, searching compressed files or
-multiline search, then `ripgrep` may not quite meet your needs (yet).
+In other words, if you like fancy regexes or multiline search, then `ripgrep`
+may not quite meet your needs (yet).
 
 ### Feature comparison
 
diff --git a/complete/_rg b/complete/_rg
index 8455c804..6b62c169 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -87,6 +87,7 @@ _rg() {
     '(-w -x --line-regexp --word-regexp)'{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
     '(-e -f --file --files --regexp --type-list)1: :_rg_pattern'
     '(--type-list)*:file:_files'
+    '(-z --search-zip)'{-z,--search-zip}'[search in compressed files]'
   )
 
   [[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && {
diff --git a/doc/rg.1 b/doc/rg.1
index d8272e74..fd562e81 100644
--- a/doc/rg.1
+++ b/doc/rg.1
@@ -184,6 +184,15 @@ Only show matches surrounded by line boundaries.
 This is equivalent to putting ^...$ around the search pattern.
 .RS
 .RE
+.TP
+.B \-z, \-\-search\-zip
+Search in compressed files.
+Currently gz, bz2, xz and lzma formats are supported.
+.RS
+.PP
+Note that ripgrep expects to find the decompression binaries for the
+respective formats in your system\[aq]s PATH for use with this flag.
+.RE
 .SH LESS COMMON OPTIONS
 .TP
 .B \-A, \-\-after\-context \f[I]NUM\f[]
@@ -437,9 +446,7 @@ such part on a separate output line.
 .TP
 .B \-\-passthru, \-\-passthrough
 Show both matching and non\-matching lines.
-This is equivalent to adding ^ to the list of search patterns.
-This option overrides \-\-count and cannot be used with
-\-\-only\-matching or \-\-replace.
+This option cannot be used with \-\-only\-matching or \-\-replace.
 .RS
 .RE
 .TP
diff --git a/doc/rg.1.md b/doc/rg.1.md
index 93b401ab..6b054286 100644
--- a/doc/rg.1.md
+++ b/doc/rg.1.md
@@ -125,6 +125,13 @@ Project home page: https://github.com/BurntSushi/ripgrep
 : Only show matches surrounded by line boundaries. This is equivalent to
   putting ^...$ around the search pattern.
 
+-z, --search-zip
+: Search in compressed files. Currently gz, bz2, xz and lzma
+  formats are supported.
+
+    Note that ripgrep expects to find the decompression binaries for the
+    respective formats in your system's PATH for use with this flag.
+
 # LESS COMMON OPTIONS
 
 -A, --after-context *NUM*
diff --git a/ignore/src/types.rs b/ignore/src/types.rs
index d231b577..a9a9687e 100644
--- a/ignore/src/types.rs
+++ b/ignore/src/types.rs
@@ -103,6 +103,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
     ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
     ("awk", &["*.awk"]),
     ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
+    ("bzip2", &["*.bz2"]),
     ("c", &["*.c", "*.h", "*.H"]),
     ("cabal", &["*.cabal"]),
     ("cbor", &["*.cbor"]),
@@ -137,6 +138,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
     ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
     ("gn", &["*.gn", "*.gni"]),
     ("go", &["*.go"]),
+    ("gzip", &["*.gz"]),
     ("groovy", &["*.groovy", "*.gradle"]),
     ("h", &["*.h", "*.hpp"]),
     ("hbs", &["*.hbs"]),
@@ -184,6 +186,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
     ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
     ("log", &["*.log"]),
     ("lua", &["*.lua"]),
+    ("lzma", &["*.lzma"]),
     ("m4", &["*.ac", "*.m4"]),
     ("make", &[
         "gnumakefile", "Gnumakefile", "GNUmakefile",
@@ -276,6 +279,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
     ("wiki", &["*.mediawiki", "*.wiki"]),
     ("webidl", &["*.idl", "*.webidl", "*.widl"]),
     ("xml", &["*.xml", "*.xml.dist"]),
+    ("xz", &["*.xz"]),
     ("yacc", &["*.y"]),
     ("yaml", &["*.yaml", "*.yml"]),
     ("zsh", &[
diff --git a/src/app.rs b/src/app.rs
index 3133e1e2..f2f084e3 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -191,6 +191,7 @@ pub fn app() -> App<'static, 'static> {
         .arg(flag("type-clear")
              .value_name("TYPE").takes_value(true)
              .multiple(true).number_of_values(1))
+        .arg(flag("search-zip").short("z"))
 }
 
 struct Usage {
@@ -450,7 +451,8 @@ lazy_static! {
               can be specified by using the --ignore-file flag several times. \
               When specifying multiple ignore files, earlier files have lower \
               precedence than later files. If you are looking for a way to \
-              include or exclude files and directories directly used -g instead.");
+              include or exclude files and directories directly used -g \
+              instead.");
         doc!(h, "follow",
              "Follow symbolic links.");
         doc!(h, "max-count",
@@ -592,6 +594,11 @@ lazy_static! {
               only clears the default type definitions that are found inside \
               of ripgrep.\n\nNote that this MUST be passed to every \
               invocation of ripgrep. Type settings are NOT persisted.");
+        doc!(h, "search-zip",
+             "Search in compressed files.",
+             "Search in compressed files. Currently gz, bz2, xz, and \
+              lzma files are supported. This option expects the decompression \
+              binaries to be available in the system PATH.");
 
         h
     };
@@ -599,8 +606,9 @@ lazy_static! {
 
 fn validate_line_number_width(s: String) -> Result<(), String> {
     if s.starts_with("0") {
-        Err(String::from("Custom padding characters are currently not supported. \
-        Please enter only a numeric value."))
+        Err(String::from(
+            "Custom padding characters are currently not supported. \
+             Please enter only a numeric value."))
     } else {
         validate_number(s)
     }
diff --git a/src/args.rs b/src/args.rs
index 56dacc97..030adf0f 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -77,6 +77,7 @@ pub struct Args {
     type_list: bool,
     types: Types,
     with_filename: bool,
+    search_zip_files: bool
 }
 
 impl Args {
@@ -229,6 +230,7 @@ impl Args {
             .no_messages(self.no_messages)
             .quiet(self.quiet)
             .text(self.text)
+            .search_zip_files(self.search_zip_files)
             .build()
     }
 
@@ -365,6 +367,7 @@ impl<'a> ArgMatches<'a> {
             type_list: self.is_present("type-list"),
             types: self.types()?,
             with_filename: with_filename,
+            search_zip_files: self.is_present("search-zip")
         };
         if args.mmap {
             debug!("will try to use memory maps");
diff --git a/src/decompressor.rs b/src/decompressor.rs
new file mode 100644
index 00000000..a94948af
--- /dev/null
+++ b/src/decompressor.rs
@@ -0,0 +1,191 @@
+use std::collections::HashMap;
+use std::ffi::OsStr;
+use std::fmt;
+use std::io::{self, Read};
+use std::path::Path;
+use std::process::{self, Stdio};
+
+use globset::{Glob, GlobSet, GlobSetBuilder};
+
+/// A decompression command, contains the command to be spawned as well as any
+/// necessary CLI args.
+#[derive(Clone, Copy, Debug)]
+struct DecompressionCommand {
+    cmd: &'static str,
+    args: &'static [&'static str],
+}
+
+impl DecompressionCommand {
+    /// Create a new decompress command
+    fn new(
+        cmd: &'static str,
+        args: &'static [&'static str],
+    ) -> DecompressionCommand {
+        DecompressionCommand {
+            cmd, args
+        }
+    }
+}
+
+impl fmt::Display for DecompressionCommand {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "{} {}", self.cmd, self.args.join(" "))
+    }
+}
+
+lazy_static! {
+    static ref DECOMPRESSION_COMMANDS: HashMap<
+        &'static str,
+        DecompressionCommand,
+    > = {
+        let mut m = HashMap::new();
+
+        const ARGS: &[&str] = &["-d", "-c"];
+        m.insert("gz", DecompressionCommand::new("gzip", ARGS));
+        m.insert("bz2", DecompressionCommand::new("bzip2", ARGS));
+        m.insert("xz", DecompressionCommand::new("xz", ARGS));
+
+        const LZMA_ARGS: &[&str] = &["--format=lzma", "-d", "-c"];
+        m.insert("lzma", DecompressionCommand::new("xz", LZMA_ARGS));
+
+        m
+    };
+    static ref SUPPORTED_COMPRESSION_FORMATS: GlobSet = {
+        let mut builder = GlobSetBuilder::new();
+        builder.add(Glob::new("*.gz").unwrap());
+        builder.add(Glob::new("*.bz2").unwrap());
+        builder.add(Glob::new("*.xz").unwrap());
+        builder.add(Glob::new("*.lzma").unwrap());
+        builder.build().unwrap()
+    };
+    static ref TAR_ARCHIVE_FORMATS: GlobSet = {
+        let mut builder = GlobSetBuilder::new();
+        builder.add(Glob::new("*.tar.gz").unwrap());
+        builder.add(Glob::new("*.tar.xz").unwrap());
+        builder.add(Glob::new("*.tar.bz2").unwrap());
+        builder.add(Glob::new("*.tgz").unwrap());
+        builder.add(Glob::new("*.txz").unwrap());
+        builder.add(Glob::new("*.tbz2").unwrap());
+        builder.build().unwrap()
+    };
+}
+
+/// DecompressionReader provides an `io::Read` implementation for a limited
+/// set of compression formats.
+#[derive(Debug)]
+pub struct DecompressionReader {
+    cmd: DecompressionCommand,
+    child: process::Child,
+    done: bool,
+}
+
+impl DecompressionReader {
+    /// Spawns the decompression command for `path` and returns a reader over
+    /// its stdout, which can be directly searched in the worker. When the
+    /// returned value is exhausted, the underlying process is reaped. If the
+    /// underlying process fails, then its stderr is read and converted into
+    /// a normal io::Error.
+    ///
+    /// Returns `None`, after emitting a debug message, if `path` is a tar
+    /// archive, has no recognized extension, or the command fails to spawn.
+    pub fn from_path(path: &Path) -> Option<DecompressionReader> {
+        if is_tar_archive(path) {
+            debug!("{}: skipping tar archive", path.display());
+            return None;
+        }
+        let extension = match path.extension().and_then(OsStr::to_str) {
+            Some(extension) => extension,
+            None => {
+                debug!(
+                    "{}: failed to get compression extension", path.display());
+                return None;
+            }
+        };
+        let decompression_cmd = match DECOMPRESSION_COMMANDS.get(extension) {
+            Some(cmd) => cmd,
+            None => {
+                debug!(
+                    "{}: failed to get decompression command", path.display());
+                return None;
+            }
+        };
+        let cmd = process::Command::new(decompression_cmd.cmd)
+            .args(decompression_cmd.args)
+            .arg(path)
+            .stdout(Stdio::piped())
+            .stderr(Stdio::piped())
+            .spawn();
+        let child = match cmd {
+            Ok(process) => process,
+            Err(err) => {
+                debug!(
+                    "{}: failed to spawn decompression command '{}': {}",
+                    path.display(), decompression_cmd.cmd, err);
+                return None;
+            }
+        };
+        Some(DecompressionReader::new(*decompression_cmd, child))
+    }
+
+    fn new(
+        cmd: DecompressionCommand,
+        child: process::Child,
+    ) -> DecompressionReader {
+        DecompressionReader {
+            cmd: cmd,
+            child: child,
+            done: false,
+        }
+    }
+
+    fn read_error(&mut self) -> io::Result<io::Error> {
+        let mut errbytes = vec![];
+        self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?;
+        let errstr = String::from_utf8_lossy(&errbytes);
+        let errstr = errstr.trim();
+
+        Ok(if errstr.is_empty() {
+            let msg = format!("decompression command failed: '{}'", self.cmd);
+            io::Error::new(io::ErrorKind::Other, msg)
+        } else {
+            let msg = format!(
+                "decompression command '{}' failed: {}", self.cmd, errstr);
+            io::Error::new(io::ErrorKind::Other, msg)
+        })
+    }
+}
+
+impl io::Read for DecompressionReader {
+    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
+        if self.done {
+            return Ok(0);
+        }
+        let nread = self.child.stdout.as_mut().unwrap().read(buf)?;
+        if nread == 0 {
+            self.done = true;
+            // EOF on stdout: reap the child and report stderr on failure.
+            // NOTE(review): stderr is drained only here; a full pipe may stall.
+            if !self.child.wait()?.success() {
+                return Err(self.read_error()?);
+            }
+        }
+        Ok(nread)
+    }
+}
+
+/// Returns true if the given path matches a supported compression glob
+/// (`*.gz`, `*.bz2`, `*.xz`, `*.lzma`) or one of the known TAR archive globs.
+pub fn is_compressed(path: &Path) -> bool {
+    is_supported_compression_format(path) || is_tar_archive(path)
+}
+
+/// Returns true if the given path matches any one of the supported compression
+/// formats
+fn is_supported_compression_format(path: &Path) -> bool {
+    SUPPORTED_COMPRESSION_FORMATS.is_match(path)
+}
+
+/// Returns true if the given path matches any of the known TAR file formats.
+fn is_tar_archive(path: &Path) -> bool {
+    TAR_ARCHIVE_FORMATS.is_match(path)
+}
diff --git a/src/main.rs b/src/main.rs
index 1b35bb82..ebcc401c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -4,6 +4,7 @@ extern crate bytecount;
 extern crate clap;
 extern crate encoding_rs;
 extern crate env_logger;
+extern crate globset;
 extern crate grep;
 extern crate ignore;
 #[macro_use]
@@ -44,6 +45,7 @@ macro_rules! eprintln {
 mod app;
 mod args;
 mod decoder;
+mod decompressor;
 mod pathutil;
 mod printer;
 mod search_buffer;
diff --git a/src/worker.rs b/src/worker.rs
index b487c7d5..3c00cc07 100644
--- a/src/worker.rs
+++ b/src/worker.rs
@@ -9,6 +9,7 @@ use memmap::Mmap;
 use termcolor::WriteColor;
 
 use decoder::DecodeReader;
+use decompressor::{self, DecompressionReader};
 use pathutil::strip_prefix;
 use printer::Printer;
 use search_buffer::BufferSearcher;
@@ -42,6 +43,7 @@ struct Options {
     no_messages: bool,
     quiet: bool,
     text: bool,
+    search_zip_files: bool
 }
 
 impl Default for Options {
@@ -61,6 +63,7 @@ impl Default for Options {
             no_messages: false,
             quiet: false,
             text: false,
+            search_zip_files: false,
         }
     }
 }
@@ -190,6 +193,12 @@ impl WorkerBuilder {
         self.opts.text = yes;
         self
     }
+
+    /// If enabled, search through compressed files as well
+    pub fn search_zip_files(mut self, yes: bool) -> Self {
+        self.opts.search_zip_files = yes;
+        self
+    }
 }
 
 /// Worker is responsible for executing searches on file paths, while choosing
@@ -218,22 +227,33 @@ impl Worker {
             }
             Work::DirEntry(dent) => {
                 let mut path = dent.path();
-                let file = match File::open(path) {
-                    Ok(file) => file,
-                    Err(err) => {
-                        if !self.opts.no_messages {
-                            eprintln!("{}: {}", path.display(), err);
+                if self.opts.search_zip_files
+                     && decompressor::is_compressed(path)
+                {
+                    match DecompressionReader::from_path(path) {
+                        Some(reader) => self.search(printer, path, reader),
+                        None => {
+                            return 0;
                         }
-                        return 0;
                     }
-                };
-                if let Some(p) = strip_prefix("./", path) {
-                    path = p;
-                }
-                if self.opts.mmap {
-                    self.search_mmap(printer, path, &file)
                 } else {
-                    self.search(printer, path, file)
+                    let file = match File::open(path) {
+                        Ok(file) => file,
+                        Err(err) => {
+                            if !self.opts.no_messages {
+                                eprintln!("{}: {}", path.display(), err);
+                            }
+                            return 0;
+                        }
+                    };
+                    if let Some(p) = strip_prefix("./", path) {
+                        path = p;
+                    }
+                    if self.opts.mmap {
+                        self.search_mmap(printer, path, &file)
+                    } else {
+                        self.search(printer, path, file)
+                    }
                 }
             }
         };
diff --git a/tests/data/sherlock.bz2 b/tests/data/sherlock.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..e4a6454e9f3a6003cfbaf4cabdf8454fb47b91ad
GIT binary patch
literal 272
zcmV+r0q_1oT4*^jL0KkKSw8H7J^%nHUw{A*Km`B@1waUZKkwh*FaYcVDd^Fuw166E
zv_LV0#L0k|00yU^fSLg{9;cy{Zse~T>fwDrhD$;ebtgG7ffXot*uyhFlR^aS!ZvX%
zWo#nOXM2UCFf?1W^W1JpfErIEMd-h`vrO~~Q)%b%2i@-U^c@1=z{C-9;{lI)S%UXH
zN22W|Q66Gvx2oX`pbR#9QGx+J$Bfr<w@w!{j73i4X@3`SkU_&qTRj82QNo2<!XxOE
z2ZEagh&S^g^`#}jj4nk2s*8lISK*&g8MMmOGUzN3HKJ=8&U0ySjR8Xf6{`b7&pcxq
W)oj#Y78<L6i@744C`cc6K_39;$#et&

literal 0
HcmV?d00001

diff --git a/tests/data/sherlock.gz b/tests/data/sherlock.gz
new file mode 100644
index 0000000000000000000000000000000000000000..5629cbef57d48243ff5df80ca567bd012cb593fc
GIT binary patch
literal 263
zcmV+i0r>tOiwFq1dSqGv19NC)a%^v7Yc6znbO2S4L2kt$3`F;w!UsVA0^L?s_g!@D
z80UlHK!ly-=k_(JRH-{-#&2dmS_jJv?~TmIZ!~RHq0PQ63TNw*BA{=!ttlt4cI&?~
zcWJbQ&n7D?YXseiSQSdW6<cpdsl<bo*qE#wJb(+VyBeUJcxjk84kp6E{Q|-MWZ;)^
zwRmGX>n=7RveWYTGlpN|Kv;*qBRLXC_=LH))#tP5Hty6vPw;XGv*2<EqVhyvXnqUR
zZvWdu_PQP^qvtIK<~J3Z%VyW);_R9rc?V80jG14#OvRtWAG=)9%`Lm3;KTxGhLkxy
N!Vh$`7ME`U008Zpe;@z=

literal 0
HcmV?d00001

diff --git a/tests/data/sherlock.lzma b/tests/data/sherlock.lzma
new file mode 100644
index 0000000000000000000000000000000000000000..bfdf7fb42ddb5c2afb072dbc54aba8f1a799769d
GIT binary patch
literal 286
zcmV+(0pb2#004jh|NsC0|NsC003#d9LIXYFwP?GBIrY$L$F%zazXXYem;~e&*alte
z=t9^frS~$z0Y?<gzLe^)2_M$MX*T)XJm0^eF5ZEh+b&3}mX?s79SXOQ=Ivzua&4J$
zG|>Q3jO;}|``UB5a58=2`h47LM78DJ;cP}*akjkDR-NX_3~%a6;3H-U=5C}zbz;?Y
z`w?Vk3);}Fg)Yf_e!e6T*}b8O6e)o?{`Mvt?=KuxIAf?%$0CaWgJ9RGjC~ko=%*WN
znHGZ)N30;D3rK?Q8o|l6PDhClZYx>^4hj_<8DPM)Wihg3T!DWCsJdG~w#CO^L<c2V
kdE!vZ;gho1qrOU~)~h}*9xYNK82vL~xO7F2|CW%az;~mL$^ZZW

literal 0
HcmV?d00001

diff --git a/tests/data/sherlock.xz b/tests/data/sherlock.xz
new file mode 100644
index 0000000000000000000000000000000000000000..39cca0f54a0ef196c882a5f0d1c1903aebd993b0
GIT binary patch
literal 332
zcmV-S0ki)7H+ooF000E$*0e?f03iVu0001VFXf})0d4^cT>v8+%0dG@;k9VHhB@`n
zYR9zu0lx%^g_s277T5+|>*zw*C8hT=!U0DV&AycCun8a5!D%-6+&tgEp)THmoZBu)
ztCp6Ko*fFekml`V{&H=ZaWv5YQjF|HKKt5px^Oam;re{sYeco>-QjFTTXD9$(pH`3
z$qaAmO5h`A3FdC3Lv>=+bo&uxXA9cUt%WYhe15(p5!t<=i4-Y;IR5q~8}Bb1RXAg)
zQpX~T0E1xHsf>LXW$33HYnc{<5l5^bqYFra?i#_#wN6Kg5N<141r7=o92sE1v}G}}
zWL$xN1gN@OK(@ulUqlBbS$X16%i)u<*rUEmr`D@JFCHyYAsGELU@+Ys00000D8O7Y
e)y-i50jC1*0ssK(BNQ;P#Ao{g000001X)_CGnBvp

literal 0
HcmV?d00001

diff --git a/tests/tests.rs b/tests/tests.rs
index 5f8fa2ec..dc19350c 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -75,6 +75,10 @@ fn sort_lines(lines: &str) -> String {
     format!("{}\n", lines.join("\n"))
 }
 
+fn cmd_exists(name: &str) -> bool {
+    Command::new(name).arg("--help").output().is_ok()
+}
+
 sherlock!(single_file, |wd: WorkDir, mut cmd| {
     let lines: String = wd.stdout(&mut cmd);
     let expected = "\
@@ -1609,6 +1613,104 @@ clean!(suggest_fixed_strings_for_invalid_regex, "foo(", ".",
     assert_eq!(err.contains("--fixed-strings"), true);
 });
 
+#[test]
+fn compressed_gzip() {
+    if !cmd_exists("gzip") {
+        return;
+    }
+    let gzip_file = include_bytes!("./data/sherlock.gz");
+
+    let wd = WorkDir::new("feature_search_compressed");
+    wd.create_bytes("sherlock.gz", gzip_file);
+
+    let mut cmd = wd.command();
+    cmd.arg("-z").arg("Sherlock").arg("sherlock.gz");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+be, to a very large extent, the result of luck. Sherlock Holmes
+";
+    assert_eq!(lines, expected);
+}
+
+#[test]
+fn compressed_bzip2() {
+    if !cmd_exists("bzip2") {
+        return;
+    }
+    let bzip2_file = include_bytes!("./data/sherlock.bz2");
+
+    let wd = WorkDir::new("feature_search_compressed");
+    wd.create_bytes("sherlock.bz2", bzip2_file);
+
+    let mut cmd = wd.command();
+    cmd.arg("-z").arg("Sherlock").arg("sherlock.bz2");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+be, to a very large extent, the result of luck. Sherlock Holmes
+";
+    assert_eq!(lines, expected);
+}
+
+#[test]
+fn compressed_xz() {
+    if !cmd_exists("xz") {
+        return;
+    }
+    let xz_file = include_bytes!("./data/sherlock.xz");
+
+    let wd = WorkDir::new("feature_search_compressed");
+    wd.create_bytes("sherlock.xz", xz_file);
+
+    let mut cmd = wd.command();
+    cmd.arg("-z").arg("Sherlock").arg("sherlock.xz");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+be, to a very large extent, the result of luck. Sherlock Holmes
+";
+    assert_eq!(lines, expected);
+}
+
+#[test]
+fn compressed_lzma() {
+    if !cmd_exists("xz") {
+        return;
+    }
+    let lzma_file = include_bytes!("./data/sherlock.lzma");
+
+    let wd = WorkDir::new("feature_search_compressed");
+    wd.create_bytes("sherlock.lzma", lzma_file);
+
+    let mut cmd = wd.command();
+    cmd.arg("-z").arg("Sherlock").arg("sherlock.lzma");
+    let lines: String = wd.stdout(&mut cmd);
+    let expected = "\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+be, to a very large extent, the result of luck. Sherlock Holmes
+";
+    assert_eq!(lines, expected);
+}
+
+#[test]
+fn compressed_failing_gzip() {
+    if !cmd_exists("gzip") {
+        return;
+    }
+    let wd = WorkDir::new("feature_search_compressed");
+    wd.create("sherlock.gz", hay::SHERLOCK);
+
+    let mut cmd = wd.command();
+    cmd.arg("-z").arg("Sherlock").arg("sherlock.gz");
+
+    wd.assert_non_empty_stderr(&mut cmd);
+
+    let output = cmd.output().unwrap();
+    let err = String::from_utf8_lossy(&output.stderr);
+    assert_eq!(err.contains("not in gzip format"), true);
+}
+
 #[test]
 fn feature_740_passthru() {
     let wd = WorkDir::new("feature_740");