mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 01:30:21 -07:00
search: add support for searching compressed files
This commit adds opt-in support for searching compressed files during recursive search. This behavior is only enabled when the `-z/--search-zip` flag is passed to ripgrep. When enabled, a limited set of common compression formats are recognized via file extension, and a new process is spawned to perform the decompression. ripgrep then searches the stdout of that spawned process. Closes #539
This commit is contained in:
parent
a8543f798d
commit
f007f940c5
@ -9,8 +9,10 @@ env:
|
|||||||
addons:
|
addons:
|
||||||
apt:
|
apt:
|
||||||
packages:
|
packages:
|
||||||
# Needed for completion-function test
|
# Needed for completion-function test.
|
||||||
- zsh
|
- zsh
|
||||||
|
# Needed for testing decompression search.
|
||||||
|
- xz-utils
|
||||||
|
|
||||||
matrix:
|
matrix:
|
||||||
fast_finish: true
|
fast_finish: true
|
||||||
|
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -237,6 +237,7 @@ dependencies = [
|
|||||||
"clap 2.29.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"clap 2.29.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"encoding_rs 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"env_logger 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
"globset 0.2.1",
|
||||||
"grep 0.1.7",
|
"grep 0.1.7",
|
||||||
"ignore 0.3.1",
|
"ignore 0.3.1",
|
||||||
"lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"lazy_static 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
@ -49,6 +49,7 @@ num_cpus = "1"
|
|||||||
regex = "0.2.4"
|
regex = "0.2.4"
|
||||||
same-file = "1"
|
same-file = "1"
|
||||||
termcolor = { version = "0.3.3", path = "termcolor" }
|
termcolor = { version = "0.3.3", path = "termcolor" }
|
||||||
|
globset = { version = "0.2.1", path = "globset" }
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
clap = "2.26"
|
clap = "2.26"
|
||||||
|
@ -91,6 +91,8 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
|||||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||||
automatically detecting UTF-16 is provided. Other text encodings must be
|
automatically detecting UTF-16 is provided. Other text encodings must be
|
||||||
specifically specified with the `-E/--encoding` flag.)
|
specifically specified with the `-E/--encoding` flag.)
|
||||||
|
* `ripgrep` supports searching files compressed in a common format (gzip, xz,
|
||||||
|
lzma or bzip2 currently) with the `-z/--search-zip` flag.
|
||||||
|
|
||||||
In other words, use `ripgrep` if you like speed, filtering by default, fewer
|
In other words, use `ripgrep` if you like speed, filtering by default, fewer
|
||||||
bugs, and Unicode support.
|
bugs, and Unicode support.
|
||||||
@ -109,12 +111,10 @@ give you a glimpse at some important downsides or missing features of
|
|||||||
support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
|
support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
|
||||||
`\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
|
`\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
|
||||||
supported.)
|
supported.)
|
||||||
* `ripgrep` doesn't yet support searching compressed files. (Likely to be
|
|
||||||
supported in the future.)
|
|
||||||
* `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.)
|
* `ripgrep` doesn't have multiline search. (Unlikely to ever be supported.)
|
||||||
|
|
||||||
In other words, if you like fancy regexes, searching compressed files or
|
In other words, if you like fancy regexes or multiline search, then `ripgrep`
|
||||||
multiline search, then `ripgrep` may not quite meet your needs (yet).
|
may not quite meet your needs (yet).
|
||||||
|
|
||||||
### Feature comparison
|
### Feature comparison
|
||||||
|
|
||||||
|
@ -87,6 +87,7 @@ _rg() {
|
|||||||
'(-w -x --line-regexp --word-regexp)'{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
|
'(-w -x --line-regexp --word-regexp)'{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
|
||||||
'(-e -f --file --files --regexp --type-list)1: :_rg_pattern'
|
'(-e -f --file --files --regexp --type-list)1: :_rg_pattern'
|
||||||
'(--type-list)*:file:_files'
|
'(--type-list)*:file:_files'
|
||||||
|
'(-z --search-zip)'{-z,--search-zip}'[search in compressed files]'
|
||||||
)
|
)
|
||||||
|
|
||||||
[[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && {
|
[[ ${_RG_COMPLETE_LIST_ARGS:-} == (1|t*|y*) ]] && {
|
||||||
|
13
doc/rg.1
13
doc/rg.1
@ -184,6 +184,15 @@ Only show matches surrounded by line boundaries.
|
|||||||
This is equivalent to putting ^...$ around the search pattern.
|
This is equivalent to putting ^...$ around the search pattern.
|
||||||
.RS
|
.RS
|
||||||
.RE
|
.RE
|
||||||
|
.TP
|
||||||
|
.B \-z, \-\-search\-zip
|
||||||
|
Search in compressed files.
|
||||||
|
Currently gz, bz2, xz and lzma formats are supported.
|
||||||
|
.RS
|
||||||
|
.PP
|
||||||
|
Note that ripgrep expects to find the decompression binaries for the
|
||||||
|
respective formats in your system\[aq]s PATH for use with this flag.
|
||||||
|
.RE
|
||||||
.SH LESS COMMON OPTIONS
|
.SH LESS COMMON OPTIONS
|
||||||
.TP
|
.TP
|
||||||
.B \-A, \-\-after\-context \f[I]NUM\f[]
|
.B \-A, \-\-after\-context \f[I]NUM\f[]
|
||||||
@ -437,9 +446,7 @@ such part on a separate output line.
|
|||||||
.TP
|
.TP
|
||||||
.B \-\-passthru, \-\-passthrough
|
.B \-\-passthru, \-\-passthrough
|
||||||
Show both matching and non\-matching lines.
|
Show both matching and non\-matching lines.
|
||||||
This is equivalent to adding ^ to the list of search patterns.
|
This option cannot be used with \-\-only\-matching or \-\-replace.
|
||||||
This option overrides \-\-count and cannot be used with
|
|
||||||
\-\-only\-matching or \-\-replace.
|
|
||||||
.RS
|
.RS
|
||||||
.RE
|
.RE
|
||||||
.TP
|
.TP
|
||||||
|
@ -125,6 +125,13 @@ Project home page: https://github.com/BurntSushi/ripgrep
|
|||||||
: Only show matches surrounded by line boundaries. This is equivalent to
|
: Only show matches surrounded by line boundaries. This is equivalent to
|
||||||
putting ^...$ around the search pattern.
|
putting ^...$ around the search pattern.
|
||||||
|
|
||||||
|
-z, --search-zip
|
||||||
|
: Search in compressed files. Currently gz, bz2, xz and lzma
|
||||||
|
formats are supported.
|
||||||
|
|
||||||
|
Note that ripgrep expects to find the decompression binaries for the
|
||||||
|
respective formats in your system's PATH for use with this flag.
|
||||||
|
|
||||||
# LESS COMMON OPTIONS
|
# LESS COMMON OPTIONS
|
||||||
|
|
||||||
-A, --after-context *NUM*
|
-A, --after-context *NUM*
|
||||||
|
@ -103,6 +103,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
||||||
("awk", &["*.awk"]),
|
("awk", &["*.awk"]),
|
||||||
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||||
|
("bzip2", &["*.bz2"]),
|
||||||
("c", &["*.c", "*.h", "*.H"]),
|
("c", &["*.c", "*.h", "*.H"]),
|
||||||
("cabal", &["*.cabal"]),
|
("cabal", &["*.cabal"]),
|
||||||
("cbor", &["*.cbor"]),
|
("cbor", &["*.cbor"]),
|
||||||
@ -137,6 +138,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
||||||
("gn", &["*.gn", "*.gni"]),
|
("gn", &["*.gn", "*.gni"]),
|
||||||
("go", &["*.go"]),
|
("go", &["*.go"]),
|
||||||
|
("gzip", &["*.gz"]),
|
||||||
("groovy", &["*.groovy", "*.gradle"]),
|
("groovy", &["*.groovy", "*.gradle"]),
|
||||||
("h", &["*.h", "*.hpp"]),
|
("h", &["*.h", "*.hpp"]),
|
||||||
("hbs", &["*.hbs"]),
|
("hbs", &["*.hbs"]),
|
||||||
@ -184,6 +186,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
||||||
("log", &["*.log"]),
|
("log", &["*.log"]),
|
||||||
("lua", &["*.lua"]),
|
("lua", &["*.lua"]),
|
||||||
|
("lzma", &["*.lzma"]),
|
||||||
("m4", &["*.ac", "*.m4"]),
|
("m4", &["*.ac", "*.m4"]),
|
||||||
("make", &[
|
("make", &[
|
||||||
"gnumakefile", "Gnumakefile", "GNUmakefile",
|
"gnumakefile", "Gnumakefile", "GNUmakefile",
|
||||||
@ -276,6 +279,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("wiki", &["*.mediawiki", "*.wiki"]),
|
("wiki", &["*.mediawiki", "*.wiki"]),
|
||||||
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
||||||
("xml", &["*.xml", "*.xml.dist"]),
|
("xml", &["*.xml", "*.xml.dist"]),
|
||||||
|
("xz", &["*.xz"]),
|
||||||
("yacc", &["*.y"]),
|
("yacc", &["*.y"]),
|
||||||
("yaml", &["*.yaml", "*.yml"]),
|
("yaml", &["*.yaml", "*.yml"]),
|
||||||
("zsh", &[
|
("zsh", &[
|
||||||
|
12
src/app.rs
12
src/app.rs
@ -191,6 +191,7 @@ pub fn app() -> App<'static, 'static> {
|
|||||||
.arg(flag("type-clear")
|
.arg(flag("type-clear")
|
||||||
.value_name("TYPE").takes_value(true)
|
.value_name("TYPE").takes_value(true)
|
||||||
.multiple(true).number_of_values(1))
|
.multiple(true).number_of_values(1))
|
||||||
|
.arg(flag("search-zip").short("z"))
|
||||||
}
|
}
|
||||||
|
|
||||||
struct Usage {
|
struct Usage {
|
||||||
@ -450,7 +451,8 @@ lazy_static! {
|
|||||||
can be specified by using the --ignore-file flag several times. \
|
can be specified by using the --ignore-file flag several times. \
|
||||||
When specifying multiple ignore files, earlier files have lower \
|
When specifying multiple ignore files, earlier files have lower \
|
||||||
precedence than later files. If you are looking for a way to \
|
precedence than later files. If you are looking for a way to \
|
||||||
include or exclude files and directories directly used -g instead.");
|
include or exclude files and directories directly used -g \
|
||||||
|
instead.");
|
||||||
doc!(h, "follow",
|
doc!(h, "follow",
|
||||||
"Follow symbolic links.");
|
"Follow symbolic links.");
|
||||||
doc!(h, "max-count",
|
doc!(h, "max-count",
|
||||||
@ -592,6 +594,11 @@ lazy_static! {
|
|||||||
only clears the default type definitions that are found inside \
|
only clears the default type definitions that are found inside \
|
||||||
of ripgrep.\n\nNote that this MUST be passed to every \
|
of ripgrep.\n\nNote that this MUST be passed to every \
|
||||||
invocation of ripgrep. Type settings are NOT persisted.");
|
invocation of ripgrep. Type settings are NOT persisted.");
|
||||||
|
doc!(h, "search-zip",
|
||||||
|
"Search in compressed files.",
|
||||||
|
"Search in compressed files. Currently gz, bz2, xz, and \
|
||||||
|
lzma files are supported. This option expects the decompression \
|
||||||
|
binaries to be available in the system PATH.");
|
||||||
|
|
||||||
h
|
h
|
||||||
};
|
};
|
||||||
@ -599,7 +606,8 @@ lazy_static! {
|
|||||||
|
|
||||||
fn validate_line_number_width(s: String) -> Result<(), String> {
|
fn validate_line_number_width(s: String) -> Result<(), String> {
|
||||||
if s.starts_with("0") {
|
if s.starts_with("0") {
|
||||||
Err(String::from("Custom padding characters are currently not supported. \
|
Err(String::from(
|
||||||
|
"Custom padding characters are currently not supported. \
|
||||||
Please enter only a numeric value."))
|
Please enter only a numeric value."))
|
||||||
} else {
|
} else {
|
||||||
validate_number(s)
|
validate_number(s)
|
||||||
|
@ -77,6 +77,7 @@ pub struct Args {
|
|||||||
type_list: bool,
|
type_list: bool,
|
||||||
types: Types,
|
types: Types,
|
||||||
with_filename: bool,
|
with_filename: bool,
|
||||||
|
search_zip_files: bool
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Args {
|
impl Args {
|
||||||
@ -229,6 +230,7 @@ impl Args {
|
|||||||
.no_messages(self.no_messages)
|
.no_messages(self.no_messages)
|
||||||
.quiet(self.quiet)
|
.quiet(self.quiet)
|
||||||
.text(self.text)
|
.text(self.text)
|
||||||
|
.search_zip_files(self.search_zip_files)
|
||||||
.build()
|
.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -365,6 +367,7 @@ impl<'a> ArgMatches<'a> {
|
|||||||
type_list: self.is_present("type-list"),
|
type_list: self.is_present("type-list"),
|
||||||
types: self.types()?,
|
types: self.types()?,
|
||||||
with_filename: with_filename,
|
with_filename: with_filename,
|
||||||
|
search_zip_files: self.is_present("search-zip")
|
||||||
};
|
};
|
||||||
if args.mmap {
|
if args.mmap {
|
||||||
debug!("will try to use memory maps");
|
debug!("will try to use memory maps");
|
||||||
|
191
src/decompressor.rs
Normal file
191
src/decompressor.rs
Normal file
@ -0,0 +1,191 @@
|
|||||||
|
use std::collections::HashMap;
|
||||||
|
use std::ffi::OsStr;
|
||||||
|
use std::fmt;
|
||||||
|
use std::io::{self, Read};
|
||||||
|
use std::path::Path;
|
||||||
|
use std::process::{self, Stdio};
|
||||||
|
|
||||||
|
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||||
|
|
||||||
|
/// A decompression command: the program to spawn together with the fixed
/// CLI arguments handed to it.
///
/// The path of the file to decompress is not stored here; it is supplied
/// separately when the command is spawned.
#[derive(Clone, Copy, Debug)]
struct DecompressionCommand {
    /// Name of the program to run.
    cmd: &'static str,
    /// Arguments passed to `cmd` ahead of the file path.
    args: &'static [&'static str],
}

impl DecompressionCommand {
    /// Create a new decompression command from a program name and its
    /// fixed arguments.
    fn new(
        cmd: &'static str,
        args: &'static [&'static str],
    ) -> DecompressionCommand {
        DecompressionCommand { cmd: cmd, args: args }
    }
}

impl fmt::Display for DecompressionCommand {
    /// Formats the command as the program name followed by each argument,
    /// separated by single spaces (e.g. `gzip -d -c`).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Write piecewise rather than via `args.join(" ")`: no temporary
        // `String` is allocated, and a command without arguments is not
        // rendered with a trailing space.
        f.write_str(self.cmd)?;
        for arg in self.args {
            write!(f, " {}", arg)?;
        }
        Ok(())
    }
}
|
||||||
|
|
||||||
|
lazy_static! {
    // Maps a file extension (without the leading dot) to the command used to
    // decompress files carrying that extension.
    static ref DECOMPRESSION_COMMANDS: HashMap<
        &'static str,
        DecompressionCommand,
    > = {
        let mut m = HashMap::new();

        // gzip, bzip2 and xz are all invoked with the same two flags.
        const ARGS: &[&str] = &["-d", "-c"];
        m.insert("gz", DecompressionCommand::new("gzip", ARGS));
        m.insert("bz2", DecompressionCommand::new("bzip2", ARGS));
        m.insert("xz", DecompressionCommand::new("xz", ARGS));

        // .lzma files are handed to `xz` with the format selected explicitly.
        const LZMA_ARGS: &[&str] = &["--format=lzma", "-d", "-c"];
        m.insert("lzma", DecompressionCommand::new("xz", LZMA_ARGS));

        m
    };
    // File name globs of the compression formats that can be searched.
    static ref SUPPORTED_COMPRESSION_FORMATS: GlobSet = {
        let mut builder = GlobSetBuilder::new();
        for pattern in &["*.gz", "*.bz2", "*.xz", "*.lzma"] {
            builder.add(Glob::new(pattern).unwrap());
        }
        builder.build().unwrap()
    };
    // File name globs of compressed TAR archives. These are recognized so
    // that they can be skipped instead of being decompressed and searched as
    // raw archive data. Every supported compression format has its TAR
    // counterpart listed here, including lzma (`*.tar.lzma`, `*.tlz`).
    static ref TAR_ARCHIVE_FORMATS: GlobSet = {
        let mut builder = GlobSetBuilder::new();
        let patterns = [
            "*.tar.gz", "*.tar.xz", "*.tar.bz2", "*.tar.lzma",
            "*.tgz", "*.txz", "*.tbz2", "*.tlz",
        ];
        for pattern in &patterns {
            builder.add(Glob::new(pattern).unwrap());
        }
        builder.build().unwrap()
    };
}
|
||||||
|
|
||||||
|
/// DecompressionReader provides an `io::Read` implementation for a limited
|
||||||
|
/// set of compression formats.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct DecompressionReader {
|
||||||
|
cmd: DecompressionCommand,
|
||||||
|
child: process::Child,
|
||||||
|
done: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DecompressionReader {
|
||||||
|
/// Returns a handle to the stdout of the spawned decompression process for
|
||||||
|
/// `path`, which can be directly searched in the worker. When the returned
|
||||||
|
/// value is exhausted, the underlying process is reaped. If the underlying
|
||||||
|
/// process fails, then its stderr is read and converted into a normal
|
||||||
|
/// io::Error.
|
||||||
|
///
|
||||||
|
/// If there is any error in spawning the decompression command, then
|
||||||
|
/// return `None`, after outputting any necessary debug or error messages.
|
||||||
|
pub fn from_path(path: &Path) -> Option<DecompressionReader> {
|
||||||
|
if is_tar_archive(path) {
|
||||||
|
debug!("{}: skipping tar archive", path.display());
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let extension = match path.extension().and_then(OsStr::to_str) {
|
||||||
|
Some(extension) => extension,
|
||||||
|
None => {
|
||||||
|
debug!(
|
||||||
|
"{}: failed to get compresson extension", path.display());
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let decompression_cmd = match DECOMPRESSION_COMMANDS.get(extension) {
|
||||||
|
Some(cmd) => cmd,
|
||||||
|
None => {
|
||||||
|
debug!(
|
||||||
|
"{}: failed to get decompression command", path.display());
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let cmd = process::Command::new(decompression_cmd.cmd)
|
||||||
|
.args(decompression_cmd.args)
|
||||||
|
.arg(path)
|
||||||
|
.stdout(Stdio::piped())
|
||||||
|
.stderr(Stdio::piped())
|
||||||
|
.spawn();
|
||||||
|
let child = match cmd {
|
||||||
|
Ok(process) => process,
|
||||||
|
Err(_) => {
|
||||||
|
debug!(
|
||||||
|
"{}: decompression command '{}' not found",
|
||||||
|
path.display(), decompression_cmd.cmd);
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Some(DecompressionReader::new(*decompression_cmd, child))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new(
|
||||||
|
cmd: DecompressionCommand,
|
||||||
|
child: process::Child,
|
||||||
|
) -> DecompressionReader {
|
||||||
|
DecompressionReader {
|
||||||
|
cmd: cmd,
|
||||||
|
child: child,
|
||||||
|
done: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn read_error(&mut self) -> io::Result<io::Error> {
|
||||||
|
let mut errbytes = vec![];
|
||||||
|
self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?;
|
||||||
|
let errstr = String::from_utf8_lossy(&errbytes);
|
||||||
|
let errstr = errstr.trim();
|
||||||
|
|
||||||
|
Ok(if errstr.is_empty() {
|
||||||
|
let msg = format!("decompression command failed: '{}'", self.cmd);
|
||||||
|
io::Error::new(io::ErrorKind::Other, msg)
|
||||||
|
} else {
|
||||||
|
let msg = format!(
|
||||||
|
"decompression command '{}' failed: {}", self.cmd, errstr);
|
||||||
|
io::Error::new(io::ErrorKind::Other, msg)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl io::Read for DecompressionReader {
|
||||||
|
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||||
|
if self.done {
|
||||||
|
return Ok(0);
|
||||||
|
}
|
||||||
|
let nread = self.child.stdout.as_mut().unwrap().read(buf)?;
|
||||||
|
if nread == 0 {
|
||||||
|
self.done = true;
|
||||||
|
// Reap the child now that we're done reading.
|
||||||
|
// If the command failed, report stderr as an error.
|
||||||
|
if !self.child.wait()?.success() {
|
||||||
|
return Err(self.read_error()?);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(nread)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the given path contains a supported compression format or
|
||||||
|
/// is a TAR archive.
|
||||||
|
pub fn is_compressed(path: &Path) -> bool {
|
||||||
|
is_supported_compression_format(path) || is_tar_archive(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the given path matches any one of the supported compression
|
||||||
|
/// formats
|
||||||
|
fn is_supported_compression_format(path: &Path) -> bool {
|
||||||
|
SUPPORTED_COMPRESSION_FORMATS.is_match(path)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the given path matches any of the known TAR file formats.
|
||||||
|
fn is_tar_archive(path: &Path) -> bool {
|
||||||
|
TAR_ARCHIVE_FORMATS.is_match(path)
|
||||||
|
}
|
@ -4,6 +4,7 @@ extern crate bytecount;
|
|||||||
extern crate clap;
|
extern crate clap;
|
||||||
extern crate encoding_rs;
|
extern crate encoding_rs;
|
||||||
extern crate env_logger;
|
extern crate env_logger;
|
||||||
|
extern crate globset;
|
||||||
extern crate grep;
|
extern crate grep;
|
||||||
extern crate ignore;
|
extern crate ignore;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
@ -44,6 +45,7 @@ macro_rules! eprintln {
|
|||||||
mod app;
|
mod app;
|
||||||
mod args;
|
mod args;
|
||||||
mod decoder;
|
mod decoder;
|
||||||
|
mod decompressor;
|
||||||
mod pathutil;
|
mod pathutil;
|
||||||
mod printer;
|
mod printer;
|
||||||
mod search_buffer;
|
mod search_buffer;
|
||||||
|
@ -9,6 +9,7 @@ use memmap::Mmap;
|
|||||||
use termcolor::WriteColor;
|
use termcolor::WriteColor;
|
||||||
|
|
||||||
use decoder::DecodeReader;
|
use decoder::DecodeReader;
|
||||||
|
use decompressor::{self, DecompressionReader};
|
||||||
use pathutil::strip_prefix;
|
use pathutil::strip_prefix;
|
||||||
use printer::Printer;
|
use printer::Printer;
|
||||||
use search_buffer::BufferSearcher;
|
use search_buffer::BufferSearcher;
|
||||||
@ -42,6 +43,7 @@ struct Options {
|
|||||||
no_messages: bool,
|
no_messages: bool,
|
||||||
quiet: bool,
|
quiet: bool,
|
||||||
text: bool,
|
text: bool,
|
||||||
|
search_zip_files: bool
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Options {
|
impl Default for Options {
|
||||||
@ -61,6 +63,7 @@ impl Default for Options {
|
|||||||
no_messages: false,
|
no_messages: false,
|
||||||
quiet: false,
|
quiet: false,
|
||||||
text: false,
|
text: false,
|
||||||
|
search_zip_files: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -190,6 +193,12 @@ impl WorkerBuilder {
|
|||||||
self.opts.text = yes;
|
self.opts.text = yes;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If enabled, search through compressed files as well
|
||||||
|
pub fn search_zip_files(mut self, yes: bool) -> Self {
|
||||||
|
self.opts.search_zip_files = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Worker is responsible for executing searches on file paths, while choosing
|
/// Worker is responsible for executing searches on file paths, while choosing
|
||||||
@ -218,6 +227,16 @@ impl Worker {
|
|||||||
}
|
}
|
||||||
Work::DirEntry(dent) => {
|
Work::DirEntry(dent) => {
|
||||||
let mut path = dent.path();
|
let mut path = dent.path();
|
||||||
|
if self.opts.search_zip_files
|
||||||
|
&& decompressor::is_compressed(path)
|
||||||
|
{
|
||||||
|
match DecompressionReader::from_path(path) {
|
||||||
|
Some(reader) => self.search(printer, path, reader),
|
||||||
|
None => {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
let file = match File::open(path) {
|
let file = match File::open(path) {
|
||||||
Ok(file) => file,
|
Ok(file) => file,
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
@ -236,6 +255,7 @@ impl Worker {
|
|||||||
self.search(printer, path, file)
|
self.search(printer, path, file)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
match result {
|
match result {
|
||||||
Ok(count) => {
|
Ok(count) => {
|
||||||
|
BIN
tests/data/sherlock.bz2
Normal file
BIN
tests/data/sherlock.bz2
Normal file
Binary file not shown.
BIN
tests/data/sherlock.gz
Normal file
BIN
tests/data/sherlock.gz
Normal file
Binary file not shown.
BIN
tests/data/sherlock.lzma
Normal file
BIN
tests/data/sherlock.lzma
Normal file
Binary file not shown.
BIN
tests/data/sherlock.xz
Normal file
BIN
tests/data/sherlock.xz
Normal file
Binary file not shown.
102
tests/tests.rs
102
tests/tests.rs
@ -75,6 +75,10 @@ fn sort_lines(lines: &str) -> String {
|
|||||||
format!("{}\n", lines.join("\n"))
|
format!("{}\n", lines.join("\n"))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true when a program called `name` could be launched.
///
/// The program is run with `--help`; only the ability to spawn it is
/// checked, its exit status and output are ignored.
fn cmd_exists(name: &str) -> bool {
    let mut probe = Command::new(name);
    probe.arg("--help");
    match probe.output() {
        Ok(_) => true,
        Err(_) => false,
    }
}
|
||||||
|
|
||||||
sherlock!(single_file, |wd: WorkDir, mut cmd| {
|
sherlock!(single_file, |wd: WorkDir, mut cmd| {
|
||||||
let lines: String = wd.stdout(&mut cmd);
|
let lines: String = wd.stdout(&mut cmd);
|
||||||
let expected = "\
|
let expected = "\
|
||||||
@ -1609,6 +1613,104 @@ clean!(suggest_fixed_strings_for_invalid_regex, "foo(", ".",
|
|||||||
assert_eq!(err.contains("--fixed-strings"), true);
|
assert_eq!(err.contains("--fixed-strings"), true);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
/// Searching a gzip-compressed copy of the sherlock fixture with `-z` must
/// print the same matches as searching the plain text. Skipped when `gzip`
/// is not available.
#[test]
fn compressed_gzip() {
    if !cmd_exists("gzip") {
        return;
    }
    let compressed = include_bytes!("./data/sherlock.gz");

    let wd = WorkDir::new("feature_search_compressed");
    wd.create_bytes("sherlock.gz", compressed);

    let mut cmd = wd.command();
    cmd.arg("-z");
    cmd.arg("Sherlock");
    cmd.arg("sherlock.gz");

    let expected = concat!(
        "For the Doctor Watsons of this world, as opposed to the Sherlock\n",
        "be, to a very large extent, the result of luck. Sherlock Holmes\n"
    );
    let actual: String = wd.stdout(&mut cmd);
    assert_eq!(actual, expected);
}
|
||||||
|
|
||||||
|
/// Searching a bzip2-compressed copy of the sherlock fixture with `-z` must
/// print the same matches as searching the plain text. Skipped when `bzip2`
/// is not available.
#[test]
fn compressed_bzip2() {
    if !cmd_exists("bzip2") {
        return;
    }
    let compressed = include_bytes!("./data/sherlock.bz2");

    let wd = WorkDir::new("feature_search_compressed");
    wd.create_bytes("sherlock.bz2", compressed);

    let mut cmd = wd.command();
    cmd.arg("-z");
    cmd.arg("Sherlock");
    cmd.arg("sherlock.bz2");

    let expected = concat!(
        "For the Doctor Watsons of this world, as opposed to the Sherlock\n",
        "be, to a very large extent, the result of luck. Sherlock Holmes\n"
    );
    let actual: String = wd.stdout(&mut cmd);
    assert_eq!(actual, expected);
}
|
||||||
|
|
||||||
|
/// Searching an xz-compressed copy of the sherlock fixture with `-z` must
/// print the same matches as searching the plain text. Skipped when `xz` is
/// not available.
#[test]
fn compressed_xz() {
    if !cmd_exists("xz") {
        return;
    }
    let compressed = include_bytes!("./data/sherlock.xz");

    let wd = WorkDir::new("feature_search_compressed");
    wd.create_bytes("sherlock.xz", compressed);

    let mut cmd = wd.command();
    cmd.arg("-z");
    cmd.arg("Sherlock");
    cmd.arg("sherlock.xz");

    let expected = concat!(
        "For the Doctor Watsons of this world, as opposed to the Sherlock\n",
        "be, to a very large extent, the result of luck. Sherlock Holmes\n"
    );
    let actual: String = wd.stdout(&mut cmd);
    assert_eq!(actual, expected);
}
|
||||||
|
|
||||||
|
/// Searching an lzma-compressed copy of the sherlock fixture with `-z` must
/// print the same matches as searching the plain text. Skipped when `xz`
/// (which performs the lzma decompression) is not available.
#[test]
fn compressed_lzma() {
    if !cmd_exists("xz") {
        return;
    }
    let compressed = include_bytes!("./data/sherlock.lzma");

    let wd = WorkDir::new("feature_search_compressed");
    wd.create_bytes("sherlock.lzma", compressed);

    let mut cmd = wd.command();
    cmd.arg("-z");
    cmd.arg("Sherlock");
    cmd.arg("sherlock.lzma");

    let expected = concat!(
        "For the Doctor Watsons of this world, as opposed to the Sherlock\n",
        "be, to a very large extent, the result of luck. Sherlock Holmes\n"
    );
    let actual: String = wd.stdout(&mut cmd);
    assert_eq!(actual, expected);
}
|
||||||
|
|
||||||
|
/// A file that merely carries a `.gz` extension but holds plain text must
/// make the search fail, with gzip's complaint surfaced on stderr. Skipped
/// when `gzip` is not available.
#[test]
fn compressed_failing_gzip() {
    if !cmd_exists("gzip") {
        return;
    }
    let wd = WorkDir::new("feature_search_compressed");
    // Deliberately write uncompressed text under a gzip file name.
    wd.create("sherlock.gz", hay::SHERLOCK);

    let mut cmd = wd.command();
    cmd.arg("-z");
    cmd.arg("Sherlock");
    cmd.arg("sherlock.gz");

    wd.assert_non_empty_stderr(&mut cmd);

    let result = cmd.output().unwrap();
    let stderr_text = String::from_utf8_lossy(&result.stderr);
    let mentions_format = stderr_text.contains("not in gzip format");
    assert_eq!(mentions_format, true);
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn feature_740_passthru() {
|
fn feature_740_passthru() {
|
||||||
let wd = WorkDir::new("feature_740");
|
let wd = WorkDir::new("feature_740");
|
||||||
|
Loading…
x
Reference in New Issue
Block a user