diff --git a/CHANGELOG.md b/CHANGELOG.md
index 226ca6a7..f757f550 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -46,6 +46,8 @@ Feature enhancements:
   The `--passthru` flag now works with the `--replace` flag.
 * FEATURE:
   Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
+* FEATURE:
+  Add `--pre-glob` for filtering files through the `--pre` flag.
 
 Bug fixes:
 
diff --git a/complete/_rg b/complete/_rg
index 2bbdf992..6f7b0ef8 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -183,6 +183,9 @@ _rg() {
     '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
     $no'--no-pre[disable preprocessor utility]'
 
+    + pre-glob # Preprocessing glob options
+    '*--pre-glob[include/exclude files for preprocessing with --pre]'
+
     + '(pretty-vimgrep)' # Pretty/vimgrep display options
     '(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
     '(heading passthru)--vimgrep[show results in vim-compatible format]'
diff --git a/src/app.rs b/src/app.rs
index e0e1eda7..03239277 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -598,6 +598,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_passthru(&mut args);
     flag_pcre2(&mut args);
     flag_pre(&mut args);
+    flag_pre_glob(&mut args);
     flag_pretty(&mut args);
     flag_quiet(&mut args);
     flag_regex_size_limit(&mut args);
@@ -1819,6 +1820,97 @@ This flag can be disabled with --no-pcre2.
     args.push(arg);
 }
 
+fn flag_pre(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
+    const LONG: &str = long!("\
+For each input FILE, search the standard output of COMMAND FILE rather than the
+contents of FILE. This option expects the COMMAND program to either be an
+absolute path or to be available in your PATH. Either an empty string COMMAND
+or the `--no-pre` flag will disable this behavior.
+
+    WARNING: When this flag is set, ripgrep will unconditionally spawn a
+    process for every file that is searched. Therefore, this can incur an
+    unnecessarily large performance penalty if you don't otherwise need the
+    flexibility offered by this flag.
+
+A preprocessor is not run when ripgrep is searching stdin.
+
+When searching over sets of files that may require one of several decoders
+as preprocessors, COMMAND should be a wrapper program or script which first
+classifies FILE based on magic numbers/content or based on the FILE name and
+then dispatches to an appropriate preprocessor. Each COMMAND also has its
+standard input connected to FILE for convenience.
+
+For example, a shell script for COMMAND might look like:
+
+    case \"$1\" in
+    *.pdf)
+        exec pdftotext \"$1\" -
+        ;;
+    *)
+        case $(file \"$1\") in
+        *Zstandard*)
+            exec pzstd -cdq
+            ;;
+        *)
+            exec cat
+            ;;
+        esac
+        ;;
+    esac
+
+The above script uses `pdftotext` to convert a PDF file to plain text. For
+all other files, the script uses the `file` utility to sniff the type of the
+file based on its contents. If it is a compressed file in the Zstandard format,
+then `pzstd` is used to decompress the contents to stdout.
+
+This overrides the -z/--search-zip flag.
+");
+    let arg = RGArg::flag("pre", "COMMAND")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-pre")
+        .overrides("search-zip");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-pre")
+        .hidden()
+        .overrides("pre");
+    args.push(arg);
+}
+
+fn flag_pre_glob(args: &mut Vec<RGArg>) {
+    const SHORT: &str =
+        "Include or exclude files from a preprocessing command.";
+    const LONG: &str = long!("\
+This flag works in conjunction with the --pre flag. Namely, when one or more
+--pre-glob flags are given, then only files that match the given set of globs
+will be handed to the command specified by the --pre flag. Any non-matching
+files will be searched without using the preprocessor command.
+
+This flag is useful when searching many files with the --pre flag. Namely,
+it permits the ability to avoid process overhead for files that don't need
+preprocessing. For example, given the following shell script, 'pre-pdftotext':
+
+    #!/bin/sh
+
+    pdftotext \"$1\" -
+
+then it is possible to use '--pre pre-pdftotext --pre-glob \'*.pdf\'' to make
+it so ripgrep only executes the 'pre-pdftotext' command on files with a '.pdf'
+extension.
+
+Multiple --pre-glob flags may be used. Globbing rules match .gitignore globs.
+Precede a glob with a ! to exclude it.
+
+This flag has no effect if the --pre flag is not used.
+");
+    let arg = RGArg::flag("pre-glob", "GLOB")
+        .help(SHORT).long_help(LONG)
+        .multiple()
+        .allow_leading_hyphen();
+    args.push(arg);
+}
+
 fn flag_pretty(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Alias for --color always --heading --line-number.";
     const LONG: &str = long!("\
@@ -1924,64 +2016,6 @@ This flag can be disabled with --no-search-zip.
     args.push(arg);
 }
 
-fn flag_pre(args: &mut Vec<RGArg>) {
-    const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
-    const LONG: &str = long!("\
-For each input FILE, search the standard output of COMMAND FILE rather than the
-contents of FILE. This option expects the COMMAND program to either be an
-absolute path or to be available in your PATH. Either an empty string COMMAND
-or the `--no-pre` flag will disable this behavior.
-
-    WARNING: When this flag is set, ripgrep will unconditionally spawn a
-    process for every file that is searched. Therefore, this can incur an
-    unnecessarily large performance penalty if you don't otherwise need the
-    flexibility offered by this flag.
-
-A preprocessor is not run when ripgrep is searching stdin.
-
-When searching over sets of files that may require one of several decoders
-as preprocessors, COMMAND should be a wrapper program or script which first
-classifies FILE based on magic numbers/content or based on the FILE name and
-then dispatches to an appropriate preprocessor. Each COMMAND also has its
-standard input connected to FILE for convenience.
-
-For example, a shell script for COMMAND might look like:
-
-    case \"$1\" in
-    *.pdf)
-        exec pdftotext \"$1\" -
-        ;;
-    *)
-        case $(file \"$1\") in
-        *Zstandard*)
-            exec pzstd -cdq
-            ;;
-        *)
-            exec cat
-            ;;
-        esac
-        ;;
-    esac
-
-The above script uses `pdftotext` to convert a PDF file to plain text. For
-all other files, the script uses the `file` utility to sniff the type of the
-file based on its contents. If it is a compressed file in the Zstandard format,
-then `pzstd` is used to decompress the contents to stdout.
-
-This overrides the -z/--search-zip flag.
-");
-    let arg = RGArg::flag("pre", "COMMAND")
-        .help(SHORT).long_help(LONG)
-        .overrides("no-pre")
-        .overrides("search-zip");
-    args.push(arg);
-
-    let arg = RGArg::switch("no-pre")
-        .hidden()
-        .overrides("pre");
-    args.push(arg);
-}
-
 fn flag_smart_case(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Smart case search.";
     const LONG: &str = long!("\
diff --git a/src/args.rs b/src/args.rs
index f8a29cae..1a38d3ef 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -285,6 +285,7 @@ impl Args {
         builder
             .json_stats(self.matches().is_present("json"))
             .preprocessor(self.matches().preprocessor())
+            .preprocessor_globs(self.matches().preprocessor_globs()?)
             .search_zip(self.matches().is_present("search-zip"));
         Ok(builder.build(matcher, searcher, printer))
     }
@@ -1323,6 +1324,17 @@ impl ArgMatches {
         Some(Path::new(path).to_path_buf())
     }
 
+    /// Builds the set of globs for filtering files to apply to the --pre
+    /// flag. If no --pre-globs are available, then this always returns an
+    /// empty set of globs.
+    fn preprocessor_globs(&self) -> Result<Override> {
+        let mut builder = OverrideBuilder::new(env::current_dir()?);
+        for glob in self.values_of_lossy_vec("pre-glob") {
+            builder.add(&glob)?;
+        }
+        Ok(builder.build()?)
+    }
+
     /// Parse the regex-size-limit argument option into a byte count.
     fn regex_size_limit(&self) -> Result<Option<usize>> {
         let r = self.parse_human_readable_size("regex-size-limit")?;
diff --git a/src/search.rs b/src/search.rs
index 457f8f7a..9baf513f 100644
--- a/src/search.rs
+++ b/src/search.rs
@@ -11,6 +11,7 @@ use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
 use grep::printer::{JSON, Standard, Summary, Stats};
 use grep::regex::{RegexMatcher as RustRegexMatcher};
 use grep::searcher::Searcher;
+use ignore::overrides::Override;
 use serde_json as json;
 use termcolor::WriteColor;
 
@@ -23,6 +24,7 @@ use subject::Subject;
 struct Config {
     json_stats: bool,
     preprocessor: Option<PathBuf>,
+    preprocessor_globs: Override,
     search_zip: bool,
 }
 
@@ -31,6 +33,7 @@ impl Default for Config {
         Config {
             json_stats: false,
             preprocessor: None,
+            preprocessor_globs: Override::empty(),
             search_zip: false,
         }
     }
@@ -108,6 +111,17 @@ impl SearchWorkerBuilder {
         self
     }
 
+    /// Set the globs for determining which files should be run through the
+    /// preprocessor. By default, with no globs and a preprocessor specified,
+    /// every file is run through the preprocessor.
+    pub fn preprocessor_globs(
+        &mut self,
+        globs: Override,
+    ) -> &mut SearchWorkerBuilder {
+        self.config.preprocessor_globs = globs;
+        self
+    }
+
     /// Enable the decompression and searching of common compressed files.
     ///
     /// When enabled, if a particular file path is recognized as a compressed
@@ -298,7 +312,7 @@ impl SearchWorker {
             let stdin = io::stdin();
             // A `return` here appeases the borrow checker. NLL will fix this.
             return self.search_reader(path, stdin.lock());
-        } else if self.config.preprocessor.is_some() {
+        } else if self.should_preprocess(path) {
             self.search_preprocessor(path)
         } else if self.should_decompress(path) {
             self.search_decompress(path)
@@ -316,6 +330,20 @@ impl SearchWorker {
         self.decomp_builder.get_matcher().has_command(path)
     }
 
+    /// Returns true if and only if the given file path should be run through
+    /// the preprocessor.
+    fn should_preprocess(&self, path: &Path) -> bool {
+        if !self.config.preprocessor.is_some() {
+            return false;
+        }
+        if self.config.preprocessor_globs.is_empty() {
+            return true;
+        }
+        !self.config.preprocessor_globs.matched(path, false).is_ignore()
+    }
+
+    /// Search the given file path by first asking the preprocessor for the
+    /// data to search instead of opening the path directly.
     fn search_preprocessor(
         &mut self,
         path: &Path,
@@ -333,6 +361,9 @@ impl SearchWorker {
         })
     }
 
+    /// Attempt to decompress the data at the given file path and search the
+    /// result. If the given file path isn't recognized as a compressed file,
+    /// then search it without doing any decompression.
     fn search_decompress(
         &mut self,
         path: &Path,
diff --git a/tests/misc.rs b/tests/misc.rs
index 62226ceb..9b5a7a75 100644
--- a/tests/misc.rs
+++ b/tests/misc.rs
@@ -816,6 +816,24 @@ be, to a very large extent, the result of luck. Sherlock Holmes
     eqnice!(expected, cmd.stdout());
 });
 
+rgtest!(preprocessing_glob, |dir: Dir, mut cmd: TestCommand| {
+    if !cmd_exists("xzcat") {
+        return;
+    }
+
+    dir.create("sherlock", SHERLOCK);
+    dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz"));
+    cmd.args(&["--pre", "xzcat", "--pre-glob", "*.xz", "Sherlock"]);
+
+    let expected = "\
+sherlock.xz:For the Doctor Watsons of this world, as opposed to the Sherlock
+sherlock.xz:be, to a very large extent, the result of luck. Sherlock Holmes
+sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
+sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
+";
+    eqnice!(sort_lines(expected), sort_lines(&cmd.stdout()));
+});
+
 rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| {
     if !cmd_exists("gzip") {
         return;