diff --git a/crates/cli/src/decompress.rs b/crates/cli/src/decompress.rs index 94e118b1..813cca6c 100644 --- a/crates/cli/src/decompress.rs +++ b/crates/cli/src/decompress.rs @@ -1,7 +1,7 @@ use std::ffi::{OsStr, OsString}; use std::fs::File; use std::io; -use std::path::Path; +use std::path::{Path, PathBuf}; use std::process::Command; use globset::{Glob, GlobSet, GlobSetBuilder}; @@ -24,7 +24,7 @@ struct DecompressionCommand { /// The glob that matches this command. glob: String, /// The command or binary name. - bin: OsString, + bin: PathBuf, /// The arguments to invoke with the command. args: Vec, } @@ -83,23 +83,60 @@ impl DecompressionMatcherBuilder { /// /// The syntax for the glob is documented in the /// [`globset` crate](https://docs.rs/globset/#syntax). + /// + /// The `program` given is resolved with respect to `PATH` and turned + /// into an absolute path internally before being executed by the current + /// platform. Notably, on Windows, this avoids a security problem where + /// passing a relative path to `CreateProcess` will automatically search + /// the current directory for a matching program. If the program could + /// not be resolved, then it is silently ignored and the association is + /// dropped. For this reason, callers should prefer `try_associate`. pub fn associate( &mut self, glob: &str, program: P, args: I, ) -> &mut DecompressionMatcherBuilder + where + P: AsRef, + I: IntoIterator, + A: AsRef, + { + let _ = self.try_associate(glob, program, args); + self + } + + /// Associates a glob with a command to decompress files matching the glob. + /// + /// If multiple globs match the same file, then the most recently added + /// glob takes precedence. + /// + /// The syntax for the glob is documented in the + /// [`globset` crate](https://docs.rs/globset/#syntax). + /// + /// The `program` given is resolved with respect to `PATH` and turned + /// into an absolute path internally before being executed by the current + /// platform. Notably, on Windows, this avoids a security problem where + /// passing a relative path to `CreateProcess` will automatically search + /// the current directory for a matching program. If the program could not + /// be resolved, then an error is returned. + pub fn try_associate( + &mut self, + glob: &str, + program: P, + args: I, + ) -> Result<&mut DecompressionMatcherBuilder, CommandError> where P: AsRef, I: IntoIterator, A: AsRef, { let glob = glob.to_string(); - let bin = program.as_ref().to_os_string(); + let bin = resolve_binary(Path::new(program.as_ref()))?; let args = args.into_iter().map(|a| a.as_ref().to_os_string()).collect(); self.commands.push(DecompressionCommand { glob, bin, args }); - self + Ok(self) } } @@ -340,6 +377,70 @@ impl io::Read for DecompressionReader { } } +/// Resolves a path to a program to a path by searching for the program in +/// `PATH`. +/// +/// If the program could not be resolved, then an error is returned. +/// +/// The purpose of doing this instead of passing the path to the program +/// directly to Command::new is that Command::new will hand relative paths +/// to CreateProcess on Windows, which will implicitly search the current +/// working directory for the executable. This could be undesirable for +/// security reasons. e.g., running ripgrep with the -z/--search-zip flag on an +/// untrusted directory tree could result in arbitrary programs executing on +/// Windows. +/// +/// Note that this could still return a relative path if PATH contains a +/// relative path. We permit this since it is assumed that the user has set +/// this explicitly, and thus, desires this behavior. +/// +/// On non-Windows, this is a no-op. +pub fn resolve_binary>( + prog: P, +) -> Result { + use std::env; + + fn is_exe(path: &Path) -> bool { + let md = match path.metadata() { + Err(_) => return false, + Ok(md) => md, + }; + !md.is_dir() + } + + let prog = prog.as_ref(); + if !cfg!(windows) || prog.is_absolute() { + return Ok(prog.to_path_buf()); + } + let syspaths = match env::var_os("PATH") { + Some(syspaths) => syspaths, + None => { + let msg = "system PATH environment variable not found"; + return Err(CommandError::io(io::Error::new( + io::ErrorKind::Other, + msg, + ))); + } + }; + for syspath in env::split_paths(&syspaths) { + if syspath.as_os_str().is_empty() { + continue; + } + let abs_prog = syspath.join(prog); + if is_exe(&abs_prog) { + return Ok(abs_prog.to_path_buf()); + } + if abs_prog.extension().is_none() { + let abs_prog = abs_prog.with_extension("exe"); + if is_exe(&abs_prog) { + return Ok(abs_prog.to_path_buf()); + } + } + } + let msg = format!("{}: could not find executable in PATH", prog.display()); + return Err(CommandError::io(io::Error::new(io::ErrorKind::Other, msg))); +} + fn default_decompression_commands() -> Vec { const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"]; const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"]; @@ -350,29 +451,36 @@ fn default_decompression_commands() -> Vec { const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"]; const ARGS_UNCOMPRESS: &[&str] = &["uncompress", "-c"]; - fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand { - DecompressionCommand { + fn add(glob: &str, args: &[&str], cmds: &mut Vec) { + let bin = match resolve_binary(Path::new(args[0])) { + Ok(bin) => bin, + Err(err) => { + debug!("{}", err); + return; + } + }; + cmds.push(DecompressionCommand { glob: glob.to_string(), - bin: OsStr::new(&args[0]).to_os_string(), + bin, args: args .iter() .skip(1) .map(|s| OsStr::new(s).to_os_string()) .collect(), - } + }); } - vec![ - cmd("*.gz", ARGS_GZIP), - cmd("*.tgz", ARGS_GZIP), - cmd("*.bz2", ARGS_BZIP), - cmd("*.tbz2", ARGS_BZIP), - cmd("*.xz", ARGS_XZ), - cmd("*.txz", ARGS_XZ), - cmd("*.lz4", ARGS_LZ4), - cmd("*.lzma", ARGS_LZMA), - cmd("*.br", ARGS_BROTLI), - cmd("*.zst", ARGS_ZSTD), - cmd("*.zstd", ARGS_ZSTD), - cmd("*.Z", ARGS_UNCOMPRESS), - ] + let mut cmds = vec![]; + add("*.gz", ARGS_GZIP, &mut cmds); + add("*.tgz", ARGS_GZIP, &mut cmds); + add("*.bz2", ARGS_BZIP, &mut cmds); + add("*.tbz2", ARGS_BZIP, &mut cmds); + add("*.xz", ARGS_XZ, &mut cmds); + add("*.txz", ARGS_XZ, &mut cmds); + add("*.lz4", ARGS_LZ4, &mut cmds); + add("*.lzma", ARGS_LZMA, &mut cmds); + add("*.br", ARGS_BROTLI, &mut cmds); + add("*.zst", ARGS_ZSTD, &mut cmds); + add("*.zstd", ARGS_ZSTD, &mut cmds); + add("*.Z", ARGS_UNCOMPRESS, &mut cmds); + cmds } diff --git a/crates/cli/src/lib.rs b/crates/cli/src/lib.rs index 9fe1cf3c..5453ccce 100644 --- a/crates/cli/src/lib.rs +++ b/crates/cli/src/lib.rs @@ -179,8 +179,8 @@ mod process; mod wtr; pub use decompress::{ - DecompressionMatcher, DecompressionMatcherBuilder, DecompressionReader, - DecompressionReaderBuilder, + resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder, + DecompressionReader, DecompressionReaderBuilder, }; pub use escape::{escape, escape_os, unescape, unescape_os}; pub use human::{parse_human_readable_size, ParseSizeError}; diff --git a/crates/core/args.rs b/crates/core/args.rs index e889f7d2..9c490e4e 100644 --- a/crates/core/args.rs +++ b/crates/core/args.rs @@ -290,7 +290,7 @@ impl Args { let mut builder = SearchWorkerBuilder::new(); builder .json_stats(matches.is_present("json")) - .preprocessor(matches.preprocessor()) + .preprocessor(matches.preprocessor())? .preprocessor_globs(matches.preprocessor_globs()?) .search_zip(matches.is_present("search-zip")) .binary_detection_implicit(matches.binary_detection_implicit()) diff --git a/crates/core/search.rs b/crates/core/search.rs index 4da4057b..c57e9ee6 100644 --- a/crates/core/search.rs +++ b/crates/core/search.rs @@ -115,9 +115,14 @@ impl SearchWorkerBuilder { pub fn preprocessor( &mut self, cmd: Option, - ) -> &mut SearchWorkerBuilder { - self.config.preprocessor = cmd; - self + ) -> crate::Result<&mut SearchWorkerBuilder> { + if let Some(ref prog) = cmd { + let bin = cli::resolve_binary(prog)?; + self.config.preprocessor = Some(bin); + } else { + self.config.preprocessor = None; + } + Ok(self) } /// Set the globs for determining which files should be run through the