mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-16 12:43:49 -07:00
Lots of progress:
- Refactored interaction between CLI args and rest of xrep.
- Filling in a lot more options, including file type filtering.
- Fixing some bugs in globbing/ignoring.
- More documentation.
This commit is contained in:
551
src/args.rs
Normal file
551
src/args.rs
Normal file
@@ -0,0 +1,551 @@
|
||||
use std::cmp;
|
||||
use std::env;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use docopt::Docopt;
|
||||
use env_logger;
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use regex;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use gitignore::{Gitignore, GitignoreBuilder};
|
||||
use ignore::Ignore;
|
||||
use out::Out;
|
||||
use printer::Printer;
|
||||
use search::{InputBuffer, Searcher};
|
||||
use types::{FileTypeDef, Types, TypesBuilder};
|
||||
use walk;
|
||||
|
||||
use Result;
|
||||
|
||||
/// The Docopt usage string.
///
/// If you've never heard of Docopt before, see: http://docopt.org
/// (TL;DR: The CLI parser is generated from the usage string below.)
///
/// The layout of this string is significant: Docopt reads the option
/// specifications out of it, and an option is separated from its description
/// either by two or more spaces or by a line break. `[default: ...]`
/// annotations inside a description supply the value used when the flag is
/// absent.
const USAGE: &'static str = "
Usage: xrep [options] <pattern> [<path> ...]
       xrep [options] --files [<path> ...]
       xrep [options] --type-list
       xrep --help
       xrep --version

xrep is like the silver searcher and grep, but faster than both.

Common options:
    -a, --text                 Search binary files as if they were text.
    -c, --count                Only show count of line matches for each file.
    -g, --glob GLOB ...        Include or exclude files for searching that
                               match the given glob. This always overrides any
                               other ignore logic. Multiple glob flags may be
                               used. Globbing rules match .gitignore globs.
                               Precede a glob with a '!' to exclude it.
    -h, --help                 Show this usage message.
    -i, --ignore-case          Case insensitive search.
    -n, --line-number          Show line numbers (1-based).
    -q, --quiet                Do not print anything to stdout.
    -t, --type TYPE ...        Only search files matching TYPE. Multiple type
                               flags may be provided. Use the --type-list flag
                               to list all available types.
    -T, --type-not TYPE ...    Do not search files matching TYPE. Multiple
                               not-type flags may be provided.
    -v, --invert-match         Invert matching.
    -w, --word-regexp          Only show matches surrounded by word boundaries.
                               This is equivalent to putting \\b before and
                               after the search pattern.

Less common options:
    -A, --after-context NUM
        Show NUM lines after each match.

    -B, --before-context NUM
        Show NUM lines before each match.

    -C, --context NUM
        Show NUM lines before and after each match.

    --context-separator ARG
        The string to use when separating non-continuous context lines. Escape
        sequences may be used. [default: --]

    --debug
        Show debug messages.

    --files
        Print each file that would be searched (but don't search).

    -H, --with-filename
        Prefix each match with the file name that contains it. This is the
        default when more than one file is searched.

    --hidden
        Search hidden directories and files.

    -L, --follow
        Follow symlinks.

    --line-terminator ARG
        The byte to use for a line terminator. Escape sequences may be used.
        [default: \\n]

    --no-ignore
        Don't respect ignore files (.gitignore, .xrepignore, etc.)

    -Q, --literal
        Treat the pattern as a literal string instead of a regular expression.

    --threads ARG
        The number of threads to use. Defaults to the number of logical CPUs
        (capped at 6). [default: 0]

    --version
        Show the version number of xrep and exit.

File type management options:
    --type-list
        Show all supported file types and their associated globs.

    --type-add ARG ...
        Add a new glob for a particular file type.
        Example: --type-add html:*.html,*.htm

    --type-clear TYPE ...
        Clear the file type globs for TYPE.
";
|
||||
|
||||
/// RawArgs are the args as they are parsed from Docopt. They aren't used
|
||||
/// directly by the rest of xrep.
|
||||
#[derive(Debug, RustcDecodable)]
|
||||
pub struct RawArgs {
|
||||
arg_pattern: String,
|
||||
arg_path: Vec<String>,
|
||||
flag_after_context: usize,
|
||||
flag_before_context: usize,
|
||||
flag_context: usize,
|
||||
flag_context_separator: String,
|
||||
flag_count: bool,
|
||||
flag_debug: bool,
|
||||
flag_files: bool,
|
||||
flag_follow: bool,
|
||||
flag_glob: Vec<String>,
|
||||
flag_hidden: bool,
|
||||
flag_ignore_case: bool,
|
||||
flag_invert_match: bool,
|
||||
flag_line_number: bool,
|
||||
flag_line_terminator: String,
|
||||
flag_literal: bool,
|
||||
flag_no_ignore: bool,
|
||||
flag_quiet: bool,
|
||||
flag_text: bool,
|
||||
flag_threads: usize,
|
||||
flag_type: Vec<String>,
|
||||
flag_type_not: Vec<String>,
|
||||
flag_type_list: bool,
|
||||
flag_type_add: Vec<String>,
|
||||
flag_type_clear: Vec<String>,
|
||||
flag_with_filename: bool,
|
||||
flag_word_regexp: bool,
|
||||
}
|
||||
|
||||
/// Args are transformed/normalized from RawArgs.
|
||||
#[derive(Debug)]
|
||||
pub struct Args {
|
||||
pattern: String,
|
||||
paths: Vec<PathBuf>,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
context_separator: Vec<u8>,
|
||||
count: bool,
|
||||
eol: u8,
|
||||
files: bool,
|
||||
follow: bool,
|
||||
glob_overrides: Option<Gitignore>,
|
||||
hidden: bool,
|
||||
ignore_case: bool,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
no_ignore: bool,
|
||||
quiet: bool,
|
||||
text: bool,
|
||||
threads: usize,
|
||||
type_defs: Vec<FileTypeDef>,
|
||||
type_list: bool,
|
||||
types: Types,
|
||||
with_filename: bool,
|
||||
}
|
||||
|
||||
impl RawArgs {
|
||||
/// Convert arguments parsed into a configuration used by xrep.
|
||||
fn to_args(&self) -> Result<Args> {
|
||||
let pattern = {
|
||||
let pattern =
|
||||
if self.flag_literal {
|
||||
regex::quote(&self.arg_pattern)
|
||||
} else {
|
||||
self.arg_pattern.clone()
|
||||
};
|
||||
if self.flag_word_regexp {
|
||||
format!(r"\b{}\b", pattern)
|
||||
} else {
|
||||
pattern
|
||||
}
|
||||
};
|
||||
let paths =
|
||||
if self.arg_path.is_empty() {
|
||||
vec![Path::new("./").to_path_buf()]
|
||||
} else {
|
||||
self.arg_path.iter().map(|p| {
|
||||
Path::new(p).to_path_buf()
|
||||
}).collect()
|
||||
};
|
||||
let (after_context, before_context) =
|
||||
if self.flag_context > 0 {
|
||||
(self.flag_context, self.flag_context)
|
||||
} else {
|
||||
(self.flag_after_context, self.flag_before_context)
|
||||
};
|
||||
let eol = {
|
||||
let eol = unescape(&self.flag_line_terminator);
|
||||
if eol.is_empty() {
|
||||
errored!("Empty line terminator is not allowed.");
|
||||
} else if eol.len() > 1 {
|
||||
errored!("Line terminators are limited to exactly 1 byte.");
|
||||
}
|
||||
eol[0]
|
||||
};
|
||||
let glob_overrides =
|
||||
if self.flag_glob.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let cwd = try!(env::current_dir());
|
||||
let mut bgi = GitignoreBuilder::new(cwd);
|
||||
for pat in &self.flag_glob {
|
||||
try!(bgi.add("<argv>", pat));
|
||||
}
|
||||
Some(try!(bgi.build()))
|
||||
};
|
||||
let threads =
|
||||
if self.flag_threads == 0 {
|
||||
cmp::min(6, num_cpus::get())
|
||||
} else {
|
||||
self.flag_threads
|
||||
};
|
||||
let mut with_filename = self.flag_with_filename;
|
||||
if !with_filename {
|
||||
with_filename = paths.len() > 1 || paths[0].is_dir();
|
||||
}
|
||||
let mut btypes = TypesBuilder::new();
|
||||
btypes.add_defaults();
|
||||
try!(self.add_types(&mut btypes));
|
||||
let types = try!(btypes.build());
|
||||
Ok(Args {
|
||||
pattern: pattern,
|
||||
paths: paths,
|
||||
after_context: after_context,
|
||||
before_context: before_context,
|
||||
context_separator: unescape(&self.flag_context_separator),
|
||||
count: self.flag_count,
|
||||
eol: eol,
|
||||
files: self.flag_files,
|
||||
follow: self.flag_follow,
|
||||
glob_overrides: glob_overrides,
|
||||
hidden: self.flag_hidden,
|
||||
ignore_case: self.flag_ignore_case,
|
||||
invert_match: self.flag_invert_match,
|
||||
line_number: self.flag_line_number,
|
||||
no_ignore: self.flag_no_ignore,
|
||||
quiet: self.flag_quiet,
|
||||
text: self.flag_text,
|
||||
threads: threads,
|
||||
type_defs: btypes.definitions(),
|
||||
type_list: self.flag_type_list,
|
||||
types: types,
|
||||
with_filename: with_filename,
|
||||
})
|
||||
}
|
||||
|
||||
fn add_types(&self, types: &mut TypesBuilder) -> Result<()> {
|
||||
for ty in &self.flag_type_clear {
|
||||
types.clear(ty);
|
||||
}
|
||||
for def in &self.flag_type_add {
|
||||
try!(types.add_def(def));
|
||||
}
|
||||
for ty in &self.flag_type {
|
||||
types.select(ty);
|
||||
}
|
||||
for ty in &self.flag_type_not {
|
||||
types.select_not(ty);
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl Args {
|
||||
/// Parse the command line arguments for this process.
|
||||
///
|
||||
/// If a CLI usage error occurred, then exit the process and print a usage
|
||||
/// or error message. Similarly, if the user requested the version of
|
||||
/// xrep, then print the version and exit.
|
||||
///
|
||||
/// Also, initialize a global logger.
|
||||
pub fn parse() -> Result<Args> {
|
||||
let raw: RawArgs =
|
||||
Docopt::new(USAGE)
|
||||
.and_then(|d| d.version(Some(version())).decode())
|
||||
.unwrap_or_else(|e| e.exit());
|
||||
|
||||
let mut logb = env_logger::LogBuilder::new();
|
||||
if raw.flag_debug {
|
||||
logb.filter(None, log::LogLevelFilter::Debug);
|
||||
} else {
|
||||
logb.filter(None, log::LogLevelFilter::Warn);
|
||||
}
|
||||
if let Err(err) = logb.init() {
|
||||
errored!("failed to initialize logger: {}", err);
|
||||
}
|
||||
|
||||
raw.to_args().map_err(From::from)
|
||||
}
|
||||
|
||||
/// Returns true if xrep should print the files it will search and exit
|
||||
/// (but not do any actual searching).
|
||||
pub fn files(&self) -> bool {
|
||||
self.files
|
||||
}
|
||||
|
||||
/// Create a new line based matcher. The matcher returned can be used
|
||||
/// across multiple threads simultaneously. This matcher only supports
|
||||
/// basic searching of regular expressions in a single buffer.
|
||||
///
|
||||
/// The pattern and other flags are taken from the command line.
|
||||
pub fn grep(&self) -> Result<Grep> {
|
||||
GrepBuilder::new(&self.pattern)
|
||||
.case_insensitive(self.ignore_case)
|
||||
.line_terminator(self.eol)
|
||||
.build()
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
/// Creates a new input buffer that is used in searching.
|
||||
pub fn input_buffer(&self) -> InputBuffer {
|
||||
let mut inp = InputBuffer::new();
|
||||
inp.eol(self.eol);
|
||||
inp
|
||||
}
|
||||
|
||||
/// Create a new printer of individual search results that writes to the
|
||||
/// writer given.
|
||||
pub fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
|
||||
Printer::new(wtr)
|
||||
.context_separator(self.context_separator.clone())
|
||||
.eol(self.eol)
|
||||
.quiet(self.quiet)
|
||||
.with_filename(self.with_filename)
|
||||
}
|
||||
|
||||
/// Create a new printer of search results for an entire file that writes
|
||||
/// to the writer given.
|
||||
pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
|
||||
let mut out = Out::new(wtr);
|
||||
if self.before_context > 0 || self.after_context > 0 {
|
||||
out = out.file_separator(self.context_separator.clone());
|
||||
}
|
||||
out
|
||||
}
|
||||
|
||||
/// Return the paths that should be searched.
|
||||
pub fn paths(&self) -> &[PathBuf] {
|
||||
&self.paths
|
||||
}
|
||||
|
||||
/// Create a new line based searcher whose configuration is taken from the
|
||||
/// command line. This searcher supports a dizzying array of features:
|
||||
/// inverted matching, line counting, context control and more.
|
||||
pub fn searcher<'a, R: io::Read, W: io::Write>(
|
||||
&self,
|
||||
inp: &'a mut InputBuffer,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
rdr: R,
|
||||
) -> Searcher<'a, R, W> {
|
||||
Searcher::new(inp, printer, grep, path, rdr)
|
||||
.after_context(self.after_context)
|
||||
.before_context(self.before_context)
|
||||
.count(self.count)
|
||||
.eol(self.eol)
|
||||
.line_number(self.line_number)
|
||||
.invert_match(self.invert_match)
|
||||
.text(self.text)
|
||||
}
|
||||
|
||||
/// Returns the number of worker search threads that should be used.
|
||||
pub fn threads(&self) -> usize {
|
||||
self.threads
|
||||
}
|
||||
|
||||
/// Returns a list of type definitions currently loaded.
|
||||
pub fn type_defs(&self) -> &[FileTypeDef] {
|
||||
&self.type_defs
|
||||
}
|
||||
|
||||
/// Returns true if xrep should print the type definitions currently loaded
|
||||
/// and then exit.
|
||||
pub fn type_list(&self) -> bool {
|
||||
self.type_list
|
||||
}
|
||||
|
||||
/// Create a new recursive directory iterator at the path given.
|
||||
pub fn walker(&self, path: &Path) -> walk::Iter {
|
||||
let wd = WalkDir::new(path).follow_links(self.follow);
|
||||
let mut ig = Ignore::new();
|
||||
ig.ignore_hidden(!self.hidden);
|
||||
ig.no_ignore(self.no_ignore);
|
||||
ig.add_types(self.types.clone());
|
||||
if let Some(ref overrides) = self.glob_overrides {
|
||||
ig.add_override(overrides.clone());
|
||||
}
|
||||
walk::Iter::new(ig, wd)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the crate version as `MAJOR.MINOR.PATCH`.
///
/// The components are read at compile time from the `CARGO_PKG_VERSION_*`
/// environment variables. If any of them is missing, the empty string is
/// returned.
fn version() -> String {
    let major = option_env!("CARGO_PKG_VERSION_MAJOR");
    let minor = option_env!("CARGO_PKG_VERSION_MINOR");
    let patch = option_env!("CARGO_PKG_VERSION_PATCH");
    if let (Some(major), Some(minor), Some(patch)) = (major, minor, patch) {
        format!("{}.{}.{}", major, minor, patch)
    } else {
        String::new()
    }
}
|
||||
|
||||
/// A single state in the state machine used by `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
    /// The previous byte was a backslash.
    Escape,
    /// `\x` has been read; the first hex digit is expected next.
    HexFirst,
    /// `\x` and one hex digit have been read. The digit is stored exactly as
    /// written so that it can be emitted verbatim if the escape turns out to
    /// be incomplete.
    HexSecond(u8),
    /// Not inside an escape sequence.
    Literal,
}

/// Returns the numeric value of an ASCII hexadecimal digit, or `None` if the
/// byte is not one of `0-9`, `a-f` or `A-F`.
fn hex_value(b: u8) -> Option<u8> {
    (b as char).to_digit(16).map(|d| d as u8)
}

/// Unescapes a string given on the command line. It supports a limited set
/// of escape sequences:
///
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
/// * \xZZ hexadecimal escapes are mapped to their byte.
///
/// Anything else is passed through unchanged. That includes unrecognized
/// escapes such as `\a`, incomplete hex escapes such as `\x`, `\xz` or
/// `\x1z`, and a trailing backslash.
///
/// The input is processed byte by byte. This is equivalent to processing it
/// character by character because every byte of the escape syntax is ASCII
/// and ASCII bytes never occur inside a multi-byte UTF-8 sequence.
fn unescape(s: &str) -> Vec<u8> {
    use self::State::*;

    // Unescaping never produces more bytes than it consumes.
    let mut bytes = Vec::with_capacity(s.len());
    let mut state = Literal;
    for b in s.bytes() {
        state = match state {
            Literal => {
                if b == b'\\' {
                    Escape
                } else {
                    bytes.push(b);
                    Literal
                }
            }
            Escape => match b {
                b'n' => { bytes.push(b'\n'); Literal }
                b'r' => { bytes.push(b'\r'); Literal }
                b't' => { bytes.push(b'\t'); Literal }
                b'x' => HexFirst,
                _ => {
                    // Unknown escape: keep the backslash and the byte.
                    bytes.push(b'\\');
                    bytes.push(b);
                    Literal
                }
            },
            HexFirst => {
                if hex_value(b).is_some() {
                    HexSecond(b)
                } else {
                    bytes.extend_from_slice(&[b'\\', b'x', b]);
                    Literal
                }
            }
            HexSecond(first) => {
                match (hex_value(first), hex_value(b)) {
                    // Both nibbles are at most 15, so this cannot overflow.
                    (Some(hi), Some(lo)) => bytes.push(hi * 16 + lo),
                    _ => bytes.extend_from_slice(&[b'\\', b'x', first, b]),
                }
                Literal
            }
        };
    }
    // Flush an escape sequence that was cut short by the end of the input.
    match state {
        Escape => bytes.push(b'\\'),
        HexFirst => bytes.extend_from_slice(b"\\x"),
        HexSecond(first) => bytes.extend_from_slice(&[b'\\', b'x', first]),
        Literal => {}
    }
    bytes
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::unescape;

    /// Asserts that unescaping `input` yields exactly the bytes `expected`.
    fn check(input: &str, expected: &[u8]) {
        assert_eq!(expected.to_vec(), unescape(input));
    }

    // Recognized escapes are replaced by the byte they denote.

    #[test]
    fn unescape_nul() {
        check(r"\x00", b"\x00");
    }

    #[test]
    fn unescape_nl() {
        check(r"\n", b"\n");
    }

    #[test]
    fn unescape_tab() {
        check(r"\t", b"\t");
    }

    #[test]
    fn unescape_carriage() {
        check(r"\r", b"\r");
    }

    // Unrecognized or incomplete escapes are passed through unchanged.

    #[test]
    fn unescape_nothing_simple() {
        check(r"\a", b"\\a");
    }

    #[test]
    fn unescape_nothing_hex0() {
        check(r"\x", b"\\x");
    }

    #[test]
    fn unescape_nothing_hex1() {
        check(r"\xz", b"\\xz");
    }

    #[test]
    fn unescape_nothing_hex2() {
        check(r"\xzz", b"\\xzz");
    }
}
|
Reference in New Issue
Block a user