mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 01:30:21 -07:00
Implementing core functionality.
Initially experimenting with crossbeam to manage synchronization.
This commit is contained in:
parent
065c449980
commit
1c8379f55a
@ -19,8 +19,11 @@ path = "src/main.rs"
|
|||||||
name = "xrep"
|
name = "xrep"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
crossbeam = "0.2"
|
||||||
docopt = "0.6"
|
docopt = "0.6"
|
||||||
|
env_logger = "0.3"
|
||||||
grep = { version = "0.1", path = "grep" }
|
grep = { version = "0.1", path = "grep" }
|
||||||
|
log = "0.3"
|
||||||
memchr = "0.1"
|
memchr = "0.1"
|
||||||
memmap = "0.2"
|
memmap = "0.2"
|
||||||
num_cpus = "1"
|
num_cpus = "1"
|
||||||
|
@ -12,7 +12,7 @@ use std::error;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::result;
|
use std::result;
|
||||||
|
|
||||||
pub use search::{Grep, GrepBuilder};
|
pub use search::{Grep, GrepBuilder, Iter, Match};
|
||||||
|
|
||||||
mod literals;
|
mod literals;
|
||||||
mod nonl;
|
mod nonl;
|
||||||
|
@ -136,7 +136,8 @@ impl Gitignore {
|
|||||||
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
|
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
|
||||||
// A single regex with a bunch of alternations of glob patterns is
|
// A single regex with a bunch of alternations of glob patterns is
|
||||||
// unfortunately typically faster than a regex set, so we use it as a
|
// unfortunately typically faster than a regex set, so we use it as a
|
||||||
// first pass filter.
|
// first pass filter. We still need to run the RegexSet to get the most
|
||||||
|
// recently defined glob that matched.
|
||||||
if !self.set.is_match(path) {
|
if !self.set.is_match(path) {
|
||||||
return Match::None;
|
return Match::None;
|
||||||
}
|
}
|
||||||
@ -145,9 +146,9 @@ impl Gitignore {
|
|||||||
Some(i) => &self.patterns[i],
|
Some(i) => &self.patterns[i],
|
||||||
};
|
};
|
||||||
if pat.whitelist {
|
if pat.whitelist {
|
||||||
Match::Whitelist
|
Match::Whitelist(&pat)
|
||||||
} else if !pat.only_dir || is_dir {
|
} else if !pat.only_dir || is_dir {
|
||||||
Match::Ignored
|
Match::Ignored(&pat)
|
||||||
} else {
|
} else {
|
||||||
Match::None
|
Match::None
|
||||||
}
|
}
|
||||||
@ -155,22 +156,25 @@ impl Gitignore {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// The result of a glob match.
|
/// The result of a glob match.
|
||||||
|
///
|
||||||
|
/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
|
||||||
|
/// a match (whether ignored or whitelisted).
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub enum Match {
|
pub enum Match<'a> {
|
||||||
/// The path didn't match any glob in the gitignore file.
|
/// The path didn't match any glob in the gitignore file.
|
||||||
None,
|
None,
|
||||||
/// The last glob matched indicates the path should be ignored.
|
/// The last glob matched indicates the path should be ignored.
|
||||||
Ignored,
|
Ignored(&'a Pattern),
|
||||||
/// The last glob matched indicates the path should be whitelisted.
|
/// The last glob matched indicates the path should be whitelisted.
|
||||||
Whitelist,
|
Whitelist(&'a Pattern),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Match {
|
impl<'a> Match<'a> {
|
||||||
/// Returns true if the match result implies the path should be ignored.
|
/// Returns true if the match result implies the path should be ignored.
|
||||||
pub fn is_ignored(&self) -> bool {
|
pub fn is_ignored(&self) -> bool {
|
||||||
match *self {
|
match *self {
|
||||||
Match::Ignored => true,
|
Match::Ignored(_) => true,
|
||||||
Match::None | Match::Whitelist => false,
|
Match::None | Match::Whitelist(_) => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -186,11 +190,18 @@ pub struct GitignoreBuilder {
|
|||||||
/// Pattern represents a single pattern in a gitignore file. It doesn't
|
/// Pattern represents a single pattern in a gitignore file. It doesn't
|
||||||
/// know how to do glob matching directly, but it does store additional
|
/// know how to do glob matching directly, but it does store additional
|
||||||
/// options on a pattern, such as whether it's whitelisted.
|
/// options on a pattern, such as whether it's whitelisted.
|
||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug)]
|
||||||
struct Pattern {
|
pub struct Pattern {
|
||||||
pat: String,
|
/// The file path that this pattern was extracted from (may be empty).
|
||||||
whitelist: bool, // prefix of '!'
|
pub from: PathBuf,
|
||||||
only_dir: bool, // suffix of '/'
|
/// The original glob pattern string.
|
||||||
|
pub original: String,
|
||||||
|
/// The actual glob pattern string used to convert to a regex.
|
||||||
|
pub pat: String,
|
||||||
|
/// Whether this is a whitelisted pattern or not.
|
||||||
|
pub whitelist: bool,
|
||||||
|
/// Whether this pattern should only match directories or not.
|
||||||
|
pub only_dir: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl GitignoreBuilder {
|
impl GitignoreBuilder {
|
||||||
@ -222,7 +233,7 @@ impl GitignoreBuilder {
|
|||||||
let rdr = io::BufReader::new(try!(File::open(&path)));
|
let rdr = io::BufReader::new(try!(File::open(&path)));
|
||||||
// println!("adding ignores from: {}", path.as_ref().display());
|
// println!("adding ignores from: {}", path.as_ref().display());
|
||||||
for line in rdr.lines() {
|
for line in rdr.lines() {
|
||||||
try!(self.add(&try!(line)));
|
try!(self.add(&path, &try!(line)));
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -230,7 +241,7 @@ impl GitignoreBuilder {
|
|||||||
/// Add each pattern line from the string given.
|
/// Add each pattern line from the string given.
|
||||||
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
|
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
|
||||||
for line in gitignore.lines() {
|
for line in gitignore.lines() {
|
||||||
try!(self.add(line));
|
try!(self.add("", line));
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -238,11 +249,21 @@ impl GitignoreBuilder {
|
|||||||
/// Add a line from a gitignore file to this builder.
|
/// Add a line from a gitignore file to this builder.
|
||||||
///
|
///
|
||||||
/// If the line could not be parsed as a glob, then an error is returned.
|
/// If the line could not be parsed as a glob, then an error is returned.
|
||||||
pub fn add(&mut self, mut line: &str) -> Result<(), Error> {
|
pub fn add<P: AsRef<Path>>(
|
||||||
|
&mut self,
|
||||||
|
from: P,
|
||||||
|
mut line: &str,
|
||||||
|
) -> Result<(), Error> {
|
||||||
if line.is_empty() {
|
if line.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
let mut pat = Pattern::default();
|
let mut pat = Pattern {
|
||||||
|
from: from.as_ref().to_path_buf(),
|
||||||
|
original: line.to_string(),
|
||||||
|
pat: String::new(),
|
||||||
|
whitelist: false,
|
||||||
|
only_dir: false,
|
||||||
|
};
|
||||||
let mut opts = glob::MatchOptions::default();
|
let mut opts = glob::MatchOptions::default();
|
||||||
let has_slash = line.chars().any(|c| c == '/');
|
let has_slash = line.chars().any(|c| c == '/');
|
||||||
// If the line starts with an escaped '!', then remove the escape.
|
// If the line starts with an escaped '!', then remove the escape.
|
||||||
@ -352,6 +373,7 @@ mod tests {
|
|||||||
ignored!(ig22, ROOT, r"\#foo", "#foo");
|
ignored!(ig22, ROOT, r"\#foo", "#foo");
|
||||||
ignored!(ig23, ROOT, "foo", "./foo");
|
ignored!(ig23, ROOT, "foo", "./foo");
|
||||||
ignored!(ig24, ROOT, "target", "grep/target");
|
ignored!(ig24, ROOT, "target", "grep/target");
|
||||||
|
ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
|
||||||
|
|
||||||
not_ignored!(ignot1, ROOT, "amonths", "months");
|
not_ignored!(ignot1, ROOT, "amonths", "months");
|
||||||
not_ignored!(ignot2, ROOT, "monthsa", "months");
|
not_ignored!(ignot2, ROOT, "monthsa", "months");
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*!
|
/*!
|
||||||
The glob submodule provides standard shell globbing, but is specifically
|
The glob module provides standard shell globbing, but is specifically
|
||||||
implemented by converting glob syntax to regular expressions. The reasoning
|
implemented by converting glob syntax to regular expressions. The reasoning is
|
||||||
is two fold:
|
two fold:
|
||||||
|
|
||||||
1. The regex library is *really* fast. Regaining performance in a distinct
|
1. The regex library is *really* fast. Regaining performance in a distinct
|
||||||
implementation of globbing is non-trivial.
|
implementation of globbing is non-trivial.
|
||||||
|
@ -56,20 +56,41 @@ pub struct Ignore {
|
|||||||
/// A stack of ignore patterns at each directory level of traversal.
|
/// A stack of ignore patterns at each directory level of traversal.
|
||||||
/// A directory that contributes no ignore patterns is `None`.
|
/// A directory that contributes no ignore patterns is `None`.
|
||||||
stack: Vec<Option<IgnoreDir>>,
|
stack: Vec<Option<IgnoreDir>>,
|
||||||
// TODO(burntsushi): Add other patterns from the command line here.
|
ignore_hidden: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Ignore {
|
impl Ignore {
|
||||||
/// Create an empty set of ignore patterns.
|
/// Create an empty set of ignore patterns.
|
||||||
pub fn new() -> Ignore {
|
pub fn new() -> Ignore {
|
||||||
Ignore { stack: vec![] }
|
Ignore {
|
||||||
|
stack: vec![],
|
||||||
|
ignore_hidden: true,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set whether hidden files/folders should be ignored (defaults to true).
|
||||||
|
pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
|
||||||
|
self.ignore_hidden = yes;
|
||||||
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a directory to the stack.
|
/// Add a directory to the stack.
|
||||||
|
///
|
||||||
|
/// Note that even if this returns an error, the directory is added to the
|
||||||
|
/// stack (and therefore should be popped).
|
||||||
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||||
self.stack.push(try!(IgnoreDir::new(path)));
|
match IgnoreDir::new(path) {
|
||||||
|
Ok(id) => {
|
||||||
|
self.stack.push(id);
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Err(err) => {
|
||||||
|
// Don't leave the stack in an inconsistent state.
|
||||||
|
self.stack.push(None);
|
||||||
|
Err(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Pop a directory from the stack.
|
/// Pop a directory from the stack.
|
||||||
///
|
///
|
||||||
@ -81,10 +102,19 @@ impl Ignore {
|
|||||||
/// Returns true if and only if the given file path should be ignored.
|
/// Returns true if and only if the given file path should be ignored.
|
||||||
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
|
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
|
if self.ignore_hidden && is_hidden(&path) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
|
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
|
||||||
match id.matched(path, is_dir) {
|
match id.matched(path, is_dir) {
|
||||||
Match::Whitelist => return false,
|
Match::Whitelist(ref pat) => {
|
||||||
Match::Ignored => return true,
|
debug!("{} whitelisted by {:?}", path.display(), pat);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Match::Ignored(ref pat) => {
|
||||||
|
debug!("{} ignored by {:?}", path.display(), pat);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
Match::None => {}
|
Match::None => {}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -150,6 +180,14 @@ impl IgnoreDir {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||||
|
if let Some(name) = path.as_ref().file_name() {
|
||||||
|
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
353
src/main.rs
353
src/main.rs
@ -1,7 +1,11 @@
|
|||||||
#![allow(dead_code, unused_variables)]
|
#![allow(dead_code, unused_variables)]
|
||||||
|
|
||||||
|
extern crate crossbeam;
|
||||||
extern crate docopt;
|
extern crate docopt;
|
||||||
|
extern crate env_logger;
|
||||||
extern crate grep;
|
extern crate grep;
|
||||||
|
#[macro_use]
|
||||||
|
extern crate log;
|
||||||
extern crate memchr;
|
extern crate memchr;
|
||||||
extern crate memmap;
|
extern crate memmap;
|
||||||
extern crate num_cpus;
|
extern crate num_cpus;
|
||||||
@ -10,27 +14,22 @@ extern crate regex_syntax as syntax;
|
|||||||
extern crate rustc_serialize;
|
extern crate rustc_serialize;
|
||||||
extern crate walkdir;
|
extern crate walkdir;
|
||||||
|
|
||||||
const USAGE: &'static str = "
|
|
||||||
Usage: xrep [options] <pattern> <path> ...
|
|
||||||
|
|
||||||
xrep is like the silver searcher, but faster than it and grep.
|
|
||||||
|
|
||||||
At least one path is required. Searching stdin isn't yet supported.
|
|
||||||
|
|
||||||
Options:
|
|
||||||
-c, --count Suppress normal output and show count of line matches.
|
|
||||||
";
|
|
||||||
|
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
|
use std::path::PathBuf;
|
||||||
use std::process;
|
use std::process;
|
||||||
use std::result;
|
use std::result;
|
||||||
|
use std::sync::Arc;
|
||||||
|
use std::thread;
|
||||||
|
|
||||||
|
use crossbeam::sync::{MsQueue, TreiberStack};
|
||||||
use docopt::Docopt;
|
use docopt::Docopt;
|
||||||
use grep::Grep;
|
use grep::{Grep, GrepBuilder};
|
||||||
use walkdir::{WalkDir, WalkDirIterator};
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
use ignore::Ignore;
|
use ignore::Ignore;
|
||||||
|
use printer::Printer;
|
||||||
|
use search::Searcher;
|
||||||
|
|
||||||
macro_rules! errored {
|
macro_rules! errored {
|
||||||
($($tt:tt)*) => {
|
($($tt:tt)*) => {
|
||||||
@ -48,21 +47,54 @@ macro_rules! eprintln {
|
|||||||
mod gitignore;
|
mod gitignore;
|
||||||
mod glob;
|
mod glob;
|
||||||
mod ignore;
|
mod ignore;
|
||||||
|
mod printer;
|
||||||
|
mod search;
|
||||||
|
mod walk;
|
||||||
|
|
||||||
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
const USAGE: &'static str = "
|
||||||
|
Usage: xrep [options] <pattern> [<path> ...]
|
||||||
|
|
||||||
|
xrep is like the silver searcher and grep, but faster than both.
|
||||||
|
|
||||||
|
WARNING: Searching stdin isn't yet supported.
|
||||||
|
|
||||||
|
Options:
|
||||||
|
-c, --count Suppress normal output and show count of line matches.
|
||||||
|
--debug Show debug messages.
|
||||||
|
--files Print each file that would be searched
|
||||||
|
(but don't search).
|
||||||
|
-L, --follow Follow symlinks.
|
||||||
|
--hidden Search hidden directories and files.
|
||||||
|
-i, --ignore-case Case insensitive search.
|
||||||
|
--threads ARG The number of threads to use. Defaults to the number
|
||||||
|
of logical CPUs. [default: 0]
|
||||||
|
";
|
||||||
|
|
||||||
#[derive(RustcDecodable)]
|
#[derive(RustcDecodable)]
|
||||||
struct Args {
|
struct Args {
|
||||||
arg_pattern: String,
|
arg_pattern: String,
|
||||||
arg_path: Vec<String>,
|
arg_path: Vec<String>,
|
||||||
flag_count: bool,
|
flag_count: bool,
|
||||||
|
flag_debug: bool,
|
||||||
|
flag_files: bool,
|
||||||
|
flag_follow: bool,
|
||||||
|
flag_hidden: bool,
|
||||||
|
flag_ignore_case: bool,
|
||||||
|
flag_threads: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Args {
|
||||||
|
fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
|
||||||
|
Printer::new(wtr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
let args: Args = Docopt::new(USAGE).and_then(|d| d.decode())
|
let args: Args = Docopt::new(USAGE).and_then(|d| d.decode())
|
||||||
.unwrap_or_else(|e| e.exit());
|
.unwrap_or_else(|e| e.exit());
|
||||||
match args.run() {
|
match real_main(args) {
|
||||||
Ok(count) if count == 0 => process::exit(1),
|
|
||||||
Ok(_) => process::exit(0),
|
Ok(_) => process::exit(0),
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
let _ = writeln!(&mut io::stderr(), "{}", err);
|
let _ = writeln!(&mut io::stderr(), "{}", err);
|
||||||
@ -71,118 +103,193 @@ fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Args {
|
fn real_main(args: Args) -> Result<()> {
|
||||||
fn run(&self) -> Result<u64> {
|
let mut logb = env_logger::LogBuilder::new();
|
||||||
if self.arg_path.is_empty() {
|
if args.flag_debug {
|
||||||
return errored!("Searching stdin is not currently supported.");
|
logb.filter(None, log::LogLevelFilter::Debug);
|
||||||
}
|
|
||||||
let mut stdout = io::BufWriter::new(io::stdout());
|
|
||||||
let mut ig = Ignore::new();
|
|
||||||
for p in &self.arg_path {
|
|
||||||
let mut it = WalkEventIter::from(WalkDir::new(p));
|
|
||||||
loop {
|
|
||||||
let ev = match it.next() {
|
|
||||||
None => break,
|
|
||||||
Some(Ok(ev)) => ev,
|
|
||||||
Some(Err(err)) => {
|
|
||||||
eprintln!("{}", err);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
match ev {
|
|
||||||
WalkEvent::Exit => {
|
|
||||||
ig.pop();
|
|
||||||
}
|
|
||||||
WalkEvent::Dir(ent) => {
|
|
||||||
try!(ig.push(ent.path()));
|
|
||||||
if is_hidden(&ent) || ig.ignored(ent.path(), true) {
|
|
||||||
// if is_hidden(&ent) {
|
|
||||||
it.it.skip_current_dir();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
WalkEvent::File(ent) => {
|
|
||||||
if is_hidden(&ent) || ig.ignored(ent.path(), false) {
|
|
||||||
// if is_hidden(&ent) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
let _ = writeln!(
|
|
||||||
&mut stdout, "{}", ent.path().display());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(0)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn run_mmap_count_only(&self, searcher: &Grep) -> Result<u64> {
|
|
||||||
use memmap::{Mmap, Protection};
|
|
||||||
|
|
||||||
assert!(self.arg_path.len() == 1);
|
|
||||||
let mut wtr = io::BufWriter::new(io::stdout());
|
|
||||||
let mmap = try!(Mmap::open_path(&self.arg_path[0], Protection::Read));
|
|
||||||
let text = unsafe { mmap.as_slice() };
|
|
||||||
let count = searcher.iter(text).count() as u64;
|
|
||||||
try!(writeln!(wtr, "{}", count));
|
|
||||||
Ok(count)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
|
||||||
/// accurately describes the directory tree. Namely, it emits events that are
|
|
||||||
/// one of three types: directory, file or "exit." An "exit" event means that
|
|
||||||
/// the entire contents of a directory have been enumerated.
|
|
||||||
struct WalkEventIter {
|
|
||||||
depth: usize,
|
|
||||||
it: walkdir::Iter,
|
|
||||||
next: Option<result::Result<walkdir::DirEntry, walkdir::Error>>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug)]
|
|
||||||
enum WalkEvent {
|
|
||||||
Dir(walkdir::DirEntry),
|
|
||||||
File(walkdir::DirEntry),
|
|
||||||
Exit,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl From<walkdir::WalkDir> for WalkEventIter {
|
|
||||||
fn from(it: walkdir::WalkDir) -> WalkEventIter {
|
|
||||||
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Iterator for WalkEventIter {
|
|
||||||
type Item = io::Result<WalkEvent>;
|
|
||||||
|
|
||||||
fn next(&mut self) -> Option<io::Result<WalkEvent>> {
|
|
||||||
let dent = self.next.take().or_else(|| self.it.next());
|
|
||||||
let depth = match dent {
|
|
||||||
None => 0,
|
|
||||||
Some(Ok(ref dent)) => dent.depth(),
|
|
||||||
Some(Err(ref err)) => err.depth(),
|
|
||||||
};
|
|
||||||
if depth < self.depth {
|
|
||||||
self.depth -= 1;
|
|
||||||
self.next = dent;
|
|
||||||
return Some(Ok(WalkEvent::Exit));
|
|
||||||
}
|
|
||||||
self.depth = depth;
|
|
||||||
match dent {
|
|
||||||
None => None,
|
|
||||||
Some(Err(err)) => Some(Err(From::from(err))),
|
|
||||||
Some(Ok(dent)) => {
|
|
||||||
if dent.file_type().is_dir() {
|
|
||||||
self.depth += 1;
|
|
||||||
Some(Ok(WalkEvent::Dir(dent)))
|
|
||||||
} else {
|
} else {
|
||||||
Some(Ok(WalkEvent::File(dent)))
|
logb.filter(None, log::LogLevelFilter::Warn);
|
||||||
|
}
|
||||||
|
if let Err(err) = logb.init() {
|
||||||
|
return errored!("failed to initialize logger: {}", err);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut main = Main::new(args);
|
||||||
|
try!(main.run_workers());
|
||||||
|
let writer = main.run_writer();
|
||||||
|
main.scan();
|
||||||
|
main.finish_workers();
|
||||||
|
main.chan_results.push(Message::Quit);
|
||||||
|
writer.join().unwrap();
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
type ChanWork = Arc<MsQueue<Message<Work>>>;
|
||||||
|
|
||||||
|
type ChanResults = Arc<MsQueue<Message<Vec<u8>>>>;
|
||||||
|
|
||||||
|
enum Message<T> {
|
||||||
|
Some(T),
|
||||||
|
Quit,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Main {
|
||||||
|
args: Arc<Args>,
|
||||||
|
chan_work: ChanWork,
|
||||||
|
chan_results: ChanResults,
|
||||||
|
bufs: Arc<Bufs>,
|
||||||
|
workers: Vec<thread::JoinHandle<()>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Main {
|
||||||
|
fn new(mut args: Args) -> Main {
|
||||||
|
if args.arg_path.is_empty() {
|
||||||
|
args.arg_path.push("./".to_string());
|
||||||
|
}
|
||||||
|
Main {
|
||||||
|
args: Arc::new(args),
|
||||||
|
chan_work: Arc::new(MsQueue::new()),
|
||||||
|
chan_results: Arc::new(MsQueue::new()),
|
||||||
|
bufs: Arc::new(Bufs::new()),
|
||||||
|
workers: vec![],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn scan(&mut self) {
|
||||||
|
for p in &self.args.arg_path {
|
||||||
|
if p == "-" {
|
||||||
|
eprintln!("searching <stdin> isn't yet supported");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let wd = WalkDir::new(p).follow_links(self.args.flag_follow);
|
||||||
|
let mut ig = Ignore::new();
|
||||||
|
ig.ignore_hidden(!self.args.flag_hidden);
|
||||||
|
|
||||||
|
for ent in walk::Iter::new(ig, wd) {
|
||||||
|
let mut path = ent.path();
|
||||||
|
if let Ok(p) = path.strip_prefix("./") {
|
||||||
|
path = p;
|
||||||
|
}
|
||||||
|
self.chan_work.push(Message::Some(Work {
|
||||||
|
path: path.to_path_buf(),
|
||||||
|
out: self.bufs.pop(),
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_writer(&self) -> thread::JoinHandle<()> {
|
||||||
|
let wtr = Writer {
|
||||||
|
args: self.args.clone(),
|
||||||
|
chan_results: self.chan_results.clone(),
|
||||||
|
bufs: self.bufs.clone(),
|
||||||
|
};
|
||||||
|
thread::spawn(move || wtr.run())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_workers(&mut self) -> Result<()> {
|
||||||
|
let mut num = self.args.flag_threads;
|
||||||
|
if num == 0 {
|
||||||
|
num = num_cpus::get();
|
||||||
|
}
|
||||||
|
if num < 4 {
|
||||||
|
num = 1;
|
||||||
|
} else {
|
||||||
|
num -= 2;
|
||||||
|
}
|
||||||
|
println!("running {} workers", num);
|
||||||
|
for _ in 0..num {
|
||||||
|
try!(self.run_worker());
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn run_worker(&mut self) -> Result<()> {
|
||||||
|
let grepb =
|
||||||
|
GrepBuilder::new(&self.args.arg_pattern)
|
||||||
|
.case_insensitive(self.args.flag_ignore_case);
|
||||||
|
let worker = Worker {
|
||||||
|
args: self.args.clone(),
|
||||||
|
chan_work: self.chan_work.clone(),
|
||||||
|
chan_results: self.chan_results.clone(),
|
||||||
|
grep: try!(grepb.build()),
|
||||||
|
};
|
||||||
|
self.workers.push(thread::spawn(move || worker.run()));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn finish_workers(&mut self) {
|
||||||
|
// We can stop all of the workers by sending a quit message.
|
||||||
|
// Each worker is guaranteed to receive the quit message exactly
|
||||||
|
// once, so we only need to send `self.workers.len()` of them
|
||||||
|
for _ in 0..self.workers.len() {
|
||||||
|
self.chan_work.push(Message::Quit);
|
||||||
|
}
|
||||||
|
// Now wait for each to finish.
|
||||||
|
while let Some(thread) = self.workers.pop() {
|
||||||
|
thread.join().unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_hidden(ent: &walkdir::DirEntry) -> bool {
|
struct Writer {
|
||||||
ent.depth() > 0 &&
|
args: Arc<Args>,
|
||||||
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
chan_results: ChanResults,
|
||||||
|
bufs: Arc<Bufs>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Writer {
|
||||||
|
fn run(self) {
|
||||||
|
let mut stdout = io::BufWriter::new(io::stdout());
|
||||||
|
while let Message::Some(res) = self.chan_results.pop() {
|
||||||
|
let _ = stdout.write_all(&res);
|
||||||
|
self.bufs.push(res);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Work {
|
||||||
|
path: PathBuf,
|
||||||
|
out: Vec<u8>,
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Worker {
|
||||||
|
args: Arc<Args>,
|
||||||
|
chan_work: ChanWork,
|
||||||
|
chan_results: ChanResults,
|
||||||
|
grep: Grep,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Worker {
|
||||||
|
fn run(self) {
|
||||||
|
while let Message::Some(mut work) = self.chan_work.pop() {
|
||||||
|
work.out.clear();
|
||||||
|
let printer = self.args.printer(work.out);
|
||||||
|
let searcher = Searcher::new(&self.grep, work.path).unwrap();
|
||||||
|
let buf = searcher.search(printer);
|
||||||
|
self.chan_results.push(Message::Some(buf));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A pool of buffers used by each worker thread to write matches.
|
||||||
|
struct Bufs {
|
||||||
|
bufs: TreiberStack<Vec<u8>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Bufs {
|
||||||
|
pub fn new() -> Bufs {
|
||||||
|
Bufs { bufs: TreiberStack::new() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn pop(&self) -> Vec<u8> {
|
||||||
|
match self.bufs.pop() {
|
||||||
|
None => vec![],
|
||||||
|
Some(buf) => buf,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push(&self, buf: Vec<u8>) {
|
||||||
|
self.bufs.push(buf);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
50
src/printer.rs
Normal file
50
src/printer.rs
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
use std::io;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
|
use grep::Match;
|
||||||
|
|
||||||
|
macro_rules! wln {
|
||||||
|
($($tt:tt)*) => {
|
||||||
|
let _ = writeln!($($tt)*);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct Printer<W> {
|
||||||
|
wtr: W,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<W: io::Write> Printer<W> {
|
||||||
|
pub fn new(wtr: W) -> Printer<W> {
|
||||||
|
Printer {
|
||||||
|
wtr: wtr,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_inner(self) -> W {
|
||||||
|
self.wtr
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
|
||||||
|
wln!(&mut self.wtr, "{}", path.as_ref().display());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn count(&mut self, count: u64) {
|
||||||
|
wln!(&mut self.wtr, "{}", count);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn matched<P: AsRef<Path>>(
|
||||||
|
&mut self,
|
||||||
|
path: P,
|
||||||
|
buf: &[u8],
|
||||||
|
m: &Match,
|
||||||
|
) {
|
||||||
|
let _ = self.wtr.write(path.as_ref().to_string_lossy().as_bytes());
|
||||||
|
let _ = self.wtr.write(b":");
|
||||||
|
let _ = self.wtr.write(&buf[m.start()..m.end()]);
|
||||||
|
let _ = self.wtr.write(b"\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
|
||||||
|
wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display());
|
||||||
|
}
|
||||||
|
}
|
144
src/search.rs
Normal file
144
src/search.rs
Normal file
@ -0,0 +1,144 @@
|
|||||||
|
/*!
|
||||||
|
The search module is responsible for searching a single file and printing
|
||||||
|
matches.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use std::cmp;
|
||||||
|
use std::error::Error as StdError;
|
||||||
|
use std::fmt;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use grep::Grep;
|
||||||
|
use memchr::memchr;
|
||||||
|
use memmap::{Mmap, Protection};
|
||||||
|
|
||||||
|
use printer::Printer;
|
||||||
|
|
||||||
|
/// Error describes errors that can occur while searching.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
/// Normal IO or Mmap errors suck. Include the path that originated them.
|
||||||
|
Io {
|
||||||
|
err: io::Error,
|
||||||
|
path: PathBuf,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error {
|
||||||
|
fn from_io<P: AsRef<Path>>(err: io::Error, path: P) -> Error {
|
||||||
|
Error::Io { err: err, path: path.as_ref().to_path_buf() }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StdError for Error {
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
match *self {
|
||||||
|
Error::Io { ref err, .. } => err.description(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn cause(&self) -> Option<&StdError> {
|
||||||
|
match *self {
|
||||||
|
Error::Io { ref err, .. } => Some(err),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Error {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match *self {
|
||||||
|
Error::Io { ref err, ref path } => {
|
||||||
|
write!(f, "{}: {}", path.display(), err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Searcher searches a memory mapped buffer.
|
||||||
|
///
|
||||||
|
/// The `'g` lifetime refers to the lifetime of the underlying matcher.
|
||||||
|
pub struct Searcher<'g> {
|
||||||
|
grep: &'g Grep,
|
||||||
|
path: PathBuf,
|
||||||
|
mmap: Option<Mmap>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'g> Searcher<'g> {
|
||||||
|
/// Create a new memory map based searcher using the given matcher for the
|
||||||
|
/// file path given.
|
||||||
|
pub fn new<P: AsRef<Path>>(
|
||||||
|
grep: &'g Grep,
|
||||||
|
path: P,
|
||||||
|
) -> Result<Searcher<'g>, Error> {
|
||||||
|
let file = try!(File::open(&path).map_err(|err| {
|
||||||
|
Error::from_io(err, &path)
|
||||||
|
}));
|
||||||
|
let md = try!(file.metadata().map_err(|err| {
|
||||||
|
Error::from_io(err, &path)
|
||||||
|
}));
|
||||||
|
let mmap =
|
||||||
|
if md.len() == 0 {
|
||||||
|
None
|
||||||
|
} else {
|
||||||
|
Some(try!(Mmap::open(&file, Protection::Read).map_err(|err| {
|
||||||
|
Error::from_io(err, &path)
|
||||||
|
})))
|
||||||
|
};
|
||||||
|
Ok(Searcher {
|
||||||
|
grep: grep,
|
||||||
|
path: path.as_ref().to_path_buf(),
|
||||||
|
mmap: mmap,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Execute the search, writing the results to the printer given and
|
||||||
|
/// returning the underlying buffer.
|
||||||
|
pub fn search<W: io::Write>(&self, printer: Printer<W>) -> W {
|
||||||
|
Search {
|
||||||
|
grep: &self.grep,
|
||||||
|
path: &*self.path,
|
||||||
|
buf: self.buf(),
|
||||||
|
printer: printer,
|
||||||
|
}.run()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Execute the search, returning a count of the number of hits.
|
||||||
|
pub fn count(&self) -> u64 {
|
||||||
|
self.grep.iter(self.buf()).count() as u64
|
||||||
|
}
|
||||||
|
|
||||||
|
fn buf(&self) -> &[u8] {
|
||||||
|
self.mmap.as_ref().map(|m| unsafe { m.as_slice() }).unwrap_or(&[])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
struct Search<'a, W> {
|
||||||
|
grep: &'a Grep,
|
||||||
|
path: &'a Path,
|
||||||
|
buf: &'a [u8],
|
||||||
|
printer: Printer<W>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, W: io::Write> Search<'a, W> {
|
||||||
|
fn run(mut self) -> W {
|
||||||
|
let is_binary = self.is_binary();
|
||||||
|
let mut it = self.grep.iter(self.buf).peekable();
|
||||||
|
if is_binary && it.peek().is_some() {
|
||||||
|
self.printer.binary_matched(self.path);
|
||||||
|
return self.printer.into_inner();
|
||||||
|
}
|
||||||
|
for m in it {
|
||||||
|
self.printer.matched(self.path, self.buf, &m);
|
||||||
|
}
|
||||||
|
self.printer.into_inner()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_binary(&self) -> bool {
|
||||||
|
if self.buf.len() >= 4 && &self.buf[0..4] == b"%PDF" {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
memchr(b'\x00', &self.buf[0..cmp::min(1024, self.buf.len())]).is_some()
|
||||||
|
}
|
||||||
|
}
|
142
src/walk.rs
Normal file
142
src/walk.rs
Normal file
@ -0,0 +1,142 @@
|
|||||||
|
/*!
|
||||||
|
The walk module implements a recursive directory iterator (using the `walkdir`
crate) that can efficiently skip and ignore files and directories specified in
|
||||||
|
a user's ignore patterns.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use walkdir::{self, DirEntry, WalkDir, WalkDirIterator};
|
||||||
|
|
||||||
|
use ignore::Ignore;
|
||||||
|
|
||||||
|
/// Iter is a recursive directory iterator over file paths in a directory.
|
||||||
|
/// Only file paths should be searched are yielded.
|
||||||
|
pub struct Iter {
|
||||||
|
ig: Ignore,
|
||||||
|
it: WalkEventIter,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iter {
|
||||||
|
/// Create a new recursive directory iterator using the ignore patterns
|
||||||
|
/// and walkdir iterator given.
|
||||||
|
pub fn new(ig: Ignore, wd: WalkDir) -> Iter {
|
||||||
|
Iter {
|
||||||
|
ig: ig,
|
||||||
|
it: WalkEventIter::from(wd),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if this entry should be skipped.
|
||||||
|
fn skip_entry(&self, ent: &DirEntry) -> bool {
|
||||||
|
if ent.depth() == 0 {
|
||||||
|
// Never skip the root directory.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if self.ig.ignored(ent.path(), ent.file_type().is_dir()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for Iter {
|
||||||
|
type Item = DirEntry;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<DirEntry> {
|
||||||
|
while let Some(ev) = self.it.next() {
|
||||||
|
match ev {
|
||||||
|
Err(err) => {
|
||||||
|
eprintln!("{}", err);
|
||||||
|
}
|
||||||
|
Ok(WalkEvent::Exit) => {
|
||||||
|
self.ig.pop();
|
||||||
|
}
|
||||||
|
Ok(WalkEvent::Dir(ent)) => {
|
||||||
|
if self.skip_entry(&ent) {
|
||||||
|
self.it.it.skip_current_dir();
|
||||||
|
// Still need to push this on the stack because we'll
|
||||||
|
// get a WalkEvent::Exit event for this dir. We don't
|
||||||
|
// care if it errors though.
|
||||||
|
let _ = self.ig.push(ent.path());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if let Err(err) = self.ig.push(ent.path()) {
|
||||||
|
eprintln!("{}", err);
|
||||||
|
self.it.it.skip_current_dir();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(WalkEvent::File(ent)) => {
|
||||||
|
if self.skip_entry(&ent) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
// If this isn't actually a file (e.g., a symlink), then
|
||||||
|
// skip it.
|
||||||
|
if !ent.file_type().is_file() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
return Some(ent);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
||||||
|
/// accurately describes the directory tree. Namely, it emits events that are
|
||||||
|
/// one of three types: directory, file or "exit." An "exit" event means that
|
||||||
|
/// the entire contents of a directory have been enumerated.
|
||||||
|
struct WalkEventIter {
|
||||||
|
depth: usize,
|
||||||
|
it: walkdir::Iter,
|
||||||
|
next: Option<Result<DirEntry, walkdir::Error>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum WalkEvent {
|
||||||
|
Dir(DirEntry),
|
||||||
|
File(DirEntry),
|
||||||
|
Exit,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<WalkDir> for WalkEventIter {
|
||||||
|
fn from(it: WalkDir) -> WalkEventIter {
|
||||||
|
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for WalkEventIter {
|
||||||
|
type Item = walkdir::Result<WalkEvent>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||||
|
let dent = self.next.take().or_else(|| self.it.next());
|
||||||
|
let depth = match dent {
|
||||||
|
None => 0,
|
||||||
|
Some(Ok(ref dent)) => dent.depth(),
|
||||||
|
Some(Err(ref err)) => err.depth(),
|
||||||
|
};
|
||||||
|
if depth < self.depth {
|
||||||
|
self.depth -= 1;
|
||||||
|
self.next = dent;
|
||||||
|
return Some(Ok(WalkEvent::Exit));
|
||||||
|
}
|
||||||
|
self.depth = depth;
|
||||||
|
match dent {
|
||||||
|
None => None,
|
||||||
|
Some(Err(err)) => Some(Err(err)),
|
||||||
|
Some(Ok(dent)) => {
|
||||||
|
if dent.file_type().is_dir() {
|
||||||
|
self.depth += 1;
|
||||||
|
Some(Ok(WalkEvent::Dir(dent)))
|
||||||
|
} else {
|
||||||
|
Some(Ok(WalkEvent::File(dent)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_hidden(ent: &DirEntry) -> bool {
|
||||||
|
ent.depth() > 0 &&
|
||||||
|
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user