Implementing core functionality.

Initially experimenting with crossbeam to manage synchronization.
This commit is contained in:
Andrew Gallant 2016-08-28 01:37:12 -04:00
parent 065c449980
commit 1c8379f55a
9 changed files with 652 additions and 146 deletions

View File

@ -19,8 +19,11 @@ path = "src/main.rs"
name = "xrep" name = "xrep"
[dependencies] [dependencies]
crossbeam = "0.2"
docopt = "0.6" docopt = "0.6"
env_logger = "0.3"
grep = { version = "0.1", path = "grep" } grep = { version = "0.1", path = "grep" }
log = "0.3"
memchr = "0.1" memchr = "0.1"
memmap = "0.2" memmap = "0.2"
num_cpus = "1" num_cpus = "1"

View File

@ -12,7 +12,7 @@ use std::error;
use std::fmt; use std::fmt;
use std::result; use std::result;
pub use search::{Grep, GrepBuilder}; pub use search::{Grep, GrepBuilder, Iter, Match};
mod literals; mod literals;
mod nonl; mod nonl;

View File

@ -136,7 +136,8 @@ impl Gitignore {
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match { pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
// A single regex with a bunch of alternations of glob patterns is // A single regex with a bunch of alternations of glob patterns is
// unfortunately typically faster than a regex, so we use it as a // unfortunately typically faster than a regex, so we use it as a
// first pass filter. // first pass filter. We still need to run the RegexSet to find the most
// recently defined glob that matched.
if !self.set.is_match(path) { if !self.set.is_match(path) {
return Match::None; return Match::None;
} }
@ -145,9 +146,9 @@ impl Gitignore {
Some(i) => &self.patterns[i], Some(i) => &self.patterns[i],
}; };
if pat.whitelist { if pat.whitelist {
Match::Whitelist Match::Whitelist(&pat)
} else if !pat.only_dir || is_dir { } else if !pat.only_dir || is_dir {
Match::Ignored Match::Ignored(&pat)
} else { } else {
Match::None Match::None
} }
@ -155,22 +156,25 @@ impl Gitignore {
} }
/// The result of a glob match. /// The result of a glob match.
///
/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
/// a match (whether ignored or whitelisted).
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub enum Match { pub enum Match<'a> {
/// The path didn't match any glob in the gitignore file. /// The path didn't match any glob in the gitignore file.
None, None,
/// The last glob matched indicates the path should be ignored. /// The last glob matched indicates the path should be ignored.
Ignored, Ignored(&'a Pattern),
/// The last glob matched indicates the path should be whitelisted. /// The last glob matched indicates the path should be whitelisted.
Whitelist, Whitelist(&'a Pattern),
} }
impl Match { impl<'a> Match<'a> {
/// Returns true if the match result implies the path should be ignored. /// Returns true if the match result implies the path should be ignored.
pub fn is_ignored(&self) -> bool { pub fn is_ignored(&self) -> bool {
match *self { match *self {
Match::Ignored => true, Match::Ignored(_) => true,
Match::None | Match::Whitelist => false, Match::None | Match::Whitelist(_) => false,
} }
} }
} }
@ -186,11 +190,18 @@ pub struct GitignoreBuilder {
/// Pattern represents a single pattern in a gitignore file. It doesn't /// Pattern represents a single pattern in a gitignore file. It doesn't
/// know how to do glob matching directly, but it does store additional /// know how to do glob matching directly, but it does store additional
/// options on a pattern, such as whether it's whitelisted. /// options on a pattern, such as whether it's whitelisted.
#[derive(Clone, Debug, Default)] #[derive(Clone, Debug)]
struct Pattern { pub struct Pattern {
pat: String, /// The file path that this pattern was extracted from (may be empty).
whitelist: bool, // prefix of '!' pub from: PathBuf,
only_dir: bool, // suffix of '/' /// The original glob pattern string.
pub original: String,
/// The actual glob pattern string used to convert to a regex.
pub pat: String,
/// Whether this is a whitelisted pattern or not.
pub whitelist: bool,
/// Whether this pattern should only match directories or not.
pub only_dir: bool,
} }
impl GitignoreBuilder { impl GitignoreBuilder {
@ -222,7 +233,7 @@ impl GitignoreBuilder {
let rdr = io::BufReader::new(try!(File::open(&path))); let rdr = io::BufReader::new(try!(File::open(&path)));
// println!("adding ignores from: {}", path.as_ref().display()); // println!("adding ignores from: {}", path.as_ref().display());
for line in rdr.lines() { for line in rdr.lines() {
try!(self.add(&try!(line))); try!(self.add(&path, &try!(line)));
} }
Ok(()) Ok(())
} }
@ -230,7 +241,7 @@ impl GitignoreBuilder {
/// Add each pattern line from the string given. /// Add each pattern line from the string given.
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> { pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
for line in gitignore.lines() { for line in gitignore.lines() {
try!(self.add(line)); try!(self.add("", line));
} }
Ok(()) Ok(())
} }
@ -238,11 +249,21 @@ impl GitignoreBuilder {
/// Add a line from a gitignore file to this builder. /// Add a line from a gitignore file to this builder.
/// ///
/// If the line could not be parsed as a glob, then an error is returned. /// If the line could not be parsed as a glob, then an error is returned.
pub fn add(&mut self, mut line: &str) -> Result<(), Error> { pub fn add<P: AsRef<Path>>(
&mut self,
from: P,
mut line: &str,
) -> Result<(), Error> {
if line.is_empty() { if line.is_empty() {
return Ok(()); return Ok(());
} }
let mut pat = Pattern::default(); let mut pat = Pattern {
from: from.as_ref().to_path_buf(),
original: line.to_string(),
pat: String::new(),
whitelist: false,
only_dir: false,
};
let mut opts = glob::MatchOptions::default(); let mut opts = glob::MatchOptions::default();
let has_slash = line.chars().any(|c| c == '/'); let has_slash = line.chars().any(|c| c == '/');
// If the line starts with an escaped '!', then remove the escape. // If the line starts with an escaped '!', then remove the escape.
@ -352,6 +373,7 @@ mod tests {
ignored!(ig22, ROOT, r"\#foo", "#foo"); ignored!(ig22, ROOT, r"\#foo", "#foo");
ignored!(ig23, ROOT, "foo", "./foo"); ignored!(ig23, ROOT, "foo", "./foo");
ignored!(ig24, ROOT, "target", "grep/target"); ignored!(ig24, ROOT, "target", "grep/target");
ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
not_ignored!(ignot1, ROOT, "amonths", "months"); not_ignored!(ignot1, ROOT, "amonths", "months");
not_ignored!(ignot2, ROOT, "monthsa", "months"); not_ignored!(ignot2, ROOT, "monthsa", "months");

View File

@ -1,7 +1,7 @@
/*! /*!
The glob submodule provides standard shell globbing, but is specifically The glob module provides standard shell globbing, but is specifically
implemented by converting glob syntax to regular expressions. The reasoning implemented by converting glob syntax to regular expressions. The reasoning is
is two fold: two fold:
1. The regex library is *really* fast. Regaining performance in a distinct 1. The regex library is *really* fast. Regaining performance in a distinct
implementation of globbing is non-trivial. implementation of globbing is non-trivial.

View File

@ -56,20 +56,41 @@ pub struct Ignore {
/// A stack of ignore patterns at each directory level of traversal. /// A stack of ignore patterns at each directory level of traversal.
/// A directory that contributes no ignore patterns is `None`. /// A directory that contributes no ignore patterns is `None`.
stack: Vec<Option<IgnoreDir>>, stack: Vec<Option<IgnoreDir>>,
// TODO(burntsushi): Add other patterns from the command line here. ignore_hidden: bool,
} }
impl Ignore { impl Ignore {
/// Create an empty set of ignore patterns. /// Create an empty set of ignore patterns.
pub fn new() -> Ignore { pub fn new() -> Ignore {
Ignore { stack: vec![] } Ignore {
stack: vec![],
ignore_hidden: true,
}
}
/// Set whether hidden files/folders should be ignored (defaults to true).
pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
self.ignore_hidden = yes;
self
} }
/// Add a directory to the stack. /// Add a directory to the stack.
///
/// Note that even if this returns an error, the directory is added to the
/// stack (and therefore should be popped).
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> { pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
self.stack.push(try!(IgnoreDir::new(path))); match IgnoreDir::new(path) {
Ok(id) => {
self.stack.push(id);
Ok(()) Ok(())
} }
Err(err) => {
// Don't leave the stack in an inconsistent state.
self.stack.push(None);
Err(err)
}
}
}
/// Pop a directory from the stack. /// Pop a directory from the stack.
/// ///
@ -81,10 +102,19 @@ impl Ignore {
/// Returns true if and only if the given file path should be ignored. /// Returns true if and only if the given file path should be ignored.
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool { pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
let path = path.as_ref(); let path = path.as_ref();
if self.ignore_hidden && is_hidden(&path) {
return true;
}
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) { for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
match id.matched(path, is_dir) { match id.matched(path, is_dir) {
Match::Whitelist => return false, Match::Whitelist(ref pat) => {
Match::Ignored => return true, debug!("{} whitelisted by {:?}", path.display(), pat);
return false;
}
Match::Ignored(ref pat) => {
debug!("{} ignored by {:?}", path.display(), pat);
return true;
}
Match::None => {} Match::None => {}
} }
} }
@ -150,6 +180,14 @@ impl IgnoreDir {
} }
} }
/// Returns true if the final component of `path` starts with a `.`.
///
/// Paths with no final file name component, and file names that are not
/// valid UTF-8, are reported as not hidden.
fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    path.as_ref()
        .file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| name.starts_with("."))
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::path::Path; use std::path::Path;

View File

@ -1,7 +1,11 @@
#![allow(dead_code, unused_variables)] #![allow(dead_code, unused_variables)]
extern crate crossbeam;
extern crate docopt; extern crate docopt;
extern crate env_logger;
extern crate grep; extern crate grep;
#[macro_use]
extern crate log;
extern crate memchr; extern crate memchr;
extern crate memmap; extern crate memmap;
extern crate num_cpus; extern crate num_cpus;
@ -10,27 +14,22 @@ extern crate regex_syntax as syntax;
extern crate rustc_serialize; extern crate rustc_serialize;
extern crate walkdir; extern crate walkdir;
const USAGE: &'static str = "
Usage: xrep [options] <pattern> <path> ...
xrep is like the silver searcher, but faster than it and grep.
At least one path is required. Searching stdin isn't yet supported.
Options:
-c, --count Suppress normal output and show count of line matches.
";
use std::error::Error; use std::error::Error;
use std::io::{self, Write}; use std::io::{self, Write};
use std::path::PathBuf;
use std::process; use std::process;
use std::result; use std::result;
use std::sync::Arc;
use std::thread;
use crossbeam::sync::{MsQueue, TreiberStack};
use docopt::Docopt; use docopt::Docopt;
use grep::Grep; use grep::{Grep, GrepBuilder};
use walkdir::{WalkDir, WalkDirIterator}; use walkdir::WalkDir;
use ignore::Ignore; use ignore::Ignore;
use printer::Printer;
use search::Searcher;
macro_rules! errored { macro_rules! errored {
($($tt:tt)*) => { ($($tt:tt)*) => {
@ -48,21 +47,54 @@ macro_rules! eprintln {
mod gitignore; mod gitignore;
mod glob; mod glob;
mod ignore; mod ignore;
mod printer;
mod search;
mod walk;
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>; const USAGE: &'static str = "
Usage: xrep [options] <pattern> [<path> ...]
xrep is like the silver searcher and grep, but faster than both.
WARNING: Searching stdin isn't yet supported.
Options:
-c, --count Suppress normal output and show count of line matches.
--debug Show debug messages.
--files Print each file that would be searched
(but don't search).
-L, --follow Follow symlinks.
--hidden Search hidden directories and files.
-i, --ignore-case Case insensitive search.
--threads ARG The number of threads to use. Defaults to the number
of logical CPUs. [default: 0]
";
#[derive(RustcDecodable)] #[derive(RustcDecodable)]
struct Args { struct Args {
arg_pattern: String, arg_pattern: String,
arg_path: Vec<String>, arg_path: Vec<String>,
flag_count: bool, flag_count: bool,
flag_debug: bool,
flag_files: bool,
flag_follow: bool,
flag_hidden: bool,
flag_ignore_case: bool,
flag_threads: usize,
} }
impl Args {
    /// Wraps the given writer in a `Printer`.
    fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
        Printer::new(wtr)
    }
}
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
fn main() { fn main() {
let args: Args = Docopt::new(USAGE).and_then(|d| d.decode()) let args: Args = Docopt::new(USAGE).and_then(|d| d.decode())
.unwrap_or_else(|e| e.exit()); .unwrap_or_else(|e| e.exit());
match args.run() { match real_main(args) {
Ok(count) if count == 0 => process::exit(1),
Ok(_) => process::exit(0), Ok(_) => process::exit(0),
Err(err) => { Err(err) => {
let _ = writeln!(&mut io::stderr(), "{}", err); let _ = writeln!(&mut io::stderr(), "{}", err);
@ -71,118 +103,193 @@ fn main() {
} }
} }
impl Args { fn real_main(args: Args) -> Result<()> {
fn run(&self) -> Result<u64> { let mut logb = env_logger::LogBuilder::new();
if self.arg_path.is_empty() { if args.flag_debug {
return errored!("Searching stdin is not currently supported."); logb.filter(None, log::LogLevelFilter::Debug);
}
let mut stdout = io::BufWriter::new(io::stdout());
let mut ig = Ignore::new();
for p in &self.arg_path {
let mut it = WalkEventIter::from(WalkDir::new(p));
loop {
let ev = match it.next() {
None => break,
Some(Ok(ev)) => ev,
Some(Err(err)) => {
eprintln!("{}", err);
continue;
}
};
match ev {
WalkEvent::Exit => {
ig.pop();
}
WalkEvent::Dir(ent) => {
try!(ig.push(ent.path()));
if is_hidden(&ent) || ig.ignored(ent.path(), true) {
// if is_hidden(&ent) {
it.it.skip_current_dir();
continue;
}
}
WalkEvent::File(ent) => {
if is_hidden(&ent) || ig.ignored(ent.path(), false) {
// if is_hidden(&ent) {
continue;
}
let _ = writeln!(
&mut stdout, "{}", ent.path().display());
}
}
}
}
Ok(0)
}
fn run_mmap_count_only(&self, searcher: &Grep) -> Result<u64> {
use memmap::{Mmap, Protection};
assert!(self.arg_path.len() == 1);
let mut wtr = io::BufWriter::new(io::stdout());
let mmap = try!(Mmap::open_path(&self.arg_path[0], Protection::Read));
let text = unsafe { mmap.as_slice() };
let count = searcher.iter(text).count() as u64;
try!(writeln!(wtr, "{}", count));
Ok(count)
}
}
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
depth: usize,
it: walkdir::Iter,
next: Option<result::Result<walkdir::DirEntry, walkdir::Error>>,
}
#[derive(Debug)]
enum WalkEvent {
Dir(walkdir::DirEntry),
File(walkdir::DirEntry),
Exit,
}
impl From<walkdir::WalkDir> for WalkEventIter {
fn from(it: walkdir::WalkDir) -> WalkEventIter {
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
}
}
impl Iterator for WalkEventIter {
type Item = io::Result<WalkEvent>;
fn next(&mut self) -> Option<io::Result<WalkEvent>> {
let dent = self.next.take().or_else(|| self.it.next());
let depth = match dent {
None => 0,
Some(Ok(ref dent)) => dent.depth(),
Some(Err(ref err)) => err.depth(),
};
if depth < self.depth {
self.depth -= 1;
self.next = dent;
return Some(Ok(WalkEvent::Exit));
}
self.depth = depth;
match dent {
None => None,
Some(Err(err)) => Some(Err(From::from(err))),
Some(Ok(dent)) => {
if dent.file_type().is_dir() {
self.depth += 1;
Some(Ok(WalkEvent::Dir(dent)))
} else { } else {
Some(Ok(WalkEvent::File(dent))) logb.filter(None, log::LogLevelFilter::Warn);
}
if let Err(err) = logb.init() {
return errored!("failed to initialize logger: {}", err);
}
let mut main = Main::new(args);
try!(main.run_workers());
let writer = main.run_writer();
main.scan();
main.finish_workers();
main.chan_results.push(Message::Quit);
writer.join().unwrap();
Ok(())
}
type ChanWork = Arc<MsQueue<Message<Work>>>;
type ChanResults = Arc<MsQueue<Message<Vec<u8>>>>;
/// A message sent over the work and result queues.
enum Message<T> {
    /// A payload to process.
    Some(T),
    /// Tells the receiving loop to stop popping from its queue.
    Quit,
}
/// State owned by the main thread: the parsed arguments, both queues, the
/// buffer pool and the handles of the spawned worker threads.
struct Main {
    /// Parsed command line arguments, shared with worker and writer threads.
    args: Arc<Args>,
    /// Queue of files to search; filled by `scan`, drained by the workers.
    chan_work: ChanWork,
    /// Queue of filled output buffers; pushed by workers, drained by the
    /// writer.
    chan_results: ChanResults,
    /// Pool of reusable output buffers.
    bufs: Arc<Bufs>,
    /// Join handles of the threads started by `run_worker`.
    workers: Vec<thread::JoinHandle<()>>,
}
impl Main {
fn new(mut args: Args) -> Main {
if args.arg_path.is_empty() {
args.arg_path.push("./".to_string());
}
Main {
args: Arc::new(args),
chan_work: Arc::new(MsQueue::new()),
chan_results: Arc::new(MsQueue::new()),
bufs: Arc::new(Bufs::new()),
workers: vec![],
} }
} }
fn scan(&mut self) {
for p in &self.args.arg_path {
if p == "-" {
eprintln!("searching <stdin> isn't yet supported");
continue;
}
let wd = WalkDir::new(p).follow_links(self.args.flag_follow);
let mut ig = Ignore::new();
ig.ignore_hidden(!self.args.flag_hidden);
for ent in walk::Iter::new(ig, wd) {
let mut path = ent.path();
if let Ok(p) = path.strip_prefix("./") {
path = p;
}
self.chan_work.push(Message::Some(Work {
path: path.to_path_buf(),
out: self.bufs.pop(),
}));
}
}
}
fn run_writer(&self) -> thread::JoinHandle<()> {
let wtr = Writer {
args: self.args.clone(),
chan_results: self.chan_results.clone(),
bufs: self.bufs.clone(),
};
thread::spawn(move || wtr.run())
}
fn run_workers(&mut self) -> Result<()> {
let mut num = self.args.flag_threads;
if num == 0 {
num = num_cpus::get();
}
if num < 4 {
num = 1;
} else {
num -= 2;
}
println!("running {} workers", num);
for _ in 0..num {
try!(self.run_worker());
}
Ok(())
}
fn run_worker(&mut self) -> Result<()> {
let grepb =
GrepBuilder::new(&self.args.arg_pattern)
.case_insensitive(self.args.flag_ignore_case);
let worker = Worker {
args: self.args.clone(),
chan_work: self.chan_work.clone(),
chan_results: self.chan_results.clone(),
grep: try!(grepb.build()),
};
self.workers.push(thread::spawn(move || worker.run()));
Ok(())
}
fn finish_workers(&mut self) {
// We can stop all of the works by sending a quit message.
// Each worker is guaranteed to receive the quit message exactly
// once, so we only need to send `self.workers.len()` of them
for _ in 0..self.workers.len() {
self.chan_work.push(Message::Quit);
}
// Now wait for each to finish.
while let Some(thread) = self.workers.pop() {
thread.join().unwrap();
} }
} }
} }
fn is_hidden(ent: &walkdir::DirEntry) -> bool { struct Writer {
ent.depth() > 0 && args: Arc<Args>,
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false) chan_results: ChanResults,
bufs: Arc<Bufs>,
}
impl Writer {
    /// Pops result buffers until a `Message::Quit` arrives, writing each one
    /// to stdout and then returning it to the buffer pool.
    fn run(self) {
        let mut out = io::BufWriter::new(io::stdout());
        loop {
            let buf = match self.chan_results.pop() {
                Message::Some(buf) => buf,
                Message::Quit => break,
            };
            // Write errors are deliberately ignored.
            let _ = out.write_all(&buf);
            self.bufs.push(buf);
        }
    }
}
/// A single unit of work handed to a worker thread.
struct Work {
    /// The file to search.
    path: PathBuf,
    /// The buffer that the worker writes its output into.
    out: Vec<u8>,
}
/// The state owned by one worker thread.
struct Worker {
    /// Parsed command line arguments.
    args: Arc<Args>,
    /// Queue that work items are popped from.
    chan_work: ChanWork,
    /// Queue that filled output buffers are pushed onto.
    chan_results: ChanResults,
    /// The matcher used to search each file.
    grep: Grep,
}
impl Worker {
    /// Pops work items until a `Message::Quit` arrives, searching each file
    /// and pushing the resulting output buffer onto the results queue.
    ///
    /// A file that cannot be opened or mapped is reported on stderr and
    /// skipped. It must not take down the worker thread, since one
    /// unreadable file would otherwise abort the whole search.
    fn run(self) {
        while let Message::Some(mut work) = self.chan_work.pop() {
            work.out.clear();
            let searcher = match Searcher::new(&self.grep, work.path) {
                Ok(searcher) => searcher,
                Err(err) => {
                    eprintln!("{}", err);
                    // Send the (empty) buffer on anyway so that the writer
                    // returns it to the pool.
                    self.chan_results.push(Message::Some(work.out));
                    continue;
                }
            };
            let printer = self.args.printer(work.out);
            let buf = searcher.search(printer);
            self.chan_results.push(Message::Some(buf));
        }
    }
}
/// A pool of buffers used by each worker thread to write matches.
struct Bufs {
    /// The buffers currently available for reuse.
    bufs: TreiberStack<Vec<u8>>,
}
impl Bufs {
pub fn new() -> Bufs {
Bufs { bufs: TreiberStack::new() }
}
pub fn pop(&self) -> Vec<u8> {
match self.bufs.pop() {
None => vec![],
Some(buf) => buf,
}
}
pub fn push(&self, buf: Vec<u8>) {
self.bufs.push(buf);
}
} }

50
src/printer.rs Normal file
View File

@ -0,0 +1,50 @@
use std::io;
use std::path::Path;
use grep::Match;
// Like `writeln!`, but discards the result of the write.
macro_rules! wln {
    ($($tt:tt)*) => {
        let _ = writeln!($($tt)*);
    }
}
/// Printer formats search output and writes it to the wrapped writer.
pub struct Printer<W> {
    /// The destination for everything that is printed.
    wtr: W,
}
impl<W: io::Write> Printer<W> {
    /// Creates a printer that writes to `wtr`.
    pub fn new(wtr: W) -> Printer<W> {
        Printer {
            wtr: wtr,
        }
    }

    /// Consumes the printer and returns the underlying writer.
    pub fn into_inner(self) -> W {
        self.wtr
    }

    /// Prints the given path followed by a newline.
    pub fn path<P: AsRef<Path>>(&mut self, path: P) {
        wln!(&mut self.wtr, "{}", path.as_ref().display());
    }

    /// Prints the given count followed by a newline.
    pub fn count(&mut self, count: u64) {
        wln!(&mut self.wtr, "{}", count);
    }

    /// Prints a single match as `path:text`, where `text` is the part of
    /// `buf` covered by `m`, followed by a newline.
    ///
    /// Write errors are ignored.
    pub fn matched<P: AsRef<Path>>(
        &mut self,
        path: P,
        buf: &[u8],
        m: &Match,
    ) {
        // `write` is allowed to write only part of its input, which would
        // silently truncate output for writers that do so. `write_all`
        // keeps going until everything is written or an error occurs.
        let _ = self.wtr.write_all(path.as_ref().to_string_lossy().as_bytes());
        let _ = self.wtr.write_all(b":");
        let _ = self.wtr.write_all(&buf[m.start()..m.end()]);
        let _ = self.wtr.write_all(b"\n");
    }

    /// Prints a notice that the binary file at `path` contains a match.
    pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
        wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display());
    }
}

144
src/search.rs Normal file
View File

@ -0,0 +1,144 @@
/*!
The search module is responsible for searching a single file and printing
matches.
*/
use std::cmp;
use std::error::Error as StdError;
use std::fmt;
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
use grep::Grep;
use memchr::memchr;
use memmap::{Mmap, Protection};
use printer::Printer;
/// Error describes errors that can occur while searching.
#[derive(Debug)]
pub enum Error {
    /// An I/O or memory map error, along with the path that originated it.
    Io {
        err: io::Error,
        path: PathBuf,
    }
}

impl Error {
    /// Pairs an I/O error with the path that was being accessed.
    fn from_io<P: AsRef<Path>>(err: io::Error, path: P) -> Error {
        let path = path.as_ref().to_path_buf();
        Error::Io { err: err, path: path }
    }
}

impl StdError for Error {
    fn description(&self) -> &str {
        let Error::Io { ref err, .. } = *self;
        err.description()
    }

    fn cause(&self) -> Option<&StdError> {
        let Error::Io { ref err, .. } = *self;
        Some(err)
    }
}

impl fmt::Display for Error {
    /// Formats the error as `<path>: <underlying error>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Error::Io { ref err, ref path } = *self;
        write!(f, "{}: {}", path.display(), err)
    }
}
/// Searcher searches a memory mapped buffer.
///
/// The `'g` lifetime refers to the lifetime of the underlying matcher.
pub struct Searcher<'g> {
    /// The matcher used to find matches.
    grep: &'g Grep,
    /// The path of the file being searched.
    path: PathBuf,
    /// The memory map of the file, or `None` if the file is empty.
    mmap: Option<Mmap>,
}
impl<'g> Searcher<'g> {
/// Create a new memory map based searcher using the given matcher for the
/// file path given.
pub fn new<P: AsRef<Path>>(
grep: &'g Grep,
path: P,
) -> Result<Searcher<'g>, Error> {
let file = try!(File::open(&path).map_err(|err| {
Error::from_io(err, &path)
}));
let md = try!(file.metadata().map_err(|err| {
Error::from_io(err, &path)
}));
let mmap =
if md.len() == 0 {
None
} else {
Some(try!(Mmap::open(&file, Protection::Read).map_err(|err| {
Error::from_io(err, &path)
})))
};
Ok(Searcher {
grep: grep,
path: path.as_ref().to_path_buf(),
mmap: mmap,
})
}
/// Execute the search, writing the results to the printer given and
/// returning the underlying buffer.
pub fn search<W: io::Write>(&self, printer: Printer<W>) -> W {
Search {
grep: &self.grep,
path: &*self.path,
buf: self.buf(),
printer: printer,
}.run()
}
/// Execute the search, returning a count of the number of hits.
pub fn count(&self) -> u64 {
self.grep.iter(self.buf()).count() as u64
}
fn buf(&self) -> &[u8] {
self.mmap.as_ref().map(|m| unsafe { m.as_slice() }).unwrap_or(&[])
}
}
/// The state of a single search over one buffer.
struct Search<'a, W> {
    /// The matcher used to find matches.
    grep: &'a Grep,
    /// The path of the file being searched (used when printing).
    path: &'a Path,
    /// The bytes to search.
    buf: &'a [u8],
    /// Where matches are printed.
    printer: Printer<W>,
}
impl<'a, W: io::Write> Search<'a, W> {
    /// Runs the search and returns the printer's underlying writer.
    ///
    /// For a buffer that looks binary, a single notice is printed if there
    /// is at least one match. Otherwise every match is printed.
    fn run(mut self) -> W {
        let binary = self.is_binary();
        let mut matches = self.grep.iter(self.buf).peekable();
        if binary && matches.peek().is_some() {
            self.printer.binary_matched(self.path);
        } else {
            for m in matches {
                self.printer.matched(self.path, self.buf, &m);
            }
        }
        self.printer.into_inner()
    }

    /// Guesses whether the buffer is binary: it either starts with `%PDF`
    /// or has a NUL byte somewhere in its first 1024 bytes.
    fn is_binary(&self) -> bool {
        let head = &self.buf[..cmp::min(1024, self.buf.len())];
        head.starts_with(b"%PDF") || memchr(b'\x00', head).is_some()
    }
}

142
src/walk.rs Normal file
View File

@ -0,0 +1,142 @@
/*!
The walk module implements a recursive directory iterator (using the `walkdir`
crate) that can efficiently skip and ignore files and directories specified in
a user's ignore patterns.
*/
use walkdir::{self, DirEntry, WalkDir, WalkDirIterator};
use ignore::Ignore;
/// Iter is a recursive directory iterator over file paths in a directory.
/// Only file paths that should be searched are yielded.
pub struct Iter {
    /// The ignore patterns, kept in step with the directory traversal.
    ig: Ignore,
    /// The underlying stream of directory/file/exit events.
    it: WalkEventIter,
}
impl Iter {
    /// Build a recursive directory iterator from a set of ignore patterns
    /// and a walkdir iterator.
    pub fn new(ig: Ignore, wd: WalkDir) -> Iter {
        let events = WalkEventIter::from(wd);
        Iter { ig: ig, it: events }
    }

    /// Reports whether this entry should be skipped.
    ///
    /// The root (depth 0) is never skipped. Everything else is skipped
    /// exactly when the ignore patterns say so.
    fn skip_entry(&self, ent: &DirEntry) -> bool {
        ent.depth() > 0
            && self.ig.ignored(ent.path(), ent.file_type().is_dir())
    }
}
impl Iterator for Iter {
    type Item = DirEntry;

    /// Yields the next regular file that isn't ignored. Traversal errors
    /// are printed to stderr and otherwise skipped over.
    fn next(&mut self) -> Option<DirEntry> {
        loop {
            let event = match self.it.next() {
                None => return None,
                Some(Err(err)) => {
                    eprintln!("{}", err);
                    continue;
                }
                Some(Ok(event)) => event,
            };
            match event {
                WalkEvent::Exit => {
                    self.ig.pop();
                }
                WalkEvent::Dir(ent) => {
                    if self.skip_entry(&ent) {
                        self.it.it.skip_current_dir();
                        // An Exit event (and thus a pop) still follows for
                        // this directory, so it has to go on the stack even
                        // though it is skipped. Whether the push fails is
                        // of no interest here.
                        let _ = self.ig.push(ent.path());
                    } else if let Err(err) = self.ig.push(ent.path()) {
                        eprintln!("{}", err);
                        self.it.it.skip_current_dir();
                    }
                }
                WalkEvent::File(ent) => {
                    // Anything that isn't a regular file (e.g., a symlink)
                    // is passed over, as is anything ignored.
                    if !self.skip_entry(&ent) && ent.file_type().is_file() {
                        return Some(ent);
                    }
                }
            }
        }
    }
}
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
/// accurately describes the directory tree. Namely, it emits events that are
/// one of three types: directory, file or "exit." An "exit" event means that
/// the entire contents of a directory have been enumerated.
struct WalkEventIter {
    /// The depth at which the next entry is expected if it is inside the
    /// most recently yielded directory.
    depth: usize,
    /// The underlying walkdir iterator.
    it: walkdir::Iter,
    /// An entry already pulled from `it` but held back while "exit" events
    /// are emitted.
    next: Option<Result<DirEntry, walkdir::Error>>,
}
/// A single event emitted by `WalkEventIter`.
#[derive(Debug)]
enum WalkEvent {
    /// A directory was encountered.
    Dir(DirEntry),
    /// A non-directory entry was encountered.
    File(DirEntry),
    /// All of the contents of a directory have been enumerated.
    Exit,
}
impl From<WalkDir> for WalkEventIter {
    /// Starts iterating the given `WalkDir` at depth 0 with nothing held
    /// back.
    fn from(it: WalkDir) -> WalkEventIter {
        WalkEventIter { depth: 0, it: it.into_iter(), next: None }
    }
}
impl Iterator for WalkEventIter {
    type Item = walkdir::Result<WalkEvent>;

    /// Returns the next directory, file or exit event.
    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
        // Use the entry held back by a previous call if there is one,
        // otherwise pull a new one from walkdir.
        let dent = self.next.take().or_else(|| self.it.next());
        // The end of iteration counts as depth 0, so that an exit event is
        // emitted for every directory that is still open.
        let depth = match dent {
            None => 0,
            Some(Ok(ref dent)) => dent.depth(),
            Some(Err(ref err)) => err.depth(),
        };
        if depth < self.depth {
            // The entry is shallower than expected, so a directory has
            // ended. Emit one exit event per call and hold the entry back
            // until the depths line up.
            self.depth -= 1;
            self.next = dent;
            return Some(Ok(WalkEvent::Exit));
        }
        self.depth = depth;
        match dent {
            None => None,
            Some(Err(err)) => Some(Err(err)),
            Some(Ok(dent)) => {
                if dent.file_type().is_dir() {
                    // Entries inside this directory are one level deeper.
                    // Anything at this directory's own depth (or above)
                    // therefore triggers an exit event.
                    self.depth += 1;
                    Some(Ok(WalkEvent::Dir(dent)))
                } else {
                    Some(Ok(WalkEvent::File(dent)))
                }
            }
        }
    }
}
/// Returns true if the entry is below the root of the walk (depth > 0) and
/// its file name starts with a `.`.
///
/// File names that are not valid UTF-8 are reported as not hidden.
fn is_hidden(ent: &DirEntry) -> bool {
    if ent.depth() == 0 {
        return false;
    }
    match ent.file_name().to_str() {
        Some(name) => name.starts_with("."),
        None => false,
    }
}