Implementing core functionality.

Initially experimenting with crossbeam to manage synchronization.
2025-05-19 01:30:21 -07:00 · 2016-08-28 01:37:12 -04:00 · 2016-08-28 01:37:12 -04:00 · 1c8379f55a
commit 1c8379f55a
parent 065c449980
9 changed files with 652 additions and 146 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@ -19,8 +19,11 @@ path = "src/main.rs"
 name = "xrep"
 [dependencies]
 crossbeam = "0.2"
 docopt = "0.6"
 env_logger = "0.3"
 grep = { version = "0.1", path = "grep" }
 log = "0.3"
 memchr = "0.1"
 memmap = "0.2"
 num_cpus = "1"
--- a/grep/src/lib.rs
+++ b/grep/src/lib.rs
@ -12,7 +12,7 @@ use std::error;
 use std::fmt;
 use std::result;
-pub use search::{Grep, GrepBuilder};
+pub use search::{Grep, GrepBuilder, Iter, Match};
 mod literals;
 mod nonl;
--- a/src/gitignore.rs
+++ b/src/gitignore.rs
@ -136,7 +136,8 @@ impl Gitignore {
    pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
        // A single regex with a bunch of alternations of glob patterns is
        // unfortunately typically faster than a regex set, so we use it as a
-        // first pass filter.
+        // first pass filter. We still need to run the RegexSet to get the most
        // recently defined glob that matched.
        if !self.set.is_match(path) {
            return Match::None;
        }
@ -145,9 +146,9 @@ impl Gitignore {
            Some(i) => &self.patterns[i],
        };
        if pat.whitelist {
-            Match::Whitelist
+            Match::Whitelist(&pat)
        } else if !pat.only_dir || is_dir {
-            Match::Ignored
+            Match::Ignored(&pat)
        } else {
            Match::None
        }
@ -155,22 +156,25 @@ impl Gitignore {
 }
 /// The result of a glob match.
 ///
 /// The lifetime `'a` refers to the lifetime of the pattern that resulted in
 /// a match (whether ignored or whitelisted).
 #[derive(Clone, Debug)]
-pub enum Match {
+pub enum Match<'a> {
    /// The path didn't match any glob in the gitignore file.
    None,
    /// The last glob matched indicates the path should be ignored.
-    Ignored,
+    Ignored(&'a Pattern),
    /// The last glob matched indicates the path should be whitelisted.
-    Whitelist,
+    Whitelist(&'a Pattern),
 }
-impl Match {
+impl<'a> Match<'a> {
    /// Returns true if the match result implies the path should be ignored.
    pub fn is_ignored(&self) -> bool {
        match *self {
-            Match::Ignored => true,
+            Match::Ignored(_) => true,
-            Match::None | Match::Whitelist => false,
+            Match::None | Match::Whitelist(_) => false,
        }
    }
 }
@ -186,11 +190,18 @@ pub struct GitignoreBuilder {
 /// Pattern represents a single pattern in a gitignore file. It doesn't
 /// know how to do glob matching directly, but it does store additional
 /// options on a pattern, such as whether it's whitelisted.
-#[derive(Clone, Debug, Default)]
+#[derive(Clone, Debug)]
-struct Pattern {
+pub struct Pattern {
-    pat: String,
+    /// The file path that this pattern was extracted from (may be empty).
-    whitelist: bool, // prefix of '!'
+    pub from: PathBuf,
-    only_dir: bool, // suffix of '/'
+    /// The original glob pattern string.
    pub original: String,
    /// The actual glob pattern string used to convert to a regex.
    pub pat: String,
    /// Whether this is a whitelisted pattern or not.
    pub whitelist: bool,
    /// Whether this pattern should only match directories or not.
    pub only_dir: bool,
 }
 impl GitignoreBuilder {
@ -222,7 +233,7 @@ impl GitignoreBuilder {
        let rdr = io::BufReader::new(try!(File::open(&path)));
        // println!("adding ignores from: {}", path.as_ref().display());
        for line in rdr.lines() {
-            try!(self.add(&try!(line)));
+            try!(self.add(&path, &try!(line)));
        }
        Ok(())
    }
@ -230,7 +241,7 @@ impl GitignoreBuilder {
    /// Add each pattern line from the string given.
    pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
        for line in gitignore.lines() {
-            try!(self.add(line));
+            try!(self.add("", line));
        }
        Ok(())
    }
@ -238,11 +249,21 @@ impl GitignoreBuilder {
    /// Add a line from a gitignore file to this builder.
    ///
    /// If the line could not be parsed as a glob, then an error is returned.
-    pub fn add(&mut self, mut line: &str) -> Result<(), Error> {
+    pub fn add<P: AsRef<Path>>(
        &mut self,
        from: P,
        mut line: &str,
    ) -> Result<(), Error> {
        if line.is_empty() {
            return Ok(());
        }
-        let mut pat = Pattern::default();
+        let mut pat = Pattern {
            from: from.as_ref().to_path_buf(),
            original: line.to_string(),
            pat: String::new(),
            whitelist: false,
            only_dir: false,
        };
        let mut opts = glob::MatchOptions::default();
        let has_slash = line.chars().any(|c| c == '/');
        // If the line starts with an escaped '!', then remove the escape.
@ -352,6 +373,7 @@ mod tests {
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
--- a/src/glob.rs
+++ b/src/glob.rs
@ -1,7 +1,7 @@
 /*!
-The glob submodule provides standard shell globbing, but is specifically
+The glob module provides standard shell globbing, but is specifically
-implemented by converting glob syntax to regular expressions. The reasoning
+implemented by converting glob syntax to regular expressions. The reasoning is
-is two fold:
+two fold:
 1. The regex library is *really* fast. Regaining performance in a distinct
   implementation of globbing is non-trivial.
--- a/src/ignore.rs
+++ b/src/ignore.rs
@ -56,20 +56,41 @@ pub struct Ignore {
    /// A stack of ignore patterns at each directory level of traversal.
    /// A directory that contributes no ignore patterns is `None`.
    stack: Vec<Option<IgnoreDir>>,
-    // TODO(burntsushi): Add other patterns from the command line here.
+    ignore_hidden: bool,
 }
 impl Ignore {
    /// Create an empty set of ignore patterns.
    pub fn new() -> Ignore {
-        Ignore { stack: vec![] }
+        Ignore {
            stack: vec![],
            ignore_hidden: true,
        }
    }
    /// Set whether hidden files/folders should be ignored (defaults to true).
    pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
        self.ignore_hidden = yes;
        self
    }
    /// Add a directory to the stack.
    ///
    /// Note that even if this returns an error, the directory is added to the
    /// stack (and therefore should be popped).
    pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
-        self.stack.push(try!(IgnoreDir::new(path)));
+        match IgnoreDir::new(path) {
            Ok(id) => {
                self.stack.push(id);
                Ok(())
            }
            Err(err) => {
                // Don't leave the stack in an inconsistent state.
                self.stack.push(None);
                Err(err)
            }
        }
    }
    /// Pop a directory from the stack.
    ///
@ -81,10 +102,19 @@ impl Ignore {
    /// Returns true if and only if the given file path should be ignored.
    pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
        let path = path.as_ref();
        if self.ignore_hidden && is_hidden(&path) {
            return true;
        }
        for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
            match id.matched(path, is_dir) {
-                Match::Whitelist => return false,
+                Match::Whitelist(ref pat) => {
-                Match::Ignored => return true,
+                    debug!("{} whitelisted by {:?}", path.display(), pat);
                    return false;
                }
                Match::Ignored(ref pat) => {
                    debug!("{} ignored by {:?}", path.display(), pat);
                    return true;
                }
                Match::None => {}
            }
        }
@ -150,6 +180,14 @@ impl IgnoreDir {
    }
 }
 /// Returns true if the final component of `path` begins with a dot.
 ///
 /// Paths without a final file name component and names that are not valid
 /// UTF-8 are never considered hidden.
 fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    path.as_ref()
        .file_name()
        .and_then(|name| name.to_str())
        .map_or(false, |name| name.starts_with('.'))
 }
 #[cfg(test)]
 mod tests {
    use std::path::Path;
--- a/src/main.rs
+++ b/src/main.rs
@ -1,7 +1,11 @@
 #![allow(dead_code, unused_variables)]
 extern crate crossbeam;
 extern crate docopt;
 extern crate env_logger;
 extern crate grep;
 #[macro_use]
 extern crate log;
 extern crate memchr;
 extern crate memmap;
 extern crate num_cpus;
@ -10,27 +14,22 @@ extern crate regex_syntax as syntax;
 extern crate rustc_serialize;
 extern crate walkdir;
 const USAGE: &'static str = "
 Usage: xrep [options] <pattern> <path> ...
 xrep is like the silver searcher, but faster than it and grep.
 At least one path is required. Searching stdin isn't yet supported.
 Options:
    -c, --count   Suppress normal output and show count of line matches.
 ";
 use std::error::Error;
 use std::io::{self, Write};
 use std::path::PathBuf;
 use std::process;
 use std::result;
 use std::sync::Arc;
 use std::thread;
 use crossbeam::sync::{MsQueue, TreiberStack};
 use docopt::Docopt;
-use grep::Grep;
+use grep::{Grep, GrepBuilder};
-use walkdir::{WalkDir, WalkDirIterator};
+use walkdir::WalkDir;
 use ignore::Ignore;
 use printer::Printer;
 use search::Searcher;
 macro_rules! errored {
    ($($tt:tt)*) => {
@ -48,21 +47,54 @@ macro_rules! eprintln {
 mod gitignore;
 mod glob;
 mod ignore;
 mod printer;
 mod search;
 mod walk;
-pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
+const USAGE: &'static str = "
 Usage: xrep [options] <pattern> [<path> ...]
 xrep is like the silver searcher and grep, but faster than both.
 WARNING: Searching stdin isn't yet supported.
 Options:
    -c, --count         Suppress normal output and show count of line matches.
    --debug             Show debug messages.
    --files             Print each file that would be searched
                        (but don't search).
    -L, --follow        Follow symlinks.
    --hidden            Search hidden directories and files.
    -i, --ignore-case   Case insensitive search.
    --threads ARG       The number of threads to use. Defaults to the number
                        of logical CPUs. [default: 0]
 ";
 /// The command line arguments, decoded from the docopt usage string.
 ///
 /// Field names follow docopt's decoding convention: `arg_*` fields hold
 /// positional arguments and `flag_*` fields hold options.
 #[derive(RustcDecodable)]
 struct Args {
    /// The pattern to search for (`<pattern>`).
    arg_pattern: String,
    /// The paths to search (`<path> ...`); may be empty.
    arg_path: Vec<String>,
    /// `-c, --count`: suppress normal output and show count of line matches.
    flag_count: bool,
    /// `--debug`: show debug messages.
    flag_debug: bool,
    /// `--files`: print each file that would be searched (but don't search).
    flag_files: bool,
    /// `-L, --follow`: follow symlinks.
    flag_follow: bool,
    /// `--hidden`: search hidden directories and files.
    flag_hidden: bool,
    /// `-i, --ignore-case`: case insensitive search.
    flag_ignore_case: bool,
    /// `--threads ARG`: the number of threads to use; `0` means "use the
    /// number of logical CPUs".
    flag_threads: usize,
 }
 impl Args {
    /// Build a printer that writes its output to `wtr`.
    ///
    /// No command line flag influences the printer yet; this is the single
    /// place where printers are constructed from the arguments.
    fn printer<W: io::Write>(&self, wtr: W) -> Printer<W> {
        Printer::new(wtr)
    }
 }
 pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
 fn main() {
    let args: Args = Docopt::new(USAGE).and_then(|d| d.decode())
                                       .unwrap_or_else(|e| e.exit());
-    match args.run() {
+    match real_main(args) {
        Ok(count) if count == 0 => process::exit(1),
        Ok(_) => process::exit(0),
        Err(err) => {
            let _ = writeln!(&mut io::stderr(), "{}", err);
@ -71,118 +103,193 @@ fn main() {
    }
 }
-impl Args {
+fn real_main(args: Args) -> Result<()> {
-    fn run(&self) -> Result<u64> {
+    let mut logb = env_logger::LogBuilder::new();
-        if self.arg_path.is_empty() {
+    if args.flag_debug {
-            return errored!("Searching stdin is not currently supported.");
+        logb.filter(None, log::LogLevelFilter::Debug);
        }
        let mut stdout = io::BufWriter::new(io::stdout());
        let mut ig = Ignore::new();
        for p in &self.arg_path {
            let mut it = WalkEventIter::from(WalkDir::new(p));
            loop {
                let ev = match it.next() {
                    None => break,
                    Some(Ok(ev)) => ev,
                    Some(Err(err)) => {
                        eprintln!("{}", err);
                        continue;
                    }
                };
                match ev {
                    WalkEvent::Exit => {
                        ig.pop();
                    }
                    WalkEvent::Dir(ent) => {
                        try!(ig.push(ent.path()));
                        if is_hidden(&ent) || ig.ignored(ent.path(), true) {
                        // if is_hidden(&ent) {
                            it.it.skip_current_dir();
                            continue;
                        }
                    }
                    WalkEvent::File(ent) => {
                        if is_hidden(&ent) || ig.ignored(ent.path(), false) {
                        // if is_hidden(&ent) {
                            continue;
                        }
                        let _ = writeln!(
                            &mut stdout, "{}", ent.path().display());
                    }
                }
            }
        }
        Ok(0)
    }
    fn run_mmap_count_only(&self, searcher: &Grep) -> Result<u64> {
        use memmap::{Mmap, Protection};
        assert!(self.arg_path.len() == 1);
        let mut wtr = io::BufWriter::new(io::stdout());
        let mmap = try!(Mmap::open_path(&self.arg_path[0], Protection::Read));
        let text = unsafe { mmap.as_slice() };
        let count = searcher.iter(text).count() as u64;
        try!(writeln!(wtr, "{}", count));
        Ok(count)
    }
 }
 /// WalkEventIter transforms a WalkDir iterator into an iterator that more
 /// accurately describes the directory tree. Namely, it emits events that are
 /// one of three types: directory, file or "exit." An "exit" event means that
 /// the entire contents of a directory have been enumerated.
 struct WalkEventIter {
    depth: usize,
    it: walkdir::Iter,
    next: Option<result::Result<walkdir::DirEntry, walkdir::Error>>,
 }
 #[derive(Debug)]
 enum WalkEvent {
    Dir(walkdir::DirEntry),
    File(walkdir::DirEntry),
    Exit,
 }
 impl From<walkdir::WalkDir> for WalkEventIter {
    fn from(it: walkdir::WalkDir) -> WalkEventIter {
        WalkEventIter { depth: 0, it: it.into_iter(), next: None }
    }
 }
 impl Iterator for WalkEventIter {
    type Item = io::Result<WalkEvent>;
    fn next(&mut self) -> Option<io::Result<WalkEvent>> {
        let dent = self.next.take().or_else(|| self.it.next());
        let depth = match dent {
            None => 0,
            Some(Ok(ref dent)) => dent.depth(),
            Some(Err(ref err)) => err.depth(),
        };
        if depth < self.depth {
            self.depth -= 1;
            self.next = dent;
            return Some(Ok(WalkEvent::Exit));
        }
        self.depth = depth;
        match dent {
            None => None,
            Some(Err(err)) => Some(Err(From::from(err))),
            Some(Ok(dent)) => {
                if dent.file_type().is_dir() {
                    self.depth += 1;
                    Some(Ok(WalkEvent::Dir(dent)))
    } else {
-                    Some(Ok(WalkEvent::File(dent)))
+        logb.filter(None, log::LogLevelFilter::Warn);
    }
    if let Err(err) = logb.init() {
        return errored!("failed to initialize logger: {}", err);
    }
    let mut main = Main::new(args);
    try!(main.run_workers());
    let writer = main.run_writer();
    main.scan();
    main.finish_workers();
    main.chan_results.push(Message::Quit);
    writer.join().unwrap();
    Ok(())
 }
 type ChanWork = Arc<MsQueue<Message<Work>>>;
 type ChanResults = Arc<MsQueue<Message<Vec<u8>>>>;
 /// A message sent over one of the queues shared between threads.
 enum Message<T> {
    /// A payload for the receiver to process.
    Some(T),
    /// Tells the receiver to stop popping from the queue.
    Quit,
 }
 /// The state owned by the main thread for the duration of a search.
 struct Main {
    /// The parsed command line arguments, shared with every thread.
    args: Arc<Args>,
    /// Queue of files to search, consumed by the worker threads.
    chan_work: ChanWork,
    /// Queue of filled output buffers, consumed by the writer thread.
    chan_results: ChanResults,
    /// Pool of output buffers handed out with each unit of work.
    bufs: Arc<Bufs>,
    /// Join handles of the worker threads started so far.
    workers: Vec<thread::JoinHandle<()>>,
 }
 impl Main {
    fn new(mut args: Args) -> Main {
        if args.arg_path.is_empty() {
            args.arg_path.push("./".to_string());
        }
        Main {
            args: Arc::new(args),
            chan_work: Arc::new(MsQueue::new()),
            chan_results: Arc::new(MsQueue::new()),
            bufs: Arc::new(Bufs::new()),
            workers: vec![],
        }
    }
    fn scan(&mut self) {
        for p in &self.args.arg_path {
            if p == "-" {
                eprintln!("searching <stdin> isn't yet supported");
                continue;
            }
            let wd = WalkDir::new(p).follow_links(self.args.flag_follow);
            let mut ig = Ignore::new();
            ig.ignore_hidden(!self.args.flag_hidden);
            for ent in walk::Iter::new(ig, wd) {
                let mut path = ent.path();
                if let Ok(p) = path.strip_prefix("./") {
                    path = p;
                }
                self.chan_work.push(Message::Some(Work {
                    path: path.to_path_buf(),
                    out: self.bufs.pop(),
                }));
            }
        }
    }
    fn run_writer(&self) -> thread::JoinHandle<()> {
        let wtr = Writer {
            args: self.args.clone(),
            chan_results: self.chan_results.clone(),
            bufs: self.bufs.clone(),
        };
        thread::spawn(move || wtr.run())
    }
    fn run_workers(&mut self) -> Result<()> {
        let mut num = self.args.flag_threads;
        if num == 0 {
            num = num_cpus::get();
        }
        if num < 4 {
            num = 1;
        } else {
            num -= 2;
        }
        println!("running {} workers", num);
        for _ in 0..num {
            try!(self.run_worker());
        }
        Ok(())
    }
    fn run_worker(&mut self) -> Result<()> {
        let grepb =
            GrepBuilder::new(&self.args.arg_pattern)
            .case_insensitive(self.args.flag_ignore_case);
        let worker = Worker {
            args: self.args.clone(),
            chan_work: self.chan_work.clone(),
            chan_results: self.chan_results.clone(),
            grep: try!(grepb.build()),
        };
        self.workers.push(thread::spawn(move || worker.run()));
        Ok(())
    }
    fn finish_workers(&mut self) {
        // We can stop all of the works by sending a quit message.
        // Each worker is guaranteed to receive the quit message exactly
        // once, so we only need to send `self.workers.len()` of them
        for _ in 0..self.workers.len() {
            self.chan_work.push(Message::Quit);
        }
        // Now wait for each to finish.
        while let Some(thread) = self.workers.pop() {
            thread.join().unwrap();
        }
    }
 }
-fn is_hidden(ent: &walkdir::DirEntry) -> bool {
+struct Writer {
-    ent.depth() > 0 &&
+    args: Arc<Args>,
-    ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
+    chan_results: ChanResults,
    bufs: Arc<Bufs>,
 }
 impl Writer {
    /// Copy every result buffer to stdout until a quit message arrives.
    ///
    /// Each buffer is handed back to the buffer pool once it has been
    /// written. Errors writing to stdout are ignored.
    fn run(self) {
        let mut out = io::BufWriter::new(io::stdout());
        loop {
            let chunk = match self.chan_results.pop() {
                Message::Some(chunk) => chunk,
                Message::Quit => break,
            };
            let _ = out.write_all(&chunk);
            self.bufs.push(chunk);
        }
    }
 }
 /// A single unit of work for a worker thread: one file to search.
 struct Work {
    /// The path of the file to search.
    path: PathBuf,
    /// The buffer that the search output is written to.
    out: Vec<u8>,
 }
 /// The state owned by a single worker thread.
 struct Worker {
    /// The parsed command line arguments.
    args: Arc<Args>,
    /// Queue from which files to search are popped.
    chan_work: ChanWork,
    /// Queue onto which the output of each search is pushed.
    chan_results: ChanResults,
    /// The compiled matcher used for every file this worker searches.
    grep: Grep,
 }
 impl Worker {
    /// Search every file popped from the work queue until a quit message
    /// arrives, pushing the output of each search onto the results queue.
    ///
    /// A file that cannot be opened or memory mapped is reported on stderr
    /// and skipped; it does not stop the worker.
    fn run(self) {
        while let Message::Some(mut work) = self.chan_work.pop() {
            work.out.clear();
            let searcher = match Searcher::new(&self.grep, &work.path) {
                Ok(searcher) => searcher,
                Err(err) => {
                    eprintln!("{}", err);
                    // Send the (empty) buffer through the results queue
                    // anyway so that the writer returns it to the pool.
                    self.chan_results.push(Message::Some(work.out));
                    continue;
                }
            };
            let printer = self.args.printer(work.out);
            let buf = searcher.search(printer);
            self.chan_results.push(Message::Some(buf));
        }
    }
 }
 /// A shared pool of byte buffers that search output is written into.
 ///
 /// Buffers are taken with `pop` and handed back with `push` so that their
 /// allocations can be reused.
 struct Bufs {
    bufs: TreiberStack<Vec<u8>>,
 }
 impl Bufs {
    /// Create an empty pool.
    pub fn new() -> Bufs {
        let stack = TreiberStack::new();
        Bufs { bufs: stack }
    }
    /// Take a buffer from the pool, or create an empty one if the pool has
    /// none available.
    pub fn pop(&self) -> Vec<u8> {
        self.bufs.pop().unwrap_or_else(Vec::new)
    }
    /// Return a buffer to the pool.
    pub fn push(&self, buf: Vec<u8>) {
        self.bufs.push(buf);
    }
 }
--- a/src/printer.rs
+++ b/src/printer.rs
@ -0,0 +1,50 @@
 use std::io;
 use std::path::Path;
 use grep::Match;
 /// Like `writeln!`, but discards the result, so write errors are ignored.
 macro_rules! wln {
    ($($tt:tt)*) => {
        let _ = writeln!($($tt)*);
    }
 }
 /// Printer formats search results and writes them to an underlying writer.
 pub struct Printer<W> {
    /// The writer that all output is sent to.
    wtr: W,
 }
 impl<W: io::Write> Printer<W> {
    /// Create a new printer that writes to `wtr`.
    pub fn new(wtr: W) -> Printer<W> {
        Printer {
            wtr: wtr,
        }
    }
    /// Consume this printer and return the underlying writer.
    pub fn into_inner(self) -> W {
        self.wtr
    }
    /// Print `path` on a line of its own. Write errors are ignored.
    pub fn path<P: AsRef<Path>>(&mut self, path: P) {
        wln!(&mut self.wtr, "{}", path.as_ref().display());
    }
    /// Print `count` on a line of its own. Write errors are ignored.
    pub fn count(&mut self, count: u64) {
        wln!(&mut self.wtr, "{}", count);
    }
    /// Print the match `m` found in `buf`.
    ///
    /// The output is `path`, a colon, the bytes of `buf` spanned by the
    /// match, and a newline. Write errors are ignored.
    pub fn matched<P: AsRef<Path>>(
        &mut self,
        path: P,
        buf: &[u8],
        m: &Match,
    ) {
        // `write_all` rather than `write`: a single `write` call is allowed
        // to write only part of the data, which would silently truncate the
        // output for writers other than `Vec<u8>`.
        let _ = self.wtr.write_all(
            path.as_ref().to_string_lossy().as_bytes());
        let _ = self.wtr.write_all(b":");
        let _ = self.wtr.write_all(&buf[m.start()..m.end()]);
        let _ = self.wtr.write_all(b"\n");
    }
    /// Print a notice that the binary file at `path` contains a match.
    /// Write errors are ignored.
    pub fn binary_matched<P: AsRef<Path>>(&mut self, path: P) {
        wln!(&mut self.wtr, "binary file {} matches", path.as_ref().display());
    }
 }
--- a/src/search.rs
+++ b/src/search.rs
@ -0,0 +1,144 @@
 /*!
 The search module is responsible for searching a single file and printing
 matches.
 */
 use std::cmp;
 use std::error::Error as StdError;
 use std::fmt;
 use std::fs::File;
 use std::io;
 use std::path::{Path, PathBuf};
 use grep::Grep;
 use memchr::memchr;
 use memmap::{Mmap, Protection};
 use printer::Printer;
 /// The errors that can occur while searching a file.
 #[derive(Debug)]
 pub enum Error {
    /// An IO or memory map error, together with the path that originated it.
    Io {
        err: io::Error,
        path: PathBuf,
    }
 }
 impl Error {
    /// Attach the path that was being processed to an IO error.
    fn from_io<P: AsRef<Path>>(err: io::Error, path: P) -> Error {
        let path = path.as_ref().to_path_buf();
        Error::Io { err: err, path: path }
    }
 }
 impl StdError for Error {
    fn description(&self) -> &str {
        // `Io` is the only variant, so the pattern is irrefutable.
        let Error::Io { ref err, .. } = *self;
        err.description()
    }
    fn cause(&self) -> Option<&StdError> {
        let Error::Io { ref err, .. } = *self;
        Some(err)
    }
 }
 impl fmt::Display for Error {
    /// Formats the error as `<path>: <io error>`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let Error::Io { ref err, ref path } = *self;
        write!(f, "{}: {}", path.display(), err)
    }
 }
 /// Searcher searches a memory mapped buffer.
 ///
 /// The `'g` lifetime refers to the lifetime of the underlying matcher.
 pub struct Searcher<'g> {
    /// The matcher used to find matches.
    grep: &'g Grep,
    /// The path of the file being searched.
    path: PathBuf,
    /// The memory map of the file, or `None` if the file is empty.
    mmap: Option<Mmap>,
 }
 impl<'g> Searcher<'g> {
    /// Create a new memory map based searcher using the given matcher for the
    /// file path given.
    pub fn new<P: AsRef<Path>>(
        grep: &'g Grep,
        path: P,
    ) -> Result<Searcher<'g>, Error> {
        let file = try!(File::open(&path).map_err(|err| {
            Error::from_io(err, &path)
        }));
        let md = try!(file.metadata().map_err(|err| {
            Error::from_io(err, &path)
        }));
        let mmap =
            if md.len() == 0 {
                None
            } else {
                Some(try!(Mmap::open(&file, Protection::Read).map_err(|err| {
                    Error::from_io(err, &path)
                })))
            };
        Ok(Searcher {
            grep: grep,
            path: path.as_ref().to_path_buf(),
            mmap: mmap,
        })
    }
    /// Execute the search, writing the results to the printer given and
    /// returning the underlying buffer.
    pub fn search<W: io::Write>(&self, printer: Printer<W>) -> W {
        Search {
            grep: &self.grep,
            path: &*self.path,
            buf: self.buf(),
            printer: printer,
        }.run()
    }
    /// Execute the search, returning a count of the number of hits.
    pub fn count(&self) -> u64 {
        self.grep.iter(self.buf()).count() as u64
    }
    fn buf(&self) -> &[u8] {
        self.mmap.as_ref().map(|m| unsafe { m.as_slice() }).unwrap_or(&[])
    }
 }
 /// The state of a single search over one buffer.
 struct Search<'a, W> {
    /// The matcher used to find matches.
    grep: &'a Grep,
    /// The path printed alongside each result.
    path: &'a Path,
    /// The bytes being searched.
    buf: &'a [u8],
    /// Where results are reported.
    printer: Printer<W>,
 }
 impl<'a, W: io::Write> Search<'a, W> {
    /// Run the search and return the printer's underlying writer.
    ///
    /// For a buffer that looks binary, a single notice is printed if there
    /// is at least one match. Otherwise every match is printed.
    fn run(mut self) -> W {
        let binary = self.is_binary();
        let mut matches = self.grep.iter(self.buf).peekable();
        if binary && matches.peek().is_some() {
            self.printer.binary_matched(self.path);
        } else {
            for m in matches {
                self.printer.matched(self.path, self.buf, &m);
            }
        }
        self.printer.into_inner()
    }
    /// Guess whether the buffer holds binary data.
    ///
    /// A buffer is treated as binary if it starts with `%PDF` or if a NUL
    /// byte occurs within its first 1024 bytes.
    fn is_binary(&self) -> bool {
        if self.buf.starts_with(b"%PDF") {
            return true;
        }
        let limit = cmp::min(1024, self.buf.len());
        memchr(b'\x00', &self.buf[..limit]).is_some()
    }
 }
--- a/src/walk.rs
+++ b/src/walk.rs
@ -0,0 +1,142 @@
 /*!
 The walk module implements a recursive directory iterator (using the `walkdir`
 crate) that can efficiently skip and ignore files and directories specified in
 a user's ignore patterns.
 */
 use walkdir::{self, DirEntry, WalkDir, WalkDirIterator};
 use ignore::Ignore;
 /// Iter is a recursive directory iterator over file paths in a directory.
 /// Only file paths that should be searched are yielded.
 pub struct Iter {
    /// The ignore patterns, kept in sync with the directory being walked.
    ig: Ignore,
    /// The underlying stream of directory, file and exit events.
    it: WalkEventIter,
 }
 impl Iter {
    /// Build an iterator that walks `wd` and filters its entries through
    /// the ignore patterns in `ig`.
    pub fn new(ig: Ignore, wd: WalkDir) -> Iter {
        let events = WalkEventIter::from(wd);
        Iter { ig: ig, it: events }
    }
    /// Returns true if this entry should be skipped.
    ///
    /// The root of the walk (depth 0) is never skipped; every other entry
    /// is skipped exactly when the ignore patterns say so.
    fn skip_entry(&self, ent: &DirEntry) -> bool {
        ent.depth() != 0
            && self.ig.ignored(ent.path(), ent.file_type().is_dir())
    }
 }
 impl Iterator for Iter {
    type Item = DirEntry;
    /// Advance to the next file that should be searched.
    ///
    /// Errors from the underlying walk are printed to stderr and skipped.
    fn next(&mut self) -> Option<DirEntry> {
        loop {
            let event = match self.it.next() {
                None => return None,
                Some(Err(err)) => {
                    eprintln!("{}", err);
                    continue;
                }
                Some(Ok(event)) => event,
            };
            match event {
                WalkEvent::Exit => {
                    self.ig.pop();
                }
                WalkEvent::Dir(dir) => {
                    if self.skip_entry(&dir) {
                        self.it.it.skip_current_dir();
                        // An exit event is still emitted for a skipped
                        // directory, so it needs an entry on the ignore
                        // stack for that event to pop. Whether the push
                        // fails is irrelevant here.
                        let _ = self.ig.push(dir.path());
                    } else if let Err(err) = self.ig.push(dir.path()) {
                        eprintln!("{}", err);
                        self.it.it.skip_current_dir();
                    }
                }
                WalkEvent::File(file) => {
                    // Entries that aren't actually files (e.g., symlinks)
                    // are never yielded.
                    if !self.skip_entry(&file) && file.file_type().is_file() {
                        return Some(file);
                    }
                }
            }
        }
    }
 }
 /// WalkEventIter transforms a WalkDir iterator into an iterator that more
 /// accurately describes the directory tree. Namely, it emits events that are
 /// one of three types: directory, file or "exit." An "exit" event means that
 /// the entire contents of a directory have been enumerated.
 struct WalkEventIter {
    /// The depth that the next entry is expected to have if it lies inside
    /// the most recently entered directory.
    depth: usize,
    /// The underlying walkdir iterator.
    it: walkdir::Iter,
    /// An entry that was read but held back while exit events are emitted.
    next: Option<Result<DirEntry, walkdir::Error>>,
 }
 /// An event emitted by `WalkEventIter`.
 #[derive(Debug)]
 enum WalkEvent {
    /// A directory was encountered.
    Dir(DirEntry),
    /// An entry that is not a directory was encountered.
    File(DirEntry),
    /// The entire contents of a directory have been enumerated.
    Exit,
 }
 impl From<WalkDir> for WalkEventIter {
    /// Start an event stream at depth 0 with no entry held back.
    fn from(it: WalkDir) -> WalkEventIter {
        let walker = it.into_iter();
        WalkEventIter {
            depth: 0,
            it: walker,
            next: None,
        }
    }
 }
 impl Iterator for WalkEventIter {
    type Item = walkdir::Result<WalkEvent>;
    /// Produce the next directory, file or exit event.
    ///
    /// Errors from the underlying walkdir iterator are passed through.
    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
        // Prefer an entry held back by a previous call over reading a new
        // one from the underlying iterator.
        let dent = self.next.take().or_else(|| self.it.next());
        // The end of the walk counts as depth 0 so that every directory
        // still open gets its exit event.
        let depth = match dent {
            None => 0,
            Some(Ok(ref dent)) => dent.depth(),
            Some(Err(ref err)) => err.depth(),
        };
        // A shallower entry means a directory has been left. Emit one exit
        // event per call and hold the entry back until the tracked depth
        // has caught up with it.
        if depth < self.depth {
            self.depth -= 1;
            self.next = dent;
            return Some(Ok(WalkEvent::Exit));
        }
        self.depth = depth;
        match dent {
            None => None,
            Some(Err(err)) => Some(Err(err)),
            Some(Ok(dent)) => {
                if dent.file_type().is_dir() {
                    // Entering a directory: a following entry that is not
                    // deeper than this one will trigger its exit event.
                    self.depth += 1;
                    Some(Ok(WalkEvent::Dir(dent)))
                } else {
                    Some(Ok(WalkEvent::File(dent)))
                }
            }
        }
    }
 }
 /// Returns true if the entry's file name begins with a dot.
 ///
 /// The root of the walk (depth 0) and names that are not valid UTF-8 are
 /// never considered hidden.
 fn is_hidden(ent: &DirEntry) -> bool {
    if ent.depth() == 0 {
        return false;
    }
    ent.file_name()
        .to_str()
        .map_or(false, |name| name.starts_with('.'))
 }