update distributable to include readme and license

Rename xrep to ripgrep.
Hack in Windows console coloring.
2025-07-28 10:41:58 -07:00 · 2016-09-08 16:21:37 -04:00 · 2016-09-08 16:15:44 -04:00 · 2016-09-07 21:54:28 -04:00 · 2016-09-06 21:47:33 -04:00 · 2016-09-06 21:45:41 -04:00
18 changed files with 756 additions and 133 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,14 +1,14 @@
 [package]
 publish = false
-name = "xrep"
+name = "ripgrep"
 version = "0.1.0"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Line oriented search tool using Rust's regex library.
 """
-documentation = "https://github.com/BurntSushi/xrep"
-homepage = "https://github.com/BurntSushi/xrep"
-repository = "https://github.com/BurntSushi/xrep"
+documentation = "https://github.com/BurntSushi/ripgrep"
+homepage = "https://github.com/BurntSushi/ripgrep"
+repository = "https://github.com/BurntSushi/ripgrep"
 readme = "README.md"
 keywords = ["regex", "grep", "egrep", "search", "pattern"]
 license = "Unlicense/MIT"
@@ -16,7 +16,7 @@ license = "Unlicense/MIT"
 [[bin]]
 bench = false
 path = "src/main.rs"
-name = "xrep"
+name = "rg"

 [dependencies]
 crossbeam = "0.2"
--- a/appveyor.yml
+++ b/appveyor.yml
@@ -1,6 +1,6 @@
 environment:
  global:
-    PROJECT_NAME: xrep
+    PROJECT_NAME: rg
  matrix:
    # Nightly channel
    - TARGET: i686-pc-windows-gnu
@@ -32,16 +32,14 @@ build: false
 # Equivalent to Travis' `script` phase
 # TODO modify this phase as you see fit
 test_script:
-  # - cargo build --verbose
-  - cargo test matchslash2 -- --nocapture
+  - cargo test --verbose

 before_deploy:
  # Generate artifacts for release
-  - SET RUSTFLAGS="-C target-feature=+ssse3"
-  - cargo build --release --features simd-accel
+  # TODO(burntsushi): How can we enable SSSE3 on Windows?
+  - cargo build --release
  - mkdir staging
-  # TODO update this part to copy the artifacts that make sense for your project
-  - copy target\release\xrep.exe staging
+  - copy target\release\rg.exe staging
  - cd staging
    # release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
  - 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
--- a/ci/before_deploy.sh
+++ b/ci/before_deploy.sh
@@ -6,23 +6,22 @@ set -ex

 # Generate artifacts for release
 mk_artifacts() {
-    RUSTFLAGS="-C target-feature=+ssse3" cargo build --target $TARGET --release --features simd-accel
+    RUSTFLAGS="-C target-feature=+ssse3" \
+      cargo build --target $TARGET --release --features simd-accel
 }

 mk_tarball() {
    # create a "staging" directory
    local td=$(mktempd)
    local out_dir=$(pwd)
+    local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
+    mkdir "$td/$name"

-    # TODO update this part to copy the artifacts that make sense for your project
-    # NOTE All Cargo build artifacts will be under the 'target/$TARGET/{debug,release}'
-    cp target/$TARGET/release/xrep $td
+    cp target/$TARGET/release/rg "$td/$name/"
+    cp {README,UNLICENSE,COPYING,LICENSE_MIT} "$td/$name/"

    pushd $td
-
-    # release tarball will look like 'rust-everywhere-v1.2.3-x86_64-unknown-linux-gnu.tar.gz'
-    tar czf $out_dir/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz *
-
+    tar czf "$out_dir/$name.tar.gz" *
    popd
    rm -r $td
 }
--- a/ci/script.sh
+++ b/ci/script.sh
@@ -42,7 +42,7 @@ run_test_suite() {
    cargo test --target $TARGET

    # sanity check the file type
-    file target/$TARGET/debug/xrep
+    file target/$TARGET/debug/rg
 }

 main() {
--- a/grep/Cargo.toml
+++ b/grep/Cargo.toml
@@ -6,14 +6,15 @@ authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Fast line oriented regex searching as a library.
 """
-documentation = "https://github.com/BurntSushi/xrep"
-homepage = "https://github.com/BurntSushi/xrep"
-repository = "https://github.com/BurntSushi/xrep"
+documentation = "https://github.com/BurntSushi/ripgrep"
+homepage = "https://github.com/BurntSushi/ripgrep"
+repository = "https://github.com/BurntSushi/ripgrep"
 readme = "README.md"
 keywords = ["regex", "grep", "egrep", "search", "pattern"]
 license = "Unlicense/MIT"

 [dependencies]
+log = "0.3"
 memchr = "0.1"
 memmap = "0.2"
 regex = "0.1.75"
--- a/grep/src/lib.rs
+++ b/grep/src/lib.rs
@@ -4,6 +4,8 @@
 A fast line oriented regex searcher.
 */

+#[macro_use]
+extern crate log;
 extern crate memchr;
 extern crate regex;
 extern crate regex_syntax as syntax;
--- a/grep/src/literals.rs
+++ b/grep/src/literals.rs
@@ -1,13 +1,22 @@
+/*!
+The literals module is responsible for extracting *inner* literals out of the
+AST of a regular expression. Normally this is the job of the regex engine
+itself, but the regex engine doesn't look for inner literals. Since we're doing
+line based searching, we can use them, so we need to do it ourselves.
+
+Note that this implementation is incredibly suspicious. We need something more
+principled.
+*/
 use std::cmp;
 use std::iter;

 use regex::bytes::Regex;
 use syntax::{
    Expr, Literals, Lit,
-    Repeater,
+    ByteClass, ByteRange, CharClass, ClassRange, Repeater,
 };

-#[derive(Debug)]
+#[derive(Clone, Debug)]
 pub struct LiteralSets {
    prefixes: Literals,
    suffixes: Literals,
@@ -27,6 +36,7 @@ impl LiteralSets {

    pub fn to_regex(&self) -> Option<Regex> {
        if self.prefixes.all_complete() && !self.prefixes.is_empty() {
+            debug!("literal prefixes detected: {:?}", self.prefixes);
            // When this is true, the regex engine will do a literal scan.
            return None;
        }
@@ -56,13 +66,27 @@ impl LiteralSets {
        if suf_lcs.len() > lit.len() {
            lit = suf_lcs;
        }
-        if req.len() > lit.len() {
+        if req_lits.len() == 1 && req.len() > lit.len() {
            lit = req;
        }
-        if lit.is_empty() {
+
+        // Special case: if we detected an alternation of inner required
+        // literals and its longest literal is bigger than the longest
+        // prefix/suffix, then choose the alternation. In practice, this
+        // helps with case insensitive matching, which can generate lots of
+        // inner required literals.
+        let any_empty = req_lits.iter().any(|lit| lit.is_empty());
+        if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
+            debug!("required literals found: {:?}", req_lits);
+            let alts: Vec<String> =
+                req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
+            // Literals always compile.
+            Some(Regex::new(&alts.join("|")).unwrap())
+        } else if lit.is_empty() {
            None
        } else {
            // Literals always compile.
+            debug!("required literal found: {:?}", show(lit));
            Some(Regex::new(&bytes_to_regex(lit)).unwrap())
        }
    }
@@ -75,14 +99,30 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
            let s: String = chars.iter().cloned().collect();
            lits.cross_add(s.as_bytes());
        }
-        Literal { casei: true, .. } => {
-            lits.cut();
+        Literal { ref chars, casei: true } => {
+            for &c in chars {
+                let cls = CharClass::new(vec![
+                    ClassRange { start: c, end: c },
+                ]).case_fold();
+                if !lits.add_char_class(&cls) {
+                    lits.cut();
+                    return;
+                }
+            }
        }
        LiteralBytes { ref bytes, casei: false } => {
            lits.cross_add(bytes);
        }
-        LiteralBytes { casei: true, .. } => {
-            lits.cut();
+        LiteralBytes { ref bytes, casei: true } => {
+            for &b in bytes {
+                let cls = ByteClass::new(vec![
+                    ByteRange { start: b, end: b },
+                ]).case_fold();
+                if !lits.add_byte_class(&cls) {
+                    lits.cut();
+                    return;
+                }
+            }
        }
        Class(_) => {
            lits.cut();
@@ -205,3 +245,18 @@ fn bytes_to_regex(bs: &[u8]) -> String {
    }
    s
 }
+
+/// Converts arbitrary bytes to a nice string.
+fn show(bs: &[u8]) -> String {
+    // Why aren't we using this to feed to the regex? Doesn't really matter
+    // I guess. ---AG
+    use std::ascii::escape_default;
+    use std::str;
+
+    let mut nice = String::new();
+    for &b in bs {
+        let part: Vec<u8> = escape_default(b).collect();
+        nice.push_str(str::from_utf8(&part).unwrap());
+    }
+    nice
+}
--- a/grep/src/search.rs
+++ b/grep/src/search.rs
@@ -152,6 +152,7 @@ impl GrepBuilder {
                 .unicode(true)
                 .case_insensitive(self.opts.case_insensitive)
                 .parse(&self.pattern));
+        debug!("regex ast:\n{:#?}", expr);
        Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
    }
 }
@@ -253,7 +254,7 @@ impl<'b, 's> Iterator for Iter<'b, 's> {
            self.start = self.buf.len();
            return None;
        }
-        self.start = mat.end + 1;
+        self.start = mat.end;
        Some(mat)
    }
 }
--- a/src/args.rs
+++ b/src/args.rs
@@ -16,6 +16,7 @@ use ignore::Ignore;
 use out::Out;
 use printer::Printer;
 use search::{InputBuffer, Searcher};
+use search_buffer::BufferSearcher;
 use sys;
 use types::{FileTypeDef, Types, TypesBuilder};
 use walk;
@@ -27,13 +28,13 @@ use Result;
 /// If you've never heard of Docopt before, see: http://docopt.org
 /// (TL;DR: The CLI parser is generated from the usage string below.)
 const USAGE: &'static str = "
-Usage: xrep [options] <pattern> [<path> ...]
-       xrep [options] --files [<path> ...]
-       xrep [options] --type-list
-       xrep --help
-       xrep --version
+Usage: rg [options] <pattern> [<path> ...]
+       rg [options] --files [<path> ...]
+       rg [options] --type-list
+       rg --help
+       rg --version

-xrep is like the silver searcher and grep, but faster than both.
+rg combines the usability of the silver searcher with the raw speed of grep.

 Common options:
    -a, --text                 Search binary files as if they were text.
@@ -75,6 +76,11 @@ Less common options:
    -C, --context NUM
        Show NUM lines before and after each match.

+    --column
+        Show column numbers (1 based) in output. This only shows the column
+        numbers for the first match on each line. Note that this doesn't try
+        to account for Unicode. One byte is equal to one column.
+
    --context-separator ARG
        The string to use when separating non-continuous context lines. Escape
        sequences may be used. [default: --]
@@ -106,8 +112,16 @@ Less common options:
        The byte to use for a line terminator. Escape sequences may be used.
        [default: \\n]

+    --mmap
+        Search using memory maps when possible. This is enabled by default
+        when ripgrep thinks it will be faster. (Note that mmap searching
+        doesn't currently support the various context related options.)
+
+    --no-mmap
+        Never use memory maps, even when they might be faster.
+
    --no-ignore
-        Don't respect ignore files (.gitignore, .xrepignore, etc.)
+        Don't respect ignore files (.gitignore, .rgignore, etc.)

    --no-ignore-parent
        Don't respect ignore files in parent directories.
@@ -123,7 +137,7 @@ Less common options:
        (capped at 6). [default: 0]

    --version
-        Show the version number of xrep and exit.
+        Show the version number of ripgrep and exit.

 File type management options:
    --type-list
@@ -138,7 +152,7 @@ File type management options:
 ";

 /// RawArgs are the args as they are parsed from Docopt. They aren't used
-/// directly by the rest of xrep.
+/// directly by the rest of ripgrep.
 #[derive(Debug, RustcDecodable)]
 pub struct RawArgs {
    arg_pattern: String,
@@ -146,6 +160,7 @@ pub struct RawArgs {
    flag_after_context: usize,
    flag_before_context: usize,
    flag_color: String,
+    flag_column: bool,
    flag_context: usize,
    flag_context_separator: String,
    flag_count: bool,
@@ -160,10 +175,12 @@ pub struct RawArgs {
    flag_line_number: bool,
    flag_line_terminator: String,
    flag_literal: bool,
+    flag_mmap: bool,
    flag_no_heading: bool,
    flag_no_ignore: bool,
    flag_no_ignore_parent: bool,
    flag_no_line_number: bool,
+    flag_no_mmap: bool,
    flag_pretty: bool,
    flag_quiet: bool,
    flag_replace: Option<String>,
@@ -186,17 +203,20 @@ pub struct Args {
    after_context: usize,
    before_context: usize,
    color: bool,
+    column: bool,
    context_separator: Vec<u8>,
    count: bool,
    eol: u8,
    files: bool,
    follow: bool,
    glob_overrides: Option<Gitignore>,
+    grep: Grep,
    heading: bool,
    hidden: bool,
    ignore_case: bool,
    invert_match: bool,
    line_number: bool,
+    mmap: bool,
    no_ignore: bool,
    no_ignore_parent: bool,
    quiet: bool,
@@ -210,7 +230,7 @@ pub struct Args {
 }

 impl RawArgs {
-    /// Convert arguments parsed into a configuration used by xrep.
+    /// Convert arguments parsed into a configuration used by ripgrep.
    fn to_args(&self) -> Result<Args> {
        let pattern = {
            let pattern =
@@ -243,6 +263,19 @@ impl RawArgs {
            } else {
                (self.flag_after_context, self.flag_before_context)
            };
+        let mmap =
+            if before_context > 0 || after_context > 0 || self.flag_no_mmap {
+                false
+            } else if self.flag_mmap {
+                true
+            } else {
+                // If we're only searching a few paths and all of them are
+                // files, then memory maps are probably faster.
+                paths.len() <= 10 && paths.iter().all(|p| p.is_file())
+            };
+        if mmap {
+            debug!("will try to use memory maps");
+        }
        let eol = {
            let eol = unescape(&self.flag_line_terminator);
            if eol.is_empty() {
@@ -283,23 +316,32 @@ impl RawArgs {
        btypes.add_defaults();
        try!(self.add_types(&mut btypes));
        let types = try!(btypes.build());
+        let grep = try!(
+            GrepBuilder::new(&pattern)
+                .case_insensitive(self.flag_ignore_case)
+                .line_terminator(eol)
+                .build()
+        );
        let mut args = Args {
            pattern: pattern,
            paths: paths,
            after_context: after_context,
            before_context: before_context,
            color: color,
+            column: self.flag_column,
            context_separator: unescape(&self.flag_context_separator),
            count: self.flag_count,
            eol: eol,
            files: self.flag_files,
            follow: self.flag_follow,
            glob_overrides: glob_overrides,
+            grep: grep,
            heading: !self.flag_no_heading && self.flag_heading,
            hidden: self.flag_hidden,
            ignore_case: self.flag_ignore_case,
            invert_match: self.flag_invert_match,
            line_number: !self.flag_no_line_number && self.flag_line_number,
+            mmap: mmap,
            no_ignore: self.flag_no_ignore,
            no_ignore_parent: self.flag_no_ignore_parent,
            quiet: self.flag_quiet,
@@ -345,7 +387,7 @@ impl Args {
    ///
    /// If a CLI usage error occurred, then exit the process and print a usage
    /// or error message. Similarly, if the user requested the version of
-    /// xrep, then print the version and exit.
+    /// ripgrep, then print the version and exit.
    ///
    /// Also, initialize a global logger.
    pub fn parse() -> Result<Args> {
@@ -367,7 +409,7 @@ impl Args {
        raw.to_args().map_err(From::from)
    }

-    /// Returns true if xrep should print the files it will search and exit
+    /// Returns true if ripgrep should print the files it will search and exit
    /// (but not do any actual searching).
    pub fn files(&self) -> bool {
        self.files
@@ -378,12 +420,8 @@ impl Args {
    /// basic searching of regular expressions in a single buffer.
    ///
    /// The pattern and other flags are taken from the command line.
-    pub fn grep(&self) -> Result<Grep> {
-        GrepBuilder::new(&self.pattern)
-            .case_insensitive(self.ignore_case)
-            .line_terminator(self.eol)
-            .build()
-            .map_err(From::from)
+    pub fn grep(&self) -> Grep {
+        self.grep.clone()
    }

    /// Creates a new input buffer that is used in searching.
@@ -393,10 +431,16 @@ impl Args {
        inp
    }

+    /// Whether we should prefer memory maps for searching or not.
+    pub fn mmap(&self) -> bool {
+        self.mmap
+    }
+
    /// Create a new printer of individual search results that writes to the
    /// writer given.
    pub fn printer<W: Send + io::Write>(&self, wtr: W) -> Printer<W> {
        let mut p = Printer::new(wtr, self.color)
+            .column(self.column)
            .context_separator(self.context_separator.clone())
            .eol(self.eol)
            .heading(self.heading)
@@ -446,6 +490,24 @@ impl Args {
            .text(self.text)
    }

+    /// Create a new line based searcher whose configuration is taken from the
+    /// command line. This search operates on an entire file all at once (which
+    /// may have been memory mapped).
+    pub fn searcher_buffer<'a, W: Send + io::Write>(
+        &self,
+        printer: &'a mut Printer<W>,
+        grep: &'a Grep,
+        path: &'a Path,
+        buf: &'a [u8],
+    ) -> BufferSearcher<'a, W> {
+        BufferSearcher::new(printer, grep, path, buf)
+            .count(self.count)
+            .eol(self.eol)
+            .line_number(self.line_number)
+            .invert_match(self.invert_match)
+            .text(self.text)
+    }
+
    /// Returns the number of worker search threads that should be used.
    pub fn threads(&self) -> usize {
        self.threads
@@ -456,8 +518,8 @@ impl Args {
        &self.type_defs
    }

-    /// Returns true if xrep should print the type definitions currently loaded
-    /// and then exit.
+    /// Returns true if ripgrep should print the type definitions currently
+    /// loaded and then exit.
    pub fn type_list(&self) -> bool {
        self.type_list
    }
--- a/src/gitignore.rs
+++ b/src/gitignore.rs
@@ -9,7 +9,7 @@ The motivation for this submodule is performance and portability:
 2. We could shell out to a `git` sub-command like ls-files or status, but it
   seems better to not rely on the existence of external programs for a search
   tool. Besides, we need to implement this logic anyway to support things like
-   an .xrepignore file.
+   an .rgignore file.

 The key implementation detail here is that a single gitignore file is compiled
 into a single RegexSet, which can be used to report which globs match a
@@ -379,7 +379,7 @@ mod tests {
        };
    }

-    const ROOT: &'static str = "/home/foobar/rust/xrep";
+    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
--- a/src/glob.rs
+++ b/src/glob.rs
@@ -29,7 +29,6 @@ to make its way into `glob` proper.
 use std::error::Error as StdError;
 use std::fmt;
 use std::iter;
-use std::path;
 use std::str;

 use regex;
@@ -214,7 +213,7 @@ impl Pattern {
    /// regular expression and will represent the matching semantics of this
    /// glob pattern and the options given.
    pub fn to_regex_with(&self, options: &MatchOptions) -> String {
-        let sep = regex::quote(&path::MAIN_SEPARATOR.to_string());
+        let seps = regex::quote(r"/\");
        let mut re = String::new();
        re.push_str("(?-u)");
        if options.case_insensitive {
@@ -235,26 +234,27 @@ impl Pattern {
                }
                Token::Any => {
                    if options.require_literal_separator {
-                        re.push_str(&format!("[^{}]", sep));
+                        re.push_str(&format!("[^{}]", seps));
                    } else {
                        re.push_str(".");
                    }
                }
                Token::ZeroOrMore => {
                    if options.require_literal_separator {
-                        re.push_str(&format!("[^{}]*", sep));
+                        re.push_str(&format!("[^{}]*", seps));
                    } else {
                        re.push_str(".*");
                    }
                }
                Token::RecursivePrefix => {
-                    re.push_str(&format!("(?:{sep}?|.*{sep})", sep=sep));
+                    re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
                }
                Token::RecursiveSuffix => {
-                    re.push_str(&format!("(?:{sep}?|{sep}.*)", sep=sep));
+                    re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
                }
                Token::RecursiveZeroOrMore => {
-                    re.push_str(&format!("(?:{sep}|{sep}.*{sep})", sep=sep));
+                    re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
+                                         sep=seps));
                }
                Token::Class { negated, ref ranges } => {
                    re.push('[');
@@ -480,9 +480,9 @@ mod tests {
                let pat = Pattern::new($pat).unwrap();
                let path = &Path::new($path).to_str().unwrap();
                let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
-                println!("PATTERN: {}", $pat);
-                println!("REGEX: {:?}", re);
-                println!("PATH: {}", path);
+                // println!("PATTERN: {}", $pat);
+                // println!("REGEX: {:?}", re);
+                // println!("PATH: {}", path);
                assert!(!re.is_match(path.as_bytes()));
            }
        };
@@ -564,12 +564,11 @@ mod tests {
        case_insensitive: true,
        require_literal_separator: false,
    };
-    const SEP: char = ::std::path::MAIN_SEPARATOR;

    toregex!(re_casei, "a", "(?i)^a$", &CASEI);

-    toregex!(re_slash1, "?", format!("^[^{}]$", SEP), SLASHLIT);
-    toregex!(re_slash2, "*", format!("^[^{}]*$", SEP), SLASHLIT);
+    toregex!(re_slash1, "?", r"^[^/\\]$", SLASHLIT);
+    toregex!(re_slash2, "*", r"^[^/\\]*$", SLASHLIT);

    toregex!(re1, "a", "^a$");
    toregex!(re2, "?", "^.$");
--- a/src/ignore.rs
+++ b/src/ignore.rs
@@ -5,7 +5,7 @@ whether a *single* file path should be searched or not.
 In general, there are two ways to ignore a particular file:

 1. Specify an ignore rule in some "global" configuration, such as a
-   $HOME/.xrepignore or on the command line.
+   $HOME/.rgignore or on the command line.
 2. A specific ignore file (like .gitignore) found during directory traversal.

 The `IgnoreDir` type handles ignore patterns for any one particular directory
@@ -24,7 +24,7 @@ use types::Types;
 const IGNORE_NAMES: &'static [&'static str] = &[
    ".gitignore",
    ".agignore",
-    ".xrepignore",
+    ".rgignore",
 ];

 /// Represents an error that can occur when parsing a gitignore file.
@@ -257,8 +257,8 @@ pub struct IgnoreDir {
    /// A single accumulation of glob patterns for this directory, matched
    /// using gitignore semantics.
    ///
-    /// This will include patterns from xrepignore as well. The patterns are
-    /// ordered so that precedence applies automatically (e.g., xrepignore
+    /// This will include patterns from rgignore as well. The patterns are
+    /// ordered so that precedence applies automatically (e.g., rgignore
    /// patterns procede gitignore patterns).
    gi: Option<Gitignore>,
    // TODO(burntsushi): Matching other types of glob patterns that don't
@@ -422,7 +422,7 @@ mod tests {
        };
    }

-    const ROOT: &'static str = "/home/foobar/rust/xrep";
+    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
    ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
--- a/src/main.rs
+++ b/src/main.rs
@@ -34,6 +34,8 @@ use std::thread;

 use crossbeam::sync::chase_lev::{self, Steal, Stealer};
 use grep::Grep;
+use memmap::{Mmap, Protection};
+use term::Terminal;
 use walkdir::DirEntry;

 use args::Args;
@@ -61,6 +63,7 @@ mod ignore;
 mod out;
 mod printer;
 mod search;
+mod search_buffer;
 mod sys;
 mod terminal;
 mod types;
@@ -99,7 +102,7 @@ fn run(args: Args) -> Result<u64> {
                chan_work: stealer.clone(),
                inpbuf: args.input_buffer(),
                outbuf: Some(vec![]),
-                grep: try!(args.grep()),
+                grep: args.grep(),
                match_count: 0,
            };
            workers.push(thread::spawn(move || worker.run()));
@@ -196,11 +199,11 @@ impl Worker {
            let mut printer = self.args.printer(outbuf);
            self.do_work(&mut printer, work);
            let outbuf = printer.into_inner();
-            if !outbuf.is_empty() {
+            if !outbuf.get_ref().is_empty() {
                let mut out = self.out.lock().unwrap();
                out.write(&outbuf);
            }
-            self.outbuf = Some(outbuf);
+            self.outbuf = Some(outbuf.into_inner());
        }
        self.match_count
    }
@@ -221,7 +224,11 @@ impl Worker {
                if let Ok(p) = path.strip_prefix("./") {
                    path = p;
                }
-                self.search(printer, path, file)
+                if self.args.mmap() {
+                    self.search_mmap(printer, path, &file)
+                } else {
+                    self.search(printer, path, file)
+                }
            }
        };
        match result {
@@ -248,4 +255,23 @@ impl Worker {
            rdr,
        ).run().map_err(From::from)
    }
+
+    fn search_mmap<W: Send + io::Write>(
+        &mut self,
+        printer: &mut Printer<W>,
+        path: &Path,
+        file: &File,
+    ) -> Result<u64> {
+        if try!(file.metadata()).len() == 0 {
+            // Opening a memory map with an empty file results in an error.
+            return Ok(0);
+        }
+        let mmap = try!(Mmap::open(file, Protection::Read));
+        Ok(self.args.searcher_buffer(
+            printer,
+            &self.grep,
+            path,
+            unsafe { mmap.as_slice() },
+        ).run())
+    }
 }
--- a/src/out.rs
+++ b/src/out.rs
@@ -1,5 +1,11 @@
 use std::io::{self, Write};

+use term::{StdoutTerminal, Terminal};
+#[cfg(windows)]
+use term::WinConsole;
+
+use printer::Writer;
+
 /// Out controls the actual output of all search results for a particular file
 /// to the end user.
 ///
@@ -8,15 +14,32 @@ use std::io::{self, Write};
 /// file as a whole. For example, it knows when to print a file separator.)
 pub struct Out<W: io::Write> {
    wtr: io::BufWriter<W>,
+    term: Option<Box<StdoutTerminal>>,
    printed: bool,
    file_separator: Option<Vec<u8>>,
 }

+/// This is like term::stdout, but on Windows always uses WinConsole instead
+/// of trying for a TerminfoTerminal. This may be a mistake.
+#[cfg(windows)]
+fn term_stdout() -> Option<Box<StdoutTerminal>> {
+    WinConsole::new(io::stdout())
+        .ok()
+        .map(|t| Box::new(t) as Box<StdoutTerminal>)
+}
+
+#[cfg(not(windows))]
+fn term_stdout() -> Option<Box<StdoutTerminal>> {
+    // We never use this crap on *nix.
+    None
+}
+
 impl<W: io::Write> Out<W> {
    /// Create a new Out that writes to the wtr given.
    pub fn new(wtr: W) -> Out<W> {
        Out {
            wtr: io::BufWriter::new(wtr),
+            term: term_stdout(),
            printed: false,
            file_separator: None,
        }
@@ -33,14 +56,31 @@ impl<W: io::Write> Out<W> {

    /// Write the search results of a single file to the underlying wtr and
    /// flush wtr.
-    pub fn write(&mut self, buf: &[u8]) {
+    pub fn write(&mut self, buf: &Writer<Vec<u8>>) {
        if let Some(ref sep) = self.file_separator {
            if self.printed {
                let _ = self.wtr.write_all(sep);
                let _ = self.wtr.write_all(b"\n");
            }
        }
-        let _ = self.wtr.write_all(buf);
+        match *buf {
+            Writer::Colored(ref tt) => {
+                let _ = self.wtr.write_all(tt.get_ref());
+            }
+            Writer::Windows(ref w) => {
+                match self.term {
+                    None => {
+                        let _ = self.wtr.write_all(w.get_ref());
+                    }
+                    Some(ref mut stdout) => {
+                        w.print_stdout(stdout);
+                    }
+                }
+            }
+            Writer::NoColor(ref buf) => {
+                let _ = self.wtr.write_all(buf);
+            }
+        }
        let _ = self.wtr.flush();
        self.printed = true;
    }
--- a/src/printer.rs
+++ b/src/printer.rs
@@ -3,7 +3,7 @@ use std::path::Path;
 use std::sync::Arc;

 use regex::bytes::Regex;
-use term::{self, Terminal};
+use term::{self, StdoutTerminal, Terminal};
 use term::color::*;
 use term::terminfo::TermInfo;

@@ -22,6 +22,8 @@ pub struct Printer<W> {
    wtr: Writer<W>,
    /// Whether anything has been printed to wtr yet.
    has_printed: bool,
+    /// Whether to show column numbers for the first match or not.
+    column: bool,
    /// The string to use to separate non-contiguous runs of context lines.
    context_separator: Vec<u8>,
    /// The end-of-line terminator used by the printer. In general, eols are
@@ -48,6 +50,7 @@ impl<W: Send + io::Write> Printer<W> {
        Printer {
            wtr: Writer::new(wtr, color),
            has_printed: false,
+            column: false,
            context_separator: "--".to_string().into_bytes(),
            eol: b'\n',
            heading: false,
@@ -57,6 +60,13 @@ impl<W: Send + io::Write> Printer<W> {
        }
    }

+    /// When set, column numbers will be printed for the first match on each
+    /// line.
+    pub fn column(mut self, yes: bool) -> Printer<W> {
+        self.column = yes;
+        self
+    }
+
    /// Set the context separator. The default is `--`.
    pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
        self.context_separator = sep;
@@ -105,9 +115,9 @@ impl<W: Send + io::Write> Printer<W> {
    }

    /// Flushes the underlying writer and returns it.
-    pub fn into_inner(mut self) -> W {
+    pub fn into_inner(mut self) -> Writer<W> {
        let _ = self.wtr.flush();
-        self.wtr.into_inner()
+        self.wtr
    }

    /// Prints a type definition.
@@ -173,6 +183,11 @@ impl<W: Send + io::Write> Printer<W> {
        if let Some(line_number) = line_number {
            self.line_number(line_number, b':');
        }
+        if self.column {
+            let c = re.find(&buf[start..end]).map(|(s, _)| s + 1).unwrap_or(0);
+            self.write(c.to_string().as_bytes());
+            self.write(b":");
+        }
        if self.replace.is_some() {
            let line = re.replace_all(
                &buf[start..end], &**self.replace.as_ref().unwrap());
@@ -227,7 +242,8 @@ impl<W: Send + io::Write> Printer<W> {

    fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
        if self.wtr.is_color() {
-            let _ = self.wtr.fg(GREEN);
+            let _ = self.wtr.fg(BRIGHT_GREEN);
+            let _ = self.wtr.attr(term::Attr::Bold);
        }
        self.write(path.as_ref().to_string_lossy().as_bytes());
        self.write_eol();
@@ -238,7 +254,7 @@ impl<W: Send + io::Write> Printer<W> {

    fn line_number(&mut self, n: u64, sep: u8) {
        if self.wtr.is_color() {
-            let _ = self.wtr.fg(YELLOW);
+            let _ = self.wtr.fg(BRIGHT_BLUE);
            let _ = self.wtr.attr(term::Attr::Bold);
        }
        self.write(n.to_string().as_bytes());
@@ -262,11 +278,32 @@ impl<W: Send + io::Write> Printer<W> {
    }
 }

-enum Writer<W> {
+/// Writer corresponds to the final output buffer for search results. All
+/// search results are written to a Writer and then a Writer is flushed to
+/// stdout only after the full search has completed.
+pub enum Writer<W> {
    Colored(TerminfoTerminal<W>),
+    Windows(WindowsWriter<W>),
    NoColor(W),
 }

+pub struct WindowsWriter<W> {
+    wtr: W,
+    pos: usize,
+    colors: Vec<WindowsColor>,
+}
+
+pub struct WindowsColor {
+    pos: usize,
+    opt: WindowsOption,
+}
+
+pub enum WindowsOption {
+    Foreground(Color),
+    Background(Color),
+    Reset,
+}
+
 lazy_static! {
    static ref TERMINFO: Option<Arc<TermInfo>> = {
        match term::terminfo::TermInfo::from_env() {
@@ -284,7 +321,13 @@ impl<W: Send + io::Write> Writer<W> {
        // If we want color, build a TerminfoTerminal and see if the current
        // environment supports coloring. If not, bail with NoColor. To avoid
        // losing our writer (ownership), do this the long way.
-        if !color || TERMINFO.is_none() {
+        if !color {
+            return NoColor(wtr);
+        }
+        if cfg!(windows) {
+            return Windows(WindowsWriter { wtr: wtr, pos: 0, colors: vec![] });
+        }
+        if TERMINFO.is_none() {
            return NoColor(wtr);
        }
        let info = TERMINFO.clone().unwrap();
@@ -299,28 +342,35 @@ impl<W: Send + io::Write> Writer<W> {
    fn is_color(&self) -> bool {
        match *self {
            Colored(_) => true,
+            Windows(_) => true,
            NoColor(_) => false,
        }
    }

-    fn map_result<F>(
+    fn map_result<F, G>(
        &mut self,
        mut f: F,
+        mut g: G,
    ) -> term::Result<()>
-    where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()> {
+    where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()>,
+          G: FnMut(&mut WindowsWriter<W>) -> term::Result<()> {
        match *self {
            Colored(ref mut w) => f(w),
+            Windows(ref mut w) => g(w),
            NoColor(_) => Err(term::Error::NotSupported),
        }
    }

-    fn map_bool<F>(
+    fn map_bool<F, G>(
        &self,
        mut f: F,
+        mut g: G,
    ) -> bool
-    where F: FnMut(&TerminfoTerminal<W>) -> bool {
+    where F: FnMut(&TerminfoTerminal<W>) -> bool,
+          G: FnMut(&WindowsWriter<W>) -> bool {
        match *self {
            Colored(ref w) => f(w),
+            Windows(ref w) => g(w),
            NoColor(_) => false,
        }
    }
@@ -330,6 +380,7 @@ impl<W: Send + io::Write> io::Write for Writer<W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        match *self {
            Colored(ref mut w) => w.write(buf),
+            Windows(ref mut w) => w.write(buf),
            NoColor(ref mut w) => w.write(buf),
        }
    }
@@ -337,6 +388,7 @@ impl<W: Send + io::Write> io::Write for Writer<W> {
    fn flush(&mut self) -> io::Result<()> {
        match *self {
            Colored(ref mut w) => w.flush(),
+            Windows(ref mut w) => w.flush(),
            NoColor(ref mut w) => w.flush(),
        }
    }
@@ -346,48 +398,49 @@ impl<W: Send + io::Write> term::Terminal for Writer<W> {
    type Output = W;

    fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
-        self.map_result(|w| w.fg(fg))
+        self.map_result(|w| w.fg(fg), |w| w.fg(fg))
    }

    fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
-        self.map_result(|w| w.bg(bg))
+        self.map_result(|w| w.bg(bg), |w| w.bg(bg))
    }

    fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
-        self.map_result(|w| w.attr(attr))
+        self.map_result(|w| w.attr(attr), |w| w.attr(attr))
    }

    fn supports_attr(&self, attr: term::Attr) -> bool {
-        self.map_bool(|w| w.supports_attr(attr))
+        self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
    }

    fn reset(&mut self) -> term::Result<()> {
-        self.map_result(|w| w.reset())
+        self.map_result(|w| w.reset(), |w| w.reset())
    }

    fn supports_reset(&self) -> bool {
-        self.map_bool(|w| w.supports_reset())
+        self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
    }

    fn supports_color(&self) -> bool {
-        self.map_bool(|w| w.supports_color())
+        self.map_bool(|w| w.supports_color(), |w| w.supports_color())
    }

    fn cursor_up(&mut self) -> term::Result<()> {
-        self.map_result(|w| w.cursor_up())
+        self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
    }

    fn delete_line(&mut self) -> term::Result<()> {
-        self.map_result(|w| w.delete_line())
+        self.map_result(|w| w.delete_line(), |w| w.delete_line())
    }

    fn carriage_return(&mut self) -> term::Result<()> {
-        self.map_result(|w| w.carriage_return())
+        self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
    }

    fn get_ref(&self) -> &W {
        match *self {
            Colored(ref w) => w.get_ref(),
+            Windows(ref w) => w.get_ref(),
            NoColor(ref w) => w,
        }
    }
@@ -395,6 +448,7 @@ impl<W: Send + io::Write> term::Terminal for Writer<W> {
    fn get_mut(&mut self) -> &mut W {
        match *self {
            Colored(ref mut w) => w.get_mut(),
+            Windows(ref mut w) => w.get_mut(),
            NoColor(ref mut w) => w,
        }
    }
@@ -402,7 +456,110 @@ impl<W: Send + io::Write> term::Terminal for Writer<W> {
    fn into_inner(self) -> W {
        match self {
            Colored(w) => w.into_inner(),
+            Windows(w) => w.into_inner(),
            NoColor(w) => w,
        }
    }
 }
+
+impl<W: Send + io::Write> WindowsWriter<W> {
+    fn push(&mut self, opt: WindowsOption) {
+        let pos = self.pos;
+        self.colors.push(WindowsColor { pos: pos, opt: opt });
+    }
+}
+
+impl WindowsWriter<Vec<u8>> {
+    /// Print the contents to the given terminal.
+    pub fn print_stdout(&self, tt: &mut Box<StdoutTerminal>) {
+        let mut last = 0;
+        for col in &self.colors {
+            let _ = tt.write_all(&self.wtr[last..col.pos]);
+            match col.opt {
+                WindowsOption::Foreground(c) => {
+                    let _ = tt.fg(c);
+                }
+                WindowsOption::Background(c) => {
+                    let _ = tt.bg(c);
+                }
+                WindowsOption::Reset => {
+                    let _ = tt.reset();
+                }
+            }
+            last = col.pos;
+        }
+        let _ = tt.write_all(&self.wtr[last..]);
+        let _ = tt.flush();
+    }
+}
+
+impl<W: Send + io::Write> io::Write for WindowsWriter<W> {
+    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
+        let n = try!(self.wtr.write(buf));
+        self.pos += n;
+        Ok(n)
+    }
+
+    fn flush(&mut self) -> io::Result<()> {
+        self.wtr.flush()
+    }
+}
+
+impl<W: Send + io::Write> term::Terminal for WindowsWriter<W> {
+    type Output = W;
+
+    fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
+        self.push(WindowsOption::Foreground(fg));
+        Ok(())
+    }
+
+    fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
+        self.push(WindowsOption::Background(bg));
+        Ok(())
+    }
+
+    fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
+        Err(term::Error::NotSupported)
+    }
+
+    fn supports_attr(&self, attr: term::Attr) -> bool {
+        false
+    }
+
+    fn reset(&mut self) -> term::Result<()> {
+        self.push(WindowsOption::Reset);
+        Ok(())
+    }
+
+    fn supports_reset(&self) -> bool {
+        true
+    }
+
+    fn supports_color(&self) -> bool {
+        true
+    }
+
+    fn cursor_up(&mut self) -> term::Result<()> {
+        Err(term::Error::NotSupported)
+    }
+
+    fn delete_line(&mut self) -> term::Result<()> {
+        Err(term::Error::NotSupported)
+    }
+
+    fn carriage_return(&mut self) -> term::Result<()> {
+        Err(term::Error::NotSupported)
+    }
+
+    fn get_ref(&self) -> &W {
+        &self.wtr
+    }
+
+    fn get_mut(&mut self) -> &mut W {
+        &mut self.wtr
+    }
+
+    fn into_inner(self) -> W {
+        self.wtr
+    }
+}
--- a/src/search.rs
+++ b/src/search.rs
@@ -74,14 +74,14 @@ pub struct Searcher<'a, R, W: 'a> {

 /// Options for configuring search.
 #[derive(Clone)]
-struct Options {
-    after_context: usize,
-    before_context: usize,
-    count: bool,
-    eol: u8,
-    invert_match: bool,
-    line_number: bool,
-    text: bool,
+pub struct Options {
+    pub after_context: usize,
+    pub before_context: usize,
+    pub count: bool,
+    pub eol: u8,
+    pub invert_match: bool,
+    pub line_number: bool,
+    pub text: bool,
 }

 impl Default for Options {
@@ -219,14 +219,11 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
                        self.print_inverted_matches(upto);
                    }
                } else if matched {
-                    self.match_count += 1;
-                    if !self.opts.count {
-                        let start = self.last_match.start();
-                        let end = self.last_match.end();
-                        self.print_after_context(start);
-                        self.print_before_context(start);
-                        self.print_match(start, end);
-                    }
+                    let start = self.last_match.start();
+                    let end = self.last_match.end();
+                    self.print_after_context(start);
+                    self.print_before_context(start);
+                    self.print_match(start, end);
                }
                if matched {
                    self.inp.pos = self.last_match.end();
@@ -275,11 +272,8 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
        debug_assert!(self.opts.invert_match);
        let mut it = IterLines::new(self.opts.eol, self.inp.pos);
        while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
-            if !self.opts.count {
-                self.print_match(start, end);
-            }
+            self.print_match(start, end);
            self.inp.pos = end;
-            self.match_count += 1;
        }
    }

@@ -325,11 +319,15 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {

    #[inline(always)]
    fn print_match(&mut self, start: usize, end: usize) {
+        self.match_count += 1;
+        if self.opts.count {
+            return;
+        }
        self.print_separator(start);
        self.count_lines(start);
        self.add_line(end);
        self.printer.matched(
-            self.grep.regex(), &self.path,
+            self.grep.regex(), self.path,
            &self.inp.buf, start, end, self.line_count);
        self.last_printed = end;
        self.after_context_remaining = self.opts.after_context;
@@ -535,7 +533,7 @@ impl InputBuffer {
 ///
 /// Note that this may return both false positives and false negatives.
 #[inline(always)]
-fn is_binary(buf: &[u8]) -> bool {
+pub fn is_binary(buf: &[u8]) -> bool {
    if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
        return true;
    }
@@ -544,7 +542,7 @@ fn is_binary(buf: &[u8]) -> bool {

 /// Count the number of lines in the given buffer.
 #[inline(always)]
-fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
+pub fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
    let mut count = 0;
    while let Some(pos) = memchr(eol, buf) {
        count += 1;
@@ -575,7 +573,7 @@ fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
 /// advance over the positions of each line. We neglect that approach to avoid
 /// the borrow in the search code. (Because the borrow prevents composition
 /// through other mutable methods.)
-struct IterLines {
+pub struct IterLines {
    eol: u8,
    pos: usize,
 }
@@ -585,7 +583,7 @@ impl IterLines {
    ///
    /// The buffer is passed to the `next` method.
    #[inline(always)]
-    fn new(eol: u8, start: usize) -> IterLines {
+    pub fn new(eol: u8, start: usize) -> IterLines {
        IterLines {
            eol: eol,
            pos: start,
@@ -597,7 +595,7 @@ impl IterLines {
    ///
    /// The range returned includes the new line.
    #[inline(always)]
-    fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
+    pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
        match memchr(self.eol, &buf[self.pos..]) {
            None => {
                if self.pos < buf.len() {
@@ -689,6 +687,7 @@ mod tests {
    use std::path::Path;

    use grep::{Grep, GrepBuilder};
+    use term::Terminal;

    use printer::Printer;

@@ -747,7 +746,7 @@ fn main() {
                &mut inp, &mut pp, &grep, test_path(), hay(haystack));
            map(searcher).run().unwrap()
        };
-        (count, String::from_utf8(pp.into_inner()).unwrap())
+        (count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
    }

    fn search<F: FnMut(TestSearcher) -> TestSearcher>(
@@ -763,7 +762,7 @@ fn main() {
                &mut inp, &mut pp, &grep, test_path(), hay(haystack));
            map(searcher).run().unwrap()
        };
-        (count, String::from_utf8(pp.into_inner()).unwrap())
+        (count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
    }

    #[test]
@@ -870,7 +869,7 @@ fn main() {
    }

    #[test]
-    fn basic_search() {
+    fn basic_search1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s);
        assert_eq!(2, count);
        assert_eq!(out, "\
@@ -887,7 +886,6 @@ fn main() {
        assert_eq!(out, "");
    }

-
    #[test]
    fn binary_text() {
        let text = "Sherlock\n\x00Holmes\n";
--- a/src/search_buffer.rs
+++ b/src/search_buffer.rs
@@ -0,0 +1,285 @@
+use std::cmp;
+use std::io;
+use std::path::Path;
+
+use grep::Grep;
+
+use printer::Printer;
+use search::{IterLines, Options, count_lines, is_binary};
+
+pub struct BufferSearcher<'a, W: 'a> {
+    opts: Options,
+    printer: &'a mut Printer<W>,
+    grep: &'a Grep,
+    path: &'a Path,
+    buf: &'a [u8],
+    match_count: u64,
+    line_count: Option<u64>,
+    last_line: usize,
+}
+
+impl<'a, W: Send + io::Write> BufferSearcher<'a, W> {
+    pub fn new(
+        printer: &'a mut Printer<W>,
+        grep: &'a Grep,
+        path: &'a Path,
+        buf: &'a [u8],
+    ) -> BufferSearcher<'a, W> {
+        BufferSearcher {
+            opts: Options::default(),
+            printer: printer,
+            grep: grep,
+            path: path,
+            buf: buf,
+            match_count: 0,
+            line_count: None,
+            last_line: 0,
+        }
+    }
+
+    /// If enabled, searching will print a count instead of each match.
+    ///
+    /// Disabled by default.
+    pub fn count(mut self, yes: bool) -> Self {
+        self.opts.count = yes;
+        self
+    }
+
+    /// Set the end-of-line byte used by this searcher.
+    pub fn eol(mut self, eol: u8) -> Self {
+        self.opts.eol = eol;
+        self
+    }
+
+    /// If enabled, matching is inverted so that lines that *don't* match the
+    /// given pattern are treated as matches.
+    pub fn invert_match(mut self, yes: bool) -> Self {
+        self.opts.invert_match = yes;
+        self
+    }
+
+    /// If enabled, compute line numbers and prefix each line of output with
+    /// them.
+    pub fn line_number(mut self, yes: bool) -> Self {
+        self.opts.line_number = yes;
+        self
+    }
+
+    /// If enabled, search binary files as if they were text.
+    pub fn text(mut self, yes: bool) -> Self {
+        self.opts.text = yes;
+        self
+    }
+
+    #[inline(never)]
+    pub fn run(mut self) -> u64 {
+        let binary_upto = cmp::min(4096, self.buf.len());
+        if !self.opts.text && is_binary(&self.buf[..binary_upto]) {
+            return 0;
+        }
+
+        self.match_count = 0;
+        self.line_count = if self.opts.line_number { Some(0) } else { None };
+        let mut last_end = 0;
+        for m in self.grep.iter(self.buf) {
+            if self.opts.invert_match {
+                self.print_inverted_matches(last_end, m.start());
+            } else {
+                self.print_match(m.start(), m.end());
+            }
+            last_end = m.end();
+        }
+        if self.opts.invert_match {
+            let upto = self.buf.len();
+            self.print_inverted_matches(last_end, upto);
+        }
+        if self.opts.count && self.match_count > 0 {
+            self.printer.path_count(self.path, self.match_count);
+        }
+        self.match_count
+    }
+
+    #[inline(always)]
+    pub fn print_match(&mut self, start: usize, end: usize) {
+        self.match_count += 1;
+        if self.opts.count {
+            return;
+        }
+        self.count_lines(start);
+        self.add_line(end);
+        self.printer.matched(
+            self.grep.regex(), self.path, self.buf,
+            start, end, self.line_count);
+    }
+
+    #[inline(always)]
+    fn print_inverted_matches(&mut self, start: usize, end: usize) {
+        debug_assert!(self.opts.invert_match);
+        let mut it = IterLines::new(self.opts.eol, start);
+        while let Some((s, e)) = it.next(&self.buf[..end]) {
+            self.print_match(s, e);
+        }
+    }
+
+    #[inline(always)]
+    fn count_lines(&mut self, upto: usize) {
+        if let Some(ref mut line_count) = self.line_count {
+            *line_count += count_lines(
+                &self.buf[self.last_line..upto], self.opts.eol);
+            self.last_line = upto;
+        }
+    }
+
+    #[inline(always)]
+    fn add_line(&mut self, line_end: usize) {
+        if let Some(ref mut line_count) = self.line_count {
+            *line_count += 1;
+            self.last_line = line_end;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use std::path::Path;
+
+    use grep::{Grep, GrepBuilder};
+    use term::Terminal;
+
+    use printer::Printer;
+
+    use super::BufferSearcher;
+
+    lazy_static! {
+        static ref SHERLOCK: &'static str = "\
+For the Doctor Watsons of this world, as opposed to the Sherlock
+Holmeses, success in the province of detective work must always
+be, to a very large extent, the result of luck. Sherlock Holmes
+can extract a clew from a wisp of straw or a flake of cigar ash;
+but Doctor Watson has to have it taken out for him and dusted,
+and exhibited clearly, with a label attached.\
+";
+        static ref CODE: &'static str = "\
+extern crate snap;
+
+use std::io;
+
+fn main() {
+    let stdin = io::stdin();
+    let stdout = io::stdout();
+
+    // Wrap the stdin reader in a Snappy reader.
+    let mut rdr = snap::Reader::new(stdin.lock());
+    let mut wtr = stdout.lock();
+    io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
+}
+";
+    }
+
+    fn matcher(pat: &str) -> Grep {
+        GrepBuilder::new(pat).build().unwrap()
+    }
+
+    fn test_path() -> &'static Path {
+        &Path::new("/baz.rs")
+    }
+
+    type TestSearcher<'a> = BufferSearcher<'a, Vec<u8>>;
+
+    fn search<F: FnMut(TestSearcher) -> TestSearcher>(
+        pat: &str,
+        haystack: &str,
+        mut map: F,
+    ) -> (u64, String) {
+        let mut pp = Printer::new(vec![], false).with_filename(true);
+        let grep = GrepBuilder::new(pat).build().unwrap();
+        let count = {
+            let searcher = BufferSearcher::new(
+                &mut pp, &grep, test_path(), haystack.as_bytes());
+            map(searcher).run()
+        };
+        (count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
+    }
+
+    #[test]
+    fn basic_search() {
+        let (count, out) = search("Sherlock", &*SHERLOCK, |s|s);
+        assert_eq!(2, count);
+        assert_eq!(out, "\
+/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
+/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
+");
+    }
+
+    #[test]
+    fn binary() {
+        let text = "Sherlock\n\x00Holmes\n";
+        let (count, out) = search("Sherlock|Holmes", text, |s|s);
+        assert_eq!(0, count);
+        assert_eq!(out, "");
+    }
+
+
+    #[test]
+    fn binary_text() {
+        let text = "Sherlock\n\x00Holmes\n";
+        let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
+        assert_eq!(2, count);
+        assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
+    }
+
+    #[test]
+    fn line_numbers() {
+        let (count, out) = search(
+            "Sherlock", &*SHERLOCK, |s| s.line_number(true));
+        assert_eq!(2, count);
+        assert_eq!(out, "\
+/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
+/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
+");
+    }
+
+    #[test]
+    fn count() {
+        let (count, out) = search(
+            "Sherlock", &*SHERLOCK, |s| s.count(true));
+        assert_eq!(2, count);
+        assert_eq!(out, "/baz.rs:2\n");
+    }
+
+    #[test]
+    fn invert_match() {
+        let (count, out) = search(
+            "Sherlock", &*SHERLOCK, |s| s.invert_match(true));
+        assert_eq!(4, count);
+        assert_eq!(out, "\
+/baz.rs:Holmeses, success in the province of detective work must always
+/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
+/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
+/baz.rs:and exhibited clearly, with a label attached.
+");
+    }
+
+    #[test]
+    fn invert_match_line_numbers() {
+        let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
+            s.invert_match(true).line_number(true)
+        });
+        assert_eq!(4, count);
+        assert_eq!(out, "\
+/baz.rs:2:Holmeses, success in the province of detective work must always
+/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
+/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
+/baz.rs:6:and exhibited clearly, with a label attached.
+");
+    }
+
+    #[test]
+    fn invert_match_count() {
+        let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
+            s.invert_match(true).count(true)
+        });
+        assert_eq!(4, count);
+        assert_eq!(out, "/baz.rs:4\n");
+    }
+}
--- a/src/sys.rs
+++ b/src/sys.rs
@@ -1,19 +1,19 @@
 /*!
 This io module contains various platform specific functions for detecting
-how xrep is being used. e.g., Is stdin being piped into it? Is stdout being
+how ripgrep is being used. e.g., Is stdin being piped into it? Is stdout being
 redirected to a file? etc... We use this information to tweak various default
 configuration parameters such as colors and match formatting.
 */

-use libc;
-
 #[cfg(unix)]
 pub fn stdin_is_atty() -> bool {
+    use libc;
    0 < unsafe { libc::isatty(libc::STDIN_FILENO) }
 }

 #[cfg(unix)]
 pub fn stdout_is_atty() -> bool {
+    use libc;
    0 < unsafe { libc::isatty(libc::STDOUT_FILENO) }
 }
Author	SHA1	Message	Date
Andrew Gallant	96e87ab738	update distributable to include readme and license	2016-09-08 16:21:37 -04:00
Andrew Gallant	a744ec133d	Rename xrep to ripgrep.	2016-09-08 16:15:44 -04:00
Andrew Gallant	0042dce949	Hack in Windows console coloring. The code has suffered and needs refactoring/commenting. BUT... IT WORKS!	2016-09-07 21:54:28 -04:00
Andrew Gallant	ca058d7584	Add support for memory maps. I thought plain `read` had usurped them, but when searching a very small number of files, mmaps can be around 20% faster on Linux. It'd be really unfortunate to leave that on the table. Mmap searching doesn't support contexts yet, but we probably don't really care. And duplicating that logic doesn't sound fun. Without contexts, mmap searching is delightfully simple.	2016-09-06 21:47:33 -04:00
Andrew Gallant	af3b56a623	Fix grep match iterator.	2016-09-06 21:45:41 -04:00
Andrew Gallant	5938bed339	Add support for printing column numbers.	2016-09-06 19:50:27 -04:00
Andrew Gallant	feff1849c8	Tweak colors.	2016-09-06 19:35:52 -04:00
Andrew Gallant	9948e0ca07	Only create the Grep searcher once.	2016-09-06 19:33:19 -04:00
Andrew Gallant	fd3e5069b6	Fix required literal handling and add debug prints. In particular, if we had an inner literal and were doing a case insensitive search, then the literals are dropped because we previously only allowed a single inner literal to have an effect. Now we allow alternations of inner literals, but still don't quite take full advantage.	2016-09-06 19:33:03 -04:00
Andrew Gallant	0891b4a3c0	update appveyor	2016-09-05 22:01:53 -04:00
Andrew Gallant	af48aaa647	another try	2016-09-05 21:57:57 -04:00