mirror of https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 10:11:58 -07:00

Compare commits (31 commits):

9bf7696ec8  cb0f8fd2fa  fa8112ec34  cf21b4a97e  19615245cd  98a48b44bc
e3da726836  5b36c86c15  76331e5fec  1e678d7052  dd986d7fe9  f83cd63b11
9a4527d107  8f0d3d78ca  3f7cd977bc  cc6b6dcf5b  48878bbb8f  0766617e07
afd99c43d7  96e87ab738  a744ec133d  0042dce949  ca058d7584  af3b56a623
5938bed339  feff1849c8  9948e0ca07  fd3e5069b6  0891b4a3c0  af48aaa647
ee7f300ae2
.travis.yml (54 changed lines)

@@ -1,37 +1,49 @@
#language: rust
#rust:
# - stable
# - beta
# - nightly
#script:
# - cargo build --verbose
# - cargo doc
# - cargo test --verbose
# - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
#     cargo bench --verbose;
#   fi

language: rust
cache: cargo

env:
  global:
    - PROJECT_NAME=xrep
    - PROJECT_NAME=ripgrep
matrix:
  include:
    # Nightly channel
    - os: osx
      rust: nightly
      env: TARGET=i686-apple-darwin
    - os: osx
      rust: nightly
      env: TARGET=x86_64-apple-darwin
    # Nightly channel.
    # (All *nix releases are done on the nightly channel to take advantage
    # of the regex library's multiple pattern SIMD search.)
    - os: linux
      rust: nightly
      env: TARGET=i686-unknown-linux-musl
    - os: linux
      rust: nightly
      env: TARGET=x86_64-unknown-linux-musl
    - os: linux
      rust: nightly
      env: TARGET=x86_64-unknown-linux-gnu
    - os: osx
      rust: nightly
      env: TARGET=i686-apple-darwin
    - os: osx
      rust: nightly
      env: TARGET=x86_64-apple-darwin
    # Beta channel.
    - os: linux
      rust: beta
      env: TARGET=x86_64-unknown-linux-musl
    - os: linux
      rust: beta
      env: TARGET=x86_64-unknown-linux-gnu
    - os: osx
      rust: beta
      env: TARGET=x86_64-apple-darwin
    # Minimum Rust supported channel.
    - os: linux
      rust: 1.9.0
      env: TARGET=x86_64-unknown-linux-musl
    - os: linux
      rust: 1.9.0
      env: TARGET=x86_64-unknown-linux-gnu
    - os: osx
      rust: 1.9.0
      env: TARGET=x86_64-apple-darwin

before_install:
  - export PATH="$PATH:$HOME/.cargo/bin"
Cargo.toml (14 changed lines)

@@ -1,14 +1,14 @@
[package]
publish = false
name = "xrep"
name = "ripgrep"
version = "0.1.0"  #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Line oriented search tool using Rust's regex library.
"""
documentation = "https://github.com/BurntSushi/xrep"
homepage = "https://github.com/BurntSushi/xrep"
repository = "https://github.com/BurntSushi/xrep"
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"

@@ -16,7 +16,11 @@ license = "Unlicense/MIT"
[[bin]]
bench = false
path = "src/main.rs"
name = "xrep"
name = "rg"

[[test]]
name = "integration"
path = "tests/tests.rs"

[dependencies]
crossbeam = "0.2"
appveyor.yml (12 changed lines)

@@ -1,6 +1,6 @@
environment:
  global:
    PROJECT_NAME: xrep
    PROJECT_NAME: ripgrep
  matrix:
    # Nightly channel
    - TARGET: i686-pc-windows-gnu

@@ -32,16 +32,14 @@ build: false
# Equivalent to Travis' `script` phase
# TODO modify this phase as you see fit
test_script:
  - cargo build --verbose
  - cargo test
  - cargo test --verbose

before_deploy:
  # Generate artifacts for release
  - SET RUSTFLAGS="-C target-feature=+ssse3"
  - cargo build --release --features simd-accel
  # TODO(burntsushi): How can we enable SSSE3 on Windows?
  - cargo build --release
  - mkdir staging
  # TODO update this part to copy the artifacts that make sense for your project
  - copy target\release\xrep.exe staging
  - copy target\release\rg.exe staging
  - cd staging
  # release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
  - 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
@@ -1,3 +1,7 @@
/*!
This module benchmarks the glob implementation. For benchmarks on the ripgrep
tool itself, see the benchsuite directory.
*/
#![feature(test)]

extern crate glob;
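The hunk above adds a module-level doc comment and keeps the file as a nightly-only benchmark module (hence `#![feature(test)]`). For readers unfamiliar with that attribute, below is a minimal, self-contained sketch of a libtest benchmark in the same style; the `glob_match` helper and the pattern are illustrative assumptions, not code from this commit.

#![feature(test)]

extern crate test;

use test::Bencher;

// Hypothetical stand-in for the glob implementation that the real module benchmarks.
fn glob_match(pattern: &str, path: &str) -> bool {
    // Illustrative only: a real matcher compiles `pattern` once and reuses it.
    path.ends_with(pattern.trim_start_matches('*'))
}

#[bench]
fn bench_simple_extension_glob(b: &mut Bencher) {
    // b.iter runs the closure many times and reports nanoseconds per iteration.
    b.iter(|| test::black_box(glob_match("*.rs", "src/main.rs")));
}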
benchsuite (new executable file, 918 lines)

@@ -0,0 +1,918 @@
#!/usr/bin/env python

'''
benchsuite is a benchmark runner for comparing command line search tools.
'''

import argparse
import csv
import os
import os.path as path
from multiprocessing import cpu_count
import re
import statistics
import subprocess
import sys
import time

# Some constants for identifying the corpora we use to run tests.
# We establish two very different kinds of corpora: a small number of large
# files and a large number of small files. These are vastly different use cases
# not only because of their performance characteristics, but also the
# strategies used to increase the relevance of results returned.

SUBTITLES_DIR = 'subtitles'
SUBTITLES_EN_NAME = 'OpenSubtitles2016.raw.en'
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz'
SUBTITLES_RU_NAME = 'OpenSubtitles2016.raw.ru'
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz'

LINUX_DIR = 'linux'
LINUX_CLONE = 'git://github.com/BurntSushi/linux'


def bench_linux_literal_default(suite_dir):
    '''
    Benchmark the speed of a literal using *default* settings.

    This is a purposefully unfair benchmark for use in performance
    analysis, but it is pedagogically useful.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    # N.B. This is a purposefully unfair benchmark for illustrative purposes
    # of how the default modes for each search tool differ.
    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', pat]),
        mkcmd('ag', ['ag', pat]),
        # ucg reports the exact same matches as ag and rg even though it
        # doesn't read gitignore files. Instead, it has a file whitelist
        # that happens to match up exactly with the gitignores for this search.
        mkcmd('ucg', ['ucg', pat]),
        mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
        mkcmd('pt', ['pt', pat]),
        # sift reports an extra line here for a binary file matched.
        mkcmd('sift', ['sift', pat]),
    ])


def bench_linux_literal(suite_dir):
    '''
    Benchmark the speed of a literal, attempting to be fair.

    This tries to use the minimum set of options available in all tools
    to test how fast they are. For example, it makes sure there is no
    case insensitive matching and that line numbers are computed.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd('git grep', [
            'git', 'grep', '-I', '-n', pat,
        ], env={'LC_ALL': 'C'}),
        mkcmd('pt', ['pt', pat]),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_literal_casei(suite_dir):
    '''
    Benchmark the speed of a case insensitive literal search.

    This is like the linux_literal benchmark, except we ask the
    search tools to do case insensitive search.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
        ]),
        mkcmd('ag', ['ag', '-i', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
        mkcmd('ucg', ['ucg', '-i', pat]),
        mkcmd('git grep', [
            'git', 'grep', '-I', '-n', '-i', pat,
        ], env={'LC_ALL': 'C'}),
        # sift yields more matches than it should here. Specifically, it gets
        # matches in Module.symvers and System.map in the repo root. Both of
        # those files show up in the repo root's .gitignore file.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
        ]),
    ])


def bench_linux_re_literal_suffix(suite_dir):
    '''
    Benchmark the speed of a literal inside a regex.

    This, for example, inhibits a prefix byte optimization used
    inside of Go's regex engine (relevant for sift and pt).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = '[A-Z]+_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_word(suite_dir):
    '''
    Benchmark use of the -w ("match word") flag in each tool.

    sift has a lot of trouble with this because it forces it into Go's
    regex engine by surrounding the pattern with \b assertions.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-w', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
        ]),
        mkcmd('ag', ['ag', '-s', '-w', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', '-w', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
        ]),
    ])


def bench_linux_unicode_greek(suite_dir):
    '''
    Benchmark matching of a Unicode category.

    Only three tools (ripgrep, sift and pt) support this.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\p{Greek}'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        # sift tries to search a bunch of PDF files and clutters up the
        # results, even though --binary-skip is provided. They are excluded
        # here explicitly, but don't have a measurable impact on performance.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip',
            '--exclude-files', '.*',
            '--exclude-files', '*.pdf',
            pat,
        ]),
    ])


def bench_linux_unicode_greek_casei(suite_dir):
    '''
    Benchmark matching of a Unicode category, case insensitively.

    Only ripgrep gets this right (and it's still fast).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\p{Greek}'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        # sift tries to search a bunch of PDF files and clutters up the
        # results, even though --binary-skip is provided. They are excluded
        # here explicitly, but don't have a measurable impact on performance.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip',
            '--exclude-files', '.*',
            '--exclude-files', '*.pdf',
            pat,
        ]),
    ])


def bench_linux_unicode_word(suite_dir):
    '''
    Benchmark Unicode aware \w character class.

    Only ripgrep and git-grep (with LC_ALL=en_US.UTF-8) actually get
    this right. Everything else uses the standard ASCII interpretation
    of \w.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\wAh'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', pat,
        ]),
        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
        mkcmd('ag-novcs (no Unicode)', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift (no Unicode)', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_no_literal(suite_dir):
    '''
    Benchmark a regex that defeats all literal optimizations.

    Most search patterns have some kind of literal in them, which
    typically permits searches to take some shortcuts. Therefore, the
    applicability of this benchmark is somewhat suspicious, but the
    suite wouldn't feel complete without it.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        mkcmd('rg-novcs (no Unicode)', [
            'rg', '--no-ignore', '-n', '(?-u)' + pat,
        ]),
        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
        mkcmd('ag-novcs (no Unicode)', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift (no Unicode)', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_alternates(suite_dir):
    '''
    Benchmark a small alternation of literals.

    sift doesn't make the cut. It's more than 10x slower than the next
    fastest result. The slowdown is likely because the Go regexp engine
    doesn't do any literal optimizations for this case (there is no
    common leading byte).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', pat,
        ]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
    ])


def bench_linux_alternates_casei(suite_dir):
    'Benchmark a small alternation of literals case insensitively.'
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
        ]),
        mkcmd('ag', ['ag', '-i', pat]),
        mkcmd('ag-novcs', [
            'ag', '--skip-vcs-ignores', '-i', pat,
        ]),
        mkcmd('ucg', ['ucg', '-i', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', '-i', pat],
            env={'LC_ALL': 'C'},
        ),
    ])


# BREADCRUMBS(burntsushi): We should benchmark an alternation for `linux` as
# well.

def bench_sherlock(suite_dir):
    'TODO: Fix this and add more single file benchmarks.'
    require(suite_dir, 'subtitles-en')
    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME)
    pat = 'Sherlock'

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', pat, en]),
        Command('grep', ['grep', '-a', pat, en])
    ])


class MissingDependencies(Exception):
    '''
    A missing dependency exception.

    This exception occurs when running a benchmark that requires a
    particular corpus that isn't available.

    :ivar list(str) missing_names:
        A list of missing dependency names. These names correspond to
        names that can be used with the --download flag.
    '''
    def __init__(self, missing_names):
        self.missing_names = missing_names

    def __str__(self):
        return 'MissingDependency(%s)' % repr(self.missing_names)


class Benchmark(object):
    '''
    A single benchmark corresponding to a grouping of commands.

    The main purpose of a benchmark is to compare the performance
    characteristics of a group of commands.
    '''

    def __init__(self, name=None, pattern=None, commands=None,
                 warmup_count=1, count=3, line_count=True):
        '''
        Create a single benchmark.

        A single benchmark is composed of a set of commands that are
        benchmarked and compared against one another. A benchmark may
        have multiple commands that use the same search tool (but
        probably should have something differentiating them).

        The grouping of commands is a purely human driven process.

        By default, the output of every command is sent to /dev/null.
        Other types of behavior are available via the methods defined
        on this benchmark.

        :param str name:
            A human readable string denoting the name of this
            benchmark.
        :param str pattern:
            The pattern that is used in search.
        :param list(Command) commands:
            A list of commands to initialize this benchmark with. More
            commands may be added before running the benchmark.
        :param int warmup_count:
            The number of times to run each command before recording
            samples.
        :param int count:
            The number of samples to collect from each command.
        :param bool line_count:
            When set, the lines of each search are counted and included
            in the samples produced.
        '''
        self.name = name
        self.pattern = pattern
        self.commands = commands or []
        self.warmup_count = warmup_count
        self.count = count
        self.line_count = line_count

    def run(self):
        '''
        Runs this benchmark and returns the results.

        :rtype: Result
        '''
        result = Result(self)
        for cmd in self.commands:
            # Do a warmup first.
            for _ in range(self.warmup_count):
                self.run_one(cmd)
            for _ in range(self.count):
                result.add(cmd, **self.run_one(cmd))
        return result

    def run_one(self, cmd):
        '''
        Runs the given command exactly once.

        Returns an object that includes the time taken by the command.
        If this benchmark was configured to count the number of lines
        returned, then the line count is also returned.

        :param Command cmd: The command to run.
        :returns:
            A dict with two fields, duration and line_count.
            The duration is in seconds, with fractional milliseconds,
            and is guaranteed to be available. The line_count is set
            to None unless line counting is enabled, in which case,
            it is the number of lines in the search output.
        :rtype: dict
        '''
        cmd.kwargs['stderr'] = subprocess.DEVNULL
        if self.line_count:
            cmd.kwargs['stdout'] = subprocess.PIPE
        else:
            cmd.kwargs['stdout'] = subprocess.DEVNULL

        start = time.time()
        completed = cmd.run()
        end = time.time()

        line_count = None
        if self.line_count:
            line_count = completed.stdout.count(b'\n')
        return {
            'duration': end - start,
            'line_count': line_count,
        }


class Result(object):
    '''
    The result of running a benchmark.

    Benchmark results consist of a set of samples, where each sample
    corresponds to a single run of a single command in the benchmark.
    Various statistics can be computed from these samples such as mean
    and standard deviation.
    '''
    def __init__(self, benchmark):
        '''
        Create a new set of results, initially empty.

        :param Benchmark benchmark:
            The benchmark that produced these results.
        '''
        self.benchmark = benchmark
        self.samples = []

    def add(self, cmd, duration, line_count=None):
        '''
        Add a new sample to this result set.

        :param Command cmd:
            The command that produced this sample.
        :param int duration:
            The duration, in milliseconds, that the command took to
            run.
        :param int line_count:
            The number of lines in the search output. This is optional.
        '''
        self.samples.append({
            'cmd': cmd,
            'duration': duration,
            'line_count': line_count,
        })

    def fastest_sample(self):
        '''
        Returns the fastest recorded sample.
        '''
        return min(self.samples, key=lambda s: s['duration'])

    def fastest_cmd(self):
        '''
        Returns the fastest command according to distribution.
        '''
        means = []
        for cmd in self.benchmark.commands:
            mean, _ = self.distribution_for(cmd)
            means.append((cmd, mean))
        return min(means, key=lambda tup: tup[1])[0]

    def samples_for(self, cmd):
        'Returns an iterable of samples for cmd'
        yield from (s for s in self.samples if s['cmd'].name == cmd.name)

    def line_counts_for(self, cmd):
        '''
        Returns the line counts recorded for each command.

        :returns:
            A dictionary from command name to a set of line
            counts recorded.
        '''
        return {s['line_count'] for s in self.samples_for(cmd)
                if s['line_count'] is not None}

    def distribution_for(self, cmd):
        '''
        Returns the distribution (mean +/- std) of the given command.

        :rtype: (float, float)
        :returns:
            A tuple containing the mean and standard deviation, in that
            order.
        '''
        mean = statistics.mean(
            s['duration'] for s in self.samples_for(cmd))
        stdev = statistics.stdev(
            s['duration'] for s in self.samples_for(cmd))
        return mean, stdev


class Command(object):
    def __init__(self, name, cmd, *args, **kwargs):
        '''
        Create a new command that is run as part of a benchmark.

        *args and **kwargs are passed directly to ``subprocess.run``.
        An exception to this is stdin/stdout/stderr. Output
        redirection is completely controlled by the benchmark harness.
        Trying to set them here will trigger an assert.

        :param str name:
            The human readable name of this command. This is
            particularly useful if the same search tool is used
            multiple times in the same benchmark with different
            arguments.
        :param list(str) cmd:
            The command to run as a list of arguments (including the
            command name itself).
        '''
        assert 'stdin' not in kwargs
        assert 'stdout' not in kwargs
        assert 'stderr' not in kwargs
        self.name = name
        self.cmd = cmd
        self.args = args
        self.kwargs = kwargs

    def run(self):
        '''
        Runs this command and returns its status.

        :rtype: subprocess.CompletedProcess
        '''
        return subprocess.run(self.cmd, *self.args, **self.kwargs)


def eprint(*args, **kwargs):
    'Like print, but to stderr.'
    kwargs['file'] = sys.stderr
    print(*args, **kwargs)


def run_cmd(cmd, *args, **kwargs):
    '''
    Print the command to stderr and run it.

    If the command fails, throw a traceback.
    '''
    eprint('# %s' % ' '.join(cmd))
    kwargs['check'] = True
    return subprocess.run(cmd, *args, **kwargs)


def require(suite_dir, *names):
    '''
    Declare a dependency on the given names for a benchmark.

    If any dependency doesn't exist, then fail with an error message.
    '''
    errs = []
    for name in names:
        fun_name = name.replace('-', '_')
        if not globals()['has_%s' % fun_name](suite_dir):
            errs.append(name)
    if len(errs) > 0:
        raise MissingDependencies(errs)


def download_linux(suite_dir):
    'Download and build the Linux kernel.'
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    if not os.path.isdir(checkout_dir):
        # Clone from my fork so that we always get the same corpus *and* still
        # do a shallow clone. Shallow clones are much much cheaper than full
        # clones.
        run_cmd(['git', 'clone', '--depth', '1', LINUX_CLONE, checkout_dir])
    # We want to build the kernel because the process of building it produces
    # a lot of junk in the repository that a search tool probably shouldn't
    # touch.
    if not os.path.exists(path.join(checkout_dir, 'vmlinux')):
        eprint('# Building Linux kernel...')
        run_cmd(['make', 'defconfig'], cwd=checkout_dir)
        run_cmd(['make', '-j', str(cpu_count())], cwd=checkout_dir)


def has_linux(suite_dir):
    'Returns true if we believe the Linux kernel is built.'
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    return path.exists(path.join(checkout_dir, 'vmlinux'))


def download_subtitles_en(suite_dir):
    'Download and decompress English subtitles.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    en_path_gz = path.join(subtitle_dir, SUBTITLES_EN_NAME_GZ)
    en_path = path.join(subtitle_dir, SUBTITLES_EN_NAME)

    if not os.path.isdir(subtitle_dir):
        os.makedirs(subtitle_dir)
    if not os.path.exists(en_path):
        if not os.path.exists(en_path_gz):
            run_cmd(['curl', '-LO', SUBTITLES_EN_URL], cwd=subtitle_dir)
        run_cmd(['gunzip', en_path_gz], cwd=subtitle_dir)


def has_subtitles_en(suite_dir):
    'Returns true if English subtitles have been downloaded.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    return path.exists(path.join(subtitle_dir, SUBTITLES_EN_NAME))


def download_subtitles_ru(suite_dir):
    'Download and decompress Russian subtitles.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    ru_path_gz = path.join(subtitle_dir, SUBTITLES_RU_NAME_GZ)
    ru_path = path.join(subtitle_dir, SUBTITLES_RU_NAME)

    if not os.path.isdir(subtitle_dir):
        os.makedirs(subtitle_dir)
    if not os.path.exists(ru_path):
        if not os.path.exists(ru_path_gz):
            run_cmd(['curl', '-LO', SUBTITLES_RU_URL], cwd=subtitle_dir)
        run_cmd(['gunzip', ru_path_gz], cwd=subtitle_dir)


def has_subtitles_ru(suite_dir):
    'Returns true if Russian subtitles have been downloaded.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    return path.exists(path.join(subtitle_dir, SUBTITLES_RU_NAME))


def download(suite_dir, choices):
    '''
    Download choices into suite_dir.

    Specifically, choices specifies a list of corpora to fetch.

    :param str suite_dir:
        The directory in which to download corpora.
    :param list(str) choices:
        A list of corpora to download. Available choices are:
        all, linux, subtitles-en, subtitles-ru.
    '''
    for choice in choices:
        if choice == 'linux':
            download_linux(suite_dir)
        elif choice == 'subtitles-en':
            download_subtitles_en(suite_dir)
        elif choice == 'subtitles-ru':
            download_subtitles_ru(suite_dir)
        elif choice == 'all':
            download_linux(suite_dir)
            download_subtitles_en(suite_dir)
            download_subtitles_ru(suite_dir)
        else:
            eprint('Unrecognized download choice: %s' % choice)
            sys.exit(1)


def collect_benchmarks(suite_dir, filter_pat=None):
    '''
    Return an iterable of all runnable benchmarks.

    :param str suite_dir:
        The directory containing corpora.
    :param str filter_pat:
        A single regular expression that is used to filter benchmarks
        by their name. When not specified, all benchmarks are run.
    :returns:
        An iterable over all runnable benchmarks. If a benchmark
        requires corpora that are missing, then a log message is
        emitted to stderr and it is not yielded.
    '''
    for fun in sorted(globals()):
        if not fun.startswith('bench_'):
            continue
        name = re.sub('^bench_', '', fun)
        if filter_pat is not None and not re.search(filter_pat, name):
            continue
        try:
            benchmark = globals()[fun](suite_dir)
        except MissingDependencies as e:
            eprint(
                'missing: %s, skipping benchmark %s (try running with: %s)' % (
                    ', '.join(e.missing_names),
                    name,
                    ' '.join(['--download %s' % n for n in e.missing_names]),
                ))
            continue
        benchmark.name = name
        yield benchmark


def main():
    p = argparse.ArgumentParser('Command line search tool benchmark suite.')
    p.add_argument(
        '--dir', metavar='PATH', default=os.getcwd(),
        help='The directory in which to download data and perform searches.')
    p.add_argument(
        '--download', metavar='CORPUS', action='append',
        choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
        help='Download and prepare corpus data, then exit without running '
             'any benchmarks. Note that this command is intended to be '
             'idempotent. WARNING: This downloads over a gigabyte of data, '
             'and also includes building the Linux kernel. If "all" is used '
             'then the total uncompressed size is around 13 GB.')
    p.add_argument(
        '-f', '--force', action='store_true',
        help='Overwrite existing files if there is a conflict.')
    p.add_argument(
        '--list', action='store_true',
        help='List available benchmarks by name.')
    p.add_argument(
        '--raw', metavar='PATH',
        help='Dump raw data (all samples collected) in CSV format to the '
             'file path provided.')
    p.add_argument(
        'bench', metavar='PAT', nargs='?',
        help='A regex pattern that will only run benchmarks that match.')
    args = p.parse_args()

    if args.download is not None and len(args.download) > 0:
        download(args.dir, args.download)
        sys.exit(0)

    if not path.isdir(args.dir):
        os.makedirs(args.dir)
    if args.raw is not None and path.exists(args.raw) and not args.force:
        eprint('File %s already exists (delete it or use --force)' % args.raw)
        sys.exit(1)
    raw_handle, raw_csv_wtr = None, None
    if args.raw is not None:
        fields = [
            'benchmark', 'warmup_iter', 'iter',
            'name', 'command', 'duration', 'lines', 'env',
        ]
        raw_handle = open(args.raw, 'w+')
        raw_csv_wtr = csv.DictWriter(raw_handle, fields)
        raw_csv_wtr.writerow({x: x for x in fields})

    benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
    for i, b in enumerate(benchmarks):
        result = b.run()
        fastest_cmd = result.fastest_cmd()
        fastest_sample = result.fastest_sample()
        max_name_len = max(len(cmd.name) for cmd in b.commands)

        if i > 0:
            print()
        header = '%s (pattern: %s)' % (b.name, b.pattern)
        print('%s\n%s' % (header, '-' * len(header)))
        for cmd in b.commands:
            name = cmd.name
            mean, stdev = result.distribution_for(cmd)
            line_counts = result.line_counts_for(cmd)
            show_fast_cmd, show_line_counts = '', ''
            if fastest_cmd.name == cmd.name:
                show_fast_cmd = '*'
            if fastest_sample['cmd'].name == cmd.name:
                name += '*'
            if len(line_counts) > 0:
                counts = map(str, line_counts)
                show_line_counts = ' (lines: %s)' % ', '.join(counts)
            fmt = '{name:{pad}} {mean:0.3f} +/- {stdev:0.3f}{lines}{fast_cmd}'
            print(fmt.format(
                name=name, pad=max_name_len + 2, fast_cmd=show_fast_cmd,
                mean=mean, stdev=stdev, lines=show_line_counts))
            sys.stdout.flush()

        if raw_csv_wtr is not None:
            for sample in result.samples:
                cmd, duration = sample['cmd'], sample['duration']
                env = ' '.join(['%s=%s' % (k, v)
                                for k, v in cmd.kwargs.get('env', {}).items()])
                raw_csv_wtr.writerow({
                    'benchmark': b.name,
                    'warmup_iter': b.warmup_count,
                    'iter': b.count,
                    'name': sample['cmd'].name,
                    'command': ' '.join(cmd.cmd),
                    'duration': duration,
                    'lines': sample['line_count'] or '',
                    'env': env,
                })
            raw_handle.flush()


if __name__ == '__main__':
    main()
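The suite above times each command with a warmup pass and a fixed number of samples, reports mean +/- standard deviation per command, and counts output lines to sanity-check that the tools agree. A rough Rust sketch of that core measurement loop is shown below (standard library only; the command being timed is an arbitrary example, not part of the suite):

use std::process::{Command, Stdio};
use std::time::Instant;

// Time one run of a command, discarding its output (mirrors Benchmark.run_one).
fn run_one(cmd: &mut Command) -> f64 {
    let start = Instant::now();
    cmd.stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .expect("failed to run command");
    start.elapsed().as_secs_f64()
}

fn main() {
    let warmup = 1;
    let count = 3;
    let mut samples = Vec::with_capacity(count);

    // Example command only; the real suite builds these from each benchmark.
    let mut cmd = Command::new("rg");
    cmd.arg("PM_RESUME");

    for _ in 0..warmup {
        run_one(&mut cmd);
    }
    for _ in 0..count {
        samples.push(run_one(&mut cmd));
    }

    // Sample mean and standard deviation, as the Python statistics module computes them.
    let mean = samples.iter().sum::<f64>() / samples.len() as f64;
    let var = samples.iter().map(|s| (s - mean).powi(2)).sum::<f64>()
        / (samples.len() - 1) as f64;
    println!("{:.3} +/- {:.3} seconds", mean, var.sqrt());
}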
@@ -6,23 +6,22 @@ set -ex

# Generate artifacts for release
mk_artifacts() {
    RUSTFLAGS="-C target-feature=+ssse3" cargo build --target $TARGET --release --features simd-accel
    RUSTFLAGS="-C target-feature=+ssse3" \
      cargo build --target $TARGET --release --features simd-accel
}

mk_tarball() {
    # create a "staging" directory
    local td=$(mktempd)
    local out_dir=$(pwd)
    local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
    mkdir "$td/$name"

    # TODO update this part to copy the artifacts that make sense for your project
    # NOTE All Cargo build artifacts will be under the 'target/$TARGET/{debug,release}'
    cp target/$TARGET/release/xrep $td
    cp target/$TARGET/release/rg "$td/$name/"
    cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/"

    pushd $td

    # release tarball will look like 'rust-everywhere-v1.2.3-x86_64-unknown-linux-gnu.tar.gz'
    tar czf $out_dir/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz *

    tar czf "$out_dir/$name.tar.gz" *
    popd
    rm -r $td
}
ci/script.sh (28 changed lines)

@@ -11,42 +11,20 @@ disable_cross_doctests() {
        if [ "$TRAVIS_OS_NAME" = "osx" ]; then
            brew install gnu-sed --default-names
        fi

        find src -name '*.rs' -type f | xargs sed -i -e 's:\(//.\s*```\):\1 ignore,:g'
    fi
}

# TODO modify this function as you see fit
# PROTIP Always pass `--target $TARGET` to cargo commands, this makes cargo output build artifacts
# to target/$TARGET/{debug,release} which can reduce the number of needed conditionals in the
# `before_deploy`/packaging phase
run_test_suite() {
    case $TARGET in
        # configure emulation for transparent execution of foreign binaries
        aarch64-unknown-linux-gnu)
            export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
            ;;
        arm*-unknown-linux-gnueabihf)
            export QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf
            ;;
        *)
            ;;
    esac

    if [ ! -z "$QEMU_LD_PREFIX" ]; then
        # Run tests on a single thread when using QEMU user emulation
        export RUST_TEST_THREADS=1
    fi

    cargo build --target $TARGET --verbose
    cargo test --target $TARGET
    cargo test --target $TARGET --verbose

    # sanity check the file type
    file target/$TARGET/debug/xrep
    file target/$TARGET/debug/rg
}

main() {
    disable_cross_doctests
    # disable_cross_doctests
    run_test_suite
}
compile (1 changed line)

@@ -1,4 +1,5 @@
#!/bin/sh

export RUSTFLAGS="-C target-feature=+ssse3"
# export RUSTFLAGS="-C target-cpu=native"
cargo build --release --features simd-accel
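Both the CI packaging step and the compile script above build with --features simd-accel under RUSTFLAGS="-C target-feature=+ssse3". As a hedged illustration only (the function below is hypothetical and not ripgrep's source), a Cargo feature like that is typically consumed with cfg gates so that default builds still compile:

// Compiled only when the crate is built with `--features simd-accel`.
#[cfg(feature = "simd-accel")]
fn count_lines(buf: &[u8]) -> usize {
    // Placeholder for a vectorized implementation enabled by +ssse3.
    buf.iter().filter(|&&b| b == b'\n').count()
}

// Fallback used for default builds without the feature.
#[cfg(not(feature = "simd-accel"))]
fn count_lines(buf: &[u8]) -> usize {
    buf.iter().filter(|&&b| b == b'\n').count()
}

fn main() {
    println!("{}", count_lines(b"a\nb\nc\n"));
}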
@@ -6,14 +6,15 @@ authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "https://github.com/BurntSushi/xrep"
homepage = "https://github.com/BurntSushi/xrep"
repository = "https://github.com/BurntSushi/xrep"
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"

[dependencies]
log = "0.3"
memchr = "0.1"
memmap = "0.2"
regex = "0.1.75"

@@ -4,6 +4,8 @@
A fast line oriented regex searcher.
*/

#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
extern crate regex_syntax as syntax;

@@ -1,13 +1,22 @@
/*!
The literals module is responsible for extracting *inner* literals out of the
AST of a regular expression. Normally this is the job of the regex engine
itself, but the regex engine doesn't look for inner literals. Since we're doing
line based searching, we can use them, so we need to do it ourselves.

Note that this implementation is incredibly suspicious. We need something more
principled.
*/
use std::cmp;
use std::iter;

use regex::bytes::Regex;
use syntax::{
    Expr, Literals, Lit,
    Repeater,
    ByteClass, ByteRange, CharClass, ClassRange, Repeater,
};

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct LiteralSets {
    prefixes: Literals,
    suffixes: Literals,

@@ -27,6 +36,7 @@ impl LiteralSets {

    pub fn to_regex(&self) -> Option<Regex> {
        if self.prefixes.all_complete() && !self.prefixes.is_empty() {
            debug!("literal prefixes detected: {:?}", self.prefixes);
            // When this is true, the regex engine will do a literal scan.
            return None;
        }

@@ -56,13 +66,27 @@ impl LiteralSets {
        if suf_lcs.len() > lit.len() {
            lit = suf_lcs;
        }
        if req.len() > lit.len() {
        if req_lits.len() == 1 && req.len() > lit.len() {
            lit = req;
        }
        if lit.is_empty() {

        // Special case: if we detected an alternation of inner required
        // literals and its longest literal is bigger than the longest
        // prefix/suffix, then choose the alternation. In practice, this
        // helps with case insensitive matching, which can generate lots of
        // inner required literals.
        let any_empty = req_lits.iter().any(|lit| lit.is_empty());
        if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
            debug!("required literals found: {:?}", req_lits);
            let alts: Vec<String> =
                req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
            // Literals always compile.
            Some(Regex::new(&alts.join("|")).unwrap())
        } else if lit.is_empty() {
            None
        } else {
            // Literals always compile.
            debug!("required literal found: {:?}", show(lit));
            Some(Regex::new(&bytes_to_regex(lit)).unwrap())
        }
    }

@@ -75,14 +99,30 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
            let s: String = chars.iter().cloned().collect();
            lits.cross_add(s.as_bytes());
        }
        Literal { casei: true, .. } => {
            lits.cut();
        Literal { ref chars, casei: true } => {
            for &c in chars {
                let cls = CharClass::new(vec![
                    ClassRange { start: c, end: c },
                ]).case_fold();
                if !lits.add_char_class(&cls) {
                    lits.cut();
                    return;
                }
            }
        }
        LiteralBytes { ref bytes, casei: false } => {
            lits.cross_add(bytes);
        }
        LiteralBytes { casei: true, .. } => {
            lits.cut();
        LiteralBytes { ref bytes, casei: true } => {
            for &b in bytes {
                let cls = ByteClass::new(vec![
                    ByteRange { start: b, end: b },
                ]).case_fold();
                if !lits.add_byte_class(&cls) {
                    lits.cut();
                    return;
                }
            }
        }
        Class(_) => {
            lits.cut();

@@ -205,3 +245,18 @@ fn bytes_to_regex(bs: &[u8]) -> String {
    }
    s
}

/// Converts arbitrary bytes to a nice string.
fn show(bs: &[u8]) -> String {
    // Why aren't we using this to feed to the regex? Doesn't really matter
    // I guess. ---AG
    use std::ascii::escape_default;
    use std::str;

    let mut nice = String::new();
    for &b in bs {
        let part: Vec<u8> = escape_default(b).collect();
        nice.push_str(str::from_utf8(&part).unwrap());
    }
    nice
}

@@ -152,6 +152,7 @@ impl GrepBuilder {
            .unicode(true)
            .case_insensitive(self.opts.case_insensitive)
            .parse(&self.pattern));
        debug!("regex ast:\n{:#?}", expr);
        Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
    }
}

@@ -194,7 +195,7 @@ impl Grep {
        let (prevnl, nextnl) = self.find_line(buf, e, e);
        match self.re.shortest_match(&buf[prevnl..nextnl]) {
            None => {
                start = nextnl + 1;
                start = nextnl;
                continue;
            }
            Some(_) => {

@@ -253,7 +254,7 @@ impl<'b, 's> Iterator for Iter<'b, 's> {
            self.start = self.buf.len();
            return None;
        }
        self.start = mat.end + 1;
        self.start = mat.end;
        Some(mat)
    }
}
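The selection logic in to_regex above is easier to follow in isolation: prefer the longest of the prefix set, the suffix set and the required inner literals, and fall back to an alternation of required literals when there are several non-empty ones (which the new case-insensitive handling tends to produce). The sketch below restates that decision with plain byte slices; it is an assumption-level simplification, not the grep crate's actual API, and it omits the byte escaping done by bytes_to_regex:

/// Pick the literal (or alternation of literals) used to pre-filter lines.
/// Returns None when no useful literal exists and the full regex engine
/// should be used directly.
fn choose_filter<'a>(
    prefix_lcs: &'a [u8],
    suffix_lcs: &'a [u8],
    required: &[&'a [u8]],
) -> Option<String> {
    static EMPTY: &[u8] = &[];
    let req_longest = required.iter().cloned().max_by_key(|l| l.len()).unwrap_or(EMPTY);

    let mut lit = prefix_lcs;
    if suffix_lcs.len() > lit.len() {
        lit = suffix_lcs;
    }
    if required.len() == 1 && req_longest.len() > lit.len() {
        lit = req_longest;
    }

    let any_empty = required.iter().any(|l| l.is_empty());
    if req_longest.len() > lit.len() && required.len() > 1 && !any_empty {
        // Several non-empty required literals: search for any of them.
        let alts: Vec<String> = required
            .iter()
            .map(|l| String::from_utf8_lossy(l).into_owned())
            .collect();
        Some(alts.join("|"))
    } else if lit.is_empty() {
        None
    } else {
        Some(String::from_utf8_lossy(lit).into_owned())
    }
}

fn main() {
    let required: Vec<&[u8]> = vec![&b"ERR_SYS"[..], &b"PME_TURN_OFF"[..]];
    println!("{:?}", choose_filter(b"", b"", &required));
}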
src/args.rs (150 changed lines)

@@ -9,14 +9,16 @@ use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use term::Terminal;
use walkdir::WalkDir;

use atty;
use gitignore::{Gitignore, GitignoreBuilder};
use ignore::Ignore;
use out::Out;
use out::{Out, OutBuffer};
use printer::Printer;
use search::{InputBuffer, Searcher};
use sys;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
use types::{FileTypeDef, Types, TypesBuilder};
use walk;

@@ -27,13 +29,13 @@ use Result;
/// If you've never heard of Docopt before, see: http://docopt.org
/// (TL;DR: The CLI parser is generated from the usage string below.)
const USAGE: &'static str = "
Usage: xrep [options] <pattern> [<path> ...]
       xrep [options] --files [<path> ...]
       xrep [options] --type-list
       xrep --help
       xrep --version
Usage: rg [options] <pattern> [<path> ...]
       rg [options] --files [<path> ...]
       rg [options] --type-list
       rg --help
       rg --version

xrep is like the silver searcher and grep, but faster than both.
rg combines the usability of the silver search with the raw speed of grep.

Common options:
    -a, --text                 Search binary files as if they were text.

@@ -75,6 +77,11 @@ Less common options:
    -C, --context NUM
        Show NUM lines before and after each match.

    --column
        Show column numbers (1 based) in output. This only shows the column
        numbers for the first match on each line. Note that this doesn't try
        to account for Unicode. One byte is equal to one column.

    --context-separator ARG
        The string to use when separating non-continuous context lines. Escape
        sequences may be used. [default: --]

@@ -97,17 +104,22 @@ Less common options:
        Don't show any file name heading.

    --hidden
        Search hidden directories and files.
        Search hidden directories and files. (Hidden directories and files are
        skipped by default.)

    -L, --follow
        Follow symlinks.

    --line-terminator ARG
        The byte to use for a line terminator. Escape sequences may be used.
        [default: \\n]
    --mmap
        Search using memory maps when possible. This is enabled by default
        when ripgrep thinks it will be faster. (Note that mmap searching
        doesn't current support the various context related options.)

    --no-mmap
        Never use memory maps, even when they might be faster.

    --no-ignore
        Don't respect ignore files (.gitignore, .xrepignore, etc.)
        Don't respect ignore files (.gitignore, .rgignore, etc.)

    --no-ignore-parent
        Don't respect ignore files in parent directories.

@@ -123,7 +135,7 @@ Less common options:
        (capped at 6). [default: 0]

    --version
        Show the version number of xrep and exit.
        Show the version number of ripgrep and exit.

File type management options:
    --type-list

@@ -138,7 +150,7 @@ File type management options:
";

/// RawArgs are the args as they are parsed from Docopt. They aren't used
/// directly by the rest of xrep.
/// directly by the rest of ripgrep.
#[derive(Debug, RustcDecodable)]
pub struct RawArgs {
    arg_pattern: String,

@@ -146,6 +158,7 @@ pub struct RawArgs {
    flag_after_context: usize,
    flag_before_context: usize,
    flag_color: String,
    flag_column: bool,
    flag_context: usize,
    flag_context_separator: String,
    flag_count: bool,

@@ -158,12 +171,13 @@ pub struct RawArgs {
    flag_ignore_case: bool,
    flag_invert_match: bool,
    flag_line_number: bool,
    flag_line_terminator: String,
    flag_literal: bool,
    flag_mmap: bool,
    flag_no_heading: bool,
    flag_no_ignore: bool,
    flag_no_ignore_parent: bool,
    flag_no_line_number: bool,
    flag_no_mmap: bool,
    flag_pretty: bool,
    flag_quiet: bool,
    flag_replace: Option<String>,

@@ -186,17 +200,20 @@ pub struct Args {
    after_context: usize,
    before_context: usize,
    color: bool,
    column: bool,
    context_separator: Vec<u8>,
    count: bool,
    eol: u8,
    files: bool,
    follow: bool,
    glob_overrides: Option<Gitignore>,
    grep: Grep,
    heading: bool,
    hidden: bool,
    ignore_case: bool,
    invert_match: bool,
    line_number: bool,
    mmap: bool,
    no_ignore: bool,
    no_ignore_parent: bool,
    quiet: bool,

@@ -210,7 +227,7 @@ pub struct Args {
}

impl RawArgs {
    /// Convert arguments parsed into a configuration used by xrep.
    /// Convert arguments parsed into a configuration used by ripgrep.
    fn to_args(&self) -> Result<Args> {
        let pattern = {
            let pattern =

@@ -227,7 +244,9 @@ impl RawArgs {
        };
        let paths =
            if self.arg_path.is_empty() {
                if sys::stdin_is_atty() {
                if atty::on_stdin()
                    || self.flag_files
                    || self.flag_type_list {
                    vec![Path::new("./").to_path_buf()]
                } else {
                    vec![Path::new("-").to_path_buf()]

@@ -243,15 +262,19 @@ impl RawArgs {
        } else {
            (self.flag_after_context, self.flag_before_context)
        };
        let eol = {
            let eol = unescape(&self.flag_line_terminator);
            if eol.is_empty() {
                errored!("Empty line terminator is not allowed.");
            } else if eol.len() > 1 {
                errored!("Line terminators are limited to exactly 1 byte.");
            }
            eol[0]
        };
        let mmap =
            if before_context > 0 || after_context > 0 || self.flag_no_mmap {
                false
            } else if self.flag_mmap {
                true
            } else {
                // If we're only searching a few paths and all of them are
                // files, then memory maps are probably faster.
                paths.len() <= 10 && paths.iter().all(|p| p.is_file())
            };
        if mmap {
            debug!("will try to use memory maps");
        }
        let glob_overrides =
            if self.flag_glob.is_empty() {
                None

@@ -265,16 +288,17 @@ impl RawArgs {
        };
        let threads =
            if self.flag_threads == 0 {
                cmp::min(6, num_cpus::get())
                cmp::min(8, num_cpus::get())
            } else {
                self.flag_threads
            };
        let color =
            if self.flag_color == "auto" {
                sys::stdout_is_atty() || self.flag_pretty
                atty::on_stdout() || self.flag_pretty
            } else {
                self.flag_color == "always"
            };
        let eol = b'\n';
        let mut with_filename = self.flag_with_filename;
        if !with_filename {
            with_filename = paths.len() > 1 || paths[0].is_dir();

@@ -283,23 +307,32 @@ impl RawArgs {
        btypes.add_defaults();
        try!(self.add_types(&mut btypes));
        let types = try!(btypes.build());
        let grep = try!(
            GrepBuilder::new(&pattern)
                .case_insensitive(self.flag_ignore_case)
                .line_terminator(eol)
                .build()
        );
        let mut args = Args {
            pattern: pattern,
            paths: paths,
            after_context: after_context,
            before_context: before_context,
            color: color,
            column: self.flag_column,
            context_separator: unescape(&self.flag_context_separator),
            count: self.flag_count,
            eol: eol,
            files: self.flag_files,
            follow: self.flag_follow,
            glob_overrides: glob_overrides,
            grep: grep,
            heading: !self.flag_no_heading && self.flag_heading,
            hidden: self.flag_hidden,
            ignore_case: self.flag_ignore_case,
            invert_match: self.flag_invert_match,
            line_number: !self.flag_no_line_number && self.flag_line_number,
            mmap: mmap,
            no_ignore: self.flag_no_ignore,
            no_ignore_parent: self.flag_no_ignore_parent,
            quiet: self.flag_quiet,

@@ -312,7 +345,7 @@ impl RawArgs {
            with_filename: with_filename,
        };
        // If stdout is a tty, then apply some special default options.
        if sys::stdout_is_atty() || self.flag_pretty {
        if atty::on_stdout() || self.flag_pretty {
            if !self.flag_no_line_number && !args.count {
                args.line_number = true;
            }

@@ -345,7 +378,7 @@ impl Args {
    ///
    /// If a CLI usage error occurred, then exit the process and print a usage
    /// or error message. Similarly, if the user requested the version of
    /// xrep, then print the version and exit.
    /// ripgrep, then print the version and exit.
    ///
    /// Also, initialize a global logger.
    pub fn parse() -> Result<Args> {

@@ -367,7 +400,7 @@ impl Args {
        raw.to_args().map_err(From::from)
    }

    /// Returns true if xrep should print the files it will search and exit
    /// Returns true if ripgrep should print the files it will search and exit
    /// (but not do any actual searching).
    pub fn files(&self) -> bool {
        self.files

@@ -378,12 +411,8 @@ impl Args {
    /// basic searching of regular expressions in a single buffer.
    ///
    /// The pattern and other flags are taken from the command line.
    pub fn grep(&self) -> Result<Grep> {
        GrepBuilder::new(&self.pattern)
            .case_insensitive(self.ignore_case)
            .line_terminator(self.eol)
            .build()
            .map_err(From::from)
    pub fn grep(&self) -> Grep {
        self.grep.clone()
    }

    /// Creates a new input buffer that is used in searching.

@@ -393,10 +422,16 @@ impl Args {
        inp
    }

    /// Whether we should prefer memory maps for searching or not.
    pub fn mmap(&self) -> bool {
        self.mmap
    }

    /// Create a new printer of individual search results that writes to the
    /// writer given.
    pub fn printer<W: Send + io::Write>(&self, wtr: W) -> Printer<W> {
        let mut p = Printer::new(wtr, self.color)
    pub fn printer<W: Send + Terminal>(&self, wtr: W) -> Printer<W> {
        let mut p = Printer::new(wtr)
            .column(self.column)
            .context_separator(self.context_separator.clone())
            .eol(self.eol)
            .heading(self.heading)

@@ -410,8 +445,8 @@ impl Args {

    /// Create a new printer of search results for an entire file that writes
    /// to the writer given.
    pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
        let mut out = Out::new(wtr);
    pub fn out(&self) -> Out {
        let mut out = Out::new(self.color);
        if self.heading && !self.count {
            out = out.file_separator(b"".to_vec());
        } else if self.before_context > 0 || self.after_context > 0 {

@@ -420,6 +455,11 @@ impl Args {
        out
    }

    /// Create a new buffer for use with searching.
    pub fn outbuf(&self) -> OutBuffer {
        OutBuffer::new(self.color)
    }

    /// Return the paths that should be searched.
    pub fn paths(&self) -> &[PathBuf] {
        &self.paths

@@ -428,7 +468,7 @@ impl Args {
    /// Create a new line based searcher whose configuration is taken from the
    /// command line. This searcher supports a dizzying array of features:
    /// inverted matching, line counting, context control and more.
    pub fn searcher<'a, R: io::Read, W: Send + io::Write>(
    pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
        &self,
        inp: &'a mut InputBuffer,
        printer: &'a mut Printer<W>,

@@ -446,6 +486,24 @@ impl Args {
            .text(self.text)
    }

    /// Create a new line based searcher whose configuration is taken from the
    /// command line. This search operates on an entire file all once (which
    /// may have been memory mapped).
    pub fn searcher_buffer<'a, W: Send + Terminal>(
        &self,
        printer: &'a mut Printer<W>,
        grep: &'a Grep,
        path: &'a Path,
        buf: &'a [u8],
    ) -> BufferSearcher<'a, W> {
        BufferSearcher::new(printer, grep, path, buf)
            .count(self.count)
            .eol(self.eol)
            .line_number(self.line_number)
            .invert_match(self.invert_match)
            .text(self.text)
    }

    /// Returns the number of worker search threads that should be used.
    pub fn threads(&self) -> usize {
        self.threads

@@ -456,8 +514,8 @@ impl Args {
        &self.type_defs
    }

    /// Returns true if xrep should print the type definitions currently loaded
    /// and then exit.
    /// Returns true if ripgrep should print the type definitions currently
    /// loaded and then exit.
    pub fn type_list(&self) -> bool {
        self.type_list
    }
@@ -1,24 +1,23 @@
|
||||
/*!
|
||||
This io module contains various platform specific functions for detecting
|
||||
how xrep is being used. e.g., Is stdin being piped into it? Is stdout being
|
||||
redirected to a file? etc... We use this information to tweak various default
|
||||
configuration parameters such as colors and match formatting.
|
||||
This atty module contains functions for detecting whether ripgrep is being fed
|
||||
from (or to) a terminal. Windows and Unix do this differently, so implement
|
||||
both here.
|
||||
*/
|
||||
|
||||
use libc;
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdin_is_atty() -> bool {
|
||||
pub fn on_stdin() -> bool {
|
||||
use libc;
|
||||
0 < unsafe { libc::isatty(libc::STDIN_FILENO) }
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdout_is_atty() -> bool {
|
||||
pub fn on_stdout() -> bool {
|
||||
use libc;
|
||||
0 < unsafe { libc::isatty(libc::STDOUT_FILENO) }
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn stdin_is_atty() -> bool {
|
||||
pub fn on_stdin() -> bool {
|
||||
use kernel32;
|
||||
use winapi;
|
||||
|
||||
@@ -30,7 +29,7 @@ pub fn stdin_is_atty() -> bool {
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn stdout_is_atty() -> bool {
|
||||
pub fn on_stdout() -> bool {
|
||||
use kernel32;
|
||||
use winapi;
|
||||
|
@@ -9,7 +9,7 @@ The motivation for this submodule is performance and portability:
|
||||
2. We could shell out to a `git` sub-command like ls-files or status, but it
|
||||
seems better to not rely on the existence of external programs for a search
|
||||
tool. Besides, we need to implement this logic anyway to support things like
|
||||
an .xrepignore file.
|
||||
an .rgignore file.
|
||||
|
||||
The key implementation detail here is that a single gitignore file is compiled
|
||||
into a single RegexSet, which can be used to report which globs match a
|
||||
@@ -379,7 +379,7 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
const ROOT: &'static str = "/home/foobar/rust/xrep";
|
||||
const ROOT: &'static str = "/home/foobar/rust/rg";
|
||||
|
||||
ignored!(ig1, ROOT, "months", "months");
|
||||
ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
|
||||
|
23
src/glob.rs
23
src/glob.rs
@@ -29,7 +29,6 @@ to make its way into `glob` proper.
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::iter;
|
||||
use std::path;
|
||||
use std::str;
|
||||
|
||||
use regex;
|
||||
@@ -214,7 +213,7 @@ impl Pattern {
|
||||
/// regular expression and will represent the matching semantics of this
|
||||
/// glob pattern and the options given.
|
||||
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
|
||||
let sep = regex::quote(&path::MAIN_SEPARATOR.to_string());
|
||||
let seps = regex::quote(r"/\");
|
||||
let mut re = String::new();
|
||||
re.push_str("(?-u)");
|
||||
if options.case_insensitive {
|
||||
@@ -235,26 +234,27 @@ impl Pattern {
|
||||
}
|
||||
Token::Any => {
|
||||
if options.require_literal_separator {
|
||||
re.push_str(&format!("[^{}]", sep));
|
||||
re.push_str(&format!("[^{}]", seps));
|
||||
} else {
|
||||
re.push_str(".");
|
||||
}
|
||||
}
|
||||
Token::ZeroOrMore => {
|
||||
if options.require_literal_separator {
|
||||
re.push_str(&format!("[^{}]*", sep));
|
||||
re.push_str(&format!("[^{}]*", seps));
|
||||
} else {
|
||||
re.push_str(".*");
|
||||
}
|
||||
}
|
||||
Token::RecursivePrefix => {
|
||||
re.push_str(&format!("(?:{sep}?|.*{sep})", sep=sep));
|
||||
re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
|
||||
}
|
||||
Token::RecursiveSuffix => {
|
||||
re.push_str(&format!("(?:{sep}?|{sep}.*)", sep=sep));
|
||||
re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
|
||||
}
|
||||
Token::RecursiveZeroOrMore => {
|
||||
re.push_str(&format!("(?:{sep}|{sep}.*{sep})", sep=sep));
|
||||
re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
|
||||
sep=seps));
|
||||
}
|
||||
Token::Class { negated, ref ranges } => {
|
||||
re.push('[');
|
||||
@@ -480,6 +480,9 @@ mod tests {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
let path = &Path::new($path).to_str().unwrap();
|
||||
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
|
||||
// println!("PATTERN: {}", $pat);
|
||||
// println!("REGEX: {:?}", re);
|
||||
// println!("PATH: {}", path);
|
||||
assert!(!re.is_match(path.as_bytes()));
|
||||
}
|
||||
};
|
||||
@@ -561,12 +564,11 @@ mod tests {
|
||||
case_insensitive: true,
|
||||
require_literal_separator: false,
|
||||
};
|
||||
const SEP: char = ::std::path::MAIN_SEPARATOR;
|
||||
|
||||
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
|
||||
|
||||
toregex!(re_slash1, "?", format!("^[^{}]$", SEP), SLASHLIT);
|
||||
toregex!(re_slash2, "*", format!("^[^{}]*$", SEP), SLASHLIT);
|
||||
toregex!(re_slash1, "?", r"^[^/\\]$", SLASHLIT);
|
||||
toregex!(re_slash2, "*", r"^[^/\\]*$", SLASHLIT);
|
||||
|
||||
toregex!(re1, "a", "^a$");
|
||||
toregex!(re2, "?", "^.$");
|
||||
@@ -642,6 +644,7 @@ mod tests {
|
||||
|
||||
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
||||
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
|
||||
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);
|
||||
nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
|
||||
matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
|
||||
|
||||
|
@@ -5,7 +5,7 @@ whether a *single* file path should be searched or not.
|
||||
In general, there are two ways to ignore a particular file:
|
||||
|
||||
1. Specify an ignore rule in some "global" configuration, such as a
|
||||
$HOME/.xrepignore or on the command line.
|
||||
$HOME/.rgignore or on the command line.
|
||||
2. A specific ignore file (like .gitignore) found during directory traversal.
|
||||
|
||||
The `IgnoreDir` type handles ignore patterns for any one particular directory
|
||||
@@ -24,7 +24,7 @@ use types::Types;
|
||||
const IGNORE_NAMES: &'static [&'static str] = &[
|
||||
".gitignore",
|
||||
".agignore",
|
||||
".xrepignore",
|
||||
".rgignore",
|
||||
];
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
@@ -257,8 +257,8 @@ pub struct IgnoreDir {
|
||||
/// A single accumulation of glob patterns for this directory, matched
|
||||
/// using gitignore semantics.
|
||||
///
|
||||
/// This will include patterns from xrepignore as well. The patterns are
|
||||
/// ordered so that precedence applies automatically (e.g., xrepignore
|
||||
/// This will include patterns from rgignore as well. The patterns are
|
||||
/// ordered so that precedence applies automatically (e.g., rgignore
|
||||
/// patterns procede gitignore patterns).
|
||||
gi: Option<Gitignore>,
|
||||
// TODO(burntsushi): Matching other types of glob patterns that don't
|
||||
@@ -422,7 +422,7 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
const ROOT: &'static str = "/home/foobar/rust/xrep";
|
||||
const ROOT: &'static str = "/home/foobar/rust/rg";
|
||||
|
||||
ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
|
||||
ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
|
||||
|
59
src/main.rs
59
src/main.rs
@@ -34,12 +34,14 @@ use std::thread;
|
||||
|
||||
use crossbeam::sync::chase_lev::{self, Steal, Stealer};
|
||||
use grep::Grep;
|
||||
use memmap::{Mmap, Protection};
|
||||
use term::Terminal;
|
||||
use walkdir::DirEntry;
|
||||
|
||||
use args::Args;
|
||||
use out::Out;
|
||||
use out::{NoColorTerminal, Out, OutBuffer};
|
||||
use printer::Printer;
|
||||
use search::InputBuffer;
|
||||
use search_stream::InputBuffer;
|
||||
|
||||
macro_rules! errored {
|
||||
($($tt:tt)*) => {
|
||||
@@ -55,13 +57,14 @@ macro_rules! eprintln {
|
||||
}
|
||||
|
||||
mod args;
|
||||
mod atty;
|
||||
mod gitignore;
|
||||
mod glob;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod printer;
|
||||
mod search;
|
||||
mod sys;
|
||||
mod search_buffer;
|
||||
mod search_stream;
|
||||
mod terminal;
|
||||
mod types;
|
||||
mod walk;
|
||||
@@ -87,7 +90,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
return run_types(args);
|
||||
}
|
||||
let args = Arc::new(args);
|
||||
let out = Arc::new(Mutex::new(args.out(io::stdout())));
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let outbuf = args.outbuf();
|
||||
let mut workers = vec![];
|
||||
|
||||
let mut workq = {
|
||||
@@ -98,8 +102,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
out: out.clone(),
|
||||
chan_work: stealer.clone(),
|
||||
inpbuf: args.input_buffer(),
|
||||
outbuf: Some(vec![]),
|
||||
grep: try!(args.grep()),
|
||||
outbuf: Some(outbuf.clone()),
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
};
|
||||
workers.push(thread::spawn(move || worker.run()));
|
||||
@@ -126,7 +130,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
}
|
||||
|
||||
fn run_files(args: Args) -> Result<u64> {
|
||||
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
|
||||
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
|
||||
let mut printer = args.printer(term);
|
||||
let mut file_count = 0;
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
@@ -143,7 +148,8 @@ fn run_files(args: Args) -> Result<u64> {
|
||||
}
|
||||
|
||||
fn run_types(args: Args) -> Result<u64> {
|
||||
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
|
||||
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
|
||||
let mut printer = args.printer(term);
|
||||
let mut ty_count = 0;
|
||||
for def in args.type_defs() {
|
||||
printer.type_def(def);
|
||||
@@ -165,10 +171,10 @@ enum WorkReady {
|
||||
|
||||
struct Worker {
|
||||
args: Arc<Args>,
|
||||
out: Arc<Mutex<Out<io::Stdout>>>,
|
||||
out: Arc<Mutex<Out>>,
|
||||
chan_work: Stealer<Work>,
|
||||
inpbuf: InputBuffer,
|
||||
outbuf: Option<Vec<u8>>,
|
||||
outbuf: Option<OutBuffer>,
|
||||
grep: Grep,
|
||||
match_count: u64,
|
||||
}
|
||||
@@ -196,7 +202,7 @@ impl Worker {
|
||||
let mut printer = self.args.printer(outbuf);
|
||||
self.do_work(&mut printer, work);
|
||||
let outbuf = printer.into_inner();
|
||||
if !outbuf.is_empty() {
|
||||
if !outbuf.get_ref().is_empty() {
|
||||
let mut out = self.out.lock().unwrap();
|
||||
out.write(&outbuf);
|
||||
}
|
||||
@@ -205,7 +211,7 @@ impl Worker {
|
||||
self.match_count
|
||||
}
|
||||
|
||||
fn do_work<W: Send + io::Write>(
|
||||
fn do_work<W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
work: WorkReady,
|
||||
@@ -221,7 +227,11 @@ impl Worker {
|
||||
if let Ok(p) = path.strip_prefix("./") {
|
||||
path = p;
|
||||
}
|
||||
self.search(printer, path, file)
|
||||
if self.args.mmap() {
|
||||
self.search_mmap(printer, path, &file)
|
||||
} else {
|
||||
self.search(printer, path, file)
|
||||
}
|
||||
}
|
||||
};
|
||||
match result {
|
||||
@@ -234,7 +244,7 @@ impl Worker {
|
||||
}
|
||||
}
|
||||
|
||||
fn search<R: io::Read, W: Send + io::Write>(
|
||||
fn search<R: io::Read, W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
@@ -248,4 +258,23 @@ impl Worker {
|
||||
rdr,
|
||||
).run().map_err(From::from)
|
||||
}
|
||||
|
||||
fn search_mmap<W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
file: &File,
|
||||
) -> Result<u64> {
|
||||
if try!(file.metadata()).len() == 0 {
|
||||
// Opening a memory map with an empty file results in an error.
|
||||
return Ok(0);
|
||||
}
|
||||
let mmap = try!(Mmap::open(file, Protection::Read));
|
||||
Ok(self.args.searcher_buffer(
|
||||
printer,
|
||||
&self.grep,
|
||||
path,
|
||||
unsafe { mmap.as_slice() },
|
||||
).run())
|
||||
}
|
||||
}
|
||||
|
458
src/out.rs
458
src/out.rs
@@ -1,4 +1,40 @@
|
||||
use std::io::{self, Write};
|
||||
use std::sync::Arc;
|
||||
|
||||
use term::{self, Terminal};
|
||||
use term::color::Color;
|
||||
use term::terminfo::TermInfo;
|
||||
#[cfg(windows)]
|
||||
use term::WinConsole;
|
||||
|
||||
use terminal::TerminfoTerminal;
|
||||
|
||||
pub type StdoutTerminal = Box<Terminal<Output=io::Stdout> + Send>;
|
||||
|
||||
/// Gets a terminal that supports color if available.
|
||||
#[cfg(windows)]
|
||||
fn term_stdout(color: bool) -> StdoutTerminal {
|
||||
let stdout = io::stdout();
|
||||
WinConsole::new(stdout)
|
||||
.ok()
|
||||
.map(|t| Box::new(t) as StdoutTerminal)
|
||||
.unwrap_or_else(|| {
|
||||
let stdout = io::stdout();
|
||||
Box::new(NoColorTerminal::new(stdout)) as StdoutTerminal
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets a terminal that supports color if available.
|
||||
#[cfg(not(windows))]
|
||||
fn term_stdout(color: bool) -> StdoutTerminal {
|
||||
let stdout = io::stdout();
|
||||
if !color || TERMINFO.is_none() {
|
||||
Box::new(NoColorTerminal::new(stdout))
|
||||
} else {
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
Box::new(TerminfoTerminal::new_with_terminfo(stdout, info))
|
||||
}
|
||||
}
|
||||
|
||||
/// Out controls the actual output of all search results for a particular file
|
||||
/// to the end user.
|
||||
@@ -6,17 +42,17 @@ use std::io::{self, Write};
|
||||
/// (The difference between Out and Printer is that a Printer works with
|
||||
/// individual search results where as Out works with search results for each
|
||||
/// file as a whole. For example, it knows when to print a file separator.)
|
||||
pub struct Out<W: io::Write> {
|
||||
wtr: io::BufWriter<W>,
|
||||
pub struct Out {
|
||||
term: StdoutTerminal,
|
||||
printed: bool,
|
||||
file_separator: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl<W: io::Write> Out<W> {
|
||||
impl Out {
|
||||
/// Create a new Out that writes to the wtr given.
|
||||
pub fn new(wtr: W) -> Out<W> {
|
||||
pub fn new(color: bool) -> Out {
|
||||
Out {
|
||||
wtr: io::BufWriter::new(wtr),
|
||||
term: term_stdout(color),
|
||||
printed: false,
|
||||
file_separator: None,
|
||||
}
|
||||
@@ -26,22 +62,422 @@ impl<W: io::Write> Out<W> {
|
||||
/// By default, no separator is printed.
|
||||
///
|
||||
/// If sep is empty, then no file separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
|
||||
self.file_separator = Some(sep);
|
||||
self
|
||||
}
|
||||
|
||||
/// Write the search results of a single file to the underlying wtr and
|
||||
/// flush wtr.
|
||||
pub fn write(&mut self, buf: &[u8]) {
|
||||
pub fn write(&mut self, buf: &OutBuffer) {
|
||||
if let Some(ref sep) = self.file_separator {
|
||||
if self.printed {
|
||||
let _ = self.wtr.write_all(sep);
|
||||
let _ = self.wtr.write_all(b"\n");
|
||||
let _ = self.term.write_all(sep);
|
||||
let _ = self.term.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
let _ = self.wtr.write_all(buf);
|
||||
let _ = self.wtr.flush();
|
||||
match *buf {
|
||||
OutBuffer::Colored(ref tt) => {
|
||||
let _ = self.term.write_all(tt.get_ref());
|
||||
}
|
||||
OutBuffer::Windows(ref w) => {
|
||||
w.print_stdout(&mut self.term);
|
||||
}
|
||||
OutBuffer::NoColor(ref buf) => {
|
||||
let _ = self.term.write_all(buf);
|
||||
}
|
||||
}
|
||||
let _ = self.term.flush();
|
||||
self.printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// OutBuffer corresponds to the final output buffer for search results. All
|
||||
/// search results are written to a buffer and then a buffer is flushed to
|
||||
/// stdout only after the full search has completed.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum OutBuffer {
|
||||
Colored(TerminfoTerminal<Vec<u8>>),
|
||||
Windows(WindowsBuffer),
|
||||
NoColor(Vec<u8>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WindowsBuffer {
|
||||
buf: Vec<u8>,
|
||||
pos: usize,
|
||||
colors: Vec<WindowsColor>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WindowsColor {
|
||||
pos: usize,
|
||||
opt: WindowsOption,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum WindowsOption {
|
||||
Foreground(Color),
|
||||
Background(Color),
|
||||
Reset,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TERMINFO: Option<Arc<TermInfo>> = {
|
||||
match TermInfo::from_env() {
|
||||
Ok(info) => Some(Arc::new(info)),
|
||||
Err(err) => {
|
||||
debug!("error loading terminfo for coloring: {}", err);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl OutBuffer {
|
||||
/// Create a new output buffer.
|
||||
///
|
||||
/// When color is true, the buffer will attempt to support coloring.
|
||||
pub fn new(color: bool) -> OutBuffer {
|
||||
// If we want color, build a TerminfoTerminal and see if the current
|
||||
// environment supports coloring. If not, bail with NoColor. To avoid
|
||||
// losing our writer (ownership), do this the long way.
|
||||
if !color {
|
||||
return OutBuffer::NoColor(vec![]);
|
||||
}
|
||||
if cfg!(windows) {
|
||||
return OutBuffer::Windows(WindowsBuffer {
|
||||
buf: vec![],
|
||||
pos: 0,
|
||||
colors: vec![]
|
||||
});
|
||||
}
|
||||
if TERMINFO.is_none() {
|
||||
return OutBuffer::NoColor(vec![]);
|
||||
}
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
let tt = TerminfoTerminal::new_with_terminfo(vec![], info);
|
||||
if !tt.supports_color() {
|
||||
debug!("environment doesn't support coloring");
|
||||
return OutBuffer::NoColor(tt.into_inner());
|
||||
}
|
||||
OutBuffer::Colored(tt)
|
||||
}
|
||||
|
||||
/// Clear the give buffer of all search results such that it is reusable
|
||||
/// in another search.
|
||||
pub fn clear(&mut self) {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut tt) => {
|
||||
tt.get_mut().clear();
|
||||
}
|
||||
OutBuffer::Windows(ref mut win) => {
|
||||
win.buf.clear();
|
||||
win.colors.clear();
|
||||
win.pos = 0;
|
||||
}
|
||||
OutBuffer::NoColor(ref mut buf) => {
|
||||
buf.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn map_result<F, G>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> term::Result<()>
|
||||
where F: FnMut(&mut TerminfoTerminal<Vec<u8>>) -> term::Result<()>,
|
||||
G: FnMut(&mut WindowsBuffer) -> term::Result<()> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => f(w),
|
||||
OutBuffer::Windows(ref mut w) => g(w),
|
||||
OutBuffer::NoColor(_) => Err(term::Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_bool<F, G>(
|
||||
&self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> bool
|
||||
where F: FnMut(&TerminfoTerminal<Vec<u8>>) -> bool,
|
||||
G: FnMut(&WindowsBuffer) -> bool {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref w) => f(w),
|
||||
OutBuffer::Windows(ref w) => g(w),
|
||||
OutBuffer::NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for OutBuffer {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => w.write(buf),
|
||||
OutBuffer::Windows(ref mut w) => w.write(buf),
|
||||
OutBuffer::NoColor(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl term::Terminal for OutBuffer {
|
||||
type Output = Vec<u8>;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.reset(), |w| w.reset())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.delete_line(), |w| w.delete_line())
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &Vec<u8> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref w) => w.get_ref(),
|
||||
OutBuffer::Windows(ref w) => w.get_ref(),
|
||||
OutBuffer::NoColor(ref w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut Vec<u8> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => w.get_mut(),
|
||||
OutBuffer::Windows(ref mut w) => w.get_mut(),
|
||||
OutBuffer::NoColor(ref mut w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn into_inner(self) -> Vec<u8> {
|
||||
match self {
|
||||
OutBuffer::Colored(w) => w.into_inner(),
|
||||
OutBuffer::Windows(w) => w.into_inner(),
|
||||
OutBuffer::NoColor(w) => w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsBuffer {
|
||||
fn push(&mut self, opt: WindowsOption) {
|
||||
let pos = self.pos;
|
||||
self.colors.push(WindowsColor { pos: pos, opt: opt });
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsBuffer {
|
||||
/// Print the contents to the given terminal.
|
||||
pub fn print_stdout(&self, tt: &mut StdoutTerminal) {
|
||||
if !tt.supports_color() {
|
||||
let _ = tt.write_all(&self.buf);
|
||||
let _ = tt.flush();
|
||||
return;
|
||||
}
|
||||
let mut last = 0;
|
||||
for col in &self.colors {
|
||||
let _ = tt.write_all(&self.buf[last..col.pos]);
|
||||
match col.opt {
|
||||
WindowsOption::Foreground(c) => {
|
||||
let _ = tt.fg(c);
|
||||
}
|
||||
WindowsOption::Background(c) => {
|
||||
let _ = tt.bg(c);
|
||||
}
|
||||
WindowsOption::Reset => {
|
||||
let _ = tt.reset();
|
||||
}
|
||||
}
|
||||
last = col.pos;
|
||||
}
|
||||
let _ = tt.write_all(&self.buf[last..]);
|
||||
let _ = tt.flush();
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for WindowsBuffer {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
let n = try!(self.buf.write(buf));
|
||||
self.pos += n;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl term::Terminal for WindowsBuffer {
|
||||
type Output = Vec<u8>;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Foreground(fg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Background(bg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.push(WindowsOption::Reset);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &Vec<u8> {
|
||||
&self.buf
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut Vec<u8> {
|
||||
&mut self.buf
|
||||
}
|
||||
|
||||
fn into_inner(self) -> Vec<u8> {
|
||||
self.buf
|
||||
}
|
||||
}
|
||||
|
||||
/// NoColorTerminal implements Terminal, but supports no coloring.
|
||||
///
|
||||
/// Its useful when an API requires a Terminal, but coloring isn't needed.
|
||||
pub struct NoColorTerminal<W> {
|
||||
wtr: W,
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> NoColorTerminal<W> {
|
||||
/// Wrap the given writer in a Terminal interface.
|
||||
pub fn new(wtr: W) -> NoColorTerminal<W> {
|
||||
NoColorTerminal {
|
||||
wtr: wtr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for NoColorTerminal<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.wtr.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.wtr.flush()
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for NoColorTerminal<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
&self.wtr
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
&mut self.wtr
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
self.wtr
|
||||
}
|
||||
}
|
||||
|
203
src/printer.rs
203
src/printer.rs
@@ -1,17 +1,11 @@
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use regex::bytes::Regex;
|
||||
use term::{self, Terminal};
|
||||
use term::color::*;
|
||||
use term::terminfo::TermInfo;
|
||||
use term::{Attr, Terminal};
|
||||
use term::color;
|
||||
|
||||
use terminal::TerminfoTerminal;
|
||||
use types::FileTypeDef;
|
||||
|
||||
use self::Writer::*;
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
/// Note that we currently ignore all write errors. It's probably worthwhile
|
||||
@@ -19,9 +13,11 @@ use self::Writer::*;
|
||||
/// writes to memory, neither of which commonly fail.
|
||||
pub struct Printer<W> {
|
||||
/// The underlying writer.
|
||||
wtr: Writer<W>,
|
||||
wtr: W,
|
||||
/// Whether anything has been printed to wtr yet.
|
||||
has_printed: bool,
|
||||
/// Whether to show column numbers for the first match or not.
|
||||
column: bool,
|
||||
/// The string to use to separate non-contiguous runs of context lines.
|
||||
context_separator: Vec<u8>,
|
||||
/// The end-of-line terminator used by the printer. In general, eols are
|
||||
@@ -40,14 +36,13 @@ pub struct Printer<W> {
|
||||
with_filename: bool,
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> Printer<W> {
|
||||
impl<W: Send + Terminal> Printer<W> {
|
||||
/// Create a new printer that writes to wtr.
|
||||
///
|
||||
/// `color` should be true if the printer should try to use coloring.
|
||||
pub fn new(wtr: W, color: bool) -> Printer<W> {
|
||||
pub fn new(wtr: W) -> Printer<W> {
|
||||
Printer {
|
||||
wtr: Writer::new(wtr, color),
|
||||
wtr: wtr,
|
||||
has_printed: false,
|
||||
column: false,
|
||||
context_separator: "--".to_string().into_bytes(),
|
||||
eol: b'\n',
|
||||
heading: false,
|
||||
@@ -57,6 +52,13 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// When set, column numbers will be printed for the first match on each
|
||||
/// line.
|
||||
pub fn column(mut self, yes: bool) -> Printer<W> {
|
||||
self.column = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the context separator. The default is `--`.
|
||||
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
|
||||
self.context_separator = sep;
|
||||
@@ -107,7 +109,7 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
/// Flushes the underlying writer and returns it.
|
||||
pub fn into_inner(mut self) -> W {
|
||||
let _ = self.wtr.flush();
|
||||
self.wtr.into_inner()
|
||||
self.wtr
|
||||
}
|
||||
|
||||
/// Prints a type definition.
|
||||
@@ -173,6 +175,11 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b':');
|
||||
}
|
||||
if self.column {
|
||||
let c = re.find(&buf[start..end]).map(|(s, _)| s + 1).unwrap_or(0);
|
||||
self.write(c.to_string().as_bytes());
|
||||
self.write(b":");
|
||||
}
|
||||
if self.replace.is_some() {
|
||||
let line = re.replace_all(
|
||||
&buf[start..end], &**self.replace.as_ref().unwrap());
|
||||
@@ -186,15 +193,15 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
pub fn write_match(&mut self, re: &Regex, buf: &[u8]) {
|
||||
if !self.wtr.is_color() {
|
||||
if !self.wtr.supports_color() {
|
||||
self.write(buf);
|
||||
return;
|
||||
}
|
||||
let mut last_written = 0;
|
||||
for (s, e) in re.find_iter(buf) {
|
||||
self.write(&buf[last_written..s]);
|
||||
let _ = self.wtr.fg(BRIGHT_RED);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
let _ = self.wtr.fg(color::BRIGHT_RED);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
self.write(&buf[s..e]);
|
||||
let _ = self.wtr.reset();
|
||||
last_written = e;
|
||||
@@ -226,23 +233,24 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
|
||||
if self.wtr.is_color() {
|
||||
let _ = self.wtr.fg(GREEN);
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_GREEN);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write_eol();
|
||||
if self.wtr.is_color() {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
}
|
||||
|
||||
fn line_number(&mut self, n: u64, sep: u8) {
|
||||
if self.wtr.is_color() {
|
||||
let _ = self.wtr.fg(YELLOW);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_BLUE);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(n.to_string().as_bytes());
|
||||
if self.wtr.is_color() {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
self.write(&[sep]);
|
||||
@@ -261,148 +269,3 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
self.write(&[eol]);
|
||||
}
|
||||
}
|
||||
|
||||
enum Writer<W> {
|
||||
Colored(TerminfoTerminal<W>),
|
||||
NoColor(W),
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TERMINFO: Option<Arc<TermInfo>> = {
|
||||
match term::terminfo::TermInfo::from_env() {
|
||||
Ok(info) => Some(Arc::new(info)),
|
||||
Err(err) => {
|
||||
debug!("error loading terminfo for coloring: {}", err);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> Writer<W> {
|
||||
fn new(wtr: W, color: bool) -> Writer<W> {
|
||||
// If we want color, build a TerminfoTerminal and see if the current
|
||||
// environment supports coloring. If not, bail with NoColor. To avoid
|
||||
// losing our writer (ownership), do this the long way.
|
||||
if !color || TERMINFO.is_none() {
|
||||
return NoColor(wtr);
|
||||
}
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
let tt = TerminfoTerminal::new_with_terminfo(wtr, info);
|
||||
if !tt.supports_color() {
|
||||
debug!("environment doesn't support coloring");
|
||||
return NoColor(tt.into_inner());
|
||||
}
|
||||
Colored(tt)
|
||||
}
|
||||
|
||||
fn is_color(&self) -> bool {
|
||||
match *self {
|
||||
Colored(_) => true,
|
||||
NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn map_result<F>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
) -> term::Result<()>
|
||||
where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()> {
|
||||
match *self {
|
||||
Colored(ref mut w) => f(w),
|
||||
NoColor(_) => Err(term::Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_bool<F>(
|
||||
&self,
|
||||
mut f: F,
|
||||
) -> bool
|
||||
where F: FnMut(&TerminfoTerminal<W>) -> bool {
|
||||
match *self {
|
||||
Colored(ref w) => f(w),
|
||||
NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for Writer<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.write(buf),
|
||||
NoColor(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.flush(),
|
||||
NoColor(ref mut w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for Writer<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.fg(fg))
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.bg(bg))
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
self.map_result(|w| w.attr(attr))
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
self.map_bool(|w| w.supports_attr(attr))
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.reset())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_reset())
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_color())
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.cursor_up())
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.delete_line())
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.carriage_return())
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
match *self {
|
||||
Colored(ref w) => w.get_ref(),
|
||||
NoColor(ref w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.get_mut(),
|
||||
NoColor(ref mut w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
match self {
|
||||
Colored(w) => w.into_inner(),
|
||||
NoColor(w) => w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
294
src/search_buffer.rs
Normal file
294
src/search_buffer.rs
Normal file
@@ -0,0 +1,294 @@
|
||||
/*!
|
||||
The search_buffer module is responsible for searching a single file all in a
|
||||
single buffer. Typically, the source of the buffer is a memory map. This can
|
||||
be useful for when memory maps are faster than streaming search.
|
||||
|
||||
Note that this module doesn't quite support everything that search_stream does.
|
||||
Notably, showing contexts.
|
||||
*/
|
||||
use std::cmp;
|
||||
use std::path::Path;
|
||||
|
||||
use grep::Grep;
|
||||
use term::Terminal;
|
||||
|
||||
use printer::Printer;
|
||||
use search_stream::{IterLines, Options, count_lines, is_binary};
|
||||
|
||||
pub struct BufferSearcher<'a, W: 'a> {
|
||||
opts: Options,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
match_count: u64,
|
||||
line_count: Option<u64>,
|
||||
last_line: usize,
|
||||
}
|
||||
|
||||
impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
|
||||
pub fn new(
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
) -> BufferSearcher<'a, W> {
|
||||
BufferSearcher {
|
||||
opts: Options::default(),
|
||||
printer: printer,
|
||||
grep: grep,
|
||||
path: path,
|
||||
buf: buf,
|
||||
match_count: 0,
|
||||
line_count: None,
|
||||
last_line: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.opts.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line byte used by this searcher.
|
||||
pub fn eol(mut self, eol: u8) -> Self {
|
||||
self.opts.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.opts.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.opts.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub fn run(mut self) -> u64 {
|
||||
let binary_upto = cmp::min(4096, self.buf.len());
|
||||
if !self.opts.text && is_binary(&self.buf[..binary_upto]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
self.match_count = 0;
|
||||
self.line_count = if self.opts.line_number { Some(0) } else { None };
|
||||
let mut last_end = 0;
|
||||
for m in self.grep.iter(self.buf) {
|
||||
if self.opts.invert_match {
|
||||
self.print_inverted_matches(last_end, m.start());
|
||||
} else {
|
||||
self.print_match(m.start(), m.end());
|
||||
}
|
||||
last_end = m.end();
|
||||
}
|
||||
if self.opts.invert_match {
|
||||
let upto = self.buf.len();
|
||||
self.print_inverted_matches(last_end, upto);
|
||||
}
|
||||
if self.opts.count && self.match_count > 0 {
|
||||
self.printer.path_count(self.path, self.match_count);
|
||||
}
|
||||
self.match_count
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn print_match(&mut self, start: usize, end: usize) {
|
||||
self.match_count += 1;
|
||||
if self.opts.count {
|
||||
return;
|
||||
}
|
||||
self.count_lines(start);
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), self.path, self.buf,
|
||||
start, end, self.line_count);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn print_inverted_matches(&mut self, start: usize, end: usize) {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, start);
|
||||
while let Some((s, e)) = it.next(&self.buf[..end]) {
|
||||
self.print_match(s, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn count_lines(&mut self, upto: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += count_lines(
|
||||
&self.buf[self.last_line..upto], self.opts.eol);
|
||||
self.last_line = upto;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn add_line(&mut self, line_end: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += 1;
|
||||
self.last_line = line_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use term::Terminal;
|
||||
|
||||
use out::OutBuffer;
|
||||
use printer::Printer;
|
||||
|
||||
use super::BufferSearcher;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
|
||||
const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
|
||||
// Wrap the stdin reader in a Snappy reader.
|
||||
let mut rdr = snap::Reader::new(stdin.lock());
|
||||
let mut wtr = stdout.lock();
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
||||
|
||||
fn matcher(pat: &str) -> Grep {
|
||||
GrepBuilder::new(pat).build().unwrap()
|
||||
}
|
||||
|
||||
fn test_path() -> &'static Path {
|
||||
&Path::new("/baz.rs")
|
||||
}
|
||||
|
||||
type TestSearcher<'a> = BufferSearcher<'a, OutBuffer>;
|
||||
|
||||
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
pat: &str,
|
||||
haystack: &str,
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = BufferSearcher::new(
|
||||
&mut pp, &grep, test_path(), haystack.as_bytes());
|
||||
map(searcher).run()
|
||||
};
|
||||
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_search() {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s|s);
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn binary() {
|
||||
let text = "Sherlock\n\x00Holmes\n";
|
||||
let (count, out) = search("Sherlock|Holmes", text, |s|s);
|
||||
assert_eq!(0, count);
|
||||
assert_eq!(out, "");
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn binary_text() {
|
||||
let text = "Sherlock\n\x00Holmes\n";
|
||||
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_numbers() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.line_number(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn count() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.count(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:2\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.invert_match(true));
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:Holmeses, success in the province of detective work must always
|
||||
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/baz.rs:and exhibited clearly, with a label attached.
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match_line_numbers() {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).line_number(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:2:Holmeses, success in the province of detective work must always
|
||||
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/baz.rs:6:and exhibited clearly, with a label attached.
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match_count() {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).count(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "/baz.rs:4\n");
|
||||
}
|
||||
}
|
@@ -1,6 +1,7 @@
|
||||
/*!
|
||||
The search module is responsible for searching a single file and printing
|
||||
matches.
|
||||
The search_stream module is responsible for searching a single file and
|
||||
printing matches. In particular, it searches the file in a streaming fashion
|
||||
using `read` calls and a (roughly) fixed size buffer.
|
||||
*/
|
||||
|
||||
use std::cmp;
|
||||
@@ -11,6 +12,7 @@ use std::path::{Path, PathBuf};
|
||||
|
||||
use grep::{Grep, Match};
|
||||
use memchr::{memchr, memrchr};
|
||||
use term::Terminal;
|
||||
|
||||
use printer::Printer;
|
||||
|
||||
@@ -74,14 +76,14 @@ pub struct Searcher<'a, R, W: 'a> {
|
||||
|
||||
/// Options for configuring search.
|
||||
#[derive(Clone)]
|
||||
struct Options {
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
count: bool,
|
||||
eol: u8,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
text: bool,
|
||||
pub struct Options {
|
||||
pub after_context: usize,
|
||||
pub before_context: usize,
|
||||
pub count: bool,
|
||||
pub eol: u8,
|
||||
pub invert_match: bool,
|
||||
pub line_number: bool,
|
||||
pub text: bool,
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
@@ -98,7 +100,7 @@ impl Default for Options {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
impl<'a, R: io::Read, W: Send + Terminal> Searcher<'a, R, W> {
|
||||
/// Create a new searcher.
|
||||
///
|
||||
/// `inp` is a reusable input buffer that is used as scratch space by this
|
||||
@@ -219,14 +221,11 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
self.print_inverted_matches(upto);
|
||||
}
|
||||
} else if matched {
|
||||
self.match_count += 1;
|
||||
if !self.opts.count {
|
||||
let start = self.last_match.start();
|
||||
let end = self.last_match.end();
|
||||
self.print_after_context(start);
|
||||
self.print_before_context(start);
|
||||
self.print_match(start, end);
|
||||
}
|
||||
let start = self.last_match.start();
|
||||
let end = self.last_match.end();
|
||||
self.print_after_context(start);
|
||||
self.print_before_context(start);
|
||||
self.print_match(start, end);
|
||||
}
|
||||
if matched {
|
||||
self.inp.pos = self.last_match.end();
|
||||
@@ -275,11 +274,8 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, self.inp.pos);
|
||||
while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
|
||||
if !self.opts.count {
|
||||
self.print_match(start, end);
|
||||
}
|
||||
self.print_match(start, end);
|
||||
self.inp.pos = end;
|
||||
self.match_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -325,11 +321,15 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn print_match(&mut self, start: usize, end: usize) {
|
||||
self.match_count += 1;
|
||||
if self.opts.count {
|
||||
return;
|
||||
}
|
||||
self.print_separator(start);
|
||||
self.count_lines(start);
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), &self.path,
|
||||
self.grep.regex(), self.path,
|
||||
&self.inp.buf, start, end, self.line_count);
|
||||
self.last_printed = end;
|
||||
self.after_context_remaining = self.opts.after_context;
|
||||
@@ -535,7 +535,7 @@ impl InputBuffer {
|
||||
///
|
||||
/// Note that this may return both false positives and false negatives.
|
||||
#[inline(always)]
|
||||
fn is_binary(buf: &[u8]) -> bool {
|
||||
pub fn is_binary(buf: &[u8]) -> bool {
|
||||
if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
|
||||
return true;
|
||||
}
|
||||
@@ -543,13 +543,88 @@ fn is_binary(buf: &[u8]) -> bool {
|
||||
}
|
||||
|
||||
/// Count the number of lines in the given buffer.
|
||||
#[inline(always)]
|
||||
fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
|
||||
let mut count = 0;
|
||||
while let Some(pos) = memchr(eol, buf) {
|
||||
count += 1;
|
||||
buf = &buf[pos + 1..];
|
||||
#[inline(never)]
|
||||
|
||||
#[inline(never)]
|
||||
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
|
||||
// This was adapted from code in the memchr crate. The specific benefit
|
||||
// here is that we can avoid a branch in the inner loop because all we're
|
||||
// doing is counting.
|
||||
|
||||
// The technique to count EOL bytes was adapted from:
|
||||
// http://bits.stephan-brumme.com/null.html
|
||||
const LO_U64: u64 = 0x0101010101010101;
|
||||
const HI_U64: u64 = 0x8080808080808080;
|
||||
|
||||
// use truncation
|
||||
const LO_USIZE: usize = LO_U64 as usize;
|
||||
const HI_USIZE: usize = HI_U64 as usize;
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
const USIZE_BYTES: usize = 4;
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
const USIZE_BYTES: usize = 8;
|
||||
|
||||
fn count_eol(eol: usize) -> u64 {
|
||||
// Ideally, this would compile down to a POPCNT instruction, but
|
||||
// it looks like you need to set RUSTFLAGS="-C target-cpu=native"
|
||||
// (or target-feature=+popcnt) to get that to work. Bummer.
|
||||
(eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
fn repeat_byte(b: u8) -> usize {
|
||||
let mut rep = (b as usize) << 8 | b as usize;
|
||||
rep = rep << 16 | rep;
|
||||
rep
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn repeat_byte(b: u8) -> usize {
|
||||
let mut rep = (b as usize) << 8 | b as usize;
|
||||
rep = rep << 16 | rep;
|
||||
rep = rep << 32 | rep;
|
||||
rep
|
||||
}
|
||||
|
||||
fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 {
|
||||
let mut count = 0;
|
||||
while let Some(pos) = memchr(eol, buf) {
|
||||
count += 1;
|
||||
buf = &buf[pos + 1..];
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
let len = buf.len();
|
||||
let ptr = buf.as_ptr();
|
||||
let mut count = 0;
|
||||
|
||||
// Search up to an aligned boundary...
|
||||
let align = (ptr as usize) & (USIZE_BYTES - 1);
|
||||
let mut i = 0;
|
||||
if align > 0 {
|
||||
i = cmp::min(USIZE_BYTES - align, len);
|
||||
count += count_lines_slow(&buf[..i], eol);
|
||||
}
|
||||
|
||||
// ... and search the rest.
|
||||
let repeated_eol = repeat_byte(eol);
|
||||
|
||||
if len >= 2 * USIZE_BYTES {
|
||||
while i <= len - (2 * USIZE_BYTES) {
|
||||
unsafe {
|
||||
let u = *(ptr.offset(i as isize) as *const usize);
|
||||
let v = *(ptr.offset((i + USIZE_BYTES) as isize)
|
||||
as *const usize);
|
||||
|
||||
count += count_eol(u ^ repeated_eol);
|
||||
count += count_eol(v ^ repeated_eol);
|
||||
}
|
||||
i += USIZE_BYTES * 2;
|
||||
}
|
||||
}
|
||||
count += count_lines_slow(&buf[i..], eol);
|
||||
count
|
||||
}
|
||||
|
||||
@@ -575,7 +650,7 @@ fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
|
||||
/// advance over the positions of each line. We neglect that approach to avoid
|
||||
/// the borrow in the search code. (Because the borrow prevents composition
|
||||
/// through other mutable methods.)
|
||||
struct IterLines {
|
||||
pub struct IterLines {
|
||||
eol: u8,
|
||||
pos: usize,
|
||||
}
|
||||
@@ -585,7 +660,7 @@ impl IterLines {
|
||||
///
|
||||
/// The buffer is passed to the `next` method.
|
||||
#[inline(always)]
|
||||
fn new(eol: u8, start: usize) -> IterLines {
|
||||
pub fn new(eol: u8, start: usize) -> IterLines {
|
||||
IterLines {
|
||||
eol: eol,
|
||||
pos: start,
|
||||
@@ -597,7 +672,7 @@ impl IterLines {
|
||||
///
|
||||
/// The range returned includes the new line.
|
||||
#[inline(always)]
|
||||
fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
|
||||
pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
|
||||
match memchr(self.eol, &buf[self.pos..]) {
|
||||
None => {
|
||||
if self.pos < buf.len() {
|
||||
@@ -689,13 +764,14 @@ mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use term::Terminal;
|
||||
|
||||
use out::OutBuffer;
|
||||
use printer::Printer;
|
||||
|
||||
use super::{InputBuffer, Searcher, start_of_previous_lines};
|
||||
|
||||
lazy_static! {
|
||||
static ref SHERLOCK: &'static str = "\
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
@@ -703,7 +779,8 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
static ref CODE: &'static str = "\
|
||||
|
||||
const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
@@ -718,7 +795,6 @@ fn main() {
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
||||
}
|
||||
|
||||
fn hay(s: &str) -> io::Cursor<Vec<u8>> {
|
||||
io::Cursor::new(s.to_string().into_bytes())
|
||||
@@ -732,7 +808,7 @@ fn main() {
|
||||
&Path::new("/baz.rs")
|
||||
}
|
||||
|
||||
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, Vec<u8>>;
|
||||
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, OutBuffer>;
|
||||
|
||||
fn search_smallcap<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
pat: &str,
|
||||
@@ -740,14 +816,15 @@ fn main() {
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut inp = InputBuffer::with_capacity(1);
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = Searcher::new(
|
||||
&mut inp, &mut pp, &grep, test_path(), hay(haystack));
|
||||
map(searcher).run().unwrap()
|
||||
};
|
||||
(count, String::from_utf8(pp.into_inner()).unwrap())
|
||||
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
|
||||
}
|
||||
|
||||
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
@@ -756,14 +833,15 @@ fn main() {
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut inp = InputBuffer::with_capacity(4096);
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = Searcher::new(
|
||||
&mut inp, &mut pp, &grep, test_path(), hay(haystack));
|
||||
map(searcher).run().unwrap()
|
||||
};
|
||||
(count, String::from_utf8(pp.into_inner()).unwrap())
|
||||
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -870,8 +948,8 @@ fn main() {
    }

    #[test]
    fn basic_search() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s);
    fn basic_search1() {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s|s);
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -887,7 +965,6 @@ fn main() {
        assert_eq!(out, "");
    }


    #[test]
    fn binary_text() {
        let text = "Sherlock\n\x00Holmes\n";
@@ -899,7 +976,7 @@ fn main() {
    #[test]
    fn line_numbers() {
        let (count, out) = search_smallcap(
            "Sherlock", &*SHERLOCK, |s| s.line_number(true));
            "Sherlock", SHERLOCK, |s| s.line_number(true));
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -910,7 +987,7 @@ fn main() {
    #[test]
    fn count() {
        let (count, out) = search_smallcap(
            "Sherlock", &*SHERLOCK, |s| s.count(true));
            "Sherlock", SHERLOCK, |s| s.count(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:2\n");
    }
@@ -918,7 +995,7 @@ fn main() {
    #[test]
    fn invert_match() {
        let (count, out) = search_smallcap(
            "Sherlock", &*SHERLOCK, |s| s.invert_match(true));
            "Sherlock", SHERLOCK, |s| s.invert_match(true));
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
@@ -930,7 +1007,7 @@ fn main() {

    #[test]
    fn invert_match_line_numbers() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).line_number(true)
        });
        assert_eq!(4, count);
@@ -944,7 +1021,7 @@ fn main() {

    #[test]
    fn invert_match_count() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).count(true)
        });
        assert_eq!(4, count);
@@ -953,7 +1030,7 @@ fn main() {

    #[test]
    fn before_context_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(1)
        });
        assert_eq!(2, count);
@@ -966,7 +1043,7 @@ fn main() {

    #[test]
    fn before_context_invert_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(1).invert_match(true)
        });
        assert_eq!(4, count);
@@ -982,7 +1059,7 @@ fn main() {

    #[test]
    fn before_context_invert_one2() {
        let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
            s.line_number(true).before_context(1).invert_match(true)
        });
        assert_eq!(3, count);
@@ -997,7 +1074,7 @@ fn main() {

    #[test]
    fn before_context_two1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(2, count);
@@ -1010,7 +1087,7 @@ fn main() {

    #[test]
    fn before_context_two2() {
        let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(1, count);
@@ -1024,7 +1101,7 @@ fn main() {
    #[test]
    fn before_context_two3() {
        let (count, out) = search_smallcap(
            "success|attached", &*SHERLOCK, |s| {
            "success|attached", SHERLOCK, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(2, count);
@@ -1040,7 +1117,7 @@ fn main() {

    #[test]
    fn before_context_two4() {
        let (count, out) = search("stdin", &*CODE, |s| {
        let (count, out) = search("stdin", CODE, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(3, count);
@@ -1057,7 +1134,7 @@ fn main() {

    #[test]
    fn before_context_two5() {
        let (count, out) = search("stdout", &*CODE, |s| {
        let (count, out) = search("stdout", CODE, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(2, count);
@@ -1074,7 +1151,7 @@ fn main() {

    #[test]
    fn before_context_three1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(3)
        });
        assert_eq!(2, count);
@@ -1087,7 +1164,7 @@ fn main() {

    #[test]
    fn after_context_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(1)
        });
        assert_eq!(2, count);
@@ -1101,7 +1178,7 @@ fn main() {

    #[test]
    fn after_context_invert_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(1).invert_match(true)
        });
        assert_eq!(4, count);
@@ -1116,7 +1193,7 @@ fn main() {

    #[test]
    fn after_context_invert_one2() {
        let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
            s.line_number(true).after_context(1).invert_match(true)
        });
        assert_eq!(3, count);
@@ -1132,7 +1209,7 @@ fn main() {

    #[test]
    fn after_context_two1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(2)
        });
        assert_eq!(2, count);
@@ -1147,7 +1224,7 @@ fn main() {

    #[test]
    fn after_context_two2() {
        let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
            s.line_number(true).after_context(2)
        });
        assert_eq!(1, count);
@@ -1160,7 +1237,7 @@ fn main() {
    #[test]
    fn after_context_two3() {
        let (count, out) = search_smallcap(
            "success|attached", &*SHERLOCK, |s| {
            "success|attached", SHERLOCK, |s| {
            s.line_number(true).after_context(2)
        });
        assert_eq!(2, count);
@@ -1175,7 +1252,7 @@ fn main() {

    #[test]
    fn after_context_three1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(3)
        });
        assert_eq!(2, count);
@@ -1192,7 +1269,7 @@ fn main() {
    #[test]
    fn before_after_context_two1() {
        let (count, out) = search(
            r"fn main|let mut rdr", &*CODE, |s| {
            r"fn main|let mut rdr", CODE, |s| {
            s.line_number(true).after_context(2).before_context(2)
        });
        assert_eq!(2, count);
24
tests/hay.rs
Normal file
@@ -0,0 +1,24 @@
pub const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";

pub const CODE: &'static str = "\
extern crate snap;

use std::io;

fn main() {
    let stdin = io::stdin();
    let stdout = io::stdout();

    // Wrap the stdin reader in a Snappy reader.
    let mut rdr = snap::Reader::new(stdin.lock());
    let mut wtr = stdout.lock();
    io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";
577
tests/tests.rs
Normal file
@@ -0,0 +1,577 @@
/*!
This module contains *integration* tests. Their purpose is to test the CLI
interface. Namely, that passing a flag does what it says on the tin.

Tests for more fine grained behavior (like the search or the globber) should be
unit tests in their respective modules.
*/

#![allow(dead_code, unused_imports)]

use std::process::Command;

use workdir::WorkDir;

mod hay;
mod workdir;

macro_rules! sherlock {
    ($name:ident, $fun:expr) => {
        sherlock!($name, "Sherlock", $fun);
    };
    ($name:ident, $query:expr, $fun:expr) => {
        sherlock!($name, $query, "sherlock", $fun);
    };
    ($name:ident, $query:expr, $path:expr, $fun:expr) => {
        #[test]
        fn $name() {
            let wd = WorkDir::new(stringify!($name));
            wd.create("sherlock", hay::SHERLOCK);
            let mut cmd = wd.command();
            cmd.arg($query).arg($path);
            $fun(wd, cmd);
        }
    };
}

sherlock!(single_file, |wd: WorkDir, mut cmd| {
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| {
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
3:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(columns, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--column");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
57:For the Doctor Watsons of this world, as opposed to the Sherlock
49:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(with_filename, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-H");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(with_heading, |wd: WorkDir, mut cmd: Command| {
    // This forces the issue since --with-filename is disabled by default
    // when searching one file.
    cmd.arg("--with-filename").arg("--heading");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(with_heading_default, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // Search two or more and get --with-filename enabled by default.
    // Use -j1 to get deterministic results.
    wd.create("foo", "Sherlock Holmes lives on Baker Street.");
    cmd.arg("-j1").arg("--heading");
    let lines: String = wd.stdout(&mut cmd);
    let expected1 = "\
foo
Sherlock Holmes lives on Baker Street.

sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    let expected2 = "\
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes

foo
Sherlock Holmes lives on Baker Street.
";
    assert!(lines == expected1 || lines == expected2);
});

sherlock!(inverted, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-v");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
Holmeses, success in the province of detective work must always
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(inverted_line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-n").arg("-v");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
2:Holmeses, success in the province of detective work must always
4:can extract a clew from a wisp of straw or a flake of cigar ash;
5:but Doctor Watson has to have it taken out for him and dusted,
6:and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(case_insensitive, "sherlock", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-i");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(word, "as", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-w");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
";
    assert_eq!(lines, expected);
});

sherlock!(literal, "()", "file", |wd: WorkDir, mut cmd: Command| {
    wd.create("file", "blib\n()\nblab\n");
    cmd.arg("-Q");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "()\n");
});

sherlock!(quiet, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-q");
    let lines: String = wd.stdout(&mut cmd);
    assert!(lines.is_empty());
});

sherlock!(replace, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-r").arg("FooBar");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the FooBar
be, to a very large extent, the result of luck. FooBar Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(replace_groups, "([A-Z][a-z]+) ([A-Z][a-z]+)",
          |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-r").arg("$2, $1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Watsons, Doctor of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Holmes, Sherlock
but Watson, Doctor has to have it taken out for him and dusted,
";
    assert_eq!(lines, expected);
});

sherlock!(replace_named_groups, "(?P<first>[A-Z][a-z]+) (?P<last>[A-Z][a-z]+)",
          |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-r").arg("$last, $first");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Watsons, Doctor of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Holmes, Sherlock
but Watson, Doctor has to have it taken out for him and dusted,
";
    assert_eq!(lines, expected);
});

sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-t").arg("rust");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.rs:Sherlock\n");
});

sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-T").arg("rust");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.py:Sherlock\n");
});

sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("--type-clear").arg("rust").arg("-t").arg("rust");
    wd.assert_err(&mut cmd);
});

sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    wd.create("file.wat", "Sherlock");
    cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.wat:Sherlock\n");
});

sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-g").arg("*.rs");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.rs:Sherlock\n");
});

sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-g").arg("!*.rs");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.py:Sherlock\n");
});

sherlock!(after_context, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-A").arg("1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
";
    assert_eq!(lines, expected);
});

sherlock!(after_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-A").arg("1").arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
3:be, to a very large extent, the result of luck. Sherlock Holmes
4-can extract a clew from a wisp of straw or a flake of cigar ash;
";
    assert_eq!(lines, expected);
});

sherlock!(before_context, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-B").arg("1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(before_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-B").arg("1").arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
3:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(context, "world|attached", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-C").arg("1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
--
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(context_line_numbers, "world|attached",
          |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-C").arg("1").arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
--
5-but Doctor Watson has to have it taken out for him and dusted,
6:and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);
    wd.assert_err(&mut cmd);
});

sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);

    cmd.arg("--hidden");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_git, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".gitignore", "sherlock\n");
    wd.assert_err(&mut cmd);
});

sherlock!(ignore_ripgrep, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".rgignore", "sherlock\n");
    wd.assert_err(&mut cmd);
});

sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".gitignore", "sherlock\n");
    cmd.arg("--no-ignore");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_git_parent, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir(".git");
    wd.create_dir("foo");
    wd.create("foo/sherlock", hay::SHERLOCK);
    // Even though we search in foo/, which has no .gitignore, ripgrep will
    // search parent directories and respect the gitignore files found.
    cmd.current_dir(wd.path().join("foo"));
    wd.assert_err(&mut cmd);
});

sherlock!(ignore_git_parent_stop, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // This tests that searching parent directories for .gitignore files stops
    // after it sees a .git directory. To test this, we create this directory
    // hierarchy:
    //
    // .gitignore (contains `sherlock`)
    // foo/
    //   .git
    //   bar/
    //     sherlock
    //
    // And we perform the search inside `foo/bar/`. ripgrep will stop looking
    // for .gitignore files after it sees `foo/.git/`, and therefore not
    // respect the top-level `.gitignore` containing `sherlock`.
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create_dir("foo/.git");
    wd.create_dir("foo/bar");
    wd.create("foo/bar/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo").join("bar"));

    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_ripgrep_parent_no_stop, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // This is like the `ignore_git_parent_stop` test, except it checks that
    // ripgrep *doesn't* stop checking for .rgignore files.
    wd.remove("sherlock");
    wd.create(".rgignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create_dir("foo/.git");
    wd.create_dir("foo/bar");
    wd.create("foo/bar/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo").join("bar"));
    // The top-level .rgignore applies.
    wd.assert_err(&mut cmd);
});

sherlock!(no_parent_ignore_git, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // Set up a directory hierarchy like this:
    //
    // .gitignore
    // foo/
    //   .gitignore
    //   sherlock
    //   watson
    //
    // Where `.gitignore` contains `sherlock` and `foo/.gitignore` contains
    // `watson`.
    //
    // Now *do the search* from the foo directory. By default, ripgrep will
    // search parent directories for .gitignore files. The --no-ignore-parent
    // flag should prevent that. At the same time, the `foo/.gitignore` file
    // will still be respected (since the search is happening in `foo/`).
    //
    // In other words, we should only see results from `sherlock`, not from
    // `watson`.
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create("foo/.gitignore", "watson\n");
    wd.create("foo/sherlock", hay::SHERLOCK);
    wd.create("foo/watson", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo"));
    cmd.arg("--no-ignore-parent");

    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create_dir("foo");
    wd.create_dir("foo/bar");
    wd.link("foo/baz", "foo/bar/baz");
    wd.create_dir("foo/baz");
    wd.create("foo/baz/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo/bar"));
    wd.assert_err(&mut cmd);
});

sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create_dir("foo");
    wd.create_dir("foo/bar");
    wd.create_dir("foo/baz");
    wd.create("foo/baz/sherlock", hay::SHERLOCK);
    wd.link("foo/baz", "foo/bar/baz");
    cmd.arg("-L");
    cmd.current_dir(wd.path().join("foo/bar"));

    let lines: String = wd.stdout(&mut cmd);
    if cfg!(windows) {
        let expected = "\
baz\\sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz\\sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
        assert_eq!(lines, expected);
    } else {
        let expected = "\
baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
        assert_eq!(lines, expected);
    }
});

#[test]
fn binary_nosearch() {
    let wd = WorkDir::new("binary_nosearch");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.arg("foo").arg("file");
    wd.assert_err(&mut cmd);
}

// The following two tests show a discrepancy in search results between
// searching with memory mapped files and stream searching. Stream searching
// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with
// the EOL terminator, which tends to avoid allocating large amounts of memory
// for really long "lines." The memory map searcher has no need to worry about
// such things, and more than that, it would be pretty hard for it to match
// the semantics of streaming search in this case.
//
// Binary files with lots of NULs aren't really part of the use case of ripgrep
// (or any other grep-like tool for that matter), so we shouldn't feel too bad
// about it.
#[test]
fn binary_search_mmap() {
    let wd = WorkDir::new("binary_search_mmap");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.arg("-a").arg("--mmap").arg("foo").arg("file");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n");
}

#[test]
fn binary_search_no_mmap() {
    let wd = WorkDir::new("binary_search_no_mmap");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "foo\nfoo\n");
}

#[test]
fn files() {
    let wd = WorkDir::new("files");
    wd.create("file", "");
    wd.create_dir("dir");
    wd.create("dir/file", "");

    let mut cmd = wd.command();
    cmd.arg("--files");
    let lines: String = wd.stdout(&mut cmd);
    if cfg!(windows) {
        assert!(lines == "./dir\\file\n./file\n"
                || lines == "./file\n./dir\\file\n");
    } else {
        assert!(lines == "./file\n./dir/file\n"
                || lines == "./dir/file\n./file\n");
    }
}

#[test]
fn type_list() {
    let wd = WorkDir::new("type_list");

    let mut cmd = wd.command();
    cmd.arg("--type-list");
    let lines: String = wd.stdout(&mut cmd);
    // This can change over time, so just make sure we print something.
    assert!(!lines.is_empty());
}
189
tests/workdir.rs
Normal file
@@ -0,0 +1,189 @@
use std::env;
use std::error;
use std::fmt;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process;
use std::str::FromStr;
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;

static TEST_DIR: &'static str = "ripgrep-tests";
static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;

/// WorkDir represents a directory in which tests are run.
///
/// Directories are created from a global atomic counter to avoid duplicates.
#[derive(Debug)]
pub struct WorkDir {
    /// The directory in which this test executable is running.
    root: PathBuf,
    /// The directory in which the test should run. If a test needs to create
    /// files, they should go in here.
    dir: PathBuf,
}

impl WorkDir {
    /// Create a new test working directory with the given name. The name
    /// does not need to be distinct for each invocation, but should correspond
    /// to a logical grouping of tests.
    pub fn new(name: &str) -> WorkDir {
        let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
        let root = env::current_exe().unwrap()
            .parent().expect("executable's directory").to_path_buf();
        let dir = root.join(TEST_DIR).join(name).join(&format!("{}", id));
        nice_err(&dir, repeat(|| fs::create_dir_all(&dir)));
        WorkDir {
            root: root,
            dir: dir,
        }
    }

    /// Create a new file with the given name and contents in this directory.
    pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
        let path = self.dir.join(name);
        let mut file = nice_err(&path, File::create(&path));
        nice_err(&path, file.write_all(contents.as_bytes()));
        nice_err(&path, file.flush());
    }

    /// Remove a file with the given name from this directory.
    pub fn remove<P: AsRef<Path>>(&self, name: P) {
        let path = self.dir.join(name);
        nice_err(&path, fs::remove_file(&path));
    }

    /// Create a new directory with the given path (and any directories above
    /// it) inside this directory.
    pub fn create_dir<P: AsRef<Path>>(&self, path: P) {
        let path = self.dir.join(path);
        nice_err(&path, repeat(|| fs::create_dir_all(&path)));
    }

    /// Creates a new command that is set to use the ripgrep executable in
    /// this working directory.
    pub fn command(&self) -> process::Command {
        let mut cmd = process::Command::new(&self.bin());
        cmd.current_dir(&self.dir);
        cmd
    }

    /// Returns the path to the ripgrep executable.
    pub fn bin(&self) -> PathBuf {
        self.root.join("rg")
    }

    /// Returns the path to this directory.
    pub fn path(&self) -> &Path {
        &self.dir
    }

    /// Creates a directory symlink to the src with the given target name
    /// in this directory.
    #[cfg(not(windows))]
    pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
        use std::os::unix::fs::symlink;
        let src = self.dir.join(src);
        let target = self.dir.join(target);
        let _ = fs::remove_file(&target);
        nice_err(&target, symlink(&src, &target));
    }

    #[cfg(windows)]
    pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
        use std::os::windows::fs::symlink_dir;
        let src = self.dir.join(src);
        let target = self.dir.join(target);
        let _ = fs::remove_dir(&target);
        nice_err(&target, symlink_dir(&src, &target));
    }

    /// Runs and captures the stdout of the given command.
    ///
    /// If the return type could not be created from a string, then this
    /// panics.
    pub fn stdout<E: fmt::Debug, T: FromStr<Err=E>>(
        &self,
        cmd: &mut process::Command,
    ) -> T {
        let o = self.output(cmd);
        let stdout = String::from_utf8_lossy(&o.stdout);
        match stdout.parse() {
            Ok(t) => t,
            Err(err) => {
                panic!("could not convert from string: {:?}\n\n{}", err, stdout);
            }
        }
    }

    /// Gets the output of a command. If the command failed, then this panics.
    pub fn output(&self, cmd: &mut process::Command) -> process::Output {
        let o = cmd.output().unwrap();
        if !o.status.success() {
            let suggest =
                if o.stderr.is_empty() {
                    "\n\nDid your search end up with no results?".to_string()
                } else {
                    "".to_string()
                };

            panic!("\n\n==========\n\
                    command failed but expected success!\
                    {}\
                    \n\ncommand: {:?}\
                    \ncwd: {}\
                    \n\nstatus: {}\
                    \n\nstdout: {}\
                    \n\nstderr: {}\
                    \n\n==========\n",
                   suggest, cmd, self.dir.display(), o.status,
                   String::from_utf8_lossy(&o.stdout),
                   String::from_utf8_lossy(&o.stderr));
        }
        o
    }

    /// Runs the given command and asserts that it resulted in an error exit
    /// code.
    pub fn assert_err(&self, cmd: &mut process::Command) {
        let o = cmd.output().unwrap();
        if o.status.success() {
            panic!("\n\n===== {:?} =====\n\
                    command succeeded but expected failure!\
                    \n\ncwd: {}\
                    \n\nstatus: {}\
                    \n\nstdout: {}\n\nstderr: {}\
                    \n\n=====\n",
                   cmd, self.dir.display(), o.status,
                   String::from_utf8_lossy(&o.stdout),
                   String::from_utf8_lossy(&o.stderr));
        }
    }
}

fn nice_err<P: AsRef<Path>, T, E: error::Error>(
    path: P,
    res: Result<T, E>,
) -> T {
    match res {
        Ok(t) => t,
        Err(err) => {
            panic!("{}: {:?}", path.as_ref().display(), err);
        }
    }
}

fn repeat<F: FnMut() -> io::Result<()>>(mut f: F) -> io::Result<()> {
    let mut last_err = None;
    for _ in 0..10 {
        if let Err(err) = f() {
            last_err = Some(err);
            thread::sleep(Duration::from_millis(500));
        } else {
            return Ok(());
        }
    }
    Err(last_err.unwrap())
}