0.1.2

bump grep
grep 0.1.1
2025-08-02 05:02:01 -07:00 · 2016-09-17 11:36:11 -04:00 · 2016-09-17 11:34:27 -04:00 · 2016-09-17 11:32:47 -04:00 · 2016-09-17 11:30:01 -04:00 · 2016-09-16 21:02:46 -04:00
21 changed files with 1021 additions and 340 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,23 +1,24 @@
 [root]
 name = "ripgrep"
-version = "0.1.1"
+version = "0.1.2"
 dependencies = [
- "crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
+ "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
 "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
+ "fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
- "grep 0.1.0",
+ "grep 0.1.1",
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
 "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
 "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
- "num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
 "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
 "term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@@ -30,9 +31,12 @@ dependencies = [
 ]

 [[package]]
-name = "crossbeam"
-version = "0.2.10"
+name = "deque"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
+]

 [[package]]
 name = "docopt"
@@ -54,6 +58,11 @@ dependencies = [
 "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "fnv"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "fs2"
 version = "0.2.5"
@@ -71,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "grep"
-version = "0.1.0"
+version = "0.1.1"
 dependencies = [
 "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -125,7 +134,15 @@ dependencies = [

 [[package]]
 name = "num_cpus"
-version = "1.0.0"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+dependencies = [
+ "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
+]
+
+[[package]]
+name = "rand"
+version = "0.3.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -140,7 +157,7 @@ dependencies = [
 "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
 "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
- "thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
+ "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
 "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

@@ -184,7 +201,7 @@ dependencies = [

 [[package]]
 name = "thread_local"
-version = "0.2.6"
+version = "0.2.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -197,7 +214,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "walkdir"
-version = "0.1.6"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -216,9 +233,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [metadata]
 "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
-"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
+"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
 "checksum docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42c6077823a361410c37d47c2535b73a190cbe10838dc4f400fe87c10c8c3b"
 "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
+"checksum fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8e8af7b5408ab0c4910cad114c8f9eb454bf75df7afe8964307eeafb68a13a5e"
 "checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
 "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
 "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
@@ -227,7 +245,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
 "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
 "checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
-"checksum num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a859041cbf7a70ea1ece4b87d1a2c6ef364dcb68749c88db1f97304b9ec09d5f"
+"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
+"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
 "checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
 "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
 "checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
@@ -235,8 +254,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 "checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
 "checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a"
 "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
-"checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d"
+"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
 "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
-"checksum walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d42144c31c9909882ce76e696b306b88a5b091721251137d5d522d1ef3da7cf9"
+"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
 "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
 "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "ripgrep"
-version = "0.1.1"  #:version
+version = "0.1.2"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Line oriented search tool using Rust's regex library. Combines the raw
@@ -23,10 +23,11 @@ name = "integration"
 path = "tests/tests.rs"

 [dependencies]
-crossbeam = "0.2"
+deque = "0.3"
 docopt = "0.6"
 env_logger = "0.3"
-grep = { version = "0.1", path = "grep" }
+fnv = "1.0"
+grep = { version = "0.1.1", path = "grep" }
 lazy_static = "0.2"
 libc = "0.2"
 log = "0.3"
--- a/14
+++ b/14
@@ -1,14 +0,0 @@
-all:
-	echo Nothing to do...
-
-ctags:
-	ctags --options=ctags.rust --languages=Rust src/*.rs src/*/*.rs
-
-docs:
-	cargo doc
-	in-dir ./target/doc fix-perms
-	rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/
-
-push:
-	git push origin master
-	git push github master
--- a/README.md
+++ b/README.md
@@ -1,3 +1,6 @@
+**UNDER DEVELOPMENT.**
+
 ripgrep (rg)
 ------------
-ripgrep combines the usability of the silver searcher with the raw speed of grep.
+ripgrep combines the usability of the silver searcher with the raw speed of
+grep.
--- a/benches/README.md
+++ b/benches/README.md
@@ -0,0 +1,5 @@
+These are internal microbenchmarks for tracking the performance of individual
+components inside of ripgrep. At the moment, they aren't heavily used.
+
+For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
+directory.
--- a/benchsuite/benchsuite
+++ b/benchsuite/benchsuite
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3

 '''
 benchsuite is a benchmark runner for comparing command line search tools.
@@ -10,6 +10,7 @@ import os
 import os.path as path
 from multiprocessing import cpu_count
 import re
+import shutil
 import statistics
 import subprocess
 import sys
@@ -39,13 +40,23 @@ LINUX_CLONE = 'git://github.com/BurntSushi/linux'
 GREP_ASCII = {'LC_ALL': 'C'}
 GREP_UNICODE = {'LC_ALL': 'en_US.UTF-8'}

+# Sift tries really hard to search everything by default. In our code search
+# benchmarks, we don't want that.
+SIFT = [
+    'sift',
+    '--binary-skip',
+    '--exclude-files', '.*',
+    '--exclude-files', '*.pdf',
+]
+

 def bench_linux_literal_default(suite_dir):
    '''
    Benchmark the speed of a literal using *default* settings.

    This is a purposefully unfair benchmark for use in performance
-    analysis, but it is pedagogically useful.
+    analysis, but it is pedagogically useful to demonstrate how
+    default behaviors differ.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
@@ -55,8 +66,6 @@ def bench_linux_literal_default(suite_dir):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

-    # N.B. This is a purposefully unfair benchmark for illustrative purposes
-    # of how the default modes for each search tool differ.
    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', pat]),
        mkcmd('ag', ['ag', pat]),
@@ -64,10 +73,12 @@ def bench_linux_literal_default(suite_dir):
        # doesn't read gitignore files. Instead, it has a file whitelist
        # that happens to match up exactly with the gitignores for this search.
        mkcmd('ucg', ['ucg', pat]),
-        mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
+        # LC_ALL=en_US.UTF-8 isn't necessarily the default locale, but it
+        # probably is on most desktop systems, so `git grep` below uses it.
        mkcmd('pt', ['pt', pat]),
        # sift reports an extra line here for a binary file matched.
        mkcmd('sift', ['sift', pat]),
+        mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
    ])


@@ -76,8 +87,9 @@ def bench_linux_literal(suite_dir):
    Benchmark the speed of a literal, attempting to be fair.

    This tries to use the minimum set of options available in all tools
-    to test how fast they are. For example, it makes sure there is no
-    case insensitive matching and that line numbers are computed.
+    to test how fast they are. For example, it makes sure there is
+    no case insensitive matching and that line numbers are computed
+    (because some tools don't permit disabling line numbers).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
@@ -88,19 +100,16 @@ def bench_linux_literal(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
-        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
-        mkcmd('ag', ['ag', '-s', pat]),
-        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
-        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
-        mkcmd('git grep', [
+        mkcmd('rg (ignore)', ['rg', '-n', pat]),
+        mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]),
+        mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]),
+        mkcmd('pt (ignore)', ['pt', pat]),
+        mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
+        mkcmd('git grep (ignore)', [
            'git', 'grep', '-I', '-n', pat,
        ], env={'LC_ALL': 'C'}),
-        mkcmd('pt', ['pt', pat]),
-        mkcmd('sift', [
-            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
-        ]),
+        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
+        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
    ])


@@ -120,23 +129,21 @@ def bench_linux_literal_casei(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', '-i', pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
-        mkcmd('rg-novcs-mmap', [
-            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
-        ]),
-        mkcmd('ag', ['ag', '-i', pat]),
-        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
-        mkcmd('ucg', ['ucg', '-i', pat]),
-        mkcmd('git grep', [
+        mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
+        mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
+        mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
+        mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
+        # It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
+        # since that is certainly what ripgrep is doing, but this is for an
+        # ASCII literal, so we should give `git grep` all the opportunity to
+        # do its best.
+        mkcmd('git grep (ignore)', [
            'git', 'grep', '-I', '-n', '-i', pat,
        ], env={'LC_ALL': 'C'}),
-        # sift yields more matches than it should here. Specifically, it gets
-        # matches in Module.symvers and System.map in the repo root. Both of
-        # those files show up in the repo root's .gitignore file.
-        mkcmd('sift', [
-            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
+        mkcmd('rg (whitelist)', [
+            'rg', '-n', '-i', '--no-ignore', '-tall', pat,
        ]),
+        mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
    ])


@@ -156,20 +163,16 @@ def bench_linux_re_literal_suffix(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
-        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
-        mkcmd('ag', ['ag', '-s', pat]),
-        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
-        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
+        mkcmd('rg (ignore)', ['rg', '-n', pat]),
+        mkcmd('ag (ignore)', ['ag', '-s', pat]),
+        mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
        mkcmd(
-            'git grep',
+            'git grep (ignore)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
-        mkcmd('sift', [
-            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
-        ]),
+        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
+        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
    ])


@@ -189,22 +192,18 @@ def bench_linux_word(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', '-w', pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
-        mkcmd('rg-novcs-mmap', [
-            'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
-        ]),
-        mkcmd('ag', ['ag', '-s', '-w', pat]),
-        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
-        mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
+        mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
+        mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
+        mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
        mkcmd(
-            'git grep',
+            'git grep (ignore)',
            ['git', 'grep', '-E', '-I', '-n', '-w', pat],
            env={'LC_ALL': 'C'},
        ),
-        mkcmd('sift', [
-            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
+        mkcmd('rg (whitelist)', [
+            'rg', '-n', '-w', '--no-ignore', '-tall', pat,
        ]),
+        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
    ])


@@ -212,7 +211,8 @@ def bench_linux_unicode_greek(suite_dir):
    '''
    Benchmark matching of a Unicode category.

-    Only three tools (ripgrep, sift and pt) support this.
+    Only three tools (ripgrep, sift and pt) support this. We omit
+    pt because it is too slow.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
@@ -224,15 +224,7 @@ def bench_linux_unicode_greek(suite_dir):

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
-        # sift tries to search a bunch of PDF files and clutters up the
-        # results, even though --binary-skip is provided. They are excluded
-        # here explicitly, but don't have a measurable impact on performance.
-        mkcmd('sift', [
-            'sift', '-n', '--binary-skip',
-            '--exclude-files', '.*',
-            '--exclude-files', '*.pdf',
-            pat,
-        ]),
+        mkcmd('sift', SIFT + ['-n', '--git', pat]),
    ])


@@ -252,15 +244,7 @@ def bench_linux_unicode_greek_casei(suite_dir):

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
-        # sift tries to search a bunch of PDF files and clutters up the
-        # results, even though --binary-skip is provided. They are excluded
-        # here explicitly, but don't have a measurable impact on performance.
-        mkcmd('sift', [
-            'sift', '-n', '--binary-skip',
-            '--exclude-files', '.*',
-            '--exclude-files', '*.pdf',
-            pat,
-        ]),
+        mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
    ])


@@ -281,30 +265,25 @@ def bench_linux_unicode_word(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', pat]),
-        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
-        mkcmd('rg-novcs-mmap', [
-            'rg', '--mmap', '--no-ignore', '-n', pat,
-        ]),
-        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
-        mkcmd('ag-novcs (no Unicode)', [
-            'ag', '--skip-vcs-ignores', '-s', pat,
-        ]),
-        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
+        mkcmd('rg (ignore)', ['rg', '-n', pat]),
+        mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
+        mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
+        mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
        mkcmd(
-            'git grep',
+            'git grep (ignore)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
-            'git grep (no Unicode)',
+            'git grep (ignore) (ASCII)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
-        mkcmd('sift (no Unicode)', [
-            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
+        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
+        mkcmd('rg (whitelist) (ASCII)', [
+            'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
        ]),
+        mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
    ])


@@ -326,30 +305,25 @@ def bench_linux_no_literal(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', pat]),
-        mkcmd('rg-whitelist', ['rg', '-tall', '--no-ignore', '-n', pat]),
-        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
-        mkcmd('rg-whitelist (no Unicode)', [
-            'rg', '-tall', '--no-ignore', '-n', '(?-u)' + pat,
-        ]),
-        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
-        mkcmd('ag-novcs (no Unicode)', [
-            'ag', '--skip-vcs-ignores', '-s', pat,
-        ]),
-        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
+        mkcmd('rg (ignore)', ['rg', '-n', pat]),
+        mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
+        mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
+        mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
        mkcmd(
-            'git grep',
+            'git grep (ignore)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
-            'git grep (no Unicode)',
+            'git grep (ignore) (ASCII)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
-        mkcmd('sift (no Unicode)', [
-            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
+        mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
+        mkcmd('rg (whitelist) (ASCII)', [
+            'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
        ]),
+        mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
    ])


@@ -371,21 +345,15 @@ def bench_linux_alternates(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
-        mkcmd('rg-novcs-mmap', [
-            'rg', '--mmap', '--no-ignore', '-n', pat,
-        ]),
-        mkcmd('ag', ['ag', '-s', pat]),
-        mkcmd('ag-novcs', [
-            'ag', '--skip-vcs-ignores', '-s', pat,
-        ]),
-        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
+        mkcmd('rg (ignore)', ['rg', '-n', pat]),
+        mkcmd('ag (ignore)', ['ag', '-s', pat]),
        mkcmd(
-            'git grep',
+            'git grep (ignore)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
+        mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
+        mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
    ])


@@ -400,21 +368,15 @@ def bench_linux_alternates_casei(suite_dir):
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
-        mkcmd('rg', ['rg', '-n', '-i', pat]),
-        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
-        mkcmd('rg-novcs-mmap', [
-            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
-        ]),
-        mkcmd('ag', ['ag', '-i', pat]),
-        mkcmd('ag-novcs', [
-            'ag', '--skip-vcs-ignores', '-i', pat,
-        ]),
-        mkcmd('ucg', ['ucg', '-i', pat]),
+        mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
+        mkcmd('ag (ignore)', ['ag', '-i', pat]),
        mkcmd(
-            'git grep',
+            'git grep (ignore)',
            ['git', 'grep', '-E', '-I', '-n', '-i', pat],
            env={'LC_ALL': 'C'},
        ),
+        mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
+        mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
    ])


@@ -423,22 +385,159 @@ def bench_subtitles_en_literal(suite_dir):
    Benchmark the speed of an ASCII string literal.
    '''
    require(suite_dir, 'subtitles-en')
-    ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
    pat = 'Sherlock Holmes'

    return Benchmark(pattern=pat, commands=[
-        Command('rg', ['rg', '-n', pat, ru]),
-        Command('rg (no line numbers)', ['rg', pat, ru]),
-        Command('ag', ['ag', '-s', pat, ru]),
-        Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
-        Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII),
-        Command('grep (no line numbers)', [
-            'grep', '-a', pat, ru,
+        Command('rg', ['rg', pat, en]),
+        Command('pt', ['pt', '-N', pat, en]),
+        Command('sift', ['sift', pat, en]),
+        Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
+        Command('rg (lines)', ['rg', '-n', pat, en]),
+        Command('ag (lines)', ['ag', '-s', pat, en]),
+        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
+        Command('pt (lines)', ['pt', pat, en]),
+        Command('sift (lines)', ['sift', '-n', pat, en]),
+        Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
+    ])
+
+
+def bench_subtitles_en_literal_casei(suite_dir):
+    '''
+    Benchmark the speed of an ASCII string literal case insensitively.
+    '''
+    require(suite_dir, 'subtitles-en')
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    pat = 'Sherlock Holmes'
+
+    return Benchmark(pattern=pat, commands=[
+        Command('rg', ['rg', '-i', pat, en]),
+        Command('grep', ['grep', '-ai', pat, en], env=GREP_UNICODE),
+        Command('grep (ASCII)', [
+            'grep', '-E', '-ai', pat, en,
+        ], env=GREP_ASCII),
+        Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
+        Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
+        Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
+    ])
+
+
+def bench_subtitles_en_literal_word(suite_dir):
+    '''
+    Benchmark the speed of finding a literal inside word boundaries.
+    '''
+    require(suite_dir, 'subtitles-en')
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    pat = 'Sherlock Holmes'
+
+    return Benchmark(pattern=pat, commands=[
+        Command('rg (ASCII)', [
+            'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
+        ]),
+        Command('ag (ASCII)', ['ag', '-sw', pat, en]),
+        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
+        Command('grep (ASCII)', [
+            'grep', '-anw', pat, en,
+        ], env=GREP_ASCII),
+        Command('rg', ['rg', '-nw', pat, en]),
+        Command('grep', ['grep', '-anw', pat, en], env=GREP_UNICODE),
+    ])
+
+
+def bench_subtitles_en_alternate(suite_dir):
+    '''
+    Benchmark the speed of a set of alternate literals.
+    '''
+    require(suite_dir, 'subtitles-en')
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    pat = '|'.join([
+        'Sherlock Holmes',
+        'John Watson',
+        'Irene Adler',
+        'Inspector Lestrade',
+        'Professor Moriarty',
+    ])
+
+    return Benchmark(pattern=pat, commands=[
+        Command('rg (lines)', ['rg', '-n', pat, en]),
+        Command('ag (lines)', ['ag', '-s', pat, en]),
+        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
+        Command('grep (lines)', [
+            'grep', '-E', '-an', pat, en,
+        ], env=GREP_ASCII),
+        Command('rg', ['rg', pat, en]),
+        Command('grep', [
+            'grep', '-E', '-a', pat, en,
+        ], env=GREP_ASCII),
+    ])
+
+
+def bench_subtitles_en_alternate_casei(suite_dir):
+    '''
+    Benchmark the speed of a set of alternate literals case insensitively.
+    '''
+    require(suite_dir, 'subtitles-en')
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    pat = '|'.join([
+        'Sherlock Holmes',
+        'John Watson',
+        'Irene Adler',
+        'Inspector Lestrade',
+        'Professor Moriarty',
+    ])
+
+    return Benchmark(pattern=pat, commands=[
+        Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
+        Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
+        Command('grep (ASCII)', [
+            'grep', '-E', '-ani', pat, en,
+        ], env=GREP_ASCII),
+        Command('rg', ['rg', '-n', '-i', pat, en]),
+        Command('grep', ['grep', '-E', '-ani', pat, en], env=GREP_UNICODE),
+    ])
+
+
+def bench_subtitles_en_surrounding_words(suite_dir):
+    '''
+    Benchmark a more complex regex with an inner literal.
+    '''
+    require(suite_dir, 'subtitles-en')
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    pat = r'\w+\s+Holmes\s+\w+'
+
+    return Benchmark(pattern=pat, commands=[
+        Command('rg', ['rg', '-n', pat, en]),
+        Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
+        Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
+        Command('ag (ASCII)', ['ag', '-s', pat, en]),
+        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
+        Command('grep (ASCII)', [
+            'grep', '-E', '-an', pat, en,
+        ], env=GREP_ASCII),
+    ])
+
+
+def bench_subtitles_en_no_literal(suite_dir):
+    '''
+    Benchmark the speed of a regex with no literals.
+
+    Note that we don't even try to run grep with Unicode support
+    on this one. While it should eventually get the right answer,
+    I killed it after it had already been running for two minutes
+    and showed no signs of finishing soon.
+    '''
+    require(suite_dir, 'subtitles-en')
+    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
+    pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'
+
+    return Benchmark(pattern=pat, commands=[
+        Command('rg', ['rg', '-n', pat, en]),
+        Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
+        Command('ag (ASCII)', ['ag', '-s', pat, en]),
+        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
+        Command('grep (ASCII)', [
+            'grep', '-E', '-an', pat, en,
        ], env=GREP_ASCII),
-        Command('pt', ['pt', pat, ru]),
-        Command('pt (no line numbers)', ['pt', '-N', pat, ru]),
-        Command('sift', ['sift', '-n', pat, ru]),
-        Command('sift (no line numbers)', ['sift', pat, ru]),
    ])


@@ -451,18 +550,16 @@ def bench_subtitles_ru_literal(suite_dir):
    pat = 'Шерлок Холмс'  # Sherlock Holmes

    return Benchmark(pattern=pat, commands=[
-        Command('rg', ['rg', '-n', pat, ru]),
-        Command('rg (no line numbers)', ['rg', pat, ru]),
-        Command('ag', ['ag', '-s', pat, ru]),
-        Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
-        Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII),
-        Command('grep (no line numbers)', [
-            'grep', '-a', pat, ru,
-        ], env=GREP_ASCII),
-        Command('pt', ['pt', pat, ru]),
-        Command('pt (no line numbers)', ['pt', '-N', pat, ru]),
-        Command('sift', ['sift', '-n', pat, ru]),
-        Command('sift (no line numbers)', ['sift', pat, ru]),
+        Command('rg', ['rg', pat, ru]),
+        Command('pt', ['pt', '-N', pat, ru]),
+        Command('sift', ['sift', pat, ru]),
+        Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
+        Command('rg (lines)', ['rg', '-n', pat, ru]),
+        Command('ag (lines)', ['ag', '-s', pat, ru]),
+        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
+        Command('pt (lines)', ['pt', pat, ru]),
+        Command('sift (lines)', ['sift', '-n', pat, ru]),
+        Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
    ])


@@ -475,13 +572,14 @@ def bench_subtitles_ru_literal_casei(suite_dir):
    pat = 'Шерлок Холмс'  # Sherlock Holmes

    return Benchmark(pattern=pat, commands=[
-        Command('rg', ['rg', '-n', '-i', pat, ru]),
-        Command('ag (not Unicode)', ['ag', '-i', pat, ru]),
-        Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]),
-        Command('grep', ['grep', '-ani', pat, ru], env=GREP_UNICODE),
-        Command('grep (not Unicode)', [
-            'grep', '-E', '-ani', pat, ru,
+        Command('rg', ['rg', '-i', pat, ru]),
+        Command('grep', ['grep', '-ai', pat, ru], env=GREP_UNICODE),
+        Command('grep (ASCII)', [
+            'grep', '-E', '-ai', pat, ru,
        ], env=GREP_ASCII),
+        Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
+        Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
+        Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
    ])


@@ -494,15 +592,15 @@ def bench_subtitles_ru_literal_word(suite_dir):
    pat = 'Шерлок Холмс'  # Sherlock Holmes

    return Benchmark(pattern=pat, commands=[
-        Command('rg', ['rg', '-nw', pat, ru]),
-        Command('rg (not Unicode)', [
+        Command('rg (ASCII)', [
            'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
        ]),
-        Command('ag (not Unicode)', ['ag', '-sw', pat, ru]),
-        Command('ucg (not Unicode)', ['ucg', '--nosmart-case', pat, ru]),
-        Command('grep (not Unicode)', [
+        Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
+        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
+        Command('grep (ASCII)', [
            'grep', '-anw', pat, ru,
        ], env=GREP_ASCII),
+        Command('rg', ['rg', '-nw', pat, ru]),
        Command('grep', ['grep', '-anw', pat, ru], env=GREP_UNICODE),
    ])

@@ -522,11 +620,14 @@ def bench_subtitles_ru_alternate(suite_dir):
    ])

    return Benchmark(pattern=pat, commands=[
-        Command('rg', ['rg', '-n', pat, ru]),
-        Command('rg (no line numbers)', ['rg', pat, ru]),
-        Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
-        Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_ASCII),
-        Command('grep (no line numbers)', [
+        Command('rg (lines)', ['rg', '-n', pat, ru]),
+        Command('ag (lines)', ['ag', '-s', pat, ru]),
+        Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
+        Command('grep (lines)', [
+            'grep', '-E', '-an', pat, ru,
+        ], env=GREP_ASCII),
+        Command('rg', ['rg', pat, ru]),
+        Command('grep', [
            'grep', '-E', '-a', pat, ru,
        ], env=GREP_ASCII),
    ])
@@ -547,12 +648,32 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
    ])

    return Benchmark(pattern=pat, commands=[
-        Command('rg', ['rg', '-n', '-i', pat, ru]),
-        Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]),
-        Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
-        Command('grep (not Unicode)', [
+        Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
+        Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
+        Command('grep (ASCII)', [
            'grep', '-E', '-ani', pat, ru,
        ], env=GREP_ASCII),
+        Command('rg', ['rg', '-n', '-i', pat, ru]),
+        Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
+    ])
+
+
+def bench_subtitles_ru_surrounding_words(suite_dir):
+    '''
+    Benchmark a more complex regex with an inner literal (Russian corpus).
+    '''
+    require(suite_dir, 'subtitles-ru')
+    ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME)
+    pat = r'\w+\s+Холмс\s+\w+'
+
+    return Benchmark(pattern=pat, commands=[
+        Command('rg', ['rg', '-n', pat, ru]),
+        Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
+        Command('ag (ASCII)', ['ag', '-s', pat, ru]),
+        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
+        Command('grep (ASCII)', [
+            'grep', '-E', '-an', pat, ru,
+        ], env=GREP_ASCII),
    ])


@@ -571,9 +692,10 @@ def bench_subtitles_ru_no_literal(suite_dir):

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', '-n', pat, ru]),
-        Command('rg (no line numbers)', ['rg', pat, ru]),
-        Command('ucg (no Unicode)', ['ucg', '--nosmart-case', pat, ru]),
-        Command('grep (no Unicode)', [
+        Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
+        Command('ag (ASCII)', ['ag', '-s', pat, ru]),
+        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
+        Command('grep (ASCII)', [
            'grep', '-E', '-an', pat, ru,
        ], env=GREP_ASCII),
    ])
@@ -597,6 +719,23 @@ class MissingDependencies(Exception):
        return 'MissingDependency(%s)' % repr(self.missing_names)


+class MissingCommands(Exception):
+    '''
+    A missing command exception.
+
+    This exception occurs when running a command in a benchmark
+    where the command could not be found on the current system.
+
+    :ivar list(str) missing_names:
+        The names of the command binaries that could not be found.
+    '''
+    def __init__(self, missing_names):
+        self.missing_names = sorted(set(missing_names))
+
+    def __str__(self):
+        return 'MissingCommands(%s)' % repr(self.missing_names)
+
+
 class Benchmark(object):
    '''
    A single benchmark corresponding to a grouping of commands.
@@ -606,7 +745,8 @@ class Benchmark(object):
    '''

    def __init__(self, name=None, pattern=None, commands=None,
-                 warmup_count=1, count=3, line_count=True):
+                 warmup_count=1, count=3, line_count=True,
+                 allow_missing_commands=False):
        '''
        Create a single benchmark.

@@ -644,15 +784,37 @@ class Benchmark(object):
        self.warmup_count = warmup_count
        self.count = count
        self.line_count = line_count
+        self.allow_missing_commands = allow_missing_commands
+
+    def raise_if_missing(self):
+        '''
+        Raises a MissingCommands exception if applicable.
+
+        A MissingCommands exception is raised when the following
+        criteria are met: 1) allow_missing_commands is False, and 2) at
+        least one command in this benchmark could not be found on this
+        system.
+        '''
+        missing_commands = \
+            [c.binary_name for c in self.commands if not c.exists()]
+        if not self.allow_missing_commands and len(missing_commands) > 0:
+            raise MissingCommands(missing_commands)

    def run(self):
        '''
        Runs this benchmark and returns the results.

        :rtype: Result
+        :raises:
+            MissingCommands if any command doesn't exist.
+            (Unless allow_missing_commands is enabled.)
        '''
+        self.raise_if_missing()
        result = Result(self)
        for cmd in self.commands:
+            if self.allow_missing_commands and not cmd.exists():
+                # Skip this command if we're OK with it.
+                continue
            # Do a warmup first.
            for _ in range(self.warmup_count):
                self.run_one(cmd)
@@ -677,6 +839,8 @@ class Benchmark(object):
            it is the number of lines in the search output.
        :rtype: int
        '''
+        if not cmd.exists():
+            raise MissingCommands([cmd.binary_name])
        cmd.kwargs['stderr'] = subprocess.DEVNULL
        if self.line_count:
            cmd.kwargs['stdout'] = subprocess.PIPE
@@ -746,6 +910,8 @@ class Result(object):
        means = []
        for cmd in self.benchmark.commands:
            mean, _ = self.distribution_for(cmd)
+            if mean is None:
+                continue
            means.append((cmd, mean))
        return min(means, key=lambda tup: tup[1])[0]

@@ -768,16 +934,18 @@ class Result(object):
        '''
        Returns the distribution (mean +/- std) of the given command.

+        If there are no samples for this command (i.e., it was skipped),
+        then return ``(None, None)``.
+
        :rtype: (float, float)
        :returns:
            A tuple containing the mean and standard deviation, in that
            order.
        '''
-        mean = statistics.mean(
-            s['duration'] for s in self.samples_for(cmd))
-        stdev = statistics.stdev(
-            s['duration'] for s in self.samples_for(cmd))
-        return mean, stdev
+        samples = list(s['duration'] for s in self.samples_for(cmd))
+        if len(samples) == 0:
+            return None, None
+        return statistics.mean(samples), statistics.stdev(samples)


 class Command(object):
@@ -807,6 +975,15 @@ class Command(object):
        self.args = args
        self.kwargs = kwargs

+    def exists(self):
+        'Returns true if and only if this command exists.'
+        return shutil.which(self.binary_name) is not None
+
+    @property
+    def binary_name(self):
+        'Return the binary name of this command.'
+        return self.cmd[0]
+
    def run(self):
        '''
        Runs this command and returns its status.
@@ -947,7 +1124,8 @@ def download(suite_dir, choices):
            sys.exit(1)


-def collect_benchmarks(suite_dir, filter_pat=None):
+def collect_benchmarks(suite_dir, filter_pat=None,
+                       allow_missing_commands=False):
    '''
    Return an iterable of all runnable benchmarks.

@@ -969,6 +1147,9 @@ def collect_benchmarks(suite_dir, filter_pat=None):
            continue
        try:
            benchmark = globals()[fun](suite_dir)
+            benchmark.name = name
+            benchmark.allow_missing_commands = allow_missing_commands
+            benchmark.raise_if_missing()
        except MissingDependencies as e:
            eprint(
                'missing: %s, skipping benchmark %s (try running with: %s)' % (
@@ -976,24 +1157,33 @@ def collect_benchmarks(suite_dir, filter_pat=None):
                    name,
                    ' '.join(['--download %s' % n for n in e.missing_names]),
                ))
            continue
+        except MissingCommands as e:
+            fmt = 'missing commands: %s, skipping benchmark %s ' \
+                  '(run with --allow-missing to run incomplete benchmarks)'
+            eprint(fmt % (', '.join(e.missing_names), name))
+            continue
-        benchmark.name = name
        yield benchmark


 def main():
+    download_choices = ['all', 'linux', 'subtitles-en', 'subtitles-ru']
    p = argparse.ArgumentParser('Command line search tool benchmark suite.')
    p.add_argument(
        '--dir', metavar='PATH', default=os.getcwd(),
        help='The directory in which to download data and perform searches.')
    p.add_argument(
        '--download', metavar='CORPUS', action='append',
-        choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
+        choices=download_choices,
        help='Download and prepare corpus data, then exit without running '
             'any benchmarks. Note that this command is intended to be '
             'idempotent. WARNING: This downloads over a gigabyte of data, '
             'and also includes building the Linux kernel. If "all" is used '
-             'then the total uncompressed size is around 13 GB.')
+             'then the total uncompressed size is around 13 GB. '
+             'Choices: %s' % ', '.join(download_choices))
+    p.add_argument(
+        '--allow-missing', action='store_true',
+        help='Permit benchmarks to run even if some commands are missing.')
    p.add_argument(
        '-f', '--force', action='store_true',
        help='Overwrite existing files if there is a conflict.')
@@ -1009,6 +1198,13 @@ def main():
        help='A regex pattern that will only run benchmarks that match.')
    args = p.parse_args()

+    if args.list:
+        benchmarks = collect_benchmarks(
+            args.dir, filter_pat=args.bench,
+            allow_missing_commands=args.allow_missing)
+        for b in benchmarks:
+            print(b.name)
+        sys.exit(0)
    if args.download is not None and len(args.download) > 0:
        download(args.dir, args.download)
        sys.exit(0)
@@ -1028,7 +1224,9 @@ def main():
        raw_csv_wtr = csv.DictWriter(raw_handle, fields)
        raw_csv_wtr.writerow({x: x for x in fields})

-    benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
+    benchmarks = collect_benchmarks(
+        args.dir, filter_pat=args.bench,
+        allow_missing_commands=args.allow_missing)
    for i, b in enumerate(benchmarks):
        result = b.run()
        fastest_cmd = result.fastest_cmd()
@@ -1042,6 +1240,12 @@ def main():
        for cmd in b.commands:
            name = cmd.name
            mean, stdev = result.distribution_for(cmd)
+            if mean is None:
+                # If we couldn't get a distribution for this command then
+                # it was skipped.
+                print('{name:{pad}} SKIPPED'.format(
+                    name=name, pad=max_name_len + 2))
+                continue
            line_counts = result.line_counts_for(cmd)
            show_fast_cmd, show_line_counts = '', ''
            if fastest_cmd.name == cmd.name:
--- a/benchsuite/raw.csv
+++ b/benchsuite/raw.csv
--- a/benchsuite/summary
+++ b/benchsuite/summary
--- a/ctags.rust
+++ b/ctags.rust
@@ -1,11 +0,0 @@
--langdef=Rust
--langmap=Rust:.rs
--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/
--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/
--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/
--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/
--- a/grep/Cargo.toml
+++ b/grep/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "grep"
-version = "0.1.0"  #:version
+version = "0.1.1"  #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Fast line oriented regex searching as a library.
--- a/grep/src/lib.rs
+++ b/grep/src/lib.rs
@@ -62,7 +62,7 @@ impl fmt::Display for Error {
        match *self {
            Error::Regex(ref err) => err.fmt(f),
            Error::LiteralNotAllowed(chr) => {
-                write!(f, "Literal '{}' not allowed.", chr)
+                write!(f, "Literal {:?} not allowed.", chr)
            }
            Error::__Nonexhaustive => unreachable!(),
        }
--- a/grep/src/nonl.rs
+++ b/grep/src/nonl.rs
@@ -10,6 +10,10 @@ use {Error, Result};
 /// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
 /// function panics.
 pub fn remove(expr: Expr, byte: u8) -> Result<Expr> {
+    // TODO(burntsushi): There is a bug in this routine where only `\n` is
+    // handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated
+    // to proper character classes instead of the special `AnyCharNoNL` and
+    // `AnyByteNoNL` classes.
    use syntax::Expr::*;
    assert!(byte <= 0x7F);
    let chr = byte as char;
--- a/session.vim
+++ b/session.vim
@@ -1 +0,0 @@
-au BufWritePost *.rs silent!make ctags > /dev/null 2>&1
--- a/src/args.rs
+++ b/src/args.rs
@@ -124,6 +124,7 @@ Less common options:

    --no-ignore
        Don't respect ignore files (.gitignore, .rgignore, etc.)
+        This implies --no-ignore-parent.

    --no-ignore-parent
        Don't respect ignore files in parent directories.
@@ -338,7 +339,9 @@ impl RawArgs {
            line_number: !self.flag_no_line_number && self.flag_line_number,
            mmap: mmap,
            no_ignore: self.flag_no_ignore,
-            no_ignore_parent: self.flag_no_ignore_parent,
+            no_ignore_parent:
+                // --no-ignore implies --no-ignore-parent
+                self.flag_no_ignore_parent || self.flag_no_ignore,
            quiet: self.flag_quiet,
            replace: self.flag_replace.clone().map(|s| s.into_bytes()),
            text: self.flag_text,
--- a/src/gitignore.rs
+++ b/src/gitignore.rs
@@ -21,6 +21,7 @@ additional rules such as whitelists (prefix of `!`) or directory-only globs
 // TODO(burntsushi): Implement something similar, but for Mercurial. We can't
 // use this exact implementation because hgignore files are different.

+use std::cell::RefCell;
 use std::error::Error as StdError;
 use std::fmt;
 use std::fs::File;
@@ -30,6 +31,7 @@ use std::path::{Path, PathBuf};
 use regex;

 use glob;
+use pathutil::strip_prefix;

 /// Represents an error that can occur when parsing a gitignore file.
 #[derive(Debug)]
@@ -110,37 +112,37 @@ impl Gitignore {
    /// same directory as this gitignore file.
    pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
        let mut path = path.as_ref();
-        if let Ok(p) = path.strip_prefix(&self.root) {
+        if let Some(p) = strip_prefix("./", path) {
            path = p;
        }
-        self.matched_utf8(&*path.to_string_lossy(), is_dir)
+        if let Some(p) = strip_prefix(&self.root, path) {
+            path = p;
+        }
+        self.matched_stripped(path, is_dir)
    }

-    /// Like matched, but takes a path that has already been stripped and
-    /// converted to UTF-8.
-    pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
-        // A single regex with a bunch of alternations of glob patterns is
-        // unfortunately typically faster than a regex, so we use it as a
-        // first pass filter. We still need to run the RegexSet to get the most
-        // recently defined glob that matched.
-        if !self.set.is_match(path) {
-            return Match::None;
-        }
-        // The regex set can't actually pick the right glob that matched all
-        // on its own. In particular, some globs require that only directories
-        // can match. Thus, only accept a match from the regex set if the given
-        // path satisfies the corresponding glob's directory criteria.
-        for i in self.set.matches(path).iter().rev() {
-            let pat = &self.patterns[i];
-            if !pat.only_dir || is_dir {
-                return if pat.whitelist {
-                    Match::Whitelist(pat)
-                } else {
-                    Match::Ignored(pat)
-                };
+    /// Like matched, but takes a path that has already been stripped.
+    pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
+        thread_local! {
+            static MATCHES: RefCell<Vec<usize>> = {
+                RefCell::new(vec![])
            }
-        }
-        Match::None
+        };
+        MATCHES.with(|matches| {
+            let mut matches = matches.borrow_mut();
+            self.set.matches_into(path, &mut *matches);
+            for &i in matches.iter().rev() {
+                let pat = &self.patterns[i];
+                if !pat.only_dir || is_dir {
+                    return if pat.whitelist {
+                        Match::Whitelist(pat)
+                    } else {
+                        Match::Ignored(pat)
+                    };
+                }
+            }
+            Match::None
+        })
    }

    /// Returns the total number of ignore patterns.
@@ -390,6 +392,7 @@ mod tests {
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
+    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
--- a/src/glob.rs
+++ b/src/glob.rs
@@ -26,13 +26,22 @@ to make its way into `glob` proper.
 // at the .gitignore for the chromium repo---just about every pattern satisfies
 // that assumption.)

+use std::borrow::Cow;
+use std::collections::HashMap;
 use std::error::Error as StdError;
+use std::ffi::{OsStr, OsString};
 use std::fmt;
+use std::hash;
 use std::iter;
+use std::path::Path;
 use std::str;

+use fnv;
 use regex;
-use regex::bytes::{Regex, RegexSet, SetMatches};
+use regex::bytes::Regex;
+use regex::bytes::RegexSet;
+
+use pathutil::file_name;

 /// Represents an error that can occur when parsing a glob pattern.
 #[derive(Clone, Debug, Eq, PartialEq)]
@@ -71,33 +80,181 @@ impl fmt::Display for Error {
    }
 }

+/// SetYesNo represents a group of globs that can be matched together in a
+/// single pass. SetYesNo can only determine whether a particular path matched
+/// any pattern in the set.
+#[derive(Clone, Debug)]
+pub struct SetYesNo {
+    re: Regex,
+}
+
+impl SetYesNo {
+    /// Returns true if and only if the given path matches at least one glob
+    /// in this set.
+    pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
+        self.re.is_match(&*path_bytes(path.as_ref()))
+    }
+
+    fn new(
+        pats: &[(Pattern, MatchOptions)],
+    ) -> Result<SetYesNo, regex::Error> {
+        let mut joined = String::new();
+        for &(ref p, ref o) in pats {
+            let part = format!("(?:{})", p.to_regex_with(o));
+            if !joined.is_empty() {
+                joined.push('|');
+            }
+            joined.push_str(&part);
+        }
+        Ok(SetYesNo { re: try!(Regex::new(&joined)) })
+    }
+}
+
+type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
+
 /// Set represents a group of globs that can be matched together in a single
 /// pass.
 #[derive(Clone, Debug)]
 pub struct Set {
-    re: Regex,
-    set: RegexSet,
+    yesno: SetYesNo,
+    exts: HashMap<OsString, Vec<usize>, Fnv>,
+    literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
+    base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
+    base_prefixes: Vec<Vec<u8>>,
+    base_prefixes_map: Vec<usize>,
+    base_suffixes: Vec<Vec<u8>>,
+    base_suffixes_map: Vec<usize>,
+    base_regexes: RegexSet,
+    base_regexes_map: Vec<usize>,
+    regexes: RegexSet,
+    regexes_map: Vec<usize>,
 }

 impl Set {
-    /// Returns true if and only if the given path matches at least one glob
-    /// in this set.
-    pub fn is_match<T: AsRef<[u8]>>(&self, path: T) -> bool {
-        self.re.is_match(path.as_ref())
-    }
-
-    /// Returns every glob pattern (by sequence number) that matches the given
-    /// path.
-    pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
-        // TODO(burntsushi): If we split this out into a separate crate, don't
-        // expose the regex::SetMatches type in the public API.
-        self.set.matches(path.as_ref())
-    }
-
-    /// Returns the number of glob patterns in this set.
+    /// Returns the sequence number of every glob pattern that matches the
+    /// given path.
    #[allow(dead_code)]
-    pub fn len(&self) -> usize {
-        self.set.len()
+    pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
+        let mut into = vec![];
+        self.matches_into(path, &mut into);
+        into
+    }
+
+    /// Adds the sequence number of every glob pattern that matches the given
+    /// path to the vec given.
+    pub fn matches_into<T: AsRef<Path>>(
+        &self,
+        path: T,
+        into: &mut Vec<usize>,
+    ) {
+        into.clear();
+        let path = path.as_ref();
+        let path_bytes = &*path_bytes(path);
+        let basename = file_name(path).map(|b| os_str_bytes(b));
+        if !self.yesno.is_match(path) {
+            return;
+        }
+        if !self.exts.is_empty() {
+            if let Some(ext) = path.extension() {
+                if let Some(matches) = self.exts.get(ext) {
+                    into.extend(matches.as_slice());
+                }
+            }
+        }
+        if !self.literals.is_empty() {
+            if let Some(matches) = self.literals.get(path_bytes) {
+                into.extend(matches.as_slice());
+            }
+        }
+        if !self.base_literals.is_empty() {
+            if let Some(ref basename) = basename {
+                if let Some(matches) = self.base_literals.get(&**basename) {
+                    into.extend(matches.as_slice());
+                }
+            }
+        }
+        if !self.base_prefixes.is_empty() {
+            if let Some(ref basename) = basename {
+                let basename = &**basename;
+                for (i, pre) in self.base_prefixes.iter().enumerate() {
+                    if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] {
+                        into.push(self.base_prefixes_map[i]);
+                    }
+                }
+            }
+        }
+        if !self.base_suffixes.is_empty() {
+            if let Some(ref basename) = basename {
+                let basename = &**basename;
+                for (i, suf) in self.base_suffixes.iter().enumerate() {
+                    if suf.len() > basename.len() {
+                        continue;
+                    }
+                    let (s, e) = (basename.len() - suf.len(), basename.len());
+                    if &**suf == &basename[s..e] {
+                        into.push(self.base_suffixes_map[i]);
+                    }
+                }
+            }
+        }
+        if let Some(ref basename) = basename {
+            for i in self.base_regexes.matches(&**basename) {
+                into.push(self.base_regexes_map[i]);
+            }
+        }
+        for i in self.regexes.matches(path_bytes) {
+            into.push(self.regexes_map[i]);
+        }
+        into.sort();
+    }
+
+    fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> {
+        let fnv = Fnv::default();
+        let mut exts = HashMap::with_hasher(fnv.clone());
+        let mut literals = HashMap::with_hasher(fnv.clone());
+        let mut base_literals = HashMap::with_hasher(fnv.clone());
+        let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
+        let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
+        let (mut regexes, mut regexes_map) = (vec![], vec![]);
+        let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
+        for (i, &(ref p, ref o)) in pats.iter().enumerate() {
+            if let Some(ext) = p.ext() {
+                exts.entry(ext).or_insert(vec![]).push(i);
+            } else if let Some(literal) = p.literal() {
+                literals.entry(literal.into_bytes()).or_insert(vec![]).push(i);
+            } else if let Some(literal) = p.base_literal() {
+                base_literals
+                    .entry(literal.into_bytes()).or_insert(vec![]).push(i);
+            } else if let Some(literal) = p.base_literal_prefix() {
+                base_prefixes.push(literal.into_bytes());
+                base_prefixes_map.push(i);
+            } else if let Some(literal) = p.base_literal_suffix() {
+                base_suffixes.push(literal.into_bytes());
+                base_suffixes_map.push(i);
+            } else if p.is_only_basename() {
+                let part = format!("(?:{})", p.to_regex_with(o));
+                base_regexes.push(part);
+                base_regexes_map.push(i);
+            } else {
+                let part = format!("(?:{})", p.to_regex_with(o));
+                regexes.push(part);
+                regexes_map.push(i);
+            }
+        }
+        Ok(Set {
+            yesno: try!(SetYesNo::new(pats)),
+            exts: exts,
+            literals: literals,
+            base_literals: base_literals,
+            base_prefixes: base_prefixes,
+            base_prefixes_map: base_prefixes_map,
+            base_suffixes: base_suffixes,
+            base_suffixes_map: base_suffixes_map,
+            base_regexes: try!(RegexSet::new(base_regexes)),
+            base_regexes_map: base_regexes_map,
+            regexes: try!(RegexSet::new(regexes)),
+            regexes_map: regexes_map,
+        })
    }
 }

@@ -119,19 +276,12 @@ impl SetBuilder {
    ///
    /// Once a matcher is built, no new patterns can be added to it.
    pub fn build(&self) -> Result<Set, regex::Error> {
-        let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o));
-        let set = try!(RegexSet::new(it));
+        Set::new(&self.pats)
+    }

-        let mut joined = String::new();
-        for &(ref p, ref o) in &self.pats {
-            let part = format!("(?:{})", p.to_regex_with(o));
-            if !joined.is_empty() {
-                joined.push('|');
-            }
-            joined.push_str(&part);
-        }
-        let re = try!(Regex::new(&joined));
-        Ok(Set { re: re, set: set })
+    /// Like `build`, but returns a matcher that can only answer yes/no.
+    pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> {
+        SetYesNo::new(&self.pats)
    }

    /// Add a new pattern to this set.
@@ -149,8 +299,21 @@ impl SetBuilder {
        pat: &str,
        opts: &MatchOptions,
    ) -> Result<(), Error> {
-        let pat = try!(Pattern::new(pat));
-        self.pats.push((pat, opts.clone()));
+        let parsed = try!(Pattern::new(pat));
+        // if let Some(ext) = parsed.ext() {
+            // eprintln!("ext :: {:?} :: {:?}", ext, pat);
+        // } else if let Some(lit) = parsed.literal() {
+            // eprintln!("literal :: {:?} :: {:?}", lit, pat);
+        // } else if let Some(lit) = parsed.base_literal() {
+            // eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
+        // } else if let Some(lit) = parsed.base_literal_prefix() {
+            // eprintln!("base_literal_prefix :: {:?} :: {:?}", lit, pat);
+        // } else if let Some(lit) = parsed.base_literal_suffix() {
+            // eprintln!("base_literal_suffix :: {:?} :: {:?}", lit, pat);
+        // } else {
+            // eprintln!("regex :: {:?} :: {:?}", pat, parsed);
+        // }
+        self.pats.push((parsed, opts.clone()));
        Ok(())
    }
 }
@@ -204,6 +367,133 @@ impl Pattern {
        Ok(p.p)
    }

+    /// Returns an extension if this pattern exclusively matches it.
+    pub fn ext(&self) -> Option<OsString> {
+        if self.tokens.len() <= 3 {
+            return None;
+        }
+        match self.tokens.get(0) {
+            Some(&Token::RecursivePrefix) => {}
+            _ => return None,
+        }
+        match self.tokens.get(1) {
+            Some(&Token::ZeroOrMore) => {}
+            _ => return None,
+        }
+        match self.tokens.get(2) {
+            Some(&Token::Literal(c)) if c == '.' => {}
+            _ => return None,
+        }
+        let mut lit = OsString::new();
+        for t in self.tokens[3..].iter() {
+            match *t {
+                Token::Literal(c) if c == '/' || c == '\\' || c == '.' => {
+                    return None;
+                }
+                Token::Literal(c) => lit.push(c.to_string()),
+                _ => return None,
+            }
+        }
+        Some(lit)
+    }
+
+    /// Returns the pattern as a literal if and only if the pattern exclusively
+    /// matches the basename of a file path *and* is a literal.
+    ///
+    /// The basic format of these patterns is `**/{literal}`, where `{literal}`
+    /// does not contain a path separator.
+    pub fn base_literal(&self) -> Option<String> {
+        match self.tokens.get(0) {
+            Some(&Token::RecursivePrefix) => {}
+            _ => return None,
+        }
+        let mut lit = String::new();
+        for t in &self.tokens[1..] {
+            match *t {
+                Token::Literal(c) if c == '/' || c == '\\' => return None,
+                Token::Literal(c) => lit.push(c),
+                _ => return None,
+            }
+        }
+        Some(lit)
+    }
+
+    /// Returns true if and only if this pattern only inspects the basename
+    /// of a path.
+    pub fn is_only_basename(&self) -> bool {
+        match self.tokens.get(0) {
+            Some(&Token::RecursivePrefix) => {}
+            _ => return false,
+        }
+        for t in &self.tokens[1..] {
+            match *t {
+                Token::Literal(c) if c == '/' || c == '\\' => return false,
+                Token::RecursivePrefix
+                | Token::RecursiveSuffix
+                | Token::RecursiveZeroOrMore => return false,
+                _ => {}
+            }
+        }
+        true
+    }
+
+    /// Returns the pattern as a literal if and only if the pattern must match
+    /// an entire path exactly.
+    ///
+    /// The basic format of these patterns is `{literal}`.
+    pub fn literal(&self) -> Option<String> {
+        let mut lit = String::new();
+        for t in &self.tokens {
+            match *t {
+                Token::Literal(c) => lit.push(c),
+                _ => return None,
+            }
+        }
+        Some(lit)
+    }
+
+    /// Returns a basename literal prefix of this pattern.
+    pub fn base_literal_prefix(&self) -> Option<String> {
+        match self.tokens.get(0) {
+            Some(&Token::RecursivePrefix) => {}
+            _ => return None,
+        }
+        match self.tokens.last() {
+            Some(&Token::ZeroOrMore) => {}
+            _ => return None,
+        }
+        let mut lit = String::new();
+        for t in &self.tokens[1..self.tokens.len()-1] {
+            match *t {
+                Token::Literal(c) if c == '/' || c == '\\' => return None,
+                Token::Literal(c) => lit.push(c),
+                _ => return None,
+            }
+        }
+        Some(lit)
+    }
+
+    /// Returns a basename literal suffix of this pattern.
+    pub fn base_literal_suffix(&self) -> Option<String> {
+        match self.tokens.get(0) {
+            Some(&Token::RecursivePrefix) => {}
+            _ => return None,
+        }
+        match self.tokens.get(1) {
+            Some(&Token::ZeroOrMore) => {}
+            _ => return None,
+        }
+        let mut lit = String::new();
+        for t in &self.tokens[2..] {
+            match *t {
+                Token::Literal(c) if c == '/' || c == '\\' => return None,
+                Token::Literal(c) => lit.push(c),
+                _ => return None,
+            }
+        }
+        Some(lit)
+    }
+
    /// Convert this pattern to a string that is guaranteed to be a valid
    /// regular expression and will represent the matching semantics of this
    /// glob pattern. This uses a default set of options.
@@ -415,13 +705,34 @@ impl<'a> Parser<'a> {
    }
 }

+fn path_bytes(path: &Path) -> Cow<[u8]> {
+    os_str_bytes(path.as_os_str())
+}
+
+#[cfg(unix)]
+fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
+    use std::os::unix::ffi::OsStrExt;
+    Cow::Borrowed(s.as_bytes())
+}
+
+#[cfg(not(unix))]
+fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
+    // TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even
+    // if we could get at the raw bytes, they wouldn't be useful. We *must*
+    // convert to UTF-8 before doing path matching. Unfortunate, but necessary.
+    match s.to_string_lossy() {
+        Cow::Owned(s) => Cow::Owned(s.into_bytes()),
+        Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
+    }
+}
+
 #[cfg(test)]
 mod tests {
    use std::path::Path;

    use regex::bytes::Regex;

-    use super::{Error, Pattern, MatchOptions, SetBuilder, Token};
+    use super::{Error, Pattern, MatchOptions, Set, SetBuilder, Token};
    use super::Token::*;

    macro_rules! syntax {
@@ -483,14 +794,42 @@ mod tests {
                let pat = Pattern::new($pat).unwrap();
                let path = &Path::new($path).to_str().unwrap();
                let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
-                // println!("PATTERN: {}", $pat);
-                // println!("REGEX: {:?}", re);
-                // println!("PATH: {}", path);
                assert!(!re.is_match(path.as_bytes()));
            }
        };
    }

+    macro_rules! ext {
+        ($name:ident, $pat:expr, $ext:expr) => {
+            #[test]
+            fn $name() {
+                let pat = Pattern::new($pat).unwrap();
+                let ext = pat.ext().map(|e| e.to_string_lossy().into_owned());
+                assert_eq!($ext, ext.as_ref().map(|s| &**s));
+            }
+        };
+    }
+
+    macro_rules! baseliteral {
+        ($name:ident, $pat:expr, $yes:expr) => {
+            #[test]
+            fn $name() {
+                let pat = Pattern::new($pat).unwrap();
+                assert_eq!($yes, pat.base_literal().is_some());
+            }
+        };
+    }
+
+    macro_rules! basesuffix {
+        ($name:ident, $pat:expr, $yes:expr) => {
+            #[test]
+            fn $name() {
+                let pat = Pattern::new($pat).unwrap();
+                assert_eq!($yes, pat.is_literal_suffix());
+            }
+        };
+    }
+
    fn class(s: char, e: char) -> Token {
        Class { negated: false, ranges: vec![(s, e)] }
    }
@@ -585,6 +924,26 @@ mod tests {
    toregex!(re10, "+", r"^\+$");
    toregex!(re11, "**", r"^.*$");

+    ext!(ext1, "**/*.rs", Some("rs"));
+
+    baseliteral!(lit1, "**", true);
+    baseliteral!(lit2, "**/a", true);
+    baseliteral!(lit3, "**/ab", true);
+    baseliteral!(lit4, "**/a*b", false);
+    baseliteral!(lit5, "z/**/a*b", false);
+    baseliteral!(lit6, "[ab]", false);
+    baseliteral!(lit7, "?", false);
+
+    /*
+    issuffix!(suf1, "", false);
+    issuffix!(suf2, "a", true);
+    issuffix!(suf3, "ab", true);
+    issuffix!(suf4, "*ab", true);
+    issuffix!(suf5, "*.ab", true);
+    issuffix!(suf6, "?.ab", true);
+    issuffix!(suf7, "ab*", false);
+    */
+
    matches!(match1, "a", "a");
    matches!(match2, "a*b", "a_b");
    matches!(match3, "a*b*c", "abc");
@@ -681,16 +1040,22 @@ mod tests {
        builder.add("src/lib.rs").unwrap();
        let set = builder.build().unwrap();

-        assert!(set.is_match("foo.c"));
-        assert!(set.is_match("src/foo.c"));
-        assert!(!set.is_match("foo.rs"));
-        assert!(!set.is_match("tests/foo.rs"));
-        assert!(set.is_match("src/foo.rs"));
-        assert!(set.is_match("src/grep/src/main.rs"));
+        fn is_match(set: &Set, s: &str) -> bool {
+            let mut matches = vec![];
+            set.matches_into(s, &mut matches);
+            !matches.is_empty()
+        }

-        assert_eq!(2, set.matches("src/lib.rs").iter().count());
-        assert!(set.matches("src/lib.rs").matched(0));
-        assert!(!set.matches("src/lib.rs").matched(1));
-        assert!(set.matches("src/lib.rs").matched(2));
+        assert!(is_match(&set, "foo.c"));
+        assert!(is_match(&set, "src/foo.c"));
+        assert!(!is_match(&set, "foo.rs"));
+        assert!(!is_match(&set, "tests/foo.rs"));
+        assert!(is_match(&set, "src/foo.rs"));
+        assert!(is_match(&set, "src/grep/src/main.rs"));
+
+        let matches = set.matches("src/lib.rs");
+        assert_eq!(2, matches.len());
+        assert_eq!(0, matches[0]);
+        assert_eq!(2, matches[1]);
    }
 }
--- a/src/ignore.rs
+++ b/src/ignore.rs
@@ -19,11 +19,11 @@ use std::io;
 use std::path::{Path, PathBuf};

 use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
+use pathutil::is_hidden;
 use types::Types;

 const IGNORE_NAMES: &'static [&'static str] = &[
    ".gitignore",
-    ".agignore",
    ".rgignore",
 ];

@@ -83,7 +83,10 @@ pub struct Ignore {
    overrides: Overrides,
    /// A file type matcher.
    types: Types,
+    /// Whether to ignore hidden files or not.
    ignore_hidden: bool,
+    /// When true, don't look at .gitignore or .rgignore files for ignore
+    /// rules.
    no_ignore: bool,
 }

@@ -208,15 +211,17 @@ impl Ignore {
            debug!("{} ignored because it is hidden", path.display());
            return true;
        }
-        for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
-            let mat = id.matched(path, is_dir);
-            if let Some(is_ignored) = self.ignore_match(path, mat) {
-                if is_ignored {
-                    return true;
+        if !self.no_ignore {
+            for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
+                let mat = id.matched(path, is_dir);
+                if let Some(is_ignored) = self.ignore_match(path, mat) {
+                    if is_ignored {
+                        return true;
+                    }
+                    // If this path is whitelisted by an ignore, then
+                    // fallthrough and let the file type matcher have a say.
+                    break;
                }
-                // If this path is whitelisted by an ignore, then fallthrough
-                // and let the file type matcher have a say.
-                break;
            }
        }
        let mat = self.types.matched(path, is_dir);
@@ -361,8 +366,7 @@ impl Overrides {
        let path = path.as_ref();
        self.gi.as_ref()
            .map(|gi| {
-                let path = &*path.to_string_lossy();
-                let mat = gi.matched_utf8(path, is_dir).invert();
+                let mat = gi.matched_stripped(path, is_dir).invert();
                if mat.is_none() && !is_dir {
                    if gi.num_ignores() > 0 {
                        return Match::Ignored(&self.unmatched_pat);
@@ -374,14 +378,6 @@ impl Overrides {
    }
 }

-fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
-    if let Some(name) = path.as_ref().file_name() {
-        name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
-    } else {
-        false
-    }
-}
-
 #[cfg(test)]
 mod tests {
    use std::path::Path;
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
-extern crate crossbeam;
+extern crate deque;
 extern crate docopt;
 extern crate env_logger;
+extern crate fnv;
 extern crate grep;
 #[cfg(windows)]
 extern crate kernel32;
@@ -28,7 +29,7 @@ use std::result;
 use std::sync::{Arc, Mutex};
 use std::thread;

-use crossbeam::sync::chase_lev::{self, Steal, Stealer};
+use deque::{Stealer, Stolen};
 use grep::Grep;
 use memmap::{Mmap, Protection};
 use term::Terminal;
@@ -36,6 +37,7 @@ use walkdir::DirEntry;

 use args::Args;
 use out::{ColoredTerminal, Out};
+use pathutil::strip_prefix;
 use printer::Printer;
 use search_stream::InputBuffer;
 #[cfg(windows)]
@@ -60,6 +62,7 @@ mod gitignore;
 mod glob;
 mod ignore;
 mod out;
+mod pathutil;
 mod printer;
 mod search_buffer;
 mod search_stream;
@@ -97,8 +100,8 @@ fn run(args: Args) -> Result<u64> {
    let out = Arc::new(Mutex::new(args.out()));
    let mut workers = vec![];

-    let mut workq = {
-        let (workq, stealer) = chase_lev::deque();
+    let workq = {
+        let (workq, stealer) = deque::new();
        for _ in 0..args.threads() {
            let worker = MultiWorker {
                chan_work: stealer.clone(),
@@ -215,10 +218,10 @@ impl MultiWorker {
    fn run(mut self) -> u64 {
        loop {
            let work = match self.chan_work.steal() {
-                Steal::Empty | Steal::Abort => continue,
-                Steal::Data(Work::Quit) => break,
-                Steal::Data(Work::Stdin) => WorkReady::Stdin,
-                Steal::Data(Work::File(ent)) => {
+                Stolen::Empty | Stolen::Abort => continue,
+                Stolen::Data(Work::Quit) => break,
+                Stolen::Data(Work::Stdin) => WorkReady::Stdin,
+                Stolen::Data(Work::File(ent)) => {
                    match File::open(ent.path()) {
                        Ok(file) => WorkReady::DirFile(ent, file),
                        Err(err) => {
@@ -257,7 +260,7 @@ impl Worker {
            }
            WorkReady::DirFile(ent, file) => {
                let mut path = ent.path();
-                if let Ok(p) = path.strip_prefix("./") {
+                if let Some(p) = strip_prefix("./", path) {
                    path = p;
                }
                if self.args.mmap() {
@@ -268,7 +271,7 @@ impl Worker {
            }
            WorkReady::PathFile(path, file) => {
                let mut path = &*path;
-                if let Ok(p) = path.strip_prefix("./") {
+                if let Some(p) = strip_prefix("./", path) {
                    path = p;
                }
                if self.args.mmap() {
--- a/src/pathutil.rs
+++ b/src/pathutil.rs
@@ -0,0 +1,98 @@
+/*!
+The pathutil module provides platform specific operations on paths that are
+typically faster than the same operations as provided in std::path. In
+particular, we really want to avoid the costly operation of parsing the path
+into its constituent components. We give up on Windows, but on Unix, we deal
+with the raw bytes directly.
+
+On large repositories (like chromium), this can have a ~25% performance
+improvement on just listing the files to search (!).
+*/
+use std::ffi::OsStr;
+use std::path::Path;
+
+use memchr::memrchr;
+
+/// Strip `prefix` from the `path` and return the remainder.
+///
+/// If `path` doesn't have a prefix `prefix`, then return `None`.
+#[cfg(unix)]
+pub fn strip_prefix<'a, P: AsRef<Path>>(
+    prefix: P,
+    path: &'a Path,
+) -> Option<&'a Path> {
+    use std::os::unix::ffi::OsStrExt;
+
+    let prefix = prefix.as_ref().as_os_str().as_bytes();
+    let path = path.as_os_str().as_bytes();
+    if prefix.len() > path.len() || prefix != &path[0..prefix.len()] {
+        None
+    } else {
+        Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..])))
+    }
+}
+
+/// Strip `prefix` from the `path` and return the remainder.
+///
+/// If `path` doesn't have a prefix `prefix`, then return `None`.
+#[cfg(not(unix))]
+pub fn strip_prefix<'a>(prefix: &Path, path: &'a Path) -> Option<&'a Path> {
+    path.strip_prefix(prefix).ok()
+}
+
+/// The final component of the path, if it is a normal file.
+///
+/// If the path terminates in ., .., or consists solely of a root or prefix,
+/// file_name will return None.
+#[cfg(unix)]
+pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
+    path: &'a P,
+) -> Option<&'a OsStr> {
+    use std::os::unix::ffi::OsStrExt;
+
+    let path = path.as_ref().as_os_str().as_bytes();
+    if path.is_empty() {
+        return None;
+    } else if path.len() == 1 && path[0] == b'.' {
+        return None;
+    } else if path.last() == Some(&b'.') {
+        return None;
+    } else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
+        return None;
+    }
+    let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
+    Some(OsStr::from_bytes(&path[last_slash..]))
+}
+
+/// The final component of the path, if it is a normal file.
+///
+/// If the path terminates in ., .., or consists solely of a root or prefix,
+/// file_name will return None.
+#[cfg(not(unix))]
+pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
+    path: &'a P,
+) -> Option<&'a OsStr> {
+    path.as_ref().file_name()
+}
+
+/// Returns true if and only if this file path is considered to be hidden.
+#[cfg(unix)]
+pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
+    use std::os::unix::ffi::OsStrExt;
+
+    if let Some(name) = file_name(path.as_ref()) {
+        name.as_bytes().get(0) == Some(&b'.')
+    } else {
+        false
+    }
+}
+
+/// Returns true if and only if this file path is considered to be hidden.
+#[cfg(not(unix))]
+pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
+    if let Some(name) = file_name(path) {
+        name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
+    } else {
+        false
+    }
+}
--- a/src/types.rs
+++ b/src/types.rs
@@ -151,8 +151,8 @@ impl FileTypeDef {
 /// Types is a file type matcher.
 #[derive(Clone, Debug)]
 pub struct Types {
-    selected: Option<glob::Set>,
-    negated: Option<glob::Set>,
+    selected: Option<glob::SetYesNo>,
+    negated: Option<glob::SetYesNo>,
    has_selected: bool,
    unmatched_pat: Pattern,
 }
@@ -165,8 +165,8 @@ impl Types {
    /// If has_selected is true, then at least one file type was selected.
    /// Therefore, any non-matches should be ignored.
    fn new(
-        selected: Option<glob::Set>,
-        negated: Option<glob::Set>,
+        selected: Option<glob::SetYesNo>,
+        negated: Option<glob::SetYesNo>,
        has_selected: bool,
    ) -> Types {
        Types {
@@ -268,7 +268,7 @@ impl TypesBuilder {
                        try!(bset.add_with(glob, &opts));
                    }
                }
-                Some(try!(bset.build()))
+                Some(try!(bset.build_yesno()))
            };
        let negated_globs =
            if self.negated.is_empty() {
@@ -287,7 +287,7 @@ impl TypesBuilder {
                        try!(bset.add_with(glob, &opts));
                    }
                }
-                Some(try!(bset.build()))
+                Some(try!(bset.build_yesno()))
            };
        Ok(Types::new(
            selected_globs, negated_globs, !self.selected.is_empty()))
--- a/src/walk.rs
+++ b/src/walk.rs
@@ -26,6 +26,7 @@ impl Iter {
    }

    /// Returns true if this entry should be skipped.
+    #[inline(always)]
    fn skip_entry(&self, ent: &DirEntry) -> bool {
        if ent.depth() == 0 {
            // Never skip the root directory.
@@ -41,6 +42,7 @@ impl Iter {
 impl Iterator for Iter {
    type Item = DirEntry;

+    #[inline(always)]
    fn next(&mut self) -> Option<DirEntry> {
        while let Some(ev) = self.it.next() {
            match ev {
@@ -108,6 +110,7 @@ impl From<WalkDir> for WalkEventIter {
 impl Iterator for WalkEventIter {
    type Item = walkdir::Result<WalkEvent>;

+    #[inline(always)]
    fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
        let dent = self.next.take().or_else(|| self.it.next());
        let depth = match dent {
Author	SHA1	Message	Date
Andrew Gallant	8f87a4e8ac	0.1.2	2016-09-17 11:36:11 -04:00
Andrew Gallant	d27d3e675f	bump grep	2016-09-17 11:34:27 -04:00
Andrew Gallant	bf5d873099	grep 0.1.1	2016-09-17 11:32:47 -04:00
Andrew Gallant	bc9d12c4c8	Improve ergonomics of benchsuite. The runner now detects if commands exist and permits running incomplete benchmarks. Also, explicitly use Python 3 since that's what default Ubuntu 16.04 seems to want.	2016-09-17 11:30:01 -04:00
Andrew Gallant	5a0c873f61	Fixing, polishing and adding benchmarks.	2016-09-16 21:02:46 -04:00
Andrew Gallant	65fec147d6	rename	2016-09-16 18:27:34 -04:00
Andrew Gallant	7fbf2f014c	Reorganize some files.	2016-09-16 18:22:35 -04:00
Andrew Gallant	d22a3ca3e5	Improve the "bad literal" error message. Incidentally, this was done by using the Debug impl for `char` instead of the Display impl. Cute. Fixes #5.	2016-09-16 18:12:00 -04:00
Andrew Gallant	e9ec52b7f9	Update walkdir	2016-09-16 17:56:44 -04:00
Andrew Gallant	0d14c74e63	Some minor performance tweaks. This includes moving basename-only globs into separate regexes. The hope is that if the regex processes less input, it will be faster.	2016-09-16 16:13:28 -04:00
Andrew Gallant	1c5884b2f9	try again...	2016-09-16 07:12:06 -04:00
Andrew Gallant	8203a80ac7	fix tests	2016-09-16 06:58:10 -04:00
Andrew Gallant	0e46171e3b	Rework glob sets. We try to reduce the pressure on regexes and offload some of it to Aho-Corasick or exact lookups.	2016-09-15 22:06:04 -04:00
Andrew Gallant	f5c85827ce	Don't traverse directory stack if we don't need to.	2016-09-15 12:40:28 -04:00
Andrew Gallant	7cefc55238	Remove .agignore from ignore file list.	2016-09-15 12:40:08 -04:00
Andrew Gallant	92c918ebd9	--no-ignore implies --no-ignore-parent	2016-09-14 14:33:37 -04:00
Andrew Gallant	c24f8fd50f	Replace crossbeam with deque. deque appears faster.	2016-09-14 07:40:46 -04:00
Andrew Gallant	73272cf8a6	notice	2016-09-13 21:23:22 -04:00
				`@@ -1 +0,0 @@`
				`au BufWritePost *.rs silent!make ctags > /dev/null 2>&1`