Compare commits

...

20 Commits
0.1.0 ... 0.1.2

Author SHA1 Message Date
Andrew Gallant
8f87a4e8ac 0.1.2 2016-09-17 11:36:11 -04:00
Andrew Gallant
d27d3e675f bump grep 2016-09-17 11:34:27 -04:00
Andrew Gallant
bf5d873099 grep 0.1.1 2016-09-17 11:32:47 -04:00
Andrew Gallant
bc9d12c4c8 Improve ergonomics of benchsuite.
The runner now detects if commands exist and permits running incomplete
benchmarks.

Also, explicitly use Python 3 since that's what default Ubuntu 16.04 seems
to want.
2016-09-17 11:30:01 -04:00
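In practice, "detects if commands exist" means the Python runner (shown in the benchsuite diff below) checks `shutil.which` for each command and either skips it or reports it as missing. For illustration only, a rough Rust analogue of that existence check; the function name and the PATH walk are assumptions, not part of this change:

```rust
use std::env;

/// Rough analogue of the benchsuite's `shutil.which` check: a command
/// "exists" if some directory on PATH contains a file with its name.
/// (Illustrative sketch only; ignores Windows `.exe` suffixes.)
fn command_exists(name: &str) -> bool {
    env::var_os("PATH")
        .map(|paths| env::split_paths(&paths).any(|dir| dir.join(name).is_file()))
        .unwrap_or(false)
}
```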
Andrew Gallant
5a0c873f61 Fixing, polishing and adding benchmarks. 2016-09-16 21:02:46 -04:00
Andrew Gallant
65fec147d6 rename 2016-09-16 18:27:34 -04:00
Andrew Gallant
7fbf2f014c Reorganize some files. 2016-09-16 18:22:35 -04:00
Andrew Gallant
d22a3ca3e5 Improve the "bad literal" error message.
Incidentally, this was done by using the Debug impl for `char` instead
of the Display impl. Cute.

Fixes #5.
2016-09-16 18:12:00 -04:00
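The one-line change appears in the grep crate's `Display` impl further down: formatting the offending character with `{:?}` (its `Debug` impl) instead of `{}` (its `Display` impl) quotes and escapes it, which matters most for characters like `\n`. A minimal illustration:

```rust
fn main() {
    let chr = '\n';
    // Display: embeds the raw character, so the message wraps awkwardly
    // around a literal newline.
    println!("Literal '{}' not allowed.", chr);
    // Debug: prints a quoted, escaped character: Literal '\n' not allowed.
    println!("Literal {:?} not allowed.", chr);
}
```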
Andrew Gallant
e9ec52b7f9 Update walkdir 2016-09-16 17:56:44 -04:00
Andrew Gallant
0d14c74e63 Some minor performance tweaks.
This includes moving basename-only globs into separate regexes. The hope
is that if the regex processes less input, it will be faster.
2016-09-16 16:13:28 -04:00
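A hedged sketch of the idea (assumed structure and names, not the actual ignore-matching code): globs that can only ever match a basename, such as `*.o` or `Makefile`, get their own regex that is run against `Path::file_name()` alone, so the regex engine scans a short file name rather than the full path.

```rust
use std::path::Path;

use regex::Regex; // the regex crate is already a dependency here

/// Illustrative only: basename-only globs compiled separately from
/// globs that need to see the whole path.
struct GlobMatcher {
    basename_re: Regex, // built from globs like `*.o`
    fullpath_re: Regex, // built from globs like `src/**/*.c`
}

impl GlobMatcher {
    fn is_match(&self, path: &Path) -> bool {
        // Basename-only patterns only need the file name, which is
        // usually much shorter than the whole path.
        if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
            if self.basename_re.is_match(name) {
                return true;
            }
        }
        self.fullpath_re.is_match(&path.to_string_lossy())
    }
}
```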
Andrew Gallant
1c5884b2f9 try again... 2016-09-16 07:12:06 -04:00
Andrew Gallant
8203a80ac7 fix tests 2016-09-16 06:58:10 -04:00
Andrew Gallant
0e46171e3b Rework glob sets.
We try to reduce the pressure on regexes and offload some of it to
Aho-Corasick or exact lookups.
2016-09-15 22:06:04 -04:00
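In spirit (a sketch under assumed names, not the real implementation): many ignore globs are plain literals or bare extensions, so they can be answered with hash lookups, and only the remaining complex globs fall through to a combined regex or a multi-pattern matcher such as Aho-Corasick.

```rust
use std::collections::HashSet;

use regex::Regex;

/// Illustrative glob set: exact names and extensions resolve via hash
/// lookups; only leftover patterns ever reach the regex engine.
struct GlobSet {
    exact: HashSet<String>,      // e.g. "Makefile", "LICENSE"
    extensions: HashSet<String>, // e.g. "o", "pyc"
    regex: Option<Regex>,        // everything else, OR'd together
}

impl GlobSet {
    fn is_match(&self, name: &str) -> bool {
        if self.exact.contains(name) {
            return true;
        }
        if let Some((_, ext)) = name.rsplit_once('.') {
            if self.extensions.contains(ext) {
                return true;
            }
        }
        self.regex.as_ref().map_or(false, |re| re.is_match(name))
    }
}
```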
Andrew Gallant
f5c85827ce Don't traverse directory stack if we don't need to. 2016-09-15 12:40:28 -04:00
Andrew Gallant
7cefc55238 Remove .agignore from ignore file list. 2016-09-15 12:40:08 -04:00
Andrew Gallant
92c918ebd9 --no-ignore implies --no-ignore-parent 2016-09-14 14:33:37 -04:00
Andrew Gallant
c24f8fd50f Replace crossbeam with deque.
deque appears faster.
2016-09-14 07:40:46 -04:00
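For context, a sketch of what the switch buys: the parallel directory walker distributes work through a work-stealing deque, where the owning thread pushes and pops locally and idle threads steal from the other end. This example uses today's crossbeam-deque crate for illustration; it is not the `deque` 0.3 API this commit actually adopts.

```rust
use crossbeam_deque::{Steal, Worker};

fn main() {
    // The owner pushes and pops on its own deque; other threads steal.
    let worker: Worker<&str> = Worker::new_fifo();
    let stealer = worker.stealer();

    worker.push("src/main.rs");
    worker.push("src/args.rs");

    // The owning thread takes work locally...
    assert_eq!(worker.pop(), Some("src/main.rs"));

    // ...while an idle worker thread steals from the same deque.
    match stealer.steal() {
        Steal::Success(path) => println!("stole {}", path),
        Steal::Empty | Steal::Retry => {}
    }
}
```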
Andrew Gallant
73272cf8a6 notice 2016-09-13 21:23:22 -04:00
Andrew Gallant
4212a8b9cb 0.1.1 2016-09-13 21:21:45 -04:00
Andrew Gallant
983c7fd6f9 We don't use thread_local any more, so remove it. 2016-09-13 21:21:36 -04:00
21 changed files with 1030 additions and 352 deletions

Cargo.lock (generated, 70 changed lines)

@@ -1,24 +1,24 @@
[root] [root]
name = "ripgrep" name = "ripgrep"
version = "0.1.0" version = "0.1.2"
dependencies = [ dependencies = [
"crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)", "docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", "glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"grep 0.1.0", "grep 0.1.1",
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", "term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@@ -31,9 +31,12 @@ dependencies = [
] ]
[[package]] [[package]]
name = "crossbeam" name = "deque"
version = "0.2.10" version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "docopt" name = "docopt"
@@ -41,20 +44,25 @@ version = "0.6.83"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)", "rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", "strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]] [[package]]
name = "env_logger" name = "env_logger"
version = "0.3.4" version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "fnv"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "fs2" name = "fs2"
version = "0.2.5" version = "0.2.5"
@@ -72,12 +80,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "grep" name = "grep"
version = "0.1.0" version = "0.1.1"
dependencies = [ dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)", "regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@@ -126,7 +134,15 @@ dependencies = [
[[package]] [[package]]
name = "num_cpus" name = "num_cpus"
version = "1.0.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -134,14 +150,14 @@ dependencies = [
[[package]] [[package]]
name = "regex" name = "regex"
version = "0.1.76" version = "0.1.77"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)", "aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)", "memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)", "utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@@ -185,7 +201,7 @@ dependencies = [
[[package]] [[package]]
name = "thread_local" name = "thread_local"
version = "0.2.6" version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -198,7 +214,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "walkdir" name = "walkdir"
version = "0.1.6" version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -217,9 +233,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata] [metadata]
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66" "checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" "checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
"checksum docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42c6077823a361410c37d47c2535b73a190cbe10838dc4f400fe87c10c8c3b" "checksum docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42c6077823a361410c37d47c2535b73a190cbe10838dc4f400fe87c10c8c3b"
"checksum env_logger 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "82dcb9ceed3868a03b335657b85a159736c961900f7e7747d3b0b97b9ccb5ccb" "checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8e8af7b5408ab0c4910cad114c8f9eb454bf75df7afe8964307eeafb68a13a5e"
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef" "checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb" "checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
@@ -228,16 +245,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054" "checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20" "checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752" "checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
"checksum num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a859041cbf7a70ea1ece4b87d1a2c6ef364dcb68749c88db1f97304b9ec09d5f" "checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
"checksum regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)" = "63b49f873f36ddc838d773972511e5fed2ef7350885af07d58e2f48ce8073dcd" "checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd" "checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b" "checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023" "checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e" "checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
"checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a" "checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a"
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03" "checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
"checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d" "checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f" "checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
"checksum walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d42144c31c9909882ce76e696b306b88a5b091721251137d5d522d1ef3da7cf9" "checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"

Cargo.toml

@@ -1,6 +1,6 @@
[package] [package]
name = "ripgrep" name = "ripgrep"
version = "0.1.0" #:version version = "0.1.2" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"] authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """ description = """
Line oriented search tool using Rust's regex library. Combines the raw Line oriented search tool using Rust's regex library. Combines the raw
@@ -23,10 +23,11 @@ name = "integration"
path = "tests/tests.rs" path = "tests/tests.rs"
[dependencies] [dependencies]
crossbeam = "0.2" deque = "0.3"
docopt = "0.6" docopt = "0.6"
env_logger = "0.3" env_logger = "0.3"
grep = { version = "0.1", path = "grep" } fnv = "1.0"
grep = { version = "0.1.1", path = "grep" }
lazy_static = "0.2" lazy_static = "0.2"
libc = "0.2" libc = "0.2"
log = "0.3" log = "0.3"
@@ -36,7 +37,6 @@ num_cpus = "1"
regex = "0.1.76" regex = "0.1.76"
rustc-serialize = "0.3" rustc-serialize = "0.3"
term = "0.4" term = "0.4"
thread_local = "0.2"
walkdir = "0.1" walkdir = "0.1"
[target.'cfg(windows)'.dependencies] [target.'cfg(windows)'.dependencies]

Makefile (deleted)

@@ -1,14 +0,0 @@
all:
echo Nothing to do...
ctags:
ctags --options=ctags.rust --languages=Rust src/*.rs src/*/*.rs
docs:
cargo doc
in-dir ./target/doc fix-perms
rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/
push:
git push origin master
git push github master

README.md

@@ -1,3 +1,6 @@
**UNDER DEVELOPMENT.**
ripgrep (rg) ripgrep (rg)
------------ ------------
ripgrep combines the usability of the silver searcher with the raw speed of grep. ripgrep combines the usability of the silver searcher with the raw speed of
grep.

benches/README.md (new file, 5 lines)

@@ -0,0 +1,5 @@
These are internal microbenchmarks for tracking the performance of individual
components inside of ripgrep. At the moment, they aren't heavily used.
For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
directory.

benchsuite (benchmark runner script)

@@ -1,4 +1,4 @@
#!/usr/bin/env python #!/usr/bin/env python3
''' '''
benchsuite is a benchmark runner for comparing command line search tools. benchsuite is a benchmark runner for comparing command line search tools.
@@ -10,6 +10,7 @@ import os
import os.path as path import os.path as path
from multiprocessing import cpu_count from multiprocessing import cpu_count
import re import re
import shutil
import statistics import statistics
import subprocess import subprocess
import sys import sys
@@ -39,13 +40,23 @@ LINUX_CLONE = 'git://github.com/BurntSushi/linux'
GREP_ASCII = {'LC_ALL': 'C'} GREP_ASCII = {'LC_ALL': 'C'}
GREP_UNICODE = {'LC_ALL': 'en_US.UTF-8'} GREP_UNICODE = {'LC_ALL': 'en_US.UTF-8'}
# Sift tries really hard to search everything by default. In our code search
# benchmarks, we don't want that.
SIFT = [
'sift',
'--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
]
def bench_linux_literal_default(suite_dir): def bench_linux_literal_default(suite_dir):
''' '''
Benchmark the speed of a literal using *default* settings. Benchmark the speed of a literal using *default* settings.
This is a purposefully unfair benchmark for use in performance This is a purposefully unfair benchmark for use in performance
analysis, but it is pedagogically useful. analysis, but it is pedagogically useful to demonstrate how
default behaviors differ.
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@@ -55,8 +66,6 @@ def bench_linux_literal_default(suite_dir):
kwargs['cwd'] = cwd kwargs['cwd'] = cwd
return Command(*args, **kwargs) return Command(*args, **kwargs)
# N.B. This is a purposefully unfair benchmark for illustrative purposes
# of how the default modes for each search tool differ.
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', pat]), mkcmd('rg', ['rg', pat]),
mkcmd('ag', ['ag', pat]), mkcmd('ag', ['ag', pat]),
@@ -64,10 +73,12 @@ def bench_linux_literal_default(suite_dir):
# doesn't read gitignore files. Instead, it has a file whitelist # doesn't read gitignore files. Instead, it has a file whitelist
# that happens to match up exactly with the gitignores for this search. # that happens to match up exactly with the gitignores for this search.
mkcmd('ucg', ['ucg', pat]), mkcmd('ucg', ['ucg', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}), # I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
# default, but I'd guess it to be on most desktop systems.
mkcmd('pt', ['pt', pat]), mkcmd('pt', ['pt', pat]),
# sift reports an extra line here for a binary file matched. # sift reports an extra line here for a binary file matched.
mkcmd('sift', ['sift', pat]), mkcmd('sift', ['sift', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
]) ])
@@ -76,8 +87,9 @@ def bench_linux_literal(suite_dir):
Benchmark the speed of a literal, attempting to be fair. Benchmark the speed of a literal, attempting to be fair.
This tries to use the minimum set of options available in all tools This tries to use the minimum set of options available in all tools
to test how fast they are. For example, it makes sure there is no to test how fast they are. For example, it makes sure there is
case insensitive matching and that line numbers are computed. no case insensitive matching and that line numbers are computed
(because some tools don't permit disabling line numbers).
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@@ -88,19 +100,16 @@ def bench_linux_literal(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]),
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]), mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]),
mkcmd('ag', ['ag', '-s', pat]), mkcmd('pt (ignore)', ['pt', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]), mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]), mkcmd('git grep (ignore)', [
mkcmd('git grep', [
'git', 'grep', '-I', '-n', pat, 'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}), ], env={'LC_ALL': 'C'}),
mkcmd('pt', ['pt', pat]), mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('sift', [ mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
]) ])
@@ -120,23 +129,21 @@ def bench_linux_literal_casei(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]), mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]), mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
mkcmd('rg-novcs-mmap', [ mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
'rg', '--mmap', '--no-ignore', '-n', '-i', pat, mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
]), # It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
mkcmd('ag', ['ag', '-i', pat]), # since that is certainly what ripgrep is doing, but this is for an
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]), # ASCII literal, so we should give `git grep` all the opportunity to
mkcmd('ucg', ['ucg', '-i', pat]), # do its best.
mkcmd('git grep', [ mkcmd('git grep (ignore)', [
'git', 'grep', '-I', '-n', '-i', pat, 'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}), ], env={'LC_ALL': 'C'}),
# sift yields more matches than it should here. Specifically, it gets mkcmd('rg (whitelist)', [
# matches in Module.symvers and System.map in the repo root. Both of 'rg', '-n', '-i', '--no-ignore', '-tall', pat,
# those files show up in the repo root's .gitignore file.
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
]), ]),
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
]) ])
@@ -156,20 +163,16 @@ def bench_linux_re_literal_suffix(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), mkcmd('ag (ignore)', ['ag', '-s', pat]),
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]), mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd( mkcmd(
'git grep', 'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('sift', [ mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
]),
]) ])
@@ -189,22 +192,18 @@ def bench_linux_word(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-w', pat]), mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]), mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
mkcmd('rg-novcs-mmap', [ mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
]),
mkcmd('ag', ['ag', '-s', '-w', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
mkcmd( mkcmd(
'git grep', 'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', '-w', pat], ['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('sift', [ mkcmd('rg (whitelist)', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat, 'rg', '-n', '-w', '--no-ignore', '-tall', pat,
]), ]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
]) ])
@@ -212,7 +211,8 @@ def bench_linux_unicode_greek(suite_dir):
''' '''
Benchmark matching of a Unicode category. Benchmark matching of a Unicode category.
Only three tools (ripgrep, sift and pt) support this. Only three tools (ripgrep, sift and pt) support this. We omit
pt because it is too slow.
''' '''
require(suite_dir, 'linux') require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR) cwd = path.join(suite_dir, LINUX_DIR)
@@ -224,15 +224,7 @@ def bench_linux_unicode_greek(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg', ['rg', '-n', pat]),
# sift tries to search a bunch of PDF files and clutters up the mkcmd('sift', SIFT + ['-n', '--git', pat]),
# results, even though --binary-skip is provided. They are excluded
# here explicitly, but don't have a measurable impact on performance.
mkcmd('sift', [
'sift', '-n', '--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
pat,
]),
]) ])
@@ -252,15 +244,7 @@ def bench_linux_unicode_greek_casei(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]), mkcmd('rg', ['rg', '-n', '-i', pat]),
# sift tries to search a bunch of PDF files and clutters up the mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
# results, even though --binary-skip is provided. They are excluded
# here explicitly, but don't have a measurable impact on performance.
mkcmd('sift', [
'sift', '-n', '--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
pat,
]),
]) ])
@@ -281,30 +265,25 @@ def bench_linux_unicode_word(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]), mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
mkcmd('rg-novcs-mmap', [ mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
'rg', '--mmap', '--no-ignore', '-n', pat,
]),
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
mkcmd('ag-novcs (no Unicode)', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
mkcmd( mkcmd(
'git grep', 'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'}, env={'LC_ALL': 'en_US.UTF-8'},
), ),
mkcmd( mkcmd(
'git grep (no Unicode)', 'git grep (ignore) (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('sift (no Unicode)', [ mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, mkcmd('rg (whitelist) (ASCII)', [
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
]), ]),
mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
]) ])
@@ -326,30 +305,25 @@ def bench_linux_no_literal(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg-whitelist', ['rg', '-tall', '--no-ignore', '-n', pat]), mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]), mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
mkcmd('rg-whitelist (no Unicode)', [ mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
'rg', '-tall', '--no-ignore', '-n', '(?-u)' + pat,
]),
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
mkcmd('ag-novcs (no Unicode)', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
mkcmd( mkcmd(
'git grep', 'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'}, env={'LC_ALL': 'en_US.UTF-8'},
), ),
mkcmd( mkcmd(
'git grep (no Unicode)', 'git grep (ignore) (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('sift (no Unicode)', [ mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat, mkcmd('rg (whitelist) (ASCII)', [
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
]), ]),
mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
]) ])
@@ -371,21 +345,15 @@ def bench_linux_alternates(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]), mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]), mkcmd('ag (ignore)', ['ag', '-s', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', pat,
]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd( mkcmd(
'git grep', 'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat], ['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
]) ])
@@ -400,21 +368,15 @@ def bench_linux_alternates_casei(suite_dir):
return Command(*args, **kwargs) return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]), mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]), mkcmd('ag (ignore)', ['ag', '-i', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
]),
mkcmd('ag', ['ag', '-i', pat]),
mkcmd('ag-novcs', [
'ag', '--skip-vcs-ignores', '-i', pat,
]),
mkcmd('ucg', ['ucg', '-i', pat]),
mkcmd( mkcmd(
'git grep', 'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', '-i', pat], ['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'}, env={'LC_ALL': 'C'},
), ),
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
]) ])
@@ -423,22 +385,159 @@ def bench_subtitles_en_literal(suite_dir):
Benchmark the speed of an ASCII string literal. Benchmark the speed of an ASCII string literal.
''' '''
require(suite_dir, 'subtitles-en') require(suite_dir, 'subtitles-en')
ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE) en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = 'Sherlock Holmes' pat = 'Sherlock Holmes'
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', pat, en]),
Command('rg (no line numbers)', ['rg', pat, ru]), Command('pt', ['pt', '-N', pat, en]),
Command('ag', ['ag', '-s', pat, ru]), Command('sift', ['sift', pat, en]),
Command('ucg', ['ucg', '--nosmart-case', pat, ru]), Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII), Command('rg (lines)', ['rg', '-n', pat, en]),
Command('grep (no line numbers)', [ Command('ag (lines)', ['ag', '-s', pat, en]),
'grep', '-a', pat, ru, Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
Command('pt (lines)', ['pt', pat, en]),
Command('sift (lines)', ['sift', '-n', pat, en]),
Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
])
def bench_subtitles_en_literal_casei(suite_dir):
'''
Benchmark the speed of a Unicode-y string case insensitively.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = 'Sherlock Holmes'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-i', pat, en]),
Command('grep', ['grep', '-ai', pat, en], env=GREP_UNICODE),
Command('grep (ASCII)', [
'grep', '-E', '-ai', pat, en,
], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
])
def bench_subtitles_en_literal_word(suite_dir):
'''
Benchmark the speed of finding a literal inside word boundaries.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = 'Sherlock Holmes'
return Benchmark(pattern=pat, commands=[
Command('rg (ASCII)', [
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
]),
Command('ag (ASCII)', ['ag', '-sw', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-anw', pat, en,
], env=GREP_ASCII),
Command('rg', ['rg', '-nw', pat, en]),
Command('grep', ['grep', '-anw', pat, en], env=GREP_UNICODE),
])
def bench_subtitles_en_alternate(suite_dir):
'''
Benchmark the speed of a set of alternate literals.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = '|'.join([
'Sherlock Holmes',
'John Watson',
'Irene Adler',
'Inspector Lestrade',
'Professor Moriarty',
])
return Benchmark(pattern=pat, commands=[
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (lines)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
Command('rg', ['rg', pat, en]),
Command('grep', [
'grep', '-E', '-a', pat, en,
], env=GREP_ASCII),
])
def bench_subtitles_en_alternate_casei(suite_dir):
'''
Benchmark the speed of a set of alternate literals.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = '|'.join([
'Sherlock Holmes',
'John Watson',
'Irene Adler',
'Inspector Lestrade',
'Professor Moriarty',
])
return Benchmark(pattern=pat, commands=[
Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, en,
], env=GREP_ASCII),
Command('rg', ['rg', '-n', '-i', pat, en]),
Command('grep', ['grep', '-E', '-ani', pat, en], env=GREP_UNICODE),
])
def bench_subtitles_en_surrounding_words(suite_dir):
'''
Benchmark a more complex regex with an inner literal.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = r'\w+\s+Holmes\s+\w+'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]),
Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
])
def bench_subtitles_en_no_literal(suite_dir):
'''
Benchmark the speed of a regex with no literals.
Note that we don't even try to run grep with Unicode support
on this one. While it should eventually get the right answer,
I killed it after it had already been running for two minutes
and showed no signs of finishing soon.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('pt', ['pt', pat, ru]),
Command('pt (no line numbers)', ['pt', '-N', pat, ru]),
Command('sift', ['sift', '-n', pat, ru]),
Command('sift (no line numbers)', ['sift', pat, ru]),
]) ])
@@ -451,18 +550,16 @@ def bench_subtitles_ru_literal(suite_dir):
pat = 'Шерлок Холмс' # Sherlock Holmes pat = 'Шерлок Холмс' # Sherlock Holmes
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]), Command('pt', ['pt', '-N', pat, ru]),
Command('ag', ['ag', '-s', pat, ru]), Command('sift', ['sift', pat, ru]),
Command('ucg', ['ucg', '--nosmart-case', pat, ru]), Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII), Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('grep (no line numbers)', [ Command('ag (lines)', ['ag', '-s', pat, ru]),
'grep', '-a', pat, ru, Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
], env=GREP_ASCII), Command('pt (lines)', ['pt', pat, ru]),
Command('pt', ['pt', pat, ru]), Command('sift (lines)', ['sift', '-n', pat, ru]),
Command('pt (no line numbers)', ['pt', '-N', pat, ru]), Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
Command('sift', ['sift', '-n', pat, ru]),
Command('sift (no line numbers)', ['sift', pat, ru]),
]) ])
@@ -475,13 +572,14 @@ def bench_subtitles_ru_literal_casei(suite_dir):
pat = 'Шерлок Холмс' # Sherlock Holmes pat = 'Шерлок Холмс' # Sherlock Holmes
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', '-i', pat, ru]), Command('rg', ['rg', '-i', pat, ru]),
Command('ag (not Unicode)', ['ag', '-i', pat, ru]), Command('grep', ['grep', '-ai', pat, ru], env=GREP_UNICODE),
Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]), Command('grep (ASCII)', [
Command('grep', ['grep', '-ani', pat, ru], env=GREP_UNICODE), 'grep', '-E', '-ai', pat, ru,
Command('grep (not Unicode)', [
'grep', '-E', '-ani', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
]) ])
@@ -494,15 +592,15 @@ def bench_subtitles_ru_literal_word(suite_dir):
pat = 'Шерлок Холмс' # Sherlock Holmes pat = 'Шерлок Холмс' # Sherlock Holmes
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-nw', pat, ru]), Command('rg (ASCII)', [
Command('rg (not Unicode)', [
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru, 'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
]), ]),
Command('ag (not Unicode)', ['ag', '-sw', pat, ru]), Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
Command('ucg (not Unicode)', ['ucg', '--nosmart-case', pat, ru]), Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (not Unicode)', [ Command('grep (ASCII)', [
'grep', '-anw', pat, ru, 'grep', '-anw', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('rg', ['rg', '-nw', pat, ru]),
Command('grep', ['grep', '-anw', pat, ru], env=GREP_UNICODE), Command('grep', ['grep', '-anw', pat, ru], env=GREP_UNICODE),
]) ])
@@ -522,11 +620,14 @@ def bench_subtitles_ru_alternate(suite_dir):
]) ])
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]), Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]), Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('ucg', ['ucg', '--nosmart-case', pat, ru]), Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_ASCII), Command('grep (lines)', [
Command('grep (no line numbers)', [ 'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
Command('rg', ['rg', pat, ru]),
Command('grep', [
'grep', '-E', '-a', pat, ru, 'grep', '-E', '-a', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
]) ])
@@ -547,12 +648,32 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
]) ])
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', '-i', pat, ru]), Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]), Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE), Command('grep (ASCII)', [
Command('grep (not Unicode)', [
'grep', '-E', '-ani', pat, ru, 'grep', '-E', '-ani', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
])
def bench_subtitles_ru_surrounding_words(suite_dir):
'''
Benchmark a more complex regex with an inner literal.
'''
require(suite_dir, 'subtitles-en')
ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME)
pat = r'\w+\s+Холмс\s+\w+'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
]) ])
@@ -571,9 +692,10 @@ def bench_subtitles_ru_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[ return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]), Command('rg', ['rg', '-n', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]), Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ucg (no Unicode)', ['ucg', '--nosmart-case', pat, ru]), Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('grep (no Unicode)', [ Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru, 'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII), ], env=GREP_ASCII),
]) ])
@@ -597,6 +719,23 @@ class MissingDependencies(Exception):
return 'MissingDependency(%s)' % repr(self.missing_names) return 'MissingDependency(%s)' % repr(self.missing_names)
class MissingCommands(Exception):
'''
A missing command exception.
This exception occurs when running a command in a benchmark
where the command could not be found on the current system.
:ivar list(str) missing_names:
The names of the command binaries that could not be found.
'''
def __init__(self, missing_names):
self.missing_names = sorted(set(missing_names))
def __str__(self):
return 'MissingCommands(%s)' % repr(self.missing_names)
class Benchmark(object): class Benchmark(object):
''' '''
A single benchmark corresponding to a grouping of commands. A single benchmark corresponding to a grouping of commands.
@@ -606,7 +745,8 @@ class Benchmark(object):
''' '''
def __init__(self, name=None, pattern=None, commands=None, def __init__(self, name=None, pattern=None, commands=None,
warmup_count=1, count=3, line_count=True): warmup_count=1, count=3, line_count=True,
allow_missing_commands=False):
''' '''
Create a single benchmark. Create a single benchmark.
@@ -644,15 +784,37 @@ class Benchmark(object):
self.warmup_count = warmup_count self.warmup_count = warmup_count
self.count = count self.count = count
self.line_count = line_count self.line_count = line_count
self.allow_missing_commands = allow_missing_commands
def raise_if_missing(self):
'''
Raises a MissingCommands exception if applicable.
A MissingCommands exception is raised when the following
criteria are met: 1) allow_missing_commands is False, and 2) at
least one command in this benchmark could not be found on this
system.
'''
missing_commands = \
[c.binary_name for c in self.commands if not c.exists()]
if not self.allow_missing_commands and len(missing_commands) > 0:
raise MissingCommands(missing_commands)
def run(self): def run(self):
''' '''
Runs this benchmark and returns the results. Runs this benchmark and returns the results.
:rtype: Result :rtype: Result
:raises:
MissingCommands if any command doesn't exist.
(Unless allow_missing_commands is enabled.)
''' '''
self.raise_if_missing()
result = Result(self) result = Result(self)
for cmd in self.commands: for cmd in self.commands:
if self.allow_missing_commands and not cmd.exists():
# Skip this command if we're OK with it.
continue
# Do a warmup first. # Do a warmup first.
for _ in range(self.warmup_count): for _ in range(self.warmup_count):
self.run_one(cmd) self.run_one(cmd)
@@ -677,6 +839,8 @@ class Benchmark(object):
it is the number of lines in the search output. it is the number of lines in the search output.
:rtype: int :rtype: int
''' '''
if not cmd.exists():
raise MissingCommand(cmd.cmd[0])
cmd.kwargs['stderr'] = subprocess.DEVNULL cmd.kwargs['stderr'] = subprocess.DEVNULL
if self.line_count: if self.line_count:
cmd.kwargs['stdout'] = subprocess.PIPE cmd.kwargs['stdout'] = subprocess.PIPE
@@ -746,6 +910,8 @@ class Result(object):
means = [] means = []
for cmd in self.benchmark.commands: for cmd in self.benchmark.commands:
mean, _ = self.distribution_for(cmd) mean, _ = self.distribution_for(cmd)
if mean is None:
continue
means.append((cmd, mean)) means.append((cmd, mean))
return min(means, key=lambda tup: tup[1])[0] return min(means, key=lambda tup: tup[1])[0]
@@ -768,16 +934,18 @@ class Result(object):
''' '''
Returns the distribution (mean +/- std) of the given command. Returns the distribution (mean +/- std) of the given command.
If there are no samples for this command (i.e., it was skipped),
then return ``(None, None)``.
:rtype: (float, float) :rtype: (float, float)
:returns: :returns:
A tuple containing the mean and standard deviation, in that A tuple containing the mean and standard deviation, in that
order. order.
''' '''
mean = statistics.mean( samples = list(s['duration'] for s in self.samples_for(cmd))
s['duration'] for s in self.samples_for(cmd)) if len(samples) == 0:
stdev = statistics.stdev( return None, None
s['duration'] for s in self.samples_for(cmd)) return statistics.mean(samples), statistics.stdev(samples)
return mean, stdev
class Command(object): class Command(object):
@@ -807,6 +975,15 @@ class Command(object):
self.args = args self.args = args
self.kwargs = kwargs self.kwargs = kwargs
def exists(self):
'Returns true if and only if this command exists.'
return shutil.which(self.binary_name) is not None
@property
def binary_name(self):
'Return the binary name of this command.'
return self.cmd[0]
def run(self): def run(self):
''' '''
Runs this command and returns its status. Runs this command and returns its status.
@@ -947,7 +1124,8 @@ def download(suite_dir, choices):
sys.exit(1) sys.exit(1)
def collect_benchmarks(suite_dir, filter_pat=None): def collect_benchmarks(suite_dir, filter_pat=None,
allow_missing_commands=False):
''' '''
Return an iterable of all runnable benchmarks. Return an iterable of all runnable benchmarks.
@@ -969,6 +1147,9 @@ def collect_benchmarks(suite_dir, filter_pat=None):
continue continue
try: try:
benchmark = globals()[fun](suite_dir) benchmark = globals()[fun](suite_dir)
benchmark.name = name
benchmark.allow_missing_commands = allow_missing_commands
benchmark.raise_if_missing()
except MissingDependencies as e: except MissingDependencies as e:
eprint( eprint(
'missing: %s, skipping benchmark %s (try running with: %s)' % ( 'missing: %s, skipping benchmark %s (try running with: %s)' % (
@@ -976,24 +1157,32 @@ def collect_benchmarks(suite_dir, filter_pat=None):
name, name,
' '.join(['--download %s' % n for n in e.missing_names]), ' '.join(['--download %s' % n for n in e.missing_names]),
)) ))
except MissingCommands as e:
fmt = 'missing commands: %s, skipping benchmark %s ' \
'(run with --allow-missing to run incomplete benchmarks)'
eprint(fmt % (', '.join(e.missing_names), name))
continue continue
benchmark.name = name
yield benchmark yield benchmark
def main(): def main():
download_choices = ['all', 'linux', 'subtitles-en', 'subtitles-ru']
p = argparse.ArgumentParser('Command line search tool benchmark suite.') p = argparse.ArgumentParser('Command line search tool benchmark suite.')
p.add_argument( p.add_argument(
'--dir', metavar='PATH', default=os.getcwd(), '--dir', metavar='PATH', default=os.getcwd(),
help='The directory in which to download data and perform searches.') help='The directory in which to download data and perform searches.')
p.add_argument( p.add_argument(
'--download', metavar='CORPUS', action='append', '--download', metavar='CORPUS', action='append',
choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'], choices=download_choices,
help='Download and prepare corpus data, then exit without running ' help='Download and prepare corpus data, then exit without running '
'any benchmarks. Note that this command is intended to be ' 'any benchmarks. Note that this command is intended to be '
'idempotent. WARNING: This downloads over a gigabyte of data, ' 'idempotent. WARNING: This downloads over a gigabyte of data, '
'and also includes building the Linux kernel. If "all" is used ' 'and also includes building the Linux kernel. If "all" is used '
'then the total uncompressed size is around 13 GB.') 'then the total uncompressed size is around 13 GB. '
'Choices: %s' % ', '.join(download_choices))
p.add_argument(
'--allow-missing', action='store_true',
help='Permit benchmarks to run even if some commands are missing.')
p.add_argument( p.add_argument(
'-f', '--force', action='store_true', '-f', '--force', action='store_true',
help='Overwrite existing files if there is a conflict.') help='Overwrite existing files if there is a conflict.')
@@ -1009,6 +1198,13 @@ def main():
help='A regex pattern that will only run benchmarks that match.') help='A regex pattern that will only run benchmarks that match.')
args = p.parse_args() args = p.parse_args()
if args.list:
benchmarks = collect_benchmarks(
args.dir, filter_pat=args.bench,
allow_missing_commands=args.allow_missing)
for b in benchmarks:
print(b.name)
sys.exit(0)
if args.download is not None and len(args.download) > 0: if args.download is not None and len(args.download) > 0:
download(args.dir, args.download) download(args.dir, args.download)
sys.exit(0) sys.exit(0)
@@ -1028,7 +1224,9 @@ def main():
raw_csv_wtr = csv.DictWriter(raw_handle, fields) raw_csv_wtr = csv.DictWriter(raw_handle, fields)
raw_csv_wtr.writerow({x: x for x in fields}) raw_csv_wtr.writerow({x: x for x in fields})
benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench) benchmarks = collect_benchmarks(
args.dir, filter_pat=args.bench,
allow_missing_commands=args.allow_missing)
for i, b in enumerate(benchmarks): for i, b in enumerate(benchmarks):
result = b.run() result = b.run()
fastest_cmd = result.fastest_cmd() fastest_cmd = result.fastest_cmd()
@@ -1042,6 +1240,12 @@ def main():
for cmd in b.commands: for cmd in b.commands:
name = cmd.name name = cmd.name
mean, stdev = result.distribution_for(cmd) mean, stdev = result.distribution_for(cmd)
if mean is None:
# If we couldn't get a distribution for this command then
# it was skipped.
print('{name:{pad}} SKIPPED'.format(
name=name, pad=max_name_len + 2))
continue
line_counts = result.line_counts_for(cmd) line_counts = result.line_counts_for(cmd)
show_fast_cmd, show_line_counts = '', '' show_fast_cmd, show_line_counts = '', ''
if fastest_cmd.name == cmd.name: if fastest_cmd.name == cmd.name:

View File

@@ -1,11 +0,0 @@
--langdef=Rust
--langmap=Rust:.rs
--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/
--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/
--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/
--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/

grep/Cargo.toml

@@ -1,6 +1,6 @@
[package] [package]
name = "grep" name = "grep"
version = "0.1.0" #:version version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"] authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """ description = """
Fast line oriented regex searching as a library. Fast line oriented regex searching as a library.

grep: `Error` Display impl

@@ -62,7 +62,7 @@ impl fmt::Display for Error {
match *self { match *self {
Error::Regex(ref err) => err.fmt(f), Error::Regex(ref err) => err.fmt(f),
Error::LiteralNotAllowed(chr) => { Error::LiteralNotAllowed(chr) => {
write!(f, "Literal '{}' not allowed.", chr) write!(f, "Literal {:?} not allowed.", chr)
} }
Error::__Nonexhaustive => unreachable!(), Error::__Nonexhaustive => unreachable!(),
} }

View File

@@ -10,6 +10,10 @@ use {Error, Result};
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this /// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
/// function panics. /// function panics.
pub fn remove(expr: Expr, byte: u8) -> Result<Expr> { pub fn remove(expr: Expr, byte: u8) -> Result<Expr> {
// TODO(burntsushi): There is a bug in this routine where only `\n` is
// handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated
// to proper character classes instead of the special `AnyCharNoNL` and
// `AnyByteNoNL` classes.
use syntax::Expr::*; use syntax::Expr::*;
assert!(byte <= 0x7F); assert!(byte <= 0x7F);
let chr = byte as char; let chr = byte as char;
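
A note on the TODO above: the intended fix is that `.` (any character, or any byte) should be rewritten into an explicit class with the removed byte carved out, instead of being mapped to the special "no newline" variants, which only happen to be correct when the removed byte is `\n`. The sketch below illustrates that rewrite on a hypothetical, simplified expression type; it is not grep's `nonl::remove` and does not use the real `regex-syntax` `Expr` API.

// Hypothetical mini-AST used only to illustrate the rewrite described in the
// TODO; the real code operates on regex_syntax::Expr.
#[derive(Debug, PartialEq)]
enum Expr {
    Any,                      // `.` with the `s` flag: matches every char
    Class(Vec<(char, char)>), // a set of inclusive character ranges
    Concat(Vec<Expr>),
}

/// Rewrite `expr` so it can never match `chr`, by turning `Any` into an
/// explicit class that excludes `chr` and splitting existing ranges.
fn remove_char(expr: Expr, chr: char) -> Expr {
    assert!(chr.is_ascii() && chr != '\0');
    let below = char::from_u32(chr as u32 - 1).unwrap();
    let above = char::from_u32(chr as u32 + 1).unwrap();
    match expr {
        // `.` becomes "everything below chr" plus "everything above chr".
        Expr::Any => Expr::Class(vec![('\0', below), (above, char::MAX)]),
        // Any range containing `chr` is split around it.
        Expr::Class(ranges) => {
            let mut out = vec![];
            for (s, e) in ranges {
                if chr < s || chr > e {
                    out.push((s, e)); // range doesn't contain `chr`: keep it
                    continue;
                }
                if s < chr {
                    out.push((s, below));
                }
                if chr < e {
                    out.push((above, e));
                }
            }
            Expr::Class(out)
        }
        Expr::Concat(exprs) => {
            Expr::Concat(exprs.into_iter().map(|e| remove_char(e, chr)).collect())
        }
    }
}

fn main() {
    // `.` with `\n` removed becomes [\x00-\x09\x0B-\u{10FFFF}].
    println!("{:?}", remove_char(Expr::Any, '\n'));
}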

View File

@@ -1 +0,0 @@
au BufWritePost *.rs silent!make ctags > /dev/null 2>&1

View File

@@ -124,6 +124,7 @@ Less common options:
--no-ignore --no-ignore
Don't respect ignore files (.gitignore, .rgignore, etc.) Don't respect ignore files (.gitignore, .rgignore, etc.)
This implies --no-ignore-parent.
--no-ignore-parent --no-ignore-parent
Don't respect ignore files in parent directories. Don't respect ignore files in parent directories.
@@ -338,7 +339,9 @@ impl RawArgs {
line_number: !self.flag_no_line_number && self.flag_line_number, line_number: !self.flag_no_line_number && self.flag_line_number,
mmap: mmap, mmap: mmap,
no_ignore: self.flag_no_ignore, no_ignore: self.flag_no_ignore,
no_ignore_parent: self.flag_no_ignore_parent, no_ignore_parent:
// --no-ignore implies --no-ignore-parent
self.flag_no_ignore_parent || self.flag_no_ignore,
quiet: self.flag_quiet, quiet: self.flag_quiet,
replace: self.flag_replace.clone().map(|s| s.into_bytes()), replace: self.flag_replace.clone().map(|s| s.into_bytes()),
text: self.flag_text, text: self.flag_text,

View File

@@ -21,6 +21,7 @@ additional rules such as whitelists (prefix of `!`) or directory-only globs
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't // TODO(burntsushi): Implement something similar, but for Mercurial. We can't
// use this exact implementation because hgignore files are different. // use this exact implementation because hgignore files are different.
use std::cell::RefCell;
use std::error::Error as StdError; use std::error::Error as StdError;
use std::fmt; use std::fmt;
use std::fs::File; use std::fs::File;
@@ -30,6 +31,7 @@ use std::path::{Path, PathBuf};
use regex; use regex;
use glob; use glob;
use pathutil::strip_prefix;
/// Represents an error that can occur when parsing a gitignore file. /// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)] #[derive(Debug)]
@@ -110,37 +112,37 @@ impl Gitignore {
/// same directory as this gitignore file. /// same directory as this gitignore file.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match { pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
let mut path = path.as_ref(); let mut path = path.as_ref();
if let Ok(p) = path.strip_prefix(&self.root) { if let Some(p) = strip_prefix("./", path) {
path = p; path = p;
} }
self.matched_utf8(&*path.to_string_lossy(), is_dir) if let Some(p) = strip_prefix(&self.root, path) {
path = p;
}
self.matched_stripped(path, is_dir)
} }
/// Like matched, but takes a path that has already been stripped and /// Like matched, but takes a path that has already been stripped.
/// converted to UTF-8. pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match { thread_local! {
// A single regex with a bunch of alternations of glob patterns is static MATCHES: RefCell<Vec<usize>> = {
// unfortunately typically faster than a RegexSet, so we use it as a RefCell::new(vec![])
// first pass filter. We still need to run the RegexSet to get the most
// recently defined glob that matched.
if !self.set.is_match(path) {
return Match::None;
}
// The regex set can't actually pick the right glob that matched all
// on its own. In particular, some globs require that only directories
// can match. Thus, only accept a match from the regex set if the given
// path satisfies the corresponding glob's directory criteria.
for i in self.set.matches(path).iter().rev() {
let pat = &self.patterns[i];
if !pat.only_dir || is_dir {
return if pat.whitelist {
Match::Whitelist(pat)
} else {
Match::Ignored(pat)
};
} }
} };
Match::None MATCHES.with(|matches| {
let mut matches = matches.borrow_mut();
self.set.matches_into(path, &mut *matches);
for &i in matches.iter().rev() {
let pat = &self.patterns[i];
if !pat.only_dir || is_dir {
return if pat.whitelist {
Match::Whitelist(pat)
} else {
Match::Ignored(pat)
};
}
}
Match::None
})
} }
/// Returns the total number of ignore patterns. /// Returns the total number of ignore patterns.
@@ -390,6 +392,7 @@ mod tests {
ignored!(ig23, ROOT, "foo", "./foo"); ignored!(ig23, ROOT, "foo", "./foo");
ignored!(ig24, ROOT, "target", "grep/target"); ignored!(ig24, ROOT, "target", "grep/target");
ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock"); ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
not_ignored!(ignot1, ROOT, "amonths", "months"); not_ignored!(ignot1, ROOT, "amonths", "months");
not_ignored!(ignot2, ROOT, "monthsa", "months"); not_ignored!(ignot2, ROOT, "monthsa", "months");
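
The new `matched_stripped` above reuses a thread-local `Vec<usize>` as a scratch buffer for `matches_into`, so the hot matching path does not allocate a fresh vector on every call. Below is a minimal, self-contained sketch of that pattern using only the standard library; `matches_into` and `is_ignored` here are made-up stand-ins, not the real `Gitignore` methods.

use std::cell::RefCell;

// Toy stand-in for the matcher: writes the indices of the patterns that
// match `path` into `into`, reusing the vector's capacity.
fn matches_into(path: &str, into: &mut Vec<usize>) {
    into.clear();
    if path.ends_with(".rs") {
        into.push(0); // pretend pattern 0 is `*.rs`
    }
    if path.starts_with("target/") {
        into.push(1); // pretend pattern 1 is `target/`
    }
}

fn is_ignored(path: &str) -> bool {
    thread_local! {
        // One scratch buffer per thread, reused across calls.
        static MATCHES: RefCell<Vec<usize>> = RefCell::new(vec![]);
    }
    MATCHES.with(|matches| {
        let mut matches = matches.borrow_mut();
        matches_into(path, &mut *matches);
        // The most recently defined pattern wins, so scan in reverse,
        // mirroring the gitignore semantics used above.
        matches.iter().rev().next().is_some()
    })
}

fn main() {
    assert!(is_ignored("src/main.rs"));
    assert!(is_ignored("target/debug/rg"));
    assert!(!is_ignored("README.md"));
    println!("ok");
}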

View File

@@ -26,13 +26,22 @@ to make its way into `glob` proper.
// at the .gitignore for the chromium repo---just about every pattern satisfies // at the .gitignore for the chromium repo---just about every pattern satisfies
// that assumption.) // that assumption.)
use std::borrow::Cow;
use std::collections::HashMap;
use std::error::Error as StdError; use std::error::Error as StdError;
use std::ffi::{OsStr, OsString};
use std::fmt; use std::fmt;
use std::hash;
use std::iter; use std::iter;
use std::path::Path;
use std::str; use std::str;
use fnv;
use regex; use regex;
use regex::bytes::{Regex, RegexSet, SetMatches}; use regex::bytes::Regex;
use regex::bytes::RegexSet;
use pathutil::file_name;
/// Represents an error that can occur when parsing a glob pattern. /// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
@@ -71,33 +80,181 @@ impl fmt::Display for Error {
} }
} }
/// SetYesNo represents a group of globs that can be matched together in a
/// single pass. SetYesNo can only determine whether a particular path matched
/// any pattern in the set.
#[derive(Clone, Debug)]
pub struct SetYesNo {
re: Regex,
}
impl SetYesNo {
/// Returns true if and only if the given path matches at least one glob
/// in this set.
pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
self.re.is_match(&*path_bytes(path.as_ref()))
}
fn new(
pats: &[(Pattern, MatchOptions)],
) -> Result<SetYesNo, regex::Error> {
let mut joined = String::new();
for &(ref p, ref o) in pats {
let part = format!("(?:{})", p.to_regex_with(o));
if !joined.is_empty() {
joined.push('|');
}
joined.push_str(&part);
}
Ok(SetYesNo { re: try!(Regex::new(&joined)) })
}
}
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
/// Set represents a group of globs that can be matched together in a single /// Set represents a group of globs that can be matched together in a single
/// pass. /// pass.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Set { pub struct Set {
re: Regex, yesno: SetYesNo,
set: RegexSet, exts: HashMap<OsString, Vec<usize>, Fnv>,
literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
base_prefixes: Vec<Vec<u8>>,
base_prefixes_map: Vec<usize>,
base_suffixes: Vec<Vec<u8>>,
base_suffixes_map: Vec<usize>,
base_regexes: RegexSet,
base_regexes_map: Vec<usize>,
regexes: RegexSet,
regexes_map: Vec<usize>,
} }
impl Set { impl Set {
/// Returns true if and only if the given path matches at least one glob /// Returns the sequence number of every glob pattern that matches the
/// in this set. /// given path.
pub fn is_match<T: AsRef<[u8]>>(&self, path: T) -> bool {
self.re.is_match(path.as_ref())
}
/// Returns every glob pattern (by sequence number) that matches the given
/// path.
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
// TODO(burntsushi): If we split this out into a separate crate, don't
// expose the regex::SetMatches type in the public API.
self.set.matches(path.as_ref())
}
/// Returns the number of glob patterns in this set.
#[allow(dead_code)] #[allow(dead_code)]
pub fn len(&self) -> usize { pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
self.set.len() let mut into = vec![];
self.matches_into(path, &mut into);
into
}
/// Adds the sequence number of every glob pattern that matches the given
/// path to the vec given.
pub fn matches_into<T: AsRef<Path>>(
&self,
path: T,
into: &mut Vec<usize>,
) {
into.clear();
let path = path.as_ref();
let path_bytes = &*path_bytes(path);
let basename = file_name(path).map(|b| os_str_bytes(b));
if !self.yesno.is_match(path) {
return;
}
if !self.exts.is_empty() {
if let Some(ext) = path.extension() {
if let Some(matches) = self.exts.get(ext) {
into.extend(matches.as_slice());
}
}
}
if !self.literals.is_empty() {
if let Some(matches) = self.literals.get(path_bytes) {
into.extend(matches.as_slice());
}
}
if !self.base_literals.is_empty() {
if let Some(ref basename) = basename {
if let Some(matches) = self.base_literals.get(&**basename) {
into.extend(matches.as_slice());
}
}
}
if !self.base_prefixes.is_empty() {
if let Some(ref basename) = basename {
let basename = &**basename;
for (i, pre) in self.base_prefixes.iter().enumerate() {
if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] {
into.push(self.base_prefixes_map[i]);
}
}
}
}
if !self.base_suffixes.is_empty() {
if let Some(ref basename) = basename {
let basename = &**basename;
for (i, suf) in self.base_suffixes.iter().enumerate() {
if suf.len() > basename.len() {
continue;
}
let (s, e) = (basename.len() - suf.len(), basename.len());
if &**suf == &basename[s..e] {
into.push(self.base_suffixes_map[i]);
}
}
}
}
if let Some(ref basename) = basename {
for i in self.base_regexes.matches(&**basename) {
into.push(self.base_regexes_map[i]);
}
}
for i in self.regexes.matches(path_bytes) {
into.push(self.regexes_map[i]);
}
into.sort();
}
fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> {
let fnv = Fnv::default();
let mut exts = HashMap::with_hasher(fnv.clone());
let mut literals = HashMap::with_hasher(fnv.clone());
let mut base_literals = HashMap::with_hasher(fnv.clone());
let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
let (mut regexes, mut regexes_map) = (vec![], vec![]);
let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
for (i, &(ref p, ref o)) in pats.iter().enumerate() {
if let Some(ext) = p.ext() {
exts.entry(ext).or_insert(vec![]).push(i);
} else if let Some(literal) = p.literal() {
literals.entry(literal.into_bytes()).or_insert(vec![]).push(i);
} else if let Some(literal) = p.base_literal() {
base_literals
.entry(literal.into_bytes()).or_insert(vec![]).push(i);
} else if let Some(literal) = p.base_literal_prefix() {
base_prefixes.push(literal.into_bytes());
base_prefixes_map.push(i);
} else if let Some(literal) = p.base_literal_suffix() {
base_suffixes.push(literal.into_bytes());
base_suffixes_map.push(i);
} else if p.is_only_basename() {
let part = format!("(?:{})", p.to_regex_with(o));
base_regexes.push(part);
base_regexes_map.push(i);
} else {
let part = format!("(?:{})", p.to_regex_with(o));
regexes.push(part);
regexes_map.push(i);
}
}
Ok(Set {
yesno: try!(SetYesNo::new(pats)),
exts: exts,
literals: literals,
base_literals: base_literals,
base_prefixes: base_prefixes,
base_prefixes_map: base_prefixes_map,
base_suffixes: base_suffixes,
base_suffixes_map: base_suffixes_map,
base_regexes: try!(RegexSet::new(base_regexes)),
base_regexes_map: base_regexes_map,
regexes: try!(RegexSet::new(regexes)),
regexes_map: regexes_map,
})
} }
} }
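
The rewritten `Set::new` above is essentially a router: each glob is classified once at build time and dropped into the cheapest bucket that can still answer it exactly (extension map, whole-path literal map, basename literal map, basename prefix/suffix lists, and finally two regex sets), while `SetYesNo` stays a single alternation used as a fast pre-filter. The toy sketch below shows only the classify-then-lookup idea; `ToySet` and `classify` are hypothetical, work on plain strings rather than the parsed `Pattern` tokens, and omit the prefix/suffix and regex buckets.

use std::collections::HashMap;

enum Bucket {
    Ext(String),         // e.g. "**/*.rs"       -> extension "rs"
    BaseLiteral(String), // e.g. "**/Cargo.lock" -> basename "Cargo.lock"
    Regex(String),       // everything else falls back to a regex alternate
}

fn classify(glob: &str) -> Bucket {
    let is_plain = |s: &str| !s.chars().any(|c| matches!(c, '/' | '*' | '?' | '['));
    if let Some(ext) = glob.strip_prefix("**/*.") {
        if is_plain(ext) {
            return Bucket::Ext(ext.to_string());
        }
    }
    if let Some(name) = glob.strip_prefix("**/") {
        if is_plain(name) {
            return Bucket::BaseLiteral(name.to_string());
        }
    }
    Bucket::Regex(glob.to_string())
}

struct ToySet {
    exts: HashMap<String, Vec<usize>>,
    base_literals: HashMap<String, Vec<usize>>,
    regexes: Vec<(usize, String)>, // globs we couldn't specialize
}

impl ToySet {
    fn new(globs: &[&str]) -> ToySet {
        let mut set = ToySet {
            exts: HashMap::new(),
            base_literals: HashMap::new(),
            regexes: vec![],
        };
        for (i, glob) in globs.iter().enumerate() {
            match classify(glob) {
                Bucket::Ext(e) => set.exts.entry(e).or_insert_with(Vec::new).push(i),
                Bucket::BaseLiteral(b) => {
                    set.base_literals.entry(b).or_insert_with(Vec::new).push(i)
                }
                Bucket::Regex(r) => set.regexes.push((i, r)),
            }
        }
        set
    }

    // Sequence numbers of the globs matching `path`, cheapest checks first.
    fn matches(&self, path: &str) -> Vec<usize> {
        let mut into = vec![];
        let basename = path.rsplit('/').next().unwrap_or(path);
        let ext = if basename.contains('.') {
            basename.rsplit('.').next()
        } else {
            None
        };
        if let Some(ext) = ext {
            if let Some(hits) = self.exts.get(ext) {
                into.extend(hits);
            }
        }
        if let Some(hits) = self.base_literals.get(basename) {
            into.extend(hits);
        }
        // The real Set finishes with two RegexSets here; omitted in this toy.
        let _ = &self.regexes;
        into.sort();
        into
    }
}

fn main() {
    let set = ToySet::new(&["**/*.rs", "**/Cargo.lock", "foo/**/bar"]);
    assert_eq!(set.matches("src/main.rs"), vec![0]);
    assert_eq!(set.matches("sub/Cargo.lock"), vec![1]);
    println!("ok");
}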
@@ -119,19 +276,12 @@ impl SetBuilder {
/// ///
/// Once a matcher is built, no new patterns can be added to it. /// Once a matcher is built, no new patterns can be added to it.
pub fn build(&self) -> Result<Set, regex::Error> { pub fn build(&self) -> Result<Set, regex::Error> {
let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o)); Set::new(&self.pats)
let set = try!(RegexSet::new(it)); }
let mut joined = String::new(); /// Like `build`, but returns a matcher that can only answer yes/no.
for &(ref p, ref o) in &self.pats { pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> {
let part = format!("(?:{})", p.to_regex_with(o)); SetYesNo::new(&self.pats)
if !joined.is_empty() {
joined.push('|');
}
joined.push_str(&part);
}
let re = try!(Regex::new(&joined));
Ok(Set { re: re, set: set })
} }
/// Add a new pattern to this set. /// Add a new pattern to this set.
@@ -149,8 +299,21 @@ impl SetBuilder {
pat: &str, pat: &str,
opts: &MatchOptions, opts: &MatchOptions,
) -> Result<(), Error> { ) -> Result<(), Error> {
let pat = try!(Pattern::new(pat)); let parsed = try!(Pattern::new(pat));
self.pats.push((pat, opts.clone())); // if let Some(ext) = parsed.ext() {
// eprintln!("ext :: {:?} :: {:?}", ext, pat);
// } else if let Some(lit) = parsed.literal() {
// eprintln!("literal :: {:?} :: {:?}", lit, pat);
// } else if let Some(lit) = parsed.base_literal() {
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
// } else if let Some(lit) = parsed.base_literal_prefix() {
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
// } else if let Some(lit) = parsed.base_literal_suffix() {
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
// } else {
// eprintln!("regex :: {:?} :: {:?}", pat, parsed);
// }
self.pats.push((parsed, opts.clone()));
Ok(()) Ok(())
} }
} }
@@ -204,6 +367,133 @@ impl Pattern {
Ok(p.p) Ok(p.p)
} }
/// Returns an extension if this pattern exclusively matches it.
pub fn ext(&self) -> Option<OsString> {
if self.tokens.len() <= 3 {
return None;
}
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
match self.tokens.get(1) {
Some(&Token::ZeroOrMore) => {}
_ => return None,
}
match self.tokens.get(2) {
Some(&Token::Literal(c)) if c == '.' => {}
_ => return None,
}
let mut lit = OsString::new();
for t in self.tokens[3..].iter() {
match *t {
Token::Literal(c) if c == '/' || c == '\\' || c == '.' => {
return None;
}
Token::Literal(c) => lit.push(c.to_string()),
_ => return None,
}
}
Some(lit)
}
/// Returns the pattern as a literal if and only if the pattern exclusively
/// matches the basename of a file path *and* is a literal.
///
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
/// does not contain a path separator.
pub fn base_literal(&self) -> Option<String> {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
let mut lit = String::new();
for t in &self.tokens[1..] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return None,
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Returns true if and only if this pattern only inspects the basename
/// of a path.
pub fn is_only_basename(&self) -> bool {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return false,
}
for t in &self.tokens[1..] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return false,
Token::RecursivePrefix
| Token::RecursiveSuffix
| Token::RecursiveZeroOrMore => return false,
_ => {}
}
}
true
}
/// Returns the pattern as a literal if and only if the pattern must match
/// an entire path exactly.
///
/// The basic format of these patterns is `{literal}`.
pub fn literal(&self) -> Option<String> {
let mut lit = String::new();
for t in &self.tokens {
match *t {
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Returns a basename literal prefix of this pattern.
pub fn base_literal_prefix(&self) -> Option<String> {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
match self.tokens.last() {
Some(&Token::ZeroOrMore) => {}
_ => return None,
}
let mut lit = String::new();
for t in &self.tokens[1..self.tokens.len()-1] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return None,
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Returns a basename literal suffix of this pattern.
pub fn base_literal_suffix(&self) -> Option<String> {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
match self.tokens.get(1) {
Some(&Token::ZeroOrMore) => {}
_ => return None,
}
let mut lit = String::new();
for t in &self.tokens[2..] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return None,
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Convert this pattern to a string that is guaranteed to be a valid /// Convert this pattern to a string that is guaranteed to be a valid
/// regular expression and will represent the matching semantics of this /// regular expression and will represent the matching semantics of this
/// glob pattern. This uses a default set of options. /// glob pattern. This uses a default set of options.
@@ -415,13 +705,34 @@ impl<'a> Parser<'a> {
} }
} }
fn path_bytes(path: &Path) -> Cow<[u8]> {
os_str_bytes(path.as_os_str())
}
#[cfg(unix)]
fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
use std::os::unix::ffi::OsStrExt;
Cow::Borrowed(s.as_bytes())
}
#[cfg(not(unix))]
fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
// TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even
// if we could get at the raw bytes, they wouldn't be useful. We *must*
// convert to UTF-8 before doing path matching. Unfortunate, but necessary.
match s.to_string_lossy() {
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::path::Path; use std::path::Path;
use regex::bytes::Regex; use regex::bytes::Regex;
use super::{Error, Pattern, MatchOptions, SetBuilder, Token}; use super::{Error, Pattern, MatchOptions, Set, SetBuilder, Token};
use super::Token::*; use super::Token::*;
macro_rules! syntax { macro_rules! syntax {
@@ -483,14 +794,42 @@ mod tests {
let pat = Pattern::new($pat).unwrap(); let pat = Pattern::new($pat).unwrap();
let path = &Path::new($path).to_str().unwrap(); let path = &Path::new($path).to_str().unwrap();
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap(); let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
// println!("PATTERN: {}", $pat);
// println!("REGEX: {:?}", re);
// println!("PATH: {}", path);
assert!(!re.is_match(path.as_bytes())); assert!(!re.is_match(path.as_bytes()));
} }
}; };
} }
macro_rules! ext {
($name:ident, $pat:expr, $ext:expr) => {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
let ext = pat.ext().map(|e| e.to_string_lossy().into_owned());
assert_eq!($ext, ext.as_ref().map(|s| &**s));
}
};
}
macro_rules! baseliteral {
($name:ident, $pat:expr, $yes:expr) => {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
assert_eq!($yes, pat.base_literal().is_some());
}
};
}
macro_rules! basesuffix {
($name:ident, $pat:expr, $yes:expr) => {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
assert_eq!($yes, pat.is_literal_suffix());
}
};
}
fn class(s: char, e: char) -> Token { fn class(s: char, e: char) -> Token {
Class { negated: false, ranges: vec![(s, e)] } Class { negated: false, ranges: vec![(s, e)] }
} }
@@ -585,6 +924,26 @@ mod tests {
toregex!(re10, "+", r"^\+$"); toregex!(re10, "+", r"^\+$");
toregex!(re11, "**", r"^.*$"); toregex!(re11, "**", r"^.*$");
ext!(ext1, "**/*.rs", Some("rs"));
baseliteral!(lit1, "**", true);
baseliteral!(lit2, "**/a", true);
baseliteral!(lit3, "**/ab", true);
baseliteral!(lit4, "**/a*b", false);
baseliteral!(lit5, "z/**/a*b", false);
baseliteral!(lit6, "[ab]", false);
baseliteral!(lit7, "?", false);
/*
issuffix!(suf1, "", false);
issuffix!(suf2, "a", true);
issuffix!(suf3, "ab", true);
issuffix!(suf4, "*ab", true);
issuffix!(suf5, "*.ab", true);
issuffix!(suf6, "?.ab", true);
issuffix!(suf7, "ab*", false);
*/
matches!(match1, "a", "a"); matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b"); matches!(match2, "a*b", "a_b");
matches!(match3, "a*b*c", "abc"); matches!(match3, "a*b*c", "abc");
@@ -681,16 +1040,22 @@ mod tests {
builder.add("src/lib.rs").unwrap(); builder.add("src/lib.rs").unwrap();
let set = builder.build().unwrap(); let set = builder.build().unwrap();
assert!(set.is_match("foo.c")); fn is_match(set: &Set, s: &str) -> bool {
assert!(set.is_match("src/foo.c")); let mut matches = vec![];
assert!(!set.is_match("foo.rs")); set.matches_into(s, &mut matches);
assert!(!set.is_match("tests/foo.rs")); !matches.is_empty()
assert!(set.is_match("src/foo.rs")); }
assert!(set.is_match("src/grep/src/main.rs"));
assert_eq!(2, set.matches("src/lib.rs").iter().count()); assert!(is_match(&set, "foo.c"));
assert!(set.matches("src/lib.rs").matched(0)); assert!(is_match(&set, "src/foo.c"));
assert!(!set.matches("src/lib.rs").matched(1)); assert!(!is_match(&set, "foo.rs"));
assert!(set.matches("src/lib.rs").matched(2)); assert!(!is_match(&set, "tests/foo.rs"));
assert!(is_match(&set, "src/foo.rs"));
assert!(is_match(&set, "src/grep/src/main.rs"));
let matches = set.matches("src/lib.rs");
assert_eq!(2, matches.len());
assert_eq!(0, matches[0]);
assert_eq!(2, matches[1]);
} }
} }

View File

@@ -19,11 +19,11 @@ use std::io;
use std::path::{Path, PathBuf}; use std::path::{Path, PathBuf};
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern}; use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
use pathutil::is_hidden;
use types::Types; use types::Types;
const IGNORE_NAMES: &'static [&'static str] = &[ const IGNORE_NAMES: &'static [&'static str] = &[
".gitignore", ".gitignore",
".agignore",
".rgignore", ".rgignore",
]; ];
@@ -83,7 +83,10 @@ pub struct Ignore {
overrides: Overrides, overrides: Overrides,
/// A file type matcher. /// A file type matcher.
types: Types, types: Types,
/// Whether to ignore hidden files or not.
ignore_hidden: bool, ignore_hidden: bool,
/// When true, don't look at .gitignore or .agignore files for ignore
/// rules.
no_ignore: bool, no_ignore: bool,
} }
@@ -208,15 +211,17 @@ impl Ignore {
debug!("{} ignored because it is hidden", path.display()); debug!("{} ignored because it is hidden", path.display());
return true; return true;
} }
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) { if !self.no_ignore {
let mat = id.matched(path, is_dir); for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
if let Some(is_ignored) = self.ignore_match(path, mat) { let mat = id.matched(path, is_dir);
if is_ignored { if let Some(is_ignored) = self.ignore_match(path, mat) {
return true; if is_ignored {
return true;
}
// If this path is whitelisted by an ignore, then
// fallthrough and let the file type matcher have a say.
break;
} }
// If this path is whitelisted by an ignore, then fallthrough
// and let the file type matcher have a say.
break;
} }
} }
let mat = self.types.matched(path, is_dir); let mat = self.types.matched(path, is_dir);
@@ -361,8 +366,7 @@ impl Overrides {
let path = path.as_ref(); let path = path.as_ref();
self.gi.as_ref() self.gi.as_ref()
.map(|gi| { .map(|gi| {
let path = &*path.to_string_lossy(); let mat = gi.matched_stripped(path, is_dir).invert();
let mat = gi.matched_utf8(path, is_dir).invert();
if mat.is_none() && !is_dir { if mat.is_none() && !is_dir {
if gi.num_ignores() > 0 { if gi.num_ignores() > 0 {
return Match::Ignored(&self.unmatched_pat); return Match::Ignored(&self.unmatched_pat);
@@ -374,14 +378,6 @@ impl Overrides {
} }
} }
fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
if let Some(name) = path.as_ref().file_name() {
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
} else {
false
}
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::path::Path; use std::path::Path;

View File

@@ -1,6 +1,7 @@
extern crate crossbeam; extern crate deque;
extern crate docopt; extern crate docopt;
extern crate env_logger; extern crate env_logger;
extern crate fnv;
extern crate grep; extern crate grep;
#[cfg(windows)] #[cfg(windows)]
extern crate kernel32; extern crate kernel32;
@@ -15,7 +16,6 @@ extern crate num_cpus;
extern crate regex; extern crate regex;
extern crate rustc_serialize; extern crate rustc_serialize;
extern crate term; extern crate term;
extern crate thread_local;
extern crate walkdir; extern crate walkdir;
#[cfg(windows)] #[cfg(windows)]
extern crate winapi; extern crate winapi;
@@ -29,7 +29,7 @@ use std::result;
use std::sync::{Arc, Mutex}; use std::sync::{Arc, Mutex};
use std::thread; use std::thread;
use crossbeam::sync::chase_lev::{self, Steal, Stealer}; use deque::{Stealer, Stolen};
use grep::Grep; use grep::Grep;
use memmap::{Mmap, Protection}; use memmap::{Mmap, Protection};
use term::Terminal; use term::Terminal;
@@ -37,6 +37,7 @@ use walkdir::DirEntry;
use args::Args; use args::Args;
use out::{ColoredTerminal, Out}; use out::{ColoredTerminal, Out};
use pathutil::strip_prefix;
use printer::Printer; use printer::Printer;
use search_stream::InputBuffer; use search_stream::InputBuffer;
#[cfg(windows)] #[cfg(windows)]
@@ -61,6 +62,7 @@ mod gitignore;
mod glob; mod glob;
mod ignore; mod ignore;
mod out; mod out;
mod pathutil;
mod printer; mod printer;
mod search_buffer; mod search_buffer;
mod search_stream; mod search_stream;
@@ -98,8 +100,8 @@ fn run(args: Args) -> Result<u64> {
let out = Arc::new(Mutex::new(args.out())); let out = Arc::new(Mutex::new(args.out()));
let mut workers = vec![]; let mut workers = vec![];
let mut workq = { let workq = {
let (workq, stealer) = chase_lev::deque(); let (workq, stealer) = deque::new();
for _ in 0..args.threads() { for _ in 0..args.threads() {
let worker = MultiWorker { let worker = MultiWorker {
chan_work: stealer.clone(), chan_work: stealer.clone(),
@@ -216,10 +218,10 @@ impl MultiWorker {
fn run(mut self) -> u64 { fn run(mut self) -> u64 {
loop { loop {
let work = match self.chan_work.steal() { let work = match self.chan_work.steal() {
Steal::Empty | Steal::Abort => continue, Stolen::Empty | Stolen::Abort => continue,
Steal::Data(Work::Quit) => break, Stolen::Data(Work::Quit) => break,
Steal::Data(Work::Stdin) => WorkReady::Stdin, Stolen::Data(Work::Stdin) => WorkReady::Stdin,
Steal::Data(Work::File(ent)) => { Stolen::Data(Work::File(ent)) => {
match File::open(ent.path()) { match File::open(ent.path()) {
Ok(file) => WorkReady::DirFile(ent, file), Ok(file) => WorkReady::DirFile(ent, file),
Err(err) => { Err(err) => {
@@ -258,7 +260,7 @@ impl Worker {
} }
WorkReady::DirFile(ent, file) => { WorkReady::DirFile(ent, file) => {
let mut path = ent.path(); let mut path = ent.path();
if let Ok(p) = path.strip_prefix("./") { if let Some(p) = strip_prefix("./", path) {
path = p; path = p;
} }
if self.args.mmap() { if self.args.mmap() {
@@ -269,7 +271,7 @@ impl Worker {
} }
WorkReady::PathFile(path, file) => { WorkReady::PathFile(path, file) => {
let mut path = &*path; let mut path = &*path;
if let Ok(p) = path.strip_prefix("./") { if let Some(p) = strip_prefix("./", path) {
path = p; path = p;
} }
if self.args.mmap() { if self.args.mmap() {
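
The `crossbeam` → `deque` switch above keeps the same loop shape: the main thread pushes `Work` items onto a work-stealing deque and each worker spins on `steal()`, treating `Empty`/`Abort` as "try again" and a `Quit` message as the shutdown signal. The sketch below is a stripped-down, runnable version of that loop shape; the `Stealer`/`Stolen` types here are toy stand-ins backed by a mutex-guarded queue, mirroring only the interface seen in the diff, not the real lock-free `deque` crate.

use std::collections::VecDeque;
use std::sync::{Arc, Mutex};
use std::thread;

// Mirrors the Stolen::{Empty, Abort, Data} shape used in MultiWorker::run.
enum Stolen<T> {
    Empty,
    Abort,
    Data(T),
}

enum Work {
    Quit,
    File(String),
}

#[derive(Clone)]
struct Stealer(Arc<Mutex<VecDeque<Work>>>);

impl Stealer {
    fn steal(&self) -> Stolen<Work> {
        match self.0.lock().unwrap().pop_front() {
            Some(work) => Stolen::Data(work),
            None => Stolen::Empty,
        }
    }
}

fn worker(id: usize, chan_work: Stealer) -> u64 {
    let mut searched = 0;
    loop {
        // Same control flow as the diff: retry on Empty/Abort, stop on Quit,
        // otherwise handle one unit of work.
        let path = match chan_work.steal() {
            Stolen::Empty | Stolen::Abort => continue,
            Stolen::Data(Work::Quit) => break,
            Stolen::Data(Work::File(path)) => path,
        };
        println!("worker {} searching {}", id, path);
        searched += 1;
    }
    searched
}

fn main() {
    let queue = Arc::new(Mutex::new(VecDeque::new()));
    let stealer = Stealer(queue.clone());
    let workers: Vec<_> = (0..2)
        .map(|id| {
            let stealer = stealer.clone();
            thread::spawn(move || worker(id, stealer))
        })
        .collect();
    {
        let mut q = queue.lock().unwrap();
        q.push_back(Work::File("src/main.rs".to_string()));
        q.push_back(Work::File("Cargo.toml".to_string()));
        // One Quit per worker so every thread eventually shuts down.
        q.push_back(Work::Quit);
        q.push_back(Work::Quit);
    }
    let total: u64 = workers.into_iter().map(|h| h.join().unwrap()).sum();
    println!("searched {} files", total);
}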

98
src/pathutil.rs Normal file
View File

@@ -0,0 +1,98 @@
/*!
The pathutil module provides platform specific operations on paths that are
typically faster than the same operations as provided in std::path. In
particular, we really want to avoid the costly operation of parsing the path
into its constituent components. We give up on Windows, but on Unix, we deal
with the raw bytes directly.
On large repositories (like chromium), this can have a ~25% performance
improvement on just listing the files to search (!).
*/
use std::ffi::OsStr;
use std::path::Path;
use memchr::memrchr;
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path>>(
prefix: P,
path: &'a Path,
) -> Option<&'a Path> {
use std::os::unix::ffi::OsStrExt;
let prefix = prefix.as_ref().as_os_str().as_bytes();
let path = path.as_os_str().as_bytes();
if prefix.len() > path.len() || prefix != &path[0..prefix.len()] {
None
} else {
Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..])))
}
}
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(not(unix))]
pub fn strip_prefix<'a>(prefix: &Path, path: &'a Path) -> Option<&'a Path> {
path.strip_prefix(prefix).ok()
}
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
use std::os::unix::ffi::OsStrExt;
let path = path.as_ref().as_os_str().as_bytes();
if path.is_empty() {
return None;
} else if path.len() == 1 && path[0] == b'.' {
return None;
} else if path.last() == Some(&b'.') {
return None;
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
return None;
}
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
Some(OsStr::from_bytes(&path[last_slash..]))
}
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
path.as_ref().file_name()
}
/// Returns true if and only if this file path is considered to be hidden.
#[cfg(unix)]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
use std::os::unix::ffi::OsStrExt;
if let Some(name) = file_name(path.as_ref()) {
name.as_bytes().get(0) == Some(&b'.')
} else {
false
}
}
/// Returns true if and only if this file path is considered to be hidden.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
if let Some(name) = file_name(path) {
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
} else {
false
}
}
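
Since all of `src/pathutil.rs` is shown above, a short usage sketch is easy to ground: on any platform the three helpers behave like their `std::path` counterparts, they just avoid re-parsing the path on Unix. The `describe` and `main` below are illustrative only and assume the functions above are in scope (e.g. via `use pathutil::*;` inside the crate, as main.rs does for `strip_prefix`).

use std::path::Path;

fn describe(path: &Path) {
    // Drop a leading "./" the same way the worker in main.rs does.
    let path = strip_prefix("./", path).unwrap_or(path);
    match file_name(path) {
        Some(name) => println!(
            "{}: basename={:?} hidden={}",
            path.display(),
            name,
            is_hidden(path)
        ),
        None => println!("{}: no basename (root, `.` or `..`)", path.display()),
    }
}

fn main() {
    describe(Path::new("./src/main.rs")); // basename="main.rs", hidden=false
    describe(Path::new(".gitignore"));    // basename=".gitignore", hidden=true
    describe(Path::new(".."));            // no basename
}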

View File

@@ -151,8 +151,8 @@ impl FileTypeDef {
/// Types is a file type matcher. /// Types is a file type matcher.
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Types { pub struct Types {
selected: Option<glob::Set>, selected: Option<glob::SetYesNo>,
negated: Option<glob::Set>, negated: Option<glob::SetYesNo>,
has_selected: bool, has_selected: bool,
unmatched_pat: Pattern, unmatched_pat: Pattern,
} }
@@ -165,8 +165,8 @@ impl Types {
/// If has_selected is true, then at least one file type was selected. /// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored. /// Therefore, any non-matches should be ignored.
fn new( fn new(
selected: Option<glob::Set>, selected: Option<glob::SetYesNo>,
negated: Option<glob::Set>, negated: Option<glob::SetYesNo>,
has_selected: bool, has_selected: bool,
) -> Types { ) -> Types {
Types { Types {
@@ -268,7 +268,7 @@ impl TypesBuilder {
try!(bset.add_with(glob, &opts)); try!(bset.add_with(glob, &opts));
} }
} }
Some(try!(bset.build())) Some(try!(bset.build_yesno()))
}; };
let negated_globs = let negated_globs =
if self.negated.is_empty() { if self.negated.is_empty() {
@@ -287,7 +287,7 @@ impl TypesBuilder {
try!(bset.add_with(glob, &opts)); try!(bset.add_with(glob, &opts));
} }
} }
Some(try!(bset.build())) Some(try!(bset.build_yesno()))
}; };
Ok(Types::new( Ok(Types::new(
selected_globs, negated_globs, !self.selected.is_empty())) selected_globs, negated_globs, !self.selected.is_empty()))

View File

@@ -26,6 +26,7 @@ impl Iter {
} }
/// Returns true if this entry should be skipped. /// Returns true if this entry should be skipped.
#[inline(always)]
fn skip_entry(&self, ent: &DirEntry) -> bool { fn skip_entry(&self, ent: &DirEntry) -> bool {
if ent.depth() == 0 { if ent.depth() == 0 {
// Never skip the root directory. // Never skip the root directory.
@@ -41,6 +42,7 @@ impl Iter {
impl Iterator for Iter { impl Iterator for Iter {
type Item = DirEntry; type Item = DirEntry;
#[inline(always)]
fn next(&mut self) -> Option<DirEntry> { fn next(&mut self) -> Option<DirEntry> {
while let Some(ev) = self.it.next() { while let Some(ev) = self.it.next() {
match ev { match ev {
@@ -108,6 +110,7 @@ impl From<WalkDir> for WalkEventIter {
impl Iterator for WalkEventIter { impl Iterator for WalkEventIter {
type Item = walkdir::Result<WalkEvent>; type Item = walkdir::Result<WalkEvent>;
#[inline(always)]
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> { fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
let dent = self.next.take().or_else(|| self.it.next()); let dent = self.next.take().or_else(|| self.it.next());
let depth = match dent { let depth = match dent {