mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-02 05:02:01 -07:00
Compare commits
18 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
8f87a4e8ac | ||
|
d27d3e675f | ||
|
bf5d873099 | ||
|
bc9d12c4c8 | ||
|
5a0c873f61 | ||
|
65fec147d6 | ||
|
7fbf2f014c | ||
|
d22a3ca3e5 | ||
|
e9ec52b7f9 | ||
|
0d14c74e63 | ||
|
1c5884b2f9 | ||
|
8203a80ac7 | ||
|
0e46171e3b | ||
|
f5c85827ce | ||
|
7cefc55238 | ||
|
92c918ebd9 | ||
|
c24f8fd50f | ||
|
73272cf8a6 |
51
Cargo.lock
generated
51
Cargo.lock
generated
@@ -1,23 +1,24 @@
|
||||
[root]
|
||||
name = "ripgrep"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep 0.1.0",
|
||||
"grep 0.1.1",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@@ -30,9 +31,12 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.2.10"
|
||||
name = "deque"
|
||||
version = "0.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docopt"
|
||||
@@ -54,6 +58,11 @@ dependencies = [
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fs2"
|
||||
version = "0.2.5"
|
||||
@@ -71,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -125,7 +134,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.0.0"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -140,7 +157,7 @@ dependencies = [
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@@ -184,7 +201,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.2.6"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -197,7 +214,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "0.1.6"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -216,9 +233,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
|
||||
"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
|
||||
"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
|
||||
"checksum docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42c6077823a361410c37d47c2535b73a190cbe10838dc4f400fe87c10c8c3b"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8e8af7b5408ab0c4910cad114c8f9eb454bf75df7afe8964307eeafb68a13a5e"
|
||||
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
@@ -227,7 +245,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
||||
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
||||
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
|
||||
"checksum num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a859041cbf7a70ea1ece4b87d1a2c6ef364dcb68749c88db1f97304b9ec09d5f"
|
||||
"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
|
||||
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
||||
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
|
||||
"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
|
||||
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
|
||||
@@ -235,8 +254,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
|
||||
"checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a"
|
||||
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
||||
"checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d"
|
||||
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
|
||||
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||
"checksum walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d42144c31c9909882ce76e696b306b88a5b091721251137d5d522d1ef3da7cf9"
|
||||
"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ripgrep"
|
||||
version = "0.1.1" #:version
|
||||
version = "0.1.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Line oriented search tool using Rust's regex library. Combines the raw
|
||||
@@ -23,10 +23,11 @@ name = "integration"
|
||||
path = "tests/tests.rs"
|
||||
|
||||
[dependencies]
|
||||
crossbeam = "0.2"
|
||||
deque = "0.3"
|
||||
docopt = "0.6"
|
||||
env_logger = "0.3"
|
||||
grep = { version = "0.1", path = "grep" }
|
||||
fnv = "1.0"
|
||||
grep = { version = "0.1.1", path = "grep" }
|
||||
lazy_static = "0.2"
|
||||
libc = "0.2"
|
||||
log = "0.3"
|
||||
|
14
Makefile
14
Makefile
@@ -1,14 +0,0 @@
|
||||
all:
|
||||
echo Nothing to do...
|
||||
|
||||
ctags:
|
||||
ctags --options=ctags.rust --languages=Rust src/*.rs src/*/*.rs
|
||||
|
||||
docs:
|
||||
cargo doc
|
||||
in-dir ./target/doc fix-perms
|
||||
rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/
|
||||
|
||||
push:
|
||||
git push origin master
|
||||
git push github master
|
@@ -1,3 +1,6 @@
|
||||
**UNDER DEVELOPMENT.**
|
||||
|
||||
ripgrep (rg)
|
||||
------------
|
||||
ripgrep combines the usability of the silver searcher with the raw speed of grep.
|
||||
ripgrep combines the usability of the silver searcher with the raw speed of
|
||||
grep.
|
||||
|
5
benches/README.md
Normal file
5
benches/README.md
Normal file
@@ -0,0 +1,5 @@
|
||||
These are internal microbenchmarks for tracking the performance of individual
|
||||
components inside of ripgrep. At the moment, they aren't heavily used.
|
||||
|
||||
For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
|
||||
directory.
|
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env python
|
||||
#!/usr/bin/env python3
|
||||
|
||||
'''
|
||||
benchsuite is a benchmark runner for comparing command line search tools.
|
||||
@@ -10,6 +10,7 @@ import os
|
||||
import os.path as path
|
||||
from multiprocessing import cpu_count
|
||||
import re
|
||||
import shutil
|
||||
import statistics
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -39,13 +40,23 @@ LINUX_CLONE = 'git://github.com/BurntSushi/linux'
|
||||
GREP_ASCII = {'LC_ALL': 'C'}
|
||||
GREP_UNICODE = {'LC_ALL': 'en_US.UTF-8'}
|
||||
|
||||
# Sift tries really hard to search everything by default. In our code search
|
||||
# benchmarks, we don't want that.
|
||||
SIFT = [
|
||||
'sift',
|
||||
'--binary-skip',
|
||||
'--exclude-files', '.*',
|
||||
'--exclude-files', '*.pdf',
|
||||
]
|
||||
|
||||
|
||||
def bench_linux_literal_default(suite_dir):
|
||||
'''
|
||||
Benchmark the speed of a literal using *default* settings.
|
||||
|
||||
This is a purposefully unfair benchmark for use in performance
|
||||
analysis, but it is pedagogically useful.
|
||||
analysis, but it is pedagogically useful to demonstrate how
|
||||
default behaviors differ.
|
||||
'''
|
||||
require(suite_dir, 'linux')
|
||||
cwd = path.join(suite_dir, LINUX_DIR)
|
||||
@@ -55,8 +66,6 @@ def bench_linux_literal_default(suite_dir):
|
||||
kwargs['cwd'] = cwd
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
# N.B. This is a purposefully unfair benchmark for illustrative purposes
|
||||
# of how the default modes for each search tool differ.
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', pat]),
|
||||
mkcmd('ag', ['ag', pat]),
|
||||
@@ -64,10 +73,12 @@ def bench_linux_literal_default(suite_dir):
|
||||
# doesn't read gitignore files. Instead, it has a file whitelist
|
||||
# that happens to match up exactly with the gitignores for this search.
|
||||
mkcmd('ucg', ['ucg', pat]),
|
||||
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
|
||||
# I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
|
||||
# default, but I'd guess it to be on most desktop systems.
|
||||
mkcmd('pt', ['pt', pat]),
|
||||
# sift reports an extra line here for a binary file matched.
|
||||
mkcmd('sift', ['sift', pat]),
|
||||
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
|
||||
])
|
||||
|
||||
|
||||
@@ -76,8 +87,9 @@ def bench_linux_literal(suite_dir):
|
||||
Benchmark the speed of a literal, attempting to be fair.
|
||||
|
||||
This tries to use the minimum set of options available in all tools
|
||||
to test how fast they are. For example, it makes sure there is no
|
||||
case insensitive matching and that line numbers are computed.
|
||||
to test how fast they are. For example, it makes sure there is
|
||||
no case insensitive matching and that line numbers are computed
|
||||
(because some tools don't permit disabling line numbers).
|
||||
'''
|
||||
require(suite_dir, 'linux')
|
||||
cwd = path.join(suite_dir, LINUX_DIR)
|
||||
@@ -88,19 +100,16 @@ def bench_linux_literal(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
|
||||
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
|
||||
mkcmd('ag', ['ag', '-s', pat]),
|
||||
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
|
||||
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
|
||||
mkcmd('git grep', [
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]),
|
||||
mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]),
|
||||
mkcmd('pt (ignore)', ['pt', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
|
||||
mkcmd('git grep (ignore)', [
|
||||
'git', 'grep', '-I', '-n', pat,
|
||||
], env={'LC_ALL': 'C'}),
|
||||
mkcmd('pt', ['pt', pat]),
|
||||
mkcmd('sift', [
|
||||
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
|
||||
]),
|
||||
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
|
||||
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -120,23 +129,21 @@ def bench_linux_literal_casei(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', '-i', pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
|
||||
mkcmd('rg-novcs-mmap', [
|
||||
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
|
||||
]),
|
||||
mkcmd('ag', ['ag', '-i', pat]),
|
||||
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
|
||||
mkcmd('ucg', ['ucg', '-i', pat]),
|
||||
mkcmd('git grep', [
|
||||
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
|
||||
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
|
||||
mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
|
||||
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
|
||||
# since that is certainly what ripgrep is doing, but this is for an
|
||||
# ASCII literal, so we should give `git grep` all the opportunity to
|
||||
# do its best.
|
||||
mkcmd('git grep (ignore)', [
|
||||
'git', 'grep', '-I', '-n', '-i', pat,
|
||||
], env={'LC_ALL': 'C'}),
|
||||
# sift yields more matches than it should here. Specifically, it gets
|
||||
# matches in Module.symvers and System.map in the repo root. Both of
|
||||
# those files show up in the repo root's .gitignore file.
|
||||
mkcmd('sift', [
|
||||
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
|
||||
mkcmd('rg (whitelist)', [
|
||||
'rg', '-n', '-i', '--no-ignore', '-tall', pat,
|
||||
]),
|
||||
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -156,20 +163,16 @@ def bench_linux_re_literal_suffix(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
|
||||
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
|
||||
mkcmd('ag', ['ag', '-s', pat]),
|
||||
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
|
||||
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('ag (ignore)', ['ag', '-s', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
|
||||
mkcmd(
|
||||
'git grep',
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
env={'LC_ALL': 'C'},
|
||||
),
|
||||
mkcmd('sift', [
|
||||
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
|
||||
]),
|
||||
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
|
||||
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -189,22 +192,18 @@ def bench_linux_word(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', '-w', pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
|
||||
mkcmd('rg-novcs-mmap', [
|
||||
'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
|
||||
]),
|
||||
mkcmd('ag', ['ag', '-s', '-w', pat]),
|
||||
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
|
||||
mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
|
||||
mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
|
||||
mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
|
||||
mkcmd(
|
||||
'git grep',
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', '-w', pat],
|
||||
env={'LC_ALL': 'C'},
|
||||
),
|
||||
mkcmd('sift', [
|
||||
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
|
||||
mkcmd('rg (whitelist)', [
|
||||
'rg', '-n', '-w', '--no-ignore', '-tall', pat,
|
||||
]),
|
||||
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -212,7 +211,8 @@ def bench_linux_unicode_greek(suite_dir):
|
||||
'''
|
||||
Benchmark matching of a Unicode category.
|
||||
|
||||
Only three tools (ripgrep, sift and pt) support this.
|
||||
Only three tools (ripgrep, sift and pt) support this. We omit
|
||||
pt because it is too slow.
|
||||
'''
|
||||
require(suite_dir, 'linux')
|
||||
cwd = path.join(suite_dir, LINUX_DIR)
|
||||
@@ -224,15 +224,7 @@ def bench_linux_unicode_greek(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
# sift tries to search a bunch of PDF files and clutters up the
|
||||
# results, even though --binary-skip is provided. They are excluded
|
||||
# here explicitly, but don't have a measurable impact on performance.
|
||||
mkcmd('sift', [
|
||||
'sift', '-n', '--binary-skip',
|
||||
'--exclude-files', '.*',
|
||||
'--exclude-files', '*.pdf',
|
||||
pat,
|
||||
]),
|
||||
mkcmd('sift', SIFT + ['-n', '--git', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -252,15 +244,7 @@ def bench_linux_unicode_greek_casei(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', '-i', pat]),
|
||||
# sift tries to search a bunch of PDF files and clutters up the
|
||||
# results, even though --binary-skip is provided. They are excluded
|
||||
# here explicitly, but don't have a measurable impact on performance.
|
||||
mkcmd('sift', [
|
||||
'sift', '-n', '--binary-skip',
|
||||
'--exclude-files', '.*',
|
||||
'--exclude-files', '*.pdf',
|
||||
pat,
|
||||
]),
|
||||
mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -281,30 +265,25 @@ def bench_linux_unicode_word(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
|
||||
mkcmd('rg-novcs-mmap', [
|
||||
'rg', '--mmap', '--no-ignore', '-n', pat,
|
||||
]),
|
||||
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
|
||||
mkcmd('ag-novcs (no Unicode)', [
|
||||
'ag', '--skip-vcs-ignores', '-s', pat,
|
||||
]),
|
||||
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
||||
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
|
||||
mkcmd(
|
||||
'git grep',
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
env={'LC_ALL': 'en_US.UTF-8'},
|
||||
),
|
||||
mkcmd(
|
||||
'git grep (no Unicode)',
|
||||
'git grep (ignore) (ASCII)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
env={'LC_ALL': 'C'},
|
||||
),
|
||||
mkcmd('sift (no Unicode)', [
|
||||
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
|
||||
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
|
||||
mkcmd('rg (whitelist) (ASCII)', [
|
||||
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
|
||||
]),
|
||||
mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -326,30 +305,25 @@ def bench_linux_no_literal(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
mkcmd('rg-whitelist', ['rg', '-tall', '--no-ignore', '-n', pat]),
|
||||
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
|
||||
mkcmd('rg-whitelist (no Unicode)', [
|
||||
'rg', '-tall', '--no-ignore', '-n', '(?-u)' + pat,
|
||||
]),
|
||||
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
|
||||
mkcmd('ag-novcs (no Unicode)', [
|
||||
'ag', '--skip-vcs-ignores', '-s', pat,
|
||||
]),
|
||||
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
||||
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
|
||||
mkcmd(
|
||||
'git grep',
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
env={'LC_ALL': 'en_US.UTF-8'},
|
||||
),
|
||||
mkcmd(
|
||||
'git grep (no Unicode)',
|
||||
'git grep (ignore) (ASCII)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
env={'LC_ALL': 'C'},
|
||||
),
|
||||
mkcmd('sift (no Unicode)', [
|
||||
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
|
||||
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
|
||||
mkcmd('rg (whitelist) (ASCII)', [
|
||||
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
|
||||
]),
|
||||
mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -371,21 +345,15 @@ def bench_linux_alternates(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
|
||||
mkcmd('rg-novcs-mmap', [
|
||||
'rg', '--mmap', '--no-ignore', '-n', pat,
|
||||
]),
|
||||
mkcmd('ag', ['ag', '-s', pat]),
|
||||
mkcmd('ag-novcs', [
|
||||
'ag', '--skip-vcs-ignores', '-s', pat,
|
||||
]),
|
||||
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('ag (ignore)', ['ag', '-s', pat]),
|
||||
mkcmd(
|
||||
'git grep',
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
env={'LC_ALL': 'C'},
|
||||
),
|
||||
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
|
||||
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -400,21 +368,15 @@ def bench_linux_alternates_casei(suite_dir):
|
||||
return Command(*args, **kwargs)
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', '-i', pat]),
|
||||
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
|
||||
mkcmd('rg-novcs-mmap', [
|
||||
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
|
||||
]),
|
||||
mkcmd('ag', ['ag', '-i', pat]),
|
||||
mkcmd('ag-novcs', [
|
||||
'ag', '--skip-vcs-ignores', '-i', pat,
|
||||
]),
|
||||
mkcmd('ucg', ['ucg', '-i', pat]),
|
||||
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
|
||||
mkcmd('ag (ignore)', ['ag', '-i', pat]),
|
||||
mkcmd(
|
||||
'git grep',
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', '-i', pat],
|
||||
env={'LC_ALL': 'C'},
|
||||
),
|
||||
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
|
||||
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
|
||||
])
|
||||
|
||||
|
||||
@@ -423,22 +385,159 @@ def bench_subtitles_en_literal(suite_dir):
|
||||
Benchmark the speed of an ASCII string literal.
|
||||
'''
|
||||
require(suite_dir, 'subtitles-en')
|
||||
ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
pat = 'Sherlock Holmes'
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, ru]),
|
||||
Command('rg (no line numbers)', ['rg', pat, ru]),
|
||||
Command('ag', ['ag', '-s', pat, ru]),
|
||||
Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII),
|
||||
Command('grep (no line numbers)', [
|
||||
'grep', '-a', pat, ru,
|
||||
Command('rg', ['rg', pat, en]),
|
||||
Command('pt', ['pt', '-N', pat, en]),
|
||||
Command('sift', ['sift', pat, en]),
|
||||
Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
|
||||
Command('rg (lines)', ['rg', '-n', pat, en]),
|
||||
Command('ag (lines)', ['ag', '-s', pat, en]),
|
||||
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
|
||||
Command('pt (lines)', ['pt', pat, en]),
|
||||
Command('sift (lines)', ['sift', '-n', pat, en]),
|
||||
Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_en_literal_casei(suite_dir):
|
||||
'''
|
||||
Benchmark the speed of an ASCII string literal case insensitively.
|
||||
'''
|
||||
require(suite_dir, 'subtitles-en')
|
||||
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
pat = 'Sherlock Holmes'
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-i', pat, en]),
|
||||
Command('grep', ['grep', '-ai', pat, en], env=GREP_UNICODE),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-ai', pat, en,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
|
||||
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
|
||||
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_en_literal_word(suite_dir):
|
||||
'''
|
||||
Benchmark the speed of finding a literal inside word boundaries.
|
||||
'''
|
||||
require(suite_dir, 'subtitles-en')
|
||||
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
pat = 'Sherlock Holmes'
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg (ASCII)', [
|
||||
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
|
||||
]),
|
||||
Command('ag (ASCII)', ['ag', '-sw', pat, en]),
|
||||
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-anw', pat, en,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg', ['rg', '-nw', pat, en]),
|
||||
Command('grep', ['grep', '-anw', pat, en], env=GREP_UNICODE),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_en_alternate(suite_dir):
|
||||
'''
|
||||
Benchmark the speed of a set of alternate literals.
|
||||
'''
|
||||
require(suite_dir, 'subtitles-en')
|
||||
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
pat = '|'.join([
|
||||
'Sherlock Holmes',
|
||||
'John Watson',
|
||||
'Irene Adler',
|
||||
'Inspector Lestrade',
|
||||
'Professor Moriarty',
|
||||
])
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg (lines)', ['rg', '-n', pat, en]),
|
||||
Command('ag (lines)', ['ag', '-s', pat, en]),
|
||||
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
|
||||
Command('grep (lines)', [
|
||||
'grep', '-E', '-an', pat, en,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg', ['rg', pat, en]),
|
||||
Command('grep', [
|
||||
'grep', '-E', '-a', pat, en,
|
||||
], env=GREP_ASCII),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_en_alternate_casei(suite_dir):
|
||||
'''
|
||||
Benchmark the speed of a set of alternate literals case insensitively.
|
||||
'''
|
||||
require(suite_dir, 'subtitles-en')
|
||||
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
pat = '|'.join([
|
||||
'Sherlock Holmes',
|
||||
'John Watson',
|
||||
'Irene Adler',
|
||||
'Inspector Lestrade',
|
||||
'Professor Moriarty',
|
||||
])
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
|
||||
Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-ani', pat, en,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg', ['rg', '-n', '-i', pat, en]),
|
||||
Command('grep', ['grep', '-E', '-ani', pat, en], env=GREP_UNICODE),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_en_surrounding_words(suite_dir):
|
||||
'''
|
||||
Benchmark a more complex regex with an inner literal.
|
||||
'''
|
||||
require(suite_dir, 'subtitles-en')
|
||||
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
|
||||
pat = r'\w+\s+Holmes\s+\w+'
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, en]),
|
||||
Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
|
||||
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
|
||||
Command('ag (ASCII)', ['ag', '-s', pat, en]),
|
||||
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-an', pat, en,
|
||||
], env=GREP_ASCII),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_en_no_literal(suite_dir):
    '''
    Benchmark the speed of a regex with no literals.

    Note that we don't even try to run grep with Unicode support
    on this one. While it should eventually get the right answer,
    I killed it after it had already been running for two minutes
    and showed no signs of finishing soon.
    '''
    require(suite_dir, 'subtitles-en')
    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
    pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', '-n', pat, en]),
        Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
        Command('ag (ASCII)', ['ag', '-s', pat, en]),
        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
        Command('grep (ASCII)', [
            'grep', '-E', '-an', pat, en,
        ], env=GREP_ASCII),
        # These commands search the English sample like every other
        # command here; this function defines no other corpus path.
        Command('pt', ['pt', pat, en]),
        Command('pt (no line numbers)', ['pt', '-N', pat, en]),
        Command('sift', ['sift', '-n', pat, en]),
        Command('sift (no line numbers)', ['sift', pat, en]),
    ])
|
||||
|
||||
|
||||
@@ -451,18 +550,16 @@ def bench_subtitles_ru_literal(suite_dir):
|
||||
pat = 'Шерлок Холмс' # Sherlock Holmes
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, ru]),
|
||||
Command('rg (no line numbers)', ['rg', pat, ru]),
|
||||
Command('ag', ['ag', '-s', pat, ru]),
|
||||
Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII),
|
||||
Command('grep (no line numbers)', [
|
||||
'grep', '-a', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
Command('pt', ['pt', pat, ru]),
|
||||
Command('pt (no line numbers)', ['pt', '-N', pat, ru]),
|
||||
Command('sift', ['sift', '-n', pat, ru]),
|
||||
Command('sift (no line numbers)', ['sift', pat, ru]),
|
||||
Command('rg', ['rg', pat, ru]),
|
||||
Command('pt', ['pt', '-N', pat, ru]),
|
||||
Command('sift', ['sift', pat, ru]),
|
||||
Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
|
||||
Command('rg (lines)', ['rg', '-n', pat, ru]),
|
||||
Command('ag (lines)', ['ag', '-s', pat, ru]),
|
||||
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('pt (lines)', ['pt', pat, ru]),
|
||||
Command('sift (lines)', ['sift', '-n', pat, ru]),
|
||||
Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
|
||||
])
|
||||
|
||||
|
||||
@@ -475,13 +572,14 @@ def bench_subtitles_ru_literal_casei(suite_dir):
|
||||
pat = 'Шерлок Холмс' # Sherlock Holmes
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', '-i', pat, ru]),
|
||||
Command('ag (not Unicode)', ['ag', '-i', pat, ru]),
|
||||
Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]),
|
||||
Command('grep', ['grep', '-ani', pat, ru], env=GREP_UNICODE),
|
||||
Command('grep (not Unicode)', [
|
||||
'grep', '-E', '-ani', pat, ru,
|
||||
Command('rg', ['rg', '-i', pat, ru]),
|
||||
Command('grep', ['grep', '-ai', pat, ru], env=GREP_UNICODE),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-ai', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
|
||||
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
|
||||
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
|
||||
])
|
||||
|
||||
|
||||
@@ -494,15 +592,15 @@ def bench_subtitles_ru_literal_word(suite_dir):
|
||||
pat = 'Шерлок Холмс' # Sherlock Holmes
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-nw', pat, ru]),
|
||||
Command('rg (not Unicode)', [
|
||||
Command('rg (ASCII)', [
|
||||
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
|
||||
]),
|
||||
Command('ag (not Unicode)', ['ag', '-sw', pat, ru]),
|
||||
Command('ucg (not Unicode)', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep (not Unicode)', [
|
||||
Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
|
||||
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-anw', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg', ['rg', '-nw', pat, ru]),
|
||||
Command('grep', ['grep', '-anw', pat, ru], env=GREP_UNICODE),
|
||||
])
|
||||
|
||||
@@ -522,11 +620,14 @@ def bench_subtitles_ru_alternate(suite_dir):
|
||||
])
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, ru]),
|
||||
Command('rg (no line numbers)', ['rg', pat, ru]),
|
||||
Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_ASCII),
|
||||
Command('grep (no line numbers)', [
|
||||
Command('rg (lines)', ['rg', '-n', pat, ru]),
|
||||
Command('ag (lines)', ['ag', '-s', pat, ru]),
|
||||
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep (lines)', [
|
||||
'grep', '-E', '-an', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg', ['rg', pat, ru]),
|
||||
Command('grep', [
|
||||
'grep', '-E', '-a', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
])
|
||||
@@ -547,12 +648,32 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
|
||||
])
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', '-i', pat, ru]),
|
||||
Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]),
|
||||
Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
|
||||
Command('grep (not Unicode)', [
|
||||
Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
|
||||
Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-ani', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
Command('rg', ['rg', '-n', '-i', pat, ru]),
|
||||
Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
|
||||
])
|
||||
|
||||
|
||||
def bench_subtitles_ru_surrounding_words(suite_dir):
    '''
    Benchmark a more complex regex with an inner literal.

    The pattern matches the literal "Холмс" together with the word on
    either side of it, in the Russian subtitles file.
    '''
    # The search target below is the Russian corpus, so that is the
    # dependency that must be present.
    require(suite_dir, 'subtitles-ru')
    ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME)
    pat = r'\w+\s+Холмс\s+\w+'

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', '-n', pat, ru]),
        Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
        Command('ag (ASCII)', ['ag', '-s', pat, ru]),
        Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
        Command('grep (ASCII)', [
            'grep', '-E', '-an', pat, ru,
        ], env=GREP_ASCII),
    ])
|
||||
|
||||
|
||||
@@ -571,9 +692,10 @@ def bench_subtitles_ru_no_literal(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, ru]),
|
||||
Command('rg (no line numbers)', ['rg', pat, ru]),
|
||||
Command('ucg (no Unicode)', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep (no Unicode)', [
|
||||
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
|
||||
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
|
||||
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-an', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
])
|
||||
@@ -597,6 +719,23 @@ class MissingDependencies(Exception):
|
||||
return 'MissingDependency(%s)' % repr(self.missing_names)
|
||||
|
||||
|
||||
class MissingCommands(Exception):
    '''
    A missing command exception.

    This exception occurs when running a command in a benchmark
    where the command could not be found on the current system.

    :ivar list(str) missing_names:
        The names of the command binaries that could not be found.
    '''
    def __init__(self, missing_names):
        '''
        :param missing_names:
            An iterable of binary names. Duplicates are dropped and the
            result is stored sorted.
        '''
        self.missing_names = sorted(set(missing_names))
        # Populate Exception.args so that repr() is informative and the
        # exception can be re-created from its arguments (e.g. by pickle).
        super().__init__(self.missing_names)

    def __str__(self):
        return 'MissingCommands(%s)' % repr(self.missing_names)
|
||||
|
||||
|
||||
class Benchmark(object):
|
||||
'''
|
||||
A single benchmark corresponding to a grouping of commands.
|
||||
@@ -606,7 +745,8 @@ class Benchmark(object):
|
||||
'''
|
||||
|
||||
def __init__(self, name=None, pattern=None, commands=None,
|
||||
warmup_count=1, count=3, line_count=True):
|
||||
warmup_count=1, count=3, line_count=True,
|
||||
allow_missing_commands=False):
|
||||
'''
|
||||
Create a single benchmark.
|
||||
|
||||
@@ -644,15 +784,37 @@ class Benchmark(object):
|
||||
self.warmup_count = warmup_count
|
||||
self.count = count
|
||||
self.line_count = line_count
|
||||
self.allow_missing_commands = allow_missing_commands
|
||||
|
||||
def raise_if_missing(self):
    '''
    Raises a MissingCommands exception if applicable.

    Every command in this benchmark is probed with ``exists()``. When
    at least one of them could not be found and allow_missing_commands
    is False, a MissingCommands exception carrying the binary names of
    the missing commands is raised. Otherwise this returns normally.
    '''
    missing = []
    for command in self.commands:
        if not command.exists():
            missing.append(command.binary_name)
    if missing and not self.allow_missing_commands:
        raise MissingCommands(missing)
|
||||
|
||||
def run(self):
|
||||
'''
|
||||
Runs this benchmark and returns the results.
|
||||
|
||||
:rtype: Result
|
||||
:raises:
|
||||
MissingCommands if any command doesn't exist.
|
||||
(Unless allow_missing_commands is enabled.)
|
||||
'''
|
||||
self.raise_if_missing()
|
||||
result = Result(self)
|
||||
for cmd in self.commands:
|
||||
if self.allow_missing_commands and not cmd.exists():
|
||||
# Skip this command if we're OK with it.
|
||||
continue
|
||||
# Do a warmup first.
|
||||
for _ in range(self.warmup_count):
|
||||
self.run_one(cmd)
|
||||
@@ -677,6 +839,8 @@ class Benchmark(object):
|
||||
it is the number of lines in the search output.
|
||||
:rtype: int
|
||||
'''
|
||||
if not cmd.exists():
|
||||
raise MissingCommand(cmd.cmd[0])
|
||||
cmd.kwargs['stderr'] = subprocess.DEVNULL
|
||||
if self.line_count:
|
||||
cmd.kwargs['stdout'] = subprocess.PIPE
|
||||
@@ -746,6 +910,8 @@ class Result(object):
|
||||
means = []
|
||||
for cmd in self.benchmark.commands:
|
||||
mean, _ = self.distribution_for(cmd)
|
||||
if mean is None:
|
||||
continue
|
||||
means.append((cmd, mean))
|
||||
return min(means, key=lambda tup: tup[1])[0]
|
||||
|
||||
@@ -768,16 +934,18 @@ class Result(object):
|
||||
'''
|
||||
Returns the distribution (mean +/- std) of the given command.
|
||||
|
||||
If there are no samples for this command (i.e., it was skipped),
|
||||
then return ``(None, None)``.
|
||||
|
||||
:rtype: (float, float)
|
||||
:returns:
|
||||
A tuple containing the mean and standard deviation, in that
|
||||
order.
|
||||
'''
|
||||
mean = statistics.mean(
|
||||
s['duration'] for s in self.samples_for(cmd))
|
||||
stdev = statistics.stdev(
|
||||
s['duration'] for s in self.samples_for(cmd))
|
||||
return mean, stdev
|
||||
samples = list(s['duration'] for s in self.samples_for(cmd))
|
||||
if len(samples) == 0:
|
||||
return None, None
|
||||
return statistics.mean(samples), statistics.stdev(samples)
|
||||
|
||||
|
||||
class Command(object):
|
||||
@@ -807,6 +975,15 @@ class Command(object):
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
|
||||
def exists(self):
    '''
    Returns true if and only if this command exists.

    The lookup is delegated to ``shutil.which`` on the command's
    binary name.
    '''
    located = shutil.which(self.binary_name)
    return located is not None
|
||||
|
||||
@property
def binary_name(self):
    '''
    Return the binary name of this command.

    This is the first element of the command's ``cmd`` sequence.
    '''
    binary = self.cmd[0]
    return binary
|
||||
|
||||
def run(self):
|
||||
'''
|
||||
Runs this command and returns its status.
|
||||
@@ -947,7 +1124,8 @@ def download(suite_dir, choices):
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def collect_benchmarks(suite_dir, filter_pat=None):
|
||||
def collect_benchmarks(suite_dir, filter_pat=None,
|
||||
allow_missing_commands=False):
|
||||
'''
|
||||
Return an iterable of all runnable benchmarks.
|
||||
|
||||
@@ -969,6 +1147,9 @@ def collect_benchmarks(suite_dir, filter_pat=None):
|
||||
continue
|
||||
try:
|
||||
benchmark = globals()[fun](suite_dir)
|
||||
benchmark.name = name
|
||||
benchmark.allow_missing_commands = allow_missing_commands
|
||||
benchmark.raise_if_missing()
|
||||
except MissingDependencies as e:
|
||||
eprint(
|
||||
'missing: %s, skipping benchmark %s (try running with: %s)' % (
|
||||
@@ -976,24 +1157,32 @@ def collect_benchmarks(suite_dir, filter_pat=None):
|
||||
name,
|
||||
' '.join(['--download %s' % n for n in e.missing_names]),
|
||||
))
|
||||
except MissingCommands as e:
|
||||
fmt = 'missing commands: %s, skipping benchmark %s ' \
|
||||
'(run with --allow-missing to run incomplete benchmarks)'
|
||||
eprint(fmt % (', '.join(e.missing_names), name))
|
||||
continue
|
||||
benchmark.name = name
|
||||
yield benchmark
|
||||
|
||||
|
||||
def main():
|
||||
download_choices = ['all', 'linux', 'subtitles-en', 'subtitles-ru']
|
||||
p = argparse.ArgumentParser('Command line search tool benchmark suite.')
|
||||
p.add_argument(
|
||||
'--dir', metavar='PATH', default=os.getcwd(),
|
||||
help='The directory in which to download data and perform searches.')
|
||||
p.add_argument(
|
||||
'--download', metavar='CORPUS', action='append',
|
||||
choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
|
||||
choices=download_choices,
|
||||
help='Download and prepare corpus data, then exit without running '
|
||||
'any benchmarks. Note that this command is intended to be '
|
||||
'idempotent. WARNING: This downloads over a gigabyte of data, '
|
||||
'and also includes building the Linux kernel. If "all" is used '
|
||||
'then the total uncompressed size is around 13 GB.')
|
||||
'then the total uncompressed size is around 13 GB. '
|
||||
'Choices: %s' % ', '.join(download_choices))
|
||||
p.add_argument(
|
||||
'--allow-missing', action='store_true',
|
||||
help='Permit benchmarks to run even if some commands are missing.')
|
||||
p.add_argument(
|
||||
'-f', '--force', action='store_true',
|
||||
help='Overwrite existing files if there is a conflict.')
|
||||
@@ -1009,6 +1198,13 @@ def main():
|
||||
help='A regex pattern that will only run benchmarks that match.')
|
||||
args = p.parse_args()
|
||||
|
||||
if args.list:
|
||||
benchmarks = collect_benchmarks(
|
||||
args.dir, filter_pat=args.bench,
|
||||
allow_missing_commands=args.allow_missing)
|
||||
for b in benchmarks:
|
||||
print(b.name)
|
||||
sys.exit(0)
|
||||
if args.download is not None and len(args.download) > 0:
|
||||
download(args.dir, args.download)
|
||||
sys.exit(0)
|
||||
@@ -1028,7 +1224,9 @@ def main():
|
||||
raw_csv_wtr = csv.DictWriter(raw_handle, fields)
|
||||
raw_csv_wtr.writerow({x: x for x in fields})
|
||||
|
||||
benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
|
||||
benchmarks = collect_benchmarks(
|
||||
args.dir, filter_pat=args.bench,
|
||||
allow_missing_commands=args.allow_missing)
|
||||
for i, b in enumerate(benchmarks):
|
||||
result = b.run()
|
||||
fastest_cmd = result.fastest_cmd()
|
||||
@@ -1042,6 +1240,12 @@ def main():
|
||||
for cmd in b.commands:
|
||||
name = cmd.name
|
||||
mean, stdev = result.distribution_for(cmd)
|
||||
if mean is None:
|
||||
# If we couldn't get a distribution for this command then
|
||||
# it was skipped.
|
||||
print('{name:{pad}} SKIPPED'.format(
|
||||
name=name, pad=max_name_len + 2))
|
||||
continue
|
||||
line_counts = result.line_counts_for(cmd)
|
||||
show_fast_cmd, show_line_counts = '', ''
|
||||
if fastest_cmd.name == cmd.name:
|
11
ctags.rust
11
ctags.rust
@@ -1,11 +0,0 @@
|
||||
--langdef=Rust
|
||||
--langmap=Rust:.rs
|
||||
--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/
|
||||
--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/
|
||||
--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.1.0" #:version
|
||||
version = "0.1.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
|
@@ -62,7 +62,7 @@ impl fmt::Display for Error {
|
||||
match *self {
|
||||
Error::Regex(ref err) => err.fmt(f),
|
||||
Error::LiteralNotAllowed(chr) => {
|
||||
write!(f, "Literal '{}' not allowed.", chr)
|
||||
write!(f, "Literal {:?} not allowed.", chr)
|
||||
}
|
||||
Error::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
|
@@ -10,6 +10,10 @@ use {Error, Result};
|
||||
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
|
||||
/// function panics.
|
||||
pub fn remove(expr: Expr, byte: u8) -> Result<Expr> {
|
||||
// TODO(burntsushi): There is a bug in this routine where only `\n` is
|
||||
// handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated
|
||||
// to proper character classes instead of the special `AnyCharNoNL` and
|
||||
// `AnyByteNoNL` classes.
|
||||
use syntax::Expr::*;
|
||||
assert!(byte <= 0x7F);
|
||||
let chr = byte as char;
|
||||
|
@@ -1 +0,0 @@
|
||||
au BufWritePost *.rs silent!make ctags > /dev/null 2>&1
|
@@ -124,6 +124,7 @@ Less common options:
|
||||
|
||||
--no-ignore
|
||||
Don't respect ignore files (.gitignore, .rgignore, etc.)
|
||||
This implies --no-ignore-parent.
|
||||
|
||||
--no-ignore-parent
|
||||
Don't respect ignore files in parent directories.
|
||||
@@ -338,7 +339,9 @@ impl RawArgs {
|
||||
line_number: !self.flag_no_line_number && self.flag_line_number,
|
||||
mmap: mmap,
|
||||
no_ignore: self.flag_no_ignore,
|
||||
no_ignore_parent: self.flag_no_ignore_parent,
|
||||
no_ignore_parent:
|
||||
// --no-ignore implies --no-ignore-parent
|
||||
self.flag_no_ignore_parent || self.flag_no_ignore,
|
||||
quiet: self.flag_quiet,
|
||||
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
|
||||
text: self.flag_text,
|
||||
|
@@ -21,6 +21,7 @@ additional rules such as whitelists (prefix of `!`) or directory-only globs
|
||||
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
|
||||
// use this exact implementation because hgignore files are different.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
@@ -30,6 +31,7 @@ use std::path::{Path, PathBuf};
|
||||
use regex;
|
||||
|
||||
use glob;
|
||||
use pathutil::strip_prefix;
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
#[derive(Debug)]
|
||||
@@ -110,37 +112,37 @@ impl Gitignore {
|
||||
/// same directory as this gitignore file.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
let mut path = path.as_ref();
|
||||
if let Ok(p) = path.strip_prefix(&self.root) {
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
self.matched_utf8(&*path.to_string_lossy(), is_dir)
|
||||
if let Some(p) = strip_prefix(&self.root, path) {
|
||||
path = p;
|
||||
}
|
||||
self.matched_stripped(path, is_dir)
|
||||
}
|
||||
|
||||
/// Like matched, but takes a path that has already been stripped and
|
||||
/// converted to UTF-8.
|
||||
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
|
||||
// A single regex with a bunch of alternations of glob patterns is
|
||||
// unfortunately typically faster than a regex set, so we use it as a
|
||||
// first pass filter. We still need to run the RegexSet to get the most
|
||||
// recently defined glob that matched.
|
||||
if !self.set.is_match(path) {
|
||||
return Match::None;
|
||||
}
|
||||
// The regex set can't actually pick the right glob that matched all
|
||||
// on its own. In particular, some globs require that only directories
|
||||
// can match. Thus, only accept a match from the regex set if the given
|
||||
// path satisfies the corresponding glob's directory criteria.
|
||||
for i in self.set.matches(path).iter().rev() {
|
||||
let pat = &self.patterns[i];
|
||||
if !pat.only_dir || is_dir {
|
||||
return if pat.whitelist {
|
||||
Match::Whitelist(pat)
|
||||
} else {
|
||||
Match::Ignored(pat)
|
||||
};
|
||||
/// Like matched, but takes a path that has already been stripped.
|
||||
pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
|
||||
thread_local! {
|
||||
static MATCHES: RefCell<Vec<usize>> = {
|
||||
RefCell::new(vec![])
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
};
|
||||
MATCHES.with(|matches| {
|
||||
let mut matches = matches.borrow_mut();
|
||||
self.set.matches_into(path, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let pat = &self.patterns[i];
|
||||
if !pat.only_dir || is_dir {
|
||||
return if pat.whitelist {
|
||||
Match::Whitelist(pat)
|
||||
} else {
|
||||
Match::Ignored(pat)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore patterns.
|
||||
@@ -390,6 +392,7 @@ mod tests {
|
||||
ignored!(ig23, ROOT, "foo", "./foo");
|
||||
ignored!(ig24, ROOT, "target", "grep/target");
|
||||
ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
|
||||
ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
|
||||
|
||||
not_ignored!(ignot1, ROOT, "amonths", "months");
|
||||
not_ignored!(ignot2, ROOT, "monthsa", "months");
|
||||
|
461
src/glob.rs
461
src/glob.rs
@@ -26,13 +26,22 @@ to make its way into `glob` proper.
|
||||
// at the .gitignore for the chromium repo---just about every pattern satisfies
|
||||
// that assumption.)
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fmt;
|
||||
use std::hash;
|
||||
use std::iter;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use fnv;
|
||||
use regex;
|
||||
use regex::bytes::{Regex, RegexSet, SetMatches};
|
||||
use regex::bytes::Regex;
|
||||
use regex::bytes::RegexSet;
|
||||
|
||||
use pathutil::file_name;
|
||||
|
||||
/// Represents an error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
@@ -71,33 +80,181 @@ impl fmt::Display for Error {
|
||||
}
|
||||
}
|
||||
|
||||
/// SetYesNo represents a group of globs that can be matched together in a
|
||||
/// single pass. SetYesNo can only determine whether a particular path matched
|
||||
/// any pattern in the set.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct SetYesNo {
|
||||
re: Regex,
|
||||
}
|
||||
|
||||
impl SetYesNo {
|
||||
/// Returns true if and only if the given path matches at least one glob
|
||||
/// in this set.
|
||||
pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
|
||||
self.re.is_match(&*path_bytes(path.as_ref()))
|
||||
}
|
||||
|
||||
fn new(
|
||||
pats: &[(Pattern, MatchOptions)],
|
||||
) -> Result<SetYesNo, regex::Error> {
|
||||
let mut joined = String::new();
|
||||
for &(ref p, ref o) in pats {
|
||||
let part = format!("(?:{})", p.to_regex_with(o));
|
||||
if !joined.is_empty() {
|
||||
joined.push('|');
|
||||
}
|
||||
joined.push_str(&part);
|
||||
}
|
||||
Ok(SetYesNo { re: try!(Regex::new(&joined)) })
|
||||
}
|
||||
}
|
||||
|
||||
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
|
||||
|
||||
/// Set represents a group of globs that can be matched together in a single
|
||||
/// pass.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Set {
|
||||
re: Regex,
|
||||
set: RegexSet,
|
||||
yesno: SetYesNo,
|
||||
exts: HashMap<OsString, Vec<usize>, Fnv>,
|
||||
literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
|
||||
base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
|
||||
base_prefixes: Vec<Vec<u8>>,
|
||||
base_prefixes_map: Vec<usize>,
|
||||
base_suffixes: Vec<Vec<u8>>,
|
||||
base_suffixes_map: Vec<usize>,
|
||||
base_regexes: RegexSet,
|
||||
base_regexes_map: Vec<usize>,
|
||||
regexes: RegexSet,
|
||||
regexes_map: Vec<usize>,
|
||||
}
|
||||
|
||||
impl Set {
|
||||
/// Returns true if and only if the given path matches at least one glob
|
||||
/// in this set.
|
||||
pub fn is_match<T: AsRef<[u8]>>(&self, path: T) -> bool {
|
||||
self.re.is_match(path.as_ref())
|
||||
}
|
||||
|
||||
/// Returns every glob pattern (by sequence number) that matches the given
|
||||
/// path.
|
||||
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
|
||||
// TODO(burntsushi): If we split this out into a separate crate, don't
|
||||
// expose the regex::SetMatches type in the public API.
|
||||
self.set.matches(path.as_ref())
|
||||
}
|
||||
|
||||
/// Returns the number of glob patterns in this set.
|
||||
/// Returns the sequence number of every glob pattern that matches the
|
||||
/// given path.
|
||||
#[allow(dead_code)]
|
||||
pub fn len(&self) -> usize {
|
||||
self.set.len()
|
||||
pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
|
||||
let mut into = vec![];
|
||||
self.matches_into(path, &mut into);
|
||||
into
|
||||
}
|
||||
|
||||
/// Adds the sequence number of every glob pattern that matches the given
|
||||
/// path to the vec given.
|
||||
pub fn matches_into<T: AsRef<Path>>(
|
||||
&self,
|
||||
path: T,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
into.clear();
|
||||
let path = path.as_ref();
|
||||
let path_bytes = &*path_bytes(path);
|
||||
let basename = file_name(path).map(|b| os_str_bytes(b));
|
||||
if !self.yesno.is_match(path) {
|
||||
return;
|
||||
}
|
||||
if !self.exts.is_empty() {
|
||||
if let Some(ext) = path.extension() {
|
||||
if let Some(matches) = self.exts.get(ext) {
|
||||
into.extend(matches.as_slice());
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.literals.is_empty() {
|
||||
if let Some(matches) = self.literals.get(path_bytes) {
|
||||
into.extend(matches.as_slice());
|
||||
}
|
||||
}
|
||||
if !self.base_literals.is_empty() {
|
||||
if let Some(ref basename) = basename {
|
||||
if let Some(matches) = self.base_literals.get(&**basename) {
|
||||
into.extend(matches.as_slice());
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.base_prefixes.is_empty() {
|
||||
if let Some(ref basename) = basename {
|
||||
let basename = &**basename;
|
||||
for (i, pre) in self.base_prefixes.iter().enumerate() {
|
||||
if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] {
|
||||
into.push(self.base_prefixes_map[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.base_suffixes.is_empty() {
|
||||
if let Some(ref basename) = basename {
|
||||
let basename = &**basename;
|
||||
for (i, suf) in self.base_suffixes.iter().enumerate() {
|
||||
if suf.len() > basename.len() {
|
||||
continue;
|
||||
}
|
||||
let (s, e) = (basename.len() - suf.len(), basename.len());
|
||||
if &**suf == &basename[s..e] {
|
||||
into.push(self.base_suffixes_map[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if let Some(ref basename) = basename {
|
||||
for i in self.base_regexes.matches(&**basename) {
|
||||
into.push(self.base_regexes_map[i]);
|
||||
}
|
||||
}
|
||||
for i in self.regexes.matches(path_bytes) {
|
||||
into.push(self.regexes_map[i]);
|
||||
}
|
||||
into.sort();
|
||||
}
|
||||
|
||||
fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> {
|
||||
let fnv = Fnv::default();
|
||||
let mut exts = HashMap::with_hasher(fnv.clone());
|
||||
let mut literals = HashMap::with_hasher(fnv.clone());
|
||||
let mut base_literals = HashMap::with_hasher(fnv.clone());
|
||||
let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
|
||||
let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
|
||||
let (mut regexes, mut regexes_map) = (vec![], vec![]);
|
||||
let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
|
||||
for (i, &(ref p, ref o)) in pats.iter().enumerate() {
|
||||
if let Some(ext) = p.ext() {
|
||||
exts.entry(ext).or_insert(vec![]).push(i);
|
||||
} else if let Some(literal) = p.literal() {
|
||||
literals.entry(literal.into_bytes()).or_insert(vec![]).push(i);
|
||||
} else if let Some(literal) = p.base_literal() {
|
||||
base_literals
|
||||
.entry(literal.into_bytes()).or_insert(vec![]).push(i);
|
||||
} else if let Some(literal) = p.base_literal_prefix() {
|
||||
base_prefixes.push(literal.into_bytes());
|
||||
base_prefixes_map.push(i);
|
||||
} else if let Some(literal) = p.base_literal_suffix() {
|
||||
base_suffixes.push(literal.into_bytes());
|
||||
base_suffixes_map.push(i);
|
||||
} else if p.is_only_basename() {
|
||||
let part = format!("(?:{})", p.to_regex_with(o));
|
||||
base_regexes.push(part);
|
||||
base_regexes_map.push(i);
|
||||
} else {
|
||||
let part = format!("(?:{})", p.to_regex_with(o));
|
||||
regexes.push(part);
|
||||
regexes_map.push(i);
|
||||
}
|
||||
}
|
||||
Ok(Set {
|
||||
yesno: try!(SetYesNo::new(pats)),
|
||||
exts: exts,
|
||||
literals: literals,
|
||||
base_literals: base_literals,
|
||||
base_prefixes: base_prefixes,
|
||||
base_prefixes_map: base_prefixes_map,
|
||||
base_suffixes: base_suffixes,
|
||||
base_suffixes_map: base_suffixes_map,
|
||||
base_regexes: try!(RegexSet::new(base_regexes)),
|
||||
base_regexes_map: base_regexes_map,
|
||||
regexes: try!(RegexSet::new(regexes)),
|
||||
regexes_map: regexes_map,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -119,19 +276,12 @@ impl SetBuilder {
|
||||
///
|
||||
/// Once a matcher is built, no new patterns can be added to it.
|
||||
pub fn build(&self) -> Result<Set, regex::Error> {
|
||||
let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o));
|
||||
let set = try!(RegexSet::new(it));
|
||||
Set::new(&self.pats)
|
||||
}
|
||||
|
||||
let mut joined = String::new();
|
||||
for &(ref p, ref o) in &self.pats {
|
||||
let part = format!("(?:{})", p.to_regex_with(o));
|
||||
if !joined.is_empty() {
|
||||
joined.push('|');
|
||||
}
|
||||
joined.push_str(&part);
|
||||
}
|
||||
let re = try!(Regex::new(&joined));
|
||||
Ok(Set { re: re, set: set })
|
||||
/// Like `build`, but returns a matcher that can only answer yes/no.
|
||||
pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> {
|
||||
SetYesNo::new(&self.pats)
|
||||
}
|
||||
|
||||
/// Add a new pattern to this set.
|
||||
@@ -149,8 +299,21 @@ impl SetBuilder {
|
||||
pat: &str,
|
||||
opts: &MatchOptions,
|
||||
) -> Result<(), Error> {
|
||||
let pat = try!(Pattern::new(pat));
|
||||
self.pats.push((pat, opts.clone()));
|
||||
let parsed = try!(Pattern::new(pat));
|
||||
// if let Some(ext) = parsed.ext() {
|
||||
// eprintln!("ext :: {:?} :: {:?}", ext, pat);
|
||||
// } else if let Some(lit) = parsed.literal() {
|
||||
// eprintln!("literal :: {:?} :: {:?}", lit, pat);
|
||||
// } else if let Some(lit) = parsed.base_literal() {
|
||||
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
|
||||
// } else if let Some(lit) = parsed.base_literal_prefix() {
|
||||
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
|
||||
// } else if let Some(lit) = parsed.base_literal_suffix() {
|
||||
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
|
||||
// } else {
|
||||
// eprintln!("regex :: {:?} :: {:?}", pat, parsed);
|
||||
// }
|
||||
self.pats.push((parsed, opts.clone()));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
@@ -204,6 +367,133 @@ impl Pattern {
|
||||
Ok(p.p)
|
||||
}
|
||||
|
||||
/// Returns an extension if this pattern exclusively matches it.
|
||||
pub fn ext(&self) -> Option<OsString> {
|
||||
if self.tokens.len() <= 3 {
|
||||
return None;
|
||||
}
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
match self.tokens.get(1) {
|
||||
Some(&Token::ZeroOrMore) => {}
|
||||
_ => return None,
|
||||
}
|
||||
match self.tokens.get(2) {
|
||||
Some(&Token::Literal(c)) if c == '.' => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = OsString::new();
|
||||
for t in self.tokens[3..].iter() {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' || c == '.' => {
|
||||
return None;
|
||||
}
|
||||
Token::Literal(c) => lit.push(c.to_string()),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns the pattern as a literal if and only if the pattern exclusiely
|
||||
/// matches the basename of a file path *and* is a literal.
|
||||
///
|
||||
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
|
||||
/// does not contain a path separator.
|
||||
pub fn base_literal(&self) -> Option<String> {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[1..] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return None,
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this pattern only inspects the basename
|
||||
/// of a path.
|
||||
pub fn is_only_basename(&self) -> bool {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return false,
|
||||
}
|
||||
for t in &self.tokens[1..] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return false,
|
||||
Token::RecursivePrefix
|
||||
| Token::RecursiveSuffix
|
||||
| Token::RecursiveZeroOrMore => return false,
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
true
|
||||
}
|
||||
|
||||
/// Returns the pattern as a literal if and only if the pattern must match
|
||||
/// an entire path exactly.
|
||||
///
|
||||
/// The basic format of these patterns is `{literal}`.
|
||||
pub fn literal(&self) -> Option<String> {
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens {
|
||||
match *t {
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns a basename literal prefix of this pattern.
|
||||
pub fn base_literal_prefix(&self) -> Option<String> {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
match self.tokens.last() {
|
||||
Some(&Token::ZeroOrMore) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[1..self.tokens.len()-1] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return None,
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Returns a basename literal suffix of this pattern.
|
||||
pub fn base_literal_suffix(&self) -> Option<String> {
|
||||
match self.tokens.get(0) {
|
||||
Some(&Token::RecursivePrefix) => {}
|
||||
_ => return None,
|
||||
}
|
||||
match self.tokens.get(1) {
|
||||
Some(&Token::ZeroOrMore) => {}
|
||||
_ => return None,
|
||||
}
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[2..] {
|
||||
match *t {
|
||||
Token::Literal(c) if c == '/' || c == '\\' => return None,
|
||||
Token::Literal(c) => lit.push(c),
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
Some(lit)
|
||||
}
|
||||
|
||||
/// Convert this pattern to a string that is guaranteed to be a valid
|
||||
/// regular expression and will represent the matching semantics of this
|
||||
/// glob pattern. This uses a default set of options.
|
||||
@@ -415,13 +705,34 @@ impl<'a> Parser<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn path_bytes(path: &Path) -> Cow<[u8]> {
|
||||
os_str_bytes(path.as_os_str())
|
||||
}
|
||||
|
||||
/// Returns the raw bytes of an OS string.
///
/// On Unix the bytes are borrowed directly from `s`; nothing is copied.
#[cfg(unix)]
fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = s.as_bytes();
    Cow::Borrowed(raw)
}

/// Returns the bytes of an OS string after a lossy conversion to UTF-8.
///
/// The bytes are borrowed when the lossy conversion did not need to
/// allocate, and owned otherwise.
#[cfg(not(unix))]
fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
    // TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even
    // if we could get at the raw bytes, they wouldn't be useful. We *must*
    // convert to UTF-8 before doing path matching. Unfortunate, but necessary.
    match s.to_string_lossy() {
        Cow::Borrowed(valid) => Cow::Borrowed(valid.as_bytes()),
        Cow::Owned(repaired) => Cow::Owned(repaired.into_bytes()),
    }
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use super::{Error, Pattern, MatchOptions, SetBuilder, Token};
|
||||
use super::{Error, Pattern, MatchOptions, Set, SetBuilder, Token};
|
||||
use super::Token::*;
|
||||
|
||||
macro_rules! syntax {
|
||||
@@ -483,14 +794,42 @@ mod tests {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
let path = &Path::new($path).to_str().unwrap();
|
||||
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
|
||||
// println!("PATTERN: {}", $pat);
|
||||
// println!("REGEX: {:?}", re);
|
||||
// println!("PATH: {}", path);
|
||||
assert!(!re.is_match(path.as_bytes()));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! ext {
|
||||
($name:ident, $pat:expr, $ext:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
let ext = pat.ext().map(|e| e.to_string_lossy().into_owned());
|
||||
assert_eq!($ext, ext.as_ref().map(|s| &**s));
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! baseliteral {
|
||||
($name:ident, $pat:expr, $yes:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
assert_eq!($yes, pat.base_literal().is_some());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! basesuffix {
|
||||
($name:ident, $pat:expr, $yes:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
assert_eq!($yes, pat.is_literal_suffix());
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
fn class(s: char, e: char) -> Token {
|
||||
Class { negated: false, ranges: vec![(s, e)] }
|
||||
}
|
||||
@@ -585,6 +924,26 @@ mod tests {
|
||||
toregex!(re10, "+", r"^\+$");
|
||||
toregex!(re11, "**", r"^.*$");
|
||||
|
||||
ext!(ext1, "**/*.rs", Some("rs"));
|
||||
|
||||
baseliteral!(lit1, "**", true);
|
||||
baseliteral!(lit2, "**/a", true);
|
||||
baseliteral!(lit3, "**/ab", true);
|
||||
baseliteral!(lit4, "**/a*b", false);
|
||||
baseliteral!(lit5, "z/**/a*b", false);
|
||||
baseliteral!(lit6, "[ab]", false);
|
||||
baseliteral!(lit7, "?", false);
|
||||
|
||||
/*
|
||||
issuffix!(suf1, "", false);
|
||||
issuffix!(suf2, "a", true);
|
||||
issuffix!(suf3, "ab", true);
|
||||
issuffix!(suf4, "*ab", true);
|
||||
issuffix!(suf5, "*.ab", true);
|
||||
issuffix!(suf6, "?.ab", true);
|
||||
issuffix!(suf7, "ab*", false);
|
||||
*/
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
matches!(match3, "a*b*c", "abc");
|
||||
@@ -681,16 +1040,22 @@ mod tests {
|
||||
builder.add("src/lib.rs").unwrap();
|
||||
let set = builder.build().unwrap();
|
||||
|
||||
assert!(set.is_match("foo.c"));
|
||||
assert!(set.is_match("src/foo.c"));
|
||||
assert!(!set.is_match("foo.rs"));
|
||||
assert!(!set.is_match("tests/foo.rs"));
|
||||
assert!(set.is_match("src/foo.rs"));
|
||||
assert!(set.is_match("src/grep/src/main.rs"));
|
||||
fn is_match(set: &Set, s: &str) -> bool {
|
||||
let mut matches = vec![];
|
||||
set.matches_into(s, &mut matches);
|
||||
!matches.is_empty()
|
||||
}
|
||||
|
||||
assert_eq!(2, set.matches("src/lib.rs").iter().count());
|
||||
assert!(set.matches("src/lib.rs").matched(0));
|
||||
assert!(!set.matches("src/lib.rs").matched(1));
|
||||
assert!(set.matches("src/lib.rs").matched(2));
|
||||
assert!(is_match(&set, "foo.c"));
|
||||
assert!(is_match(&set, "src/foo.c"));
|
||||
assert!(!is_match(&set, "foo.rs"));
|
||||
assert!(!is_match(&set, "tests/foo.rs"));
|
||||
assert!(is_match(&set, "src/foo.rs"));
|
||||
assert!(is_match(&set, "src/grep/src/main.rs"));
|
||||
|
||||
let matches = set.matches("src/lib.rs");
|
||||
assert_eq!(2, matches.len());
|
||||
assert_eq!(0, matches[0]);
|
||||
assert_eq!(2, matches[1]);
|
||||
}
|
||||
}
|
||||
|
@@ -19,11 +19,11 @@ use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
|
||||
use pathutil::is_hidden;
|
||||
use types::Types;
|
||||
|
||||
const IGNORE_NAMES: &'static [&'static str] = &[
|
||||
".gitignore",
|
||||
".agignore",
|
||||
".rgignore",
|
||||
];
|
||||
|
||||
@@ -83,7 +83,10 @@ pub struct Ignore {
|
||||
overrides: Overrides,
|
||||
/// A file type matcher.
|
||||
types: Types,
|
||||
/// Whether to ignore hidden files or not.
|
||||
ignore_hidden: bool,
|
||||
/// When true, don't look at .gitignore or .agignore files for ignore
|
||||
/// rules.
|
||||
no_ignore: bool,
|
||||
}
|
||||
|
||||
@@ -208,15 +211,17 @@ impl Ignore {
|
||||
debug!("{} ignored because it is hidden", path.display());
|
||||
return true;
|
||||
}
|
||||
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
|
||||
let mat = id.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
if !self.no_ignore {
|
||||
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
|
||||
let mat = id.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
// If this path is whitelisted by an ignore, then
|
||||
// fallthrough and let the file type matcher have a say.
|
||||
break;
|
||||
}
|
||||
// If this path is whitelisted by an ignore, then fallthrough
|
||||
// and let the file type matcher have a say.
|
||||
break;
|
||||
}
|
||||
}
|
||||
let mat = self.types.matched(path, is_dir);
|
||||
@@ -361,8 +366,7 @@ impl Overrides {
|
||||
let path = path.as_ref();
|
||||
self.gi.as_ref()
|
||||
.map(|gi| {
|
||||
let path = &*path.to_string_lossy();
|
||||
let mat = gi.matched_utf8(path, is_dir).invert();
|
||||
let mat = gi.matched_stripped(path, is_dir).invert();
|
||||
if mat.is_none() && !is_dir {
|
||||
if gi.num_ignores() > 0 {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
@@ -374,14 +378,6 @@ impl Overrides {
|
||||
}
|
||||
}
|
||||
|
||||
fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
if let Some(name) = path.as_ref().file_name() {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
23
src/main.rs
23
src/main.rs
@@ -1,6 +1,7 @@
|
||||
extern crate crossbeam;
|
||||
extern crate deque;
|
||||
extern crate docopt;
|
||||
extern crate env_logger;
|
||||
extern crate fnv;
|
||||
extern crate grep;
|
||||
#[cfg(windows)]
|
||||
extern crate kernel32;
|
||||
@@ -28,7 +29,7 @@ use std::result;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
|
||||
use crossbeam::sync::chase_lev::{self, Steal, Stealer};
|
||||
use deque::{Stealer, Stolen};
|
||||
use grep::Grep;
|
||||
use memmap::{Mmap, Protection};
|
||||
use term::Terminal;
|
||||
@@ -36,6 +37,7 @@ use walkdir::DirEntry;
|
||||
|
||||
use args::Args;
|
||||
use out::{ColoredTerminal, Out};
|
||||
use pathutil::strip_prefix;
|
||||
use printer::Printer;
|
||||
use search_stream::InputBuffer;
|
||||
#[cfg(windows)]
|
||||
@@ -60,6 +62,7 @@ mod gitignore;
|
||||
mod glob;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod pathutil;
|
||||
mod printer;
|
||||
mod search_buffer;
|
||||
mod search_stream;
|
||||
@@ -97,8 +100,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let mut workers = vec![];
|
||||
|
||||
let mut workq = {
|
||||
let (workq, stealer) = chase_lev::deque();
|
||||
let workq = {
|
||||
let (workq, stealer) = deque::new();
|
||||
for _ in 0..args.threads() {
|
||||
let worker = MultiWorker {
|
||||
chan_work: stealer.clone(),
|
||||
@@ -215,10 +218,10 @@ impl MultiWorker {
|
||||
fn run(mut self) -> u64 {
|
||||
loop {
|
||||
let work = match self.chan_work.steal() {
|
||||
Steal::Empty | Steal::Abort => continue,
|
||||
Steal::Data(Work::Quit) => break,
|
||||
Steal::Data(Work::Stdin) => WorkReady::Stdin,
|
||||
Steal::Data(Work::File(ent)) => {
|
||||
Stolen::Empty | Stolen::Abort => continue,
|
||||
Stolen::Data(Work::Quit) => break,
|
||||
Stolen::Data(Work::Stdin) => WorkReady::Stdin,
|
||||
Stolen::Data(Work::File(ent)) => {
|
||||
match File::open(ent.path()) {
|
||||
Ok(file) => WorkReady::DirFile(ent, file),
|
||||
Err(err) => {
|
||||
@@ -257,7 +260,7 @@ impl Worker {
|
||||
}
|
||||
WorkReady::DirFile(ent, file) => {
|
||||
let mut path = ent.path();
|
||||
if let Ok(p) = path.strip_prefix("./") {
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
if self.args.mmap() {
|
||||
@@ -268,7 +271,7 @@ impl Worker {
|
||||
}
|
||||
WorkReady::PathFile(path, file) => {
|
||||
let mut path = &*path;
|
||||
if let Ok(p) = path.strip_prefix("./") {
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
if self.args.mmap() {
|
||||
|
98
src/pathutil.rs
Normal file
98
src/pathutil.rs
Normal file
@@ -0,0 +1,98 @@
|
||||
/*!
|
||||
The pathutil module provides platform specific operations on paths that are
|
||||
typically faster than the same operations as provided in std::path. In
|
||||
particular, we really want to avoid the costly operation of parsing the path
|
||||
into its constituent components. We give up on Windows, but on Unix, we deal
|
||||
with the raw bytes directly.
|
||||
|
||||
On large repositories (like chromium), this can have a ~25% performance
|
||||
improvement on just listing the files to search (!).
|
||||
*/
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
use memchr::memrchr;
|
||||
|
||||
/// Strip `prefix` from the `path` and return the remainder.
///
/// The comparison is done on the raw bytes of both paths rather than on
/// parsed components, so `prefix` must match the start of `path` byte for
/// byte (e.g. `./` strips `./foo` down to `foo`).
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path>>(
    prefix: P,
    path: &'a Path,
) -> Option<&'a Path> {
    use std::os::unix::ffi::OsStrExt;

    let prefix = prefix.as_ref().as_os_str().as_bytes();
    let path = path.as_os_str().as_bytes();
    if path.starts_with(prefix) {
        Some(Path::new(OsStr::from_bytes(&path[prefix.len()..])))
    } else {
        None
    }
}

/// Strip `prefix` from the `path` and return the remainder.
///
/// This defers to `Path::strip_prefix`. It accepts the same kinds of
/// `prefix` argument as the Unix implementation so that callers such as
/// `strip_prefix("./", path)` compile on every platform.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path>>(
    prefix: P,
    path: &'a Path,
) -> Option<&'a Path> {
    path.strip_prefix(prefix.as_ref()).ok()
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(not(unix))]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
path.as_ref().file_name()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(not(unix))]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
if let Some(name) = file_name(path) {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
12
src/types.rs
12
src/types.rs
@@ -151,8 +151,8 @@ impl FileTypeDef {
|
||||
/// Types is a file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
selected: Option<glob::Set>,
|
||||
negated: Option<glob::Set>,
|
||||
selected: Option<glob::SetYesNo>,
|
||||
negated: Option<glob::SetYesNo>,
|
||||
has_selected: bool,
|
||||
unmatched_pat: Pattern,
|
||||
}
|
||||
@@ -165,8 +165,8 @@ impl Types {
|
||||
/// If has_selected is true, then at least one file type was selected.
|
||||
/// Therefore, any non-matches should be ignored.
|
||||
fn new(
|
||||
selected: Option<glob::Set>,
|
||||
negated: Option<glob::Set>,
|
||||
selected: Option<glob::SetYesNo>,
|
||||
negated: Option<glob::SetYesNo>,
|
||||
has_selected: bool,
|
||||
) -> Types {
|
||||
Types {
|
||||
@@ -268,7 +268,7 @@ impl TypesBuilder {
|
||||
try!(bset.add_with(glob, &opts));
|
||||
}
|
||||
}
|
||||
Some(try!(bset.build()))
|
||||
Some(try!(bset.build_yesno()))
|
||||
};
|
||||
let negated_globs =
|
||||
if self.negated.is_empty() {
|
||||
@@ -287,7 +287,7 @@ impl TypesBuilder {
|
||||
try!(bset.add_with(glob, &opts));
|
||||
}
|
||||
}
|
||||
Some(try!(bset.build()))
|
||||
Some(try!(bset.build_yesno()))
|
||||
};
|
||||
Ok(Types::new(
|
||||
selected_globs, negated_globs, !self.selected.is_empty()))
|
||||
|
@@ -26,6 +26,7 @@ impl Iter {
|
||||
}
|
||||
|
||||
/// Returns true if this entry should be skipped.
|
||||
#[inline(always)]
|
||||
fn skip_entry(&self, ent: &DirEntry) -> bool {
|
||||
if ent.depth() == 0 {
|
||||
// Never skip the root directory.
|
||||
@@ -41,6 +42,7 @@ impl Iter {
|
||||
impl Iterator for Iter {
|
||||
type Item = DirEntry;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<DirEntry> {
|
||||
while let Some(ev) = self.it.next() {
|
||||
match ev {
|
||||
@@ -108,6 +110,7 @@ impl From<WalkDir> for WalkEventIter {
|
||||
impl Iterator for WalkEventIter {
|
||||
type Item = walkdir::Result<WalkEvent>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||
let dent = self.next.take().or_else(|| self.it.next());
|
||||
let depth = match dent {
|
||||
|
Reference in New Issue
Block a user