Compare commits

...

27 Commits

Author SHA1 Message Date
Andrew Gallant
8f87a4e8ac 0.1.2 2016-09-17 11:36:11 -04:00
Andrew Gallant
d27d3e675f bump grep 2016-09-17 11:34:27 -04:00
Andrew Gallant
bf5d873099 grep 0.1.1 2016-09-17 11:32:47 -04:00
Andrew Gallant
bc9d12c4c8 Improve ergonomics of benchsuite.
The runner now detects if commands exist and permits running incomplete
benchmarks.

Also, explicitly use Python 3 since that's what default Ubuntu 16.04 seems
to want.
2016-09-17 11:30:01 -04:00
Andrew Gallant
5a0c873f61 Fixing, polishing and adding benchmarks. 2016-09-16 21:02:46 -04:00
Andrew Gallant
65fec147d6 rename 2016-09-16 18:27:34 -04:00
Andrew Gallant
7fbf2f014c Reorganize some files. 2016-09-16 18:22:35 -04:00
Andrew Gallant
d22a3ca3e5 Improve the "bad literal" error message.
Incidentally, this was done by using the Debug impl for `char` instead
of the Display impl. Cute.

Fixes #5.
2016-09-16 18:12:00 -04:00
Andrew Gallant
e9ec52b7f9 Update walkdir 2016-09-16 17:56:44 -04:00
Andrew Gallant
0d14c74e63 Some minor performance tweaks.
This includes moving basename-only globs into separate regexes. The hope
is that if the regex processes less input, it will be faster.
2016-09-16 16:13:28 -04:00
Andrew Gallant
1c5884b2f9 try again... 2016-09-16 07:12:06 -04:00
Andrew Gallant
8203a80ac7 fix tests 2016-09-16 06:58:10 -04:00
Andrew Gallant
0e46171e3b Rework glob sets.
We try to reduce the pressure on regexes and offload some of it to
Aho-Corasick or exact lookups.
2016-09-15 22:06:04 -04:00
Andrew Gallant
f5c85827ce Don't traverse directory stack if we don't need to. 2016-09-15 12:40:28 -04:00
Andrew Gallant
7cefc55238 Remove .agignore from ignore file list. 2016-09-15 12:40:08 -04:00
Andrew Gallant
92c918ebd9 --no-ignore implies --no-ignore-parent 2016-09-14 14:33:37 -04:00
Andrew Gallant
c24f8fd50f Replace crossbeam with deque.
deque appears faster.
2016-09-14 07:40:46 -04:00
Andrew Gallant
73272cf8a6 notice 2016-09-13 21:23:22 -04:00
Andrew Gallant
4212a8b9cb 0.1.1 2016-09-13 21:21:45 -04:00
Andrew Gallant
983c7fd6f9 We don't use thread_local any more, so remove it. 2016-09-13 21:21:36 -04:00
Andrew Gallant
7cd02e9b7e update Cargo.toml description 2016-09-13 21:16:29 -04:00
Andrew Gallant
5fdfae2f15 add readme 2016-09-13 21:15:10 -04:00
Andrew Gallant
7057ee91de update grep Cargo.toml 2016-09-13 21:13:33 -04:00
Andrew Gallant
fdca74148d Stream results when feasible.
For example, when only a single file (or stdin) is being searched, then we
should be able to print directly to the terminal instead of intermediate
buffers. (The buffers are only necessary for parallelism.)

Closes #4.
2016-09-13 21:11:46 -04:00
Andrew Gallant
f11d9fb922 Add a word benchmark.
Add ag to case insensitive benchmark.
2016-09-12 19:35:59 -04:00
Andrew Gallant
1115c23a4c fix typos 2016-09-11 19:50:16 -04:00
Andrew Gallant
8c5eaa40b2 teaser 2016-09-11 19:27:50 -04:00
28 changed files with 1544 additions and 999 deletions

Cargo.lock (generated)

@@ -1,24 +1,24 @@
[root]
name = "ripgrep"
version = "0.1.0"
version = "0.1.2"
dependencies = [
"crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"grep 0.1.0",
"grep 0.1.1",
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -31,9 +31,12 @@ dependencies = [
]
[[package]]
name = "crossbeam"
version = "0.2.10"
name = "deque"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "docopt"
@@ -41,20 +44,25 @@ version = "0.6.83"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
"strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "env_logger"
version = "0.3.4"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "fnv"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "fs2"
version = "0.2.5"
@@ -72,12 +80,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "grep"
version = "0.1.0"
version = "0.1.1"
dependencies = [
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -126,7 +134,15 @@ dependencies = [
[[package]]
name = "num_cpus"
version = "1.0.0"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "rand"
version = "0.3.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -134,14 +150,14 @@ dependencies = [
[[package]]
name = "regex"
version = "0.1.76"
version = "0.1.77"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -185,7 +201,7 @@ dependencies = [
[[package]]
name = "thread_local"
version = "0.2.6"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -198,7 +214,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "walkdir"
version = "0.1.6"
version = "0.1.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -217,9 +233,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
"checksum docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42c6077823a361410c37d47c2535b73a190cbe10838dc4f400fe87c10c8c3b"
"checksum env_logger 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "82dcb9ceed3868a03b335657b85a159736c961900f7e7747d3b0b97b9ccb5ccb"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8e8af7b5408ab0c4910cad114c8f9eb454bf75df7afe8964307eeafb68a13a5e"
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
@@ -228,16 +245,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
"checksum num_cpus 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a859041cbf7a70ea1ece4b87d1a2c6ef364dcb68749c88db1f97304b9ec09d5f"
"checksum regex 0.1.76 (registry+https://github.com/rust-lang/crates.io-index)" = "63b49f873f36ddc838d773972511e5fed2ef7350885af07d58e2f48ce8073dcd"
"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
"checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a"
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
"checksum thread_local 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "55dd963dbaeadc08aa7266bf7f91c3154a7805e32bb94b820b769d2ef3b4744d"
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
"checksum walkdir 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "d42144c31c9909882ce76e696b306b88a5b091721251137d5d522d1ef3da7cf9"
"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"


@@ -1,10 +1,10 @@
[package]
publish = false
name = "ripgrep"
version = "0.1.0" #:version
version = "0.1.2" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Line oriented search tool using Rust's regex library.
Line oriented search tool using Rust's regex library. Combines the raw
performance of grep with the usability of the silver searcher.
"""
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
@@ -23,10 +23,11 @@ name = "integration"
path = "tests/tests.rs"
[dependencies]
crossbeam = "0.2"
deque = "0.3"
docopt = "0.6"
env_logger = "0.3"
grep = { version = "0.1", path = "grep" }
fnv = "1.0"
grep = { version = "0.1.1", path = "grep" }
lazy_static = "0.2"
libc = "0.2"
log = "0.3"
@@ -36,7 +37,6 @@ num_cpus = "1"
regex = "0.1.76"
rustc-serialize = "0.3"
term = "0.4"
thread_local = "0.2"
walkdir = "0.1"
[target.'cfg(windows)'.dependencies]


@@ -1,14 +0,0 @@
all:
echo Nothing to do...
ctags:
ctags --options=ctags.rust --languages=Rust src/*.rs src/*/*.rs
docs:
cargo doc
in-dir ./target/doc fix-perms
rscp ./target/doc/* gopher:~/www/burntsushi.net/rustdoc/
push:
git push origin master
git push github master


@@ -1,5 +1,6 @@
rep
---
A faster `grep`, written in Rust using the
[`regex`](https://github.com/rust-lang-nursery/regex)
crate.
**UNDER DEVELOPMENT.**
ripgrep (rg)
------------
ripgrep combines the usability of the silver searcher with the raw speed of
grep.

benches/README.md (new file)

@@ -0,0 +1,5 @@
These are internal microbenchmarks for tracking the performance of individual
components inside of ripgrep. At the moment, they aren't heavily used.
For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
directory.


@@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
'''
benchsuite is a benchmark runner for comparing command line search tools.
@@ -10,6 +10,7 @@ import os
import os.path as path
from multiprocessing import cpu_count
import re
import shutil
import statistics
import subprocess
import sys
@@ -39,13 +40,23 @@ LINUX_CLONE = 'git://github.com/BurntSushi/linux'
GREP_ASCII = {'LC_ALL': 'C'}
GREP_UNICODE = {'LC_ALL': 'en_US.UTF-8'}
# Sift tries really hard to search everything by default. In our code search
# benchmarks, we don't want that.
SIFT = [
'sift',
'--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
]
def bench_linux_literal_default(suite_dir):
'''
Benchmark the speed of a literal using *default* settings.
This is a purposefully unfair benchmark for use in performance
analysis, but it is pedagogically useful.
analysis, but it is pedagogically useful to demonstrate how
default behaviors differ.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -55,8 +66,6 @@ def bench_linux_literal_default(suite_dir):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
# N.B. This is a purposefully unfair benchmark for illustrative purposes
# of how the default modes for each search tool differ.
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', pat]),
mkcmd('ag', ['ag', pat]),
@@ -64,10 +73,12 @@ def bench_linux_literal_default(suite_dir):
# doesn't read gitignore files. Instead, it has a file whitelist
# that happens to match up exactly with the gitignores for this search.
mkcmd('ucg', ['ucg', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
# I guess setting LC_ALL=en_US.UTF-8 probably isn't necessarily the
# default, but I'd guess it to be on most desktop systems.
mkcmd('pt', ['pt', pat]),
# sift reports an extra line here for a binary file matched.
mkcmd('sift', ['sift', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'en_US.UTF-8'}),
])
@@ -76,8 +87,9 @@ def bench_linux_literal(suite_dir):
Benchmark the speed of a literal, attempting to be fair.
This tries to use the minimum set of options available in all tools
to test how fast they are. For example, it makes sure there is no
case insensitive matching and that line numbers are computed.
to test how fast they are. For example, it makes sure there is
no case insensitive matching and that line numbers are computed
(because some tools don't permit disabling line numbers).
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -88,19 +100,16 @@ def bench_linux_literal(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd('git grep', [
mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '--mmap', pat]),
mkcmd('ag (ignore) (mmap)', ['ag', '-s', pat]),
mkcmd('pt (ignore)', ['pt', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
mkcmd('git grep (ignore)', [
'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}),
mkcmd('pt', ['pt', pat]),
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
])
@@ -120,23 +129,21 @@ def bench_linux_literal_casei(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
]),
mkcmd('ag', ['ag', '-i', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
mkcmd('ucg', ['ucg', '-i', pat]),
mkcmd('git grep', [
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
# since that is certainly what ripgrep is doing, but this is for an
# ASCII literal, so we should give `git grep` all the opportunity to
# do its best.
mkcmd('git grep (ignore)', [
'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}),
# sift yields more matches than it should here. Specifically, it gets
# matches in Module.symvers and System.map in the repo root. Both of
# those files show up in the repo root's .gitignore file.
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
mkcmd('rg (whitelist)', [
'rg', '-n', '-i', '--no-ignore', '-tall', pat,
]),
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
])
@@ -156,20 +163,16 @@ def bench_linux_re_literal_suffix(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('ag (ignore)', ['ag', '-s', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
mkcmd(
'git grep',
'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
])
@@ -189,22 +192,18 @@ def bench_linux_word(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-w', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
]),
mkcmd('ag', ['ag', '-s', '-w', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
mkcmd(
'git grep',
'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
mkcmd('rg (whitelist)', [
'rg', '-n', '-w', '--no-ignore', '-tall', pat,
]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', '-w', pat]),
])
@@ -212,7 +211,8 @@ def bench_linux_unicode_greek(suite_dir):
'''
Benchmark matching of a Unicode category.
Only three tools (ripgrep, sift and pt) support this.
Only three tools (ripgrep, sift and pt) support this. We omit
pt because it is too slow.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
@@ -224,15 +224,7 @@ def bench_linux_unicode_greek(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
# sift tries to search a bunch of PDF files and clutters up the
# results, even though --binary-skip is provided. They are excluded
# here explicitly, but don't have a measurable impact on performance.
mkcmd('sift', [
'sift', '-n', '--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
pat,
]),
mkcmd('sift', SIFT + ['-n', '--git', pat]),
])
@@ -252,15 +244,7 @@ def bench_linux_unicode_greek_casei(suite_dir):
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
# sift tries to search a bunch of PDF files and clutters up the
# results, even though --binary-skip is provided. They are excluded
# here explicitly, but don't have a measurable impact on performance.
mkcmd('sift', [
'sift', '-n', '--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
pat,
]),
mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
])
@@ -281,30 +265,25 @@ def bench_linux_unicode_word(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', pat,
]),
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
mkcmd('ag-novcs (no Unicode)', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
mkcmd(
'git grep',
'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'},
),
mkcmd(
'git grep (no Unicode)',
'git grep (ignore) (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift (no Unicode)', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('rg (whitelist) (ASCII)', [
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
]),
mkcmd('ucg (ASCII)', ['ucg', '--nosmart-case', pat]),
])
@@ -326,30 +305,25 @@ def bench_linux_no_literal(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-whitelist', ['rg', '-tall', '--no-ignore', '-n', pat]),
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('rg-whitelist (no Unicode)', [
'rg', '-tall', '--no-ignore', '-n', '(?-u)' + pat,
]),
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
mkcmd('ag-novcs (no Unicode)', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
mkcmd(
'git grep',
'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'},
),
mkcmd(
'git grep (no Unicode)',
'git grep (ignore) (ASCII)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift (no Unicode)', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
mkcmd('rg (whitelist)', ['rg', '-n', '--no-ignore', '-tall', pat]),
mkcmd('rg (whitelist) (ASCII)', [
'rg', '-n', '--no-ignore', '-tall', '(?-u)' + pat,
]),
mkcmd('ucg (whitelist) (ASCII)', ['ucg', '--nosmart-case', pat]),
])
@@ -371,21 +345,15 @@ def bench_linux_alternates(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', pat,
]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd('rg (ignore)', ['rg', '-n', pat]),
mkcmd('ag (ignore)', ['ag', '-s', pat]),
mkcmd(
'git grep',
'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', pat]),
mkcmd('ucg (whitelist)', ['ucg', '--nosmart-case', pat]),
])
@@ -400,21 +368,15 @@ def bench_linux_alternates_casei(suite_dir):
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
]),
mkcmd('ag', ['ag', '-i', pat]),
mkcmd('ag-novcs', [
'ag', '--skip-vcs-ignores', '-i', pat,
]),
mkcmd('ucg', ['ucg', '-i', pat]),
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
mkcmd('ag (ignore)', ['ag', '-i', pat]),
mkcmd(
'git grep',
'git grep (ignore)',
['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'},
),
mkcmd('rg (whitelist)', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('ucg (whitelist)', ['ucg', '-i', pat]),
])
@@ -423,22 +385,159 @@ def bench_subtitles_en_literal(suite_dir):
Benchmark the speed of an ASCII string literal.
'''
require(suite_dir, 'subtitles-en')
ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = 'Sherlock Holmes'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]),
Command('ag', ['ag', '-s', pat, ru]),
Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII),
Command('grep (no line numbers)', [
'grep', '-a', pat, ru,
Command('rg', ['rg', pat, en]),
Command('pt', ['pt', '-N', pat, en]),
Command('sift', ['sift', pat, en]),
Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
Command('pt (lines)', ['pt', pat, en]),
Command('sift (lines)', ['sift', '-n', pat, en]),
Command('grep (lines)', ['grep', '-an', pat, en], env=GREP_ASCII),
])
def bench_subtitles_en_literal_casei(suite_dir):
'''
Benchmark the speed of a Unicode-y string case insensitively.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = 'Sherlock Holmes'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-i', pat, en]),
Command('grep', ['grep', '-ai', pat, en], env=GREP_UNICODE),
Command('grep (ASCII)', [
'grep', '-E', '-ai', pat, en,
], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, en]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, en]),
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, en]),
])
def bench_subtitles_en_literal_word(suite_dir):
'''
Benchmark the speed of finding a literal inside word boundaries.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = 'Sherlock Holmes'
return Benchmark(pattern=pat, commands=[
Command('rg (ASCII)', [
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', en,
]),
Command('ag (ASCII)', ['ag', '-sw', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-anw', pat, en,
], env=GREP_ASCII),
Command('rg', ['rg', '-nw', pat, en]),
Command('grep', ['grep', '-anw', pat, en], env=GREP_UNICODE),
])
def bench_subtitles_en_alternate(suite_dir):
'''
Benchmark the speed of a set of alternate literals.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = '|'.join([
'Sherlock Holmes',
'John Watson',
'Irene Adler',
'Inspector Lestrade',
'Professor Moriarty',
])
return Benchmark(pattern=pat, commands=[
Command('rg (lines)', ['rg', '-n', pat, en]),
Command('ag (lines)', ['ag', '-s', pat, en]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (lines)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
Command('rg', ['rg', pat, en]),
Command('grep', [
'grep', '-E', '-a', pat, en,
], env=GREP_ASCII),
])
def bench_subtitles_en_alternate_casei(suite_dir):
'''
Benchmark the speed of a set of alternate literals.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = '|'.join([
'Sherlock Holmes',
'John Watson',
'Irene Adler',
'Inspector Lestrade',
'Professor Moriarty',
])
return Benchmark(pattern=pat, commands=[
Command('ag (ASCII)', ['ag', '-s', '-i', pat, en]),
Command('ucg (ASCII)', ['ucg', '-i', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, en,
], env=GREP_ASCII),
Command('rg', ['rg', '-n', '-i', pat, en]),
Command('grep', ['grep', '-E', '-ani', pat, en], env=GREP_UNICODE),
])
def bench_subtitles_en_surrounding_words(suite_dir):
'''
Benchmark a more complex regex with an inner literal.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = r'\w+\s+Holmes\s+\w+'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]),
Command('grep', ['grep', '-E', '-an', pat, en], env=GREP_UNICODE),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
])
def bench_subtitles_en_no_literal(suite_dir):
'''
Benchmark the speed of a regex with no literals.
Note that we don't even try to run grep with Unicode support
on this one. While it should eventually get the right answer,
I killed it after it had already been running for two minutes
and showed no signs of finishing soon.
'''
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME_SAMPLE)
pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, en]),
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, en]),
Command('ag (ASCII)', ['ag', '-s', pat, en]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, en]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, en,
], env=GREP_ASCII),
Command('pt', ['pt', pat, ru]),
Command('pt (no line numbers)', ['pt', '-N', pat, ru]),
Command('sift', ['sift', '-n', pat, ru]),
Command('sift (no line numbers)', ['sift', pat, ru]),
])
@@ -451,18 +550,16 @@ def bench_subtitles_ru_literal(suite_dir):
pat = 'Шерлок Холмс' # Sherlock Holmes
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]),
Command('ag', ['ag', '-s', pat, ru]),
Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
Command('grep', ['grep', '-an', pat, ru], env=GREP_ASCII),
Command('grep (no line numbers)', [
'grep', '-a', pat, ru,
], env=GREP_ASCII),
Command('pt', ['pt', pat, ru]),
Command('pt (no line numbers)', ['pt', '-N', pat, ru]),
Command('sift', ['sift', '-n', pat, ru]),
Command('sift (no line numbers)', ['sift', pat, ru]),
Command('rg', ['rg', pat, ru]),
Command('pt', ['pt', '-N', pat, ru]),
Command('sift', ['sift', pat, ru]),
Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
Command('pt (lines)', ['pt', pat, ru]),
Command('sift (lines)', ['sift', '-n', pat, ru]),
Command('grep (lines)', ['grep', '-an', pat, ru], env=GREP_ASCII),
])
@@ -475,12 +572,36 @@ def bench_subtitles_ru_literal_casei(suite_dir):
pat = 'Шерлок Холмс' # Sherlock Holmes
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]),
Command('grep', ['grep', '-ani', pat, ru], env=GREP_UNICODE),
Command('grep (not Unicode)', [
'grep', '-E', '-ani', pat, ru,
Command('rg', ['rg', '-i', pat, ru]),
Command('grep', ['grep', '-ai', pat, ru], env=GREP_UNICODE),
Command('grep (ASCII)', [
'grep', '-E', '-ai', pat, ru,
], env=GREP_ASCII),
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
Command('ucg (lines) (ASCII)', ['ucg', '-i', pat, ru]),
])
def bench_subtitles_ru_literal_word(suite_dir):
'''
Benchmark the speed of finding a literal inside word boundaries.
'''
require(suite_dir, 'subtitles-ru')
ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME)
pat = 'Шерлок Холмс' # Sherlock Holmes
return Benchmark(pattern=pat, commands=[
Command('rg (ASCII)', [
'rg', '-n', r'(?-u:\b)' + pat + r'(?-u:\b)', ru,
]),
Command('ag (ASCII)', ['ag', '-sw', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-anw', pat, ru,
], env=GREP_ASCII),
Command('rg', ['rg', '-nw', pat, ru]),
Command('grep', ['grep', '-anw', pat, ru], env=GREP_UNICODE),
])
@@ -499,11 +620,14 @@ def bench_subtitles_ru_alternate(suite_dir):
])
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]),
Command('ucg', ['ucg', '--nosmart-case', pat, ru]),
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_ASCII),
Command('grep (no line numbers)', [
Command('rg (lines)', ['rg', '-n', pat, ru]),
Command('ag (lines)', ['ag', '-s', pat, ru]),
Command('ucg (lines)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (lines)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
Command('rg', ['rg', pat, ru]),
Command('grep', [
'grep', '-E', '-a', pat, ru,
], env=GREP_ASCII),
])
@@ -524,12 +648,32 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
])
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('ucg (not Unicode)', ['ucg', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
Command('grep (not Unicode)', [
Command('ag (ASCII)', ['ag', '-s', '-i', pat, ru]),
Command('ucg (ASCII)', ['ucg', '-i', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-ani', pat, ru,
], env=GREP_ASCII),
Command('rg', ['rg', '-n', '-i', pat, ru]),
Command('grep', ['grep', '-E', '-ani', pat, ru], env=GREP_UNICODE),
])
def bench_subtitles_ru_surrounding_words(suite_dir):
'''
Benchmark a more complex regex with an inner literal.
'''
require(suite_dir, 'subtitles-en')
ru = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_RU_NAME)
pat = r'\w+\s+Холмс\s+\w+'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('grep', ['grep', '-E', '-an', pat, ru], env=GREP_UNICODE),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
])
@@ -548,9 +692,10 @@ def bench_subtitles_ru_no_literal(suite_dir):
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', '-n', pat, ru]),
Command('rg (no line numbers)', ['rg', pat, ru]),
Command('ucg (no Unicode)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (no Unicode)', [
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
Command('ucg (ASCII)', ['ucg', '--nosmart-case', pat, ru]),
Command('grep (ASCII)', [
'grep', '-E', '-an', pat, ru,
], env=GREP_ASCII),
])
@@ -574,6 +719,23 @@ class MissingDependencies(Exception):
return 'MissingDependency(%s)' % repr(self.missing_names)
class MissingCommands(Exception):
'''
A missing command exception.
This exception occurs when running a command in a benchmark
where the command could not be found on the current system.
:ivar list(str) missing_names:
The names of the command binaries that could not be found.
'''
def __init__(self, missing_names):
self.missing_names = sorted(set(missing_names))
def __str__(self):
return 'MissingCommands(%s)' % repr(self.missing_names)
class Benchmark(object):
'''
A single benchmark corresponding to a grouping of commands.
@@ -583,7 +745,8 @@ class Benchmark(object):
'''
def __init__(self, name=None, pattern=None, commands=None,
warmup_count=1, count=3, line_count=True):
warmup_count=1, count=3, line_count=True,
allow_missing_commands=False):
'''
Create a single benchmark.
@@ -621,15 +784,37 @@ class Benchmark(object):
self.warmup_count = warmup_count
self.count = count
self.line_count = line_count
self.allow_missing_commands = allow_missing_commands
def raise_if_missing(self):
'''
Raises a MissingCommands exception if applicable.
A MissingCommands exception is raised when the following
criteria are met: 1) allow_missing_commands is False, and 2) at
least one command in this benchmark could not be found on this
system.
'''
missing_commands = \
[c.binary_name for c in self.commands if not c.exists()]
if not self.allow_missing_commands and len(missing_commands) > 0:
raise MissingCommands(missing_commands)
def run(self):
'''
Runs this benchmark and returns the results.
:rtype: Result
:raises:
MissingCommands if any command doesn't exist.
(Unless allow_missing_commands is enabled.)
'''
self.raise_if_missing()
result = Result(self)
for cmd in self.commands:
if self.allow_missing_commands and not cmd.exists():
# Skip this command if we're OK with it.
continue
# Do a warmup first.
for _ in range(self.warmup_count):
self.run_one(cmd)
@@ -654,6 +839,8 @@ class Benchmark(object):
it is the number of lines in the search output.
:rtype: int
'''
if not cmd.exists():
raise MissingCommands([cmd.cmd[0]])
cmd.kwargs['stderr'] = subprocess.DEVNULL
if self.line_count:
cmd.kwargs['stdout'] = subprocess.PIPE
@@ -723,6 +910,8 @@ class Result(object):
means = []
for cmd in self.benchmark.commands:
mean, _ = self.distribution_for(cmd)
if mean is None:
continue
means.append((cmd, mean))
return min(means, key=lambda tup: tup[1])[0]
@@ -745,16 +934,18 @@ class Result(object):
'''
Returns the distribution (mean +/- std) of the given command.
If there are no samples for this command (i.e., it was skipped),
then return ``(None, None)``.
:rtype: (float, float)
:returns:
A tuple containing the mean and standard deviation, in that
order.
'''
mean = statistics.mean(
s['duration'] for s in self.samples_for(cmd))
stdev = statistics.stdev(
s['duration'] for s in self.samples_for(cmd))
return mean, stdev
samples = list(s['duration'] for s in self.samples_for(cmd))
if len(samples) == 0:
return None, None
return statistics.mean(samples), statistics.stdev(samples)
class Command(object):
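The `distribution_for` change above returns `(None, None)` when a command was skipped and produced no samples, instead of letting `statistics.stdev` fail on an empty list. The same guard, written as a self-contained Rust sketch (illustrative only, not part of the benchsuite):

    // Mean and sample standard deviation of timing samples, or None when the
    // command was skipped and produced no samples (stdev needs at least two).
    fn distribution(samples: &[f64]) -> Option<(f64, f64)> {
        if samples.len() < 2 {
            return None;
        }
        let n = samples.len() as f64;
        let mean = samples.iter().sum::<f64>() / n;
        let var = samples.iter().map(|s| (s - mean).powi(2)).sum::<f64>() / (n - 1.0);
        Some((mean, var.sqrt()))
    }

    fn main() {
        assert_eq!(distribution(&[]), None); // skipped command: no samples
        let (mean, stdev) = distribution(&[1.0, 2.0, 3.0]).unwrap();
        assert!((mean - 2.0).abs() < 1e-9);
        assert!((stdev - 1.0).abs() < 1e-9);
    }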
@@ -784,6 +975,15 @@ class Command(object):
self.args = args
self.kwargs = kwargs
def exists(self):
'Returns true if and only if this command exists.'
return shutil.which(self.binary_name) is not None
@property
def binary_name(self):
'Return the binary name of this command.'
return self.cmd[0]
def run(self):
'''
Runs this command and returns its status.
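`Command.exists` above leans on Python's `shutil.which` to decide whether a benchmark's binary is installed before running it. For reference, the same check can be sketched with nothing but Rust's standard library (a Unix-style PATH lookup only; Windows would also need to try executable extensions, and the `which` helper here is illustrative, not part of the benchsuite):

    use std::env;
    use std::path::PathBuf;

    // Return the first PATH entry that contains the named binary, if any.
    fn which(name: &str) -> Option<PathBuf> {
        let paths = env::var_os("PATH")?;
        env::split_paths(&paths)
            .map(|dir| dir.join(name))
            .find(|candidate| candidate.is_file())
    }

    fn main() {
        for tool in ["rg", "ag", "ucg"] {
            match which(tool) {
                Some(path) => println!("{} -> {}", tool, path.display()),
                None => println!("{} is missing; the benchmark would be skipped", tool),
            }
        }
    }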
@@ -924,7 +1124,8 @@ def download(suite_dir, choices):
sys.exit(1)
def collect_benchmarks(suite_dir, filter_pat=None):
def collect_benchmarks(suite_dir, filter_pat=None,
allow_missing_commands=False):
'''
Return an iterable of all runnable benchmarks.
@@ -946,6 +1147,9 @@ def collect_benchmarks(suite_dir, filter_pat=None):
continue
try:
benchmark = globals()[fun](suite_dir)
benchmark.name = name
benchmark.allow_missing_commands = allow_missing_commands
benchmark.raise_if_missing()
except MissingDependencies as e:
eprint(
'missing: %s, skipping benchmark %s (try running with: %s)' % (
@@ -953,24 +1157,32 @@ def collect_benchmarks(suite_dir, filter_pat=None):
name,
' '.join(['--download %s' % n for n in e.missing_names]),
))
except MissingCommands as e:
fmt = 'missing commands: %s, skipping benchmark %s ' \
'(run with --allow-missing to run incomplete benchmarks)'
eprint(fmt % (', '.join(e.missing_names), name))
continue
benchmark.name = name
yield benchmark
def main():
download_choices = ['all', 'linux', 'subtitles-en', 'subtitles-ru']
p = argparse.ArgumentParser('Command line search tool benchmark suite.')
p.add_argument(
'--dir', metavar='PATH', default=os.getcwd(),
help='The directory in which to download data and perform searches.')
p.add_argument(
'--download', metavar='CORPUS', action='append',
choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
choices=download_choices,
help='Download and prepare corpus data, then exit without running '
'any benchmarks. Note that this command is intended to be '
'idempotent. WARNING: This downloads over a gigabyte of data, '
'and also includes building the Linux kernel. If "all" is used '
'then the total uncompressed size is around 13 GB.')
'then the total uncompressed size is around 13 GB. '
'Choices: %s' % ', '.join(download_choices))
p.add_argument(
'--allow-missing', action='store_true',
help='Permit benchmarks to run even if some commands are missing.')
p.add_argument(
'-f', '--force', action='store_true',
help='Overwrite existing files if there is a conflict.')
@@ -986,6 +1198,13 @@ def main():
help='A regex pattern that will only run benchmarks that match.')
args = p.parse_args()
if args.list:
benchmarks = collect_benchmarks(
args.dir, filter_pat=args.bench,
allow_missing_commands=args.allow_missing)
for b in benchmarks:
print(b.name)
sys.exit(0)
if args.download is not None and len(args.download) > 0:
download(args.dir, args.download)
sys.exit(0)
@@ -1005,7 +1224,9 @@ def main():
raw_csv_wtr = csv.DictWriter(raw_handle, fields)
raw_csv_wtr.writerow({x: x for x in fields})
benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
benchmarks = collect_benchmarks(
args.dir, filter_pat=args.bench,
allow_missing_commands=args.allow_missing)
for i, b in enumerate(benchmarks):
result = b.run()
fastest_cmd = result.fastest_cmd()
@@ -1019,6 +1240,12 @@ def main():
for cmd in b.commands:
name = cmd.name
mean, stdev = result.distribution_for(cmd)
if mean is None:
# If we couldn't get a distribution for this command then
# it was skipped.
print('{name:{pad}} SKIPPED'.format(
name=name, pad=max_name_len + 2))
continue
line_counts = result.line_counts_for(cmd)
show_fast_cmd, show_line_counts = '', ''
if fastest_cmd.name == cmd.name:


@@ -1,11 +0,0 @@
--langdef=Rust
--langmap=Rust:.rs
--regex-Rust=/^[ \t]*(#\[[^\]]\][ \t]*)*(pub[ \t]+)?(extern[ \t]+)?("[^"]+"[ \t]+)?(unsafe[ \t]+)?fn[ \t]+([a-zA-Z0-9_]+)/\6/f,functions,function definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?type[ \t]+([a-zA-Z0-9_]+)/\2/T,types,type definitions/
--regex-Rust=/^[ \t]*(pub[ \t]+)?enum[ \t]+([a-zA-Z0-9_]+)/\2/g,enum,enumeration names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?struct[ \t]+([a-zA-Z0-9_]+)/\2/s,structure names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?mod[ \t]+([a-zA-Z0-9_]+)/\2/m,modules,module names/
--regex-Rust=/^[ \t]*(pub[ \t]+)?static[ \t]+([a-zA-Z0-9_]+)/\2/c,consts,static constants/
--regex-Rust=/^[ \t]*(pub[ \t]+)?trait[ \t]+([a-zA-Z0-9_]+)/\2/t,traits,traits/
--regex-Rust=/^[ \t]*(pub[ \t]+)?impl([ \t\n]+<.*>)?[ \t]+([a-zA-Z0-9_]+)/\3/i,impls,trait implementations/
--regex-Rust=/^[ \t]*macro_rules![ \t]+([a-zA-Z0-9_]+)/\1/d,macros,macro definitions/


@@ -1,12 +1,11 @@
[package]
publish = false
name = "grep"
version = "0.1.0" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "https://github.com/BurntSushi/ripgrep"
documentation = "http://burntsushi.net/rustdoc/grep/"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"

grep/README.md (new file)

@@ -0,0 +1,4 @@
grep
----
This is a *library* that provides grep-style line-by-line regex searching (with
comparable performance to `grep` itself).


@@ -62,7 +62,7 @@ impl fmt::Display for Error {
match *self {
Error::Regex(ref err) => err.fmt(f),
Error::LiteralNotAllowed(chr) => {
write!(f, "Literal '{}' not allowed.", chr)
write!(f, "Literal {:?} not allowed.", chr)
}
Error::__Nonexhaustive => unreachable!(),
}
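The one-character change in this hunk is what the "Improve the 'bad literal' error message" commit describes: the Debug formatter for `char` quotes and escapes the character, so a bad literal such as a newline stays readable. A standalone illustration (not part of the diff):

    fn main() {
        let chr = '\n';
        // Display ({}) prints the raw character, so for '\n' the message
        // would contain an actual line break.
        println!("Literal '{}' not allowed.", chr);
        // Debug ({:?}) prints a quoted, escaped character:
        // Literal '\n' not allowed.
        println!("Literal {:?} not allowed.", chr);
    }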


@@ -10,6 +10,10 @@ use {Error, Result};
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
/// function panics.
pub fn remove(expr: Expr, byte: u8) -> Result<Expr> {
// TODO(burntsushi): There is a bug in this routine where only `\n` is
// handled correctly. Namely, `AnyChar` and `AnyByte` need to be translated
// to proper character classes instead of the special `AnyCharNoNL` and
// `AnyByteNoNL` classes.
use syntax::Expr::*;
assert!(byte <= 0x7F);
let chr = byte as char;


@@ -1 +0,0 @@
au BufWritePost *.rs silent!make ctags > /dev/null 2>&1


@@ -9,16 +9,20 @@ use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use term::Terminal;
use term::{self, Terminal};
#[cfg(windows)]
use term::WinConsole;
use walkdir::WalkDir;
use atty;
use gitignore::{Gitignore, GitignoreBuilder};
use ignore::Ignore;
use out::{Out, OutBuffer};
use out::{Out, ColoredTerminal};
use printer::Printer;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
#[cfg(windows)]
use terminal_win::WindowsBuffer;
use types::{FileTypeDef, Types, TypesBuilder};
use walk;
@@ -120,6 +124,7 @@ Less common options:
--no-ignore
Don't respect ignore files (.gitignore, .rgignore, etc.)
This implies --no-ignore-parent.
--no-ignore-parent
Don't respect ignore files in parent directories.
@@ -334,7 +339,9 @@ impl RawArgs {
line_number: !self.flag_no_line_number && self.flag_line_number,
mmap: mmap,
no_ignore: self.flag_no_ignore,
no_ignore_parent: self.flag_no_ignore_parent,
no_ignore_parent:
// --no-ignore implies --no-ignore-parent
self.flag_no_ignore_parent || self.flag_no_ignore,
quiet: self.flag_quiet,
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
text: self.flag_text,
@@ -442,7 +449,7 @@ impl Args {
/// Create a new printer of individual search results that writes to the
/// writer given.
pub fn printer<W: Send + Terminal>(&self, wtr: W) -> Printer<W> {
pub fn printer<W: Terminal + Send>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr)
.column(self.column)
.context_separator(self.context_separator.clone())
@@ -469,8 +476,29 @@ impl Args {
}
/// Create a new buffer for use with searching.
pub fn outbuf(&self) -> OutBuffer {
OutBuffer::new(self.color)
#[cfg(not(windows))]
pub fn outbuf(&self) -> ColoredTerminal<term::TerminfoTerminal<Vec<u8>>> {
ColoredTerminal::new(vec![], self.color)
}
/// Create a new buffer for use with searching.
#[cfg(windows)]
pub fn outbuf(&self) -> ColoredTerminal<WindowsBuffer> {
ColoredTerminal::new_buffer(self.color)
}
/// Create a new buffer for use with searching.
#[cfg(not(windows))]
pub fn stdout(
&self,
) -> ColoredTerminal<term::TerminfoTerminal<io::BufWriter<io::Stdout>>> {
ColoredTerminal::new(io::BufWriter::new(io::stdout()), self.color)
}
/// Create a new buffer for use with searching.
#[cfg(windows)]
pub fn stdout(&self) -> ColoredTerminal<WinConsole<io::Stdout>> {
ColoredTerminal::new_stdout(self.color)
}
/// Return the paths that should be searched.
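The `outbuf`/`stdout` split above reflects the streaming change described in the commit log: when only one file (or stdin) is searched, results can go straight to the terminal, while parallel searches still collect each worker's output in an in-memory buffer so results do not interleave. A hedged, standard-library-only sketch of that distinction (the function names are illustrative, not ripgrep's):

    use std::io::{self, BufWriter, Write};

    // Single-file / stdin case: stream matching lines directly to stdout.
    fn print_streaming<'a, I: IntoIterator<Item = &'a [u8]>>(lines: I) -> io::Result<()> {
        let stdout = io::stdout();
        let mut out = BufWriter::new(stdout.lock());
        for line in lines {
            out.write_all(line)?;
        }
        out.flush()
    }

    // Parallel case: each worker appends to its own buffer, which the main
    // thread writes out in one piece once the worker finishes a file.
    fn buffer_line(buf: &mut Vec<u8>, line: &[u8]) {
        buf.extend_from_slice(line);
    }

    fn main() -> io::Result<()> {
        let mut worker_buf = Vec::new();
        buffer_line(&mut worker_buf, b"buffered match\n");
        print_streaming(vec![&b"streamed match\n"[..], &worker_buf[..]])
    }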


@@ -21,7 +21,7 @@ additional rules such as whitelists (prefix of `!`) or directory-only globs
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
// use this exact implementation because hgignore files are different.
use std::env;
use std::cell::RefCell;
use std::error::Error as StdError;
use std::fmt;
use std::fs::File;
@@ -31,6 +31,7 @@ use std::path::{Path, PathBuf};
use regex;
use glob;
use pathutil::strip_prefix;
/// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)]
@@ -89,21 +90,10 @@ pub struct Gitignore {
}
impl Gitignore {
/// Create a new gitignore glob matcher from the gitignore file at the
/// given path. The root of the gitignore file is the basename of path.
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Gitignore, Error> {
let root = match path.as_ref().parent() {
Some(parent) => parent.to_path_buf(),
None => env::current_dir().unwrap_or(Path::new("/").to_path_buf()),
};
let mut builder = GitignoreBuilder::new(root);
try!(builder.add_path(path));
builder.build()
}
/// Create a new gitignore glob matcher from the given root directory and
/// string containing the contents of a gitignore file.
pub fn from_str<P: AsRef<Path>>(
#[allow(dead_code)]
fn from_str<P: AsRef<Path>>(
root: P,
gitignore: &str,
) -> Result<Gitignore, Error> {
@@ -122,48 +112,43 @@ impl Gitignore {
/// same directory as this gitignore file.
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
let mut path = path.as_ref();
if let Ok(p) = path.strip_prefix(&self.root) {
if let Some(p) = strip_prefix("./", path) {
path = p;
}
self.matched_utf8(&*path.to_string_lossy(), is_dir)
if let Some(p) = strip_prefix(&self.root, path) {
path = p;
}
self.matched_stripped(path, is_dir)
}
/// Like matched, but takes a path that has already been stripped and
/// converted to UTF-8.
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
// A single regex with a bunch of alternations of glob patterns is
// unfortunately typically faster than a RegexSet, so we use it as a
// first pass filter. We still need to run the RegexSet to get the most
// recently defined glob that matched.
if !self.set.is_match(path) {
return Match::None;
}
// The regex set can't actually pick the right glob that matched all
// on its own. In particular, some globs require that only directories
// can match. Thus, only accept a match from the regex set if the given
// path satisfies the corresponding glob's directory criteria.
for i in self.set.matches(path).iter().rev() {
let pat = &self.patterns[i];
if !pat.only_dir || is_dir {
return if pat.whitelist {
Match::Whitelist(pat)
} else {
Match::Ignored(pat)
};
/// Like matched, but takes a path that has already been stripped.
pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
thread_local! {
static MATCHES: RefCell<Vec<usize>> = {
RefCell::new(vec![])
}
}
Match::None
};
MATCHES.with(|matches| {
let mut matches = matches.borrow_mut();
self.set.matches_into(path, &mut *matches);
for &i in matches.iter().rev() {
let pat = &self.patterns[i];
if !pat.only_dir || is_dir {
return if pat.whitelist {
Match::Whitelist(pat)
} else {
Match::Ignored(pat)
};
}
}
Match::None
})
}
/// Returns the total number of ignore patterns.
pub fn num_ignores(&self) -> u64 {
self.num_ignores
}
/// Returns the total number of whitelisted patterns.
pub fn num_whitelist(&self) -> u64 {
self.num_whitelist
}
}
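The new `matched_stripped` above reuses a thread-local scratch vector so that `matches_into` does not allocate on every call. A minimal sketch of that pattern using only the standard library (independent of the ripgrep types; the names are illustrative):

    use std::cell::RefCell;

    thread_local! {
        // One scratch buffer per thread, reused across calls to avoid
        // allocating a fresh Vec for every path that gets matched.
        static SCRATCH: RefCell<Vec<usize>> = RefCell::new(Vec::new());
    }

    fn collect_matches(fill: impl Fn(&mut Vec<usize>)) -> Option<usize> {
        SCRATCH.with(|buf| {
            let mut buf = buf.borrow_mut();
            buf.clear();
            fill(&mut buf);
            // Take the last index pushed (in the real code, the most
            // recently defined glob that matched wins).
            buf.last().copied()
        })
    }

    fn main() {
        let hit = collect_matches(|buf| {
            buf.push(2);
            buf.push(5);
        });
        assert_eq!(hit, Some(5));
    }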
/// The result of a glob match.
@@ -182,6 +167,7 @@ pub enum Match<'a> {
impl<'a> Match<'a> {
/// Returns true if the match result implies the path should be ignored.
#[allow(dead_code)]
pub fn is_ignored(&self) -> bool {
match *self {
Match::Ignored(_) => true,
@@ -406,6 +392,7 @@ mod tests {
ignored!(ig23, ROOT, "foo", "./foo");
ignored!(ig24, ROOT, "target", "grep/target");
ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
not_ignored!(ignot1, ROOT, "amonths", "months");
not_ignored!(ignot2, ROOT, "monthsa", "months");


@@ -26,13 +26,22 @@ to make its way into `glob` proper.
// at the .gitignore for the chromium repo---just about every pattern satisfies
// that assumption.)
use std::borrow::Cow;
use std::collections::HashMap;
use std::error::Error as StdError;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::hash;
use std::iter;
use std::path::Path;
use std::str;
use fnv;
use regex;
use regex::bytes::{Regex, RegexSet, SetMatches};
use regex::bytes::Regex;
use regex::bytes::RegexSet;
use pathutil::file_name;
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -71,32 +80,181 @@ impl fmt::Display for Error {
}
}
/// SetYesNo represents a group of globs that can be matched together in a
/// single pass. SetYesNo can only determine whether a particular path matched
/// any pattern in the set.
#[derive(Clone, Debug)]
pub struct SetYesNo {
re: Regex,
}
impl SetYesNo {
/// Returns true if and only if the given path matches at least one glob
/// in this set.
pub fn is_match<T: AsRef<Path>>(&self, path: T) -> bool {
self.re.is_match(&*path_bytes(path.as_ref()))
}
fn new(
pats: &[(Pattern, MatchOptions)],
) -> Result<SetYesNo, regex::Error> {
let mut joined = String::new();
for &(ref p, ref o) in pats {
let part = format!("(?:{})", p.to_regex_with(o));
if !joined.is_empty() {
joined.push('|');
}
joined.push_str(&part);
}
Ok(SetYesNo { re: try!(Regex::new(&joined)) })
}
}
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
/// Set represents a group of globs that can be matched together in a single
/// pass.
#[derive(Clone, Debug)]
pub struct Set {
re: Regex,
set: RegexSet,
yesno: SetYesNo,
exts: HashMap<OsString, Vec<usize>, Fnv>,
literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
base_literals: HashMap<Vec<u8>, Vec<usize>, Fnv>,
base_prefixes: Vec<Vec<u8>>,
base_prefixes_map: Vec<usize>,
base_suffixes: Vec<Vec<u8>>,
base_suffixes_map: Vec<usize>,
base_regexes: RegexSet,
base_regexes_map: Vec<usize>,
regexes: RegexSet,
regexes_map: Vec<usize>,
}
impl Set {
/// Returns true if and only if the given path matches at least one glob
/// in this set.
pub fn is_match<T: AsRef<[u8]>>(&self, path: T) -> bool {
self.re.is_match(path.as_ref())
/// Returns the sequence number of every glob pattern that matches the
/// given path.
#[allow(dead_code)]
pub fn matches<T: AsRef<Path>>(&self, path: T) -> Vec<usize> {
let mut into = vec![];
self.matches_into(path, &mut into);
into
}
/// Returns every glob pattern (by sequence number) that matches the given
/// path.
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
// TODO(burntsushi): If we split this out into a separate crate, don't
// expose the regex::SetMatches type in the public API.
self.set.matches(path.as_ref())
/// Adds the sequence number of every glob pattern that matches the given
/// path to the vec given.
pub fn matches_into<T: AsRef<Path>>(
&self,
path: T,
into: &mut Vec<usize>,
) {
into.clear();
let path = path.as_ref();
let path_bytes = &*path_bytes(path);
let basename = file_name(path).map(|b| os_str_bytes(b));
if !self.yesno.is_match(path) {
return;
}
if !self.exts.is_empty() {
if let Some(ext) = path.extension() {
if let Some(matches) = self.exts.get(ext) {
into.extend(matches.as_slice());
}
}
}
if !self.literals.is_empty() {
if let Some(matches) = self.literals.get(path_bytes) {
into.extend(matches.as_slice());
}
}
if !self.base_literals.is_empty() {
if let Some(ref basename) = basename {
if let Some(matches) = self.base_literals.get(&**basename) {
into.extend(matches.as_slice());
}
}
}
if !self.base_prefixes.is_empty() {
if let Some(ref basename) = basename {
let basename = &**basename;
for (i, pre) in self.base_prefixes.iter().enumerate() {
if pre.len() <= basename.len() && &**pre == &basename[0..pre.len()] {
into.push(self.base_prefixes_map[i]);
}
}
}
}
if !self.base_suffixes.is_empty() {
if let Some(ref basename) = basename {
let basename = &**basename;
for (i, suf) in self.base_suffixes.iter().enumerate() {
if suf.len() > basename.len() {
continue;
}
let (s, e) = (basename.len() - suf.len(), basename.len());
if &**suf == &basename[s..e] {
into.push(self.base_suffixes_map[i]);
}
}
}
}
if let Some(ref basename) = basename {
for i in self.base_regexes.matches(&**basename) {
into.push(self.base_regexes_map[i]);
}
}
for i in self.regexes.matches(path_bytes) {
into.push(self.regexes_map[i]);
}
into.sort();
}
/// Returns the number of glob patterns in this set.
pub fn len(&self) -> usize {
self.set.len()
fn new(pats: &[(Pattern, MatchOptions)]) -> Result<Set, regex::Error> {
let fnv = Fnv::default();
let mut exts = HashMap::with_hasher(fnv.clone());
let mut literals = HashMap::with_hasher(fnv.clone());
let mut base_literals = HashMap::with_hasher(fnv.clone());
let (mut base_prefixes, mut base_prefixes_map) = (vec![], vec![]);
let (mut base_suffixes, mut base_suffixes_map) = (vec![], vec![]);
let (mut regexes, mut regexes_map) = (vec![], vec![]);
let (mut base_regexes, mut base_regexes_map) = (vec![], vec![]);
for (i, &(ref p, ref o)) in pats.iter().enumerate() {
if let Some(ext) = p.ext() {
exts.entry(ext).or_insert(vec![]).push(i);
} else if let Some(literal) = p.literal() {
literals.entry(literal.into_bytes()).or_insert(vec![]).push(i);
} else if let Some(literal) = p.base_literal() {
base_literals
.entry(literal.into_bytes()).or_insert(vec![]).push(i);
} else if let Some(literal) = p.base_literal_prefix() {
base_prefixes.push(literal.into_bytes());
base_prefixes_map.push(i);
} else if let Some(literal) = p.base_literal_suffix() {
base_suffixes.push(literal.into_bytes());
base_suffixes_map.push(i);
} else if p.is_only_basename() {
let part = format!("(?:{})", p.to_regex_with(o));
base_regexes.push(part);
base_regexes_map.push(i);
} else {
let part = format!("(?:{})", p.to_regex_with(o));
regexes.push(part);
regexes_map.push(i);
}
}
Ok(Set {
yesno: try!(SetYesNo::new(pats)),
exts: exts,
literals: literals,
base_literals: base_literals,
base_prefixes: base_prefixes,
base_prefixes_map: base_prefixes_map,
base_suffixes: base_suffixes,
base_suffixes_map: base_suffixes_map,
base_regexes: try!(RegexSet::new(base_regexes)),
base_regexes_map: base_regexes_map,
regexes: try!(RegexSet::new(regexes)),
regexes_map: regexes_map,
})
}
}
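An illustrative sketch (not from these commits): the indices pushed by `matches_into` follow the order in which patterns were added, so a caller can keep a parallel side table to recover which globs fired:

    // Sketch only: `names` is a hypothetical slice kept in the same order
    // as the patterns handed to the SetBuilder.
    fn matched_names<'a>(set: &Set, path: &str, names: &'a [&'a str]) -> Vec<&'a str> {
        let mut idxs = Vec::new();
        set.matches_into(path, &mut idxs);
        idxs.into_iter().map(|i| names[i]).collect()
    }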
@@ -118,25 +276,19 @@ impl SetBuilder {
///
/// Once a matcher is built, no new patterns can be added to it.
pub fn build(&self) -> Result<Set, regex::Error> {
let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o));
let set = try!(RegexSet::new(it));
Set::new(&self.pats)
}
let mut joined = String::new();
for &(ref p, ref o) in &self.pats {
let part = format!("(?:{})", p.to_regex_with(o));
if !joined.is_empty() {
joined.push('|');
}
joined.push_str(&part);
}
let re = try!(Regex::new(&joined));
Ok(Set { re: re, set: set })
/// Like `build`, but returns a matcher that can only answer yes/no.
pub fn build_yesno(&self) -> Result<SetYesNo, regex::Error> {
SetYesNo::new(&self.pats)
}
/// Add a new pattern to this set.
///
/// If the pattern could not be parsed as a glob, then an error is
/// returned.
#[allow(dead_code)]
pub fn add(&mut self, pat: &str) -> Result<(), Error> {
self.add_with(pat, &MatchOptions::default())
}
@@ -147,8 +299,21 @@ impl SetBuilder {
pat: &str,
opts: &MatchOptions,
) -> Result<(), Error> {
let pat = try!(Pattern::new(pat));
self.pats.push((pat, opts.clone()));
let parsed = try!(Pattern::new(pat));
// if let Some(ext) = parsed.ext() {
// eprintln!("ext :: {:?} :: {:?}", ext, pat);
// } else if let Some(lit) = parsed.literal() {
// eprintln!("literal :: {:?} :: {:?}", lit, pat);
// } else if let Some(lit) = parsed.base_literal() {
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
// } else if let Some(lit) = parsed.base_literal_prefix() {
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
// } else if let Some(lit) = parsed.base_literal_suffix() {
// eprintln!("base_literal :: {:?} :: {:?}", lit, pat);
// } else {
// eprintln!("regex :: {:?} :: {:?}", pat, parsed);
// }
self.pats.push((parsed, opts.clone()));
Ok(())
}
}
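Since `add` parses each glob eagerly, a bad pattern is reported at the call site instead of surfacing later from `build`. A minimal sketch (not from these commits):

    // Sketch only: add several globs, attributing any parse failure to
    // the offending pattern string via the returned Error.
    fn add_all(builder: &mut SetBuilder, globs: &[&str]) -> Result<(), Error> {
        for &glob in globs {
            try!(builder.add(glob));
        }
        Ok(())
    }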
@@ -202,9 +367,137 @@ impl Pattern {
Ok(p.p)
}
/// Returns an extension if this pattern exclusively matches it.
pub fn ext(&self) -> Option<OsString> {
if self.tokens.len() <= 3 {
return None;
}
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
match self.tokens.get(1) {
Some(&Token::ZeroOrMore) => {}
_ => return None,
}
match self.tokens.get(2) {
Some(&Token::Literal(c)) if c == '.' => {}
_ => return None,
}
let mut lit = OsString::new();
for t in self.tokens[3..].iter() {
match *t {
Token::Literal(c) if c == '/' || c == '\\' || c == '.' => {
return None;
}
Token::Literal(c) => lit.push(c.to_string()),
_ => return None,
}
}
Some(lit)
}
/// Returns the pattern as a literal if and only if the pattern exclusively
/// matches the basename of a file path *and* is a literal.
///
/// The basic format of these patterns is `**/{literal}`, where `{literal}`
/// does not contain a path separator.
pub fn base_literal(&self) -> Option<String> {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
let mut lit = String::new();
for t in &self.tokens[1..] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return None,
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Returns true if and only if this pattern only inspects the basename
/// of a path.
pub fn is_only_basename(&self) -> bool {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return false,
}
for t in &self.tokens[1..] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return false,
Token::RecursivePrefix
| Token::RecursiveSuffix
| Token::RecursiveZeroOrMore => return false,
_ => {}
}
}
true
}
/// Returns the pattern as a literal if and only if the pattern must match
/// an entire path exactly.
///
/// The basic format of these patterns is `{literal}`.
pub fn literal(&self) -> Option<String> {
let mut lit = String::new();
for t in &self.tokens {
match *t {
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Returns a basename literal prefix of this pattern.
pub fn base_literal_prefix(&self) -> Option<String> {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
match self.tokens.last() {
Some(&Token::ZeroOrMore) => {}
_ => return None,
}
let mut lit = String::new();
for t in &self.tokens[1..self.tokens.len()-1] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return None,
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Returns a basename literal suffix of this pattern.
pub fn base_literal_suffix(&self) -> Option<String> {
match self.tokens.get(0) {
Some(&Token::RecursivePrefix) => {}
_ => return None,
}
match self.tokens.get(1) {
Some(&Token::ZeroOrMore) => {}
_ => return None,
}
let mut lit = String::new();
for t in &self.tokens[2..] {
match *t {
Token::Literal(c) if c == '/' || c == '\\' => return None,
Token::Literal(c) => lit.push(c),
_ => return None,
}
}
Some(lit)
}
/// Convert this pattern to a string that is guaranteed to be a valid
/// regular expression and will represent the matching semantics of this
/// glob pattern. This uses a default set of options.
#[allow(dead_code)]
pub fn to_regex(&self) -> String {
self.to_regex_with(&MatchOptions::default())
}
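An illustrative sketch (not from these commits) of how the classification helpers and `to_regex` are expected to behave, mirroring the `ext!`, `baseliteral!` and `toregex!` cases in the tests below and assuming default match options:

    #[test]
    fn classification_sketch() {
        use std::ffi::OsString;

        // Extension fast path: `**/*.rs` is recognized as "extension rs".
        assert_eq!(Pattern::new("**/*.rs").unwrap().ext(),
                   Some(OsString::from("rs")));

        // Basename literals: `**/ab` qualifies, `**/a*b` does not.
        assert!(Pattern::new("**/ab").unwrap().base_literal().is_some());
        assert!(Pattern::new("**/a*b").unwrap().base_literal().is_none());

        // Everything else falls back to the anchored regex form.
        let re = regex::bytes::Regex::new(
            &Pattern::new("a*b").unwrap().to_regex()).unwrap();
        assert!(re.is_match(b"a_b"));
    }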
@@ -315,7 +608,7 @@ impl<'a> Parser<'a> {
}
return Ok(());
}
let last = self.p.tokens.pop().unwrap();
self.p.tokens.pop().unwrap();
if prev != Some('/') {
return Err(Error::InvalidRecursive);
}
@@ -412,13 +705,34 @@ impl<'a> Parser<'a> {
}
}
fn path_bytes(path: &Path) -> Cow<[u8]> {
os_str_bytes(path.as_os_str())
}
#[cfg(unix)]
fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
use std::os::unix::ffi::OsStrExt;
Cow::Borrowed(s.as_bytes())
}
#[cfg(not(unix))]
fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
// TODO(burntsushi): On Windows, OS strings are probably UTF-16, so even
// if we could get at the raw bytes, they wouldn't be useful. We *must*
// convert to UTF-8 before doing path matching. Unfortunate, but necessary.
match s.to_string_lossy() {
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use regex::bytes::Regex;
use super::{Error, Pattern, MatchOptions, SetBuilder, Token};
use super::{Error, Pattern, MatchOptions, Set, SetBuilder, Token};
use super::Token::*;
macro_rules! syntax {
@@ -480,14 +794,42 @@ mod tests {
let pat = Pattern::new($pat).unwrap();
let path = &Path::new($path).to_str().unwrap();
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
// println!("PATTERN: {}", $pat);
// println!("REGEX: {:?}", re);
// println!("PATH: {}", path);
assert!(!re.is_match(path.as_bytes()));
}
};
}
macro_rules! ext {
($name:ident, $pat:expr, $ext:expr) => {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
let ext = pat.ext().map(|e| e.to_string_lossy().into_owned());
assert_eq!($ext, ext.as_ref().map(|s| &**s));
}
};
}
macro_rules! baseliteral {
($name:ident, $pat:expr, $yes:expr) => {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
assert_eq!($yes, pat.base_literal().is_some());
}
};
}
macro_rules! basesuffix {
($name:ident, $pat:expr, $yes:expr) => {
#[test]
fn $name() {
let pat = Pattern::new($pat).unwrap();
assert_eq!($yes, pat.is_literal_suffix());
}
};
}
fn class(s: char, e: char) -> Token {
Class { negated: false, ranges: vec![(s, e)] }
}
@@ -582,6 +924,26 @@ mod tests {
toregex!(re10, "+", r"^\+$");
toregex!(re11, "**", r"^.*$");
ext!(ext1, "**/*.rs", Some("rs"));
baseliteral!(lit1, "**", true);
baseliteral!(lit2, "**/a", true);
baseliteral!(lit3, "**/ab", true);
baseliteral!(lit4, "**/a*b", false);
baseliteral!(lit5, "z/**/a*b", false);
baseliteral!(lit6, "[ab]", false);
baseliteral!(lit7, "?", false);
/*
issuffix!(suf1, "", false);
issuffix!(suf2, "a", true);
issuffix!(suf3, "ab", true);
issuffix!(suf4, "*ab", true);
issuffix!(suf5, "*.ab", true);
issuffix!(suf6, "?.ab", true);
issuffix!(suf7, "ab*", false);
*/
matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
matches!(match3, "a*b*c", "abc");
@@ -678,16 +1040,22 @@ mod tests {
builder.add("src/lib.rs").unwrap();
let set = builder.build().unwrap();
assert!(set.is_match("foo.c"));
assert!(set.is_match("src/foo.c"));
assert!(!set.is_match("foo.rs"));
assert!(!set.is_match("tests/foo.rs"));
assert!(set.is_match("src/foo.rs"));
assert!(set.is_match("src/grep/src/main.rs"));
fn is_match(set: &Set, s: &str) -> bool {
let mut matches = vec![];
set.matches_into(s, &mut matches);
!matches.is_empty()
}
assert_eq!(2, set.matches("src/lib.rs").iter().count());
assert!(set.matches("src/lib.rs").matched(0));
assert!(!set.matches("src/lib.rs").matched(1));
assert!(set.matches("src/lib.rs").matched(2));
assert!(is_match(&set, "foo.c"));
assert!(is_match(&set, "src/foo.c"));
assert!(!is_match(&set, "foo.rs"));
assert!(!is_match(&set, "tests/foo.rs"));
assert!(is_match(&set, "src/foo.rs"));
assert!(is_match(&set, "src/grep/src/main.rs"));
let matches = set.matches("src/lib.rs");
assert_eq!(2, matches.len());
assert_eq!(0, matches[0]);
assert_eq!(2, matches[1]);
}
}


@@ -19,11 +19,11 @@ use std::io;
use std::path::{Path, PathBuf};
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
use pathutil::is_hidden;
use types::Types;
const IGNORE_NAMES: &'static [&'static str] = &[
".gitignore",
".agignore",
".rgignore",
];
@@ -83,7 +83,10 @@ pub struct Ignore {
overrides: Overrides,
/// A file type matcher.
types: Types,
/// Whether to ignore hidden files or not.
ignore_hidden: bool,
/// When true, don't look at .gitignore or .agignore files for ignore
/// rules.
no_ignore: bool,
}
@@ -208,15 +211,17 @@ impl Ignore {
debug!("{} ignored because it is hidden", path.display());
return true;
}
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
let mat = id.matched(path, is_dir);
if let Some(is_ignored) = self.ignore_match(path, mat) {
if is_ignored {
return true;
if !self.no_ignore {
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
let mat = id.matched(path, is_dir);
if let Some(is_ignored) = self.ignore_match(path, mat) {
if is_ignored {
return true;
}
// If this path is whitelisted by an ignore, then
// fallthrough and let the file type matcher have a say.
break;
}
// If this path is whitelisted by an ignore, then fallthrough
// and let the file type matcher have a say.
break;
}
}
let mat = self.types.matched(path, is_dir);
@@ -361,8 +366,7 @@ impl Overrides {
let path = path.as_ref();
self.gi.as_ref()
.map(|gi| {
let path = &*path.to_string_lossy();
let mat = gi.matched_utf8(path, is_dir).invert();
let mat = gi.matched_stripped(path, is_dir).invert();
if mat.is_none() && !is_dir {
if gi.num_ignores() > 0 {
return Match::Ignored(&self.unmatched_pat);
@@ -374,14 +378,6 @@ impl Overrides {
}
}
fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
if let Some(name) = path.as_ref().file_name() {
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
} else {
false
}
}
#[cfg(test)]
mod tests {
use std::path::Path;


@@ -1,8 +1,7 @@
#![allow(dead_code, unused_variables)]
extern crate crossbeam;
extern crate deque;
extern crate docopt;
extern crate env_logger;
extern crate fnv;
extern crate grep;
#[cfg(windows)]
extern crate kernel32;
@@ -17,7 +16,6 @@ extern crate num_cpus;
extern crate regex;
extern crate rustc_serialize;
extern crate term;
extern crate thread_local;
extern crate walkdir;
#[cfg(windows)]
extern crate winapi;
@@ -25,22 +23,25 @@ extern crate winapi;
use std::error::Error;
use std::fs::File;
use std::io;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::process;
use std::result;
use std::sync::{Arc, Mutex};
use std::thread;
use crossbeam::sync::chase_lev::{self, Steal, Stealer};
use deque::{Stealer, Stolen};
use grep::Grep;
use memmap::{Mmap, Protection};
use term::Terminal;
use walkdir::DirEntry;
use args::Args;
use out::{NoColorTerminal, Out, OutBuffer};
use out::{ColoredTerminal, Out};
use pathutil::strip_prefix;
use printer::Printer;
use search_stream::InputBuffer;
#[cfg(windows)]
use terminal_win::WindowsBuffer;
macro_rules! errored {
($($tt:tt)*) => {
@@ -61,10 +62,12 @@ mod gitignore;
mod glob;
mod ignore;
mod out;
mod pathutil;
mod printer;
mod search_buffer;
mod search_stream;
mod terminal;
#[cfg(windows)]
mod terminal_win;
mod types;
mod walk;
@@ -73,7 +76,7 @@ pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
fn main() {
match Args::parse().and_then(run) {
Ok(count) if count == 0 => process::exit(1),
Ok(count) => process::exit(0),
Ok(_) => process::exit(0),
Err(err) => {
eprintln!("{}", err);
process::exit(1);
@@ -82,34 +85,40 @@ fn main() {
}
fn run(args: Args) -> Result<u64> {
let args = Arc::new(args);
let paths = args.paths();
if args.files() {
return run_files(args);
return run_files(args.clone());
}
if args.type_list() {
return run_types(args);
return run_types(args.clone());
}
let args = Arc::new(args);
if paths.len() == 1 && (paths[0] == Path::new("-") || paths[0].is_file()) {
return run_one(args.clone(), &paths[0]);
}
let out = Arc::new(Mutex::new(args.out()));
let outbuf = args.outbuf();
let mut workers = vec![];
let mut workq = {
let (workq, stealer) = chase_lev::deque();
let workq = {
let (workq, stealer) = deque::new();
for _ in 0..args.threads() {
let worker = Worker {
args: args.clone(),
out: out.clone(),
let worker = MultiWorker {
chan_work: stealer.clone(),
inpbuf: args.input_buffer(),
outbuf: Some(outbuf.clone()),
grep: args.grep(),
match_count: 0,
out: out.clone(),
outbuf: Some(args.outbuf()),
worker: Worker {
args: args.clone(),
inpbuf: args.input_buffer(),
grep: args.grep(),
match_count: 0,
},
};
workers.push(thread::spawn(move || worker.run()));
}
workq
};
for p in args.paths() {
for p in paths {
if p == Path::new("-") {
workq.push(Work::Stdin)
} else {
@@ -128,8 +137,27 @@ fn run(args: Args) -> Result<u64> {
Ok(match_count)
}
fn run_files(args: Args) -> Result<u64> {
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
fn run_one(args: Arc<Args>, path: &Path) -> Result<u64> {
let mut worker = Worker {
args: args.clone(),
inpbuf: args.input_buffer(),
grep: args.grep(),
match_count: 0,
};
let term = args.stdout();
let mut printer = args.printer(term);
let work =
if path == Path::new("-") {
WorkReady::Stdin
} else {
WorkReady::PathFile(path.to_path_buf(), try!(File::open(path)))
};
worker.do_work(&mut printer, work);
Ok(worker.match_count)
}
fn run_files(args: Arc<Args>) -> Result<u64> {
let term = args.stdout();
let mut printer = args.printer(term);
let mut file_count = 0;
for p in args.paths() {
@@ -146,8 +174,8 @@ fn run_files(args: Args) -> Result<u64> {
Ok(file_count)
}
fn run_types(args: Args) -> Result<u64> {
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
fn run_types(args: Arc<Args>) -> Result<u64> {
let term = args.stdout();
let mut printer = args.printer(term);
let mut ty_count = 0;
for def in args.type_defs() {
@@ -165,30 +193,37 @@ enum Work {
enum WorkReady {
Stdin,
File(DirEntry, File),
DirFile(DirEntry, File),
PathFile(PathBuf, File),
}
struct MultiWorker {
chan_work: Stealer<Work>,
out: Arc<Mutex<Out>>,
#[cfg(not(windows))]
outbuf: Option<ColoredTerminal<term::TerminfoTerminal<Vec<u8>>>>,
#[cfg(windows)]
outbuf: Option<ColoredTerminal<WindowsBuffer>>,
worker: Worker,
}
struct Worker {
args: Arc<Args>,
out: Arc<Mutex<Out>>,
chan_work: Stealer<Work>,
inpbuf: InputBuffer,
outbuf: Option<OutBuffer>,
grep: Grep,
match_count: u64,
}
impl Worker {
impl MultiWorker {
fn run(mut self) -> u64 {
self.match_count = 0;
loop {
let work = match self.chan_work.steal() {
Steal::Empty | Steal::Abort => continue,
Steal::Data(Work::Quit) => break,
Steal::Data(Work::Stdin) => WorkReady::Stdin,
Steal::Data(Work::File(ent)) => {
Stolen::Empty | Stolen::Abort => continue,
Stolen::Data(Work::Quit) => break,
Stolen::Data(Work::Stdin) => WorkReady::Stdin,
Stolen::Data(Work::File(ent)) => {
match File::open(ent.path()) {
Ok(file) => WorkReady::File(ent, file),
Ok(file) => WorkReady::DirFile(ent, file),
Err(err) => {
eprintln!("{}: {}", ent.path().display(), err);
continue;
@@ -198,8 +233,8 @@ impl Worker {
};
let mut outbuf = self.outbuf.take().unwrap();
outbuf.clear();
let mut printer = self.args.printer(outbuf);
self.do_work(&mut printer, work);
let mut printer = self.worker.args.printer(outbuf);
self.worker.do_work(&mut printer, work);
let outbuf = printer.into_inner();
if !outbuf.get_ref().is_empty() {
let mut out = self.out.lock().unwrap();
@@ -207,10 +242,12 @@ impl Worker {
}
self.outbuf = Some(outbuf);
}
self.match_count
self.worker.match_count
}
}
fn do_work<W: Send + Terminal>(
impl Worker {
fn do_work<W: Terminal + Send>(
&mut self,
printer: &mut Printer<W>,
work: WorkReady,
@@ -221,9 +258,20 @@ impl Worker {
let stdin = stdin.lock();
self.search(printer, &Path::new("<stdin>"), stdin)
}
WorkReady::File(ent, file) => {
WorkReady::DirFile(ent, file) => {
let mut path = ent.path();
if let Ok(p) = path.strip_prefix("./") {
if let Some(p) = strip_prefix("./", path) {
path = p;
}
if self.args.mmap() {
self.search_mmap(printer, path, &file)
} else {
self.search(printer, path, file)
}
}
WorkReady::PathFile(path, file) => {
let mut path = &*path;
if let Some(p) = strip_prefix("./", path) {
path = p;
}
if self.args.mmap() {
@@ -243,7 +291,7 @@ impl Worker {
}
}
fn search<R: io::Read, W: Send + Terminal>(
fn search<R: io::Read, W: Terminal + Send>(
&mut self,
printer: &mut Printer<W>,
path: &Path,
@@ -258,7 +306,7 @@ impl Worker {
).run().map_err(From::from)
}
fn search_mmap<W: Send + Terminal>(
fn search_mmap<W: Terminal + Send>(
&mut self,
printer: &mut Printer<W>,
path: &Path,


@@ -1,40 +1,12 @@
use std::io::{self, Write};
use std::sync::Arc;
use term::{self, Terminal};
use term::color::Color;
use term::terminfo::TermInfo;
#[cfg(windows)]
use term::WinConsole;
use terminal::TerminfoTerminal;
pub type StdoutTerminal = Box<Terminal<Output=io::Stdout> + Send>;
/// Gets a terminal that supports color if available.
#[cfg(windows)]
fn term_stdout(color: bool) -> StdoutTerminal {
let stdout = io::stdout();
WinConsole::new(stdout)
.ok()
.map(|t| Box::new(t) as StdoutTerminal)
.unwrap_or_else(|| {
let stdout = io::stdout();
Box::new(NoColorTerminal::new(stdout)) as StdoutTerminal
})
}
/// Gets a terminal that supports color if available.
#[cfg(not(windows))]
fn term_stdout(color: bool) -> StdoutTerminal {
let stdout = io::stdout();
if !color || TERMINFO.is_none() {
Box::new(NoColorTerminal::new(stdout))
} else {
let info = TERMINFO.clone().unwrap();
Box::new(TerminfoTerminal::new_with_terminfo(stdout, info))
}
}
use terminal_win::WindowsBuffer;
/// Out controls the actual output of all search results for a particular file
/// to the end user.
@@ -43,16 +15,31 @@ fn term_stdout(color: bool) -> StdoutTerminal {
/// individual search results whereas Out works with search results for each
/// file as a whole. For example, it knows when to print a file separator.)
pub struct Out {
term: StdoutTerminal,
#[cfg(not(windows))]
term: ColoredTerminal<term::TerminfoTerminal<io::BufWriter<io::Stdout>>>,
#[cfg(windows)]
term: ColoredTerminal<WinConsole<io::Stdout>>,
printed: bool,
file_separator: Option<Vec<u8>>,
}
impl Out {
/// Create a new Out that writes to the wtr given.
#[cfg(not(windows))]
pub fn new(color: bool) -> Out {
let wtr = io::BufWriter::new(io::stdout());
Out {
term: ColoredTerminal::new(wtr, color),
printed: false,
file_separator: None,
}
}
/// Create a new Out that writes to the wtr given.
#[cfg(windows)]
pub fn new(color: bool) -> Out {
Out {
term: term_stdout(color),
term: ColoredTerminal::new_stdout(color),
printed: false,
file_separator: None,
}
@@ -69,154 +56,194 @@ impl Out {
/// Write the search results of a single file to the underlying wtr and
/// flush wtr.
pub fn write(&mut self, buf: &OutBuffer) {
#[cfg(not(windows))]
pub fn write(
&mut self,
buf: &ColoredTerminal<term::TerminfoTerminal<Vec<u8>>>,
) {
self.write_sep();
match *buf {
ColoredTerminal::Colored(ref tt) => {
let _ = self.term.write_all(tt.get_ref());
}
ColoredTerminal::NoColor(ref buf) => {
let _ = self.term.write_all(buf);
}
}
self.write_done();
}
/// Write the search results of a single file to the underlying wtr and
/// flush wtr.
#[cfg(windows)]
pub fn write(
&mut self,
buf: &ColoredTerminal<WindowsBuffer>,
) {
self.write_sep();
match *buf {
ColoredTerminal::Colored(ref tt) => {
tt.print_stdout(&mut self.term);
}
ColoredTerminal::NoColor(ref buf) => {
let _ = self.term.write_all(buf);
}
}
self.write_done();
}
fn write_sep(&mut self) {
if let Some(ref sep) = self.file_separator {
if self.printed {
let _ = self.term.write_all(sep);
let _ = self.term.write_all(b"\n");
}
}
match *buf {
OutBuffer::Colored(ref tt) => {
let _ = self.term.write_all(tt.get_ref());
}
OutBuffer::Windows(ref w) => {
w.print_stdout(&mut self.term);
}
OutBuffer::NoColor(ref buf) => {
let _ = self.term.write_all(buf);
}
}
}
fn write_done(&mut self) {
let _ = self.term.flush();
self.printed = true;
}
}
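An illustrative sketch (not from these commits) of how a worker hands a finished per-file buffer to `Out` on Unix: the mutex serializes whole files, and `write` takes care of the optional file separator and the flush.

    // Sketch only (Unix): flush one file's worth of buffered results.
    fn flush_file_results(
        out: &std::sync::Mutex<Out>,
        buf: &ColoredTerminal<term::TerminfoTerminal<Vec<u8>>>,
    ) {
        if !buf.get_ref().is_empty() {
            let mut out = out.lock().unwrap();
            out.write(buf);
        }
    }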
/// OutBuffer corresponds to the final output buffer for search results. All
/// search results are written to a buffer and then a buffer is flushed to
/// stdout only after the full search has completed.
/// ColoredTerminal provides optional colored output through the term::Terminal
/// trait. In particular, it will dynamically configure itself to use coloring
/// if it's available in the environment.
#[derive(Clone, Debug)]
pub enum OutBuffer {
Colored(TerminfoTerminal<Vec<u8>>),
Windows(WindowsBuffer),
NoColor(Vec<u8>),
pub enum ColoredTerminal<T: Terminal + Send> {
Colored(T),
NoColor(T::Output),
}
#[derive(Clone, Debug)]
pub struct WindowsBuffer {
buf: Vec<u8>,
pos: usize,
colors: Vec<WindowsColor>,
}
#[derive(Clone, Debug)]
pub struct WindowsColor {
pos: usize,
opt: WindowsOption,
}
#[derive(Clone, Debug)]
pub enum WindowsOption {
Foreground(Color),
Background(Color),
Reset,
}
lazy_static! {
static ref TERMINFO: Option<Arc<TermInfo>> = {
match TermInfo::from_env() {
Ok(info) => Some(Arc::new(info)),
Err(err) => {
debug!("error loading terminfo for coloring: {}", err);
None
}
}
};
}
impl OutBuffer {
#[cfg(not(windows))]
impl<W: io::Write + Send> ColoredTerminal<term::TerminfoTerminal<W>> {
/// Create a new output buffer.
///
/// When color is true, the buffer will attempt to support coloring.
pub fn new(color: bool) -> OutBuffer {
// If we want color, build a TerminfoTerminal and see if the current
// environment supports coloring. If not, bail with NoColor. To avoid
// losing our writer (ownership), do this the long way.
pub fn new(wtr: W, color: bool) -> Self {
lazy_static! {
// Only pay for parsing the terminfo once.
static ref TERMINFO: Option<TermInfo> = {
match TermInfo::from_env() {
Ok(info) => Some(info),
Err(err) => {
debug!("error loading terminfo for coloring: {}", err);
None
}
}
};
}
// If we want color, build a term::TerminfoTerminal and see if the
// current environment supports coloring. If not, bail with NoColor. To
// avoid losing our writer (ownership), do this the long way.
if !color {
return OutBuffer::NoColor(vec![]);
return ColoredTerminal::NoColor(wtr);
}
if cfg!(windows) {
return OutBuffer::Windows(WindowsBuffer {
buf: vec![],
pos: 0,
colors: vec![]
});
}
if TERMINFO.is_none() {
return OutBuffer::NoColor(vec![]);
}
let info = TERMINFO.clone().unwrap();
let tt = TerminfoTerminal::new_with_terminfo(vec![], info);
let terminfo = match *TERMINFO {
None => return ColoredTerminal::NoColor(wtr),
Some(ref ti) => {
// Ug, this should go away with the next release of `term`.
TermInfo {
names: ti.names.clone(),
bools: ti.bools.clone(),
numbers: ti.numbers.clone(),
strings: ti.strings.clone(),
}
}
};
let tt = term::TerminfoTerminal::new_with_terminfo(wtr, terminfo);
if !tt.supports_color() {
debug!("environment doesn't support coloring");
return OutBuffer::NoColor(tt.into_inner());
return ColoredTerminal::NoColor(tt.into_inner());
}
ColoredTerminal::Colored(tt)
}
}
#[cfg(not(windows))]
impl ColoredTerminal<term::TerminfoTerminal<Vec<u8>>> {
/// Clear the given buffer of all search results such that it is reusable
/// in another search.
pub fn clear(&mut self) {
match *self {
ColoredTerminal::Colored(ref mut tt) => {
tt.get_mut().clear();
}
ColoredTerminal::NoColor(ref mut buf) => {
buf.clear();
}
}
}
}
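An illustrative sketch (not from these commits) of the intended construction on Unix: the same call either yields a terminfo-backed colored writer or silently degrades to a plain `Vec<u8>` buffer, so callers never branch on color support themselves.

    // Sketch only (Unix): a per-worker output buffer. If terminfo is
    // missing or color is disabled, this is just a byte-buffer writer.
    fn new_outbuf(color: bool) -> ColoredTerminal<term::TerminfoTerminal<Vec<u8>>> {
        ColoredTerminal::new(vec![], color)
    }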
#[cfg(windows)]
impl ColoredTerminal<WindowsBuffer> {
/// Create a new output buffer.
///
/// When color is true, the buffer will attempt to support coloring.
pub fn new_buffer(color: bool) -> Self {
if !color {
ColoredTerminal::NoColor(vec![])
} else {
ColoredTerminal::Colored(WindowsBuffer::new())
}
OutBuffer::Colored(tt)
}
/// Clear the given buffer of all search results such that it is reusable
/// in another search.
pub fn clear(&mut self) {
match *self {
OutBuffer::Colored(ref mut tt) => {
tt.get_mut().clear();
}
OutBuffer::Windows(ref mut win) => {
win.buf.clear();
win.colors.clear();
win.pos = 0;
}
OutBuffer::NoColor(ref mut buf) => {
buf.clear();
}
ColoredTerminal::Colored(ref mut win) => win.clear(),
ColoredTerminal::NoColor(ref mut buf) => buf.clear(),
}
}
}
fn map_result<F, G>(
#[cfg(windows)]
impl ColoredTerminal<WinConsole<io::Stdout>> {
/// Create a new output buffer.
///
/// When color is true, the buffer will attempt to support coloring.
pub fn new_stdout(color: bool) -> Self {
if !color {
return ColoredTerminal::NoColor(io::stdout());
}
match WinConsole::new(io::stdout()) {
Ok(win) => ColoredTerminal::Colored(win),
Err(_) => ColoredTerminal::NoColor(io::stdout()),
}
}
}
impl<T: Terminal + Send> ColoredTerminal<T> {
fn map_result<F>(
&mut self,
mut f: F,
mut g: G,
) -> term::Result<()>
where F: FnMut(&mut TerminfoTerminal<Vec<u8>>) -> term::Result<()>,
G: FnMut(&mut WindowsBuffer) -> term::Result<()> {
where F: FnMut(&mut T) -> term::Result<()> {
match *self {
OutBuffer::Colored(ref mut w) => f(w),
OutBuffer::Windows(ref mut w) => g(w),
OutBuffer::NoColor(_) => Err(term::Error::NotSupported),
ColoredTerminal::Colored(ref mut w) => f(w),
ColoredTerminal::NoColor(_) => Err(term::Error::NotSupported),
}
}
fn map_bool<F, G>(
fn map_bool<F>(
&self,
mut f: F,
mut g: G,
) -> bool
where F: FnMut(&TerminfoTerminal<Vec<u8>>) -> bool,
G: FnMut(&WindowsBuffer) -> bool {
where F: FnMut(&T) -> bool {
match *self {
OutBuffer::Colored(ref w) => f(w),
OutBuffer::Windows(ref w) => g(w),
OutBuffer::NoColor(_) => false,
ColoredTerminal::Colored(ref w) => f(w),
ColoredTerminal::NoColor(_) => false,
}
}
}
impl io::Write for OutBuffer {
impl<T: Terminal + Send> io::Write for ColoredTerminal<T> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match *self {
OutBuffer::Colored(ref mut w) => w.write(buf),
OutBuffer::Windows(ref mut w) => w.write(buf),
OutBuffer::NoColor(ref mut w) => w.write(buf),
ColoredTerminal::Colored(ref mut w) => w.write(buf),
ColoredTerminal::NoColor(ref mut w) => w.write(buf),
}
}
@@ -225,259 +252,67 @@ impl io::Write for OutBuffer {
}
}
impl term::Terminal for OutBuffer {
type Output = Vec<u8>;
impl<T: Terminal + Send> term::Terminal for ColoredTerminal<T> {
type Output = T::Output;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
self.map_result(|w| w.fg(fg))
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
self.map_result(|w| w.bg(bg))
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
self.map_result(|w| w.attr(attr))
}
fn supports_attr(&self, attr: term::Attr) -> bool {
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
self.map_bool(|w| w.supports_attr(attr))
}
fn reset(&mut self) -> term::Result<()> {
self.map_result(|w| w.reset(), |w| w.reset())
self.map_result(|w| w.reset())
}
fn supports_reset(&self) -> bool {
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
self.map_bool(|w| w.supports_reset())
}
fn supports_color(&self) -> bool {
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
self.map_bool(|w| w.supports_color())
}
fn cursor_up(&mut self) -> term::Result<()> {
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
self.map_result(|w| w.cursor_up())
}
fn delete_line(&mut self) -> term::Result<()> {
self.map_result(|w| w.delete_line(), |w| w.delete_line())
self.map_result(|w| w.delete_line())
}
fn carriage_return(&mut self) -> term::Result<()> {
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
self.map_result(|w| w.carriage_return())
}
fn get_ref(&self) -> &Vec<u8> {
fn get_ref(&self) -> &Self::Output {
match *self {
OutBuffer::Colored(ref w) => w.get_ref(),
OutBuffer::Windows(ref w) => w.get_ref(),
OutBuffer::NoColor(ref w) => w,
ColoredTerminal::Colored(ref w) => w.get_ref(),
ColoredTerminal::NoColor(ref w) => w,
}
}
fn get_mut(&mut self) -> &mut Vec<u8> {
fn get_mut(&mut self) -> &mut Self::Output {
match *self {
OutBuffer::Colored(ref mut w) => w.get_mut(),
OutBuffer::Windows(ref mut w) => w.get_mut(),
OutBuffer::NoColor(ref mut w) => w,
ColoredTerminal::Colored(ref mut w) => w.get_mut(),
ColoredTerminal::NoColor(ref mut w) => w,
}
}
fn into_inner(self) -> Vec<u8> {
fn into_inner(self) -> Self::Output {
match self {
OutBuffer::Colored(w) => w.into_inner(),
OutBuffer::Windows(w) => w.into_inner(),
OutBuffer::NoColor(w) => w,
ColoredTerminal::Colored(w) => w.into_inner(),
ColoredTerminal::NoColor(w) => w,
}
}
}
impl WindowsBuffer {
fn push(&mut self, opt: WindowsOption) {
let pos = self.pos;
self.colors.push(WindowsColor { pos: pos, opt: opt });
}
}
impl WindowsBuffer {
/// Print the contents to the given terminal.
pub fn print_stdout(&self, tt: &mut StdoutTerminal) {
if !tt.supports_color() {
let _ = tt.write_all(&self.buf);
let _ = tt.flush();
return;
}
let mut last = 0;
for col in &self.colors {
let _ = tt.write_all(&self.buf[last..col.pos]);
match col.opt {
WindowsOption::Foreground(c) => {
let _ = tt.fg(c);
}
WindowsOption::Background(c) => {
let _ = tt.bg(c);
}
WindowsOption::Reset => {
let _ = tt.reset();
}
}
last = col.pos;
}
let _ = tt.write_all(&self.buf[last..]);
let _ = tt.flush();
}
}
impl io::Write for WindowsBuffer {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let n = try!(self.buf.write(buf));
self.pos += n;
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl term::Terminal for WindowsBuffer {
type Output = Vec<u8>;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Foreground(fg));
Ok(())
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Background(bg));
Ok(())
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
self.push(WindowsOption::Reset);
Ok(())
}
fn supports_reset(&self) -> bool {
true
}
fn supports_color(&self) -> bool {
true
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &Vec<u8> {
&self.buf
}
fn get_mut(&mut self) -> &mut Vec<u8> {
&mut self.buf
}
fn into_inner(self) -> Vec<u8> {
self.buf
}
}
/// NoColorTerminal implements Terminal, but supports no coloring.
///
/// It's useful when an API requires a Terminal, but coloring isn't needed.
pub struct NoColorTerminal<W> {
wtr: W,
}
impl<W: Send + io::Write> NoColorTerminal<W> {
/// Wrap the given writer in a Terminal interface.
pub fn new(wtr: W) -> NoColorTerminal<W> {
NoColorTerminal {
wtr: wtr,
}
}
}
impl<W: Send + io::Write> io::Write for NoColorTerminal<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.wtr.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.wtr.flush()
}
}
impl<W: Send + io::Write> term::Terminal for NoColorTerminal<W> {
type Output = W;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_reset(&self) -> bool {
false
}
fn supports_color(&self) -> bool {
false
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &W {
&self.wtr
}
fn get_mut(&mut self) -> &mut W {
&mut self.wtr
}
fn into_inner(self) -> W {
self.wtr
}
}

src/pathutil.rs (new file, 98 lines)

@@ -0,0 +1,98 @@
/*!
The pathutil module provides platform specific operations on paths that are
typically faster than the same operations as provided in std::path. In
particular, we really want to avoid the costly operation of parsing the path
into its constituent components. We give up on Windows, but on Unix, we deal
with the raw bytes directly.
On large repositories (like chromium), this can have a ~25% performance
improvement on just listing the files to search (!).
*/
use std::ffi::OsStr;
use std::path::Path;
use memchr::memrchr;
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path>>(
prefix: P,
path: &'a Path,
) -> Option<&'a Path> {
use std::os::unix::ffi::OsStrExt;
let prefix = prefix.as_ref().as_os_str().as_bytes();
let path = path.as_os_str().as_bytes();
if prefix.len() > path.len() || prefix != &path[0..prefix.len()] {
None
} else {
Some(&Path::new(OsStr::from_bytes(&path[prefix.len()..])))
}
}
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(not(unix))]
pub fn strip_prefix<'a>(prefix: &Path, path: &'a Path) -> Option<&'a Path> {
path.strip_prefix(prefix).ok()
}
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
use std::os::unix::ffi::OsStrExt;
let path = path.as_ref().as_os_str().as_bytes();
if path.is_empty() {
return None;
} else if path.len() == 1 && path[0] == b'.' {
return None;
} else if path.last() == Some(&b'.') {
return None;
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
return None;
}
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
Some(OsStr::from_bytes(&path[last_slash..]))
}
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
path.as_ref().file_name()
}
/// Returns true if and only if this file path is considered to be hidden.
#[cfg(unix)]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
use std::os::unix::ffi::OsStrExt;
if let Some(name) = file_name(path.as_ref()) {
name.as_bytes().get(0) == Some(&b'.')
} else {
false
}
}
/// Returns true if and only if this file path is considered to be hidden.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
if let Some(name) = file_name(path) {
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
} else {
false
}
}
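An illustrative sketch (not from these commits) of how these helpers compose, using the Unix semantics:

    // Sketch only: strip a leading "./", then inspect the basename.
    fn demo() {
        use std::ffi::OsStr;
        use std::path::Path;

        let path = Path::new("./src/.git/config");
        let path = strip_prefix("./", path).unwrap_or(path); // -> "src/.git/config"
        assert_eq!(file_name(path), Some(OsStr::new("config")));
        // Only the final component is checked, so this path is not hidden.
        assert!(!is_hidden(path));
    }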


@@ -36,7 +36,7 @@ pub struct Printer<W> {
with_filename: bool,
}
impl<W: Send + Terminal> Printer<W> {
impl<W: Terminal + Send> Printer<W> {
/// Create a new printer that writes to wtr.
pub fn new(wtr: W) -> Printer<W> {
Printer {

View File

@@ -151,10 +151,10 @@ impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
mod tests {
use std::path::Path;
use grep::{Grep, GrepBuilder};
use term::Terminal;
use grep::GrepBuilder;
use term::{Terminal, TerminfoTerminal};
use out::OutBuffer;
use out::ColoredTerminal;
use printer::Printer;
use super::BufferSearcher;
@@ -168,38 +168,19 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
const CODE: &'static str = "\
extern crate snap;
use std::io;
fn main() {
let stdin = io::stdin();
let stdout = io::stdout();
// Wrap the stdin reader in a Snappy reader.
let mut rdr = snap::Reader::new(stdin.lock());
let mut wtr = stdout.lock();
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";
fn matcher(pat: &str) -> Grep {
GrepBuilder::new(pat).build().unwrap()
}
fn test_path() -> &'static Path {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = BufferSearcher<'a, OutBuffer>;
type TestSearcher<'a> =
BufferSearcher<'a, ColoredTerminal<TerminfoTerminal<Vec<u8>>>>;
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
haystack: &str,
mut map: F,
) -> (u64, String) {
let outbuf = OutBuffer::NoColor(vec![]);
let outbuf = ColoredTerminal::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {


@@ -100,7 +100,7 @@ impl Default for Options {
}
}
impl<'a, R: io::Read, W: Send + Terminal> Searcher<'a, R, W> {
impl<'a, R: io::Read, W: Terminal + Send> Searcher<'a, R, W> {
/// Create a new searcher.
///
/// `inp` is a reusable input buffer that is used as scratch space by this
@@ -763,10 +763,10 @@ mod tests {
use std::io;
use std::path::Path;
use grep::{Grep, GrepBuilder};
use term::Terminal;
use grep::GrepBuilder;
use term::{Terminal, TerminfoTerminal};
use out::OutBuffer;
use out::ColoredTerminal;
use printer::Printer;
use super::{InputBuffer, Searcher, start_of_previous_lines};
@@ -800,15 +800,15 @@ fn main() {
io::Cursor::new(s.to_string().into_bytes())
}
fn matcher(pat: &str) -> Grep {
GrepBuilder::new(pat).build().unwrap()
}
fn test_path() -> &'static Path {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, OutBuffer>;
type TestSearcher<'a> = Searcher<
'a,
io::Cursor<Vec<u8>>,
ColoredTerminal<TerminfoTerminal<Vec<u8>>>,
>;
fn search_smallcap<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
@@ -816,7 +816,7 @@ fn main() {
mut map: F,
) -> (u64, String) {
let mut inp = InputBuffer::with_capacity(1);
let outbuf = OutBuffer::NoColor(vec![]);
let outbuf = ColoredTerminal::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
@@ -833,7 +833,7 @@ fn main() {
mut map: F,
) -> (u64, String) {
let mut inp = InputBuffer::with_capacity(4096);
let outbuf = OutBuffer::NoColor(vec![]);
let outbuf = ColoredTerminal::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {


@@ -1,202 +0,0 @@
/*!
This module contains an implementation of the `term::Terminal` trait.
The actual implementation is copied almost verbatim from the `term` crate, so
this code is under the same license (MIT/Apache).
The specific reason why this is copied here is to wrap an Arc<TermInfo> instead
of a TermInfo. This makes multithreaded sharing much more performant.
N.B. This is temporary until this issue is fixed:
https://github.com/Stebalien/term/issues/64
*/
use std::io::{self, Write};
use std::sync::Arc;
use term::{Attr, Error, Result, Terminal, color};
use term::terminfo::TermInfo;
use term::terminfo::parm::{Param, Variables, expand};
/// A Terminal that knows how many colors it supports, with a reference to its
/// parsed Terminfo database record.
#[derive(Clone, Debug)]
pub struct TerminfoTerminal<T> {
num_colors: u16,
out: T,
ti: Arc<TermInfo>,
}
impl<T: Write + Send> Terminal for TerminfoTerminal<T> {
type Output = T;
fn fg(&mut self, color: color::Color) -> Result<()> {
let color = self.dim_if_necessary(color);
if self.num_colors > color {
return apply_cap(&self.ti, "setaf", &[Param::Number(color as i32)], &mut self.out);
}
Err(Error::ColorOutOfRange)
}
fn bg(&mut self, color: color::Color) -> Result<()> {
let color = self.dim_if_necessary(color);
if self.num_colors > color {
return apply_cap(&self.ti, "setab", &[Param::Number(color as i32)], &mut self.out);
}
Err(Error::ColorOutOfRange)
}
fn attr(&mut self, attr: Attr) -> Result<()> {
match attr {
Attr::ForegroundColor(c) => self.fg(c),
Attr::BackgroundColor(c) => self.bg(c),
_ => apply_cap(&self.ti, cap_for_attr(attr), &[], &mut self.out),
}
}
fn supports_attr(&self, attr: Attr) -> bool {
match attr {
Attr::ForegroundColor(_) | Attr::BackgroundColor(_) => self.num_colors > 0,
_ => {
let cap = cap_for_attr(attr);
self.ti.strings.get(cap).is_some()
}
}
}
fn reset(&mut self) -> Result<()> {
reset(&self.ti, &mut self.out)
}
fn supports_reset(&self) -> bool {
["sgr0", "sgr", "op"].iter().any(|&cap| self.ti.strings.get(cap).is_some())
}
fn supports_color(&self) -> bool {
self.num_colors > 0 && self.supports_reset()
}
fn cursor_up(&mut self) -> Result<()> {
apply_cap(&self.ti, "cuu1", &[], &mut self.out)
}
fn delete_line(&mut self) -> Result<()> {
apply_cap(&self.ti, "dl", &[], &mut self.out)
}
fn carriage_return(&mut self) -> Result<()> {
apply_cap(&self.ti, "cr", &[], &mut self.out)
}
fn get_ref(&self) -> &T {
&self.out
}
fn get_mut(&mut self) -> &mut T {
&mut self.out
}
fn into_inner(self) -> T
where Self: Sized
{
self.out
}
}
impl<T: Write + Send> TerminfoTerminal<T> {
/// Create a new TerminfoTerminal with the given TermInfo and Write.
pub fn new_with_terminfo(out: T, terminfo: Arc<TermInfo>) -> TerminfoTerminal<T> {
let nc = if terminfo.strings.contains_key("setaf") &&
terminfo.strings.contains_key("setab") {
terminfo.numbers.get("colors").map_or(0, |&n| n)
} else {
0
};
TerminfoTerminal {
out: out,
ti: terminfo,
num_colors: nc,
}
}
/// Create a new TerminfoTerminal for the current environment with the given Write.
///
/// Returns `None` when the terminfo cannot be found or parsed.
pub fn new(out: T) -> Option<TerminfoTerminal<T>> {
TermInfo::from_env().map(move |ti| {
TerminfoTerminal::new_with_terminfo(out, Arc::new(ti))
}).ok()
}
fn dim_if_necessary(&self, color: color::Color) -> color::Color {
if color >= self.num_colors && color >= 8 && color < 16 {
color - 8
} else {
color
}
}
}
impl<T: Write> Write for TerminfoTerminal<T> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.out.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.out.flush()
}
}
fn cap_for_attr(attr: Attr) -> &'static str {
match attr {
Attr::Bold => "bold",
Attr::Dim => "dim",
Attr::Italic(true) => "sitm",
Attr::Italic(false) => "ritm",
Attr::Underline(true) => "smul",
Attr::Underline(false) => "rmul",
Attr::Blink => "blink",
Attr::Standout(true) => "smso",
Attr::Standout(false) => "rmso",
Attr::Reverse => "rev",
Attr::Secure => "invis",
Attr::ForegroundColor(_) => "setaf",
Attr::BackgroundColor(_) => "setab",
}
}
fn apply_cap(ti: &TermInfo, cmd: &str, params: &[Param], out: &mut io::Write) -> Result<()> {
match ti.strings.get(cmd) {
Some(cmd) => {
match expand(cmd, params, &mut Variables::new()) {
Ok(s) => {
try!(out.write_all(&s));
Ok(())
}
Err(e) => Err(e.into()),
}
}
None => Err(Error::NotSupported),
}
}
fn reset(ti: &TermInfo, out: &mut io::Write) -> Result<()> {
// are there any terminals that have color/attrs and not sgr0?
// Try falling back to sgr, then op
let cmd = match [("sgr0", &[] as &[Param]), ("sgr", &[Param::Number(0)]), ("op", &[])]
.iter()
.filter_map(|&(cap, params)| {
ti.strings.get(cap).map(|c| (c, params))
})
.next() {
Some((op, params)) => {
match expand(op, params, &mut Variables::new()) {
Ok(cmd) => cmd,
Err(e) => return Err(e.into()),
}
}
None => return Err(Error::NotSupported),
};
try!(out.write_all(&cmd));
Ok(())
}

src/terminal_win.rs (new file, 176 lines)

@@ -0,0 +1,176 @@
/*!
This module contains a Windows-only *in-memory* implementation of the
`term::Terminal` trait.
This particular implementation is a bit idiosyncratic, and the "in-memory"
specification is to blame. In particular, on Windows, coloring requires
communicating with the console synchronously as data is written to stdout.
This is anathema to how ripgrep fundamentally works: by writing search results
to intermediate thread local buffers in order to maximize parallelism.
Eliminating parallelism on Windows isn't an option, because that would negate
a tremendous performance benefit just for coloring.
We've worked around this by providing an implementation of `term::Terminal`
that records precisely where a color or a reset should be invoked, according
to a byte offset in the in memory buffer. When the buffer is actually printed,
we copy the bytes from the buffer to stdout incrementally while invoking the
corresponding console APIs for coloring at the right location.
(Another approach would be to do ANSI coloring unconditionally, then parse that
and translate it to console commands. The advantage of that approach is that
it doesn't require any additional memory for storing offsets. In practice
though, coloring is only used in the terminal, which tends to correspond to
searches that produce very few results with respect to the corpus searched.
Therefore, this is an acceptable trade off. Namely, we do not pay for it when
coloring is disabled.
*/
use std::io;
use term::{self, Terminal};
use term::color::Color;
/// An in-memory buffer that provides Windows console coloring.
#[derive(Clone, Debug)]
pub struct WindowsBuffer {
buf: Vec<u8>,
pos: usize,
colors: Vec<WindowsColor>,
}
/// A color associated with a particular location in a buffer.
#[derive(Clone, Debug)]
struct WindowsColor {
pos: usize,
opt: WindowsOption,
}
/// A color or reset directive that can be translated into an instruction to
/// the Windows console.
#[derive(Clone, Debug)]
enum WindowsOption {
Foreground(Color),
Background(Color),
Reset,
}
impl WindowsBuffer {
/// Create a new empty buffer for Windows console coloring.
pub fn new() -> WindowsBuffer {
WindowsBuffer {
buf: vec![],
pos: 0,
colors: vec![],
}
}
fn push(&mut self, opt: WindowsOption) {
let pos = self.pos;
self.colors.push(WindowsColor { pos: pos, opt: opt });
}
/// Print the contents to the given terminal.
pub fn print_stdout<T: Terminal + Send>(&self, tt: &mut T) {
if !tt.supports_color() {
let _ = tt.write_all(&self.buf);
let _ = tt.flush();
return;
}
let mut last = 0;
for col in &self.colors {
let _ = tt.write_all(&self.buf[last..col.pos]);
match col.opt {
WindowsOption::Foreground(c) => {
let _ = tt.fg(c);
}
WindowsOption::Background(c) => {
let _ = tt.bg(c);
}
WindowsOption::Reset => {
let _ = tt.reset();
}
}
last = col.pos;
}
let _ = tt.write_all(&self.buf[last..]);
let _ = tt.flush();
}
/// Clear the buffer.
pub fn clear(&mut self) {
self.buf.clear();
self.colors.clear();
self.pos = 0;
}
}
impl io::Write for WindowsBuffer {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let n = try!(self.buf.write(buf));
self.pos += n;
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl Terminal for WindowsBuffer {
type Output = Vec<u8>;
fn fg(&mut self, fg: Color) -> term::Result<()> {
self.push(WindowsOption::Foreground(fg));
Ok(())
}
fn bg(&mut self, bg: Color) -> term::Result<()> {
self.push(WindowsOption::Background(bg));
Ok(())
}
fn attr(&mut self, _attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, _attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
self.push(WindowsOption::Reset);
Ok(())
}
fn supports_reset(&self) -> bool {
true
}
fn supports_color(&self) -> bool {
true
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &Vec<u8> {
&self.buf
}
fn get_mut(&mut self) -> &mut Vec<u8> {
&mut self.buf
}
fn into_inner(self) -> Vec<u8> {
self.buf
}
}
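An illustrative sketch (not from these commits) of the record-then-replay flow described in the module docs; `console` is a hypothetical stand-in for the real stdout-backed terminal:

    // Sketch only: color directives are captured as byte offsets while the
    // worker writes, and replayed later against the console terminal.
    fn demo<T: Terminal + Send>(console: &mut T) {
        use std::io::Write;

        let mut buf = WindowsBuffer::new();
        let _ = buf.fg(term::color::RED); // recorded, not written
        let _ = buf.write_all(b"match");
        let _ = buf.reset();
        let _ = buf.write_all(b"\n");

        // Bytes are copied to `console` incrementally, with fg/reset
        // issued at the recorded positions.
        buf.print_stdout(console);
    }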


@@ -151,8 +151,8 @@ impl FileTypeDef {
/// Types is a file type matcher.
#[derive(Clone, Debug)]
pub struct Types {
selected: Option<glob::Set>,
negated: Option<glob::Set>,
selected: Option<glob::SetYesNo>,
negated: Option<glob::SetYesNo>,
has_selected: bool,
unmatched_pat: Pattern,
}
@@ -165,8 +165,8 @@ impl Types {
/// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored.
fn new(
selected: Option<glob::Set>,
negated: Option<glob::Set>,
selected: Option<glob::SetYesNo>,
negated: Option<glob::SetYesNo>,
has_selected: bool,
) -> Types {
Types {
@@ -216,7 +216,7 @@ impl Types {
if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
return Match::Ignored(&self.unmatched_pat);
}
if self.selected.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
if self.selected.as_ref().map(|s|s.is_match(&*name)).unwrap_or(false) {
return Match::Whitelist(&self.unmatched_pat);
}
if self.has_selected {
@@ -268,7 +268,7 @@ impl TypesBuilder {
try!(bset.add_with(glob, &opts));
}
}
Some(try!(bset.build()))
Some(try!(bset.build_yesno()))
};
let negated_globs =
if self.negated.is_empty() {
@@ -287,7 +287,7 @@ impl TypesBuilder {
try!(bset.add_with(glob, &opts));
}
}
Some(try!(bset.build()))
Some(try!(bset.build_yesno()))
};
Ok(Types::new(
selected_globs, negated_globs, !self.selected.is_empty()))


@@ -26,6 +26,7 @@ impl Iter {
}
/// Returns true if this entry should be skipped.
#[inline(always)]
fn skip_entry(&self, ent: &DirEntry) -> bool {
if ent.depth() == 0 {
// Never skip the root directory.
@@ -41,6 +42,7 @@ impl Iter {
impl Iterator for Iter {
type Item = DirEntry;
#[inline(always)]
fn next(&mut self) -> Option<DirEntry> {
while let Some(ev) = self.it.next() {
match ev {
@@ -108,6 +110,7 @@ impl From<WalkDir> for WalkEventIter {
impl Iterator for WalkEventIter {
type Item = walkdir::Result<WalkEvent>;
#[inline(always)]
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
let dent = self.next.take().or_else(|| self.it.next());
let depth = match dent {
@@ -135,8 +138,3 @@ impl Iterator for WalkEventIter {
}
}
}
fn is_hidden(ent: &DirEntry) -> bool {
ent.depth() > 0 &&
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
}