mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-02 13:11:58 -07:00
Compare commits
20 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
fe84928c85 | ||
|
f7eaf67fc3 | ||
|
c1c92e4fee | ||
|
5644bbe43a | ||
|
aeb3a5ba0f | ||
|
24e14a0341 | ||
|
2a2b1506d4 | ||
|
4d6b3c727e | ||
|
c2bf9e3d45 | ||
|
dad73b92eb | ||
|
b0d8ff6f4a | ||
|
0263a401f6 | ||
|
4cb1b9ccc0 | ||
|
6f80e2e126 | ||
|
f9bff90842 | ||
|
5af4ec0056 | ||
|
9e2f10b893 | ||
|
69095cf5c3 | ||
|
7402db7b43 | ||
|
7698b60256 |
@@ -15,9 +15,6 @@ matrix:
|
|||||||
- os: linux
|
- os: linux
|
||||||
rust: nightly
|
rust: nightly
|
||||||
env: TARGET=x86_64-unknown-linux-musl
|
env: TARGET=x86_64-unknown-linux-musl
|
||||||
- os: linux
|
|
||||||
rust: nightly
|
|
||||||
env: TARGET=x86_64-unknown-linux-gnu
|
|
||||||
- os: osx
|
- os: osx
|
||||||
rust: nightly
|
rust: nightly
|
||||||
env: TARGET=i686-apple-darwin
|
env: TARGET=i686-apple-darwin
|
||||||
|
6
Cargo.lock
generated
6
Cargo.lock
generated
@@ -1,13 +1,13 @@
|
|||||||
[root]
|
[root]
|
||||||
name = "ripgrep"
|
name = "ripgrep"
|
||||||
version = "0.1.5"
|
version = "0.1.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
|
"docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"grep 0.1.1",
|
"grep 0.1.2",
|
||||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
@@ -80,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "grep"
|
name = "grep"
|
||||||
version = "0.1.1"
|
version = "0.1.2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "ripgrep"
|
name = "ripgrep"
|
||||||
version = "0.1.5" #:version
|
version = "0.1.11" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Line oriented search tool using Rust's regex library. Combines the raw
|
Line oriented search tool using Rust's regex library. Combines the raw
|
||||||
@@ -27,14 +27,14 @@ deque = "0.3"
|
|||||||
docopt = "0.6"
|
docopt = "0.6"
|
||||||
env_logger = "0.3"
|
env_logger = "0.3"
|
||||||
fnv = "1.0"
|
fnv = "1.0"
|
||||||
grep = { version = "0.1.1", path = "grep" }
|
grep = { version = "0.1.2", path = "grep" }
|
||||||
lazy_static = "0.2"
|
lazy_static = "0.2"
|
||||||
libc = "0.2"
|
libc = "0.2"
|
||||||
log = "0.3"
|
log = "0.3"
|
||||||
memchr = "0.1"
|
memchr = "0.1"
|
||||||
memmap = "0.2"
|
memmap = "0.2"
|
||||||
num_cpus = "1"
|
num_cpus = "1"
|
||||||
regex = "0.1.76"
|
regex = "0.1.77"
|
||||||
rustc-serialize = "0.3"
|
rustc-serialize = "0.3"
|
||||||
term = "0.4"
|
term = "0.4"
|
||||||
walkdir = "0.1"
|
walkdir = "0.1"
|
||||||
|
@@ -18,8 +18,8 @@ Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
|||||||
### Quick example comparing tools
|
### Quick example comparing tools
|
||||||
|
|
||||||
This example searches the entire Linux kernel source tree (after running
|
This example searches the entire Linux kernel source tree (after running
|
||||||
`make`) for `[A-Z]+_SUSPEND`, where all matches must be words. Timings were
|
`make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where all matches must be
|
||||||
collected on a system with an Intel i7-6900K 3.2 GHz.
|
words. Timings were collected on a system with an Intel i7-6900K 3.2 GHz.
|
||||||
|
|
||||||
Please remember that a single benchmark is never enough! See my
|
Please remember that a single benchmark is never enough! See my
|
||||||
[blog post on `ripgrep`](http://blog.burntsushi.net/ripgrep/)
|
[blog post on `ripgrep`](http://blog.burntsushi.net/ripgrep/)
|
||||||
@@ -57,6 +57,9 @@ for a very detailed comparison with more benchmarks and analysis.
|
|||||||
color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
||||||
supporting Unicode (which is always on).
|
supporting Unicode (which is always on).
|
||||||
|
|
||||||
|
In other words, use `ripgrep` if you like speed, sane defaults, fewer bugs and
|
||||||
|
Unicode.
|
||||||
|
|
||||||
### Is it really faster than everything else?
|
### Is it really faster than everything else?
|
||||||
|
|
||||||
Yes. A large number of benchmarks with detailed analysis for each is
|
Yes. A large number of benchmarks with detailed analysis for each is
|
||||||
@@ -96,6 +99,14 @@ but you'll need to have the
|
|||||||
Tools](http://landinghub.visualstudio.com/visual-cpp-build-tools)
|
Tools](http://landinghub.visualstudio.com/visual-cpp-build-tools)
|
||||||
installed.
|
installed.
|
||||||
|
|
||||||
|
If you're a `brew` user, then you can install it with a custom formula
|
||||||
|
(N.B. `ripgrep` isn't actually in Homebrew yet. This just installs the binary
|
||||||
|
directly):
|
||||||
|
|
||||||
|
```
|
||||||
|
$ brew install https://raw.githubusercontent.com/BurntSushi/ripgrep/master/pkg/brew/ripgrep.rb
|
||||||
|
```
|
||||||
|
|
||||||
If you're a Rust programmer, `ripgrep` can be installed with `cargo`:
|
If you're a Rust programmer, `ripgrep` can be installed with `cargo`:
|
||||||
|
|
||||||
```
|
```
|
||||||
@@ -120,7 +131,7 @@ simply not work on UTF-16 encoded files or other more exotic encodings.
|
|||||||
happen.](https://github.com/BurntSushi/ripgrep/issues/1)
|
happen.](https://github.com/BurntSushi/ripgrep/issues/1)
|
||||||
|
|
||||||
To recursively search the current directory, while respecting all `.gitignore`
|
To recursively search the current directory, while respecting all `.gitignore`
|
||||||
files:
|
files, ignoring hidden files and directories and skipping binary files:
|
||||||
|
|
||||||
```
|
```
|
||||||
$ rg foobar
|
$ rg foobar
|
||||||
@@ -131,10 +142,14 @@ directories. `.rgignore` files can be used when `.gitignore` files are
|
|||||||
insufficient. In all cases, `.rgignore` patterns take precedence over
|
insufficient. In all cases, `.rgignore` patterns take precedence over
|
||||||
`.gitignore`.
|
`.gitignore`.
|
||||||
|
|
||||||
To ignore all ignore files, use `--no-ignore`:
|
To ignore all ignore files, use `-u`. To additionally search hidden files
|
||||||
|
and directories, use `-uu`. To additionally search binary files, use `-uuu`.
|
||||||
|
(In other words, "search everything, dammit!") In particular, `rg -uuu` is
|
||||||
|
equivalent to `grep -a -r`.
|
||||||
|
|
||||||
```
|
```
|
||||||
$ rg --no-ignore foobar
|
$ rg -uu foobar # equivalent to `grep -r`
|
||||||
|
$ rg -uuu foobar # equivalent to `grep -a -r`
|
||||||
```
|
```
|
||||||
|
|
||||||
(Tip: If your ignore files aren't being adhered to like you expect, run your
|
(Tip: If your ignore files aren't being adhered to like you expect, run your
|
||||||
|
23
appveyor.yml
23
appveyor.yml
@@ -2,27 +2,22 @@ environment:
|
|||||||
global:
|
global:
|
||||||
PROJECT_NAME: ripgrep
|
PROJECT_NAME: ripgrep
|
||||||
matrix:
|
matrix:
|
||||||
# Nightly channel
|
|
||||||
- TARGET: i686-pc-windows-gnu
|
- TARGET: i686-pc-windows-gnu
|
||||||
CHANNEL: nightly
|
CHANNEL: stable
|
||||||
- TARGET: i686-pc-windows-msvc
|
- TARGET: i686-pc-windows-msvc
|
||||||
CHANNEL: nightly
|
CHANNEL: stable
|
||||||
- TARGET: x86_64-pc-windows-gnu
|
- TARGET: x86_64-pc-windows-gnu
|
||||||
CHANNEL: nightly
|
CHANNEL: stable
|
||||||
- TARGET: x86_64-pc-windows-msvc
|
- TARGET: x86_64-pc-windows-msvc
|
||||||
CHANNEL: nightly
|
CHANNEL: stable
|
||||||
|
|
||||||
# Install Rust and Cargo
|
# Install Rust and Cargo
|
||||||
# (Based on https://github.com/rust-lang/libc/blob/master/appveyor.yml)
|
# (Based on https://github.com/rust-lang/libc/blob/master/appveyor.yml)
|
||||||
install:
|
install:
|
||||||
- ps: Start-FileDownload "https://static.rust-lang.org/dist/channel-rust-stable"
|
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
|
||||||
- ps: $env:RUST_VERSION = Get-Content channel-rust-stable | select -first 1 | %{$_.split('-')[1]}
|
- rustup-init.exe -y --default-host %TARGET%
|
||||||
- if NOT "%CHANNEL%" == "stable" set RUST_VERSION=%CHANNEL%
|
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
|
||||||
- ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-${env:RUST_VERSION}-${env:TARGET}.exe"
|
- if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin
|
||||||
- rust-%RUST_VERSION%-%TARGET%.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
|
|
||||||
- SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
|
|
||||||
- if "%TARGET%" == "i686-pc-windows-gnu" set PATH=%PATH%;C:\msys64\mingw32\bin
|
|
||||||
- if "%TARGET%" == "x86_64-pc-windows-gnu" set PATH=%PATH%;C:\msys64\mingw64\bin
|
|
||||||
- rustc -V
|
- rustc -V
|
||||||
- cargo -V
|
- cargo -V
|
||||||
|
|
||||||
@@ -57,7 +52,7 @@ deploy:
|
|||||||
# channel to use to produce the release artifacts
|
# channel to use to produce the release artifacts
|
||||||
# NOTE make sure you only release *once* per target
|
# NOTE make sure you only release *once* per target
|
||||||
# TODO you may want to pick a different channel
|
# TODO you may want to pick a different channel
|
||||||
CHANNEL: nightly
|
CHANNEL: stable
|
||||||
appveyor_repo_tag: true
|
appveyor_repo_tag: true
|
||||||
|
|
||||||
branches:
|
branches:
|
||||||
|
@@ -132,6 +132,7 @@ def bench_linux_literal_casei(suite_dir):
|
|||||||
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
|
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
|
||||||
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
|
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
|
||||||
mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
|
mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
|
||||||
|
mkcmd('pt (ignore)', ['pt', '-i', pat]),
|
||||||
mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
|
mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
|
||||||
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
|
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
|
||||||
# since that is certainly what ripgrep is doing, but this is for an
|
# since that is certainly what ripgrep is doing, but this is for an
|
||||||
@@ -165,6 +166,7 @@ def bench_linux_re_literal_suffix(suite_dir):
|
|||||||
return Benchmark(pattern=pat, commands=[
|
return Benchmark(pattern=pat, commands=[
|
||||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||||
mkcmd('ag (ignore)', ['ag', '-s', pat]),
|
mkcmd('ag (ignore)', ['ag', '-s', pat]),
|
||||||
|
mkcmd('pt (ignore)', ['pt', '-e', pat]),
|
||||||
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
|
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
|
||||||
mkcmd(
|
mkcmd(
|
||||||
'git grep (ignore)',
|
'git grep (ignore)',
|
||||||
@@ -194,6 +196,7 @@ def bench_linux_word(suite_dir):
|
|||||||
return Benchmark(pattern=pat, commands=[
|
return Benchmark(pattern=pat, commands=[
|
||||||
mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
|
mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
|
||||||
mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
|
mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
|
||||||
|
mkcmd('pt (ignore)', ['pt', '-w', pat]),
|
||||||
mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
|
mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
|
||||||
mkcmd(
|
mkcmd(
|
||||||
'git grep (ignore)',
|
'git grep (ignore)',
|
||||||
@@ -224,6 +227,7 @@ def bench_linux_unicode_greek(suite_dir):
|
|||||||
|
|
||||||
return Benchmark(pattern=pat, commands=[
|
return Benchmark(pattern=pat, commands=[
|
||||||
mkcmd('rg', ['rg', '-n', pat]),
|
mkcmd('rg', ['rg', '-n', pat]),
|
||||||
|
mkcmd('pt', ['pt', '-e', pat]),
|
||||||
mkcmd('sift', SIFT + ['-n', '--git', pat]),
|
mkcmd('sift', SIFT + ['-n', '--git', pat]),
|
||||||
])
|
])
|
||||||
|
|
||||||
@@ -244,6 +248,7 @@ def bench_linux_unicode_greek_casei(suite_dir):
|
|||||||
|
|
||||||
return Benchmark(pattern=pat, commands=[
|
return Benchmark(pattern=pat, commands=[
|
||||||
mkcmd('rg', ['rg', '-n', '-i', pat]),
|
mkcmd('rg', ['rg', '-n', '-i', pat]),
|
||||||
|
mkcmd('pt', ['pt', '-i', '-e', pat]),
|
||||||
mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
|
mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
|
||||||
])
|
])
|
||||||
|
|
||||||
@@ -268,7 +273,8 @@ def bench_linux_unicode_word(suite_dir):
|
|||||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||||
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
||||||
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
||||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
|
mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
|
||||||
|
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
|
||||||
mkcmd(
|
mkcmd(
|
||||||
'git grep (ignore)',
|
'git grep (ignore)',
|
||||||
['git', 'grep', '-E', '-I', '-n', pat],
|
['git', 'grep', '-E', '-I', '-n', pat],
|
||||||
@@ -308,7 +314,8 @@ def bench_linux_no_literal(suite_dir):
|
|||||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||||
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
||||||
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
||||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
|
mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
|
||||||
|
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
|
||||||
mkcmd(
|
mkcmd(
|
||||||
'git grep (ignore)',
|
'git grep (ignore)',
|
||||||
['git', 'grep', '-E', '-I', '-n', pat],
|
['git', 'grep', '-E', '-I', '-n', pat],
|
||||||
@@ -1125,7 +1132,8 @@ def download(suite_dir, choices):
|
|||||||
|
|
||||||
|
|
||||||
def collect_benchmarks(suite_dir, filter_pat=None,
|
def collect_benchmarks(suite_dir, filter_pat=None,
|
||||||
allow_missing_commands=False):
|
allow_missing_commands=False,
|
||||||
|
warmup_iter=1, bench_iter=3):
|
||||||
'''
|
'''
|
||||||
Return an iterable of all runnable benchmarks.
|
Return an iterable of all runnable benchmarks.
|
||||||
|
|
||||||
@@ -1148,6 +1156,8 @@ def collect_benchmarks(suite_dir, filter_pat=None,
|
|||||||
try:
|
try:
|
||||||
benchmark = globals()[fun](suite_dir)
|
benchmark = globals()[fun](suite_dir)
|
||||||
benchmark.name = name
|
benchmark.name = name
|
||||||
|
benchmark.warmup_count = warmup_iter
|
||||||
|
benchmark.count = bench_iter
|
||||||
benchmark.allow_missing_commands = allow_missing_commands
|
benchmark.allow_missing_commands = allow_missing_commands
|
||||||
benchmark.raise_if_missing()
|
benchmark.raise_if_missing()
|
||||||
except MissingDependencies as e:
|
except MissingDependencies as e:
|
||||||
@@ -1157,7 +1167,6 @@ def collect_benchmarks(suite_dir, filter_pat=None,
|
|||||||
name,
|
name,
|
||||||
' '.join(['--download %s' % n for n in e.missing_names]),
|
' '.join(['--download %s' % n for n in e.missing_names]),
|
||||||
))
|
))
|
||||||
continue
|
|
||||||
except MissingCommands as e:
|
except MissingCommands as e:
|
||||||
fmt = 'missing commands: %s, skipping benchmark %s ' \
|
fmt = 'missing commands: %s, skipping benchmark %s ' \
|
||||||
'(run with --allow-missing to run incomplete benchmarks)'
|
'(run with --allow-missing to run incomplete benchmarks)'
|
||||||
@@ -1194,6 +1203,14 @@ def main():
|
|||||||
'--raw', metavar='PATH',
|
'--raw', metavar='PATH',
|
||||||
help='Dump raw data (all samples collected) in CSV format to the '
|
help='Dump raw data (all samples collected) in CSV format to the '
|
||||||
'file path provided.')
|
'file path provided.')
|
||||||
|
p.add_argument(
|
||||||
|
'--warmup-iter', metavar='INTEGER', type=int, default=1,
|
||||||
|
help='The number of iterations to run each command before '
|
||||||
|
'recording measurements.')
|
||||||
|
p.add_argument(
|
||||||
|
'--bench-iter', metavar='INTEGER', type=int, default=3,
|
||||||
|
help='The number of iterations to run each command while '
|
||||||
|
'recording measurements.')
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
'bench', metavar='PAT', nargs='?',
|
'bench', metavar='PAT', nargs='?',
|
||||||
help='A regex pattern that will only run benchmarks that match.')
|
help='A regex pattern that will only run benchmarks that match.')
|
||||||
@@ -1202,7 +1219,8 @@ def main():
|
|||||||
if args.list:
|
if args.list:
|
||||||
benchmarks = collect_benchmarks(
|
benchmarks = collect_benchmarks(
|
||||||
args.dir, filter_pat=args.bench,
|
args.dir, filter_pat=args.bench,
|
||||||
allow_missing_commands=args.allow_missing)
|
allow_missing_commands=args.allow_missing,
|
||||||
|
warmup_iter=args.warmup_iter, bench_iter=args.bench_iter)
|
||||||
for b in benchmarks:
|
for b in benchmarks:
|
||||||
print(b.name)
|
print(b.name)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
@@ -1227,7 +1245,8 @@ def main():
|
|||||||
|
|
||||||
benchmarks = collect_benchmarks(
|
benchmarks = collect_benchmarks(
|
||||||
args.dir, filter_pat=args.bench,
|
args.dir, filter_pat=args.bench,
|
||||||
allow_missing_commands=args.allow_missing)
|
allow_missing_commands=args.allow_missing,
|
||||||
|
warmup_iter=args.warmup_iter, bench_iter=args.bench_iter)
|
||||||
for i, b in enumerate(benchmarks):
|
for i, b in enumerate(benchmarks):
|
||||||
result = b.run()
|
result = b.run()
|
||||||
fastest_cmd = result.fastest_cmd()
|
fastest_cmd = result.fastest_cmd()
|
||||||
|
93
benchsuite/runs/2016-09-20-ubuntu1604-ec2/README.SETUP
Normal file
93
benchsuite/runs/2016-09-20-ubuntu1604-ec2/README.SETUP
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
Ubuntu 16.04 HVM AMI
|
||||||
|
c3.2xlarge, Xeon E5-2680, 2.8 GHz, 8 CPUs, 16 GB memory, 80 GB SSD
|
||||||
|
|
||||||
|
# Generic system setup
|
||||||
|
|
||||||
|
mkfs.ext4 /dev/xvdb
|
||||||
|
sudo mount /dev/xvdb /mnt
|
||||||
|
sudo chown ubuntu /mnt
|
||||||
|
sudo apt-get update
|
||||||
|
sudo apt-get install \ # for building Linux kernel
|
||||||
|
make gcc bc
|
||||||
|
sudo apt-get install \ # for the silver searcher
|
||||||
|
automake pkg-config zlib1g-dev liblzma-dev libpcre3 libpcre3-dev
|
||||||
|
sudo apt-get install \ # for Universal Code Grep
|
||||||
|
libtool libpcre2-8-0 libpcre2-dev
|
||||||
|
sudo apt-get install \ # for sift and the platinum searcher
|
||||||
|
go
|
||||||
|
|
||||||
|
# Get benchmark corpora
|
||||||
|
|
||||||
|
cd /mnt
|
||||||
|
mkdir /mnt/bench
|
||||||
|
git clone git://github.com/BurntSushi/ripgrep
|
||||||
|
cd ripgrep/benchsuite
|
||||||
|
./benchsuite --dir /mnt/bench/ --download all # takes around 15 minutes
|
||||||
|
|
||||||
|
# Install search tools
|
||||||
|
mkdir /mnt/bin/
|
||||||
|
|
||||||
|
## ripgrep
|
||||||
|
|
||||||
|
cd /mnt
|
||||||
|
mkdir ripgrep-bin
|
||||||
|
cd ripgrep-bin
|
||||||
|
curl -LO 'https://github.com/BurntSushi/ripgrep/releases/download/0.1.2/ripgrep-0.1.2-x86_64-unknown-linux-musl.tar.gz'
|
||||||
|
cp ripgrep-0.1.2-x86_64-unknown-linux-musl/rg /mnt/bin/
|
||||||
|
|
||||||
|
## The Silver Searcher
|
||||||
|
|
||||||
|
cd /mnt
|
||||||
|
git clone git://github.com/ggreer/the_silver_searcher
|
||||||
|
cd the_silver_searcher
|
||||||
|
git checkout cda635
|
||||||
|
./build.sh
|
||||||
|
cp ag /mnt/bin/
|
||||||
|
|
||||||
|
## Universal Code Grep
|
||||||
|
|
||||||
|
cd /mnt
|
||||||
|
git clone git://github.com/gvansickle/ucg
|
||||||
|
cd ucg
|
||||||
|
git checkout 487bfb
|
||||||
|
autoreconf -i
|
||||||
|
./configure
|
||||||
|
make
|
||||||
|
cp ucg /mnt/bin/
|
||||||
|
|
||||||
|
## The Platinum Searcher
|
||||||
|
|
||||||
|
export GOPATH=/mnt/go
|
||||||
|
go get github.com/monochromegane/the_platinum_searcher
|
||||||
|
cd /mnt/go/src/github.com/monochromegane/the_platinum_searcher
|
||||||
|
git checkout 509368
|
||||||
|
go install github.com/monochromegane/the_platinum_searcher/cmd/...
|
||||||
|
cp /mnt/go/bin/pt /mnt/bin/
|
||||||
|
|
||||||
|
## Sift
|
||||||
|
|
||||||
|
export GOPATH=/mnt/go
|
||||||
|
go get github.com/svent/sift
|
||||||
|
cd /mnt/go/src/github.com/svent/sift
|
||||||
|
git checkout 2d175c
|
||||||
|
go install
|
||||||
|
cp /mnt/go/bin/sift /mnt/bin/
|
||||||
|
|
||||||
|
## 'git grep' and GNU grep
|
||||||
|
|
||||||
|
They are part of the standard Ubuntu install, and are pretty recent (as of
|
||||||
|
September 2016).
|
||||||
|
|
||||||
|
$ git --version
|
||||||
|
git version 2.7.4
|
||||||
|
$ grep --version
|
||||||
|
grep (GNU grep) 2.25
|
||||||
|
|
||||||
|
|
||||||
|
# Running benchmarks
|
||||||
|
|
||||||
|
export PATH="/mnt/bin:$PATH"
|
||||||
|
cd /mnt/ripgrep/benchsuite
|
||||||
|
./benchsuite \
|
||||||
|
--dir /mnt/bench/ --raw /mnt/bench/raw.csv --warmup-iter 3 --bench-iter 10
|
||||||
|
# The above took around 120 minutes to run to completion.
|
1591
benchsuite/runs/2016-09-20-ubuntu1604-ec2/raw.csv
Normal file
1591
benchsuite/runs/2016-09-20-ubuntu1604-ec2/raw.csv
Normal file
File diff suppressed because it is too large
Load Diff
233
benchsuite/runs/2016-09-20-ubuntu1604-ec2/summary
Normal file
233
benchsuite/runs/2016-09-20-ubuntu1604-ec2/summary
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
linux_alternates (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||||
|
-------------------------------------------------------------------------
|
||||||
|
rg (ignore) 0.351 +/- 0.074 (lines: 68)
|
||||||
|
ag (ignore) 1.747 +/- 0.005 (lines: 68)
|
||||||
|
git grep (ignore) 0.501 +/- 0.003 (lines: 68)
|
||||||
|
rg (whitelist)* 0.216 +/- 0.031 (lines: 68)
|
||||||
|
ucg (whitelist) 0.214 +/- 0.008 (lines: 68)*
|
||||||
|
|
||||||
|
linux_alternates_casei (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
rg (ignore) 0.391 +/- 0.078 (lines: 160)
|
||||||
|
ag (ignore) 1.968 +/- 0.009 (lines: 160)
|
||||||
|
git grep (ignore) 2.018 +/- 0.006 (lines: 160)
|
||||||
|
rg (whitelist)* 0.222 +/- 0.001 (lines: 160)*
|
||||||
|
ucg (whitelist) 0.522 +/- 0.002 (lines: 160)
|
||||||
|
|
||||||
|
linux_literal (pattern: PM_RESUME)
|
||||||
|
----------------------------------
|
||||||
|
rg (ignore) 0.334 +/- 0.053 (lines: 16)
|
||||||
|
rg (ignore) (mmap) 1.611 +/- 0.009 (lines: 16)
|
||||||
|
ag (ignore) (mmap) 1.588 +/- 0.011 (lines: 16)
|
||||||
|
pt (ignore) 0.456 +/- 0.025 (lines: 16)
|
||||||
|
sift (ignore) 0.630 +/- 0.004 (lines: 16)
|
||||||
|
git grep (ignore) 0.345 +/- 0.007 (lines: 16)
|
||||||
|
rg (whitelist)* 0.228 +/- 0.042 (lines: 16)
|
||||||
|
ucg (whitelist) 0.218 +/- 0.007 (lines: 16)*
|
||||||
|
|
||||||
|
linux_literal_casei (pattern: PM_RESUME)
|
||||||
|
----------------------------------------
|
||||||
|
rg (ignore) 0.345 +/- 0.073 (lines: 370)
|
||||||
|
rg (ignore) (mmap) 1.612 +/- 0.011 (lines: 370)
|
||||||
|
ag (ignore) (mmap) 1.609 +/- 0.015 (lines: 370)
|
||||||
|
pt (ignore) 17.204 +/- 0.126 (lines: 370)
|
||||||
|
sift (ignore) 0.805 +/- 0.005 (lines: 370)
|
||||||
|
git grep (ignore) 0.343 +/- 0.007 (lines: 370)
|
||||||
|
rg (whitelist)* 0.222 +/- 0.021 (lines: 370)
|
||||||
|
ucg (whitelist) 0.217 +/- 0.006 (lines: 370)*
|
||||||
|
|
||||||
|
linux_literal_default (pattern: PM_RESUME)
|
||||||
|
------------------------------------------
|
||||||
|
rg 0.349 +/- 0.104 (lines: 16)
|
||||||
|
ag 1.589 +/- 0.009 (lines: 16)
|
||||||
|
ucg* 0.218 +/- 0.007 (lines: 16)*
|
||||||
|
pt 0.462 +/- 0.012 (lines: 16)
|
||||||
|
sift 0.352 +/- 0.018 (lines: 16)
|
||||||
|
git grep 0.342 +/- 0.005 (lines: 16)
|
||||||
|
|
||||||
|
linux_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||||
|
-----------------------------------------------------------------
|
||||||
|
rg (ignore) 0.577 +/- 0.003 (lines: 490)
|
||||||
|
rg (ignore) (ASCII) 0.416 +/- 0.025 (lines: 490)
|
||||||
|
ag (ignore) (ASCII) 2.339 +/- 0.010 (lines: 766)
|
||||||
|
pt (ignore) (ASCII) 22.066 +/- 0.057 (lines: 490)
|
||||||
|
sift (ignore) (ASCII) 25.563 +/- 0.108 (lines: 490)
|
||||||
|
git grep (ignore) 26.382 +/- 0.044 (lines: 490)
|
||||||
|
git grep (ignore) (ASCII) 4.153 +/- 0.010 (lines: 490)
|
||||||
|
rg (whitelist) 0.503 +/- 0.011 (lines: 419)
|
||||||
|
rg (whitelist) (ASCII)* 0.343 +/- 0.038 (lines: 419)*
|
||||||
|
ucg (whitelist) (ASCII) 1.130 +/- 0.003 (lines: 416)
|
||||||
|
|
||||||
|
linux_re_literal_suffix (pattern: [A-Z]+_RESUME)
|
||||||
|
------------------------------------------------
|
||||||
|
rg (ignore) 0.318 +/- 0.034 (lines: 1652)
|
||||||
|
ag (ignore) 1.899 +/- 0.008 (lines: 1652)
|
||||||
|
pt (ignore) 13.713 +/- 0.241 (lines: 1652)
|
||||||
|
sift (ignore) 10.172 +/- 0.186 (lines: 1652)
|
||||||
|
git grep (ignore) 1.108 +/- 0.004 (lines: 1652)
|
||||||
|
rg (whitelist)* 0.221 +/- 0.022 (lines: 1630)*
|
||||||
|
ucg (whitelist) 0.301 +/- 0.001 (lines: 1630)
|
||||||
|
|
||||||
|
linux_unicode_greek (pattern: \p{Greek})
|
||||||
|
----------------------------------------
|
||||||
|
rg* 0.414 +/- 0.021 (lines: 23)*
|
||||||
|
pt 12.745 +/- 0.166 (lines: 23)
|
||||||
|
sift 7.767 +/- 0.264 (lines: 23)
|
||||||
|
|
||||||
|
linux_unicode_greek_casei (pattern: \p{Greek})
|
||||||
|
----------------------------------------------
|
||||||
|
rg 0.425 +/- 0.027 (lines: 103)
|
||||||
|
pt 12.612 +/- 0.217 (lines: 23)
|
||||||
|
sift* 0.002 +/- 0.000 (lines: 0)*
|
||||||
|
|
||||||
|
linux_unicode_word (pattern: \wAh)
|
||||||
|
----------------------------------
|
||||||
|
rg (ignore) 0.355 +/- 0.073 (lines: 186)
|
||||||
|
rg (ignore) (ASCII) 0.329 +/- 0.060 (lines: 174)
|
||||||
|
ag (ignore) (ASCII) 1.774 +/- 0.011 (lines: 174)
|
||||||
|
pt (ignore) (ASCII) 14.180 +/- 0.180 (lines: 174)
|
||||||
|
sift (ignore) (ASCII) 11.087 +/- 0.108 (lines: 174)
|
||||||
|
git grep (ignore) 13.045 +/- 0.008 (lines: 186)
|
||||||
|
git grep (ignore) (ASCII) 2.991 +/- 0.004 (lines: 174)
|
||||||
|
rg (whitelist) 0.235 +/- 0.031 (lines: 180)
|
||||||
|
rg (whitelist) (ASCII)* 0.225 +/- 0.023 (lines: 168)*
|
||||||
|
ucg (ASCII) 0.229 +/- 0.007 (lines: 168)
|
||||||
|
|
||||||
|
linux_word (pattern: PM_RESUME)
|
||||||
|
-------------------------------
|
||||||
|
rg (ignore) 0.362 +/- 0.080 (lines: 6)
|
||||||
|
ag (ignore) 1.603 +/- 0.009 (lines: 6)
|
||||||
|
pt (ignore) 14.417 +/- 0.144 (lines: 6)
|
||||||
|
sift (ignore) 7.840 +/- 0.123 (lines: 6)
|
||||||
|
git grep (ignore) 0.341 +/- 0.005 (lines: 6)
|
||||||
|
rg (whitelist)* 0.220 +/- 0.026 (lines: 6)*
|
||||||
|
ucg (whitelist) 0.221 +/- 0.007 (lines: 6)
|
||||||
|
|
||||||
|
subtitles_en_alternate (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||||
|
---------------------------------------------------------------------------------------------------------------
|
||||||
|
rg (lines) 0.619 +/- 0.001 (lines: 848)
|
||||||
|
ag (lines) 3.757 +/- 0.001 (lines: 848)
|
||||||
|
ucg (lines) 1.479 +/- 0.002 (lines: 848)
|
||||||
|
grep (lines) 3.412 +/- 0.004 (lines: 848)
|
||||||
|
rg* 0.294 +/- 0.001 (lines: 848)*
|
||||||
|
grep 2.955 +/- 0.003 (lines: 848)
|
||||||
|
|
||||||
|
subtitles_en_alternate_casei (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||||
|
---------------------------------------------------------------------------------------------------------------------
|
||||||
|
ag (ASCII) 5.170 +/- 0.004 (lines: 862)
|
||||||
|
ucg (ASCII) 3.453 +/- 0.005 (lines: 862)
|
||||||
|
grep (ASCII) 4.537 +/- 0.025 (lines: 862)
|
||||||
|
rg* 2.724 +/- 0.002 (lines: 862)*
|
||||||
|
grep 5.125 +/- 0.006 (lines: 862)
|
||||||
|
|
||||||
|
subtitles_en_literal (pattern: Sherlock Holmes)
|
||||||
|
-----------------------------------------------
|
||||||
|
rg* 0.269 +/- 0.000 (lines: 629)*
|
||||||
|
pt 3.436 +/- 0.001 (lines: 629)
|
||||||
|
sift 0.327 +/- 0.002 (lines: 629)
|
||||||
|
grep 0.517 +/- 0.001 (lines: 629)
|
||||||
|
rg (lines) 0.596 +/- 0.001 (lines: 629)
|
||||||
|
ag (lines) 2.730 +/- 0.003 (lines: 629)
|
||||||
|
ucg (lines) 0.814 +/- 0.003 (lines: 629)
|
||||||
|
pt (lines) 3.438 +/- 0.004 (lines: 629)
|
||||||
|
sift (lines) 0.759 +/- 0.003 (lines: 629)
|
||||||
|
grep (lines) 0.971 +/- 0.001 (lines: 629)
|
||||||
|
|
||||||
|
subtitles_en_literal_casei (pattern: Sherlock Holmes)
|
||||||
|
-----------------------------------------------------
|
||||||
|
rg* 0.366 +/- 0.001 (lines: 642)*
|
||||||
|
grep 4.084 +/- 0.005 (lines: 642)
|
||||||
|
grep (ASCII) 0.614 +/- 0.001 (lines: 642)
|
||||||
|
rg (lines) 0.696 +/- 0.002 (lines: 642)
|
||||||
|
ag (lines) (ASCII) 2.775 +/- 0.004 (lines: 642)
|
||||||
|
ucg (lines) (ASCII) 0.841 +/- 0.002 (lines: 642)
|
||||||
|
|
||||||
|
subtitles_en_literal_word (pattern: Sherlock Holmes)
|
||||||
|
----------------------------------------------------
|
||||||
|
rg (ASCII) 0.596 +/- 0.001 (lines: 629)
|
||||||
|
ag (ASCII) 2.729 +/- 0.001 (lines: 629)
|
||||||
|
ucg (ASCII) 0.810 +/- 0.002 (lines: 629)
|
||||||
|
grep (ASCII) 0.970 +/- 0.000 (lines: 629)
|
||||||
|
rg* 0.596 +/- 0.001 (lines: 629)*
|
||||||
|
grep 0.972 +/- 0.003 (lines: 629)
|
||||||
|
|
||||||
|
subtitles_en_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||||
|
----------------------------------------------------------------------------------------
|
||||||
|
rg 2.777 +/- 0.003 (lines: 13)
|
||||||
|
rg (ASCII)* 2.541 +/- 0.005 (lines: 13)*
|
||||||
|
ag (ASCII) 10.076 +/- 0.005 (lines: 48)
|
||||||
|
ucg (ASCII) 7.771 +/- 0.004 (lines: 13)
|
||||||
|
grep (ASCII) 4.411 +/- 0.004 (lines: 13)
|
||||||
|
|
||||||
|
subtitles_en_surrounding_words (pattern: \w+\s+Holmes\s+\w+)
|
||||||
|
------------------------------------------------------------
|
||||||
|
rg 0.605 +/- 0.000 (lines: 317)
|
||||||
|
grep 1.286 +/- 0.002 (lines: 317)
|
||||||
|
rg (ASCII)* 0.602 +/- 0.000 (lines: 317)*
|
||||||
|
ag (ASCII) 11.663 +/- 0.008 (lines: 323)
|
||||||
|
ucg (ASCII) 4.690 +/- 0.002 (lines: 317)
|
||||||
|
grep (ASCII) 1.276 +/- 0.002 (lines: 317)
|
||||||
|
|
||||||
|
subtitles_ru_alternate (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||||
|
-----------------------------------------------------------------------------------------------------------
|
||||||
|
rg (lines) 1.902 +/- 0.002 (lines: 691)
|
||||||
|
ag (lines) 5.892 +/- 0.003 (lines: 691)
|
||||||
|
ucg (lines) 2.864 +/- 0.006 (lines: 691)
|
||||||
|
grep (lines) 8.511 +/- 0.005 (lines: 691)
|
||||||
|
rg* 1.300 +/- 0.002 (lines: 691)*
|
||||||
|
grep 7.994 +/- 0.017 (lines: 691)
|
||||||
|
|
||||||
|
subtitles_ru_alternate_casei (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||||
|
-----------------------------------------------------------------------------------------------------------------
|
||||||
|
ag (ASCII) 5.891 +/- 0.001 (lines: 691)
|
||||||
|
ucg (ASCII)* 2.868 +/- 0.005 (lines: 691)*
|
||||||
|
grep (ASCII) 8.572 +/- 0.009 (lines: 691)
|
||||||
|
rg 4.834 +/- 0.004 (lines: 735)
|
||||||
|
grep 8.729 +/- 0.004 (lines: 735)
|
||||||
|
|
||||||
|
subtitles_ru_literal (pattern: Шерлок Холмс)
|
||||||
|
--------------------------------------------
|
||||||
|
rg* 0.326 +/- 0.001 (lines: 583)*
|
||||||
|
pt 12.922 +/- 0.010 (lines: 583)
|
||||||
|
sift 16.424 +/- 0.010 (lines: 583)
|
||||||
|
grep 0.786 +/- 0.003 (lines: 583)
|
||||||
|
rg (lines) 0.927 +/- 0.002 (lines: 583)
|
||||||
|
ag (lines) 4.481 +/- 0.003 (lines: 583)
|
||||||
|
ucg (lines) 1.897 +/- 0.009 (lines: 583)
|
||||||
|
pt (lines) 12.937 +/- 0.006 (lines: 583)
|
||||||
|
sift (lines) 17.178 +/- 0.008 (lines: 583)
|
||||||
|
grep (lines) 1.301 +/- 0.005 (lines: 583)
|
||||||
|
|
||||||
|
subtitles_ru_literal_casei (pattern: Шерлок Холмс)
|
||||||
|
--------------------------------------------------
|
||||||
|
rg 1.131 +/- 0.001 (lines: 604)
|
||||||
|
grep 8.187 +/- 0.006 (lines: 604)
|
||||||
|
grep (ASCII) 0.785 +/- 0.001 (lines: 583)
|
||||||
|
rg (lines) 1.733 +/- 0.002 (lines: 604)
|
||||||
|
ag (lines) (ASCII)* 0.729 +/- 0.001 (lines: 0)*
|
||||||
|
ucg (lines) (ASCII) 1.896 +/- 0.005 (lines: 583)
|
||||||
|
|
||||||
|
subtitles_ru_literal_word (pattern: Шерлок Холмс)
|
||||||
|
-------------------------------------------------
|
||||||
|
rg (ASCII)* 0.325 +/- 0.000 (lines: 0)*
|
||||||
|
ag (ASCII) 0.753 +/- 0.001 (lines: 0)
|
||||||
|
ucg (ASCII) 1.891 +/- 0.004 (lines: 583)
|
||||||
|
grep (ASCII) 1.303 +/- 0.004 (lines: 583)
|
||||||
|
rg 0.929 +/- 0.001 (lines: 579)
|
||||||
|
grep 1.304 +/- 0.003 (lines: 579)
|
||||||
|
|
||||||
|
subtitles_ru_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||||
|
----------------------------------------------------------------------------------------
|
||||||
|
rg 4.905 +/- 0.003 (lines: 41)
|
||||||
|
rg (ASCII) 3.973 +/- 0.002 (lines: 0)
|
||||||
|
ag (ASCII)* 2.395 +/- 0.004 (lines: 0)*
|
||||||
|
ucg (ASCII) 3.006 +/- 0.005 (lines: 0)
|
||||||
|
grep (ASCII) 2.483 +/- 0.005 (lines: 0)
|
||||||
|
|
||||||
|
subtitles_ru_surrounding_words (pattern: \w+\s+Холмс\s+\w+)
|
||||||
|
-----------------------------------------------------------
|
||||||
|
rg* 0.957 +/- 0.001 (lines: 278)*
|
||||||
|
grep 1.660 +/- 0.002 (lines: 278)
|
||||||
|
ag (ASCII) 2.411 +/- 0.001 (lines: 0)
|
||||||
|
ucg (ASCII) 2.980 +/- 0.002 (lines: 0)
|
||||||
|
grep (ASCII) 1.596 +/- 0.003 (lines: 0)
|
15
ci/sha256.sh
Normal file
15
ci/sha256.sh
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
#!/bin/sh

# Prints the SHA-256 checksum of each release tarball for a ripgrep version.
#
# Usage: sha256.sh version
#
# For every architecture/target pair below, the release archive is downloaded
# from GitHub and one line of the form
#
#     <version>-<arch>-<target> <sha256> -
#
# is written to stdout. Downloads that fail are reported on stderr and cause
# a non-zero exit status instead of silently printing the checksum of an
# error page.

if [ $# != 1 ]; then
  echo "Usage: $(basename "$0") version" >&2
  exit 1
fi
version="$1"

# Download to a temporary file so that curl's exit status can be checked
# before anything is hashed (a pipeline would only report sha256sum's status).
tmp=$(mktemp) || exit 1
trap 'rm -f "$tmp"' EXIT

status=0
for arch in i686 x86_64; do
  for target in apple-darwin unknown-linux-musl; do
    url="https://github.com/BurntSushi/ripgrep/releases/download/$version/ripgrep-$version-$arch-$target.tar.gz"
    # -f makes curl fail on HTTP errors (e.g. 404 for a missing asset)
    # rather than saving the error document.
    if ! curl -f -L -s -o "$tmp" "$url"; then
      echo "failed to download $url" >&2
      status=1
      continue
    fi
    sha=$(sha256sum < "$tmp")
    # sha256sum prints "<hash>  -" for stdin; keep only the hash and emit the
    # trailing "-" explicitly so the output format stays unchanged.
    echo "$version-$arch-$target ${sha%% *} -"
  done
done
exit "$status"
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep"
|
name = "grep"
|
||||||
version = "0.1.1" #:version
|
version = "0.1.2" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Fast line oriented regex searching as a library.
|
Fast line oriented regex searching as a library.
|
||||||
@@ -16,5 +16,5 @@ license = "Unlicense/MIT"
|
|||||||
log = "0.3"
|
log = "0.3"
|
||||||
memchr = "0.1"
|
memchr = "0.1"
|
||||||
memmap = "0.2"
|
memmap = "0.2"
|
||||||
regex = "0.1.76"
|
regex = "0.1.77"
|
||||||
regex-syntax = "0.3.5"
|
regex-syntax = "0.3.5"
|
||||||
|
@@ -19,6 +19,7 @@ pub use search::{Grep, GrepBuilder, Iter, Match};
|
|||||||
mod literals;
|
mod literals;
|
||||||
mod nonl;
|
mod nonl;
|
||||||
mod search;
|
mod search;
|
||||||
|
mod word_boundary;
|
||||||
|
|
||||||
/// Result is a convenient type alias that fixes the type of the error to
|
/// Result is a convenient type alias that fixes the type of the error to
|
||||||
/// the `Error` type defined in this crate.
|
/// the `Error` type defined in this crate.
|
||||||
|
@@ -4,6 +4,8 @@ use syntax;
|
|||||||
|
|
||||||
use literals::LiteralSets;
|
use literals::LiteralSets;
|
||||||
use nonl;
|
use nonl;
|
||||||
|
use syntax::Expr;
|
||||||
|
use word_boundary::strip_unicode_word_boundaries;
|
||||||
use Result;
|
use Result;
|
||||||
|
|
||||||
/// A matched line.
|
/// A matched line.
|
||||||
@@ -127,22 +129,35 @@ impl GrepBuilder {
|
|||||||
pub fn build(self) -> Result<Grep> {
|
pub fn build(self) -> Result<Grep> {
|
||||||
let expr = try!(self.parse());
|
let expr = try!(self.parse());
|
||||||
let literals = LiteralSets::create(&expr);
|
let literals = LiteralSets::create(&expr);
|
||||||
let re = try!(
|
let re = try!(self.regex(&expr));
|
||||||
RegexBuilder::new(&expr.to_string())
|
let required = literals.to_regex().or_else(|| {
|
||||||
.case_insensitive(self.opts.case_insensitive)
|
let expr = match strip_unicode_word_boundaries(&expr) {
|
||||||
.multi_line(true)
|
None => return None,
|
||||||
.unicode(true)
|
Some(expr) => expr,
|
||||||
.size_limit(self.opts.size_limit)
|
};
|
||||||
.dfa_size_limit(self.opts.dfa_size_limit)
|
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
|
||||||
.compile()
|
self.regex(&expr).ok()
|
||||||
);
|
});
|
||||||
Ok(Grep {
|
Ok(Grep {
|
||||||
re: re,
|
re: re,
|
||||||
required: literals.to_regex(),
|
required: required,
|
||||||
opts: self.opts,
|
opts: self.opts,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates a new regex from the given expression with the current
|
||||||
|
/// configuration.
|
||||||
|
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
||||||
|
RegexBuilder::new(&expr.to_string())
|
||||||
|
.case_insensitive(self.opts.case_insensitive)
|
||||||
|
.multi_line(true)
|
||||||
|
.unicode(true)
|
||||||
|
.size_limit(self.opts.size_limit)
|
||||||
|
.dfa_size_limit(self.opts.dfa_size_limit)
|
||||||
|
.compile()
|
||||||
|
.map_err(From::from)
|
||||||
|
}
|
||||||
|
|
||||||
/// Parses the underlying pattern and ensures the pattern can never match
|
/// Parses the underlying pattern and ensures the pattern can never match
|
||||||
/// the line terminator.
|
/// the line terminator.
|
||||||
fn parse(&self) -> Result<syntax::Expr> {
|
fn parse(&self) -> Result<syntax::Expr> {
|
||||||
|
54
grep/src/word_boundary.rs
Normal file
54
grep/src/word_boundary.rs
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
use syntax::Expr;
|
||||||
|
|
||||||
|
/// Strips Unicode word boundaries from the given expression.
|
||||||
|
///
|
||||||
|
/// The key invariant this maintains is that the expression returned will match
|
||||||
|
/// *at least* every where the expression given will match. Namely, a match of
|
||||||
|
/// the returned expression can report false positives but it will never report
|
||||||
|
/// false negatives.
|
||||||
|
///
|
||||||
|
/// If no word boundaries could be stripped, then None is returned.
|
||||||
|
pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option<Expr> {
|
||||||
|
// The real reason we do this is because Unicode word boundaries are the
|
||||||
|
// one thing that Rust's regex DFA engine can't handle. When it sees a
|
||||||
|
// Unicode word boundary among non-ASCII text, it falls back to one of the
|
||||||
|
// slower engines. We work around this limitation by attempting to use
|
||||||
|
// a regex to find candidate matches without a Unicode word boundary. We'll
|
||||||
|
// only then use the full (and slower) regex to confirm a candidate as a
|
||||||
|
// match or not during search.
|
||||||
|
use syntax::Expr::*;
|
||||||
|
|
||||||
|
match *expr {
|
||||||
|
Concat(ref es) if !es.is_empty() => {
|
||||||
|
let first = is_unicode_word_boundary(&es[0]);
|
||||||
|
let last = is_unicode_word_boundary(es.last().unwrap());
|
||||||
|
// Be careful not to strip word boundaries if there are no other
|
||||||
|
// expressions to match.
|
||||||
|
match (first, last) {
|
||||||
|
(true, false) if es.len() > 1 => {
|
||||||
|
Some(Concat(es[1..].to_vec()))
|
||||||
|
}
|
||||||
|
(false, true) if es.len() > 1 => {
|
||||||
|
Some(Concat(es[..es.len() - 1].to_vec()))
|
||||||
|
}
|
||||||
|
(true, true) if es.len() > 2 => {
|
||||||
|
Some(Concat(es[1..es.len() - 1].to_vec()))
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if the given expression is a Unicode word boundary.
|
||||||
|
fn is_unicode_word_boundary(expr: &Expr) -> bool {
|
||||||
|
use syntax::Expr::*;
|
||||||
|
|
||||||
|
match *expr {
|
||||||
|
WordBoundary => true,
|
||||||
|
NotWordBoundary => true,
|
||||||
|
Group { ref e, .. } => is_unicode_word_boundary(e),
|
||||||
|
_ => false,
|
||||||
|
}
|
||||||
|
}
|
18
pkg/brew/ripgrep.rb
Normal file
18
pkg/brew/ripgrep.rb
Normal file
@@ -0,0 +1,18 @@
|
|||||||
|
require 'formula'
|
||||||
|
# Homebrew formula that installs ripgrep from a release tarball published on
# GitHub.
class Ripgrep < Formula
  version '0.1.8'
  desc "Search tool like grep and The Silver Searcher."
  homepage "https://github.com/BurntSushi/ripgrep"

  # Choose the release archive (and its checksum) by CPU word size.
  tarball, checksum =
    if Hardware::CPU.is_64_bit?
      [
        "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz",
        "893e0e7fac88ebbef024829466fafef6eae5b1060273bbfca3806090e660b06b",
      ]
    else
      [
        "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-i686-apple-darwin.tar.gz",
        "2296c8081a2bfe28b43dea4326a9e8ce9c2821fd628a1ca366e824aceddc5fad",
      ]
    end
  url tarball
  sha256 checksum

  # Installs the `rg` file from the unpacked archive into the formula's bin
  # directory.
  def install
    bin.install "rg"
  end
end
|
18
src/args.rs
18
src/args.rs
@@ -74,6 +74,12 @@ Common options:
|
|||||||
to list all available types.
|
to list all available types.
|
||||||
-T, --type-not TYPE ... Do not search files matching TYPE. Multiple
|
-T, --type-not TYPE ... Do not search files matching TYPE. Multiple
|
||||||
not-type flags may be provided.
|
not-type flags may be provided.
|
||||||
|
-u, --unrestricted ... Reduce the level of 'smart' searching. A
|
||||||
|
single -u doesn't respect .gitignore (etc.)
|
||||||
|
files. Two -u flags will search hidden files
|
||||||
|
and directories. Three -u flags will search
|
||||||
|
binary files. -uu is equivalent to grep -r,
|
||||||
|
and -uuu is equivalent to grep -a -r.
|
||||||
-v, --invert-match Invert matching.
|
-v, --invert-match Invert matching.
|
||||||
-w, --word-regexp Only show matches surrounded by word boundaries.
|
-w, --word-regexp Only show matches surrounded by word boundaries.
|
||||||
This is equivalent to putting \\b before and
|
This is equivalent to putting \\b before and
|
||||||
@@ -199,6 +205,7 @@ pub struct RawArgs {
|
|||||||
flag_type_list: bool,
|
flag_type_list: bool,
|
||||||
flag_type_add: Vec<String>,
|
flag_type_add: Vec<String>,
|
||||||
flag_type_clear: Vec<String>,
|
flag_type_clear: Vec<String>,
|
||||||
|
flag_unrestricted: u32,
|
||||||
flag_with_filename: bool,
|
flag_with_filename: bool,
|
||||||
flag_word_regexp: bool,
|
flag_word_regexp: bool,
|
||||||
}
|
}
|
||||||
@@ -312,6 +319,9 @@ impl RawArgs {
|
|||||||
.line_terminator(eol)
|
.line_terminator(eol)
|
||||||
.build()
|
.build()
|
||||||
);
|
);
|
||||||
|
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
|
||||||
|
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
|
||||||
|
let text = self.flag_text || self.flag_unrestricted >= 3;
|
||||||
let mut args = Args {
|
let mut args = Args {
|
||||||
pattern: pattern,
|
pattern: pattern,
|
||||||
paths: paths,
|
paths: paths,
|
||||||
@@ -327,18 +337,18 @@ impl RawArgs {
|
|||||||
glob_overrides: glob_overrides,
|
glob_overrides: glob_overrides,
|
||||||
grep: grep,
|
grep: grep,
|
||||||
heading: !self.flag_no_heading && self.flag_heading,
|
heading: !self.flag_no_heading && self.flag_heading,
|
||||||
hidden: self.flag_hidden,
|
hidden: hidden,
|
||||||
ignore_case: self.flag_ignore_case,
|
ignore_case: self.flag_ignore_case,
|
||||||
invert_match: self.flag_invert_match,
|
invert_match: self.flag_invert_match,
|
||||||
line_number: !self.flag_no_line_number && self.flag_line_number,
|
line_number: !self.flag_no_line_number && self.flag_line_number,
|
||||||
mmap: mmap,
|
mmap: mmap,
|
||||||
no_ignore: self.flag_no_ignore,
|
no_ignore: no_ignore,
|
||||||
no_ignore_parent:
|
no_ignore_parent:
|
||||||
// --no-ignore implies --no-ignore-parent
|
// --no-ignore implies --no-ignore-parent
|
||||||
self.flag_no_ignore_parent || self.flag_no_ignore,
|
self.flag_no_ignore_parent || no_ignore,
|
||||||
quiet: self.flag_quiet,
|
quiet: self.flag_quiet,
|
||||||
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
|
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
|
||||||
text: self.flag_text,
|
text: text,
|
||||||
threads: threads,
|
threads: threads,
|
||||||
type_defs: btypes.definitions(),
|
type_defs: btypes.definitions(),
|
||||||
type_list: self.flag_type_list,
|
type_list: self.flag_type_list,
|
||||||
|
10
src/main.rs
10
src/main.rs
@@ -118,15 +118,23 @@ fn run(args: Args) -> Result<u64> {
|
|||||||
}
|
}
|
||||||
workq
|
workq
|
||||||
};
|
};
|
||||||
|
let mut paths_searched: u64 = 0;
|
||||||
for p in paths {
|
for p in paths {
|
||||||
if p == Path::new("-") {
|
if p == Path::new("-") {
|
||||||
workq.push(Work::Stdin)
|
paths_searched += 1;
|
||||||
|
workq.push(Work::Stdin);
|
||||||
} else {
|
} else {
|
||||||
for ent in try!(args.walker(p)) {
|
for ent in try!(args.walker(p)) {
|
||||||
|
paths_searched += 1;
|
||||||
workq.push(Work::File(ent));
|
workq.push(Work::File(ent));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if !paths.is_empty() && paths_searched == 0 {
|
||||||
|
eprintln!("No files were searched, which means ripgrep probably \
|
||||||
|
applied a filter you didn't expect. \
|
||||||
|
Try running again with --debug.");
|
||||||
|
}
|
||||||
for _ in 0..workers.len() {
|
for _ in 0..workers.len() {
|
||||||
workq.push(Work::Quit);
|
workq.push(Work::Quit);
|
||||||
}
|
}
|
||||||
|
@@ -525,6 +525,39 @@ baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// A single -u: the `sherlock` file is listed in .gitignore, yet its matches
// are still expected in the output.
sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-u");
    wd.create(".gitignore", "sherlock\n");

    let want = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, want);
});
|
||||||
|
|
||||||
|
// Two -u flags: the haystack only exists as the dot-file `.sherlock`, and
// its matches are expected in the output.
sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-uu");
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);

    let want = "\
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, want);
});
|
||||||
|
|
||||||
|
// Three -u flags: `file` contains NUL bytes, and both of its lines are
// expected to be reported as matches.
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-uuu");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");

    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "file:foo\nfile:foo\n");
});
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn binary_nosearch() {
|
fn binary_nosearch() {
|
||||||
let wd = WorkDir::new("binary_nosearch");
|
let wd = WorkDir::new("binary_nosearch");
|
||||||
|
Reference in New Issue
Block a user