mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 10:11:58 -07:00
Compare commits
43 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
1b14e245be | ||
|
49003e8488 | ||
|
80c362623a | ||
|
c1c484d1a7 | ||
|
263e2b012f | ||
|
b80a986721 | ||
|
8a91d3132f | ||
|
525d051172 | ||
|
5a9883d27c | ||
|
f462d092e7 | ||
|
fe84928c85 | ||
|
f7eaf67fc3 | ||
|
c1c92e4fee | ||
|
5644bbe43a | ||
|
aeb3a5ba0f | ||
|
24e14a0341 | ||
|
2a2b1506d4 | ||
|
4d6b3c727e | ||
|
c2bf9e3d45 | ||
|
dad73b92eb | ||
|
b0d8ff6f4a | ||
|
0263a401f6 | ||
|
4cb1b9ccc0 | ||
|
6f80e2e126 | ||
|
f9bff90842 | ||
|
5af4ec0056 | ||
|
9e2f10b893 | ||
|
69095cf5c3 | ||
|
7402db7b43 | ||
|
7698b60256 | ||
|
e7fb0fd267 | ||
|
29b59074c7 | ||
|
ee5eb2d659 | ||
|
bf8094344a | ||
|
a0819978aa | ||
|
5b7c17e2fb | ||
|
bf56b3bb8e | ||
|
9299d84d41 | ||
|
2cf1a08969 | ||
|
665b6016e3 | ||
|
33231622f3 | ||
|
919c5c7299 | ||
|
f9bf1e4a22 |
@@ -15,9 +15,6 @@ matrix:
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=i686-apple-darwin
|
||||
|
12
Cargo.lock
generated
12
Cargo.lock
generated
@@ -1,13 +1,13 @@
|
||||
[root]
|
||||
name = "ripgrep"
|
||||
version = "0.1.3"
|
||||
version = "0.1.14"
|
||||
dependencies = [
|
||||
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep 0.1.1",
|
||||
"grep 0.1.2",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -60,7 +60,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "fnv"
|
||||
version = "1.0.4"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
@@ -80,7 +80,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -236,7 +236,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
|
||||
"checksum docopt 0.6.83 (registry+https://github.com/rust-lang/crates.io-index)" = "fc42c6077823a361410c37d47c2535b73a190cbe10838dc4f400fe87c10c8c3b"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fnv 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8e8af7b5408ab0c4910cad114c8f9eb454bf75df7afe8964307eeafb68a13a5e"
|
||||
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
|
||||
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ripgrep"
|
||||
version = "0.1.3" #:version
|
||||
version = "0.1.14" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Line oriented search tool using Rust's regex library. Combines the raw
|
||||
@@ -27,14 +27,14 @@ deque = "0.3"
|
||||
docopt = "0.6"
|
||||
env_logger = "0.3"
|
||||
fnv = "1.0"
|
||||
grep = { version = "0.1.1", path = "grep" }
|
||||
grep = { version = "0.1.2", path = "grep" }
|
||||
lazy_static = "0.2"
|
||||
libc = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
memmap = "0.2"
|
||||
num_cpus = "1"
|
||||
regex = "0.1.76"
|
||||
regex = "0.1.77"
|
||||
rustc-serialize = "0.3"
|
||||
term = "0.4"
|
||||
walkdir = "0.1"
|
||||
|
263
README-NEW.md
Normal file
263
README-NEW.md
Normal file
@@ -0,0 +1,263 @@
|
||||
ripgrep (rg)
|
||||
------------
|
||||
`ripgrep` is a command line search tool that combines the usability of The
|
||||
Silver Searcher (an `ack` clone) with the raw speed of GNU grep. `ripgrep` has
|
||||
first class support on Windows, Mac and Linux, with binary downloads available
|
||||
for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
[Travis CI build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[AppVeyor build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[ripgrep on crates.io](https://crates.io/crates/ripgrep)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Screenshot of search results
|
||||
|
||||
[![A screenshot of a sample search with ripgrep](http://burntsushi.net/stuff/ripgrep1.png)](http://burntsushi.net/stuff/ripgrep1.png)
|
||||
|
||||
### Quick example comparing tools
|
||||
|
||||
This example searches the entire Linux kernel source tree (after running
|
||||
`make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where all matches must be
|
||||
words. Timings were collected on a system with an Intel i7-6900K 3.2 GHz.
|
||||
|
||||
Please remember that a single benchmark is never enough! See my
|
||||
[blog post on `ripgrep`](http://blog.burntsushi.net/ripgrep/)
|
||||
for a very detailed comparison with more benchmarks and analysis.
|
||||
|
||||
| Tool | Command | Line count | Time |
|
||||
| ---- | ------- | ---------- | ---- |
|
||||
| ripgrep | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.245s** |
|
||||
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.753s |
|
||||
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.823s |
|
||||
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s |
|
||||
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.656s |
|
||||
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 12.369s |
|
||||
| [ack](http://beyondgrep.com/) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 16.952s |
|
||||
|
||||
(Yes, `ack` [has](https://github.com/petdance/ack2/issues/445) a
|
||||
[bug](https://github.com/petdance/ack2/issues/14).)
|
||||
|
||||
### Why should I use `ripgrep`?
|
||||
|
||||
* It can replace both The Silver Searcher and GNU grep because it is faster
|
||||
than both. (N.B. It is not, strictly speaking, a "drop-in" replacement for
|
||||
both, but the feature sets are far more similar than different.)
|
||||
* Like The Silver Searcher, `ripgrep` defaults to recursive directory search
|
||||
and won't search files ignored by your `.gitignore` files. It also ignores
|
||||
hidden and binary files by default. `ripgrep` also implements full support
|
||||
for `.gitignore`, whereas there are many bugs related to that functionality
|
||||
in The Silver Searcher.
|
||||
* `ripgrep` can search specific types of files. For example, `rg -tpy foo`
|
||||
limits your search to Python files and `rg -Tjs foo` excludes Javascript
|
||||
files from your search. `ripgrep` can be taught about new file types with
|
||||
custom matching rules.
|
||||
* `ripgrep` supports many features found in `grep`, such as showing the context
|
||||
of search results, searching multiple patterns, highlighting matches with
|
||||
color and full Unicode support. Unlike GNU grep, `ripgrep` stays fast while
|
||||
supporting Unicode (which is always on).
|
||||
|
||||
In other words, use `ripgrep` if you like speed, sane defaults, fewer bugs and
|
||||
Unicode.
|
||||
|
||||
### Is it really faster than everything else?
|
||||
|
||||
Yes. A large number of benchmarks with detailed analysis for each is
|
||||
[available on my blog](http://blog.burntsushi.net/ripgrep/).
|
||||
|
||||
Summarizing, `ripgrep` is fast because:
|
||||
|
||||
* It is built on top of
|
||||
[Rust's regex engine](https://github.com/rust-lang-nursery/regex).
|
||||
Rust's regex engine uses finite automata, SIMD and aggressive literal
|
||||
optimizations to make searching very fast.
|
||||
* Rust's regex library maintains performance with full Unicode support by
|
||||
building UTF-8 decoding directly into its deterministic finite automaton
|
||||
engine.
|
||||
* It supports searching with either memory maps or by searching incrementally
|
||||
with an intermediate buffer. The former is better for single files and the
|
||||
latter is better for large directories. `ripgrep` chooses the best searching
|
||||
strategy for you automatically.
|
||||
* Applies your ignore patterns in `.gitignore` files using a
|
||||
[`RegexSet`](https://doc.rust-lang.org/regex/regex/struct.RegexSet.html).
|
||||
That means a single file path can be matched against multiple glob patterns
|
||||
simultaneously.
|
||||
* Uses a Chase-Lev work-stealing queue for quickly distributing work to
|
||||
multiple threads.
|
||||
|
||||
### Installation
|
||||
|
||||
The binary name for `ripgrep` is `rg`.
|
||||
|
||||
[Binaries for `ripgrep` are available for Windows, Mac and
|
||||
Linux.](https://github.com/BurntSushi/ripgrep/releases) Linux binaries are
|
||||
static executables. Windows binaries are available either as built with MinGW
|
||||
(GNU) or with Microsoft Visual C++ (MSVC). When possible, prefer MSVC over GNU,
|
||||
but you'll need to have the
|
||||
[Microsoft Visual C++ Build
|
||||
Tools](http://landinghub.visualstudio.com/visual-cpp-build-tools)
|
||||
installed.
|
||||
|
||||
If you're a **Homebrew** user, then you can install it with a custom formula
|
||||
(N.B. `ripgrep` isn't actually in Homebrew yet. This just installs the binary
|
||||
directly):
|
||||
|
||||
```
|
||||
$ brew install https://raw.githubusercontent.com/BurntSushi/ripgrep/master/pkg/brew/ripgrep.rb
|
||||
```
|
||||
|
||||
If you're an **Arch Linux** user, then you can install `ripgrep` from the
|
||||
[`ripgrep` AUR package](https://aur.archlinux.org/packages/ripgrep/), e.g.,
|
||||
|
||||
```
|
||||
$ yaourt -S ripgrep
|
||||
```
|
||||
|
||||
If you're a **Rust programmer**, `ripgrep` can be installed with `cargo`:
|
||||
|
||||
```
|
||||
$ cargo install ripgrep
|
||||
```
|
||||
|
||||
`ripgrep` isn't currently in any other package repositories.
|
||||
[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10).
|
||||
|
||||
### Whirlwind tour
|
||||
|
||||
The command line usage of `ripgrep` doesn't differ much from other tools that
|
||||
perform a similar function, so you probably already know how to use `ripgrep`.
|
||||
The full details can be found in `rg --help`, but let's go on a whirlwind tour.
|
||||
|
||||
`ripgrep` detects when it's printing to a terminal, and will automatically
|
||||
colorize your output and show line numbers, just like The Silver Searcher.
|
||||
Coloring works on Windows too! Colors can be controlled more granularly with
|
||||
the `--color` flag.
|
||||
|
||||
One last thing before we get started: `ripgrep` assumes UTF-8 *everywhere*. It
|
||||
can still search files that are invalid UTF-8 (like, say, latin-1), but it will
|
||||
simply not work on UTF-16 encoded files or other more exotic encodings.
|
||||
[Support for other encodings may
|
||||
happen.](https://github.com/BurntSushi/ripgrep/issues/1)
|
||||
|
||||
To recursively search the current directory, while respecting all `.gitignore`
|
||||
files, ignore hidden files and directories and skip binary files:
|
||||
|
||||
```
|
||||
$ rg foobar
|
||||
```
|
||||
|
||||
The above command also respects all `.rgignore` files, including in parent
|
||||
directories. `.rgignore` files can be used when `.gitignore` files are
|
||||
insufficient. In all cases, `.rgignore` patterns take precedence over
|
||||
`.gitignore`.
|
||||
|
||||
To ignore all ignore files, use `-u`. To additionally search hidden files
|
||||
and directories, use `-uu`. To additionally search binary files, use `-uuu`.
|
||||
(In other words, "search everything, dammit!") In particular, `rg -uuu` is
|
||||
equivalent to `grep -a -r`.
|
||||
|
||||
```
|
||||
$ rg -uu foobar # equivalent to `grep -r`
|
||||
$ rg -uuu foobar # equivalent to `grep -a -r`
|
||||
```
|
||||
|
||||
(Tip: If your ignore files aren't being adhered to like you expect, run your
|
||||
search with the `--debug` flag.)
|
||||
|
||||
Make the search case insensitive with `-i`, invert the search with `-v` or
|
||||
show the 2 lines before and after every search result with `-C2`.
|
||||
|
||||
Force all matches to be surrounded by word boundaries with `-w`.
|
||||
|
||||
Search and replace (find first and last names and swap them):
|
||||
|
||||
```
|
||||
$ rg '([A-Z][a-z]+)\s+([A-Z][a-z]+)' --replace '$2, $1'
|
||||
```
|
||||
|
||||
Named groups are supported:
|
||||
|
||||
```
|
||||
$ rg '(?P<first>[A-Z][a-z]+)\s+(?P<last>[A-Z][a-z]+)' --replace '$last, $first'
|
||||
```
|
||||
|
||||
Up the ante with full Unicode support, by matching any uppercase Unicode letter
|
||||
followed by any sequence of lowercase Unicode letters (good luck doing this
|
||||
with other search tools!):
|
||||
|
||||
```
|
||||
$ rg '(\p{Lu}\p{Ll}+)\s+(\p{Lu}\p{Ll}+)' --replace '$2, $1'
|
||||
```
|
||||
|
||||
Search only files matching a particular glob:
|
||||
|
||||
```
|
||||
$ rg foo -g 'README.*'
|
||||
```
|
||||
|
||||
<!--*-->
|
||||
|
||||
Or exclude files matching a particular glob:
|
||||
|
||||
```
|
||||
$ rg foo -g '!*.min.js'
|
||||
```
|
||||
|
||||
Search only HTML and CSS files:
|
||||
|
||||
```
|
||||
$ rg -thtml -tcss foobar
|
||||
```
|
||||
|
||||
Search everything except for Javascript files:
|
||||
|
||||
```
|
||||
$ rg -Tjs foobar
|
||||
```
|
||||
|
||||
To see a list of types supported, run `rg --type-list`. To add a new type, use
|
||||
`--type-add`:
|
||||
|
||||
```
|
||||
$ rg --type-add 'foo:*.foo,*.foobar'
|
||||
```
|
||||
|
||||
The type `foo` will now match any file ending with the `.foo` or `.foobar`
|
||||
extensions.
|
||||
|
||||
### Regex syntax
|
||||
|
||||
The syntax supported is
|
||||
[documented as part of Rust's regex library](https://doc.rust-lang.org/regex/regex/index.html#syntax).
|
||||
|
||||
### Building
|
||||
|
||||
`ripgrep` is written in Rust, so you'll need to grab a
|
||||
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
||||
`ripgrep` compiles with Rust 1.9 (stable) or newer. Building is easy:
|
||||
|
||||
```
|
||||
$ git clone git://github.com/BurntSushi/ripgrep
|
||||
$ cd ripgrep
|
||||
$ cargo build --release
|
||||
$ ./target/release/rg --version
|
||||
0.1.3
|
||||
```
|
||||
|
||||
If you have a Rust nightly compiler, then you can enable optional SIMD
|
||||
acceleration like so:
|
||||
|
||||
```
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features simd-accel
|
||||
```
|
||||
|
||||
### Running tests
|
||||
|
||||
`ripgrep` is relatively well tested, including both unit tests and integration
|
||||
tests. To run the full test suite, use:
|
||||
|
||||
```
|
||||
$ cargo test
|
||||
```
|
||||
|
||||
from the repository root.
|
23
appveyor.yml
23
appveyor.yml
@@ -2,27 +2,22 @@ environment:
|
||||
global:
|
||||
PROJECT_NAME: ripgrep
|
||||
matrix:
|
||||
# Nightly channel
|
||||
- TARGET: i686-pc-windows-gnu
|
||||
CHANNEL: nightly
|
||||
CHANNEL: stable
|
||||
- TARGET: i686-pc-windows-msvc
|
||||
CHANNEL: nightly
|
||||
CHANNEL: stable
|
||||
- TARGET: x86_64-pc-windows-gnu
|
||||
CHANNEL: nightly
|
||||
CHANNEL: stable
|
||||
- TARGET: x86_64-pc-windows-msvc
|
||||
CHANNEL: nightly
|
||||
CHANNEL: stable
|
||||
|
||||
# Install Rust and Cargo
|
||||
# (Based on https://github.com/rust-lang/libc/blob/master/appveyor.yml)
|
||||
install:
|
||||
- ps: Start-FileDownload "https://static.rust-lang.org/dist/channel-rust-stable"
|
||||
- ps: $env:RUST_VERSION = Get-Content channel-rust-stable | select -first 1 | %{$_.split('-')[1]}
|
||||
- if NOT "%CHANNEL%" == "stable" set RUST_VERSION=%CHANNEL%
|
||||
- ps: Start-FileDownload "https://static.rust-lang.org/dist/rust-${env:RUST_VERSION}-${env:TARGET}.exe"
|
||||
- rust-%RUST_VERSION%-%TARGET%.exe /VERYSILENT /NORESTART /DIR="C:\Program Files (x86)\Rust"
|
||||
- SET PATH=%PATH%;C:\Program Files (x86)\Rust\bin
|
||||
- if "%TARGET%" == "i686-pc-windows-gnu" set PATH=%PATH%;C:\msys64\mingw32\bin
|
||||
- if "%TARGET%" == "x86_64-pc-windows-gnu" set PATH=%PATH%;C:\msys64\mingw64\bin
|
||||
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
|
||||
- rustup-init.exe -y --default-host %TARGET%
|
||||
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
|
||||
- if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin
|
||||
- rustc -V
|
||||
- cargo -V
|
||||
|
||||
@@ -57,7 +52,7 @@ deploy:
|
||||
# channel to use to produce the release artifacts
|
||||
# NOTE make sure you only release *once* per target
|
||||
# TODO you may want to pick a different channel
|
||||
CHANNEL: nightly
|
||||
CHANNEL: stable
|
||||
appveyor_repo_tag: true
|
||||
|
||||
branches:
|
||||
|
@@ -132,6 +132,7 @@ def bench_linux_literal_casei(suite_dir):
|
||||
mkcmd('rg (ignore)', ['rg', '-n', '-i', pat]),
|
||||
mkcmd('rg (ignore) (mmap)', ['rg', '-n', '-i', '--mmap', pat]),
|
||||
mkcmd('ag (ignore) (mmap)', ['ag', '-i', pat]),
|
||||
mkcmd('pt (ignore)', ['pt', '-i', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '-i', '--git', pat]),
|
||||
# It'd technically be more appropriate to set LC_ALL=en_US.UTF-8 here,
|
||||
# since that is certainly what ripgrep is doing, but this is for an
|
||||
@@ -165,6 +166,7 @@ def bench_linux_re_literal_suffix(suite_dir):
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('ag (ignore)', ['ag', '-s', pat]),
|
||||
mkcmd('pt (ignore)', ['pt', '-e', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '--git', pat]),
|
||||
mkcmd(
|
||||
'git grep (ignore)',
|
||||
@@ -194,6 +196,7 @@ def bench_linux_word(suite_dir):
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg (ignore)', ['rg', '-n', '-w', pat]),
|
||||
mkcmd('ag (ignore)', ['ag', '-s', '-w', pat]),
|
||||
mkcmd('pt (ignore)', ['pt', '-w', pat]),
|
||||
mkcmd('sift (ignore)', SIFT + ['-n', '-w', '--git', pat]),
|
||||
mkcmd(
|
||||
'git grep (ignore)',
|
||||
@@ -224,6 +227,7 @@ def bench_linux_unicode_greek(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', pat]),
|
||||
mkcmd('pt', ['pt', '-e', pat]),
|
||||
mkcmd('sift', SIFT + ['-n', '--git', pat]),
|
||||
])
|
||||
|
||||
@@ -244,6 +248,7 @@ def bench_linux_unicode_greek_casei(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
mkcmd('rg', ['rg', '-n', '-i', pat]),
|
||||
mkcmd('pt', ['pt', '-i', '-e', pat]),
|
||||
mkcmd('sift', SIFT + ['-n', '-i', '--git', pat]),
|
||||
])
|
||||
|
||||
@@ -268,7 +273,8 @@ def bench_linux_unicode_word(suite_dir):
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
||||
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
|
||||
mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
|
||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
|
||||
mkcmd(
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
@@ -308,7 +314,8 @@ def bench_linux_no_literal(suite_dir):
|
||||
mkcmd('rg (ignore)', ['rg', '-n', pat]),
|
||||
mkcmd('rg (ignore) (ASCII)', ['rg', '-n', '(?-u)' + pat]),
|
||||
mkcmd('ag (ignore) (ASCII)', ['ag', '-s', pat]),
|
||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', pat]),
|
||||
mkcmd('pt (ignore) (ASCII)', ['pt', '-e', pat]),
|
||||
mkcmd('sift (ignore) (ASCII)', SIFT + ['-n', '--git', pat]),
|
||||
mkcmd(
|
||||
'git grep (ignore)',
|
||||
['git', 'grep', '-E', '-I', '-n', pat],
|
||||
@@ -390,6 +397,7 @@ def bench_subtitles_en_literal(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', pat, en]),
|
||||
Command('rg (no mmap)', ['rg', '--no-mmap', pat, en]),
|
||||
Command('pt', ['pt', '-N', pat, en]),
|
||||
Command('sift', ['sift', pat, en]),
|
||||
Command('grep', ['grep', '-a', pat, en], env=GREP_ASCII),
|
||||
@@ -551,6 +559,7 @@ def bench_subtitles_ru_literal(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', pat, ru]),
|
||||
Command('rg (no mmap)', ['rg', '--no-mmap', pat, ru]),
|
||||
Command('pt', ['pt', '-N', pat, ru]),
|
||||
Command('sift', ['sift', pat, ru]),
|
||||
Command('grep', ['grep', '-a', pat, ru], env=GREP_ASCII),
|
||||
@@ -1125,7 +1134,8 @@ def download(suite_dir, choices):
|
||||
|
||||
|
||||
def collect_benchmarks(suite_dir, filter_pat=None,
|
||||
allow_missing_commands=False):
|
||||
allow_missing_commands=False,
|
||||
warmup_iter=1, bench_iter=3):
|
||||
'''
|
||||
Return an iterable of all runnable benchmarks.
|
||||
|
||||
@@ -1148,6 +1158,8 @@ def collect_benchmarks(suite_dir, filter_pat=None,
|
||||
try:
|
||||
benchmark = globals()[fun](suite_dir)
|
||||
benchmark.name = name
|
||||
benchmark.warmup_count = warmup_iter
|
||||
benchmark.count = bench_iter
|
||||
benchmark.allow_missing_commands = allow_missing_commands
|
||||
benchmark.raise_if_missing()
|
||||
except MissingDependencies as e:
|
||||
@@ -1157,7 +1169,6 @@ def collect_benchmarks(suite_dir, filter_pat=None,
|
||||
name,
|
||||
' '.join(['--download %s' % n for n in e.missing_names]),
|
||||
))
|
||||
continue
|
||||
except MissingCommands as e:
|
||||
fmt = 'missing commands: %s, skipping benchmark %s ' \
|
||||
'(run with --allow-missing to run incomplete benchmarks)'
|
||||
@@ -1194,6 +1205,14 @@ def main():
|
||||
'--raw', metavar='PATH',
|
||||
help='Dump raw data (all samples collected) in CSV format to the '
|
||||
'file path provided.')
|
||||
p.add_argument(
|
||||
'--warmup-iter', metavar='INTEGER', type=int, default=1,
|
||||
help='The number of iterations to run each command before '
|
||||
'recording measurements.')
|
||||
p.add_argument(
|
||||
'--bench-iter', metavar='INTEGER', type=int, default=3,
|
||||
help='The number of iterations to run each command while '
|
||||
'recording measurements.')
|
||||
p.add_argument(
|
||||
'bench', metavar='PAT', nargs='?',
|
||||
help='A regex pattern that will only run benchmarks that match.')
|
||||
@@ -1202,7 +1221,8 @@ def main():
|
||||
if args.list:
|
||||
benchmarks = collect_benchmarks(
|
||||
args.dir, filter_pat=args.bench,
|
||||
allow_missing_commands=args.allow_missing)
|
||||
allow_missing_commands=args.allow_missing,
|
||||
warmup_iter=args.warmup_iter, bench_iter=args.bench_iter)
|
||||
for b in benchmarks:
|
||||
print(b.name)
|
||||
sys.exit(0)
|
||||
@@ -1227,7 +1247,8 @@ def main():
|
||||
|
||||
benchmarks = collect_benchmarks(
|
||||
args.dir, filter_pat=args.bench,
|
||||
allow_missing_commands=args.allow_missing)
|
||||
allow_missing_commands=args.allow_missing,
|
||||
warmup_iter=args.warmup_iter, bench_iter=args.bench_iter)
|
||||
for i, b in enumerate(benchmarks):
|
||||
result = b.run()
|
||||
fastest_cmd = result.fastest_cmd()
|
||||
|
93
benchsuite/runs/2016-09-20-ubuntu1604-ec2/README.SETUP
Normal file
93
benchsuite/runs/2016-09-20-ubuntu1604-ec2/README.SETUP
Normal file
@@ -0,0 +1,93 @@
|
||||
Ubuntu 16.04 HVM AMI
|
||||
c3.2xlarge, Xeon E5-2680, 2.8 GHz, 8 CPUs, 16 GB memory, 80 GB SSD
|
||||
|
||||
# Generic system setup
|
||||
|
||||
mkfs.ext4 /dev/xvdb
|
||||
sudo mount /dev/xvdb /mnt
|
||||
sudo chown ubuntu /mnt
|
||||
sudo apt-get update
|
||||
sudo apt-get install \ # for building Linux kernel
|
||||
make gcc bc
|
||||
sudo apt-get install \ # for the silver searcher
|
||||
automake pkg-config zlib1g-dev liblzma-dev libpcre3 libpcre3-dev
|
||||
sudo apt-get install \ # for Universal Code Grep
|
||||
libtool libpcre2-8-0 libpcre2-dev
|
||||
sudo apt-get install \ # for sift and the platinum searcher
|
||||
go
|
||||
|
||||
# Get benchmark corpora
|
||||
|
||||
cd /mnt
|
||||
mkdir /mnt/bench
|
||||
git clone git://github.com/BurntSushi/ripgrep
|
||||
cd ripgrep/benchsuite
|
||||
./benchsuite --dir /mnt/bench/ --download all # takes around 15 minutes
|
||||
|
||||
# Install search tools
|
||||
mkdir /mnt/bin/
|
||||
|
||||
## ripgrep
|
||||
|
||||
cd /mnt
|
||||
mkdir ripgrep-bin
|
||||
cd ripgrep-bin
|
||||
curl -LO 'https://github.com/BurntSushi/ripgrep/releases/download/0.1.2/ripgrep-0.1.2-x86_64-unknown-linux-musl.tar.gz'
|
||||
cp ripgrep-0.1.2-x86_64-unknown-linux-musl/rg /mnt/bin/
|
||||
|
||||
## The Silver Searcher
|
||||
|
||||
cd /mnt
|
||||
git clone git://github.com/ggreer/the_silver_searcher
|
||||
cd the_silver_searcher
|
||||
git checkout cda635
|
||||
./build.sh
|
||||
cp ag /mnt/bin/
|
||||
|
||||
## Universal Code Grep
|
||||
|
||||
cd /mnt
|
||||
git clone git://github.com/gvansickle/ucg
|
||||
cd ucg
|
||||
git checkout 487bfb
|
||||
autoreconf -i
|
||||
./configure
|
||||
make
|
||||
cp ucg /mnt/bin/
|
||||
|
||||
## The Platinum Searcher
|
||||
|
||||
export GOPATH=/mnt/go
|
||||
go get github.com/monochromegane/the_platinum_searcher
|
||||
cd /mnt/go/src/github.com/monochromegane/the_platinum_searcher
|
||||
git checkout 509368
|
||||
go install github.com/monochromegane/the_platinum_searcher/cmd/...
|
||||
cp /mnt/go/bin/pt /mnt/bin/
|
||||
|
||||
## Sift
|
||||
|
||||
export GOPATH=/mnt/go
|
||||
go get github.com/svent/sift
|
||||
cd /mnt/go/src/github.com/svent/sift
|
||||
git checkout 2d175c
|
||||
go install
|
||||
cp /mnt/go/bin/sift /mnt/bin/
|
||||
|
||||
## 'git grep' and GNU grep
|
||||
|
||||
They are part of the standard Ubuntu install, and are pretty recent (as of
|
||||
September 2016).
|
||||
|
||||
$ git --version
|
||||
git version 2.7.4
|
||||
$ grep --version
|
||||
grep (GNU grep) 2.25
|
||||
|
||||
|
||||
# Running benchmarks
|
||||
|
||||
export PATH="/mnt/bin:$PATH"
|
||||
cd /mnt/ripgrep/benchsuite
|
||||
./benchsuite \
|
||||
--dir /mnt/bench/ --raw /mnt/bench/raw.csv --warmup-iter 3 --bench-iter 10
|
||||
# The above took around 120 minutes to run to completion.
|
1611
benchsuite/runs/2016-09-20-ubuntu1604-ec2/raw.csv
Normal file
1611
benchsuite/runs/2016-09-20-ubuntu1604-ec2/raw.csv
Normal file
File diff suppressed because it is too large
Load Diff
235
benchsuite/runs/2016-09-20-ubuntu1604-ec2/summary
Normal file
235
benchsuite/runs/2016-09-20-ubuntu1604-ec2/summary
Normal file
@@ -0,0 +1,235 @@
|
||||
linux_alternates (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------
|
||||
rg (ignore) 0.351 +/- 0.074 (lines: 68)
|
||||
ag (ignore) 1.747 +/- 0.005 (lines: 68)
|
||||
git grep (ignore) 0.501 +/- 0.003 (lines: 68)
|
||||
rg (whitelist)* 0.216 +/- 0.031 (lines: 68)
|
||||
ucg (whitelist) 0.214 +/- 0.008 (lines: 68)*
|
||||
|
||||
linux_alternates_casei (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------------
|
||||
rg (ignore) 0.391 +/- 0.078 (lines: 160)
|
||||
ag (ignore) 1.968 +/- 0.009 (lines: 160)
|
||||
git grep (ignore) 2.018 +/- 0.006 (lines: 160)
|
||||
rg (whitelist)* 0.222 +/- 0.001 (lines: 160)*
|
||||
ucg (whitelist) 0.522 +/- 0.002 (lines: 160)
|
||||
|
||||
linux_literal (pattern: PM_RESUME)
|
||||
----------------------------------
|
||||
rg (ignore) 0.334 +/- 0.053 (lines: 16)
|
||||
rg (ignore) (mmap) 1.611 +/- 0.009 (lines: 16)
|
||||
ag (ignore) (mmap) 1.588 +/- 0.011 (lines: 16)
|
||||
pt (ignore) 0.456 +/- 0.025 (lines: 16)
|
||||
sift (ignore) 0.630 +/- 0.004 (lines: 16)
|
||||
git grep (ignore) 0.345 +/- 0.007 (lines: 16)
|
||||
rg (whitelist)* 0.228 +/- 0.042 (lines: 16)
|
||||
ucg (whitelist) 0.218 +/- 0.007 (lines: 16)*
|
||||
|
||||
linux_literal_casei (pattern: PM_RESUME)
|
||||
----------------------------------------
|
||||
rg (ignore) 0.345 +/- 0.073 (lines: 370)
|
||||
rg (ignore) (mmap) 1.612 +/- 0.011 (lines: 370)
|
||||
ag (ignore) (mmap) 1.609 +/- 0.015 (lines: 370)
|
||||
pt (ignore) 17.204 +/- 0.126 (lines: 370)
|
||||
sift (ignore) 0.805 +/- 0.005 (lines: 370)
|
||||
git grep (ignore) 0.343 +/- 0.007 (lines: 370)
|
||||
rg (whitelist)* 0.222 +/- 0.021 (lines: 370)
|
||||
ucg (whitelist) 0.217 +/- 0.006 (lines: 370)*
|
||||
|
||||
linux_literal_default (pattern: PM_RESUME)
|
||||
------------------------------------------
|
||||
rg 0.349 +/- 0.104 (lines: 16)
|
||||
ag 1.589 +/- 0.009 (lines: 16)
|
||||
ucg* 0.218 +/- 0.007 (lines: 16)*
|
||||
pt 0.462 +/- 0.012 (lines: 16)
|
||||
sift 0.352 +/- 0.018 (lines: 16)
|
||||
git grep 0.342 +/- 0.005 (lines: 16)
|
||||
|
||||
linux_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
-----------------------------------------------------------------
|
||||
rg (ignore) 0.577 +/- 0.003 (lines: 490)
|
||||
rg (ignore) (ASCII) 0.416 +/- 0.025 (lines: 490)
|
||||
ag (ignore) (ASCII) 2.339 +/- 0.010 (lines: 766)
|
||||
pt (ignore) (ASCII) 22.066 +/- 0.057 (lines: 490)
|
||||
sift (ignore) (ASCII) 25.563 +/- 0.108 (lines: 490)
|
||||
git grep (ignore) 26.382 +/- 0.044 (lines: 490)
|
||||
git grep (ignore) (ASCII) 4.153 +/- 0.010 (lines: 490)
|
||||
rg (whitelist) 0.503 +/- 0.011 (lines: 419)
|
||||
rg (whitelist) (ASCII)* 0.343 +/- 0.038 (lines: 419)*
|
||||
ucg (whitelist) (ASCII) 1.130 +/- 0.003 (lines: 416)
|
||||
|
||||
linux_re_literal_suffix (pattern: [A-Z]+_RESUME)
|
||||
------------------------------------------------
|
||||
rg (ignore) 0.318 +/- 0.034 (lines: 1652)
|
||||
ag (ignore) 1.899 +/- 0.008 (lines: 1652)
|
||||
pt (ignore) 13.713 +/- 0.241 (lines: 1652)
|
||||
sift (ignore) 10.172 +/- 0.186 (lines: 1652)
|
||||
git grep (ignore) 1.108 +/- 0.004 (lines: 1652)
|
||||
rg (whitelist)* 0.221 +/- 0.022 (lines: 1630)*
|
||||
ucg (whitelist) 0.301 +/- 0.001 (lines: 1630)
|
||||
|
||||
linux_unicode_greek (pattern: \p{Greek})
|
||||
----------------------------------------
|
||||
rg* 0.414 +/- 0.021 (lines: 23)*
|
||||
pt 12.745 +/- 0.166 (lines: 23)
|
||||
sift 7.767 +/- 0.264 (lines: 23)
|
||||
|
||||
linux_unicode_greek_casei (pattern: \p{Greek})
|
||||
----------------------------------------------
|
||||
rg 0.425 +/- 0.027 (lines: 103)
|
||||
pt 12.612 +/- 0.217 (lines: 23)
|
||||
sift* 0.002 +/- 0.000 (lines: 0)*
|
||||
|
||||
linux_unicode_word (pattern: \wAh)
|
||||
----------------------------------
|
||||
rg (ignore) 0.355 +/- 0.073 (lines: 186)
|
||||
rg (ignore) (ASCII) 0.329 +/- 0.060 (lines: 174)
|
||||
ag (ignore) (ASCII) 1.774 +/- 0.011 (lines: 174)
|
||||
pt (ignore) (ASCII) 14.180 +/- 0.180 (lines: 174)
|
||||
sift (ignore) (ASCII) 11.087 +/- 0.108 (lines: 174)
|
||||
git grep (ignore) 13.045 +/- 0.008 (lines: 186)
|
||||
git grep (ignore) (ASCII) 2.991 +/- 0.004 (lines: 174)
|
||||
rg (whitelist) 0.235 +/- 0.031 (lines: 180)
|
||||
rg (whitelist) (ASCII)* 0.225 +/- 0.023 (lines: 168)*
|
||||
ucg (ASCII) 0.229 +/- 0.007 (lines: 168)
|
||||
|
||||
linux_word (pattern: PM_RESUME)
|
||||
-------------------------------
|
||||
rg (ignore) 0.362 +/- 0.080 (lines: 6)
|
||||
ag (ignore) 1.603 +/- 0.009 (lines: 6)
|
||||
pt (ignore) 14.417 +/- 0.144 (lines: 6)
|
||||
sift (ignore) 7.840 +/- 0.123 (lines: 6)
|
||||
git grep (ignore) 0.341 +/- 0.005 (lines: 6)
|
||||
rg (whitelist)* 0.220 +/- 0.026 (lines: 6)*
|
||||
ucg (whitelist) 0.221 +/- 0.007 (lines: 6)
|
||||
|
||||
subtitles_en_alternate (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 0.619 +/- 0.001 (lines: 848)
|
||||
ag (lines) 3.757 +/- 0.001 (lines: 848)
|
||||
ucg (lines) 1.479 +/- 0.002 (lines: 848)
|
||||
grep (lines) 3.412 +/- 0.004 (lines: 848)
|
||||
rg* 0.294 +/- 0.001 (lines: 848)*
|
||||
grep 2.955 +/- 0.003 (lines: 848)
|
||||
|
||||
subtitles_en_alternate_casei (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII) 5.170 +/- 0.004 (lines: 862)
|
||||
ucg (ASCII) 3.453 +/- 0.005 (lines: 862)
|
||||
grep (ASCII) 4.537 +/- 0.025 (lines: 862)
|
||||
rg* 2.724 +/- 0.002 (lines: 862)*
|
||||
grep 5.125 +/- 0.006 (lines: 862)
|
||||
|
||||
subtitles_en_literal (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------
|
||||
rg* 0.268 +/- 0.000 (lines: 629)*
|
||||
rg (no mmap) 0.336 +/- 0.001 (lines: 629)
|
||||
pt 3.433 +/- 0.002 (lines: 629)
|
||||
sift 0.326 +/- 0.002 (lines: 629)
|
||||
grep 0.516 +/- 0.001 (lines: 629)
|
||||
rg (lines) 0.595 +/- 0.001 (lines: 629)
|
||||
ag (lines) 2.730 +/- 0.003 (lines: 629)
|
||||
ucg (lines) 0.745 +/- 0.001 (lines: 629)
|
||||
pt (lines) 3.434 +/- 0.005 (lines: 629)
|
||||
sift (lines) 0.756 +/- 0.002 (lines: 629)
|
||||
grep (lines) 0.969 +/- 0.001 (lines: 629)
|
||||
|
||||
subtitles_en_literal_casei (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------------
|
||||
rg* 0.366 +/- 0.001 (lines: 642)*
|
||||
grep 4.084 +/- 0.005 (lines: 642)
|
||||
grep (ASCII) 0.614 +/- 0.001 (lines: 642)
|
||||
rg (lines) 0.696 +/- 0.002 (lines: 642)
|
||||
ag (lines) (ASCII) 2.775 +/- 0.004 (lines: 642)
|
||||
ucg (lines) (ASCII) 0.841 +/- 0.002 (lines: 642)
|
||||
|
||||
subtitles_en_literal_word (pattern: Sherlock Holmes)
|
||||
----------------------------------------------------
|
||||
rg (ASCII) 0.596 +/- 0.001 (lines: 629)
|
||||
ag (ASCII) 2.729 +/- 0.001 (lines: 629)
|
||||
ucg (ASCII) 0.810 +/- 0.002 (lines: 629)
|
||||
grep (ASCII) 0.970 +/- 0.000 (lines: 629)
|
||||
rg* 0.596 +/- 0.001 (lines: 629)*
|
||||
grep 0.972 +/- 0.003 (lines: 629)
|
||||
|
||||
subtitles_en_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 2.777 +/- 0.003 (lines: 13)
|
||||
rg (ASCII)* 2.541 +/- 0.005 (lines: 13)*
|
||||
ag (ASCII) 10.076 +/- 0.005 (lines: 48)
|
||||
ucg (ASCII) 7.771 +/- 0.004 (lines: 13)
|
||||
grep (ASCII) 4.411 +/- 0.004 (lines: 13)
|
||||
|
||||
subtitles_en_surrounding_words (pattern: \w+\s+Holmes\s+\w+)
|
||||
------------------------------------------------------------
|
||||
rg 0.605 +/- 0.000 (lines: 317)
|
||||
grep 1.286 +/- 0.002 (lines: 317)
|
||||
rg (ASCII)* 0.602 +/- 0.000 (lines: 317)*
|
||||
ag (ASCII) 11.663 +/- 0.008 (lines: 323)
|
||||
ucg (ASCII) 4.690 +/- 0.002 (lines: 317)
|
||||
grep (ASCII) 1.276 +/- 0.002 (lines: 317)
|
||||
|
||||
subtitles_ru_alternate (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 1.902 +/- 0.002 (lines: 691)
|
||||
ag (lines) 5.892 +/- 0.003 (lines: 691)
|
||||
ucg (lines) 2.864 +/- 0.006 (lines: 691)
|
||||
grep (lines) 8.511 +/- 0.005 (lines: 691)
|
||||
rg* 1.300 +/- 0.002 (lines: 691)*
|
||||
grep 7.994 +/- 0.017 (lines: 691)
|
||||
|
||||
subtitles_ru_alternate_casei (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII) 5.891 +/- 0.001 (lines: 691)
|
||||
ucg (ASCII)* 2.868 +/- 0.005 (lines: 691)*
|
||||
grep (ASCII) 8.572 +/- 0.009 (lines: 691)
|
||||
rg 4.834 +/- 0.004 (lines: 735)
|
||||
grep 8.729 +/- 0.004 (lines: 735)
|
||||
|
||||
subtitles_ru_literal (pattern: Шерлок Холмс)
|
||||
--------------------------------------------
|
||||
rg* 0.325 +/- 0.001 (lines: 583)*
|
||||
rg (no mmap) 0.452 +/- 0.002 (lines: 583)
|
||||
pt 12.917 +/- 0.009 (lines: 583)
|
||||
sift 16.418 +/- 0.008 (lines: 583)
|
||||
grep 0.780 +/- 0.001 (lines: 583)
|
||||
rg (lines) 0.926 +/- 0.001 (lines: 583)
|
||||
ag (lines) 4.481 +/- 0.003 (lines: 583)
|
||||
ucg (lines) 1.889 +/- 0.004 (lines: 583)
|
||||
pt (lines) 12.935 +/- 0.011 (lines: 583)
|
||||
sift (lines) 17.177 +/- 0.010 (lines: 583)
|
||||
grep (lines) 1.300 +/- 0.003 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_casei (pattern: Шерлок Холмс)
|
||||
--------------------------------------------------
|
||||
rg 1.131 +/- 0.001 (lines: 604)
|
||||
grep 8.187 +/- 0.006 (lines: 604)
|
||||
grep (ASCII) 0.785 +/- 0.001 (lines: 583)
|
||||
rg (lines) 1.733 +/- 0.002 (lines: 604)
|
||||
ag (lines) (ASCII)* 0.729 +/- 0.001 (lines: 0)*
|
||||
ucg (lines) (ASCII) 1.896 +/- 0.005 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_word (pattern: Шерлок Холмс)
|
||||
-------------------------------------------------
|
||||
rg (ASCII)* 0.325 +/- 0.000 (lines: 0)*
|
||||
ag (ASCII) 0.753 +/- 0.001 (lines: 0)
|
||||
ucg (ASCII) 1.891 +/- 0.004 (lines: 583)
|
||||
grep (ASCII) 1.303 +/- 0.004 (lines: 583)
|
||||
rg 0.929 +/- 0.001 (lines: 579)
|
||||
grep 1.304 +/- 0.003 (lines: 579)
|
||||
|
||||
subtitles_ru_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 4.905 +/- 0.003 (lines: 41)
|
||||
rg (ASCII) 3.973 +/- 0.002 (lines: 0)
|
||||
ag (ASCII)* 2.395 +/- 0.004 (lines: 0)*
|
||||
ucg (ASCII) 3.006 +/- 0.005 (lines: 0)
|
||||
grep (ASCII) 2.483 +/- 0.005 (lines: 0)
|
||||
|
||||
subtitles_ru_surrounding_words (pattern: \w+\s+Холмс\s+\w+)
|
||||
-----------------------------------------------------------
|
||||
rg* 0.957 +/- 0.001 (lines: 278)*
|
||||
grep 1.660 +/- 0.002 (lines: 278)
|
||||
ag (ASCII) 2.411 +/- 0.001 (lines: 0)
|
||||
ucg (ASCII) 2.980 +/- 0.002 (lines: 0)
|
||||
grep (ASCII) 1.596 +/- 0.003 (lines: 0)
|
25
ci/sha256.sh
Normal file
25
ci/sha256.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/bin/sh

# Prints the SHA-256 checksum of every downloadable artifact of one ripgrep
# release: the Linux and Darwin binary archives first, then the source
# archives.
#
# Usage: sha256.sh version

set -e

if [ $# != 1 ]; then
    echo "Usage: $(basename "$0") version" >&2
    exit 1
fi
version="$1"

# Downloads the URL given as $1 and prints its checksum in sha256sum's output
# format.
#
# The download goes to a temporary file instead of being piped straight into
# sha256sum. A pipeline's exit status is that of its last command, so with a
# pipe a failed download would go unnoticed by 'set -e' and the checksum of
# empty input would be printed as if it were the artifact's.
sha256_url() {
    tmp=$(mktemp) || return 1
    if ! curl -sfSL -o "$tmp" "$1"; then
        rm -f "$tmp"
        return 1
    fi
    status=0
    sha256sum < "$tmp" || status=$?
    rm -f "$tmp"
    return "$status"
}

# Linux and Darwin builds.
for arch in i686 x86_64; do
    for target in apple-darwin unknown-linux-musl; do
        url="https://github.com/BurntSushi/ripgrep/releases/download/$version/ripgrep-$version-$arch-$target.tar.gz"
        # A failing command substitution makes the assignment fail, which
        # aborts the script under 'set -e'.
        sha=$(sha256_url "$url")
        echo "$version-$arch-$target $sha"
    done
done

# Source.
for ext in zip tar.gz; do
    url="https://github.com/BurntSushi/ripgrep/archive/$version.$ext"
    sha=$(sha256_url "$url")
    echo "source.$ext $sha"
done
|
4
doc/convert-to-man
Executable file
4
doc/convert-to-man
Executable file
@@ -0,0 +1,4 @@
|
||||
#!/bin/sh

# Regenerates the rg.1 man page from its Markdown source, rg.1.md. Both files
# are resolved relative to the current directory.

# Stop at the first failure so that a failed pandoc run is reported instead of
# being masked by a successful sed over a stale rg.1.
set -e

pandoc -s -t man rg.1.md -o rg.1
# Overwrite the title line emitted by pandoc with a fixed one.
sed -i 's/\.TH.*/.TH "rg" "1"/g' rg.1
|
256
doc/rg.1
Normal file
256
doc/rg.1
Normal file
@@ -0,0 +1,256 @@
|
||||
.\" Automatically generated by Pandoc 1.17.2
|
||||
.\"
|
||||
.TH "rg" "1"
|
||||
.hy
|
||||
.SH NAME
|
||||
.PP
|
||||
rg \- recursively search current directory for lines matching a pattern
|
||||
.SH SYNOPSIS
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-e PATTERN ...
|
||||
[\f[I]<\f[]path\f[I]> ...\f[]]
|
||||
.PP
|
||||
rg [\f[I]options\f[]] <\f[I]pattern\f[]> [\f[I]<\f[]path\f[I]> ...\f[]]
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-\-files [\f[I]<\f[]path\f[I]> ...\f[]]
|
||||
.PP
|
||||
rg [\f[I]options\f[]] \-\-type\-list
|
||||
.PP
|
||||
rg \-\-help
|
||||
.PP
|
||||
rg \-\-version
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
rg (ripgrep) combines the usability of The Silver Searcher (an ack
|
||||
clone) with the raw speed of grep.
|
||||
.SH COMMON OPTIONS
|
||||
.TP
|
||||
.B \-a, \-\-text
|
||||
Search binary files as if they were text.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-c, \-\-count
|
||||
Only show count of line matches for each file.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-color \f[I]WHEN\f[]
|
||||
Whether to use coloring in match.
|
||||
Valid values are never, always or auto.
|
||||
[default: auto]
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-e, \-\-regexp \f[I]PATTERN\f[] ...
|
||||
Use PATTERN to search.
|
||||
This option can be provided multiple times, where all patterns given are
|
||||
searched.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-F, \-\-fixed\-strings
|
||||
Treat the pattern as a literal string instead of a regular expression.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-g, \-\-glob \f[I]GLOB\f[] ...
|
||||
Include or exclude files for searching that match the given glob.
|
||||
This always overrides any other ignore logic.
|
||||
Multiple glob flags may be used.
|
||||
Globbing rules match .gitignore globs.
|
||||
Precede a glob with a \[aq]!\[aq] to exclude it.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-h, \-\-help
|
||||
Show this usage message.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-i, \-\-ignore\-case
|
||||
Case insensitive search.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-n, \-\-line\-number
|
||||
Show line numbers (1\-based).
|
||||
This is enabled by default at a tty.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-N, \-\-no\-line\-number
|
||||
Suppress line numbers.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-q, \-\-quiet
|
||||
Do not print anything to stdout.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-r, \-\-replace \f[I]ARG\f[]
|
||||
Replace every match with the string given.
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-t, \-\-type \f[I]TYPE\f[] ...
|
||||
Only search files matching TYPE.
|
||||
Multiple type flags may be provided.
|
||||
Use the \-\-type\-list flag to list all available types.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-T, \-\-type\-not \f[I]TYPE\f[] ...
|
||||
Do not search files matching TYPE.
|
||||
Multiple not\-type flags may be provided.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-u, \-\-unrestricted ...
|
||||
Reduce the level of \[aq]smart\[aq] searching.
|
||||
A single \-u doesn\[aq]t respect .gitignore (etc.) files.
|
||||
Two \-u flags will search hidden files and directories.
|
||||
Three \-u flags will search binary files.
|
||||
\-uu is equivalent to grep \-r, and \-uuu is equivalent to grep \-a \-r.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-v, \-\-invert\-match
|
||||
Invert matching.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-w, \-\-word\-regexp
|
||||
Only show matches surrounded by word boundaries.
|
||||
This is equivalent to putting \\b before and after the search pattern.
|
||||
.RS
|
||||
.RE
|
||||
.SH LESS COMMON OPTIONS
|
||||
.TP
|
||||
.B \-A, \-\-after\-context \f[I]NUM\f[]
|
||||
Show NUM lines after each match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-B, \-\-before\-context \f[I]NUM\f[]
|
||||
Show NUM lines before each match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-C, \-\-context \f[I]NUM\f[]
|
||||
Show NUM lines before and after each match.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-column
|
||||
Show column numbers (1\-based) in output.
|
||||
This only shows the column numbers for the first match on each line.
|
||||
Note that this doesn\[aq]t try to account for Unicode.
|
||||
One byte is equal to one column.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-context\-separator \f[I]ARG\f[]
|
||||
The string to use when separating non\-continuous context lines.
|
||||
Escape sequences may be used.
|
||||
[default: \-\-]
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-debug
|
||||
Show debug messages.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-files
|
||||
Print each file that would be searched (but don\[aq]t search).
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-H, \-\-with\-filename
|
||||
Prefix each match with the file name that contains it.
|
||||
This is the default when more than one file is searched.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-heading
|
||||
Show the file name above clusters of matches from each file.
|
||||
This is the default mode at a tty.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-heading
|
||||
Don\[aq]t show any file name heading.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-hidden
|
||||
Search hidden directories and files.
|
||||
(Hidden directories and files are skipped by default.)
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-L, \-\-follow
|
||||
Follow symlinks.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-mmap
|
||||
Search using memory maps when possible.
|
||||
This is enabled by default when ripgrep thinks it will be faster.
|
||||
(Note that mmap searching doesn\[aq]t currently support the various
|
||||
context related options.)
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-mmap
|
||||
Never use memory maps, even when they might be faster.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-ignore
|
||||
Don\[aq]t respect ignore files (.gitignore, .rgignore, etc.) This
|
||||
implies \-\-no\-ignore\-parent.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-ignore\-parent
|
||||
Don\[aq]t respect ignore files in parent directories.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-p, \-\-pretty
|
||||
Alias for \-\-color=always \-\-heading \-n.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-j, \-\-threads \f[I]ARG\f[]
|
||||
The number of threads to use.
|
||||
Defaults to the number of logical CPUs (capped at 6).
|
||||
[default: 0]
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-version
|
||||
Show the version number of ripgrep and exit.
|
||||
.RS
|
||||
.RE
|
||||
.SH FILE TYPE MANAGEMENT OPTIONS
|
||||
.TP
|
||||
.B \-\-type\-list
|
||||
Show all supported file types and their associated globs.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-type\-add \f[I]ARG\f[] ...
|
||||
Add a new glob for a particular file type.
|
||||
Example: \-\-type\-add html:*.html,*.htm
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-type\-clear \f[I]TYPE\f[] ...
|
||||
Clear the file type globs for TYPE.
|
||||
.RS
|
||||
.RE
|
166
doc/rg.1.md
Normal file
166
doc/rg.1.md
Normal file
@@ -0,0 +1,166 @@
|
||||
# NAME
|
||||
|
||||
rg - recursively search current directory for lines matching a pattern
|
||||
|
||||
# SYNOPSIS
|
||||
|
||||
rg [*options*] -e PATTERN ... [*<*path*> ...*]
|
||||
|
||||
rg [*options*] <*pattern*> [*<*path*> ...*]
|
||||
|
||||
rg [*options*] --files [*<*path*> ...*]
|
||||
|
||||
rg [*options*] --type-list
|
||||
|
||||
rg --help
|
||||
|
||||
rg --version
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
rg (ripgrep) combines the usability of The Silver Searcher (an ack clone) with
|
||||
the raw speed of grep.
|
||||
|
||||
# COMMON OPTIONS
|
||||
|
||||
-a, --text
|
||||
: Search binary files as if they were text.
|
||||
|
||||
-c, --count
|
||||
: Only show count of line matches for each file.
|
||||
|
||||
--color *WHEN*
|
||||
: Whether to use coloring in match. Valid values are never, always or auto.
|
||||
[default: auto]
|
||||
|
||||
-e, --regexp *PATTERN* ...
|
||||
: Use PATTERN to search. This option can be provided multiple times, where all
|
||||
patterns given are searched.
|
||||
|
||||
-F, --fixed-strings
|
||||
: Treat the pattern as a literal string instead of a regular expression.
|
||||
|
||||
-g, --glob *GLOB* ...
|
||||
: Include or exclude files for searching that match the given glob. This always
|
||||
overrides any other ignore logic. Multiple glob flags may be used. Globbing
|
||||
rules match .gitignore globs. Precede a glob with a '!' to exclude it.
|
||||
|
||||
-h, --help
|
||||
: Show this usage message.
|
||||
|
||||
-i, --ignore-case
|
||||
: Case insensitive search.
|
||||
|
||||
-n, --line-number
|
||||
: Show line numbers (1-based). This is enabled by default at a tty.
|
||||
|
||||
-N, --no-line-number
|
||||
: Suppress line numbers.
|
||||
|
||||
-q, --quiet
|
||||
: Do not print anything to stdout.
|
||||
|
||||
-r, --replace *ARG*
|
||||
: Replace every match with the string given. Capture group indices (e.g., $5)
|
||||
and names (e.g., $foo) are supported.
|
||||
|
||||
-t, --type *TYPE* ...
|
||||
: Only search files matching TYPE. Multiple type flags may be provided. Use the
|
||||
--type-list flag to list all available types.
|
||||
|
||||
-T, --type-not *TYPE* ...
|
||||
: Do not search files matching TYPE. Multiple not-type flags may be provided.
|
||||
|
||||
-u, --unrestricted ...
|
||||
: Reduce the level of 'smart' searching. A single -u doesn't respect .gitignore
|
||||
(etc.) files. Two -u flags will search hidden files and directories. Three
|
||||
-u flags will search binary files. -uu is equivalent to grep -r, and -uuu is
|
||||
equivalent to grep -a -r.
|
||||
|
||||
-v, --invert-match
|
||||
: Invert matching.
|
||||
|
||||
-w, --word-regexp
|
||||
: Only show matches surrounded by word boundaries. This is equivalent to
|
||||
putting \\b before and after the search pattern.
|
||||
|
||||
# LESS COMMON OPTIONS
|
||||
|
||||
-A, --after-context *NUM*
|
||||
: Show NUM lines after each match.
|
||||
|
||||
-B, --before-context *NUM*
|
||||
: Show NUM lines before each match.
|
||||
|
||||
-C, --context *NUM*
|
||||
: Show NUM lines before and after each match.
|
||||
|
||||
--column
|
||||
: Show column numbers (1-based) in output. This only shows the column
|
||||
numbers for the first match on each line. Note that this doesn't try
|
||||
to account for Unicode. One byte is equal to one column.
|
||||
|
||||
--context-separator *ARG*
|
||||
: The string to use when separating non-continuous context lines. Escape
|
||||
sequences may be used. [default: --]
|
||||
|
||||
--debug
|
||||
: Show debug messages.
|
||||
|
||||
--files
|
||||
: Print each file that would be searched (but don't search).
|
||||
|
||||
-H, --with-filename
|
||||
: Prefix each match with the file name that contains it. This is the
|
||||
default when more than one file is searched.
|
||||
|
||||
--heading
|
||||
: Show the file name above clusters of matches from each file.
|
||||
This is the default mode at a tty.
|
||||
|
||||
--no-heading
|
||||
: Don't show any file name heading.
|
||||
|
||||
--hidden
|
||||
: Search hidden directories and files. (Hidden directories and files are
|
||||
skipped by default.)
|
||||
|
||||
-L, --follow
|
||||
: Follow symlinks.
|
||||
|
||||
--mmap
|
||||
: Search using memory maps when possible. This is enabled by default
|
||||
when ripgrep thinks it will be faster. (Note that mmap searching
|
||||
doesn't currently support the various context related options.)
|
||||
|
||||
--no-mmap
|
||||
: Never use memory maps, even when they might be faster.
|
||||
|
||||
--no-ignore
|
||||
: Don't respect ignore files (.gitignore, .rgignore, etc.)
|
||||
This implies --no-ignore-parent.
|
||||
|
||||
--no-ignore-parent
|
||||
: Don't respect ignore files in parent directories.
|
||||
|
||||
-p, --pretty
|
||||
: Alias for --color=always --heading -n.
|
||||
|
||||
-j, --threads *ARG*
|
||||
: The number of threads to use. Defaults to the number of logical CPUs
|
||||
(capped at 6). [default: 0]
|
||||
|
||||
--version
|
||||
: Show the version number of ripgrep and exit.
|
||||
|
||||
# FILE TYPE MANAGEMENT OPTIONS
|
||||
|
||||
--type-list
|
||||
: Show all supported file types and their associated globs.
|
||||
|
||||
--type-add *ARG* ...
|
||||
: Add a new glob for a particular file type.
|
||||
Example: --type-add html:\*.html,\*.htm
|
||||
|
||||
--type-clear *TYPE* ...
|
||||
: Clear the file type globs for TYPE.
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.1.1" #:version
|
||||
version = "0.1.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -16,5 +16,5 @@ license = "Unlicense/MIT"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
memmap = "0.2"
|
||||
regex = "0.1.76"
|
||||
regex = "0.1.77"
|
||||
regex-syntax = "0.3.5"
|
||||
|
@@ -19,6 +19,7 @@ pub use search::{Grep, GrepBuilder, Iter, Match};
|
||||
mod literals;
|
||||
mod nonl;
|
||||
mod search;
|
||||
mod word_boundary;
|
||||
|
||||
/// Result is a convenient type alias that fixes the type of the error to
|
||||
/// the `Error` type defined in this crate.
|
||||
|
@@ -4,6 +4,8 @@ use syntax;
|
||||
|
||||
use literals::LiteralSets;
|
||||
use nonl;
|
||||
use syntax::Expr;
|
||||
use word_boundary::strip_unicode_word_boundaries;
|
||||
use Result;
|
||||
|
||||
/// A matched line.
|
||||
@@ -127,22 +129,35 @@ impl GrepBuilder {
|
||||
pub fn build(self) -> Result<Grep> {
|
||||
let expr = try!(self.parse());
|
||||
let literals = LiteralSets::create(&expr);
|
||||
let re = try!(
|
||||
RegexBuilder::new(&expr.to_string())
|
||||
.case_insensitive(self.opts.case_insensitive)
|
||||
.multi_line(true)
|
||||
.unicode(true)
|
||||
.size_limit(self.opts.size_limit)
|
||||
.dfa_size_limit(self.opts.dfa_size_limit)
|
||||
.compile()
|
||||
);
|
||||
let re = try!(self.regex(&expr));
|
||||
let required = literals.to_regex().or_else(|| {
|
||||
let expr = match strip_unicode_word_boundaries(&expr) {
|
||||
None => return None,
|
||||
Some(expr) => expr,
|
||||
};
|
||||
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
|
||||
self.regex(&expr).ok()
|
||||
});
|
||||
Ok(Grep {
|
||||
re: re,
|
||||
required: literals.to_regex(),
|
||||
required: required,
|
||||
opts: self.opts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a new regex from the given expression with the current
|
||||
/// configuration.
|
||||
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
||||
RegexBuilder::new(&expr.to_string())
|
||||
.case_insensitive(self.opts.case_insensitive)
|
||||
.multi_line(true)
|
||||
.unicode(true)
|
||||
.size_limit(self.opts.size_limit)
|
||||
.dfa_size_limit(self.opts.dfa_size_limit)
|
||||
.compile()
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
/// Parses the underlying pattern and ensures the pattern can never match
|
||||
/// the line terminator.
|
||||
fn parse(&self) -> Result<syntax::Expr> {
|
||||
|
54
grep/src/word_boundary.rs
Normal file
54
grep/src/word_boundary.rs
Normal file
@@ -0,0 +1,54 @@
|
||||
use syntax::Expr;
|
||||
|
||||
/// Strips Unicode word boundaries from the given expression.
|
||||
///
|
||||
/// The key invariant this maintains is that the expression returned will match
|
||||
/// *at least* every where the expression given will match. Namely, a match of
|
||||
/// the returned expression can report false positives but it will never report
|
||||
/// false negatives.
|
||||
///
|
||||
/// If no word boundaries could be stripped, then None is returned.
|
||||
pub fn strip_unicode_word_boundaries(expr: &Expr) -> Option<Expr> {
|
||||
// The real reason we do this is because Unicode word boundaries are the
|
||||
// one thing that Rust's regex DFA engine can't handle. When it sees a
|
||||
// Unicode word boundary among non-ASCII text, it falls back to one of the
|
||||
// slower engines. We work around this limitation by attempting to use
|
||||
// a regex to find candidate matches without a Unicode word boundary. We'll
|
||||
// only then use the full (and slower) regex to confirm a candidate as a
|
||||
// match or not during search.
|
||||
use syntax::Expr::*;
|
||||
|
||||
match *expr {
|
||||
Concat(ref es) if !es.is_empty() => {
|
||||
let first = is_unicode_word_boundary(&es[0]);
|
||||
let last = is_unicode_word_boundary(es.last().unwrap());
|
||||
// Be careful not to strip word boundaries if there are no other
|
||||
// expressions to match.
|
||||
match (first, last) {
|
||||
(true, false) if es.len() > 1 => {
|
||||
Some(Concat(es[1..].to_vec()))
|
||||
}
|
||||
(false, true) if es.len() > 1 => {
|
||||
Some(Concat(es[..es.len() - 1].to_vec()))
|
||||
}
|
||||
(true, true) if es.len() > 2 => {
|
||||
Some(Concat(es[1..es.len() - 1].to_vec()))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the given expression is a Unicode word boundary.
|
||||
fn is_unicode_word_boundary(expr: &Expr) -> bool {
|
||||
use syntax::Expr::*;
|
||||
|
||||
match *expr {
|
||||
WordBoundary => true,
|
||||
NotWordBoundary => true,
|
||||
Group { ref e, .. } => is_unicode_word_boundary(e),
|
||||
_ => false,
|
||||
}
|
||||
}
|
35
pkg/archlinux/PKGBUILD
Normal file
35
pkg/archlinux/PKGBUILD
Normal file
@@ -0,0 +1,35 @@
|
||||
# Contributor: Andrew Gallant <jamslam@gmail.com>
|
||||
# Maintainer: Andrew Gallant
|
||||
pkgname=ripgrep
|
||||
pkgver=0.1.11
|
||||
pkgrel=1
|
||||
pkgdesc="A search tool that combines the usability of The Silver Searcher with the raw speed of grep."
|
||||
arch=('i686' 'x86_64')
|
||||
url="https://github.com/BurntSushi/ripgrep"
|
||||
license=('UNLICENSE')
|
||||
makedepends=('cargo')
|
||||
source=("https://github.com/BurntSushi/$pkgname/archive/$pkgver.tar.gz")
|
||||
sha256sums=('d29beb1a43a263d75ce4ef23a07253ed6ea306b14ffb5b37bc4972fb5d98238c')
|
||||
|
||||
# Compiles the release binary. When a nightly toolchain is reachable (through
# rustup, or because the default rustc reports itself as a nightly), the build
# enables the simd-accel feature and targets the host CPU; otherwise a plain
# release build is made.
build() {
  cd "$pkgname-$pkgver"

  # Pick the command line for the accelerated build, or fall back right away.
  if command -v rustup > /dev/null 2>&1; then
    set -- rustup run nightly cargo build --release --features simd-accel
  elif rustc --version | grep -q nightly; then
    set -- cargo build --release --features simd-accel
  else
    cargo build --release
    return
  fi

  RUSTFLAGS="-C target-cpu=native" "$@"
}
|
||||
|
||||
# Installs the built binary into /usr/bin and the README and license files
# into /usr/share/doc/ripgrep under $pkgdir.
package() {
  local docdir="$pkgdir/usr/share/doc/ripgrep"
  local f

  cd "$pkgname-$pkgver"

  install -Dm755 "target/release/rg" "$pkgdir/usr/bin/rg"
  # The README is the only file installed under a different name.
  install -Dm644 "README-NEW.md" "$docdir/README.md"
  for f in COPYING LICENSE-MIT UNLICENSE; do
    install -Dm644 "$f" "$docdir/$f"
  done
}
|
18
pkg/brew/ripgrep.rb
Normal file
18
pkg/brew/ripgrep.rb
Normal file
@@ -0,0 +1,18 @@
|
||||
require 'formula'
|
||||
# Homebrew formula that installs the prebuilt `rg` binary from the ripgrep
# GitHub release archive matching the host CPU's word size.
class Ripgrep < Formula
  version '0.1.8'
  desc "Search tool like grep and The Silver Searcher."
  homepage "https://github.com/BurntSushi/ripgrep"

  # Each prebuilt archive has its own checksum, so both are selected by the
  # same check.
  is_64 = Hardware::CPU.is_64_bit?
  url(
    is_64 ?
      "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz" :
      "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-i686-apple-darwin.tar.gz"
  )
  sha256(
    is_64 ?
      "893e0e7fac88ebbef024829466fafef6eae5b1060273bbfca3806090e660b06b" :
      "2296c8081a2bfe28b43dea4326a9e8ce9c2821fd628a1ca366e824aceddc5fad"
  )

  # The archive ships a ready-made binary, so installing is a plain copy.
  def install
    bin.install "rg"
  end
end
|
23
src/args.rs
23
src/args.rs
@@ -74,6 +74,12 @@ Common options:
|
||||
to list all available types.
|
||||
-T, --type-not TYPE ... Do not search files matching TYPE. Multiple
|
||||
not-type flags may be provided.
|
||||
-u, --unrestricted ... Reduce the level of 'smart' searching. A
|
||||
single -u doesn't respect .gitignore (etc.)
|
||||
files. Two -u flags will search hidden files
|
||||
and directories. Three -u flags will search
|
||||
binary files. -uu is equivalent to grep -r,
|
||||
and -uuu is equivalent to grep -a -r.
|
||||
-v, --invert-match Invert matching.
|
||||
-w, --word-regexp Only show matches surrounded by word boundaries.
|
||||
This is equivalent to putting \\b before and
|
||||
@@ -125,7 +131,7 @@ Less common options:
|
||||
--mmap
|
||||
Search using memory maps when possible. This is enabled by default
|
||||
when ripgrep thinks it will be faster. (Note that mmap searching
|
||||
doesn't current support the various context related options.)
|
||||
doesn't currently support the various context related options.)
|
||||
|
||||
--no-mmap
|
||||
Never use memory maps, even when they might be faster.
|
||||
@@ -199,6 +205,7 @@ pub struct RawArgs {
|
||||
flag_type_list: bool,
|
||||
flag_type_add: Vec<String>,
|
||||
flag_type_clear: Vec<String>,
|
||||
flag_unrestricted: u32,
|
||||
flag_with_filename: bool,
|
||||
flag_word_regexp: bool,
|
||||
}
|
||||
@@ -266,6 +273,9 @@ impl RawArgs {
|
||||
false
|
||||
} else if self.flag_mmap {
|
||||
true
|
||||
} else if cfg!(windows) {
|
||||
// On Windows, memory maps appear faster than read calls. Neat.
|
||||
true
|
||||
} else {
|
||||
// If we're only searching a few paths and all of them are
|
||||
// files, then memory maps are probably faster.
|
||||
@@ -312,6 +322,9 @@ impl RawArgs {
|
||||
.line_terminator(eol)
|
||||
.build()
|
||||
);
|
||||
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
|
||||
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
|
||||
let text = self.flag_text || self.flag_unrestricted >= 3;
|
||||
let mut args = Args {
|
||||
pattern: pattern,
|
||||
paths: paths,
|
||||
@@ -327,18 +340,18 @@ impl RawArgs {
|
||||
glob_overrides: glob_overrides,
|
||||
grep: grep,
|
||||
heading: !self.flag_no_heading && self.flag_heading,
|
||||
hidden: self.flag_hidden,
|
||||
hidden: hidden,
|
||||
ignore_case: self.flag_ignore_case,
|
||||
invert_match: self.flag_invert_match,
|
||||
line_number: !self.flag_no_line_number && self.flag_line_number,
|
||||
mmap: mmap,
|
||||
no_ignore: self.flag_no_ignore,
|
||||
no_ignore: no_ignore,
|
||||
no_ignore_parent:
|
||||
// --no-ignore implies --no-ignore-parent
|
||||
self.flag_no_ignore_parent || self.flag_no_ignore,
|
||||
self.flag_no_ignore_parent || no_ignore,
|
||||
quiet: self.flag_quiet,
|
||||
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
|
||||
text: self.flag_text,
|
||||
text: text,
|
||||
threads: threads,
|
||||
type_defs: btypes.definitions(),
|
||||
type_list: self.flag_type_list,
|
||||
|
10
src/main.rs
10
src/main.rs
@@ -118,15 +118,23 @@ fn run(args: Args) -> Result<u64> {
|
||||
}
|
||||
workq
|
||||
};
|
||||
let mut paths_searched: u64 = 0;
|
||||
for p in paths {
|
||||
if p == Path::new("-") {
|
||||
workq.push(Work::Stdin)
|
||||
paths_searched += 1;
|
||||
workq.push(Work::Stdin);
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
paths_searched += 1;
|
||||
workq.push(Work::File(ent));
|
||||
}
|
||||
}
|
||||
}
|
||||
if !paths.is_empty() && paths_searched == 0 {
|
||||
eprintln!("No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect. \
|
||||
Try running again with --debug.");
|
||||
}
|
||||
for _ in 0..workers.len() {
|
||||
workq.push(Work::Quit);
|
||||
}
|
||||
|
@@ -525,6 +525,50 @@ baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
}
|
||||
});
|
||||
|
||||
// A single `-u` must make ripgrep stop honoring .gitignore: the `sherlock`
// file is listed in .gitignore here, yet its matches are still expected in
// the output.
sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
// Ignore rule that would normally hide the `sherlock` file.
wd.create(".gitignore", "sherlock\n");
|
||||
cmd.arg("-u");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = "\
|
||||
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
// `-uu` must additionally search hidden files: the haystack is moved to the
// dot-file `.sherlock`, and its matches are expected in the output.
sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
// Replace the regular `sherlock` file with a hidden copy of the same text.
wd.remove("sherlock");
|
||||
wd.create(".sherlock", hay::SHERLOCK);
|
||||
cmd.arg("-uu");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = "\
|
||||
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
// `-uuu` must additionally search binary files (non-Windows variant).
// The file contains NUL bytes; the expected output shows only `foo` for each
// line, i.e. the text following each NUL byte is not printed on this path.
#[cfg(not(windows))]
|
||||
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
// Two lines, each with a NUL byte right after the match.
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
|
||||
cmd.arg("-uuu");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "file:foo\nfile:foo\n");
|
||||
});
|
||||
|
||||
// `-uuu` must additionally search binary files (Windows variant).
// On Windows, this test uses memory maps, so the NUL bytes don't get replaced.
// The expected output therefore contains each full line, NUL byte included.
#[cfg(windows)]
|
||||
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
// Same fixture as the non-Windows variant of this test.
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
|
||||
cmd.arg("-uuu");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
|
||||
});
|
||||
|
||||
#[test]
|
||||
fn binary_nosearch() {
|
||||
let wd = WorkDir::new("binary_nosearch");
|
||||
|
Reference in New Issue
Block a user