mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-29 03:01:57 -07:00
Compare commits
289 Commits
ag/libripg
...
ag/moarci
Author | SHA1 | Date | |
---|---|---|---|
|
3853608eb6 | ||
|
4858267f3b | ||
|
5011dba2fd | ||
|
e14f9195e5 | ||
|
ef0e7af56a | ||
|
b266818aa5 | ||
|
81415ae52d | ||
|
5c4584aa7c | ||
|
0972c6e7c7 | ||
|
0a372bf2e4 | ||
|
345124a7fa | ||
|
31807f805a | ||
|
4de227fd9a | ||
|
d7ce274722 | ||
|
5b10328f41 | ||
|
813c676eca | ||
|
f625d72b6f | ||
|
3de31f7527 | ||
|
e402d6c260 | ||
|
48b5bdc441 | ||
|
709ca91f50 | ||
|
9c220f9a9b | ||
|
9085bed139 | ||
|
931ab35f76 | ||
|
b5e5979ff1 | ||
|
052c857da0 | ||
|
5e84e784c8 | ||
|
01e8e11621 | ||
|
9268ff8e8d | ||
|
c2cb0a4de4 | ||
|
adb9332f52 | ||
|
bc37c32717 | ||
|
08ae4da2b7 | ||
|
7ac95c1f50 | ||
|
7a6903bd4e | ||
|
9801fae29f | ||
|
abdf7140d7 | ||
|
b83e7968ef | ||
|
8ebc113847 | ||
|
785c1f1766 | ||
|
8b734cb490 | ||
|
b93762ea7a | ||
|
34677d2622 | ||
|
d1389db2e3 | ||
|
50bcb7409e | ||
|
7b9972c308 | ||
|
9f000c2910 | ||
|
392682d352 | ||
|
7d3f794588 | ||
|
290fd2a7b6 | ||
|
d1e4d28f30 | ||
|
5ce2d7351d | ||
|
9dcfd9a205 | ||
|
36b276c6d0 | ||
|
03bf37ff4a | ||
|
e7829c05d3 | ||
|
a6222939f9 | ||
|
6ffd434232 | ||
|
1f1cd9b467 | ||
|
973de50c9e | ||
|
5f8805a496 | ||
|
fdde2bcd38 | ||
|
7b3fe6b325 | ||
|
b3dd3ae203 | ||
|
f3083e4574 | ||
|
d03e30707e | ||
|
d7f57d9aab | ||
|
1a2a24ea74 | ||
|
d66610b295 | ||
|
019ae1989b | ||
|
36d3f235dc | ||
|
79018eb693 | ||
|
44cd344438 | ||
|
e493e54b9b | ||
|
8e8215aa65 | ||
|
3fe701498e | ||
|
e79085e9e4 | ||
|
764c197022 | ||
|
ef1611b5f5 | ||
|
45d12abbc5 | ||
|
5fde8391f9 | ||
|
3edb11c513 | ||
|
ed144be775 | ||
|
967e7ad0de | ||
|
9952ba2068 | ||
|
b751758d60 | ||
|
8f14cb18a5 | ||
|
da9d720431 | ||
|
a9d71a0368 | ||
|
f3646242cc | ||
|
601f212a0b | ||
|
5a565354f8 | ||
|
2a6532ae71 | ||
|
ece1f50cfe | ||
|
a7d26c8f14 | ||
|
bd222ae93f | ||
|
4359d8aac0 | ||
|
308819fb1f | ||
|
09108b7fda | ||
|
743d64f2e4 | ||
|
5962abc465 | ||
|
1604a18db3 | ||
|
9eeb0b01ce | ||
|
df4400209a | ||
|
77439f99a4 | ||
|
be7d6dd9ce | ||
|
9f15e3b671 | ||
|
254b8b67bb | ||
|
8a7f43b84d | ||
|
d968a27ed5 | ||
|
9b8f5cbaba | ||
|
c52da74ac3 | ||
|
7dcbff9a9b | ||
|
bef1f0e770 | ||
|
cd9815cb37 | ||
|
3f22c3a658 | ||
|
0913972104 | ||
|
f19b84fb23 | ||
|
59fc583aeb | ||
|
1c7c4e6640 | ||
|
69c5e3938d | ||
|
d9cf05ad50 | ||
|
af8b6caebb | ||
|
c84cfb6756 | ||
|
895e26a000 | ||
|
8c95290ff6 | ||
|
d6feeb7ff2 | ||
|
626ed00c19 | ||
|
332ad18401 | ||
|
fc3cf41247 | ||
|
a4868b8835 | ||
|
f99b991117 | ||
|
de0bc78982 | ||
|
147e96914c | ||
|
0abc40c23c | ||
|
f768796e4f | ||
|
da0c0c4705 | ||
|
05411b2b32 | ||
|
cc93db3b18 | ||
|
049354b766 | ||
|
386dd2806d | ||
|
5fe9a954e6 | ||
|
f158a42a71 | ||
|
5724391d39 | ||
|
0df71240ff | ||
|
f3164f2615 | ||
|
31d3e24130 | ||
|
bf842dbc7f | ||
|
6d5dba85bd | ||
|
afb89bcdad | ||
|
332dc56372 | ||
|
12a6ca45f9 | ||
|
9d703110cf | ||
|
e99b6bda0e | ||
|
276e2c9b9a | ||
|
9a9f54d44c | ||
|
47833b9ce7 | ||
|
44a9e37737 | ||
|
8fd05cacee | ||
|
4691d11034 | ||
|
519a6b68af | ||
|
9c940b45f4 | ||
|
0a167021c3 | ||
|
aeaa5fc1b1 | ||
|
7048a06c31 | ||
|
23be3cf850 | ||
|
b48bbf527d | ||
|
8eabe47b57 | ||
|
ff712bfd9d | ||
|
a7f2d48234 | ||
|
57500ad013 | ||
|
0b04553aff | ||
|
1ae121122f | ||
|
688003e51c | ||
|
718a00f6f2 | ||
|
7cbc535d70 | ||
|
7a6a40bae1 | ||
|
1e9ee2cc85 | ||
|
968491f8e9 | ||
|
63b0f31a22 | ||
|
7ecee299a5 | ||
|
dd396ff34e | ||
|
fb0a82f3c3 | ||
|
dbc8ca9cc1 | ||
|
c3db8db93d | ||
|
17ef4c40f3 | ||
|
a9e0477ea8 | ||
|
b3c5773266 | ||
|
118b950085 | ||
|
b45b2f58ea | ||
|
662a9bc73d | ||
|
401add0a99 | ||
|
f81b72721b | ||
|
1d4fccaadc | ||
|
09e464e674 | ||
|
31adff6f3c | ||
|
b41e596327 | ||
|
fb62266620 | ||
|
acf226c39d | ||
|
8299625e48 | ||
|
db256c87eb | ||
|
ba533f390e | ||
|
ba503eb677 | ||
|
f72c2dfd90 | ||
|
c0aa58b4f7 | ||
|
184ee4c328 | ||
|
e82fbf2c46 | ||
|
eb18da0450 | ||
|
0f7494216f | ||
|
442a278635 | ||
|
7ebed3ace6 | ||
|
8a7db1a918 | ||
|
ce80d794c0 | ||
|
c5d467a2ab | ||
|
a62cd553c2 | ||
|
ce5188335b | ||
|
b7a456ae83 | ||
|
d14f0b37d6 | ||
|
3ddc3c040f | ||
|
eeaa42ecaf | ||
|
3797a2a5cb | ||
|
0e2f8f7b47 | ||
|
3dd4b77dfb | ||
|
3b5cdea862 | ||
|
54b3e9eb10 | ||
|
56e8864426 | ||
|
b8f619d16e | ||
|
83dff33326 | ||
|
003c3695f4 | ||
|
10777c150d | ||
|
827179250b | ||
|
fd22cd520b | ||
|
241bc8f8fc | ||
|
b6e30124e0 | ||
|
4846d63539 | ||
|
13c47530a6 | ||
|
328f4369e6 | ||
|
04518e32e7 | ||
|
f2eaf5b977 | ||
|
3edeeca6e9 | ||
|
c41b353009 | ||
|
d18839f3dc | ||
|
8f978a3cf7 | ||
|
87b745454d | ||
|
e5bb750995 | ||
|
d599f0b3c7 | ||
|
40e310a9f9 | ||
|
510f15f4da | ||
|
f9ce7a84a8 | ||
|
1b6089674e | ||
|
05a0389555 | ||
|
16353bad6e | ||
|
fe442de091 | ||
|
1bb8b7170f | ||
|
55ed698a98 | ||
|
f1e025873f | ||
|
033ad2b8e4 | ||
|
098a8ee843 | ||
|
2f3dbf5fee | ||
|
5c80e4adb6 | ||
|
fcd1853031 | ||
|
74a89be641 | ||
|
5b1ce8bdc2 | ||
|
1529ce3341 | ||
|
95a4f15916 | ||
|
0eef05142a | ||
|
edd6eb4e06 | ||
|
7ac9782970 | ||
|
180054d7dc | ||
|
7eaaa04c69 | ||
|
87a627631c | ||
|
9df60e164e | ||
|
afa06c518a | ||
|
e46aeb34f8 | ||
|
d8f187e990 | ||
|
7d93d2ab05 | ||
|
9ca2d68e94 | ||
|
60b0e3ff80 | ||
|
3a1c081c13 | ||
|
d5c0b03030 | ||
|
eb184d7711 | ||
|
bb110c1ebe | ||
|
d9ca529356 | ||
|
0958837ee1 | ||
|
94be3bd4bb | ||
|
deb1de6e1e | ||
|
6afdf15d85 | ||
|
6cda7b24e9 | ||
|
ad9befbc1d |
99
.github/workflows/ci.yml
vendored
Normal file
99
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,99 @@
|
||||
# test
|
||||
name: ci
|
||||
on:
|
||||
pull_request:
|
||||
push:
|
||||
branches:
|
||||
- master
|
||||
schedule:
|
||||
- cron: '00 01 * * *'
|
||||
jobs:
|
||||
test:
|
||||
name: test
|
||||
runs-on: ${{ matrix.os }}
|
||||
strategy:
|
||||
matrix:
|
||||
# The docs seem to suggest that we can have a matrix with just an
|
||||
# include directive, but it result in a "matrix must define at least
|
||||
# one vector" error in the CI system.
|
||||
build:
|
||||
- pinned-glibc
|
||||
- pinned-musl
|
||||
- stable
|
||||
- beta
|
||||
# We test musl with nightly because every once in a while, this will
|
||||
# catch an upstream regression.
|
||||
- nightly-glibc
|
||||
- nightly-musl
|
||||
- macos
|
||||
- win-msvc-32
|
||||
- win-msvc-64
|
||||
- win-gnu-32
|
||||
- win-gnu-64
|
||||
include:
|
||||
# - build: pinned-glibc
|
||||
# os: ubuntu-18.04
|
||||
# rust: 1.34.0
|
||||
# target: x86_64-unknown-linux-gnu
|
||||
- build: pinned-musl
|
||||
os: ubuntu-18.04
|
||||
rust: 1.34.0
|
||||
target: x86_64-unknown-linux-musl
|
||||
- build: stable
|
||||
os: ubuntu-18.04
|
||||
rust: stable
|
||||
target: x86_64-unknown-linux-gnu
|
||||
# - build: beta
|
||||
# os: ubuntu-18.04
|
||||
# rust: beta
|
||||
# target: x86_64-unknown-linux-gnu
|
||||
# - build: nightly-glibc
|
||||
# os: ubuntu-18.04
|
||||
# rust: nightly
|
||||
# target: x86_64-unknown-linux-gnu
|
||||
# - build: nightly-musl
|
||||
# os: ubuntu-18.04
|
||||
# rust: nightly
|
||||
# target: x86_64-unknown-linux-musl
|
||||
# - build: macos
|
||||
# os: macOS-10.14
|
||||
# rust: stable
|
||||
# target: x86_64-apple-darwin
|
||||
# - build: win-msvc-32
|
||||
# os: windows-2019
|
||||
# rust: stable
|
||||
# target: i686-pc-windows-msvc
|
||||
# - build: win-msvc-64
|
||||
# os: windows-2019
|
||||
# rust: stable
|
||||
# target: x86_64-pc-windows-msvc
|
||||
# - build: win-gnu-32
|
||||
# os: windows-2019
|
||||
# rust: stable-i686-gnu
|
||||
# target: i686-pc-windows-gnu
|
||||
# - build: win-gnu-64
|
||||
# os: windows-2019
|
||||
# rust: stable-x86_64-gnu
|
||||
# target: x86_64-pc-windows-gnu
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v1
|
||||
with:
|
||||
fetch-depth: 1
|
||||
- name: Install Rust
|
||||
uses: hecrj/setup-rust-action@v1
|
||||
with:
|
||||
rust-version: ${{ matrix.rust }}
|
||||
- name: Install Rust Target
|
||||
run: rustup target add ${{ matrix.target }}
|
||||
# - name: Install musl-gcc
|
||||
# if: contains(matrix.target, 'musl')
|
||||
# run: |
|
||||
# apt-get install musl-tools
|
||||
- name: Build everything
|
||||
run: cargo build --verbose --target ${{ matrix.target }} --all --features pcre2
|
||||
- name: Test zsh auto-completions
|
||||
if: matrix.build == 'stable'
|
||||
run: ./ci/test_complete.sh
|
||||
- name: Run tests
|
||||
run: cargo test --verbose --target ${{ matrix.target }} --all --features pcre2
|
12
.travis.yml
12
.travis.yml
@@ -1,4 +1,5 @@
|
||||
language: rust
|
||||
dist: xenial
|
||||
env:
|
||||
global:
|
||||
- PROJECT_NAME: ripgrep
|
||||
@@ -17,6 +18,8 @@ addons:
|
||||
# Needed for testing decompression search.
|
||||
- xz-utils
|
||||
- liblz4-tool
|
||||
# For building MUSL static builds on Linux.
|
||||
- musl-tools
|
||||
matrix:
|
||||
fast_finish: true
|
||||
include:
|
||||
@@ -60,13 +63,13 @@ matrix:
|
||||
# Minimum Rust supported channel. We enable these to make sure ripgrep
|
||||
# continues to work on the advertised minimum Rust version.
|
||||
- os: linux
|
||||
rust: 1.23.0
|
||||
rust: 1.34.0
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: linux
|
||||
rust: 1.23.0
|
||||
rust: 1.34.0
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: 1.23.0
|
||||
rust: 1.34.0
|
||||
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
|
||||
addons:
|
||||
apt:
|
||||
@@ -91,7 +94,7 @@ deploy:
|
||||
skip_cleanup: true
|
||||
on:
|
||||
condition: $TRAVIS_RUST_VERSION = nightly
|
||||
branch: master
|
||||
branch: master # i guess we do need this after all?
|
||||
tags: true
|
||||
api_key:
|
||||
secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo="
|
||||
@@ -99,7 +102,6 @@ branches:
|
||||
only:
|
||||
# Pushes and PR to the master branch
|
||||
- master
|
||||
- ag/libripgrep
|
||||
# Ruby regex to match tags. Required, or travis won't trigger deploys when
|
||||
# a new tag is pushed.
|
||||
- /^\d+\.\d+\.\d+.*$/
|
||||
|
268
CHANGELOG.md
268
CHANGELOG.md
@@ -1,3 +1,269 @@
|
||||
TBD
|
||||
===
|
||||
TODO
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #1335](https://github.com/BurntSushi/ripgrep/issues/1335):
|
||||
Fixes a performance bug when searching plain text files with very long lines.
|
||||
|
||||
|
||||
11.0.2 (2019-08-01)
|
||||
===================
|
||||
ripgrep 11.0.2 is a new patch release that fixes a few bugs, including a
|
||||
performance regression and a matching bug when using the `-F/--fixed-strings`
|
||||
flag.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* [FEATURE #1293](https://github.com/BurntSushi/ripgrep/issues/1293):
|
||||
Added `--glob-case-insensitive` flag that makes `--glob` behave as `--iglob`.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #1246](https://github.com/BurntSushi/ripgrep/issues/1246):
|
||||
Add translations to README, starting with an unofficial Chinese translation.
|
||||
* [BUG #1259](https://github.com/BurntSushi/ripgrep/issues/1259):
|
||||
Fix bug where the last byte of a `-f file` was stripped if it wasn't a `\n`.
|
||||
* [BUG #1261](https://github.com/BurntSushi/ripgrep/issues/1261):
|
||||
Document that no error is reported when searching for `\n` with `-P/--pcre2`.
|
||||
* [BUG #1284](https://github.com/BurntSushi/ripgrep/issues/1284):
|
||||
Mention `.ignore` and `.rgignore` more prominently in the README.
|
||||
* [BUG #1292](https://github.com/BurntSushi/ripgrep/issues/1292):
|
||||
Fix bug where `--with-filename` was sometimes enabled incorrectly.
|
||||
* [BUG #1268](https://github.com/BurntSushi/ripgrep/issues/1268):
|
||||
Fix major performance regression in GitHub `x86_64-linux` binary release.
|
||||
* [BUG #1302](https://github.com/BurntSushi/ripgrep/issues/1302):
|
||||
Show better error messages when a non-existent preprocessor command is given.
|
||||
* [BUG #1334](https://github.com/BurntSushi/ripgrep/issues/1334):
|
||||
Fix match regression with `-F` flag when patterns contain meta characters.
|
||||
|
||||
|
||||
11.0.1 (2019-04-16)
|
||||
===================
|
||||
ripgrep 11.0.1 is a new patch release that fixes a search regression introduced
|
||||
in the previous 11.0.0 release. In particular, ripgrep can enter an infinite
|
||||
loop for some search patterns when searching invalid UTF-8.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #1247](https://github.com/BurntSushi/ripgrep/issues/1247):
|
||||
Fix search bug that can cause ripgrep to enter an infinite loop.
|
||||
|
||||
|
||||
11.0.0 (2019-04-15)
|
||||
===================
|
||||
ripgrep 11 is a new major version release of ripgrep that contains many bug
|
||||
fixes, some performance improvements and a few feature enhancements. Notably,
|
||||
ripgrep's user experience for binary file filtering has been improved. See the
|
||||
[guide's new section on binary data](GUIDE.md#binary-data) for more details.
|
||||
|
||||
This release also marks a change in ripgrep's versioning. Where as the previous
|
||||
version was `0.10.0`, this version is `11.0.0`. Moving forward, ripgrep's
|
||||
major version will be increased a few times per year. ripgrep will continue to
|
||||
be conservative with respect to backwards compatibility, but may occasionally
|
||||
introduce breaking changes, which will always be documented in this CHANGELOG.
|
||||
See [issue 1172](https://github.com/BurntSushi/ripgrep/issues/1172) for a bit
|
||||
more detail on why this versioning change was made.
|
||||
|
||||
This release increases the **minimum supported Rust version** from 1.28.0 to
|
||||
1.34.0.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
* ripgrep has tweaked its exit status codes to be more like GNU grep's. Namely,
|
||||
if a non-fatal error occurs during a search, then ripgrep will now always
|
||||
emit a `2` exit status code, regardless of whether a match is found or not.
|
||||
Previously, ripgrep would only emit a `2` exit status code for a catastrophic
|
||||
error (e.g., regex syntax error). One exception to this is if ripgrep is run
|
||||
with `-q/--quiet`. In that case, if an error occurs and a match is found,
|
||||
then ripgrep will exit with a `0` exit status code.
|
||||
* Supplying the `-u/--unrestricted` flag three times is now equivalent to
|
||||
supplying `--no-ignore --hidden --binary`. Previously, `-uuu` was equivalent
|
||||
to `--no-ignore --hidden --text`. The difference is that `--binary` disables
|
||||
binary file filtering without potentially dumping binary data into your
|
||||
terminal. That is, `rg -uuu foo` should now be equivalent to `grep -r foo`.
|
||||
* The `avx-accel` feature of ripgrep has been removed since it is no longer
|
||||
necessary. All uses of AVX in ripgrep are now enabled automatically via
|
||||
runtime CPU feature detection. The `simd-accel` feature does remain available
|
||||
(only for enabling SIMD for transcoding), however, it does increase
|
||||
compilation times substantially at the moment.
|
||||
|
||||
Performance improvements:
|
||||
|
||||
* [PERF #497](https://github.com/BurntSushi/ripgrep/issues/497),
|
||||
[PERF #838](https://github.com/BurntSushi/ripgrep/issues/838):
|
||||
Make `rg -F -f dictionary-of-literals` much faster.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Apache Thrift, ASP, Bazel, Brotli,
|
||||
BuildStream, bzip2, C, C++, Cython, gzip, Java, Make, Postscript, QML, Tex,
|
||||
XML, xz, zig and zstd.
|
||||
* [FEATURE #855](https://github.com/BurntSushi/ripgrep/issues/855):
|
||||
Add `--binary` flag for disabling binary file filtering.
|
||||
* [FEATURE #1078](https://github.com/BurntSushi/ripgrep/pull/1078):
|
||||
Add `--max-columns-preview` flag for showing a preview of long lines.
|
||||
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
|
||||
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
|
||||
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
|
||||
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
|
||||
* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
|
||||
Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
|
||||
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
|
||||
ripgrep's exit status logic should now match GNU grep. See updated man page.
|
||||
* [FEATURE #1164](https://github.com/BurntSushi/ripgrep/pull/1164):
|
||||
Add `--ignore-file-case-insensitive` for case insensitive ignore globs.
|
||||
* [FEATURE #1185](https://github.com/BurntSushi/ripgrep/pull/1185):
|
||||
Add `-I` flag as a short option for the `--no-filename` flag.
|
||||
* [FEATURE #1207](https://github.com/BurntSushi/ripgrep/pull/1207):
|
||||
Add `none` value to `-E/--encoding` to forcefully disable all transcoding.
|
||||
* [FEATURE da9d7204](https://github.com/BurntSushi/ripgrep/commit/da9d7204):
|
||||
Add `--pcre2-version` for querying showing PCRE2 version information.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #306](https://github.com/BurntSushi/ripgrep/issues/306),
|
||||
[BUG #855](https://github.com/BurntSushi/ripgrep/issues/855):
|
||||
Improve the user experience for ripgrep's binary file filtering.
|
||||
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
|
||||
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
|
||||
`**` is now accepted as valid syntax anywhere in a glob.
|
||||
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
|
||||
ripgrep no longer hangs when searching `/proc` with a zombie process present.
|
||||
* [BUG #1052](https://github.com/BurntSushi/ripgrep/issues/1052):
|
||||
Fix bug where ripgrep could panic when transcoding UTF-16 files.
|
||||
* [BUG #1055](https://github.com/BurntSushi/ripgrep/issues/1055):
|
||||
Suggest `-U/--multiline` when a pattern contains a `\n`.
|
||||
* [BUG #1063](https://github.com/BurntSushi/ripgrep/issues/1063):
|
||||
Always strip a BOM if it's present, even for UTF-8.
|
||||
* [BUG #1064](https://github.com/BurntSushi/ripgrep/issues/1064):
|
||||
Fix inner literal detection that could lead to incorrect matches.
|
||||
* [BUG #1079](https://github.com/BurntSushi/ripgrep/issues/1079):
|
||||
Fixes a bug where the order of globs could result in missing a match.
|
||||
* [BUG #1089](https://github.com/BurntSushi/ripgrep/issues/1089):
|
||||
Fix another bug where ripgrep could panic when transcoding UTF-16 files.
|
||||
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
|
||||
Add note about inverted flags to the man page.
|
||||
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
|
||||
Fix handling of literal slashes in gitignore patterns.
|
||||
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
|
||||
Fix corner cases involving the `--crlf` flag.
|
||||
* [BUG #1101](https://github.com/BurntSushi/ripgrep/issues/1101):
|
||||
Fix AsciiDoc escaping for man page output.
|
||||
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
|
||||
Clarify what `--encoding auto` does.
|
||||
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
|
||||
`--files-with-matches` and `--files-without-match` work with one file.
|
||||
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
|
||||
Fix bug that was triggering Windows antimalware when using the `--files`
|
||||
flag.
|
||||
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
|
||||
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
|
||||
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
|
||||
* [BUG #1144](https://github.com/BurntSushi/ripgrep/issues/1144):
|
||||
Fixes a bug where line numbers could be wrong on big-endian machines.
|
||||
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
|
||||
Windows files with "hidden" attribute are now treated as hidden.
|
||||
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
|
||||
Fix handling of `**` patterns in gitignore files.
|
||||
* [BUG #1174](https://github.com/BurntSushi/ripgrep/issues/1174):
|
||||
Fix handling of repeated `**` patterns in gitignore files.
|
||||
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
|
||||
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
|
||||
* [BUG #1189](https://github.com/BurntSushi/ripgrep/issues/1189):
|
||||
Document cases where ripgrep may use a lot of memory.
|
||||
* [BUG #1203](https://github.com/BurntSushi/ripgrep/issues/1203):
|
||||
Fix a matching bug related to the suffix literal optimization.
|
||||
* [BUG 8f14cb18](https://github.com/BurntSushi/ripgrep/commit/8f14cb18):
|
||||
Increase the default stack size for PCRE2's JIT.
|
||||
|
||||
|
||||
0.10.0 (2018-09-07)
|
||||
===================
|
||||
This is a new minor version release of ripgrep that contains some major new
|
||||
features, a huge number of bug fixes, and is the first release based on
|
||||
libripgrep. The entirety of ripgrep's core search and printing code has been
|
||||
rewritten and generalized so that anyone can make use of it.
|
||||
|
||||
Major new features include PCRE2 support, multi-line search and a JSON output
|
||||
format.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
* The minimum version required to compile Rust has now changed to track the
|
||||
latest stable version of Rust. Patch releases will continue to compile with
|
||||
the same version of Rust as the previous patch release, but new minor
|
||||
versions will use the current stable version of the Rust compile as its
|
||||
minimum supported version.
|
||||
* The match semantics of `-w/--word-regexp` have changed slightly. They used
|
||||
to be `\b(?:<your pattern>)\b`, but now it's
|
||||
`(?:^|\W)(?:<your pattern>)(?:$|\W)`. This matches the behavior of GNU grep
|
||||
and is believed to be closer to the intended semantics of the flag. See
|
||||
[#389](https://github.com/BurntSushi/ripgrep/issues/389) for more details.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* [FEATURE #162](https://github.com/BurntSushi/ripgrep/issues/162):
|
||||
libripgrep is now a thing. The primary crate is
|
||||
[`grep`](https://docs.rs/grep).
|
||||
* [FEATURE #176](https://github.com/BurntSushi/ripgrep/issues/176):
|
||||
Add `-U/--multiline` flag that permits matching over multiple lines.
|
||||
* [FEATURE #188](https://github.com/BurntSushi/ripgrep/issues/188):
|
||||
Add `-P/--pcre2` flag that gives support for look-around and backreferences.
|
||||
* [FEATURE #244](https://github.com/BurntSushi/ripgrep/issues/244):
|
||||
Add `--json` flag that prints results in a JSON Lines format.
|
||||
* [FEATURE #321](https://github.com/BurntSushi/ripgrep/issues/321):
|
||||
Add `--one-file-system` flag to skip directories on different file systems.
|
||||
* [FEATURE #404](https://github.com/BurntSushi/ripgrep/issues/404):
|
||||
Add `--sort` and `--sortr` flag for more sorting. Deprecate `--sort-files`.
|
||||
* [FEATURE #416](https://github.com/BurntSushi/ripgrep/issues/416):
|
||||
Add `--crlf` flag to permit `$` to work with carriage returns on Windows.
|
||||
* [FEATURE #917](https://github.com/BurntSushi/ripgrep/issues/917):
|
||||
The `--trim` flag strips prefix whitespace from all lines printed.
|
||||
* [FEATURE #993](https://github.com/BurntSushi/ripgrep/issues/993):
|
||||
Add `--null-data` flag, which makes ripgrep use NUL as a line terminator.
|
||||
* [FEATURE #997](https://github.com/BurntSushi/ripgrep/issues/997):
|
||||
The `--passthru` flag now works with the `--replace` flag.
|
||||
* [FEATURE #1038-1](https://github.com/BurntSushi/ripgrep/issues/1038):
|
||||
Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
|
||||
* [FEATURE #1038-2](https://github.com/BurntSushi/ripgrep/issues/1038):
|
||||
Add `--pre-glob` for filtering files through the `--pre` flag.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #2](https://github.com/BurntSushi/ripgrep/issues/2):
|
||||
Searching with non-zero context can now use memory maps if appropriate.
|
||||
* [BUG #200](https://github.com/BurntSushi/ripgrep/issues/200):
|
||||
ripgrep will now stop correctly when its output pipe is closed.
|
||||
* [BUG #389](https://github.com/BurntSushi/ripgrep/issues/389):
|
||||
The `-w/--word-regexp` flag now works more intuitively.
|
||||
* [BUG #643](https://github.com/BurntSushi/ripgrep/issues/643):
|
||||
Detection of readable stdin has improved on Windows.
|
||||
* [BUG #441](https://github.com/BurntSushi/ripgrep/issues/441),
|
||||
[BUG #690](https://github.com/BurntSushi/ripgrep/issues/690),
|
||||
[BUG #980](https://github.com/BurntSushi/ripgrep/issues/980):
|
||||
Matching empty lines now works correctly in several corner cases.
|
||||
* [BUG #764](https://github.com/BurntSushi/ripgrep/issues/764):
|
||||
Color escape sequences now coalesce, which reduces output size.
|
||||
* [BUG #842](https://github.com/BurntSushi/ripgrep/issues/842):
|
||||
Add man page to binary Debian package.
|
||||
* [BUG #922](https://github.com/BurntSushi/ripgrep/issues/922):
|
||||
ripgrep is now more robust with respect to memory maps failing.
|
||||
* [BUG #937](https://github.com/BurntSushi/ripgrep/issues/937):
|
||||
Color escape sequences are no longer emitted for empty matches.
|
||||
* [BUG #940](https://github.com/BurntSushi/ripgrep/issues/940):
|
||||
Context from the `--passthru` flag should not impact process exit status.
|
||||
* [BUG #984](https://github.com/BurntSushi/ripgrep/issues/984):
|
||||
Fixes bug in `ignore` crate where first path was always treated as a symlink.
|
||||
* [BUG #990](https://github.com/BurntSushi/ripgrep/issues/990):
|
||||
Read stderr asynchronously when running a process.
|
||||
* [BUG #1013](https://github.com/BurntSushi/ripgrep/issues/1013):
|
||||
Add compile time and runtime CPU features to `--version` output.
|
||||
* [BUG #1028](https://github.com/BurntSushi/ripgrep/pull/1028):
|
||||
Don't complete bare pattern after `-f` in zsh.
|
||||
|
||||
|
||||
0.9.0 (2018-08-03)
|
||||
==================
|
||||
This is a new minor version release of ripgrep that contains some minor new
|
||||
@@ -31,7 +297,7 @@ multi-line search support and a JSON output format.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Android, Bazel, Fuschia, Haskell,
|
||||
* Added or improved file type filtering for Android, Bazel, Fuchsia, Haskell,
|
||||
Java and Puppet.
|
||||
* [FEATURE #411](https://github.com/BurntSushi/ripgrep/issues/411):
|
||||
Add a `--stats` flag, which emits aggregate statistics after search results.
|
||||
|
629
Cargo.lock
generated
629
Cargo.lock
generated
@@ -1,99 +1,108 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.6.6"
|
||||
version = "0.7.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ansi_term"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.11"
|
||||
version = "0.2.13"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.9.2"
|
||||
version = "0.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
version = "1.0.3"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.3.1"
|
||||
name = "bstr"
|
||||
version = "0.2.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.2.3"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.41"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
version = "0.1.4"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "clap"
|
||||
version = "2.32.0"
|
||||
version = "2.33.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.3.2"
|
||||
name = "crossbeam-channel"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "dtoa"
|
||||
version = "0.4.3"
|
||||
name = "crossbeam-utils"
|
||||
version = "0.6.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.4"
|
||||
version = "0.8.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs_io"
|
||||
version = "0.1.1"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -102,393 +111,387 @@ version = "1.0.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "fuchsia-zircon-sys"
|
||||
version = "0.3.3"
|
||||
name = "fs_extra"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.2.11"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.1"
|
||||
version = "0.4.4"
|
||||
dependencies = [
|
||||
"aho-corasick 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.2.0"
|
||||
version = "0.2.4"
|
||||
dependencies = [
|
||||
"grep-matcher 0.0.1",
|
||||
"grep-printer 0.0.1",
|
||||
"grep-regex 0.0.1",
|
||||
"grep-searcher 0.0.1",
|
||||
"grep-cli 0.1.3",
|
||||
"grep-matcher 0.1.3",
|
||||
"grep-pcre2 0.1.3",
|
||||
"grep-printer 0.1.3",
|
||||
"grep-regex 0.1.5",
|
||||
"grep-searcher 0.1.6",
|
||||
"termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 2.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-cli"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.4",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-matcher"
|
||||
version = "0.0.1"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"grep-matcher 0.1.3",
|
||||
"pcre2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-printer"
|
||||
version = "0.0.1"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.0.1",
|
||||
"grep-regex 0.0.1",
|
||||
"grep-searcher 0.0.1",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.3",
|
||||
"grep-regex 0.1.5",
|
||||
"grep-searcher 0.1.6",
|
||||
"serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-regex"
|
||||
version = "0.0.1"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"grep-matcher 0.0.1",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.3",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-searcher"
|
||||
version = "0.0.1"
|
||||
version = "0.1.6"
|
||||
dependencies = [
|
||||
"bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.0.1",
|
||||
"grep-regex 0.0.1",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.1.3",
|
||||
"grep-regex 0.1.5",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.3"
|
||||
version = "0.4.10"
|
||||
dependencies = [
|
||||
"crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.1",
|
||||
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.4",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 2.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.2"
|
||||
version = "0.4.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "jemalloc-sys"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jemallocator"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "lazy_static"
|
||||
version = "1.0.2"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.42"
|
||||
version = "0.2.62"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.3"
|
||||
version = "0.4.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.0.1"
|
||||
version = "2.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memmap"
|
||||
version = "0.6.2"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "num_cpus"
|
||||
version = "1.8.0"
|
||||
version = "1.10.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "0.4.9"
|
||||
name = "packed_simd"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pcre2"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pcre2-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pcre2-sys"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"cc 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"pkg-config 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "0.6.3"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "redox_syscall"
|
||||
version = "0.1.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "redox_termios"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.0.2"
|
||||
version = "1.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.2"
|
||||
version = "0.6.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "remove_dir_all"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ripgrep"
|
||||
version = "0.9.0"
|
||||
version = "11.0.2"
|
||||
dependencies = [
|
||||
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.1",
|
||||
"grep 0.2.0",
|
||||
"ignore 0.4.3",
|
||||
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bstr 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep 0.2.4",
|
||||
"ignore 0.4.10",
|
||||
"jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.10.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "safemem"
|
||||
version = "0.2.0"
|
||||
name = "ryu"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
version = "1.0.2"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.70"
|
||||
version = "1.0.99"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.70"
|
||||
version = "1.0.99"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.24"
|
||||
version = "1.0.40"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simd"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "strsim"
|
||||
version = "0.7.0"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "0.14.4"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "tempdir"
|
||||
version = "0.3.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.0.1"
|
||||
version = "1.0.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"wincolor 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termion"
|
||||
version = "1.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"wincolor 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "textwrap"
|
||||
version = "0.10.0"
|
||||
version = "0.11.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.3.5"
|
||||
version = "0.3.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ucd-util"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.5"
|
||||
version = "0.1.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "unreachable"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "1.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "void"
|
||||
version = "1.0.2"
|
||||
version = "0.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.1.4"
|
||||
version = "2.2.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi"
|
||||
version = "0.3.5"
|
||||
version = "0.3.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -500,6 +503,14 @@ name = "winapi-i686-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "winapi-util"
|
||||
version = "0.1.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "winapi-x86_64-pc-windows-gnu"
|
||||
version = "0.4.0"
|
||||
@@ -507,66 +518,64 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "wincolor"
|
||||
version = "1.0.0"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c1c6d463cbe7ed28720b5b489e7c083eeb8f90d08be2a0d6bb9e1ffea9ce1afa"
|
||||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
|
||||
"checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9"
|
||||
"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789"
|
||||
"checksum bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "882585cd7ec84e902472df34a5e01891202db3bf62614e1f0afe459c1afcf744"
|
||||
"checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9"
|
||||
"checksum cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efe5c877e17a9c717a0bf3613b2709f723202c4e4675cc8f12926ded29bcb17e"
|
||||
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
|
||||
"checksum crossbeam 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "24ce9782d4d5c53674646a6a4c1863a21a8fc0cb649b3c94dfc16e45071dea19"
|
||||
"checksum dtoa 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "6d301140eb411af13d3115f9a562c85cc6b541ade9dfa314132244aaee7489dd"
|
||||
"checksum encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "88a1b66a0d28af4b03a8c8278c6dcb90e6e600d89c14500a9e7a02e64b9ee3ac"
|
||||
"checksum encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "ad0ffe753ba194ef1bc070e8d61edaadb1536c05e364fc9178ca6cbde10922c4"
|
||||
"checksum aho-corasick 0.7.6 (registry+https://github.com/rust-lang/crates.io-index)" = "58fb5e95d83b38284460a5fda7d6470aa0b8844d283a0b614b8535e880800d2d"
|
||||
"checksum atty 0.2.13 (registry+https://github.com/rust-lang/crates.io-index)" = "1803c647a3ec87095e7ae7acfca019e98de5ec9a7d01343f611cf3152ed71a90"
|
||||
"checksum base64 0.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0b25d992356d2eb0ed82172f5248873db5560c4721f564b13cb5193bda5e668e"
|
||||
"checksum bitflags 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "3d155346769a6855b86399e9bc3814ab343cd3d62c7e985113d46a0ec3c281fd"
|
||||
"checksum bstr 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "94cdf78eb7e94c566c1f5dbe2abf8fc70a548fc902942a48c4b3a98b48ca9ade"
|
||||
"checksum bytecount 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "be0fdd54b507df8f22012890aadd099979befdba27713c767993f8380112ca7c"
|
||||
"checksum byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c3dd8985a7111efc5c80b44e23ecdd8c007de8ade3b96595387e812b957cf5"
|
||||
"checksum cc 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)" = "8dae9c4b8fedcae85592ba623c4fd08cfdab3e3b72d6df780c6ead964a69bfff"
|
||||
"checksum cfg-if 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "b486ce3ccf7ffd79fdeb678eac06a9e6c09fc88d33836340becb8fffe87c5e33"
|
||||
"checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
|
||||
"checksum crossbeam-channel 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "c8ec7fcd21571dc78f96cc96243cab8d8f035247c3efd16c687be154c3fa9efa"
|
||||
"checksum crossbeam-utils 0.6.6 (registry+https://github.com/rust-lang/crates.io-index)" = "04973fa96e96579258a5091af6003abde64af786b860f18622b82e026cca60e6"
|
||||
"checksum encoding_rs 0.8.17 (registry+https://github.com/rust-lang/crates.io-index)" = "4155785c79f2f6701f185eb2e6b4caf0555ec03477cb4c70db67b465311620ed"
|
||||
"checksum encoding_rs_io 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "9619ee7a2bf4e777e020b95c1439abaf008f8ea8041b78a0552c4f1bcf4df32c"
|
||||
"checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3"
|
||||
"checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82"
|
||||
"checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum itoa 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5adb58558dcd1d786b5f0bd15f3226ee23486e24b7b58304b60f64dc68e62606"
|
||||
"checksum lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "fb497c35d362b6a331cfd94956a07fc2c78a4604cdbee844a81170386b996dd3"
|
||||
"checksum libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)" = "b685088df2b950fccadf07a7187c8ef846a959c142338a48f9dc0b94517eb5f1"
|
||||
"checksum log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "61bd98ae7f7b754bc53dca7d44b604f733c6bba044ea6f41bc8d89272d8161d2"
|
||||
"checksum memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "796fba70e76612589ed2ce7f45282f5af869e0fdd7cc6199fa1aa1f1d591ba9d"
|
||||
"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff"
|
||||
"checksum num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c51a3322e4bca9d212ad9a158a02abc6934d005490c054a2778df73a70aa0a30"
|
||||
"checksum proc-macro2 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)" = "cccdc7557a98fe98453030f077df7f3a042052fae465bb61d2c2c41435cfd9b6"
|
||||
"checksum quote 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)" = "e44651a0dc4cdd99f71c83b561e221f714912d11af1a4dff0631f923d53af035"
|
||||
"checksum rand 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "eba5f8cb59cc50ed56be8880a5c7b496bfd9bd26394e176bc67884094145c2c5"
|
||||
"checksum redox_syscall 0.1.40 (registry+https://github.com/rust-lang/crates.io-index)" = "c214e91d3ecf43e9a4e41e578973adeb14b474f2bee858742d127af75a0112b1"
|
||||
"checksum redox_termios 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
|
||||
"checksum regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "5bbbea44c5490a1e84357ff28b7d518b4619a159fed5d25f6c1de2d19cc42814"
|
||||
"checksum regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "747ba3b235651f6e2f67dfa8bcdcd073ddb7c243cb21c442fc12395dfcac212d"
|
||||
"checksum remove_dir_all 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3488ba1b9a2084d38645c4c08276a1752dcbf2c7130d74f1569681ad5d2799c5"
|
||||
"checksum safemem 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e27a8b19b835f7aea908818e871f5cc3a5a186550c30773be987e155e8163d8f"
|
||||
"checksum same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "cfb6eded0b06a0b512c8ddbcf04089138c9b4362c2f696f3c3d76039d68f3637"
|
||||
"checksum serde 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "0c3adf19c07af6d186d91dae8927b83b0553d07ca56cbf7f2f32560455c91920"
|
||||
"checksum serde_derive 1.0.70 (registry+https://github.com/rust-lang/crates.io-index)" = "3525a779832b08693031b8ecfb0de81cd71cfd3812088fafe9a7496789572124"
|
||||
"checksum serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)" = "c3c6908c7b925cd6c590358a4034de93dbddb20c45e1d021931459fd419bf0e2"
|
||||
"checksum simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "ed3686dd9418ebcc3a26a0c0ae56deab0681e53fe899af91f5bbcee667ebffb1"
|
||||
"checksum strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "bb4f380125926a99e52bc279241539c018323fab05ad6368b56f93d9369ff550"
|
||||
"checksum syn 0.14.4 (registry+https://github.com/rust-lang/crates.io-index)" = "2beff8ebc3658f07512a413866875adddd20f4fd47b2a4e6c9da65cd281baaea"
|
||||
"checksum tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)" = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8"
|
||||
"checksum termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "722426c4a0539da2c4ffd9b419d90ad540b4cff4a053be9069c908d4d07e2836"
|
||||
"checksum termion 1.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "689a3bdfaab439fd92bc87df5c4c78417d3cbe537487274e9b0b2dce76e92096"
|
||||
"checksum textwrap 0.10.0 (registry+https://github.com/rust-lang/crates.io-index)" = "307686869c93e71f94da64286f9a9524c0f308a9e1c87a583de8e9c9039ad3f6"
|
||||
"checksum thread_local 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279ef31c19ededf577bfd12dfae728040a21f635b06a24cd670ff510edd38963"
|
||||
"checksum ucd-util 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "fd2be2d6639d0f8fe6cdda291ad456e23629558d466e2789d2c3e9892bda285d"
|
||||
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
|
||||
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
|
||||
"checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
|
||||
"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
|
||||
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||
"checksum walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "63636bd0eb3d00ccb8b9036381b526efac53caf112b7783b730ab3f8e44da369"
|
||||
"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd"
|
||||
"checksum fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a4a2034423744d2cc7ca2068453168dcdb82c438419e639a26bd87839c674"
|
||||
"checksum glob 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
"checksum itoa 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "501266b7edd0174f8530248f87f99c88fbe60ca4ef3dd486835b8d8d53136f7f"
|
||||
"checksum jemalloc-sys 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45"
|
||||
"checksum jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69"
|
||||
"checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
"checksum libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)" = "34fcd2c08d2f832f376f4173a231990fa5aef4e99fb569867318a227ef4c06ba"
|
||||
"checksum log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)" = "14b6052be84e6b71ab17edffc2eeabf5c2c3ae1fdb464aae35ac50c67a44e1f7"
|
||||
"checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e"
|
||||
"checksum memmap 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||
"checksum num_cpus 1.10.1 (registry+https://github.com/rust-lang/crates.io-index)" = "bcef43580c035376c0705c42792c294b66974abbfd2789b511784023f71f3273"
|
||||
"checksum packed_simd 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a85ea9fc0d4ac0deb6fe7911d38786b32fc11119afd9e9d38b84ff691ce64220"
|
||||
"checksum pcre2 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "603da5e101220b9b306bf28e4f1f8703458ce2f64d2787b374e1a19494317180"
|
||||
"checksum pcre2-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "876c72d05059d23a84bd9fcdc3b1d31c50ea7fe00fe1522b4e68cd3608db8d5b"
|
||||
"checksum pkg-config 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "a7c1d2cfa5a714db3b5f24f0915e74fcdf91d09d496ba61329705dda7774d2af"
|
||||
"checksum proc-macro2 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4c5c2380ae88876faae57698be9e9775e3544decad214599c3a6266cca6ac802"
|
||||
"checksum quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "053a8c8bcc71fcce321828dc897a98ab9760bef03a4fc36693c231e5b3216cfe"
|
||||
"checksum regex 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88c3d9193984285d544df4a30c23a4e62ead42edf70a4452ceb76dac1ce05c26"
|
||||
"checksum regex-automata 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "92b73c2a1770c255c240eaa4ee600df1704a38dc3feaa6e949e7fcd4f8dc09f9"
|
||||
"checksum regex-syntax 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "b143cceb2ca5e56d5671988ef8b15615733e7ee16cd348e064333b251b89343f"
|
||||
"checksum ryu 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c92464b447c0ee8c4fb3824ecc8383b81717b9f1e74ba2e72540aef7b9f82997"
|
||||
"checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421"
|
||||
"checksum serde 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)" = "fec2851eb56d010dc9a21b89ca53ee75e6528bab60c11e89d38390904982da9f"
|
||||
"checksum serde_derive 1.0.99 (registry+https://github.com/rust-lang/crates.io-index)" = "cb4dc18c61206b08dc98216c98faa0232f4337e1e1b8574551d5bad29ea1b425"
|
||||
"checksum serde_json 1.0.40 (registry+https://github.com/rust-lang/crates.io-index)" = "051c49229f282f7c6f3813f8286cc1e3323e8051823fce42c7ea80fe13521704"
|
||||
"checksum strsim 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
"checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf"
|
||||
"checksum termcolor 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "96d6098003bde162e4277c70665bd87c326f5a0c3f3fbfb285787fa482d54e6e"
|
||||
"checksum textwrap 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
|
||||
"checksum thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c6b53e329000edc2b34dbe8545fd20e55a333362d0a321909685a19bd28c3f1b"
|
||||
"checksum unicode-width 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7007dbd421b92cc6e28410fe7362e2e0a2503394908f417b68ec8d1c364c4e20"
|
||||
"checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
|
||||
"checksum walkdir 2.2.9 (registry+https://github.com/rust-lang/crates.io-index)" = "9658c94fa8b940eab2250bd5a457f9c48b748420d71293b165c8cdbe2f55f71e"
|
||||
"checksum winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)" = "8093091eeb260906a183e6ae1abdba2ef5ef2257a21801128899c3fc699229c6"
|
||||
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
|
||||
"checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9"
|
||||
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
|
||||
"checksum wincolor 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b9dc3aa9dcda98b5a16150c54619c1ead22e3d3a5d458778ae914be760aa981a"
|
||||
"checksum wincolor 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "96f5016b18804d24db43cebf3c77269e7569b8954a8464501c216cc5e070eaa9"
|
||||
|
85
Cargo.toml
85
Cargo.toml
@@ -1,11 +1,11 @@
|
||||
[package]
|
||||
name = "ripgrep"
|
||||
version = "0.9.0" #:version
|
||||
version = "11.0.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
||||
has first class support on Windows, macOS and Linux
|
||||
has first class support on Windows, macOS and Linux.
|
||||
"""
|
||||
documentation = "https://github.com/BurntSushi/ripgrep"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
@@ -17,6 +17,7 @@ license = "Unlicense OR MIT"
|
||||
exclude = ["HomebrewFormula"]
|
||||
build = "build.rs"
|
||||
autotests = false
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
travis-ci = { repository = "BurntSushi/ripgrep" }
|
||||
@@ -33,42 +34,78 @@ path = "tests/tests.rs"
|
||||
|
||||
[workspace]
|
||||
members = [
|
||||
"grep", "globset", "ignore",
|
||||
"grep-matcher", "grep-printer", "grep-regex", "grep-searcher",
|
||||
"globset",
|
||||
"grep",
|
||||
"grep-cli",
|
||||
"grep-matcher",
|
||||
"grep-pcre2",
|
||||
"grep-printer",
|
||||
"grep-regex",
|
||||
"grep-searcher",
|
||||
"ignore",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
globset = { version = "0.4.0", path = "globset" }
|
||||
grep = { version = "0.2.0", path = "grep" }
|
||||
ignore = { version = "0.4.0", path = "ignore" }
|
||||
lazy_static = "1"
|
||||
log = "0.4"
|
||||
num_cpus = "1"
|
||||
regex = "1"
|
||||
same-file = "1"
|
||||
termcolor = "1"
|
||||
bstr = "0.2.0"
|
||||
grep = { version = "0.2.4", path = "grep" }
|
||||
ignore = { version = "0.4.7", path = "ignore" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
num_cpus = "1.8.0"
|
||||
regex = "1.0.5"
|
||||
serde_json = "1.0.23"
|
||||
termcolor = "1.0.3"
|
||||
|
||||
[dependencies.clap]
|
||||
version = "2.29.4"
|
||||
version = "2.32.0"
|
||||
default-features = false
|
||||
features = ["suggestions", "color"]
|
||||
features = ["suggestions"]
|
||||
|
||||
[target.'cfg(windows)'.dependencies.winapi]
|
||||
version = "0.3"
|
||||
features = ["std", "winnt"]
|
||||
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
|
||||
version = "0.3.0"
|
||||
|
||||
[build-dependencies]
|
||||
lazy_static = "1"
|
||||
lazy_static = "1.1.0"
|
||||
|
||||
[build-dependencies.clap]
|
||||
version = "2.29.4"
|
||||
version = "2.32.0"
|
||||
default-features = false
|
||||
features = ["suggestions", "color"]
|
||||
features = ["suggestions"]
|
||||
|
||||
[dev-dependencies]
|
||||
serde = "1.0.77"
|
||||
serde_derive = "1.0.77"
|
||||
|
||||
[features]
|
||||
avx-accel = ["grep/avx-accel"]
|
||||
simd-accel = ["grep/simd-accel"]
|
||||
pcre2 = ["grep/pcre2"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
debug = 1
|
||||
|
||||
[package.metadata.deb]
|
||||
features = ["pcre2"]
|
||||
section = "utils"
|
||||
assets = [
|
||||
["target/release/rg", "usr/bin/", "755"],
|
||||
["COPYING", "usr/share/doc/ripgrep/", "644"],
|
||||
["LICENSE-MIT", "usr/share/doc/ripgrep/", "644"],
|
||||
["UNLICENSE", "usr/share/doc/ripgrep/", "644"],
|
||||
["CHANGELOG.md", "usr/share/doc/ripgrep/CHANGELOG", "644"],
|
||||
["README.md", "usr/share/doc/ripgrep/README", "644"],
|
||||
["FAQ.md", "usr/share/doc/ripgrep/FAQ", "644"],
|
||||
# The man page is automatically generated by ripgrep's build process, so
|
||||
# this file isn't actually commited. Instead, to create a dpkg, either
|
||||
# create a deployment/deb directory and copy the man page to it, or use the
|
||||
# 'ci/build_deb.sh' script.
|
||||
["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"],
|
||||
# Similarly for shell completions.
|
||||
["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"],
|
||||
["deployment/deb/rg.fish", "usr/share/fish/completions/rg.fish", "644"],
|
||||
["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"],
|
||||
]
|
||||
extended-description = """\
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
"""
|
||||
|
2
Cross.toml
Normal file
2
Cross.toml
Normal file
@@ -0,0 +1,2 @@
|
||||
[target.x86_64-unknown-linux-musl]
|
||||
image = "burntsushi/x86_64-unknown-linux-musl:v0.1.14"
|
349
FAQ.md
349
FAQ.md
@@ -16,6 +16,7 @@
|
||||
* [How do I get around the regex size limit?](#size-limit)
|
||||
* [How do I make the `-f/--file` flag faster?](#dfa-size)
|
||||
* [How do I make the output look like The Silver Searcher's output?](#silver-searcher-output)
|
||||
* [Why does ripgrep get slower when I enabled PCRE2 regexes?](#pcre2-slow)
|
||||
* [When I run `rg`, why does it execute some other command?](#rg-other-cmd)
|
||||
* [How do I create an alias for ripgrep on Windows?](#rg-alias-windows)
|
||||
* [How do I create a PowerShell profile?](#powershell-profile)
|
||||
@@ -117,7 +118,7 @@ from run to run of ripgrep.
|
||||
The only way to make the order of results consistent is to ask ripgrep to
|
||||
sort the output. Currently, this will disable all parallelism. (On smaller
|
||||
repositories, you might not notice much of a performance difference!) You
|
||||
can achieve this with the `--sort-files` flag.
|
||||
can achieve this with the `--sort path` flag.
|
||||
|
||||
There is more discussion on this topic here:
|
||||
https://github.com/BurntSushi/ripgrep/issues/152
|
||||
@@ -135,10 +136,10 @@ How do I search compressed files?
|
||||
</h3>
|
||||
|
||||
ripgrep's `-z/--search-zip` flag will cause it to search compressed files
|
||||
automatically. Currently, this supports gzip, bzip2, lzma, lz4 and xz only and
|
||||
requires the corresponding `gzip`, `bzip2` and `xz` binaries to be installed on
|
||||
your system. (That is, ripgrep does decompression by shelling out to another
|
||||
process.)
|
||||
automatically. Currently, this supports gzip, bzip2, xz, lzma, lz4, Brotli and
|
||||
Zstd. Each of these requires requires the corresponding `gzip`, `bzip2`, `xz`,
|
||||
`lz4`, `brotli` and `zstd` binaries to be installed on your system. (That is,
|
||||
ripgrep does decompression by shelling out to another process.)
|
||||
|
||||
ripgrep currently does not search archive formats, so `*.tar.gz` files, for
|
||||
example, are skipped.
|
||||
@@ -148,22 +149,45 @@ example, are skipped.
|
||||
How do I search over multiple lines?
|
||||
</h3>
|
||||
|
||||
This isn't currently possible. ripgrep is fundamentally a line-oriented search
|
||||
tool. With that said,
|
||||
[multiline search is a planned opt-in feature](https://github.com/BurntSushi/ripgrep/issues/176).
|
||||
The `-U/--multiline` flag enables ripgrep to report results that span over
|
||||
multiple lines.
|
||||
|
||||
|
||||
<h3 name="fancy">
|
||||
How do I use lookaround and/or backreferences?
|
||||
</h3>
|
||||
|
||||
This isn't currently possible. ripgrep uses finite automata to implement
|
||||
regular expression search, and in turn, guarantees linear time searching on all
|
||||
inputs. It is difficult to efficiently support lookaround and backreferences in
|
||||
finite automata engines, so ripgrep does not provide these features.
|
||||
ripgrep's default regex engine does not support lookaround or backreferences.
|
||||
This is primarily because the default regex engine is implemented using finite
|
||||
state machines in order to guarantee a linear worst case time complexity on all
|
||||
inputs. Backreferences are not possible to implement in this paradigm, and
|
||||
lookaround appears difficult to do efficiently.
|
||||
|
||||
If a production quality regular expression engine with these features is ever
|
||||
written in Rust, then it is possible ripgrep will provide it as an opt-in
|
||||
However, ripgrep optionally supports using PCRE2 as the regex engine instead of
|
||||
the default one based on finite state machines. You can enable PCRE2 with the
|
||||
`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily
|
||||
find all palindromes:
|
||||
|
||||
```
|
||||
$ rg -P '(\w{10})\1'
|
||||
tests/misc.rs
|
||||
483: cmd.arg("--max-filesize").arg("44444444444444444444");
|
||||
globset/src/glob.rs
|
||||
1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
|
||||
```
|
||||
|
||||
If your version of ripgrep doesn't support PCRE2, then you'll get an error
|
||||
message when you try to use the `-P/--pcre2` flag:
|
||||
|
||||
```
|
||||
$ rg -P '(\w{10})\1'
|
||||
PCRE2 is not available in this build of ripgrep
|
||||
```
|
||||
|
||||
Most of the releases distributed by the ripgrep project here on GitHub will
|
||||
come bundled with PCRE2 enabled. If you installed ripgrep through a different
|
||||
means (like your system's package manager), then please reach out to the
|
||||
maintainer of that package to see whether it's possible to enable the PCRE2
|
||||
feature.
|
||||
|
||||
|
||||
@@ -368,6 +392,301 @@ $ RIPGREP_CONFIG_PATH=$HOME/.config/ripgrep/rc rg foo
|
||||
```
|
||||
|
||||
|
||||
<h3 name="pcre2-slow">
|
||||
Why does ripgrep get slower when I enable PCRE2 regexes?
|
||||
</h3>
|
||||
|
||||
When you use the `--pcre2` (`-P` for short) flag, ripgrep will use the PCRE2
|
||||
regex engine instead of the default. Both regex engines are quite fast,
|
||||
but PCRE2 provides a number of additional features such as look-around and
|
||||
backreferences that many enjoy using. This is largely because PCRE2 uses
|
||||
a backtracking implementation where as the default regex engine uses a finite
|
||||
automaton based implementation. The former provides the ability to add lots of
|
||||
bells and whistles over the latter, but the latter executes with worst case
|
||||
linear time complexity.
|
||||
|
||||
With that out of the way, if you've used `-P` with ripgrep, you may have
|
||||
noticed that it can be slower. The reasons for why this is are quite complex,
|
||||
and they are complex because the optimizations that ripgrep uses to implement
|
||||
fast search are complex.
|
||||
|
||||
The task ripgrep has before it is somewhat simple; all it needs to do is search
|
||||
a file for occurrences of some pattern and then print the lines containing
|
||||
those occurrences. The problem lies in what is considered a valid match and how
|
||||
exactly we read the bytes from a file.
|
||||
|
||||
In terms of what is considered a valid match, remember that ripgrep will only
|
||||
report matches spanning a single line by default. The problem here is that
|
||||
some patterns can match across multiple lines, and ripgrep needs to prevent
|
||||
that from happening. For example, `foo\sbar` will match `foo\nbar`. The most
|
||||
obvious way to achieve this is to read the data from a file, and then apply
|
||||
the pattern search to that data for each line. The problem with this approach
|
||||
is that it can be quite slow; it would be much faster to let the pattern
|
||||
search across as much data as possible. It's faster because it gets rid of the
|
||||
overhead of finding the boundaries of every line, and also because it gets rid
|
||||
of the overhead of starting and stopping the pattern search for every single
|
||||
line. (This is operating under the general assumption that matching lines are
|
||||
much rarer than non-matching lines.)
|
||||
|
||||
It turns out that we can use the faster approach by applying a very simple
|
||||
restriction to the pattern: *statically prevent* the pattern from matching
|
||||
through a `\n` character. Namely, when given a pattern like `foo\sbar`,
|
||||
ripgrep will remove `\n` from the `\s` character class automatically. In some
|
||||
cases, a simple removal is not so easy. For example, ripgrep will return an
|
||||
error when your pattern includes a `\n` literal:
|
||||
|
||||
```
|
||||
$ rg '\n'
|
||||
the literal '"\n"' is not allowed in a regex
|
||||
```
|
||||
|
||||
So what does this have to do with PCRE2? Well, ripgrep's default regex engine
|
||||
exposes APIs for doing syntactic analysis on the pattern in a way that makes
|
||||
it quite easy to strip `\n` from the pattern (or otherwise detect it and report
|
||||
an error if stripping isn't possible). PCRE2 seemingly does not provide a
|
||||
similar API, so ripgrep does not do any stripping when PCRE2 is enabled. This
|
||||
forces ripgrep to use the "slow" search strategy of searching each line
|
||||
individually.
|
||||
|
||||
OK, so if enabling PCRE2 slows down the default method of searching because it
|
||||
forces matches to be limited to a single line, then why is PCRE2 also sometimes
|
||||
slower when performing multiline searches? Well, that's because there are
|
||||
*multiple* reasons why using PCRE2 in ripgrep can be slower than the default
|
||||
regex engine. This time, blame PCRE2's Unicode support, which ripgrep enables
|
||||
by default. In particular, PCRE2 cannot simultaneously enable Unicode support
|
||||
and search arbitrary data. That is, when PCRE2's Unicode support is enabled,
|
||||
the data **must** be valid UTF-8 (to do otherwise is to invoke undefined
|
||||
behavior). This is in contrast to ripgrep's default regex engine, which can
|
||||
enable Unicode support and still search arbitrary data. ripgrep's default
|
||||
regex engine simply won't match invalid UTF-8 for a pattern that can otherwise
|
||||
only match valid UTF-8. Why doesn't PCRE2 do the same? This author isn't
|
||||
familiar with its internals, so we can't comment on it here.
|
||||
|
||||
The bottom line here is that we can't enable PCRE2's Unicode support without
|
||||
simultaneously incurring a performance penalty for ensuring that we are
|
||||
searching valid UTF-8. In particular, ripgrep will transcode the contents
|
||||
of each file to UTF-8 while replacing invalid UTF-8 data with the Unicode
|
||||
replacement codepoint. ripgrep then disables PCRE2's own internal UTF-8
|
||||
checking, since we've guaranteed the data we hand it will be valid UTF-8. The
|
||||
reason why ripgrep takes this approach is because if we do hand PCRE2 invalid
|
||||
UTF-8, then it will report a match error if it comes across an invalid UTF-8
|
||||
sequence. This is not good news for ripgrep, since it will stop it from
|
||||
searching the rest of the file, and will also print potentially undesirable
|
||||
error messages to users.
|
||||
|
||||
All right, the above is a lot of information to swallow if you aren't already
|
||||
familiar with ripgrep internals. Let's make this concrete with some examples.
|
||||
First, let's get some data big enough to magnify the performance differences:
|
||||
|
||||
```
|
||||
$ curl -O 'https://burntsushi.net/stuff/subtitles2016-sample.gz'
|
||||
$ gzip -d subtitles2016-sample
|
||||
$ md5sum subtitles2016-sample
|
||||
e3cb796a20bbc602fbfd6bb43bda45f5 subtitles2016-sample
|
||||
```
|
||||
|
||||
To search this data, we will use the pattern `^\w{42}$`, which contains exactly
|
||||
one hit in the file and has no literals. Having no literals is important,
|
||||
because it ensures that the regex engine won't use literal optimizations to
|
||||
speed up the search. In other words, it lets us reason coherently about the
|
||||
actual task that the regex engine is performing.
|
||||
|
||||
Let's now walk through a few examples in light of the information above. First,
|
||||
let's consider the default search using ripgrep's default regex engine and
|
||||
then the same search with PCRE2:
|
||||
|
||||
```
|
||||
$ time rg '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.783s
|
||||
user 0m1.731s
|
||||
sys 0m0.051s
|
||||
|
||||
$ time rg -P '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m2.458s
|
||||
user 0m2.419s
|
||||
sys 0m0.038s
|
||||
```
|
||||
|
||||
In this particular example, both pattern searches are using a Unicode aware
|
||||
`\w` character class and both are counting lines in order to report line
|
||||
numbers. The key difference here is that the first search will not search
|
||||
line by line, but the second one will. We can observe which strategy ripgrep
|
||||
uses by passing the `--trace` flag:
|
||||
|
||||
```
|
||||
$ rg '^\w{42}$' subtitles2016-sample --trace
|
||||
[... snip ...]
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:712: slice reader: searching via slice-by-line strategy
|
||||
TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:61: searcher core: will use fast line searcher
|
||||
[... snip ...]
|
||||
|
||||
$ rg -P '^\w{42}$' subtitles2016-sample --trace
|
||||
[... snip ...]
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:622: Some("subtitles2016-sample"): searching via memory map
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:705: slice reader: needs transcoding, using generic reader
|
||||
TRACE|grep_searcher::searcher|grep-searcher/src/searcher/mod.rs:685: generic reader: searching via roll buffer strategy
|
||||
TRACE|grep_searcher::searcher::core|grep-searcher/src/searcher/core.rs:63: searcher core: will use slow line searcher
|
||||
[... snip ...]
|
||||
```
|
||||
|
||||
The first says it is using the "fast line searcher" where as the latter says
|
||||
it is using the "slow line searcher." The latter also shows that we are
|
||||
decoding the contents of the file, which also impacts performance.
|
||||
|
||||
Interestingly, in this case, the pattern does not match a `\n` and the file
|
||||
we're searching is valid UTF-8, so neither the slow line-by-line search
|
||||
strategy nor the decoding are necessary. We could fix the former issue with
|
||||
better PCRE2 introspection APIs. We can actually fix the latter issue with
|
||||
ripgrep's `--no-encoding` flag, which prevents the automatic UTF-8 decoding,
|
||||
but will enable PCRE2's own UTF-8 validity checking. Unfortunately, it's slower
|
||||
in my build of ripgrep:
|
||||
|
||||
```
|
||||
$ time rg -P '^\w{42}$' subtitles2016-sample --no-encoding
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m3.074s
|
||||
user 0m3.021s
|
||||
sys 0m0.051s
|
||||
```
|
||||
|
||||
(Tip: use the `--trace` flag to verify that no decoding in ripgrep is
|
||||
happening.)
|
||||
|
||||
A possible reason why PCRE2's UTF-8 checking is slower is because it might
|
||||
not be better than the highly optimized UTF-8 checking routines found in the
|
||||
[`encoding_rs`](https://github.com/hsivonen/encoding_rs) library, which is what
|
||||
ripgrep uses for UTF-8 decoding. Moreover, my build of ripgrep enables
|
||||
`encoding_rs`'s SIMD optimizations, which may be in play here.
|
||||
|
||||
Also, note that using the `--no-encoding` flag can cause PCRE2 to report
|
||||
invalid UTF-8 errors, which causes ripgrep to stop searching the file:
|
||||
|
||||
```
|
||||
$ cat invalid-utf8
|
||||
foobar
|
||||
|
||||
$ xxd invalid-utf8
|
||||
00000000: 666f 6fff 6261 720a foo.bar.
|
||||
|
||||
$ rg foo invalid-utf8
|
||||
1:foobar
|
||||
|
||||
$ rg -P foo invalid-utf8
|
||||
1:foo<6F>bar
|
||||
|
||||
$ rg -P foo invalid-utf8 --no-encoding
|
||||
invalid-utf8: PCRE2: error matching: UTF-8 error: illegal byte (0xfe or 0xff)
|
||||
```
|
||||
|
||||
All right, so at this point, you might think that we could remove the penalty
|
||||
for line-by-line searching by enabling multiline search. After all, our
|
||||
particular pattern can't match across multiple lines anyway, so we'll still get
|
||||
the results we want. Let's try it:
|
||||
|
||||
```
|
||||
$ time rg -U '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.803s
|
||||
user 0m1.748s
|
||||
sys 0m0.054s
|
||||
|
||||
$ time rg -P -U '^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m2.962s
|
||||
user 0m2.246s
|
||||
sys 0m0.713s
|
||||
```
|
||||
|
||||
Search times remain the same with the default regex engine, but the PCRE2
|
||||
search gets _slower_. What happened? The secrets can be revealed with the
|
||||
`--trace` flag once again. In the former case, ripgrep actually detects that
|
||||
the pattern can't match across multiple lines, and so will fall back to the
|
||||
"fast line search" strategy as with our search without `-U`.
|
||||
|
||||
However, for PCRE2, things are much worse. Namely, since Unicode mode is still
|
||||
enabled, ripgrep is still going to decode UTF-8 to ensure that it hands only
|
||||
valid UTF-8 to PCRE2. Unfortunately, one key downside of multiline search is
|
||||
that ripgrep cannot do it incrementally. Since matches can be arbitrarily long,
|
||||
ripgrep actually needs the entire file in memory at once. Normally, we can use
|
||||
a memory map for this, but because we need to UTF-8 decode the file before
|
||||
searching it, ripgrep winds up reading the entire contents of the file on to
|
||||
the heap before executing a search. Owch.
|
||||
|
||||
OK, so Unicode is killing us here. The file we're searching is _mostly_ ASCII,
|
||||
so maybe we're OK with missing some data. (Try `rg '[\w--\p{ascii}]'` to see
|
||||
non-ASCII word characters that an ASCII-only `\w` character class would miss.)
|
||||
We can disable Unicode in both searches, but this is done differently depending
|
||||
on the regex engine we use:
|
||||
|
||||
```
|
||||
$ time rg '(?-u)^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.714s
|
||||
user 0m1.669s
|
||||
sys 0m0.044s
|
||||
|
||||
$ time rg -P '^\w{42}$' subtitles2016-sample --no-pcre2-unicode
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.997s
|
||||
user 0m1.958s
|
||||
sys 0m0.037s
|
||||
```
|
||||
|
||||
For the most part, ripgrep's default regex engine performs about the same.
|
||||
PCRE2 does improve a little bit, and is now almost as fast as the default
|
||||
regex engine. If you look at the output of `--trace`, you'll see that ripgrep
|
||||
will no longer perform UTF-8 decoding, but it does still use the slow
|
||||
line-by-line searcher.
|
||||
|
||||
At this point, we can combine all of our insights above: let's try to get off
|
||||
of the slow line-by-line searcher by enabling multiline mode, and let's stop
|
||||
UTF-8 decoding by disabling Unicode support:
|
||||
|
||||
```
|
||||
$ time rg -U '(?-u)^\w{42}$' subtitles2016-sample
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.714s
|
||||
user 0m1.655s
|
||||
sys 0m0.058s
|
||||
|
||||
$ time rg -P -U '^\w{42}$' subtitles2016-sample --no-pcre2-unicode
|
||||
21225780:EverymajordevelopmentinthehistoryofAmerica
|
||||
|
||||
real 0m1.121s
|
||||
user 0m1.071s
|
||||
sys 0m0.048s
|
||||
```
|
||||
|
||||
Ah, there's PCRE2's JIT shining! ripgrep's default regex engine once again
|
||||
remains about the same, but PCRE2 no longer needs to search line-by-line and it
|
||||
no longer needs to do any kind of UTF-8 checks. This allows the file to get
|
||||
memory mapped and passed right through PCRE2's JIT at impressive speeds. (As
|
||||
a brief and interesting historical note, the configuration of "memory map +
|
||||
multiline + no-Unicode" is exactly the configuration used by The Silver
|
||||
Searcher. This analysis perhaps sheds some reasoning as to why that
|
||||
configuration is useful!)
|
||||
|
||||
In summary, if you want PCRE2 to go as fast as possible and you don't care
|
||||
about Unicode and you don't care about matches possibly spanning across
|
||||
multiple lines, then enable multiline mode with `-U` and disable PCRE2's
|
||||
Unicode support with the `--no-pcre2-unicode` flag.
|
||||
|
||||
Caveat emptor: This author is not a PCRE2 expert, so there may be APIs that can
|
||||
improve performance that the author missed. Similarly, there may be alternative
|
||||
designs for a searching tool that are more amenable to how PCRE2 works.
|
||||
|
||||
|
||||
<h3 name="rg-other-cmd">
|
||||
When I run <code>rg</code>, why does it execute some other command?
|
||||
</h3>
|
||||
@@ -616,7 +935,7 @@ for the previous section apply.
|
||||
|
||||
* Are you writing portable shell scripts intended to work in a variety of
|
||||
environments? Great, probably not a good idea to use ripgrep! ripgrep is has
|
||||
nowhere near the ubquity of grep, so if you do use ripgrep, you might need
|
||||
nowhere near the ubiquity of grep, so if you do use ripgrep, you might need
|
||||
to futz with the installation process more than you would with grep.
|
||||
* Do you care about POSIX compatibility? If so, then you can't use ripgrep
|
||||
because it never was, isn't and never will be POSIX compatible.
|
||||
|
132
GUIDE.md
132
GUIDE.md
@@ -18,6 +18,7 @@ translatable to any command line shell environment.
|
||||
* [Replacements](#replacements)
|
||||
* [Configuration file](#configuration-file)
|
||||
* [File encoding](#file-encoding)
|
||||
* [Binary data](#binary-data)
|
||||
* [Common options](#common-options)
|
||||
|
||||
|
||||
@@ -109,7 +110,7 @@ colors, you'll notice that `faster` will be highlighted instead of just the
|
||||
|
||||
It is beyond the scope of this guide to provide a full tutorial on regular
|
||||
expressions, but ripgrep's specific syntax is documented here:
|
||||
https://docs.rs/regex/0.2.5/regex/#syntax
|
||||
https://docs.rs/regex/*/regex/#syntax
|
||||
|
||||
|
||||
### Recursive search
|
||||
@@ -227,7 +228,7 @@ with the following contents:
|
||||
```
|
||||
|
||||
ripgrep treats `.ignore` files with higher precedence than `.gitignore` files
|
||||
(and treats `.rgignore` files with higher precdence than `.ignore` files).
|
||||
(and treats `.rgignore` files with higher precedence than `.ignore` files).
|
||||
This means ripgrep will see the `!log/` whitelist rule first and search that
|
||||
directory.
|
||||
|
||||
@@ -235,6 +236,11 @@ Like `.gitignore`, a `.ignore` file can be placed in any directory. Its rules
|
||||
will be processed with respect to the directory it resides in, just like
|
||||
`.gitignore`.
|
||||
|
||||
To process `.gitignore` and `.ignore` files case insensitively, use the flag
|
||||
`--ignore-file-case-insensitive`. This is especially useful on case insensitive
|
||||
file systems like those on Windows and macOS. Note though that this can come
|
||||
with a significant performance penalty, and is therefore disabled by default.
|
||||
|
||||
For a more in depth description of how glob patterns in a `.gitignore` file
|
||||
are interpreted, please see `man gitignore`.
|
||||
|
||||
@@ -520,9 +526,9 @@ config file. Once the environment variable is set, open the file and just type
|
||||
in the flags you want set automatically. There are only two rules for
|
||||
describing the format of the config file:
|
||||
|
||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of
|
||||
ASCII whitespace) are ignored.
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of whitespace)
|
||||
are ignored.
|
||||
|
||||
In particular, there is no escaping. Each line is given to ripgrep as a single
|
||||
command line argument verbatim.
|
||||
@@ -532,8 +538,9 @@ formatting peculiarities:
|
||||
|
||||
```
|
||||
$ cat $HOME/.ripgreprc
|
||||
# Don't let ripgrep vomit really long lines to my terminal.
|
||||
# Don't let ripgrep vomit really long lines to my terminal, and show a preview.
|
||||
--max-columns=150
|
||||
--max-columns-preview
|
||||
|
||||
# Add my 'web' type.
|
||||
--type-add
|
||||
@@ -580,7 +587,7 @@ override it.
|
||||
|
||||
If you're confused about what configuration file ripgrep is reading arguments
|
||||
from, then running ripgrep with the `--debug` flag should help clarify things.
|
||||
The debug output should note what config file is being loaded and the arugments
|
||||
The debug output should note what config file is being loaded and the arguments
|
||||
that have been read from the configuration.
|
||||
|
||||
Finally, if you want to make absolutely sure that ripgrep *isn't* reading a
|
||||
@@ -598,13 +605,14 @@ topic, but we can try to summarize its relevancy to ripgrep:
|
||||
* Files are generally just a bundle of bytes. There is no reliable way to know
|
||||
their encoding.
|
||||
* Either the encoding of the pattern must match the encoding of the files being
|
||||
searched, or a form of transcoding must be performed converts either the
|
||||
searched, or a form of transcoding must be performed that converts either the
|
||||
pattern or the file to the same encoding as the other.
|
||||
* ripgrep tends to work best on plain text files, and among plain text files,
|
||||
the most popular encodings likely consist of ASCII, latin1 or UTF-8. As
|
||||
a special exception, UTF-16 is prevalent in Windows environments
|
||||
|
||||
In light of the above, here is how ripgrep behaves:
|
||||
In light of the above, here is how ripgrep behaves when `--encoding auto` is
|
||||
given, which is the default:
|
||||
|
||||
* All input is assumed to be ASCII compatible (which means every byte that
|
||||
corresponds to an ASCII codepoint actually is an ASCII codepoint). This
|
||||
@@ -620,12 +628,15 @@ In light of the above, here is how ripgrep behaves:
|
||||
they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of
|
||||
the file from UTF-16 to UTF-8, and then execute the search on the transcoded
|
||||
version of the file. (This incurs a performance penalty since transcoding
|
||||
is slower than regex searching.)
|
||||
is slower than regex searching.) If the file contains invalid UTF-16, then
|
||||
the Unicode replacement codepoint is substituted in place of invalid code
|
||||
units.
|
||||
* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits
|
||||
you to specify an encoding from the
|
||||
[Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
|
||||
ripgrep will assume *all* files searched are the encoding specified and
|
||||
will perform a transcoding step just like in the UTF-16 case described above.
|
||||
ripgrep will assume *all* files searched are the encoding specified (unless
|
||||
the file has a BOM) and will perform a transcoding step just like in the
|
||||
UTF-16 case described above.
|
||||
|
||||
By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep
|
||||
can and will search arbitrary bytes. The key here is that if you're searching
|
||||
@@ -635,9 +646,26 @@ pattern won't find anything. With all that said, this mode of operation is
|
||||
important, because it lets you find ASCII or UTF-8 *within* files that are
|
||||
otherwise arbitrary bytes.
|
||||
|
||||
As a special case, the `-E/--encoding` flag supports the value `none`, which
|
||||
will completely disable all encoding related logic, including BOM sniffing.
|
||||
When `-E/--encoding` is set to `none`, ripgrep will search the raw bytes of
|
||||
the underlying file with no transcoding step. For example, here's how you might
|
||||
search the raw UTF-16 encoding of the string `Шерлок`:
|
||||
|
||||
```
|
||||
$ rg '(?-u)\(\x045\x04@\x04;\x04>\x04:\x04' -E none -a some-utf16-file
|
||||
```
|
||||
|
||||
Of course, that's just an example meant to show how one can drop down into
|
||||
raw bytes. Namely, the simpler command works as you might expect automatically:
|
||||
|
||||
```
|
||||
$ rg 'Шерлок' some-utf16-file
|
||||
```
|
||||
|
||||
Finally, it is possible to disable ripgrep's Unicode support from within the
|
||||
pattern regular expression. For example, let's say you wanted `.` to match any
|
||||
byte rather than any Unicode codepoint. (You might want this while searching a
|
||||
regular expression. For example, let's say you wanted `.` to match any byte
|
||||
rather than any Unicode codepoint. (You might want this while searching a
|
||||
binary file, since `.` by default will not match invalid UTF-8.) You could do
|
||||
this by disabling Unicode via a regular expression flag:
|
||||
|
||||
@@ -654,6 +682,76 @@ $ rg '\w(?-u:\w)\w'
|
||||
```
|
||||
|
||||
|
||||
### Binary data
|
||||
|
||||
In addition to skipping hidden files and files in your `.gitignore` by default,
|
||||
ripgrep also attempts to skip binary files. ripgrep does this by default
|
||||
because binary files (like PDFs or images) are typically not things you want to
|
||||
search when searching for regex matches. Moreover, if content in a binary file
|
||||
did match, then it's possible for undesirable binary data to be printed to your
|
||||
terminal and wreak havoc.
|
||||
|
||||
Unfortunately, unlike skipping hidden files and respecting your `.gitignore`
|
||||
rules, a file cannot as easily be classified as binary. In order to figure out
|
||||
whether a file is binary, the most effective heuristic that balances
|
||||
correctness with performance is to simply look for `NUL` bytes. At that point,
|
||||
the determination is simple: a file is considered "binary" if and only if it
|
||||
contains a `NUL` byte somewhere in its contents.
|
||||
|
||||
The issue is that while most binary files will have a `NUL` byte toward the
|
||||
beginning of its contents, this is not necessarily true. The `NUL` byte might
|
||||
be the very last byte in a large file, but that file is still considered
|
||||
binary. While this leads to a fair amount of complexity inside ripgrep's
|
||||
implementation, it also results in some unintuitive user experiences.
|
||||
|
||||
At a high level, ripgrep operates in three different modes with respect to
|
||||
binary files:
|
||||
|
||||
1. The default mode is to attempt to remove binary files from a search
|
||||
completely. This is meant to mirror how ripgrep removes hidden files and
|
||||
files in your `.gitignore` automatically. That is, as soon as a file is
|
||||
detected as binary, searching stops. If a match was already printed (because
|
||||
it was detected long before a `NUL` byte), then ripgrep will print a warning
|
||||
message indicating that the search stopped prematurely. This default mode
|
||||
**only applies to files searched by ripgrep as a result of recursive
|
||||
directory traversal**, which is consistent with ripgrep's other automatic
|
||||
filtering. For example, `rg foo .file` will search `.file` even though it
|
||||
is hidden. Similarly, `rg foo binary-file` search `binary-file` in "binary"
|
||||
mode automatically.
|
||||
2. Binary mode is similar to the default mode, except it will not always
|
||||
stop searching after it sees a `NUL` byte. Namely, in this mode, ripgrep
|
||||
will continue searching a file that is known to be binary until the first
|
||||
of two conditions is met: 1) the end of the file has been reached or 2) a
|
||||
match is or has been seen. This means that in binary mode, if ripgrep
|
||||
reports no matches, then there are no matches in the file. When a match does
|
||||
occur, ripgrep prints a message similar to one it prints when in its default
|
||||
mode indicating that the search has stopped prematurely. This mode can be
|
||||
forcefully enabled for all files with the `--binary` flag. The purpose of
|
||||
binary mode is to provide a way to discover matches in all files, but to
|
||||
avoid having binary data dumped into your terminal.
|
||||
3. Text mode completely disables all binary detection and searches all files
|
||||
as if they were text. This is useful when searching a file that is
|
||||
predominantly text but contains a `NUL` byte, or if you are specifically
|
||||
trying to search binary data. This mode can be enabled with the `-a/--text`
|
||||
flag. Note that when using this mode on very large binary files, it is
|
||||
possible for ripgrep to use a lot of memory.
|
||||
|
||||
Unfortunately, there is one additional complexity in ripgrep that can make it
|
||||
difficult to reason about binary files. That is, the way binary detection works
|
||||
depends on the way that ripgrep searches your files. Specifically:
|
||||
|
||||
* When ripgrep uses memory maps, then binary detection is only performed on the
|
||||
first few kilobytes of the file in addition to every matching line.
|
||||
* When ripgrep doesn't use memory maps, then binary detection is performed on
|
||||
all bytes searched.
|
||||
|
||||
This means that whether a file is detected as binary or not can change based
|
||||
on the internal search strategy used by ripgrep. If you prefer to keep
|
||||
ripgrep's binary file detection consistent, then you can disable memory maps
|
||||
via the `--no-mmap` flag. (The cost will be a small performance regression when
|
||||
searching very large files on some platforms.)
|
||||
|
||||
|
||||
### Common options
|
||||
|
||||
ripgrep has a lot of flags. Too many to keep in your head at once. This section
|
||||
@@ -675,10 +773,10 @@ used options that will likely impact how you use ripgrep on a regular basis.
|
||||
* `--files`: Print the files that ripgrep *would* search, but don't actually
|
||||
search them.
|
||||
* `-a/--text`: Search binary files as if they were plain text.
|
||||
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz). This is
|
||||
disabled by default.
|
||||
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz, lz4,
|
||||
brotli, zstd). This is disabled by default.
|
||||
* `-C/--context`: Show the lines surrounding a match.
|
||||
* `--sort-files`: Force ripgrep to sort its output by file name. (This disables
|
||||
* `--sort path`: Force ripgrep to sort its output by file name. (This disables
|
||||
parallelism, so it might be slower.)
|
||||
* `-L/--follow`: Follow symbolic links while recursively searching.
|
||||
* `-M/--max-columns`: Limit the length of lines printed by ripgrep.
|
||||
|
215
README.md
215
README.md
@@ -1,15 +1,17 @@
|
||||
ripgrep (rg)
|
||||
------------
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
||||
directory for a regex pattern. By default, ripgrep will respect your .gitignore
|
||||
and automatically skip hidden files/directories and binary files. ripgrep
|
||||
has first class support on Windows, macOS and Linux, with binary downloads
|
||||
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher,
|
||||
ack and grep.
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher, ack
|
||||
and grep.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/ripgrep)
|
||||
[](https://repology.org/project/ripgrep/badges)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
@@ -23,10 +25,11 @@ Please see the [CHANGELOG](CHANGELOG.md) for a release history.
|
||||
* [Installation](#installation)
|
||||
* [User Guide](GUIDE.md)
|
||||
* [Frequently Asked Questions](FAQ.md)
|
||||
* [Regex syntax](https://docs.rs/regex/0.2.5/regex/#syntax)
|
||||
* [Regex syntax](https://docs.rs/regex/1/regex/#syntax)
|
||||
* [Configuration files](GUIDE.md#configuration-file)
|
||||
* [Shell completions](FAQ.md#complete)
|
||||
* [Building](#building)
|
||||
* [Translations](#translations)
|
||||
|
||||
|
||||
### Screenshot of search results
|
||||
@@ -85,14 +88,16 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
||||
|
||||
### Why should I use ripgrep?
|
||||
|
||||
* It can replace many use cases served by both The Silver Searcher and GNU grep
|
||||
because it is generally faster than both. (See [the FAQ](FAQ.md#posix4ever)
|
||||
for more details on whether ripgrep can truly replace grep.)
|
||||
* Like The Silver Searcher, ripgrep defaults to recursive directory search
|
||||
and won't search files ignored by your `.gitignore` files. It also ignores
|
||||
hidden and binary files by default. ripgrep also implements full support
|
||||
for `.gitignore`, whereas there are many bugs related to that functionality
|
||||
in The Silver Searcher.
|
||||
* It can replace many use cases served by other search tools
|
||||
because it contains most of their features and is generally faster. (See
|
||||
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
|
||||
replace grep.)
|
||||
* Like other tools specialized to code search, ripgrep defaults to recursive
|
||||
directory search and won't search files ignored by your
|
||||
`.gitignore`/`.ignore`/`.rgignore` files. It also ignores hidden and binary
|
||||
files by default. ripgrep also implements full support for `.gitignore`,
|
||||
whereas there are many bugs related to that functionality in other code
|
||||
search tools claiming to provide the same functionality.
|
||||
* ripgrep can search specific types of files. For example, `rg -tpy foo`
|
||||
limits your search to Python files and `rg -Tjs foo` excludes Javascript
|
||||
files from your search. ripgrep can be taught about new file types with
|
||||
@@ -101,38 +106,45 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
||||
of search results, searching multiple patterns, highlighting matches with
|
||||
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
|
||||
supporting Unicode (which is always on).
|
||||
* ripgrep has optional support for switching its regex engine to use PCRE2.
|
||||
Among other things, this makes it possible to use look-around and
|
||||
backreferences in your patterns, which are not supported in ripgrep's default
|
||||
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
|
||||
always) or `--auto-hybrid-regex` (use PCRE2 only if needed).
|
||||
* ripgrep supports searching files in text encodings other than UTF-8, such
|
||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||
automatically detecting UTF-16 is provided. Other text encodings must be
|
||||
specifically specified with the `-E/--encoding` flag.)
|
||||
* ripgrep supports searching files compressed in a common format (gzip, xz,
|
||||
lzma, bzip2 or lz4) with the `-z/--search-zip` flag.
|
||||
* ripgrep supports searching files compressed in a common format (brotli,
|
||||
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
|
||||
* ripgrep supports arbitrary input preprocessing filters which could be PDF
|
||||
text extraction, less supported decompression, decrypting, automatic encoding
|
||||
detection and so on.
|
||||
|
||||
In other words, use ripgrep if you like speed, filtering by default, fewer
|
||||
bugs, and Unicode support.
|
||||
bugs and Unicode support.
|
||||
|
||||
|
||||
### Why shouldn't I use ripgrep?
|
||||
|
||||
I'd like to try to convince you why you *shouldn't* use ripgrep. This should
|
||||
give you a glimpse at some important downsides or missing features of
|
||||
ripgrep.
|
||||
Despite initially not wanting to add every feature under the sun to ripgrep,
|
||||
over time, ripgrep has grown support for most features found in other file
|
||||
searching tools. This includes searching for results spanning across multiple
|
||||
lines, and opt-in support for PCRE2, which provides look-around and
|
||||
backreference support.
|
||||
|
||||
* ripgrep uses a regex engine based on finite automata, so if you want fancy
|
||||
regex features such as backreferences or lookaround, ripgrep won't provide
|
||||
them to you. ripgrep does support lots of things though, including, but not
|
||||
limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`),
|
||||
begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and
|
||||
support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
|
||||
`\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
|
||||
supported.)
|
||||
* ripgrep doesn't have multiline search. (Will happen as an opt-in feature.)
|
||||
At this point, the primary reasons not to use ripgrep probably consist of one
|
||||
or more of the following:
|
||||
|
||||
In other words, if you like fancy regexes or multiline search, then ripgrep
|
||||
may not quite meet your needs (yet).
|
||||
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
|
||||
macOS and Linux, it is not ubiquitous and it does not conform to any
|
||||
standard such as POSIX. The best tool for this job is good old grep.
|
||||
* There still exists some other feature (or bug) not listed in this README that
|
||||
you rely on that's in another tool that isn't in ripgrep.
|
||||
* There is a performance edge case where ripgrep doesn't do well where another
|
||||
tool does do well. (Please file a bug report!)
|
||||
* ripgrep isn't possible to install on your machine or isn't available for your
|
||||
platform. (Please file a bug report!)
|
||||
|
||||
|
||||
### Is it really faster than everything else?
|
||||
@@ -145,7 +157,8 @@ Summarizing, ripgrep is fast because:
|
||||
* It is built on top of
|
||||
[Rust's regex engine](https://github.com/rust-lang-nursery/regex).
|
||||
Rust's regex engine uses finite automata, SIMD and aggressive literal
|
||||
optimizations to make searching very fast.
|
||||
optimizations to make searching very fast. (PCRE2 support can be opted into
|
||||
with the `-P/--pcre2` flag.)
|
||||
* Rust's regex library maintains performance with full Unicode support by
|
||||
building UTF-8 decoding directly into its deterministic finite automaton
|
||||
engine.
|
||||
@@ -154,7 +167,7 @@ Summarizing, ripgrep is fast because:
|
||||
latter is better for large directories. ripgrep chooses the best searching
|
||||
strategy for you automatically.
|
||||
* Applies your ignore patterns in `.gitignore` files using a
|
||||
[`RegexSet`](https://docs.rs/regex/1.0.0/regex/struct.RegexSet.html).
|
||||
[`RegexSet`](https://docs.rs/regex/1/regex/struct.RegexSet.html).
|
||||
That means a single file path can be matched against multiple glob patterns
|
||||
simultaneously.
|
||||
* It uses a lock-free parallel recursive directory iterator, courtesy of
|
||||
@@ -168,6 +181,11 @@ Andy Lester, author of [ack](https://beyondgrep.com/), has published an
|
||||
excellent table comparing the features of ack, ag, git-grep, GNU grep and
|
||||
ripgrep: https://beyondgrep.com/feature-comparison/
|
||||
|
||||
Note that ripgrep has grown a few significant new features recently that
|
||||
are not yet present in Andy's table. This includes, but is not limited to,
|
||||
configuration files, passthru, support for searching compressed files,
|
||||
multiline search and opt-in fancy regex support via PCRE2.
|
||||
|
||||
|
||||
### Installation
|
||||
|
||||
@@ -192,14 +210,6 @@ from homebrew-core, (compiled with rust stable, no SIMD):
|
||||
$ brew install ripgrep
|
||||
```
|
||||
|
||||
or you can install a binary compiled with rust nightly (including SIMD and all
|
||||
optimizations) by utilizing a custom tap:
|
||||
|
||||
```
|
||||
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
|
||||
$ brew install ripgrep-bin
|
||||
```
|
||||
|
||||
If you're a **MacPorts** user, then you can install ripgrep from the
|
||||
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
|
||||
|
||||
@@ -207,13 +217,15 @@ If you're a **MacPorts** user, then you can install ripgrep from the
|
||||
$ sudo port install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Windows Chocolatey** user, then you can install ripgrep from the [official repo](https://chocolatey.org/packages/ripgrep):
|
||||
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
|
||||
[official repo](https://chocolatey.org/packages/ripgrep):
|
||||
|
||||
```
|
||||
$ choco install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Windows Scoop** user, then you can install ripgrep from the [official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
|
||||
If you're a **Windows Scoop** user, then you can install ripgrep from the
|
||||
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
|
||||
|
||||
```
|
||||
$ scoop install ripgrep
|
||||
@@ -225,32 +237,38 @@ If you're an **Arch Linux** user, then you can install ripgrep from the official
|
||||
$ pacman -S ripgrep
|
||||
```
|
||||
|
||||
If you're a **Gentoo** user, you can install ripgrep from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
|
||||
If you're a **Gentoo** user, you can install ripgrep from the
|
||||
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
|
||||
|
||||
```
|
||||
$ emerge sys-apps/ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora 27+** user, you can install ripgrep from official repositories.
|
||||
If you're a **Fedora** user, you can install ripgrep from official
|
||||
repositories.
|
||||
|
||||
```
|
||||
$ sudo dnf install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora 24+** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
If you're an **openSUSE Leap 15.0** user, you can install ripgrep from the
|
||||
[utilities repo](https://build.opensuse.org/package/show/utilities/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo dnf copr enable carlwgeorge/ripgrep
|
||||
$ sudo dnf install ripgrep
|
||||
$ sudo zypper ar https://download.opensuse.org/repositories/utilities/openSUSE_Leap_15.0/utilities.repo
|
||||
$ sudo zypper install ripgrep
|
||||
```
|
||||
|
||||
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the [official repo](http://software.opensuse.org/package/ripgrep):
|
||||
|
||||
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the
|
||||
[official repo](http://software.opensuse.org/package/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo zypper install ripgrep
|
||||
```
|
||||
|
||||
If you're a **RHEL/CentOS 7** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
If you're a **RHEL/CentOS 7** user, you can install ripgrep from
|
||||
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
|
||||
@@ -267,12 +285,25 @@ $ # (Or using the attribute name, which is also ripgrep.)
|
||||
|
||||
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
|
||||
then ripgrep can be installed using a binary `.deb` file provided in each
|
||||
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). Note that
|
||||
ripgrep is not in the official Debian or Ubuntu repositories.
|
||||
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
```
|
||||
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.8.1/ripgrep_0.8.1_amd64.deb
|
||||
$ sudo dpkg -i ripgrep_0.8.1_amd64.deb
|
||||
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/11.0.2/ripgrep_11.0.2_amd64.deb
|
||||
$ sudo dpkg -i ripgrep_11.0.2_amd64.deb
|
||||
```
|
||||
|
||||
If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
|
||||
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
|
||||
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
|
||||
packaging as Debian:
|
||||
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
|
||||
@@ -280,26 +311,30 @@ seem to work right and generate a number of very strange bug reports that I
|
||||
don't know how to fix and don't have the time to fix. Therefore, it is no
|
||||
longer a recommended installation option.)
|
||||
|
||||
If you're a **FreeBSD** user, then you can install ripgrep from the [official ports](https://www.freshports.org/textproc/ripgrep/):
|
||||
If you're a **FreeBSD** user, then you can install ripgrep from the
|
||||
[official ports](https://www.freshports.org/textproc/ripgrep/):
|
||||
|
||||
```
|
||||
# pkg install ripgrep
|
||||
```
|
||||
|
||||
If you're an **OpenBSD** user, then you can install ripgrep from the [official ports](http://openports.se/textproc/ripgrep):
|
||||
If you're an **OpenBSD** user, then you can install ripgrep from the
|
||||
[official ports](http://openports.se/textproc/ripgrep):
|
||||
|
||||
```
|
||||
$ doas pkg_add ripgrep
|
||||
```
|
||||
|
||||
If you're a **NetBSD** user, then you can install ripgrep from [pkgsrc](http://pkgsrc.se/textproc/ripgrep):
|
||||
If you're a **NetBSD** user, then you can install ripgrep from
|
||||
[pkgsrc](http://pkgsrc.se/textproc/ripgrep):
|
||||
|
||||
```
|
||||
# pkgin install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.23.0**,
|
||||
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
|
||||
although ripgrep may work with older versions.
|
||||
* Note that the binary may be bigger than expected because it contains debug
|
||||
symbols. This is intentional. To remove debug symbols and therefore reduce
|
||||
@@ -309,18 +344,15 @@ If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||
$ cargo install ripgrep
|
||||
```
|
||||
|
||||
When compiling with Rust 1.27 or newer, this will automatically enable SIMD
|
||||
optimizations for search.
|
||||
|
||||
ripgrep isn't currently in any other package repositories.
|
||||
[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10).
|
||||
|
||||
|
||||
### Building
|
||||
|
||||
ripgrep is written in Rust, so you'll need to grab a
|
||||
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
||||
ripgrep compiles with Rust 1.23.0 (stable) or newer. Building is easy:
|
||||
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
|
||||
the latest stable release of the Rust compiler.
|
||||
|
||||
To build ripgrep:
|
||||
|
||||
```
|
||||
$ git clone https://github.com/BurntSushi/ripgrep
|
||||
@@ -334,18 +366,47 @@ If you have a Rust nightly compiler and a recent Intel CPU, then you can enable
|
||||
additional optional SIMD acceleration like so:
|
||||
|
||||
```
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel'
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel'
|
||||
```
|
||||
|
||||
If your machine doesn't support AVX instructions, then simply remove
|
||||
`avx-accel` from the features list. Similarly for SIMD (which corresponds
|
||||
roughly to SSE instructions).
|
||||
The `simd-accel` feature enables SIMD support in certain ripgrep dependencies
|
||||
(responsible for transcoding). They are not necessary to get SIMD optimizations
|
||||
for search; those are enabled automatically. Hopefully, some day, the
|
||||
`simd-accel` feature will similarly become unnecessary. **WARNING:** Currently,
|
||||
enabling this option can increase compilation times dramatically.
|
||||
|
||||
The `simd-accel` and `avx-accel` features enable SIMD support in certain
|
||||
ripgrep dependencies (responsible for counting lines and transcoding). They
|
||||
are not necessary to get SIMD optimizations for search; those are enabled
|
||||
automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features
|
||||
will similarly become unnecessary.
|
||||
Finally, optional PCRE2 support can be built with ripgrep by enabling the
|
||||
`pcre2` feature:
|
||||
|
||||
```
|
||||
$ cargo build --release --features 'pcre2'
|
||||
```
|
||||
|
||||
(Tip: use `--features 'pcre2 simd-accel'` to also include compile time SIMD
|
||||
optimizations, which will only work with a nightly compiler.)
|
||||
|
||||
Enabling the PCRE2 feature works with a stable Rust compiler and will
|
||||
attempt to automatically find and link with your system's PCRE2 library via
|
||||
`pkg-config`. If one doesn't exist, then ripgrep will build PCRE2 from source
|
||||
using your system's C compiler and then statically link it into the final
|
||||
executable. Static linking can be forced even when there is an available PCRE2
|
||||
system library by either building ripgrep with the MUSL target or by setting
|
||||
`PCRE2_SYS_STATIC=1`.
|
||||
|
||||
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
|
||||
library on your system (consult your friendly neighborhood package manager).
|
||||
Then you just need to add MUSL support to your Rust toolchain and rebuild
|
||||
ripgrep, which yields a fully static executable:
|
||||
|
||||
```
|
||||
$ rustup target add x86_64-unknown-linux-musl
|
||||
$ cargo build --release --target x86_64-unknown-linux-musl
|
||||
```
|
||||
|
||||
Applying the `--features` flag from above works as expected. If you want to
|
||||
build a static executable with MUSL and with PCRE2, then you will need to have
|
||||
`musl-gcc` installed, which might be in a separate package from the actual
|
||||
MUSL library, depending on your Linux distribution.
|
||||
|
||||
|
||||
### Running tests
|
||||
@@ -358,3 +419,11 @@ $ cargo test --all
|
||||
```
|
||||
|
||||
from the repository root.
|
||||
|
||||
|
||||
### Translations
|
||||
|
||||
The following is a list of known translations of ripgrep's documentation. These
|
||||
are unofficially maintained and may not be up to date.
|
||||
|
||||
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
|
||||
|
40
appveyor.yml
40
appveyor.yml
@@ -1,8 +1,6 @@
|
||||
# Inspired from https://github.com/habitat-sh/habitat/blob/master/appveyor.yml
|
||||
cache:
|
||||
- c:\cargo\registry
|
||||
- c:\cargo\git
|
||||
- c:\projects\ripgrep\target
|
||||
|
||||
init:
|
||||
- mkdir c:\cargo
|
||||
@@ -19,14 +17,20 @@ environment:
|
||||
PROJECT_NAME: ripgrep
|
||||
RUST_BACKTRACE: full
|
||||
matrix:
|
||||
- TARGET: i686-pc-windows-gnu
|
||||
CHANNEL: stable
|
||||
- TARGET: i686-pc-windows-msvc
|
||||
CHANNEL: stable
|
||||
- TARGET: x86_64-pc-windows-gnu
|
||||
CHANNEL: stable
|
||||
BITS: 64
|
||||
MSYS2: 1
|
||||
- TARGET: x86_64-pc-windows-msvc
|
||||
CHANNEL: stable
|
||||
BITS: 64
|
||||
- TARGET: i686-pc-windows-gnu
|
||||
CHANNEL: stable
|
||||
BITS: 32
|
||||
MSYS2: 1
|
||||
- TARGET: i686-pc-windows-msvc
|
||||
CHANNEL: stable
|
||||
BITS: 32
|
||||
|
||||
matrix:
|
||||
fast_finish: true
|
||||
@@ -35,27 +39,27 @@ matrix:
|
||||
# (Based on from https://github.com/rust-lang/libc/blob/master/appveyor.yml)
|
||||
install:
|
||||
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
|
||||
- rustup-init.exe -y --default-host %TARGET% --no-modify-path
|
||||
- if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin
|
||||
- rustup-init.exe -y --default-host %TARGET%
|
||||
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
|
||||
- if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH%
|
||||
- rustc -V
|
||||
- cargo -V
|
||||
|
||||
# ???
|
||||
# Hack to work around a harmless warning in Appveyor builds?
|
||||
build: false
|
||||
|
||||
# Equivalent to Travis' `script` phase
|
||||
# TODO modify this phase as you see fit
|
||||
test_script:
|
||||
- cargo test --verbose --all
|
||||
- cargo test --verbose --all --features pcre2
|
||||
|
||||
before_deploy:
|
||||
# Generate artifacts for release
|
||||
- cargo build --release
|
||||
- cargo build --release --features pcre2
|
||||
- mkdir staging
|
||||
- copy target\release\rg.exe staging
|
||||
- ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging
|
||||
- cd staging
|
||||
# release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
|
||||
# release zipfile will look like 'ripgrep-1.2.3-x86_64-pc-windows-msvc'
|
||||
- 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
|
||||
- appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip
|
||||
|
||||
@@ -68,18 +72,10 @@ deploy:
|
||||
provider: GitHub
|
||||
# deploy when a new tag is pushed and only on the stable channel
|
||||
on:
|
||||
# channel to use to produce the release artifacts
|
||||
# NOTE make sure you only release *once* per target
|
||||
# TODO you may want to pick a different channel
|
||||
CHANNEL: stable
|
||||
appveyor_repo_tag: true
|
||||
|
||||
branches:
|
||||
only:
|
||||
- /\d+\.\d+\.\d+/
|
||||
- /^\d+\.\d+\.\d+$/
|
||||
- master
|
||||
- ag/libripgrep
|
||||
# - appveyor
|
||||
# - /\d+\.\d+\.\d+/
|
||||
# except:
|
||||
# - master
|
||||
|
12
build.rs
12
build.rs
@@ -1,8 +1,3 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
use std::env;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{self, Read, Write};
|
||||
@@ -168,7 +163,12 @@ fn formatted_arg(arg: &RGArg) -> io::Result<String> {
|
||||
}
|
||||
|
||||
fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> {
|
||||
let paragraphs: Vec<&str> = arg.doc_long.split("\n\n").collect();
|
||||
let paragraphs: Vec<String> = arg.doc_long
|
||||
.replace("{", "{")
|
||||
.replace("}", r"}")
|
||||
.split("\n\n")
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
if paragraphs.is_empty() {
|
||||
return Err(ioerr(format!("missing docs for --{}", arg.name)));
|
||||
}
|
||||
|
@@ -8,7 +8,14 @@ set -ex
|
||||
|
||||
# Generate artifacts for release
|
||||
mk_artifacts() {
|
||||
cargo build --target "$TARGET" --release
|
||||
CARGO="$(builder)"
|
||||
if is_arm; then
|
||||
"$CARGO" build --target "$TARGET" --release
|
||||
else
|
||||
# Technically, MUSL builds will force PCRE2 to get statically compiled,
|
||||
# but we also want PCRE2 statically build for macOS binaries.
|
||||
PCRE2_SYS_STATIC=1 "$CARGO" build --target "$TARGET" --release --features 'pcre2'
|
||||
fi
|
||||
}
|
||||
|
||||
mk_tarball() {
|
||||
|
43
ci/build_deb.sh
Executable file
43
ci/build_deb.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# This script builds a binary dpkg for Debian based distros. It does not
|
||||
# currently run in CI, and is instead run manually and the resulting dpkg is
|
||||
# uploaded to GitHub via the web UI.
|
||||
#
|
||||
# Note that this requires 'cargo deb', which can be installed with
|
||||
# 'cargo install cargo-deb'.
|
||||
#
|
||||
# This should be run from the root of the ripgrep repo.
|
||||
|
||||
if ! command -V cargo-deb > /dev/null 2>&1; then
|
||||
echo "cargo-deb command missing" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 'cargo deb' does not seem to provide a way to specify an asset that is
|
||||
# created at build time, such as ripgrep's man page. To work around this,
|
||||
# we force a debug build, copy out the man page (and shell completions)
|
||||
# produced from that build, put it into a predictable location and then build
|
||||
# the deb, which knows where to look.
|
||||
|
||||
DEPLOY_DIR=deployment/deb
|
||||
mkdir -p "$DEPLOY_DIR"
|
||||
cargo build
|
||||
|
||||
# Find and copy man page.
|
||||
manpage="$(find ./target/debug -name rg.1 -print0 | xargs -0 ls -t | head -n1)"
|
||||
cp "$manpage" "$DEPLOY_DIR/"
|
||||
|
||||
# Do the same for shell completions.
|
||||
compbash="$(find ./target/debug -name rg.bash -print0 | xargs -0 ls -t | head -n1)"
|
||||
cp "$compbash" "$DEPLOY_DIR/"
|
||||
compfish="$(find ./target/debug -name rg.fish -print0 | xargs -0 ls -t | head -n1)"
|
||||
cp "$compfish" "$DEPLOY_DIR/"
|
||||
compzsh="complete/_rg"
|
||||
cp "$compzsh" "$DEPLOY_DIR/"
|
||||
|
||||
# Since we're distributing the dpkg, we don't know whether the user will have
|
||||
# PCRE2 installed, so just do a static build.
|
||||
PCRE2_SYS_STATIC=1 cargo deb
|
5
ci/docker/x86_64-unknown-linux-musl/Dockerfile
Normal file
5
ci/docker/x86_64-unknown-linux-musl/Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
||||
FROM japaric/x86_64-unknown-linux-musl:v0.1.14
|
||||
|
||||
RUN apt-get update \
|
||||
&& apt-get install -y --no-install-recommends \
|
||||
libxslt1-dev asciidoc docbook-xsl xsltproc libxml2-utils
|
10
ci/script.sh
10
ci/script.sh
@@ -7,8 +7,14 @@ set -ex
|
||||
. "$(dirname $0)/utils.sh"
|
||||
|
||||
main() {
|
||||
CARGO="$(builder)"
|
||||
|
||||
# Test a normal debug build.
|
||||
cargo build --target "$TARGET" --verbose --all
|
||||
if is_arm; then
|
||||
"$CARGO" build --target "$TARGET" --verbose
|
||||
else
|
||||
"$CARGO" build --target "$TARGET" --verbose --all --features 'pcre2'
|
||||
fi
|
||||
|
||||
# Show the output of the most recent build.rs stderr.
|
||||
set +x
|
||||
@@ -40,7 +46,7 @@ main() {
|
||||
"$(dirname "${0}")/test_complete.sh"
|
||||
|
||||
# Run tests for ripgrep and all sub-crates.
|
||||
cargo test --target "$TARGET" --verbose --all
|
||||
"$CARGO" test --target "$TARGET" --verbose --all --features 'pcre2'
|
||||
}
|
||||
|
||||
main
|
||||
|
@@ -39,12 +39,14 @@ main() {
|
||||
print -rl - 'Comparing options:' "-$rg" "+$_rg"
|
||||
|
||||
# 'Parse' options out of the `--help` output. To prevent false positives we
|
||||
# only look at lines where the first non-white-space character is `-`
|
||||
# only look at lines where the first non-white-space character is `-`, or
|
||||
# where a long option starting with certain letters (see `_rg`) is found.
|
||||
# Occasionally we may have to handle some manually, however
|
||||
help_args=( ${(f)"$(
|
||||
$rg --help |
|
||||
$rg -- '^\s*-' |
|
||||
$rg -io -- '[\t ,](-[a-z0-9]|--[a-z0-9-]+)\b' |
|
||||
tr -d '\t ,' |
|
||||
$rg -i -- '^\s+--?[a-z0-9]|--[imnp]' |
|
||||
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' |
|
||||
$rg -v -- --print0 | # False positives
|
||||
sort -u
|
||||
)"} )
|
||||
|
||||
@@ -58,8 +60,6 @@ main() {
|
||||
comp_args=( ${comp_args%%-[:[]*} ) # Strip everything after -optname-
|
||||
comp_args=( ${comp_args%%[:+=[]*} ) # Strip everything after other optspecs
|
||||
comp_args=( ${comp_args##[^-]*} ) # Remove non-options
|
||||
|
||||
# This probably isn't necessary, but we should ensure the same order
|
||||
comp_args=( ${(f)"$( print -rl - $comp_args | sort -u )"} )
|
||||
|
||||
(( $#help_args )) || {
|
||||
|
24
ci/utils.sh
24
ci/utils.sh
@@ -55,10 +55,10 @@ gcc_prefix() {
|
||||
esac
|
||||
}
|
||||
|
||||
is_ssse3_target() {
|
||||
case "$(architecture)" in
|
||||
amd64) return 0 ;;
|
||||
*) return 1 ;;
|
||||
is_musl() {
|
||||
case "$TARGET" in
|
||||
*-musl) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
@@ -69,6 +69,13 @@ is_x86() {
|
||||
esac
|
||||
}
|
||||
|
||||
is_x86_64() {
|
||||
case "$(architecture)" in
|
||||
amd64) return 0 ;;
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
is_arm() {
|
||||
case "$(architecture)" in
|
||||
armhf) return 0 ;;
|
||||
@@ -89,3 +96,12 @@ is_osx() {
|
||||
*) return 1 ;;
|
||||
esac
|
||||
}
|
||||
|
||||
builder() {
|
||||
if is_musl && is_x86_64; then
|
||||
cargo install cross
|
||||
echo "cross"
|
||||
else
|
||||
echo "cargo"
|
||||
fi
|
||||
}
|
||||
|
281
complete/_rg
281
complete/_rg
@@ -6,8 +6,8 @@
|
||||
# Run ci/test_complete.sh after building to ensure that the options supported by
|
||||
# this function stay in synch with the `rg` binary.
|
||||
#
|
||||
# @see http://zsh.sourceforge.net/Doc/Release/Completion-System.html
|
||||
# @see https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide
|
||||
# For convenience, a completion reference guide is included at the bottom of
|
||||
# this file.
|
||||
#
|
||||
# Originally based on code from the zsh-users project — see copyright notice
|
||||
# below.
|
||||
@@ -26,8 +26,10 @@ _rg() {
|
||||
# style set. Note that this prefix check has to be updated manually to account
|
||||
# for all of the potential negation options listed below!
|
||||
if
|
||||
# (--[imn]* => --ignore*, --messages, --no-*)
|
||||
[[ $PREFIX$SUFFIX == --[imn]* ]] ||
|
||||
# We also want to list all of these options during testing
|
||||
[[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] ||
|
||||
# (--[imnp]* => --ignore*, --messages, --no-*, --pcre2-unicode)
|
||||
[[ $PREFIX$SUFFIX == --[imnp]* ]] ||
|
||||
zstyle -t ":complete:$curcontext:*" complete-all
|
||||
then
|
||||
no=
|
||||
@@ -41,6 +43,13 @@ _rg() {
|
||||
+ '(exclusive)' # Misc. fully exclusive options
|
||||
'(: * -)'{-h,--help}'[display help information]'
|
||||
'(: * -)'{-V,--version}'[display version information]'
|
||||
'(: * -)'--pcre2-version'[print the version of PCRE2 used by ripgrep, if available]'
|
||||
|
||||
+ '(buffered)' # buffering options
|
||||
'--line-buffered[force line buffering]'
|
||||
$no"--no-line-buffered[don't force line buffering]"
|
||||
'--block-buffered[force block buffering]'
|
||||
$no"--no-block-buffered[don't force block buffering]"
|
||||
|
||||
+ '(case)' # Case-sensitivity options
|
||||
{-i,--ignore-case}'[search case-insensitively]'
|
||||
@@ -61,11 +70,15 @@ _rg() {
|
||||
$no"--no-column[don't show column numbers for matches]"
|
||||
|
||||
+ '(count)' # Counting options
|
||||
'(passthru)'{-c,--count}'[only show count of matching lines for each file]'
|
||||
'(passthru)--count-matches[only show count of individual matches for each file]'
|
||||
{-c,--count}'[only show count of matching lines for each file]'
|
||||
'--count-matches[only show count of individual matches for each file]'
|
||||
|
||||
+ '(encoding)' # Encoding options
|
||||
{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'
|
||||
$no'--no-encoding[use default text encoding]'
|
||||
|
||||
+ file # File-input options
|
||||
'*'{-f+,--file=}'[specify file containing patterns to search for]: :_files'
|
||||
'(1)*'{-f+,--file=}'[specify file containing patterns to search for]: :_files'
|
||||
|
||||
+ '(file-match)' # Files with/without match options
|
||||
'(stats)'{-l,--files-with-matches}'[only show names of files with matches]'
|
||||
@@ -73,7 +86,11 @@ _rg() {
|
||||
|
||||
+ '(file-name)' # File-name options
|
||||
{-H,--with-filename}'[show file name for matches]'
|
||||
"--no-filename[don't show file name for matches]"
|
||||
{-I,--no-filename}"[don't show file name for matches]"
|
||||
|
||||
+ '(file-system)' # File system options
|
||||
"--one-file-system[don't descend into directories on other file systems]"
|
||||
$no'--no-one-file-system[descend into directories on other file systems]'
|
||||
|
||||
+ '(fixed)' # Fixed-string options
|
||||
{-F,--fixed-strings}'[treat pattern as literal string instead of regular expression]'
|
||||
@@ -87,6 +104,10 @@ _rg() {
|
||||
'*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob'
|
||||
'*--iglob=[include/exclude files matching specified case-insensitive glob]:glob'
|
||||
|
||||
+ '(glob-case-insensitive)' # File-glob case sensitivity options
|
||||
'--glob-case-insensitive[treat -g/--glob patterns case insensitively]'
|
||||
$no'--no-glob-case-insensitive[treat -g/--glob patterns case sensitively]'
|
||||
|
||||
+ '(heading)' # Heading options
|
||||
'(pretty-vimgrep)--heading[show matches grouped by file name]'
|
||||
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
|
||||
@@ -95,9 +116,17 @@ _rg() {
|
||||
'--hidden[search hidden files and directories]'
|
||||
$no"--no-hidden[don't search hidden files and directories]"
|
||||
|
||||
+ '(hybrid)' # hybrid regex options
|
||||
'--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
|
||||
$no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
|
||||
|
||||
+ '(ignore)' # Ignore-file options
|
||||
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs)--no-ignore[don't respect ignore files]"
|
||||
$no'(--ignore-global --ignore-parent --ignore-vcs)--ignore[respect ignore files]'
|
||||
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
|
||||
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
|
||||
|
||||
+ '(ignore-file-case-insensitive)' # Ignore-file case sensitivity options
|
||||
'--ignore-file-case-insensitive[process ignore files case insensitively]'
|
||||
$no'--no-ignore-file-case-insensitive[process ignore files case sensitively]'
|
||||
|
||||
+ '(ignore-global)' # Global ignore-file options
|
||||
"--no-ignore-global[don't respect global ignore files]"
|
||||
@@ -111,10 +140,27 @@ _rg() {
|
||||
"--no-ignore-vcs[don't respect version control ignore files]"
|
||||
$no'--ignore-vcs[respect version control ignore files]'
|
||||
|
||||
+ '(line)' # Line-number options
|
||||
+ '(ignore-dot)' # .ignore-file options
|
||||
"--no-ignore-dot[don't respect .ignore files]"
|
||||
$no'--ignore-dot[respect .ignore files]'
|
||||
|
||||
+ '(json)' # JSON options
|
||||
'--json[output results in JSON Lines format]'
|
||||
$no"--no-json[don't output results in JSON Lines format]"
|
||||
|
||||
+ '(line-number)' # Line-number options
|
||||
{-n,--line-number}'[show line numbers for matches]'
|
||||
{-N,--no-line-number}"[don't show line numbers for matches]"
|
||||
|
||||
+ '(line-terminator)' # Line-terminator options
|
||||
'--crlf[use CRLF as line terminator]'
|
||||
$no"--no-crlf[don't use CRLF as line terminator]"
|
||||
'(text)--null-data[use NUL as line terminator]'
|
||||
|
||||
+ '(max-columns-preview)' # max column preview options
|
||||
'--max-columns-preview[show preview for long lines (with -M)]'
|
||||
$no"--no-max-columns-preview[don't show preview for long lines (with -M)]"
|
||||
|
||||
+ '(max-depth)' # Directory-depth options
|
||||
'--max-depth=[specify max number of directories to descend]:number of directories'
|
||||
'!--maxdepth=:number of directories'
|
||||
@@ -131,21 +177,36 @@ _rg() {
|
||||
'--mmap[search using memory maps when possible]'
|
||||
"--no-mmap[don't search using memory maps]"
|
||||
|
||||
+ '(multiline)' # multiline options
|
||||
'--multiline[permit matching across multiple lines]'
|
||||
$no"--no-multiline[restrict matches to at most one line each]"
|
||||
+ '(multiline)' # Multiline options
|
||||
{-U,--multiline}'[permit matching across multiple lines]'
|
||||
$no'(multiline-dotall)--no-multiline[restrict matches to at most one line each]'
|
||||
|
||||
+ '(multiline-dotall)' # Multiline DOTALL options
|
||||
'(--no-multiline)--multiline-dotall[allow "." to match newline (with -U)]'
|
||||
$no"(--no-multiline)--no-multiline-dotall[don't allow \".\" to match newline (with -U)]"
|
||||
|
||||
+ '(only)' # Only-match options
|
||||
'(passthru replace)'{-o,--only-matching}'[show only matching part of each line]'
|
||||
{-o,--only-matching}'[show only matching part of each line]'
|
||||
|
||||
+ '(passthru)' # Pass-through options
|
||||
'(--vimgrep count only replace)--passthru[show both matching and non-matching lines]'
|
||||
'!(--vimgrep count only replace)--passthrough'
|
||||
'(--vimgrep)--passthru[show both matching and non-matching lines]'
|
||||
'!(--vimgrep)--passthrough'
|
||||
|
||||
+ '(pcre2)' # PCRE2 options
|
||||
{-P,--pcre2}'[enable matching with PCRE2]'
|
||||
$no'(pcre2-unicode)--no-pcre2[disable matching with PCRE2]'
|
||||
|
||||
+ '(pcre2-unicode)' # PCRE2 Unicode options
|
||||
$no'(--no-pcre2 --no-pcre2-unicode)--pcre2-unicode[enable PCRE2 Unicode mode (with -P)]'
|
||||
'(--no-pcre2 --pcre2-unicode)--no-pcre2-unicode[disable PCRE2 Unicode mode (with -P)]'
|
||||
|
||||
+ '(pre)' # Preprocessing options
|
||||
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
||||
$no'--no-pre[disable preprocessor utility]'
|
||||
|
||||
+ pre-glob # Preprocessing glob options
|
||||
'*--pre-glob[include/exclude files for preprocessing with --pre]'
|
||||
|
||||
+ '(pretty-vimgrep)' # Pretty/vimgrep display options
|
||||
'(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
|
||||
'(heading passthru)--vimgrep[show results in vim-compatible format]'
|
||||
@@ -154,21 +215,41 @@ _rg() {
|
||||
'(1 file)*'{-e+,--regexp=}'[specify pattern]:pattern'
|
||||
|
||||
+ '(replace)' # Replacement options
|
||||
'(count only passthru)'{-r+,--replace=}'[specify string used to replace matches]:replace string'
|
||||
{-r+,--replace=}'[specify string used to replace matches]:replace string'
|
||||
|
||||
+ '(sort)' # File-sorting options
|
||||
'(threads)--sort-files[sort results by file path (disables parallelism)]'
|
||||
$no"--no-sort-files[don't sort results by file path]"
|
||||
'(threads)--sort=[sort results in ascending order (disables parallelism)]:sort method:((
|
||||
none\:"no sorting"
|
||||
path\:"sort by file path"
|
||||
modified\:"sort by last modified time"
|
||||
accessed\:"sort by last accessed time"
|
||||
created\:"sort by creation time"
|
||||
))'
|
||||
'(threads)--sortr=[sort results in descending order (disables parallelism)]:sort method:((
|
||||
none\:"no sorting"
|
||||
path\:"sort by file path"
|
||||
modified\:"sort by last modified time"
|
||||
accessed\:"sort by last accessed time"
|
||||
created\:"sort by creation time"
|
||||
))'
|
||||
'!(threads)--sort-files[sort results by file path (disables parallelism)]'
|
||||
|
||||
+ stats # Statistics options
|
||||
+ '(stats)' # Statistics options
|
||||
'(--files file-match)--stats[show search statistics]'
|
||||
$no"--no-stats[don't show search statistics]"
|
||||
|
||||
+ '(text)' # Binary-search options
|
||||
{-a,--text}'[search binary files as if they were text]'
|
||||
$no"--no-text[don't search binary files as if they were text]"
|
||||
"--binary[search binary files, don't print binary data]"
|
||||
$no"--no-binary[don't search binary files]"
|
||||
$no"(--null-data)--no-text[don't search binary files as if they were text]"
|
||||
|
||||
+ '(threads)' # Thread-count options
|
||||
'(--sort-files)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
|
||||
'(sort)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
|
||||
|
||||
+ '(trim)' # Trim options
|
||||
'--trim[trim any ASCII whitespace prefix from each line]'
|
||||
$no"--no-trim[don't trim ASCII whitespace prefix from each line]"
|
||||
|
||||
+ type # Type options
|
||||
'*'{-t+,--type=}'[only search files matching specified type]: :_rg_types'
|
||||
@@ -198,7 +279,6 @@ _rg() {
|
||||
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
|
||||
'--debug[show debug messages]'
|
||||
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
|
||||
'(-E --encoding)'{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'
|
||||
"(1 stats)--files[show each file that would be searched (but don't search)]"
|
||||
'*--ignore-file=[specify additional ignore file]:ignore file:_files'
|
||||
'(-v --invert-match)'{-v,--invert-match}'[invert matching]'
|
||||
@@ -313,7 +393,7 @@ _rg_encodings() {
|
||||
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
|
||||
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
|
||||
windows-{31j,874,949,125{0..8}} dos-874 tis-620 ansi_x3.4-1968
|
||||
x-user-defined auto
|
||||
x-user-defined auto none
|
||||
)
|
||||
|
||||
_wanted encodings expl encoding compadd -a "$@" - _encodings
|
||||
@@ -331,6 +411,157 @@ _rg_types() {
|
||||
|
||||
_rg "$@"
|
||||
|
||||
################################################################################
|
||||
# ZSH COMPLETION REFERENCE
|
||||
#
|
||||
# For the convenience of developers who aren't especially familiar with zsh
|
||||
# completion functions, a brief reference guide follows. This is in no way
|
||||
# comprehensive; it covers just enough of the basic structure, syntax, and
|
||||
# conventions to help someone make simple changes like adding new options. For
|
||||
# more complete documentation regarding zsh completion functions, please see the
|
||||
# following:
|
||||
#
|
||||
# * http://zsh.sourceforge.net/Doc/Release/Completion-System.html
|
||||
# * https://github.com/zsh-users/zsh/blob/master/Etc/completion-style-guide
|
||||
#
|
||||
# OVERVIEW
|
||||
#
|
||||
# Most zsh completion functions are defined in terms of `_arguments`, which is a
|
||||
# shell function that takes a series of argument specifications. The specs for
|
||||
# `rg` are stored in an array, which is common for more complex functions; the
|
||||
# elements of the array are passed to `_arguments` on invocation.
|
||||
#
|
||||
# ARGUMENT-SPECIFICATION SYNTAX
|
||||
#
|
||||
# The following is a contrived example of the argument specs for a simple tool:
|
||||
#
|
||||
# '(: * -)'{-h,--help}'[display help information]'
|
||||
# '(-q -v --quiet --verbose)'{-q,--quiet}'[decrease output verbosity]'
|
||||
# '!(-q -v --quiet --verbose)--silent'
|
||||
# '(-q -v --quiet --verbose)'{-v,--verbose}'[increase output verbosity]'
|
||||
# '--color=[specify when to use colors]:when:(always never auto)'
|
||||
# '*:example file:_files'
|
||||
#
|
||||
# Although there may appear to be six specs here, there are actually nine; we
|
||||
# use brace expansion to combine specs for options that go by multiple names,
|
||||
# like `-q` and `--quiet`. This is customary, and ties in with the fact that zsh
|
||||
# merges completion possibilities together when they have the same description.
|
||||
#
|
||||
# The first line defines the option `-h`/`--help`. With most tools, it isn't
|
||||
# useful to complete anything after `--help` because it effectively overrides
|
||||
# all others; the `(: * -)` at the beginning of the spec tells zsh not to
|
||||
# complete any other operands (`:` and `*`) or options (`-`) after this one has
|
||||
# been used. The `[...]` at the end associates a description with `-h`/`--help`;
|
||||
# as mentioned, zsh will see the identical descriptions and merge these options
|
||||
# together when offering completion possibilities.
|
||||
#
|
||||
# The next line defines `-q`/`--quiet`. Here we don't want to suppress further
|
||||
# completions entirely, but we don't want to offer `-q` if `--quiet` has been
|
||||
# given (since they do the same thing), nor do we want to offer `-v` (since it
|
||||
# doesn't make sense to be quiet and verbose at the same time). We don't need to
|
||||
# tell zsh not to offer `--quiet` a second time, since that's the default
|
||||
# behaviour, but since this line expands to two specs describing `-q` *and*
|
||||
# `--quiet` we do need to explicitly list all of them here.
|
||||
#
|
||||
# The next line defines a hidden option `--silent` — maybe it's a deprecated
|
||||
# synonym for `--quiet`. The leading `!` indicates that zsh shouldn't offer this
|
||||
# option during completion. The benefit of providing a spec for an option that
|
||||
# shouldn't be completed is that, if someone *does* use it, we can correctly
|
||||
# suppress completion of other options afterwards.
|
||||
#
|
||||
# The next line defines `-v`/`--verbose`; this works just like `-q`/`--quiet`.
|
||||
#
|
||||
# The next line defines `--color`. In this example, `--color` doesn't have a
|
||||
# corresponding short option, so we don't need to use brace expansion. Further,
|
||||
# there are no other options it's exclusive with (just itself), so we don't need
|
||||
# to define those at the beginning. However, it does take a mandatory argument.
|
||||
# The `=` at the end of `--color=` indicates that the argument may appear either
|
||||
# like `--color always` or like `--color=always`; this is how most GNU-style
|
||||
# command-line tools work. The corresponding short option would normally use `+`
|
||||
# — for example, `-c+` would allow either `-c always` or `-calways`. For this
|
||||
# option, the arguments are known ahead of time, so we can simply list them in
|
||||
# parentheses at the end (`when` is used as the description for the argument).
|
||||
#
|
||||
# The last line defines an operand (a non-option argument). In this example, the
|
||||
# operand can be used any number of times (the leading `*`), and it should be a
|
||||
# file path, so we tell zsh to call the `_files` function to complete it. The
|
||||
# `example file` in the middle is the description to use for this operand; we
|
||||
# could use a space instead to accept the default provided by `_files`.
|
||||
#
|
||||
# GROUPING ARGUMENT SPECIFICATIONS
|
||||
#
|
||||
# Newer versions of zsh support grouping argument specs together. All specs
|
||||
# following a `+` and then a group name are considered to be members of the
|
||||
# named group. Grouping is useful mostly for organisational purposes; it makes
|
||||
# the relationship between different options more obvious, and makes it easier
|
||||
# to specify exclusions.
|
||||
#
|
||||
# We could rewrite our example above using grouping as follows:
|
||||
#
|
||||
# '(: * -)'{-h,--help}'[display help information]'
|
||||
# '--color=[specify when to use colors]:when:(always never auto)'
|
||||
# '*:example file:_files'
|
||||
# + '(verbosity)'
|
||||
# {-q,--quiet}'[decrease output verbosity]'
|
||||
# '!--silent'
|
||||
# {-v,--verbose}'[increase output verbosity]'
|
||||
#
|
||||
# Here we take advantage of a useful feature of spec grouping — when the group
|
||||
# name is surrounded by parentheses, as in `(verbosity)`, it tells zsh that all
|
||||
# of the options in that group are exclusive with each other. As a result, we
|
||||
# don't need to manually list out the exclusions at the beginning of each
|
||||
# option.
|
||||
#
|
||||
# Groups can also be referred to by name in other argument specs; for example:
|
||||
#
|
||||
# '(xyz)--aaa' '*: :_files'
|
||||
# + xyz --xxx --yyy --zzz
|
||||
#
|
||||
# Here we use the group name `xyz` to tell zsh that `--xxx`, `--yyy`, and
|
||||
# `--zzz` are not to be completed after `--aaa`. This makes the exclusion list
|
||||
# much more compact and reusable.
|
||||
#
|
||||
# CONVENTIONS
|
||||
#
|
||||
# zsh completion functions generally adhere to the following conventions:
|
||||
#
|
||||
# * Use two spaces for indentation
|
||||
# * Combine specs for options with different names using brace expansion
|
||||
# * In combined specs, list the short option first (as in `{-a,--text}`)
|
||||
# * Use `+` or `=` as described above for options that take arguments
|
||||
# * Provide a description for all options, option-arguments, and operands
|
||||
# * Capitalise/punctuate argument descriptions as phrases, not complete
|
||||
# sentences — 'display help information', never 'Display help information.'
|
||||
# (but still capitalise acronyms and proper names)
|
||||
# * Write argument descriptions as verb phrases — 'display x', 'enable y',
|
||||
# 'use z'
|
||||
# * Word descriptions to make it clear when an option expects an argument;
|
||||
# usually this is done with the word 'specify', as in 'specify x' or
|
||||
# 'use specified x')
|
||||
# * Write argument descriptions as tersely as possible — for example, articles
|
||||
# like 'a' and 'the' should be omitted unless it would be confusing
|
||||
#
|
||||
# Other conventions currently used by this function:
|
||||
#
|
||||
# * Order argument specs alphabetically by group name, then option name
|
||||
# * Group options that are directly related, mutually exclusive, or frequently
|
||||
# referenced by other argument specs
|
||||
# * Use only characters in the set [a-z0-9_-] in group names
|
||||
# * Order exclusion lists as follows: short options, long options, groups
|
||||
# * Use American English in descriptions
|
||||
# * Use 'don't' in descriptions instead of 'do not'
|
||||
# * Word descriptions for related options as similarly as possible. For example,
|
||||
# `--foo[enable foo]` and `--no-foo[disable foo]`, or `--foo[use foo]` and
|
||||
# `--no-foo[don't use foo]`
|
||||
# * Word descriptions to make it clear when an option only makes sense with
|
||||
# another option, usually by adding '(with -x)' to the end
|
||||
# * Don't quote strings or variables unnecessarily. When quotes are required,
|
||||
# prefer single-quotes to double-quotes
|
||||
# * Prefix option specs with `$no` when the option serves only to negate the
|
||||
# behaviour of another option that must be provided explicitly by the user.
|
||||
# This prevents rarely used options from cluttering up the completion menu
|
||||
################################################################################
|
||||
|
||||
# ------------------------------------------------------------------------------
|
||||
# Copyright (c) 2011 Github zsh-users - http://github.com/zsh-users
|
||||
# All rights reserved.
|
||||
|
101
doc/rg.1.txt.tpl
101
doc/rg.1.txt.tpl
@@ -28,27 +28,40 @@ Synopsis
|
||||
DESCRIPTION
|
||||
-----------
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your `.gitignore` and automatically skip
|
||||
hidden files/directories and binary files.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
|
||||
ripgrep's regex engine uses finite automata and guarantees linear time
|
||||
searching. Because of this, features like backreferences and arbitrary
|
||||
lookaround are not supported.
|
||||
ripgrep's default regex engine uses finite automata and guarantees linear
|
||||
time searching. Because of this, features like backreferences and arbitrary
|
||||
look-around are not supported. However, if ripgrep is built with PCRE2, then
|
||||
the *--pcre2* flag can be used to enable backreferences and look-around.
|
||||
|
||||
ripgrep supports configuration files. Set *RIPGREP_CONFIG_PATH* to a
|
||||
configuration file. The file can specify one shell argument per line. Lines
|
||||
starting with *#* are ignored. For more details, see the man page or the
|
||||
*README*.
|
||||
|
||||
Tip: to disable all smart filtering and make ripgrep behave a bit more like
|
||||
classical grep, use *rg -uuu*.
|
||||
|
||||
|
||||
REGEX SYNTAX
|
||||
------------
|
||||
ripgrep uses Rust's regex engine, which documents its syntax:
|
||||
https://docs.rs/regex/0.2.5/regex/#syntax
|
||||
ripgrep uses Rust's regex engine by default, which documents its syntax:
|
||||
https://docs.rs/regex/*/regex/#syntax
|
||||
|
||||
ripgrep uses byte-oriented regexes, which has some additional documentation:
|
||||
https://docs.rs/regex/0.2.5/regex/bytes/index.html#syntax
|
||||
https://docs.rs/regex/*/regex/bytes/index.html#syntax
|
||||
|
||||
To a first approximation, ripgrep uses Perl-like regexes without look-around or
|
||||
backreferences. This makes them very similar to the "extended" (ERE) regular
|
||||
expressions supported by `egrep`, but with a few additional features like
|
||||
expressions supported by *egrep*, but with a few additional features like
|
||||
Unicode character classes.
|
||||
|
||||
If you're using ripgrep with the *--pcre2* flag, then please consult
|
||||
https://www.pcre.org or the PCRE2 man pages for documentation on the supported
|
||||
syntax.
|
||||
|
||||
|
||||
POSITIONAL ARGUMENTS
|
||||
--------------------
|
||||
@@ -58,18 +71,37 @@ _PATTERN_::
|
||||
|
||||
_PATH_::
|
||||
A file or directory to search. Directories are searched recursively. Paths
|
||||
specified expicitly on the command line override glob and ignore rules.
|
||||
specified explicitly on the command line override glob and ignore rules.
|
||||
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
Note that for many options, there exist flags to disable them. In some cases,
|
||||
those flags are not listed in a first class way below. For example, the
|
||||
*--column* flag (listed below) enables column numbers in ripgrep's output, but
|
||||
the *--no-column* flag (not listed below) disables them. The reverse can also
|
||||
exist. For example, the *--no-ignore* flag (listed below) disables ripgrep's
|
||||
*gitignore* logic, but the *--ignore* flag (not listed below) enables it. These
|
||||
flags are useful for overriding a ripgrep configuration file on the command
|
||||
line. Each flag's documentation notes whether an inverted flag exists. In all
|
||||
cases, the flag specified last takes precedence.
|
||||
|
||||
{OPTIONS}
|
||||
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
If ripgrep finds a match, then the exit status of the program is 0. If no match
|
||||
could be found, then the exit status is non-zero.
|
||||
could be found, then the exit status is 1. If an error occurred, then the exit
|
||||
status is always 2 unless ripgrep was run with the *--quiet* flag and a match
|
||||
was found. In summary:
|
||||
|
||||
* `0` exit status occurs only when at least one match was found, and if
|
||||
no error occurred, unless *--quiet* was given.
|
||||
* `1` exit status occurs only when no match was found and no error occurred.
|
||||
* `2` exit status occurs when an error occurred. This is true for both
|
||||
catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g.,
|
||||
unable to read a file).
|
||||
|
||||
|
||||
CONFIGURATION FILES
|
||||
@@ -78,12 +110,12 @@ ripgrep supports reading configuration files that change ripgrep's default
|
||||
behavior. The format of the configuration file is an "rc" style and is very
|
||||
simple. It is defined by two rules:
|
||||
|
||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
||||
2. Lines starting with _#_ (optionally preceded by any amount of
|
||||
ASCII whitespace) are ignored.
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with *#* (optionally preceded by any amount of
|
||||
whitespace) are ignored.
|
||||
|
||||
ripgrep will look for a single configuration file if and only if the
|
||||
_RIPGREP_CONFIG_PATH_ environment variable is set and is non-empty.
|
||||
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
|
||||
ripgrep will parse shell arguments from this file on startup and will
|
||||
behave as if the arguments in this file were prepended to any explicit
|
||||
arguments given to ripgrep on the command line.
|
||||
@@ -111,16 +143,16 @@ would behave identically to the following command
|
||||
|
||||
same with using globs
|
||||
|
||||
--glob=!git/*
|
||||
--glob=!.git
|
||||
|
||||
or
|
||||
|
||||
--glob
|
||||
!git/*
|
||||
!.git
|
||||
|
||||
would behave identically to the following command
|
||||
|
||||
rg --glob '!git/*' foo
|
||||
rg --glob '!.git' foo
|
||||
|
||||
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
||||
any and all support for configuration. This includes any future support
|
||||
@@ -145,20 +177,35 @@ SHELL COMPLETION
|
||||
Shell completion files are included in the release tarball for Bash, Fish, Zsh
|
||||
and PowerShell.
|
||||
|
||||
For *bash*, move `rg.bash` to `$XDG_CONFIG_HOME/bash_completion`
|
||||
or `/etc/bash_completion.d/`.
|
||||
For *bash*, move *rg.bash* to *$XDG_CONFIG_HOME/bash_completion*
|
||||
or */etc/bash_completion.d/*.
|
||||
|
||||
For *fish*, move `rg.fish` to `$HOME/.config/fish/completions`.
|
||||
For *fish*, move *rg.fish* to *$HOME/.config/fish/completions*.
|
||||
|
||||
For *zsh*, move `_rg` to one of your `$fpath` directories.
|
||||
For *zsh*, move *_rg* to one of your *$fpath* directories.
|
||||
|
||||
|
||||
CAVEATS
|
||||
-------
|
||||
ripgrep may abort unexpectedly when using default settings if it searches a
|
||||
file that is simultaneously truncated. This behavior can be avoided by passing
|
||||
the --no-mmap flag which will forcefully disable the use of memory maps in all
|
||||
cases.
|
||||
the *--no-mmap* flag which will forcefully disable the use of memory maps in
|
||||
all cases.
|
||||
|
||||
ripgrep may use a large amount of memory depending on a few factors. Firstly,
|
||||
if ripgrep uses parallelism for search (the default), then the entire output
|
||||
for each individual file is buffered into memory in order to prevent
|
||||
interleaving matches in the output. To avoid this, you can disable parallelism
|
||||
with the *-j1* flag. Secondly, ripgrep always needs to have at least a single
|
||||
line in memory in order to execute a search. A file with a very long line can
|
||||
thus cause ripgrep to use a lot of memory. Generally, this only occurs when
|
||||
searching binary data with the *-a* flag enabled. (When the *-a* flag isn't
|
||||
enabled, ripgrep will replace all NUL bytes with line terminators, which
|
||||
typically prevents exorbitant memory usage.) Thirdly, when ripgrep searches
|
||||
a large file using a memory map, the process will report its resident memory
|
||||
usage as the size of the file. However, this does not mean ripgrep actually
|
||||
needed to use that much memory; the operating system will generally handle this
|
||||
for you.
|
||||
|
||||
|
||||
VERSION
|
||||
@@ -170,7 +217,11 @@ HOMEPAGE
|
||||
--------
|
||||
https://github.com/BurntSushi/ripgrep
|
||||
|
||||
Please report bugs and feature requests in the issue tracker.
|
||||
Please report bugs and feature requests in the issue tracker. Please do your
|
||||
best to provide a reproducible test case for bugs. This should include the
|
||||
corpus being searched, the *rg* command, the actual output and the expected
|
||||
output. Please also include the output of running the same *rg* command but
|
||||
with the *--debug* flag.
|
||||
|
||||
|
||||
AUTHORS
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "globset"
|
||||
version = "0.4.1" #:version
|
||||
version = "0.4.4" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
@@ -19,14 +19,14 @@ name = "globset"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.6.0"
|
||||
fnv = "1.0"
|
||||
log = "0.4"
|
||||
memchr = "2"
|
||||
regex = "1"
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
fnv = "1.0.6"
|
||||
log = "0.4.5"
|
||||
regex = "1.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2"
|
||||
glob = "0.3.0"
|
||||
|
||||
[features]
|
||||
simd-accel = []
|
||||
|
@@ -4,7 +4,7 @@ Cross platform single glob and glob set matching. Glob set matching is the
|
||||
process of matching one or more glob patterns against a single candidate path
|
||||
simultaneously, and returning all of the globs that matched.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/globset)
|
||||
|
||||
|
@@ -837,40 +837,66 @@ impl<'a> Parser<'a> {
|
||||
|
||||
fn parse_star(&mut self) -> Result<(), Error> {
|
||||
let prev = self.prev;
|
||||
if self.chars.peek() != Some(&'*') {
|
||||
if self.peek() != Some('*') {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
assert!(self.bump() == Some('*'));
|
||||
if !self.have_tokens()? {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
let next = self.bump();
|
||||
if !next.map(is_separator).unwrap_or(true) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
if !self.peek().map_or(true, is_separator) {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
assert!(self.bump().map_or(true, is_separator));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
self.pop_token()?;
|
||||
|
||||
if !prev.map(is_separator).unwrap_or(false) {
|
||||
if self.stack.len() <= 1
|
||||
|| (prev != Some(',') && prev != Some('{')) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
|| (prev != Some(',') && prev != Some('{'))
|
||||
{
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
match self.chars.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
let is_suffix =
|
||||
match self.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
true
|
||||
}
|
||||
Some(',') | Some('}') if self.stack.len() >= 2 => {
|
||||
true
|
||||
}
|
||||
Some(c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
false
|
||||
}
|
||||
_ => {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
match self.pop_token()? {
|
||||
Token::RecursivePrefix => {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
}
|
||||
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
Token::RecursiveSuffix => {
|
||||
self.push_token(Token::RecursiveSuffix)?;
|
||||
}
|
||||
Some(&c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
self.push_token(Token::RecursiveZeroOrMore)
|
||||
_ => {
|
||||
if is_suffix {
|
||||
self.push_token(Token::RecursiveSuffix)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursiveZeroOrMore)?;
|
||||
}
|
||||
}
|
||||
_ => Err(self.error(ErrorKind::InvalidRecursive)),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_class(&mut self) -> Result<(), Error> {
|
||||
@@ -959,6 +985,10 @@ impl<'a> Parser<'a> {
|
||||
self.cur = self.chars.next();
|
||||
self.cur
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().map(|&ch| ch)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1144,13 +1174,6 @@ mod tests {
|
||||
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
|
||||
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
|
||||
|
||||
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
|
||||
@@ -1194,8 +1217,30 @@ mod tests {
|
||||
toregex!(re8, "[*]", r"^[\*]$");
|
||||
toregex!(re9, "[+]", r"^[\+]$");
|
||||
toregex!(re10, "+", r"^\+$");
|
||||
toregex!(re11, "**", r"^.*$");
|
||||
toregex!(re12, "☃", r"^\xe2\x98\x83$");
|
||||
toregex!(re11, "☃", r"^\xe2\x98\x83$");
|
||||
toregex!(re12, "**", r"^.*$");
|
||||
toregex!(re13, "**/", r"^.*$");
|
||||
toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re15, "**/**", r"^.*$");
|
||||
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re17, "**/**/**", r"^.*$");
|
||||
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re28, "a**", r"^a.*.*$");
|
||||
toregex!(re29, "**a", r"^.*.*a$");
|
||||
toregex!(re30, "a**b", r"^a.*.*b$");
|
||||
toregex!(re31, "***", r"^.*.*.*$");
|
||||
toregex!(re32, "/a**", r"^/a.*.*$");
|
||||
toregex!(re33, "/**a", r"^/.*.*a$");
|
||||
toregex!(re34, "/a**b", r"^/a.*.*b$");
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
|
@@ -104,27 +104,25 @@ or to enable case insensitive matching.
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate fnv;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
use std::hash;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
|
||||
use aho_corasick::AhoCorasick;
|
||||
use bstr::{B, ByteSlice, ByteVec};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use pathutil::{
|
||||
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
|
||||
};
|
||||
use pathutil::{file_name, file_name_ext, normalize_path};
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
|
||||
@@ -143,8 +141,13 @@ pub struct Error {
|
||||
/// The kind of error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum ErrorKind {
|
||||
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
|
||||
/// adjacent to a path separator, or the beginning/end of a glob.
|
||||
/// **DEPRECATED**.
|
||||
///
|
||||
/// This error used to occur for consistency with git's glob specification,
|
||||
/// but the specification now accepts all uses of `**`. When `**` does not
|
||||
/// appear adjacent to a path separator or at the beginning/end of a glob,
|
||||
/// it is now treated as two consecutive `*` patterns. As such, this error
|
||||
/// is no longer used.
|
||||
InvalidRecursive,
|
||||
/// Occurs when a character class (e.g., `[abc]`) is not closed.
|
||||
UnclosedClass,
|
||||
@@ -289,6 +292,7 @@ pub struct GlobSet {
|
||||
|
||||
impl GlobSet {
|
||||
/// Create an empty `GlobSet`. An empty set matches nothing.
|
||||
#[inline]
|
||||
pub fn empty() -> GlobSet {
|
||||
GlobSet {
|
||||
len: 0,
|
||||
@@ -297,11 +301,13 @@ impl GlobSet {
|
||||
}
|
||||
|
||||
/// Returns true if this set is empty, and therefore matches nothing.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len == 0
|
||||
}
|
||||
|
||||
/// Returns the number of globs in this set.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
@@ -470,7 +476,6 @@ impl GlobSetBuilder {
|
||||
}
|
||||
|
||||
/// Add a new pattern to this set.
|
||||
#[allow(dead_code)]
|
||||
pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
|
||||
self.pats.push(pat);
|
||||
self
|
||||
@@ -493,12 +498,13 @@ pub struct Candidate<'a> {
|
||||
impl<'a> Candidate<'a> {
|
||||
/// Create a new candidate for matching from the given path.
|
||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
let path = path.as_ref();
|
||||
let basename = file_name(path).unwrap_or(OsStr::new(""));
|
||||
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
|
||||
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
|
||||
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
|
||||
Candidate {
|
||||
path: normalize_path(path_bytes(path)),
|
||||
basename: os_str_bytes(basename),
|
||||
ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")),
|
||||
path: path,
|
||||
basename: basename,
|
||||
ext: ext,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -571,12 +577,12 @@ impl LiteralStrategy {
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.0.contains_key(&*candidate.path)
|
||||
self.0.contains_key(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if let Some(hits) = self.0.get(&*candidate.path) {
|
||||
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -598,7 +604,7 @@ impl BasenameLiteralStrategy {
|
||||
if candidate.basename.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(&*candidate.basename)
|
||||
self.0.contains_key(candidate.basename.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
@@ -606,7 +612,7 @@ impl BasenameLiteralStrategy {
|
||||
if candidate.basename.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(&*candidate.basename) {
|
||||
if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -628,7 +634,7 @@ impl ExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(&*candidate.ext)
|
||||
self.0.contains_key(candidate.ext.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
@@ -636,7 +642,7 @@ impl ExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(&*candidate.ext) {
|
||||
if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -644,7 +650,7 @@ impl ExtensionStrategy {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct PrefixStrategy {
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
matcher: AhoCorasick,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
@@ -652,8 +658,8 @@ struct PrefixStrategy {
|
||||
impl PrefixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -662,9 +668,9 @@ impl PrefixStrategy {
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
matches.push(self.map[m.pati]);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
matches.push(self.map[m.pattern()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -672,7 +678,7 @@ impl PrefixStrategy {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct SuffixStrategy {
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
matcher: AhoCorasick,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
@@ -680,8 +686,8 @@ struct SuffixStrategy {
|
||||
impl SuffixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -690,9 +696,9 @@ impl SuffixStrategy {
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
matches.push(self.map[m.pati]);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
matches.push(self.map[m.pattern()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -706,11 +712,11 @@ impl RequiredExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
match self.0.get(&*candidate.ext) {
|
||||
match self.0.get(candidate.ext.as_bytes()) {
|
||||
None => false,
|
||||
Some(regexes) => {
|
||||
for &(_, ref re) in regexes {
|
||||
if re.is_match(&*candidate.path) {
|
||||
if re.is_match(candidate.path.as_bytes()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -724,9 +730,9 @@ impl RequiredExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(regexes) = self.0.get(&*candidate.ext) {
|
||||
if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
|
||||
for &(global_index, ref re) in regexes {
|
||||
if re.is_match(&*candidate.path) {
|
||||
if re.is_match(candidate.path.as_bytes()) {
|
||||
matches.push(global_index);
|
||||
}
|
||||
}
|
||||
@@ -742,11 +748,11 @@ struct RegexSetStrategy {
|
||||
|
||||
impl RegexSetStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.matcher.is_match(&*candidate.path)
|
||||
self.matcher.is_match(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
for i in self.matcher.matches(&*candidate.path) {
|
||||
for i in self.matcher.matches(candidate.path.as_bytes()) {
|
||||
matches.push(self.map[i]);
|
||||
}
|
||||
}
|
||||
@@ -777,18 +783,16 @@ impl MultiStrategyBuilder {
|
||||
}
|
||||
|
||||
fn prefix(self) -> PrefixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
PrefixStrategy {
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
}
|
||||
|
||||
fn suffix(self) -> SuffixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
SuffixStrategy {
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
|
@@ -1,41 +1,26 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
} else if path.last_byte() == Some(b'.') {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(not(unix))]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
path.as_ref().file_name()
|
||||
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
|
||||
Some(match *path {
|
||||
Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
|
||||
Cow::Owned(ref path) => {
|
||||
let mut path = path.clone();
|
||||
path.drain_bytes(..last_slash);
|
||||
Cow::Owned(path)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return a file extension given a path's file name.
|
||||
@@ -54,55 +39,24 @@ pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
|
||||
/// extension, but it also matches files like `.rs`, which doesn't have an
|
||||
/// extension according to std::path::Path::extension.
|
||||
pub fn file_name_ext(name: &OsStr) -> Option<Cow<[u8]>> {
|
||||
pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let name = os_str_bytes(name);
|
||||
let last_dot_at = {
|
||||
let result = name
|
||||
.iter().enumerate().rev()
|
||||
.find(|&(_, &b)| b == b'.')
|
||||
.map(|(i, _)| i);
|
||||
match result {
|
||||
None => return None,
|
||||
Some(i) => i,
|
||||
}
|
||||
let last_dot_at = match name.rfind_byte(b'.') {
|
||||
None => return None,
|
||||
Some(i) => i,
|
||||
};
|
||||
Some(match name {
|
||||
Some(match *name {
|
||||
Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]),
|
||||
Cow::Owned(mut name) => {
|
||||
name.drain(..last_dot_at);
|
||||
Cow::Owned(ref name) => {
|
||||
let mut name = name.clone();
|
||||
name.drain_bytes(..last_dot_at);
|
||||
Cow::Owned(name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
|
||||
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
|
||||
os_str_bytes(path.as_os_str())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(unix)]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
Cow::Borrowed(s.as_bytes())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
|
||||
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
|
||||
// be useful. We *must* convert to UTF-8 before doing path matching.
|
||||
// Unfortunate, but necessary.
|
||||
match s.to_string_lossy() {
|
||||
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
|
||||
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(unix)]
|
||||
@@ -129,7 +83,8 @@ pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
use bstr::{B, ByteVec};
|
||||
|
||||
use super::{file_name_ext, normalize_path};
|
||||
|
||||
@@ -137,8 +92,9 @@ mod tests {
|
||||
($name:ident, $file_name:expr, $ext:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let got = file_name_ext(OsStr::new($file_name));
|
||||
assert_eq!($ext.map(|s| Cow::Borrowed(s.as_bytes())), got);
|
||||
let bs = Vec::from($file_name);
|
||||
let got = file_name_ext(&Cow::Owned(bs));
|
||||
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -153,7 +109,8 @@ mod tests {
|
||||
($name:ident, $path:expr, $expected:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let got = normalize_path(Cow::Owned($path.to_vec()));
|
||||
let bs = Vec::from_slice($path);
|
||||
let got = normalize_path(Cow::Owned(bs));
|
||||
assert_eq!($expected.to_vec(), got.into_owned());
|
||||
}
|
||||
};
|
||||
|
26
grep-cli/Cargo.toml
Normal file
26
grep-cli/Cargo.toml
Normal file
@@ -0,0 +1,26 @@
|
||||
[package]
|
||||
name = "grep-cli"
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Utilities for search oriented command line applications.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-cli"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "cli", "utility", "util"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bstr = "0.2.0"
|
||||
globset = { version = "0.4.3", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
same-file = "1.0.4"
|
||||
termcolor = "1.0.4"
|
||||
|
||||
[target.'cfg(windows)'.dependencies.winapi-util]
|
||||
version = "0.1.1"
|
21
grep-cli/LICENSE-MIT
Normal file
21
grep-cli/LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
38
grep-cli/README.md
Normal file
38
grep-cli/README.md
Normal file
@@ -0,0 +1,38 @@
|
||||
grep-cli
|
||||
--------
|
||||
A utility library that provides common routines desired in search oriented
|
||||
command line applications. This includes, but is not limited to, parsing hex
|
||||
escapes, detecting whether stdin is readable and more. To the extent possible,
|
||||
this crate strives for compatibility across Windows, macOS and Linux.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-cli)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-cli](https://docs.rs/grep-cli)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-cli = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_cli;
|
||||
```
|
24
grep-cli/UNLICENSE
Normal file
24
grep-cli/UNLICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
382
grep-cli/src/decompress.rs
Normal file
382
grep-cli/src/decompress.rs
Normal file
@@ -0,0 +1,382 @@
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::process::Command;
|
||||
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
|
||||
use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
|
||||
/// A builder for a matcher that determines which files get decompressed.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DecompressionMatcherBuilder {
|
||||
/// The commands for each matching glob.
|
||||
commands: Vec<DecompressionCommand>,
|
||||
/// Whether to include the default matching rules.
|
||||
defaults: bool,
|
||||
}
|
||||
|
||||
/// A representation of a single command for decompressing data
|
||||
/// out-of-proccess.
|
||||
#[derive(Clone, Debug)]
|
||||
struct DecompressionCommand {
|
||||
/// The glob that matches this command.
|
||||
glob: String,
|
||||
/// The command or binary name.
|
||||
bin: OsString,
|
||||
/// The arguments to invoke with the command.
|
||||
args: Vec<OsString>,
|
||||
}
|
||||
|
||||
impl Default for DecompressionMatcherBuilder {
|
||||
fn default() -> DecompressionMatcherBuilder {
|
||||
DecompressionMatcherBuilder::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl DecompressionMatcherBuilder {
|
||||
/// Create a new builder for configuring a decompression matcher.
|
||||
pub fn new() -> DecompressionMatcherBuilder {
|
||||
DecompressionMatcherBuilder {
|
||||
commands: vec![],
|
||||
defaults: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a matcher for determining how to decompress files.
|
||||
///
|
||||
/// If there was a problem compiling the matcher, then an error is
|
||||
/// returned.
|
||||
pub fn build(&self) -> Result<DecompressionMatcher, CommandError> {
|
||||
let defaults =
|
||||
if !self.defaults {
|
||||
vec![]
|
||||
} else {
|
||||
default_decompression_commands()
|
||||
};
|
||||
let mut glob_builder = GlobSetBuilder::new();
|
||||
let mut commands = vec![];
|
||||
for decomp_cmd in defaults.iter().chain(&self.commands) {
|
||||
let glob = Glob::new(&decomp_cmd.glob).map_err(|err| {
|
||||
CommandError::io(io::Error::new(io::ErrorKind::Other, err))
|
||||
})?;
|
||||
glob_builder.add(glob);
|
||||
commands.push(decomp_cmd.clone());
|
||||
}
|
||||
let globs = glob_builder.build().map_err(|err| {
|
||||
CommandError::io(io::Error::new(io::ErrorKind::Other, err))
|
||||
})?;
|
||||
Ok(DecompressionMatcher { globs, commands })
|
||||
}
|
||||
|
||||
/// When enabled, the default matching rules will be compiled into this
|
||||
/// matcher before any other associations. When disabled, only the
|
||||
/// rules explicitly given to this builder will be used.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn defaults(&mut self, yes: bool) -> &mut DecompressionMatcherBuilder {
|
||||
self.defaults = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Associates a glob with a command to decompress files matching the glob.
|
||||
///
|
||||
/// If multiple globs match the same file, then the most recently added
|
||||
/// glob takes precedence.
|
||||
///
|
||||
/// The syntax for the glob is documented in the
|
||||
/// [`globset` crate](https://docs.rs/globset/#syntax).
|
||||
pub fn associate<P, I, A>(
|
||||
&mut self,
|
||||
glob: &str,
|
||||
program: P,
|
||||
args: I,
|
||||
) -> &mut DecompressionMatcherBuilder
|
||||
where P: AsRef<OsStr>,
|
||||
I: IntoIterator<Item=A>,
|
||||
A: AsRef<OsStr>,
|
||||
{
|
||||
|
||||
let glob = glob.to_string();
|
||||
let bin = program.as_ref().to_os_string();
|
||||
let args = args
|
||||
.into_iter()
|
||||
.map(|a| a.as_ref().to_os_string())
|
||||
.collect();
|
||||
self.commands.push(DecompressionCommand { glob, bin, args });
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A matcher for determining how to decompress files.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct DecompressionMatcher {
|
||||
/// The set of globs to match. Each glob has a corresponding entry in
|
||||
/// `commands`. When a glob matches, the corresponding command should be
|
||||
/// used to perform out-of-process decompression.
|
||||
globs: GlobSet,
|
||||
/// The commands for each matching glob.
|
||||
commands: Vec<DecompressionCommand>,
|
||||
}
|
||||
|
||||
impl Default for DecompressionMatcher {
|
||||
fn default() -> DecompressionMatcher {
|
||||
DecompressionMatcher::new()
|
||||
}
|
||||
}
|
||||
|
||||
impl DecompressionMatcher {
|
||||
/// Create a new matcher with default rules.
|
||||
///
|
||||
/// To add more matching rules, build a matcher with
|
||||
/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
|
||||
pub fn new() -> DecompressionMatcher {
|
||||
DecompressionMatcherBuilder::new()
|
||||
.build()
|
||||
.expect("built-in matching rules should always compile")
|
||||
}
|
||||
|
||||
/// Return a pre-built command based on the given file path that can
|
||||
/// decompress its contents. If no such decompressor is known, then this
|
||||
/// returns `None`.
|
||||
///
|
||||
/// If there are multiple possible commands matching the given path, then
|
||||
/// the command added last takes precedence.
|
||||
pub fn command<P: AsRef<Path>>(&self, path: P) -> Option<Command> {
|
||||
for i in self.globs.matches(path).into_iter().rev() {
|
||||
let decomp_cmd = &self.commands[i];
|
||||
let mut cmd = Command::new(&decomp_cmd.bin);
|
||||
cmd.args(&decomp_cmd.args);
|
||||
return Some(cmd);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path has at least one
|
||||
/// matching command to perform decompression on.
|
||||
pub fn has_command<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.globs.is_match(path)
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures and builds a streaming reader for decompressing data.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct DecompressionReaderBuilder {
|
||||
matcher: DecompressionMatcher,
|
||||
command_builder: CommandReaderBuilder,
|
||||
}
|
||||
|
||||
impl DecompressionReaderBuilder {
|
||||
/// Create a new builder with the default configuration.
|
||||
pub fn new() -> DecompressionReaderBuilder {
|
||||
DecompressionReaderBuilder::default()
|
||||
}
|
||||
|
||||
/// Build a new streaming reader for decompressing data.
|
||||
///
|
||||
/// If decompression is done out-of-process and if there was a problem
|
||||
/// spawning the process, then its error is logged at the debug level and a
|
||||
/// passthru reader is returned that does no decompression. This behavior
|
||||
/// typically occurs when the given file path matches a decompression
|
||||
/// command, but is executing in an environment where the decompression
|
||||
/// command is not available.
|
||||
///
|
||||
/// If the given file path could not be matched with a decompression
|
||||
/// strategy, then a passthru reader is returned that does no
|
||||
/// decompression.
|
||||
pub fn build<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
) -> Result<DecompressionReader, CommandError> {
|
||||
let path = path.as_ref();
|
||||
let mut cmd = match self.matcher.command(path) {
|
||||
None => return DecompressionReader::new_passthru(path),
|
||||
Some(cmd) => cmd,
|
||||
};
|
||||
cmd.arg(path);
|
||||
|
||||
match self.command_builder.build(&mut cmd) {
|
||||
Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }),
|
||||
Err(err) => {
|
||||
debug!(
|
||||
"{}: error spawning command '{:?}': {} \
|
||||
(falling back to uncompressed reader)",
|
||||
path.display(),
|
||||
cmd,
|
||||
err,
|
||||
);
|
||||
DecompressionReader::new_passthru(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the matcher to use to look up the decompression command for each
|
||||
/// file path.
|
||||
///
|
||||
/// A set of sensible rules is enabled by default. Setting this will
|
||||
/// completely replace the current rules.
|
||||
pub fn matcher(
|
||||
&mut self,
|
||||
matcher: DecompressionMatcher,
|
||||
) -> &mut DecompressionReaderBuilder {
|
||||
self.matcher = matcher;
|
||||
self
|
||||
}
|
||||
|
||||
/// Get the underlying matcher currently used by this builder.
|
||||
pub fn get_matcher(&self) -> &DecompressionMatcher {
|
||||
&self.matcher
|
||||
}
|
||||
|
||||
/// When enabled, the reader will asynchronously read the contents of the
|
||||
/// command's stderr output. When disabled, stderr is only read after the
|
||||
/// stdout stream has been exhausted (or if the process quits with an error
|
||||
/// code).
|
||||
///
|
||||
/// Note that when enabled, this may require launching an additional
|
||||
/// thread in order to read stderr. This is done so that the process being
|
||||
/// executed is never blocked from writing to stdout or stderr. If this is
|
||||
/// disabled, then it is possible for the process to fill up the stderr
|
||||
/// buffer and deadlock.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn async_stderr(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut DecompressionReaderBuilder {
|
||||
self.command_builder.async_stderr(yes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A streaming reader for decompressing the contents of a file.
|
||||
///
|
||||
/// The purpose of this reader is to provide a seamless way to decompress the
|
||||
/// contents of file using existing tools in the current environment. This is
|
||||
/// meant to be an alternative to using decompression libraries in favor of the
|
||||
/// simplicity and portability of using external commands such as `gzip` and
|
||||
/// `xz`. This does impose the overhead of spawning a process, so other means
|
||||
/// for performing decompression should be sought if this overhead isn't
|
||||
/// acceptable.
|
||||
///
|
||||
/// A decompression reader comes with a default set of matching rules that are
|
||||
/// meant to associate file paths with the corresponding command to use to
|
||||
/// decompress them. For example, a glob like `*.gz` matches gzip compressed
|
||||
/// files with the command `gzip -d -c`. If a file path does not match any
|
||||
/// existing rules, or if it matches a rule whose command does not exist in the
|
||||
/// current environment, then the decompression reader passes through the
|
||||
/// contents of the underlying file without doing any decompression.
|
||||
///
|
||||
/// The default matching rules are probably good enough for most cases, and if
|
||||
/// they require revision, pull requests are welcome. In cases where they must
|
||||
/// be changed or extended, they can be customized through the use of
|
||||
/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html)
|
||||
/// and
|
||||
/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html).
|
||||
///
|
||||
/// By default, this reader will asynchronously read the processes' stderr.
|
||||
/// This prevents subtle deadlocking bugs for noisy processes that write a lot
|
||||
/// to stderr. Currently, the entire contents of stderr is read on to the heap.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to read the decompressed contents of a file without
|
||||
/// needing to explicitly choose the decompression command to run.
|
||||
///
|
||||
/// Note that if you need to decompress multiple files, it is better to use
|
||||
/// `DecompressionReaderBuilder`, which will amortize the cost of compiling the
|
||||
/// matcher.
|
||||
///
|
||||
/// ```no_run
|
||||
/// use std::io::Read;
|
||||
/// use std::process::Command;
|
||||
/// use grep_cli::DecompressionReader;
|
||||
///
|
||||
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
||||
/// let mut rdr = DecompressionReader::new("/usr/share/man/man1/ls.1.gz")?;
|
||||
/// let mut contents = vec![];
|
||||
/// rdr.read_to_end(&mut contents)?;
|
||||
/// # Ok(()) }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct DecompressionReader {
|
||||
rdr: Result<CommandReader, File>,
|
||||
}
|
||||
|
||||
impl DecompressionReader {
|
||||
/// Build a new streaming reader for decompressing data.
|
||||
///
|
||||
/// If decompression is done out-of-process and if there was a problem
|
||||
/// spawning the process, then its error is returned.
|
||||
///
|
||||
/// If the given file path could not be matched with a decompression
|
||||
/// strategy, then a passthru reader is returned that does no
|
||||
/// decompression.
|
||||
///
|
||||
/// This uses the default matching rules for determining how to decompress
|
||||
/// the given file. To change those matching rules, use
|
||||
/// [`DecompressionReaderBuilder`](struct.DecompressionReaderBuilder.html)
|
||||
/// and
|
||||
/// [`DecompressionMatcherBuilder`](struct.DecompressionMatcherBuilder.html).
|
||||
///
|
||||
/// When creating readers for many paths. it is better to use the builder
|
||||
/// since it will amortize the cost of constructing the matcher.
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
path: P,
|
||||
) -> Result<DecompressionReader, CommandError> {
|
||||
DecompressionReaderBuilder::new().build(path)
|
||||
}
|
||||
|
||||
/// Creates a new "passthru" decompression reader that reads from the file
|
||||
/// corresponding to the given path without doing decompression and without
|
||||
/// executing another process.
|
||||
fn new_passthru(path: &Path) -> Result<DecompressionReader, CommandError> {
|
||||
let file = File::open(path)?;
|
||||
Ok(DecompressionReader { rdr: Err(file) })
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for DecompressionReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
match self.rdr {
|
||||
Ok(ref mut rdr) => rdr.read(buf),
|
||||
Err(ref mut rdr) => rdr.read(buf),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"];
|
||||
const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"];
|
||||
const ARGS_XZ: &[&str] = &["xz", "-d", "-c"];
|
||||
const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"];
|
||||
const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"];
|
||||
const ARGS_BROTLI: &[&str] = &["brotli", "-d", "-c"];
|
||||
const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"];
|
||||
|
||||
fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
|
||||
DecompressionCommand {
|
||||
glob: glob.to_string(),
|
||||
bin: OsStr::new(&args[0]).to_os_string(),
|
||||
args: args
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|s| OsStr::new(s).to_os_string())
|
||||
.collect(),
|
||||
}
|
||||
}
|
||||
vec![
|
||||
cmd("*.gz", ARGS_GZIP),
|
||||
cmd("*.tgz", ARGS_GZIP),
|
||||
cmd("*.bz2", ARGS_BZIP),
|
||||
cmd("*.tbz2", ARGS_BZIP),
|
||||
cmd("*.xz", ARGS_XZ),
|
||||
cmd("*.txz", ARGS_XZ),
|
||||
cmd("*.lz4", ARGS_LZ4),
|
||||
cmd("*.lzma", ARGS_LZMA),
|
||||
cmd("*.br", ARGS_BROTLI),
|
||||
cmd("*.zst", ARGS_ZSTD),
|
||||
cmd("*.zstd", ARGS_ZSTD),
|
||||
]
|
||||
}
|
262
grep-cli/src/escape.rs
Normal file
262
grep-cli/src/escape.rs
Normal file
@@ -0,0 +1,262 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::str;
|
||||
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
|
||||
/// A single state in the state machine used by `unescape`.
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
enum State {
|
||||
/// The state after seeing a `\`.
|
||||
Escape,
|
||||
/// The state after seeing a `\x`.
|
||||
HexFirst,
|
||||
/// The state after seeing a `\x[0-9A-Fa-f]`.
|
||||
HexSecond(char),
|
||||
/// Default state.
|
||||
Literal,
|
||||
}
|
||||
|
||||
/// Escapes arbitrary bytes into a human readable string.
|
||||
///
|
||||
/// This converts `\t`, `\r` and `\n` into their escaped forms. It also
|
||||
/// converts the non-printable subset of ASCII in addition to invalid UTF-8
|
||||
/// bytes to hexadecimal escape sequences. Everything else is left as is.
|
||||
///
|
||||
/// The dual of this routine is [`unescape`](fn.unescape.html).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to convert a byte string that contains a `\n` and
|
||||
/// invalid UTF-8 bytes into a `String`.
|
||||
///
|
||||
/// Pay special attention to the use of raw strings. That is, `r"\n"` is
|
||||
/// equivalent to `"\\n"`.
|
||||
///
|
||||
/// ```
|
||||
/// use grep_cli::escape;
|
||||
///
|
||||
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
|
||||
/// ```
|
||||
pub fn escape(bytes: &[u8]) -> String {
|
||||
let mut escaped = String::new();
|
||||
for (s, e, ch) in bytes.char_indices() {
|
||||
if ch == '\u{FFFD}' {
|
||||
for b in bytes[s..e].bytes() {
|
||||
escape_byte(b, &mut escaped);
|
||||
}
|
||||
} else {
|
||||
escape_char(ch, &mut escaped);
|
||||
}
|
||||
}
|
||||
escaped
|
||||
}
|
||||
|
||||
/// Escapes an OS string into a human readable string.
|
||||
///
|
||||
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
|
||||
pub fn escape_os(string: &OsStr) -> String {
|
||||
escape(Vec::from_os_str_lossy(string).as_bytes())
|
||||
}
|
||||
|
||||
/// Unescapes a string.
|
||||
///
|
||||
/// It supports a limited set of escape sequences:
|
||||
///
|
||||
/// * `\t`, `\r` and `\n` are mapped to their corresponding ASCII bytes.
|
||||
/// * `\xZZ` hexadecimal escapes are mapped to their byte.
|
||||
///
|
||||
/// Everything else is left as is, including non-hexadecimal escapes like
|
||||
/// `\xGG`.
|
||||
///
|
||||
/// This is useful when it is desirable for a command line argument to be
|
||||
/// capable of specifying arbitrary bytes or otherwise make it easier to
|
||||
/// specify non-printable characters.
|
||||
///
|
||||
/// The dual of this routine is [`escape`](fn.escape.html).
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to convert an escaped string (which is valid UTF-8)
|
||||
/// into a corresponding sequence of bytes. Each escape sequence is mapped to
|
||||
/// its bytes, which may include invalid UTF-8.
|
||||
///
|
||||
/// Pay special attention to the use of raw strings. That is, `r"\n"` is
|
||||
/// equivalent to `"\\n"`.
|
||||
///
|
||||
/// ```
|
||||
/// use grep_cli::unescape;
|
||||
///
|
||||
/// assert_eq!(&b"foo\nbar\xFFbaz"[..], &*unescape(r"foo\nbar\xFFbaz"));
|
||||
/// ```
|
||||
pub fn unescape(s: &str) -> Vec<u8> {
|
||||
use self::State::*;
|
||||
|
||||
let mut bytes = vec![];
|
||||
let mut state = Literal;
|
||||
for c in s.chars() {
|
||||
match state {
|
||||
Escape => {
|
||||
match c {
|
||||
'\\' => { bytes.push(b'\\'); state = Literal; }
|
||||
'n' => { bytes.push(b'\n'); state = Literal; }
|
||||
'r' => { bytes.push(b'\r'); state = Literal; }
|
||||
't' => { bytes.push(b'\t'); state = Literal; }
|
||||
'x' => { state = HexFirst; }
|
||||
c => {
|
||||
bytes.extend(format!(r"\{}", c).into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
HexFirst => {
|
||||
match c {
|
||||
'0'..='9' | 'A'..='F' | 'a'..='f' => {
|
||||
state = HexSecond(c);
|
||||
}
|
||||
c => {
|
||||
bytes.extend(format!(r"\x{}", c).into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
HexSecond(first) => {
|
||||
match c {
|
||||
'0'..='9' | 'A'..='F' | 'a'..='f' => {
|
||||
let ordinal = format!("{}{}", first, c);
|
||||
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
|
||||
bytes.push(byte);
|
||||
state = Literal;
|
||||
}
|
||||
c => {
|
||||
let original = format!(r"\x{}{}", first, c);
|
||||
bytes.extend(original.into_bytes());
|
||||
state = Literal;
|
||||
}
|
||||
}
|
||||
}
|
||||
Literal => {
|
||||
match c {
|
||||
'\\' => { state = Escape; }
|
||||
c => { bytes.extend(c.to_string().as_bytes()); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
match state {
|
||||
Escape => bytes.push(b'\\'),
|
||||
HexFirst => bytes.extend(b"\\x"),
|
||||
HexSecond(c) => bytes.extend(format!("\\x{}", c).into_bytes()),
|
||||
Literal => {}
|
||||
}
|
||||
bytes
|
||||
}
|
||||
|
||||
/// Unescapes an OS string.
|
||||
///
|
||||
/// This is like [`unescape`](fn.unescape.html), but accepts an OS string.
|
||||
///
|
||||
/// Note that this first lossily decodes the given OS string as UTF-8. That
|
||||
/// is, an escaped string (the thing given) should be valid UTF-8.
|
||||
pub fn unescape_os(string: &OsStr) -> Vec<u8> {
|
||||
unescape(&string.to_string_lossy())
|
||||
}
|
||||
|
||||
/// Adds the given codepoint to the given string, escaping it if necessary.
|
||||
fn escape_char(cp: char, into: &mut String) {
|
||||
if cp.is_ascii() {
|
||||
escape_byte(cp as u8, into);
|
||||
} else {
|
||||
into.push(cp);
|
||||
}
|
||||
}
|
||||
|
||||
/// Adds the given byte to the given string, escaping it if necessary.
|
||||
fn escape_byte(byte: u8, into: &mut String) {
|
||||
match byte {
|
||||
0x21..=0x5B | 0x5D..=0x7D => into.push(byte as char),
|
||||
b'\n' => into.push_str(r"\n"),
|
||||
b'\r' => into.push_str(r"\r"),
|
||||
b'\t' => into.push_str(r"\t"),
|
||||
b'\\' => into.push_str(r"\\"),
|
||||
_ => into.push_str(&format!(r"\x{:02X}", byte)),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{escape, unescape};
|
||||
|
||||
fn b(bytes: &'static [u8]) -> Vec<u8> {
|
||||
bytes.to_vec()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn empty() {
|
||||
assert_eq!(b(b""), unescape(r""));
|
||||
assert_eq!(r"", escape(b""));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn backslash() {
|
||||
assert_eq!(b(b"\\"), unescape(r"\\"));
|
||||
assert_eq!(r"\\", escape(b"\\"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nul() {
|
||||
assert_eq!(b(b"\x00"), unescape(r"\x00"));
|
||||
assert_eq!(r"\x00", escape(b"\x00"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nl() {
|
||||
assert_eq!(b(b"\n"), unescape(r"\n"));
|
||||
assert_eq!(r"\n", escape(b"\n"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn tab() {
|
||||
assert_eq!(b(b"\t"), unescape(r"\t"));
|
||||
assert_eq!(r"\t", escape(b"\t"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn carriage() {
|
||||
assert_eq!(b(b"\r"), unescape(r"\r"));
|
||||
assert_eq!(r"\r", escape(b"\r"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nothing_simple() {
|
||||
assert_eq!(b(b"\\a"), unescape(r"\a"));
|
||||
assert_eq!(b(b"\\a"), unescape(r"\\a"));
|
||||
assert_eq!(r"\\a", escape(b"\\a"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nothing_hex0() {
|
||||
assert_eq!(b(b"\\x"), unescape(r"\x"));
|
||||
assert_eq!(b(b"\\x"), unescape(r"\\x"));
|
||||
assert_eq!(r"\\x", escape(b"\\x"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nothing_hex1() {
|
||||
assert_eq!(b(b"\\xz"), unescape(r"\xz"));
|
||||
assert_eq!(b(b"\\xz"), unescape(r"\\xz"));
|
||||
assert_eq!(r"\\xz", escape(b"\\xz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nothing_hex2() {
|
||||
assert_eq!(b(b"\\xzz"), unescape(r"\xzz"));
|
||||
assert_eq!(b(b"\\xzz"), unescape(r"\\xzz"));
|
||||
assert_eq!(r"\\xzz", escape(b"\\xzz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_utf8() {
|
||||
assert_eq!(r"\xFF", escape(b"\xFF"));
|
||||
assert_eq!(r"a\xFFb", escape(b"a\xFFb"));
|
||||
}
|
||||
}
|
171
grep-cli/src/human.rs
Normal file
171
grep-cli/src/human.rs
Normal file
@@ -0,0 +1,171 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::num::ParseIntError;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
/// An error that occurs when parsing a human readable size description.
|
||||
///
|
||||
/// This error provides a end user friendly message describing why the
|
||||
/// description coudln't be parsed and what the expected format is.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct ParseSizeError {
|
||||
original: String,
|
||||
kind: ParseSizeErrorKind,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
enum ParseSizeErrorKind {
|
||||
InvalidFormat,
|
||||
InvalidInt(ParseIntError),
|
||||
Overflow,
|
||||
}
|
||||
|
||||
impl ParseSizeError {
|
||||
fn format(original: &str) -> ParseSizeError {
|
||||
ParseSizeError {
|
||||
original: original.to_string(),
|
||||
kind: ParseSizeErrorKind::InvalidFormat,
|
||||
}
|
||||
}
|
||||
|
||||
fn int(original: &str, err: ParseIntError) -> ParseSizeError {
|
||||
ParseSizeError {
|
||||
original: original.to_string(),
|
||||
kind: ParseSizeErrorKind::InvalidInt(err),
|
||||
}
|
||||
}
|
||||
|
||||
fn overflow(original: &str) -> ParseSizeError {
|
||||
ParseSizeError {
|
||||
original: original.to_string(),
|
||||
kind: ParseSizeErrorKind::Overflow,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for ParseSizeError {
|
||||
fn description(&self) -> &str { "invalid size" }
|
||||
}
|
||||
|
||||
impl fmt::Display for ParseSizeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::ParseSizeErrorKind::*;
|
||||
|
||||
match self.kind {
|
||||
InvalidFormat => {
|
||||
write!(
|
||||
f,
|
||||
"invalid format for size '{}', which should be a sequence \
|
||||
of digits followed by an optional 'K', 'M' or 'G' \
|
||||
suffix",
|
||||
self.original
|
||||
)
|
||||
}
|
||||
InvalidInt(ref err) => {
|
||||
write!(
|
||||
f,
|
||||
"invalid integer found in size '{}': {}",
|
||||
self.original,
|
||||
err
|
||||
)
|
||||
}
|
||||
Overflow => {
|
||||
write!(f, "size too big in '{}'", self.original)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseSizeError> for io::Error {
|
||||
fn from(size_err: ParseSizeError) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, size_err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Parse a human readable size like `2M` into a corresponding number of bytes.
|
||||
///
|
||||
/// Supported size suffixes are `K` (for kilobyte), `M` (for megabyte) and `G`
|
||||
/// (for gigabyte). If a size suffix is missing, then the size is interpreted
|
||||
/// as bytes. If the size is too big to fit into a `u64`, then this returns an
|
||||
/// error.
|
||||
///
|
||||
/// Additional suffixes may be added over time.
|
||||
pub fn parse_human_readable_size(size: &str) -> Result<u64, ParseSizeError> {
|
||||
lazy_static! {
|
||||
// Normally I'd just parse something this simple by hand to avoid the
|
||||
// regex dep, but we bring regex in any way for glob matching, so might
|
||||
// as well use it.
|
||||
static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap();
|
||||
}
|
||||
|
||||
let caps = match RE.captures(size) {
|
||||
Some(caps) => caps,
|
||||
None => return Err(ParseSizeError::format(size)),
|
||||
};
|
||||
let value: u64 = caps[1].parse().map_err(|err| {
|
||||
ParseSizeError::int(size, err)
|
||||
})?;
|
||||
let suffix = match caps.get(2) {
|
||||
None => return Ok(value),
|
||||
Some(cap) => cap.as_str(),
|
||||
};
|
||||
let bytes = match suffix {
|
||||
"K" => value.checked_mul(1<<10),
|
||||
"M" => value.checked_mul(1<<20),
|
||||
"G" => value.checked_mul(1<<30),
|
||||
// Because if the regex matches this group, it must be [KMG].
|
||||
_ => unreachable!(),
|
||||
};
|
||||
bytes.ok_or_else(|| ParseSizeError::overflow(size))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn suffix_none() {
|
||||
let x = parse_human_readable_size("123").unwrap();
|
||||
assert_eq!(123, x);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suffix_k() {
|
||||
let x = parse_human_readable_size("123K").unwrap();
|
||||
assert_eq!(123 * (1<<10), x);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suffix_m() {
|
||||
let x = parse_human_readable_size("123M").unwrap();
|
||||
assert_eq!(123 * (1<<20), x);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suffix_g() {
|
||||
let x = parse_human_readable_size("123G").unwrap();
|
||||
assert_eq!(123 * (1<<30), x);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_empty() {
|
||||
assert!(parse_human_readable_size("").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_non_digit() {
|
||||
assert!(parse_human_readable_size("a").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_overflow() {
|
||||
assert!(parse_human_readable_size("9999999999999999G").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invalid_suffix() {
|
||||
assert!(parse_human_readable_size("123T").is_err());
|
||||
}
|
||||
}
|
252
grep-cli/src/lib.rs
Normal file
252
grep-cli/src/lib.rs
Normal file
@@ -0,0 +1,252 @@
|
||||
/*!
|
||||
This crate provides common routines used in command line applications, with a
|
||||
focus on routines useful for search oriented applications. As a utility
|
||||
library, there is no central type or function. However, a key focus of this
|
||||
crate is to improve failure modes and provide user friendly error messages
|
||||
when things go wrong.
|
||||
|
||||
To the best extent possible, everything in this crate works on Windows, macOS
|
||||
and Linux.
|
||||
|
||||
|
||||
# Standard I/O
|
||||
|
||||
The
|
||||
[`is_readable_stdin`](fn.is_readable_stdin.html),
|
||||
[`is_tty_stderr`](fn.is_tty_stderr.html),
|
||||
[`is_tty_stdin`](fn.is_tty_stdin.html)
|
||||
and
|
||||
[`is_tty_stdout`](fn.is_tty_stdout.html)
|
||||
routines query aspects of standard I/O. `is_readable_stdin` determines whether
|
||||
stdin can be usefully read from, while the `tty` methods determine whether a
|
||||
tty is attached to stdin/stdout/stderr.
|
||||
|
||||
`is_readable_stdin` is useful when writing an application that changes behavior
|
||||
based on whether the application was invoked with data on stdin. For example,
|
||||
`rg foo` might recursively search the current working directory for
|
||||
occurrences of `foo`, but `rg foo < file` might only search the contents of
|
||||
`file`.
|
||||
|
||||
The `tty` methods are useful for similar reasons. Namely, commands like `ls`
|
||||
will change their output depending on whether they are printing to a terminal
|
||||
or not. For example, `ls` shows a file on each line when stdout is redirected
|
||||
to a file or a pipe, but condenses the output to show possibly many files on
|
||||
each line when stdout is connected to a tty.
|
||||
|
||||
|
||||
# Coloring and buffering
|
||||
|
||||
The
|
||||
[`stdout`](fn.stdout.html),
|
||||
[`stdout_buffered_block`](fn.stdout_buffered_block.html)
|
||||
and
|
||||
[`stdout_buffered_line`](fn.stdout_buffered_line.html)
|
||||
routines are alternative constructors for
|
||||
[`StandardStream`](struct.StandardStream.html).
|
||||
A `StandardStream` implements `termcolor::WriteColor`, which provides a way
|
||||
to emit colors to terminals. Its key use is the encapsulation of buffering
|
||||
style. Namely, `stdout` will return a line buffered `StandardStream` if and
|
||||
only if stdout is connected to a tty, and will otherwise return a block
|
||||
buffered `StandardStream`. Line buffering is important for use with a tty
|
||||
because it typically decreases the latency at which the end user sees output.
|
||||
Block buffering is used otherwise because it is faster, and redirecting stdout
|
||||
to a file typically doesn't benefit from the decreased latency that line
|
||||
buffering provides.
|
||||
|
||||
The `stdout_buffered_block` and `stdout_buffered_line` can be used to
|
||||
explicitly set the buffering strategy regardless of whether stdout is connected
|
||||
to a tty or not.
|
||||
|
||||
|
||||
# Escaping
|
||||
|
||||
The
|
||||
[`escape`](fn.escape.html),
|
||||
[`escape_os`](fn.escape_os.html),
|
||||
[`unescape`](fn.unescape.html)
|
||||
and
|
||||
[`unescape_os`](fn.unescape_os.html)
|
||||
routines provide a user friendly way of dealing with UTF-8 encoded strings that
|
||||
can express arbitrary bytes. For example, you might want to accept a string
|
||||
containing arbitrary bytes as a command line argument, but most interactive
|
||||
shells make such strings difficult to type. Instead, we can ask users to use
|
||||
escape sequences.
|
||||
|
||||
For example, `a\xFFz` is itself a valid UTF-8 string corresponding to the
|
||||
following bytes:
|
||||
|
||||
```ignore
|
||||
[b'a', b'\\', b'x', b'F', b'F', b'z']
|
||||
```
|
||||
|
||||
However, we can
|
||||
interpret `\xFF` as an escape sequence with the `unescape`/`unescape_os`
|
||||
routines, which will yield
|
||||
|
||||
```ignore
|
||||
[b'a', b'\xFF', b'z']
|
||||
```
|
||||
|
||||
instead. For example:
|
||||
|
||||
```
|
||||
use grep_cli::unescape;
|
||||
|
||||
// Note the use of a raw string!
|
||||
assert_eq!(vec![b'a', b'\xFF', b'z'], unescape(r"a\xFFz"));
|
||||
```
|
||||
|
||||
The `escape`/`escape_os` routines provide the reverse transformation, which
|
||||
makes it easy to show user friendly error messages involving arbitrary bytes.
|
||||
|
||||
|
||||
# Building patterns
|
||||
|
||||
Typically, regular expression patterns must be valid UTF-8. However, command
|
||||
line arguments aren't guaranteed to be valid UTF-8. Unfortunately, the
|
||||
standard library's UTF-8 conversion functions from `OsStr`s do not provide
|
||||
good error messages. However, the
|
||||
[`pattern_from_bytes`](fn.pattern_from_bytes.html)
|
||||
and
|
||||
[`pattern_from_os`](fn.pattern_from_os.html)
|
||||
do, including reporting exactly where the first invalid UTF-8 byte is seen.
|
||||
|
||||
Additionally, it can be useful to read patterns from a file while reporting
|
||||
good error messages that include line numbers. The
|
||||
[`patterns_from_path`](fn.patterns_from_path.html),
|
||||
[`patterns_from_reader`](fn.patterns_from_reader.html)
|
||||
and
|
||||
[`patterns_from_stdin`](fn.patterns_from_stdin.html)
|
||||
routines do just that. If any pattern is found that is invalid UTF-8, then the
|
||||
error includes the file path (if available) along with the line number and the
|
||||
byte offset at which the first invalid UTF-8 byte was observed.
|
||||
|
||||
|
||||
# Read process output
|
||||
|
||||
Sometimes a command line application needs to execute other processes and read
|
||||
its stdout in a streaming fashion. The
|
||||
[`CommandReader`](struct.CommandReader.html)
|
||||
provides this functionality with an explicit goal of improving failure modes.
|
||||
In particular, if the process exits with an error code, then stderr is read
|
||||
and converted into a normal Rust error to show to end users. This makes the
|
||||
underlying failure modes explicit and gives more information to end users for
|
||||
debugging the problem.
|
||||
|
||||
As a special case,
|
||||
[`DecompressionReader`](struct.DecompressionReader.html)
|
||||
provides a way to decompress arbitrary files by matching their file extensions
|
||||
up with corresponding decompression programs (such as `gzip` and `xz`). This
|
||||
is useful as a means of performing simplistic decompression in a portable
|
||||
manner without binding to specific compression libraries. This does come with
|
||||
some overhead though, so if you need to decompress lots of small files, this
|
||||
may not be an appropriate convenience to use.
|
||||
|
||||
Each reader has a corresponding builder for additional configuration, such as
|
||||
whether to read stderr asynchronously in order to avoid deadlock (which is
|
||||
enabled by default).
|
||||
|
||||
|
||||
# Miscellaneous parsing
|
||||
|
||||
The
|
||||
[`parse_human_readable_size`](fn.parse_human_readable_size.html)
|
||||
routine parses strings like `2M` and converts them to the corresponding number
|
||||
of bytes (`2 * 1<<20` in this case). If an invalid size is found, then a good
|
||||
error message is crafted that typically tells the user how to fix the problem.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate atty;
|
||||
extern crate bstr;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
extern crate termcolor;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi_util;
|
||||
|
||||
mod decompress;
|
||||
mod escape;
|
||||
mod human;
|
||||
mod pattern;
|
||||
mod process;
|
||||
mod wtr;
|
||||
|
||||
pub use decompress::{
|
||||
DecompressionMatcher, DecompressionMatcherBuilder,
|
||||
DecompressionReader, DecompressionReaderBuilder,
|
||||
};
|
||||
pub use escape::{escape, escape_os, unescape, unescape_os};
|
||||
pub use human::{ParseSizeError, parse_human_readable_size};
|
||||
pub use pattern::{
|
||||
InvalidPatternError,
|
||||
pattern_from_os, pattern_from_bytes,
|
||||
patterns_from_path, patterns_from_reader, patterns_from_stdin,
|
||||
};
|
||||
pub use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
pub use wtr::{
|
||||
StandardStream,
|
||||
stdout, stdout_buffered_line, stdout_buffered_block,
|
||||
};
|
||||
|
||||
/// Returns true if and only if stdin is believed to be readable.
|
||||
///
|
||||
/// When stdin is readable, command line programs may choose to behave
|
||||
/// differently than when stdin is not readable. For example, `command foo`
|
||||
/// might search the current directory for occurrences of `foo` where as
|
||||
/// `command foo < some-file` or `cat some-file | command foo` might instead
|
||||
/// only search stdin for occurrences of `foo`.
|
||||
pub fn is_readable_stdin() -> bool {
|
||||
#[cfg(unix)]
|
||||
fn imp() -> bool {
|
||||
use std::os::unix::fs::FileTypeExt;
|
||||
use same_file::Handle;
|
||||
|
||||
let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
|
||||
Err(_) => return false,
|
||||
Ok(md) => md.file_type(),
|
||||
};
|
||||
ft.is_file() || ft.is_fifo()
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
fn imp() -> bool {
|
||||
use winapi_util as winutil;
|
||||
|
||||
winutil::file::typ(winutil::HandleRef::stdin())
|
||||
.map(|t| t.is_disk() || t.is_pipe())
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
!is_tty_stdin() && imp()
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdin is believed to be connectted to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stdin() -> bool {
|
||||
atty::is(atty::Stream::Stdin)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdout is believed to be connectted to a tty
|
||||
/// or a console.
|
||||
///
|
||||
/// This is useful for when you want your command line program to produce
|
||||
/// different output depending on whether it's printing directly to a user's
|
||||
/// terminal or whether it's being redirected somewhere else. For example,
|
||||
/// implementations of `ls` will often show one item per line when stdout is
|
||||
/// redirected, but will condensed output when printing to a tty.
|
||||
pub fn is_tty_stdout() -> bool {
|
||||
atty::is(atty::Stream::Stdout)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stderr is believed to be connectted to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stderr() -> bool {
|
||||
atty::is(atty::Stream::Stderr)
|
||||
}
|
201
grep-cli/src/pattern.rs
Normal file
201
grep-cli/src/pattern.rs
Normal file
@@ -0,0 +1,201 @@
|
||||
use std::error;
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use bstr::io::BufReadExt;
|
||||
|
||||
use escape::{escape, escape_os};
|
||||
|
||||
/// An error that occurs when a pattern could not be converted to valid UTF-8.
|
||||
///
|
||||
/// The purpose of this error is to give a more targeted failure mode for
|
||||
/// patterns written by end users that are not valid UTF-8.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct InvalidPatternError {
|
||||
original: String,
|
||||
valid_up_to: usize,
|
||||
}
|
||||
|
||||
impl InvalidPatternError {
|
||||
/// Returns the index in the given string up to which valid UTF-8 was
|
||||
/// verified.
|
||||
pub fn valid_up_to(&self) -> usize {
|
||||
self.valid_up_to
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for InvalidPatternError {
|
||||
fn description(&self) -> &str { "invalid pattern" }
|
||||
}
|
||||
|
||||
impl fmt::Display for InvalidPatternError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"found invalid UTF-8 in pattern at byte offset {} \
|
||||
(use hex escape sequences to match arbitrary bytes \
|
||||
in a pattern, e.g., \\xFF): '{}'",
|
||||
self.valid_up_to,
|
||||
self.original,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<InvalidPatternError> for io::Error {
|
||||
fn from(paterr: InvalidPatternError) -> io::Error {
|
||||
io::Error::new(io::ErrorKind::Other, paterr)
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert an OS string into a regular expression pattern.
|
||||
///
|
||||
/// This conversion fails if the given pattern is not valid UTF-8, in which
|
||||
/// case, a targeted error with more information about where the invalid UTF-8
|
||||
/// occurs is given. The error also suggests the use of hex escape sequences,
|
||||
/// which are supported by many regex engines.
|
||||
pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> {
|
||||
pattern.to_str().ok_or_else(|| {
|
||||
let valid_up_to = pattern
|
||||
.to_string_lossy()
|
||||
.find('\u{FFFD}')
|
||||
.expect("a Unicode replacement codepoint for invalid UTF-8");
|
||||
InvalidPatternError {
|
||||
original: escape_os(pattern),
|
||||
valid_up_to: valid_up_to,
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Convert arbitrary bytes into a regular expression pattern.
|
||||
///
|
||||
/// This conversion fails if the given pattern is not valid UTF-8, in which
|
||||
/// case, a targeted error with more information about where the invalid UTF-8
|
||||
/// occurs is given. The error also suggests the use of hex escape sequences,
|
||||
/// which are supported by many regex engines.
|
||||
pub fn pattern_from_bytes(
|
||||
pattern: &[u8],
|
||||
) -> Result<&str, InvalidPatternError> {
|
||||
str::from_utf8(pattern).map_err(|err| {
|
||||
InvalidPatternError {
|
||||
original: escape(pattern),
|
||||
valid_up_to: err.valid_up_to(),
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Read patterns from a file path, one per line.
|
||||
///
|
||||
/// If there was a problem reading or if any of the patterns contain invalid
|
||||
/// UTF-8, then an error is returned. If there was a problem with a specific
|
||||
/// pattern, then the error message will include the line number and the file
|
||||
/// path.
|
||||
pub fn patterns_from_path<P: AsRef<Path>>(path: P) -> io::Result<Vec<String>> {
|
||||
let path = path.as_ref();
|
||||
let file = File::open(path).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{}: {}", path.display(), err),
|
||||
)
|
||||
})?;
|
||||
patterns_from_reader(file).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{}:{}", path.display(), err),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Read patterns from stdin, one per line.
|
||||
///
|
||||
/// If there was a problem reading or if any of the patterns contain invalid
|
||||
/// UTF-8, then an error is returned. If there was a problem with a specific
|
||||
/// pattern, then the error message will include the line number and the fact
|
||||
/// that it came from stdin.
|
||||
pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
|
||||
let stdin = io::stdin();
|
||||
let locked = stdin.lock();
|
||||
patterns_from_reader(locked).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("<stdin>:{}", err),
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
/// Read patterns from any reader, one per line.
|
||||
///
|
||||
/// If there was a problem reading or if any of the patterns contain invalid
|
||||
/// UTF-8, then an error is returned. If there was a problem with a specific
|
||||
/// pattern, then the error message will include the line number.
|
||||
///
|
||||
/// Note that this routine uses its own internal buffer, so the caller should
|
||||
/// not provide their own buffered reader if possible.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This shows how to parse patterns, one per line.
|
||||
///
|
||||
/// ```
|
||||
/// use grep_cli::patterns_from_reader;
|
||||
///
|
||||
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
||||
/// let patterns = "\
|
||||
/// foo
|
||||
/// bar\\s+foo
|
||||
/// [a-z]{3}
|
||||
/// ";
|
||||
///
|
||||
/// assert_eq!(patterns_from_reader(patterns.as_bytes())?, vec![
|
||||
/// r"foo",
|
||||
/// r"bar\s+foo",
|
||||
/// r"[a-z]{3}",
|
||||
/// ]);
|
||||
/// # Ok(()) }
|
||||
/// ```
|
||||
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
|
||||
let mut patterns = vec![];
|
||||
let mut line_number = 0;
|
||||
io::BufReader::new(rdr).for_byte_line(|line| {
|
||||
line_number += 1;
|
||||
match pattern_from_bytes(line) {
|
||||
Ok(pattern) => {
|
||||
patterns.push(pattern.to_string());
|
||||
Ok(true)
|
||||
}
|
||||
Err(err) => {
|
||||
Err(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("{}: {}", line_number, err),
|
||||
))
|
||||
}
|
||||
}
|
||||
})?;
|
||||
Ok(patterns)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
#[test]
|
||||
fn bytes() {
|
||||
let pat = b"abc\xFFxyz";
|
||||
let err = pattern_from_bytes(pat).unwrap_err();
|
||||
assert_eq!(3, err.valid_up_to());
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(unix)]
|
||||
fn os() {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
let pat = OsStr::from_bytes(b"abc\xFFxyz");
|
||||
let err = pattern_from_os(pat).unwrap_err();
|
||||
assert_eq!(3, err.valid_up_to());
|
||||
}
|
||||
}
|
267
grep-cli/src/process.rs
Normal file
267
grep-cli/src/process.rs
Normal file
@@ -0,0 +1,267 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io::{self, Read};
|
||||
use std::iter;
|
||||
use std::process;
|
||||
use std::thread::{self, JoinHandle};
|
||||
|
||||
/// An error that can occur while running a command and reading its output.
|
||||
///
|
||||
/// This error can be seamlessly converted to an `io::Error` via a `From`
|
||||
/// implementation.
|
||||
#[derive(Debug)]
|
||||
pub struct CommandError {
|
||||
kind: CommandErrorKind,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum CommandErrorKind {
|
||||
Io(io::Error),
|
||||
Stderr(Vec<u8>),
|
||||
}
|
||||
|
||||
impl CommandError {
|
||||
/// Create an error from an I/O error.
|
||||
pub(crate) fn io(ioerr: io::Error) -> CommandError {
|
||||
CommandError { kind: CommandErrorKind::Io(ioerr) }
|
||||
}
|
||||
|
||||
/// Create an error from the contents of stderr (which may be empty).
|
||||
pub(crate) fn stderr(bytes: Vec<u8>) -> CommandError {
|
||||
CommandError { kind: CommandErrorKind::Stderr(bytes) }
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for CommandError {
|
||||
fn description(&self) -> &str { "command error" }
|
||||
}
|
||||
|
||||
impl fmt::Display for CommandError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
CommandErrorKind::Io(ref e) => e.fmt(f),
|
||||
CommandErrorKind::Stderr(ref bytes) => {
|
||||
let msg = String::from_utf8_lossy(bytes);
|
||||
if msg.trim().is_empty() {
|
||||
write!(f, "<stderr is empty>")
|
||||
} else {
|
||||
let div = iter::repeat('-').take(79).collect::<String>();
|
||||
write!(f, "\n{div}\n{msg}\n{div}", div=div, msg=msg.trim())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for CommandError {
|
||||
fn from(ioerr: io::Error) -> CommandError {
|
||||
CommandError { kind: CommandErrorKind::Io(ioerr) }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<CommandError> for io::Error {
|
||||
fn from(cmderr: CommandError) -> io::Error {
|
||||
match cmderr.kind {
|
||||
CommandErrorKind::Io(ioerr) => ioerr,
|
||||
CommandErrorKind::Stderr(_) => {
|
||||
io::Error::new(io::ErrorKind::Other, cmderr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Configures and builds a streaming reader for process output.
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct CommandReaderBuilder {
|
||||
async_stderr: bool,
|
||||
}
|
||||
|
||||
impl CommandReaderBuilder {
|
||||
/// Create a new builder with the default configuration.
|
||||
pub fn new() -> CommandReaderBuilder {
|
||||
CommandReaderBuilder::default()
|
||||
}
|
||||
|
||||
/// Build a new streaming reader for the given command's output.
|
||||
///
|
||||
/// The caller should set everything that's required on the given command
|
||||
/// before building a reader, such as its arguments, environment and
|
||||
/// current working directory. Settings such as the stdout and stderr (but
|
||||
/// not stdin) pipes will be overridden so that they can be controlled by
|
||||
/// the reader.
|
||||
///
|
||||
/// If there was a problem spawning the given command, then its error is
|
||||
/// returned.
|
||||
pub fn build(
|
||||
&self,
|
||||
command: &mut process::Command,
|
||||
) -> Result<CommandReader, CommandError> {
|
||||
let mut child = command
|
||||
.stdout(process::Stdio::piped())
|
||||
.stderr(process::Stdio::piped())
|
||||
.spawn()?;
|
||||
let stdout = child.stdout.take().unwrap();
|
||||
let stderr =
|
||||
if self.async_stderr {
|
||||
StderrReader::async(child.stderr.take().unwrap())
|
||||
} else {
|
||||
StderrReader::sync(child.stderr.take().unwrap())
|
||||
};
|
||||
Ok(CommandReader {
|
||||
child: child,
|
||||
stdout: stdout,
|
||||
stderr: stderr,
|
||||
done: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// When enabled, the reader will asynchronously read the contents of the
|
||||
/// command's stderr output. When disabled, stderr is only read after the
|
||||
/// stdout stream has been exhausted (or if the process quits with an error
|
||||
/// code).
|
||||
///
|
||||
/// Note that when enabled, this may require launching an additional
|
||||
/// thread in order to read stderr. This is done so that the process being
|
||||
/// executed is never blocked from writing to stdout or stderr. If this is
|
||||
/// disabled, then it is possible for the process to fill up the stderr
|
||||
/// buffer and deadlock.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn async_stderr(&mut self, yes: bool) -> &mut CommandReaderBuilder {
|
||||
self.async_stderr = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A streaming reader for a command's output.
|
||||
///
|
||||
/// The purpose of this reader is to provide an easy way to execute processes
|
||||
/// whose stdout is read in a streaming way while also making the processes'
|
||||
/// stderr available when the process fails with an exit code. This makes it
|
||||
/// possible to execute processes while surfacing the underlying failure mode
|
||||
/// in the case of an error.
|
||||
///
|
||||
/// Moreover, by default, this reader will asynchronously read the processes'
|
||||
/// stderr. This prevents subtle deadlocking bugs for noisy processes that
|
||||
/// write a lot to stderr. Currently, the entire contents of stderr is read
|
||||
/// on to the heap.
|
||||
///
|
||||
/// # Example
|
||||
///
|
||||
/// This example shows how to invoke `gzip` to decompress the contents of a
|
||||
/// file. If the `gzip` command reports a failing exit status, then its stderr
|
||||
/// is returned as an error.
|
||||
///
|
||||
/// ```no_run
|
||||
/// use std::io::Read;
|
||||
/// use std::process::Command;
|
||||
/// use grep_cli::CommandReader;
|
||||
///
|
||||
/// # fn example() -> Result<(), Box<::std::error::Error>> {
|
||||
/// let mut cmd = Command::new("gzip");
|
||||
/// cmd.arg("-d").arg("-c").arg("/usr/share/man/man1/ls.1.gz");
|
||||
///
|
||||
/// let mut rdr = CommandReader::new(&mut cmd)?;
|
||||
/// let mut contents = vec![];
|
||||
/// rdr.read_to_end(&mut contents)?;
|
||||
/// # Ok(()) }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct CommandReader {
|
||||
child: process::Child,
|
||||
stdout: process::ChildStdout,
|
||||
stderr: StderrReader,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl CommandReader {
|
||||
/// Create a new streaming reader for the given command using the default
|
||||
/// configuration.
|
||||
///
|
||||
/// The caller should set everything that's required on the given command
|
||||
/// before building a reader, such as its arguments, environment and
|
||||
/// current working directory. Settings such as the stdout and stderr (but
|
||||
/// not stdin) pipes will be overridden so that they can be controlled by
|
||||
/// the reader.
|
||||
///
|
||||
/// If there was a problem spawning the given command, then its error is
|
||||
/// returned.
|
||||
///
|
||||
/// If the caller requires additional configuration for the reader
|
||||
/// returned, then use
|
||||
/// [`CommandReaderBuilder`](struct.CommandReaderBuilder.html).
|
||||
pub fn new(
|
||||
cmd: &mut process::Command,
|
||||
) -> Result<CommandReader, CommandError> {
|
||||
CommandReaderBuilder::new().build(cmd)
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for CommandReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
if self.done {
|
||||
return Ok(0);
|
||||
}
|
||||
let nread = self.stdout.read(buf)?;
|
||||
if nread == 0 {
|
||||
self.done = true;
|
||||
// Reap the child now that we're done reading. If the command
|
||||
// failed, report stderr as an error.
|
||||
if !self.child.wait()?.success() {
|
||||
return Err(io::Error::from(self.stderr.read_to_end()));
|
||||
}
|
||||
}
|
||||
Ok(nread)
|
||||
}
|
||||
}
|
||||
|
||||
/// A reader that encapsulates the asynchronous or synchronous reading of
|
||||
/// stderr.
|
||||
#[derive(Debug)]
|
||||
enum StderrReader {
|
||||
Async(Option<JoinHandle<CommandError>>),
|
||||
Sync(process::ChildStderr),
|
||||
}
|
||||
|
||||
impl StderrReader {
|
||||
/// Create a reader for stderr that reads contents asynchronously.
|
||||
fn async(mut stderr: process::ChildStderr) -> StderrReader {
|
||||
let handle = thread::spawn(move || {
|
||||
stderr_to_command_error(&mut stderr)
|
||||
});
|
||||
StderrReader::Async(Some(handle))
|
||||
}
|
||||
|
||||
/// Create a reader for stderr that reads contents synchronously.
|
||||
fn sync(stderr: process::ChildStderr) -> StderrReader {
|
||||
StderrReader::Sync(stderr)
|
||||
}
|
||||
|
||||
/// Consumes all of stderr on to the heap and returns it as an error.
|
||||
///
|
||||
/// If there was a problem reading stderr itself, then this returns an I/O
|
||||
/// command error.
|
||||
fn read_to_end(&mut self) -> CommandError {
|
||||
match *self {
|
||||
StderrReader::Async(ref mut handle) => {
|
||||
let handle = handle
|
||||
.take()
|
||||
.expect("read_to_end cannot be called more than once");
|
||||
handle
|
||||
.join()
|
||||
.expect("stderr reading thread does not panic")
|
||||
}
|
||||
StderrReader::Sync(ref mut stderr) => {
|
||||
stderr_to_command_error(stderr)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn stderr_to_command_error(stderr: &mut process::ChildStderr) -> CommandError {
|
||||
let mut bytes = vec![];
|
||||
match stderr.read_to_end(&mut bytes) {
|
||||
Ok(_) => CommandError::stderr(bytes),
|
||||
Err(err) => CommandError::io(err),
|
||||
}
|
||||
}
|
133
grep-cli/src/wtr.rs
Normal file
133
grep-cli/src/wtr.rs
Normal file
@@ -0,0 +1,133 @@
|
||||
use std::io;
|
||||
|
||||
use termcolor;
|
||||
|
||||
use is_tty_stdout;
|
||||
|
||||
/// A writer that supports coloring with either line or block buffering.
|
||||
pub struct StandardStream(StandardStreamKind);
|
||||
|
||||
/// Returns a possibly buffered writer to stdout for the given color choice.
|
||||
///
|
||||
/// The writer returned is either line buffered or block buffered. The decision
|
||||
/// between these two is made automatically based on whether a tty is attached
|
||||
/// to stdout or not. If a tty is attached, then line buffering is used.
|
||||
/// Otherwise, block buffering is used. In general, block buffering is more
|
||||
/// efficient, but may increase the time it takes for the end user to see the
|
||||
/// first bits of output.
|
||||
///
|
||||
/// If you need more fine grained control over the buffering mode, then use one
|
||||
/// of `stdout_buffered_line` or `stdout_buffered_block`.
|
||||
///
|
||||
/// The color choice given is passed along to the underlying writer. To
|
||||
/// completely disable colors in all cases, use `ColorChoice::Never`.
|
||||
pub fn stdout(color_choice: termcolor::ColorChoice) -> StandardStream {
|
||||
if is_tty_stdout() {
|
||||
stdout_buffered_line(color_choice)
|
||||
} else {
|
||||
stdout_buffered_block(color_choice)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a line buffered writer to stdout for the given color choice.
|
||||
///
|
||||
/// This writer is useful when printing results directly to a tty such that
|
||||
/// users see output as soon as it's written. The downside of this approach
|
||||
/// is that it can be slower, especially when there is a lot of output.
|
||||
///
|
||||
/// You might consider using
|
||||
/// [`stdout`](fn.stdout.html)
|
||||
/// instead, which chooses the buffering strategy automatically based on
|
||||
/// whether stdout is connected to a tty.
|
||||
pub fn stdout_buffered_line(
|
||||
color_choice: termcolor::ColorChoice,
|
||||
) -> StandardStream {
|
||||
let out = termcolor::StandardStream::stdout(color_choice);
|
||||
StandardStream(StandardStreamKind::LineBuffered(out))
|
||||
}
|
||||
|
||||
/// Returns a block buffered writer to stdout for the given color choice.
|
||||
///
|
||||
/// This writer is useful when printing results to a file since it amortizes
|
||||
/// the cost of writing data. The downside of this approach is that it can
|
||||
/// increase the latency of display output when writing to a tty.
|
||||
///
|
||||
/// You might consider using
|
||||
/// [`stdout`](fn.stdout.html)
|
||||
/// instead, which chooses the buffering strategy automatically based on
|
||||
/// whether stdout is connected to a tty.
|
||||
pub fn stdout_buffered_block(
|
||||
color_choice: termcolor::ColorChoice,
|
||||
) -> StandardStream {
|
||||
let out = termcolor::BufferedStandardStream::stdout(color_choice);
|
||||
StandardStream(StandardStreamKind::BlockBuffered(out))
|
||||
}
|
||||
|
||||
enum StandardStreamKind {
|
||||
LineBuffered(termcolor::StandardStream),
|
||||
BlockBuffered(termcolor::BufferedStandardStream),
|
||||
}
|
||||
|
||||
impl io::Write for StandardStream {
|
||||
#[inline]
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.write(buf),
|
||||
BlockBuffered(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.flush(),
|
||||
BlockBuffered(ref mut w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl termcolor::WriteColor for StandardStream {
|
||||
#[inline]
|
||||
fn supports_color(&self) -> bool {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref w) => w.supports_color(),
|
||||
BlockBuffered(ref w) => w.supports_color(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn set_color(&mut self, spec: &termcolor::ColorSpec) -> io::Result<()> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.set_color(spec),
|
||||
BlockBuffered(ref mut w) => w.set_color(spec),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn reset(&mut self) -> io::Result<()> {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref mut w) => w.reset(),
|
||||
BlockBuffered(ref mut w) => w.reset(),
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_synchronous(&self) -> bool {
|
||||
use self::StandardStreamKind::*;
|
||||
|
||||
match self.0 {
|
||||
LineBuffered(ref w) => w.is_synchronous(),
|
||||
BlockBuffered(ref w) => w.is_synchronous(),
|
||||
}
|
||||
}
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-matcher"
|
||||
version = "0.0.1" #:version
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A trait for regular expressions, with a focus on line oriented search.
|
||||
@@ -14,10 +14,10 @@ license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
|
||||
[dependencies]
|
||||
memchr = "2"
|
||||
memchr = "2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
regex = "1"
|
||||
regex = "1.1"
|
||||
|
||||
[[test]]
|
||||
name = "integration"
|
||||
|
@@ -1,4 +1,36 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
||||
grep-matcher
|
||||
------------
|
||||
This crate provides a low level interface for describing regular expression
|
||||
matchers. The `grep` crate uses this interface in order to make the regex
|
||||
engine it uses pluggable.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-matcher)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-matcher](https://docs.rs/grep-matcher)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-matcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_matcher;
|
||||
```
|
||||
|
@@ -134,7 +134,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
|
||||
/// Returns true if and only if the given byte is allowed in a capture name.
|
||||
fn is_valid_cap_letter(b: &u8) -> bool {
|
||||
match *b {
|
||||
b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true,
|
||||
b'0' ..= b'9' | b'a' ..= b'z' | b'A' ..= b'Z' | b'_' => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,39 @@
|
||||
/*!
|
||||
An interface for regular expressions, with a focus on line oriented search.
|
||||
This crate provides an interface for regular expressions, with a focus on line
|
||||
oriented search. The purpose of this crate is to provide a low level matching
|
||||
interface that permits any kind of substring or regex implementation to power
|
||||
the search routines provided by the
|
||||
[`grep-searcher`](https://docs.rs/grep-searcher)
|
||||
crate.
|
||||
|
||||
The primary thing provided by this crate is the
|
||||
[`Matcher`](trait.Matcher.html)
|
||||
trait. The trait defines an abstract interface for text search. It is robust
|
||||
enough to support everything from basic substring search all the way to
|
||||
arbitrarily complex regular expression implementations without sacrificing
|
||||
performance.
|
||||
|
||||
A key design decision made in this crate is the use of *internal iteration*,
|
||||
or otherwise known as the "push" model of searching. In this paradigm,
|
||||
implementations of the `Matcher` trait will drive search and execute callbacks
|
||||
provided by the caller when a match is found. This is in contrast to the
|
||||
usual style of *external iteration* (the "pull" model) found throughout the
|
||||
Rust ecosystem. There are two primary reasons why internal iteration was
|
||||
chosen:
|
||||
|
||||
* Some search implementations may themselves require internal iteration.
|
||||
Converting an internal iterator to an external iterator can be non-trivial
|
||||
and sometimes even practically impossible.
|
||||
* Rust's type system isn't quite expressive enough to write a generic interface
|
||||
using external iteration without giving something else up (namely, ease of
|
||||
use and/or performance).
|
||||
|
||||
In other words, internal iteration was chosen because it is the lowest common
|
||||
denominator and because it is probably the least bad way of expressing the
|
||||
interface in today's Rust. As a result, this trait isn't specifically intended
|
||||
for everyday use, although, you might find it to be a happy price to pay if you
|
||||
want to write code that is generic over multiple different regex
|
||||
implementations.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
@@ -186,6 +220,7 @@ enum LineTerminatorImp {
|
||||
|
||||
impl LineTerminator {
|
||||
/// Return a new single-byte line terminator. Any byte is valid.
|
||||
#[inline]
|
||||
pub fn byte(byte: u8) -> LineTerminator {
|
||||
LineTerminator(LineTerminatorImp::Byte([byte]))
|
||||
}
|
||||
@@ -194,11 +229,13 @@ impl LineTerminator {
|
||||
///
|
||||
/// When this option is used, consumers may generally treat a lone `\n` as
|
||||
/// a line terminator in addition to `\r\n`.
|
||||
#[inline]
|
||||
pub fn crlf() -> LineTerminator {
|
||||
LineTerminator(LineTerminatorImp::CRLF)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this line terminator is CRLF.
|
||||
#[inline]
|
||||
pub fn is_crlf(&self) -> bool {
|
||||
self.0 == LineTerminatorImp::CRLF
|
||||
}
|
||||
@@ -208,6 +245,7 @@ impl LineTerminator {
|
||||
/// If the line terminator is CRLF, then this returns `\n`. This is
|
||||
/// useful for routines that, for example, find line boundaries by treating
|
||||
/// `\n` as a line terminator even when it isn't preceded by `\r`.
|
||||
#[inline]
|
||||
pub fn as_byte(&self) -> u8 {
|
||||
match self.0 {
|
||||
LineTerminatorImp::Byte(array) => array[0],
|
||||
@@ -221,15 +259,27 @@ impl LineTerminator {
|
||||
/// `CRLF`, in which case, it returns `\r\n`.
|
||||
///
|
||||
/// The slice returned is guaranteed to have length at least `1`.
|
||||
#[inline]
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
match self.0 {
|
||||
LineTerminatorImp::Byte(ref array) => array,
|
||||
LineTerminatorImp::CRLF => &[b'\r', b'\n'],
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given slice ends with this line
|
||||
/// terminator.
|
||||
///
|
||||
/// If this line terminator is `CRLF`, then this only checks whether the
|
||||
/// last byte is `\n`.
|
||||
#[inline]
|
||||
pub fn is_suffix(&self, slice: &[u8]) -> bool {
|
||||
slice.last().map_or(false, |&b| b == self.as_byte())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LineTerminator {
|
||||
#[inline]
|
||||
fn default() -> LineTerminator {
|
||||
LineTerminator::byte(b'\n')
|
||||
}
|
||||
@@ -324,12 +374,12 @@ impl ByteSet {
|
||||
///
|
||||
/// Principally, this trait provides a way to access capturing groups
|
||||
/// in a uniform way that does not require any specific representation.
|
||||
/// Namely, differ matcher implementations may require different in-memory
|
||||
/// Namely, different matcher implementations may require different in-memory
|
||||
/// representations of capturing groups. This trait permits matchers to
|
||||
/// maintain their specific in-memory representation.
|
||||
///
|
||||
/// Note that this trait explicitly does not provide a way to construct a new
|
||||
/// captures value. Instead, it is the responsibility of a `Matcher` to build
|
||||
/// capture value. Instead, it is the responsibility of a `Matcher` to build
|
||||
/// one, which might require knowledge of the matcher's internal implementation
|
||||
/// details.
|
||||
pub trait Captures {
|
||||
@@ -426,7 +476,7 @@ impl Captures for NoCaptures {
|
||||
/// This error type implements the `std::error::Error` and `fmt::Display`
|
||||
/// traits for use in matcher implementations that can never produce errors.
|
||||
///
|
||||
/// The `fmt::Display` impl for this type panics.
|
||||
/// The `fmt::Debug` and `fmt::Display` impls for this type panics.
|
||||
#[derive(Debug, Eq, PartialEq)]
|
||||
pub struct NoError(());
|
||||
|
||||
@@ -463,6 +513,20 @@ pub enum LineMatchKind {
|
||||
}
|
||||
|
||||
/// A matcher defines an interface for regular expression implementations.
|
||||
///
|
||||
/// While this trait is large, there are only two required methods that
|
||||
/// implementors must provide: `find_at` and `new_captures`. If captures
|
||||
/// aren't supported by your implementation, then `new_captures` can be
|
||||
/// implemented with
|
||||
/// [`NoCaptures`](struct.NoCaptures.html). If your implementation does support
|
||||
/// capture groups, then you should also implement the other capture related
|
||||
/// methods, as dictated by the documentation. Crucially, this includes
|
||||
/// `captures_at`.
|
||||
///
|
||||
/// The rest of the methods on this trait provide default implementations on
|
||||
/// top of `find_at` and `new_captures`. It is not uncommon for implementations
|
||||
/// to be able to provide faster variants of some methods; in those cases,
|
||||
/// simply override the default implementation.
|
||||
pub trait Matcher {
|
||||
/// The concrete type of capturing groups used for this matcher.
|
||||
///
|
||||
|
17
grep-pcre2/Cargo.toml
Normal file
17
grep-pcre2/Cargo.toml
Normal file
@@ -0,0 +1,17 @@
|
||||
[package]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use PCRE2 with the 'grep' crate.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-pcre2"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "pcre", "backreference", "look"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
pcre2 = "0.2.0"
|
21
grep-pcre2/LICENSE-MIT
Normal file
21
grep-pcre2/LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
39
grep-pcre2/README.md
Normal file
39
grep-pcre2/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
grep-pcre2
|
||||
----------
|
||||
The `grep-pcre2` crate provides an implementation of the `Matcher` trait from
|
||||
the `grep-matcher` crate. This implementation permits PCRE2 to be used in the
|
||||
`grep` crate for fast line oriented searching.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-pcre2)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-pcre2](https://docs.rs/grep-pcre2)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
If you're looking to just use PCRE2 from Rust, then you probably want the
|
||||
[`pcre2`](https://docs.rs/pcre2)
|
||||
crate, which provide high level safe bindings to PCRE2.
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-pcre2 = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_pcre2;
|
||||
```
|
24
grep-pcre2/UNLICENSE
Normal file
24
grep-pcre2/UNLICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
59
grep-pcre2/src/error.rs
Normal file
59
grep-pcre2/src/error.rs
Normal file
@@ -0,0 +1,59 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
/// An error that can occur in this crate.
|
||||
///
|
||||
/// Generally, this error corresponds to problems building a regular
|
||||
/// expression, whether it's in parsing, compilation or a problem with
|
||||
/// guaranteeing a configured optimization.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Error {
|
||||
kind: ErrorKind,
|
||||
}
|
||||
|
||||
impl Error {
|
||||
pub(crate) fn regex<E: error::Error>(err: E) -> Error {
|
||||
Error { kind: ErrorKind::Regex(err.to_string()) }
|
||||
}
|
||||
|
||||
/// Return the kind of this error.
|
||||
pub fn kind(&self) -> &ErrorKind {
|
||||
&self.kind
|
||||
}
|
||||
}
|
||||
|
||||
/// The kind of an error that can occur.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum ErrorKind {
|
||||
/// An error that occurred as a result of parsing a regular expression.
|
||||
/// This can be a syntax error or an error that results from attempting to
|
||||
/// compile a regular expression that is too big.
|
||||
///
|
||||
/// The string here is the underlying error converted to a string.
|
||||
Regex(String),
|
||||
/// Hints that destructuring should not be exhaustive.
|
||||
///
|
||||
/// This enum may grow additional variants, so this makes sure clients
|
||||
/// don't count on exhaustive matching. (Otherwise, adding a new variant
|
||||
/// could break existing code.)
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(_) => "regex error",
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
16
grep-pcre2/src/lib.rs
Normal file
16
grep-pcre2/src/lib.rs
Normal file
@@ -0,0 +1,16 @@
|
||||
/*!
|
||||
An implementation of `grep-matcher`'s `Matcher` trait for
|
||||
[PCRE2](https://www.pcre.org/).
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate grep_matcher;
|
||||
extern crate pcre2;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
pub use pcre2::{is_jit_available, version};
|
||||
|
||||
mod error;
|
||||
mod matcher;
|
464
grep-pcre2/src/matcher.rs
Normal file
464
grep-pcre2/src/matcher.rs
Normal file
@@ -0,0 +1,464 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};
|
||||
|
||||
use error::Error;
|
||||
|
||||
/// A builder for configuring the compilation of a PCRE2 regex.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcherBuilder {
|
||||
builder: RegexBuilder,
|
||||
case_smart: bool,
|
||||
word: bool,
|
||||
}
|
||||
|
||||
impl RegexMatcherBuilder {
|
||||
/// Create a new matcher builder with a default configuration.
|
||||
pub fn new() -> RegexMatcherBuilder {
|
||||
RegexMatcherBuilder {
|
||||
builder: RegexBuilder::new(),
|
||||
case_smart: false,
|
||||
word: false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compile the given pattern into a PCRE matcher using the current
|
||||
/// configuration.
|
||||
///
|
||||
/// If there was a problem compiling the pattern, then an error is
|
||||
/// returned.
|
||||
pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
let mut builder = self.builder.clone();
|
||||
if self.case_smart && !has_uppercase_literal(pattern) {
|
||||
builder.caseless(true);
|
||||
}
|
||||
let res =
|
||||
if self.word {
|
||||
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
|
||||
builder.build(&pattern)
|
||||
} else {
|
||||
builder.build(pattern)
|
||||
};
|
||||
res.map_err(Error::regex).map(|regex| {
|
||||
let mut names = HashMap::new();
|
||||
for (i, name) in regex.capture_names().iter().enumerate() {
|
||||
if let Some(ref name) = *name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
RegexMatcher { regex, names }
|
||||
})
|
||||
}
|
||||
|
||||
/// Enables case insensitive matching.
|
||||
///
|
||||
/// If the `utf` option is also set, then Unicode case folding is used
|
||||
/// to determine case insensitivity. When the `utf` option is not set,
|
||||
/// then only standard ASCII case insensitivity is considered.
|
||||
///
|
||||
/// This option corresponds to the `i` flag.
|
||||
pub fn caseless(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.caseless(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to enable "smart case" or not.
|
||||
///
|
||||
/// When smart case is enabled, the builder will automatically enable
|
||||
/// case insensitive matching based on how the pattern is written. Namely,
|
||||
/// case insensitive mode is enabled when both of the following things
|
||||
/// are believed to be true:
|
||||
///
|
||||
/// 1. The pattern contains at least one literal character. For example,
|
||||
/// `a\w` contains a literal (`a`) but `\w` does not.
|
||||
/// 2. Of the literals in the pattern, none of them are considered to be
|
||||
/// uppercase according to Unicode. For example, `foo\pL` has no
|
||||
/// uppercase literals but `Foo\pL` does.
|
||||
///
|
||||
/// Note that the implementation of this is not perfect. Namely, `\p{Ll}`
|
||||
/// will prevent case insensitive matching even though it is part of a meta
|
||||
/// sequence. This bug will probably never be fixed.
|
||||
pub fn case_smart(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.case_smart = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables "dot all" matching.
|
||||
///
|
||||
/// When enabled, the `.` metacharacter in the pattern matches any
|
||||
/// character, include `\n`. When disabled (the default), `.` will match
|
||||
/// any character except for `\n`.
|
||||
///
|
||||
/// This option corresponds to the `s` flag.
|
||||
pub fn dotall(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.dotall(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable "extended" mode in the pattern, where whitespace is ignored.
|
||||
///
|
||||
/// This option corresponds to the `x` flag.
|
||||
pub fn extended(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.extended(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable multiline matching mode.
|
||||
///
|
||||
/// When enabled, the `^` and `$` anchors will match both at the beginning
|
||||
/// and end of a subject string, in addition to matching at the start of
|
||||
/// a line and the end of a line. When disabled, the `^` and `$` anchors
|
||||
/// will only match at the beginning and end of a subject string.
|
||||
///
|
||||
/// This option corresponds to the `m` flag.
|
||||
pub fn multi_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.multi_line(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable matching of CRLF as a line terminator.
|
||||
///
|
||||
/// When enabled, anchors such as `^` and `$` will match any of the
|
||||
/// following as a line terminator: `\r`, `\n` or `\r\n`.
|
||||
///
|
||||
/// This is disabled by default, in which case, only `\n` is recognized as
|
||||
/// a line terminator.
|
||||
pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.crlf(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Require that all matches occur on word boundaries.
|
||||
///
|
||||
/// Enabling this option is subtly different than putting `\b` assertions
|
||||
/// on both sides of your pattern. In particular, a `\b` assertion requires
|
||||
/// that one side of it match a word character while the other match a
|
||||
/// non-word character. This option, in contrast, merely requires that
|
||||
/// one side match a non-word character.
|
||||
///
|
||||
/// For example, `\b-2\b` will not match `foo -2 bar` since `-` is not a
|
||||
/// word character. However, `-2` with this `word` option enabled will
|
||||
/// match the `-2` in `foo -2 bar`.
|
||||
pub fn word(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.word = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable Unicode matching mode.
|
||||
///
|
||||
/// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
|
||||
/// `\d`, `\D`, `\s`, `\S`, `\w`, `\W`.
|
||||
///
|
||||
/// When set, this implies UTF matching mode. It is not possible to enable
|
||||
/// Unicode matching mode without enabling UTF matching mode.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn ucp(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.ucp(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable UTF matching mode.
|
||||
///
|
||||
/// When enabled, characters are treated as sequences of code units that
|
||||
/// make up a single codepoint instead of as single bytes. For example,
|
||||
/// this will cause `.` to match any single UTF-8 encoded codepoint, where
|
||||
/// as when this is disabled, `.` will any single byte (except for `\n` in
|
||||
/// both cases, unless "dot all" mode is enabled).
|
||||
///
|
||||
/// Note that when UTF matching mode is enabled, every search performed
|
||||
/// will do a UTF-8 validation check, which can impact performance. The
|
||||
/// UTF-8 check can be disabled via the `disable_utf_check` option, but it
|
||||
/// is undefined behavior to enable UTF matching mode and search invalid
|
||||
/// UTF-8.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn utf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.utf(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// When UTF matching mode is enabled, this will disable the UTF checking
|
||||
/// that PCRE2 will normally perform automatically. If UTF matching mode
|
||||
/// is not enabled, then this has no effect.
|
||||
///
|
||||
/// UTF checking is enabled by default when UTF matching mode is enabled.
|
||||
/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
|
||||
/// will return an error if you attempt to search a subject string that is
|
||||
/// not valid UTF-8.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It is undefined behavior to disable the UTF check in UTF matching mode
|
||||
/// and search a subject string that is not valid UTF-8. When the UTF check
|
||||
/// is disabled, callers must guarantee that the subject string is valid
|
||||
/// UTF-8.
|
||||
pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self.builder.disable_utf_check();
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable PCRE2's JIT and return an error if it's not available.
|
||||
///
|
||||
/// This generally speeds up matching quite a bit. The downside is that it
|
||||
/// can increase the time it takes to compile a pattern.
|
||||
///
|
||||
/// If the JIT isn't available or if JIT compilation returns an error, then
|
||||
/// regex compilation will fail with the corresponding error.
|
||||
///
|
||||
/// This is disabled by default, and always overrides `jit_if_available`.
|
||||
pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.jit(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable PCRE2's JIT if it's available.
|
||||
///
|
||||
/// This generally speeds up matching quite a bit. The downside is that it
|
||||
/// can increase the time it takes to compile a pattern.
|
||||
///
|
||||
/// If the JIT isn't available or if JIT compilation returns an error,
|
||||
/// then a debug message with the error will be emitted and the regex will
|
||||
/// otherwise silently fall back to non-JIT matching.
|
||||
///
|
||||
/// This is disabled by default, and always overrides `jit`.
|
||||
pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.jit_if_available(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
|
||||
/// not enabled, then this has no effect.
|
||||
///
|
||||
/// When `None` is given, no custom JIT stack will be created, and instead,
|
||||
/// the default JIT stack is used. When the default is used, its maximum
|
||||
/// size is 32 KB.
|
||||
///
|
||||
/// When this is set, then a new JIT stack will be created with the given
|
||||
/// maximum size as its limit.
|
||||
///
|
||||
/// Increasing the stack size can be useful for larger regular expressions.
|
||||
///
|
||||
/// By default, this is set to `None`.
|
||||
pub fn max_jit_stack_size(
|
||||
&mut self,
|
||||
bytes: Option<usize>,
|
||||
) -> &mut RegexMatcherBuilder {
|
||||
self.builder.max_jit_stack_size(bytes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of the `Matcher` trait using PCRE2.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcher {
|
||||
regex: Regex,
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
/// Create a new matcher from the given pattern using the default
|
||||
/// configuration.
|
||||
pub fn new(pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
RegexMatcherBuilder::new().build(pattern)
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = Error;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, Error> {
|
||||
Ok(self.regex
|
||||
.find_at(haystack, at)
|
||||
.map_err(Error::regex)?
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, Error> {
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn try_find_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
mut matched: F,
|
||||
) -> Result<Result<(), E>, Error>
|
||||
where F: FnMut(Match) -> Result<bool, E>
|
||||
{
|
||||
for result in self.regex.find_iter(haystack) {
|
||||
let m = result.map_err(Error::regex)?;
|
||||
match matched(Match::new(m.start(), m.end())) {
|
||||
Ok(true) => continue,
|
||||
Ok(false) => return Ok(Ok(())),
|
||||
Err(err) => return Ok(Err(err)),
|
||||
}
|
||||
}
|
||||
Ok(Ok(()))
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, Error> {
|
||||
Ok(self.regex
|
||||
.captures_read_at(&mut caps.locs, haystack, at)
|
||||
.map_err(Error::regex)?
|
||||
.is_some())
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents the match offsets of each capturing group in a match.
|
||||
///
|
||||
/// The first, or `0`th capture group, always corresponds to the entire match
|
||||
/// and is guaranteed to be present when a match occurs. The next capture
|
||||
/// group, at index `1`, corresponds to the first capturing group in the regex,
|
||||
/// ordered by the position at which the left opening parenthesis occurs.
|
||||
///
|
||||
/// Note that not all capturing groups are guaranteed to be present in a match.
|
||||
/// For example, in the regex, `(?P<foo>\w)|(?P<bar>\W)`, only one of `foo`
|
||||
/// or `bar` will ever be set in any given match.
|
||||
///
|
||||
/// In order to access a capture group by name, you'll need to first find the
|
||||
/// index of the group using the corresponding matcher's `capture_index`
|
||||
/// method, and then use that index with `RegexCaptures::get`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures {
|
||||
/// Where the locations are stored.
|
||||
locs: CaptureLocations,
|
||||
}
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.locs.len()
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
self.locs.get(i).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexCaptures {
|
||||
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
|
||||
RegexCaptures { locs }
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine whether the pattern contains an uppercase character which should
|
||||
/// negate the effect of the smart-case option.
|
||||
///
|
||||
/// Ideally we would be able to check the AST in order to correctly handle
|
||||
/// things like '\p{Ll}' and '\p{Lu}' (which should be treated as explicitly
|
||||
/// cased), but PCRE doesn't expose enough details for that kind of analysis.
|
||||
/// For now, our 'good enough' solution is to simply perform a semi-naïve
|
||||
/// scan of the input pattern and ignore all characters following a '\'. The
|
||||
/// This at least lets us support the most common cases, like 'foo\w' and
|
||||
/// 'foo\S', in an intuitive manner.
|
||||
fn has_uppercase_literal(pattern: &str) -> bool {
|
||||
let mut chars = pattern.chars();
|
||||
while let Some(c) = chars.next() {
|
||||
if c == '\\' {
|
||||
chars.next();
|
||||
} else if c.is_uppercase() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use super::*;
|
||||
|
||||
// Test that enabling word matches does the right thing and demonstrate
|
||||
// the difference between it and surrounding the regex in `\b`.
|
||||
#[test]
|
||||
fn word() {
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.word(true)
|
||||
.build(r"-2")
|
||||
.unwrap();
|
||||
assert!(matcher.is_match(b"abc -2 foo").unwrap());
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.word(false)
|
||||
.build(r"\b-2\b")
|
||||
.unwrap();
|
||||
assert!(!matcher.is_match(b"abc -2 foo").unwrap());
|
||||
}
|
||||
|
||||
// Test that enabling CRLF permits `$` to match at the end of a line.
|
||||
#[test]
|
||||
fn line_terminator_crlf() {
|
||||
// Test normal use of `$` with a `\n` line terminator.
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.build(r"abc$")
|
||||
.unwrap();
|
||||
assert!(matcher.is_match(b"abc\n").unwrap());
|
||||
|
||||
// Test that `$` doesn't match at `\r\n` boundary normally.
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.build(r"abc$")
|
||||
.unwrap();
|
||||
assert!(!matcher.is_match(b"abc\r\n").unwrap());
|
||||
|
||||
// Now check the CRLF handling.
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.crlf(true)
|
||||
.build(r"abc$")
|
||||
.unwrap();
|
||||
assert!(matcher.is_match(b"abc\r\n").unwrap());
|
||||
}
|
||||
|
||||
// Test that smart case works.
|
||||
#[test]
|
||||
fn case_smart() {
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.case_smart(true)
|
||||
.build(r"abc")
|
||||
.unwrap();
|
||||
assert!(matcher.is_match(b"ABC").unwrap());
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.case_smart(true)
|
||||
.build(r"aBc")
|
||||
.unwrap();
|
||||
assert!(!matcher.is_match(b"ABC").unwrap());
|
||||
}
|
||||
|
||||
// Test that finding candidate lines works as expected.
|
||||
#[test]
|
||||
fn candidate_lines() {
|
||||
fn is_confirmed(m: LineMatchKind) -> bool {
|
||||
match m {
|
||||
LineMatchKind::Confirmed(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.build(r"\wfoo\s")
|
||||
.unwrap();
|
||||
let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
|
||||
assert!(is_confirmed(m));
|
||||
}
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-printer"
|
||||
version = "0.0.1" #:version
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
An implementation of the grep crate's Sink trait that provides standard
|
||||
@@ -18,14 +18,14 @@ default = ["serde1"]
|
||||
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
||||
|
||||
[dependencies]
|
||||
base64 = { version = "0.9", optional = true }
|
||||
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
|
||||
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
|
||||
log = "0.4"
|
||||
termcolor = "1"
|
||||
serde = { version = "1", optional = true }
|
||||
serde_derive = { version = "1", optional = true }
|
||||
serde_json = { version = "1", optional = true }
|
||||
base64 = { version = "0.10.0", optional = true }
|
||||
bstr = "0.2.0"
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
grep-searcher = { version = "0.1.4", path = "../grep-searcher" }
|
||||
termcolor = "1.0.4"
|
||||
serde = { version = "1.0.77", optional = true }
|
||||
serde_derive = { version = "1.0.77", optional = true }
|
||||
serde_json = { version = "1.0.27", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.0.1", path = "../grep-regex" }
|
||||
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||
|
@@ -1,4 +1,35 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
||||
grep-printer
|
||||
------------
|
||||
Print results from line oriented searching in a human readable, aggregate or
|
||||
JSON Lines format.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-printer)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-printer](https://docs.rs/grep-printer)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-printer = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_printer;
|
||||
```
|
||||
|
@@ -4,6 +4,25 @@ use std::str::FromStr;
|
||||
|
||||
use termcolor::{Color, ColorSpec, ParseColorError};
|
||||
|
||||
/// Returns a default set of color specifications.
|
||||
///
|
||||
/// This may change over time, but the color choices are meant to be fairly
|
||||
/// conservative that work across terminal themes.
|
||||
///
|
||||
/// Additional color specifications can be added to the list returned. More
|
||||
/// recently added specifications override previously added specifications.
|
||||
pub fn default_color_specs() -> Vec<UserColorSpec> {
|
||||
vec![
|
||||
#[cfg(unix)]
|
||||
"path:fg:magenta".parse().unwrap(),
|
||||
#[cfg(windows)]
|
||||
"path:fg:cyan".parse().unwrap(),
|
||||
"line:fg:green".parse().unwrap(),
|
||||
"match:fg:red".parse().unwrap(),
|
||||
"match:style:bold".parse().unwrap(),
|
||||
]
|
||||
}
|
||||
|
||||
/// An error that can occur when parsing color specifications.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum ColorError {
|
||||
@@ -227,6 +246,15 @@ impl ColorSpecs {
|
||||
merged
|
||||
}
|
||||
|
||||
/// Create a default set of specifications that have color.
|
||||
///
|
||||
/// This is distinct from `ColorSpecs`'s `Default` implementation in that
|
||||
/// this provides a set of default color choices, where as the `Default`
|
||||
/// implementation provides no color choices.
|
||||
pub fn default_with_color() -> ColorSpecs {
|
||||
ColorSpecs::new(&default_color_specs())
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring file paths.
|
||||
pub fn path(&self) -> &ColorSpec {
|
||||
&self.path
|
||||
|
@@ -91,7 +91,7 @@ impl JSONBuilder {
|
||||
/// When enabled, the `begin` and `end` messages are always emitted, even
|
||||
/// when no match is found.
|
||||
///
|
||||
/// When disabled, the `begin` and `end` messages are only shown is there
|
||||
/// When disabled, the `begin` and `end` messages are only shown if there
|
||||
/// is at least one `match` or `context` message.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
@@ -108,7 +108,7 @@ impl JSONBuilder {
|
||||
///
|
||||
/// # Format
|
||||
///
|
||||
/// This section describe the JSON format used by this printer.
|
||||
/// This section describes the JSON format used by this printer.
|
||||
///
|
||||
/// To skip the rigamarole, take a look at the
|
||||
/// [example](#example)
|
||||
@@ -619,6 +619,13 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
matches.push(m);
|
||||
true
|
||||
}).map_err(io::Error::error_message)?;
|
||||
// Don't report empty matches appearing at the end of the bytes.
|
||||
if !matches.is_empty()
|
||||
&& matches.last().unwrap().is_empty()
|
||||
&& matches.last().unwrap().start() >= bytes.len()
|
||||
{
|
||||
matches.pop().unwrap();
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@@ -810,7 +817,8 @@ impl<'a> SubMatches<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
|
||||
use grep_matcher::LineTerminator;
|
||||
use grep_searcher::SearcherBuilder;
|
||||
|
||||
use super::{JSON, JSONBuilder};
|
||||
@@ -911,4 +919,45 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(got.lines().count(), 2);
|
||||
assert!(got.contains("begin") && got.contains("end"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_crlf() {
|
||||
let haystack = "test\r\n".as_bytes();
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.build("test")
|
||||
.unwrap();
|
||||
let mut printer = JSONBuilder::new()
|
||||
.build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.build()
|
||||
.search_reader(&matcher, haystack, printer.sink(&matcher))
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
assert!(
|
||||
got.lines().nth(1).unwrap().contains(r"test\r\n"),
|
||||
r"missing 'test\r\n' in '{}'",
|
||||
got.lines().nth(1).unwrap(),
|
||||
);
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.crlf(true)
|
||||
.build("test")
|
||||
.unwrap();
|
||||
let mut printer = JSONBuilder::new()
|
||||
.build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.line_terminator(LineTerminator::crlf())
|
||||
.build()
|
||||
.search_reader(&matcher, haystack, printer.sink(&matcher))
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
assert!(
|
||||
got.lines().nth(1).unwrap().contains(r"test\r\n"),
|
||||
r"missing 'test\r\n' in '{}'",
|
||||
got.lines().nth(1).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@@ -114,39 +114,6 @@ impl<'a> Data<'a> {
|
||||
// so we do the easy thing for now.
|
||||
Data::Text { text: path.to_string_lossy() }
|
||||
}
|
||||
|
||||
// Unused deserialization routines.
|
||||
|
||||
/*
|
||||
fn into_bytes(self) -> Vec<u8> {
|
||||
match self {
|
||||
Data::Text { text } => text.into_bytes(),
|
||||
Data::Bytes { bytes } => bytes,
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn into_path_buf(&self) -> PathBuf {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
match self {
|
||||
Data::Text { text } => PathBuf::from(text),
|
||||
Data::Bytes { bytes } => {
|
||||
PathBuf::from(OsStr::from_bytes(bytes))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn into_path_buf(&self) -> PathBuf {
|
||||
match self {
|
||||
Data::Text { text } => PathBuf::from(text),
|
||||
Data::Bytes { bytes } => {
|
||||
PathBuf::from(String::from_utf8_lossy(&bytes).into_owned())
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
fn to_base64<T, S>(
|
||||
@@ -178,36 +145,3 @@ where P: AsRef<Path>,
|
||||
{
|
||||
path.as_ref().map(|p| Data::from_path(p.as_ref())).serialize(ser)
|
||||
}
|
||||
|
||||
// The following are some deserialization helpers, in case we decide to support
|
||||
// deserialization of the above types.
|
||||
|
||||
/*
|
||||
fn from_base64<'de, D>(
|
||||
de: D,
|
||||
) -> Result<Vec<u8>, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
let encoded = String::deserialize(de)?;
|
||||
let decoded = base64::decode(encoded.as_bytes())
|
||||
.map_err(D::Error::custom)?;
|
||||
Ok(decoded)
|
||||
}
|
||||
|
||||
fn deser_bytes<'de, D>(
|
||||
de: D,
|
||||
) -> Result<Vec<u8>, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
Data::deserialize(de).map(|datum| datum.into_bytes())
|
||||
}
|
||||
|
||||
fn deser_path<'de, D>(
|
||||
de: D,
|
||||
) -> Result<Option<PathBuf>, D::Error>
|
||||
where D: Deserializer<'de>
|
||||
{
|
||||
Option::<Data>::deserialize(de)
|
||||
.map(|opt| opt.map(|datum| datum.into_path_buf()))
|
||||
}
|
||||
*/
|
||||
|
@@ -1,7 +1,7 @@
|
||||
/*!
|
||||
This crate provides a featureful and fast printer for showing search results
|
||||
in a human readable way, and another printer for showing results in a machine
|
||||
readable way.
|
||||
This crate provides featureful and fast printers that interoperate with the
|
||||
[`grep-searcher`](https://docs.rs/grep-searcher)
|
||||
crate.
|
||||
|
||||
# Brief overview
|
||||
|
||||
@@ -70,12 +70,11 @@ fn example() -> Result<(), Box<Error>> {
|
||||
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate base64;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[cfg(test)]
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde;
|
||||
#[cfg(feature = "serde1")]
|
||||
@@ -85,7 +84,7 @@ extern crate serde_derive;
|
||||
extern crate serde_json;
|
||||
extern crate termcolor;
|
||||
|
||||
pub use color::{ColorError, ColorSpecs, UserColorSpec};
|
||||
pub use color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs};
|
||||
#[cfg(feature = "serde1")]
|
||||
pub use json::{JSON, JSONBuilder, JSONSink};
|
||||
pub use standard::{Standard, StandardBuilder, StandardSink};
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/// Like assert_eq, but nicer output for long strings.
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! assert_eq_printed {
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -190,6 +190,8 @@ impl SummaryBuilder {
|
||||
/// A [`UserColorSpec`](struct.UserColorSpec.html) can be constructed from
|
||||
/// a string in accordance with the color specification format. See the
|
||||
/// `UserColorSpec` type documentation for more details on the format.
|
||||
/// A [`ColorSpecs`](struct.ColorSpecs.html) can then be generated from
|
||||
/// zero or more `UserColorSpec`s.
|
||||
///
|
||||
/// Regardless of the color specifications provided here, whether color
|
||||
/// is actually used or not is determined by the implementation of
|
||||
@@ -401,7 +403,7 @@ impl<W: WriteColor> Summary<W> {
|
||||
where M: Matcher,
|
||||
P: ?Sized + AsRef<Path>,
|
||||
{
|
||||
if !self.config.path {
|
||||
if !self.config.path && !self.config.kind.requires_path() {
|
||||
return self.sink(matcher);
|
||||
}
|
||||
let stats =
|
||||
@@ -475,7 +477,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
|
||||
/// This is unaffected by the result of searches before the previous
|
||||
/// search.
|
||||
pub fn has_match(&self) -> bool {
|
||||
self.match_count > 0
|
||||
match self.summary.config.kind {
|
||||
SummaryKind::PathWithoutMatch => self.match_count == 0,
|
||||
_ => self.match_count > 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If binary data was found in the previous search, this returns the
|
||||
@@ -631,6 +636,34 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
|
||||
stats.add_bytes_searched(finish.byte_count());
|
||||
stats.add_bytes_printed(self.summary.wtr.borrow().count());
|
||||
}
|
||||
// If our binary detection method says to quit after seeing binary
|
||||
// data, then we shouldn't print any results at all, even if we've
|
||||
// found a match before detecting binary data. The intent here is to
|
||||
// keep BinaryDetection::quit as a form of filter. Otherwise, we can
|
||||
// present a matching file with a smaller number of matches than
|
||||
// there might be, which can be quite misleading.
|
||||
//
|
||||
// If our binary detection method is to convert binary data, then we
|
||||
// don't quit and therefore search the entire contents of the file.
|
||||
//
|
||||
// There is an unfortunate inconsistency here. Namely, when using
|
||||
// Quiet or PathWithMatch, then the printer can quit after the first
|
||||
// match seen, which could be long before seeing binary data. This
|
||||
// means that using PathWithMatch can print a path where as using
|
||||
// Count might not print it at all because of binary data.
|
||||
//
|
||||
// It's not possible to fix this without also potentially significantly
|
||||
// impacting the performance of Quiet or PathWithMatch, so we accept
|
||||
// the bug.
|
||||
if self.binary_byte_offset.is_some()
|
||||
&& searcher.binary_detection().quit_byte().is_some()
|
||||
{
|
||||
// Squash the match count. The statistics reported will still
|
||||
// contain the match count, but the "official" match count should
|
||||
// be zero.
|
||||
self.match_count = 0;
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let show_count =
|
||||
!self.summary.config.exclude_zero
|
||||
|
@@ -4,7 +4,8 @@ use std::io;
|
||||
use std::path::Path;
|
||||
use std::time;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineIter,
|
||||
SinkError, SinkContext, SinkContextKind, SinkMatch,
|
||||
@@ -157,6 +158,7 @@ pub struct Sunk<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Sunk<'a> {
|
||||
#[inline]
|
||||
pub fn empty() -> Sunk<'static> {
|
||||
Sunk {
|
||||
bytes: &[],
|
||||
@@ -168,6 +170,7 @@ impl<'a> Sunk<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_sink_match(
|
||||
sunk: &'a SinkMatch<'a>,
|
||||
original_matches: &'a [Match],
|
||||
@@ -186,6 +189,7 @@ impl<'a> Sunk<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from_sink_context(
|
||||
sunk: &'a SinkContext<'a>,
|
||||
original_matches: &'a [Match],
|
||||
@@ -204,30 +208,37 @@ impl<'a> Sunk<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn context_kind(&self) -> Option<&'a SinkContextKind> {
|
||||
self.context_kind
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn bytes(&self) -> &'a [u8] {
|
||||
self.bytes
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn matches(&self) -> &'a [Match] {
|
||||
self.matches
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn original_matches(&self) -> &'a [Match] {
|
||||
self.original_matches
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn lines(&self, line_term: u8) -> LineIter<'a> {
|
||||
LineIter::new(line_term, self.bytes())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn absolute_byte_offset(&self) -> u64 {
|
||||
self.absolute_byte_offset
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn line_number(&self) -> Option<u64> {
|
||||
self.line_number
|
||||
}
|
||||
@@ -257,21 +268,7 @@ pub struct PrinterPath<'a>(Cow<'a, [u8]>);
|
||||
impl<'a> PrinterPath<'a> {
|
||||
/// Create a new path suitable for printing.
|
||||
pub fn new(path: &'a Path) -> PrinterPath<'a> {
|
||||
PrinterPath::new_impl(path)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn new_impl(path: &'a Path) -> PrinterPath<'a> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
PrinterPath(Cow::Borrowed(path.as_os_str().as_bytes()))
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn new_impl(path: &'a Path) -> PrinterPath<'a> {
|
||||
PrinterPath(match path.to_string_lossy() {
|
||||
Cow::Owned(path) => Cow::Owned(path.into_bytes()),
|
||||
Cow::Borrowed(path) => Cow::Borrowed(path.as_bytes()),
|
||||
})
|
||||
PrinterPath(Vec::from_path_lossy(path))
|
||||
}
|
||||
|
||||
/// Create a new printer path from the given path which can be efficiently
|
||||
@@ -292,7 +289,7 @@ impl<'a> PrinterPath<'a> {
|
||||
/// path separators that are both replaced by `new_sep`. In all other
|
||||
/// environments, only `/` is treated as a path separator.
|
||||
fn replace_separator(&mut self, new_sep: u8) {
|
||||
let transformed_path: Vec<_> = self.as_bytes().iter().map(|&b| {
|
||||
let transformed_path: Vec<u8> = self.0.bytes().map(|b| {
|
||||
if b == b'/' || (cfg!(windows) && b == b'\\') {
|
||||
new_sep
|
||||
} else {
|
||||
@@ -304,7 +301,7 @@ impl<'a> PrinterPath<'a> {
|
||||
|
||||
/// Return the raw bytes for this path.
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
&*self.0
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
@@ -317,7 +314,7 @@ pub struct NiceDuration(pub time::Duration);
|
||||
|
||||
impl fmt::Display for NiceDuration {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:0.4}s", self.fractional_seconds())
|
||||
write!(f, "{:0.6}s", self.fractional_seconds())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -346,21 +343,26 @@ impl Serialize for NiceDuration {
|
||||
|
||||
/// Trim prefix ASCII spaces from the given slice and return the corresponding
|
||||
/// range.
|
||||
pub fn trim_ascii_prefix_range(slice: &[u8], range: Match) -> Match {
|
||||
fn is_space(b: &&u8) -> bool {
|
||||
match **b {
|
||||
///
|
||||
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
|
||||
/// terminator.
|
||||
pub fn trim_ascii_prefix(
|
||||
line_term: LineTerminator,
|
||||
slice: &[u8],
|
||||
range: Match,
|
||||
) -> Match {
|
||||
fn is_space(b: u8) -> bool {
|
||||
match b {
|
||||
b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
let count = slice[range].iter().take_while(is_space).count();
|
||||
let count = slice[range]
|
||||
.iter()
|
||||
.take_while(|&&b| -> bool {
|
||||
is_space(b) && !line_term.as_bytes().contains(&b)
|
||||
})
|
||||
.count();
|
||||
range.with_start(range.start() + count)
|
||||
}
|
||||
|
||||
/// Trim prefix ASCII spaces from the given slice and return the corresponding
|
||||
/// sub-slice.
|
||||
pub fn trim_ascii_prefix(slice: &[u8]) -> &[u8] {
|
||||
let range = trim_ascii_prefix_range(slice, Match::new(0, slice.len()));
|
||||
&slice[range]
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-regex"
|
||||
version = "0.0.1" #:version
|
||||
version = "0.1.5" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use Rust's regex library with the 'grep' crate.
|
||||
@@ -13,9 +13,9 @@ keywords = ["regex", "grep", "search", "pattern", "line"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
log = "0.4"
|
||||
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
|
||||
regex = "1"
|
||||
regex-syntax = "0.6"
|
||||
thread_local = "0.3.5"
|
||||
utf8-ranges = "1"
|
||||
aho-corasick = "0.7.3"
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
regex-syntax = "0.6.5"
|
||||
thread_local = "0.3.6"
|
||||
|
@@ -1,4 +1,35 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
||||
grep-regex
|
||||
----------
|
||||
The `grep-regex` crate provides an implementation of the `Matcher` trait from
|
||||
the `grep-matcher` crate. This implementation permits Rust's regex engine to
|
||||
be used in the `grep` crate for fast line oriented searching.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-regex)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-regex](https://docs.rs/grep-regex)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-regex = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_regex;
|
||||
```
|
||||
|
@@ -1,12 +1,13 @@
|
||||
use grep_matcher::{ByteSet, LineTerminator};
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
use regex_syntax::hir::Hir;
|
||||
use regex_syntax::hir::{self, Hir};
|
||||
|
||||
use ast::AstAnalysis;
|
||||
use crlf::crlfify;
|
||||
use error::Error;
|
||||
use literal::LiteralSets;
|
||||
use multi::alternation_literals;
|
||||
use non_matching::non_matching_bytes;
|
||||
use strip::strip_from_match;
|
||||
|
||||
@@ -67,19 +68,17 @@ impl Config {
|
||||
/// If there was a problem parsing the given expression then an error
|
||||
/// is returned.
|
||||
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
||||
let analysis = self.analysis(pattern)?;
|
||||
let expr = ::regex_syntax::ParserBuilder::new()
|
||||
.nest_limit(self.nest_limit)
|
||||
.octal(self.octal)
|
||||
let ast = self.ast(pattern)?;
|
||||
let analysis = self.analysis(&ast)?;
|
||||
let expr = hir::translate::TranslatorBuilder::new()
|
||||
.allow_invalid_utf8(true)
|
||||
.ignore_whitespace(self.ignore_whitespace)
|
||||
.case_insensitive(self.is_case_insensitive(&analysis)?)
|
||||
.case_insensitive(self.is_case_insensitive(&analysis))
|
||||
.multi_line(self.multi_line)
|
||||
.dot_matches_new_line(self.dot_matches_new_line)
|
||||
.swap_greed(self.swap_greed)
|
||||
.unicode(self.unicode)
|
||||
.build()
|
||||
.parse(pattern)
|
||||
.translate(pattern, &ast)
|
||||
.map_err(Error::regex)?;
|
||||
let expr = match self.line_terminator {
|
||||
None => expr,
|
||||
@@ -99,21 +98,34 @@ impl Config {
|
||||
fn is_case_insensitive(
|
||||
&self,
|
||||
analysis: &AstAnalysis,
|
||||
) -> Result<bool, Error> {
|
||||
) -> bool {
|
||||
if self.case_insensitive {
|
||||
return Ok(true);
|
||||
return true;
|
||||
}
|
||||
if !self.case_smart {
|
||||
return Ok(false);
|
||||
return false;
|
||||
}
|
||||
Ok(analysis.any_literal() && !analysis.any_uppercase())
|
||||
analysis.any_literal() && !analysis.any_uppercase()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this config is simple enough such that
|
||||
/// if the pattern is a simple alternation of literals, then it can be
|
||||
/// constructed via a plain Aho-Corasick automaton.
|
||||
///
|
||||
/// Note that it is OK to return true even when settings like `multi_line`
|
||||
/// are enabled, since if multi-line can impact the match semantics of a
|
||||
/// regex, then it is by definition not a simple alternation of literals.
|
||||
pub fn can_plain_aho_corasick(&self) -> bool {
|
||||
!self.word
|
||||
&& !self.case_insensitive
|
||||
&& !self.case_smart
|
||||
}
|
||||
|
||||
/// Perform analysis on the AST of this pattern.
|
||||
///
|
||||
/// This returns an error if the given pattern failed to parse.
|
||||
fn analysis(&self, pattern: &str) -> Result<AstAnalysis, Error> {
|
||||
Ok(AstAnalysis::from_ast(&self.ast(pattern)?))
|
||||
fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
|
||||
Ok(AstAnalysis::from_ast(ast))
|
||||
}
|
||||
|
||||
/// Parse the given pattern into its abstract syntax.
|
||||
@@ -160,11 +172,28 @@ impl ConfiguredHIR {
|
||||
non_matching_bytes(&self.expr)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this regex needs to have its match offsets
|
||||
/// tweaked because of CRLF support. Specifically, this occurs when the
|
||||
/// CRLF hack is enabled and the regex is line anchored at the end. In
|
||||
/// this case, matches that end with a `\r` have the `\r` stripped.
|
||||
pub fn needs_crlf_stripped(&self) -> bool {
|
||||
self.config.crlf && self.expr.is_line_anchored_end()
|
||||
}
|
||||
|
||||
/// Builds a regular expression from this HIR expression.
|
||||
pub fn regex(&self) -> Result<Regex, Error> {
|
||||
self.pattern_to_regex(&self.expr.to_string())
|
||||
}
|
||||
|
||||
/// If this HIR corresponds to an alternation of literals with no
|
||||
/// capturing groups, then this returns those literals.
|
||||
pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
|
||||
if !self.config.can_plain_aho_corasick() {
|
||||
return None;
|
||||
}
|
||||
alternation_literals(&self.expr)
|
||||
}
|
||||
|
||||
/// Applies the given function to the concrete syntax of this HIR and then
|
||||
/// generates a new HIR based on the result of the function in a way that
|
||||
/// preserves the configuration.
|
||||
@@ -199,23 +228,8 @@ impl ConfiguredHIR {
|
||||
if self.config.line_terminator.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
match LiteralSets::new(&self.expr).one_regex() {
|
||||
match LiteralSets::new(&self.expr).one_regex(self.config.word) {
|
||||
None => Ok(None),
|
||||
/*
|
||||
if !self.config.crlf {
|
||||
return Ok(None);
|
||||
}
|
||||
// If we're trying to support CRLF, then our "fast" line
|
||||
// oriented regex needs `$` to be able to match at a `\r\n`
|
||||
// boundary. The regex engine doesn't support this, so we
|
||||
// "fake" it by replacing `$` with `(?:\r?$)`. Since the
|
||||
// fast line regex is only used to detect lines, this never
|
||||
// infects match offsets. Namely, the regex generated via
|
||||
// `self.expr` is matched against lines with line terminators
|
||||
// stripped.
|
||||
let pattern = crlfify(self.expr.clone()).to_string();
|
||||
self.pattern_to_regex(&pattern).map(Some)
|
||||
*/
|
||||
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,112 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::Regex;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CRLFMatcher {
|
||||
/// The regex.
|
||||
regex: Regex,
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl CRLFMatcher {
|
||||
/// Create a new matcher from the given pattern that strips `\r` from the
|
||||
/// end of every match.
|
||||
///
|
||||
/// This panics if the given expression doesn't need its CRLF stripped.
|
||||
pub fn new(expr: &ConfiguredHIR) -> Result<CRLFMatcher, Error> {
|
||||
assert!(expr.needs_crlf_stripped());
|
||||
|
||||
let regex = expr.regex()?;
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i.checked_sub(1).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(CRLFMatcher { regex, names })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for CRLFMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
let m = match self.regex.find_at(haystack, at) {
|
||||
None => return Ok(None),
|
||||
Some(m) => Match::new(m.start(), m.end()),
|
||||
};
|
||||
Ok(Some(adjust_match(haystack, m)))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len().checked_sub(1).unwrap()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
caps.strip_crlf(false);
|
||||
let r = self.regex.captures_read_at(
|
||||
caps.locations_mut(), haystack, at,
|
||||
);
|
||||
if !r.is_some() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// If the end of our match includes a `\r`, then strip it from all
|
||||
// capture groups ending at the same location.
|
||||
let end = caps.locations().get(0).unwrap().1;
|
||||
if end > 0 && haystack.get(end - 1) == Some(&b'\r') {
|
||||
caps.strip_crlf(true);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter or
|
||||
// captures_iter. Namely, the iter methods are guaranteed to be correct
|
||||
// by virtue of implementing find_at and captures_at above.
|
||||
}
|
||||
|
||||
/// If the given match ends with a `\r`, then return a new match that ends
|
||||
/// immediately before the `\r`.
|
||||
pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
|
||||
if m.end() > 0 && haystack.get(m.end() - 1) == Some(&b'\r') {
|
||||
m.with_end(m.end() - 1)
|
||||
} else {
|
||||
m
|
||||
}
|
||||
}
|
||||
|
||||
/// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`.
|
||||
///
|
||||
/// This does not preserve the exact semantics of the given expression,
|
||||
|
@@ -4,13 +4,13 @@ An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate thread_local;
|
||||
extern crate utf8_ranges;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
@@ -21,6 +21,7 @@ mod crlf;
|
||||
mod error;
|
||||
mod literal;
|
||||
mod matcher;
|
||||
mod multi;
|
||||
mod non_matching;
|
||||
mod strip;
|
||||
mod util;
|
||||
|
@@ -47,18 +47,23 @@ impl LiteralSets {
|
||||
/// generated these literal sets. The idea here is that the pattern
|
||||
/// returned by this method is much cheaper to search for. i.e., It is
|
||||
/// usually a single literal or an alternation of literals.
|
||||
pub fn one_regex(&self) -> Option<String> {
|
||||
pub fn one_regex(&self, word: bool) -> Option<String> {
|
||||
// TODO: The logic in this function is basically inscrutable. It grew
|
||||
// organically in the old grep 0.1 crate. Ideally, it would be
|
||||
// re-worked. In fact, the entire inner literal extraction should be
|
||||
// re-worked. Actually, most of regex-syntax's literal extraction
|
||||
// should also be re-worked. Alas... only so much time in the day.
|
||||
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything.
|
||||
return None;
|
||||
if !word {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything. But we only do this
|
||||
// if we aren't doing a word regex, since a word regex adds
|
||||
// a `(?:\W|^)` to the beginning of the regex, thereby
|
||||
// defeating the regex engine's literal detection.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// Out of inner required literals, prefixes and suffixes, which one
|
||||
@@ -166,10 +171,10 @@ fn union_required(expr: &Hir, lits: &mut Literals) {
|
||||
lits.cut();
|
||||
continue;
|
||||
}
|
||||
if lits2.contains_empty() {
|
||||
if lits2.contains_empty() || !is_simple(&e) {
|
||||
lits.cut();
|
||||
}
|
||||
if !lits.cross_product(&lits2) {
|
||||
if !lits.cross_product(&lits2) || !lits2.any_complete() {
|
||||
// If this expression couldn't yield any literal that
|
||||
// could be extended, then we need to quit. Since we're
|
||||
// short-circuiting, we also need to freeze every member.
|
||||
@@ -250,6 +255,20 @@ fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
|
||||
}
|
||||
}
|
||||
|
||||
fn is_simple(expr: &Hir) -> bool {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty
|
||||
| HirKind::Literal(_)
|
||||
| HirKind::Class(_)
|
||||
| HirKind::Repetition(_)
|
||||
| HirKind::Concat(_)
|
||||
| HirKind::Alternation(_) => true,
|
||||
HirKind::Anchor(_)
|
||||
| HirKind::WordBoundary(_)
|
||||
| HirKind::Group(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of characters in the given class.
|
||||
fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
|
||||
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
|
||||
@@ -271,7 +290,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn one_regex(pattern: &str) -> Option<String> {
|
||||
sets(pattern).one_regex()
|
||||
sets(pattern).one_regex(false)
|
||||
}
|
||||
|
||||
// Put a pattern into the same format as the one returned by `one_regex`.
|
||||
@@ -301,4 +320,12 @@ mod tests {
|
||||
// assert_eq!(one_regex(r"\w(foo|bar|baz)"), pat("foo|bar|baz"));
|
||||
// assert_eq!(one_regex(r"\w(foo|bar|baz)\w"), pat("foo|bar|baz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regression_1064() {
|
||||
// Regression from:
|
||||
// https://github.com/BurntSushi/ripgrep/issues/1064
|
||||
// assert_eq!(one_regex(r"a.*c"), pat("a"));
|
||||
assert_eq!(one_regex(r"a(.*c)"), pat("a"));
|
||||
}
|
||||
}
|
||||
|
@@ -6,7 +6,9 @@ use grep_matcher::{
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
use config::{Config, ConfiguredHIR};
|
||||
use crlf::CRLFMatcher;
|
||||
use error::Error;
|
||||
use multi::MultiLiteralMatcher;
|
||||
use word::WordMatcher;
|
||||
|
||||
/// A builder for constructing a `Matcher` using regular expressions.
|
||||
@@ -49,14 +51,61 @@ impl RegexMatcherBuilder {
|
||||
if let Some(ref re) = fast_line_regex {
|
||||
trace!("extracted fast line regex: {:?}", re);
|
||||
}
|
||||
|
||||
let matcher = RegexMatcherImpl::new(&chir)?;
|
||||
trace!("final regex: {:?}", matcher.regex());
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
matcher: RegexMatcherImpl::new(&chir)?,
|
||||
matcher: matcher,
|
||||
fast_line_regex: fast_line_regex,
|
||||
non_matching_bytes: non_matching_bytes,
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a new matcher from a plain alternation of literals.
|
||||
///
|
||||
/// Depending on the configuration set by the builder, this may be able to
|
||||
/// build a matcher substantially faster than by joining the patterns with
|
||||
/// a `|` and calling `build`.
|
||||
pub fn build_literals<B: AsRef<str>>(
|
||||
&self,
|
||||
literals: &[B],
|
||||
) -> Result<RegexMatcher, Error> {
|
||||
let mut has_escape = false;
|
||||
let mut slices = vec![];
|
||||
for lit in literals {
|
||||
slices.push(lit.as_ref());
|
||||
has_escape = has_escape || lit.as_ref().contains('\\');
|
||||
}
|
||||
// Even when we have a fixed set of literals, we might still want to
|
||||
// use the regex engine. Specifically, if any string has an escape
|
||||
// in it, then we probably can't feed it to Aho-Corasick without
|
||||
// removing the escape. Additionally, if there are any particular
|
||||
// special match semantics we need to honor, that Aho-Corasick isn't
|
||||
// enough. Finally, the regex engine can do really well with a small
|
||||
// number of literals (at time of writing, this is changing soon), so
|
||||
// we use it when there's a small set.
|
||||
//
|
||||
// Yes, this is one giant hack. Ideally, this entirely separate literal
|
||||
// matcher that uses Aho-Corasick would be pushed down into the regex
|
||||
// engine.
|
||||
if has_escape
|
||||
|| !self.config.can_plain_aho_corasick()
|
||||
|| literals.len() < 40
|
||||
{
|
||||
return self.build(&slices.join("|"));
|
||||
}
|
||||
|
||||
let matcher = MultiLiteralMatcher::new(&slices)?;
|
||||
let imp = RegexMatcherImpl::MultiLiteral(matcher);
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
matcher: imp,
|
||||
fast_line_regex: None,
|
||||
non_matching_bytes: ByteSet::empty(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the value for the case insensitive (`i`) flag.
|
||||
///
|
||||
/// When enabled, letters in the pattern will match both upper case and
|
||||
@@ -263,7 +312,7 @@ impl RegexMatcherBuilder {
|
||||
/// be slightly different than what one would expect given the pattern.
|
||||
/// This is the trade off made: in many cases, `$` will "just work" in the
|
||||
/// presence of `\r\n` line terminators, but matches may require some
|
||||
/// trimming to faithfully represent the indended match.
|
||||
/// trimming to faithfully represent the intended match.
|
||||
///
|
||||
/// Note that if you do not wish to set the line terminator but would still
|
||||
/// like `$` to match `\r\n` line terminators, then it is valid to call
|
||||
@@ -323,8 +372,15 @@ impl RegexMatcher {
|
||||
/// Create a new matcher from the given pattern using the default
|
||||
/// configuration, but matches lines terminated by `\n`.
|
||||
///
|
||||
/// This returns an error if the given pattern contains a literal `\n`.
|
||||
/// Other uses of `\n` (such as in `\s`) are removed transparently.
|
||||
/// This is meant to be a convenience constructor for using a
|
||||
/// `RegexMatcherBuilder` and setting its
|
||||
/// [`line_terminator`](struct.RegexMatcherBuilder.html#method.line_terminator)
|
||||
/// to `\n`. The purpose of using this constructor is to permit special
|
||||
/// optimizations that help speed up line oriented search. These types of
|
||||
/// optimizations are only appropriate when matches span no more than one
|
||||
/// line. For this reason, this constructor will return an error if the
|
||||
/// given pattern contains a literal `\n`. Other uses of `\n` (such as in
|
||||
/// `\s`) are removed transparently.
|
||||
pub fn new_line_matcher(pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
RegexMatcherBuilder::new()
|
||||
.line_terminator(Some(b'\n'))
|
||||
@@ -337,6 +393,13 @@ impl RegexMatcher {
|
||||
enum RegexMatcherImpl {
|
||||
/// The standard matcher used for all regular expressions.
|
||||
Standard(StandardMatcher),
|
||||
/// A matcher for an alternation of plain literals.
|
||||
MultiLiteral(MultiLiteralMatcher),
|
||||
/// A matcher that strips `\r` from the end of matches.
|
||||
///
|
||||
/// This is only used when the CRLF hack is enabled and the regex is line
|
||||
/// anchored at the end.
|
||||
CRLF(CRLFMatcher),
|
||||
/// A matcher that only matches at word boundaries. This transforms the
|
||||
/// regex to `(^|\W)(...)($|\W)` instead of the more intuitive `\b(...)\b`.
|
||||
/// Because of this, the WordMatcher provides its own implementation of
|
||||
@@ -351,10 +414,28 @@ impl RegexMatcherImpl {
|
||||
fn new(expr: &ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
|
||||
if expr.config().word {
|
||||
Ok(RegexMatcherImpl::Word(WordMatcher::new(expr)?))
|
||||
} else if expr.needs_crlf_stripped() {
|
||||
Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
|
||||
} else {
|
||||
if let Some(lits) = expr.alternation_literals() {
|
||||
if lits.len() >= 40 {
|
||||
let matcher = MultiLiteralMatcher::new(&lits)?;
|
||||
return Ok(RegexMatcherImpl::MultiLiteral(matcher));
|
||||
}
|
||||
}
|
||||
Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the underlying regex object used.
|
||||
fn regex(&self) -> String {
|
||||
match *self {
|
||||
RegexMatcherImpl::Word(ref x) => x.regex().to_string(),
|
||||
RegexMatcherImpl::CRLF(ref x) => x.regex().to_string(),
|
||||
RegexMatcherImpl::MultiLiteral(_) => "<N/A>".to_string(),
|
||||
RegexMatcherImpl::Standard(ref x) => x.regex.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This implementation just dispatches on the internal matcher impl except
|
||||
@@ -372,6 +453,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find_at(haystack, at),
|
||||
MultiLiteral(ref m) => m.find_at(haystack, at),
|
||||
CRLF(ref m) => m.find_at(haystack, at),
|
||||
Word(ref m) => m.find_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -380,6 +463,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.new_captures(),
|
||||
MultiLiteral(ref m) => m.new_captures(),
|
||||
CRLF(ref m) => m.new_captures(),
|
||||
Word(ref m) => m.new_captures(),
|
||||
}
|
||||
}
|
||||
@@ -388,6 +473,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.capture_count(),
|
||||
MultiLiteral(ref m) => m.capture_count(),
|
||||
CRLF(ref m) => m.capture_count(),
|
||||
Word(ref m) => m.capture_count(),
|
||||
}
|
||||
}
|
||||
@@ -396,6 +483,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.capture_index(name),
|
||||
MultiLiteral(ref m) => m.capture_index(name),
|
||||
CRLF(ref m) => m.capture_index(name),
|
||||
Word(ref m) => m.capture_index(name),
|
||||
}
|
||||
}
|
||||
@@ -404,6 +493,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find(haystack),
|
||||
MultiLiteral(ref m) => m.find(haystack),
|
||||
CRLF(ref m) => m.find(haystack),
|
||||
Word(ref m) => m.find(haystack),
|
||||
}
|
||||
}
|
||||
@@ -418,6 +509,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find_iter(haystack, matched),
|
||||
MultiLiteral(ref m) => m.find_iter(haystack, matched),
|
||||
CRLF(ref m) => m.find_iter(haystack, matched),
|
||||
Word(ref m) => m.find_iter(haystack, matched),
|
||||
}
|
||||
}
|
||||
@@ -432,6 +525,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.try_find_iter(haystack, matched),
|
||||
MultiLiteral(ref m) => m.try_find_iter(haystack, matched),
|
||||
CRLF(ref m) => m.try_find_iter(haystack, matched),
|
||||
Word(ref m) => m.try_find_iter(haystack, matched),
|
||||
}
|
||||
}
|
||||
@@ -444,6 +539,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures(haystack, caps),
|
||||
MultiLiteral(ref m) => m.captures(haystack, caps),
|
||||
CRLF(ref m) => m.captures(haystack, caps),
|
||||
Word(ref m) => m.captures(haystack, caps),
|
||||
}
|
||||
}
|
||||
@@ -459,6 +556,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
MultiLiteral(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
CRLF(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
Word(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
}
|
||||
}
|
||||
@@ -474,6 +573,10 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
MultiLiteral(ref m) => {
|
||||
m.try_captures_iter(haystack, caps, matched)
|
||||
}
|
||||
CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
Word(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
}
|
||||
}
|
||||
@@ -487,6 +590,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures_at(haystack, at, caps),
|
||||
MultiLiteral(ref m) => m.captures_at(haystack, at, caps),
|
||||
CRLF(ref m) => m.captures_at(haystack, at, caps),
|
||||
Word(ref m) => m.captures_at(haystack, at, caps),
|
||||
}
|
||||
}
|
||||
@@ -502,6 +607,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.replace(haystack, dst, append),
|
||||
MultiLiteral(ref m) => m.replace(haystack, dst, append),
|
||||
CRLF(ref m) => m.replace(haystack, dst, append),
|
||||
Word(ref m) => m.replace(haystack, dst, append),
|
||||
}
|
||||
}
|
||||
@@ -520,6 +627,12 @@ impl Matcher for RegexMatcher {
|
||||
Standard(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
MultiLiteral(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
CRLF(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
Word(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
@@ -530,6 +643,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.is_match(haystack),
|
||||
MultiLiteral(ref m) => m.is_match(haystack),
|
||||
CRLF(ref m) => m.is_match(haystack),
|
||||
Word(ref m) => m.is_match(haystack),
|
||||
}
|
||||
}
|
||||
@@ -542,6 +657,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.is_match_at(haystack, at),
|
||||
MultiLiteral(ref m) => m.is_match_at(haystack, at),
|
||||
CRLF(ref m) => m.is_match_at(haystack, at),
|
||||
Word(ref m) => m.is_match_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -553,6 +670,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.shortest_match(haystack),
|
||||
MultiLiteral(ref m) => m.shortest_match(haystack),
|
||||
CRLF(ref m) => m.shortest_match(haystack),
|
||||
Word(ref m) => m.shortest_match(haystack),
|
||||
}
|
||||
}
|
||||
@@ -565,6 +684,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.shortest_match_at(haystack, at),
|
||||
MultiLiteral(ref m) => m.shortest_match_at(haystack, at),
|
||||
CRLF(ref m) => m.shortest_match_at(haystack, at),
|
||||
Word(ref m) => m.shortest_match_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -664,7 +785,9 @@ impl Matcher for StandardMatcher {
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
Ok(self.regex.captures_read_at(&mut caps.locs, haystack, at).is_some())
|
||||
Ok(self.regex.captures_read_at(
|
||||
&mut caps.locations_mut(), haystack, at,
|
||||
).is_some())
|
||||
}
|
||||
|
||||
fn shortest_match_at(
|
||||
@@ -691,34 +814,84 @@ impl Matcher for StandardMatcher {
|
||||
/// index of the group using the corresponding matcher's `capture_index`
|
||||
/// method, and then use that index with `RegexCaptures::get`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures {
|
||||
/// Where the locations are stored.
|
||||
locs: CaptureLocations,
|
||||
/// These captures behave as if the capturing groups begin at the given
|
||||
/// offset. When set to `0`, this has no affect and capture groups are
|
||||
/// indexed like normal.
|
||||
///
|
||||
/// This is useful when building matchers that wrap arbitrary regular
|
||||
/// expressions. For example, `WordMatcher` takes an existing regex `re`
|
||||
/// and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that the regex
|
||||
/// has been wrapped from the caller. In order to do this, the matcher
|
||||
/// and the capturing groups must behave as if `(re)` is the `0`th capture
|
||||
/// group.
|
||||
offset: usize,
|
||||
pub struct RegexCaptures(RegexCapturesImp);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum RegexCapturesImp {
|
||||
AhoCorasick {
|
||||
/// The start and end of the match, corresponding to capture group 0.
|
||||
mat: Option<Match>,
|
||||
},
|
||||
Regex {
|
||||
/// Where the locations are stored.
|
||||
locs: CaptureLocations,
|
||||
/// These captures behave as if the capturing groups begin at the given
|
||||
/// offset. When set to `0`, this has no affect and capture groups are
|
||||
/// indexed like normal.
|
||||
///
|
||||
/// This is useful when building matchers that wrap arbitrary regular
|
||||
/// expressions. For example, `WordMatcher` takes an existing regex
|
||||
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
|
||||
/// the regex has been wrapped from the caller. In order to do this,
|
||||
/// the matcher and the capturing groups must behave as if `(re)` is
|
||||
/// the `0`th capture group.
|
||||
offset: usize,
|
||||
/// When enable, the end of a match has `\r` stripped from it, if one
|
||||
/// exists.
|
||||
strip_crlf: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.locs.len().checked_sub(self.offset).unwrap()
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => 1,
|
||||
RegexCapturesImp::Regex { ref locs, offset, .. } => {
|
||||
locs.len().checked_sub(offset).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
let actual = i.checked_add(self.offset).unwrap();
|
||||
self.locs.pos(actual).map(|(s, e)| Match::new(s, e))
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { mat, .. } => {
|
||||
if i == 0 {
|
||||
mat
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
RegexCapturesImp::Regex { ref locs, offset, strip_crlf } => {
|
||||
if !strip_crlf {
|
||||
let actual = i.checked_add(offset).unwrap();
|
||||
return locs.pos(actual).map(|(s, e)| Match::new(s, e));
|
||||
}
|
||||
|
||||
// currently don't support capture offsetting with CRLF
|
||||
// stripping
|
||||
assert_eq!(offset, 0);
|
||||
let m = match locs.pos(i).map(|(s, e)| Match::new(s, e)) {
|
||||
None => return None,
|
||||
Some(m) => m,
|
||||
};
|
||||
// If the end position of this match corresponds to the end
|
||||
// position of the overall match, then we apply our CRLF
|
||||
// stripping. Otherwise, we cannot assume stripping is correct.
|
||||
if i == 0 || m.end() == locs.pos(0).unwrap().1 {
|
||||
Some(m.with_end(m.end() - 1))
|
||||
} else {
|
||||
Some(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexCaptures {
|
||||
pub(crate) fn simple() -> RegexCaptures {
|
||||
RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None })
|
||||
}
|
||||
|
||||
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
|
||||
RegexCaptures::with_offset(locs, 0)
|
||||
}
|
||||
@@ -727,11 +900,53 @@ impl RegexCaptures {
|
||||
locs: CaptureLocations,
|
||||
offset: usize,
|
||||
) -> RegexCaptures {
|
||||
RegexCaptures { locs, offset }
|
||||
RegexCaptures(RegexCapturesImp::Regex {
|
||||
locs, offset, strip_crlf: false,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn locations(&mut self) -> &mut CaptureLocations {
|
||||
&mut self.locs
|
||||
pub(crate) fn locations(&self) -> &CaptureLocations {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => {
|
||||
panic!("getting locations for simple captures is invalid")
|
||||
}
|
||||
RegexCapturesImp::Regex { ref locs, .. } => {
|
||||
locs
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn locations_mut(&mut self) -> &mut CaptureLocations {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => {
|
||||
panic!("getting locations for simple captures is invalid")
|
||||
}
|
||||
RegexCapturesImp::Regex { ref mut locs, .. } => {
|
||||
locs
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn strip_crlf(&mut self, yes: bool) {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => {
|
||||
panic!("setting strip_crlf for simple captures is invalid")
|
||||
}
|
||||
RegexCapturesImp::Regex { ref mut strip_crlf, .. } => {
|
||||
*strip_crlf = yes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_simple(&mut self, one: Option<Match>) {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { ref mut mat } => {
|
||||
*mat = one;
|
||||
}
|
||||
RegexCapturesImp::Regex { .. } => {
|
||||
panic!("setting simple captures for regex is invalid")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
127
grep-regex/src/multi.rs
Normal file
127
grep-regex/src/multi.rs
Normal file
@@ -0,0 +1,127 @@
|
||||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
||||
use grep_matcher::{Matcher, Match, NoError};
|
||||
use regex_syntax::hir::Hir;
|
||||
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for an alternation of literals.
|
||||
///
|
||||
/// Ideally, this optimization would be pushed down into the regex engine, but
|
||||
/// making this work correctly there would require quite a bit of refactoring.
|
||||
/// Moreover, doing it one layer above lets us do thing like, "if we
|
||||
/// specifically only want to search for literals, then don't bother with
|
||||
/// regex parsing at all."
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct MultiLiteralMatcher {
|
||||
/// The Aho-Corasick automaton.
|
||||
ac: AhoCorasick,
|
||||
}
|
||||
|
||||
impl MultiLiteralMatcher {
|
||||
/// Create a new multi-literal matcher from the given literals.
|
||||
pub fn new<B: AsRef<[u8]>>(
|
||||
literals: &[B],
|
||||
) -> Result<MultiLiteralMatcher, Error> {
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.match_kind(MatchKind::LeftmostFirst)
|
||||
.auto_configure(literals)
|
||||
.build_with_size::<usize, _, _>(literals)
|
||||
.map_err(Error::regex)?;
|
||||
Ok(MultiLiteralMatcher { ac })
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for MultiLiteralMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
match self.ac.find(&haystack[at..]) {
|
||||
None => Ok(None),
|
||||
Some(m) => Ok(Some(Match::new(at + m.start(), at + m.end()))),
|
||||
}
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::simple())
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
1
|
||||
}
|
||||
|
||||
fn capture_index(&self, _: &str) -> Option<usize> {
|
||||
None
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
caps.set_simple(None);
|
||||
let mat = self.find_at(haystack, at)?;
|
||||
caps.set_simple(mat);
|
||||
Ok(mat.is_some())
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter. Namely,
|
||||
// the iter methods are guaranteed to be correct by virtue of implementing
|
||||
// find_at above.
|
||||
}
|
||||
|
||||
/// Alternation literals checks if the given HIR is a simple alternation of
|
||||
/// literals, and if so, returns them. Otherwise, this returns None.
|
||||
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
|
||||
use regex_syntax::hir::{HirKind, Literal};
|
||||
|
||||
// This is pretty hacky, but basically, if `is_alternation_literal` is
|
||||
// true, then we can make several assumptions about the structure of our
|
||||
// HIR. This is what justifies the `unreachable!` statements below.
|
||||
|
||||
if !expr.is_alternation_literal() {
|
||||
return None;
|
||||
}
|
||||
let alts = match *expr.kind() {
|
||||
HirKind::Alternation(ref alts) => alts,
|
||||
_ => return None, // one literal isn't worth it
|
||||
};
|
||||
|
||||
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| {
|
||||
match *lit {
|
||||
Literal::Unicode(c) => {
|
||||
let mut buf = [0; 4];
|
||||
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
|
||||
}
|
||||
Literal::Byte(b) => {
|
||||
dst.push(b);
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
let mut lits = vec![];
|
||||
for alt in alts {
|
||||
let mut lit = vec![];
|
||||
match *alt.kind() {
|
||||
HirKind::Empty => {}
|
||||
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||
HirKind::Concat(ref exprs) => {
|
||||
for e in exprs {
|
||||
match *e.kind() {
|
||||
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||
_ => unreachable!("expected literal, got {:?}", e),
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => unreachable!("expected literal or concat, got {:?}", alt),
|
||||
}
|
||||
lits.push(lit);
|
||||
}
|
||||
Some(lits)
|
||||
}
|
@@ -1,6 +1,6 @@
|
||||
use grep_matcher::ByteSet;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
use utf8_ranges::Utf8Sequences;
|
||||
use regex_syntax::utf8::Utf8Sequences;
|
||||
|
||||
/// Return a confirmed set of non-matching bytes from the given expression.
|
||||
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
|
@@ -55,6 +55,11 @@ impl WordMatcher {
|
||||
}
|
||||
Ok(WordMatcher { regex, names, locs })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for WordMatcher {
|
||||
@@ -98,7 +103,9 @@ impl Matcher for WordMatcher {
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
let r = self.regex.captures_read_at(caps.locations(), haystack, at);
|
||||
let r = self.regex.captures_read_at(
|
||||
caps.locations_mut(), haystack, at,
|
||||
);
|
||||
Ok(r.is_some())
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-searcher"
|
||||
version = "0.0.1" #:version
|
||||
version = "0.1.6" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -13,23 +13,21 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
bytecount = "0.3.1"
|
||||
encoding_rs = "0.8"
|
||||
encoding_rs_io = "0.1"
|
||||
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
|
||||
log = "0.4"
|
||||
memchr = "2"
|
||||
memmap = "0.6"
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
bytecount = "0.5"
|
||||
encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.6"
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
log = "0.4.5"
|
||||
memmap = "0.7"
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.0.1", path = "../grep-regex" }
|
||||
regex = "1"
|
||||
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||
regex = "1.1"
|
||||
|
||||
[features]
|
||||
avx-accel = [
|
||||
"bytecount/avx-accel",
|
||||
]
|
||||
simd-accel = [
|
||||
"bytecount/simd-accel",
|
||||
"encoding_rs/simd-accel",
|
||||
]
|
||||
default = ["bytecount/runtime-dispatch-simd"]
|
||||
simd-accel = ["encoding_rs/simd-accel"]
|
||||
|
||||
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
||||
avx-accel = []
|
||||
|
@@ -1,4 +1,37 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
||||
grep-searcher
|
||||
-------------
|
||||
A high level library for executing fast line oriented searches. This handles
|
||||
things like reporting contextual lines, counting lines, inverting a search,
|
||||
detecting binary data, automatic UTF-16 transcoding and deciding whether or not
|
||||
to use memory maps.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep-searcher)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep-searcher](https://docs.rs/grep-searcher)
|
||||
|
||||
**NOTE:** You probably don't want to use this crate directly. Instead, you
|
||||
should prefer the facade defined in the
|
||||
[`grep`](https://docs.rs/grep)
|
||||
crate.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep-searcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_searcher;
|
||||
```
|
||||
|
@@ -17,7 +17,7 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
fn example() -> Result<(), Box<Error>> {
|
||||
fn example() -> Result<(), Box<dyn Error>> {
|
||||
let pattern = match env::args().nth(1) {
|
||||
Some(pattern) => pattern,
|
||||
None => return Err(From::from(format!(
|
||||
|
@@ -74,14 +74,11 @@ fn example() -> Result<(), Box<Error>> {
|
||||
let mut matches: Vec<(u64, String)> = vec![];
|
||||
Searcher::new().search_slice(&matcher, SHERLOCK, UTF8(|lnum, line| {
|
||||
// We are guaranteed to find a match, so the unwrap is OK.
|
||||
eprintln!("LINE: {:?}", line);
|
||||
let mymatch = matcher.find(line.as_bytes())?.unwrap();
|
||||
matches.push((lnum, line[mymatch].to_string()));
|
||||
Ok(true)
|
||||
}))?;
|
||||
|
||||
eprintln!("MATCHES: {:?}", matches);
|
||||
|
||||
assert_eq!(matches.len(), 2);
|
||||
assert_eq!(
|
||||
matches[0],
|
||||
@@ -102,13 +99,13 @@ searches stdin.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate bstr;
|
||||
extern crate bytecount;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate memmap;
|
||||
#[cfg(test)]
|
||||
extern crate regex;
|
||||
|
@@ -1,8 +1,7 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
use std::ptr;
|
||||
|
||||
use memchr::{memchr, memrchr};
|
||||
use bstr::ByteSlice;
|
||||
|
||||
/// The default buffer capacity that we use for the line buffer.
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||
@@ -258,6 +257,13 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
self.line_buffer.buffer()
|
||||
}
|
||||
|
||||
/// Return the buffer as a BStr, used for convenient equality checking
|
||||
/// in tests only.
|
||||
#[cfg(test)]
|
||||
fn bstr(&self) -> &::bstr::BStr {
|
||||
self.buffer().as_bstr()
|
||||
}
|
||||
|
||||
/// Consume the number of bytes provided. This must be less than or equal
|
||||
/// to the number of bytes returned by `buffer`.
|
||||
pub fn consume(&mut self, amt: usize) {
|
||||
@@ -294,8 +300,8 @@ pub struct LineBuffer {
|
||||
/// has been exhausted.
|
||||
last_lineterm: usize,
|
||||
/// The end position of the buffer. This is always greater than or equal to
|
||||
/// lastnl. The bytes between lastnl and end, if any, always correspond to
|
||||
/// a partial line.
|
||||
/// last_lineterm. The bytes between last_lineterm and end, if any, always
|
||||
/// correspond to a partial line.
|
||||
end: usize,
|
||||
/// The absolute byte offset corresponding to `pos`. This is most typically
|
||||
/// not a valid index into addressable memory, but rather, an offset that
|
||||
@@ -312,6 +318,14 @@ pub struct LineBuffer {
|
||||
}
|
||||
|
||||
impl LineBuffer {
|
||||
/// Set the binary detection method used on this line buffer.
|
||||
///
|
||||
/// This permits dynamically changing the binary detection strategy on
|
||||
/// an existing line buffer without needing to create a new one.
|
||||
pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
|
||||
self.config.binary = binary;
|
||||
}
|
||||
|
||||
/// Reset this buffer, such that it can be used with a new reader.
|
||||
fn clear(&mut self) {
|
||||
self.pos = 0;
|
||||
@@ -396,7 +410,7 @@ impl LineBuffer {
|
||||
assert_eq!(self.pos, 0);
|
||||
loop {
|
||||
self.ensure_capacity()?;
|
||||
let readlen = rdr.read(self.free_buffer())?;
|
||||
let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
|
||||
if readlen == 0 {
|
||||
// We're only done reading for good once the caller has
|
||||
// consumed everything.
|
||||
@@ -416,7 +430,7 @@ impl LineBuffer {
|
||||
match self.config.binary {
|
||||
BinaryDetection::None => {} // nothing to do
|
||||
BinaryDetection::Quit(byte) => {
|
||||
if let Some(i) = memchr(byte, newbytes) {
|
||||
if let Some(i) = newbytes.find_byte(byte) {
|
||||
self.end = oldend + i;
|
||||
self.last_lineterm = self.end;
|
||||
self.binary_byte_offset =
|
||||
@@ -444,7 +458,7 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
// Update our `last_lineterm` positions if we read one.
|
||||
if let Some(i) = memrchr(self.config.lineterm, newbytes) {
|
||||
if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
|
||||
self.last_lineterm = oldend + i + 1;
|
||||
return Ok(true);
|
||||
}
|
||||
@@ -467,25 +481,11 @@ impl LineBuffer {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(self.pos < self.end && self.end <= self.buf.len());
|
||||
let roll_len = self.end - self.pos;
|
||||
unsafe {
|
||||
// SAFETY: A buffer contains Copy data, so there's no problem
|
||||
// moving it around. Safety also depends on our indices being
|
||||
// in bounds, which they should always be, and we enforce with
|
||||
// an assert above.
|
||||
//
|
||||
// TODO: It seems like it should be possible to do this in safe
|
||||
// code that results in the same codegen.
|
||||
ptr::copy(
|
||||
self.buf[self.pos..].as_ptr(),
|
||||
self.buf.as_mut_ptr(),
|
||||
roll_len,
|
||||
);
|
||||
}
|
||||
self.buf.copy_within_str(self.pos..self.end, 0);
|
||||
self.pos = 0;
|
||||
self.last_lineterm = roll_len;
|
||||
self.end = self.last_lineterm;
|
||||
self.end = roll_len;
|
||||
}
|
||||
|
||||
/// Ensures that the internal buffer has a non-zero amount of free space
|
||||
@@ -518,14 +518,15 @@ impl LineBuffer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces `src` with `replacement` in bytes.
|
||||
/// Replaces `src` with `replacement` in bytes, and return the offset of the
|
||||
/// first replacement, if one exists.
|
||||
fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
if src == replacement {
|
||||
return None;
|
||||
}
|
||||
let mut first_pos = None;
|
||||
let mut pos = 0;
|
||||
while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
|
||||
while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
|
||||
if first_pos.is_none() {
|
||||
first_pos = Some(i);
|
||||
}
|
||||
@@ -542,6 +543,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::str;
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
@@ -557,18 +559,14 @@ and exhibited clearly, with a label attached.\
|
||||
slice.to_string()
|
||||
}
|
||||
|
||||
fn btos(slice: &[u8]) -> &str {
|
||||
str::from_utf8(slice).unwrap()
|
||||
}
|
||||
|
||||
fn replace_str(
|
||||
slice: &str,
|
||||
src: u8,
|
||||
replacement: u8,
|
||||
) -> (String, Option<usize>) {
|
||||
let mut dst = slice.to_string().into_bytes();
|
||||
let mut dst = Vec::from(slice);
|
||||
let result = replace_bytes(&mut dst, src, replacement);
|
||||
(String::from_utf8(dst).unwrap(), result)
|
||||
(dst.into_string().unwrap(), result)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -589,7 +587,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\n");
|
||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||
rdr.consume(5);
|
||||
assert_eq!(rdr.absolute_byte_offset(), 5);
|
||||
@@ -597,7 +595,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(rdr.absolute_byte_offset(), 11);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
||||
assert_eq!(rdr.bstr(), "maggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -612,7 +610,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -627,7 +625,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\n");
|
||||
assert_eq!(rdr.bstr(), "\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -642,7 +640,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\n\n");
|
||||
assert_eq!(rdr.bstr(), "\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -682,10 +680,10 @@ and exhibited clearly, with a label attached.\
|
||||
|
||||
let mut got = vec![];
|
||||
while rdr.fill().unwrap() {
|
||||
got.extend(rdr.buffer());
|
||||
got.push_str(rdr.buffer());
|
||||
rdr.consume_all();
|
||||
}
|
||||
assert_eq!(bytes, btos(&got));
|
||||
assert_eq!(bytes, got.as_bstr());
|
||||
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
||||
assert_eq!(rdr.binary_byte_offset(), None);
|
||||
}
|
||||
@@ -700,11 +698,11 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
||||
assert_eq!(rdr.bstr(), "homer\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
||||
assert_eq!(rdr.bstr(), "lisa\n");
|
||||
rdr.consume_all();
|
||||
|
||||
// This returns an error because while we have just enough room to
|
||||
@@ -714,11 +712,11 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.fill().is_err());
|
||||
|
||||
// We can mush on though!
|
||||
assert_eq!(btos(rdr.buffer()), "m");
|
||||
assert_eq!(rdr.bstr(), "m");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "aggie");
|
||||
assert_eq!(rdr.bstr(), "aggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -734,16 +732,16 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
||||
assert_eq!(rdr.bstr(), "homer\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
||||
assert_eq!(rdr.bstr(), "lisa\n");
|
||||
rdr.consume_all();
|
||||
|
||||
// We have just enough space.
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
||||
assert_eq!(rdr.bstr(), "maggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -759,7 +757,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().is_err());
|
||||
assert_eq!(btos(rdr.buffer()), "");
|
||||
assert_eq!(rdr.bstr(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -771,7 +769,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nli\x00sa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -790,7 +788,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli");
|
||||
assert_eq!(rdr.bstr(), "homer\nli");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -807,7 +805,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "");
|
||||
assert_eq!(rdr.bstr(), "");
|
||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||
assert_eq!(rdr.binary_byte_offset(), Some(0));
|
||||
}
|
||||
@@ -823,7 +821,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -842,7 +840,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -860,7 +858,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\
|
||||
assert_eq!(rdr.bstr(), "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, s\
|
||||
");
|
||||
@@ -883,7 +881,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nli\nsa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -902,7 +900,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "\nhomer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -921,7 +919,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -940,7 +938,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
|
@@ -2,8 +2,8 @@
|
||||
A collection of routines for performing operations on lines.
|
||||
*/
|
||||
|
||||
use bstr::ByteSlice;
|
||||
use bytecount;
|
||||
use memchr::{memchr, memrchr};
|
||||
use grep_matcher::{LineTerminator, Match};
|
||||
|
||||
/// An iterator over lines in a particular slice of bytes.
|
||||
@@ -72,9 +72,20 @@ impl LineStep {
|
||||
///
|
||||
/// The range returned includes the line terminator. Ranges are always
|
||||
/// non-empty.
|
||||
pub fn next(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
pub fn next(&mut self, bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
self.next_impl(bytes)
|
||||
}
|
||||
|
||||
/// Like next, but returns a `Match` instead of a tuple.
|
||||
#[inline(always)]
|
||||
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
|
||||
self.next_impl(bytes).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
bytes = &bytes[..self.end];
|
||||
match memchr(self.line_term, &bytes[self.pos..]) {
|
||||
match bytes[self.pos..].find_byte(self.line_term) {
|
||||
None => {
|
||||
if self.pos < bytes.len() {
|
||||
let m = (self.pos, bytes.len());
|
||||
@@ -95,11 +106,6 @@ impl LineStep {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Like next, but returns a `Match` instead of a tuple.
|
||||
pub(crate) fn next_match(&mut self, bytes: &[u8]) -> Option<Match> {
|
||||
self.next(bytes).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
||||
|
||||
/// Count the number of occurrences of `line_term` in `bytes`.
|
||||
@@ -109,9 +115,11 @@ pub fn count(bytes: &[u8], line_term: u8) -> u64 {
|
||||
|
||||
/// Given a line that possibly ends with a terminator, return that line without
|
||||
/// the terminator.
|
||||
#[inline(always)]
|
||||
pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
let line_term = line_term.as_bytes();
|
||||
if bytes.get(bytes.len().saturating_sub(line_term.len())..) == Some(line_term) {
|
||||
let start = bytes.len().saturating_sub(line_term.len());
|
||||
if bytes.get(start..) == Some(line_term) {
|
||||
return &bytes[..bytes.len() - line_term.len()];
|
||||
}
|
||||
bytes
|
||||
@@ -121,19 +129,22 @@ pub fn without_terminator(bytes: &[u8], line_term: LineTerminator) -> &[u8] {
|
||||
/// of bytes.
|
||||
///
|
||||
/// Line terminators are considered part of the line they terminate.
|
||||
#[inline(always)]
|
||||
pub fn locate(
|
||||
bytes: &[u8],
|
||||
line_term: u8,
|
||||
range: Match,
|
||||
) -> Match {
|
||||
let line_start = memrchr(line_term, &bytes[0..range.start()])
|
||||
let line_start = bytes[..range.start()]
|
||||
.rfind_byte(line_term)
|
||||
.map_or(0, |i| i + 1);
|
||||
let line_end =
|
||||
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
||||
range.end()
|
||||
} else {
|
||||
memchr(line_term, &bytes[range.end()..])
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
bytes[range.end()..]
|
||||
.find_byte(line_term)
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
};
|
||||
Match::new(line_start, line_end)
|
||||
}
|
||||
@@ -167,11 +178,11 @@ fn preceding_by_pos(
|
||||
) -> usize {
|
||||
if pos == 0 {
|
||||
return 0;
|
||||
} else if bytes[pos - 1] == b'\n' {
|
||||
} else if bytes[pos - 1] == line_term {
|
||||
pos -= 1;
|
||||
}
|
||||
loop {
|
||||
match memrchr(line_term, &bytes[..pos]) {
|
||||
match bytes[..pos].rfind_byte(line_term) {
|
||||
None => {
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/// Like assert_eq, but nicer output for long strings.
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! assert_eq_printed {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
use std::cmp;
|
||||
|
||||
use memchr::memchr;
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use lines::{self, LineStep};
|
||||
@@ -90,6 +90,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
self.sink_matched(buf, range)
|
||||
}
|
||||
|
||||
pub fn binary_data(
|
||||
&mut self,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
self.sink.binary_data(&self.searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
pub fn begin(&mut self) -> Result<bool, S::Error> {
|
||||
self.sink.begin(&self.searcher)
|
||||
}
|
||||
@@ -141,19 +148,28 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
consumed
|
||||
}
|
||||
|
||||
pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
|
||||
pub fn detect_binary(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary_byte_offset.is_some() {
|
||||
return true;
|
||||
return Ok(self.config.binary.quit_byte().is_some());
|
||||
}
|
||||
let binary_byte = match self.config.binary.0 {
|
||||
BinaryDetection::Quit(b) => b,
|
||||
_ => return false,
|
||||
BinaryDetection::Convert(b) => b,
|
||||
_ => return Ok(false),
|
||||
};
|
||||
if let Some(i) = memchr(binary_byte, &buf[*range]) {
|
||||
self.binary_byte_offset = Some(range.start() + i);
|
||||
true
|
||||
if let Some(i) = buf[*range].find_byte(binary_byte) {
|
||||
let offset = range.start() + i;
|
||||
self.binary_byte_offset = Some(offset);
|
||||
if !self.binary_data(offset as u64)? {
|
||||
return Ok(true);
|
||||
}
|
||||
Ok(self.config.binary.quit_byte().is_some())
|
||||
} else {
|
||||
false
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -290,11 +306,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
return Ok(false);
|
||||
}
|
||||
} else if let Some(line) = self.find_by_line_fast(buf)? {
|
||||
if !self.after_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.before_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
if self.config.max_context() > 0 {
|
||||
if !self.after_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.before_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
self.set_pos(line.end());
|
||||
if !self.sink_matched(buf, &line)? {
|
||||
@@ -311,6 +329,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn match_by_line_fast_invert(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
@@ -351,6 +370,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn find_by_line_fast(
|
||||
&self,
|
||||
buf: &[u8],
|
||||
@@ -406,12 +426,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
Ok(None)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn sink_matched(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.sink_break_context(range.start())? {
|
||||
@@ -419,11 +440,12 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
let offset = self.absolute_byte_offset + range.start() as u64;
|
||||
let linebuf = &buf[*range];
|
||||
let keepgoing = self.sink.matched(
|
||||
&self.searcher,
|
||||
&SinkMatch {
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
bytes: linebuf,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
},
|
||||
@@ -442,7 +464,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
@@ -472,7 +494,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
) -> Result<bool, S::Error> {
|
||||
assert!(self.after_context_left >= 1);
|
||||
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
@@ -501,7 +523,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
buf: &[u8],
|
||||
range: &Range,
|
||||
) -> Result<bool, S::Error> {
|
||||
if self.binary && self.detect_binary(buf, range) {
|
||||
if self.binary && self.detect_binary(buf, range)? {
|
||||
return Ok(false);
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
|
@@ -51,6 +51,7 @@ where M: Matcher,
|
||||
fn fill(&mut self) -> Result<bool, S::Error> {
|
||||
assert!(self.rdr.buffer()[self.core.pos()..].is_empty());
|
||||
|
||||
let already_binary = self.rdr.binary_byte_offset().is_some();
|
||||
let old_buf_len = self.rdr.buffer().len();
|
||||
let consumed = self.core.roll(self.rdr.buffer());
|
||||
self.rdr.consume(consumed);
|
||||
@@ -58,7 +59,14 @@ where M: Matcher,
|
||||
Err(err) => return Err(S::Error::error_io(err)),
|
||||
Ok(didread) => didread,
|
||||
};
|
||||
if !didread || self.rdr.binary_byte_offset().is_some() {
|
||||
if !already_binary {
|
||||
if let Some(offset) = self.rdr.binary_byte_offset() {
|
||||
if !self.core.binary_data(offset)? {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
}
|
||||
if !didread || self.should_binary_quit() {
|
||||
return Ok(false);
|
||||
}
|
||||
// If rolling the buffer didn't result in consuming anything and if
|
||||
@@ -71,6 +79,11 @@ where M: Matcher,
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn should_binary_quit(&self) -> bool {
|
||||
self.rdr.binary_byte_offset().is_some()
|
||||
&& self.config.binary.quit_byte().is_some()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
@@ -103,7 +116,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
DEFAULT_BUFFER_CAPACITY,
|
||||
);
|
||||
let binary_range = Range::new(0, binary_upto);
|
||||
if !self.core.detect_binary(self.slice, &binary_range) {
|
||||
if !self.core.detect_binary(self.slice, &binary_range)? {
|
||||
while
|
||||
!self.slice[self.core.pos()..].is_empty()
|
||||
&& self.core.match_by_line(self.slice)?
|
||||
@@ -155,7 +168,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
DEFAULT_BUFFER_CAPACITY,
|
||||
);
|
||||
let binary_range = Range::new(0, binary_upto);
|
||||
if !self.core.detect_binary(self.slice, &binary_range) {
|
||||
if !self.core.detect_binary(self.slice, &binary_range)? {
|
||||
let mut keepgoing = true;
|
||||
while !self.slice[self.core.pos()..].is_empty() && keepgoing {
|
||||
keepgoing = self.sink()?;
|
||||
|
@@ -76,9 +76,9 @@ impl MmapChoice {
|
||||
return None;
|
||||
}
|
||||
// SAFETY: This is acceptable because the only way `MmapChoiceImpl` can
|
||||
// be `Auto` is if the caller invoked the `auto` constructor. Thus,
|
||||
// this is a propagation of the caller's assertion that using memory
|
||||
// maps is safe.
|
||||
// be `Auto` is if the caller invoked the `auto` constructor, which
|
||||
// is itself not safe. Thus, this is a propagation of the caller's
|
||||
// assertion that using memory maps is safe.
|
||||
match unsafe { Mmap::map(file) } {
|
||||
Ok(mmap) => Some(mmap),
|
||||
Err(err) => {
|
||||
|
@@ -75,25 +75,41 @@ impl BinaryDetection {
|
||||
BinaryDetection(line_buffer::BinaryDetection::Quit(binary_byte))
|
||||
}
|
||||
|
||||
// TODO(burntsushi): Figure out how to make binary conversion work. This
|
||||
// permits implementing GNU grep's default behavior, which is to zap NUL
|
||||
// bytes but still execute a search (if a match is detected, then GNU grep
|
||||
// stops and reports that a match was found but doesn't print the matching
|
||||
// line itself).
|
||||
//
|
||||
// This behavior is pretty simple to implement using the line buffer (and
|
||||
// in fact, it is already implemented and tested), since there's a fixed
|
||||
// size buffer that we can easily write to. The issue arises when searching
|
||||
// a `&[u8]` (whether on the heap or via a memory map), since this isn't
|
||||
// something we can easily write to.
|
||||
|
||||
/// The given byte is searched in all contents read by the line buffer. If
|
||||
/// it occurs, then it is replaced by the line terminator. The line buffer
|
||||
/// guarantees that this byte will never be observable by callers.
|
||||
#[allow(dead_code)]
|
||||
fn convert(binary_byte: u8) -> BinaryDetection {
|
||||
/// Binary detection is performed by looking for the given byte, and
|
||||
/// replacing it with the line terminator configured on the searcher.
|
||||
/// (If the searcher is configured to use `CRLF` as the line terminator,
|
||||
/// then this byte is replaced by just `LF`.)
|
||||
///
|
||||
/// When searching is performed using a fixed size buffer, then the
|
||||
/// contents of that buffer are always searched for the presence of this
|
||||
/// byte and replaced with the line terminator. In effect, the caller is
|
||||
/// guaranteed to never observe this byte while searching.
|
||||
///
|
||||
/// When searching is performed with the entire contents mapped into
|
||||
/// memory, then this setting has no effect and is ignored.
|
||||
pub fn convert(binary_byte: u8) -> BinaryDetection {
|
||||
BinaryDetection(line_buffer::BinaryDetection::Convert(binary_byte))
|
||||
}
|
||||
|
||||
/// If this binary detection uses the "quit" strategy, then this returns
|
||||
/// the byte that will cause a search to quit. In any other case, this
|
||||
/// returns `None`.
|
||||
pub fn quit_byte(&self) -> Option<u8> {
|
||||
match self.0 {
|
||||
line_buffer::BinaryDetection::Quit(b) => Some(b),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// If this binary detection uses the "convert" strategy, then this returns
|
||||
/// the byte that will be replaced by the line terminator. In any other
|
||||
/// case, this returns `None`.
|
||||
pub fn convert_byte(&self) -> Option<u8> {
|
||||
match self.0 {
|
||||
line_buffer::BinaryDetection::Convert(b) => Some(b),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An encoding to use when searching.
|
||||
@@ -155,6 +171,8 @@ pub struct Config {
|
||||
/// An encoding that, when present, causes the searcher to transcode all
|
||||
/// input from the encoding to UTF-8.
|
||||
encoding: Option<Encoding>,
|
||||
/// Whether to do automatic transcoding based on a BOM or not.
|
||||
bom_sniffing: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -171,6 +189,7 @@ impl Default for Config {
|
||||
binary: BinaryDetection::default(),
|
||||
multi_line: false,
|
||||
encoding: None,
|
||||
bom_sniffing: true,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -296,17 +315,22 @@ impl SearcherBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Builder a searcher with the given matcher.
|
||||
/// Build a searcher with the given matcher.
|
||||
pub fn build(&self) -> Searcher {
|
||||
let mut config = self.config.clone();
|
||||
if config.passthru {
|
||||
config.before_context = 0;
|
||||
config.after_context = 0;
|
||||
}
|
||||
|
||||
let mut decode_builder = DecodeReaderBytesBuilder::new();
|
||||
decode_builder
|
||||
.encoding(self.config.encoding.as_ref().map(|e| e.0))
|
||||
.utf8_passthru(true);
|
||||
.utf8_passthru(true)
|
||||
.strip_bom(self.config.bom_sniffing)
|
||||
.bom_override(true)
|
||||
.bom_sniffing(self.config.bom_sniffing);
|
||||
|
||||
Searcher {
|
||||
config: config,
|
||||
decode_builder: decode_builder,
|
||||
@@ -318,7 +342,7 @@ impl SearcherBuilder {
|
||||
|
||||
/// Set the line terminator that is used by the searcher.
|
||||
///
|
||||
/// When building a searcher, if the matcher provided has a line terminator
|
||||
/// When using a searcher, if the matcher provided has a line terminator
|
||||
/// set, then it must be the same as this one. If they aren't, building
|
||||
/// a searcher will return an error.
|
||||
///
|
||||
@@ -453,12 +477,25 @@ impl SearcherBuilder {
|
||||
/// enabled, then the entire contents will be read on to the heap before
|
||||
/// searching begins.
|
||||
///
|
||||
/// The default behavior is **never**. Generally speaking, command line
|
||||
/// programs probably want to enable memory maps. The only reason to keep
|
||||
/// memory maps disabled is if there are concerns using them. For example,
|
||||
/// if your process is searching a file backed memory map at the same time
|
||||
/// that file is truncated, then it's possible for the process to terminate
|
||||
/// with a bus error.
|
||||
/// The default behavior is **never**. Generally speaking, and perhaps
|
||||
/// against conventional wisdom, memory maps don't necessarily enable
|
||||
/// faster searching. For example, depending on the platform, using memory
|
||||
/// maps while searching a large directory can actually be quite a bit
|
||||
/// slower than using normal read calls because of the overhead of managing
|
||||
/// the memory maps.
|
||||
///
|
||||
/// Memory maps can be faster in some cases however. On some platforms,
|
||||
/// when searching a very large file that *is already in memory*, it can
|
||||
/// be slightly faster to search it as a memory map instead of using
|
||||
/// normal read calls.
|
||||
///
|
||||
/// Finally, memory maps have a somewhat complicated safety story in Rust.
|
||||
/// If you aren't sure whether enabling memory maps is worth it, then just
|
||||
/// don't bother with it.
|
||||
///
|
||||
/// **WARNING**: If your process is searching a file backed memory map
|
||||
/// at the same time that file is truncated, then it's possible for the
|
||||
/// process to terminate with a bus error.
|
||||
pub fn memory_map(
|
||||
&mut self,
|
||||
strategy: MmapChoice,
|
||||
@@ -486,16 +523,18 @@ impl SearcherBuilder {
|
||||
/// Set the encoding used to read the source data before searching.
|
||||
///
|
||||
/// When an encoding is provided, then the source data is _unconditionally_
|
||||
/// transcoded using the encoding. This will disable BOM sniffing. If the
|
||||
/// transcoded using the encoding, unless a BOM is present. If a BOM is
|
||||
/// present, then the encoding indicated by the BOM is used instead. If the
|
||||
/// transcoding process encounters an error, then bytes are replaced with
|
||||
/// the Unicode replacement codepoint.
|
||||
///
|
||||
/// When no encoding is specified (the default), then BOM sniffing is used
|
||||
/// to determine whether the source data is UTF-8 or UTF-16, and
|
||||
/// transcoding will be performed automatically. If no BOM could be found,
|
||||
/// then the source data is searched _as if_ it were UTF-8. However, so
|
||||
/// long as the source data is at least ASCII compatible, then it is
|
||||
/// possible for a search to produce useful results.
|
||||
/// When no encoding is specified (the default), then BOM sniffing is
|
||||
/// used (if it's enabled, which it is, by default) to determine whether
|
||||
/// the source data is UTF-8 or UTF-16, and transcoding will be performed
|
||||
/// automatically. If no BOM could be found, then the source data is
|
||||
/// searched _as if_ it were UTF-8. However, so long as the source data is
|
||||
/// at least ASCII compatible, then it is possible for a search to produce
|
||||
/// useful results.
|
||||
pub fn encoding(
|
||||
&mut self,
|
||||
encoding: Option<Encoding>,
|
||||
@@ -503,6 +542,23 @@ impl SearcherBuilder {
|
||||
self.config.encoding = encoding;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable automatic transcoding based on BOM sniffing.
|
||||
///
|
||||
/// When this is enabled and an explicit encoding is not set, then this
|
||||
/// searcher will try to detect the encoding of the bytes being searched
|
||||
/// by sniffing its byte-order mark (BOM). In particular, when this is
|
||||
/// enabled, UTF-16 encoded files will be searched seamlessly.
|
||||
///
|
||||
/// When this is disabled and if an explicit encoding is not set, then
|
||||
/// the bytes from the source stream will be passed through unchanged,
|
||||
/// including its BOM, if one is present.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn bom_sniffing(&mut self, yes: bool) -> &mut SearcherBuilder {
|
||||
self.config.bom_sniffing = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A searcher executes searches over a haystack and writes results to a caller
|
||||
@@ -699,6 +755,12 @@ impl Searcher {
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the binary detection method used on this searcher.
|
||||
pub fn set_binary_detection(&mut self, detection: BinaryDetection) {
|
||||
self.config.binary = detection.clone();
|
||||
self.line_buffer.borrow_mut().set_binary_detection(detection.0);
|
||||
}
|
||||
|
||||
/// Check that the searcher's configuration and the matcher are consistent
|
||||
/// with each other.
|
||||
fn check_config<M: Matcher>(&self, matcher: M) -> Result<(), ConfigError> {
|
||||
@@ -722,7 +784,8 @@ impl Searcher {
|
||||
|
||||
/// Returns true if and only if the given slice needs to be transcoded.
|
||||
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
||||
self.config.encoding.is_some() || slice_has_utf16_bom(slice)
|
||||
self.config.encoding.is_some()
|
||||
|| (self.config.bom_sniffing && slice_has_utf16_bom(slice))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -732,25 +795,35 @@ impl Searcher {
|
||||
/// where the output may be tailored based on how the searcher is configured.
|
||||
impl Searcher {
|
||||
/// Returns the line terminator used by this searcher.
|
||||
#[inline]
|
||||
pub fn line_terminator(&self) -> LineTerminator {
|
||||
self.config.line_term
|
||||
}
|
||||
|
||||
/// Returns the type of binary detection configured on this searcher.
|
||||
#[inline]
|
||||
pub fn binary_detection(&self) -> &BinaryDetection {
|
||||
&self.config.binary
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher is configured to invert its
|
||||
/// search results. That is, matching lines are lines that do **not** match
|
||||
/// the searcher's matcher.
|
||||
#[inline]
|
||||
pub fn invert_match(&self) -> bool {
|
||||
self.config.invert_match
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher is configured to count line
|
||||
/// numbers.
|
||||
#[inline]
|
||||
pub fn line_number(&self) -> bool {
|
||||
self.config.line_number
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher is configured to perform
|
||||
/// multi line search.
|
||||
#[inline]
|
||||
pub fn multi_line(&self) -> bool {
|
||||
self.config.multi_line
|
||||
}
|
||||
@@ -785,17 +858,20 @@ impl Searcher {
|
||||
|
||||
/// Returns the number of "after" context lines to report. When context
|
||||
/// reporting is not enabled, this returns `0`.
|
||||
#[inline]
|
||||
pub fn after_context(&self) -> usize {
|
||||
self.config.after_context
|
||||
}
|
||||
|
||||
/// Returns the number of "before" context lines to report. When context
|
||||
/// reporting is not enabled, this returns `0`.
|
||||
#[inline]
|
||||
pub fn before_context(&self) -> usize {
|
||||
self.config.before_context
|
||||
}
|
||||
|
||||
/// Returns true if and only if the searcher has "passthru" mode enabled.
|
||||
#[inline]
|
||||
pub fn passthru(&self) -> bool {
|
||||
self.config.passthru
|
||||
}
|
||||
|
@@ -1,3 +1,4 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
|
||||
@@ -49,9 +50,9 @@ impl SinkError for io::Error {
|
||||
|
||||
/// A `Box<std::error::Error>` can be used as an error for `Sink`
|
||||
/// implementations out of the box.
|
||||
impl SinkError for Box<::std::error::Error> {
|
||||
fn error_message<T: fmt::Display>(message: T) -> Box<::std::error::Error> {
|
||||
Box::<::std::error::Error>::from(message.to_string())
|
||||
impl SinkError for Box<dyn error::Error> {
|
||||
fn error_message<T: fmt::Display>(message: T) -> Box<dyn error::Error> {
|
||||
Box::<dyn error::Error>::from(message.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -69,7 +70,7 @@ impl SinkError for Box<::std::error::Error> {
|
||||
/// an implementation of this trait to a searcher, and the searcher is then
|
||||
/// responsible for calling the methods on this trait.
|
||||
///
|
||||
/// This trait defines five behaviors:
|
||||
/// This trait defines several behaviors:
|
||||
///
|
||||
/// * What to do when a match is found. Callers must provide this.
|
||||
/// * What to do when an error occurs. Callers must provide this via the
|
||||
@@ -167,6 +168,28 @@ pub trait Sink {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// This method is called whenever binary detection is enabled and binary
|
||||
/// data is found. If binary data is found, then this is called at least
|
||||
/// once for the first occurrence with the absolute byte offset at which
|
||||
/// the binary data begins.
|
||||
///
|
||||
/// If this returns `true`, then searching continues. If this returns
|
||||
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||
///
|
||||
/// If this returns an error, then searching is stopped immediately,
|
||||
/// `finish` is not called and the error is bubbled back up to the caller
|
||||
/// of the searcher.
|
||||
///
|
||||
/// By default, it does nothing and returns `true`.
|
||||
#[inline]
|
||||
fn binary_data(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_binary_byte_offset: u64,
|
||||
) -> Result<bool, Self::Error> {
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
/// This method is called when a search has begun, before any search is
|
||||
/// executed. By default, this does nothing.
|
||||
///
|
||||
@@ -228,6 +251,71 @@ impl<'a, S: Sink> Sink for &'a mut S {
|
||||
(**self).context_break(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn binary_data(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).binary_data(searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn begin(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).begin(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn finish(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
sink_finish: &SinkFinish,
|
||||
) -> Result<(), S::Error> {
|
||||
(**self).finish(searcher, sink_finish)
|
||||
}
|
||||
}
|
||||
|
||||
impl<S: Sink + ?Sized> Sink for Box<S> {
|
||||
type Error = S::Error;
|
||||
|
||||
#[inline]
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).matched(searcher, mat)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context(searcher, context)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn context_break(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context_break(searcher)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn binary_data(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
binary_byte_offset: u64,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).binary_data(searcher, binary_byte_offset)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn begin(
|
||||
&mut self,
|
||||
|
@@ -1,10 +1,10 @@
|
||||
use std::io::{self, Write};
|
||||
use std::str;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
use grep_matcher::{
|
||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||
};
|
||||
use memchr::memchr;
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
@@ -94,7 +94,8 @@ impl Matcher for RegexMatcher {
|
||||
}
|
||||
// Make it interesting and return the last byte in the current
|
||||
// line.
|
||||
let i = memchr(self.line_term.unwrap().as_byte(), haystack)
|
||||
let i = haystack
|
||||
.find_byte(self.line_term.unwrap().as_byte())
|
||||
.map(|i| i)
|
||||
.unwrap_or(haystack.len() - 1);
|
||||
Ok(Some(LineMatchKind::Candidate(i)))
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.2.0" #:version
|
||||
version = "0.2.4" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -13,11 +13,20 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
|
||||
grep-printer = { version = "0.0.1", path = "../grep-printer" }
|
||||
grep-regex = { version = "0.0.1", path = "../grep-regex" }
|
||||
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
|
||||
grep-cli = { version = "0.1.2", path = "../grep-cli" }
|
||||
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||
grep-pcre2 = { version = "0.1.3", path = "../grep-pcre2", optional = true }
|
||||
grep-printer = { version = "0.1.2", path = "../grep-printer" }
|
||||
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||
grep-searcher = { version = "0.1.4", path = "../grep-searcher" }
|
||||
|
||||
[dev-dependencies]
|
||||
termcolor = "1.0.4"
|
||||
walkdir = "2.2.7"
|
||||
|
||||
[features]
|
||||
avx-accel = ["grep-searcher/avx-accel"]
|
||||
simd-accel = ["grep-searcher/simd-accel"]
|
||||
pcre2 = ["grep-pcre2"]
|
||||
|
||||
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
||||
avx-accel = []
|
||||
|
@@ -1,4 +1,41 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
||||
ripgrep, as a library.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/grep)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/grep](https://docs.rs/grep)
|
||||
|
||||
NOTE: This crate isn't ready for wide use yet. Ambitious individuals can
|
||||
probably piece together the parts, but there is no high level documentation
|
||||
describing how all of the pieces fit together.
|
||||
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
grep = "0.2"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep;
|
||||
```
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
This crate provides a `pcre2` feature (disabled by default) which, when
|
||||
enabled, re-exports the `grep-pcre2` crate as an alternative `Matcher`
|
||||
implementation to the standard `grep-regex` implementation.
|
||||
|
74
grep/examples/simplegrep.rs
Normal file
74
grep/examples/simplegrep.rs
Normal file
@@ -0,0 +1,74 @@
|
||||
extern crate grep;
|
||||
extern crate termcolor;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::ffi::OsString;
|
||||
use std::process;
|
||||
|
||||
use grep::cli;
|
||||
use grep::printer::{ColorSpecs, StandardBuilder};
|
||||
use grep::regex::RegexMatcher;
|
||||
use grep::searcher::{BinaryDetection, SearcherBuilder};
|
||||
use termcolor::ColorChoice;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
fn main() {
|
||||
if let Err(err) = try_main() {
|
||||
eprintln!("{}", err);
|
||||
process::exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
fn try_main() -> Result<(), Box<dyn Error>> {
|
||||
let mut args: Vec<OsString> = env::args_os().collect();
|
||||
if args.len() < 2 {
|
||||
return Err("Usage: simplegrep <pattern> [<path> ...]".into());
|
||||
}
|
||||
if args.len() == 2 {
|
||||
args.push(OsString::from("./"));
|
||||
}
|
||||
search(cli::pattern_from_os(&args[1])?, &args[2..])
|
||||
}
|
||||
|
||||
fn search(pattern: &str, paths: &[OsString]) -> Result<(), Box<dyn Error>> {
|
||||
let matcher = RegexMatcher::new_line_matcher(&pattern)?;
|
||||
let mut searcher = SearcherBuilder::new()
|
||||
.binary_detection(BinaryDetection::quit(b'\x00'))
|
||||
.line_number(false)
|
||||
.build();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.color_specs(ColorSpecs::default_with_color())
|
||||
.build(cli::stdout(
|
||||
if cli::is_tty_stdout() {
|
||||
ColorChoice::Auto
|
||||
} else {
|
||||
ColorChoice::Never
|
||||
}
|
||||
));
|
||||
|
||||
for path in paths {
|
||||
for result in WalkDir::new(path) {
|
||||
let dent = match result {
|
||||
Ok(dent) => dent,
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if !dent.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
let result = searcher.search_path(
|
||||
&matcher,
|
||||
dent.path(),
|
||||
printer.sink_with_path(&matcher, dent.path()),
|
||||
);
|
||||
if let Err(err) = result {
|
||||
eprintln!("{}: {}", dent.path().display(), err);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
@@ -1,10 +1,23 @@
|
||||
/*!
|
||||
TODO.
|
||||
ripgrep, as a library.
|
||||
|
||||
This library is intended to provide a high level facade to the crates that
|
||||
make up ripgrep's core searching routines. However, there is no high level
|
||||
documentation available yet guiding users on how to fit all of the pieces
|
||||
together.
|
||||
|
||||
Every public API item in the constituent crates is documented, but examples
|
||||
are sparse.
|
||||
|
||||
A cookbook and a guide are planned.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub extern crate grep_cli as cli;
|
||||
pub extern crate grep_matcher as matcher;
|
||||
#[cfg(feature = "pcre2")]
|
||||
pub extern crate grep_pcre2 as pcre2;
|
||||
pub extern crate grep_printer as printer;
|
||||
pub extern crate grep_regex as regex;
|
||||
pub extern crate grep_searcher as searcher;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.4.3" #:version
|
||||
version = "0.4.10" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
@@ -18,22 +18,18 @@ name = "ignore"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
crossbeam = "0.3"
|
||||
globset = { version = "0.4.0", path = "../globset" }
|
||||
lazy_static = "1"
|
||||
log = "0.4"
|
||||
memchr = "2"
|
||||
regex = "1"
|
||||
same-file = "1"
|
||||
thread_local = "0.3.2"
|
||||
walkdir = "2"
|
||||
crossbeam-channel = "0.3.6"
|
||||
globset = { version = "0.4.3", path = "../globset" }
|
||||
lazy_static = "1.1"
|
||||
log = "0.4.5"
|
||||
memchr = "2.1"
|
||||
regex = "1.1"
|
||||
same-file = "1.0.4"
|
||||
thread_local = "0.3.6"
|
||||
walkdir = "2.2.7"
|
||||
|
||||
[target.'cfg(windows)'.dependencies.winapi]
|
||||
version = "0.3"
|
||||
features = ["std", "winnt"]
|
||||
|
||||
[dev-dependencies]
|
||||
tempdir = "0.3.5"
|
||||
[target.'cfg(windows)'.dependencies.winapi-util]
|
||||
version = "0.1.2"
|
||||
|
||||
[features]
|
||||
simd-accel = ["globset/simd-accel"]
|
||||
|
@@ -4,7 +4,7 @@ The ignore crate provides a fast recursive directory iterator that respects
|
||||
various filters such as globs, file types and `.gitignore` files. This crate
|
||||
also provides lower level direct access to gitignore and file type matchers.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/ignore)
|
||||
|
||||
|
@@ -1,17 +1,12 @@
|
||||
#![allow(dead_code, unused_imports, unused_mut, unused_variables)]
|
||||
|
||||
extern crate crossbeam;
|
||||
extern crate crossbeam_channel as channel;
|
||||
extern crate ignore;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::thread;
|
||||
|
||||
use crossbeam::sync::MsQueue;
|
||||
use ignore::WalkBuilder;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
@@ -19,7 +14,7 @@ fn main() {
|
||||
let mut path = env::args().nth(1).unwrap();
|
||||
let mut parallel = false;
|
||||
let mut simple = false;
|
||||
let queue: Arc<MsQueue<Option<DirEntry>>> = Arc::new(MsQueue::new());
|
||||
let (tx, rx) = channel::bounded::<DirEntry>(100);
|
||||
if path == "parallel" {
|
||||
path = env::args().nth(2).unwrap();
|
||||
parallel = true;
|
||||
@@ -28,10 +23,9 @@ fn main() {
|
||||
simple = true;
|
||||
}
|
||||
|
||||
let stdout_queue = queue.clone();
|
||||
let stdout_thread = thread::spawn(move || {
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
while let Some(dent) = stdout_queue.pop() {
|
||||
for dent in rx {
|
||||
write_path(&mut stdout, dent.path());
|
||||
}
|
||||
});
|
||||
@@ -39,28 +33,26 @@ fn main() {
|
||||
if parallel {
|
||||
let walker = WalkBuilder::new(path).threads(6).build_parallel();
|
||||
walker.run(|| {
|
||||
let queue = queue.clone();
|
||||
let tx = tx.clone();
|
||||
Box::new(move |result| {
|
||||
use ignore::WalkState::*;
|
||||
|
||||
queue.push(Some(DirEntry::Y(result.unwrap())));
|
||||
tx.send(DirEntry::Y(result.unwrap())).unwrap();
|
||||
Continue
|
||||
})
|
||||
});
|
||||
} else if simple {
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
let walker = WalkDir::new(path);
|
||||
for result in walker {
|
||||
queue.push(Some(DirEntry::X(result.unwrap())));
|
||||
tx.send(DirEntry::X(result.unwrap())).unwrap();
|
||||
}
|
||||
} else {
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
let walker = WalkBuilder::new(path).build();
|
||||
for result in walker {
|
||||
queue.push(Some(DirEntry::Y(result.unwrap())));
|
||||
tx.send(DirEntry::Y(result.unwrap())).unwrap();
|
||||
}
|
||||
}
|
||||
queue.push(None);
|
||||
drop(tx);
|
||||
stdout_thread.join().unwrap();
|
||||
}
|
||||
|
||||
|
@@ -14,14 +14,15 @@
|
||||
// well.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::{OsString, OsStr};
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use overrides::{self, Override};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use types::{self, Types};
|
||||
use walk::DirEntry;
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
@@ -73,6 +74,8 @@ struct IgnoreOptions {
|
||||
git_ignore: bool,
|
||||
/// Whether to read .git/info/exclude files.
|
||||
git_exclude: bool,
|
||||
/// Whether to ignore files case insensitively
|
||||
ignore_case_insensitive: bool,
|
||||
}
|
||||
|
||||
/// Ignore is a matcher useful for recursively walking one or more directories.
|
||||
@@ -149,10 +152,7 @@ impl Ignore {
|
||||
///
|
||||
/// Note that this can only be called on an `Ignore` matcher with no
|
||||
/// parents (i.e., `is_root` returns `true`). This will panic otherwise.
|
||||
pub fn add_parents<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
pub fn add_parents<P: AsRef<Path>>(&self, path: P) -> (Ignore, Option<Error>) {
|
||||
if !self.0.opts.parents
|
||||
&& !self.0.opts.git_ignore
|
||||
&& !self.0.opts.git_exclude
|
||||
@@ -194,7 +194,11 @@ impl Ignore {
|
||||
errs.maybe_push(err);
|
||||
igtmp.is_absolute_parent = true;
|
||||
igtmp.absolute_base = Some(absolute_base.clone());
|
||||
igtmp.has_git = parent.join(".git").exists();
|
||||
igtmp.has_git = if self.0.opts.git_ignore {
|
||||
parent.join(".git").exists()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
ig = Ignore(Arc::new(igtmp));
|
||||
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
||||
}
|
||||
@@ -209,10 +213,7 @@ impl Ignore {
|
||||
/// returned if it exists.
|
||||
///
|
||||
/// Note that all I/O errors are completely ignored.
|
||||
pub fn add_child<P: AsRef<Path>>(
|
||||
&self,
|
||||
dir: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
pub fn add_child<P: AsRef<Path>>(&self, dir: P) -> (Ignore, Option<Error>) {
|
||||
let (ig, err) = self.add_child_path(dir.as_ref());
|
||||
(Ignore(Arc::new(ig)), err)
|
||||
}
|
||||
@@ -220,39 +221,49 @@ impl Ignore {
|
||||
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
||||
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let custom_ig_matcher =
|
||||
if self.0.custom_ignore_filenames.is_empty() {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) =
|
||||
create_gitignore(&dir, &self.0.custom_ignore_filenames);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let ig_matcher =
|
||||
if !self.0.opts.ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".ignore"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_matcher =
|
||||
if !self.0.opts.git_ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".gitignore"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_exclude_matcher =
|
||||
if !self.0.opts.git_exclude {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(
|
||||
&dir,
|
||||
&self.0.custom_ignore_filenames,
|
||||
self.0.opts.ignore_case_insensitive,
|
||||
);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let ig_matcher = if !self.0.opts.ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) =
|
||||
create_gitignore(&dir, &[".ignore"], self.0.opts.ignore_case_insensitive);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_matcher = if !self.0.opts.git_ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) =
|
||||
create_gitignore(&dir, &[".gitignore"], self.0.opts.ignore_case_insensitive);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_exclude_matcher = if !self.0.opts.git_exclude {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(
|
||||
&dir,
|
||||
&[".git/info/exclude"],
|
||||
self.0.opts.ignore_case_insensitive,
|
||||
);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let has_git = if self.0.opts.git_ignore {
|
||||
dir.join(".git").exists()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
let ig = IgnoreInner {
|
||||
compiled: self.0.compiled.clone(),
|
||||
dir: dir.to_path_buf(),
|
||||
@@ -268,7 +279,7 @@ impl Ignore {
|
||||
git_global_matcher: self.0.git_global_matcher.clone(),
|
||||
git_ignore_matcher: gi_matcher,
|
||||
git_exclude_matcher: gi_exclude_matcher,
|
||||
has_git: dir.join(".git").exists(),
|
||||
has_git: has_git,
|
||||
opts: self.0.opts,
|
||||
};
|
||||
(ig, errs.into_error_option())
|
||||
@@ -280,20 +291,28 @@ impl Ignore {
|
||||
let has_custom_ignore_files = !self.0.custom_ignore_filenames.is_empty();
|
||||
let has_explicit_ignores = !self.0.explicit_ignores.is_empty();
|
||||
|
||||
opts.ignore || opts.git_global || opts.git_ignore
|
||||
|| opts.git_exclude || has_custom_ignore_files
|
||||
|| has_explicit_ignores
|
||||
opts.ignore
|
||||
|| opts.git_global
|
||||
|| opts.git_ignore
|
||||
|| opts.git_exclude
|
||||
|| has_custom_ignore_files
|
||||
|| has_explicit_ignores
|
||||
}
|
||||
|
||||
/// Like `matched`, but works with a directory entry instead.
|
||||
pub fn matched_dir_entry<'a>(&'a self, dent: &DirEntry) -> Match<IgnoreMatch<'a>> {
|
||||
let m = self.matched(dent.path(), dent.is_dir());
|
||||
if m.is_none() && self.0.opts.hidden && is_hidden(dent) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
/// Returns a match indicating whether the given file path should be
|
||||
/// ignored or not.
|
||||
///
|
||||
/// The match contains information about its origin.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
fn matched<'a, P: AsRef<Path>>(&'a self, path: P, is_dir: bool) -> Match<IgnoreMatch<'a>> {
|
||||
// We need to be careful with our path. If it has a leading ./, then
|
||||
// strip it because it causes nothing but trouble.
|
||||
let mut path = path.as_ref();
|
||||
@@ -305,9 +324,11 @@ impl Ignore {
|
||||
// return that result immediately. Overrides have the highest
|
||||
// precedence.
|
||||
if !self.0.overrides.is_empty() {
|
||||
let mat =
|
||||
self.0.overrides.matched(path, is_dir)
|
||||
.map(IgnoreMatch::overrides);
|
||||
let mat = self
|
||||
.0
|
||||
.overrides
|
||||
.matched(path, is_dir)
|
||||
.map(IgnoreMatch::overrides);
|
||||
if !mat.is_none() {
|
||||
return mat;
|
||||
}
|
||||
@@ -322,77 +343,82 @@ impl Ignore {
|
||||
}
|
||||
}
|
||||
if !self.0.types.is_empty() {
|
||||
let mat =
|
||||
self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
||||
let mat = self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
whitelisted
|
||||
}
|
||||
|
||||
/// Performs matching only on the ignore files for this directory and
|
||||
/// all parent directories.
|
||||
fn matched_ignore<'a>(
|
||||
&'a self,
|
||||
path: &Path,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
let (mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) =
|
||||
(Match::None, Match::None, Match::None, Match::None, Match::None);
|
||||
fn matched_ignore<'a>(&'a self, path: &Path, is_dir: bool) -> Match<IgnoreMatch<'a>> {
|
||||
let (mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = (
|
||||
Match::None,
|
||||
Match::None,
|
||||
Match::None,
|
||||
Match::None,
|
||||
Match::None,
|
||||
);
|
||||
let any_git = self.parents().any(|ig| ig.0.has_git);
|
||||
let mut saw_git = false;
|
||||
for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
|
||||
if m_custom_ignore.is_none() {
|
||||
m_custom_ignore =
|
||||
ig.0.custom_ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.custom_ignore_matcher
|
||||
.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.ignore_matcher
|
||||
.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if any_git && !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.git_ignore_matcher
|
||||
.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if any_git && !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.git_exclude_matcher
|
||||
.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
if self.0.opts.parents {
|
||||
if let Some(abs_parent_path) = self.absolute_base() {
|
||||
let path = abs_parent_path.join(path);
|
||||
for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) {
|
||||
for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) {
|
||||
if m_custom_ignore.is_none() {
|
||||
m_custom_ignore =
|
||||
ig.0.custom_ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.custom_ignore_matcher
|
||||
.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.ignore_matcher
|
||||
.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if any_git && !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.git_ignore_matcher
|
||||
.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if any_git && !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
ig.0.git_exclude_matcher
|
||||
.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
@@ -404,16 +430,21 @@ impl Ignore {
|
||||
}
|
||||
m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
|
||||
}
|
||||
let m_global =
|
||||
if any_git {
|
||||
self.0.git_global_matcher
|
||||
.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore)
|
||||
} else {
|
||||
Match::None
|
||||
};
|
||||
let m_global = if any_git {
|
||||
self.0
|
||||
.git_global_matcher
|
||||
.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore)
|
||||
} else {
|
||||
Match::None
|
||||
};
|
||||
|
||||
m_custom_ignore.or(m_ignore).or(m_gi).or(m_gi_exclude).or(m_global).or(m_explicit)
|
||||
m_custom_ignore
|
||||
.or(m_ignore)
|
||||
.or(m_gi)
|
||||
.or(m_gi_exclude)
|
||||
.or(m_global)
|
||||
.or(m_explicit)
|
||||
}
|
||||
|
||||
/// Returns an iterator over parent ignore matchers, including this one.
|
||||
@@ -483,6 +514,7 @@ impl IgnoreBuilder {
|
||||
git_global: true,
|
||||
git_ignore: true,
|
||||
git_exclude: true,
|
||||
ignore_case_insensitive: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -492,16 +524,19 @@ impl IgnoreBuilder {
|
||||
/// The matcher returned won't match anything until ignore rules from
|
||||
/// directories are added to it.
|
||||
pub fn build(&self) -> Ignore {
|
||||
let git_global_matcher =
|
||||
if !self.opts.git_global {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (gi, err) = Gitignore::global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
let git_global_matcher = if !self.opts.git_global {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let mut builder = GitignoreBuilder::new("");
|
||||
builder
|
||||
.case_insensitive(self.opts.ignore_case_insensitive)
|
||||
.unwrap();
|
||||
let (gi, err) = builder.build_global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
|
||||
Ignore(Arc::new(IgnoreInner {
|
||||
compiled: Arc::new(RwLock::new(HashMap::new())),
|
||||
@@ -557,9 +592,10 @@ impl IgnoreBuilder {
|
||||
/// later names.
|
||||
pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
|
||||
&mut self,
|
||||
file_name: S
|
||||
file_name: S,
|
||||
) -> &mut IgnoreBuilder {
|
||||
self.custom_ignore_filenames.push(file_name.as_ref().to_os_string());
|
||||
self.custom_ignore_filenames
|
||||
.push(file_name.as_ref().to_os_string());
|
||||
self
|
||||
}
|
||||
|
||||
@@ -627,6 +663,14 @@ impl IgnoreBuilder {
|
||||
self.opts.git_exclude = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Process ignore files case insensitively
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.ignore_case_insensitive = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher for the directory given.
|
||||
@@ -638,9 +682,11 @@ impl IgnoreBuilder {
|
||||
pub fn create_gitignore<T: AsRef<OsStr>>(
|
||||
dir: &Path,
|
||||
names: &[T],
|
||||
case_insensitive: bool,
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let mut builder = GitignoreBuilder::new(dir);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
builder.case_insensitive(case_insensitive).unwrap();
|
||||
for name in names {
|
||||
let gipath = dir.join(name.as_ref());
|
||||
errs.maybe_push_ignore_io(builder.add(gipath));
|
||||
@@ -661,10 +707,9 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use tempdir::TempDir;
|
||||
|
||||
use dir::IgnoreBuilder;
|
||||
use gitignore::Gitignore;
|
||||
use tests::TempDir;
|
||||
use Error;
|
||||
|
||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||
@@ -683,15 +728,21 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn tmpdir() -> TempDir {
|
||||
TempDir::new().unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn explicit_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join("not-an-ignore"), "foo\n!bar");
|
||||
|
||||
let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
|
||||
assert!(err.is_none());
|
||||
let (ig, err) = IgnoreBuilder::new()
|
||||
.add_ignore(gi).build().add_child(td.path());
|
||||
.add_ignore(gi)
|
||||
.build()
|
||||
.add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_ignore());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
@@ -700,7 +751,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn git_exclude() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
mkdirp(td.path().join(".git/info"));
|
||||
wfile(td.path().join(".git/info/exclude"), "foo\n!bar");
|
||||
|
||||
@@ -713,7 +764,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn gitignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
mkdirp(td.path().join(".git"));
|
||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||
|
||||
@@ -726,7 +777,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn gitignore_no_git() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
@@ -738,7 +789,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".ignore"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
@@ -750,13 +801,14 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn custom_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
let custom_ignore = ".customignore";
|
||||
wfile(td.path().join(custom_ignore), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new()
|
||||
.add_custom_ignore_filename(custom_ignore)
|
||||
.build().add_child(td.path());
|
||||
.build()
|
||||
.add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_ignore());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
@@ -766,14 +818,15 @@ mod tests {
|
||||
// Tests that a custom ignore file will override an .ignore.
|
||||
#[test]
|
||||
fn custom_ignore_over_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
let custom_ignore = ".customignore";
|
||||
wfile(td.path().join(".ignore"), "foo");
|
||||
wfile(td.path().join(custom_ignore), "!foo");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new()
|
||||
.add_custom_ignore_filename(custom_ignore)
|
||||
.build().add_child(td.path());
|
||||
.build()
|
||||
.add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_whitelist());
|
||||
}
|
||||
@@ -781,7 +834,7 @@ mod tests {
|
||||
// Tests that earlier custom ignore files have lower precedence than later.
|
||||
#[test]
|
||||
fn custom_ignore_precedence() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
let custom_ignore1 = ".customignore1";
|
||||
let custom_ignore2 = ".customignore2";
|
||||
wfile(td.path().join(custom_ignore1), "foo");
|
||||
@@ -790,7 +843,8 @@ mod tests {
|
||||
let (ig, err) = IgnoreBuilder::new()
|
||||
.add_custom_ignore_filename(custom_ignore1)
|
||||
.add_custom_ignore_filename(custom_ignore2)
|
||||
.build().add_child(td.path());
|
||||
.build()
|
||||
.add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_whitelist());
|
||||
}
|
||||
@@ -798,7 +852,7 @@ mod tests {
|
||||
// Tests that an .ignore will override a .gitignore.
|
||||
#[test]
|
||||
fn ignore_over_gitignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
wfile(td.path().join(".ignore"), "!foo");
|
||||
|
||||
@@ -810,7 +864,7 @@ mod tests {
|
||||
// Tests that exclude has lower precedent than both .ignore and .gitignore.
|
||||
#[test]
|
||||
fn exclude_lowest() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".gitignore"), "!foo");
|
||||
wfile(td.path().join(".ignore"), "!bar");
|
||||
mkdirp(td.path().join(".git/info"));
|
||||
@@ -825,8 +879,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo");
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".gitignore"), "{foo");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
@@ -834,9 +888,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored_both() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo");
|
||||
wfile(td.path().join(".ignore"), "fo**o");
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".gitignore"), "{foo");
|
||||
wfile(td.path().join(".ignore"), "{bar");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert_eq!(2, partial(err.expect("an error")).len());
|
||||
@@ -844,9 +898,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored_partial() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
mkdirp(td.path().join(".git"));
|
||||
wfile(td.path().join(".gitignore"), "f**oo\nbar");
|
||||
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
@@ -855,8 +909,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored_partial_and_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo\nbar");
|
||||
let td = tmpdir();
|
||||
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
||||
wfile(td.path().join(".ignore"), "!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
@@ -866,7 +920,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn not_present_empty() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
@@ -876,7 +930,7 @@ mod tests {
|
||||
fn stops_at_git_dir() {
|
||||
// This tests that .gitignore files beyond a .git barrier aren't
|
||||
// matched, but .ignore files are.
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("foo/.git"));
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
@@ -897,7 +951,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn absolute_parent() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("foo"));
|
||||
wfile(td.path().join(".gitignore"), "bar");
|
||||
@@ -920,7 +974,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn absolute_parent_anchored() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir();
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("src/llvm"));
|
||||
wfile(td.path().join(".gitignore"), "/llvm/\nfoo");
|
||||
|
@@ -69,8 +69,7 @@ impl Glob {
|
||||
|
||||
/// Returns true if and only if this glob has a `**/` prefix.
|
||||
fn has_doublestar_prefix(&self) -> bool {
|
||||
self.actual.starts_with("**/")
|
||||
|| (self.actual == "**" && self.is_only_dir)
|
||||
self.actual.starts_with("**/") || self.actual == "**"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,16 +126,7 @@ impl Gitignore {
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn global() -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
Gitignore::new(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
GitignoreBuilder::new("").build_global()
|
||||
}
|
||||
|
||||
/// Creates a new empty gitignore matcher that never matches anything.
|
||||
@@ -359,6 +349,36 @@ impl GitignoreBuilder {
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a global gitignore matcher using the configuration in this
|
||||
/// builder.
|
||||
///
|
||||
/// This consumes ownership of the builder unlike `build` because it
|
||||
/// must mutate the builder to add the global gitignore globs.
|
||||
///
|
||||
/// Note that this ignores the path given to this builder's constructor
|
||||
/// and instead derives the path automatically from git's global
|
||||
/// configuration.
|
||||
pub fn build_global(mut self) -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(self.add(path));
|
||||
match self.build() {
|
||||
Ok(gi) => (gi, errs.into_error_option()),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
(Gitignore::empty(), errs.into_error_option())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add each glob from the file path given.
|
||||
///
|
||||
/// The file given should be formatted as a `gitignore` file.
|
||||
@@ -419,6 +439,8 @@ impl GitignoreBuilder {
|
||||
from: Option<PathBuf>,
|
||||
mut line: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
#![allow(deprecated)]
|
||||
|
||||
if line.starts_with("#") {
|
||||
return Ok(self);
|
||||
}
|
||||
@@ -435,7 +457,6 @@ impl GitignoreBuilder {
|
||||
is_whitelist: false,
|
||||
is_only_dir: false,
|
||||
};
|
||||
let mut literal_separator = false;
|
||||
let mut is_absolute = false;
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
@@ -450,7 +471,6 @@ impl GitignoreBuilder {
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
literal_separator = true;
|
||||
line = &line[1..];
|
||||
is_absolute = true;
|
||||
}
|
||||
@@ -463,16 +483,11 @@ impl GitignoreBuilder {
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
glob.actual = line.to_string();
|
||||
if is_absolute || line.chars().any(|c| c == '/') {
|
||||
literal_separator = true;
|
||||
}
|
||||
// If there was a slash, then this is a glob that must match the entire
|
||||
// path name. Otherwise, we should let it match anywhere, so use a **/
|
||||
// prefix.
|
||||
if !literal_separator {
|
||||
// If there is a literal slash, then this is a glob that must match the
|
||||
// entire path name. Otherwise, we should let it match anywhere, so use
|
||||
// a **/ prefix.
|
||||
if !is_absolute && !line.chars().any(|c| c == '/') {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !glob.has_doublestar_prefix() {
|
||||
glob.actual = format!("**/{}", glob.actual);
|
||||
@@ -486,7 +501,7 @@ impl GitignoreBuilder {
|
||||
}
|
||||
let parsed =
|
||||
GlobBuilder::new(&glob.actual)
|
||||
.literal_separator(literal_separator)
|
||||
.literal_separator(true)
|
||||
.case_insensitive(self.case_insensitive)
|
||||
.backslash_escape(true)
|
||||
.build()
|
||||
@@ -503,12 +518,16 @@ impl GitignoreBuilder {
|
||||
|
||||
/// Toggle whether the globs should be matched case insensitively or not.
|
||||
///
|
||||
/// When this option is changed, only globs added after the change will be affected.
|
||||
/// When this option is changed, only globs added after the change will be
|
||||
/// affected.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn case_insensitive(
|
||||
&mut self, yes: bool
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
// TODO: This should not return a `Result`. Fix this in the next semver
|
||||
// release.
|
||||
self.case_insensitive = yes;
|
||||
Ok(self)
|
||||
}
|
||||
@@ -518,7 +537,7 @@ impl GitignoreBuilder {
|
||||
///
|
||||
/// Note that the file path returned may not exist.
|
||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||
// git supports $HOME/.gitconfig and $XDG_CONFIG_DIR/git/config. Notably,
|
||||
// git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably,
|
||||
// both can be active at the same time, where $HOME/.gitconfig takes
|
||||
// precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then
|
||||
// we're done.
|
||||
@@ -549,7 +568,7 @@ fn gitconfig_home_contents() -> Option<Vec<u8>> {
|
||||
}
|
||||
|
||||
/// Returns the file contents of git's global config file, if one exists, in
|
||||
/// the user's XDG_CONFIG_DIR directory.
|
||||
/// the user's XDG_CONFIG_HOME directory.
|
||||
fn gitconfig_xdg_contents() -> Option<Vec<u8>> {
|
||||
let path = env::var_os("XDG_CONFIG_HOME")
|
||||
.and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
|
||||
@@ -689,6 +708,9 @@ mod tests {
|
||||
ignored!(ig39, ROOT, "\\?", "?");
|
||||
ignored!(ig40, ROOT, "\\*", "*");
|
||||
ignored!(ig41, ROOT, "\\a", "a");
|
||||
ignored!(ig42, ROOT, "s*.rs", "sfoo.rs");
|
||||
ignored!(ig43, ROOT, "**", "foo.rs");
|
||||
ignored!(ig44, ROOT, "**/**/*", "a/foo.rs");
|
||||
|
||||
not_ignored!(ignot1, ROOT, "amonths", "months");
|
||||
not_ignored!(ignot2, ROOT, "monthsa", "months");
|
||||
@@ -710,6 +732,7 @@ mod tests {
|
||||
not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true);
|
||||
not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs");
|
||||
not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo");
|
||||
not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs");
|
||||
|
||||
fn bytes(s: &str) -> Vec<u8> {
|
||||
s.to_string().into_bytes()
|
||||
|
@@ -46,7 +46,7 @@ See the documentation for `WalkBuilder` for many other options.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate crossbeam;
|
||||
extern crate crossbeam_channel as channel;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
@@ -55,12 +55,10 @@ extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
#[cfg(test)]
|
||||
extern crate tempdir;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi;
|
||||
extern crate winapi_util;
|
||||
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
@@ -442,3 +440,66 @@ impl<T> Match<T> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::env;
|
||||
use std::error;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::result;
|
||||
|
||||
/// A convenient result type alias.
|
||||
pub type Result<T> =
|
||||
result::Result<T, Box<dyn error::Error + Send + Sync>>;
|
||||
|
||||
macro_rules! err {
|
||||
($($tt:tt)*) => {
|
||||
Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
|
||||
}
|
||||
}
|
||||
|
||||
/// A simple wrapper for creating a temporary directory that is
|
||||
/// automatically deleted when it's dropped.
|
||||
///
|
||||
/// We use this in lieu of tempfile because tempfile brings in too many
|
||||
/// dependencies.
|
||||
#[derive(Debug)]
|
||||
pub struct TempDir(PathBuf);
|
||||
|
||||
impl Drop for TempDir {
|
||||
fn drop(&mut self) {
|
||||
fs::remove_dir_all(&self.0).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
impl TempDir {
|
||||
/// Create a new empty temporary directory under the system's configured
|
||||
/// temporary directory.
|
||||
pub fn new() -> Result<TempDir> {
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
static TRIES: usize = 100;
|
||||
static COUNTER: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
let tmpdir = env::temp_dir();
|
||||
for _ in 0..TRIES {
|
||||
let count = COUNTER.fetch_add(1, Ordering::SeqCst);
|
||||
let path = tmpdir.join("rust-ignore").join(count.to_string());
|
||||
if path.is_dir() {
|
||||
continue;
|
||||
}
|
||||
fs::create_dir_all(&path).map_err(|e| {
|
||||
err!("failed to create {}: {}", path.display(), e)
|
||||
})?;
|
||||
return Ok(TempDir(path));
|
||||
}
|
||||
Err(err!("failed to create temp dir after {} tries", TRIES))
|
||||
}
|
||||
|
||||
/// Return the underlying path to this temporary directory.
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -139,13 +139,16 @@ impl OverrideBuilder {
|
||||
}
|
||||
|
||||
/// Toggle whether the globs should be matched case insensitively or not.
|
||||
///
|
||||
///
|
||||
/// When this option is changed, only globs added after the change will be affected.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn case_insensitive(
|
||||
&mut self, yes: bool
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> Result<&mut OverrideBuilder, Error> {
|
||||
// TODO: This should not return a `Result`. Fix this in the next semver
|
||||
// release.
|
||||
self.builder.case_insensitive(yes)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
@@ -1,22 +1,56 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
use walk::DirEntry;
|
||||
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
/// This only returns true if the base name of the path starts with a `.`.
|
||||
///
|
||||
/// On Unix, this implements a more optimized check.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
pub fn is_hidden(dent: &DirEntry) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
if let Some(name) = file_name(dent.path()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(not(unix))]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
/// On Windows, this returns true if one of the following is true:
|
||||
///
|
||||
/// * The base name of the path starts with a `.`.
|
||||
/// * The file attributes have the `HIDDEN` property set.
|
||||
#[cfg(windows)]
|
||||
pub fn is_hidden(dent: &DirEntry) -> bool {
|
||||
use std::os::windows::fs::MetadataExt;
|
||||
use winapi_util::file;
|
||||
|
||||
// This looks like we're doing an extra stat call, but on Windows, the
|
||||
// directory traverser reuses the metadata retrieved from each directory
|
||||
// entry and stores it on the DirEntry itself. So this is "free."
|
||||
if let Ok(md) = dent.metadata() {
|
||||
if file::is_hidden(md.file_attributes() as u64) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let Some(name) = file_name(dent.path()) {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
/// This only returns true if the base name of the path starts with a `.`.
|
||||
#[cfg(not(any(unix, windows)))]
|
||||
pub fn is_hidden(dent: &DirEntry) -> bool {
|
||||
if let Some(name) = file_name(dent.path()) {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
|
@@ -98,16 +98,20 @@ use {Error, Match};
|
||||
|
||||
const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("agda", &["*.agda", "*.lagda"]),
|
||||
("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
|
||||
("aidl", &["*.aidl"]),
|
||||
("amake", &["*.mk", "*.bp"]),
|
||||
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
|
||||
("asm", &["*.asm", "*.s", "*.S"]),
|
||||
("asp", &["*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb"]),
|
||||
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
||||
("awk", &["*.awk"]),
|
||||
("bazel", &["*.bzl", "WORKSPACE", "BUILD"]),
|
||||
("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
|
||||
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||
("bzip2", &["*.bz2"]),
|
||||
("c", &["*.c", "*.h", "*.H"]),
|
||||
("brotli", &["*.br"]),
|
||||
("buildstream", &["*.bst"]),
|
||||
("bzip2", &["*.bz2", "*.tbz2"]),
|
||||
("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
|
||||
("cabal", &["*.cabal"]),
|
||||
("cbor", &["*.cbor"]),
|
||||
("ceylon", &["*.ceylon"]),
|
||||
@@ -117,8 +121,8 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("creole", &["*.creole"]),
|
||||
("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
|
||||
("cpp", &[
|
||||
"*.C", "*.cc", "*.cpp", "*.cxx",
|
||||
"*.h", "*.H", "*.hh", "*.hpp", "*.hxx", "*.inl",
|
||||
"*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
|
||||
"*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
|
||||
]),
|
||||
("crystal", &["Projectfile", "*.cr"]),
|
||||
("cs", &["*.cs"]),
|
||||
@@ -126,10 +130,12 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("cshtml", &["*.cshtml"]),
|
||||
("css", &["*.css", "*.scss"]),
|
||||
("csv", &["*.csv"]),
|
||||
("cython", &["*.pyx"]),
|
||||
("cython", &["*.pyx", "*.pxi", "*.pxd"]),
|
||||
("dart", &["*.dart"]),
|
||||
("d", &["*.d"]),
|
||||
("dhall", &["*.dhall"]),
|
||||
("docker", &["*Dockerfile*"]),
|
||||
("edn", &["*.edn"]),
|
||||
("elisp", &["*.el"]),
|
||||
("elixir", &["*.ex", "*.eex", "*.exs"]),
|
||||
("elm", &["*.elm"]),
|
||||
@@ -141,16 +147,18 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
"*.f90", "*.F90", "*.f95", "*.F95",
|
||||
]),
|
||||
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
||||
("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
|
||||
("gn", &["*.gn", "*.gni"]),
|
||||
("go", &["*.go"]),
|
||||
("gzip", &["*.gz"]),
|
||||
("gzip", &["*.gz", "*.tgz"]),
|
||||
("groovy", &["*.groovy", "*.gradle"]),
|
||||
("h", &["*.h", "*.hpp"]),
|
||||
("hbs", &["*.hbs"]),
|
||||
("haskell", &["*.hs", "*.lhs"]),
|
||||
("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
|
||||
("hs", &["*.hs", "*.lhs"]),
|
||||
("html", &["*.htm", "*.html", "*.ejs"]),
|
||||
("java", &["*.java", "*.jsp"]),
|
||||
("idris", &["*.idr", "*.lidr"]),
|
||||
("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
|
||||
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
|
||||
("js", &[
|
||||
"*.js", "*.jsx", "*.vue",
|
||||
@@ -190,16 +198,19 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
"OFL-*[0-9]*",
|
||||
]),
|
||||
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
||||
("lock", &["*.lock", "package-lock.json"]),
|
||||
("log", &["*.log"]),
|
||||
("lua", &["*.lua"]),
|
||||
("lzma", &["*.lzma"]),
|
||||
("lz4", &["*.lz4"]),
|
||||
("m4", &["*.ac", "*.m4"]),
|
||||
("make", &[
|
||||
"gnumakefile", "Gnumakefile", "GNUmakefile",
|
||||
"makefile", "Makefile",
|
||||
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
|
||||
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
|
||||
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
|
||||
"*.mk", "*.mak"
|
||||
]),
|
||||
("mako", &["*.mako", "*.mao"]),
|
||||
("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
|
||||
("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
|
||||
("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
|
||||
@@ -209,30 +220,34 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("msbuild", &[
|
||||
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
|
||||
]),
|
||||
("nim", &["*.nim"]),
|
||||
("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
|
||||
("nix", &["*.nix"]),
|
||||
("objc", &["*.h", "*.m"]),
|
||||
("objcpp", &["*.h", "*.mm"]),
|
||||
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
|
||||
("org", &["*.org"]),
|
||||
("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
|
||||
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
|
||||
("pdf", &["*.pdf"]),
|
||||
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
|
||||
("pod", &["*.pod"]),
|
||||
("postscript", &[".eps", ".ps"]),
|
||||
("protobuf", &["*.proto"]),
|
||||
("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
|
||||
("puppet", &["*.erb", "*.pp", "*.rb"]),
|
||||
("purs", &["*.purs"]),
|
||||
("py", &["*.py"]),
|
||||
("qmake", &["*.pro", "*.pri", "*.prf"]),
|
||||
("qml", &["*.qml"]),
|
||||
("readme", &["README*", "*README"]),
|
||||
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
|
||||
("rdoc", &["*.rdoc"]),
|
||||
("robot", &["*.robot"]),
|
||||
("rst", &["*.rst"]),
|
||||
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
|
||||
("rust", &["*.rs"]),
|
||||
("sass", &["*.sass", "*.scss"]),
|
||||
("scala", &["*.scala"]),
|
||||
("scala", &["*.scala", "*.sbt"]),
|
||||
("sh", &[
|
||||
// Portable/misc. init files
|
||||
".login", ".logout", ".profile", "profile",
|
||||
@@ -273,8 +288,9 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
]),
|
||||
("taskpaper", &["*.taskpaper"]),
|
||||
("tcl", &["*.tcl"]),
|
||||
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib"]),
|
||||
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
|
||||
("textile", &["*.textile"]),
|
||||
("thrift", &["*.thrift"]),
|
||||
("tf", &["*.tf"]),
|
||||
("ts", &["*.ts", "*.tsx"]),
|
||||
("txt", &["*.txt"]),
|
||||
@@ -288,10 +304,14 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("vimscript", &["*.vim"]),
|
||||
("wiki", &["*.mediawiki", "*.wiki"]),
|
||||
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
||||
("xml", &["*.xml", "*.xml.dist"]),
|
||||
("xz", &["*.xz"]),
|
||||
("xml", &[
|
||||
"*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
|
||||
"*.rng", "*.sch",
|
||||
]),
|
||||
("xz", &["*.xz", "*.txz"]),
|
||||
("yacc", &["*.y"]),
|
||||
("yaml", &["*.yaml", "*.yml"]),
|
||||
("zig", &["*.zig"]),
|
||||
("zsh", &[
|
||||
".zshenv", "zshenv",
|
||||
".zlogin", "zlogin",
|
||||
@@ -300,6 +320,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
".zshrc", "zshrc",
|
||||
"*.zsh",
|
||||
]),
|
||||
("zstd", &["*.zst", "*.zstd"]),
|
||||
];
|
||||
|
||||
/// Glob represents a single glob in a set of file type definitions.
|
||||
@@ -338,6 +359,18 @@ impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
|
||||
/// Return the file type defintion that matched, if one exists. A file type
|
||||
/// definition always exists when a specific definition matches a file
|
||||
/// path.
|
||||
pub fn file_type_def(&self) -> Option<&FileTypeDef> {
|
||||
match self {
|
||||
Glob(GlobInner::UnmatchedIgnore) => None,
|
||||
Glob(GlobInner::Matched { def, .. }) => {
|
||||
Some(def)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single file type definition.
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,14 +1,14 @@
|
||||
class RipgrepBin < Formula
|
||||
version '0.9.0'
|
||||
version '11.0.2'
|
||||
desc "Recursively search directories for a regex pattern."
|
||||
homepage "https://github.com/BurntSushi/ripgrep"
|
||||
|
||||
if OS.mac?
|
||||
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
|
||||
sha256 "36003ea8b62ad6274dc14140039f448cdf5026827d53cf24dad2d84005557a8c"
|
||||
sha256 "0ba26423691deedf2649b12b1abe3d2be294ee1cb17c40b68fe85efe194f4f57"
|
||||
elsif OS.linux?
|
||||
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz"
|
||||
sha256 "2eb4443e58f95051ff76ea036ed1faf940d5a04af4e7ff5a7dbd74576b907e99"
|
||||
sha256 "2e7978e346553fbc45c0940d9fa11e12f9afbae8213b261aad19b698150e169a"
|
||||
end
|
||||
|
||||
conflicts_with "ripgrep"
|
||||
|
1
rustfmt.toml
Normal file
1
rustfmt.toml
Normal file
@@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
33
scripts/copy-examples
Executable file
33
scripts/copy-examples
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
import argparse
|
||||
import codecs
|
||||
import os.path
|
||||
import re
|
||||
|
||||
RE_EACH_CODE_BLOCK = re.compile(
|
||||
r'(?s)(?:```|\{\{< high rust[^>]+>\}\})[^\n]*\n(.*?)(?:```|\{\{< /high >\}\})' # noqa
|
||||
)
|
||||
RE_MARKER = re.compile(r'^(?:# )?//([^/].*)$')
|
||||
RE_STRIP_COMMENT = re.compile(r'^# ?')
|
||||
|
||||
if __name__ == '__main__':
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument('--rust-file', default='src/cookbook.rs')
|
||||
p.add_argument('--example-dir', default='grep/examples')
|
||||
args = p.parse_args()
|
||||
|
||||
with codecs.open(args.rust_file, encoding='utf-8') as f:
|
||||
rustcode = f.read()
|
||||
for m in RE_EACH_CODE_BLOCK.finditer(rustcode):
|
||||
lines = m.group(1).splitlines()
|
||||
marker, codelines = lines[0], lines[1:]
|
||||
m = RE_MARKER.search(marker)
|
||||
if m is None:
|
||||
continue
|
||||
|
||||
code = '\n'.join(RE_STRIP_COMMENT.sub('', line) for line in codelines)
|
||||
fpath = os.path.join(args.example_dir, m.group(1))
|
||||
with codecs.open(fpath, mode='w+', encoding='utf-8') as f:
|
||||
print(code, file=f)
|
974
src/app.rs
974
src/app.rs
File diff suppressed because it is too large
Load Diff
796
src/args.rs
796
src/args.rs
File diff suppressed because it is too large
Load Diff
@@ -5,11 +5,14 @@
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::io;
|
||||
use std::ffi::OsString;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use Result;
|
||||
use bstr::{io::BufReadExt, ByteSlice};
|
||||
use log;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
/// Return a sequence of arguments derived from ripgrep rc configuration files.
|
||||
pub fn args() -> Vec<OsString> {
|
||||
@@ -34,7 +37,7 @@ pub fn args() -> Vec<OsString> {
|
||||
message!("{}:{}", config_path.display(), err);
|
||||
}
|
||||
}
|
||||
debug!(
|
||||
log::debug!(
|
||||
"{}: arguments loaded from config file: {:?}",
|
||||
config_path.display(),
|
||||
args
|
||||
@@ -52,7 +55,7 @@ pub fn args() -> Vec<OsString> {
|
||||
/// for each line in addition to successfully parsed arguments.
|
||||
fn parse<P: AsRef<Path>>(
|
||||
path: P,
|
||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
||||
) -> Result<(Vec<OsString>, Vec<Box<dyn Error>>)> {
|
||||
let path = path.as_ref();
|
||||
match File::open(&path) {
|
||||
Ok(file) => parse_reader(file),
|
||||
@@ -73,63 +76,30 @@ fn parse<P: AsRef<Path>>(
|
||||
/// in addition to successfully parsed arguments.
|
||||
fn parse_reader<R: io::Read>(
|
||||
rdr: R,
|
||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
||||
let mut bufrdr = io::BufReader::new(rdr);
|
||||
) -> Result<(Vec<OsString>, Vec<Box<dyn Error>>)> {
|
||||
let bufrdr = io::BufReader::new(rdr);
|
||||
let (mut args, mut errs) = (vec![], vec![]);
|
||||
let mut line = vec![];
|
||||
let mut line_number = 0;
|
||||
while {
|
||||
line.clear();
|
||||
bufrdr.for_byte_line_with_terminator(|line| {
|
||||
line_number += 1;
|
||||
bufrdr.read_until(b'\n', &mut line)? > 0
|
||||
} {
|
||||
trim(&mut line);
|
||||
|
||||
let line = line.trim();
|
||||
if line.is_empty() || line[0] == b'#' {
|
||||
continue;
|
||||
return Ok(true);
|
||||
}
|
||||
match bytes_to_os_string(&line) {
|
||||
match line.to_os_str() {
|
||||
Ok(osstr) => {
|
||||
args.push(osstr);
|
||||
args.push(osstr.to_os_string());
|
||||
}
|
||||
Err(err) => {
|
||||
errs.push(format!("{}: {}", line_number, err).into());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
})?;
|
||||
Ok((args, errs))
|
||||
}
|
||||
|
||||
/// Trim the given bytes of whitespace according to the ASCII definition.
|
||||
fn trim(x: &mut Vec<u8>) {
|
||||
let upto = x.iter().take_while(|b| is_space(**b)).count();
|
||||
x.drain(..upto);
|
||||
let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count();
|
||||
x.drain(revto..);
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given byte is an ASCII space character.
|
||||
fn is_space(b: u8) -> bool {
|
||||
b == b'\t'
|
||||
|| b == b'\n'
|
||||
|| b == b'\x0B'
|
||||
|| b == b'\x0C'
|
||||
|| b == b'\r'
|
||||
|| b == b' '
|
||||
}
|
||||
|
||||
/// On Unix, get an OsString from raw bytes.
|
||||
#[cfg(unix)]
|
||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
||||
use std::os::unix::ffi::OsStringExt;
|
||||
Ok(OsString::from_vec(bytes.to_vec()))
|
||||
}
|
||||
|
||||
/// On non-Unix (like Windows), require UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
||||
String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ffi::OsString;
|
||||
|
@@ -1,190 +0,0 @@
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
use std::process::{self, Stdio};
|
||||
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
|
||||
/// A decompression command, contains the command to be spawned as well as any
|
||||
/// necessary CLI args.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct DecompressionCommand {
|
||||
cmd: &'static str,
|
||||
args: &'static [&'static str],
|
||||
}
|
||||
|
||||
impl DecompressionCommand {
|
||||
/// Create a new decompress command
|
||||
fn new(
|
||||
cmd: &'static str,
|
||||
args: &'static [&'static str],
|
||||
) -> DecompressionCommand {
|
||||
DecompressionCommand {
|
||||
cmd, args
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for DecompressionCommand {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{} {}", self.cmd, self.args.join(" "))
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref DECOMPRESSION_COMMANDS: HashMap<
|
||||
&'static str,
|
||||
DecompressionCommand,
|
||||
> = {
|
||||
let mut m = HashMap::new();
|
||||
|
||||
const ARGS: &[&str] = &["-d", "-c"];
|
||||
m.insert("gz", DecompressionCommand::new("gzip", ARGS));
|
||||
m.insert("bz2", DecompressionCommand::new("bzip2", ARGS));
|
||||
m.insert("xz", DecompressionCommand::new("xz", ARGS));
|
||||
m.insert("lz4", DecompressionCommand::new("lz4", ARGS));
|
||||
|
||||
const LZMA_ARGS: &[&str] = &["--format=lzma", "-d", "-c"];
|
||||
m.insert("lzma", DecompressionCommand::new("xz", LZMA_ARGS));
|
||||
|
||||
m
|
||||
};
|
||||
static ref SUPPORTED_COMPRESSION_FORMATS: GlobSet = {
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
builder.add(Glob::new("*.gz").unwrap());
|
||||
builder.add(Glob::new("*.bz2").unwrap());
|
||||
builder.add(Glob::new("*.xz").unwrap());
|
||||
builder.add(Glob::new("*.lz4").unwrap());
|
||||
builder.add(Glob::new("*.lzma").unwrap());
|
||||
builder.build().unwrap()
|
||||
};
|
||||
static ref TAR_ARCHIVE_FORMATS: GlobSet = {
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
builder.add(Glob::new("*.tar.gz").unwrap());
|
||||
builder.add(Glob::new("*.tar.xz").unwrap());
|
||||
builder.add(Glob::new("*.tar.bz2").unwrap());
|
||||
builder.add(Glob::new("*.tar.lz4").unwrap());
|
||||
builder.add(Glob::new("*.tgz").unwrap());
|
||||
builder.add(Glob::new("*.txz").unwrap());
|
||||
builder.add(Glob::new("*.tbz2").unwrap());
|
||||
builder.build().unwrap()
|
||||
};
|
||||
}
|
||||
|
||||
/// DecompressionReader provides an `io::Read` implementation for a limited
|
||||
/// set of compression formats.
|
||||
#[derive(Debug)]
|
||||
pub struct DecompressionReader {
|
||||
cmd: DecompressionCommand,
|
||||
child: process::Child,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl DecompressionReader {
|
||||
/// Returns a handle to the stdout of the spawned decompression process for
|
||||
/// `path`, which can be directly searched in the worker. When the returned
|
||||
/// value is exhausted, the underlying process is reaped. If the underlying
|
||||
/// process fails, then its stderr is read and converted into a normal
|
||||
/// io::Error.
|
||||
///
|
||||
/// If there is any error in spawning the decompression command, then
|
||||
/// return `None`, after outputting any necessary debug or error messages.
|
||||
pub fn from_path(path: &Path) -> Option<DecompressionReader> {
|
||||
let extension = match path.extension().and_then(OsStr::to_str) {
|
||||
Some(extension) => extension,
|
||||
None => {
|
||||
debug!(
|
||||
"{}: failed to get compresson extension", path.display());
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let decompression_cmd = match DECOMPRESSION_COMMANDS.get(extension) {
|
||||
Some(cmd) => cmd,
|
||||
None => {
|
||||
debug!(
|
||||
"{}: failed to get decompression command", path.display());
|
||||
return None;
|
||||
}
|
||||
};
|
||||
let cmd = process::Command::new(decompression_cmd.cmd)
|
||||
.args(decompression_cmd.args)
|
||||
.arg(path)
|
||||
.stdout(Stdio::piped())
|
||||
.stderr(Stdio::piped())
|
||||
.spawn();
|
||||
let child = match cmd {
|
||||
Ok(process) => process,
|
||||
Err(_) => {
|
||||
debug!(
|
||||
"{}: decompression command '{}' not found",
|
||||
path.display(), decompression_cmd.cmd);
|
||||
return None;
|
||||
}
|
||||
};
|
||||
Some(DecompressionReader::new(*decompression_cmd, child))
|
||||
}
|
||||
|
||||
fn new(
|
||||
cmd: DecompressionCommand,
|
||||
child: process::Child,
|
||||
) -> DecompressionReader {
|
||||
DecompressionReader {
|
||||
cmd: cmd,
|
||||
child: child,
|
||||
done: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn read_error(&mut self) -> io::Result<io::Error> {
|
||||
let mut errbytes = vec![];
|
||||
self.child.stderr.as_mut().unwrap().read_to_end(&mut errbytes)?;
|
||||
let errstr = String::from_utf8_lossy(&errbytes);
|
||||
let errstr = errstr.trim();
|
||||
|
||||
Ok(if errstr.is_empty() {
|
||||
let msg = format!("decompression command failed: '{}'", self.cmd);
|
||||
io::Error::new(io::ErrorKind::Other, msg)
|
||||
} else {
|
||||
let msg = format!(
|
||||
"decompression command '{}' failed: {}", self.cmd, errstr);
|
||||
io::Error::new(io::ErrorKind::Other, msg)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for DecompressionReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
if self.done {
|
||||
return Ok(0);
|
||||
}
|
||||
let nread = self.child.stdout.as_mut().unwrap().read(buf)?;
|
||||
if nread == 0 {
|
||||
self.done = true;
|
||||
// Reap the child now that we're done reading.
|
||||
// If the command failed, report stderr as an error.
|
||||
if !self.child.wait()?.success() {
|
||||
return Err(self.read_error()?);
|
||||
}
|
||||
}
|
||||
Ok(nread)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the given path contains a supported compression format or
|
||||
/// is a TAR archive.
|
||||
pub fn is_compressed(path: &Path) -> bool {
|
||||
is_supported_compression_format(path) || is_tar_archive(path)
|
||||
}
|
||||
|
||||
/// Returns true if the given path matches any one of the supported compression
|
||||
/// formats
|
||||
fn is_supported_compression_format(path: &Path) -> bool {
|
||||
SUPPORTED_COMPRESSION_FORMATS.is_match(path)
|
||||
}
|
||||
|
||||
/// Returns true if the given path matches any of the known TAR file formats.
|
||||
fn is_tar_archive(path: &Path) -> bool {
|
||||
TAR_ARCHIVE_FORMATS.is_match(path)
|
||||
}
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user