mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-21 07:03:48 -07:00
Compare commits
113 Commits
ag/disable
...
ignore-0.4
Author | SHA1 | Date | |
---|---|---|---|
|
00bfcd14a6 | ||
|
bf0ddc4675 | ||
|
0fb3f6a159 | ||
|
837fb5e21f | ||
|
2e1815606e | ||
|
cb2f6ddc61 | ||
|
bd7a42602f | ||
|
528ce56e1b | ||
|
8892bf648c | ||
|
8cb7271b64 | ||
|
4858267f3b | ||
|
5011dba2fd | ||
|
e14f9195e5 | ||
|
ef0e7af56a | ||
|
b266818aa5 | ||
|
81415ae52d | ||
|
5c4584aa7c | ||
|
0972c6e7c7 | ||
|
0a372bf2e4 | ||
|
345124a7fa | ||
|
31807f805a | ||
|
4de227fd9a | ||
|
d7ce274722 | ||
|
5b10328f41 | ||
|
813c676eca | ||
|
f625d72b6f | ||
|
3de31f7527 | ||
|
e402d6c260 | ||
|
48b5bdc441 | ||
|
709ca91f50 | ||
|
9c220f9a9b | ||
|
9085bed139 | ||
|
931ab35f76 | ||
|
b5e5979ff1 | ||
|
052c857da0 | ||
|
5e84e784c8 | ||
|
01e8e11621 | ||
|
9268ff8e8d | ||
|
c2cb0a4de4 | ||
|
adb9332f52 | ||
|
bc37c32717 | ||
|
08ae4da2b7 | ||
|
7ac95c1f50 | ||
|
7a6903bd4e | ||
|
9801fae29f | ||
|
abdf7140d7 | ||
|
b83e7968ef | ||
|
8ebc113847 | ||
|
785c1f1766 | ||
|
8b734cb490 | ||
|
b93762ea7a | ||
|
34677d2622 | ||
|
d1389db2e3 | ||
|
50bcb7409e | ||
|
7b9972c308 | ||
|
9f000c2910 | ||
|
392682d352 | ||
|
7d3f794588 | ||
|
290fd2a7b6 | ||
|
d1e4d28f30 | ||
|
5ce2d7351d | ||
|
9dcfd9a205 | ||
|
36b276c6d0 | ||
|
03bf37ff4a | ||
|
e7829c05d3 | ||
|
a6222939f9 | ||
|
6ffd434232 | ||
|
1f1cd9b467 | ||
|
973de50c9e | ||
|
5f8805a496 | ||
|
fdde2bcd38 | ||
|
7b3fe6b325 | ||
|
b3dd3ae203 | ||
|
f3083e4574 | ||
|
d03e30707e | ||
|
d7f57d9aab | ||
|
1a2a24ea74 | ||
|
d66610b295 | ||
|
019ae1989b | ||
|
36d3f235dc | ||
|
79018eb693 | ||
|
44cd344438 | ||
|
e493e54b9b | ||
|
8e8215aa65 | ||
|
3fe701498e | ||
|
e79085e9e4 | ||
|
764c197022 | ||
|
ef1611b5f5 | ||
|
45d12abbc5 | ||
|
5fde8391f9 | ||
|
3edb11c513 | ||
|
ed144be775 | ||
|
967e7ad0de | ||
|
9952ba2068 | ||
|
b751758d60 | ||
|
8f14cb18a5 | ||
|
da9d720431 | ||
|
a9d71a0368 | ||
|
f3646242cc | ||
|
601f212a0b | ||
|
5a565354f8 | ||
|
2a6532ae71 | ||
|
ece1f50cfe | ||
|
a7d26c8f14 | ||
|
bd222ae93f | ||
|
4359d8aac0 | ||
|
308819fb1f | ||
|
09108b7fda | ||
|
743d64f2e4 | ||
|
5962abc465 | ||
|
1604a18db3 | ||
|
9eeb0b01ce | ||
|
df4400209a |
108
.github/workflows/ci.yml
vendored
Normal file
108
.github/workflows/ci.yml
vendored
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
name: ci
|
||||||
|
on:
|
||||||
|
pull_request:
|
||||||
|
push:
|
||||||
|
branches:
|
||||||
|
- master
|
||||||
|
schedule:
|
||||||
|
- cron: '00 01 * * *'
|
||||||
|
jobs:
|
||||||
|
test:
|
||||||
|
name: test
|
||||||
|
runs-on: ubuntu-18.04
|
||||||
|
steps:
|
||||||
|
- name: no-op
|
||||||
|
run: echo hello
|
||||||
|
|
||||||
|
# test:
|
||||||
|
# name: test
|
||||||
|
# runs-on: ${{ matrix.os }}
|
||||||
|
# strategy:
|
||||||
|
# matrix:
|
||||||
|
# # The docs seem to suggest that we can have a matrix with just an
|
||||||
|
# # include directive, but it results in a "matrix must define at least
|
||||||
|
# # one vector" error in the CI system.
|
||||||
|
# build:
|
||||||
|
# # - pinned-glibc
|
||||||
|
# - pinned-musl
|
||||||
|
# - stable
|
||||||
|
# # - beta
|
||||||
|
# # We test musl with nightly because every once in a while, this will
|
||||||
|
# # catch an upstream regression.
|
||||||
|
# # - nightly-glibc
|
||||||
|
# # - nightly-musl
|
||||||
|
# # - macos
|
||||||
|
# # - win-msvc-32
|
||||||
|
# # - win-msvc-64
|
||||||
|
# # - win-gnu-32
|
||||||
|
# # - win-gnu-64
|
||||||
|
# include:
|
||||||
|
# # - build: pinned-glibc
|
||||||
|
# # os: ubuntu-18.04
|
||||||
|
# # rust: 1.34.0
|
||||||
|
# # target: x86_64-unknown-linux-gnu
|
||||||
|
# - build: pinned-musl
|
||||||
|
# os: ubuntu-18.04
|
||||||
|
# rust: 1.34.0
|
||||||
|
# target: x86_64-unknown-linux-musl
|
||||||
|
# - build: stable
|
||||||
|
# os: ubuntu-18.04
|
||||||
|
# rust: stable
|
||||||
|
# target: x86_64-unknown-linux-gnu
|
||||||
|
# # - build: beta
|
||||||
|
# # os: ubuntu-18.04
|
||||||
|
# # rust: beta
|
||||||
|
# # target: x86_64-unknown-linux-gnu
|
||||||
|
# # - build: nightly-glibc
|
||||||
|
# # os: ubuntu-18.04
|
||||||
|
# # rust: nightly
|
||||||
|
# # target: x86_64-unknown-linux-gnu
|
||||||
|
# # - build: nightly-musl
|
||||||
|
# # os: ubuntu-18.04
|
||||||
|
# # rust: nightly
|
||||||
|
# # target: x86_64-unknown-linux-musl
|
||||||
|
# # - build: macos
|
||||||
|
# # os: macOS-10.14
|
||||||
|
# # rust: stable
|
||||||
|
# # target: x86_64-apple-darwin
|
||||||
|
# # - build: win-msvc-32
|
||||||
|
# # os: windows-2019
|
||||||
|
# # rust: stable
|
||||||
|
# # target: i686-pc-windows-msvc
|
||||||
|
# # - build: win-msvc-64
|
||||||
|
# # os: windows-2019
|
||||||
|
# # rust: stable
|
||||||
|
# # target: x86_64-pc-windows-msvc
|
||||||
|
# # - build: win-gnu-32
|
||||||
|
# # os: windows-2019
|
||||||
|
# # rust: stable-i686-gnu
|
||||||
|
# # target: i686-pc-windows-gnu
|
||||||
|
# # - build: win-gnu-64
|
||||||
|
# # os: windows-2019
|
||||||
|
# # rust: stable-x86_64-gnu
|
||||||
|
# # target: x86_64-pc-windows-gnu
|
||||||
|
# steps:
|
||||||
|
# - name: Checkout repository
|
||||||
|
# uses: actions/checkout@v1
|
||||||
|
# with:
|
||||||
|
# fetch-depth: 1
|
||||||
|
# - name: Install Rust
|
||||||
|
# uses: hecrj/setup-rust-action@v1
|
||||||
|
# with:
|
||||||
|
# rust-version: ${{ matrix.rust }}
|
||||||
|
# - name: Install Rust Target
|
||||||
|
# run: rustup target add ${{ matrix.target }}
|
||||||
|
# - name: Install musl-gcc
|
||||||
|
# if: contains(matrix.target, 'musl')
|
||||||
|
# run: |
|
||||||
|
# sudo apt-get install musl-tools
|
||||||
|
# - name: Build everything
|
||||||
|
# run: cargo build --verbose --target ${{ matrix.target }} --all --features pcre2
|
||||||
|
# - name: Install zsh
|
||||||
|
# if: matrix.build == 'stable'
|
||||||
|
# run: sudo apt-get install zsh
|
||||||
|
# - name: Test zsh auto-completions
|
||||||
|
# if: matrix.build == 'stable'
|
||||||
|
# run: ./ci/test_complete.sh
|
||||||
|
# - name: Run tests
|
||||||
|
# run: cargo test --verbose --target ${{ matrix.target }} --all --features pcre2
|
@@ -63,13 +63,13 @@ matrix:
|
|||||||
# Minimum Rust supported channel. We enable these to make sure ripgrep
|
# Minimum Rust supported channel. We enable these to make sure ripgrep
|
||||||
# continues to work on the advertised minimum Rust version.
|
# continues to work on the advertised minimum Rust version.
|
||||||
- os: linux
|
- os: linux
|
||||||
rust: 1.32.0
|
rust: 1.34.0
|
||||||
env: TARGET=x86_64-unknown-linux-gnu
|
env: TARGET=x86_64-unknown-linux-gnu
|
||||||
- os: linux
|
- os: linux
|
||||||
rust: 1.32.0
|
rust: 1.34.0
|
||||||
env: TARGET=x86_64-unknown-linux-musl
|
env: TARGET=x86_64-unknown-linux-musl
|
||||||
- os: linux
|
- os: linux
|
||||||
rust: 1.32.0
|
rust: 1.34.0
|
||||||
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
|
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
|
||||||
addons:
|
addons:
|
||||||
apt:
|
apt:
|
||||||
|
141
CHANGELOG.md
141
CHANGELOG.md
@@ -1,6 +1,73 @@
|
|||||||
0.11.0 (TBD)
|
TBD
|
||||||
============
|
===
|
||||||
TODO.
|
TODO
|
||||||
|
|
||||||
|
Bug fixes:
|
||||||
|
|
||||||
|
* [BUG #1335](https://github.com/BurntSushi/ripgrep/issues/1335):
|
||||||
|
Fixes a performance bug when searching plain text files with very long lines.
|
||||||
|
|
||||||
|
|
||||||
|
11.0.2 (2019-08-01)
|
||||||
|
===================
|
||||||
|
ripgrep 11.0.2 is a new patch release that fixes a few bugs, including a
|
||||||
|
performance regression and a matching bug when using the `-F/--fixed-strings`
|
||||||
|
flag.
|
||||||
|
|
||||||
|
Feature enhancements:
|
||||||
|
|
||||||
|
* [FEATURE #1293](https://github.com/BurntSushi/ripgrep/issues/1293):
|
||||||
|
Added `--glob-case-insensitive` flag that makes `--glob` behave as `--iglob`.
|
||||||
|
|
||||||
|
Bug fixes:
|
||||||
|
|
||||||
|
* [BUG #1246](https://github.com/BurntSushi/ripgrep/issues/1246):
|
||||||
|
Add translations to README, starting with an unofficial Chinese translation.
|
||||||
|
* [BUG #1259](https://github.com/BurntSushi/ripgrep/issues/1259):
|
||||||
|
Fix bug where the last byte of a `-f file` was stripped if it wasn't a `\n`.
|
||||||
|
* [BUG #1261](https://github.com/BurntSushi/ripgrep/issues/1261):
|
||||||
|
Document that no error is reported when searching for `\n` with `-P/--pcre2`.
|
||||||
|
* [BUG #1284](https://github.com/BurntSushi/ripgrep/issues/1284):
|
||||||
|
Mention `.ignore` and `.rgignore` more prominently in the README.
|
||||||
|
* [BUG #1292](https://github.com/BurntSushi/ripgrep/issues/1292):
|
||||||
|
Fix bug where `--with-filename` was sometimes enabled incorrectly.
|
||||||
|
* [BUG #1268](https://github.com/BurntSushi/ripgrep/issues/1268):
|
||||||
|
Fix major performance regression in GitHub `x86_64-linux` binary release.
|
||||||
|
* [BUG #1302](https://github.com/BurntSushi/ripgrep/issues/1302):
|
||||||
|
Show better error messages when a non-existent preprocessor command is given.
|
||||||
|
* [BUG #1334](https://github.com/BurntSushi/ripgrep/issues/1334):
|
||||||
|
Fix match regression with `-F` flag when patterns contain meta characters.
|
||||||
|
|
||||||
|
|
||||||
|
11.0.1 (2019-04-16)
|
||||||
|
===================
|
||||||
|
ripgrep 11.0.1 is a new patch release that fixes a search regression introduced
|
||||||
|
in the previous 11.0.0 release. In particular, ripgrep can enter an infinite
|
||||||
|
loop for some search patterns when searching invalid UTF-8.
|
||||||
|
|
||||||
|
Bug fixes:
|
||||||
|
|
||||||
|
* [BUG #1247](https://github.com/BurntSushi/ripgrep/issues/1247):
|
||||||
|
Fix search bug that can cause ripgrep to enter an infinite loop.
|
||||||
|
|
||||||
|
|
||||||
|
11.0.0 (2019-04-15)
|
||||||
|
===================
|
||||||
|
ripgrep 11 is a new major version release of ripgrep that contains many bug
|
||||||
|
fixes, some performance improvements and a few feature enhancements. Notably,
|
||||||
|
ripgrep's user experience for binary file filtering has been improved. See the
|
||||||
|
[guide's new section on binary data](GUIDE.md#binary-data) for more details.
|
||||||
|
|
||||||
|
This release also marks a change in ripgrep's versioning. Whereas the previous
|
||||||
|
version was `0.10.0`, this version is `11.0.0`. Moving forward, ripgrep's
|
||||||
|
major version will be increased a few times per year. ripgrep will continue to
|
||||||
|
be conservative with respect to backwards compatibility, but may occasionally
|
||||||
|
introduce breaking changes, which will always be documented in this CHANGELOG.
|
||||||
|
See [issue 1172](https://github.com/BurntSushi/ripgrep/issues/1172) for a bit
|
||||||
|
more detail on why this versioning change was made.
|
||||||
|
|
||||||
|
This release increases the **minimum supported Rust version** from 1.28.0 to
|
||||||
|
1.34.0.
|
||||||
|
|
||||||
**BREAKING CHANGES**:
|
**BREAKING CHANGES**:
|
||||||
|
|
||||||
@@ -11,45 +78,91 @@ TODO.
|
|||||||
error (e.g., regex syntax error). One exception to this is if ripgrep is run
|
error (e.g., regex syntax error). One exception to this is if ripgrep is run
|
||||||
with `-q/--quiet`. In that case, if an error occurs and a match is found,
|
with `-q/--quiet`. In that case, if an error occurs and a match is found,
|
||||||
then ripgrep will exit with a `0` exit status code.
|
then ripgrep will exit with a `0` exit status code.
|
||||||
|
* Supplying the `-u/--unrestricted` flag three times is now equivalent to
|
||||||
|
supplying `--no-ignore --hidden --binary`. Previously, `-uuu` was equivalent
|
||||||
|
to `--no-ignore --hidden --text`. The difference is that `--binary` disables
|
||||||
|
binary file filtering without potentially dumping binary data into your
|
||||||
|
terminal. That is, `rg -uuu foo` should now be equivalent to `grep -r foo`.
|
||||||
* The `avx-accel` feature of ripgrep has been removed since it is no longer
|
* The `avx-accel` feature of ripgrep has been removed since it is no longer
|
||||||
necessary. All uses of AVX in ripgrep are now enabled automatically via
|
necessary. All uses of AVX in ripgrep are now enabled automatically via
|
||||||
runtime CPU feature detection. The `simd-accel` feature does remain
|
runtime CPU feature detection. The `simd-accel` feature does remain available
|
||||||
available, however, it does increase compilation times substantially at the
|
(only for enabling SIMD for transcoding), however, it does increase
|
||||||
moment.
|
compilation times substantially at the moment.
|
||||||
|
|
||||||
|
Performance improvements:
|
||||||
|
|
||||||
|
* [PERF #497](https://github.com/BurntSushi/ripgrep/issues/497),
|
||||||
|
[PERF #838](https://github.com/BurntSushi/ripgrep/issues/838):
|
||||||
|
Make `rg -F -f dictionary-of-literals` much faster.
|
||||||
|
|
||||||
Feature enhancements:
|
Feature enhancements:
|
||||||
|
|
||||||
|
* Added or improved file type filtering for Apache Thrift, ASP, Bazel, Brotli,
|
||||||
|
BuildStream, bzip2, C, C++, Cython, gzip, Java, Make, Postscript, QML, Tex,
|
||||||
|
XML, xz, zig and zstd.
|
||||||
|
* [FEATURE #855](https://github.com/BurntSushi/ripgrep/issues/855):
|
||||||
|
Add `--binary` flag for disabling binary file filtering.
|
||||||
|
* [FEATURE #1078](https://github.com/BurntSushi/ripgrep/pull/1078):
|
||||||
|
Add `--max-columns-preview` flag for showing a preview of long lines.
|
||||||
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
|
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
|
||||||
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
|
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
|
||||||
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
|
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
|
||||||
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
|
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
|
||||||
|
* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
|
||||||
|
Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
|
||||||
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
|
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
|
||||||
ripgrep's exit status logic should now match GNU grep. See updated man page.
|
ripgrep's exit status logic should now match GNU grep. See updated man page.
|
||||||
* [FEATURE #1170](https://github.com/BurntSushi/ripgrep/pull/1170):
|
* [FEATURE #1164](https://github.com/BurntSushi/ripgrep/pull/1164):
|
||||||
Add `--ignore-file-case-insensitive` for case insensitive .ignore globs.
|
Add `--ignore-file-case-insensitive` for case insensitive ignore globs.
|
||||||
|
* [FEATURE #1185](https://github.com/BurntSushi/ripgrep/pull/1185):
|
||||||
|
Add `-I` flag as a short option for the `--no-filename` flag.
|
||||||
|
* [FEATURE #1207](https://github.com/BurntSushi/ripgrep/pull/1207):
|
||||||
|
Add `none` value to `-E/--encoding` to forcefully disable all transcoding.
|
||||||
|
* [FEATURE da9d7204](https://github.com/BurntSushi/ripgrep/commit/da9d7204):
|
||||||
|
Add `--pcre2-version` for querying and showing PCRE2 version information.
|
||||||
|
|
||||||
Bug fixes:
|
Bug fixes:
|
||||||
|
|
||||||
|
* [BUG #306](https://github.com/BurntSushi/ripgrep/issues/306),
|
||||||
|
[BUG #855](https://github.com/BurntSushi/ripgrep/issues/855):
|
||||||
|
Improve the user experience for ripgrep's binary file filtering.
|
||||||
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
|
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
|
||||||
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
|
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
|
||||||
`**` is now accepted as valid syntax anywhere in a glob.
|
`**` is now accepted as valid syntax anywhere in a glob.
|
||||||
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
|
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
|
||||||
ripgrep no longer hangs when searching `/proc` with a zombie process present.
|
ripgrep no longer hangs when searching `/proc` with a zombie process present.
|
||||||
|
* [BUG #1052](https://github.com/BurntSushi/ripgrep/issues/1052):
|
||||||
|
Fix bug where ripgrep could panic when transcoding UTF-16 files.
|
||||||
|
* [BUG #1055](https://github.com/BurntSushi/ripgrep/issues/1055):
|
||||||
|
Suggest `-U/--multiline` when a pattern contains a `\n`.
|
||||||
|
* [BUG #1063](https://github.com/BurntSushi/ripgrep/issues/1063):
|
||||||
|
Always strip a BOM if it's present, even for UTF-8.
|
||||||
|
* [BUG #1064](https://github.com/BurntSushi/ripgrep/issues/1064):
|
||||||
|
Fix inner literal detection that could lead to incorrect matches.
|
||||||
|
* [BUG #1079](https://github.com/BurntSushi/ripgrep/issues/1079):
|
||||||
|
Fixes a bug where the order of globs could result in missing a match.
|
||||||
|
* [BUG #1089](https://github.com/BurntSushi/ripgrep/issues/1089):
|
||||||
|
Fix another bug where ripgrep could panic when transcoding UTF-16 files.
|
||||||
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
|
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
|
||||||
Add note about inverted flags to the man page.
|
Add note about inverted flags to the man page.
|
||||||
|
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
|
||||||
|
Fix handling of literal slashes in gitignore patterns.
|
||||||
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
|
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
|
||||||
Fix corner cases involving the `--crlf` flag.
|
Fix corner cases involving the `--crlf` flag.
|
||||||
|
* [BUG #1101](https://github.com/BurntSushi/ripgrep/issues/1101):
|
||||||
|
Fix AsciiDoc escaping for man page output.
|
||||||
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
|
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
|
||||||
Clarify what `--encoding auto` does.
|
Clarify what `--encoding auto` does.
|
||||||
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
|
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
|
||||||
`--files-with-matches` and `--files-without-match` work with one file.
|
`--files-with-matches` and `--files-without-match` work with one file.
|
||||||
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
|
|
||||||
Fix handling of literal slashes in gitignore patterns.
|
|
||||||
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
|
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
|
||||||
Fix bug that was triggering Windows antimalware when using the --files flag.
|
Fix bug that was triggering Windows antimalware when using the `--files`
|
||||||
|
flag.
|
||||||
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
|
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
|
||||||
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
|
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
|
||||||
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
|
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
|
||||||
|
* [BUG #1144](https://github.com/BurntSushi/ripgrep/issues/1144):
|
||||||
|
Fixes a bug where line numbers could be wrong on big-endian machines.
|
||||||
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
|
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
|
||||||
Windows files with "hidden" attribute are now treated as hidden.
|
Windows files with "hidden" attribute are now treated as hidden.
|
||||||
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
|
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
|
||||||
@@ -58,6 +171,12 @@ Bug fixes:
|
|||||||
Fix handling of repeated `**` patterns in gitignore files.
|
Fix handling of repeated `**` patterns in gitignore files.
|
||||||
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
|
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
|
||||||
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
|
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
|
||||||
|
* [BUG #1189](https://github.com/BurntSushi/ripgrep/issues/1189):
|
||||||
|
Document cases where ripgrep may use a lot of memory.
|
||||||
|
* [BUG #1203](https://github.com/BurntSushi/ripgrep/issues/1203):
|
||||||
|
Fix a matching bug related to the suffix literal optimization.
|
||||||
|
* [BUG 8f14cb18](https://github.com/BurntSushi/ripgrep/commit/8f14cb18):
|
||||||
|
Increase the default stack size for PCRE2's JIT.
|
||||||
|
|
||||||
|
|
||||||
0.10.0 (2018-09-07)
|
0.10.0 (2018-09-07)
|
||||||
|
729
Cargo.lock
generated
729
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
13
Cargo.toml
13
Cargo.toml
@@ -1,11 +1,11 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "ripgrep"
|
name = "ripgrep"
|
||||||
version = "0.10.0" #:version
|
version = "11.0.2" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
ripgrep is a line-oriented search tool that recursively searches your current
|
ripgrep is a line-oriented search tool that recursively searches your current
|
||||||
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
||||||
has first class support on Windows, macOS and Linux
|
has first class support on Windows, macOS and Linux.
|
||||||
"""
|
"""
|
||||||
documentation = "https://github.com/BurntSushi/ripgrep"
|
documentation = "https://github.com/BurntSushi/ripgrep"
|
||||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||||
@@ -46,9 +46,9 @@ members = [
|
|||||||
]
|
]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bstr = "0.1.2"
|
bstr = "0.2.0"
|
||||||
grep = { version = "0.2.3", path = "grep" }
|
grep = { version = "0.2.4", path = "grep" }
|
||||||
ignore = { version = "0.4.4", path = "ignore" }
|
ignore = { version = "0.4.7", path = "ignore" }
|
||||||
lazy_static = "1.1.0"
|
lazy_static = "1.1.0"
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
num_cpus = "1.8.0"
|
num_cpus = "1.8.0"
|
||||||
@@ -61,6 +61,9 @@ version = "2.32.0"
|
|||||||
default-features = false
|
default-features = false
|
||||||
features = ["suggestions"]
|
features = ["suggestions"]
|
||||||
|
|
||||||
|
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
|
||||||
|
version = "0.3.0"
|
||||||
|
|
||||||
[build-dependencies]
|
[build-dependencies]
|
||||||
lazy_static = "1.1.0"
|
lazy_static = "1.1.0"
|
||||||
|
|
||||||
|
2
Cross.toml
Normal file
2
Cross.toml
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
[target.x86_64-unknown-linux-musl]
|
||||||
|
image = "burntsushi/cross:x86_64-unknown-linux-musl"
|
4
FAQ.md
4
FAQ.md
@@ -934,8 +934,8 @@ Here are some cases where you might *not* want to use ripgrep. The same caveats
|
|||||||
for the previous section apply.
|
for the previous section apply.
|
||||||
|
|
||||||
* Are you writing portable shell scripts intended to work in a variety of
|
* Are you writing portable shell scripts intended to work in a variety of
|
||||||
environments? Great, probably not a good idea to use ripgrep! ripgrep is has
|
environments? Great, probably not a good idea to use ripgrep! ripgrep has
|
||||||
nowhere near the ubquity of grep, so if you do use ripgrep, you might need
|
nowhere near the ubiquity of grep, so if you do use ripgrep, you might need
|
||||||
to futz with the installation process more than you would with grep.
|
to futz with the installation process more than you would with grep.
|
||||||
* Do you care about POSIX compatibility? If so, then you can't use ripgrep
|
* Do you care about POSIX compatibility? If so, then you can't use ripgrep
|
||||||
because it never was, isn't and never will be POSIX compatible.
|
because it never was, isn't and never will be POSIX compatible.
|
||||||
|
108
GUIDE.md
108
GUIDE.md
@@ -18,6 +18,7 @@ translatable to any command line shell environment.
|
|||||||
* [Replacements](#replacements)
|
* [Replacements](#replacements)
|
||||||
* [Configuration file](#configuration-file)
|
* [Configuration file](#configuration-file)
|
||||||
* [File encoding](#file-encoding)
|
* [File encoding](#file-encoding)
|
||||||
|
* [Binary data](#binary-data)
|
||||||
* [Common options](#common-options)
|
* [Common options](#common-options)
|
||||||
|
|
||||||
|
|
||||||
@@ -109,7 +110,7 @@ colors, you'll notice that `faster` will be highlighted instead of just the
|
|||||||
|
|
||||||
It is beyond the scope of this guide to provide a full tutorial on regular
|
It is beyond the scope of this guide to provide a full tutorial on regular
|
||||||
expressions, but ripgrep's specific syntax is documented here:
|
expressions, but ripgrep's specific syntax is documented here:
|
||||||
https://docs.rs/regex/0.2.5/regex/#syntax
|
https://docs.rs/regex/*/regex/#syntax
|
||||||
|
|
||||||
|
|
||||||
### Recursive search
|
### Recursive search
|
||||||
@@ -537,8 +538,9 @@ formatting peculiarities:
|
|||||||
|
|
||||||
```
|
```
|
||||||
$ cat $HOME/.ripgreprc
|
$ cat $HOME/.ripgreprc
|
||||||
# Don't let ripgrep vomit really long lines to my terminal.
|
# Don't let ripgrep vomit really long lines to my terminal, and show a preview.
|
||||||
--max-columns=150
|
--max-columns=150
|
||||||
|
--max-columns-preview
|
||||||
|
|
||||||
# Add my 'web' type.
|
# Add my 'web' type.
|
||||||
--type-add
|
--type-add
|
||||||
@@ -603,7 +605,7 @@ topic, but we can try to summarize its relevancy to ripgrep:
|
|||||||
* Files are generally just a bundle of bytes. There is no reliable way to know
|
* Files are generally just a bundle of bytes. There is no reliable way to know
|
||||||
their encoding.
|
their encoding.
|
||||||
* Either the encoding of the pattern must match the encoding of the files being
|
* Either the encoding of the pattern must match the encoding of the files being
|
||||||
searched, or a form of transcoding must be performed converts either the
|
searched, or a form of transcoding must be performed that converts either the
|
||||||
pattern or the file to the same encoding as the other.
|
pattern or the file to the same encoding as the other.
|
||||||
* ripgrep tends to work best on plain text files, and among plain text files,
|
* ripgrep tends to work best on plain text files, and among plain text files,
|
||||||
the most popular encodings likely consist of ASCII, latin1 or UTF-8. As
|
the most popular encodings likely consist of ASCII, latin1 or UTF-8. As
|
||||||
@@ -626,12 +628,15 @@ given, which is the default:
|
|||||||
they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of
|
they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of
|
||||||
the file from UTF-16 to UTF-8, and then execute the search on the transcoded
|
the file from UTF-16 to UTF-8, and then execute the search on the transcoded
|
||||||
version of the file. (This incurs a performance penalty since transcoding
|
version of the file. (This incurs a performance penalty since transcoding
|
||||||
is slower than regex searching.)
|
is slower than regex searching.) If the file contains invalid UTF-16, then
|
||||||
|
the Unicode replacement codepoint is substituted in place of invalid code
|
||||||
|
units.
|
||||||
* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits
|
* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits
|
||||||
you to specify an encoding from the
|
you to specify an encoding from the
|
||||||
[Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
|
[Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
|
||||||
ripgrep will assume *all* files searched are the encoding specified and
|
ripgrep will assume *all* files searched are the encoding specified (unless
|
||||||
will perform a transcoding step just like in the UTF-16 case described above.
|
the file has a BOM) and will perform a transcoding step just like in the
|
||||||
|
UTF-16 case described above.
|
||||||
|
|
||||||
By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep
|
By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep
|
||||||
can and will search arbitrary bytes. The key here is that if you're searching
|
can and will search arbitrary bytes. The key here is that if you're searching
|
||||||
@@ -641,9 +646,26 @@ pattern won't find anything. With all that said, this mode of operation is
|
|||||||
important, because it lets you find ASCII or UTF-8 *within* files that are
|
important, because it lets you find ASCII or UTF-8 *within* files that are
|
||||||
otherwise arbitrary bytes.
|
otherwise arbitrary bytes.
|
||||||
|
|
||||||
|
As a special case, the `-E/--encoding` flag supports the value `none`, which
|
||||||
|
will completely disable all encoding related logic, including BOM sniffing.
|
||||||
|
When `-E/--encoding` is set to `none`, ripgrep will search the raw bytes of
|
||||||
|
the underlying file with no transcoding step. For example, here's how you might
|
||||||
|
search the raw UTF-16 encoding of the string `Шерлок`:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ rg '(?-u)\(\x045\x04@\x04;\x04>\x04:\x04' -E none -a some-utf16-file
|
||||||
|
```
|
||||||
|
|
||||||
|
Of course, that's just an example meant to show how one can drop down into
|
||||||
|
raw bytes. Namely, the simpler command works as you might expect automatically:
|
||||||
|
|
||||||
|
```
|
||||||
|
$ rg 'Шерлок' some-utf16-file
|
||||||
|
```
|
||||||
|
|
||||||
Finally, it is possible to disable ripgrep's Unicode support from within the
|
Finally, it is possible to disable ripgrep's Unicode support from within the
|
||||||
pattern regular expression. For example, let's say you wanted `.` to match any
|
regular expression. For example, let's say you wanted `.` to match any byte
|
||||||
byte rather than any Unicode codepoint. (You might want this while searching a
|
rather than any Unicode codepoint. (You might want this while searching a
|
||||||
binary file, since `.` by default will not match invalid UTF-8.) You could do
|
binary file, since `.` by default will not match invalid UTF-8.) You could do
|
||||||
this by disabling Unicode via a regular expression flag:
|
this by disabling Unicode via a regular expression flag:
|
||||||
|
|
||||||
@@ -660,6 +682,76 @@ $ rg '\w(?-u:\w)\w'
|
|||||||
```
|
```
|
||||||
|
|
||||||
|
|
||||||
|
### Binary data
|
||||||
|
|
||||||
|
In addition to skipping hidden files and files in your `.gitignore` by default,
|
||||||
|
ripgrep also attempts to skip binary files. ripgrep does this by default
|
||||||
|
because binary files (like PDFs or images) are typically not things you want to
|
||||||
|
search when searching for regex matches. Moreover, if content in a binary file
|
||||||
|
did match, then it's possible for undesirable binary data to be printed to your
|
||||||
|
terminal and wreak havoc.
|
||||||
|
|
||||||
|
Unfortunately, unlike skipping hidden files and respecting your `.gitignore`
|
||||||
|
rules, a file cannot as easily be classified as binary. In order to figure out
|
||||||
|
whether a file is binary, the most effective heuristic that balances
|
||||||
|
correctness with performance is to simply look for `NUL` bytes. At that point,
|
||||||
|
the determination is simple: a file is considered "binary" if and only if it
|
||||||
|
contains a `NUL` byte somewhere in its contents.
|
||||||
|
|
||||||
|
The issue is that while most binary files will have a `NUL` byte toward the
|
||||||
|
beginning of their contents, this is not necessarily true. The `NUL` byte might
|
||||||
|
be the very last byte in a large file, but that file is still considered
|
||||||
|
binary. While this leads to a fair amount of complexity inside ripgrep's
|
||||||
|
implementation, it also results in some unintuitive user experiences.
|
||||||
|
|
||||||
|
At a high level, ripgrep operates in three different modes with respect to
|
||||||
|
binary files:
|
||||||
|
|
||||||
|
1. The default mode is to attempt to remove binary files from a search
|
||||||
|
completely. This is meant to mirror how ripgrep removes hidden files and
|
||||||
|
files in your `.gitignore` automatically. That is, as soon as a file is
|
||||||
|
detected as binary, searching stops. If a match was already printed (because
|
||||||
|
it was detected long before a `NUL` byte), then ripgrep will print a warning
|
||||||
|
message indicating that the search stopped prematurely. This default mode
|
||||||
|
**only applies to files searched by ripgrep as a result of recursive
|
||||||
|
directory traversal**, which is consistent with ripgrep's other automatic
|
||||||
|
filtering. For example, `rg foo .file` will search `.file` even though it
|
||||||
|
is hidden. Similarly, `rg foo binary-file` will search `binary-file` in "binary"
|
||||||
|
mode automatically.
|
||||||
|
2. Binary mode is similar to the default mode, except it will not always
|
||||||
|
stop searching after it sees a `NUL` byte. Namely, in this mode, ripgrep
|
||||||
|
will continue searching a file that is known to be binary until the first
|
||||||
|
of two conditions is met: 1) the end of the file has been reached or 2) a
|
||||||
|
match is or has been seen. This means that in binary mode, if ripgrep
|
||||||
|
reports no matches, then there are no matches in the file. When a match does
|
||||||
|
occur, ripgrep prints a message similar to one it prints when in its default
|
||||||
|
mode indicating that the search has stopped prematurely. This mode can be
|
||||||
|
forcefully enabled for all files with the `--binary` flag. The purpose of
|
||||||
|
binary mode is to provide a way to discover matches in all files, but to
|
||||||
|
avoid having binary data dumped into your terminal.
|
||||||
|
3. Text mode completely disables all binary detection and searches all files
|
||||||
|
as if they were text. This is useful when searching a file that is
|
||||||
|
predominantly text but contains a `NUL` byte, or if you are specifically
|
||||||
|
trying to search binary data. This mode can be enabled with the `-a/--text`
|
||||||
|
flag. Note that when using this mode on very large binary files, it is
|
||||||
|
possible for ripgrep to use a lot of memory.
|
||||||
|
|
||||||
|
Unfortunately, there is one additional complexity in ripgrep that can make it
|
||||||
|
difficult to reason about binary files. That is, the way binary detection works
|
||||||
|
depends on the way that ripgrep searches your files. Specifically:
|
||||||
|
|
||||||
|
* When ripgrep uses memory maps, then binary detection is only performed on the
|
||||||
|
first few kilobytes of the file in addition to every matching line.
|
||||||
|
* When ripgrep doesn't use memory maps, then binary detection is performed on
|
||||||
|
all bytes searched.
|
||||||
|
|
||||||
|
This means that whether a file is detected as binary or not can change based
|
||||||
|
on the internal search strategy used by ripgrep. If you prefer to keep
|
||||||
|
ripgrep's binary file detection consistent, then you can disable memory maps
|
||||||
|
via the `--no-mmap` flag. (The cost will be a small performance regression when
|
||||||
|
searching very large files on some platforms.)
|
||||||
|
|
||||||
|
|
||||||
### Common options
|
### Common options
|
||||||
|
|
||||||
ripgrep has a lot of flags. Too many to keep in your head at once. This section
|
ripgrep has a lot of flags. Too many to keep in your head at once. This section
|
||||||
|
53
README.md
53
README.md
@@ -11,6 +11,7 @@ and grep.
|
|||||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||||
[](https://crates.io/crates/ripgrep)
|
[](https://crates.io/crates/ripgrep)
|
||||||
|
[](https://repology.org/project/ripgrep/badges)
|
||||||
|
|
||||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||||
|
|
||||||
@@ -28,6 +29,7 @@ Please see the [CHANGELOG](CHANGELOG.md) for a release history.
|
|||||||
* [Configuration files](GUIDE.md#configuration-file)
|
* [Configuration files](GUIDE.md#configuration-file)
|
||||||
* [Shell completions](FAQ.md#complete)
|
* [Shell completions](FAQ.md#complete)
|
||||||
* [Building](#building)
|
* [Building](#building)
|
||||||
|
* [Translations](#translations)
|
||||||
|
|
||||||
|
|
||||||
### Screenshot of search results
|
### Screenshot of search results
|
||||||
@@ -91,11 +93,11 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
|||||||
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
|
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
|
||||||
replace grep.)
|
replace grep.)
|
||||||
* Like other tools specialized to code search, ripgrep defaults to recursive
|
* Like other tools specialized to code search, ripgrep defaults to recursive
|
||||||
directory search and won't search files ignored by your `.gitignore` files.
|
directory search and won't search files ignored by your
|
||||||
It also ignores hidden and binary files by default. ripgrep also implements
|
`.gitignore`/`.ignore`/`.rgignore` files. It also ignores hidden and binary
|
||||||
full support for `.gitignore`, whereas there are many bugs related to that
|
files by default. ripgrep also implements full support for `.gitignore`,
|
||||||
functionality in other code search tools claiming to provide the same
|
whereas there are many bugs related to that functionality in other code
|
||||||
functionality.
|
search tools claiming to provide the same functionality.
|
||||||
* ripgrep can search specific types of files. For example, `rg -tpy foo`
|
* ripgrep can search specific types of files. For example, `rg -tpy foo`
|
||||||
limits your search to Python files and `rg -Tjs foo` excludes Javascript
|
limits your search to Python files and `rg -Tjs foo` excludes Javascript
|
||||||
files from your search. ripgrep can be taught about new file types with
|
files from your search. ripgrep can be taught about new file types with
|
||||||
@@ -107,13 +109,14 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
|||||||
* ripgrep has optional support for switching its regex engine to use PCRE2.
|
* ripgrep has optional support for switching its regex engine to use PCRE2.
|
||||||
Among other things, this makes it possible to use look-around and
|
Among other things, this makes it possible to use look-around and
|
||||||
backreferences in your patterns, which are not supported in ripgrep's default
|
backreferences in your patterns, which are not supported in ripgrep's default
|
||||||
regex engine. PCRE2 support is enabled with `-P`.
|
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
|
||||||
|
always) or `--auto-hybrid-regex` (use PCRE2 only if needed).
|
||||||
* ripgrep supports searching files in text encodings other than UTF-8, such
|
* ripgrep supports searching files in text encodings other than UTF-8, such
|
||||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||||
automatically detecting UTF-16 is provided. Other text encodings must be
|
automatically detecting UTF-16 is provided. Other text encodings must be
|
||||||
explicitly specified with the `-E/--encoding` flag.)
|
explicitly specified with the `-E/--encoding` flag.)
|
||||||
* ripgrep supports searching files compressed in a common format (gzip, xz,
|
* ripgrep supports searching files compressed in a common format (brotli,
|
||||||
lzma, bzip2 or lz4) with the `-z/--search-zip` flag.
|
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
|
||||||
* ripgrep supports arbitrary input preprocessing filters which could be PDF
|
* ripgrep supports arbitrary input preprocessing filters which could be PDF
|
||||||
text extraction, less supported decompression, decrypting, automatic encoding
|
text extraction, less supported decompression, decrypting, automatic encoding
|
||||||
detection and so on.
|
detection and so on.
|
||||||
@@ -207,14 +210,6 @@ from homebrew-core, (compiled with rust stable, no SIMD):
|
|||||||
$ brew install ripgrep
|
$ brew install ripgrep
|
||||||
```
|
```
|
||||||
|
|
||||||
or you can install a binary compiled with rust nightly (including SIMD and all
|
|
||||||
optimizations) by utilizing a custom tap:
|
|
||||||
|
|
||||||
```
|
|
||||||
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
|
|
||||||
$ brew install ripgrep-bin
|
|
||||||
```
|
|
||||||
|
|
||||||
If you're a **MacPorts** user, then you can install ripgrep from the
|
If you're a **MacPorts** user, then you can install ripgrep from the
|
||||||
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
|
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
|
||||||
|
|
||||||
@@ -230,7 +225,7 @@ $ choco install ripgrep
|
|||||||
```
|
```
|
||||||
|
|
||||||
If you're a **Windows Scoop** user, then you can install ripgrep from the
|
If you're a **Windows Scoop** user, then you can install ripgrep from the
|
||||||
[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
|
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
|
||||||
|
|
||||||
```
|
```
|
||||||
$ scoop install ripgrep
|
$ scoop install ripgrep
|
||||||
@@ -293,11 +288,11 @@ then ripgrep can be installed using a binary `.deb` file provided in each
|
|||||||
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
|
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
|
||||||
|
|
||||||
```
|
```
|
||||||
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.10.0/ripgrep_0.10.0_amd64.deb
|
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/11.0.2/ripgrep_11.0.2_amd64.deb
|
||||||
$ sudo dpkg -i ripgrep_0.10.0_amd64.deb
|
$ sudo dpkg -i ripgrep_11.0.2_amd64.deb
|
||||||
```
|
```
|
||||||
|
|
||||||
If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is
|
If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
|
||||||
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
|
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
|
||||||
```
|
```
|
||||||
$ sudo apt-get install ripgrep
|
$ sudo apt-get install ripgrep
|
||||||
@@ -339,7 +334,7 @@ If you're a **NetBSD** user, then you can install ripgrep from
|
|||||||
|
|
||||||
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||||
|
|
||||||
* Note that the minimum supported version of Rust for ripgrep is **1.32.0**,
|
* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
|
||||||
although ripgrep may work with older versions.
|
although ripgrep may work with older versions.
|
||||||
* Note that the binary may be bigger than expected because it contains debug
|
* Note that the binary may be bigger than expected because it contains debug
|
||||||
symbols. This is intentional. To remove debug symbols and therefore reduce
|
symbols. This is intentional. To remove debug symbols and therefore reduce
|
||||||
@@ -349,18 +344,12 @@ If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
|||||||
$ cargo install ripgrep
|
$ cargo install ripgrep
|
||||||
```
|
```
|
||||||
|
|
||||||
When compiling with Rust 1.27 or newer, this will automatically enable SIMD
|
|
||||||
optimizations for search.
|
|
||||||
|
|
||||||
ripgrep isn't currently in any other package repositories.
|
|
||||||
[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10).
|
|
||||||
|
|
||||||
|
|
||||||
### Building
|
### Building
|
||||||
|
|
||||||
ripgrep is written in Rust, so you'll need to grab a
|
ripgrep is written in Rust, so you'll need to grab a
|
||||||
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
||||||
ripgrep compiles with Rust 1.32.0 (stable) or newer. In general, ripgrep tracks
|
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
|
||||||
the latest stable release of the Rust compiler.
|
the latest stable release of the Rust compiler.
|
||||||
|
|
||||||
To build ripgrep:
|
To build ripgrep:
|
||||||
@@ -430,3 +419,11 @@ $ cargo test --all
|
|||||||
```
|
```
|
||||||
|
|
||||||
from the repository root.
|
from the repository root.
|
||||||
|
|
||||||
|
|
||||||
|
### Translations
|
||||||
|
|
||||||
|
The following is a list of known translations of ripgrep's documentation. These
|
||||||
|
are unofficially maintained and may not be up to date.
|
||||||
|
|
||||||
|
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
|
||||||
|
@@ -8,12 +8,13 @@ set -ex
|
|||||||
|
|
||||||
# Generate artifacts for release
|
# Generate artifacts for release
|
||||||
mk_artifacts() {
|
mk_artifacts() {
|
||||||
|
CARGO="$(builder)"
|
||||||
if is_arm; then
|
if is_arm; then
|
||||||
cargo build --target "$TARGET" --release
|
"$CARGO" build --target "$TARGET" --release
|
||||||
else
|
else
|
||||||
# Technically, MUSL builds will force PCRE2 to get statically compiled,
|
# Technically, MUSL builds will force PCRE2 to get statically compiled,
|
||||||
# but we also want PCRE2 statically built for macOS binaries.
|
# but we also want PCRE2 statically built for macOS binaries.
|
||||||
PCRE2_SYS_STATIC=1 cargo build --target "$TARGET" --release --features 'pcre2'
|
PCRE2_SYS_STATIC=1 "$CARGO" build --target "$TARGET" --release --features 'pcre2'
|
||||||
fi
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
|
5
ci/docker/x86_64-unknown-linux-musl/Dockerfile
Normal file
5
ci/docker/x86_64-unknown-linux-musl/Dockerfile
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
FROM rustembedded/cross:x86_64-unknown-linux-musl
|
||||||
|
|
||||||
|
RUN apt-get update \
|
||||||
|
&& apt-get install -y --no-install-recommends \
|
||||||
|
libxslt1-dev asciidoc docbook-xsl xsltproc libxml2-utils
|
@@ -7,11 +7,13 @@ set -ex
|
|||||||
. "$(dirname $0)/utils.sh"
|
. "$(dirname $0)/utils.sh"
|
||||||
|
|
||||||
main() {
|
main() {
|
||||||
|
CARGO="$(builder)"
|
||||||
|
|
||||||
# Test a normal debug build.
|
# Test a normal debug build.
|
||||||
if is_arm; then
|
if is_arm; then
|
||||||
cargo build --target "$TARGET" --verbose
|
"$CARGO" build --target "$TARGET" --verbose
|
||||||
else
|
else
|
||||||
cargo build --target "$TARGET" --verbose --all --features 'pcre2'
|
"$CARGO" build --target "$TARGET" --verbose --all --features 'pcre2'
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Show the output of the most recent build.rs stderr.
|
# Show the output of the most recent build.rs stderr.
|
||||||
@@ -44,7 +46,7 @@ main() {
|
|||||||
"$(dirname "${0}")/test_complete.sh"
|
"$(dirname "${0}")/test_complete.sh"
|
||||||
|
|
||||||
# Run tests for ripgrep and all sub-crates.
|
# Run tests for ripgrep and all sub-crates.
|
||||||
cargo test --target "$TARGET" --verbose --all --features 'pcre2'
|
"$CARGO" test --target "$TARGET" --verbose --all --features 'pcre2'
|
||||||
}
|
}
|
||||||
|
|
||||||
main
|
main
|
||||||
|
25
ci/utils.sh
25
ci/utils.sh
@@ -55,6 +55,13 @@ gcc_prefix() {
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
is_musl() {
|
||||||
|
case "$TARGET" in
|
||||||
|
*-musl) return 0 ;;
|
||||||
|
*) return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
is_x86() {
|
is_x86() {
|
||||||
case "$(architecture)" in
|
case "$(architecture)" in
|
||||||
amd64|i386) return 0 ;;
|
amd64|i386) return 0 ;;
|
||||||
@@ -62,6 +69,13 @@ is_x86() {
|
|||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
is_x86_64() {
|
||||||
|
case "$(architecture)" in
|
||||||
|
amd64) return 0 ;;
|
||||||
|
*) return 1 ;;
|
||||||
|
esac
|
||||||
|
}
|
||||||
|
|
||||||
is_arm() {
|
is_arm() {
|
||||||
case "$(architecture)" in
|
case "$(architecture)" in
|
||||||
armhf) return 0 ;;
|
armhf) return 0 ;;
|
||||||
@@ -82,3 +96,14 @@ is_osx() {
|
|||||||
*) return 1 ;;
|
*) return 1 ;;
|
||||||
esac
|
esac
|
||||||
}
|
}
|
||||||
|
|
||||||
|
builder() {
|
||||||
|
if is_musl && is_x86_64; then
|
||||||
|
# cargo install cross
|
||||||
|
# To work around https://github.com/rust-embedded/cross/issues/357
|
||||||
|
cargo install --git https://github.com/rust-embedded/cross --force
|
||||||
|
echo "cross"
|
||||||
|
else
|
||||||
|
echo "cargo"
|
||||||
|
fi
|
||||||
|
}
|
||||||
|
19
complete/_rg
19
complete/_rg
@@ -43,6 +43,7 @@ _rg() {
|
|||||||
+ '(exclusive)' # Misc. fully exclusive options
|
+ '(exclusive)' # Misc. fully exclusive options
|
||||||
'(: * -)'{-h,--help}'[display help information]'
|
'(: * -)'{-h,--help}'[display help information]'
|
||||||
'(: * -)'{-V,--version}'[display version information]'
|
'(: * -)'{-V,--version}'[display version information]'
|
||||||
|
'(: * -)'--pcre2-version'[print the version of PCRE2 used by ripgrep, if available]'
|
||||||
|
|
||||||
+ '(buffered)' # buffering options
|
+ '(buffered)' # buffering options
|
||||||
'--line-buffered[force line buffering]'
|
'--line-buffered[force line buffering]'
|
||||||
@@ -85,7 +86,7 @@ _rg() {
|
|||||||
|
|
||||||
+ '(file-name)' # File-name options
|
+ '(file-name)' # File-name options
|
||||||
{-H,--with-filename}'[show file name for matches]'
|
{-H,--with-filename}'[show file name for matches]'
|
||||||
"--no-filename[don't show file name for matches]"
|
{-I,--no-filename}"[don't show file name for matches]"
|
||||||
|
|
||||||
+ '(file-system)' # File system options
|
+ '(file-system)' # File system options
|
||||||
"--one-file-system[don't descend into directories on other file systems]"
|
"--one-file-system[don't descend into directories on other file systems]"
|
||||||
@@ -103,6 +104,10 @@ _rg() {
|
|||||||
'*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob'
|
'*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob'
|
||||||
'*--iglob=[include/exclude files matching specified case-insensitive glob]:glob'
|
'*--iglob=[include/exclude files matching specified case-insensitive glob]:glob'
|
||||||
|
|
||||||
|
+ '(glob-case-insensitive)' # File-glob case sensitivity options
|
||||||
|
'--glob-case-insensitive[treat -g/--glob patterns case insensitively]'
|
||||||
|
$no'--no-glob-case-insensitive[treat -g/--glob patterns case sensitively]'
|
||||||
|
|
||||||
+ '(heading)' # Heading options
|
+ '(heading)' # Heading options
|
||||||
'(pretty-vimgrep)--heading[show matches grouped by file name]'
|
'(pretty-vimgrep)--heading[show matches grouped by file name]'
|
||||||
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
|
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
|
||||||
@@ -111,6 +116,10 @@ _rg() {
|
|||||||
'--hidden[search hidden files and directories]'
|
'--hidden[search hidden files and directories]'
|
||||||
$no"--no-hidden[don't search hidden files and directories]"
|
$no"--no-hidden[don't search hidden files and directories]"
|
||||||
|
|
||||||
|
+ '(hybrid)' # hybrid regex options
|
||||||
|
'--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
|
||||||
|
$no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
|
||||||
|
|
||||||
+ '(ignore)' # Ignore-file options
|
+ '(ignore)' # Ignore-file options
|
||||||
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
|
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
|
||||||
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
|
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
|
||||||
@@ -148,6 +157,10 @@ _rg() {
|
|||||||
$no"--no-crlf[don't use CRLF as line terminator]"
|
$no"--no-crlf[don't use CRLF as line terminator]"
|
||||||
'(text)--null-data[use NUL as line terminator]'
|
'(text)--null-data[use NUL as line terminator]'
|
||||||
|
|
||||||
|
+ '(max-columns-preview)' # max column preview options
|
||||||
|
'--max-columns-preview[show preview for long lines (with -M)]'
|
||||||
|
$no"--no-max-columns-preview[don't show preview for long lines (with -M)]"
|
||||||
|
|
||||||
+ '(max-depth)' # Directory-depth options
|
+ '(max-depth)' # Directory-depth options
|
||||||
'--max-depth=[specify max number of directories to descend]:number of directories'
|
'--max-depth=[specify max number of directories to descend]:number of directories'
|
||||||
'!--maxdepth=:number of directories'
|
'!--maxdepth=:number of directories'
|
||||||
@@ -227,6 +240,8 @@ _rg() {
|
|||||||
|
|
||||||
+ '(text)' # Binary-search options
|
+ '(text)' # Binary-search options
|
||||||
{-a,--text}'[search binary files as if they were text]'
|
{-a,--text}'[search binary files as if they were text]'
|
||||||
|
"--binary[search binary files, don't print binary data]"
|
||||||
|
$no"--no-binary[don't search binary files]"
|
||||||
$no"(--null-data)--no-text[don't search binary files as if they were text]"
|
$no"(--null-data)--no-text[don't search binary files as if they were text]"
|
||||||
|
|
||||||
+ '(threads)' # Thread-count options
|
+ '(threads)' # Thread-count options
|
||||||
@@ -378,7 +393,7 @@ _rg_encodings() {
|
|||||||
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
|
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
|
||||||
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
|
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
|
||||||
windows-{31j,874,949,125{0..8}} dos-874 tis-620 ansi_x3.4-1968
|
windows-{31j,874,949,125{0..8}} dos-874 tis-620 ansi_x3.4-1968
|
||||||
x-user-defined auto
|
x-user-defined auto none
|
||||||
)
|
)
|
||||||
|
|
||||||
_wanted encodings expl encoding compadd -a "$@" - _encodings
|
_wanted encodings expl encoding compadd -a "$@" - _encodings
|
||||||
|
@@ -41,6 +41,9 @@ configuration file. The file can specify one shell argument per line. Lines
|
|||||||
starting with *#* are ignored. For more details, see the man page or the
|
starting with *#* are ignored. For more details, see the man page or the
|
||||||
*README*.
|
*README*.
|
||||||
|
|
||||||
|
Tip: to disable all smart filtering and make ripgrep behave a bit more like
|
||||||
|
classical grep, use *rg -uuu*.
|
||||||
|
|
||||||
|
|
||||||
REGEX SYNTAX
|
REGEX SYNTAX
|
||||||
------------
|
------------
|
||||||
@@ -140,16 +143,16 @@ would behave identically to the following command
|
|||||||
|
|
||||||
same with using globs
|
same with using globs
|
||||||
|
|
||||||
--glob=!git/*
|
--glob=!.git
|
||||||
|
|
||||||
or
|
or
|
||||||
|
|
||||||
--glob
|
--glob
|
||||||
!git/*
|
!.git
|
||||||
|
|
||||||
would behave identically to the following command
|
would behave identically to the following command
|
||||||
|
|
||||||
rg --glob '!git/*' foo
|
rg --glob '!.git' foo
|
||||||
|
|
||||||
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
||||||
any and all support for configuration. This includes any future support
|
any and all support for configuration. This includes any future support
|
||||||
@@ -189,6 +192,21 @@ file that is simultaneously truncated. This behavior can be avoided by passing
|
|||||||
the *--no-mmap* flag which will forcefully disable the use of memory maps in
|
the *--no-mmap* flag which will forcefully disable the use of memory maps in
|
||||||
all cases.
|
all cases.
|
||||||
|
|
||||||
|
ripgrep may use a large amount of memory depending on a few factors. Firstly,
|
||||||
|
if ripgrep uses parallelism for search (the default), then the entire output
|
||||||
|
for each individual file is buffered into memory in order to prevent
|
||||||
|
interleaving matches in the output. To avoid this, you can disable parallelism
|
||||||
|
with the *-j1* flag. Secondly, ripgrep always needs to have at least a single
|
||||||
|
line in memory in order to execute a search. A file with a very long line can
|
||||||
|
thus cause ripgrep to use a lot of memory. Generally, this only occurs when
|
||||||
|
searching binary data with the *-a* flag enabled. (When the *-a* flag isn't
|
||||||
|
enabled, ripgrep will replace all NUL bytes with line terminators, which
|
||||||
|
typically prevents exorbitant memory usage.) Thirdly, when ripgrep searches
|
||||||
|
a large file using a memory map, the process will report its resident memory
|
||||||
|
usage as the size of the file. However, this does not mean ripgrep actually
|
||||||
|
needed to use that much memory; the operating system will generally handle this
|
||||||
|
for you.
|
||||||
|
|
||||||
|
|
||||||
VERSION
|
VERSION
|
||||||
-------
|
-------
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "globset"
|
name = "globset"
|
||||||
version = "0.4.2" #:version
|
version = "0.4.4" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Cross platform single glob and glob set matching. Glob set matching is the
|
Cross platform single glob and glob set matching. Glob set matching is the
|
||||||
@@ -20,13 +20,13 @@ bench = false
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
aho-corasick = "0.7.3"
|
aho-corasick = "0.7.3"
|
||||||
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
|
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||||
fnv = "1.0.6"
|
fnv = "1.0.6"
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
regex = "1.1.5"
|
regex = "1.1.5"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
glob = "0.2.11"
|
glob = "0.3.0"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
simd-accel = []
|
simd-accel = []
|
||||||
|
@@ -120,7 +120,7 @@ impl GlobMatcher {
|
|||||||
|
|
||||||
/// Tests whether the given path matches this pattern or not.
|
/// Tests whether the given path matches this pattern or not.
|
||||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||||
self.re.is_match(path.path.as_bytes())
|
self.re.is_match(&path.path)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -145,7 +145,7 @@ impl GlobStrategic {
|
|||||||
|
|
||||||
/// Tests whether the given path matches this pattern or not.
|
/// Tests whether the given path matches this pattern or not.
|
||||||
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
||||||
let byte_path = candidate.path.as_bytes();
|
let byte_path = &*candidate.path;
|
||||||
|
|
||||||
match self.strategy {
|
match self.strategy {
|
||||||
MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
|
MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
|
||||||
|
@@ -119,7 +119,7 @@ use std::path::Path;
|
|||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use aho_corasick::AhoCorasick;
|
use aho_corasick::AhoCorasick;
|
||||||
use bstr::{B, BStr, BString};
|
use bstr::{B, ByteSlice, ByteVec};
|
||||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||||
|
|
||||||
use pathutil::{file_name, file_name_ext, normalize_path};
|
use pathutil::{file_name, file_name_ext, normalize_path};
|
||||||
@@ -490,15 +490,15 @@ impl GlobSetBuilder {
|
|||||||
/// path against multiple globs or sets of globs.
|
/// path against multiple globs or sets of globs.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Candidate<'a> {
|
pub struct Candidate<'a> {
|
||||||
path: Cow<'a, BStr>,
|
path: Cow<'a, [u8]>,
|
||||||
basename: Cow<'a, BStr>,
|
basename: Cow<'a, [u8]>,
|
||||||
ext: Cow<'a, BStr>,
|
ext: Cow<'a, [u8]>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Candidate<'a> {
|
impl<'a> Candidate<'a> {
|
||||||
/// Create a new candidate for matching from the given path.
|
/// Create a new candidate for matching from the given path.
|
||||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||||
let path = normalize_path(BString::from_path_lossy(path.as_ref()));
|
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
|
||||||
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
|
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
|
||||||
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
|
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
|
||||||
Candidate {
|
Candidate {
|
||||||
@@ -508,7 +508,7 @@ impl<'a> Candidate<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn path_prefix(&self, max: usize) -> &BStr {
|
fn path_prefix(&self, max: usize) -> &[u8] {
|
||||||
if self.path.len() <= max {
|
if self.path.len() <= max {
|
||||||
&*self.path
|
&*self.path
|
||||||
} else {
|
} else {
|
||||||
@@ -516,7 +516,7 @@ impl<'a> Candidate<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn path_suffix(&self, max: usize) -> &BStr {
|
fn path_suffix(&self, max: usize) -> &[u8] {
|
||||||
if self.path.len() <= max {
|
if self.path.len() <= max {
|
||||||
&*self.path
|
&*self.path
|
||||||
} else {
|
} else {
|
||||||
|
@@ -1,15 +1,15 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use bstr::BStr;
|
use bstr::{ByteSlice, ByteVec};
|
||||||
|
|
||||||
/// The final component of the path, if it is a normal file.
|
/// The final component of the path, if it is a normal file.
|
||||||
///
|
///
|
||||||
/// If the path terminates in ., .., or consists solely of a root or prefix,
|
/// If the path terminates in ., .., or consists solely of a root or prefix,
|
||||||
/// file_name will return None.
|
/// file_name will return None.
|
||||||
pub fn file_name<'a>(path: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||||
if path.is_empty() {
|
if path.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
} else if path.last() == Some(b'.') {
|
} else if path.last_byte() == Some(b'.') {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
|
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
|
||||||
@@ -39,7 +39,7 @@ pub fn file_name<'a>(path: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
|||||||
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
|
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
|
||||||
/// extension, but it also matches files like `.rs`, which doesn't have an
|
/// extension, but it also matches files like `.rs`, which doesn't have an
|
||||||
/// extension according to std::path::Path::extension.
|
/// extension according to std::path::Path::extension.
|
||||||
pub fn file_name_ext<'a>(name: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||||
if name.is_empty() {
|
if name.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
|||||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||||
/// that recognize other characters as separators.
|
/// that recognize other characters as separators.
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
pub fn normalize_path(path: Cow<BStr>) -> Cow<BStr> {
|
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||||
// UNIX only uses /, so we're good.
|
// UNIX only uses /, so we're good.
|
||||||
path
|
path
|
||||||
}
|
}
|
||||||
@@ -68,7 +68,7 @@ pub fn normalize_path(path: Cow<BStr>) -> Cow<BStr> {
|
|||||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||||
/// that recognize other characters as separators.
|
/// that recognize other characters as separators.
|
||||||
#[cfg(not(unix))]
|
#[cfg(not(unix))]
|
||||||
pub fn normalize_path(mut path: Cow<BStr>) -> Cow<BStr> {
|
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
||||||
use std::path::is_separator;
|
use std::path::is_separator;
|
||||||
|
|
||||||
for i in 0..path.len() {
|
for i in 0..path.len() {
|
||||||
@@ -84,7 +84,7 @@ pub fn normalize_path(mut path: Cow<BStr>) -> Cow<BStr> {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use bstr::{B, BString};
|
use bstr::{B, ByteVec};
|
||||||
|
|
||||||
use super::{file_name_ext, normalize_path};
|
use super::{file_name_ext, normalize_path};
|
||||||
|
|
||||||
@@ -92,7 +92,7 @@ mod tests {
|
|||||||
($name:ident, $file_name:expr, $ext:expr) => {
|
($name:ident, $file_name:expr, $ext:expr) => {
|
||||||
#[test]
|
#[test]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
let bs = BString::from($file_name);
|
let bs = Vec::from($file_name);
|
||||||
let got = file_name_ext(&Cow::Owned(bs));
|
let got = file_name_ext(&Cow::Owned(bs));
|
||||||
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
|
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
|
||||||
}
|
}
|
||||||
@@ -109,7 +109,7 @@ mod tests {
|
|||||||
($name:ident, $path:expr, $expected:expr) => {
|
($name:ident, $path:expr, $expected:expr) => {
|
||||||
#[test]
|
#[test]
|
||||||
fn $name() {
|
fn $name() {
|
||||||
let bs = BString::from_slice($path);
|
let bs = Vec::from_slice($path);
|
||||||
let got = normalize_path(Cow::Owned(bs));
|
let got = normalize_path(Cow::Owned(bs));
|
||||||
assert_eq!($expected.to_vec(), got.into_owned());
|
assert_eq!($expected.to_vec(), got.into_owned());
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep-cli"
|
name = "grep-cli"
|
||||||
version = "0.1.1" #:version
|
version = "0.1.3" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Utilities for search oriented command line applications.
|
Utilities for search oriented command line applications.
|
||||||
@@ -14,8 +14,8 @@ license = "Unlicense/MIT"
|
|||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
atty = "0.2.11"
|
atty = "0.2.11"
|
||||||
bstr = "0.1.2"
|
bstr = "0.2.0"
|
||||||
globset = { version = "0.4.2", path = "../globset" }
|
globset = { version = "0.4.3", path = "../globset" }
|
||||||
lazy_static = "1.1.0"
|
lazy_static = "1.1.0"
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use bstr::{BStr, BString};
|
use bstr::{ByteSlice, ByteVec};
|
||||||
|
|
||||||
/// A single state in the state machine used by `unescape`.
|
/// A single state in the state machine used by `unescape`.
|
||||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||||
@@ -38,7 +38,6 @@ enum State {
|
|||||||
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
|
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
|
||||||
/// ```
|
/// ```
|
||||||
pub fn escape(bytes: &[u8]) -> String {
|
pub fn escape(bytes: &[u8]) -> String {
|
||||||
let bytes = BStr::new(bytes);
|
|
||||||
let mut escaped = String::new();
|
let mut escaped = String::new();
|
||||||
for (s, e, ch) in bytes.char_indices() {
|
for (s, e, ch) in bytes.char_indices() {
|
||||||
if ch == '\u{FFFD}' {
|
if ch == '\u{FFFD}' {
|
||||||
@@ -56,7 +55,7 @@ pub fn escape(bytes: &[u8]) -> String {
|
|||||||
///
|
///
|
||||||
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
|
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
|
||||||
pub fn escape_os(string: &OsStr) -> String {
|
pub fn escape_os(string: &OsStr) -> String {
|
||||||
escape(BString::from_os_str_lossy(string).as_bytes())
|
escape(Vec::from_os_str_lossy(string).as_bytes())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Unescapes a string.
|
/// Unescapes a string.
|
||||||
@@ -111,7 +110,7 @@ pub fn unescape(s: &str) -> Vec<u8> {
|
|||||||
}
|
}
|
||||||
HexFirst => {
|
HexFirst => {
|
||||||
match c {
|
match c {
|
||||||
'0'...'9' | 'A'...'F' | 'a'...'f' => {
|
'0'..='9' | 'A'..='F' | 'a'..='f' => {
|
||||||
state = HexSecond(c);
|
state = HexSecond(c);
|
||||||
}
|
}
|
||||||
c => {
|
c => {
|
||||||
@@ -122,7 +121,7 @@ pub fn unescape(s: &str) -> Vec<u8> {
|
|||||||
}
|
}
|
||||||
HexSecond(first) => {
|
HexSecond(first) => {
|
||||||
match c {
|
match c {
|
||||||
'0'...'9' | 'A'...'F' | 'a'...'f' => {
|
'0'..='9' | 'A'..='F' | 'a'..='f' => {
|
||||||
let ordinal = format!("{}{}", first, c);
|
let ordinal = format!("{}{}", first, c);
|
||||||
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
|
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
|
||||||
bytes.push(byte);
|
bytes.push(byte);
|
||||||
@@ -174,7 +173,7 @@ fn escape_char(cp: char, into: &mut String) {
|
|||||||
/// Adds the given byte to the given string, escaping it if necessary.
|
/// Adds the given byte to the given string, escaping it if necessary.
|
||||||
fn escape_byte(byte: u8, into: &mut String) {
|
fn escape_byte(byte: u8, into: &mut String) {
|
||||||
match byte {
|
match byte {
|
||||||
0x21...0x5B | 0x5D...0x7D => into.push(byte as char),
|
0x21..=0x5B | 0x5D..=0x7D => into.push(byte as char),
|
||||||
b'\n' => into.push_str(r"\n"),
|
b'\n' => into.push_str(r"\n"),
|
||||||
b'\r' => into.push_str(r"\r"),
|
b'\r' => into.push_str(r"\r"),
|
||||||
b'\t' => into.push_str(r"\t"),
|
b'\t' => into.push_str(r"\t"),
|
||||||
|
@@ -2,10 +2,12 @@ use std::error;
|
|||||||
use std::ffi::OsStr;
|
use std::ffi::OsStr;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{self, BufRead};
|
use std::io;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
|
use bstr::io::BufReadExt;
|
||||||
|
|
||||||
use escape::{escape, escape_os};
|
use escape::{escape, escape_os};
|
||||||
|
|
||||||
/// An error that occurs when a pattern could not be converted to valid UTF-8.
|
/// An error that occurs when a pattern could not be converted to valid UTF-8.
|
||||||
@@ -156,28 +158,22 @@ pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
|
|||||||
/// ```
|
/// ```
|
||||||
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
|
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
|
||||||
let mut patterns = vec![];
|
let mut patterns = vec![];
|
||||||
let mut bufrdr = io::BufReader::new(rdr);
|
|
||||||
let mut line = vec![];
|
|
||||||
let mut line_number = 0;
|
let mut line_number = 0;
|
||||||
while {
|
io::BufReader::new(rdr).for_byte_line(|line| {
|
||||||
line.clear();
|
|
||||||
line_number += 1;
|
line_number += 1;
|
||||||
bufrdr.read_until(b'\n', &mut line)? > 0
|
match pattern_from_bytes(line) {
|
||||||
} {
|
Ok(pattern) => {
|
||||||
line.pop().unwrap(); // remove trailing '\n'
|
patterns.push(pattern.to_string());
|
||||||
if line.last() == Some(&b'\r') {
|
Ok(true)
|
||||||
line.pop().unwrap();
|
|
||||||
}
|
}
|
||||||
match pattern_from_bytes(&line) {
|
|
||||||
Ok(pattern) => patterns.push(pattern.to_string()),
|
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
return Err(io::Error::new(
|
Err(io::Error::new(
|
||||||
io::ErrorKind::Other,
|
io::ErrorKind::Other,
|
||||||
format!("{}: {}", line_number, err),
|
format!("{}: {}", line_number, err),
|
||||||
));
|
))
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
})?;
|
||||||
Ok(patterns)
|
Ok(patterns)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep-matcher"
|
name = "grep-matcher"
|
||||||
version = "0.1.1" #:version
|
version = "0.1.3" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
A trait for regular expressions, with a focus on line oriented search.
|
A trait for regular expressions, with a focus on line oriented search.
|
||||||
|
@@ -134,7 +134,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
|
|||||||
/// Returns true if and only if the given byte is allowed in a capture name.
|
/// Returns true if and only if the given byte is allowed in a capture name.
|
||||||
fn is_valid_cap_letter(b: &u8) -> bool {
|
fn is_valid_cap_letter(b: &u8) -> bool {
|
||||||
match *b {
|
match *b {
|
||||||
b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true,
|
b'0' ..= b'9' | b'a' ..= b'z' | b'A' ..= b'Z' | b'_' => true,
|
||||||
_ => false,
|
_ => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep-pcre2"
|
name = "grep-pcre2"
|
||||||
version = "0.1.2" #:version
|
version = "0.1.3" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Use PCRE2 with the 'grep' crate.
|
Use PCRE2 with the 'grep' crate.
|
||||||
@@ -13,5 +13,5 @@ keywords = ["regex", "grep", "pcre", "backreference", "look"]
|
|||||||
license = "Unlicense/MIT"
|
license = "Unlicense/MIT"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||||
pcre2 = "0.1.1"
|
pcre2 = "0.2.0"
|
||||||
|
@@ -10,6 +10,7 @@ extern crate pcre2;
|
|||||||
|
|
||||||
pub use error::{Error, ErrorKind};
|
pub use error::{Error, ErrorKind};
|
||||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||||
|
pub use pcre2::{is_jit_available, version};
|
||||||
|
|
||||||
mod error;
|
mod error;
|
||||||
mod matcher;
|
mod matcher;
|
||||||
|
@@ -227,6 +227,27 @@ impl RegexMatcherBuilder {
|
|||||||
self.builder.jit_if_available(yes);
|
self.builder.jit_if_available(yes);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
|
||||||
|
/// not enabled, then this has no effect.
|
||||||
|
///
|
||||||
|
/// When `None` is given, no custom JIT stack will be created, and instead,
|
||||||
|
/// the default JIT stack is used. When the default is used, its maximum
|
||||||
|
/// size is 32 KB.
|
||||||
|
///
|
||||||
|
/// When this is set, then a new JIT stack will be created with the given
|
||||||
|
/// maximum size as its limit.
|
||||||
|
///
|
||||||
|
/// Increasing the stack size can be useful for larger regular expressions.
|
||||||
|
///
|
||||||
|
/// By default, this is set to `None`.
|
||||||
|
pub fn max_jit_stack_size(
|
||||||
|
&mut self,
|
||||||
|
bytes: Option<usize>,
|
||||||
|
) -> &mut RegexMatcherBuilder {
|
||||||
|
self.builder.max_jit_stack_size(bytes);
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An implementation of the `Matcher` trait using PCRE2.
|
/// An implementation of the `Matcher` trait using PCRE2.
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep-printer"
|
name = "grep-printer"
|
||||||
version = "0.1.1" #:version
|
version = "0.1.3" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
An implementation of the grep crate's Sink trait that provides standard
|
An implementation of the grep crate's Sink trait that provides standard
|
||||||
@@ -18,14 +18,14 @@ default = ["serde1"]
|
|||||||
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
base64 = { version = "0.10.0", optional = true }
|
base64 = { version = "0.11.0", optional = true }
|
||||||
bstr = "0.1.2"
|
bstr = "0.2.0"
|
||||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||||
grep-searcher = { version = "0.1.1", path = "../grep-searcher" }
|
grep-searcher = { version = "0.1.4", path = "../grep-searcher" }
|
||||||
termcolor = "1.0.4"
|
termcolor = "1.0.4"
|
||||||
serde = { version = "1.0.77", optional = true }
|
serde = { version = "1.0.77", optional = true }
|
||||||
serde_derive = { version = "1.0.77", optional = true }
|
serde_derive = { version = "1.0.77", optional = true }
|
||||||
serde_json = { version = "1.0.27", optional = true }
|
serde_json = { version = "1.0.27", optional = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||||
|
@@ -5,6 +5,7 @@ use std::path::Path;
|
|||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
|
use bstr::ByteSlice;
|
||||||
use grep_matcher::{Match, Matcher};
|
use grep_matcher::{Match, Matcher};
|
||||||
use grep_searcher::{
|
use grep_searcher::{
|
||||||
LineStep, Searcher,
|
LineStep, Searcher,
|
||||||
@@ -16,10 +17,7 @@ use termcolor::{ColorSpec, NoColor, WriteColor};
|
|||||||
use color::ColorSpecs;
|
use color::ColorSpecs;
|
||||||
use counter::CounterWriter;
|
use counter::CounterWriter;
|
||||||
use stats::Stats;
|
use stats::Stats;
|
||||||
use util::{
|
use util::{PrinterPath, Replacer, Sunk, trim_ascii_prefix};
|
||||||
PrinterPath, Replacer, Sunk,
|
|
||||||
trim_ascii_prefix, trim_ascii_prefix_range,
|
|
||||||
};
|
|
||||||
|
|
||||||
/// The configuration for the standard printer.
|
/// The configuration for the standard printer.
|
||||||
///
|
///
|
||||||
@@ -36,6 +34,7 @@ struct Config {
|
|||||||
per_match: bool,
|
per_match: bool,
|
||||||
replacement: Arc<Option<Vec<u8>>>,
|
replacement: Arc<Option<Vec<u8>>>,
|
||||||
max_columns: Option<u64>,
|
max_columns: Option<u64>,
|
||||||
|
max_columns_preview: bool,
|
||||||
max_matches: Option<u64>,
|
max_matches: Option<u64>,
|
||||||
column: bool,
|
column: bool,
|
||||||
byte_offset: bool,
|
byte_offset: bool,
|
||||||
@@ -59,6 +58,7 @@ impl Default for Config {
|
|||||||
per_match: false,
|
per_match: false,
|
||||||
replacement: Arc::new(None),
|
replacement: Arc::new(None),
|
||||||
max_columns: None,
|
max_columns: None,
|
||||||
|
max_columns_preview: false,
|
||||||
max_matches: None,
|
max_matches: None,
|
||||||
column: false,
|
column: false,
|
||||||
byte_offset: false,
|
byte_offset: false,
|
||||||
@@ -263,6 +263,21 @@ impl StandardBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// When enabled, if a line is found to be over the configured maximum
|
||||||
|
/// column limit (measured in terms of bytes), then a preview of the long
|
||||||
|
/// line will be printed instead.
|
||||||
|
///
|
||||||
|
/// The preview will correspond to the first `N` *grapheme clusters* of
|
||||||
|
/// the line, where `N` is the limit configured by `max_columns`.
|
||||||
|
///
|
||||||
|
/// If no limit is set, then enabling this has no effect.
|
||||||
|
///
|
||||||
|
/// This is disabled by default.
|
||||||
|
pub fn max_columns_preview(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||||
|
self.config.max_columns_preview = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Set the maximum amount of matching lines that are printed.
|
/// Set the maximum amount of matching lines that are printed.
|
||||||
///
|
///
|
||||||
/// If multi line search is enabled and a match spans multiple lines, then
|
/// If multi line search is enabled and a match spans multiple lines, then
|
||||||
@@ -743,6 +758,11 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
|||||||
stats.add_matches(self.standard.matches.len() as u64);
|
stats.add_matches(self.standard.matches.len() as u64);
|
||||||
stats.add_matched_lines(mat.lines().count() as u64);
|
stats.add_matched_lines(mat.lines().count() as u64);
|
||||||
}
|
}
|
||||||
|
if searcher.binary_detection().convert_byte().is_some() {
|
||||||
|
if self.binary_byte_offset.is_some() {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
StandardImpl::from_match(searcher, self, mat).sink()?;
|
StandardImpl::from_match(searcher, self, mat).sink()?;
|
||||||
Ok(!self.should_quit())
|
Ok(!self.should_quit())
|
||||||
@@ -764,6 +784,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
|||||||
self.record_matches(ctx.bytes())?;
|
self.record_matches(ctx.bytes())?;
|
||||||
self.replace(ctx.bytes())?;
|
self.replace(ctx.bytes())?;
|
||||||
}
|
}
|
||||||
|
if searcher.binary_detection().convert_byte().is_some() {
|
||||||
|
if self.binary_byte_offset.is_some() {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
StandardImpl::from_context(searcher, self, ctx).sink()?;
|
StandardImpl::from_context(searcher, self, ctx).sink()?;
|
||||||
Ok(!self.should_quit())
|
Ok(!self.should_quit())
|
||||||
}
|
}
|
||||||
@@ -776,6 +802,15 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn binary_data(
|
||||||
|
&mut self,
|
||||||
|
_searcher: &Searcher,
|
||||||
|
binary_byte_offset: u64,
|
||||||
|
) -> Result<bool, io::Error> {
|
||||||
|
self.binary_byte_offset = Some(binary_byte_offset);
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
fn begin(
|
fn begin(
|
||||||
&mut self,
|
&mut self,
|
||||||
_searcher: &Searcher,
|
_searcher: &Searcher,
|
||||||
@@ -793,10 +828,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
|||||||
|
|
||||||
fn finish(
|
fn finish(
|
||||||
&mut self,
|
&mut self,
|
||||||
_searcher: &Searcher,
|
searcher: &Searcher,
|
||||||
finish: &SinkFinish,
|
finish: &SinkFinish,
|
||||||
) -> Result<(), io::Error> {
|
) -> Result<(), io::Error> {
|
||||||
self.binary_byte_offset = finish.binary_byte_offset();
|
if let Some(offset) = self.binary_byte_offset {
|
||||||
|
StandardImpl::new(searcher, self).write_binary_message(offset)?;
|
||||||
|
}
|
||||||
if let Some(stats) = self.stats.as_mut() {
|
if let Some(stats) = self.stats.as_mut() {
|
||||||
stats.add_elapsed(self.start_time.elapsed());
|
stats.add_elapsed(self.start_time.elapsed());
|
||||||
stats.add_searches(1);
|
stats.add_searches(1);
|
||||||
@@ -992,7 +1029,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
let mut stepper = LineStep::new(line_term, 0, bytes.len());
|
let mut stepper = LineStep::new(line_term, 0, bytes.len());
|
||||||
while let Some((start, end)) = stepper.next(bytes) {
|
while let Some((start, end)) = stepper.next(bytes) {
|
||||||
let mut line = Match::new(start, end);
|
let line = Match::new(start, end);
|
||||||
self.write_prelude(
|
self.write_prelude(
|
||||||
self.sunk.absolute_byte_offset() + line.start() as u64,
|
self.sunk.absolute_byte_offset() + line.start() as u64,
|
||||||
self.sunk.line_number().map(|n| n + count),
|
self.sunk.line_number().map(|n| n + count),
|
||||||
@@ -1000,44 +1037,12 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
)?;
|
)?;
|
||||||
count += 1;
|
count += 1;
|
||||||
if self.exceeds_max_columns(&bytes[line]) {
|
if self.exceeds_max_columns(&bytes[line]) {
|
||||||
self.write_exceeded_line()?;
|
self.write_exceeded_line(bytes, line, matches, &mut midx)?;
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if self.has_line_terminator(&bytes[line]) {
|
|
||||||
line = line.with_end(line.end() - 1);
|
|
||||||
}
|
|
||||||
if self.config().trim_ascii {
|
|
||||||
line = self.trim_ascii_prefix_range(bytes, line);
|
|
||||||
}
|
|
||||||
|
|
||||||
while !line.is_empty() {
|
|
||||||
if matches[midx].end() <= line.start() {
|
|
||||||
if midx + 1 < matches.len() {
|
|
||||||
midx += 1;
|
|
||||||
continue;
|
|
||||||
} else {
|
} else {
|
||||||
self.end_color_match()?;
|
self.write_colored_matches(bytes, line, matches, &mut midx)?;
|
||||||
self.write(&bytes[line])?;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let m = matches[midx];
|
|
||||||
|
|
||||||
if line.start() < m.start() {
|
|
||||||
let upto = cmp::min(line.end(), m.start());
|
|
||||||
self.end_color_match()?;
|
|
||||||
self.write(&bytes[line.with_end(upto)])?;
|
|
||||||
line = line.with_start(upto);
|
|
||||||
} else {
|
|
||||||
let upto = cmp::min(line.end(), m.end());
|
|
||||||
self.start_color_match()?;
|
|
||||||
self.write(&bytes[line.with_end(upto)])?;
|
|
||||||
line = line.with_start(upto);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.end_color_match()?;
|
|
||||||
self.write_line_term()?;
|
self.write_line_term()?;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1051,12 +1056,8 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
let mut stepper = LineStep::new(line_term, 0, bytes.len());
|
let mut stepper = LineStep::new(line_term, 0, bytes.len());
|
||||||
while let Some((start, end)) = stepper.next(bytes) {
|
while let Some((start, end)) = stepper.next(bytes) {
|
||||||
let mut line = Match::new(start, end);
|
let mut line = Match::new(start, end);
|
||||||
if self.has_line_terminator(&bytes[line]) {
|
self.trim_line_terminator(bytes, &mut line);
|
||||||
line = line.with_end(line.end() - 1);
|
self.trim_ascii_prefix(bytes, &mut line);
|
||||||
}
|
|
||||||
if self.config().trim_ascii {
|
|
||||||
line = self.trim_ascii_prefix_range(bytes, line);
|
|
||||||
}
|
|
||||||
while !line.is_empty() {
|
while !line.is_empty() {
|
||||||
if matches[midx].end() <= line.start() {
|
if matches[midx].end() <= line.start() {
|
||||||
if midx + 1 < matches.len() {
|
if midx + 1 < matches.len() {
|
||||||
@@ -1079,16 +1080,21 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
Some(m.start() as u64 + 1),
|
Some(m.start() as u64 + 1),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let buf = &bytes[line.with_end(upto)];
|
let this_line = line.with_end(upto);
|
||||||
line = line.with_start(upto);
|
line = line.with_start(upto);
|
||||||
if self.exceeds_max_columns(&buf) {
|
if self.exceeds_max_columns(&bytes[this_line]) {
|
||||||
self.write_exceeded_line()?;
|
self.write_exceeded_line(
|
||||||
continue;
|
bytes,
|
||||||
}
|
this_line,
|
||||||
self.write_spec(spec, buf)?;
|
matches,
|
||||||
|
&mut midx,
|
||||||
|
)?;
|
||||||
|
} else {
|
||||||
|
self.write_spec(spec, &bytes[this_line])?;
|
||||||
self.write_line_term()?;
|
self.write_line_term()?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
count += 1;
|
count += 1;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -1099,7 +1105,6 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
let spec = self.config().colors.matched();
|
let spec = self.config().colors.matched();
|
||||||
let bytes = self.sunk.bytes();
|
let bytes = self.sunk.bytes();
|
||||||
for &m in self.sunk.matches() {
|
for &m in self.sunk.matches() {
|
||||||
let mut m = m;
|
|
||||||
let mut count = 0;
|
let mut count = 0;
|
||||||
let mut stepper = LineStep::new(line_term, 0, bytes.len());
|
let mut stepper = LineStep::new(line_term, 0, bytes.len());
|
||||||
while let Some((start, end)) = stepper.next(bytes) {
|
while let Some((start, end)) = stepper.next(bytes) {
|
||||||
@@ -1117,15 +1122,11 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
)?;
|
)?;
|
||||||
count += 1;
|
count += 1;
|
||||||
if self.exceeds_max_columns(&bytes[line]) {
|
if self.exceeds_max_columns(&bytes[line]) {
|
||||||
self.write_exceeded_line()?;
|
self.write_exceeded_line(bytes, line, &[m], &mut 0)?;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if self.has_line_terminator(&bytes[line]) {
|
self.trim_line_terminator(bytes, &mut line);
|
||||||
line = line.with_end(line.end() - 1);
|
self.trim_ascii_prefix(bytes, &mut line);
|
||||||
}
|
|
||||||
if self.config().trim_ascii {
|
|
||||||
line = self.trim_ascii_prefix_range(bytes, line);
|
|
||||||
}
|
|
||||||
|
|
||||||
while !line.is_empty() {
|
while !line.is_empty() {
|
||||||
if m.end() <= line.start() {
|
if m.end() <= line.start() {
|
||||||
@@ -1182,7 +1183,10 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
line: &[u8],
|
line: &[u8],
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
if self.exceeds_max_columns(line) {
|
if self.exceeds_max_columns(line) {
|
||||||
self.write_exceeded_line()?;
|
let range = Match::new(0, line.len());
|
||||||
|
self.write_exceeded_line(
|
||||||
|
line, range, self.sunk.matches(), &mut 0,
|
||||||
|
)?;
|
||||||
} else {
|
} else {
|
||||||
self.write_trim(line)?;
|
self.write_trim(line)?;
|
||||||
if !self.has_line_terminator(line) {
|
if !self.has_line_terminator(line) {
|
||||||
@@ -1195,50 +1199,114 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
fn write_colored_line(
|
fn write_colored_line(
|
||||||
&self,
|
&self,
|
||||||
matches: &[Match],
|
matches: &[Match],
|
||||||
line: &[u8],
|
bytes: &[u8],
|
||||||
) -> io::Result<()> {
|
) -> io::Result<()> {
|
||||||
// If we know we aren't going to emit color, then we can go faster.
|
// If we know we aren't going to emit color, then we can go faster.
|
||||||
let spec = self.config().colors.matched();
|
let spec = self.config().colors.matched();
|
||||||
if !self.wtr().borrow().supports_color() || spec.is_none() {
|
if !self.wtr().borrow().supports_color() || spec.is_none() {
|
||||||
return self.write_line(line);
|
return self.write_line(bytes);
|
||||||
}
|
|
||||||
if self.exceeds_max_columns(line) {
|
|
||||||
return self.write_exceeded_line();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut last_written =
|
let line = Match::new(0, bytes.len());
|
||||||
if !self.config().trim_ascii {
|
if self.exceeds_max_columns(bytes) {
|
||||||
0
|
self.write_exceeded_line(bytes, line, matches, &mut 0)
|
||||||
} else {
|
} else {
|
||||||
self.trim_ascii_prefix_range(
|
self.write_colored_matches(bytes, line, matches, &mut 0)?;
|
||||||
line,
|
|
||||||
Match::new(0, line.len()),
|
|
||||||
).start()
|
|
||||||
};
|
|
||||||
for mut m in matches.iter().map(|&m| m) {
|
|
||||||
if last_written < m.start() {
|
|
||||||
self.end_color_match()?;
|
|
||||||
self.write(&line[last_written..m.start()])?;
|
|
||||||
} else if last_written < m.end() {
|
|
||||||
m = m.with_start(last_written);
|
|
||||||
} else {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if !m.is_empty() {
|
|
||||||
self.start_color_match()?;
|
|
||||||
self.write(&line[m])?;
|
|
||||||
}
|
|
||||||
last_written = m.end();
|
|
||||||
}
|
|
||||||
self.end_color_match()?;
|
|
||||||
self.write(&line[last_written..])?;
|
|
||||||
if !self.has_line_terminator(line) {
|
|
||||||
self.write_line_term()?;
|
self.write_line_term()?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Write the `line` portion of `bytes`, with appropriate coloring for
|
||||||
|
/// each `match`, starting at `match_index`.
|
||||||
|
///
|
||||||
|
/// This accounts for trimming any whitespace prefix and will *never* print
|
||||||
|
/// a line terminator. If a match exceeds the range specified by `line`,
|
||||||
|
/// then only the part of the match within `line` (if any) is printed.
|
||||||
|
fn write_colored_matches(
|
||||||
|
&self,
|
||||||
|
bytes: &[u8],
|
||||||
|
mut line: Match,
|
||||||
|
matches: &[Match],
|
||||||
|
match_index: &mut usize,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
self.trim_line_terminator(bytes, &mut line);
|
||||||
|
self.trim_ascii_prefix(bytes, &mut line);
|
||||||
|
if matches.is_empty() {
|
||||||
|
self.write(&bytes[line])?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
while !line.is_empty() {
|
||||||
|
if matches[*match_index].end() <= line.start() {
|
||||||
|
if *match_index + 1 < matches.len() {
|
||||||
|
*match_index += 1;
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
self.end_color_match()?;
|
||||||
|
self.write(&bytes[line])?;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let m = matches[*match_index];
|
||||||
|
if line.start() < m.start() {
|
||||||
|
let upto = cmp::min(line.end(), m.start());
|
||||||
|
self.end_color_match()?;
|
||||||
|
self.write(&bytes[line.with_end(upto)])?;
|
||||||
|
line = line.with_start(upto);
|
||||||
|
} else {
|
||||||
|
let upto = cmp::min(line.end(), m.end());
|
||||||
|
self.start_color_match()?;
|
||||||
|
self.write(&bytes[line.with_end(upto)])?;
|
||||||
|
line = line.with_start(upto);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
self.end_color_match()?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write_exceeded_line(&self) -> io::Result<()> {
|
fn write_exceeded_line(
|
||||||
|
&self,
|
||||||
|
bytes: &[u8],
|
||||||
|
mut line: Match,
|
||||||
|
matches: &[Match],
|
||||||
|
match_index: &mut usize,
|
||||||
|
) -> io::Result<()> {
|
||||||
|
if self.config().max_columns_preview {
|
||||||
|
let original = line;
|
||||||
|
let end = bytes[line]
|
||||||
|
.grapheme_indices()
|
||||||
|
.map(|(_, end, _)| end)
|
||||||
|
.take(self.config().max_columns.unwrap_or(0) as usize)
|
||||||
|
.last()
|
||||||
|
.unwrap_or(0) + line.start();
|
||||||
|
line = line.with_end(end);
|
||||||
|
self.write_colored_matches(bytes, line, matches, match_index)?;
|
||||||
|
|
||||||
|
if matches.is_empty() {
|
||||||
|
self.write(b" [... omitted end of long line]")?;
|
||||||
|
} else {
|
||||||
|
let remaining = matches
|
||||||
|
.iter()
|
||||||
|
.filter(|m| {
|
||||||
|
m.start() >= line.end() && m.start() < original.end()
|
||||||
|
})
|
||||||
|
.count();
|
||||||
|
let tense =
|
||||||
|
if remaining == 1 {
|
||||||
|
"match"
|
||||||
|
} else {
|
||||||
|
"matches"
|
||||||
|
};
|
||||||
|
write!(
|
||||||
|
self.wtr().borrow_mut(),
|
||||||
|
" [... {} more {}]",
|
||||||
|
remaining, tense,
|
||||||
|
)?;
|
||||||
|
}
|
||||||
|
self.write_line_term()?;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
if self.sunk.original_matches().is_empty() {
|
if self.sunk.original_matches().is_empty() {
|
||||||
if self.is_context() {
|
if self.is_context() {
|
||||||
self.write(b"[Omitted long context line]")?;
|
self.write(b"[Omitted long context line]")?;
|
||||||
@@ -1314,6 +1382,38 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn write_binary_message(&self, offset: u64) -> io::Result<()> {
|
||||||
|
if self.sink.match_count == 0 {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
let bin = self.searcher.binary_detection();
|
||||||
|
if let Some(byte) = bin.quit_byte() {
|
||||||
|
self.write(b"WARNING: stopped searching binary file ")?;
|
||||||
|
if let Some(path) = self.path() {
|
||||||
|
self.write_spec(self.config().colors.path(), path.as_bytes())?;
|
||||||
|
self.write(b" ")?;
|
||||||
|
}
|
||||||
|
let remainder = format!(
|
||||||
|
"after match (found {:?} byte around offset {})\n",
|
||||||
|
[byte].as_bstr(), offset,
|
||||||
|
);
|
||||||
|
self.write(remainder.as_bytes())?;
|
||||||
|
} else if let Some(byte) = bin.convert_byte() {
|
||||||
|
self.write(b"Binary file ")?;
|
||||||
|
if let Some(path) = self.path() {
|
||||||
|
self.write_spec(self.config().colors.path(), path.as_bytes())?;
|
||||||
|
self.write(b" ")?;
|
||||||
|
}
|
||||||
|
let remainder = format!(
|
||||||
|
"matches (found {:?} byte around offset {})\n",
|
||||||
|
[byte].as_bstr(), offset,
|
||||||
|
);
|
||||||
|
self.write(remainder.as_bytes())?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn write_context_separator(&self) -> io::Result<()> {
|
fn write_context_separator(&self) -> io::Result<()> {
|
||||||
if let Some(ref sep) = *self.config().separator_context {
|
if let Some(ref sep) = *self.config().separator_context {
|
||||||
self.write(sep)?;
|
self.write(sep)?;
|
||||||
@@ -1389,13 +1489,26 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
if !self.config().trim_ascii {
|
if !self.config().trim_ascii {
|
||||||
return self.write(buf);
|
return self.write(buf);
|
||||||
}
|
}
|
||||||
self.write(self.trim_ascii_prefix(buf))
|
let mut range = Match::new(0, buf.len());
|
||||||
|
self.trim_ascii_prefix(buf, &mut range);
|
||||||
|
self.write(&buf[range])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn write(&self, buf: &[u8]) -> io::Result<()> {
|
fn write(&self, buf: &[u8]) -> io::Result<()> {
|
||||||
self.wtr().borrow_mut().write_all(buf)
|
self.wtr().borrow_mut().write_all(buf)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) {
|
||||||
|
let lineterm = self.searcher.line_terminator();
|
||||||
|
if lineterm.is_suffix(&buf[*line]) {
|
||||||
|
let mut end = line.end() - 1;
|
||||||
|
if lineterm.is_crlf() && buf[end - 1] == b'\r' {
|
||||||
|
end -= 1;
|
||||||
|
}
|
||||||
|
*line = line.with_end(end);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn has_line_terminator(&self, buf: &[u8]) -> bool {
|
fn has_line_terminator(&self, buf: &[u8]) -> bool {
|
||||||
self.searcher.line_terminator().is_suffix(buf)
|
self.searcher.line_terminator().is_suffix(buf)
|
||||||
}
|
}
|
||||||
@@ -1451,14 +1564,12 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
|||||||
///
|
///
|
||||||
/// This stops trimming a prefix as soon as it sees non-whitespace or a
|
/// This stops trimming a prefix as soon as it sees non-whitespace or a
|
||||||
/// line terminator.
|
/// line terminator.
|
||||||
fn trim_ascii_prefix_range(&self, slice: &[u8], range: Match) -> Match {
|
fn trim_ascii_prefix(&self, slice: &[u8], range: &mut Match) {
|
||||||
trim_ascii_prefix_range(self.searcher.line_terminator(), slice, range)
|
if !self.config().trim_ascii {
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
let lineterm = self.searcher.line_terminator();
|
||||||
/// Trim prefix ASCII spaces from the given slice and return the
|
*range = trim_ascii_prefix(lineterm, slice, *range)
|
||||||
/// corresponding sub-slice.
|
|
||||||
fn trim_ascii_prefix<'s>(&self, slice: &'s [u8]) -> &'s [u8] {
|
|
||||||
trim_ascii_prefix(self.searcher.line_terminator(), slice)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2225,6 +2336,31 @@ but Doctor Watson has to have it taken out for him and dusted,
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn max_columns_preview() {
|
||||||
|
let matcher = RegexMatcher::new("exhibited|dusted").unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.max_columns(Some(46))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(false)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
but Doctor Watson has to have it taken out for [... omitted end of long line]
|
||||||
|
and exhibited clearly, with a label attached.
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn max_columns_with_count() {
|
fn max_columns_with_count() {
|
||||||
let matcher = RegexMatcher::new("cigar|ash|dusted").unwrap();
|
let matcher = RegexMatcher::new("cigar|ash|dusted").unwrap();
|
||||||
@@ -2250,6 +2386,86 @@ but Doctor Watson has to have it taken out for him and dusted,
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn max_columns_with_count_preview_no_match() {
|
||||||
|
let matcher = RegexMatcher::new("exhibited|has to have it").unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.stats(true)
|
||||||
|
.max_columns(Some(46))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(false)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
but Doctor Watson has to have it taken out for [... 0 more matches]
|
||||||
|
and exhibited clearly, with a label attached.
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn max_columns_with_count_preview_one_match() {
|
||||||
|
let matcher = RegexMatcher::new("exhibited|dusted").unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.stats(true)
|
||||||
|
.max_columns(Some(46))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(false)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
but Doctor Watson has to have it taken out for [... 1 more match]
|
||||||
|
and exhibited clearly, with a label attached.
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn max_columns_with_count_preview_two_matches() {
|
||||||
|
let matcher = RegexMatcher::new(
|
||||||
|
"exhibited|dusted|has to have it",
|
||||||
|
).unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.stats(true)
|
||||||
|
.max_columns(Some(46))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(false)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
but Doctor Watson has to have it taken out for [... 1 more match]
|
||||||
|
and exhibited clearly, with a label attached.
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn max_columns_multi_line() {
|
fn max_columns_multi_line() {
|
||||||
let matcher = RegexMatcher::new("(?s)ash.+dusted").unwrap();
|
let matcher = RegexMatcher::new("(?s)ash.+dusted").unwrap();
|
||||||
@@ -2275,6 +2491,36 @@ but Doctor Watson has to have it taken out for him and dusted,
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn max_columns_multi_line_preview() {
|
||||||
|
let matcher = RegexMatcher::new(
|
||||||
|
"(?s)clew|cigar ash.+have it|exhibited",
|
||||||
|
).unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.stats(true)
|
||||||
|
.max_columns(Some(46))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(false)
|
||||||
|
.multi_line(true)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
can extract a clew from a wisp of straw or a f [... 1 more match]
|
||||||
|
but Doctor Watson has to have it taken out for [... 0 more matches]
|
||||||
|
and exhibited clearly, with a label attached.
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn max_matches() {
|
fn max_matches() {
|
||||||
let matcher = RegexMatcher::new("Sherlock").unwrap();
|
let matcher = RegexMatcher::new("Sherlock").unwrap();
|
||||||
@@ -2564,8 +2810,40 @@ Holmeses, success in the province of detective work must always
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_matching_max_columns_preview() {
|
||||||
|
let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.only_matching(true)
|
||||||
|
.max_columns(Some(10))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.column(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(true)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
1:9:Doctor Wat [... 0 more matches]
|
||||||
|
1:57:Sherlock
|
||||||
|
3:49:Sherlock
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn only_matching_max_columns_multi_line1() {
|
fn only_matching_max_columns_multi_line1() {
|
||||||
|
// The `(?s:.{0})` trick fools the matcher into thinking that it
|
||||||
|
// can match across multiple lines without actually doing so. This is
|
||||||
|
// so we can test multi-line handling in the case of a match on only
|
||||||
|
// one line.
|
||||||
let matcher = RegexMatcher::new(
|
let matcher = RegexMatcher::new(
|
||||||
r"(?s:.{0})(Doctor Watsons|Sherlock)"
|
r"(?s:.{0})(Doctor Watsons|Sherlock)"
|
||||||
).unwrap();
|
).unwrap();
|
||||||
@@ -2594,6 +2872,41 @@ Holmeses, success in the province of detective work must always
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_matching_max_columns_preview_multi_line1() {
|
||||||
|
// The `(?s:.{0})` trick fools the matcher into thinking that it
|
||||||
|
// can match across multiple lines without actually doing so. This is
|
||||||
|
// so we can test multi-line handling in the case of a match on only
|
||||||
|
// one line.
|
||||||
|
let matcher = RegexMatcher::new(
|
||||||
|
r"(?s:.{0})(Doctor Watsons|Sherlock)"
|
||||||
|
).unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.only_matching(true)
|
||||||
|
.max_columns(Some(10))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.column(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.multi_line(true)
|
||||||
|
.line_number(true)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
1:9:Doctor Wat [... 0 more matches]
|
||||||
|
1:57:Sherlock
|
||||||
|
3:49:Sherlock
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn only_matching_max_columns_multi_line2() {
|
fn only_matching_max_columns_multi_line2() {
|
||||||
let matcher = RegexMatcher::new(
|
let matcher = RegexMatcher::new(
|
||||||
@@ -2625,6 +2938,38 @@ Holmeses, success in the province of detective work must always
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn only_matching_max_columns_preview_multi_line2() {
|
||||||
|
let matcher = RegexMatcher::new(
|
||||||
|
r"(?s)Watson.+?(Holmeses|clearly)"
|
||||||
|
).unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.only_matching(true)
|
||||||
|
.max_columns(Some(50))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.column(true)
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.multi_line(true)
|
||||||
|
.line_number(true)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
1:16:Watsons of this world, as opposed to the Sherlock
|
||||||
|
2:16:Holmeses
|
||||||
|
5:12:Watson has to have it taken out for him and dusted [... 0 more matches]
|
||||||
|
6:12:and exhibited clearly
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn per_match() {
|
fn per_match() {
|
||||||
let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap();
|
let matcher = RegexMatcher::new("Doctor Watsons|Sherlock").unwrap();
|
||||||
@@ -2820,6 +3165,61 @@ Holmeses, success in the province of detective work must always
|
|||||||
assert_eq_printed!(expected, got);
|
assert_eq_printed!(expected, got);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn replacement_max_columns_preview1() {
|
||||||
|
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.max_columns(Some(67))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.replacement(Some(b"doctah $1 MD".to_vec()))
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(true)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
1:For the doctah Watsons MD of this world, as opposed to the doctah [... 0 more matches]
|
||||||
|
3:be, to a very large extent, the result of luck. doctah MD Holmes
|
||||||
|
5:but doctah Watson MD has to have it taken out for him and dusted,
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn replacement_max_columns_preview2() {
|
||||||
|
let matcher = RegexMatcher::new(
|
||||||
|
"exhibited|dusted|has to have it",
|
||||||
|
).unwrap();
|
||||||
|
let mut printer = StandardBuilder::new()
|
||||||
|
.max_columns(Some(43))
|
||||||
|
.max_columns_preview(true)
|
||||||
|
.replacement(Some(b"xxx".to_vec()))
|
||||||
|
.build(NoColor::new(vec![]));
|
||||||
|
SearcherBuilder::new()
|
||||||
|
.line_number(false)
|
||||||
|
.build()
|
||||||
|
.search_reader(
|
||||||
|
&matcher,
|
||||||
|
SHERLOCK.as_bytes(),
|
||||||
|
printer.sink(&matcher),
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let got = printer_contents(&mut printer);
|
||||||
|
let expected = "\
|
||||||
|
but Doctor Watson xxx taken out for him and [... 1 more match]
|
||||||
|
and xxx clearly, with a label attached.
|
||||||
|
";
|
||||||
|
assert_eq_printed!(expected, got);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn replacement_only_matching() {
|
fn replacement_only_matching() {
|
||||||
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
|
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
|
||||||
|
@@ -636,6 +636,34 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
|
|||||||
stats.add_bytes_searched(finish.byte_count());
|
stats.add_bytes_searched(finish.byte_count());
|
||||||
stats.add_bytes_printed(self.summary.wtr.borrow().count());
|
stats.add_bytes_printed(self.summary.wtr.borrow().count());
|
||||||
}
|
}
|
||||||
|
// If our binary detection method says to quit after seeing binary
|
||||||
|
// data, then we shouldn't print any results at all, even if we've
|
||||||
|
// found a match before detecting binary data. The intent here is to
|
||||||
|
// keep BinaryDetection::quit as a form of filter. Otherwise, we can
|
||||||
|
// present a matching file with a smaller number of matches than
|
||||||
|
// there might be, which can be quite misleading.
|
||||||
|
//
|
||||||
|
// If our binary detection method is to convert binary data, then we
|
||||||
|
// don't quit and therefore search the entire contents of the file.
|
||||||
|
//
|
||||||
|
// There is an unfortunate inconsistency here. Namely, when using
|
||||||
|
// Quiet or PathWithMatch, then the printer can quit after the first
|
||||||
|
// match seen, which could be long before seeing binary data. This
|
||||||
|
// means that using PathWithMatch can print a path where as using
|
||||||
|
// Count might not print it at all because of binary data.
|
||||||
|
//
|
||||||
|
// It's not possible to fix this without also potentially significantly
|
||||||
|
// impacting the performance of Quiet or PathWithMatch, so we accept
|
||||||
|
// the bug.
|
||||||
|
if self.binary_byte_offset.is_some()
|
||||||
|
&& searcher.binary_detection().quit_byte().is_some()
|
||||||
|
{
|
||||||
|
// Squash the match count. The statistics reported will still
|
||||||
|
// contain the match count, but the "official" match count should
|
||||||
|
// be zero.
|
||||||
|
self.match_count = 0;
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
let show_count =
|
let show_count =
|
||||||
!self.summary.config.exclude_zero
|
!self.summary.config.exclude_zero
|
||||||
|
@@ -4,7 +4,7 @@ use std::io;
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::time;
|
use std::time;
|
||||||
|
|
||||||
use bstr::{BStr, BString};
|
use bstr::{ByteSlice, ByteVec};
|
||||||
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
||||||
use grep_searcher::{
|
use grep_searcher::{
|
||||||
LineIter,
|
LineIter,
|
||||||
@@ -263,12 +263,12 @@ impl<'a> Sunk<'a> {
|
|||||||
/// portability with a small cost: on Windows, paths that are not valid UTF-16
|
/// portability with a small cost: on Windows, paths that are not valid UTF-16
|
||||||
/// will not roundtrip correctly.
|
/// will not roundtrip correctly.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct PrinterPath<'a>(Cow<'a, BStr>);
|
pub struct PrinterPath<'a>(Cow<'a, [u8]>);
|
||||||
|
|
||||||
impl<'a> PrinterPath<'a> {
|
impl<'a> PrinterPath<'a> {
|
||||||
/// Create a new path suitable for printing.
|
/// Create a new path suitable for printing.
|
||||||
pub fn new(path: &'a Path) -> PrinterPath<'a> {
|
pub fn new(path: &'a Path) -> PrinterPath<'a> {
|
||||||
PrinterPath(BString::from_path_lossy(path))
|
PrinterPath(Vec::from_path_lossy(path))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Create a new printer path from the given path which can be efficiently
|
/// Create a new printer path from the given path which can be efficiently
|
||||||
@@ -289,7 +289,7 @@ impl<'a> PrinterPath<'a> {
|
|||||||
/// path separators that are both replaced by `new_sep`. In all other
|
/// path separators that are both replaced by `new_sep`. In all other
|
||||||
/// environments, only `/` is treated as a path separator.
|
/// environments, only `/` is treated as a path separator.
|
||||||
fn replace_separator(&mut self, new_sep: u8) {
|
fn replace_separator(&mut self, new_sep: u8) {
|
||||||
let transformed_path: BString = self.0.bytes().map(|b| {
|
let transformed_path: Vec<u8> = self.0.bytes().map(|b| {
|
||||||
if b == b'/' || (cfg!(windows) && b == b'\\') {
|
if b == b'/' || (cfg!(windows) && b == b'\\') {
|
||||||
new_sep
|
new_sep
|
||||||
} else {
|
} else {
|
||||||
@@ -301,7 +301,7 @@ impl<'a> PrinterPath<'a> {
|
|||||||
|
|
||||||
/// Return the raw bytes for this path.
|
/// Return the raw bytes for this path.
|
||||||
pub fn as_bytes(&self) -> &[u8] {
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
self.0.as_bytes()
|
&self.0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -346,7 +346,7 @@ impl Serialize for NiceDuration {
|
|||||||
///
|
///
|
||||||
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
|
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
|
||||||
/// terminator.
|
/// terminator.
|
||||||
pub fn trim_ascii_prefix_range(
|
pub fn trim_ascii_prefix(
|
||||||
line_term: LineTerminator,
|
line_term: LineTerminator,
|
||||||
slice: &[u8],
|
slice: &[u8],
|
||||||
range: Match,
|
range: Match,
|
||||||
@@ -366,14 +366,3 @@ pub fn trim_ascii_prefix_range(
|
|||||||
.count();
|
.count();
|
||||||
range.with_start(range.start() + count)
|
range.with_start(range.start() + count)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Trim prefix ASCII spaces from the given slice and return the corresponding
|
|
||||||
/// sub-slice.
|
|
||||||
pub fn trim_ascii_prefix(line_term: LineTerminator, slice: &[u8]) -> &[u8] {
|
|
||||||
let range = trim_ascii_prefix_range(
|
|
||||||
line_term,
|
|
||||||
slice,
|
|
||||||
Match::new(0, slice.len()),
|
|
||||||
);
|
|
||||||
&slice[range]
|
|
||||||
}
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep-regex"
|
name = "grep-regex"
|
||||||
version = "0.1.2" #:version
|
version = "0.1.5" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Use Rust's regex library with the 'grep' crate.
|
Use Rust's regex library with the 'grep' crate.
|
||||||
@@ -13,9 +13,9 @@ keywords = ["regex", "grep", "search", "pattern", "line"]
|
|||||||
license = "Unlicense/MIT"
|
license = "Unlicense/MIT"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
|
aho-corasick = "0.7.3"
|
||||||
|
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
regex-syntax = "0.6.5"
|
regex-syntax = "0.6.5"
|
||||||
thread_local = "0.3.6"
|
thread_local = "1"
|
||||||
utf8-ranges = "1.0.1"
|
|
||||||
|
@@ -1,12 +1,13 @@
|
|||||||
use grep_matcher::{ByteSet, LineTerminator};
|
use grep_matcher::{ByteSet, LineTerminator};
|
||||||
use regex::bytes::{Regex, RegexBuilder};
|
use regex::bytes::{Regex, RegexBuilder};
|
||||||
use regex_syntax::ast::{self, Ast};
|
use regex_syntax::ast::{self, Ast};
|
||||||
use regex_syntax::hir::Hir;
|
use regex_syntax::hir::{self, Hir};
|
||||||
|
|
||||||
use ast::AstAnalysis;
|
use ast::AstAnalysis;
|
||||||
use crlf::crlfify;
|
use crlf::crlfify;
|
||||||
use error::Error;
|
use error::Error;
|
||||||
use literal::LiteralSets;
|
use literal::LiteralSets;
|
||||||
|
use multi::alternation_literals;
|
||||||
use non_matching::non_matching_bytes;
|
use non_matching::non_matching_bytes;
|
||||||
use strip::strip_from_match;
|
use strip::strip_from_match;
|
||||||
|
|
||||||
@@ -67,19 +68,17 @@ impl Config {
|
|||||||
/// If there was a problem parsing the given expression then an error
|
/// If there was a problem parsing the given expression then an error
|
||||||
/// is returned.
|
/// is returned.
|
||||||
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
||||||
let analysis = self.analysis(pattern)?;
|
let ast = self.ast(pattern)?;
|
||||||
let expr = ::regex_syntax::ParserBuilder::new()
|
let analysis = self.analysis(&ast)?;
|
||||||
.nest_limit(self.nest_limit)
|
let expr = hir::translate::TranslatorBuilder::new()
|
||||||
.octal(self.octal)
|
|
||||||
.allow_invalid_utf8(true)
|
.allow_invalid_utf8(true)
|
||||||
.ignore_whitespace(self.ignore_whitespace)
|
.case_insensitive(self.is_case_insensitive(&analysis))
|
||||||
.case_insensitive(self.is_case_insensitive(&analysis)?)
|
|
||||||
.multi_line(self.multi_line)
|
.multi_line(self.multi_line)
|
||||||
.dot_matches_new_line(self.dot_matches_new_line)
|
.dot_matches_new_line(self.dot_matches_new_line)
|
||||||
.swap_greed(self.swap_greed)
|
.swap_greed(self.swap_greed)
|
||||||
.unicode(self.unicode)
|
.unicode(self.unicode)
|
||||||
.build()
|
.build()
|
||||||
.parse(pattern)
|
.translate(pattern, &ast)
|
||||||
.map_err(Error::regex)?;
|
.map_err(Error::regex)?;
|
||||||
let expr = match self.line_terminator {
|
let expr = match self.line_terminator {
|
||||||
None => expr,
|
None => expr,
|
||||||
@@ -99,21 +98,34 @@ impl Config {
|
|||||||
fn is_case_insensitive(
|
fn is_case_insensitive(
|
||||||
&self,
|
&self,
|
||||||
analysis: &AstAnalysis,
|
analysis: &AstAnalysis,
|
||||||
) -> Result<bool, Error> {
|
) -> bool {
|
||||||
if self.case_insensitive {
|
if self.case_insensitive {
|
||||||
return Ok(true);
|
return true;
|
||||||
}
|
}
|
||||||
if !self.case_smart {
|
if !self.case_smart {
|
||||||
return Ok(false);
|
return false;
|
||||||
}
|
}
|
||||||
Ok(analysis.any_literal() && !analysis.any_uppercase())
|
analysis.any_literal() && !analysis.any_uppercase()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this config is simple enough such that
|
||||||
|
/// if the pattern is a simple alternation of literals, then it can be
|
||||||
|
/// constructed via a plain Aho-Corasick automaton.
|
||||||
|
///
|
||||||
|
/// Note that it is OK to return true even when settings like `multi_line`
|
||||||
|
/// are enabled, since if multi-line can impact the match semantics of a
|
||||||
|
/// regex, then it is by definition not a simple alternation of literals.
|
||||||
|
pub fn can_plain_aho_corasick(&self) -> bool {
|
||||||
|
!self.word
|
||||||
|
&& !self.case_insensitive
|
||||||
|
&& !self.case_smart
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Perform analysis on the AST of this pattern.
|
/// Perform analysis on the AST of this pattern.
|
||||||
///
|
///
|
||||||
/// This returns an error if the given pattern failed to parse.
|
/// This returns an error if the given pattern failed to parse.
|
||||||
fn analysis(&self, pattern: &str) -> Result<AstAnalysis, Error> {
|
fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
|
||||||
Ok(AstAnalysis::from_ast(&self.ast(pattern)?))
|
Ok(AstAnalysis::from_ast(ast))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parse the given pattern into its abstract syntax.
|
/// Parse the given pattern into its abstract syntax.
|
||||||
@@ -173,6 +185,15 @@ impl ConfiguredHIR {
|
|||||||
self.pattern_to_regex(&self.expr.to_string())
|
self.pattern_to_regex(&self.expr.to_string())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If this HIR corresponds to an alternation of literals with no
|
||||||
|
/// capturing groups, then this returns those literals.
|
||||||
|
pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
|
||||||
|
if !self.config.can_plain_aho_corasick() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
alternation_literals(&self.expr)
|
||||||
|
}
|
||||||
|
|
||||||
/// Applies the given function to the concrete syntax of this HIR and then
|
/// Applies the given function to the concrete syntax of this HIR and then
|
||||||
/// generates a new HIR based on the result of the function in a way that
|
/// generates a new HIR based on the result of the function in a way that
|
||||||
/// preserves the configuration.
|
/// preserves the configuration.
|
||||||
|
@@ -76,7 +76,9 @@ impl Matcher for CRLFMatcher {
|
|||||||
caps: &mut RegexCaptures,
|
caps: &mut RegexCaptures,
|
||||||
) -> Result<bool, NoError> {
|
) -> Result<bool, NoError> {
|
||||||
caps.strip_crlf(false);
|
caps.strip_crlf(false);
|
||||||
let r = self.regex.captures_read_at(caps.locations(), haystack, at);
|
let r = self.regex.captures_read_at(
|
||||||
|
caps.locations_mut(), haystack, at,
|
||||||
|
);
|
||||||
if !r.is_some() {
|
if !r.is_some() {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
@@ -4,13 +4,13 @@ An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine.
|
|||||||
|
|
||||||
#![deny(missing_docs)]
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
|
extern crate aho_corasick;
|
||||||
extern crate grep_matcher;
|
extern crate grep_matcher;
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate log;
|
extern crate log;
|
||||||
extern crate regex;
|
extern crate regex;
|
||||||
extern crate regex_syntax;
|
extern crate regex_syntax;
|
||||||
extern crate thread_local;
|
extern crate thread_local;
|
||||||
extern crate utf8_ranges;
|
|
||||||
|
|
||||||
pub use error::{Error, ErrorKind};
|
pub use error::{Error, ErrorKind};
|
||||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||||
@@ -21,6 +21,7 @@ mod crlf;
|
|||||||
mod error;
|
mod error;
|
||||||
mod literal;
|
mod literal;
|
||||||
mod matcher;
|
mod matcher;
|
||||||
|
mod multi;
|
||||||
mod non_matching;
|
mod non_matching;
|
||||||
mod strip;
|
mod strip;
|
||||||
mod util;
|
mod util;
|
||||||
|
@@ -8,6 +8,7 @@ use regex::bytes::{CaptureLocations, Regex};
|
|||||||
use config::{Config, ConfiguredHIR};
|
use config::{Config, ConfiguredHIR};
|
||||||
use crlf::CRLFMatcher;
|
use crlf::CRLFMatcher;
|
||||||
use error::Error;
|
use error::Error;
|
||||||
|
use multi::MultiLiteralMatcher;
|
||||||
use word::WordMatcher;
|
use word::WordMatcher;
|
||||||
|
|
||||||
/// A builder for constructing a `Matcher` using regular expressions.
|
/// A builder for constructing a `Matcher` using regular expressions.
|
||||||
@@ -61,6 +62,50 @@ impl RegexMatcherBuilder {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Build a new matcher from a plain alternation of literals.
|
||||||
|
///
|
||||||
|
/// Depending on the configuration set by the builder, this may be able to
|
||||||
|
/// build a matcher substantially faster than by joining the patterns with
|
||||||
|
/// a `|` and calling `build`.
|
||||||
|
pub fn build_literals<B: AsRef<str>>(
|
||||||
|
&self,
|
||||||
|
literals: &[B],
|
||||||
|
) -> Result<RegexMatcher, Error> {
|
||||||
|
let mut has_escape = false;
|
||||||
|
let mut slices = vec![];
|
||||||
|
for lit in literals {
|
||||||
|
slices.push(lit.as_ref());
|
||||||
|
has_escape = has_escape || lit.as_ref().contains('\\');
|
||||||
|
}
|
||||||
|
// Even when we have a fixed set of literals, we might still want to
|
||||||
|
// use the regex engine. Specifically, if any string has an escape
|
||||||
|
// in it, then we probably can't feed it to Aho-Corasick without
|
||||||
|
// removing the escape. Additionally, if there are any particular
|
||||||
|
// special match semantics we need to honor, that Aho-Corasick isn't
|
||||||
|
// enough. Finally, the regex engine can do really well with a small
|
||||||
|
// number of literals (at time of writing, this is changing soon), so
|
||||||
|
// we use it when there's a small set.
|
||||||
|
//
|
||||||
|
// Yes, this is one giant hack. Ideally, this entirely separate literal
|
||||||
|
// matcher that uses Aho-Corasick would be pushed down into the regex
|
||||||
|
// engine.
|
||||||
|
if has_escape
|
||||||
|
|| !self.config.can_plain_aho_corasick()
|
||||||
|
|| literals.len() < 40
|
||||||
|
{
|
||||||
|
return self.build(&slices.join("|"));
|
||||||
|
}
|
||||||
|
|
||||||
|
let matcher = MultiLiteralMatcher::new(&slices)?;
|
||||||
|
let imp = RegexMatcherImpl::MultiLiteral(matcher);
|
||||||
|
Ok(RegexMatcher {
|
||||||
|
config: self.config.clone(),
|
||||||
|
matcher: imp,
|
||||||
|
fast_line_regex: None,
|
||||||
|
non_matching_bytes: ByteSet::empty(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
/// Set the value for the case insensitive (`i`) flag.
|
/// Set the value for the case insensitive (`i`) flag.
|
||||||
///
|
///
|
||||||
/// When enabled, letters in the pattern will match both upper case and
|
/// When enabled, letters in the pattern will match both upper case and
|
||||||
@@ -348,6 +393,8 @@ impl RegexMatcher {
|
|||||||
enum RegexMatcherImpl {
|
enum RegexMatcherImpl {
|
||||||
/// The standard matcher used for all regular expressions.
|
/// The standard matcher used for all regular expressions.
|
||||||
Standard(StandardMatcher),
|
Standard(StandardMatcher),
|
||||||
|
/// A matcher for an alternation of plain literals.
|
||||||
|
MultiLiteral(MultiLiteralMatcher),
|
||||||
/// A matcher that strips `\r` from the end of matches.
|
/// A matcher that strips `\r` from the end of matches.
|
||||||
///
|
///
|
||||||
/// This is only used when the CRLF hack is enabled and the regex is line
|
/// This is only used when the CRLF hack is enabled and the regex is line
|
||||||
@@ -370,16 +417,23 @@ impl RegexMatcherImpl {
|
|||||||
} else if expr.needs_crlf_stripped() {
|
} else if expr.needs_crlf_stripped() {
|
||||||
Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
|
Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
|
||||||
} else {
|
} else {
|
||||||
|
if let Some(lits) = expr.alternation_literals() {
|
||||||
|
if lits.len() >= 40 {
|
||||||
|
let matcher = MultiLiteralMatcher::new(&lits)?;
|
||||||
|
return Ok(RegexMatcherImpl::MultiLiteral(matcher));
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
|
Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the underlying regex object used.
|
/// Return the underlying regex object used.
|
||||||
fn regex(&self) -> &Regex {
|
fn regex(&self) -> String {
|
||||||
match *self {
|
match *self {
|
||||||
RegexMatcherImpl::Word(ref x) => x.regex(),
|
RegexMatcherImpl::Word(ref x) => x.regex().to_string(),
|
||||||
RegexMatcherImpl::CRLF(ref x) => x.regex(),
|
RegexMatcherImpl::CRLF(ref x) => x.regex().to_string(),
|
||||||
RegexMatcherImpl::Standard(ref x) => &x.regex,
|
RegexMatcherImpl::MultiLiteral(_) => "<N/A>".to_string(),
|
||||||
|
RegexMatcherImpl::Standard(ref x) => x.regex.to_string(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -399,6 +453,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.find_at(haystack, at),
|
Standard(ref m) => m.find_at(haystack, at),
|
||||||
|
MultiLiteral(ref m) => m.find_at(haystack, at),
|
||||||
CRLF(ref m) => m.find_at(haystack, at),
|
CRLF(ref m) => m.find_at(haystack, at),
|
||||||
Word(ref m) => m.find_at(haystack, at),
|
Word(ref m) => m.find_at(haystack, at),
|
||||||
}
|
}
|
||||||
@@ -408,6 +463,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.new_captures(),
|
Standard(ref m) => m.new_captures(),
|
||||||
|
MultiLiteral(ref m) => m.new_captures(),
|
||||||
CRLF(ref m) => m.new_captures(),
|
CRLF(ref m) => m.new_captures(),
|
||||||
Word(ref m) => m.new_captures(),
|
Word(ref m) => m.new_captures(),
|
||||||
}
|
}
|
||||||
@@ -417,6 +473,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.capture_count(),
|
Standard(ref m) => m.capture_count(),
|
||||||
|
MultiLiteral(ref m) => m.capture_count(),
|
||||||
CRLF(ref m) => m.capture_count(),
|
CRLF(ref m) => m.capture_count(),
|
||||||
Word(ref m) => m.capture_count(),
|
Word(ref m) => m.capture_count(),
|
||||||
}
|
}
|
||||||
@@ -426,6 +483,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.capture_index(name),
|
Standard(ref m) => m.capture_index(name),
|
||||||
|
MultiLiteral(ref m) => m.capture_index(name),
|
||||||
CRLF(ref m) => m.capture_index(name),
|
CRLF(ref m) => m.capture_index(name),
|
||||||
Word(ref m) => m.capture_index(name),
|
Word(ref m) => m.capture_index(name),
|
||||||
}
|
}
|
||||||
@@ -435,6 +493,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.find(haystack),
|
Standard(ref m) => m.find(haystack),
|
||||||
|
MultiLiteral(ref m) => m.find(haystack),
|
||||||
CRLF(ref m) => m.find(haystack),
|
CRLF(ref m) => m.find(haystack),
|
||||||
Word(ref m) => m.find(haystack),
|
Word(ref m) => m.find(haystack),
|
||||||
}
|
}
|
||||||
@@ -450,6 +509,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.find_iter(haystack, matched),
|
Standard(ref m) => m.find_iter(haystack, matched),
|
||||||
|
MultiLiteral(ref m) => m.find_iter(haystack, matched),
|
||||||
CRLF(ref m) => m.find_iter(haystack, matched),
|
CRLF(ref m) => m.find_iter(haystack, matched),
|
||||||
Word(ref m) => m.find_iter(haystack, matched),
|
Word(ref m) => m.find_iter(haystack, matched),
|
||||||
}
|
}
|
||||||
@@ -465,6 +525,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.try_find_iter(haystack, matched),
|
Standard(ref m) => m.try_find_iter(haystack, matched),
|
||||||
|
MultiLiteral(ref m) => m.try_find_iter(haystack, matched),
|
||||||
CRLF(ref m) => m.try_find_iter(haystack, matched),
|
CRLF(ref m) => m.try_find_iter(haystack, matched),
|
||||||
Word(ref m) => m.try_find_iter(haystack, matched),
|
Word(ref m) => m.try_find_iter(haystack, matched),
|
||||||
}
|
}
|
||||||
@@ -478,6 +539,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.captures(haystack, caps),
|
Standard(ref m) => m.captures(haystack, caps),
|
||||||
|
MultiLiteral(ref m) => m.captures(haystack, caps),
|
||||||
CRLF(ref m) => m.captures(haystack, caps),
|
CRLF(ref m) => m.captures(haystack, caps),
|
||||||
Word(ref m) => m.captures(haystack, caps),
|
Word(ref m) => m.captures(haystack, caps),
|
||||||
}
|
}
|
||||||
@@ -494,6 +556,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.captures_iter(haystack, caps, matched),
|
Standard(ref m) => m.captures_iter(haystack, caps, matched),
|
||||||
|
MultiLiteral(ref m) => m.captures_iter(haystack, caps, matched),
|
||||||
CRLF(ref m) => m.captures_iter(haystack, caps, matched),
|
CRLF(ref m) => m.captures_iter(haystack, caps, matched),
|
||||||
Word(ref m) => m.captures_iter(haystack, caps, matched),
|
Word(ref m) => m.captures_iter(haystack, caps, matched),
|
||||||
}
|
}
|
||||||
@@ -510,6 +573,9 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
|
Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||||
|
MultiLiteral(ref m) => {
|
||||||
|
m.try_captures_iter(haystack, caps, matched)
|
||||||
|
}
|
||||||
CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
|
CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||||
Word(ref m) => m.try_captures_iter(haystack, caps, matched),
|
Word(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||||
}
|
}
|
||||||
@@ -524,6 +590,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.captures_at(haystack, at, caps),
|
Standard(ref m) => m.captures_at(haystack, at, caps),
|
||||||
|
MultiLiteral(ref m) => m.captures_at(haystack, at, caps),
|
||||||
CRLF(ref m) => m.captures_at(haystack, at, caps),
|
CRLF(ref m) => m.captures_at(haystack, at, caps),
|
||||||
Word(ref m) => m.captures_at(haystack, at, caps),
|
Word(ref m) => m.captures_at(haystack, at, caps),
|
||||||
}
|
}
|
||||||
@@ -540,6 +607,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.replace(haystack, dst, append),
|
Standard(ref m) => m.replace(haystack, dst, append),
|
||||||
|
MultiLiteral(ref m) => m.replace(haystack, dst, append),
|
||||||
CRLF(ref m) => m.replace(haystack, dst, append),
|
CRLF(ref m) => m.replace(haystack, dst, append),
|
||||||
Word(ref m) => m.replace(haystack, dst, append),
|
Word(ref m) => m.replace(haystack, dst, append),
|
||||||
}
|
}
|
||||||
@@ -559,6 +627,9 @@ impl Matcher for RegexMatcher {
|
|||||||
Standard(ref m) => {
|
Standard(ref m) => {
|
||||||
m.replace_with_captures(haystack, caps, dst, append)
|
m.replace_with_captures(haystack, caps, dst, append)
|
||||||
}
|
}
|
||||||
|
MultiLiteral(ref m) => {
|
||||||
|
m.replace_with_captures(haystack, caps, dst, append)
|
||||||
|
}
|
||||||
CRLF(ref m) => {
|
CRLF(ref m) => {
|
||||||
m.replace_with_captures(haystack, caps, dst, append)
|
m.replace_with_captures(haystack, caps, dst, append)
|
||||||
}
|
}
|
||||||
@@ -572,6 +643,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.is_match(haystack),
|
Standard(ref m) => m.is_match(haystack),
|
||||||
|
MultiLiteral(ref m) => m.is_match(haystack),
|
||||||
CRLF(ref m) => m.is_match(haystack),
|
CRLF(ref m) => m.is_match(haystack),
|
||||||
Word(ref m) => m.is_match(haystack),
|
Word(ref m) => m.is_match(haystack),
|
||||||
}
|
}
|
||||||
@@ -585,6 +657,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.is_match_at(haystack, at),
|
Standard(ref m) => m.is_match_at(haystack, at),
|
||||||
|
MultiLiteral(ref m) => m.is_match_at(haystack, at),
|
||||||
CRLF(ref m) => m.is_match_at(haystack, at),
|
CRLF(ref m) => m.is_match_at(haystack, at),
|
||||||
Word(ref m) => m.is_match_at(haystack, at),
|
Word(ref m) => m.is_match_at(haystack, at),
|
||||||
}
|
}
|
||||||
@@ -597,6 +670,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.shortest_match(haystack),
|
Standard(ref m) => m.shortest_match(haystack),
|
||||||
|
MultiLiteral(ref m) => m.shortest_match(haystack),
|
||||||
CRLF(ref m) => m.shortest_match(haystack),
|
CRLF(ref m) => m.shortest_match(haystack),
|
||||||
Word(ref m) => m.shortest_match(haystack),
|
Word(ref m) => m.shortest_match(haystack),
|
||||||
}
|
}
|
||||||
@@ -610,6 +684,7 @@ impl Matcher for RegexMatcher {
|
|||||||
use self::RegexMatcherImpl::*;
|
use self::RegexMatcherImpl::*;
|
||||||
match self.matcher {
|
match self.matcher {
|
||||||
Standard(ref m) => m.shortest_match_at(haystack, at),
|
Standard(ref m) => m.shortest_match_at(haystack, at),
|
||||||
|
MultiLiteral(ref m) => m.shortest_match_at(haystack, at),
|
||||||
CRLF(ref m) => m.shortest_match_at(haystack, at),
|
CRLF(ref m) => m.shortest_match_at(haystack, at),
|
||||||
Word(ref m) => m.shortest_match_at(haystack, at),
|
Word(ref m) => m.shortest_match_at(haystack, at),
|
||||||
}
|
}
|
||||||
@@ -710,7 +785,9 @@ impl Matcher for StandardMatcher {
|
|||||||
at: usize,
|
at: usize,
|
||||||
caps: &mut RegexCaptures,
|
caps: &mut RegexCaptures,
|
||||||
) -> Result<bool, NoError> {
|
) -> Result<bool, NoError> {
|
||||||
Ok(self.regex.captures_read_at(&mut caps.locs, haystack, at).is_some())
|
Ok(self.regex.captures_read_at(
|
||||||
|
&mut caps.locations_mut(), haystack, at,
|
||||||
|
).is_some())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn shortest_match_at(
|
fn shortest_match_at(
|
||||||
@@ -737,7 +814,15 @@ impl Matcher for StandardMatcher {
|
|||||||
/// index of the group using the corresponding matcher's `capture_index`
|
/// index of the group using the corresponding matcher's `capture_index`
|
||||||
/// method, and then use that index with `RegexCaptures::get`.
|
/// method, and then use that index with `RegexCaptures::get`.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct RegexCaptures {
|
pub struct RegexCaptures(RegexCapturesImp);
|
||||||
|
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
enum RegexCapturesImp {
|
||||||
|
AhoCorasick {
|
||||||
|
/// The start and end of the match, corresponding to capture group 0.
|
||||||
|
mat: Option<Match>,
|
||||||
|
},
|
||||||
|
Regex {
|
||||||
/// Where the locations are stored.
|
/// Where the locations are stored.
|
||||||
locs: CaptureLocations,
|
locs: CaptureLocations,
|
||||||
/// These captures behave as if the capturing groups begin at the given
|
/// These captures behave as if the capturing groups begin at the given
|
||||||
@@ -745,46 +830,68 @@ pub struct RegexCaptures {
|
|||||||
/// indexed like normal.
|
/// indexed like normal.
|
||||||
///
|
///
|
||||||
/// This is useful when building matchers that wrap arbitrary regular
|
/// This is useful when building matchers that wrap arbitrary regular
|
||||||
/// expressions. For example, `WordMatcher` takes an existing regex `re`
|
/// expressions. For example, `WordMatcher` takes an existing regex
|
||||||
/// and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that the regex
|
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
|
||||||
/// has been wrapped from the caller. In order to do this, the matcher
|
/// the regex has been wrapped from the caller. In order to do this,
|
||||||
/// and the capturing groups must behave as if `(re)` is the `0`th capture
|
/// the matcher and the capturing groups must behave as if `(re)` is
|
||||||
/// group.
|
/// the `0`th capture group.
|
||||||
offset: usize,
|
offset: usize,
|
||||||
/// When enable, the end of a match has `\r` stripped from it, if one
|
/// When enable, the end of a match has `\r` stripped from it, if one
|
||||||
/// exists.
|
/// exists.
|
||||||
strip_crlf: bool,
|
strip_crlf: bool,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Captures for RegexCaptures {
|
impl Captures for RegexCaptures {
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
self.locs.len().checked_sub(self.offset).unwrap()
|
match self.0 {
|
||||||
|
RegexCapturesImp::AhoCorasick { .. } => 1,
|
||||||
|
RegexCapturesImp::Regex { ref locs, offset, .. } => {
|
||||||
|
locs.len().checked_sub(offset).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get(&self, i: usize) -> Option<Match> {
|
fn get(&self, i: usize) -> Option<Match> {
|
||||||
if !self.strip_crlf {
|
match self.0 {
|
||||||
let actual = i.checked_add(self.offset).unwrap();
|
RegexCapturesImp::AhoCorasick { mat, .. } => {
|
||||||
return self.locs.pos(actual).map(|(s, e)| Match::new(s, e));
|
if i == 0 {
|
||||||
|
mat
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
RegexCapturesImp::Regex { ref locs, offset, strip_crlf } => {
|
||||||
|
if !strip_crlf {
|
||||||
|
let actual = i.checked_add(offset).unwrap();
|
||||||
|
return locs.pos(actual).map(|(s, e)| Match::new(s, e));
|
||||||
}
|
}
|
||||||
|
|
||||||
// currently don't support capture offsetting with CRLF stripping
|
// currently don't support capture offsetting with CRLF
|
||||||
assert_eq!(self.offset, 0);
|
// stripping
|
||||||
let m = match self.locs.pos(i).map(|(s, e)| Match::new(s, e)) {
|
assert_eq!(offset, 0);
|
||||||
|
let m = match locs.pos(i).map(|(s, e)| Match::new(s, e)) {
|
||||||
None => return None,
|
None => return None,
|
||||||
Some(m) => m,
|
Some(m) => m,
|
||||||
};
|
};
|
||||||
// If the end position of this match corresponds to the end position
|
// If the end position of this match corresponds to the end
|
||||||
// of the overall match, then we apply our CRLF stripping. Otherwise,
|
// position of the overall match, then we apply our CRLF
|
||||||
// we cannot assume stripping is correct.
|
// stripping. Otherwise, we cannot assume stripping is correct.
|
||||||
if i == 0 || m.end() == self.locs.pos(0).unwrap().1 {
|
if i == 0 || m.end() == locs.pos(0).unwrap().1 {
|
||||||
Some(m.with_end(m.end() - 1))
|
Some(m.with_end(m.end() - 1))
|
||||||
} else {
|
} else {
|
||||||
Some(m)
|
Some(m)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl RegexCaptures {
|
impl RegexCaptures {
|
||||||
|
pub(crate) fn simple() -> RegexCaptures {
|
||||||
|
RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None })
|
||||||
|
}
|
||||||
|
|
||||||
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
|
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
|
||||||
RegexCaptures::with_offset(locs, 0)
|
RegexCaptures::with_offset(locs, 0)
|
||||||
}
|
}
|
||||||
@@ -793,15 +900,53 @@ impl RegexCaptures {
|
|||||||
locs: CaptureLocations,
|
locs: CaptureLocations,
|
||||||
offset: usize,
|
offset: usize,
|
||||||
) -> RegexCaptures {
|
) -> RegexCaptures {
|
||||||
RegexCaptures { locs, offset, strip_crlf: false }
|
RegexCaptures(RegexCapturesImp::Regex {
|
||||||
|
locs, offset, strip_crlf: false,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn locations(&mut self) -> &mut CaptureLocations {
|
pub(crate) fn locations(&self) -> &CaptureLocations {
|
||||||
&mut self.locs
|
match self.0 {
|
||||||
|
RegexCapturesImp::AhoCorasick { .. } => {
|
||||||
|
panic!("getting locations for simple captures is invalid")
|
||||||
|
}
|
||||||
|
RegexCapturesImp::Regex { ref locs, .. } => {
|
||||||
|
locs
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn locations_mut(&mut self) -> &mut CaptureLocations {
|
||||||
|
match self.0 {
|
||||||
|
RegexCapturesImp::AhoCorasick { .. } => {
|
||||||
|
panic!("getting locations for simple captures is invalid")
|
||||||
|
}
|
||||||
|
RegexCapturesImp::Regex { ref mut locs, .. } => {
|
||||||
|
locs
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn strip_crlf(&mut self, yes: bool) {
|
pub(crate) fn strip_crlf(&mut self, yes: bool) {
|
||||||
self.strip_crlf = yes;
|
match self.0 {
|
||||||
|
RegexCapturesImp::AhoCorasick { .. } => {
|
||||||
|
panic!("setting strip_crlf for simple captures is invalid")
|
||||||
|
}
|
||||||
|
RegexCapturesImp::Regex { ref mut strip_crlf, .. } => {
|
||||||
|
*strip_crlf = yes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn set_simple(&mut self, one: Option<Match>) {
|
||||||
|
match self.0 {
|
||||||
|
RegexCapturesImp::AhoCorasick { ref mut mat } => {
|
||||||
|
*mat = one;
|
||||||
|
}
|
||||||
|
RegexCapturesImp::Regex { .. } => {
|
||||||
|
panic!("setting simple captures for regex is invalid")
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
127
grep-regex/src/multi.rs
Normal file
127
grep-regex/src/multi.rs
Normal file
@@ -0,0 +1,127 @@
|
|||||||
|
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
||||||
|
use grep_matcher::{Matcher, Match, NoError};
|
||||||
|
use regex_syntax::hir::Hir;
|
||||||
|
|
||||||
|
use error::Error;
|
||||||
|
use matcher::RegexCaptures;
|
||||||
|
|
||||||
|
/// A matcher for an alternation of literals.
|
||||||
|
///
|
||||||
|
/// Ideally, this optimization would be pushed down into the regex engine, but
|
||||||
|
/// making this work correctly there would require quite a bit of refactoring.
|
||||||
|
/// Moreover, doing it one layer above lets us do thing like, "if we
|
||||||
|
/// specifically only want to search for literals, then don't bother with
|
||||||
|
/// regex parsing at all."
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
pub struct MultiLiteralMatcher {
|
||||||
|
/// The Aho-Corasick automaton.
|
||||||
|
ac: AhoCorasick,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl MultiLiteralMatcher {
|
||||||
|
/// Create a new multi-literal matcher from the given literals.
|
||||||
|
pub fn new<B: AsRef<[u8]>>(
|
||||||
|
literals: &[B],
|
||||||
|
) -> Result<MultiLiteralMatcher, Error> {
|
||||||
|
let ac = AhoCorasickBuilder::new()
|
||||||
|
.match_kind(MatchKind::LeftmostFirst)
|
||||||
|
.auto_configure(literals)
|
||||||
|
.build_with_size::<usize, _, _>(literals)
|
||||||
|
.map_err(Error::regex)?;
|
||||||
|
Ok(MultiLiteralMatcher { ac })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Matcher for MultiLiteralMatcher {
|
||||||
|
type Captures = RegexCaptures;
|
||||||
|
type Error = NoError;
|
||||||
|
|
||||||
|
fn find_at(
|
||||||
|
&self,
|
||||||
|
haystack: &[u8],
|
||||||
|
at: usize,
|
||||||
|
) -> Result<Option<Match>, NoError> {
|
||||||
|
match self.ac.find(&haystack[at..]) {
|
||||||
|
None => Ok(None),
|
||||||
|
Some(m) => Ok(Some(Match::new(at + m.start(), at + m.end()))),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||||
|
Ok(RegexCaptures::simple())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn capture_count(&self) -> usize {
|
||||||
|
1
|
||||||
|
}
|
||||||
|
|
||||||
|
fn capture_index(&self, _: &str) -> Option<usize> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
fn captures_at(
|
||||||
|
&self,
|
||||||
|
haystack: &[u8],
|
||||||
|
at: usize,
|
||||||
|
caps: &mut RegexCaptures,
|
||||||
|
) -> Result<bool, NoError> {
|
||||||
|
caps.set_simple(None);
|
||||||
|
let mat = self.find_at(haystack, at)?;
|
||||||
|
caps.set_simple(mat);
|
||||||
|
Ok(mat.is_some())
|
||||||
|
}
|
||||||
|
|
||||||
|
// We specifically do not implement other methods like find_iter. Namely,
|
||||||
|
// the iter methods are guaranteed to be correct by virtue of implementing
|
||||||
|
// find_at above.
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Alternation literals checks if the given HIR is a simple alternation of
|
||||||
|
/// literals, and if so, returns them. Otherwise, this returns None.
|
||||||
|
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
|
||||||
|
use regex_syntax::hir::{HirKind, Literal};
|
||||||
|
|
||||||
|
// This is pretty hacky, but basically, if `is_alternation_literal` is
|
||||||
|
// true, then we can make several assumptions about the structure of our
|
||||||
|
// HIR. This is what justifies the `unreachable!` statements below.
|
||||||
|
|
||||||
|
if !expr.is_alternation_literal() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let alts = match *expr.kind() {
|
||||||
|
HirKind::Alternation(ref alts) => alts,
|
||||||
|
_ => return None, // one literal isn't worth it
|
||||||
|
};
|
||||||
|
|
||||||
|
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| {
|
||||||
|
match *lit {
|
||||||
|
Literal::Unicode(c) => {
|
||||||
|
let mut buf = [0; 4];
|
||||||
|
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
|
||||||
|
}
|
||||||
|
Literal::Byte(b) => {
|
||||||
|
dst.push(b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut lits = vec![];
|
||||||
|
for alt in alts {
|
||||||
|
let mut lit = vec![];
|
||||||
|
match *alt.kind() {
|
||||||
|
HirKind::Empty => {}
|
||||||
|
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||||
|
HirKind::Concat(ref exprs) => {
|
||||||
|
for e in exprs {
|
||||||
|
match *e.kind() {
|
||||||
|
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||||
|
_ => unreachable!("expected literal, got {:?}", e),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => unreachable!("expected literal or concat, got {:?}", alt),
|
||||||
|
}
|
||||||
|
lits.push(lit);
|
||||||
|
}
|
||||||
|
Some(lits)
|
||||||
|
}
|
@@ -1,6 +1,6 @@
|
|||||||
use grep_matcher::ByteSet;
|
use grep_matcher::ByteSet;
|
||||||
use regex_syntax::hir::{self, Hir, HirKind};
|
use regex_syntax::hir::{self, Hir, HirKind};
|
||||||
use utf8_ranges::Utf8Sequences;
|
use regex_syntax::utf8::Utf8Sequences;
|
||||||
|
|
||||||
/// Return a confirmed set of non-matching bytes from the given expression.
|
/// Return a confirmed set of non-matching bytes from the given expression.
|
||||||
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||||
|
@@ -78,7 +78,7 @@ impl Matcher for WordMatcher {
|
|||||||
// if it's worth it.
|
// if it's worth it.
|
||||||
|
|
||||||
let cell = self.locs.get_or(|| {
|
let cell = self.locs.get_or(|| {
|
||||||
Box::new(RefCell::new(self.regex.capture_locations()))
|
RefCell::new(self.regex.capture_locations())
|
||||||
});
|
});
|
||||||
let mut caps = cell.borrow_mut();
|
let mut caps = cell.borrow_mut();
|
||||||
self.regex.captures_read_at(&mut caps, haystack, at);
|
self.regex.captures_read_at(&mut caps, haystack, at);
|
||||||
@@ -103,7 +103,9 @@ impl Matcher for WordMatcher {
|
|||||||
at: usize,
|
at: usize,
|
||||||
caps: &mut RegexCaptures,
|
caps: &mut RegexCaptures,
|
||||||
) -> Result<bool, NoError> {
|
) -> Result<bool, NoError> {
|
||||||
let r = self.regex.captures_read_at(caps.locations(), haystack, at);
|
let r = self.regex.captures_read_at(
|
||||||
|
caps.locations_mut(), haystack, at,
|
||||||
|
);
|
||||||
Ok(r.is_some())
|
Ok(r.is_some())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep-searcher"
|
name = "grep-searcher"
|
||||||
version = "0.1.3" #:version
|
version = "0.1.6" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Fast line oriented regex searching as a library.
|
Fast line oriented regex searching as a library.
|
||||||
@@ -13,16 +13,16 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
|||||||
license = "Unlicense/MIT"
|
license = "Unlicense/MIT"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
|
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||||
bytecount = "0.5"
|
bytecount = "0.6"
|
||||||
encoding_rs = "0.8.14"
|
encoding_rs = "0.8.14"
|
||||||
encoding_rs_io = "0.1.4"
|
encoding_rs_io = "0.1.6"
|
||||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
memmap = "0.7"
|
memmap = "0.7"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
|
@@ -17,7 +17,7 @@ fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn example() -> Result<(), Box<Error>> {
|
fn example() -> Result<(), Box<dyn Error>> {
|
||||||
let pattern = match env::args().nth(1) {
|
let pattern = match env::args().nth(1) {
|
||||||
Some(pattern) => pattern,
|
Some(pattern) => pattern,
|
||||||
None => return Err(From::from(format!(
|
None => return Err(From::from(format!(
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
use bstr::{BStr, BString};
|
use bstr::ByteSlice;
|
||||||
|
|
||||||
/// The default buffer capacity that we use for the line buffer.
|
/// The default buffer capacity that we use for the line buffer.
|
||||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||||
@@ -122,7 +122,7 @@ impl LineBufferBuilder {
|
|||||||
pub fn build(&self) -> LineBuffer {
|
pub fn build(&self) -> LineBuffer {
|
||||||
LineBuffer {
|
LineBuffer {
|
||||||
config: self.config,
|
config: self.config,
|
||||||
buf: BString::from(vec![0; self.config.capacity]),
|
buf: vec![0; self.config.capacity],
|
||||||
pos: 0,
|
pos: 0,
|
||||||
last_lineterm: 0,
|
last_lineterm: 0,
|
||||||
end: 0,
|
end: 0,
|
||||||
@@ -254,13 +254,14 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
|||||||
|
|
||||||
/// Return the contents of this buffer.
|
/// Return the contents of this buffer.
|
||||||
pub fn buffer(&self) -> &[u8] {
|
pub fn buffer(&self) -> &[u8] {
|
||||||
self.line_buffer.buffer().as_bytes()
|
self.line_buffer.buffer()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the underlying buffer as a byte string. Used for tests only.
|
/// Return the buffer as a BStr, used for convenient equality checking
|
||||||
|
/// in tests only.
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
fn bstr(&self) -> &BStr {
|
fn bstr(&self) -> &::bstr::BStr {
|
||||||
self.line_buffer.buffer()
|
self.buffer().as_bstr()
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Consume the number of bytes provided. This must be less than or equal
|
/// Consume the number of bytes provided. This must be less than or equal
|
||||||
@@ -289,7 +290,7 @@ pub struct LineBuffer {
|
|||||||
/// The configuration of this buffer.
|
/// The configuration of this buffer.
|
||||||
config: Config,
|
config: Config,
|
||||||
/// The primary buffer with which to hold data.
|
/// The primary buffer with which to hold data.
|
||||||
buf: BString,
|
buf: Vec<u8>,
|
||||||
/// The current position of this buffer. This is always a valid sliceable
|
/// The current position of this buffer. This is always a valid sliceable
|
||||||
/// index into `buf`, and its maximum value is the length of `buf`.
|
/// index into `buf`, and its maximum value is the length of `buf`.
|
||||||
pos: usize,
|
pos: usize,
|
||||||
@@ -317,6 +318,14 @@ pub struct LineBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl LineBuffer {
|
impl LineBuffer {
|
||||||
|
/// Set the binary detection method used on this line buffer.
|
||||||
|
///
|
||||||
|
/// This permits dynamically changing the binary detection strategy on
|
||||||
|
/// an existing line buffer without needing to create a new one.
|
||||||
|
pub fn set_binary_detection(&mut self, binary: BinaryDetection) {
|
||||||
|
self.config.binary = binary;
|
||||||
|
}
|
||||||
|
|
||||||
/// Reset this buffer, such that it can be used with a new reader.
|
/// Reset this buffer, such that it can be used with a new reader.
|
||||||
fn clear(&mut self) {
|
fn clear(&mut self) {
|
||||||
self.pos = 0;
|
self.pos = 0;
|
||||||
@@ -344,13 +353,13 @@ impl LineBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Return the contents of this buffer.
|
/// Return the contents of this buffer.
|
||||||
fn buffer(&self) -> &BStr {
|
fn buffer(&self) -> &[u8] {
|
||||||
&self.buf[self.pos..self.last_lineterm]
|
&self.buf[self.pos..self.last_lineterm]
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the contents of the free space beyond the end of the buffer as
|
/// Return the contents of the free space beyond the end of the buffer as
|
||||||
/// a mutable slice.
|
/// a mutable slice.
|
||||||
fn free_buffer(&mut self) -> &mut BStr {
|
fn free_buffer(&mut self) -> &mut [u8] {
|
||||||
&mut self.buf[self.end..]
|
&mut self.buf[self.end..]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -473,7 +482,7 @@ impl LineBuffer {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let roll_len = self.end - self.pos;
|
let roll_len = self.end - self.pos;
|
||||||
self.buf.copy_within(self.pos.., 0);
|
self.buf.copy_within_str(self.pos..self.end, 0);
|
||||||
self.pos = 0;
|
self.pos = 0;
|
||||||
self.last_lineterm = roll_len;
|
self.last_lineterm = roll_len;
|
||||||
self.end = roll_len;
|
self.end = roll_len;
|
||||||
@@ -511,7 +520,7 @@ impl LineBuffer {
|
|||||||
|
|
||||||
/// Replaces `src` with `replacement` in bytes, and return the offset of the
|
/// Replaces `src` with `replacement` in bytes, and return the offset of the
|
||||||
/// first replacement, if one exists.
|
/// first replacement, if one exists.
|
||||||
fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
|
fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||||
if src == replacement {
|
if src == replacement {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@@ -534,7 +543,7 @@ fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::str;
|
use std::str;
|
||||||
use bstr::BString;
|
use bstr::{ByteSlice, ByteVec};
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
const SHERLOCK: &'static str = "\
|
const SHERLOCK: &'static str = "\
|
||||||
@@ -555,7 +564,7 @@ and exhibited clearly, with a label attached.\
|
|||||||
src: u8,
|
src: u8,
|
||||||
replacement: u8,
|
replacement: u8,
|
||||||
) -> (String, Option<usize>) {
|
) -> (String, Option<usize>) {
|
||||||
let mut dst = BString::from(slice);
|
let mut dst = Vec::from(slice);
|
||||||
let result = replace_bytes(&mut dst, src, replacement);
|
let result = replace_bytes(&mut dst, src, replacement);
|
||||||
(dst.into_string().unwrap(), result)
|
(dst.into_string().unwrap(), result)
|
||||||
}
|
}
|
||||||
@@ -669,12 +678,12 @@ and exhibited clearly, with a label attached.\
|
|||||||
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
|
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
|
||||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||||
|
|
||||||
let mut got = BString::new();
|
let mut got = vec![];
|
||||||
while rdr.fill().unwrap() {
|
while rdr.fill().unwrap() {
|
||||||
got.push(rdr.buffer());
|
got.push_str(rdr.buffer());
|
||||||
rdr.consume_all();
|
rdr.consume_all();
|
||||||
}
|
}
|
||||||
assert_eq!(bytes, got);
|
assert_eq!(bytes, got.as_bstr());
|
||||||
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
||||||
assert_eq!(rdr.binary_byte_offset(), None);
|
assert_eq!(rdr.binary_byte_offset(), None);
|
||||||
}
|
}
|
||||||
|
@@ -2,7 +2,7 @@
|
|||||||
A collection of routines for performing operations on lines.
|
A collection of routines for performing operations on lines.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use bstr::B;
|
use bstr::ByteSlice;
|
||||||
use bytecount;
|
use bytecount;
|
||||||
use grep_matcher::{LineTerminator, Match};
|
use grep_matcher::{LineTerminator, Match};
|
||||||
|
|
||||||
@@ -85,7 +85,7 @@ impl LineStep {
|
|||||||
#[inline(always)]
|
#[inline(always)]
|
||||||
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
||||||
bytes = &bytes[..self.end];
|
bytes = &bytes[..self.end];
|
||||||
match B(&bytes[self.pos..]).find_byte(self.line_term) {
|
match bytes[self.pos..].find_byte(self.line_term) {
|
||||||
None => {
|
None => {
|
||||||
if self.pos < bytes.len() {
|
if self.pos < bytes.len() {
|
||||||
let m = (self.pos, bytes.len());
|
let m = (self.pos, bytes.len());
|
||||||
@@ -135,14 +135,14 @@ pub fn locate(
|
|||||||
line_term: u8,
|
line_term: u8,
|
||||||
range: Match,
|
range: Match,
|
||||||
) -> Match {
|
) -> Match {
|
||||||
let line_start = B(&bytes[..range.start()])
|
let line_start = bytes[..range.start()]
|
||||||
.rfind_byte(line_term)
|
.rfind_byte(line_term)
|
||||||
.map_or(0, |i| i + 1);
|
.map_or(0, |i| i + 1);
|
||||||
let line_end =
|
let line_end =
|
||||||
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
||||||
range.end()
|
range.end()
|
||||||
} else {
|
} else {
|
||||||
B(&bytes[range.end()..])
|
bytes[range.end()..]
|
||||||
.find_byte(line_term)
|
.find_byte(line_term)
|
||||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||||
};
|
};
|
||||||
@@ -182,7 +182,7 @@ fn preceding_by_pos(
|
|||||||
pos -= 1;
|
pos -= 1;
|
||||||
}
|
}
|
||||||
loop {
|
loop {
|
||||||
match B(&bytes[..pos]).rfind_byte(line_term) {
|
match bytes[..pos].rfind_byte(line_term) {
|
||||||
None => {
|
None => {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
use std::cmp;
|
use std::cmp;
|
||||||
|
|
||||||
use bstr::B;
|
use bstr::ByteSlice;
|
||||||
|
|
||||||
use grep_matcher::{LineMatchKind, Matcher};
|
use grep_matcher::{LineMatchKind, Matcher};
|
||||||
use lines::{self, LineStep};
|
use lines::{self, LineStep};
|
||||||
@@ -90,6 +90,13 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
self.sink_matched(buf, range)
|
self.sink_matched(buf, range)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn binary_data(
|
||||||
|
&mut self,
|
||||||
|
binary_byte_offset: u64,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
self.sink.binary_data(&self.searcher, binary_byte_offset)
|
||||||
|
}
|
||||||
|
|
||||||
pub fn begin(&mut self) -> Result<bool, S::Error> {
|
pub fn begin(&mut self) -> Result<bool, S::Error> {
|
||||||
self.sink.begin(&self.searcher)
|
self.sink.begin(&self.searcher)
|
||||||
}
|
}
|
||||||
@@ -141,19 +148,28 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
consumed
|
consumed
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn detect_binary(&mut self, buf: &[u8], range: &Range) -> bool {
|
pub fn detect_binary(
|
||||||
|
&mut self,
|
||||||
|
buf: &[u8],
|
||||||
|
range: &Range,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary_byte_offset.is_some() {
|
if self.binary_byte_offset.is_some() {
|
||||||
return true;
|
return Ok(self.config.binary.quit_byte().is_some());
|
||||||
}
|
}
|
||||||
let binary_byte = match self.config.binary.0 {
|
let binary_byte = match self.config.binary.0 {
|
||||||
BinaryDetection::Quit(b) => b,
|
BinaryDetection::Quit(b) => b,
|
||||||
_ => return false,
|
BinaryDetection::Convert(b) => b,
|
||||||
|
_ => return Ok(false),
|
||||||
};
|
};
|
||||||
if let Some(i) = B(&buf[*range]).find_byte(binary_byte) {
|
if let Some(i) = buf[*range].find_byte(binary_byte) {
|
||||||
self.binary_byte_offset = Some(range.start() + i);
|
let offset = range.start() + i;
|
||||||
true
|
self.binary_byte_offset = Some(offset);
|
||||||
|
if !self.binary_data(offset as u64)? {
|
||||||
|
return Ok(true);
|
||||||
|
}
|
||||||
|
Ok(self.config.binary.quit_byte().is_some())
|
||||||
} else {
|
} else {
|
||||||
false
|
Ok(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -416,7 +432,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
buf: &[u8],
|
buf: &[u8],
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range)? {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
if !self.sink_break_context(range.start())? {
|
if !self.sink_break_context(range.start())? {
|
||||||
@@ -448,7 +464,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
buf: &[u8],
|
buf: &[u8],
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range)? {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
self.count_lines(buf, range.start());
|
self.count_lines(buf, range.start());
|
||||||
@@ -478,7 +494,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
assert!(self.after_context_left >= 1);
|
assert!(self.after_context_left >= 1);
|
||||||
|
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range)? {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
self.count_lines(buf, range.start());
|
self.count_lines(buf, range.start());
|
||||||
@@ -507,7 +523,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
|||||||
buf: &[u8],
|
buf: &[u8],
|
||||||
range: &Range,
|
range: &Range,
|
||||||
) -> Result<bool, S::Error> {
|
) -> Result<bool, S::Error> {
|
||||||
if self.binary && self.detect_binary(buf, range) {
|
if self.binary && self.detect_binary(buf, range)? {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
self.count_lines(buf, range.start());
|
self.count_lines(buf, range.start());
|
||||||
|
@@ -51,6 +51,7 @@ where M: Matcher,
|
|||||||
fn fill(&mut self) -> Result<bool, S::Error> {
|
fn fill(&mut self) -> Result<bool, S::Error> {
|
||||||
assert!(self.rdr.buffer()[self.core.pos()..].is_empty());
|
assert!(self.rdr.buffer()[self.core.pos()..].is_empty());
|
||||||
|
|
||||||
|
let already_binary = self.rdr.binary_byte_offset().is_some();
|
||||||
let old_buf_len = self.rdr.buffer().len();
|
let old_buf_len = self.rdr.buffer().len();
|
||||||
let consumed = self.core.roll(self.rdr.buffer());
|
let consumed = self.core.roll(self.rdr.buffer());
|
||||||
self.rdr.consume(consumed);
|
self.rdr.consume(consumed);
|
||||||
@@ -58,7 +59,14 @@ where M: Matcher,
|
|||||||
Err(err) => return Err(S::Error::error_io(err)),
|
Err(err) => return Err(S::Error::error_io(err)),
|
||||||
Ok(didread) => didread,
|
Ok(didread) => didread,
|
||||||
};
|
};
|
||||||
if !didread || self.rdr.binary_byte_offset().is_some() {
|
if !already_binary {
|
||||||
|
if let Some(offset) = self.rdr.binary_byte_offset() {
|
||||||
|
if !self.core.binary_data(offset)? {
|
||||||
|
return Ok(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !didread || self.should_binary_quit() {
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
// If rolling the buffer didn't result in consuming anything and if
|
// If rolling the buffer didn't result in consuming anything and if
|
||||||
@@ -71,6 +79,11 @@ where M: Matcher,
|
|||||||
}
|
}
|
||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn should_binary_quit(&self) -> bool {
|
||||||
|
self.rdr.binary_byte_offset().is_some()
|
||||||
|
&& self.config.binary.quit_byte().is_some()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
@@ -103,7 +116,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
|||||||
DEFAULT_BUFFER_CAPACITY,
|
DEFAULT_BUFFER_CAPACITY,
|
||||||
);
|
);
|
||||||
let binary_range = Range::new(0, binary_upto);
|
let binary_range = Range::new(0, binary_upto);
|
||||||
if !self.core.detect_binary(self.slice, &binary_range) {
|
if !self.core.detect_binary(self.slice, &binary_range)? {
|
||||||
while
|
while
|
||||||
!self.slice[self.core.pos()..].is_empty()
|
!self.slice[self.core.pos()..].is_empty()
|
||||||
&& self.core.match_by_line(self.slice)?
|
&& self.core.match_by_line(self.slice)?
|
||||||
@@ -155,7 +168,7 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
|||||||
DEFAULT_BUFFER_CAPACITY,
|
DEFAULT_BUFFER_CAPACITY,
|
||||||
);
|
);
|
||||||
let binary_range = Range::new(0, binary_upto);
|
let binary_range = Range::new(0, binary_upto);
|
||||||
if !self.core.detect_binary(self.slice, &binary_range) {
|
if !self.core.detect_binary(self.slice, &binary_range)? {
|
||||||
let mut keepgoing = true;
|
let mut keepgoing = true;
|
||||||
while !self.slice[self.core.pos()..].is_empty() && keepgoing {
|
while !self.slice[self.core.pos()..].is_empty() && keepgoing {
|
||||||
keepgoing = self.sink()?;
|
keepgoing = self.sink()?;
|
||||||
|
@@ -75,25 +75,41 @@ impl BinaryDetection {
|
|||||||
BinaryDetection(line_buffer::BinaryDetection::Quit(binary_byte))
|
BinaryDetection(line_buffer::BinaryDetection::Quit(binary_byte))
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(burntsushi): Figure out how to make binary conversion work. This
|
/// Binary detection is performed by looking for the given byte, and
|
||||||
// permits implementing GNU grep's default behavior, which is to zap NUL
|
/// replacing it with the line terminator configured on the searcher.
|
||||||
// bytes but still execute a search (if a match is detected, then GNU grep
|
/// (If the searcher is configured to use `CRLF` as the line terminator,
|
||||||
// stops and reports that a match was found but doesn't print the matching
|
/// then this byte is replaced by just `LF`.)
|
||||||
// line itself).
|
///
|
||||||
//
|
/// When searching is performed using a fixed size buffer, then the
|
||||||
// This behavior is pretty simple to implement using the line buffer (and
|
/// contents of that buffer are always searched for the presence of this
|
||||||
// in fact, it is already implemented and tested), since there's a fixed
|
/// byte and replaced with the line terminator. In effect, the caller is
|
||||||
// size buffer that we can easily write to. The issue arises when searching
|
/// guaranteed to never observe this byte while searching.
|
||||||
// a `&[u8]` (whether on the heap or via a memory map), since this isn't
|
///
|
||||||
// something we can easily write to.
|
/// When searching is performed with the entire contents mapped into
|
||||||
|
/// memory, then this setting has no effect and is ignored.
|
||||||
/// The given byte is searched in all contents read by the line buffer. If
|
pub fn convert(binary_byte: u8) -> BinaryDetection {
|
||||||
/// it occurs, then it is replaced by the line terminator. The line buffer
|
|
||||||
/// guarantees that this byte will never be observable by callers.
|
|
||||||
#[allow(dead_code)]
|
|
||||||
fn convert(binary_byte: u8) -> BinaryDetection {
|
|
||||||
BinaryDetection(line_buffer::BinaryDetection::Convert(binary_byte))
|
BinaryDetection(line_buffer::BinaryDetection::Convert(binary_byte))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// If this binary detection uses the "quit" strategy, then this returns
|
||||||
|
/// the byte that will cause a search to quit. In any other case, this
|
||||||
|
/// returns `None`.
|
||||||
|
pub fn quit_byte(&self) -> Option<u8> {
|
||||||
|
match self.0 {
|
||||||
|
line_buffer::BinaryDetection::Quit(b) => Some(b),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// If this binary detection uses the "convert" strategy, then this returns
|
||||||
|
/// the byte that will be replaced by the line terminator. In any other
|
||||||
|
/// case, this returns `None`.
|
||||||
|
pub fn convert_byte(&self) -> Option<u8> {
|
||||||
|
match self.0 {
|
||||||
|
line_buffer::BinaryDetection::Convert(b) => Some(b),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// An encoding to use when searching.
|
/// An encoding to use when searching.
|
||||||
@@ -155,6 +171,8 @@ pub struct Config {
|
|||||||
/// An encoding that, when present, causes the searcher to transcode all
|
/// An encoding that, when present, causes the searcher to transcode all
|
||||||
/// input from the encoding to UTF-8.
|
/// input from the encoding to UTF-8.
|
||||||
encoding: Option<Encoding>,
|
encoding: Option<Encoding>,
|
||||||
|
/// Whether to do automatic transcoding based on a BOM or not.
|
||||||
|
bom_sniffing: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
@@ -171,6 +189,7 @@ impl Default for Config {
|
|||||||
binary: BinaryDetection::default(),
|
binary: BinaryDetection::default(),
|
||||||
multi_line: false,
|
multi_line: false,
|
||||||
encoding: None,
|
encoding: None,
|
||||||
|
bom_sniffing: true,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -303,12 +322,15 @@ impl SearcherBuilder {
|
|||||||
config.before_context = 0;
|
config.before_context = 0;
|
||||||
config.after_context = 0;
|
config.after_context = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut decode_builder = DecodeReaderBytesBuilder::new();
|
let mut decode_builder = DecodeReaderBytesBuilder::new();
|
||||||
decode_builder
|
decode_builder
|
||||||
.encoding(self.config.encoding.as_ref().map(|e| e.0))
|
.encoding(self.config.encoding.as_ref().map(|e| e.0))
|
||||||
.utf8_passthru(true)
|
.utf8_passthru(true)
|
||||||
.strip_bom(true)
|
.strip_bom(self.config.bom_sniffing)
|
||||||
.bom_override(true);
|
.bom_override(true)
|
||||||
|
.bom_sniffing(self.config.bom_sniffing);
|
||||||
|
|
||||||
Searcher {
|
Searcher {
|
||||||
config: config,
|
config: config,
|
||||||
decode_builder: decode_builder,
|
decode_builder: decode_builder,
|
||||||
@@ -506,12 +528,13 @@ impl SearcherBuilder {
|
|||||||
/// transcoding process encounters an error, then bytes are replaced with
|
/// transcoding process encounters an error, then bytes are replaced with
|
||||||
/// the Unicode replacement codepoint.
|
/// the Unicode replacement codepoint.
|
||||||
///
|
///
|
||||||
/// When no encoding is specified (the default), then BOM sniffing is used
|
/// When no encoding is specified (the default), then BOM sniffing is
|
||||||
/// to determine whether the source data is UTF-8 or UTF-16, and
|
/// used (if it's enabled, which it is, by default) to determine whether
|
||||||
/// transcoding will be performed automatically. If no BOM could be found,
|
/// the source data is UTF-8 or UTF-16, and transcoding will be performed
|
||||||
/// then the source data is searched _as if_ it were UTF-8. However, so
|
/// automatically. If no BOM could be found, then the source data is
|
||||||
/// long as the source data is at least ASCII compatible, then it is
|
/// searched _as if_ it were UTF-8. However, so long as the source data is
|
||||||
/// possible for a search to produce useful results.
|
/// at least ASCII compatible, then it is possible for a search to produce
|
||||||
|
/// useful results.
|
||||||
pub fn encoding(
|
pub fn encoding(
|
||||||
&mut self,
|
&mut self,
|
||||||
encoding: Option<Encoding>,
|
encoding: Option<Encoding>,
|
||||||
@@ -519,6 +542,23 @@ impl SearcherBuilder {
|
|||||||
self.config.encoding = encoding;
|
self.config.encoding = encoding;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Enable automatic transcoding based on BOM sniffing.
|
||||||
|
///
|
||||||
|
/// When this is enabled and an explicit encoding is not set, then this
|
||||||
|
/// searcher will try to detect the encoding of the bytes being searched
|
||||||
|
/// by sniffing its byte-order mark (BOM). In particular, when this is
|
||||||
|
/// enabled, UTF-16 encoded files will be searched seamlessly.
|
||||||
|
///
|
||||||
|
/// When this is disabled and if an explicit encoding is not set, then
|
||||||
|
/// the bytes from the source stream will be passed through unchanged,
|
||||||
|
/// including its BOM, if one is present.
|
||||||
|
///
|
||||||
|
/// This is enabled by default.
|
||||||
|
pub fn bom_sniffing(&mut self, yes: bool) -> &mut SearcherBuilder {
|
||||||
|
self.config.bom_sniffing = yes;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A searcher executes searches over a haystack and writes results to a caller
|
/// A searcher executes searches over a haystack and writes results to a caller
|
||||||
@@ -715,6 +755,12 @@ impl Searcher {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the binary detection method used on this searcher.
|
||||||
|
pub fn set_binary_detection(&mut self, detection: BinaryDetection) {
|
||||||
|
self.config.binary = detection.clone();
|
||||||
|
self.line_buffer.borrow_mut().set_binary_detection(detection.0);
|
||||||
|
}
|
||||||
|
|
||||||
/// Check that the searcher's configuration and the matcher are consistent
|
/// Check that the searcher's configuration and the matcher are consistent
|
||||||
/// with each other.
|
/// with each other.
|
||||||
fn check_config<M: Matcher>(&self, matcher: M) -> Result<(), ConfigError> {
|
fn check_config<M: Matcher>(&self, matcher: M) -> Result<(), ConfigError> {
|
||||||
@@ -738,7 +784,8 @@ impl Searcher {
|
|||||||
|
|
||||||
/// Returns true if and only if the given slice needs to be transcoded.
|
/// Returns true if and only if the given slice needs to be transcoded.
|
||||||
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
||||||
self.config.encoding.is_some() || slice_has_utf16_bom(slice)
|
self.config.encoding.is_some()
|
||||||
|
|| (self.config.bom_sniffing && slice_has_utf16_bom(slice))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -753,6 +800,12 @@ impl Searcher {
|
|||||||
self.config.line_term
|
self.config.line_term
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the type of binary detection configured on this searcher.
|
||||||
|
#[inline]
|
||||||
|
pub fn binary_detection(&self) -> &BinaryDetection {
|
||||||
|
&self.config.binary
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if and only if this searcher is configured to invert its
|
/// Returns true if and only if this searcher is configured to invert its
|
||||||
/// search results. That is, matching lines are lines that do **not** match
|
/// search results. That is, matching lines are lines that do **not** match
|
||||||
/// the searcher's matcher.
|
/// the searcher's matcher.
|
||||||
|
@@ -1,3 +1,4 @@
|
|||||||
|
use std::error;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
@@ -49,9 +50,9 @@ impl SinkError for io::Error {
|
|||||||
|
|
||||||
/// A `Box<std::error::Error>` can be used as an error for `Sink`
|
/// A `Box<std::error::Error>` can be used as an error for `Sink`
|
||||||
/// implementations out of the box.
|
/// implementations out of the box.
|
||||||
impl SinkError for Box<::std::error::Error> {
|
impl SinkError for Box<dyn error::Error> {
|
||||||
fn error_message<T: fmt::Display>(message: T) -> Box<::std::error::Error> {
|
fn error_message<T: fmt::Display>(message: T) -> Box<dyn error::Error> {
|
||||||
Box::<::std::error::Error>::from(message.to_string())
|
Box::<dyn error::Error>::from(message.to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -167,6 +168,28 @@ pub trait Sink {
|
|||||||
Ok(true)
|
Ok(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// This method is called whenever binary detection is enabled and binary
|
||||||
|
/// data is found. If binary data is found, then this is called at least
|
||||||
|
/// once for the first occurrence with the absolute byte offset at which
|
||||||
|
/// the binary data begins.
|
||||||
|
///
|
||||||
|
/// If this returns `true`, then searching continues. If this returns
|
||||||
|
/// `false`, then searching is stopped immediately and `finish` is called.
|
||||||
|
///
|
||||||
|
/// If this returns an error, then searching is stopped immediately,
|
||||||
|
/// `finish` is not called and the error is bubbled back up to the caller
|
||||||
|
/// of the searcher.
|
||||||
|
///
|
||||||
|
/// By default, it does nothing and returns `true`.
|
||||||
|
#[inline]
|
||||||
|
fn binary_data(
|
||||||
|
&mut self,
|
||||||
|
_searcher: &Searcher,
|
||||||
|
_binary_byte_offset: u64,
|
||||||
|
) -> Result<bool, Self::Error> {
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
/// This method is called when a search has begun, before any search is
|
/// This method is called when a search has begun, before any search is
|
||||||
/// executed. By default, this does nothing.
|
/// executed. By default, this does nothing.
|
||||||
///
|
///
|
||||||
@@ -228,6 +251,15 @@ impl<'a, S: Sink> Sink for &'a mut S {
|
|||||||
(**self).context_break(searcher)
|
(**self).context_break(searcher)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn binary_data(
|
||||||
|
&mut self,
|
||||||
|
searcher: &Searcher,
|
||||||
|
binary_byte_offset: u64,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
(**self).binary_data(searcher, binary_byte_offset)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn begin(
|
fn begin(
|
||||||
&mut self,
|
&mut self,
|
||||||
@@ -275,6 +307,15 @@ impl<S: Sink + ?Sized> Sink for Box<S> {
|
|||||||
(**self).context_break(searcher)
|
(**self).context_break(searcher)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn binary_data(
|
||||||
|
&mut self,
|
||||||
|
searcher: &Searcher,
|
||||||
|
binary_byte_offset: u64,
|
||||||
|
) -> Result<bool, S::Error> {
|
||||||
|
(**self).binary_data(searcher, binary_byte_offset)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
fn begin(
|
fn begin(
|
||||||
&mut self,
|
&mut self,
|
||||||
|
@@ -1,7 +1,7 @@
|
|||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use bstr::B;
|
use bstr::ByteSlice;
|
||||||
use grep_matcher::{
|
use grep_matcher::{
|
||||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||||
};
|
};
|
||||||
@@ -94,7 +94,7 @@ impl Matcher for RegexMatcher {
|
|||||||
}
|
}
|
||||||
// Make it interesting and return the last byte in the current
|
// Make it interesting and return the last byte in the current
|
||||||
// line.
|
// line.
|
||||||
let i = B(haystack)
|
let i = haystack
|
||||||
.find_byte(self.line_term.unwrap().as_byte())
|
.find_byte(self.line_term.unwrap().as_byte())
|
||||||
.map(|i| i)
|
.map(|i| i)
|
||||||
.unwrap_or(haystack.len() - 1);
|
.unwrap_or(haystack.len() - 1);
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "grep"
|
name = "grep"
|
||||||
version = "0.2.3" #:version
|
version = "0.2.4" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
Fast line oriented regex searching as a library.
|
Fast line oriented regex searching as a library.
|
||||||
@@ -13,12 +13,12 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
|||||||
license = "Unlicense/MIT"
|
license = "Unlicense/MIT"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
grep-cli = { version = "0.1.1", path = "../grep-cli" }
|
grep-cli = { version = "0.1.2", path = "../grep-cli" }
|
||||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
grep-matcher = { version = "0.1.2", path = "../grep-matcher" }
|
||||||
grep-pcre2 = { version = "0.1.2", path = "../grep-pcre2", optional = true }
|
grep-pcre2 = { version = "0.1.3", path = "../grep-pcre2", optional = true }
|
||||||
grep-printer = { version = "0.1.1", path = "../grep-printer" }
|
grep-printer = { version = "0.1.2", path = "../grep-printer" }
|
||||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
grep-regex = { version = "0.1.3", path = "../grep-regex" }
|
||||||
grep-searcher = { version = "0.1.1", path = "../grep-searcher" }
|
grep-searcher = { version = "0.1.4", path = "../grep-searcher" }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
termcolor = "1.0.4"
|
termcolor = "1.0.4"
|
||||||
|
@@ -21,7 +21,7 @@ fn main() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn try_main() -> Result<(), Box<Error>> {
|
fn try_main() -> Result<(), Box<dyn Error>> {
|
||||||
let mut args: Vec<OsString> = env::args_os().collect();
|
let mut args: Vec<OsString> = env::args_os().collect();
|
||||||
if args.len() < 2 {
|
if args.len() < 2 {
|
||||||
return Err("Usage: simplegrep <pattern> [<path> ...]".into());
|
return Err("Usage: simplegrep <pattern> [<path> ...]".into());
|
||||||
@@ -32,7 +32,7 @@ fn try_main() -> Result<(), Box<Error>> {
|
|||||||
search(cli::pattern_from_os(&args[1])?, &args[2..])
|
search(cli::pattern_from_os(&args[1])?, &args[2..])
|
||||||
}
|
}
|
||||||
|
|
||||||
fn search(pattern: &str, paths: &[OsString]) -> Result<(), Box<Error>> {
|
fn search(pattern: &str, paths: &[OsString]) -> Result<(), Box<dyn Error>> {
|
||||||
let matcher = RegexMatcher::new_line_matcher(&pattern)?;
|
let matcher = RegexMatcher::new_line_matcher(&pattern)?;
|
||||||
let mut searcher = SearcherBuilder::new()
|
let mut searcher = SearcherBuilder::new()
|
||||||
.binary_detection(BinaryDetection::quit(b'\x00'))
|
.binary_detection(BinaryDetection::quit(b'\x00'))
|
||||||
|
@@ -1,6 +1,6 @@
|
|||||||
[package]
|
[package]
|
||||||
name = "ignore"
|
name = "ignore"
|
||||||
version = "0.4.6" #:version
|
version = "0.4.11" #:version
|
||||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||||
description = """
|
description = """
|
||||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||||
@@ -18,21 +18,18 @@ name = "ignore"
|
|||||||
bench = false
|
bench = false
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
crossbeam-channel = "0.3.6"
|
crossbeam-channel = "0.4.0"
|
||||||
globset = { version = "0.4.2", path = "../globset" }
|
globset = { version = "0.4.3", path = "../globset" }
|
||||||
lazy_static = "1.1"
|
lazy_static = "1.1"
|
||||||
log = "0.4.5"
|
log = "0.4.5"
|
||||||
memchr = "2.1"
|
memchr = "2.1"
|
||||||
regex = "1.1"
|
regex = "1.1"
|
||||||
same-file = "1.0.4"
|
same-file = "1.0.4"
|
||||||
thread_local = "0.3.6"
|
thread_local = "1"
|
||||||
walkdir = "2.2.7"
|
walkdir = "2.2.7"
|
||||||
|
|
||||||
[target.'cfg(windows)'.dependencies.winapi-util]
|
[target.'cfg(windows)'.dependencies.winapi-util]
|
||||||
version = "0.1.2"
|
version = "0.1.2"
|
||||||
|
|
||||||
[dev-dependencies]
|
|
||||||
tempfile = "3.0.5"
|
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
simd-accel = ["globset/simd-accel"]
|
simd-accel = ["globset/simd-accel"]
|
||||||
|
@@ -14,13 +14,13 @@
|
|||||||
// well.
|
// well.
|
||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::ffi::{OsString, OsStr};
|
use std::ffi::{OsStr, OsString};
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::{Arc, RwLock};
|
use std::sync::{Arc, RwLock};
|
||||||
|
|
||||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||||
use pathutil::{is_hidden, strip_prefix};
|
|
||||||
use overrides::{self, Override};
|
use overrides::{self, Override};
|
||||||
|
use pathutil::{is_hidden, strip_prefix};
|
||||||
use types::{self, Types};
|
use types::{self, Types};
|
||||||
use walk::DirEntry;
|
use walk::DirEntry;
|
||||||
use {Error, Match, PartialErrorBuilder};
|
use {Error, Match, PartialErrorBuilder};
|
||||||
@@ -152,10 +152,7 @@ impl Ignore {
|
|||||||
///
|
///
|
||||||
/// Note that this can only be called on an `Ignore` matcher with no
|
/// Note that this can only be called on an `Ignore` matcher with no
|
||||||
/// parents (i.e., `is_root` returns `true`). This will panic otherwise.
|
/// parents (i.e., `is_root` returns `true`). This will panic otherwise.
|
||||||
pub fn add_parents<P: AsRef<Path>>(
|
pub fn add_parents<P: AsRef<Path>>(&self, path: P) -> (Ignore, Option<Error>) {
|
||||||
&self,
|
|
||||||
path: P,
|
|
||||||
) -> (Ignore, Option<Error>) {
|
|
||||||
if !self.0.opts.parents
|
if !self.0.opts.parents
|
||||||
&& !self.0.opts.git_ignore
|
&& !self.0.opts.git_ignore
|
||||||
&& !self.0.opts.git_exclude
|
&& !self.0.opts.git_exclude
|
||||||
@@ -197,7 +194,11 @@ impl Ignore {
|
|||||||
errs.maybe_push(err);
|
errs.maybe_push(err);
|
||||||
igtmp.is_absolute_parent = true;
|
igtmp.is_absolute_parent = true;
|
||||||
igtmp.absolute_base = Some(absolute_base.clone());
|
igtmp.absolute_base = Some(absolute_base.clone());
|
||||||
igtmp.has_git = parent.join(".git").exists();
|
igtmp.has_git = if self.0.opts.git_ignore {
|
||||||
|
parent.join(".git").exists()
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
ig = Ignore(Arc::new(igtmp));
|
ig = Ignore(Arc::new(igtmp));
|
||||||
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
||||||
}
|
}
|
||||||
@@ -212,10 +213,7 @@ impl Ignore {
|
|||||||
/// returned if it exists.
|
/// returned if it exists.
|
||||||
///
|
///
|
||||||
/// Note that all I/O errors are completely ignored.
|
/// Note that all I/O errors are completely ignored.
|
||||||
pub fn add_child<P: AsRef<Path>>(
|
pub fn add_child<P: AsRef<Path>>(&self, dir: P) -> (Ignore, Option<Error>) {
|
||||||
&self,
|
|
||||||
dir: P,
|
|
||||||
) -> (Ignore, Option<Error>) {
|
|
||||||
let (ig, err) = self.add_child_path(dir.as_ref());
|
let (ig, err) = self.add_child_path(dir.as_ref());
|
||||||
(Ignore(Arc::new(ig)), err)
|
(Ignore(Arc::new(ig)), err)
|
||||||
}
|
}
|
||||||
@@ -223,12 +221,10 @@ impl Ignore {
|
|||||||
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
||||||
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
||||||
let mut errs = PartialErrorBuilder::default();
|
let mut errs = PartialErrorBuilder::default();
|
||||||
let custom_ig_matcher =
|
let custom_ig_matcher = if self.0.custom_ignore_filenames.is_empty() {
|
||||||
if self.0.custom_ignore_filenames.is_empty() {
|
|
||||||
Gitignore::empty()
|
Gitignore::empty()
|
||||||
} else {
|
} else {
|
||||||
let (m, err) =
|
let (m, err) = create_gitignore(
|
||||||
create_gitignore(
|
|
||||||
&dir,
|
&dir,
|
||||||
&self.0.custom_ignore_filenames,
|
&self.0.custom_ignore_filenames,
|
||||||
self.0.opts.ignore_case_insensitive,
|
self.0.opts.ignore_case_insensitive,
|
||||||
@@ -236,38 +232,26 @@ impl Ignore {
|
|||||||
errs.maybe_push(err);
|
errs.maybe_push(err);
|
||||||
m
|
m
|
||||||
};
|
};
|
||||||
let ig_matcher =
|
let ig_matcher = if !self.0.opts.ignore {
|
||||||
if !self.0.opts.ignore {
|
|
||||||
Gitignore::empty()
|
Gitignore::empty()
|
||||||
} else {
|
} else {
|
||||||
let (m, err) =
|
let (m, err) =
|
||||||
create_gitignore(
|
create_gitignore(&dir, &[".ignore"], self.0.opts.ignore_case_insensitive);
|
||||||
&dir,
|
|
||||||
&[".ignore"],
|
|
||||||
self.0.opts.ignore_case_insensitive,
|
|
||||||
);
|
|
||||||
errs.maybe_push(err);
|
errs.maybe_push(err);
|
||||||
m
|
m
|
||||||
};
|
};
|
||||||
let gi_matcher =
|
let gi_matcher = if !self.0.opts.git_ignore {
|
||||||
if !self.0.opts.git_ignore {
|
|
||||||
Gitignore::empty()
|
Gitignore::empty()
|
||||||
} else {
|
} else {
|
||||||
let (m, err) =
|
let (m, err) =
|
||||||
create_gitignore(
|
create_gitignore(&dir, &[".gitignore"], self.0.opts.ignore_case_insensitive);
|
||||||
&dir,
|
|
||||||
&[".gitignore"],
|
|
||||||
self.0.opts.ignore_case_insensitive,
|
|
||||||
);
|
|
||||||
errs.maybe_push(err);
|
errs.maybe_push(err);
|
||||||
m
|
m
|
||||||
};
|
};
|
||||||
let gi_exclude_matcher =
|
let gi_exclude_matcher = if !self.0.opts.git_exclude {
|
||||||
if !self.0.opts.git_exclude {
|
|
||||||
Gitignore::empty()
|
Gitignore::empty()
|
||||||
} else {
|
} else {
|
||||||
let (m, err) =
|
let (m, err) = create_gitignore(
|
||||||
create_gitignore(
|
|
||||||
&dir,
|
&dir,
|
||||||
&[".git/info/exclude"],
|
&[".git/info/exclude"],
|
||||||
self.0.opts.ignore_case_insensitive,
|
self.0.opts.ignore_case_insensitive,
|
||||||
@@ -275,6 +259,11 @@ impl Ignore {
|
|||||||
errs.maybe_push(err);
|
errs.maybe_push(err);
|
||||||
m
|
m
|
||||||
};
|
};
|
||||||
|
let has_git = if self.0.opts.git_ignore {
|
||||||
|
dir.join(".git").exists()
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
};
|
||||||
let ig = IgnoreInner {
|
let ig = IgnoreInner {
|
||||||
compiled: self.0.compiled.clone(),
|
compiled: self.0.compiled.clone(),
|
||||||
dir: dir.to_path_buf(),
|
dir: dir.to_path_buf(),
|
||||||
@@ -290,7 +279,7 @@ impl Ignore {
|
|||||||
git_global_matcher: self.0.git_global_matcher.clone(),
|
git_global_matcher: self.0.git_global_matcher.clone(),
|
||||||
git_ignore_matcher: gi_matcher,
|
git_ignore_matcher: gi_matcher,
|
||||||
git_exclude_matcher: gi_exclude_matcher,
|
git_exclude_matcher: gi_exclude_matcher,
|
||||||
has_git: dir.join(".git").exists(),
|
has_git: has_git,
|
||||||
opts: self.0.opts,
|
opts: self.0.opts,
|
||||||
};
|
};
|
||||||
(ig, errs.into_error_option())
|
(ig, errs.into_error_option())
|
||||||
@@ -302,16 +291,16 @@ impl Ignore {
|
|||||||
let has_custom_ignore_files = !self.0.custom_ignore_filenames.is_empty();
|
let has_custom_ignore_files = !self.0.custom_ignore_filenames.is_empty();
|
||||||
let has_explicit_ignores = !self.0.explicit_ignores.is_empty();
|
let has_explicit_ignores = !self.0.explicit_ignores.is_empty();
|
||||||
|
|
||||||
opts.ignore || opts.git_global || opts.git_ignore
|
opts.ignore
|
||||||
|| opts.git_exclude || has_custom_ignore_files
|
|| opts.git_global
|
||||||
|
|| opts.git_ignore
|
||||||
|
|| opts.git_exclude
|
||||||
|
|| has_custom_ignore_files
|
||||||
|| has_explicit_ignores
|
|| has_explicit_ignores
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Like `matched`, but works with a directory entry instead.
|
/// Like `matched`, but works with a directory entry instead.
|
||||||
pub fn matched_dir_entry<'a>(
|
pub fn matched_dir_entry<'a>(&'a self, dent: &DirEntry) -> Match<IgnoreMatch<'a>> {
|
||||||
&'a self,
|
|
||||||
dent: &DirEntry,
|
|
||||||
) -> Match<IgnoreMatch<'a>> {
|
|
||||||
let m = self.matched(dent.path(), dent.is_dir());
|
let m = self.matched(dent.path(), dent.is_dir());
|
||||||
if m.is_none() && self.0.opts.hidden && is_hidden(dent) {
|
if m.is_none() && self.0.opts.hidden && is_hidden(dent) {
|
||||||
return Match::Ignore(IgnoreMatch::hidden());
|
return Match::Ignore(IgnoreMatch::hidden());
|
||||||
@@ -323,11 +312,7 @@ impl Ignore {
|
|||||||
/// ignored or not.
|
/// ignored or not.
|
||||||
///
|
///
|
||||||
/// The match contains information about its origin.
|
/// The match contains information about its origin.
|
||||||
fn matched<'a, P: AsRef<Path>>(
|
fn matched<'a, P: AsRef<Path>>(&'a self, path: P, is_dir: bool) -> Match<IgnoreMatch<'a>> {
|
||||||
&'a self,
|
|
||||||
path: P,
|
|
||||||
is_dir: bool,
|
|
||||||
) -> Match<IgnoreMatch<'a>> {
|
|
||||||
// We need to be careful with our path. If it has a leading ./, then
|
// We need to be careful with our path. If it has a leading ./, then
|
||||||
// strip it because it causes nothing but trouble.
|
// strip it because it causes nothing but trouble.
|
||||||
let mut path = path.as_ref();
|
let mut path = path.as_ref();
|
||||||
@@ -339,8 +324,10 @@ impl Ignore {
|
|||||||
// return that result immediately. Overrides have the highest
|
// return that result immediately. Overrides have the highest
|
||||||
// precedence.
|
// precedence.
|
||||||
if !self.0.overrides.is_empty() {
|
if !self.0.overrides.is_empty() {
|
||||||
let mat =
|
let mat = self
|
||||||
self.0.overrides.matched(path, is_dir)
|
.0
|
||||||
|
.overrides
|
||||||
|
.matched(path, is_dir)
|
||||||
.map(IgnoreMatch::overrides);
|
.map(IgnoreMatch::overrides);
|
||||||
if !mat.is_none() {
|
if !mat.is_none() {
|
||||||
return mat;
|
return mat;
|
||||||
@@ -356,8 +343,7 @@ impl Ignore {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if !self.0.types.is_empty() {
|
if !self.0.types.is_empty() {
|
||||||
let mat =
|
let mat = self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
||||||
self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
|
||||||
if mat.is_ignore() {
|
if mat.is_ignore() {
|
||||||
return mat;
|
return mat;
|
||||||
} else if mat.is_whitelist() {
|
} else if mat.is_whitelist() {
|
||||||
@@ -369,34 +355,39 @@ impl Ignore {
|
|||||||
|
|
||||||
/// Performs matching only on the ignore files for this directory and
|
/// Performs matching only on the ignore files for this directory and
|
||||||
/// all parent directories.
|
/// all parent directories.
|
||||||
fn matched_ignore<'a>(
|
fn matched_ignore<'a>(&'a self, path: &Path, is_dir: bool) -> Match<IgnoreMatch<'a>> {
|
||||||
&'a self,
|
let (mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) = (
|
||||||
path: &Path,
|
Match::None,
|
||||||
is_dir: bool,
|
Match::None,
|
||||||
) -> Match<IgnoreMatch<'a>> {
|
Match::None,
|
||||||
let (mut m_custom_ignore, mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) =
|
Match::None,
|
||||||
(Match::None, Match::None, Match::None, Match::None, Match::None);
|
Match::None,
|
||||||
|
);
|
||||||
let any_git = self.parents().any(|ig| ig.0.has_git);
|
let any_git = self.parents().any(|ig| ig.0.has_git);
|
||||||
let mut saw_git = false;
|
let mut saw_git = false;
|
||||||
for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
|
for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
|
||||||
if m_custom_ignore.is_none() {
|
if m_custom_ignore.is_none() {
|
||||||
m_custom_ignore =
|
m_custom_ignore =
|
||||||
ig.0.custom_ignore_matcher.matched(path, is_dir)
|
ig.0.custom_ignore_matcher
|
||||||
|
.matched(path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
if m_ignore.is_none() {
|
if m_ignore.is_none() {
|
||||||
m_ignore =
|
m_ignore =
|
||||||
ig.0.ignore_matcher.matched(path, is_dir)
|
ig.0.ignore_matcher
|
||||||
|
.matched(path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
if any_git && !saw_git && m_gi.is_none() {
|
if any_git && !saw_git && m_gi.is_none() {
|
||||||
m_gi =
|
m_gi =
|
||||||
ig.0.git_ignore_matcher.matched(path, is_dir)
|
ig.0.git_ignore_matcher
|
||||||
|
.matched(path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
if any_git && !saw_git && m_gi_exclude.is_none() {
|
if any_git && !saw_git && m_gi_exclude.is_none() {
|
||||||
m_gi_exclude =
|
m_gi_exclude =
|
||||||
ig.0.git_exclude_matcher.matched(path, is_dir)
|
ig.0.git_exclude_matcher
|
||||||
|
.matched(path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
saw_git = saw_git || ig.0.has_git;
|
saw_git = saw_git || ig.0.has_git;
|
||||||
@@ -407,22 +398,26 @@ impl Ignore {
|
|||||||
for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) {
|
for ig in self.parents().skip_while(|ig| !ig.0.is_absolute_parent) {
|
||||||
if m_custom_ignore.is_none() {
|
if m_custom_ignore.is_none() {
|
||||||
m_custom_ignore =
|
m_custom_ignore =
|
||||||
ig.0.custom_ignore_matcher.matched(&path, is_dir)
|
ig.0.custom_ignore_matcher
|
||||||
|
.matched(&path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
if m_ignore.is_none() {
|
if m_ignore.is_none() {
|
||||||
m_ignore =
|
m_ignore =
|
||||||
ig.0.ignore_matcher.matched(&path, is_dir)
|
ig.0.ignore_matcher
|
||||||
|
.matched(&path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
if any_git && !saw_git && m_gi.is_none() {
|
if any_git && !saw_git && m_gi.is_none() {
|
||||||
m_gi =
|
m_gi =
|
||||||
ig.0.git_ignore_matcher.matched(&path, is_dir)
|
ig.0.git_ignore_matcher
|
||||||
|
.matched(&path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
if any_git && !saw_git && m_gi_exclude.is_none() {
|
if any_git && !saw_git && m_gi_exclude.is_none() {
|
||||||
m_gi_exclude =
|
m_gi_exclude =
|
||||||
ig.0.git_exclude_matcher.matched(&path, is_dir)
|
ig.0.git_exclude_matcher
|
||||||
|
.matched(&path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore);
|
.map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
saw_git = saw_git || ig.0.has_git;
|
saw_git = saw_git || ig.0.has_git;
|
||||||
@@ -435,16 +430,21 @@ impl Ignore {
|
|||||||
}
|
}
|
||||||
m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
|
m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
|
||||||
}
|
}
|
||||||
let m_global =
|
let m_global = if any_git {
|
||||||
if any_git {
|
self.0
|
||||||
self.0.git_global_matcher
|
.git_global_matcher
|
||||||
.matched(&path, is_dir)
|
.matched(&path, is_dir)
|
||||||
.map(IgnoreMatch::gitignore)
|
.map(IgnoreMatch::gitignore)
|
||||||
} else {
|
} else {
|
||||||
Match::None
|
Match::None
|
||||||
};
|
};
|
||||||
|
|
||||||
m_custom_ignore.or(m_ignore).or(m_gi).or(m_gi_exclude).or(m_global).or(m_explicit)
|
m_custom_ignore
|
||||||
|
.or(m_ignore)
|
||||||
|
.or(m_gi)
|
||||||
|
.or(m_gi_exclude)
|
||||||
|
.or(m_global)
|
||||||
|
.or(m_explicit)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an iterator over parent ignore matchers, including this one.
|
/// Returns an iterator over parent ignore matchers, including this one.
|
||||||
@@ -524,8 +524,7 @@ impl IgnoreBuilder {
|
|||||||
/// The matcher returned won't match anything until ignore rules from
|
/// The matcher returned won't match anything until ignore rules from
|
||||||
/// directories are added to it.
|
/// directories are added to it.
|
||||||
pub fn build(&self) -> Ignore {
|
pub fn build(&self) -> Ignore {
|
||||||
let git_global_matcher =
|
let git_global_matcher = if !self.opts.git_global {
|
||||||
if !self.opts.git_global {
|
|
||||||
Gitignore::empty()
|
Gitignore::empty()
|
||||||
} else {
|
} else {
|
||||||
let mut builder = GitignoreBuilder::new("");
|
let mut builder = GitignoreBuilder::new("");
|
||||||
@@ -593,9 +592,10 @@ impl IgnoreBuilder {
|
|||||||
/// later names.
|
/// later names.
|
||||||
pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
|
pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
|
||||||
&mut self,
|
&mut self,
|
||||||
file_name: S
|
file_name: S,
|
||||||
) -> &mut IgnoreBuilder {
|
) -> &mut IgnoreBuilder {
|
||||||
self.custom_ignore_filenames.push(file_name.as_ref().to_os_string());
|
self.custom_ignore_filenames
|
||||||
|
.push(file_name.as_ref().to_os_string());
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -667,10 +667,7 @@ impl IgnoreBuilder {
|
|||||||
/// Process ignore files case insensitively
|
/// Process ignore files case insensitively
|
||||||
///
|
///
|
||||||
/// This is disabled by default.
|
/// This is disabled by default.
|
||||||
pub fn ignore_case_insensitive(
|
pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||||
&mut self,
|
|
||||||
yes: bool,
|
|
||||||
) -> &mut IgnoreBuilder {
|
|
||||||
self.opts.ignore_case_insensitive = yes;
|
self.opts.ignore_case_insensitive = yes;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
@@ -710,10 +707,9 @@ mod tests {
|
|||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use tempfile::{self, TempDir};
|
|
||||||
|
|
||||||
use dir::IgnoreBuilder;
|
use dir::IgnoreBuilder;
|
||||||
use gitignore::Gitignore;
|
use gitignore::Gitignore;
|
||||||
|
use tests::TempDir;
|
||||||
use Error;
|
use Error;
|
||||||
|
|
||||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||||
@@ -732,19 +728,21 @@ mod tests {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tmpdir(prefix: &str) -> TempDir {
|
fn tmpdir() -> TempDir {
|
||||||
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
|
TempDir::new().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn explicit_ignore() {
|
fn explicit_ignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join("not-an-ignore"), "foo\n!bar");
|
wfile(td.path().join("not-an-ignore"), "foo\n!bar");
|
||||||
|
|
||||||
let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
|
let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
|
||||||
assert!(err.is_none());
|
assert!(err.is_none());
|
||||||
let (ig, err) = IgnoreBuilder::new()
|
let (ig, err) = IgnoreBuilder::new()
|
||||||
.add_ignore(gi).build().add_child(td.path());
|
.add_ignore(gi)
|
||||||
|
.build()
|
||||||
|
.add_child(td.path());
|
||||||
assert!(err.is_none());
|
assert!(err.is_none());
|
||||||
assert!(ig.matched("foo", false).is_ignore());
|
assert!(ig.matched("foo", false).is_ignore());
|
||||||
assert!(ig.matched("bar", false).is_whitelist());
|
assert!(ig.matched("bar", false).is_whitelist());
|
||||||
@@ -753,7 +751,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn git_exclude() {
|
fn git_exclude() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git/info"));
|
mkdirp(td.path().join(".git/info"));
|
||||||
wfile(td.path().join(".git/info/exclude"), "foo\n!bar");
|
wfile(td.path().join(".git/info/exclude"), "foo\n!bar");
|
||||||
|
|
||||||
@@ -766,7 +764,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn gitignore() {
|
fn gitignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||||
|
|
||||||
@@ -779,7 +777,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn gitignore_no_git() {
|
fn gitignore_no_git() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||||
|
|
||||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||||
@@ -791,7 +789,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn ignore() {
|
fn ignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".ignore"), "foo\n!bar");
|
wfile(td.path().join(".ignore"), "foo\n!bar");
|
||||||
|
|
||||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||||
@@ -803,13 +801,14 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn custom_ignore() {
|
fn custom_ignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
let custom_ignore = ".customignore";
|
let custom_ignore = ".customignore";
|
||||||
wfile(td.path().join(custom_ignore), "foo\n!bar");
|
wfile(td.path().join(custom_ignore), "foo\n!bar");
|
||||||
|
|
||||||
let (ig, err) = IgnoreBuilder::new()
|
let (ig, err) = IgnoreBuilder::new()
|
||||||
.add_custom_ignore_filename(custom_ignore)
|
.add_custom_ignore_filename(custom_ignore)
|
||||||
.build().add_child(td.path());
|
.build()
|
||||||
|
.add_child(td.path());
|
||||||
assert!(err.is_none());
|
assert!(err.is_none());
|
||||||
assert!(ig.matched("foo", false).is_ignore());
|
assert!(ig.matched("foo", false).is_ignore());
|
||||||
assert!(ig.matched("bar", false).is_whitelist());
|
assert!(ig.matched("bar", false).is_whitelist());
|
||||||
@@ -819,14 +818,15 @@ mod tests {
|
|||||||
// Tests that a custom ignore file will override an .ignore.
|
// Tests that a custom ignore file will override an .ignore.
|
||||||
#[test]
|
#[test]
|
||||||
fn custom_ignore_over_ignore() {
|
fn custom_ignore_over_ignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
let custom_ignore = ".customignore";
|
let custom_ignore = ".customignore";
|
||||||
wfile(td.path().join(".ignore"), "foo");
|
wfile(td.path().join(".ignore"), "foo");
|
||||||
wfile(td.path().join(custom_ignore), "!foo");
|
wfile(td.path().join(custom_ignore), "!foo");
|
||||||
|
|
||||||
let (ig, err) = IgnoreBuilder::new()
|
let (ig, err) = IgnoreBuilder::new()
|
||||||
.add_custom_ignore_filename(custom_ignore)
|
.add_custom_ignore_filename(custom_ignore)
|
||||||
.build().add_child(td.path());
|
.build()
|
||||||
|
.add_child(td.path());
|
||||||
assert!(err.is_none());
|
assert!(err.is_none());
|
||||||
assert!(ig.matched("foo", false).is_whitelist());
|
assert!(ig.matched("foo", false).is_whitelist());
|
||||||
}
|
}
|
||||||
@@ -834,7 +834,7 @@ mod tests {
|
|||||||
// Tests that earlier custom ignore files have lower precedence than later.
|
// Tests that earlier custom ignore files have lower precedence than later.
|
||||||
#[test]
|
#[test]
|
||||||
fn custom_ignore_precedence() {
|
fn custom_ignore_precedence() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
let custom_ignore1 = ".customignore1";
|
let custom_ignore1 = ".customignore1";
|
||||||
let custom_ignore2 = ".customignore2";
|
let custom_ignore2 = ".customignore2";
|
||||||
wfile(td.path().join(custom_ignore1), "foo");
|
wfile(td.path().join(custom_ignore1), "foo");
|
||||||
@@ -843,7 +843,8 @@ mod tests {
|
|||||||
let (ig, err) = IgnoreBuilder::new()
|
let (ig, err) = IgnoreBuilder::new()
|
||||||
.add_custom_ignore_filename(custom_ignore1)
|
.add_custom_ignore_filename(custom_ignore1)
|
||||||
.add_custom_ignore_filename(custom_ignore2)
|
.add_custom_ignore_filename(custom_ignore2)
|
||||||
.build().add_child(td.path());
|
.build()
|
||||||
|
.add_child(td.path());
|
||||||
assert!(err.is_none());
|
assert!(err.is_none());
|
||||||
assert!(ig.matched("foo", false).is_whitelist());
|
assert!(ig.matched("foo", false).is_whitelist());
|
||||||
}
|
}
|
||||||
@@ -851,7 +852,7 @@ mod tests {
|
|||||||
// Tests that an .ignore will override a .gitignore.
|
// Tests that an .ignore will override a .gitignore.
|
||||||
#[test]
|
#[test]
|
||||||
fn ignore_over_gitignore() {
|
fn ignore_over_gitignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".gitignore"), "foo");
|
wfile(td.path().join(".gitignore"), "foo");
|
||||||
wfile(td.path().join(".ignore"), "!foo");
|
wfile(td.path().join(".ignore"), "!foo");
|
||||||
|
|
||||||
@@ -863,7 +864,7 @@ mod tests {
|
|||||||
// Tests that exclude has lower precedent than both .ignore and .gitignore.
|
// Tests that exclude has lower precedent than both .ignore and .gitignore.
|
||||||
#[test]
|
#[test]
|
||||||
fn exclude_lowest() {
|
fn exclude_lowest() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".gitignore"), "!foo");
|
wfile(td.path().join(".gitignore"), "!foo");
|
||||||
wfile(td.path().join(".ignore"), "!bar");
|
wfile(td.path().join(".ignore"), "!bar");
|
||||||
mkdirp(td.path().join(".git/info"));
|
mkdirp(td.path().join(".git/info"));
|
||||||
@@ -878,7 +879,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn errored() {
|
fn errored() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".gitignore"), "{foo");
|
wfile(td.path().join(".gitignore"), "{foo");
|
||||||
|
|
||||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||||
@@ -887,7 +888,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn errored_both() {
|
fn errored_both() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".gitignore"), "{foo");
|
wfile(td.path().join(".gitignore"), "{foo");
|
||||||
wfile(td.path().join(".ignore"), "{bar");
|
wfile(td.path().join(".ignore"), "{bar");
|
||||||
|
|
||||||
@@ -897,7 +898,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn errored_partial() {
|
fn errored_partial() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
||||||
|
|
||||||
@@ -908,7 +909,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn errored_partial_and_ignore() {
|
fn errored_partial_and_ignore() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
||||||
wfile(td.path().join(".ignore"), "!bar");
|
wfile(td.path().join(".ignore"), "!bar");
|
||||||
|
|
||||||
@@ -919,7 +920,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn not_present_empty() {
|
fn not_present_empty() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
|
|
||||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||||
assert!(err.is_none());
|
assert!(err.is_none());
|
||||||
@@ -929,7 +930,7 @@ mod tests {
|
|||||||
fn stops_at_git_dir() {
|
fn stops_at_git_dir() {
|
||||||
// This tests that .gitignore files beyond a .git barrier aren't
|
// This tests that .gitignore files beyond a .git barrier aren't
|
||||||
// matched, but .ignore files are.
|
// matched, but .ignore files are.
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
mkdirp(td.path().join("foo/.git"));
|
mkdirp(td.path().join("foo/.git"));
|
||||||
wfile(td.path().join(".gitignore"), "foo");
|
wfile(td.path().join(".gitignore"), "foo");
|
||||||
@@ -950,7 +951,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn absolute_parent() {
|
fn absolute_parent() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
mkdirp(td.path().join("foo"));
|
mkdirp(td.path().join("foo"));
|
||||||
wfile(td.path().join(".gitignore"), "bar");
|
wfile(td.path().join(".gitignore"), "bar");
|
||||||
@@ -973,7 +974,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn absolute_parent_anchored() {
|
fn absolute_parent_anchored() {
|
||||||
let td = tmpdir("ignore-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
mkdirp(td.path().join("src/llvm"));
|
mkdirp(td.path().join("src/llvm"));
|
||||||
wfile(td.path().join(".gitignore"), "/llvm/\nfoo");
|
wfile(td.path().join(".gitignore"), "/llvm/\nfoo");
|
||||||
|
@@ -249,7 +249,7 @@ impl Gitignore {
|
|||||||
return Match::None;
|
return Match::None;
|
||||||
}
|
}
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
let _matches = self.matches.as_ref().unwrap().get_default();
|
let _matches = self.matches.as_ref().unwrap().get_or_default();
|
||||||
let mut matches = _matches.borrow_mut();
|
let mut matches = _matches.borrow_mut();
|
||||||
let candidate = Candidate::new(path);
|
let candidate = Candidate::new(path);
|
||||||
self.set.matches_candidate_into(&candidate, &mut *matches);
|
self.set.matches_candidate_into(&candidate, &mut *matches);
|
||||||
@@ -537,7 +537,7 @@ impl GitignoreBuilder {
|
|||||||
///
|
///
|
||||||
/// Note that the file path returned may not exist.
|
/// Note that the file path returned may not exist.
|
||||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||||
// git supports $HOME/.gitconfig and $XDG_CONFIG_DIR/git/config. Notably,
|
// git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably,
|
||||||
// both can be active at the same time, where $HOME/.gitconfig takes
|
// both can be active at the same time, where $HOME/.gitconfig takes
|
||||||
// precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then
|
// precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then
|
||||||
// we're done.
|
// we're done.
|
||||||
@@ -568,7 +568,7 @@ fn gitconfig_home_contents() -> Option<Vec<u8>> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the file contents of git's global config file, if one exists, in
|
/// Returns the file contents of git's global config file, if one exists, in
|
||||||
/// the user's XDG_CONFIG_DIR directory.
|
/// the user's XDG_CONFIG_HOME directory.
|
||||||
fn gitconfig_xdg_contents() -> Option<Vec<u8>> {
|
fn gitconfig_xdg_contents() -> Option<Vec<u8>> {
|
||||||
let path = env::var_os("XDG_CONFIG_HOME")
|
let path = env::var_os("XDG_CONFIG_HOME")
|
||||||
.and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
|
.and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
|
||||||
|
@@ -55,8 +55,6 @@ extern crate log;
|
|||||||
extern crate memchr;
|
extern crate memchr;
|
||||||
extern crate regex;
|
extern crate regex;
|
||||||
extern crate same_file;
|
extern crate same_file;
|
||||||
#[cfg(test)]
|
|
||||||
extern crate tempfile;
|
|
||||||
extern crate thread_local;
|
extern crate thread_local;
|
||||||
extern crate walkdir;
|
extern crate walkdir;
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
@@ -442,3 +440,66 @@ impl<T> Match<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests {
|
||||||
|
use std::env;
|
||||||
|
use std::error;
|
||||||
|
use std::fs;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
use std::result;
|
||||||
|
|
||||||
|
/// A convenient result type alias.
|
||||||
|
pub type Result<T> =
|
||||||
|
result::Result<T, Box<dyn error::Error + Send + Sync>>;
|
||||||
|
|
||||||
|
macro_rules! err {
|
||||||
|
($($tt:tt)*) => {
|
||||||
|
Box::<dyn error::Error + Send + Sync>::from(format!($($tt)*))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A simple wrapper for creating a temporary directory that is
|
||||||
|
/// automatically deleted when it's dropped.
|
||||||
|
///
|
||||||
|
/// We use this in lieu of tempfile because tempfile brings in too many
|
||||||
|
/// dependencies.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct TempDir(PathBuf);
|
||||||
|
|
||||||
|
impl Drop for TempDir {
|
||||||
|
fn drop(&mut self) {
|
||||||
|
fs::remove_dir_all(&self.0).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TempDir {
|
||||||
|
/// Create a new empty temporary directory under the system's configured
|
||||||
|
/// temporary directory.
|
||||||
|
pub fn new() -> Result<TempDir> {
|
||||||
|
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||||
|
|
||||||
|
static TRIES: usize = 100;
|
||||||
|
static COUNTER: AtomicUsize = AtomicUsize::new(0);
|
||||||
|
|
||||||
|
let tmpdir = env::temp_dir();
|
||||||
|
for _ in 0..TRIES {
|
||||||
|
let count = COUNTER.fetch_add(1, Ordering::SeqCst);
|
||||||
|
let path = tmpdir.join("rust-ignore").join(count.to_string());
|
||||||
|
if path.is_dir() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
fs::create_dir_all(&path).map_err(|e| {
|
||||||
|
err!("failed to create {}: {}", path.display(), e)
|
||||||
|
})?;
|
||||||
|
return Ok(TempDir(path));
|
||||||
|
}
|
||||||
|
Err(err!("failed to create temp dir after {} tries", TRIES))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Return the underlying path to this temporary directory.
|
||||||
|
pub fn path(&self) -> &Path {
|
||||||
|
&self.0
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@@ -111,7 +111,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("brotli", &["*.br"]),
|
("brotli", &["*.br"]),
|
||||||
("buildstream", &["*.bst"]),
|
("buildstream", &["*.bst"]),
|
||||||
("bzip2", &["*.bz2", "*.tbz2"]),
|
("bzip2", &["*.bz2", "*.tbz2"]),
|
||||||
("c", &["*.c", "*.h", "*.H", "*.cats"]),
|
("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
|
||||||
("cabal", &["*.cabal"]),
|
("cabal", &["*.cabal"]),
|
||||||
("cbor", &["*.cbor"]),
|
("cbor", &["*.cbor"]),
|
||||||
("ceylon", &["*.ceylon"]),
|
("ceylon", &["*.ceylon"]),
|
||||||
@@ -121,8 +121,8 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("creole", &["*.creole"]),
|
("creole", &["*.creole"]),
|
||||||
("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
|
("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
|
||||||
("cpp", &[
|
("cpp", &[
|
||||||
"*.C", "*.cc", "*.cpp", "*.cxx",
|
"*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
|
||||||
"*.h", "*.H", "*.hh", "*.hpp", "*.hxx", "*.inl",
|
"*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
|
||||||
]),
|
]),
|
||||||
("crystal", &["Projectfile", "*.cr"]),
|
("crystal", &["Projectfile", "*.cr"]),
|
||||||
("cs", &["*.cs"]),
|
("cs", &["*.cs"]),
|
||||||
@@ -135,6 +135,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("d", &["*.d"]),
|
("d", &["*.d"]),
|
||||||
("dhall", &["*.dhall"]),
|
("dhall", &["*.dhall"]),
|
||||||
("docker", &["*Dockerfile*"]),
|
("docker", &["*Dockerfile*"]),
|
||||||
|
("edn", &["*.edn"]),
|
||||||
("elisp", &["*.el"]),
|
("elisp", &["*.el"]),
|
||||||
("elixir", &["*.ex", "*.eex", "*.exs"]),
|
("elixir", &["*.ex", "*.eex", "*.exs"]),
|
||||||
("elm", &["*.elm"]),
|
("elm", &["*.elm"]),
|
||||||
@@ -146,6 +147,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
"*.f90", "*.F90", "*.f95", "*.F95",
|
"*.f90", "*.F90", "*.f95", "*.F95",
|
||||||
]),
|
]),
|
||||||
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
||||||
|
("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
|
||||||
("gn", &["*.gn", "*.gni"]),
|
("gn", &["*.gn", "*.gni"]),
|
||||||
("go", &["*.go"]),
|
("go", &["*.go"]),
|
||||||
("gzip", &["*.gz", "*.tgz"]),
|
("gzip", &["*.gz", "*.tgz"]),
|
||||||
@@ -156,7 +158,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("hs", &["*.hs", "*.lhs"]),
|
("hs", &["*.hs", "*.lhs"]),
|
||||||
("html", &["*.htm", "*.html", "*.ejs"]),
|
("html", &["*.htm", "*.html", "*.ejs"]),
|
||||||
("idris", &["*.idr", "*.lidr"]),
|
("idris", &["*.idr", "*.lidr"]),
|
||||||
("java", &["*.java", "*.jsp"]),
|
("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
|
||||||
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
|
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
|
||||||
("js", &[
|
("js", &[
|
||||||
"*.js", "*.jsx", "*.vue",
|
"*.js", "*.jsx", "*.vue",
|
||||||
@@ -196,14 +198,16 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
"OFL-*[0-9]*",
|
"OFL-*[0-9]*",
|
||||||
]),
|
]),
|
||||||
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
||||||
|
("lock", &["*.lock", "package-lock.json"]),
|
||||||
("log", &["*.log"]),
|
("log", &["*.log"]),
|
||||||
("lua", &["*.lua"]),
|
("lua", &["*.lua"]),
|
||||||
("lzma", &["*.lzma"]),
|
("lzma", &["*.lzma"]),
|
||||||
("lz4", &["*.lz4"]),
|
("lz4", &["*.lz4"]),
|
||||||
("m4", &["*.ac", "*.m4"]),
|
("m4", &["*.ac", "*.m4"]),
|
||||||
("make", &[
|
("make", &[
|
||||||
"gnumakefile", "Gnumakefile", "GNUmakefile",
|
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
|
||||||
"makefile", "Makefile",
|
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
|
||||||
|
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
|
||||||
"*.mk", "*.mak"
|
"*.mk", "*.mak"
|
||||||
]),
|
]),
|
||||||
("mako", &["*.mako", "*.mao"]),
|
("mako", &["*.mako", "*.mao"]),
|
||||||
@@ -216,7 +220,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("msbuild", &[
|
("msbuild", &[
|
||||||
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
|
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets"
|
||||||
]),
|
]),
|
||||||
("nim", &["*.nim"]),
|
("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
|
||||||
("nix", &["*.nix"]),
|
("nix", &["*.nix"]),
|
||||||
("objc", &["*.h", "*.m"]),
|
("objc", &["*.h", "*.m"]),
|
||||||
("objcpp", &["*.h", "*.mm"]),
|
("objcpp", &["*.h", "*.mm"]),
|
||||||
@@ -238,6 +242,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("readme", &["README*", "*README"]),
|
("readme", &["README*", "*README"]),
|
||||||
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
|
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
|
||||||
("rdoc", &["*.rdoc"]),
|
("rdoc", &["*.rdoc"]),
|
||||||
|
("robot", &["*.robot"]),
|
||||||
("rst", &["*.rst"]),
|
("rst", &["*.rst"]),
|
||||||
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
|
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
|
||||||
("rust", &["*.rs"]),
|
("rust", &["*.rs"]),
|
||||||
@@ -299,7 +304,10 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
|||||||
("vimscript", &["*.vim"]),
|
("vimscript", &["*.vim"]),
|
||||||
("wiki", &["*.mediawiki", "*.wiki"]),
|
("wiki", &["*.mediawiki", "*.wiki"]),
|
||||||
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
||||||
("xml", &["*.xml", "*.xml.dist"]),
|
("xml", &[
|
||||||
|
"*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
|
||||||
|
"*.rng", "*.sch",
|
||||||
|
]),
|
||||||
("xz", &["*.xz", "*.txz"]),
|
("xz", &["*.xz", "*.txz"]),
|
||||||
("yacc", &["*.y"]),
|
("yacc", &["*.y"]),
|
||||||
("yaml", &["*.yaml", "*.yml"]),
|
("yaml", &["*.yaml", "*.yml"]),
|
||||||
@@ -507,7 +515,7 @@ impl Types {
|
|||||||
return Match::None;
|
return Match::None;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let mut matches = self.matches.get_default().borrow_mut();
|
let mut matches = self.matches.get_or_default().borrow_mut();
|
||||||
self.set.matches_into(name, &mut *matches);
|
self.set.matches_into(name, &mut *matches);
|
||||||
// The highest precedent match is the last one.
|
// The highest precedent match is the last one.
|
||||||
if let Some(&i) = matches.last() {
|
if let Some(&i) = matches.last() {
|
||||||
|
@@ -4,8 +4,8 @@ use std::fmt;
|
|||||||
use std::fs::{self, FileType, Metadata};
|
use std::fs::{self, FileType, Metadata};
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::Arc;
|
|
||||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||||
|
use std::sync::Arc;
|
||||||
use std::thread;
|
use std::thread;
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::vec;
|
use std::vec;
|
||||||
@@ -182,14 +182,14 @@ impl DirEntryInner {
|
|||||||
match *self {
|
match *self {
|
||||||
Stdin => {
|
Stdin => {
|
||||||
let err = Error::Io(io::Error::new(
|
let err = Error::Io(io::Error::new(
|
||||||
io::ErrorKind::Other, "<stdin> has no metadata"));
|
io::ErrorKind::Other,
|
||||||
|
"<stdin> has no metadata",
|
||||||
|
));
|
||||||
Err(err.with_path("<stdin>"))
|
Err(err.with_path("<stdin>"))
|
||||||
}
|
}
|
||||||
Walkdir(ref x) => {
|
Walkdir(ref x) => x
|
||||||
x.metadata().map_err(|err| {
|
.metadata()
|
||||||
Error::Io(io::Error::from(err)).with_path(x.path())
|
.map_err(|err| Error::Io(io::Error::from(err)).with_path(x.path())),
|
||||||
})
|
|
||||||
}
|
|
||||||
Raw(ref x) => x.metadata(),
|
Raw(ref x) => x.metadata(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -223,8 +223,8 @@ impl DirEntryInner {
|
|||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
fn ino(&self) -> Option<u64> {
|
fn ino(&self) -> Option<u64> {
|
||||||
use walkdir::DirEntryExt;
|
|
||||||
use self::DirEntryInner::*;
|
use self::DirEntryInner::*;
|
||||||
|
use walkdir::DirEntryExt;
|
||||||
match *self {
|
match *self {
|
||||||
Stdin => None,
|
Stdin => None,
|
||||||
Walkdir(ref x) => Some(x.ino()),
|
Walkdir(ref x) => Some(x.ino()),
|
||||||
@@ -297,7 +297,8 @@ impl DirEntryRaw {
|
|||||||
fs::metadata(&self.path)
|
fs::metadata(&self.path)
|
||||||
} else {
|
} else {
|
||||||
Ok(self.metadata.clone())
|
Ok(self.metadata.clone())
|
||||||
}.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
|
}
|
||||||
|
.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
@@ -306,7 +307,8 @@ impl DirEntryRaw {
|
|||||||
fs::metadata(&self.path)
|
fs::metadata(&self.path)
|
||||||
} else {
|
} else {
|
||||||
fs::symlink_metadata(&self.path)
|
fs::symlink_metadata(&self.path)
|
||||||
}.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
|
}
|
||||||
|
.map_err(|err| Error::Io(io::Error::from(err)).with_path(&self.path))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn file_type(&self) -> FileType {
|
fn file_type(&self) -> FileType {
|
||||||
@@ -314,7 +316,9 @@ impl DirEntryRaw {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn file_name(&self) -> &OsStr {
|
fn file_name(&self) -> &OsStr {
|
||||||
self.path.file_name().unwrap_or_else(|| self.path.as_os_str())
|
self.path
|
||||||
|
.file_name()
|
||||||
|
.unwrap_or_else(|| self.path.as_os_str())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn depth(&self) -> usize {
|
fn depth(&self) -> usize {
|
||||||
@@ -326,10 +330,7 @@ impl DirEntryRaw {
|
|||||||
self.ino
|
self.ino
|
||||||
}
|
}
|
||||||
|
|
||||||
fn from_entry(
|
fn from_entry(depth: usize, ent: &fs::DirEntry) -> Result<DirEntryRaw, Error> {
|
||||||
depth: usize,
|
|
||||||
ent: &fs::DirEntry,
|
|
||||||
) -> Result<DirEntryRaw, Error> {
|
|
||||||
let ty = ent.file_type().map_err(|err| {
|
let ty = ent.file_type().map_err(|err| {
|
||||||
let err = Error::Io(io::Error::from(err)).with_path(ent.path());
|
let err = Error::Io(io::Error::from(err)).with_path(ent.path());
|
||||||
Error::WithDepth {
|
Error::WithDepth {
|
||||||
@@ -379,15 +380,22 @@ impl DirEntryRaw {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(unix))]
|
// Placeholder implementation to allow compiling on non-standard platforms (e.g. wasm32).
|
||||||
fn from_path(
|
#[cfg(not(any(windows, unix)))]
|
||||||
|
fn from_entry_os(
|
||||||
depth: usize,
|
depth: usize,
|
||||||
pb: PathBuf,
|
ent: &fs::DirEntry,
|
||||||
link: bool,
|
ty: fs::FileType,
|
||||||
) -> Result<DirEntryRaw, Error> {
|
) -> Result<DirEntryRaw, Error> {
|
||||||
let md = fs::metadata(&pb).map_err(|err| {
|
Err(Error::Io(io::Error::new(
|
||||||
Error::Io(err).with_path(&pb)
|
io::ErrorKind::Other,
|
||||||
})?;
|
"unsupported platform",
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(windows)]
|
||||||
|
fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result<DirEntryRaw, Error> {
|
||||||
|
let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
|
||||||
Ok(DirEntryRaw {
|
Ok(DirEntryRaw {
|
||||||
path: pb,
|
path: pb,
|
||||||
ty: md.file_type(),
|
ty: md.file_type(),
|
||||||
@@ -398,16 +406,10 @@ impl DirEntryRaw {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
fn from_path(
|
fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result<DirEntryRaw, Error> {
|
||||||
depth: usize,
|
|
||||||
pb: PathBuf,
|
|
||||||
link: bool,
|
|
||||||
) -> Result<DirEntryRaw, Error> {
|
|
||||||
use std::os::unix::fs::MetadataExt;
|
use std::os::unix::fs::MetadataExt;
|
||||||
|
|
||||||
let md = fs::metadata(&pb).map_err(|err| {
|
let md = fs::metadata(&pb).map_err(|err| Error::Io(err).with_path(&pb))?;
|
||||||
Error::Io(err).with_path(&pb)
|
|
||||||
})?;
|
|
||||||
Ok(DirEntryRaw {
|
Ok(DirEntryRaw {
|
||||||
path: pb,
|
path: pb,
|
||||||
ty: md.file_type(),
|
ty: md.file_type(),
|
||||||
@@ -416,6 +418,15 @@ impl DirEntryRaw {
|
|||||||
ino: md.ino(),
|
ino: md.ino(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Placeholder implementation to allow compiling on non-standard platforms (e.g. wasm32).
|
||||||
|
#[cfg(not(any(windows, unix)))]
|
||||||
|
fn from_path(depth: usize, pb: PathBuf, link: bool) -> Result<DirEntryRaw, Error> {
|
||||||
|
Err(Error::Io(io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
"unsupported platform",
|
||||||
|
)))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// WalkBuilder builds a recursive directory iterator.
|
/// WalkBuilder builds a recursive directory iterator.
|
||||||
@@ -481,8 +492,8 @@ pub struct WalkBuilder {
|
|||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
enum Sorter {
|
enum Sorter {
|
||||||
ByName(Arc<Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>),
|
ByName(Arc<dyn Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static>),
|
||||||
ByPath(Arc<Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
|
ByPath(Arc<dyn Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static>),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Debug for WalkBuilder {
|
impl fmt::Debug for WalkBuilder {
|
||||||
@@ -525,7 +536,10 @@ impl WalkBuilder {
|
|||||||
let follow_links = self.follow_links;
|
let follow_links = self.follow_links;
|
||||||
let max_depth = self.max_depth;
|
let max_depth = self.max_depth;
|
||||||
let sorter = self.sorter.clone();
|
let sorter = self.sorter.clone();
|
||||||
let its = self.paths.iter().map(move |p| {
|
let its = self
|
||||||
|
.paths
|
||||||
|
.iter()
|
||||||
|
.map(move |p| {
|
||||||
if p == Path::new("-") {
|
if p == Path::new("-") {
|
||||||
(p.to_path_buf(), None)
|
(p.to_path_buf(), None)
|
||||||
} else {
|
} else {
|
||||||
@@ -538,20 +552,18 @@ impl WalkBuilder {
|
|||||||
if let Some(ref sorter) = sorter {
|
if let Some(ref sorter) = sorter {
|
||||||
match sorter.clone() {
|
match sorter.clone() {
|
||||||
Sorter::ByName(cmp) => {
|
Sorter::ByName(cmp) => {
|
||||||
wd = wd.sort_by(move |a, b| {
|
wd = wd.sort_by(move |a, b| cmp(a.file_name(), b.file_name()));
|
||||||
cmp(a.file_name(), b.file_name())
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
Sorter::ByPath(cmp) => {
|
Sorter::ByPath(cmp) => {
|
||||||
wd = wd.sort_by(move |a, b| {
|
wd = wd.sort_by(move |a, b| cmp(a.path(), b.path()));
|
||||||
cmp(a.path(), b.path())
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(p.to_path_buf(), Some(WalkEventIter::from(wd)))
|
(p.to_path_buf(), Some(WalkEventIter::from(wd)))
|
||||||
}
|
}
|
||||||
}).collect::<Vec<_>>().into_iter();
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
.into_iter();
|
||||||
let ig_root = self.ig_builder.build();
|
let ig_root = self.ig_builder.build();
|
||||||
Walk {
|
Walk {
|
||||||
its: its,
|
its: its,
|
||||||
@@ -635,8 +647,12 @@ impl WalkBuilder {
|
|||||||
let mut errs = PartialErrorBuilder::default();
|
let mut errs = PartialErrorBuilder::default();
|
||||||
errs.maybe_push(builder.add(path));
|
errs.maybe_push(builder.add(path));
|
||||||
match builder.build() {
|
match builder.build() {
|
||||||
Ok(gi) => { self.ig_builder.add_ignore(gi); }
|
Ok(gi) => {
|
||||||
Err(err) => { errs.push(err); }
|
self.ig_builder.add_ignore(gi);
|
||||||
|
}
|
||||||
|
Err(err) => {
|
||||||
|
errs.push(err);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
errs.into_error_option()
|
errs.into_error_option()
|
||||||
}
|
}
|
||||||
@@ -649,7 +665,7 @@ impl WalkBuilder {
|
|||||||
/// later names.
|
/// later names.
|
||||||
pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
|
pub fn add_custom_ignore_filename<S: AsRef<OsStr>>(
|
||||||
&mut self,
|
&mut self,
|
||||||
file_name: S
|
file_name: S,
|
||||||
) -> &mut WalkBuilder {
|
) -> &mut WalkBuilder {
|
||||||
self.ig_builder.add_custom_ignore_filename(file_name);
|
self.ig_builder.add_custom_ignore_filename(file_name);
|
||||||
self
|
self
|
||||||
@@ -786,11 +802,9 @@ impl WalkBuilder {
|
|||||||
/// by `sort_by_file_name`.
|
/// by `sort_by_file_name`.
|
||||||
///
|
///
|
||||||
/// Note that this is not used in the parallel iterator.
|
/// Note that this is not used in the parallel iterator.
|
||||||
pub fn sort_by_file_path<F>(
|
pub fn sort_by_file_path<F>(&mut self, cmp: F) -> &mut WalkBuilder
|
||||||
&mut self,
|
where
|
||||||
cmp: F,
|
F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static,
|
||||||
) -> &mut WalkBuilder
|
|
||||||
where F: Fn(&Path, &Path) -> cmp::Ordering + Send + Sync + 'static
|
|
||||||
{
|
{
|
||||||
self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
|
self.sorter = Some(Sorter::ByPath(Arc::new(cmp)));
|
||||||
self
|
self
|
||||||
@@ -808,7 +822,8 @@ impl WalkBuilder {
|
|||||||
///
|
///
|
||||||
/// Note that this is not used in the parallel iterator.
|
/// Note that this is not used in the parallel iterator.
|
||||||
pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
|
pub fn sort_by_file_name<F>(&mut self, cmp: F) -> &mut WalkBuilder
|
||||||
where F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static
|
where
|
||||||
|
F: Fn(&OsStr, &OsStr) -> cmp::Ordering + Send + Sync + 'static,
|
||||||
{
|
{
|
||||||
self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
|
self.sorter = Some(Sorter::ByName(Arc::new(cmp)));
|
||||||
self
|
self
|
||||||
@@ -989,7 +1004,11 @@ enum WalkEvent {
|
|||||||
|
|
||||||
impl From<WalkDir> for WalkEventIter {
|
impl From<WalkDir> for WalkEventIter {
|
||||||
fn from(it: WalkDir) -> WalkEventIter {
|
fn from(it: WalkDir) -> WalkEventIter {
|
||||||
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
WalkEventIter {
|
||||||
|
depth: 0,
|
||||||
|
it: it.into_iter(),
|
||||||
|
next: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1072,10 +1091,10 @@ impl WalkParallel {
|
|||||||
/// Execute the parallel recursive directory iterator. `mkf` is called
|
/// Execute the parallel recursive directory iterator. `mkf` is called
|
||||||
/// for each thread used for iteration. The function produced by `mkf`
|
/// for each thread used for iteration. The function produced by `mkf`
|
||||||
/// is then in turn called for each visited file path.
|
/// is then in turn called for each visited file path.
|
||||||
pub fn run<F>(
|
pub fn run<F>(self, mut mkf: F)
|
||||||
self,
|
where
|
||||||
mut mkf: F,
|
F: FnMut() -> Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static>,
|
||||||
) where F: FnMut() -> Box<FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static> {
|
{
|
||||||
let mut f = mkf();
|
let mut f = mkf();
|
||||||
let threads = self.threads();
|
let threads = self.threads();
|
||||||
// TODO: Figure out how to use a bounded channel here. With an
|
// TODO: Figure out how to use a bounded channel here. With an
|
||||||
@@ -1092,12 +1111,10 @@ impl WalkParallel {
|
|||||||
// Note that we only send directories. For files, we send to them the
|
// Note that we only send directories. For files, we send to them the
|
||||||
// callback directly.
|
// callback directly.
|
||||||
for path in self.paths {
|
for path in self.paths {
|
||||||
let (dent, root_device) =
|
let (dent, root_device) = if path == Path::new("-") {
|
||||||
if path == Path::new("-") {
|
|
||||||
(DirEntry::new_stdin(), None)
|
(DirEntry::new_stdin(), None)
|
||||||
} else {
|
} else {
|
||||||
let root_device =
|
let root_device = if !self.same_file_system {
|
||||||
if !self.same_file_system {
|
|
||||||
None
|
None
|
||||||
} else {
|
} else {
|
||||||
match device_num(&path) {
|
match device_num(&path) {
|
||||||
@@ -1112,9 +1129,7 @@ impl WalkParallel {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
match DirEntryRaw::from_path(0, path, false) {
|
match DirEntryRaw::from_path(0, path, false) {
|
||||||
Ok(dent) => {
|
Ok(dent) => (DirEntry::new_raw(dent, None), root_device),
|
||||||
(DirEntry::new_raw(dent, None), root_device)
|
|
||||||
}
|
|
||||||
Err(err) => {
|
Err(err) => {
|
||||||
if f(Err(err)).is_quit() {
|
if f(Err(err)).is_quit() {
|
||||||
return;
|
return;
|
||||||
@@ -1127,7 +1142,8 @@ impl WalkParallel {
|
|||||||
dent: dent,
|
dent: dent,
|
||||||
ignore: self.ig_root.clone(),
|
ignore: self.ig_root.clone(),
|
||||||
root_device: root_device,
|
root_device: root_device,
|
||||||
})).unwrap();
|
}))
|
||||||
|
.unwrap();
|
||||||
any_work = true;
|
any_work = true;
|
||||||
}
|
}
|
||||||
// ... but there's no need to start workers if we don't need them.
|
// ... but there's no need to start workers if we don't need them.
|
||||||
@@ -1253,7 +1269,7 @@ impl Work {
|
|||||||
/// Note that a worker is *both* a producer and a consumer.
|
/// Note that a worker is *both* a producer and a consumer.
|
||||||
struct Worker {
|
struct Worker {
|
||||||
/// The caller's callback.
|
/// The caller's callback.
|
||||||
f: Box<FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static>,
|
f: Box<dyn FnMut(Result<DirEntry, Error>) -> WalkState + Send + 'static>,
|
||||||
/// The push side of our mpmc queue.
|
/// The push side of our mpmc queue.
|
||||||
tx: channel::Sender<Message>,
|
tx: channel::Sender<Message>,
|
||||||
/// The receive side of our mpmc queue.
|
/// The receive side of our mpmc queue.
|
||||||
@@ -1319,8 +1335,7 @@ impl Worker {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
let descend =
|
let descend = if let Some(root_device) = work.root_device {
|
||||||
if let Some(root_device) = work.root_device {
|
|
||||||
match is_same_file_system(root_device, work.dent.path()) {
|
match is_same_file_system(root_device, work.dent.path()) {
|
||||||
Ok(true) => true,
|
Ok(true) => true,
|
||||||
Ok(false) => false,
|
Ok(false) => false,
|
||||||
@@ -1352,12 +1367,7 @@ impl Worker {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
for result in readdir {
|
for result in readdir {
|
||||||
let state = self.run_one(
|
let state = self.run_one(&work.ignore, depth + 1, work.root_device, result);
|
||||||
&work.ignore,
|
|
||||||
depth + 1,
|
|
||||||
work.root_device,
|
|
||||||
result,
|
|
||||||
);
|
|
||||||
if state.is_quit() {
|
if state.is_quit() {
|
||||||
self.quit_now();
|
self.quit_now();
|
||||||
return;
|
return;
|
||||||
@@ -1422,8 +1432,7 @@ impl Worker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let should_skip_path = should_skip_entry(ig, &dent);
|
let should_skip_path = should_skip_entry(ig, &dent);
|
||||||
let should_skip_filesize =
|
let should_skip_filesize = if self.max_filesize.is_some() && !dent.is_dir() {
|
||||||
if self.max_filesize.is_some() && !dent.is_dir() {
|
|
||||||
skip_filesize(
|
skip_filesize(
|
||||||
self.max_filesize.unwrap(),
|
self.max_filesize.unwrap(),
|
||||||
dent.path(),
|
dent.path(),
|
||||||
@@ -1434,11 +1443,13 @@ impl Worker {
|
|||||||
};
|
};
|
||||||
|
|
||||||
if !should_skip_path && !should_skip_filesize {
|
if !should_skip_path && !should_skip_filesize {
|
||||||
self.tx.send(Message::Work(Work {
|
self.tx
|
||||||
|
.send(Message::Work(Work {
|
||||||
dent: dent,
|
dent: dent,
|
||||||
ignore: ig.clone(),
|
ignore: ig.clone(),
|
||||||
root_device: root_device,
|
root_device: root_device,
|
||||||
})).unwrap();
|
}))
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
WalkState::Continue
|
WalkState::Continue
|
||||||
}
|
}
|
||||||
@@ -1568,17 +1579,25 @@ fn check_symlink_loop(
|
|||||||
child_depth: usize,
|
child_depth: usize,
|
||||||
) -> Result<(), Error> {
|
) -> Result<(), Error> {
|
||||||
let hchild = Handle::from_path(child_path).map_err(|err| {
|
let hchild = Handle::from_path(child_path).map_err(|err| {
|
||||||
Error::from(err).with_path(child_path).with_depth(child_depth)
|
Error::from(err)
|
||||||
|
.with_path(child_path)
|
||||||
|
.with_depth(child_depth)
|
||||||
})?;
|
})?;
|
||||||
for ig in ig_parent.parents().take_while(|ig| !ig.is_absolute_parent()) {
|
for ig in ig_parent
|
||||||
|
.parents()
|
||||||
|
.take_while(|ig| !ig.is_absolute_parent())
|
||||||
|
{
|
||||||
let h = Handle::from_path(ig.path()).map_err(|err| {
|
let h = Handle::from_path(ig.path()).map_err(|err| {
|
||||||
Error::from(err).with_path(child_path).with_depth(child_depth)
|
Error::from(err)
|
||||||
|
.with_path(child_path)
|
||||||
|
.with_depth(child_depth)
|
||||||
})?;
|
})?;
|
||||||
if hchild == h {
|
if hchild == h {
|
||||||
return Err(Error::Loop {
|
return Err(Error::Loop {
|
||||||
ancestor: ig.path().to_path_buf(),
|
ancestor: ig.path().to_path_buf(),
|
||||||
child: child_path.to_path_buf(),
|
child: child_path.to_path_buf(),
|
||||||
}.with_depth(child_depth));
|
}
|
||||||
|
.with_depth(child_depth));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@@ -1586,14 +1605,10 @@ fn check_symlink_loop(
|
|||||||
|
|
||||||
// Before calling this function, make sure that you ensure that is really
|
// Before calling this function, make sure that you ensure that is really
|
||||||
// necessary as the arguments imply a file stat.
|
// necessary as the arguments imply a file stat.
|
||||||
fn skip_filesize(
|
fn skip_filesize(max_filesize: u64, path: &Path, ent: &Option<Metadata>) -> bool {
|
||||||
max_filesize: u64,
|
|
||||||
path: &Path,
|
|
||||||
ent: &Option<Metadata>
|
|
||||||
) -> bool {
|
|
||||||
let filesize = match *ent {
|
let filesize = match *ent {
|
||||||
Some(ref md) => Some(md.len()),
|
Some(ref md) => Some(md.len()),
|
||||||
None => None
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(fs) = filesize {
|
if let Some(fs) = filesize {
|
||||||
@@ -1608,10 +1623,7 @@ fn skip_filesize(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn should_skip_entry(
|
fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
|
||||||
ig: &Ignore,
|
|
||||||
dent: &DirEntry,
|
|
||||||
) -> bool {
|
|
||||||
let m = ig.matched_dir_entry(dent);
|
let m = ig.matched_dir_entry(dent);
|
||||||
if m.is_ignore() {
|
if m.is_ignore() {
|
||||||
debug!("ignoring {}: {:?}", dent.path().display(), m);
|
debug!("ignoring {}: {:?}", dent.path().display(), m);
|
||||||
@@ -1673,8 +1685,7 @@ fn path_equals(dent: &DirEntry, handle: &Handle) -> Result<bool, Error> {
|
|||||||
/// Returns true if and only if the given path is on the same device as the
|
/// Returns true if and only if the given path is on the same device as the
|
||||||
/// given root device.
|
/// given root device.
|
||||||
fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
|
fn is_same_file_system(root_device: u64, path: &Path) -> Result<bool, Error> {
|
||||||
let dent_device = device_num(path)
|
let dent_device = device_num(path).map_err(|err| Error::Io(err).with_path(path))?;
|
||||||
.map_err(|err| Error::Io(err).with_path(path))?;
|
|
||||||
Ok(root_device == dent_device)
|
Ok(root_device == dent_device)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1687,7 +1698,7 @@ fn device_num<P: AsRef<Path>>(path: P)-> io::Result<u64> {
|
|||||||
|
|
||||||
#[cfg(windows)]
|
#[cfg(windows)]
|
||||||
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
|
fn device_num<P: AsRef<Path>>(path: P) -> io::Result<u64> {
|
||||||
use winapi_util::{Handle, file};
|
use winapi_util::{file, Handle};
|
||||||
|
|
||||||
let h = Handle::from_path_any(path)?;
|
let h = Handle::from_path_any(path)?;
|
||||||
file::information(h).map(|info| info.volume_serial_number())
|
file::information(h).map(|info| info.volume_serial_number())
|
||||||
@@ -1708,9 +1719,8 @@ mod tests {
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
|
|
||||||
use tempfile::{self, TempDir};
|
|
||||||
|
|
||||||
use super::{DirEntry, WalkBuilder, WalkState};
|
use super::{DirEntry, WalkBuilder, WalkState};
|
||||||
|
use tests::TempDir;
|
||||||
|
|
||||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||||
let mut file = File::create(path).unwrap();
|
let mut file = File::create(path).unwrap();
|
||||||
@@ -1757,10 +1767,7 @@ mod tests {
|
|||||||
paths
|
paths
|
||||||
}
|
}
|
||||||
|
|
||||||
fn walk_collect_parallel(
|
fn walk_collect_parallel(prefix: &Path, builder: &WalkBuilder) -> Vec<String> {
|
||||||
prefix: &Path,
|
|
||||||
builder: &WalkBuilder,
|
|
||||||
) -> Vec<String> {
|
|
||||||
let mut paths = vec![];
|
let mut paths = vec![];
|
||||||
for dent in walk_collect_entries_parallel(builder) {
|
for dent in walk_collect_entries_parallel(builder) {
|
||||||
let path = dent.path().strip_prefix(prefix).unwrap();
|
let path = dent.path().strip_prefix(prefix).unwrap();
|
||||||
@@ -1795,15 +1802,11 @@ mod tests {
|
|||||||
paths
|
paths
|
||||||
}
|
}
|
||||||
|
|
||||||
fn tmpdir(prefix: &str) -> TempDir {
|
fn tmpdir() -> TempDir {
|
||||||
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
|
TempDir::new().unwrap()
|
||||||
}
|
}
|
||||||
|
|
||||||
fn assert_paths(
|
fn assert_paths(prefix: &Path, builder: &WalkBuilder, expected: &[&str]) {
|
||||||
prefix: &Path,
|
|
||||||
builder: &WalkBuilder,
|
|
||||||
expected: &[&str],
|
|
||||||
) {
|
|
||||||
let got = walk_collect(prefix, builder);
|
let got = walk_collect(prefix, builder);
|
||||||
assert_eq!(got, mkpaths(expected), "single threaded");
|
assert_eq!(got, mkpaths(expected), "single threaded");
|
||||||
let got = walk_collect_parallel(prefix, builder);
|
let got = walk_collect_parallel(prefix, builder);
|
||||||
@@ -1812,20 +1815,22 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn no_ignores() {
|
fn no_ignores() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join("a/b/c"));
|
mkdirp(td.path().join("a/b/c"));
|
||||||
mkdirp(td.path().join("x/y"));
|
mkdirp(td.path().join("x/y"));
|
||||||
wfile(td.path().join("a/b/foo"), "");
|
wfile(td.path().join("a/b/foo"), "");
|
||||||
wfile(td.path().join("x/y/foo"), "");
|
wfile(td.path().join("x/y/foo"), "");
|
||||||
|
|
||||||
assert_paths(td.path(), &WalkBuilder::new(td.path()), &[
|
assert_paths(
|
||||||
"x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c",
|
td.path(),
|
||||||
]);
|
&WalkBuilder::new(td.path()),
|
||||||
|
&["x", "x/y", "x/y/foo", "a", "a/b", "a/b/foo", "a/b/c"],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn custom_ignore() {
|
fn custom_ignore() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
let custom_ignore = ".customignore";
|
let custom_ignore = ".customignore";
|
||||||
mkdirp(td.path().join("a"));
|
mkdirp(td.path().join("a"));
|
||||||
wfile(td.path().join(custom_ignore), "foo");
|
wfile(td.path().join(custom_ignore), "foo");
|
||||||
@@ -1841,7 +1846,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn custom_ignore_exclusive_use() {
|
fn custom_ignore_exclusive_use() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
let custom_ignore = ".customignore";
|
let custom_ignore = ".customignore";
|
||||||
mkdirp(td.path().join("a"));
|
mkdirp(td.path().join("a"));
|
||||||
wfile(td.path().join(custom_ignore), "foo");
|
wfile(td.path().join(custom_ignore), "foo");
|
||||||
@@ -1861,7 +1866,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn gitignore() {
|
fn gitignore() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
mkdirp(td.path().join("a"));
|
mkdirp(td.path().join("a"));
|
||||||
wfile(td.path().join(".gitignore"), "foo");
|
wfile(td.path().join(".gitignore"), "foo");
|
||||||
@@ -1870,14 +1875,16 @@ mod tests {
|
|||||||
wfile(td.path().join("bar"), "");
|
wfile(td.path().join("bar"), "");
|
||||||
wfile(td.path().join("a/bar"), "");
|
wfile(td.path().join("a/bar"), "");
|
||||||
|
|
||||||
assert_paths(td.path(), &WalkBuilder::new(td.path()), &[
|
assert_paths(
|
||||||
"bar", "a", "a/bar",
|
td.path(),
|
||||||
]);
|
&WalkBuilder::new(td.path()),
|
||||||
|
&["bar", "a", "a/bar"],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn explicit_ignore() {
|
fn explicit_ignore() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
let igpath = td.path().join(".not-an-ignore");
|
let igpath = td.path().join(".not-an-ignore");
|
||||||
mkdirp(td.path().join("a"));
|
mkdirp(td.path().join("a"));
|
||||||
wfile(&igpath, "foo");
|
wfile(&igpath, "foo");
|
||||||
@@ -1893,7 +1900,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn explicit_ignore_exclusive_use() {
|
fn explicit_ignore_exclusive_use() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
let igpath = td.path().join(".not-an-ignore");
|
let igpath = td.path().join(".not-an-ignore");
|
||||||
mkdirp(td.path().join("a"));
|
mkdirp(td.path().join("a"));
|
||||||
wfile(&igpath, "foo");
|
wfile(&igpath, "foo");
|
||||||
@@ -1905,13 +1912,16 @@ mod tests {
|
|||||||
let mut builder = WalkBuilder::new(td.path());
|
let mut builder = WalkBuilder::new(td.path());
|
||||||
builder.standard_filters(false);
|
builder.standard_filters(false);
|
||||||
assert!(builder.add_ignore(&igpath).is_none());
|
assert!(builder.add_ignore(&igpath).is_none());
|
||||||
assert_paths(td.path(), &builder,
|
assert_paths(
|
||||||
&[".not-an-ignore", "bar", "a", "a/bar"]);
|
td.path(),
|
||||||
|
&builder,
|
||||||
|
&[".not-an-ignore", "bar", "a", "a/bar"],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn gitignore_parent() {
|
fn gitignore_parent() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join(".git"));
|
mkdirp(td.path().join(".git"));
|
||||||
mkdirp(td.path().join("a"));
|
mkdirp(td.path().join("a"));
|
||||||
wfile(td.path().join(".gitignore"), "foo");
|
wfile(td.path().join(".gitignore"), "foo");
|
||||||
@@ -1924,7 +1934,7 @@ mod tests {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn max_depth() {
|
fn max_depth() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join("a/b/c"));
|
mkdirp(td.path().join("a/b/c"));
|
||||||
wfile(td.path().join("foo"), "");
|
wfile(td.path().join("foo"), "");
|
||||||
wfile(td.path().join("a/foo"), "");
|
wfile(td.path().join("a/foo"), "");
|
||||||
@@ -1932,19 +1942,23 @@ mod tests {
|
|||||||
wfile(td.path().join("a/b/c/foo"), "");
|
wfile(td.path().join("a/b/c/foo"), "");
|
||||||
|
|
||||||
let mut builder = WalkBuilder::new(td.path());
|
let mut builder = WalkBuilder::new(td.path());
|
||||||
assert_paths(td.path(), &builder, &[
|
assert_paths(
|
||||||
"a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo",
|
td.path(),
|
||||||
]);
|
&builder,
|
||||||
|
&["a", "a/b", "a/b/c", "foo", "a/foo", "a/b/foo", "a/b/c/foo"],
|
||||||
|
);
|
||||||
assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
|
assert_paths(td.path(), builder.max_depth(Some(0)), &[]);
|
||||||
assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
|
assert_paths(td.path(), builder.max_depth(Some(1)), &["a", "foo"]);
|
||||||
assert_paths(td.path(), builder.max_depth(Some(2)), &[
|
assert_paths(
|
||||||
"a", "a/b", "foo", "a/foo",
|
td.path(),
|
||||||
]);
|
builder.max_depth(Some(2)),
|
||||||
|
&["a", "a/b", "foo", "a/foo"],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn max_filesize() {
|
fn max_filesize() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join("a/b"));
|
mkdirp(td.path().join("a/b"));
|
||||||
wfile_size(td.path().join("foo"), 0);
|
wfile_size(td.path().join("foo"), 0);
|
||||||
wfile_size(td.path().join("bar"), 400);
|
wfile_size(td.path().join("bar"), 400);
|
||||||
@@ -1954,41 +1968,49 @@ mod tests {
|
|||||||
wfile_size(td.path().join("a/baz"), 200);
|
wfile_size(td.path().join("a/baz"), 200);
|
||||||
|
|
||||||
let mut builder = WalkBuilder::new(td.path());
|
let mut builder = WalkBuilder::new(td.path());
|
||||||
assert_paths(td.path(), &builder, &[
|
assert_paths(
|
||||||
"a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz",
|
td.path(),
|
||||||
]);
|
&builder,
|
||||||
assert_paths(td.path(), builder.max_filesize(Some(0)), &[
|
&["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
|
||||||
"a", "a/b", "foo"
|
);
|
||||||
]);
|
assert_paths(
|
||||||
assert_paths(td.path(), builder.max_filesize(Some(500)), &[
|
td.path(),
|
||||||
"a", "a/b", "foo", "bar", "a/bar", "a/baz"
|
builder.max_filesize(Some(0)),
|
||||||
]);
|
&["a", "a/b", "foo"],
|
||||||
assert_paths(td.path(), builder.max_filesize(Some(50000)), &[
|
);
|
||||||
"a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz",
|
assert_paths(
|
||||||
]);
|
td.path(),
|
||||||
|
builder.max_filesize(Some(500)),
|
||||||
|
&["a", "a/b", "foo", "bar", "a/bar", "a/baz"],
|
||||||
|
);
|
||||||
|
assert_paths(
|
||||||
|
td.path(),
|
||||||
|
builder.max_filesize(Some(50000)),
|
||||||
|
&["a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz"],
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)] // because symlinks on windows are weird
|
#[cfg(unix)] // because symlinks on windows are weird
|
||||||
#[test]
|
#[test]
|
||||||
fn symlinks() {
|
fn symlinks() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join("a/b"));
|
mkdirp(td.path().join("a/b"));
|
||||||
symlink(td.path().join("a/b"), td.path().join("z"));
|
symlink(td.path().join("a/b"), td.path().join("z"));
|
||||||
wfile(td.path().join("a/b/foo"), "");
|
wfile(td.path().join("a/b/foo"), "");
|
||||||
|
|
||||||
let mut builder = WalkBuilder::new(td.path());
|
let mut builder = WalkBuilder::new(td.path());
|
||||||
assert_paths(td.path(), &builder, &[
|
assert_paths(td.path(), &builder, &["a", "a/b", "a/b/foo", "z"]);
|
||||||
"a", "a/b", "a/b/foo", "z",
|
assert_paths(
|
||||||
]);
|
td.path(),
|
||||||
assert_paths(td.path(), &builder.follow_links(true), &[
|
&builder.follow_links(true),
|
||||||
"a", "a/b", "a/b/foo", "z", "z/foo",
|
&["a", "a/b", "a/b/foo", "z", "z/foo"],
|
||||||
]);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(unix)] // because symlinks on windows are weird
|
#[cfg(unix)] // because symlinks on windows are weird
|
||||||
#[test]
|
#[test]
|
||||||
fn first_path_not_symlink() {
|
fn first_path_not_symlink() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join("foo"));
|
mkdirp(td.path().join("foo"));
|
||||||
|
|
||||||
let dents = WalkBuilder::new(td.path().join("foo"))
|
let dents = WalkBuilder::new(td.path().join("foo"))
|
||||||
@@ -1999,9 +2021,7 @@ mod tests {
|
|||||||
assert_eq!(1, dents.len());
|
assert_eq!(1, dents.len());
|
||||||
assert!(!dents[0].path_is_symlink());
|
assert!(!dents[0].path_is_symlink());
|
||||||
|
|
||||||
let dents = walk_collect_entries_parallel(
|
let dents = walk_collect_entries_parallel(&WalkBuilder::new(td.path().join("foo")));
|
||||||
&WalkBuilder::new(td.path().join("foo")),
|
|
||||||
);
|
|
||||||
assert_eq!(1, dents.len());
|
assert_eq!(1, dents.len());
|
||||||
assert!(!dents[0].path_is_symlink());
|
assert!(!dents[0].path_is_symlink());
|
||||||
}
|
}
|
||||||
@@ -2009,17 +2029,13 @@ mod tests {
|
|||||||
#[cfg(unix)] // because symlinks on windows are weird
|
#[cfg(unix)] // because symlinks on windows are weird
|
||||||
#[test]
|
#[test]
|
||||||
fn symlink_loop() {
|
fn symlink_loop() {
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
mkdirp(td.path().join("a/b"));
|
mkdirp(td.path().join("a/b"));
|
||||||
symlink(td.path().join("a"), td.path().join("a/b/c"));
|
symlink(td.path().join("a"), td.path().join("a/b/c"));
|
||||||
|
|
||||||
let mut builder = WalkBuilder::new(td.path());
|
let mut builder = WalkBuilder::new(td.path());
|
||||||
assert_paths(td.path(), &builder, &[
|
assert_paths(td.path(), &builder, &["a", "a/b", "a/b/c"]);
|
||||||
"a", "a/b", "a/b/c",
|
assert_paths(td.path(), &builder.follow_links(true), &["a", "a/b"]);
|
||||||
]);
|
|
||||||
assert_paths(td.path(), &builder.follow_links(true), &[
|
|
||||||
"a", "a/b",
|
|
||||||
]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// It's a little tricky to test the 'same_file_system' option since
|
// It's a little tricky to test the 'same_file_system' option since
|
||||||
@@ -2039,7 +2055,7 @@ mod tests {
|
|||||||
|
|
||||||
// If our test directory actually isn't a different volume from /sys,
|
// If our test directory actually isn't a different volume from /sys,
|
||||||
// then this test is meaningless and we shouldn't run it.
|
// then this test is meaningless and we shouldn't run it.
|
||||||
let td = tmpdir("walk-test-");
|
let td = tmpdir();
|
||||||
if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
|
if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -2053,8 +2069,6 @@ mod tests {
|
|||||||
// completely.
|
// completely.
|
||||||
let mut builder = WalkBuilder::new(td.path());
|
let mut builder = WalkBuilder::new(td.path());
|
||||||
builder.follow_links(true).same_file_system(true);
|
builder.follow_links(true).same_file_system(true);
|
||||||
assert_paths(td.path(), &builder, &[
|
assert_paths(td.path(), &builder, &["same_file", "same_file/alink"]);
|
||||||
"same_file", "same_file/alink",
|
|
||||||
]);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -1,14 +1,14 @@
|
|||||||
class RipgrepBin < Formula
|
class RipgrepBin < Formula
|
||||||
version '0.10.0'
|
version '11.0.2'
|
||||||
desc "Recursively search directories for a regex pattern."
|
desc "Recursively search directories for a regex pattern."
|
||||||
homepage "https://github.com/BurntSushi/ripgrep"
|
homepage "https://github.com/BurntSushi/ripgrep"
|
||||||
|
|
||||||
if OS.mac?
|
if OS.mac?
|
||||||
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
|
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
|
||||||
sha256 "32754b4173ac87a7bfffd436d601a49362676eb1841ab33440f2f49c002c8967"
|
sha256 "0ba26423691deedf2649b12b1abe3d2be294ee1cb17c40b68fe85efe194f4f57"
|
||||||
elsif OS.linux?
|
elsif OS.linux?
|
||||||
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz"
|
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz"
|
||||||
sha256 "c76080aa807a339b44139885d77d15ad60ab8cdd2c2fdaf345d0985625bc0f97"
|
sha256 "2e7978e346553fbc45c0940d9fa11e12f9afbae8213b261aad19b698150e169a"
|
||||||
end
|
end
|
||||||
|
|
||||||
conflicts_with "ripgrep"
|
conflicts_with "ripgrep"
|
||||||
|
207
src/app.rs
207
src/app.rs
@@ -27,6 +27,9 @@ configuration file. The file can specify one shell argument per line. Lines
|
|||||||
starting with '#' are ignored. For more details, see the man page or the
|
starting with '#' are ignored. For more details, see the man page or the
|
||||||
README.
|
README.
|
||||||
|
|
||||||
|
Tip: to disable all smart filtering and make ripgrep behave a bit more like
|
||||||
|
classical grep, use 'rg -uuu'.
|
||||||
|
|
||||||
Project home page: https://github.com/BurntSushi/ripgrep
|
Project home page: https://github.com/BurntSushi/ripgrep
|
||||||
|
|
||||||
Use -h for short descriptions and --help for more details.";
|
Use -h for short descriptions and --help for more details.";
|
||||||
@@ -544,7 +547,9 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
|||||||
// flags are hidden and merely mentioned in the docs of the corresponding
|
// flags are hidden and merely mentioned in the docs of the corresponding
|
||||||
// "positive" flag.
|
// "positive" flag.
|
||||||
flag_after_context(&mut args);
|
flag_after_context(&mut args);
|
||||||
|
flag_auto_hybrid_regex(&mut args);
|
||||||
flag_before_context(&mut args);
|
flag_before_context(&mut args);
|
||||||
|
flag_binary(&mut args);
|
||||||
flag_block_buffered(&mut args);
|
flag_block_buffered(&mut args);
|
||||||
flag_byte_offset(&mut args);
|
flag_byte_offset(&mut args);
|
||||||
flag_case_sensitive(&mut args);
|
flag_case_sensitive(&mut args);
|
||||||
@@ -566,6 +571,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
|||||||
flag_fixed_strings(&mut args);
|
flag_fixed_strings(&mut args);
|
||||||
flag_follow(&mut args);
|
flag_follow(&mut args);
|
||||||
flag_glob(&mut args);
|
flag_glob(&mut args);
|
||||||
|
flag_glob_case_insensitive(&mut args);
|
||||||
flag_heading(&mut args);
|
flag_heading(&mut args);
|
||||||
flag_hidden(&mut args);
|
flag_hidden(&mut args);
|
||||||
flag_iglob(&mut args);
|
flag_iglob(&mut args);
|
||||||
@@ -578,6 +584,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
|||||||
flag_line_number(&mut args);
|
flag_line_number(&mut args);
|
||||||
flag_line_regexp(&mut args);
|
flag_line_regexp(&mut args);
|
||||||
flag_max_columns(&mut args);
|
flag_max_columns(&mut args);
|
||||||
|
flag_max_columns_preview(&mut args);
|
||||||
flag_max_count(&mut args);
|
flag_max_count(&mut args);
|
||||||
flag_max_depth(&mut args);
|
flag_max_depth(&mut args);
|
||||||
flag_max_filesize(&mut args);
|
flag_max_filesize(&mut args);
|
||||||
@@ -600,6 +607,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
|||||||
flag_path_separator(&mut args);
|
flag_path_separator(&mut args);
|
||||||
flag_passthru(&mut args);
|
flag_passthru(&mut args);
|
||||||
flag_pcre2(&mut args);
|
flag_pcre2(&mut args);
|
||||||
|
flag_pcre2_version(&mut args);
|
||||||
flag_pre(&mut args);
|
flag_pre(&mut args);
|
||||||
flag_pre_glob(&mut args);
|
flag_pre_glob(&mut args);
|
||||||
flag_pretty(&mut args);
|
flag_pretty(&mut args);
|
||||||
@@ -646,7 +654,7 @@ will be provided. Namely, the following is equivalent to the above:
|
|||||||
let arg = RGArg::positional("pattern", "PATTERN")
|
let arg = RGArg::positional("pattern", "PATTERN")
|
||||||
.help(SHORT).long_help(LONG)
|
.help(SHORT).long_help(LONG)
|
||||||
.required_unless(&[
|
.required_unless(&[
|
||||||
"file", "files", "regexp", "type-list",
|
"file", "files", "regexp", "type-list", "pcre2-version",
|
||||||
]);
|
]);
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
@@ -677,6 +685,50 @@ This overrides the --context flag.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn flag_auto_hybrid_regex(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "Dynamically use PCRE2 if necessary.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
When this flag is used, ripgrep will dynamically choose between supported regex
|
||||||
|
engines depending on the features used in a pattern. When ripgrep chooses a
|
||||||
|
regex engine, it applies that choice for every regex provided to ripgrep (e.g.,
|
||||||
|
via multiple -e/--regexp or -f/--file flags).
|
||||||
|
|
||||||
|
As an example of how this flag might behave, ripgrep will attempt to use
|
||||||
|
its default finite automata based regex engine whenever the pattern can be
|
||||||
|
successfully compiled with that regex engine. If PCRE2 is enabled and if the
|
||||||
|
pattern given could not be compiled with the default regex engine, then PCRE2
|
||||||
|
will be automatically used for searching. If PCRE2 isn't available, then this
|
||||||
|
flag has no effect because there is only one regex engine to choose from.
|
||||||
|
|
||||||
|
In the future, ripgrep may adjust its heuristics for how it decides which
|
||||||
|
regex engine to use. In general, the heuristics will be limited to a static
|
||||||
|
analysis of the patterns, and not to any specific runtime behavior observed
|
||||||
|
while searching files.
|
||||||
|
|
||||||
|
The primary downside of using this flag is that it may not always be obvious
|
||||||
|
which regex engine ripgrep uses, and thus, the match semantics or performance
|
||||||
|
profile of ripgrep may subtly and unexpectedly change. However, in many cases,
|
||||||
|
all regex engines will agree on what constitutes a match and it can be nice
|
||||||
|
to transparently support more advanced regex features like look-around and
|
||||||
|
backreferences without explicitly needing to enable them.
|
||||||
|
|
||||||
|
This flag can be disabled with --no-auto-hybrid-regex.
|
||||||
|
");
|
||||||
|
let arg = RGArg::switch("auto-hybrid-regex")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.overrides("no-auto-hybrid-regex")
|
||||||
|
.overrides("pcre2")
|
||||||
|
.overrides("no-pcre2");
|
||||||
|
args.push(arg);
|
||||||
|
|
||||||
|
let arg = RGArg::switch("no-auto-hybrid-regex")
|
||||||
|
.hidden()
|
||||||
|
.overrides("auto-hybrid-regex")
|
||||||
|
.overrides("pcre2")
|
||||||
|
.overrides("no-pcre2");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
fn flag_before_context(args: &mut Vec<RGArg>) {
|
fn flag_before_context(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Show NUM lines before each match.";
|
const SHORT: &str = "Show NUM lines before each match.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
@@ -691,6 +743,55 @@ This overrides the --context flag.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn flag_binary(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "Search binary files.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
Enabling this flag will cause ripgrep to search binary files. By default,
|
||||||
|
ripgrep attempts to automatically skip binary files in order to improve the
|
||||||
|
relevance of results and make the search faster.
|
||||||
|
|
||||||
|
Binary files are heuristically detected based on whether they contain a NUL
|
||||||
|
byte or not. By default (without this flag set), once a NUL byte is seen,
|
||||||
|
ripgrep will stop searching the file. Usually, NUL bytes occur in the beginning
|
||||||
|
of most binary files. If a NUL byte occurs after a match, then ripgrep will
|
||||||
|
still stop searching the rest of the file, but a warning will be printed.
|
||||||
|
|
||||||
|
In contrast, when this flag is provided, ripgrep will continue searching a file
|
||||||
|
even if a NUL byte is found. In particular, if a NUL byte is found then ripgrep
|
||||||
|
will continue searching until either a match is found or the end of the file is
|
||||||
|
reached, whichever comes sooner. If a match is found, then ripgrep will stop
|
||||||
|
and print a warning saying that the search stopped prematurely.
|
||||||
|
|
||||||
|
If you want ripgrep to search a file without any special NUL byte handling at
|
||||||
|
all (and potentially print binary data to stdout), then you should use the
|
||||||
|
'-a/--text' flag.
|
||||||
|
|
||||||
|
The '--binary' flag is a flag for controlling ripgrep's automatic filtering
|
||||||
|
mechanism. As such, it does not need to be used when searching a file
|
||||||
|
explicitly or when searching stdin. That is, it is only applicable when
|
||||||
|
recursively searching a directory.
|
||||||
|
|
||||||
|
Note that when the '-u/--unrestricted' flag is provided for a third time, then
|
||||||
|
this flag is automatically enabled.
|
||||||
|
|
||||||
|
This flag can be disabled with '--no-binary'. It overrides the '-a/--text'
|
||||||
|
flag.
|
||||||
|
");
|
||||||
|
let arg = RGArg::switch("binary")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.overrides("no-binary")
|
||||||
|
.overrides("text")
|
||||||
|
.overrides("no-text");
|
||||||
|
args.push(arg);
|
||||||
|
|
||||||
|
let arg = RGArg::switch("no-binary")
|
||||||
|
.hidden()
|
||||||
|
.overrides("binary")
|
||||||
|
.overrides("text")
|
||||||
|
.overrides("no-text");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
fn flag_block_buffered(args: &mut Vec<RGArg>) {
|
fn flag_block_buffered(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Force block buffering.";
|
const SHORT: &str = "Force block buffering.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
@@ -984,7 +1085,9 @@ Specify the text encoding that ripgrep will use on all files searched. The
|
|||||||
default value is 'auto', which will cause ripgrep to do a best effort automatic
|
default value is 'auto', which will cause ripgrep to do a best effort automatic
|
||||||
detection of encoding on a per-file basis. Automatic detection in this case
|
detection of encoding on a per-file basis. Automatic detection in this case
|
||||||
only applies to files that begin with a UTF-8 or UTF-16 byte-order mark (BOM).
|
only applies to files that begin with a UTF-8 or UTF-16 byte-order mark (BOM).
|
||||||
No other automatic detection is performend.
|
No other automatic detection is performed. One can also specify 'none' which
|
||||||
|
will then completely disable BOM sniffing and always result in searching the
|
||||||
|
raw bytes, including a BOM if it's present, regardless of its encoding.
|
||||||
|
|
||||||
Other supported values can be found in the list of labels here:
|
Other supported values can be found in the list of labels here:
|
||||||
https://encoding.spec.whatwg.org/#concept-encoding-get
|
https://encoding.spec.whatwg.org/#concept-encoding-get
|
||||||
@@ -1116,6 +1219,25 @@ it.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn flag_glob_case_insensitive(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "Process all glob patterns case insensitively.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
Process glob patterns given with the -g/--glob flag case insensitively. This
|
||||||
|
effectively treats --glob as --iglob.
|
||||||
|
|
||||||
|
This flag can be disabled with the --no-glob-case-insensitive flag.
|
||||||
|
");
|
||||||
|
let arg = RGArg::switch("glob-case-insensitive")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.overrides("no-glob-case-insensitive");
|
||||||
|
args.push(arg);
|
||||||
|
|
||||||
|
let arg = RGArg::switch("no-glob-case-insensitive")
|
||||||
|
.hidden()
|
||||||
|
.overrides("glob-case-insensitive");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
fn flag_heading(args: &mut Vec<RGArg>) {
|
fn flag_heading(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Print matches grouped by each file.";
|
const SHORT: &str = "Print matches grouped by each file.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
@@ -1388,6 +1510,30 @@ When this flag is omitted or is set to 0, then it has no effect.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn flag_max_columns_preview(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "Print a preview for lines exceeding the limit.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
When the '--max-columns' flag is used, ripgrep will by default completely
|
||||||
|
replace any line that is too long with a message indicating that a matching
|
||||||
|
line was removed. When this flag is combined with '--max-columns', a preview
|
||||||
|
of the line (corresponding to the limit size) is shown instead, where the part
|
||||||
|
of the line exceeding the limit is not shown.
|
||||||
|
|
||||||
|
If the '--max-columns' flag is not set, then this has no effect.
|
||||||
|
|
||||||
|
This flag can be disabled with '--no-max-columns-preview'.
|
||||||
|
");
|
||||||
|
let arg = RGArg::switch("max-columns-preview")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.overrides("no-max-columns-preview");
|
||||||
|
args.push(arg);
|
||||||
|
|
||||||
|
let arg = RGArg::switch("no-max-columns-preview")
|
||||||
|
.hidden()
|
||||||
|
.overrides("max-columns-preview");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
fn flag_max_count(args: &mut Vec<RGArg>) {
|
fn flag_max_count(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Limit the number of matches.";
|
const SHORT: &str = "Limit the number of matches.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
@@ -1849,7 +1995,12 @@ or backreferences.
|
|||||||
|
|
||||||
Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
|
Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
|
||||||
your build of ripgrep, then using this flag will result in ripgrep printing
|
your build of ripgrep, then using this flag will result in ripgrep printing
|
||||||
an error message and exiting.
|
an error message and exiting. PCRE2 may also have worse user experience in
|
||||||
|
some cases, since it has fewer introspection APIs than ripgrep's default regex
|
||||||
|
engine. For example, if you use a '\n' in a PCRE2 regex without the
|
||||||
|
'-U/--multiline' flag, then ripgrep will silently fail to match anything
|
||||||
|
instead of reporting an error immediately (like it does with the default
|
||||||
|
regex engine).
|
||||||
|
|
||||||
Related flags: --no-pcre2-unicode
|
Related flags: --no-pcre2-unicode
|
||||||
|
|
||||||
@@ -1857,12 +2008,28 @@ This flag can be disabled with --no-pcre2.
|
|||||||
");
|
");
|
||||||
let arg = RGArg::switch("pcre2").short("P")
|
let arg = RGArg::switch("pcre2").short("P")
|
||||||
.help(SHORT).long_help(LONG)
|
.help(SHORT).long_help(LONG)
|
||||||
.overrides("no-pcre2");
|
.overrides("no-pcre2")
|
||||||
|
.overrides("auto-hybrid-regex")
|
||||||
|
.overrides("no-auto-hybrid-regex");
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
|
|
||||||
let arg = RGArg::switch("no-pcre2")
|
let arg = RGArg::switch("no-pcre2")
|
||||||
.hidden()
|
.hidden()
|
||||||
.overrides("pcre2");
|
.overrides("pcre2")
|
||||||
|
.overrides("auto-hybrid-regex")
|
||||||
|
.overrides("no-auto-hybrid-regex");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flag_pcre2_version(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "Print the version of PCRE2 that ripgrep uses.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
When this flag is present, ripgrep will print the version of PCRE2 in use,
|
||||||
|
along with other information, and then exit. If PCRE2 is not available, then
|
||||||
|
ripgrep will print an error message and exit with an error code.
|
||||||
|
");
|
||||||
|
let arg = RGArg::switch("pcre2-version")
|
||||||
|
.help(SHORT).long_help(LONG);
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1872,12 +2039,13 @@ fn flag_pre(args: &mut Vec<RGArg>) {
|
|||||||
For each input FILE, search the standard output of COMMAND FILE rather than the
|
For each input FILE, search the standard output of COMMAND FILE rather than the
|
||||||
contents of FILE. This option expects the COMMAND program to either be an
|
contents of FILE. This option expects the COMMAND program to either be an
|
||||||
absolute path or to be available in your PATH. Either an empty string COMMAND
|
absolute path or to be available in your PATH. Either an empty string COMMAND
|
||||||
or the `--no-pre` flag will disable this behavior.
|
or the '--no-pre' flag will disable this behavior.
|
||||||
|
|
||||||
WARNING: When this flag is set, ripgrep will unconditionally spawn a
|
WARNING: When this flag is set, ripgrep will unconditionally spawn a
|
||||||
process for every file that is searched. Therefore, this can incur an
|
process for every file that is searched. Therefore, this can incur an
|
||||||
unnecessarily large performance penalty if you don't otherwise need the
|
unnecessarily large performance penalty if you don't otherwise need the
|
||||||
flexibility offered by this flag.
|
flexibility offered by this flag. One possible mitigation to this is to use
|
||||||
|
the '--pre-glob' flag to limit which files a preprocessor is run with.
|
||||||
|
|
||||||
A preprocessor is not run when ripgrep is searching stdin.
|
A preprocessor is not run when ripgrep is searching stdin.
|
||||||
|
|
||||||
@@ -2028,7 +2196,10 @@ Replace every match with the text given when printing results. Neither this
|
|||||||
flag nor any other ripgrep flag will modify your files.
|
flag nor any other ripgrep flag will modify your files.
|
||||||
|
|
||||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported in the
|
Capture group indices (e.g., $5) and names (e.g., $foo) are supported in the
|
||||||
replacement string.
|
replacement string. In shells such as Bash and zsh, you should wrap the
|
||||||
|
pattern in single quotes instead of double quotes. Otherwise, capture group
|
||||||
|
indices will be replaced by expanded shell variables which will most likely
|
||||||
|
be empty.
|
||||||
|
|
||||||
Note that the replacement by default replaces each match, and NOT the entire
|
Note that the replacement by default replaces each match, and NOT the entire
|
||||||
line. To replace the entire line, you should match the entire line.
|
line. To replace the entire line, you should match the entire line.
|
||||||
@@ -2206,20 +2377,23 @@ escape codes to be printed that alter the behavior of your terminal.
|
|||||||
When binary file detection is enabled it is imperfect. In general, it uses
|
When binary file detection is enabled it is imperfect. In general, it uses
|
||||||
a simple heuristic. If a NUL byte is seen during search, then the file is
|
a simple heuristic. If a NUL byte is seen during search, then the file is
|
||||||
considered binary and search stops (unless this flag is present).
|
considered binary and search stops (unless this flag is present).
|
||||||
|
Alternatively, if the '--binary' flag is used, then ripgrep will only quit
|
||||||
|
when it sees a NUL byte after it sees a match (or searches the entire file).
|
||||||
|
|
||||||
Note that when the `-u/--unrestricted` flag is provided for a third time, then
|
This flag can be disabled with '--no-text'. It overrides the '--binary' flag.
|
||||||
this flag is automatically enabled.
|
|
||||||
|
|
||||||
This flag can be disabled with --no-text.
|
|
||||||
");
|
");
|
||||||
let arg = RGArg::switch("text").short("a")
|
let arg = RGArg::switch("text").short("a")
|
||||||
.help(SHORT).long_help(LONG)
|
.help(SHORT).long_help(LONG)
|
||||||
.overrides("no-text");
|
.overrides("no-text")
|
||||||
|
.overrides("binary")
|
||||||
|
.overrides("no-binary");
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
|
|
||||||
let arg = RGArg::switch("no-text")
|
let arg = RGArg::switch("no-text")
|
||||||
.hidden()
|
.hidden()
|
||||||
.overrides("text");
|
.overrides("text")
|
||||||
|
.overrides("binary")
|
||||||
|
.overrides("no-binary");
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2348,8 +2522,7 @@ Reduce the level of \"smart\" searching. A single -u won't respect .gitignore
|
|||||||
(etc.) files. Two -u flags will additionally search hidden files and
|
(etc.) files. Two -u flags will additionally search hidden files and
|
||||||
directories. Three -u flags will additionally search binary files.
|
directories. Three -u flags will additionally search binary files.
|
||||||
|
|
||||||
-uu is roughly equivalent to grep -r and -uuu is roughly equivalent to grep -a
|
'rg -uuu' is roughly equivalent to 'grep -r'.
|
||||||
-r.
|
|
||||||
");
|
");
|
||||||
let arg = RGArg::switch("unrestricted").short("u")
|
let arg = RGArg::switch("unrestricted").short("u")
|
||||||
.help(SHORT).long_help(LONG)
|
.help(SHORT).long_help(LONG)
|
||||||
@@ -2391,7 +2564,7 @@ ripgrep is explicitly instructed to search one file or stdin.
|
|||||||
|
|
||||||
This flag overrides --with-filename.
|
This flag overrides --with-filename.
|
||||||
");
|
");
|
||||||
let arg = RGArg::switch("no-filename")
|
let arg = RGArg::switch("no-filename").short("I")
|
||||||
.help(NO_SHORT).long_help(NO_LONG)
|
.help(NO_SHORT).long_help(NO_LONG)
|
||||||
.overrides("with-filename");
|
.overrides("with-filename");
|
||||||
args.push(arg);
|
args.push(arg);
|
||||||
|
183
src/args.rs
183
src/args.rs
@@ -73,6 +73,8 @@ pub enum Command {
|
|||||||
/// List all file type definitions configured, including the default file
|
/// List all file type definitions configured, including the default file
|
||||||
/// types and any additional file types added to the command line.
|
/// types and any additional file types added to the command line.
|
||||||
Types,
|
Types,
|
||||||
|
/// Print the version of PCRE2 in use.
|
||||||
|
PCRE2Version,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Command {
|
impl Command {
|
||||||
@@ -82,7 +84,11 @@ impl Command {
|
|||||||
|
|
||||||
match *self {
|
match *self {
|
||||||
Search | SearchParallel => true,
|
Search | SearchParallel => true,
|
||||||
SearchNever | Files | FilesParallel | Types => false,
|
| SearchNever
|
||||||
|
| Files
|
||||||
|
| FilesParallel
|
||||||
|
| Types
|
||||||
|
| PCRE2Version => false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -235,7 +241,9 @@ impl Args {
|
|||||||
let threads = self.matches().threads()?;
|
let threads = self.matches().threads()?;
|
||||||
let one_thread = is_one_search || threads == 1;
|
let one_thread = is_one_search || threads == 1;
|
||||||
|
|
||||||
Ok(if self.matches().is_present("type-list") {
|
Ok(if self.matches().is_present("pcre2-version") {
|
||||||
|
Command::PCRE2Version
|
||||||
|
} else if self.matches().is_present("type-list") {
|
||||||
Command::Types
|
Command::Types
|
||||||
} else if self.matches().is_present("files") {
|
} else if self.matches().is_present("files") {
|
||||||
if one_thread {
|
if one_thread {
|
||||||
@@ -286,15 +294,18 @@ impl Args {
|
|||||||
&self,
|
&self,
|
||||||
wtr: W,
|
wtr: W,
|
||||||
) -> Result<SearchWorker<W>> {
|
) -> Result<SearchWorker<W>> {
|
||||||
|
let matches = self.matches();
|
||||||
let matcher = self.matcher().clone();
|
let matcher = self.matcher().clone();
|
||||||
let printer = self.printer(wtr)?;
|
let printer = self.printer(wtr)?;
|
||||||
let searcher = self.matches().searcher(self.paths())?;
|
let searcher = matches.searcher(self.paths())?;
|
||||||
let mut builder = SearchWorkerBuilder::new();
|
let mut builder = SearchWorkerBuilder::new();
|
||||||
builder
|
builder
|
||||||
.json_stats(self.matches().is_present("json"))
|
.json_stats(matches.is_present("json"))
|
||||||
.preprocessor(self.matches().preprocessor())
|
.preprocessor(matches.preprocessor())
|
||||||
.preprocessor_globs(self.matches().preprocessor_globs()?)
|
.preprocessor_globs(matches.preprocessor_globs()?)
|
||||||
.search_zip(self.matches().is_present("search-zip"));
|
.search_zip(matches.is_present("search-zip"))
|
||||||
|
.binary_detection_implicit(matches.binary_detection_implicit())
|
||||||
|
.binary_detection_explicit(matches.binary_detection_explicit());
|
||||||
Ok(builder.build(matcher, searcher, printer))
|
Ok(builder.build(matcher, searcher, printer))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -483,6 +494,37 @@ impl SortByKind {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Encoding mode the searcher will use.
|
||||||
|
#[derive(Clone, Debug)]
|
||||||
|
enum EncodingMode {
|
||||||
|
/// Use an explicit encoding forcefully, but let BOM sniffing override it.
|
||||||
|
Some(Encoding),
|
||||||
|
/// Use only BOM sniffing to auto-detect an encoding.
|
||||||
|
Auto,
|
||||||
|
/// Use no explicit encoding and disable all BOM sniffing. This will
|
||||||
|
/// always result in searching the raw bytes, regardless of their
|
||||||
|
/// true encoding.
|
||||||
|
Disabled,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EncodingMode {
|
||||||
|
/// Checks if an explicit encoding has been set. Returns false for
|
||||||
|
/// automatic BOM sniffing and no sniffing.
|
||||||
|
///
|
||||||
|
/// This is only used to determine whether PCRE2 needs to have its own
|
||||||
|
/// UTF-8 checking enabled. If we have an explicit encoding set, then
|
||||||
|
/// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
|
||||||
|
/// Otherwise, we have no such guarantee, and must enable PCRE2's UTF-8
|
||||||
|
/// check.
|
||||||
|
#[cfg(feature = "pcre2")]
|
||||||
|
fn has_explicit_encoding(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
EncodingMode::Some(_) => true,
|
||||||
|
_ => false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl ArgMatches {
|
impl ArgMatches {
|
||||||
/// Create an ArgMatches from clap's parse result.
|
/// Create an ArgMatches from clap's parse result.
|
||||||
fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
|
fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
|
||||||
@@ -557,6 +599,25 @@ impl ArgMatches {
|
|||||||
if self.is_present("pcre2") {
|
if self.is_present("pcre2") {
|
||||||
let matcher = self.matcher_pcre2(patterns)?;
|
let matcher = self.matcher_pcre2(patterns)?;
|
||||||
Ok(PatternMatcher::PCRE2(matcher))
|
Ok(PatternMatcher::PCRE2(matcher))
|
||||||
|
} else if self.is_present("auto-hybrid-regex") {
|
||||||
|
let rust_err = match self.matcher_rust(patterns) {
|
||||||
|
Ok(matcher) => return Ok(PatternMatcher::RustRegex(matcher)),
|
||||||
|
Err(err) => err,
|
||||||
|
};
|
||||||
|
log::debug!(
|
||||||
|
"error building Rust regex in hybrid mode:\n{}", rust_err,
|
||||||
|
);
|
||||||
|
let pcre_err = match self.matcher_pcre2(patterns) {
|
||||||
|
Ok(matcher) => return Ok(PatternMatcher::PCRE2(matcher)),
|
||||||
|
Err(err) => err,
|
||||||
|
};
|
||||||
|
Err(From::from(format!(
|
||||||
|
"regex could not be compiled with either the default regex \
|
||||||
|
engine or with PCRE2.\n\n\
|
||||||
|
default regex engine error:\n{}\n{}\n{}\n\n\
|
||||||
|
PCRE2 regex engine error:\n{}",
|
||||||
|
"~".repeat(79), rust_err, "~".repeat(79), pcre_err,
|
||||||
|
)))
|
||||||
} else {
|
} else {
|
||||||
let matcher = match self.matcher_rust(patterns) {
|
let matcher = match self.matcher_rust(patterns) {
|
||||||
Ok(matcher) => matcher,
|
Ok(matcher) => matcher,
|
||||||
@@ -625,7 +686,13 @@ impl ArgMatches {
|
|||||||
if let Some(limit) = self.dfa_size_limit()? {
|
if let Some(limit) = self.dfa_size_limit()? {
|
||||||
builder.dfa_size_limit(limit);
|
builder.dfa_size_limit(limit);
|
||||||
}
|
}
|
||||||
match builder.build(&patterns.join("|")) {
|
let res =
|
||||||
|
if self.is_present("fixed-strings") {
|
||||||
|
builder.build_literals(patterns)
|
||||||
|
} else {
|
||||||
|
builder.build(&patterns.join("|"))
|
||||||
|
};
|
||||||
|
match res {
|
||||||
Ok(m) => Ok(m),
|
Ok(m) => Ok(m),
|
||||||
Err(err) => Err(From::from(suggest_multiline(err.to_string()))),
|
Err(err) => Err(From::from(suggest_multiline(err.to_string()))),
|
||||||
}
|
}
|
||||||
@@ -645,12 +712,17 @@ impl ArgMatches {
|
|||||||
.word(self.is_present("word-regexp"));
|
.word(self.is_present("word-regexp"));
|
||||||
// For whatever reason, the JIT craps out during regex compilation with
|
// For whatever reason, the JIT craps out during regex compilation with
|
||||||
// a "no more memory" error on 32 bit systems. So don't use it there.
|
// a "no more memory" error on 32 bit systems. So don't use it there.
|
||||||
if !cfg!(target_pointer_width = "32") {
|
if cfg!(target_pointer_width = "64") {
|
||||||
builder.jit_if_available(true);
|
builder
|
||||||
|
.jit_if_available(true)
|
||||||
|
// The PCRE2 docs say that 32KB is the default, and that 1MB
|
||||||
|
// should be big enough for anything. But let's crank it to
|
||||||
|
// 10MB.
|
||||||
|
.max_jit_stack_size(Some(10 * (1<<20)));
|
||||||
}
|
}
|
||||||
if self.pcre2_unicode() {
|
if self.pcre2_unicode() {
|
||||||
builder.utf(true).ucp(true);
|
builder.utf(true).ucp(true);
|
||||||
if self.encoding()?.is_some() {
|
if self.encoding()?.has_explicit_encoding() {
|
||||||
// SAFETY: If an encoding was specified, then we're guaranteed
|
// SAFETY: If an encoding was specified, then we're guaranteed
|
||||||
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
|
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
|
||||||
// (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
|
// (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
|
||||||
@@ -706,6 +778,7 @@ impl ArgMatches {
|
|||||||
.per_match(self.is_present("vimgrep"))
|
.per_match(self.is_present("vimgrep"))
|
||||||
.replacement(self.replacement())
|
.replacement(self.replacement())
|
||||||
.max_columns(self.max_columns()?)
|
.max_columns(self.max_columns()?)
|
||||||
|
.max_columns_preview(self.max_columns_preview())
|
||||||
.max_matches(self.max_count()?)
|
.max_matches(self.max_count()?)
|
||||||
.column(self.column())
|
.column(self.column())
|
||||||
.byte_offset(self.is_present("byte-offset"))
|
.byte_offset(self.is_present("byte-offset"))
|
||||||
@@ -765,9 +838,16 @@ impl ArgMatches {
|
|||||||
.before_context(ctx_before)
|
.before_context(ctx_before)
|
||||||
.after_context(ctx_after)
|
.after_context(ctx_after)
|
||||||
.passthru(self.is_present("passthru"))
|
.passthru(self.is_present("passthru"))
|
||||||
.memory_map(self.mmap_choice(paths))
|
.memory_map(self.mmap_choice(paths));
|
||||||
.binary_detection(self.binary_detection())
|
match self.encoding()? {
|
||||||
.encoding(self.encoding()?);
|
EncodingMode::Some(enc) => {
|
||||||
|
builder.encoding(Some(enc));
|
||||||
|
}
|
||||||
|
EncodingMode::Auto => {} // default for the searcher
|
||||||
|
EncodingMode::Disabled => {
|
||||||
|
builder.bom_sniffing(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
Ok(builder.build())
|
Ok(builder.build())
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -817,16 +897,39 @@ impl ArgMatches {
|
|||||||
///
|
///
|
||||||
/// Methods are sorted alphabetically.
|
/// Methods are sorted alphabetically.
|
||||||
impl ArgMatches {
|
impl ArgMatches {
|
||||||
/// Returns the form of binary detection to perform.
|
/// Returns the form of binary detection to perform on files that are
|
||||||
fn binary_detection(&self) -> BinaryDetection {
|
/// implicitly searched via recursive directory traversal.
|
||||||
|
fn binary_detection_implicit(&self) -> BinaryDetection {
|
||||||
|
let none =
|
||||||
|
self.is_present("text")
|
||||||
|
|| self.is_present("null-data");
|
||||||
|
let convert =
|
||||||
|
self.is_present("binary")
|
||||||
|
|| self.unrestricted_count() >= 3;
|
||||||
|
if none {
|
||||||
|
BinaryDetection::none()
|
||||||
|
} else if convert {
|
||||||
|
BinaryDetection::convert(b'\x00')
|
||||||
|
} else {
|
||||||
|
BinaryDetection::quit(b'\x00')
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the form of binary detection to perform on files that are
|
||||||
|
/// explicitly searched via the user invoking ripgrep on a particular
|
||||||
|
/// file or files or stdin.
|
||||||
|
///
|
||||||
|
/// In general, this should never be BinaryDetection::quit, since that acts
|
||||||
|
/// as a filter (but quitting immediately once a NUL byte is seen), and we
|
||||||
|
/// should never filter out files that the user wants to explicitly search.
|
||||||
|
fn binary_detection_explicit(&self) -> BinaryDetection {
|
||||||
let none =
|
let none =
|
||||||
self.is_present("text")
|
self.is_present("text")
|
||||||
|| self.unrestricted_count() >= 3
|
|
||||||
|| self.is_present("null-data");
|
|| self.is_present("null-data");
|
||||||
if none {
|
if none {
|
||||||
BinaryDetection::none()
|
BinaryDetection::none()
|
||||||
} else {
|
} else {
|
||||||
BinaryDetection::quit(b'\x00')
|
BinaryDetection::convert(b'\x00')
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -952,24 +1055,30 @@ impl ArgMatches {
|
|||||||
u64_to_usize("dfa-size-limit", r)
|
u64_to_usize("dfa-size-limit", r)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the type of encoding to use.
|
/// Returns the encoding mode to use.
|
||||||
///
|
///
|
||||||
/// This only returns an encoding if one is explicitly specified. When no
|
/// This only returns an encoding if one is explicitly specified. Otherwise
|
||||||
/// encoding is present, the Searcher will still do BOM sniffing for UTF-16
|
/// if set to automatic, the Searcher will do BOM sniffing for UTF-16
|
||||||
/// and transcode seamlessly.
|
/// and transcode seamlessly. If disabled, no BOM sniffing nor transcoding
|
||||||
fn encoding(&self) -> Result<Option<Encoding>> {
|
/// will occur.
|
||||||
|
fn encoding(&self) -> Result<EncodingMode> {
|
||||||
if self.is_present("no-encoding") {
|
if self.is_present("no-encoding") {
|
||||||
return Ok(None);
|
return Ok(EncodingMode::Auto);
|
||||||
}
|
}
|
||||||
|
|
||||||
let label = match self.value_of_lossy("encoding") {
|
let label = match self.value_of_lossy("encoding") {
|
||||||
None if self.pcre2_unicode() => "utf-8".to_string(),
|
None if self.pcre2_unicode() => "utf-8".to_string(),
|
||||||
None => return Ok(None),
|
None => return Ok(EncodingMode::Auto),
|
||||||
Some(label) => label,
|
Some(label) => label,
|
||||||
};
|
};
|
||||||
|
|
||||||
if label == "auto" {
|
if label == "auto" {
|
||||||
return Ok(None);
|
return Ok(EncodingMode::Auto);
|
||||||
|
} else if label == "none" {
|
||||||
|
return Ok(EncodingMode::Disabled);
|
||||||
}
|
}
|
||||||
Ok(Some(Encoding::new(&label)?))
|
|
||||||
|
Ok(EncodingMode::Some(Encoding::new(&label)?))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the file separator to use based on the CLI configuration.
|
/// Return the file separator to use based on the CLI configuration.
|
||||||
@@ -1066,6 +1175,12 @@ impl ArgMatches {
|
|||||||
Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64))
|
Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if a preview should be shown for lines that
|
||||||
|
/// exceed the maximum column limit.
|
||||||
|
fn max_columns_preview(&self) -> bool {
|
||||||
|
self.is_present("max-columns-preview")
|
||||||
|
}
|
||||||
|
|
||||||
/// The maximum number of matches permitted.
|
/// The maximum number of matches permitted.
|
||||||
fn max_count(&self) -> Result<Option<u64>> {
|
fn max_count(&self) -> Result<Option<u64>> {
|
||||||
Ok(self.usize_of("max-count")?.map(|n| n as u64))
|
Ok(self.usize_of("max-count")?.map(|n| n as u64))
|
||||||
@@ -1153,6 +1268,10 @@ impl ArgMatches {
|
|||||||
/// Builds the set of glob overrides from the command line flags.
|
/// Builds the set of glob overrides from the command line flags.
|
||||||
fn overrides(&self) -> Result<Override> {
|
fn overrides(&self) -> Result<Override> {
|
||||||
let mut builder = OverrideBuilder::new(env::current_dir()?);
|
let mut builder = OverrideBuilder::new(env::current_dir()?);
|
||||||
|
// Make all globs case insensitive with --glob-case-insensitive.
|
||||||
|
if self.is_present("glob-case-insensitive") {
|
||||||
|
builder.case_insensitive(true).unwrap();
|
||||||
|
}
|
||||||
for glob in self.values_of_lossy_vec("glob") {
|
for glob in self.values_of_lossy_vec("glob") {
|
||||||
builder.add(&glob)?;
|
builder.add(&glob)?;
|
||||||
}
|
}
|
||||||
@@ -1195,7 +1314,8 @@ impl ArgMatches {
|
|||||||
!cli::is_readable_stdin()
|
!cli::is_readable_stdin()
|
||||||
|| (self.is_present("file") && file_is_stdin)
|
|| (self.is_present("file") && file_is_stdin)
|
||||||
|| self.is_present("files")
|
|| self.is_present("files")
|
||||||
|| self.is_present("type-list");
|
|| self.is_present("type-list")
|
||||||
|
|| self.is_present("pcre2-version");
|
||||||
if search_cwd {
|
if search_cwd {
|
||||||
Path::new("./").to_path_buf()
|
Path::new("./").to_path_buf()
|
||||||
} else {
|
} else {
|
||||||
@@ -1474,10 +1594,11 @@ impl ArgMatches {
|
|||||||
if self.is_present("no-filename") {
|
if self.is_present("no-filename") {
|
||||||
false
|
false
|
||||||
} else {
|
} else {
|
||||||
|
let path_stdin = Path::new("-");
|
||||||
self.is_present("with-filename")
|
self.is_present("with-filename")
|
||||||
|| self.is_present("vimgrep")
|
|| self.is_present("vimgrep")
|
||||||
|| paths.len() > 1
|
|| paths.len() > 1
|
||||||
|| paths.get(0).map_or(false, |p| p.is_dir())
|
|| paths.get(0).map_or(false, |p| p != path_stdin && p.is_dir())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1654,12 +1775,12 @@ where I: IntoIterator<Item=T>,
|
|||||||
if err.use_stderr() {
|
if err.use_stderr() {
|
||||||
return Err(err.into());
|
return Err(err.into());
|
||||||
}
|
}
|
||||||
// Explicitly ignore any error returned by writeln!. The most likely error
|
// Explicitly ignore any error returned by write!. The most likely error
|
||||||
// at this point is a broken pipe error, in which case, we want to ignore
|
// at this point is a broken pipe error, in which case, we want to ignore
|
||||||
// it and exit quietly.
|
// it and exit quietly.
|
||||||
//
|
//
|
||||||
// (This is the point of this helper function. clap's functionality for
|
// (This is the point of this helper function. clap's functionality for
|
||||||
// doing this will panic on a broken pipe error.)
|
// doing this will panic on a broken pipe error.)
|
||||||
let _ = writeln!(io::stdout(), "{}", err);
|
let _ = write!(io::stdout(), "{}", err);
|
||||||
process::exit(0);
|
process::exit(0);
|
||||||
}
|
}
|
||||||
|
@@ -9,7 +9,7 @@ use std::io;
|
|||||||
use std::ffi::OsString;
|
use std::ffi::OsString;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
use bstr::io::BufReadExt;
|
use bstr::{io::BufReadExt, ByteSlice};
|
||||||
use log;
|
use log;
|
||||||
|
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@@ -55,7 +55,7 @@ pub fn args() -> Vec<OsString> {
|
|||||||
/// for each line in addition to successfully parsed arguments.
|
/// for each line in addition to successfully parsed arguments.
|
||||||
fn parse<P: AsRef<Path>>(
|
fn parse<P: AsRef<Path>>(
|
||||||
path: P,
|
path: P,
|
||||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
) -> Result<(Vec<OsString>, Vec<Box<dyn Error>>)> {
|
||||||
let path = path.as_ref();
|
let path = path.as_ref();
|
||||||
match File::open(&path) {
|
match File::open(&path) {
|
||||||
Ok(file) => parse_reader(file),
|
Ok(file) => parse_reader(file),
|
||||||
@@ -76,7 +76,7 @@ fn parse<P: AsRef<Path>>(
|
|||||||
/// in addition to successfully parsed arguments.
|
/// in addition to successfully parsed arguments.
|
||||||
fn parse_reader<R: io::Read>(
|
fn parse_reader<R: io::Read>(
|
||||||
rdr: R,
|
rdr: R,
|
||||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
) -> Result<(Vec<OsString>, Vec<Box<dyn Error>>)> {
|
||||||
let bufrdr = io::BufReader::new(rdr);
|
let bufrdr = io::BufReader::new(rdr);
|
||||||
let (mut args, mut errs) = (vec![], vec![]);
|
let (mut args, mut errs) = (vec![], vec![]);
|
||||||
let mut line_number = 0;
|
let mut line_number = 0;
|
||||||
|
54
src/main.rs
54
src/main.rs
@@ -1,3 +1,4 @@
|
|||||||
|
use std::error;
|
||||||
use std::io::{self, Write};
|
use std::io::{self, Write};
|
||||||
use std::process;
|
use std::process;
|
||||||
use std::sync::{Arc, Mutex};
|
use std::sync::{Arc, Mutex};
|
||||||
@@ -19,7 +20,30 @@ mod path_printer;
|
|||||||
mod search;
|
mod search;
|
||||||
mod subject;
|
mod subject;
|
||||||
|
|
||||||
type Result<T> = ::std::result::Result<T, Box<::std::error::Error>>;
|
// Since Rust no longer uses jemalloc by default, ripgrep will, by default,
|
||||||
|
// use the system allocator. On Linux, this would normally be glibc's
|
||||||
|
// allocator, which is pretty good. In particular, ripgrep does not have a
|
||||||
|
// particularly allocation heavy workload, so there really isn't much
|
||||||
|
// difference (for ripgrep's purposes) between glibc's allocator and jemalloc.
|
||||||
|
//
|
||||||
|
// However, when ripgrep is built with musl, this means ripgrep will use musl's
|
||||||
|
// allocator, which appears to be substantially worse. (musl's goal is not to
|
||||||
|
// have the fastest version of everything. Its goal is to be small and amenable
|
||||||
|
// to static compilation.) Even though ripgrep isn't particularly allocation
|
||||||
|
// heavy, musl's allocator appears to slow down ripgrep quite a bit. Therefore,
|
||||||
|
// when building with musl, we use jemalloc.
|
||||||
|
//
|
||||||
|
// We don't unconditionally use jemalloc because it can be nice to use the
|
||||||
|
// system's default allocator by default. Moreover, jemalloc seems to increase
|
||||||
|
// compilation times by a bit.
|
||||||
|
//
|
||||||
|
// Moreover, we only do this on 64-bit systems since jemalloc doesn't support
|
||||||
|
// i686.
|
||||||
|
#[cfg(all(target_env = "musl", target_pointer_width = "64"))]
|
||||||
|
#[global_allocator]
|
||||||
|
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
|
type Result<T> = ::std::result::Result<T, Box<dyn error::Error>>;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
if let Err(err) = Args::parse().and_then(try_main) {
|
if let Err(err) = Args::parse().and_then(try_main) {
|
||||||
@@ -39,6 +63,7 @@ fn try_main(args: Args) -> Result<()> {
|
|||||||
Files => files(&args),
|
Files => files(&args),
|
||||||
FilesParallel => files_parallel(&args),
|
FilesParallel => files_parallel(&args),
|
||||||
Types => types(&args),
|
Types => types(&args),
|
||||||
|
PCRE2Version => pcre2_version(&args),
|
||||||
}?;
|
}?;
|
||||||
if matched && (args.quiet() || !messages::errored()) {
|
if matched && (args.quiet() || !messages::errored()) {
|
||||||
process::exit(0)
|
process::exit(0)
|
||||||
@@ -275,3 +300,30 @@ fn types(args: &Args) -> Result<bool> {
|
|||||||
}
|
}
|
||||||
Ok(count > 0)
|
Ok(count > 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The top-level entry point for --pcre2-version.
|
||||||
|
fn pcre2_version(args: &Args) -> Result<bool> {
|
||||||
|
#[cfg(feature = "pcre2")]
|
||||||
|
fn imp(args: &Args) -> Result<bool> {
|
||||||
|
use grep::pcre2;
|
||||||
|
|
||||||
|
let mut stdout = args.stdout();
|
||||||
|
|
||||||
|
let (major, minor) = pcre2::version();
|
||||||
|
writeln!(stdout, "PCRE2 {}.{} is available", major, minor)?;
|
||||||
|
|
||||||
|
if cfg!(target_pointer_width = "64") && pcre2::is_jit_available() {
|
||||||
|
writeln!(stdout, "JIT is available")?;
|
||||||
|
}
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(feature = "pcre2"))]
|
||||||
|
fn imp(args: &Args) -> Result<bool> {
|
||||||
|
let mut stdout = args.stdout();
|
||||||
|
writeln!(stdout, "PCRE2 is not available in this build of ripgrep.")?;
|
||||||
|
Ok(false)
|
||||||
|
}
|
||||||
|
|
||||||
|
imp(args)
|
||||||
|
}
|
||||||
|
@@ -10,7 +10,7 @@ use grep::matcher::Matcher;
|
|||||||
use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
|
use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
|
||||||
use grep::printer::{JSON, Standard, Summary, Stats};
|
use grep::printer::{JSON, Standard, Summary, Stats};
|
||||||
use grep::regex::{RegexMatcher as RustRegexMatcher};
|
use grep::regex::{RegexMatcher as RustRegexMatcher};
|
||||||
use grep::searcher::Searcher;
|
use grep::searcher::{BinaryDetection, Searcher};
|
||||||
use ignore::overrides::Override;
|
use ignore::overrides::Override;
|
||||||
use serde_json as json;
|
use serde_json as json;
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
@@ -27,6 +27,8 @@ struct Config {
|
|||||||
preprocessor: Option<PathBuf>,
|
preprocessor: Option<PathBuf>,
|
||||||
preprocessor_globs: Override,
|
preprocessor_globs: Override,
|
||||||
search_zip: bool,
|
search_zip: bool,
|
||||||
|
binary_implicit: BinaryDetection,
|
||||||
|
binary_explicit: BinaryDetection,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
@@ -36,6 +38,8 @@ impl Default for Config {
|
|||||||
preprocessor: None,
|
preprocessor: None,
|
||||||
preprocessor_globs: Override::empty(),
|
preprocessor_globs: Override::empty(),
|
||||||
search_zip: false,
|
search_zip: false,
|
||||||
|
binary_implicit: BinaryDetection::none(),
|
||||||
|
binary_explicit: BinaryDetection::none(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -134,6 +138,37 @@ impl SearchWorkerBuilder {
|
|||||||
self.config.search_zip = yes;
|
self.config.search_zip = yes;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the binary detection that should be used when searching files
|
||||||
|
/// found via a recursive directory search.
|
||||||
|
///
|
||||||
|
/// Generally, this binary detection may be `BinaryDetection::quit` if
|
||||||
|
/// we want to skip binary files completely.
|
||||||
|
///
|
||||||
|
/// By default, no binary detection is performed.
|
||||||
|
pub fn binary_detection_implicit(
|
||||||
|
&mut self,
|
||||||
|
detection: BinaryDetection,
|
||||||
|
) -> &mut SearchWorkerBuilder {
|
||||||
|
self.config.binary_implicit = detection;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the binary detection that should be used when searching files
|
||||||
|
/// explicitly supplied by an end user.
|
||||||
|
///
|
||||||
|
/// Generally, this binary detection should NOT be `BinaryDetection::quit`,
|
||||||
|
/// since we never want to automatically filter files supplied by the end
|
||||||
|
/// user.
|
||||||
|
///
|
||||||
|
/// By default, no binary detection is performed.
|
||||||
|
pub fn binary_detection_explicit(
|
||||||
|
&mut self,
|
||||||
|
detection: BinaryDetection,
|
||||||
|
) -> &mut SearchWorkerBuilder {
|
||||||
|
self.config.binary_explicit = detection;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The result of executing a search.
|
/// The result of executing a search.
|
||||||
@@ -280,7 +315,24 @@ pub struct SearchWorker<W> {
|
|||||||
impl<W: WriteColor> SearchWorker<W> {
|
impl<W: WriteColor> SearchWorker<W> {
|
||||||
/// Execute a search over the given subject.
|
/// Execute a search over the given subject.
|
||||||
pub fn search(&mut self, subject: &Subject) -> io::Result<SearchResult> {
|
pub fn search(&mut self, subject: &Subject) -> io::Result<SearchResult> {
|
||||||
self.search_impl(subject)
|
let bin =
|
||||||
|
if subject.is_explicit() {
|
||||||
|
self.config.binary_explicit.clone()
|
||||||
|
} else {
|
||||||
|
self.config.binary_implicit.clone()
|
||||||
|
};
|
||||||
|
self.searcher.set_binary_detection(bin);
|
||||||
|
|
||||||
|
let path = subject.path();
|
||||||
|
if subject.is_stdin() {
|
||||||
|
self.search_reader(path, io::stdin().lock())
|
||||||
|
} else if self.should_preprocess(path) {
|
||||||
|
self.search_preprocessor(path)
|
||||||
|
} else if self.should_decompress(path) {
|
||||||
|
self.search_decompress(path)
|
||||||
|
} else {
|
||||||
|
self.search_path(path)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a mutable reference to the underlying printer.
|
/// Return a mutable reference to the underlying printer.
|
||||||
@@ -306,22 +358,6 @@ impl<W: WriteColor> SearchWorker<W> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Search the given subject using the appropriate strategy.
|
|
||||||
fn search_impl(&mut self, subject: &Subject) -> io::Result<SearchResult> {
|
|
||||||
let path = subject.path();
|
|
||||||
if subject.is_stdin() {
|
|
||||||
let stdin = io::stdin();
|
|
||||||
// A `return` here appeases the borrow checker. NLL will fix this.
|
|
||||||
return self.search_reader(path, stdin.lock());
|
|
||||||
} else if self.should_preprocess(path) {
|
|
||||||
self.search_preprocessor(path)
|
|
||||||
} else if self.should_decompress(path) {
|
|
||||||
self.search_decompress(path)
|
|
||||||
} else {
|
|
||||||
self.search_path(path)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns true if and only if the given file path should be
|
/// Returns true if and only if the given file path should be
|
||||||
/// decompressed before searching.
|
/// decompressed before searching.
|
||||||
fn should_decompress(&self, path: &Path) -> bool {
|
fn should_decompress(&self, path: &Path) -> bool {
|
||||||
@@ -349,11 +385,23 @@ impl<W: WriteColor> SearchWorker<W> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
) -> io::Result<SearchResult> {
|
) -> io::Result<SearchResult> {
|
||||||
let bin = self.config.preprocessor.clone().unwrap();
|
let bin = self.config.preprocessor.as_ref().unwrap();
|
||||||
let mut cmd = Command::new(&bin);
|
let mut cmd = Command::new(bin);
|
||||||
cmd.arg(path).stdin(Stdio::from(File::open(path)?));
|
cmd.arg(path).stdin(Stdio::from(File::open(path)?));
|
||||||
|
|
||||||
let rdr = self.command_builder.build(&mut cmd)?;
|
let rdr = self
|
||||||
|
.command_builder
|
||||||
|
.build(&mut cmd)
|
||||||
|
.map_err(|err| {
|
||||||
|
io::Error::new(
|
||||||
|
io::ErrorKind::Other,
|
||||||
|
format!(
|
||||||
|
"preprocessor command could not start: '{:?}': {}",
|
||||||
|
cmd,
|
||||||
|
err,
|
||||||
|
),
|
||||||
|
)
|
||||||
|
})?;
|
||||||
self.search_reader(path, rdr).map_err(|err| {
|
self.search_reader(path, rdr).map_err(|err| {
|
||||||
io::Error::new(
|
io::Error::new(
|
||||||
io::ErrorKind::Other,
|
io::ErrorKind::Other,
|
||||||
|
@@ -59,17 +59,12 @@ impl SubjectBuilder {
|
|||||||
if let Some(ignore_err) = subj.dent.error() {
|
if let Some(ignore_err) = subj.dent.error() {
|
||||||
ignore_message!("{}", ignore_err);
|
ignore_message!("{}", ignore_err);
|
||||||
}
|
}
|
||||||
// If this entry represents stdin, then we always search it.
|
// If this entry was explicitly provided by an end user, then we always
|
||||||
if subj.dent.is_stdin() {
|
// want to search it.
|
||||||
|
if subj.is_explicit() {
|
||||||
return Some(subj);
|
return Some(subj);
|
||||||
}
|
}
|
||||||
// If this subject has a depth of 0, then it was provided explicitly
|
// At this point, we only want to search something if it's explicitly a
|
||||||
// by an end user (or via a shell glob). In this case, we always want
|
|
||||||
// to search it if it even smells like a file (e.g., a symlink).
|
|
||||||
if subj.dent.depth() == 0 && !subj.is_dir() {
|
|
||||||
return Some(subj);
|
|
||||||
}
|
|
||||||
// At this point, we only want to search something it's explicitly a
|
|
||||||
// file. This omits symlinks. (If ripgrep was configured to follow
|
// file. This omits symlinks. (If ripgrep was configured to follow
|
||||||
// symlinks, then they have already been followed by the directory
|
// symlinks, then they have already been followed by the directory
|
||||||
// traversal.)
|
// traversal.)
|
||||||
@@ -127,6 +122,26 @@ impl Subject {
|
|||||||
self.dent.is_stdin()
|
self.dent.is_stdin()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this entry corresponds to a subject to
|
||||||
|
/// search that was explicitly supplied by an end user.
|
||||||
|
///
|
||||||
|
/// Generally, this corresponds to either stdin or an explicit file path
|
||||||
|
/// argument. e.g., in `rg foo some-file ./some-dir/`, `some-file` is
|
||||||
|
/// an explicit subject, but, e.g., `./some-dir/some-other-file` is not.
|
||||||
|
///
|
||||||
|
/// However, note that ripgrep does not see through shell globbing. e.g.,
|
||||||
|
/// in `rg foo ./some-dir/*`, `./some-dir/some-other-file` will be treated
|
||||||
|
/// as an explicit subject.
|
||||||
|
pub fn is_explicit(&self) -> bool {
|
||||||
|
// stdin is obvious. When an entry has a depth of 0, that means it
|
||||||
|
// was explicitly provided to our directory iterator, which means it
|
||||||
|
// was in turn explicitly provided by the end user. The !is_dir check
|
||||||
|
// means that we want to search files even if they're symlinks, again,
|
||||||
|
// because they were explicitly provided. (And we never want to try
|
||||||
|
// to search a directory.)
|
||||||
|
self.is_stdin() || (self.dent.depth() == 0 && !self.is_dir())
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if and only if this subject points to a directory after
|
/// Returns true if and only if this subject points to a directory after
|
||||||
/// following symbolic links.
|
/// following symbolic links.
|
||||||
fn is_dir(&self) -> bool {
|
fn is_dir(&self) -> bool {
|
||||||
|
@@ -1,2 +0,0 @@
|
|||||||
termcolor has moved to its own repository:
|
|
||||||
https://github.com/BurntSushi/termcolor
|
|
315
tests/binary.rs
Normal file
315
tests/binary.rs
Normal file
@@ -0,0 +1,315 @@
|
|||||||
|
use crate::util::{Dir, TestCommand};
|
||||||
|
|
||||||
|
// This file contains a smattering of tests specifically for checking ripgrep's
|
||||||
|
// handling of binary files. There's quite a bit of discussion on this in this
|
||||||
|
// bug report: https://github.com/BurntSushi/ripgrep/issues/306
|
||||||
|
|
||||||
|
// Our haystack is the first 500 lines of Gutenberg's copy of "A Study in
|
||||||
|
// Scarlet," with a NUL byte at line 237: `abcdef\x00`.
|
||||||
|
//
|
||||||
|
// The position and size of the haystack is, unfortunately, significant. In
|
||||||
|
// particular, the NUL byte is specifically inserted at some point *after* the
|
||||||
|
// first 8192 bytes, which corresponds to the initial capacity of the buffer
|
||||||
|
// that ripgrep uses to read files. (grep for DEFAULT_BUFFER_CAPACITY.) The
|
||||||
|
// position of the NUL byte ensures that we can execute some search on the
|
||||||
|
// initial buffer contents without ever detecting any binary data. Moreover,
|
||||||
|
// when using a memory map for searching, only the first 8192 bytes are
|
||||||
|
// scanned for a NUL byte, so no binary bytes are detected at all when using
|
||||||
|
// a memory map (unless our query matches line 237).
|
||||||
|
//
|
||||||
|
// One last note: in the tests below, we use --no-mmap heavily because binary
|
||||||
|
// detection with memory maps is a bit different. Namely, NUL bytes are only
|
||||||
|
// searched for in the first few KB of the file and in a match. Normally, NUL
|
||||||
|
// bytes are searched for everywhere.
|
||||||
|
//
|
||||||
|
// TODO: Add tests for binary file detection when using memory maps.
|
||||||
|
const HAY: &'static [u8] = include_bytes!("./data/sherlock-nul.txt");
|
||||||
|
|
||||||
|
// This tests that ripgrep prints a warning message if it finds and prints a
|
||||||
|
// match in a binary file before detecting that it is a binary file. The point
|
||||||
|
// here is to notify the user that the search of the file is only partially
|
||||||
|
// complete.
|
||||||
|
//
|
||||||
|
// This applies to files that are *implicitly* searched via a recursive
|
||||||
|
// directory traversal. In particular, this results in a WARNING message being
|
||||||
|
// printed. We make our file "implicit" by doing a recursive search with a glob
|
||||||
|
// that matches our file.
|
||||||
|
rgtest!(after_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "Project Gutenberg EBook", "-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
WARNING: stopped searching binary file hay after match (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit, except we provide a file to search
|
||||||
|
// explicitly. This results in identical behavior, but a different message.
|
||||||
|
rgtest!(after_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "Project Gutenberg EBook", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
Binary file matches (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_explicit, except we feed our content on stdin.
|
||||||
|
rgtest!(after_match1_stdin, |_: Dir, mut cmd: TestCommand| {
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "Project Gutenberg EBook",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
Binary file matches (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.pipe(HAY));
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit, but provides the --binary flag, which
|
||||||
|
// disables binary filtering. Thus, this matches the behavior of ripgrep as
|
||||||
|
// if the file were given explicitly.
|
||||||
|
rgtest!(after_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--binary", "Project Gutenberg EBook", "-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
Binary file hay matches (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit, but enables -a/--text, so no binary
|
||||||
|
// detection should be performed.
|
||||||
|
rgtest!(after_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--text", "Project Gutenberg EBook", "-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_explicit, but enables -a/--text, so no binary
|
||||||
|
// detection should be performed.
|
||||||
|
rgtest!(after_match1_explicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--text", "Project Gutenberg EBook", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit, except this asks ripgrep to print all matching
|
||||||
|
// files.
|
||||||
|
//
|
||||||
|
// This is an interesting corner case that one might consider a bug, however,
|
||||||
|
// it's unlikely to be fixed. Namely, ripgrep probably shouldn't print `hay`
|
||||||
|
// as a matching file since it is in fact a binary file, and thus should be
|
||||||
|
// filtered out by default. However, the --files-with-matches flag will print
|
||||||
|
// out the path of a matching file as soon as a match is seen and then stop
|
||||||
|
// searching completely. Therefore, the NUL byte is never actually detected.
|
||||||
|
//
|
||||||
|
// The only way to fix this would be to kill ripgrep's performance in this case
|
||||||
|
// and continue searching the entire file for a NUL byte. (Similarly if the
|
||||||
|
// --quiet flag is set. See the next test.)
|
||||||
|
rgtest!(after_match1_implicit_path, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-l", "Project Gutenberg EBook", "-g", "hay",
|
||||||
|
]);
|
||||||
|
eqnice!("hay\n", cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit_path, except this indicates that a match was
|
||||||
|
// found with no other output. (This is the same bug described above, but
|
||||||
|
// manifests as an exit code with no output.)
|
||||||
|
rgtest!(after_match1_implicit_quiet, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-q", "Project Gutenberg EBook", "-g", "hay",
|
||||||
|
]);
|
||||||
|
eqnice!("", cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// This sets up the same test as after_match1_implicit_path, but instead of
|
||||||
|
// just printing the matching files, this includes the full count of matches.
|
||||||
|
// In this case, we need to search the entire file, so ripgrep correctly
|
||||||
|
// detects the binary data and suppresses output.
|
||||||
|
rgtest!(after_match1_implicit_count, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-c", "Project Gutenberg EBook", "-g", "hay",
|
||||||
|
]);
|
||||||
|
cmd.assert_err();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit_count, except the --binary flag is provided,
|
||||||
|
// which makes ripgrep disable binary data filtering even for implicit files.
|
||||||
|
rgtest!(after_match1_implicit_count_binary, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-c", "--binary",
|
||||||
|
"Project Gutenberg EBook",
|
||||||
|
"-g", "hay",
|
||||||
|
]);
|
||||||
|
eqnice!("hay:1\n", cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match1_implicit_count, except the file path is provided
|
||||||
|
// explicitly, so binary filtering is disabled and a count is correctly
|
||||||
|
// reported.
|
||||||
|
rgtest!(after_match1_explicit_count, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-c", "Project Gutenberg EBook", "hay",
|
||||||
|
]);
|
||||||
|
eqnice!("1\n", cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// This tests that a match way before the NUL byte is shown, but a match after
|
||||||
|
// the NUL byte is not.
|
||||||
|
rgtest!(after_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n",
|
||||||
|
"Project Gutenberg EBook|a medical student",
|
||||||
|
"-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
WARNING: stopped searching binary file hay after match (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like after_match2_implicit, but enables -a/--text, so no binary
|
||||||
|
// detection should be performed.
|
||||||
|
rgtest!(after_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--text",
|
||||||
|
"Project Gutenberg EBook|a medical student",
|
||||||
|
"-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
hay:236:\"And yet you say he is not a medical student?\"
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// This tests that ripgrep *silently* quits before finding a match that occurs
|
||||||
|
// after a NUL byte.
|
||||||
|
rgtest!(before_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "Heaven", "-g", "hay",
|
||||||
|
]);
|
||||||
|
cmd.assert_err();
|
||||||
|
});
|
||||||
|
|
||||||
|
// This tests that ripgrep *does not* silently quit before finding a match that
|
||||||
|
// occurs after a NUL byte when a file is explicitly searched.
|
||||||
|
rgtest!(before_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "Heaven", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
Binary file matches (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like before_match1_implicit, but enables the --binary flag, which
|
||||||
|
// disables binary filtering. Thus, this matches the behavior of ripgrep as if
|
||||||
|
// the file were given explicitly.
|
||||||
|
rgtest!(before_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--binary", "Heaven", "-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
Binary file hay matches (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like before_match1_implicit, but enables -a/--text, so no binary
|
||||||
|
// detection should be performed.
|
||||||
|
rgtest!(before_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--text", "Heaven", "-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:238:\"No. Heaven knows what the objects of his studies are. But here we
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// This tests that ripgrep *silently* quits before finding a match that occurs
|
||||||
|
// before a NUL byte, but within the same buffer as the NUL byte.
|
||||||
|
rgtest!(before_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "a medical student", "-g", "hay",
|
||||||
|
]);
|
||||||
|
cmd.assert_err();
|
||||||
|
});
|
||||||
|
|
||||||
|
// This tests that ripgrep *does not* silently quit before finding a match that
|
||||||
|
// occurs before a NUL byte, but within the same buffer as the NUL byte. Even
|
||||||
|
// though the match occurs before the NUL byte, ripgrep still doesn't print it
|
||||||
|
// because it has already scanned ahead to detect the NUL byte. (This matches
|
||||||
|
// the behavior of GNU grep.)
|
||||||
|
rgtest!(before_match2_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "a medical student", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
Binary file matches (found \"\\u{0}\" byte around offset 9741)
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// Like before_match2_implicit, but enables -a/--text, so no binary
|
||||||
|
// detection should be performed.
|
||||||
|
rgtest!(before_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create_bytes("hay", HAY);
|
||||||
|
cmd.args(&[
|
||||||
|
"--no-mmap", "-n", "--text", "a medical student", "-g", "hay",
|
||||||
|
]);
|
||||||
|
|
||||||
|
let expected = "\
|
||||||
|
hay:236:\"And yet you say he is not a medical student?\"
|
||||||
|
";
|
||||||
|
eqnice!(expected, cmd.stdout());
|
||||||
|
});
|
500
tests/data/sherlock-nul.txt
Normal file
500
tests/data/sherlock-nul.txt
Normal file
@@ -0,0 +1,500 @@
|
|||||||
|
The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||||
|
|
||||||
|
This eBook is for the use of anyone anywhere at no cost and with
|
||||||
|
almost no restrictions whatsoever. You may copy it, give it away or
|
||||||
|
re-use it under the terms of the Project Gutenberg License included
|
||||||
|
with this eBook or online at www.gutenberg.org
|
||||||
|
|
||||||
|
|
||||||
|
Title: A Study In Scarlet
|
||||||
|
|
||||||
|
Author: Arthur Conan Doyle
|
||||||
|
|
||||||
|
Posting Date: July 12, 2008 [EBook #244]
|
||||||
|
Release Date: April, 1995
|
||||||
|
[Last updated: February 17, 2013]
|
||||||
|
|
||||||
|
Language: English
|
||||||
|
|
||||||
|
|
||||||
|
*** START OF THIS PROJECT GUTENBERG EBOOK A STUDY IN SCARLET ***
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Produced by Roger Squires
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
A STUDY IN SCARLET.
|
||||||
|
|
||||||
|
By A. Conan Doyle
|
||||||
|
|
||||||
|
[1]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Original Transcriber's Note: This etext is prepared directly
|
||||||
|
from an 1887 edition, and care has been taken to duplicate the
|
||||||
|
original exactly, including typographical and punctuation
|
||||||
|
vagaries.
|
||||||
|
|
||||||
|
Additions to the text include adding the underscore character to
|
||||||
|
indicate italics, and textual end-notes in square braces.
|
||||||
|
|
||||||
|
Project Gutenberg Editor's Note: In reproofing and moving old PG
|
||||||
|
files such as this to the present PG directory system it is the
|
||||||
|
policy to reformat the text to conform to present PG Standards.
|
||||||
|
In this case however, in consideration of the note above of the
|
||||||
|
original transcriber describing his care to try to duplicate the
|
||||||
|
original 1887 edition as to typography and punctuation vagaries,
|
||||||
|
no changes have been made in this ascii text file. However, in
|
||||||
|
the Latin-1 file and this html file, present standards are
|
||||||
|
followed and the several French and Spanish words have been
|
||||||
|
given their proper accents.
|
||||||
|
|
||||||
|
Part II, The Country of the Saints, deals much with the Mormon Church.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
A STUDY IN SCARLET.
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
PART I.
|
||||||
|
|
||||||
|
(_Being a reprint from the reminiscences of_ JOHN H. WATSON, M.D., _late
|
||||||
|
of the Army Medical Department._) [2]
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
CHAPTER I. MR. SHERLOCK HOLMES.
|
||||||
|
|
||||||
|
|
||||||
|
IN the year 1878 I took my degree of Doctor of Medicine of the
|
||||||
|
University of London, and proceeded to Netley to go through the course
|
||||||
|
prescribed for surgeons in the army. Having completed my studies there,
|
||||||
|
I was duly attached to the Fifth Northumberland Fusiliers as Assistant
|
||||||
|
Surgeon. The regiment was stationed in India at the time, and before
|
||||||
|
I could join it, the second Afghan war had broken out. On landing at
|
||||||
|
Bombay, I learned that my corps had advanced through the passes, and
|
||||||
|
was already deep in the enemy's country. I followed, however, with many
|
||||||
|
other officers who were in the same situation as myself, and succeeded
|
||||||
|
in reaching Candahar in safety, where I found my regiment, and at once
|
||||||
|
entered upon my new duties.
|
||||||
|
|
||||||
|
The campaign brought honours and promotion to many, but for me it had
|
||||||
|
nothing but misfortune and disaster. I was removed from my brigade and
|
||||||
|
attached to the Berkshires, with whom I served at the fatal battle of
|
||||||
|
Maiwand. There I was struck on the shoulder by a Jezail bullet, which
|
||||||
|
shattered the bone and grazed the subclavian artery. I should have
|
||||||
|
fallen into the hands of the murderous Ghazis had it not been for the
|
||||||
|
devotion and courage shown by Murray, my orderly, who threw me across a
|
||||||
|
pack-horse, and succeeded in bringing me safely to the British lines.
|
||||||
|
|
||||||
|
Worn with pain, and weak from the prolonged hardships which I had
|
||||||
|
undergone, I was removed, with a great train of wounded sufferers, to
|
||||||
|
the base hospital at Peshawar. Here I rallied, and had already improved
|
||||||
|
so far as to be able to walk about the wards, and even to bask a little
|
||||||
|
upon the verandah, when I was struck down by enteric fever, that curse
|
||||||
|
of our Indian possessions. For months my life was despaired of, and
|
||||||
|
when at last I came to myself and became convalescent, I was so weak and
|
||||||
|
emaciated that a medical board determined that not a day should be lost
|
||||||
|
in sending me back to England. I was dispatched, accordingly, in the
|
||||||
|
troopship "Orontes," and landed a month later on Portsmouth jetty, with
|
||||||
|
my health irretrievably ruined, but with permission from a paternal
|
||||||
|
government to spend the next nine months in attempting to improve it.
|
||||||
|
|
||||||
|
I had neither kith nor kin in England, and was therefore as free as
|
||||||
|
air--or as free as an income of eleven shillings and sixpence a day will
|
||||||
|
permit a man to be. Under such circumstances, I naturally gravitated to
|
||||||
|
London, that great cesspool into which all the loungers and idlers of
|
||||||
|
the Empire are irresistibly drained. There I stayed for some time at
|
||||||
|
a private hotel in the Strand, leading a comfortless, meaningless
|
||||||
|
existence, and spending such money as I had, considerably more freely
|
||||||
|
than I ought. So alarming did the state of my finances become, that
|
||||||
|
I soon realized that I must either leave the metropolis and rusticate
|
||||||
|
somewhere in the country, or that I must make a complete alteration in
|
||||||
|
my style of living. Choosing the latter alternative, I began by making
|
||||||
|
up my mind to leave the hotel, and to take up my quarters in some less
|
||||||
|
pretentious and less expensive domicile.
|
||||||
|
|
||||||
|
On the very day that I had come to this conclusion, I was standing at
|
||||||
|
the Criterion Bar, when some one tapped me on the shoulder, and turning
|
||||||
|
round I recognized young Stamford, who had been a dresser under me at
|
||||||
|
Barts. The sight of a friendly face in the great wilderness of London is
|
||||||
|
a pleasant thing indeed to a lonely man. In old days Stamford had never
|
||||||
|
been a particular crony of mine, but now I hailed him with enthusiasm,
|
||||||
|
and he, in his turn, appeared to be delighted to see me. In the
|
||||||
|
exuberance of my joy, I asked him to lunch with me at the Holborn, and
|
||||||
|
we started off together in a hansom.
|
||||||
|
|
||||||
|
"Whatever have you been doing with yourself, Watson?" he asked in
|
||||||
|
undisguised wonder, as we rattled through the crowded London streets.
|
||||||
|
"You are as thin as a lath and as brown as a nut."
|
||||||
|
|
||||||
|
I gave him a short sketch of my adventures, and had hardly concluded it
|
||||||
|
by the time that we reached our destination.
|
||||||
|
|
||||||
|
"Poor devil!" he said, commiseratingly, after he had listened to my
|
||||||
|
misfortunes. "What are you up to now?"
|
||||||
|
|
||||||
|
"Looking for lodgings." [3] I answered. "Trying to solve the problem
|
||||||
|
as to whether it is possible to get comfortable rooms at a reasonable
|
||||||
|
price."
|
||||||
|
|
||||||
|
"That's a strange thing," remarked my companion; "you are the second man
|
||||||
|
to-day that has used that expression to me."
|
||||||
|
|
||||||
|
"And who was the first?" I asked.
|
||||||
|
|
||||||
|
"A fellow who is working at the chemical laboratory up at the hospital.
|
||||||
|
He was bemoaning himself this morning because he could not get someone
|
||||||
|
to go halves with him in some nice rooms which he had found, and which
|
||||||
|
were too much for his purse."
|
||||||
|
|
||||||
|
"By Jove!" I cried, "if he really wants someone to share the rooms and
|
||||||
|
the expense, I am the very man for him. I should prefer having a partner
|
||||||
|
to being alone."
|
||||||
|
|
||||||
|
Young Stamford looked rather strangely at me over his wine-glass. "You
|
||||||
|
don't know Sherlock Holmes yet," he said; "perhaps you would not care
|
||||||
|
for him as a constant companion."
|
||||||
|
|
||||||
|
"Why, what is there against him?"
|
||||||
|
|
||||||
|
"Oh, I didn't say there was anything against him. He is a little queer
|
||||||
|
in his ideas--an enthusiast in some branches of science. As far as I
|
||||||
|
know he is a decent fellow enough."
|
||||||
|
|
||||||
|
"A medical student, I suppose?" said I.
|
||||||
|
|
||||||
|
"No--I have no idea what he intends to go in for. I believe he is well
|
||||||
|
up in anatomy, and he is a first-class chemist; but, as far as I know,
|
||||||
|
he has never taken out any systematic medical classes. His studies are
|
||||||
|
very desultory and eccentric, but he has amassed a lot of out-of-the way
|
||||||
|
knowledge which would astonish his professors."
|
||||||
|
|
||||||
|
"Did you never ask him what he was going in for?" I asked.
|
||||||
|
|
||||||
|
"No; he is not a man that it is easy to draw out, though he can be
|
||||||
|
communicative enough when the fancy seizes him."
|
||||||
|
|
||||||
|
"I should like to meet him," I said. "If I am to lodge with anyone, I
|
||||||
|
should prefer a man of studious and quiet habits. I am not strong
|
||||||
|
enough yet to stand much noise or excitement. I had enough of both in
|
||||||
|
Afghanistan to last me for the remainder of my natural existence. How
|
||||||
|
could I meet this friend of yours?"
|
||||||
|
|
||||||
|
"He is sure to be at the laboratory," returned my companion. "He either
|
||||||
|
avoids the place for weeks, or else he works there from morning to
|
||||||
|
night. If you like, we shall drive round together after luncheon."
|
||||||
|
|
||||||
|
"Certainly," I answered, and the conversation drifted away into other
|
||||||
|
channels.
|
||||||
|
|
||||||
|
As we made our way to the hospital after leaving the Holborn, Stamford
|
||||||
|
gave me a few more particulars about the gentleman whom I proposed to
|
||||||
|
take as a fellow-lodger.
|
||||||
|
|
||||||
|
"You mustn't blame me if you don't get on with him," he said; "I know
|
||||||
|
nothing more of him than I have learned from meeting him occasionally in
|
||||||
|
the laboratory. You proposed this arrangement, so you must not hold me
|
||||||
|
responsible."
|
||||||
|
|
||||||
|
"If we don't get on it will be easy to part company," I answered. "It
|
||||||
|
seems to me, Stamford," I added, looking hard at my companion, "that you
|
||||||
|
have some reason for washing your hands of the matter. Is this fellow's
|
||||||
|
temper so formidable, or what is it? Don't be mealy-mouthed about it."
|
||||||
|
|
||||||
|
"It is not easy to express the inexpressible," he answered with a laugh.
|
||||||
|
"Holmes is a little too scientific for my tastes--it approaches to
|
||||||
|
cold-bloodedness. I could imagine his giving a friend a little pinch of
|
||||||
|
the latest vegetable alkaloid, not out of malevolence, you understand,
|
||||||
|
but simply out of a spirit of inquiry in order to have an accurate idea
|
||||||
|
of the effects. To do him justice, I think that he would take it himself
|
||||||
|
with the same readiness. He appears to have a passion for definite and
|
||||||
|
exact knowledge."
|
||||||
|
|
||||||
|
"Very right too."
|
||||||
|
|
||||||
|
"Yes, but it may be pushed to excess. When it comes to beating the
|
||||||
|
subjects in the dissecting-rooms with a stick, it is certainly taking
|
||||||
|
rather a bizarre shape."
|
||||||
|
|
||||||
|
"Beating the subjects!"
|
||||||
|
|
||||||
|
"Yes, to verify how far bruises may be produced after death. I saw him
|
||||||
|
at it with my own eyes."
|
||||||
|
|
||||||
|
"And yet you say he is not a medical student?"
|
||||||
|
abcdef |