mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-28 10:41:58 -07:00
Compare commits
108 Commits
grep-regex
...
ag/partial
Author | SHA1 | Date | |
---|---|---|---|
|
a872d33714 | ||
|
f08f274c5f | ||
|
db7e828989 | ||
|
fb6cad7152 | ||
|
8e1d40ed7d | ||
|
b1c064d5af | ||
|
26a83c6301 | ||
|
5e50a3c43c | ||
|
85417e52e9 | ||
|
bef1f0e770 | ||
|
cd9815cb37 | ||
|
3f22c3a658 | ||
|
0913972104 | ||
|
f19b84fb23 | ||
|
59fc583aeb | ||
|
1c7c4e6640 | ||
|
69c5e3938d | ||
|
d9cf05ad50 | ||
|
af8b6caebb | ||
|
c84cfb6756 | ||
|
895e26a000 | ||
|
8c95290ff6 | ||
|
d6feeb7ff2 | ||
|
626ed00c19 | ||
|
332ad18401 | ||
|
fc3cf41247 | ||
|
a4868b8835 | ||
|
f99b991117 | ||
|
de0bc78982 | ||
|
147e96914c | ||
|
0abc40c23c | ||
|
f768796e4f | ||
|
da0c0c4705 | ||
|
05411b2b32 | ||
|
cc93db3b18 | ||
|
049354b766 | ||
|
386dd2806d | ||
|
5fe9a954e6 | ||
|
f158a42a71 | ||
|
5724391d39 | ||
|
0df71240ff | ||
|
f3164f2615 | ||
|
31d3e24130 | ||
|
bf842dbc7f | ||
|
6d5dba85bd | ||
|
afb89bcdad | ||
|
332dc56372 | ||
|
12a6ca45f9 | ||
|
9d703110cf | ||
|
e99b6bda0e | ||
|
276e2c9b9a | ||
|
9a9f54d44c | ||
|
47833b9ce7 | ||
|
44a9e37737 | ||
|
8fd05cacee | ||
|
4691d11034 | ||
|
519a6b68af | ||
|
9c940b45f4 | ||
|
0a167021c3 | ||
|
aeaa5fc1b1 | ||
|
7048a06c31 | ||
|
23be3cf850 | ||
|
b48bbf527d | ||
|
8eabe47b57 | ||
|
ff712bfd9d | ||
|
a7f2d48234 | ||
|
57500ad013 | ||
|
0b04553aff | ||
|
1ae121122f | ||
|
688003e51c | ||
|
718a00f6f2 | ||
|
7cbc535d70 | ||
|
7a6a40bae1 | ||
|
1e9ee2cc85 | ||
|
968491f8e9 | ||
|
63b0f31a22 | ||
|
7ecee299a5 | ||
|
dd396ff34e | ||
|
fb0a82f3c3 | ||
|
dbc8ca9cc1 | ||
|
c3db8db93d | ||
|
17ef4c40f3 | ||
|
a9e0477ea8 | ||
|
b3c5773266 | ||
|
118b950085 | ||
|
b45b2f58ea | ||
|
662a9bc73d | ||
|
401add0a99 | ||
|
f81b72721b | ||
|
1d4fccaadc | ||
|
09e464e674 | ||
|
31adff6f3c | ||
|
b41e596327 | ||
|
fb62266620 | ||
|
acf226c39d | ||
|
8299625e48 | ||
|
db256c87eb | ||
|
ba533f390e | ||
|
ba503eb677 | ||
|
f72c2dfd90 | ||
|
c0aa58b4f7 | ||
|
184ee4c328 | ||
|
e82fbf2c46 | ||
|
eb18da0450 | ||
|
0f7494216f | ||
|
442a278635 | ||
|
7ebed3ace6 | ||
|
8a7db1a918 |
@@ -1,4 +1,5 @@
|
||||
language: rust
|
||||
dist: xenial
|
||||
env:
|
||||
global:
|
||||
- PROJECT_NAME: ripgrep
|
||||
@@ -62,13 +63,13 @@ matrix:
|
||||
# Minimum Rust supported channel. We enable these to make sure ripgrep
|
||||
# continues to work on the advertised minimum Rust version.
|
||||
- os: linux
|
||||
rust: 1.28.0
|
||||
rust: 1.32.0
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: linux
|
||||
rust: 1.28.0
|
||||
rust: 1.32.0
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: 1.28.0
|
||||
rust: 1.32.0
|
||||
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
|
||||
addons:
|
||||
apt:
|
||||
@@ -93,6 +94,7 @@ deploy:
|
||||
skip_cleanup: true
|
||||
on:
|
||||
condition: $TRAVIS_RUST_VERSION = nightly
|
||||
branch: master # i guess we do need this after all?
|
||||
tags: true
|
||||
api_key:
|
||||
secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo="
|
||||
|
62
CHANGELOG.md
62
CHANGELOG.md
@@ -1,3 +1,65 @@
|
||||
0.11.0 (TBD)
|
||||
============
|
||||
TODO.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
* ripgrep has tweaked its exit status codes to be more like GNU grep's. Namely,
|
||||
if a non-fatal error occurs during a search, then ripgrep will now always
|
||||
emit a `2` exit status code, regardless of whether a match is found or not.
|
||||
Previously, ripgrep would only emit a `2` exit status code for a catastrophic
|
||||
error (e.g., regex syntax error). One exception to this is if ripgrep is run
|
||||
with `-q/--quiet`. In that case, if an error occurs and a match is found,
|
||||
then ripgrep will exit with a `0` exit status code.
|
||||
* The `avx-accel` feature of ripgrep has been removed since it is no longer
|
||||
necessary. All uses of AVX in ripgrep are now enabled automatically via
|
||||
runtime CPU feature detection. The `simd-accel` feature does remain
|
||||
available, however, it does increase compilation times substantially at the
|
||||
moment.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
|
||||
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
|
||||
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
|
||||
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
|
||||
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
|
||||
ripgrep's exit status logic should now match GNU grep. See updated man page.
|
||||
* [FEATURE #1170](https://github.com/BurntSushi/ripgrep/pull/1170):
|
||||
Add `--ignore-file-case-insensitive` for case insensitive .ignore globs.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
|
||||
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
|
||||
`**` is now accepted as valid syntax anywhere in a glob.
|
||||
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
|
||||
ripgrep no longer hangs when searching `/proc` with a zombie process present.
|
||||
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
|
||||
Add note about inverted flags to the man page.
|
||||
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
|
||||
Fix corner cases involving the `--crlf` flag.
|
||||
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
|
||||
Clarify what `--encoding auto` does.
|
||||
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
|
||||
`--files-with-matches` and `--files-without-match` work with one file.
|
||||
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
|
||||
Fix handling of literal slashes in gitignore patterns.
|
||||
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
|
||||
Fix bug that was triggering Windows antimalware when using the --files flag.
|
||||
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
|
||||
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
|
||||
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
|
||||
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
|
||||
Windows files with "hidden" attribute are now treated as hidden.
|
||||
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
|
||||
Fix handling of `**` patterns in gitignore files.
|
||||
* [BUG #1174](https://github.com/BurntSushi/ripgrep/issues/1174):
|
||||
Fix handling of repeated `**` patterns in gitignore files.
|
||||
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
|
||||
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
|
||||
|
||||
|
||||
0.10.0 (2018-09-07)
|
||||
===================
|
||||
This is a new minor version release of ripgrep that contains some major new
|
||||
|
664
Cargo.lock
generated
664
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -17,6 +17,7 @@ license = "Unlicense OR MIT"
|
||||
exclude = ["HomebrewFormula"]
|
||||
build = "build.rs"
|
||||
autotests = false
|
||||
edition = "2018"
|
||||
|
||||
[badges]
|
||||
travis-ci = { repository = "BurntSushi/ripgrep" }
|
||||
@@ -45,7 +46,8 @@ members = [
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
grep = { version = "0.2.2", path = "grep" }
|
||||
bstr = "0.1.2"
|
||||
grep = { version = "0.2.3", path = "grep" }
|
||||
ignore = { version = "0.4.4", path = "ignore" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
@@ -72,7 +74,6 @@ serde = "1.0.77"
|
||||
serde_derive = "1.0.77"
|
||||
|
||||
[features]
|
||||
avx-accel = ["grep/avx-accel"]
|
||||
simd-accel = ["grep/simd-accel"]
|
||||
pcre2 = ["grep/pcre2"]
|
||||
|
||||
@@ -81,6 +82,7 @@ debug = 1
|
||||
|
||||
[package.metadata.deb]
|
||||
features = ["pcre2"]
|
||||
section = "utils"
|
||||
assets = [
|
||||
["target/release/rg", "usr/bin/", "755"],
|
||||
["COPYING", "usr/share/doc/ripgrep/", "644"],
|
||||
|
15
FAQ.md
15
FAQ.md
@@ -118,7 +118,7 @@ from run to run of ripgrep.
|
||||
The only way to make the order of results consistent is to ask ripgrep to
|
||||
sort the output. Currently, this will disable all parallelism. (On smaller
|
||||
repositories, you might not notice much of a performance difference!) You
|
||||
can achieve this with the `--sort-files` flag.
|
||||
can achieve this with the `--sort path` flag.
|
||||
|
||||
There is more discussion on this topic here:
|
||||
https://github.com/BurntSushi/ripgrep/issues/152
|
||||
@@ -136,10 +136,10 @@ How do I search compressed files?
|
||||
</h3>
|
||||
|
||||
ripgrep's `-z/--search-zip` flag will cause it to search compressed files
|
||||
automatically. Currently, this supports gzip, bzip2, lzma, lz4 and xz only and
|
||||
requires the corresponding `gzip`, `bzip2` and `xz` binaries to be installed on
|
||||
your system. (That is, ripgrep does decompression by shelling out to another
|
||||
process.)
|
||||
automatically. Currently, this supports gzip, bzip2, xz, lzma, lz4, Brotli and
|
||||
Zstd. Each of these requires the corresponding `gzip`, `bzip2`, `xz`,
|
||||
`lz4`, `brotli` and `zstd` binaries to be installed on your system. (That is,
|
||||
ripgrep does decompression by shelling out to another process.)
|
||||
|
||||
ripgrep currently does not search archive formats, so `*.tar.gz` files, for
|
||||
example, are skipped.
|
||||
@@ -149,9 +149,8 @@ example, are skipped.
|
||||
How do I search over multiple lines?
|
||||
</h3>
|
||||
|
||||
This isn't currently possible. ripgrep is fundamentally a line-oriented search
|
||||
tool. With that said,
|
||||
[multiline search is a planned opt-in feature](https://github.com/BurntSushi/ripgrep/issues/176).
|
||||
The `-U/--multiline` flag enables ripgrep to report results that span over
|
||||
multiple lines.
|
||||
|
||||
|
||||
<h3 name="fancy">
|
||||
|
20
GUIDE.md
20
GUIDE.md
@@ -235,6 +235,11 @@ Like `.gitignore`, a `.ignore` file can be placed in any directory. Its rules
|
||||
will be processed with respect to the directory it resides in, just like
|
||||
`.gitignore`.
|
||||
|
||||
To process `.gitignore` and `.ignore` files case insensitively, use the flag
|
||||
`--ignore-file-case-insensitive`. This is especially useful on case insensitive
|
||||
file systems like those on Windows and macOS. Note though that this can come
|
||||
with a significant performance penalty, and is therefore disabled by default.
|
||||
|
||||
For a more in depth description of how glob patterns in a `.gitignore` file
|
||||
are interpreted, please see `man gitignore`.
|
||||
|
||||
@@ -520,9 +525,9 @@ config file. Once the environment variable is set, open the file and just type
|
||||
in the flags you want set automatically. There are only two rules for
|
||||
describing the format of the config file:
|
||||
|
||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of
|
||||
ASCII whitespace) are ignored.
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with `#` (optionally preceded by any amount of whitespace)
|
||||
are ignored.
|
||||
|
||||
In particular, there is no escaping. Each line is given to ripgrep as a single
|
||||
command line argument verbatim.
|
||||
@@ -604,7 +609,8 @@ topic, but we can try to summarize its relevancy to ripgrep:
|
||||
the most popular encodings likely consist of ASCII, latin1 or UTF-8. As
|
||||
a special exception, UTF-16 is prevalent in Windows environments
|
||||
|
||||
In light of the above, here is how ripgrep behaves:
|
||||
In light of the above, here is how ripgrep behaves when `--encoding auto` is
|
||||
given, which is the default:
|
||||
|
||||
* All input is assumed to be ASCII compatible (which means every byte that
|
||||
corresponds to an ASCII codepoint actually is an ASCII codepoint). This
|
||||
@@ -675,10 +681,10 @@ used options that will likely impact how you use ripgrep on a regular basis.
|
||||
* `--files`: Print the files that ripgrep *would* search, but don't actually
|
||||
search them.
|
||||
* `-a/--text`: Search binary files as if they were plain text.
|
||||
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz). This is
|
||||
disabled by default.
|
||||
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz, lz4,
|
||||
brotli, zstd). This is disabled by default.
|
||||
* `-C/--context`: Show the lines surrounding a match.
|
||||
* `--sort-files`: Force ripgrep to sort its output by file name. (This disables
|
||||
* `--sort path`: Force ripgrep to sort its output by file name. (This disables
|
||||
parallelism, so it might be slower.)
|
||||
* `-L/--follow`: Follow symbolic links while recursively searching.
|
||||
* `-M/--max-columns`: Limit the length of lines printed by ripgrep.
|
||||
|
59
README.md
59
README.md
@@ -1,11 +1,12 @@
|
||||
ripgrep (rg)
|
||||
------------
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
||||
directory for a regex pattern. By default, ripgrep will respect your .gitignore
|
||||
and automatically skip hidden files/directories and binary files. ripgrep
|
||||
has first class support on Windows, macOS and Linux, with binary downloads
|
||||
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher,
|
||||
ack and grep.
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher, ack
|
||||
and grep.
|
||||
|
||||
[Linux build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
@@ -105,7 +106,7 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
|
||||
supporting Unicode (which is always on).
|
||||
* ripgrep has optional support for switching its regex engine to use PCRE2.
|
||||
Among other things, this makes it possible to use look-around and
|
||||
backreferences in your patterns, which are supported in ripgrep's default
|
||||
backreferences in your patterns, which are not supported in ripgrep's default
|
||||
regex engine. PCRE2 support is enabled with `-P`.
|
||||
* ripgrep supports searching files in text encodings other than UTF-8, such
|
||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||
@@ -248,21 +249,22 @@ If you're a **Gentoo** user, you can install ripgrep from the
|
||||
$ emerge sys-apps/ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora 27+** user, you can install ripgrep from official
|
||||
If you're a **Fedora** user, you can install ripgrep from official
|
||||
repositories.
|
||||
|
||||
```
|
||||
$ sudo dnf install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora 24+** user, you can install ripgrep from
|
||||
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
If you're an **openSUSE Leap 15.0** user, you can install ripgrep from the
|
||||
[utilities repo](https://build.opensuse.org/package/show/utilities/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo dnf copr enable carlwgeorge/ripgrep
|
||||
$ sudo dnf install ripgrep
|
||||
$ sudo zypper ar https://download.opensuse.org/repositories/utilities/openSUSE_Leap_15.0/utilities.repo
|
||||
$ sudo zypper install ripgrep
|
||||
```
|
||||
|
||||
|
||||
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the
|
||||
[official repo](http://software.opensuse.org/package/ripgrep):
|
||||
|
||||
@@ -288,12 +290,11 @@ $ # (Or using the attribute name, which is also ripgrep.)
|
||||
|
||||
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
|
||||
then ripgrep can be installed using a binary `.deb` file provided in each
|
||||
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). Note that
|
||||
ripgrep is not in the official Debian or Ubuntu repositories.
|
||||
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
```
|
||||
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.9.0/ripgrep_0.9.0_amd64.deb
|
||||
$ sudo dpkg -i ripgrep_0.9.0_amd64.deb
|
||||
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.10.0/ripgrep_0.10.0_amd64.deb
|
||||
$ sudo dpkg -i ripgrep_0.10.0_amd64.deb
|
||||
```
|
||||
|
||||
If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is
|
||||
@@ -302,6 +303,14 @@ If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
|
||||
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
|
||||
packaging as Debian:
|
||||
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
|
||||
seem to work right and generate a number of very strange bug reports that I
|
||||
don't know how to fix and don't have the time to fix. Therefore, it is no
|
||||
@@ -330,7 +339,7 @@ If you're a **NetBSD** user, then you can install ripgrep from
|
||||
|
||||
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.28.0**,
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.32.0**,
|
||||
although ripgrep may work with older versions.
|
||||
* Note that the binary may be bigger than expected because it contains debug
|
||||
symbols. This is intentional. To remove debug symbols and therefore reduce
|
||||
@@ -351,7 +360,7 @@ ripgrep isn't currently in any other package repositories.
|
||||
|
||||
ripgrep is written in Rust, so you'll need to grab a
|
||||
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
||||
ripgrep compiles with Rust 1.28.0 (stable) or newer. In general, ripgrep tracks
|
||||
ripgrep compiles with Rust 1.32.0 (stable) or newer. In general, ripgrep tracks
|
||||
the latest stable release of the Rust compiler.
|
||||
|
||||
To build ripgrep:
|
||||
@@ -368,18 +377,14 @@ If you have a Rust nightly compiler and a recent Intel CPU, then you can enable
|
||||
additional optional SIMD acceleration like so:
|
||||
|
||||
```
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel'
|
||||
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel'
|
||||
```
|
||||
|
||||
If your machine doesn't support AVX instructions, then simply remove
|
||||
`avx-accel` from the features list. Similarly for SIMD (which corresponds
|
||||
roughly to SSE instructions).
|
||||
|
||||
The `simd-accel` and `avx-accel` features enable SIMD support in certain
|
||||
ripgrep dependencies (responsible for counting lines and transcoding). They
|
||||
are not necessary to get SIMD optimizations for search; those are enabled
|
||||
automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features
|
||||
will similarly become unnecessary.
|
||||
The `simd-accel` feature enables SIMD support in certain ripgrep dependencies
|
||||
(responsible for transcoding). It is not necessary to get SIMD optimizations
|
||||
for search; those are enabled automatically. Hopefully, some day, the
|
||||
`simd-accel` feature will similarly become unnecessary. **WARNING:** Currently,
|
||||
enabling this option can increase compilation times dramatically.
|
||||
|
||||
Finally, optional PCRE2 support can be built with ripgrep by enabling the
|
||||
`pcre2` feature:
|
||||
@@ -388,8 +393,8 @@ Finally, optional PCRE2 support can be built with ripgrep by enabling the
|
||||
$ cargo build --release --features 'pcre2'
|
||||
```
|
||||
|
||||
(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile
|
||||
time SIMD optimizations, which will only work with a nightly compiler.)
|
||||
(Tip: use `--features 'pcre2 simd-accel'` to also include compile time SIMD
|
||||
optimizations, which will only work with a nightly compiler.)
|
||||
|
||||
Enabling the PCRE2 feature works with a stable Rust compiler and will
|
||||
attempt to automatically find and link with your system's PCRE2 library via
|
||||
|
@@ -73,7 +73,7 @@ deploy:
|
||||
# deploy when a new tag is pushed and only on the stable channel
|
||||
on:
|
||||
CHANNEL: stable
|
||||
APPVEYOR_REPO_TAG: true
|
||||
appveyor_repo_tag: true
|
||||
|
||||
branches:
|
||||
only:
|
||||
|
12
build.rs
12
build.rs
@@ -1,8 +1,3 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
||||
use std::env;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{self, Read, Write};
|
||||
@@ -168,7 +163,12 @@ fn formatted_arg(arg: &RGArg) -> io::Result<String> {
|
||||
}
|
||||
|
||||
fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> {
|
||||
let paragraphs: Vec<&str> = arg.doc_long.split("\n\n").collect();
|
||||
let paragraphs: Vec<String> = arg.doc_long
|
||||
.replace("{", "&#123;")
|
||||
.replace("}", r"&#125;")
|
||||
.split("\n\n")
|
||||
.map(|s| s.to_string())
|
||||
.collect();
|
||||
if paragraphs.is_empty() {
|
||||
return Err(ioerr(format!("missing docs for --{}", arg.name)));
|
||||
}
|
||||
|
12
complete/_rg
12
complete/_rg
@@ -112,8 +112,12 @@ _rg() {
|
||||
$no"--no-hidden[don't search hidden files and directories]"
|
||||
|
||||
+ '(ignore)' # Ignore-file options
|
||||
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs)--no-ignore[don't respect ignore files]"
|
||||
$no'(--ignore-global --ignore-parent --ignore-vcs)--ignore[respect ignore files]'
|
||||
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
|
||||
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
|
||||
|
||||
+ '(ignore-file-case-insensitive)' # Ignore-file case sensitivity options
|
||||
'--ignore-file-case-insensitive[process ignore files case insensitively]'
|
||||
$no'--no-ignore-file-case-insensitive[process ignore files case sensitively]'
|
||||
|
||||
+ '(ignore-global)' # Global ignore-file options
|
||||
"--no-ignore-global[don't respect global ignore files]"
|
||||
@@ -127,6 +131,10 @@ _rg() {
|
||||
"--no-ignore-vcs[don't respect version control ignore files]"
|
||||
$no'--ignore-vcs[respect version control ignore files]'
|
||||
|
||||
+ '(ignore-dot)' # .ignore-file options
|
||||
"--no-ignore-dot[don't respect .ignore files]"
|
||||
$no'--ignore-dot[respect .ignore files]'
|
||||
|
||||
+ '(json)' # JSON options
|
||||
'--json[output results in JSON Lines format]'
|
||||
$no"--no-json[don't output results in JSON Lines format]"
|
||||
|
@@ -34,12 +34,12 @@ files/directories and binary files.
|
||||
ripgrep's default regex engine uses finite automata and guarantees linear
|
||||
time searching. Because of this, features like backreferences and arbitrary
|
||||
look-around are not supported. However, if ripgrep is built with PCRE2, then
|
||||
the --pcre2 flag can be used to enable backreferences and look-around.
|
||||
the *--pcre2* flag can be used to enable backreferences and look-around.
|
||||
|
||||
ripgrep supports configuration files. Set RIPGREP_CONFIG_PATH to a
|
||||
ripgrep supports configuration files. Set *RIPGREP_CONFIG_PATH* to a
|
||||
configuration file. The file can specify one shell argument per line. Lines
|
||||
starting with '#' are ignored. For more details, see the man page or the
|
||||
README.
|
||||
starting with *#* are ignored. For more details, see the man page or the
|
||||
*README*.
|
||||
|
||||
|
||||
REGEX SYNTAX
|
||||
@@ -52,10 +52,10 @@ https://docs.rs/regex/*/regex/bytes/index.html#syntax
|
||||
|
||||
To a first approximation, ripgrep uses Perl-like regexes without look-around or
|
||||
backreferences. This makes them very similar to the "extended" (ERE) regular
|
||||
expressions supported by `egrep`, but with a few additional features like
|
||||
expressions supported by *egrep*, but with a few additional features like
|
||||
Unicode character classes.
|
||||
|
||||
If you're using ripgrep with the --pcre2 flag, then please consult
|
||||
If you're using ripgrep with the *--pcre2* flag, then please consult
|
||||
https://www.pcre.org or the PCRE2 man pages for documentation on the supported
|
||||
syntax.
|
||||
|
||||
@@ -68,18 +68,37 @@ _PATTERN_::
|
||||
|
||||
_PATH_::
|
||||
A file or directory to search. Directories are searched recursively. Paths
|
||||
specified expicitly on the command line override glob and ignore rules.
|
||||
specified explicitly on the command line override glob and ignore rules.
|
||||
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
Note that for many options, there exist flags to disable them. In some cases,
|
||||
those flags are not listed in a first class way below. For example, the
|
||||
*--column* flag (listed below) enables column numbers in ripgrep's output, but
|
||||
the *--no-column* flag (not listed below) disables them. The reverse can also
|
||||
exist. For example, the *--no-ignore* flag (listed below) disables ripgrep's
|
||||
*gitignore* logic, but the *--ignore* flag (not listed below) enables it. These
|
||||
flags are useful for overriding a ripgrep configuration file on the command
|
||||
line. Each flag's documentation notes whether an inverted flag exists. In all
|
||||
cases, the flag specified last takes precedence.
|
||||
|
||||
{OPTIONS}
|
||||
|
||||
|
||||
EXIT STATUS
|
||||
-----------
|
||||
If ripgrep finds a match, then the exit status of the program is 0. If no match
|
||||
could be found, then the exit status is non-zero.
|
||||
could be found, then the exit status is 1. If an error occurred, then the exit
|
||||
status is always 2 unless ripgrep was run with the *--quiet* flag and a match
|
||||
was found. In summary:
|
||||
|
||||
* `0` exit status occurs only when at least one match was found, and if
|
||||
no error occurred, unless *--quiet* was given.
|
||||
* `1` exit status occurs only when no match was found and no error occurred.
|
||||
* `2` exit status occurs when an error occurred. This is true for both
|
||||
catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g.,
|
||||
unable to read a file).
|
||||
|
||||
|
||||
CONFIGURATION FILES
|
||||
@@ -88,12 +107,12 @@ ripgrep supports reading configuration files that change ripgrep's default
|
||||
behavior. The format of the configuration file is an "rc" style and is very
|
||||
simple. It is defined by two rules:
|
||||
|
||||
1. Every line is a shell argument, after trimming ASCII whitespace.
|
||||
2. Lines starting with _#_ (optionally preceded by any amount of
|
||||
ASCII whitespace) are ignored.
|
||||
1. Every line is a shell argument, after trimming whitespace.
|
||||
2. Lines starting with *#* (optionally preceded by any amount of
|
||||
whitespace) are ignored.
|
||||
|
||||
ripgrep will look for a single configuration file if and only if the
|
||||
_RIPGREP_CONFIG_PATH_ environment variable is set and is non-empty.
|
||||
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
|
||||
ripgrep will parse shell arguments from this file on startup and will
|
||||
behave as if the arguments in this file were prepended to any explicit
|
||||
arguments given to ripgrep on the command line.
|
||||
@@ -155,20 +174,20 @@ SHELL COMPLETION
|
||||
Shell completion files are included in the release tarball for Bash, Fish, Zsh
|
||||
and PowerShell.
|
||||
|
||||
For *bash*, move `rg.bash` to `$XDG_CONFIG_HOME/bash_completion`
|
||||
or `/etc/bash_completion.d/`.
|
||||
For *bash*, move *rg.bash* to *$XDG_CONFIG_HOME/bash_completion*
|
||||
or */etc/bash_completion.d/*.
|
||||
|
||||
For *fish*, move `rg.fish` to `$HOME/.config/fish/completions`.
|
||||
For *fish*, move *rg.fish* to *$HOME/.config/fish/completions*.
|
||||
|
||||
For *zsh*, move `_rg` to one of your `$fpath` directories.
|
||||
For *zsh*, move *_rg* to one of your *$fpath* directories.
|
||||
|
||||
|
||||
CAVEATS
|
||||
-------
|
||||
ripgrep may abort unexpectedly when using default settings if it searches a
|
||||
file that is simultaneously truncated. This behavior can be avoided by passing
|
||||
the --no-mmap flag which will forcefully disable the use of memory maps in all
|
||||
cases.
|
||||
the *--no-mmap* flag which will forcefully disable the use of memory maps in
|
||||
all cases.
|
||||
|
||||
|
||||
VERSION
|
||||
@@ -180,7 +199,11 @@ HOMEPAGE
|
||||
--------
|
||||
https://github.com/BurntSushi/ripgrep
|
||||
|
||||
Please report bugs and feature requests in the issue tracker.
|
||||
Please report bugs and feature requests in the issue tracker. Please do your
|
||||
best to provide a reproducible test case for bugs. This should include the
|
||||
corpus being searched, the *rg* command, the actual output and the expected
|
||||
output. Please also include the output of running the same *rg* command but
|
||||
with the *--debug* flag.
|
||||
|
||||
|
||||
AUTHORS
|
||||
|
@@ -19,11 +19,11 @@ name = "globset"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.6.8"
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
|
||||
fnv = "1.0.6"
|
||||
log = "0.4.5"
|
||||
memchr = "2.0.2"
|
||||
regex = "1.0.5"
|
||||
regex = "1.1.5"
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2.11"
|
||||
|
@@ -120,7 +120,7 @@ impl GlobMatcher {
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
self.re.is_match(&path.path)
|
||||
self.re.is_match(path.path.as_bytes())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -145,7 +145,7 @@ impl GlobStrategic {
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
||||
let byte_path = &*candidate.path;
|
||||
let byte_path = candidate.path.as_bytes();
|
||||
|
||||
match self.strategy {
|
||||
MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
|
||||
@@ -837,40 +837,66 @@ impl<'a> Parser<'a> {
|
||||
|
||||
fn parse_star(&mut self) -> Result<(), Error> {
|
||||
let prev = self.prev;
|
||||
if self.chars.peek() != Some(&'*') {
|
||||
if self.peek() != Some('*') {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
assert!(self.bump() == Some('*'));
|
||||
if !self.have_tokens()? {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
let next = self.bump();
|
||||
if !next.map(is_separator).unwrap_or(true) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
if !self.peek().map_or(true, is_separator) {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
assert!(self.bump().map_or(true, is_separator));
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
self.pop_token()?;
|
||||
|
||||
if !prev.map(is_separator).unwrap_or(false) {
|
||||
if self.stack.len() <= 1
|
||||
|| (prev != Some(',') && prev != Some('{')) {
|
||||
return Err(self.error(ErrorKind::InvalidRecursive));
|
||||
|| (prev != Some(',') && prev != Some('{'))
|
||||
{
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
match self.chars.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
let is_suffix =
|
||||
match self.peek() {
|
||||
None => {
|
||||
assert!(self.bump().is_none());
|
||||
true
|
||||
}
|
||||
Some(',') | Some('}') if self.stack.len() >= 2 => {
|
||||
true
|
||||
}
|
||||
Some(c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
false
|
||||
}
|
||||
_ => {
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
self.push_token(Token::ZeroOrMore)?;
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
match self.pop_token()? {
|
||||
Token::RecursivePrefix => {
|
||||
self.push_token(Token::RecursivePrefix)?;
|
||||
}
|
||||
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
|
||||
self.push_token(Token::RecursiveSuffix)
|
||||
Token::RecursiveSuffix => {
|
||||
self.push_token(Token::RecursiveSuffix)?;
|
||||
}
|
||||
Some(&c) if is_separator(c) => {
|
||||
assert!(self.bump().map(is_separator).unwrap_or(false));
|
||||
self.push_token(Token::RecursiveZeroOrMore)
|
||||
_ => {
|
||||
if is_suffix {
|
||||
self.push_token(Token::RecursiveSuffix)?;
|
||||
} else {
|
||||
self.push_token(Token::RecursiveZeroOrMore)?;
|
||||
}
|
||||
}
|
||||
_ => Err(self.error(ErrorKind::InvalidRecursive)),
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn parse_class(&mut self) -> Result<(), Error> {
|
||||
@@ -959,6 +985,10 @@ impl<'a> Parser<'a> {
|
||||
self.cur = self.chars.next();
|
||||
self.cur
|
||||
}
|
||||
|
||||
fn peek(&mut self) -> Option<char> {
|
||||
self.chars.peek().map(|&ch| ch)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -1144,13 +1174,6 @@ mod tests {
|
||||
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
|
||||
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
|
||||
|
||||
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
|
||||
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
|
||||
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
|
||||
@@ -1194,8 +1217,30 @@ mod tests {
|
||||
toregex!(re8, "[*]", r"^[\*]$");
|
||||
toregex!(re9, "[+]", r"^[\+]$");
|
||||
toregex!(re10, "+", r"^\+$");
|
||||
toregex!(re11, "**", r"^.*$");
|
||||
toregex!(re12, "☃", r"^\xe2\x98\x83$");
|
||||
toregex!(re11, "☃", r"^\xe2\x98\x83$");
|
||||
toregex!(re12, "**", r"^.*$");
|
||||
toregex!(re13, "**/", r"^.*$");
|
||||
toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re15, "**/**", r"^.*$");
|
||||
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re17, "**/**/**", r"^.*$");
|
||||
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
|
||||
toregex!(re28, "a**", r"^a.*.*$");
|
||||
toregex!(re29, "**a", r"^.*.*a$");
|
||||
toregex!(re30, "a**b", r"^a.*.*b$");
|
||||
toregex!(re31, "***", r"^.*.*.*$");
|
||||
toregex!(re32, "/a**", r"^/a.*.*$");
|
||||
toregex!(re33, "/**a", r"^/.*.*a$");
|
||||
toregex!(re34, "/a**b", r"^/a.*.*b$");
|
||||
|
||||
matches!(match1, "a", "a");
|
||||
matches!(match2, "a*b", "a_b");
|
||||
|
@@ -104,27 +104,25 @@ or to enable case insensitive matching.
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate fnv;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::OsStr;
|
||||
use std::fmt;
|
||||
use std::hash;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
|
||||
use aho_corasick::AhoCorasick;
|
||||
use bstr::{B, BStr, BString};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use pathutil::{
|
||||
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
|
||||
};
|
||||
use pathutil::{file_name, file_name_ext, normalize_path};
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
|
||||
@@ -143,8 +141,13 @@ pub struct Error {
|
||||
/// The kind of error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum ErrorKind {
|
||||
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
|
||||
/// adjacent to a path separator, or the beginning/end of a glob.
|
||||
/// **DEPRECATED**.
|
||||
///
|
||||
/// This error used to occur for consistency with git's glob specification,
|
||||
/// but the specification now accepts all uses of `**`. When `**` does not
|
||||
/// appear adjacent to a path separator or at the beginning/end of a glob,
|
||||
/// it is now treated as two consecutive `*` patterns. As such, this error
|
||||
/// is no longer used.
|
||||
InvalidRecursive,
|
||||
/// Occurs when a character class (e.g., `[abc]`) is not closed.
|
||||
UnclosedClass,
|
||||
@@ -289,6 +292,7 @@ pub struct GlobSet {
|
||||
|
||||
impl GlobSet {
|
||||
/// Create an empty `GlobSet`. An empty set matches nothing.
|
||||
#[inline]
|
||||
pub fn empty() -> GlobSet {
|
||||
GlobSet {
|
||||
len: 0,
|
||||
@@ -297,11 +301,13 @@ impl GlobSet {
|
||||
}
|
||||
|
||||
/// Returns true if this set is empty, and therefore matches nothing.
|
||||
#[inline]
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len == 0
|
||||
}
|
||||
|
||||
/// Returns the number of globs in this set.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
@@ -484,24 +490,25 @@ impl GlobSetBuilder {
|
||||
/// path against multiple globs or sets of globs.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Candidate<'a> {
|
||||
path: Cow<'a, [u8]>,
|
||||
basename: Cow<'a, [u8]>,
|
||||
ext: Cow<'a, [u8]>,
|
||||
path: Cow<'a, BStr>,
|
||||
basename: Cow<'a, BStr>,
|
||||
ext: Cow<'a, BStr>,
|
||||
}
|
||||
|
||||
impl<'a> Candidate<'a> {
|
||||
/// Create a new candidate for matching from the given path.
|
||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
let path = path.as_ref();
|
||||
let basename = file_name(path).unwrap_or(OsStr::new(""));
|
||||
let path = normalize_path(BString::from_path_lossy(path.as_ref()));
|
||||
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
|
||||
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
|
||||
Candidate {
|
||||
path: normalize_path(path_bytes(path)),
|
||||
basename: os_str_bytes(basename),
|
||||
ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")),
|
||||
path: path,
|
||||
basename: basename,
|
||||
ext: ext,
|
||||
}
|
||||
}
|
||||
|
||||
fn path_prefix(&self, max: usize) -> &[u8] {
|
||||
fn path_prefix(&self, max: usize) -> &BStr {
|
||||
if self.path.len() <= max {
|
||||
&*self.path
|
||||
} else {
|
||||
@@ -509,7 +516,7 @@ impl<'a> Candidate<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
fn path_suffix(&self, max: usize) -> &[u8] {
|
||||
fn path_suffix(&self, max: usize) -> &BStr {
|
||||
if self.path.len() <= max {
|
||||
&*self.path
|
||||
} else {
|
||||
@@ -570,12 +577,12 @@ impl LiteralStrategy {
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.0.contains_key(&*candidate.path)
|
||||
self.0.contains_key(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if let Some(hits) = self.0.get(&*candidate.path) {
|
||||
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -597,7 +604,7 @@ impl BasenameLiteralStrategy {
|
||||
if candidate.basename.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(&*candidate.basename)
|
||||
self.0.contains_key(candidate.basename.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
@@ -605,7 +612,7 @@ impl BasenameLiteralStrategy {
|
||||
if candidate.basename.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(&*candidate.basename) {
|
||||
if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -627,7 +634,7 @@ impl ExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(&*candidate.ext)
|
||||
self.0.contains_key(candidate.ext.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
@@ -635,7 +642,7 @@ impl ExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(&*candidate.ext) {
|
||||
if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
@@ -643,7 +650,7 @@ impl ExtensionStrategy {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct PrefixStrategy {
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
matcher: AhoCorasick,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
@@ -651,8 +658,8 @@ struct PrefixStrategy {
|
||||
impl PrefixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -661,9 +668,9 @@ impl PrefixStrategy {
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
matches.push(self.map[m.pati]);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
matches.push(self.map[m.pattern()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -671,7 +678,7 @@ impl PrefixStrategy {
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct SuffixStrategy {
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
matcher: AhoCorasick,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
@@ -679,8 +686,8 @@ struct SuffixStrategy {
|
||||
impl SuffixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -689,9 +696,9 @@ impl SuffixStrategy {
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
matches.push(self.map[m.pati]);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
matches.push(self.map[m.pattern()]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -705,11 +712,11 @@ impl RequiredExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
match self.0.get(&*candidate.ext) {
|
||||
match self.0.get(candidate.ext.as_bytes()) {
|
||||
None => false,
|
||||
Some(regexes) => {
|
||||
for &(_, ref re) in regexes {
|
||||
if re.is_match(&*candidate.path) {
|
||||
if re.is_match(candidate.path.as_bytes()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@@ -723,9 +730,9 @@ impl RequiredExtensionStrategy {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(regexes) = self.0.get(&*candidate.ext) {
|
||||
if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
|
||||
for &(global_index, ref re) in regexes {
|
||||
if re.is_match(&*candidate.path) {
|
||||
if re.is_match(candidate.path.as_bytes()) {
|
||||
matches.push(global_index);
|
||||
}
|
||||
}
|
||||
@@ -741,11 +748,11 @@ struct RegexSetStrategy {
|
||||
|
||||
impl RegexSetStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.matcher.is_match(&*candidate.path)
|
||||
self.matcher.is_match(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
for i in self.matcher.matches(&*candidate.path) {
|
||||
for i in self.matcher.matches(candidate.path.as_bytes()) {
|
||||
matches.push(self.map[i]);
|
||||
}
|
||||
}
|
||||
@@ -776,18 +783,16 @@ impl MultiStrategyBuilder {
|
||||
}
|
||||
|
||||
fn prefix(self) -> PrefixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
PrefixStrategy {
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
}
|
||||
|
||||
fn suffix(self) -> SuffixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
SuffixStrategy {
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
|
@@ -1,41 +1,26 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
use bstr::BStr;
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
pub fn file_name<'a>(path: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
} else if path.last() == Some(b'.') {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(not(unix))]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
path.as_ref().file_name()
|
||||
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
|
||||
Some(match *path {
|
||||
Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
|
||||
Cow::Owned(ref path) => {
|
||||
let mut path = path.clone();
|
||||
path.drain_bytes(..last_slash);
|
||||
Cow::Owned(path)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return a file extension given a path's file name.
|
||||
@@ -54,59 +39,28 @@ pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
|
||||
/// extension, but it also matches files like `.rs`, which doesn't have an
|
||||
/// extension according to std::path::Path::extension.
|
||||
pub fn file_name_ext(name: &OsStr) -> Option<Cow<[u8]>> {
|
||||
pub fn file_name_ext<'a>(name: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
|
||||
if name.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let name = os_str_bytes(name);
|
||||
let last_dot_at = {
|
||||
let result = name
|
||||
.iter().enumerate().rev()
|
||||
.find(|&(_, &b)| b == b'.')
|
||||
.map(|(i, _)| i);
|
||||
match result {
|
||||
None => return None,
|
||||
Some(i) => i,
|
||||
}
|
||||
let last_dot_at = match name.rfind_byte(b'.') {
|
||||
None => return None,
|
||||
Some(i) => i,
|
||||
};
|
||||
Some(match name {
|
||||
Some(match *name {
|
||||
Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]),
|
||||
Cow::Owned(mut name) => {
|
||||
name.drain(..last_dot_at);
|
||||
Cow::Owned(ref name) => {
|
||||
let mut name = name.clone();
|
||||
name.drain_bytes(..last_dot_at);
|
||||
Cow::Owned(name)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
|
||||
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
|
||||
os_str_bytes(path.as_os_str())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(unix)]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
Cow::Borrowed(s.as_bytes())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
|
||||
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
|
||||
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
|
||||
// be useful. We *must* convert to UTF-8 before doing path matching.
|
||||
// Unfortunate, but necessary.
|
||||
match s.to_string_lossy() {
|
||||
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
|
||||
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(unix)]
|
||||
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
pub fn normalize_path(path: Cow<BStr>) -> Cow<BStr> {
|
||||
// UNIX only uses /, so we're good.
|
||||
path
|
||||
}
|
||||
@@ -114,7 +68,7 @@ pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(not(unix))]
|
||||
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
pub fn normalize_path(mut path: Cow<BStr>) -> Cow<BStr> {
|
||||
use std::path::is_separator;
|
||||
|
||||
for i in 0..path.len() {
|
||||
@@ -129,7 +83,8 @@ pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
|
||||
use bstr::{B, BString};
|
||||
|
||||
use super::{file_name_ext, normalize_path};
|
||||
|
||||
@@ -137,8 +92,9 @@ mod tests {
|
||||
($name:ident, $file_name:expr, $ext:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let got = file_name_ext(OsStr::new($file_name));
|
||||
assert_eq!($ext.map(|s| Cow::Borrowed(s.as_bytes())), got);
|
||||
let bs = BString::from($file_name);
|
||||
let got = file_name_ext(&Cow::Owned(bs));
|
||||
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
|
||||
}
|
||||
};
|
||||
}
|
||||
@@ -153,7 +109,8 @@ mod tests {
|
||||
($name:ident, $path:expr, $expected:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let got = normalize_path(Cow::Owned($path.to_vec()));
|
||||
let bs = BString::from_slice($path);
|
||||
let got = normalize_path(Cow::Owned(bs));
|
||||
assert_eq!($expected.to_vec(), got.into_owned());
|
||||
}
|
||||
};
|
||||
|
@@ -14,12 +14,13 @@ license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bstr = "0.1.2"
|
||||
globset = { version = "0.4.2", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.0.5"
|
||||
same-file = "1.0.3"
|
||||
termcolor = "1.0.3"
|
||||
regex = "1.1"
|
||||
same-file = "1.0.4"
|
||||
termcolor = "1.0.4"
|
||||
|
||||
[target.'cfg(windows)'.dependencies.winapi-util]
|
||||
version = "0.1.1"
|
||||
|
@@ -352,6 +352,8 @@ fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
const ARGS_XZ: &[&str] = &["xz", "-d", "-c"];
|
||||
const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"];
|
||||
const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"];
|
||||
const ARGS_BROTLI: &[&str] = &["brotli", "-d", "-c"];
|
||||
const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"];
|
||||
|
||||
fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
|
||||
DecompressionCommand {
|
||||
@@ -367,15 +369,14 @@ fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
vec![
|
||||
cmd("*.gz", ARGS_GZIP),
|
||||
cmd("*.tgz", ARGS_GZIP),
|
||||
|
||||
cmd("*.bz2", ARGS_BZIP),
|
||||
cmd("*.tbz2", ARGS_BZIP),
|
||||
|
||||
cmd("*.xz", ARGS_XZ),
|
||||
cmd("*.txz", ARGS_XZ),
|
||||
|
||||
cmd("*.lz4", ARGS_LZ4),
|
||||
|
||||
cmd("*.lzma", ARGS_LZMA),
|
||||
cmd("*.br", ARGS_BROTLI),
|
||||
cmd("*.zst", ARGS_ZSTD),
|
||||
cmd("*.zstd", ARGS_ZSTD),
|
||||
]
|
||||
}
|
||||
|
@@ -1,6 +1,8 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::str;
|
||||
|
||||
use bstr::{BStr, BString};
|
||||
|
||||
/// A single state in the state machine used by `unescape`.
|
||||
#[derive(Clone, Copy, Eq, PartialEq)]
|
||||
enum State {
|
||||
@@ -35,18 +37,16 @@ enum State {
|
||||
///
|
||||
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
|
||||
/// ```
|
||||
pub fn escape(mut bytes: &[u8]) -> String {
|
||||
pub fn escape(bytes: &[u8]) -> String {
|
||||
let bytes = BStr::new(bytes);
|
||||
let mut escaped = String::new();
|
||||
while let Some(result) = decode_utf8(bytes) {
|
||||
match result {
|
||||
Ok(cp) => {
|
||||
escape_char(cp, &mut escaped);
|
||||
bytes = &bytes[cp.len_utf8()..];
|
||||
}
|
||||
Err(byte) => {
|
||||
escape_byte(byte, &mut escaped);
|
||||
bytes = &bytes[1..];
|
||||
for (s, e, ch) in bytes.char_indices() {
|
||||
if ch == '\u{FFFD}' {
|
||||
for b in bytes[s..e].bytes() {
|
||||
escape_byte(b, &mut escaped);
|
||||
}
|
||||
} else {
|
||||
escape_char(ch, &mut escaped);
|
||||
}
|
||||
}
|
||||
escaped
|
||||
@@ -56,19 +56,7 @@ pub fn escape(mut bytes: &[u8]) -> String {
|
||||
///
|
||||
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
|
||||
pub fn escape_os(string: &OsStr) -> String {
|
||||
#[cfg(unix)]
|
||||
fn imp(string: &OsStr) -> String {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
escape(string.as_bytes())
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn imp(string: &OsStr) -> String {
|
||||
escape(string.to_string_lossy().as_bytes())
|
||||
}
|
||||
|
||||
imp(string)
|
||||
escape(BString::from_os_str_lossy(string).as_bytes())
|
||||
}
|
||||
|
||||
/// Unescapes a string.
|
||||
@@ -195,46 +183,6 @@ fn escape_byte(byte: u8, into: &mut String) {
|
||||
}
|
||||
}
|
||||
|
||||
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
|
||||
///
|
||||
/// If no valid encoding of a codepoint exists at the beginning of the given
|
||||
/// byte slice, then the first byte is returned instead.
|
||||
///
|
||||
/// This returns `None` if and only if `bytes` is empty.
|
||||
fn decode_utf8(bytes: &[u8]) -> Option<Result<char, u8>> {
|
||||
if bytes.is_empty() {
|
||||
return None;
|
||||
}
|
||||
let len = match utf8_len(bytes[0]) {
|
||||
None => return Some(Err(bytes[0])),
|
||||
Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
|
||||
Some(len) => len,
|
||||
};
|
||||
match str::from_utf8(&bytes[..len]) {
|
||||
Ok(s) => Some(Ok(s.chars().next().unwrap())),
|
||||
Err(_) => Some(Err(bytes[0])),
|
||||
}
|
||||
}
|
||||
|
||||
/// Given a UTF-8 leading byte, this returns the total number of code units
|
||||
/// in the following encoded codepoint.
|
||||
///
|
||||
/// If the given byte is not a valid UTF-8 leading byte, then this returns
|
||||
/// `None`.
|
||||
fn utf8_len(byte: u8) -> Option<usize> {
|
||||
if byte <= 0x7F {
|
||||
Some(1)
|
||||
} else if byte <= 0b110_11111 {
|
||||
Some(2)
|
||||
} else if byte <= 0b1110_1111 {
|
||||
Some(3)
|
||||
} else if byte <= 0b1111_0111 {
|
||||
Some(4)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{escape, unescape};
|
||||
|
@@ -159,6 +159,7 @@ error message is crafted that typically tells the user how to fix the problem.
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate atty;
|
||||
extern crate bstr;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
|
@@ -14,10 +14,10 @@ license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.0.2"
|
||||
memchr = "2.1"
|
||||
|
||||
[dev-dependencies]
|
||||
regex = "1.0.5"
|
||||
regex = "1.1"
|
||||
|
||||
[[test]]
|
||||
name = "integration"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.1" #:version
|
||||
version = "0.1.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use PCRE2 with the 'grep' crate.
|
||||
@@ -14,4 +14,4 @@ license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||
pcre2 = "0.1.0"
|
||||
pcre2 = "0.1.1"
|
||||
|
@@ -199,16 +199,34 @@ impl RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable PCRE2's JIT.
|
||||
/// Enable PCRE2's JIT and return an error if it's not available.
|
||||
///
|
||||
/// This generally speeds up matching quite a bit. The downside is that it
|
||||
/// can increase the time it takes to compile a pattern.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
/// If the JIT isn't available or if JIT compilation returns an error, then
|
||||
/// regex compilation will fail with the corresponding error.
|
||||
///
|
||||
/// This is disabled by default, and always overrides `jit_if_available`.
|
||||
pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.jit(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable PCRE2's JIT if it's available.
|
||||
///
|
||||
/// This generally speeds up matching quite a bit. The downside is that it
|
||||
/// can increase the time it takes to compile a pattern.
|
||||
///
|
||||
/// If the JIT isn't available or if JIT compilation returns an error,
|
||||
/// then a debug message with the error will be emitted and the regex will
|
||||
/// otherwise silently fall back to non-JIT matching.
|
||||
///
|
||||
/// This is disabled by default, and always overrides `jit`.
|
||||
pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.builder.jit_if_available(yes);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of the `Matcher` trait using PCRE2.
|
||||
|
@@ -18,10 +18,11 @@ default = ["serde1"]
|
||||
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
||||
|
||||
[dependencies]
|
||||
base64 = { version = "0.9.2", optional = true }
|
||||
base64 = { version = "0.10.0", optional = true }
|
||||
bstr = "0.1.2"
|
||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||
grep-searcher = { version = "0.1.1", path = "../grep-searcher" }
|
||||
termcolor = "1.0.3"
|
||||
termcolor = "1.0.4"
|
||||
serde = { version = "1.0.77", optional = true }
|
||||
serde_derive = { version = "1.0.77", optional = true }
|
||||
serde_json = { version = "1.0.27", optional = true }
|
||||
|
@@ -817,7 +817,8 @@ impl<'a> SubMatches<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
|
||||
use grep_matcher::LineTerminator;
|
||||
use grep_searcher::SearcherBuilder;
|
||||
|
||||
use super::{JSON, JSONBuilder};
|
||||
@@ -918,4 +919,45 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(got.lines().count(), 2);
|
||||
assert!(got.contains("begin") && got.contains("end"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn missing_crlf() {
|
||||
let haystack = "test\r\n".as_bytes();
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.build("test")
|
||||
.unwrap();
|
||||
let mut printer = JSONBuilder::new()
|
||||
.build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.build()
|
||||
.search_reader(&matcher, haystack, printer.sink(&matcher))
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
assert!(
|
||||
got.lines().nth(1).unwrap().contains(r"test\r\n"),
|
||||
r"missing 'test\r\n' in '{}'",
|
||||
got.lines().nth(1).unwrap(),
|
||||
);
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.crlf(true)
|
||||
.build("test")
|
||||
.unwrap();
|
||||
let mut printer = JSONBuilder::new()
|
||||
.build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.line_terminator(LineTerminator::crlf())
|
||||
.build()
|
||||
.search_reader(&matcher, haystack, printer.sink(&matcher))
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
assert!(
|
||||
got.lines().nth(1).unwrap().contains(r"test\r\n"),
|
||||
r"missing 'test\r\n' in '{}'",
|
||||
got.lines().nth(1).unwrap(),
|
||||
);
|
||||
}
|
||||
}
|
||||
|
@@ -70,6 +70,7 @@ fn example() -> Result<(), Box<Error>> {
|
||||
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate base64;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[cfg(test)]
|
||||
extern crate grep_regex;
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/// Like assert_eq, but nicer output for long strings.
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! assert_eq_printed {
|
||||
|
@@ -403,7 +403,7 @@ impl<W: WriteColor> Summary<W> {
|
||||
where M: Matcher,
|
||||
P: ?Sized + AsRef<Path>,
|
||||
{
|
||||
if !self.config.path {
|
||||
if !self.config.path && !self.config.kind.requires_path() {
|
||||
return self.sink(matcher);
|
||||
}
|
||||
let stats =
|
||||
@@ -477,7 +477,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
|
||||
/// This is unaffected by the result of searches before the previous
|
||||
/// search.
|
||||
pub fn has_match(&self) -> bool {
|
||||
self.match_count > 0
|
||||
match self.summary.config.kind {
|
||||
SummaryKind::PathWithoutMatch => self.match_count == 0,
|
||||
_ => self.match_count > 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If binary data was found in the previous search, this returns the
|
||||
|
@@ -4,6 +4,7 @@ use std::io;
|
||||
use std::path::Path;
|
||||
use std::time;
|
||||
|
||||
use bstr::{BStr, BString};
|
||||
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineIter,
|
||||
@@ -262,26 +263,12 @@ impl<'a> Sunk<'a> {
|
||||
/// portability with a small cost: on Windows, paths that are not valid UTF-16
|
||||
/// will not roundtrip correctly.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct PrinterPath<'a>(Cow<'a, [u8]>);
|
||||
pub struct PrinterPath<'a>(Cow<'a, BStr>);
|
||||
|
||||
impl<'a> PrinterPath<'a> {
|
||||
/// Create a new path suitable for printing.
|
||||
pub fn new(path: &'a Path) -> PrinterPath<'a> {
|
||||
PrinterPath::new_impl(path)
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn new_impl(path: &'a Path) -> PrinterPath<'a> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
PrinterPath(Cow::Borrowed(path.as_os_str().as_bytes()))
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn new_impl(path: &'a Path) -> PrinterPath<'a> {
|
||||
PrinterPath(match path.to_string_lossy() {
|
||||
Cow::Owned(path) => Cow::Owned(path.into_bytes()),
|
||||
Cow::Borrowed(path) => Cow::Borrowed(path.as_bytes()),
|
||||
})
|
||||
PrinterPath(BString::from_path_lossy(path))
|
||||
}
|
||||
|
||||
/// Create a new printer path from the given path which can be efficiently
|
||||
@@ -302,7 +289,7 @@ impl<'a> PrinterPath<'a> {
|
||||
/// path separators that are both replaced by `new_sep`. In all other
|
||||
/// environments, only `/` is treated as a path separator.
|
||||
fn replace_separator(&mut self, new_sep: u8) {
|
||||
let transformed_path: Vec<_> = self.as_bytes().iter().map(|&b| {
|
||||
let transformed_path: BString = self.0.bytes().map(|b| {
|
||||
if b == b'/' || (cfg!(windows) && b == b'\\') {
|
||||
new_sep
|
||||
} else {
|
||||
@@ -314,7 +301,7 @@ impl<'a> PrinterPath<'a> {
|
||||
|
||||
/// Return the raw bytes for this path.
|
||||
pub fn as_bytes(&self) -> &[u8] {
|
||||
&*self.0
|
||||
self.0.as_bytes()
|
||||
}
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-regex"
|
||||
version = "0.1.1" #:version
|
||||
version = "0.1.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use Rust's regex library with the 'grep' crate.
|
||||
@@ -15,7 +15,7 @@ license = "Unlicense/MIT"
|
||||
[dependencies]
|
||||
log = "0.4.5"
|
||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||
regex = "1.0.5"
|
||||
regex-syntax = "0.6.2"
|
||||
regex = "1.1"
|
||||
regex-syntax = "0.6.5"
|
||||
thread_local = "0.3.6"
|
||||
utf8-ranges = "1.0.1"
|
||||
|
@@ -160,6 +160,14 @@ impl ConfiguredHIR {
|
||||
non_matching_bytes(&self.expr)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this regex needs to have its match offsets
|
||||
/// tweaked because of CRLF support. Specifically, this occurs when the
|
||||
/// CRLF hack is enabled and the regex is line anchored at the end. In
|
||||
/// this case, matches that end with a `\r` have the `\r` stripped.
|
||||
pub fn needs_crlf_stripped(&self) -> bool {
|
||||
self.config.crlf && self.expr.is_line_anchored_end()
|
||||
}
|
||||
|
||||
/// Builds a regular expression from this HIR expression.
|
||||
pub fn regex(&self) -> Result<Regex, Error> {
|
||||
self.pattern_to_regex(&self.expr.to_string())
|
||||
@@ -199,7 +207,7 @@ impl ConfiguredHIR {
|
||||
if self.config.line_terminator.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
match LiteralSets::new(&self.expr).one_regex() {
|
||||
match LiteralSets::new(&self.expr).one_regex(self.config.word) {
|
||||
None => Ok(None),
|
||||
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
|
||||
}
|
||||
|
@@ -1,5 +1,110 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::Regex;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher that strips a trailing `\r` from the end of every match.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CRLFMatcher {
|
||||
/// The regex whose matches are adjusted by this matcher.
|
||||
regex: Regex,
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl CRLFMatcher {
|
||||
/// Create a new matcher from the given pattern that strips `\r` from the
|
||||
/// end of every match.
|
||||
///
|
||||
/// This panics if the given expression doesn't need its CRLF stripped.
|
||||
pub fn new(expr: &ConfiguredHIR) -> Result<CRLFMatcher, Error> {
|
||||
assert!(expr.needs_crlf_stripped());
|
||||
|
||||
let regex = expr.regex()?;
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i.checked_sub(1).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(CRLFMatcher { regex, names })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for CRLFMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
let m = match self.regex.find_at(haystack, at) {
|
||||
None => return Ok(None),
|
||||
Some(m) => Match::new(m.start(), m.end()),
|
||||
};
|
||||
Ok(Some(adjust_match(haystack, m)))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len().checked_sub(1).unwrap()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
caps.strip_crlf(false);
|
||||
let r = self.regex.captures_read_at(caps.locations(), haystack, at);
|
||||
if !r.is_some() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// If the end of our match includes a `\r`, then strip it from all
|
||||
// capture groups ending at the same location.
|
||||
let end = caps.locations().get(0).unwrap().1;
|
||||
if end > 0 && haystack.get(end - 1) == Some(&b'\r') {
|
||||
caps.strip_crlf(true);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter or
|
||||
// captures_iter. Namely, the iter methods are guaranteed to be correct
|
||||
// by virtue of implementing find_at and captures_at above.
|
||||
}
|
||||
|
||||
/// If the given match ends with a `\r`, then return a new match that ends
|
||||
/// immediately before the `\r`.
|
||||
pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
|
||||
if m.end() > 0 && haystack.get(m.end() - 1) == Some(&b'\r') {
|
||||
m.with_end(m.end() - 1)
|
||||
} else {
|
||||
m
|
||||
}
|
||||
}
|
||||
|
||||
/// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`.
|
||||
///
|
||||
/// This does not preserve the exact semantics of the given expression,
|
||||
|
@@ -47,18 +47,23 @@ impl LiteralSets {
|
||||
/// generated these literal sets. The idea here is that the pattern
|
||||
/// returned by this method is much cheaper to search for. i.e., It is
|
||||
/// usually a single literal or an alternation of literals.
|
||||
pub fn one_regex(&self) -> Option<String> {
|
||||
pub fn one_regex(&self, word: bool) -> Option<String> {
|
||||
// TODO: The logic in this function is basically inscrutable. It grew
|
||||
// organically in the old grep 0.1 crate. Ideally, it would be
|
||||
// re-worked. In fact, the entire inner literal extraction should be
|
||||
// re-worked. Actually, most of regex-syntax's literal extraction
|
||||
// should also be re-worked. Alas... only so much time in the day.
|
||||
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything.
|
||||
return None;
|
||||
if !word {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything. But we only do this
|
||||
// if we aren't doing a word regex, since a word regex adds
|
||||
// a `(?:\W|^)` to the beginning of the regex, thereby
|
||||
// defeating the regex engine's literal detection.
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
// Out of inner required literals, prefixes and suffixes, which one
|
||||
@@ -166,10 +171,10 @@ fn union_required(expr: &Hir, lits: &mut Literals) {
|
||||
lits.cut();
|
||||
continue;
|
||||
}
|
||||
if lits2.contains_empty() {
|
||||
if lits2.contains_empty() || !is_simple(&e) {
|
||||
lits.cut();
|
||||
}
|
||||
if !lits.cross_product(&lits2) {
|
||||
if !lits.cross_product(&lits2) || !lits2.any_complete() {
|
||||
// If this expression couldn't yield any literal that
|
||||
// could be extended, then we need to quit. Since we're
|
||||
// short-circuiting, we also need to freeze every member.
|
||||
@@ -250,6 +255,20 @@ fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
|
||||
}
|
||||
}
|
||||
|
||||
fn is_simple(expr: &Hir) -> bool {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty
|
||||
| HirKind::Literal(_)
|
||||
| HirKind::Class(_)
|
||||
| HirKind::Repetition(_)
|
||||
| HirKind::Concat(_)
|
||||
| HirKind::Alternation(_) => true,
|
||||
HirKind::Anchor(_)
|
||||
| HirKind::WordBoundary(_)
|
||||
| HirKind::Group(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of characters in the given class.
|
||||
fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
|
||||
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
|
||||
@@ -271,7 +290,7 @@ mod tests {
|
||||
}
|
||||
|
||||
fn one_regex(pattern: &str) -> Option<String> {
|
||||
sets(pattern).one_regex()
|
||||
sets(pattern).one_regex(false)
|
||||
}
|
||||
|
||||
// Put a pattern into the same format as the one returned by `one_regex`.
|
||||
@@ -301,4 +320,12 @@ mod tests {
|
||||
// assert_eq!(one_regex(r"\w(foo|bar|baz)"), pat("foo|bar|baz"));
|
||||
// assert_eq!(one_regex(r"\w(foo|bar|baz)\w"), pat("foo|bar|baz"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regression_1064() {
|
||||
// Regression from:
|
||||
// https://github.com/BurntSushi/ripgrep/issues/1064
|
||||
// assert_eq!(one_regex(r"a.*c"), pat("a"));
|
||||
assert_eq!(one_regex(r"a(.*c)"), pat("a"));
|
||||
}
|
||||
}
|
||||
|
@@ -6,6 +6,7 @@ use grep_matcher::{
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
use config::{Config, ConfiguredHIR};
|
||||
use crlf::CRLFMatcher;
|
||||
use error::Error;
|
||||
use word::WordMatcher;
|
||||
|
||||
@@ -49,9 +50,12 @@ impl RegexMatcherBuilder {
|
||||
if let Some(ref re) = fast_line_regex {
|
||||
trace!("extracted fast line regex: {:?}", re);
|
||||
}
|
||||
|
||||
let matcher = RegexMatcherImpl::new(&chir)?;
|
||||
trace!("final regex: {:?}", matcher.regex());
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
matcher: RegexMatcherImpl::new(&chir)?,
|
||||
matcher: matcher,
|
||||
fast_line_regex: fast_line_regex,
|
||||
non_matching_bytes: non_matching_bytes,
|
||||
})
|
||||
@@ -344,6 +348,11 @@ impl RegexMatcher {
|
||||
enum RegexMatcherImpl {
|
||||
/// The standard matcher used for all regular expressions.
|
||||
Standard(StandardMatcher),
|
||||
/// A matcher that strips `\r` from the end of matches.
|
||||
///
|
||||
/// This is only used when the CRLF hack is enabled and the regex is line
|
||||
/// anchored at the end.
|
||||
CRLF(CRLFMatcher),
|
||||
/// A matcher that only matches at word boundaries. This transforms the
|
||||
/// regex to `(^|\W)(...)($|\W)` instead of the more intuitive `\b(...)\b`.
|
||||
/// Because of this, the WordMatcher provides its own implementation of
|
||||
@@ -358,10 +367,21 @@ impl RegexMatcherImpl {
|
||||
fn new(expr: &ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
|
||||
if expr.config().word {
|
||||
Ok(RegexMatcherImpl::Word(WordMatcher::new(expr)?))
|
||||
} else if expr.needs_crlf_stripped() {
|
||||
Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
|
||||
} else {
|
||||
Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the underlying regex object used.
|
||||
fn regex(&self) -> &Regex {
|
||||
match *self {
|
||||
RegexMatcherImpl::Word(ref x) => x.regex(),
|
||||
RegexMatcherImpl::CRLF(ref x) => x.regex(),
|
||||
RegexMatcherImpl::Standard(ref x) => &x.regex,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This implementation just dispatches on the internal matcher impl except
|
||||
@@ -379,6 +399,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find_at(haystack, at),
|
||||
CRLF(ref m) => m.find_at(haystack, at),
|
||||
Word(ref m) => m.find_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -387,6 +408,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.new_captures(),
|
||||
CRLF(ref m) => m.new_captures(),
|
||||
Word(ref m) => m.new_captures(),
|
||||
}
|
||||
}
|
||||
@@ -395,6 +417,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.capture_count(),
|
||||
CRLF(ref m) => m.capture_count(),
|
||||
Word(ref m) => m.capture_count(),
|
||||
}
|
||||
}
|
||||
@@ -403,6 +426,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.capture_index(name),
|
||||
CRLF(ref m) => m.capture_index(name),
|
||||
Word(ref m) => m.capture_index(name),
|
||||
}
|
||||
}
|
||||
@@ -411,6 +435,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find(haystack),
|
||||
CRLF(ref m) => m.find(haystack),
|
||||
Word(ref m) => m.find(haystack),
|
||||
}
|
||||
}
|
||||
@@ -425,6 +450,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find_iter(haystack, matched),
|
||||
CRLF(ref m) => m.find_iter(haystack, matched),
|
||||
Word(ref m) => m.find_iter(haystack, matched),
|
||||
}
|
||||
}
|
||||
@@ -439,6 +465,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.try_find_iter(haystack, matched),
|
||||
CRLF(ref m) => m.try_find_iter(haystack, matched),
|
||||
Word(ref m) => m.try_find_iter(haystack, matched),
|
||||
}
|
||||
}
|
||||
@@ -451,6 +478,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures(haystack, caps),
|
||||
CRLF(ref m) => m.captures(haystack, caps),
|
||||
Word(ref m) => m.captures(haystack, caps),
|
||||
}
|
||||
}
|
||||
@@ -466,6 +494,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
CRLF(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
Word(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
}
|
||||
}
|
||||
@@ -481,6 +510,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
Word(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
}
|
||||
}
|
||||
@@ -494,6 +524,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures_at(haystack, at, caps),
|
||||
CRLF(ref m) => m.captures_at(haystack, at, caps),
|
||||
Word(ref m) => m.captures_at(haystack, at, caps),
|
||||
}
|
||||
}
|
||||
@@ -509,6 +540,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.replace(haystack, dst, append),
|
||||
CRLF(ref m) => m.replace(haystack, dst, append),
|
||||
Word(ref m) => m.replace(haystack, dst, append),
|
||||
}
|
||||
}
|
||||
@@ -527,6 +559,9 @@ impl Matcher for RegexMatcher {
|
||||
Standard(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
CRLF(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
Word(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
@@ -537,6 +572,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.is_match(haystack),
|
||||
CRLF(ref m) => m.is_match(haystack),
|
||||
Word(ref m) => m.is_match(haystack),
|
||||
}
|
||||
}
|
||||
@@ -549,6 +585,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.is_match_at(haystack, at),
|
||||
CRLF(ref m) => m.is_match_at(haystack, at),
|
||||
Word(ref m) => m.is_match_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -560,6 +597,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.shortest_match(haystack),
|
||||
CRLF(ref m) => m.shortest_match(haystack),
|
||||
Word(ref m) => m.shortest_match(haystack),
|
||||
}
|
||||
}
|
||||
@@ -572,6 +610,7 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.shortest_match_at(haystack, at),
|
||||
CRLF(ref m) => m.shortest_match_at(haystack, at),
|
||||
Word(ref m) => m.shortest_match_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -712,6 +751,9 @@ pub struct RegexCaptures {
|
||||
/// and the capturing groups must behave as if `(re)` is the `0`th capture
|
||||
/// group.
|
||||
offset: usize,
|
||||
/// When enabled, the end of a match has `\r` stripped from it, if one
|
||||
/// exists.
|
||||
strip_crlf: bool,
|
||||
}
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
@@ -720,8 +762,25 @@ impl Captures for RegexCaptures {
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
let actual = i.checked_add(self.offset).unwrap();
|
||||
self.locs.pos(actual).map(|(s, e)| Match::new(s, e))
|
||||
if !self.strip_crlf {
|
||||
let actual = i.checked_add(self.offset).unwrap();
|
||||
return self.locs.pos(actual).map(|(s, e)| Match::new(s, e));
|
||||
}
|
||||
|
||||
// currently don't support capture offsetting with CRLF stripping
|
||||
assert_eq!(self.offset, 0);
|
||||
let m = match self.locs.pos(i).map(|(s, e)| Match::new(s, e)) {
|
||||
None => return None,
|
||||
Some(m) => m,
|
||||
};
|
||||
// If the end position of this match corresponds to the end position
|
||||
// of the overall match, then we apply our CRLF stripping. Otherwise,
|
||||
// we cannot assume stripping is correct.
|
||||
if i == 0 || m.end() == self.locs.pos(0).unwrap().1 {
|
||||
Some(m.with_end(m.end() - 1))
|
||||
} else {
|
||||
Some(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -734,12 +793,16 @@ impl RegexCaptures {
|
||||
locs: CaptureLocations,
|
||||
offset: usize,
|
||||
) -> RegexCaptures {
|
||||
RegexCaptures { locs, offset }
|
||||
RegexCaptures { locs, offset, strip_crlf: false }
|
||||
}
|
||||
|
||||
/// Return a mutable reference to the underlying capture locations.
pub(crate) fn locations(&mut self) -> &mut CaptureLocations {
|
||||
&mut self.locs
|
||||
}
|
||||
|
||||
/// Enable or disable stripping of a trailing `\r` from the matches
/// reported by these captures.
pub(crate) fn strip_crlf(&mut self, yes: bool) {
|
||||
self.strip_crlf = yes;
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
|
@@ -55,6 +55,11 @@ impl WordMatcher {
|
||||
}
|
||||
Ok(WordMatcher { regex, names, locs })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for WordMatcher {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-searcher"
|
||||
version = "0.1.1" #:version
|
||||
version = "0.1.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -13,23 +13,21 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
bytecount = "0.3.2"
|
||||
encoding_rs = "0.8.6"
|
||||
encoding_rs_io = "0.1.2"
|
||||
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
|
||||
bytecount = "0.5"
|
||||
encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.4"
|
||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||
log = "0.4.5"
|
||||
memchr = "2.0.2"
|
||||
memmap = "0.6.2"
|
||||
memmap = "0.7"
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
||||
regex = "1.0.5"
|
||||
regex = "1.1"
|
||||
|
||||
[features]
|
||||
avx-accel = [
|
||||
"bytecount/avx-accel",
|
||||
]
|
||||
simd-accel = [
|
||||
"bytecount/simd-accel",
|
||||
"encoding_rs/simd-accel",
|
||||
]
|
||||
default = ["bytecount/runtime-dispatch-simd"]
|
||||
simd-accel = ["encoding_rs/simd-accel"]
|
||||
|
||||
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
||||
avx-accel = []
|
||||
|
@@ -99,13 +99,13 @@ searches stdin.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate bstr;
|
||||
extern crate bytecount;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate memmap;
|
||||
#[cfg(test)]
|
||||
extern crate regex;
|
||||
|
@@ -1,8 +1,7 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
use std::ptr;
|
||||
|
||||
use memchr::{memchr, memrchr};
|
||||
use bstr::{BStr, BString};
|
||||
|
||||
/// The default buffer capacity that we use for the line buffer.
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1<<10); // 8 KB
|
||||
@@ -123,7 +122,7 @@ impl LineBufferBuilder {
|
||||
pub fn build(&self) -> LineBuffer {
|
||||
LineBuffer {
|
||||
config: self.config,
|
||||
buf: vec![0; self.config.capacity],
|
||||
buf: BString::from(vec![0; self.config.capacity]),
|
||||
pos: 0,
|
||||
last_lineterm: 0,
|
||||
end: 0,
|
||||
@@ -255,6 +254,12 @@ impl<'b, R: io::Read> LineBufferReader<'b, R> {
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
pub fn buffer(&self) -> &[u8] {
|
||||
self.line_buffer.buffer().as_bytes()
|
||||
}
|
||||
|
||||
/// Return the underlying buffer as a byte string. Used for tests only.
|
||||
#[cfg(test)]
|
||||
fn bstr(&self) -> &BStr {
|
||||
self.line_buffer.buffer()
|
||||
}
|
||||
|
||||
@@ -284,7 +289,7 @@ pub struct LineBuffer {
|
||||
/// The configuration of this buffer.
|
||||
config: Config,
|
||||
/// The primary buffer with which to hold data.
|
||||
buf: Vec<u8>,
|
||||
buf: BString,
|
||||
/// The current position of this buffer. This is always a valid sliceable
|
||||
/// index into `buf`, and its maximum value is the length of `buf`.
|
||||
pos: usize,
|
||||
@@ -339,13 +344,13 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
/// Return the contents of this buffer.
|
||||
fn buffer(&self) -> &[u8] {
|
||||
fn buffer(&self) -> &BStr {
|
||||
&self.buf[self.pos..self.last_lineterm]
|
||||
}
|
||||
|
||||
/// Return the contents of the free space beyond the end of the buffer as
|
||||
/// a mutable slice.
|
||||
fn free_buffer(&mut self) -> &mut [u8] {
|
||||
fn free_buffer(&mut self) -> &mut BStr {
|
||||
&mut self.buf[self.end..]
|
||||
}
|
||||
|
||||
@@ -396,7 +401,7 @@ impl LineBuffer {
|
||||
assert_eq!(self.pos, 0);
|
||||
loop {
|
||||
self.ensure_capacity()?;
|
||||
let readlen = rdr.read(self.free_buffer())?;
|
||||
let readlen = rdr.read(self.free_buffer().as_bytes_mut())?;
|
||||
if readlen == 0 {
|
||||
// We're only done reading for good once the caller has
|
||||
// consumed everything.
|
||||
@@ -416,7 +421,7 @@ impl LineBuffer {
|
||||
match self.config.binary {
|
||||
BinaryDetection::None => {} // nothing to do
|
||||
BinaryDetection::Quit(byte) => {
|
||||
if let Some(i) = memchr(byte, newbytes) {
|
||||
if let Some(i) = newbytes.find_byte(byte) {
|
||||
self.end = oldend + i;
|
||||
self.last_lineterm = self.end;
|
||||
self.binary_byte_offset =
|
||||
@@ -444,7 +449,7 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
// Update our `last_lineterm` positions if we read one.
|
||||
if let Some(i) = memrchr(self.config.lineterm, newbytes) {
|
||||
if let Some(i) = newbytes.rfind_byte(self.config.lineterm) {
|
||||
self.last_lineterm = oldend + i + 1;
|
||||
return Ok(true);
|
||||
}
|
||||
@@ -467,40 +472,8 @@ impl LineBuffer {
|
||||
return;
|
||||
}
|
||||
|
||||
assert!(self.pos < self.end && self.end <= self.buf.len());
|
||||
let roll_len = self.end - self.pos;
|
||||
unsafe {
|
||||
// SAFETY: A buffer contains Copy data, so there's no problem
|
||||
// moving it around. Safety also depends on our indices being
|
||||
// in bounds, which they should always be, and we enforce with
|
||||
// an assert above.
|
||||
//
|
||||
// It seems like it should be possible to do this in safe code that
|
||||
// results in the same codegen. I tried the obvious:
|
||||
//
|
||||
// for (src, dst) in (self.pos..self.end).zip(0..) {
|
||||
// self.buf[dst] = self.buf[src];
|
||||
// }
|
||||
//
|
||||
// But the above does not work, and in fact compiles down to a slow
|
||||
// byte-by-byte loop. I tried a few other minor variations, but
|
||||
// alas, better minds might prevail.
|
||||
//
|
||||
// Overall, this doesn't save us *too* much. It mostly matters when
|
||||
// the number of bytes we're copying is large, which can happen
|
||||
// if the searcher is asked to produce a lot of context. We could
|
||||
// decide this isn't worth it, but it does make an appreciable
|
||||
// impact at or around the context=30 range on my machine.
|
||||
//
|
||||
// We could also use a temporary buffer that compiles down to two
|
||||
// memcpys and is faster than the byte-at-a-time loop, but it
|
||||
// complicates our options for limiting memory allocation a bit.
|
||||
ptr::copy(
|
||||
self.buf[self.pos..].as_ptr(),
|
||||
self.buf.as_mut_ptr(),
|
||||
roll_len,
|
||||
);
|
||||
}
|
||||
self.buf.copy_within(self.pos.., 0);
|
||||
self.pos = 0;
|
||||
self.last_lineterm = roll_len;
|
||||
self.end = roll_len;
|
||||
@@ -536,14 +509,15 @@ impl LineBuffer {
|
||||
}
|
||||
}
|
||||
|
||||
/// Replaces `src` with `replacement` in bytes.
|
||||
fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
/// Replaces `src` with `replacement` in bytes, and returns the offset of the
|
||||
/// first replacement, if one exists.
|
||||
fn replace_bytes(bytes: &mut BStr, src: u8, replacement: u8) -> Option<usize> {
|
||||
if src == replacement {
|
||||
return None;
|
||||
}
|
||||
let mut first_pos = None;
|
||||
let mut pos = 0;
|
||||
while let Some(i) = memchr(src, &bytes[pos..]).map(|i| pos + i) {
|
||||
while let Some(i) = bytes[pos..].find_byte(src).map(|i| pos + i) {
|
||||
if first_pos.is_none() {
|
||||
first_pos = Some(i);
|
||||
}
|
||||
@@ -560,6 +534,7 @@ fn replace_bytes(bytes: &mut [u8], src: u8, replacement: u8) -> Option<usize> {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::str;
|
||||
use bstr::BString;
|
||||
use super::*;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
@@ -575,18 +550,14 @@ and exhibited clearly, with a label attached.\
|
||||
slice.to_string()
|
||||
}
|
||||
|
||||
fn btos(slice: &[u8]) -> &str {
|
||||
str::from_utf8(slice).unwrap()
|
||||
}
|
||||
|
||||
fn replace_str(
|
||||
slice: &str,
|
||||
src: u8,
|
||||
replacement: u8,
|
||||
) -> (String, Option<usize>) {
|
||||
let mut dst = slice.to_string().into_bytes();
|
||||
let mut dst = BString::from(slice);
|
||||
let result = replace_bytes(&mut dst, src, replacement);
|
||||
(String::from_utf8(dst).unwrap(), result)
|
||||
(dst.into_string().unwrap(), result)
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -607,7 +578,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\n");
|
||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||
rdr.consume(5);
|
||||
assert_eq!(rdr.absolute_byte_offset(), 5);
|
||||
@@ -615,7 +586,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(rdr.absolute_byte_offset(), 11);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
||||
assert_eq!(rdr.bstr(), "maggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -630,7 +601,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -645,7 +616,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\n");
|
||||
assert_eq!(rdr.bstr(), "\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -660,7 +631,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\n\n");
|
||||
assert_eq!(rdr.bstr(), "\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -698,12 +669,12 @@ and exhibited clearly, with a label attached.\
|
||||
let mut linebuf = LineBufferBuilder::new().capacity(1).build();
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
let mut got = vec![];
|
||||
let mut got = BString::new();
|
||||
while rdr.fill().unwrap() {
|
||||
got.extend(rdr.buffer());
|
||||
got.push(rdr.buffer());
|
||||
rdr.consume_all();
|
||||
}
|
||||
assert_eq!(bytes, btos(&got));
|
||||
assert_eq!(bytes, got);
|
||||
assert_eq!(rdr.absolute_byte_offset(), bytes.len() as u64);
|
||||
assert_eq!(rdr.binary_byte_offset(), None);
|
||||
}
|
||||
@@ -718,11 +689,11 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
||||
assert_eq!(rdr.bstr(), "homer\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
||||
assert_eq!(rdr.bstr(), "lisa\n");
|
||||
rdr.consume_all();
|
||||
|
||||
// This returns an error because while we have just enough room to
|
||||
@@ -732,11 +703,11 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.fill().is_err());
|
||||
|
||||
// We can mush on though!
|
||||
assert_eq!(btos(rdr.buffer()), "m");
|
||||
assert_eq!(rdr.bstr(), "m");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "aggie");
|
||||
assert_eq!(rdr.bstr(), "aggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -752,16 +723,16 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\n");
|
||||
assert_eq!(rdr.bstr(), "homer\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "lisa\n");
|
||||
assert_eq!(rdr.bstr(), "lisa\n");
|
||||
rdr.consume_all();
|
||||
|
||||
// We have just enough space.
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "maggie");
|
||||
assert_eq!(rdr.bstr(), "maggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -777,7 +748,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(rdr.fill().is_err());
|
||||
assert_eq!(btos(rdr.buffer()), "");
|
||||
assert_eq!(rdr.bstr(), "");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -789,7 +760,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli\x00sa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nli\x00sa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -808,7 +779,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli");
|
||||
assert_eq!(rdr.bstr(), "homer\nli");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -825,7 +796,7 @@ and exhibited clearly, with a label attached.\
|
||||
let mut rdr = LineBufferReader::new(bytes.as_bytes(), &mut linebuf);
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "");
|
||||
assert_eq!(rdr.bstr(), "");
|
||||
assert_eq!(rdr.absolute_byte_offset(), 0);
|
||||
assert_eq!(rdr.binary_byte_offset(), Some(0));
|
||||
}
|
||||
@@ -841,7 +812,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -860,7 +831,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -878,7 +849,7 @@ and exhibited clearly, with a label attached.\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\
|
||||
assert_eq!(rdr.bstr(), "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, s\
|
||||
");
|
||||
@@ -901,7 +872,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nli\nsa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nli\nsa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -920,7 +891,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "\nhomer\nlisa\nmaggie\n");
|
||||
assert_eq!(rdr.bstr(), "\nhomer\nlisa\nmaggie\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -939,7 +910,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
@@ -958,7 +929,7 @@ Holmeses, s\
|
||||
assert!(rdr.buffer().is_empty());
|
||||
|
||||
assert!(rdr.fill().unwrap());
|
||||
assert_eq!(btos(rdr.buffer()), "homer\nlisa\nmaggie\n\n");
|
||||
assert_eq!(rdr.bstr(), "homer\nlisa\nmaggie\n\n");
|
||||
rdr.consume_all();
|
||||
|
||||
assert!(!rdr.fill().unwrap());
|
||||
|
@@ -2,8 +2,8 @@
|
||||
A collection of routines for performing operations on lines.
|
||||
*/
|
||||
|
||||
use bstr::B;
|
||||
use bytecount;
|
||||
use memchr::{memchr, memrchr};
|
||||
use grep_matcher::{LineTerminator, Match};
|
||||
|
||||
/// An iterator over lines in a particular slice of bytes.
|
||||
@@ -85,7 +85,7 @@ impl LineStep {
|
||||
#[inline(always)]
|
||||
fn next_impl(&mut self, mut bytes: &[u8]) -> Option<(usize, usize)> {
|
||||
bytes = &bytes[..self.end];
|
||||
match memchr(self.line_term, &bytes[self.pos..]) {
|
||||
match B(&bytes[self.pos..]).find_byte(self.line_term) {
|
||||
None => {
|
||||
if self.pos < bytes.len() {
|
||||
let m = (self.pos, bytes.len());
|
||||
@@ -135,14 +135,16 @@ pub fn locate(
|
||||
line_term: u8,
|
||||
range: Match,
|
||||
) -> Match {
|
||||
let line_start = memrchr(line_term, &bytes[0..range.start()])
|
||||
let line_start = B(&bytes[..range.start()])
|
||||
.rfind_byte(line_term)
|
||||
.map_or(0, |i| i + 1);
|
||||
let line_end =
|
||||
if range.end() > line_start && bytes[range.end() - 1] == line_term {
|
||||
range.end()
|
||||
} else {
|
||||
memchr(line_term, &bytes[range.end()..])
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
B(&bytes[range.end()..])
|
||||
.find_byte(line_term)
|
||||
.map_or(bytes.len(), |i| range.end() + i + 1)
|
||||
};
|
||||
Match::new(line_start, line_end)
|
||||
}
|
||||
@@ -180,7 +182,7 @@ fn preceding_by_pos(
|
||||
pos -= 1;
|
||||
}
|
||||
loop {
|
||||
match memrchr(line_term, &bytes[..pos]) {
|
||||
match B(&bytes[..pos]).rfind_byte(line_term) {
|
||||
None => {
|
||||
return 0;
|
||||
}
|
||||
|
@@ -1,3 +1,4 @@
|
||||
/// Like assert_eq, but nicer output for long strings.
|
||||
#[cfg(test)]
|
||||
#[macro_export]
|
||||
macro_rules! assert_eq_printed {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
use std::cmp;
|
||||
|
||||
use memchr::memchr;
|
||||
use bstr::B;
|
||||
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use lines::{self, LineStep};
|
||||
@@ -149,7 +149,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
BinaryDetection::Quit(b) => b,
|
||||
_ => return false,
|
||||
};
|
||||
if let Some(i) = memchr(binary_byte, &buf[*range]) {
|
||||
if let Some(i) = B(&buf[*range]).find_byte(binary_byte) {
|
||||
self.binary_byte_offset = Some(range.start() + i);
|
||||
true
|
||||
} else {
|
||||
@@ -424,16 +424,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
self.count_lines(buf, range.start());
|
||||
let offset = self.absolute_byte_offset + range.start() as u64;
|
||||
let linebuf =
|
||||
if self.config.line_term.is_crlf() {
|
||||
// Normally, a line terminator is never part of a match, but
|
||||
// if the line terminator is CRLF, then it's possible for `\r`
|
||||
// to end up in the match, which we generally don't want. So
|
||||
// we strip it here.
|
||||
lines::without_terminator(&buf[*range], self.config.line_term)
|
||||
} else {
|
||||
&buf[*range]
|
||||
};
|
||||
let linebuf = &buf[*range];
|
||||
let keepgoing = self.sink.matched(
|
||||
&self.searcher,
|
||||
&SinkMatch {
|
||||
|
@@ -307,6 +307,7 @@ impl SearcherBuilder {
|
||||
decode_builder
|
||||
.encoding(self.config.encoding.as_ref().map(|e| e.0))
|
||||
.utf8_passthru(true)
|
||||
.strip_bom(true)
|
||||
.bom_override(true);
|
||||
Searcher {
|
||||
config: config,
|
||||
|
@@ -1,10 +1,10 @@
|
||||
use std::io::{self, Write};
|
||||
use std::str;
|
||||
|
||||
use bstr::B;
|
||||
use grep_matcher::{
|
||||
LineMatchKind, LineTerminator, Match, Matcher, NoCaptures, NoError,
|
||||
};
|
||||
use memchr::memchr;
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
@@ -94,7 +94,8 @@ impl Matcher for RegexMatcher {
|
||||
}
|
||||
// Make it interesting and return the last byte in the current
|
||||
// line.
|
||||
let i = memchr(self.line_term.unwrap().as_byte(), haystack)
|
||||
let i = B(haystack)
|
||||
.find_byte(self.line_term.unwrap().as_byte())
|
||||
.map(|i| i)
|
||||
.unwrap_or(haystack.len() - 1);
|
||||
Ok(Some(LineMatchKind::Candidate(i)))
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.2.2" #:version
|
||||
version = "0.2.3" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -15,23 +15,18 @@ license = "Unlicense/MIT"
|
||||
[dependencies]
|
||||
grep-cli = { version = "0.1.1", path = "../grep-cli" }
|
||||
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
|
||||
grep-pcre2 = { version = "0.1.1", path = "../grep-pcre2", optional = true }
|
||||
grep-pcre2 = { version = "0.1.2", path = "../grep-pcre2", optional = true }
|
||||
grep-printer = { version = "0.1.1", path = "../grep-printer" }
|
||||
grep-regex = { version = "0.1.1", path = "../grep-regex" }
|
||||
grep-searcher = { version = "0.1.1", path = "../grep-searcher" }
|
||||
|
||||
[dev-dependencies]
|
||||
atty = "0.2.11"
|
||||
regex = "1"
|
||||
termcolor = "1"
|
||||
walkdir = "2.2.2"
|
||||
|
||||
[dev-dependencies.clap]
|
||||
version = "2.32.0"
|
||||
default-features = false
|
||||
features = ["suggestions"]
|
||||
termcolor = "1.0.4"
|
||||
walkdir = "2.2.7"
|
||||
|
||||
[features]
|
||||
avx-accel = ["grep-searcher/avx-accel"]
|
||||
simd-accel = ["grep-searcher/simd-accel"]
|
||||
pcre2 = ["grep-pcre2"]
|
||||
|
||||
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
|
||||
avx-accel = []
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.4.4" #:version
|
||||
version = "0.4.6" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
@@ -18,21 +18,21 @@ name = "ignore"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
crossbeam-channel = "0.2.4"
|
||||
crossbeam-channel = "0.3.6"
|
||||
globset = { version = "0.4.2", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
lazy_static = "1.1"
|
||||
log = "0.4.5"
|
||||
memchr = "2.0.2"
|
||||
regex = "1.0.5"
|
||||
same-file = "1.0.3"
|
||||
memchr = "2.1"
|
||||
regex = "1.1"
|
||||
same-file = "1.0.4"
|
||||
thread_local = "0.3.6"
|
||||
walkdir = "2.2.5"
|
||||
walkdir = "2.2.7"
|
||||
|
||||
[target.'cfg(windows)'.dependencies.winapi-util]
|
||||
version = "0.1.1"
|
||||
version = "0.1.2"
|
||||
|
||||
[dev-dependencies]
|
||||
tempdir = "0.3.7"
|
||||
tempfile = "3.0.5"
|
||||
|
||||
[features]
|
||||
simd-accel = ["globset/simd-accel"]
|
||||
|
@@ -37,19 +37,19 @@ fn main() {
|
||||
Box::new(move |result| {
|
||||
use ignore::WalkState::*;
|
||||
|
||||
tx.send(DirEntry::Y(result.unwrap()));
|
||||
tx.send(DirEntry::Y(result.unwrap())).unwrap();
|
||||
Continue
|
||||
})
|
||||
});
|
||||
} else if simple {
|
||||
let walker = WalkDir::new(path);
|
||||
for result in walker {
|
||||
tx.send(DirEntry::X(result.unwrap()));
|
||||
tx.send(DirEntry::X(result.unwrap())).unwrap();
|
||||
}
|
||||
} else {
|
||||
let walker = WalkBuilder::new(path).build();
|
||||
for result in walker {
|
||||
tx.send(DirEntry::Y(result.unwrap()));
|
||||
tx.send(DirEntry::Y(result.unwrap())).unwrap();
|
||||
}
|
||||
}
|
||||
drop(tx);
|
||||
|
@@ -22,6 +22,7 @@ use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use overrides::{self, Override};
|
||||
use types::{self, Types};
|
||||
use walk::DirEntry;
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
@@ -73,6 +74,8 @@ struct IgnoreOptions {
|
||||
git_ignore: bool,
|
||||
/// Whether to read .git/info/exclude files.
|
||||
git_exclude: bool,
|
||||
/// Whether to ignore files case insensitively
|
||||
ignore_case_insensitive: bool,
|
||||
}
|
||||
|
||||
/// Ignore is a matcher useful for recursively walking one or more directories.
|
||||
@@ -225,7 +228,11 @@ impl Ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) =
|
||||
create_gitignore(&dir, &self.0.custom_ignore_filenames);
|
||||
create_gitignore(
|
||||
&dir,
|
||||
&self.0.custom_ignore_filenames,
|
||||
self.0.opts.ignore_case_insensitive,
|
||||
);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
@@ -233,7 +240,12 @@ impl Ignore {
|
||||
if !self.0.opts.ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".ignore"]);
|
||||
let (m, err) =
|
||||
create_gitignore(
|
||||
&dir,
|
||||
&[".ignore"],
|
||||
self.0.opts.ignore_case_insensitive,
|
||||
);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
@@ -241,7 +253,12 @@ impl Ignore {
|
||||
if !self.0.opts.git_ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".gitignore"]);
|
||||
let (m, err) =
|
||||
create_gitignore(
|
||||
&dir,
|
||||
&[".gitignore"],
|
||||
self.0.opts.ignore_case_insensitive,
|
||||
);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
@@ -249,7 +266,12 @@ impl Ignore {
|
||||
if !self.0.opts.git_exclude {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]);
|
||||
let (m, err) =
|
||||
create_gitignore(
|
||||
&dir,
|
||||
&[".git/info/exclude"],
|
||||
self.0.opts.ignore_case_insensitive,
|
||||
);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
@@ -285,11 +307,23 @@ impl Ignore {
|
||||
|| has_explicit_ignores
|
||||
}
|
||||
|
||||
/// Like `matched`, but works with a directory entry instead.
|
||||
pub fn matched_dir_entry<'a>(
|
||||
&'a self,
|
||||
dent: &DirEntry,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
let m = self.matched(dent.path(), dent.is_dir());
|
||||
if m.is_none() && self.0.opts.hidden && is_hidden(dent) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
m
|
||||
}
|
||||
|
||||
/// Returns a match indicating whether the given file path should be
|
||||
/// ignored or not.
|
||||
///
|
||||
/// The match contains information about its origin.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
@@ -330,9 +364,6 @@ impl Ignore {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
whitelisted
|
||||
}
|
||||
|
||||
@@ -483,6 +514,7 @@ impl IgnoreBuilder {
|
||||
git_global: true,
|
||||
git_ignore: true,
|
||||
git_exclude: true,
|
||||
ignore_case_insensitive: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
@@ -496,7 +528,11 @@ impl IgnoreBuilder {
|
||||
if !self.opts.git_global {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (gi, err) = Gitignore::global();
|
||||
let mut builder = GitignoreBuilder::new("");
|
||||
builder
|
||||
.case_insensitive(self.opts.ignore_case_insensitive)
|
||||
.unwrap();
|
||||
let (gi, err) = builder.build_global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
}
|
||||
@@ -627,6 +663,17 @@ impl IgnoreBuilder {
|
||||
self.opts.git_exclude = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Process ignore files case insensitively
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn ignore_case_insensitive(
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> &mut IgnoreBuilder {
|
||||
self.opts.ignore_case_insensitive = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher for the directory given.
|
||||
@@ -638,9 +685,11 @@ impl IgnoreBuilder {
|
||||
pub fn create_gitignore<T: AsRef<OsStr>>(
|
||||
dir: &Path,
|
||||
names: &[T],
|
||||
case_insensitive: bool,
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let mut builder = GitignoreBuilder::new(dir);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
builder.case_insensitive(case_insensitive).unwrap();
|
||||
for name in names {
|
||||
let gipath = dir.join(name.as_ref());
|
||||
errs.maybe_push_ignore_io(builder.add(gipath));
|
||||
@@ -661,7 +710,7 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use tempdir::TempDir;
|
||||
use tempfile::{self, TempDir};
|
||||
|
||||
use dir::IgnoreBuilder;
|
||||
use gitignore::Gitignore;
|
||||
@@ -683,9 +732,13 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
fn tmpdir(prefix: &str) -> TempDir {
|
||||
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn explicit_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join("not-an-ignore"), "foo\n!bar");
|
||||
|
||||
let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
|
||||
@@ -700,7 +753,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn git_exclude() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
mkdirp(td.path().join(".git/info"));
|
||||
wfile(td.path().join(".git/info/exclude"), "foo\n!bar");
|
||||
|
||||
@@ -713,7 +766,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn gitignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||
|
||||
@@ -726,7 +779,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn gitignore_no_git() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
@@ -738,7 +791,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".ignore"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
@@ -750,7 +803,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn custom_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
let custom_ignore = ".customignore";
|
||||
wfile(td.path().join(custom_ignore), "foo\n!bar");
|
||||
|
||||
@@ -766,7 +819,7 @@ mod tests {
|
||||
// Tests that a custom ignore file will override an .ignore.
|
||||
#[test]
|
||||
fn custom_ignore_over_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
let custom_ignore = ".customignore";
|
||||
wfile(td.path().join(".ignore"), "foo");
|
||||
wfile(td.path().join(custom_ignore), "!foo");
|
||||
@@ -781,7 +834,7 @@ mod tests {
|
||||
// Tests that earlier custom ignore files have lower precedence than later.
|
||||
#[test]
|
||||
fn custom_ignore_precedence() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
let custom_ignore1 = ".customignore1";
|
||||
let custom_ignore2 = ".customignore2";
|
||||
wfile(td.path().join(custom_ignore1), "foo");
|
||||
@@ -798,7 +851,7 @@ mod tests {
|
||||
// Tests that an .ignore will override a .gitignore.
|
||||
#[test]
|
||||
fn ignore_over_gitignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
wfile(td.path().join(".ignore"), "!foo");
|
||||
|
||||
@@ -810,7 +863,7 @@ mod tests {
|
||||
// Tests that exclude has lower precedent than both .ignore and .gitignore.
|
||||
#[test]
|
||||
fn exclude_lowest() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".gitignore"), "!foo");
|
||||
wfile(td.path().join(".ignore"), "!bar");
|
||||
mkdirp(td.path().join(".git/info"));
|
||||
@@ -825,8 +878,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo");
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".gitignore"), "{foo");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
@@ -834,9 +887,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored_both() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo");
|
||||
wfile(td.path().join(".ignore"), "fo**o");
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".gitignore"), "{foo");
|
||||
wfile(td.path().join(".ignore"), "{bar");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert_eq!(2, partial(err.expect("an error")).len());
|
||||
@@ -844,9 +897,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored_partial() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
wfile(td.path().join(".gitignore"), "f**oo\nbar");
|
||||
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
@@ -855,8 +908,8 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn errored_partial_and_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo\nbar");
|
||||
let td = tmpdir("ignore-test-");
|
||||
wfile(td.path().join(".gitignore"), "{foo\nbar");
|
||||
wfile(td.path().join(".ignore"), "!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
@@ -866,7 +919,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn not_present_empty() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
@@ -876,7 +929,7 @@ mod tests {
|
||||
fn stops_at_git_dir() {
|
||||
// This tests that .gitignore files beyond a .git barrier aren't
|
||||
// matched, but .ignore files are.
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("foo/.git"));
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
@@ -897,7 +950,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn absolute_parent() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("foo"));
|
||||
wfile(td.path().join(".gitignore"), "bar");
|
||||
@@ -920,7 +973,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn absolute_parent_anchored() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
let td = tmpdir("ignore-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("src/llvm"));
|
||||
wfile(td.path().join(".gitignore"), "/llvm/\nfoo");
|
||||
|
@@ -69,8 +69,7 @@ impl Glob {
|
||||
|
||||
/// Returns true if and only if this glob has a `**/` prefix.
|
||||
fn has_doublestar_prefix(&self) -> bool {
|
||||
self.actual.starts_with("**/")
|
||||
|| (self.actual == "**" && self.is_only_dir)
|
||||
self.actual.starts_with("**/") || self.actual == "**"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -127,16 +126,7 @@ impl Gitignore {
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn global() -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
Gitignore::new(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
GitignoreBuilder::new("").build_global()
|
||||
}
|
||||
|
||||
/// Creates a new empty gitignore matcher that never matches anything.
|
||||
@@ -359,6 +349,36 @@ impl GitignoreBuilder {
|
||||
})
|
||||
}
|
||||
|
||||
/// Build a global gitignore matcher using the configuration in this
|
||||
/// builder.
|
||||
///
|
||||
/// This consumes ownership of the builder unlike `build` because it
|
||||
/// must mutate the builder to add the global gitignore globs.
|
||||
///
|
||||
/// Note that this ignores the path given to this builder's constructor
|
||||
/// and instead derives the path automatically from git's global
|
||||
/// configuration.
|
||||
pub fn build_global(mut self) -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(self.add(path));
|
||||
match self.build() {
|
||||
Ok(gi) => (gi, errs.into_error_option()),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
(Gitignore::empty(), errs.into_error_option())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Add each glob from the file path given.
|
||||
///
|
||||
/// The file given should be formatted as a `gitignore` file.
|
||||
@@ -419,6 +439,8 @@ impl GitignoreBuilder {
|
||||
from: Option<PathBuf>,
|
||||
mut line: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
#![allow(deprecated)]
|
||||
|
||||
if line.starts_with("#") {
|
||||
return Ok(self);
|
||||
}
|
||||
@@ -435,7 +457,6 @@ impl GitignoreBuilder {
|
||||
is_whitelist: false,
|
||||
is_only_dir: false,
|
||||
};
|
||||
let mut literal_separator = false;
|
||||
let mut is_absolute = false;
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
@@ -450,7 +471,6 @@ impl GitignoreBuilder {
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
literal_separator = true;
|
||||
line = &line[1..];
|
||||
is_absolute = true;
|
||||
}
|
||||
@@ -463,16 +483,11 @@ impl GitignoreBuilder {
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
glob.actual = line.to_string();
|
||||
if is_absolute || line.chars().any(|c| c == '/') {
|
||||
literal_separator = true;
|
||||
}
|
||||
// If there was a slash, then this is a glob that must match the entire
|
||||
// path name. Otherwise, we should let it match anywhere, so use a **/
|
||||
// prefix.
|
||||
if !literal_separator {
|
||||
// If there is a literal slash, then this is a glob that must match the
|
||||
// entire path name. Otherwise, we should let it match anywhere, so use
|
||||
// a **/ prefix.
|
||||
if !is_absolute && !line.chars().any(|c| c == '/') {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !glob.has_doublestar_prefix() {
|
||||
glob.actual = format!("**/{}", glob.actual);
|
||||
@@ -486,7 +501,7 @@ impl GitignoreBuilder {
|
||||
}
|
||||
let parsed =
|
||||
GlobBuilder::new(&glob.actual)
|
||||
.literal_separator(literal_separator)
|
||||
.literal_separator(true)
|
||||
.case_insensitive(self.case_insensitive)
|
||||
.backslash_escape(true)
|
||||
.build()
|
||||
@@ -503,12 +518,16 @@ impl GitignoreBuilder {
|
||||
|
||||
/// Toggle whether the globs should be matched case insensitively or not.
|
||||
///
|
||||
/// When this option is changed, only globs added after the change will be affected.
|
||||
/// When this option is changed, only globs added after the change will be
|
||||
/// affected.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn case_insensitive(
|
||||
&mut self, yes: bool
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
// TODO: This should not return a `Result`. Fix this in the next semver
|
||||
// release.
|
||||
self.case_insensitive = yes;
|
||||
Ok(self)
|
||||
}
|
||||
@@ -689,6 +708,9 @@ mod tests {
|
||||
ignored!(ig39, ROOT, "\\?", "?");
|
||||
ignored!(ig40, ROOT, "\\*", "*");
|
||||
ignored!(ig41, ROOT, "\\a", "a");
|
||||
ignored!(ig42, ROOT, "s*.rs", "sfoo.rs");
|
||||
ignored!(ig43, ROOT, "**", "foo.rs");
|
||||
ignored!(ig44, ROOT, "**/**/*", "a/foo.rs");
|
||||
|
||||
not_ignored!(ignot1, ROOT, "amonths", "months");
|
||||
not_ignored!(ignot2, ROOT, "monthsa", "months");
|
||||
@@ -710,6 +732,7 @@ mod tests {
|
||||
not_ignored!(ignot16, ROOT, "*\n!**/", "foo", true);
|
||||
not_ignored!(ignot17, ROOT, "src/*.rs", "src/grep/src/main.rs");
|
||||
not_ignored!(ignot18, ROOT, "path1/*", "path2/path1/foo");
|
||||
not_ignored!(ignot19, ROOT, "s*.rs", "src/foo.rs");
|
||||
|
||||
fn bytes(s: &str) -> Vec<u8> {
|
||||
s.to_string().into_bytes()
|
||||
|
@@ -56,7 +56,7 @@ extern crate memchr;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
#[cfg(test)]
|
||||
extern crate tempdir;
|
||||
extern crate tempfile;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
#[cfg(windows)]
|
||||
|
@@ -139,13 +139,16 @@ impl OverrideBuilder {
|
||||
}
|
||||
|
||||
/// Toggle whether the globs should be matched case insensitively or not.
|
||||
///
|
||||
///
|
||||
/// When this option is changed, only globs added after the change will be affected.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn case_insensitive(
|
||||
&mut self, yes: bool
|
||||
&mut self,
|
||||
yes: bool,
|
||||
) -> Result<&mut OverrideBuilder, Error> {
|
||||
// TODO: This should not return a `Result`. Fix this in the next semver
|
||||
// release.
|
||||
self.builder.case_insensitive(yes)?;
|
||||
Ok(self)
|
||||
}
|
||||
|
@@ -1,22 +1,56 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
use walk::DirEntry;
|
||||
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
/// This only returns true if the base name of the path starts with a `.`.
|
||||
///
|
||||
/// On Unix, this implements a more optimized check.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
pub fn is_hidden(dent: &DirEntry) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
if let Some(name) = file_name(dent.path()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(not(unix))]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
/// On Windows, this returns true if one of the following is true:
|
||||
///
|
||||
/// * The base name of the path starts with a `.`.
|
||||
/// * The file attributes have the `HIDDEN` property set.
|
||||
#[cfg(windows)]
|
||||
pub fn is_hidden(dent: &DirEntry) -> bool {
|
||||
use std::os::windows::fs::MetadataExt;
|
||||
use winapi_util::file;
|
||||
|
||||
// This looks like we're doing an extra stat call, but on Windows, the
|
||||
// directory traverser reuses the metadata retrieved from each directory
|
||||
// entry and stores it on the DirEntry itself. So this is "free."
|
||||
if let Ok(md) = dent.metadata() {
|
||||
if file::is_hidden(md.file_attributes() as u64) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
if let Some(name) = file_name(dent.path()) {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
/// This only returns true if the base name of the path starts with a `.`.
|
||||
#[cfg(not(any(unix, windows)))]
|
||||
pub fn is_hidden(dent: &DirEntry) -> bool {
|
||||
if let Some(name) = file_name(dent.path()) {
|
||||
name.to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||
} else {
|
||||
false
|
||||
|
@@ -103,11 +103,14 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("amake", &["*.mk", "*.bp"]),
|
||||
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
|
||||
("asm", &["*.asm", "*.s", "*.S"]),
|
||||
("asp", &["*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb"]),
|
||||
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
||||
("awk", &["*.awk"]),
|
||||
("bazel", &["*.bzl", "WORKSPACE", "BUILD"]),
|
||||
("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
|
||||
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||
("bzip2", &["*.bz2"]),
|
||||
("brotli", &["*.br"]),
|
||||
("buildstream", &["*.bst"]),
|
||||
("bzip2", &["*.bz2", "*.tbz2"]),
|
||||
("c", &["*.c", "*.h", "*.H", "*.cats"]),
|
||||
("cabal", &["*.cabal"]),
|
||||
("cbor", &["*.cbor"]),
|
||||
@@ -127,7 +130,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("cshtml", &["*.cshtml"]),
|
||||
("css", &["*.css", "*.scss"]),
|
||||
("csv", &["*.csv"]),
|
||||
("cython", &["*.pyx"]),
|
||||
("cython", &["*.pyx", "*.pxi", "*.pxd"]),
|
||||
("dart", &["*.dart"]),
|
||||
("d", &["*.d"]),
|
||||
("dhall", &["*.dhall"]),
|
||||
@@ -145,7 +148,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
||||
("gn", &["*.gn", "*.gni"]),
|
||||
("go", &["*.go"]),
|
||||
("gzip", &["*.gz"]),
|
||||
("gzip", &["*.gz", "*.tgz"]),
|
||||
("groovy", &["*.groovy", "*.gradle"]),
|
||||
("h", &["*.h", "*.hpp"]),
|
||||
("hbs", &["*.hbs"]),
|
||||
@@ -224,12 +227,14 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("pdf", &["*.pdf"]),
|
||||
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
|
||||
("pod", &["*.pod"]),
|
||||
("postscript", &[".eps", ".ps"]),
|
||||
("protobuf", &["*.proto"]),
|
||||
("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
|
||||
("puppet", &["*.erb", "*.pp", "*.rb"]),
|
||||
("purs", &["*.purs"]),
|
||||
("py", &["*.py"]),
|
||||
("qmake", &["*.pro", "*.pri", "*.prf"]),
|
||||
("qml", &["*.qml"]),
|
||||
("readme", &["README*", "*README"]),
|
||||
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
|
||||
("rdoc", &["*.rdoc"]),
|
||||
@@ -278,8 +283,9 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
]),
|
||||
("taskpaper", &["*.taskpaper"]),
|
||||
("tcl", &["*.tcl"]),
|
||||
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib"]),
|
||||
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
|
||||
("textile", &["*.textile"]),
|
||||
("thrift", &["*.thrift"]),
|
||||
("tf", &["*.tf"]),
|
||||
("ts", &["*.ts", "*.tsx"]),
|
||||
("txt", &["*.txt"]),
|
||||
@@ -294,9 +300,10 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
("wiki", &["*.mediawiki", "*.wiki"]),
|
||||
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
||||
("xml", &["*.xml", "*.xml.dist"]),
|
||||
("xz", &["*.xz"]),
|
||||
("xz", &["*.xz", "*.txz"]),
|
||||
("yacc", &["*.y"]),
|
||||
("yaml", &["*.yaml", "*.yml"]),
|
||||
("zig", &["*.zig"]),
|
||||
("zsh", &[
|
||||
".zshenv", "zshenv",
|
||||
".zlogin", "zlogin",
|
||||
@@ -305,6 +312,7 @@ const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
|
||||
".zshrc", "zshrc",
|
||||
"*.zsh",
|
||||
]),
|
||||
("zstd", &["*.zst", "*.zstd"]),
|
||||
];
|
||||
|
||||
/// Glob represents a single glob in a set of file type definitions.
|
||||
@@ -343,6 +351,18 @@ impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
|
||||
/// Return the file type definition that matched, if one exists. A file type
|
||||
/// definition always exists when a specific definition matches a file
|
||||
/// path.
|
||||
pub fn file_type_def(&self) -> Option<&FileTypeDef> {
|
||||
match self {
|
||||
Glob(GlobInner::UnmatchedIgnore) => None,
|
||||
Glob(GlobInner::Matched { def, .. }) => {
|
||||
Some(def)
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single file type definition.
|
||||
|
@@ -99,7 +99,7 @@ impl DirEntry {
|
||||
}
|
||||
|
||||
/// Returns true if and only if this entry points to a directory.
|
||||
fn is_dir(&self) -> bool {
|
||||
pub(crate) fn is_dir(&self) -> bool {
|
||||
self.dent.is_dir()
|
||||
}
|
||||
|
||||
@@ -764,6 +764,14 @@ impl WalkBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Process ignore files case insensitively
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn ignore_case_insensitive(&mut self, yes: bool) -> &mut WalkBuilder {
|
||||
self.ig_builder.ignore_case_insensitive(yes);
|
||||
self
|
||||
}
|
||||
|
||||
/// Set a function for sorting directory entries by their path.
|
||||
///
|
||||
/// If a compare function is set, the resulting iterator will return all
|
||||
@@ -875,16 +883,17 @@ impl Walk {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
let is_dir = ent.file_type().map_or(false, |ft| ft.is_dir());
|
||||
let max_size = self.max_filesize;
|
||||
let should_skip_path = skip_path(&self.ig, ent.path(), is_dir);
|
||||
let should_skip_filesize = if !is_dir && max_size.is_some() {
|
||||
skip_filesize(max_size.unwrap(), ent.path(), &ent.metadata().ok())
|
||||
} else {
|
||||
false
|
||||
};
|
||||
|
||||
Ok(should_skip_path || should_skip_filesize)
|
||||
if should_skip_entry(&self.ig, ent) {
|
||||
return Ok(true);
|
||||
}
|
||||
if self.max_filesize.is_some() && !ent.is_dir() {
|
||||
return Ok(skip_filesize(
|
||||
self.max_filesize.unwrap(),
|
||||
ent.path(),
|
||||
&ent.metadata().ok(),
|
||||
));
|
||||
}
|
||||
Ok(false)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1118,7 +1127,7 @@ impl WalkParallel {
|
||||
dent: dent,
|
||||
ignore: self.ig_root.clone(),
|
||||
root_device: root_device,
|
||||
}));
|
||||
})).unwrap();
|
||||
any_work = true;
|
||||
}
|
||||
// ... but there's no need to start workers if we don't need them.
|
||||
@@ -1412,13 +1421,11 @@ impl Worker {
|
||||
return WalkState::Continue;
|
||||
}
|
||||
}
|
||||
let is_dir = dent.is_dir();
|
||||
let max_size = self.max_filesize;
|
||||
let should_skip_path = skip_path(ig, dent.path(), is_dir);
|
||||
let should_skip_path = should_skip_entry(ig, &dent);
|
||||
let should_skip_filesize =
|
||||
if !is_dir && max_size.is_some() {
|
||||
if self.max_filesize.is_some() && !dent.is_dir() {
|
||||
skip_filesize(
|
||||
max_size.unwrap(),
|
||||
self.max_filesize.unwrap(),
|
||||
dent.path(),
|
||||
&dent.metadata().ok(),
|
||||
)
|
||||
@@ -1431,7 +1438,7 @@ impl Worker {
|
||||
dent: dent,
|
||||
ignore: ig.clone(),
|
||||
root_device: root_device,
|
||||
}));
|
||||
})).unwrap();
|
||||
}
|
||||
WalkState::Continue
|
||||
}
|
||||
@@ -1446,12 +1453,12 @@ impl Worker {
|
||||
return None;
|
||||
}
|
||||
match self.rx.try_recv() {
|
||||
Some(Message::Work(work)) => {
|
||||
Ok(Message::Work(work)) => {
|
||||
self.waiting(false);
|
||||
self.quitting(false);
|
||||
return Some(work);
|
||||
}
|
||||
Some(Message::Quit) => {
|
||||
Ok(Message::Quit) => {
|
||||
// We can't just quit because a Message::Quit could be
|
||||
// spurious. For example, it's possible to observe that
|
||||
// all workers are waiting even if there's more work to
|
||||
@@ -1482,12 +1489,12 @@ impl Worker {
|
||||
// Otherwise, spin.
|
||||
}
|
||||
}
|
||||
None => {
|
||||
Err(_) => {
|
||||
self.waiting(true);
|
||||
self.quitting(false);
|
||||
if self.num_waiting() == self.threads {
|
||||
for _ in 0..self.threads {
|
||||
self.tx.send(Message::Quit);
|
||||
self.tx.send(Message::Quit).unwrap();
|
||||
}
|
||||
} else {
|
||||
// You're right to consider this suspicious, but it's
|
||||
@@ -1601,17 +1608,16 @@ fn skip_filesize(
|
||||
}
|
||||
}
|
||||
|
||||
fn skip_path(
|
||||
fn should_skip_entry(
|
||||
ig: &Ignore,
|
||||
path: &Path,
|
||||
is_dir: bool,
|
||||
dent: &DirEntry,
|
||||
) -> bool {
|
||||
let m = ig.matched(path, is_dir);
|
||||
let m = ig.matched_dir_entry(dent);
|
||||
if m.is_ignore() {
|
||||
debug!("ignoring {}: {:?}", path.display(), m);
|
||||
debug!("ignoring {}: {:?}", dent.path().display(), m);
|
||||
true
|
||||
} else if m.is_whitelist() {
|
||||
debug!("whitelisting {}: {:?}", path.display(), m);
|
||||
debug!("whitelisting {}: {:?}", dent.path().display(), m);
|
||||
false
|
||||
} else {
|
||||
false
|
||||
@@ -1702,7 +1708,7 @@ mod tests {
|
||||
use std::path::Path;
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use tempdir::TempDir;
|
||||
use tempfile::{self, TempDir};
|
||||
|
||||
use super::{DirEntry, WalkBuilder, WalkState};
|
||||
|
||||
@@ -1789,6 +1795,10 @@ mod tests {
|
||||
paths
|
||||
}
|
||||
|
||||
fn tmpdir(prefix: &str) -> TempDir {
|
||||
tempfile::Builder::new().prefix(prefix).tempdir().unwrap()
|
||||
}
|
||||
|
||||
fn assert_paths(
|
||||
prefix: &Path,
|
||||
builder: &WalkBuilder,
|
||||
@@ -1802,7 +1812,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn no_ignores() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join("a/b/c"));
|
||||
mkdirp(td.path().join("x/y"));
|
||||
wfile(td.path().join("a/b/foo"), "");
|
||||
@@ -1815,7 +1825,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn custom_ignore() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
let custom_ignore = ".customignore";
|
||||
mkdirp(td.path().join("a"));
|
||||
wfile(td.path().join(custom_ignore), "foo");
|
||||
@@ -1831,7 +1841,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn custom_ignore_exclusive_use() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
let custom_ignore = ".customignore";
|
||||
mkdirp(td.path().join("a"));
|
||||
wfile(td.path().join(custom_ignore), "foo");
|
||||
@@ -1851,7 +1861,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn gitignore() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("a"));
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
@@ -1867,7 +1877,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn explicit_ignore() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
let igpath = td.path().join(".not-an-ignore");
|
||||
mkdirp(td.path().join("a"));
|
||||
wfile(&igpath, "foo");
|
||||
@@ -1883,7 +1893,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn explicit_ignore_exclusive_use() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
let igpath = td.path().join(".not-an-ignore");
|
||||
mkdirp(td.path().join("a"));
|
||||
wfile(&igpath, "foo");
|
||||
@@ -1901,7 +1911,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn gitignore_parent() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("a"));
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
@@ -1914,7 +1924,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn max_depth() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join("a/b/c"));
|
||||
wfile(td.path().join("foo"), "");
|
||||
wfile(td.path().join("a/foo"), "");
|
||||
@@ -1934,7 +1944,7 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn max_filesize() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join("a/b"));
|
||||
wfile_size(td.path().join("foo"), 0);
|
||||
wfile_size(td.path().join("bar"), 400);
|
||||
@@ -1961,7 +1971,7 @@ mod tests {
|
||||
#[cfg(unix)] // because symlinks on windows are weird
|
||||
#[test]
|
||||
fn symlinks() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join("a/b"));
|
||||
symlink(td.path().join("a/b"), td.path().join("z"));
|
||||
wfile(td.path().join("a/b/foo"), "");
|
||||
@@ -1978,7 +1988,7 @@ mod tests {
|
||||
#[cfg(unix)] // because symlinks on windows are weird
|
||||
#[test]
|
||||
fn first_path_not_symlink() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join("foo"));
|
||||
|
||||
let dents = WalkBuilder::new(td.path().join("foo"))
|
||||
@@ -1999,7 +2009,7 @@ mod tests {
|
||||
#[cfg(unix)] // because symlinks on windows are weird
|
||||
#[test]
|
||||
fn symlink_loop() {
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
mkdirp(td.path().join("a/b"));
|
||||
symlink(td.path().join("a"), td.path().join("a/b/c"));
|
||||
|
||||
@@ -2029,7 +2039,7 @@ mod tests {
|
||||
|
||||
// If our test directory actually isn't a different volume from /sys,
|
||||
// then this test is meaningless and we shouldn't run it.
|
||||
let td = TempDir::new("walk-test-").unwrap();
|
||||
let td = tmpdir("walk-test-");
|
||||
if device_num(td.path()).unwrap() == device_num("/sys").unwrap() {
|
||||
return;
|
||||
}
|
||||
|
@@ -1,14 +1,14 @@
|
||||
class RipgrepBin < Formula
|
||||
version '0.9.0'
|
||||
version '0.10.0'
|
||||
desc "Recursively search directories for a regex pattern."
|
||||
homepage "https://github.com/BurntSushi/ripgrep"
|
||||
|
||||
if OS.mac?
|
||||
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
|
||||
sha256 "36003ea8b62ad6274dc14140039f448cdf5026827d53cf24dad2d84005557a8c"
|
||||
sha256 "32754b4173ac87a7bfffd436d601a49362676eb1841ab33440f2f49c002c8967"
|
||||
elsif OS.linux?
|
||||
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-unknown-linux-musl.tar.gz"
|
||||
sha256 "2eb4443e58f95051ff76ea036ed1faf940d5a04af4e7ff5a7dbd74576b907e99"
|
||||
sha256 "c76080aa807a339b44139885d77d15ad60ab8cdd2c2fdaf345d0985625bc0f97"
|
||||
end
|
||||
|
||||
conflicts_with "ripgrep"
|
||||
|
1
rustfmt.toml
Normal file
1
rustfmt.toml
Normal file
@@ -0,0 +1 @@
|
||||
disable_all_formatting = true
|
76
src/app.rs
76
src/app.rs
@@ -9,7 +9,8 @@
|
||||
// is where we read clap's configuration from the end user's arguments and turn
|
||||
// it into a ripgrep-specific configuration type that is not coupled with clap.
|
||||
|
||||
use clap::{self, App, AppSettings};
|
||||
use clap::{self, App, AppSettings, crate_authors, crate_version};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
const ABOUT: &str = "
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
@@ -570,6 +571,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_iglob(&mut args);
|
||||
flag_ignore_case(&mut args);
|
||||
flag_ignore_file(&mut args);
|
||||
flag_ignore_file_case_insensitive(&mut args);
|
||||
flag_invert_match(&mut args);
|
||||
flag_json(&mut args);
|
||||
flag_line_buffered(&mut args);
|
||||
@@ -584,6 +586,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_multiline_dotall(&mut args);
|
||||
flag_no_config(&mut args);
|
||||
flag_no_ignore(&mut args);
|
||||
flag_no_ignore_dot(&mut args);
|
||||
flag_no_ignore_global(&mut args);
|
||||
flag_no_ignore_messages(&mut args);
|
||||
flag_no_ignore_parent(&mut args);
|
||||
@@ -788,17 +791,17 @@ to one of eight choices: red, blue, green, cyan, magenta, yellow, white and
|
||||
black. Styles are limited to nobold, bold, nointense, intense, nounderline
|
||||
or underline.
|
||||
|
||||
The format of the flag is `{type}:{attribute}:{value}`. `{type}` should be
|
||||
one of path, line, column or match. `{attribute}` can be fg, bg or style.
|
||||
`{value}` is either a color (for fg and bg) or a text style. A special format,
|
||||
`{type}:none`, will clear all color settings for `{type}`.
|
||||
The format of the flag is '{type}:{attribute}:{value}'. '{type}' should be
|
||||
one of path, line, column or match. '{attribute}' can be fg, bg or style.
|
||||
'{value}' is either a color (for fg and bg) or a text style. A special format,
|
||||
'{type}:none', will clear all color settings for '{type}'.
|
||||
|
||||
For example, the following command will change the match color to magenta and
|
||||
the background color for line numbers to yellow:
|
||||
|
||||
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo.
|
||||
|
||||
Extended colors can be used for `{value}` when the terminal supports ANSI color
|
||||
Extended colors can be used for '{value}' when the terminal supports ANSI color
|
||||
sequences. These are specified as either 'x' (256-color) or 'x,x,x' (24-bit
|
||||
truecolor) where x is a number between 0 and 255 inclusive. x may be given as
|
||||
a normal decimal number or a hexadecimal number, which is prefixed by `0x`.
|
||||
@@ -979,10 +982,15 @@ fn flag_encoding(args: &mut Vec<RGArg>) {
|
||||
const LONG: &str = long!("\
|
||||
Specify the text encoding that ripgrep will use on all files searched. The
|
||||
default value is 'auto', which will cause ripgrep to do a best effort automatic
|
||||
detection of encoding on a per-file basis. Other supported values can be found
|
||||
in the list of labels here:
|
||||
detection of encoding on a per-file basis. Automatic detection in this case
|
||||
only applies to files that begin with a UTF-8 or UTF-16 byte-order mark (BOM).
|
||||
No other automatic detection is performed.
|
||||
|
||||
Other supported values can be found in the list of labels here:
|
||||
https://encoding.spec.whatwg.org/#concept-encoding-get
|
||||
|
||||
For more details on encoding and how ripgrep deals with it, see GUIDE.md.
|
||||
|
||||
This flag can be disabled with --no-encoding.
|
||||
");
|
||||
let arg = RGArg::flag("encoding", "ENCODING").short("E")
|
||||
@@ -1016,7 +1024,7 @@ fn flag_files(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Print each file that would be searched.";
|
||||
const LONG: &str = long!("\
|
||||
Print each file that would be searched without actually performing the search.
|
||||
This is useful to determine whether a particular file is being search or not.
|
||||
This is useful to determine whether a particular file is being searched or not.
|
||||
");
|
||||
let arg = RGArg::switch("files")
|
||||
.help(SHORT).long_help(LONG)
|
||||
@@ -1208,6 +1216,26 @@ directly on the command line, then used -g instead.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_ignore_file_case_insensitive(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Process ignore files case insensitively.";
|
||||
const LONG: &str = long!("\
|
||||
Process ignore files (.gitignore, .ignore, etc.) case insensitively. Note that
|
||||
this comes with a performance penalty and is most useful on case insensitive
|
||||
file systems (such as Windows).
|
||||
|
||||
This flag can be disabled with the --no-ignore-file-case-insensitive flag.
|
||||
");
|
||||
let arg = RGArg::switch("ignore-file-case-insensitive")
|
||||
.help(SHORT).long_help(LONG)
|
||||
.overrides("no-ignore-file-case-insensitive");
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("no-ignore-file-case-insensitive")
|
||||
.hidden()
|
||||
.overrides("ignore-file-case-insensitive");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_invert_match(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Invert matching.";
|
||||
const LONG: &str = long!("\
|
||||
@@ -1535,7 +1563,7 @@ fn flag_no_ignore(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Don't respect ignore files.";
|
||||
const LONG: &str = long!("\
|
||||
Don't respect ignore files (.gitignore, .ignore, etc.). This implies
|
||||
--no-ignore-parent and --no-ignore-vcs.
|
||||
--no-ignore-parent, --no-ignore-dot and --no-ignore-vcs.
|
||||
|
||||
This flag can be disabled with the --ignore flag.
|
||||
");
|
||||
@@ -1550,6 +1578,24 @@ This flag can be disabled with the --ignore flag.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_no_ignore_dot(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Don't respect .ignore files.";
|
||||
const LONG: &str = long!("\
|
||||
Don't respect .ignore files.
|
||||
|
||||
This flag can be disabled with the --ignore-dot flag.
|
||||
");
|
||||
let arg = RGArg::switch("no-ignore-dot")
|
||||
.help(SHORT).long_help(LONG)
|
||||
.overrides("ignore-dot");
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("ignore-dot")
|
||||
.hidden()
|
||||
.overrides("no-ignore-dot");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_no_ignore_global(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Don't respect global ignore files.";
|
||||
const LONG: &str = long!("\
|
||||
@@ -1998,9 +2044,9 @@ This flag can be used with the -o/--only-matching flag.
|
||||
fn flag_search_zip(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Search in compressed files.";
|
||||
const LONG: &str = long!("\
|
||||
Search in compressed files. Currently gz, bz2, xz, lzma and lz4 files are
|
||||
supported. This option expects the decompression binaries to be available in
|
||||
your PATH.
|
||||
Search in compressed files. Currently gzip, bzip2, xz, LZ4, LZMA, Brotli and
|
||||
Zstd files are supported. This option expects the decompression binaries to be
|
||||
available in your PATH.
|
||||
|
||||
This flag can be disabled with --no-search-zip.
|
||||
");
|
||||
@@ -2067,7 +2113,7 @@ for this flag are:
|
||||
path Sort by file path.
|
||||
modified Sort by the last modified time on a file.
|
||||
accessed Sort by the last accessed time on a file.
|
||||
created Sort by the cretion time on a file.
|
||||
created Sort by the creation time on a file.
|
||||
none Do not sort results.
|
||||
|
||||
If the sorting criteria isn't available on your system (for example, creation
|
||||
@@ -2100,7 +2146,7 @@ for this flag are:
|
||||
path Sort by file path.
|
||||
modified Sort by the last modified time on a file.
|
||||
accessed Sort by the last accessed time on a file.
|
||||
created Sort by the cretion time on a file.
|
||||
created Sort by the creation time on a file.
|
||||
none Do not sort results.
|
||||
|
||||
If the sorting criteria isn't available on your system (for example, creation
|
||||
|
141
src/args.rs
141
src/args.rs
@@ -1,9 +1,10 @@
|
||||
use std::cmp;
|
||||
use std::env;
|
||||
use std::ffi::OsStr;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs;
|
||||
use std::io;
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process;
|
||||
use std::sync::Arc;
|
||||
use std::time::SystemTime;
|
||||
|
||||
@@ -34,20 +35,22 @@ use ignore::types::{FileTypeDef, Types, TypesBuilder};
|
||||
use ignore::{Walk, WalkBuilder, WalkParallel};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use path_printer::{PathPrinter, PathPrinterBuilder};
|
||||
use regex;
|
||||
use termcolor::{
|
||||
WriteColor,
|
||||
BufferWriter, ColorChoice,
|
||||
};
|
||||
|
||||
use app;
|
||||
use config;
|
||||
use logger::Logger;
|
||||
use messages::{set_messages, set_ignore_messages};
|
||||
use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder};
|
||||
use subject::SubjectBuilder;
|
||||
use Result;
|
||||
use crate::app;
|
||||
use crate::config;
|
||||
use crate::logger::Logger;
|
||||
use crate::messages::{set_messages, set_ignore_messages};
|
||||
use crate::path_printer::{PathPrinter, PathPrinterBuilder};
|
||||
use crate::search::{
|
||||
PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder,
|
||||
};
|
||||
use crate::subject::SubjectBuilder;
|
||||
use crate::Result;
|
||||
|
||||
/// The command that ripgrep should execute based on the command line
|
||||
/// configuration.
|
||||
@@ -128,7 +131,7 @@ impl Args {
|
||||
// trying to parse config files. If a config file exists and has
|
||||
// arguments, then we re-parse argv, otherwise we just use the matches
|
||||
// we have here.
|
||||
let early_matches = ArgMatches::new(app::app().get_matches());
|
||||
let early_matches = ArgMatches::new(clap_matches(env::args_os())?);
|
||||
set_messages(!early_matches.is_present("no-messages"));
|
||||
set_ignore_messages(!early_matches.is_present("no-ignore-messages"));
|
||||
|
||||
@@ -143,7 +146,7 @@ impl Args {
|
||||
log::set_max_level(log::LevelFilter::Warn);
|
||||
}
|
||||
|
||||
let matches = early_matches.reconfigure();
|
||||
let matches = early_matches.reconfigure()?;
|
||||
// The logging level may have changed if we brought in additional
|
||||
// arguments from a configuration file, so recheck it and set the log
|
||||
// level as appropriate.
|
||||
@@ -265,6 +268,11 @@ impl Args {
|
||||
Ok(builder.build(wtr))
|
||||
}
|
||||
|
||||
/// Returns true if and only if ripgrep should be "quiet."
|
||||
pub fn quiet(&self) -> bool {
|
||||
self.matches().is_present("quiet")
|
||||
}
|
||||
|
||||
/// Returns true if and only if the search should quit after finding the
|
||||
/// first match.
|
||||
pub fn quit_after_match(&self) -> Result<bool> {
|
||||
@@ -488,25 +496,27 @@ impl ArgMatches {
|
||||
///
|
||||
/// If there are no additional arguments from the environment (e.g., a
|
||||
/// config file), then the given matches are returned as is.
|
||||
fn reconfigure(self) -> ArgMatches {
|
||||
fn reconfigure(self) -> Result<ArgMatches> {
|
||||
// If the end user says no config, then respect it.
|
||||
if self.is_present("no-config") {
|
||||
debug!("not reading config files because --no-config is present");
|
||||
return self;
|
||||
log::debug!(
|
||||
"not reading config files because --no-config is present"
|
||||
);
|
||||
return Ok(self);
|
||||
}
|
||||
// If the user wants ripgrep to use a config file, then parse args
|
||||
// from that first.
|
||||
let mut args = config::args();
|
||||
if args.is_empty() {
|
||||
return self;
|
||||
return Ok(self);
|
||||
}
|
||||
let mut cliargs = env::args_os();
|
||||
if let Some(bin) = cliargs.next() {
|
||||
args.insert(0, bin);
|
||||
}
|
||||
args.extend(cliargs);
|
||||
debug!("final argv: {:?}", args);
|
||||
ArgMatches::new(app::app().get_matches_from(args))
|
||||
log::debug!("final argv: {:?}", args);
|
||||
Ok(ArgMatches(clap_matches(args)?))
|
||||
}
|
||||
|
||||
/// Convert the result of parsing CLI arguments into ripgrep's higher level
|
||||
@@ -615,7 +625,10 @@ impl ArgMatches {
|
||||
if let Some(limit) = self.dfa_size_limit()? {
|
||||
builder.dfa_size_limit(limit);
|
||||
}
|
||||
Ok(builder.build(&patterns.join("|"))?)
|
||||
match builder.build(&patterns.join("|")) {
|
||||
Ok(m) => Ok(m),
|
||||
Err(err) => Err(From::from(suggest_multiline(err.to_string()))),
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a matcher using PCRE2.
|
||||
@@ -633,7 +646,7 @@ impl ArgMatches {
|
||||
// For whatever reason, the JIT craps out during regex compilation with
|
||||
// a "no more memory" error on 32 bit systems. So don't use it there.
|
||||
if !cfg!(target_pointer_width = "32") {
|
||||
builder.jit(true);
|
||||
builder.jit_if_available(true);
|
||||
}
|
||||
if self.pcre2_unicode() {
|
||||
builder.utf(true).ucp(true);
|
||||
@@ -779,18 +792,16 @@ impl ArgMatches {
|
||||
.max_filesize(self.max_file_size()?)
|
||||
.threads(self.threads()?)
|
||||
.same_file_system(self.is_present("one-file-system"))
|
||||
.skip_stdout(true)
|
||||
.skip_stdout(!self.is_present("files"))
|
||||
.overrides(self.overrides()?)
|
||||
.types(self.types()?)
|
||||
.hidden(!self.hidden())
|
||||
.parents(!self.no_ignore_parent())
|
||||
.ignore(!self.no_ignore())
|
||||
.git_global(
|
||||
!self.no_ignore()
|
||||
&& !self.no_ignore_vcs()
|
||||
&& !self.no_ignore_global())
|
||||
.git_ignore(!self.no_ignore() && !self.no_ignore_vcs())
|
||||
.git_exclude(!self.no_ignore() && !self.no_ignore_vcs());
|
||||
.ignore(!self.no_ignore_dot())
|
||||
.git_global(!self.no_ignore_vcs() && !self.no_ignore_global())
|
||||
.git_ignore(!self.no_ignore_vcs())
|
||||
.git_exclude(!self.no_ignore_vcs())
|
||||
.ignore_case_insensitive(self.ignore_file_case_insensitive());
|
||||
if !self.no_ignore() {
|
||||
builder.add_custom_ignore_filename(".rgignore");
|
||||
}
|
||||
@@ -996,6 +1007,11 @@ impl ArgMatches {
|
||||
self.is_present("hidden") || self.unrestricted_count() >= 2
|
||||
}
|
||||
|
||||
/// Returns true if ignore files should be processed case insensitively.
|
||||
fn ignore_file_case_insensitive(&self) -> bool {
|
||||
self.is_present("ignore-file-case-insensitive")
|
||||
}
|
||||
|
||||
/// Return all of the ignore file paths given on the command line.
|
||||
fn ignore_paths(&self) -> Vec<PathBuf> {
|
||||
let paths = match self.values_of_os("ignore-file") {
|
||||
@@ -1090,6 +1106,11 @@ impl ArgMatches {
|
||||
self.is_present("no-ignore") || self.unrestricted_count() >= 1
|
||||
}
|
||||
|
||||
/// Returns true if .ignore files should be ignored.
|
||||
fn no_ignore_dot(&self) -> bool {
|
||||
self.is_present("no-ignore-dot") || self.no_ignore()
|
||||
}
|
||||
|
||||
/// Returns true if global ignore files should be ignored.
|
||||
fn no_ignore_global(&self) -> bool {
|
||||
self.is_present("no-ignore-global") || self.no_ignore()
|
||||
@@ -1136,7 +1157,7 @@ impl ArgMatches {
|
||||
builder.add(&glob)?;
|
||||
}
|
||||
// This only enables case insensitivity for subsequent globs.
|
||||
builder.case_insensitive(true)?;
|
||||
builder.case_insensitive(true).unwrap();
|
||||
for glob in self.values_of_lossy_vec("iglob") {
|
||||
builder.add(&glob)?;
|
||||
}
|
||||
@@ -1250,9 +1271,15 @@ impl ArgMatches {
|
||||
if let Some(paths) = self.values_of_os("file") {
|
||||
for path in paths {
|
||||
if path == "-" {
|
||||
pats.extend(cli::patterns_from_stdin()?);
|
||||
pats.extend(cli::patterns_from_stdin()?
|
||||
.into_iter()
|
||||
.map(|p| self.pattern_from_string(p))
|
||||
);
|
||||
} else {
|
||||
pats.extend(cli::patterns_from_path(path)?);
|
||||
pats.extend(cli::patterns_from_path(path)?
|
||||
.into_iter()
|
||||
.map(|p| self.pattern_from_string(p))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1281,13 +1308,17 @@ impl ArgMatches {
|
||||
/// Converts a &str pattern to a String pattern. The pattern is escaped
|
||||
/// if -F/--fixed-strings is set.
|
||||
fn pattern_from_str(&self, pat: &str) -> String {
|
||||
let litpat = self.pattern_literal(pat.to_string());
|
||||
let s = self.pattern_line(litpat);
|
||||
self.pattern_from_string(pat.to_string())
|
||||
}
|
||||
|
||||
if s.is_empty() {
|
||||
/// Applies additional processing on the given pattern if necessary
|
||||
/// (such as escaping meta characters or turning it into a line regex).
|
||||
fn pattern_from_string(&self, pat: String) -> String {
|
||||
let pat = self.pattern_line(self.pattern_literal(pat));
|
||||
if pat.is_empty() {
|
||||
self.pattern_empty()
|
||||
} else {
|
||||
s
|
||||
pat
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1546,6 +1577,17 @@ and look-around.", msg)
|
||||
}
|
||||
}
|
||||
|
||||
fn suggest_multiline(msg: String) -> String {
|
||||
if msg.contains("the literal") && msg.contains("not allowed") {
|
||||
format!("{}
|
||||
|
||||
Consider enabling multiline mode with the --multiline flag (or -U for short).
|
||||
When multiline mode is enabled, new line characters can be matched.", msg)
|
||||
} else {
|
||||
msg
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the result of parsing a human readable file size to a `usize`,
|
||||
/// failing if the type does not fit.
|
||||
fn u64_to_usize(
|
||||
@@ -1592,3 +1634,32 @@ where G: Fn(&fs::Metadata) -> io::Result<SystemTime>
|
||||
t1.cmp(&t2)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a clap matches object if the given arguments parse successfully.
|
||||
///
|
||||
/// Otherwise, if an error occurred, then it is returned unless the error
|
||||
/// corresponds to a `--help` or `--version` request. In which case, the
|
||||
/// corresponding output is printed and the current process is exited
|
||||
/// successfully.
|
||||
fn clap_matches<I, T>(
|
||||
args: I,
|
||||
) -> Result<clap::ArgMatches<'static>>
|
||||
where I: IntoIterator<Item=T>,
|
||||
T: Into<OsString> + Clone
|
||||
{
|
||||
let err = match app::app().get_matches_from_safe(args) {
|
||||
Ok(matches) => return Ok(matches),
|
||||
Err(err) => err,
|
||||
};
|
||||
if err.use_stderr() {
|
||||
return Err(err.into());
|
||||
}
|
||||
// Explicitly ignore any error returned by writeln!. The most likely error
|
||||
// at this point is a broken pipe error, in which case, we want to ignore
|
||||
// it and exit quietly.
|
||||
//
|
||||
// (This is the point of this helper function. clap's functionality for
|
||||
// doing this will panic on a broken pipe error.)
|
||||
let _ = writeln!(io::stdout(), "{}", err);
|
||||
process::exit(0);
|
||||
}
|
||||
|
@@ -5,11 +5,14 @@
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::io;
|
||||
use std::ffi::OsString;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use Result;
|
||||
use bstr::io::BufReadExt;
|
||||
use log;
|
||||
|
||||
use crate::Result;
|
||||
|
||||
/// Return a sequence of arguments derived from ripgrep rc configuration files.
|
||||
pub fn args() -> Vec<OsString> {
|
||||
@@ -34,7 +37,7 @@ pub fn args() -> Vec<OsString> {
|
||||
message!("{}:{}", config_path.display(), err);
|
||||
}
|
||||
}
|
||||
debug!(
|
||||
log::debug!(
|
||||
"{}: arguments loaded from config file: {:?}",
|
||||
config_path.display(),
|
||||
args
|
||||
@@ -74,62 +77,29 @@ fn parse<P: AsRef<Path>>(
|
||||
fn parse_reader<R: io::Read>(
|
||||
rdr: R,
|
||||
) -> Result<(Vec<OsString>, Vec<Box<Error>>)> {
|
||||
let mut bufrdr = io::BufReader::new(rdr);
|
||||
let bufrdr = io::BufReader::new(rdr);
|
||||
let (mut args, mut errs) = (vec![], vec![]);
|
||||
let mut line = vec![];
|
||||
let mut line_number = 0;
|
||||
while {
|
||||
line.clear();
|
||||
bufrdr.for_byte_line_with_terminator(|line| {
|
||||
line_number += 1;
|
||||
bufrdr.read_until(b'\n', &mut line)? > 0
|
||||
} {
|
||||
trim(&mut line);
|
||||
|
||||
let line = line.trim();
|
||||
if line.is_empty() || line[0] == b'#' {
|
||||
continue;
|
||||
return Ok(true);
|
||||
}
|
||||
match bytes_to_os_string(&line) {
|
||||
match line.to_os_str() {
|
||||
Ok(osstr) => {
|
||||
args.push(osstr);
|
||||
args.push(osstr.to_os_string());
|
||||
}
|
||||
Err(err) => {
|
||||
errs.push(format!("{}: {}", line_number, err).into());
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
})?;
|
||||
Ok((args, errs))
|
||||
}
|
||||
|
||||
/// Trim the given bytes of whitespace according to the ASCII definition.
|
||||
fn trim(x: &mut Vec<u8>) {
|
||||
let upto = x.iter().take_while(|b| is_space(**b)).count();
|
||||
x.drain(..upto);
|
||||
let revto = x.len() - x.iter().rev().take_while(|b| is_space(**b)).count();
|
||||
x.drain(revto..);
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given byte is an ASCII space character.
|
||||
fn is_space(b: u8) -> bool {
|
||||
b == b'\t'
|
||||
|| b == b'\n'
|
||||
|| b == b'\x0B'
|
||||
|| b == b'\x0C'
|
||||
|| b == b'\r'
|
||||
|| b == b' '
|
||||
}
|
||||
|
||||
/// On Unix, get an OsString from raw bytes.
|
||||
#[cfg(unix)]
|
||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
||||
use std::os::unix::ffi::OsStringExt;
|
||||
Ok(OsString::from_vec(bytes.to_vec()))
|
||||
}
|
||||
|
||||
/// On non-Unix (like Windows), require UTF-8.
|
||||
#[cfg(not(unix))]
|
||||
fn bytes_to_os_string(bytes: &[u8]) -> Result<OsString> {
|
||||
String::from_utf8(bytes.to_vec()).map(OsString::from).map_err(From::from)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ffi::OsString;
|
||||
|
64
src/main.rs
64
src/main.rs
@@ -1,17 +1,3 @@
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate grep;
|
||||
extern crate ignore;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate num_cpus;
|
||||
extern crate regex;
|
||||
#[macro_use]
|
||||
extern crate serde_json;
|
||||
extern crate termcolor;
|
||||
|
||||
use std::io::{self, Write};
|
||||
use std::process;
|
||||
use std::sync::{Arc, Mutex};
|
||||
@@ -36,33 +22,37 @@ mod subject;
|
||||
type Result<T> = ::std::result::Result<T, Box<::std::error::Error>>;
|
||||
|
||||
fn main() {
|
||||
match Args::parse().and_then(try_main) {
|
||||
Ok(true) => process::exit(0),
|
||||
Ok(false) => process::exit(1),
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
process::exit(2);
|
||||
}
|
||||
if let Err(err) = Args::parse().and_then(try_main) {
|
||||
eprintln!("{}", err);
|
||||
process::exit(2);
|
||||
}
|
||||
}
|
||||
|
||||
fn try_main(args: Args) -> Result<bool> {
|
||||
fn try_main(args: Args) -> Result<()> {
|
||||
use args::Command::*;
|
||||
|
||||
match args.command()? {
|
||||
Search => search(args),
|
||||
SearchParallel => search_parallel(args),
|
||||
SearchNever => Ok(false),
|
||||
Files => files(args),
|
||||
FilesParallel => files_parallel(args),
|
||||
Types => types(args),
|
||||
let matched =
|
||||
match args.command()? {
|
||||
Search => search(&args),
|
||||
SearchParallel => search_parallel(&args),
|
||||
SearchNever => Ok(false),
|
||||
Files => files(&args),
|
||||
FilesParallel => files_parallel(&args),
|
||||
Types => types(&args),
|
||||
}?;
|
||||
if matched && (args.quiet() || !messages::errored()) {
|
||||
process::exit(0)
|
||||
} else if messages::errored() {
|
||||
process::exit(2)
|
||||
} else {
|
||||
process::exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
/// The top-level entry point for single-threaded search. This recursively
|
||||
/// steps through the file list (current directory by default) and searches
|
||||
/// each file sequentially.
|
||||
fn search(args: Args) -> Result<bool> {
|
||||
fn search(args: &Args) -> Result<bool> {
|
||||
let started_at = Instant::now();
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
@@ -82,7 +72,7 @@ fn search(args: Args) -> Result<bool> {
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
err_message!("{}: {}", subject.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
@@ -105,7 +95,7 @@ fn search(args: Args) -> Result<bool> {
|
||||
/// The top-level entry point for multi-threaded search. The parallelism is
|
||||
/// itself achieved by the recursive directory traversal. All we need to do is
|
||||
/// feed it a worker for performing a search on each file.
|
||||
fn search_parallel(args: Args) -> Result<bool> {
|
||||
fn search_parallel(args: &Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
|
||||
@@ -141,7 +131,7 @@ fn search_parallel(args: Args) -> Result<bool> {
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
err_message!("{}: {}", subject.path().display(), err);
|
||||
return WalkState::Continue;
|
||||
}
|
||||
};
|
||||
@@ -158,7 +148,7 @@ fn search_parallel(args: Args) -> Result<bool> {
|
||||
return WalkState::Quit;
|
||||
}
|
||||
// Otherwise, we continue on our merry way.
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
err_message!("{}: {}", subject.path().display(), err);
|
||||
}
|
||||
if matched.load(SeqCst) && quit_after_match {
|
||||
WalkState::Quit
|
||||
@@ -183,7 +173,7 @@ fn search_parallel(args: Args) -> Result<bool> {
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using a single thread.
|
||||
fn files(args: Args) -> Result<bool> {
|
||||
fn files(args: &Args) -> Result<bool> {
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let mut matched = false;
|
||||
@@ -213,7 +203,7 @@ fn files(args: Args) -> Result<bool> {
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using multiple threads.
|
||||
fn files_parallel(args: Args) -> Result<bool> {
|
||||
fn files_parallel(args: &Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
use std::sync::mpsc;
|
||||
@@ -265,7 +255,7 @@ fn files_parallel(args: Args) -> Result<bool> {
|
||||
}
|
||||
|
||||
/// The top-level entry point for --type-list.
|
||||
fn types(args: Args) -> Result<bool> {
|
||||
fn types(args: &Args) -> Result<bool> {
|
||||
let mut count = 0;
|
||||
let mut stdout = args.stdout();
|
||||
for def in args.type_defs()? {
|
||||
|
@@ -1,21 +1,35 @@
|
||||
use std::sync::atomic::{ATOMIC_BOOL_INIT, AtomicBool, Ordering};
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
|
||||
static MESSAGES: AtomicBool = ATOMIC_BOOL_INIT;
|
||||
static IGNORE_MESSAGES: AtomicBool = ATOMIC_BOOL_INIT;
|
||||
static MESSAGES: AtomicBool = AtomicBool::new(false);
|
||||
static IGNORE_MESSAGES: AtomicBool = AtomicBool::new(false);
|
||||
static ERRORED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
/// Emit a non-fatal error message, unless messages were disabled.
|
||||
#[macro_export]
|
||||
macro_rules! message {
|
||||
($($tt:tt)*) => {
|
||||
if ::messages::messages() {
|
||||
if crate::messages::messages() {
|
||||
eprintln!($($tt)*);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Like message, but sets ripgrep's "errored" flag, which controls the exit
|
||||
/// status.
|
||||
#[macro_export]
|
||||
macro_rules! err_message {
|
||||
($($tt:tt)*) => {
|
||||
crate::messages::set_errored();
|
||||
message!($($tt)*);
|
||||
}
|
||||
}
|
||||
|
||||
/// Emit a non-fatal ignore-related error message (like a parse error), unless
|
||||
/// ignore-messages were disabled.
|
||||
#[macro_export]
|
||||
macro_rules! ignore_message {
|
||||
($($tt:tt)*) => {
|
||||
if ::messages::messages() && ::messages::ignore_messages() {
|
||||
if crate::messages::messages() && crate::messages::ignore_messages() {
|
||||
eprintln!($($tt)*);
|
||||
}
|
||||
}
|
||||
@@ -48,3 +62,13 @@ pub fn ignore_messages() -> bool {
|
||||
pub fn set_ignore_messages(yes: bool) {
|
||||
IGNORE_MESSAGES.store(yes, Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Returns true if and only if ripgrep came across a non-fatal error.
|
||||
pub fn errored() -> bool {
|
||||
ERRORED.load(Ordering::SeqCst)
|
||||
}
|
||||
|
||||
/// Indicate that ripgrep has come across a non-fatal error.
|
||||
pub fn set_errored() {
|
||||
ERRORED.store(true, Ordering::SeqCst);
|
||||
}
|
||||
|
@@ -13,9 +13,10 @@ use grep::regex::{RegexMatcher as RustRegexMatcher};
|
||||
use grep::searcher::Searcher;
|
||||
use ignore::overrides::Override;
|
||||
use serde_json as json;
|
||||
use serde_json::json;
|
||||
use termcolor::WriteColor;
|
||||
|
||||
use subject::Subject;
|
||||
use crate::subject::Subject;
|
||||
|
||||
/// The configuration for the search worker. Among a few other things, the
|
||||
/// configuration primarily controls the way we show search results to users
|
||||
|
@@ -1,6 +1,7 @@
|
||||
use std::path::Path;
|
||||
|
||||
use ignore::{self, DirEntry};
|
||||
use log;
|
||||
|
||||
/// A configuration for describing how subjects should be built.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -40,7 +41,7 @@ impl SubjectBuilder {
|
||||
match result {
|
||||
Ok(dent) => self.build(dent),
|
||||
Err(err) => {
|
||||
message!("{}", err);
|
||||
err_message!("{}", err);
|
||||
None
|
||||
}
|
||||
}
|
||||
@@ -79,7 +80,7 @@ impl SubjectBuilder {
|
||||
// directory. Otherwise, emitting messages for directories is just
|
||||
// noisy.
|
||||
if !subj.is_dir() {
|
||||
debug!(
|
||||
log::debug!(
|
||||
"ignoring {}: failed to pass subject filter: \
|
||||
file type: {:?}, metadata: {:?}",
|
||||
subj.dent.path().display(),
|
||||
@@ -126,9 +127,19 @@ impl Subject {
|
||||
self.dent.is_stdin()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this subject points to a directory.
|
||||
/// Returns true if and only if this subject points to a directory after
|
||||
/// following symbolic links.
|
||||
fn is_dir(&self) -> bool {
|
||||
self.dent.file_type().map_or(false, |ft| ft.is_dir())
|
||||
let ft = match self.dent.file_type() {
|
||||
None => return false,
|
||||
Some(ft) => ft,
|
||||
};
|
||||
if ft.is_dir() {
|
||||
return true;
|
||||
}
|
||||
// If this is a symlink, then we want to follow it to determine
|
||||
// whether it's a directory or not.
|
||||
self.dent.path_is_symlink() && self.dent.path().is_dir()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this subject points to a file.
|
||||
|
2
tests/data/sherlock.br
Normal file
2
tests/data/sherlock.br
Normal file
@@ -0,0 +1,2 @@
|
||||
n<01><><EFBFBD>-_<>.<2E> <0C><><11><>cM<63><4D><EFBFBD><EFBFBD>Y<EFBFBD>4<08><><EFBFBD><EFBFBD><EFBFBD>Ya<0B>-L<>O(<28>8<EFBFBD>sn^Gwш!,<2C>
|
||||
KD<EFBFBD><EFBFBD>/7<><37>th<74><1A><0C><10>]j<02><><EFBFBD><EFBFBD><EFBFBD>E_;d<1E>rF<72>Qs<51>/:DIVB}<7D>T7<54><37>ѵ<04><16>H<EFBFBD>2<EFBFBD><32><EFBFBD>)<29><>M[u<><75><EFBFBD>i<EFBFBD><69><EFBFBD><0F>50ڮ<30>Y6<><36><EFBFBD><EFBFBD><17><><EFBFBD><EFBFBD><07>%<25>ר_<D7A8><5F>U by<62>4<EFBFBD><34>Ϡ<EFBFBD>!&<26>g<><15>#<23>
|
BIN
tests/data/sherlock.zst
Normal file
BIN
tests/data/sherlock.zst
Normal file
Binary file not shown.
@@ -1,5 +1,5 @@
|
||||
use hay::{SHERLOCK, SHERLOCK_CRLF};
|
||||
use util::{Dir, TestCommand, sort_lines};
|
||||
use crate::hay::{SHERLOCK, SHERLOCK_CRLF};
|
||||
use crate::util::{Dir, TestCommand, sort_lines};
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1
|
||||
rgtest!(f1_sjis, |dir: Dir, mut cmd: TestCommand| {
|
||||
@@ -629,3 +629,19 @@ rgtest!(f993_null_data, |dir: Dir, mut cmd: TestCommand| {
|
||||
let expected = "foo\x00bar\x00baz\x00";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1138
|
||||
rgtest!(f1138_no_ignore_dot, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_dir(".git");
|
||||
dir.create(".gitignore", "foo");
|
||||
dir.create(".ignore", "bar");
|
||||
dir.create(".fzf-ignore", "quux");
|
||||
dir.create("foo", "");
|
||||
dir.create("bar", "");
|
||||
dir.create("quux", "");
|
||||
|
||||
cmd.arg("--sort").arg("path").arg("--files");
|
||||
eqnice!("quux\n", cmd.stdout());
|
||||
eqnice!("bar\nquux\n", cmd.arg("--no-ignore-dot").stdout());
|
||||
eqnice!("bar\n", cmd.arg("--ignore-file").arg(".fzf-ignore").stdout());
|
||||
});
|
||||
|
@@ -1,9 +1,10 @@
|
||||
use std::time;
|
||||
|
||||
use serde_derive::Deserialize;
|
||||
use serde_json as json;
|
||||
|
||||
use hay::{SHERLOCK, SHERLOCK_CRLF};
|
||||
use util::{Dir, TestCommand};
|
||||
use crate::hay::{SHERLOCK, SHERLOCK_CRLF};
|
||||
use crate::util::{Dir, TestCommand};
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(tag = "type", content = "data")]
|
||||
@@ -152,7 +153,10 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
|
||||
msgs[1].unwrap_context(),
|
||||
Context {
|
||||
path: Some(Data::text("sherlock")),
|
||||
lines: Data::text("Holmeses, success in the province of detective work must always\n"),
|
||||
lines: Data::text(
|
||||
"Holmeses, success in the province of \
|
||||
detective work must always\n",
|
||||
),
|
||||
line_number: Some(2),
|
||||
absolute_offset: 65,
|
||||
submatches: vec![],
|
||||
@@ -162,7 +166,10 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
|
||||
msgs[2].unwrap_match(),
|
||||
Match {
|
||||
path: Some(Data::text("sherlock")),
|
||||
lines: Data::text("be, to a very large extent, the result of luck. Sherlock Holmes\n"),
|
||||
lines: Data::text(
|
||||
"be, to a very large extent, the result of luck. \
|
||||
Sherlock Holmes\n",
|
||||
),
|
||||
line_number: Some(3),
|
||||
absolute_offset: 129,
|
||||
submatches: vec![
|
||||
@@ -211,7 +218,9 @@ rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| {
|
||||
let contents = &b"quux\xFFbaz"[..];
|
||||
|
||||
// APFS does not support creating files with invalid UTF-8 bytes, so just
|
||||
// skip the test if we can't create our file.
|
||||
// skip the test if we can't create our file. Presumably we don't need this
|
||||
// check if we're already skipping it on macOS, but maybe other file
|
||||
// systems won't like this test either?
|
||||
if !dir.try_create_bytes(OsStr::from_bytes(name), contents).is_ok() {
|
||||
return;
|
||||
}
|
||||
@@ -304,3 +313,52 @@ rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| {
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1095
|
||||
//
|
||||
// This test checks that we don't drop the \r\n in a matching line when --crlf
|
||||
// mode is enabled.
|
||||
rgtest!(r1095_missing_crlf, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("foo", "test\r\n");
|
||||
|
||||
// Check without --crlf flag.
|
||||
let msgs = json_decode(&cmd.arg("--json").arg("test").stdout());
|
||||
assert_eq!(msgs.len(), 4);
|
||||
assert_eq!(msgs[1].unwrap_match().lines, Data::text("test\r\n"));
|
||||
|
||||
// Now check with --crlf flag.
|
||||
let msgs = json_decode(&cmd.arg("--crlf").stdout());
|
||||
assert_eq!(msgs.len(), 4);
|
||||
assert_eq!(msgs[1].unwrap_match().lines, Data::text("test\r\n"));
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1095
|
||||
//
|
||||
// This test checks that we don't return empty submatches when matching a `\n`
|
||||
// in CRLF mode.
|
||||
rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("foo", "test\r\n\n");
|
||||
|
||||
// Check without --crlf flag.
|
||||
let msgs = json_decode(&cmd.arg("-U").arg("--json").arg("\n").stdout());
|
||||
assert_eq!(msgs.len(), 5);
|
||||
|
||||
let m = msgs[1].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("test\r\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
|
||||
let m = msgs[2].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
|
||||
// Now check with --crlf flag.
|
||||
let msgs = json_decode(&cmd.arg("--crlf").stdout());
|
||||
|
||||
let m = msgs[1].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("test\r\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
|
||||
let m = msgs[2].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
});
|
||||
|
@@ -3,11 +3,11 @@ macro_rules! rgtest {
|
||||
($name:ident, $fun:expr) => {
|
||||
#[test]
|
||||
fn $name() {
|
||||
let (dir, cmd) = ::util::setup(stringify!($name));
|
||||
let (dir, cmd) = crate::util::setup(stringify!($name));
|
||||
$fun(dir, cmd);
|
||||
|
||||
if cfg!(feature = "pcre2") {
|
||||
let (dir, cmd) = ::util::setup_pcre2(stringify!($name));
|
||||
let (dir, cmd) = crate::util::setup_pcre2(stringify!($name));
|
||||
$fun(dir, cmd);
|
||||
}
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
use hay::SHERLOCK;
|
||||
use util::{Dir, TestCommand, cmd_exists, sort_lines};
|
||||
use crate::hay::SHERLOCK;
|
||||
use crate::util::{Dir, TestCommand, cmd_exists, sort_lines};
|
||||
|
||||
// This file contains "miscellaneous" tests that were either written before
|
||||
// features were tracked more explicitly, or were simply written without
|
||||
@@ -909,6 +909,36 @@ be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
rgtest!(compressed_brotli, |dir: Dir, mut cmd: TestCommand| {
|
||||
if !cmd_exists("brotli") {
|
||||
return;
|
||||
}
|
||||
|
||||
dir.create_bytes("sherlock.br", include_bytes!("./data/sherlock.br"));
|
||||
cmd.arg("-z").arg("Sherlock").arg("sherlock.br");
|
||||
|
||||
let expected = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
rgtest!(compressed_zstd, |dir: Dir, mut cmd: TestCommand| {
|
||||
if !cmd_exists("zstd") {
|
||||
return;
|
||||
}
|
||||
|
||||
dir.create_bytes("sherlock.zst", include_bytes!("./data/sherlock.zst"));
|
||||
cmd.arg("-z").arg("Sherlock").arg("sherlock.zst");
|
||||
|
||||
let expected = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
rgtest!(compressed_failing_gzip, |dir: Dir, mut cmd: TestCommand| {
|
||||
if !cmd_exists("gzip") {
|
||||
return;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
use hay::SHERLOCK;
|
||||
use util::{Dir, TestCommand};
|
||||
use crate::hay::SHERLOCK;
|
||||
use crate::util::{Dir, TestCommand};
|
||||
|
||||
// This tests that multiline matches that span multiple lines, but where
|
||||
// multiple matches may begin and end on the same line work correctly.
|
||||
|
@@ -1,5 +1,5 @@
|
||||
use hay::SHERLOCK;
|
||||
use util::{Dir, TestCommand, sort_lines};
|
||||
use crate::hay::SHERLOCK;
|
||||
use crate::util::{Dir, TestCommand, sort_lines};
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/16
|
||||
rgtest!(r16, |dir: Dir, mut cmd: TestCommand| {
|
||||
@@ -562,3 +562,146 @@ rgtest!(r900, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
cmd.arg("-fpat").arg("sherlock").assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1064
|
||||
rgtest!(r1064, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("input", "abc");
|
||||
eqnice!("input:abc\n", cmd.arg("a(.*c)").stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1098
|
||||
rgtest!(r1098, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_dir(".git");
|
||||
dir.create(".gitignore", "a**b");
|
||||
dir.create("afoob", "test");
|
||||
cmd.arg("test").assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1130
|
||||
rgtest!(r1130, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("foo", "test");
|
||||
eqnice!(
|
||||
"foo\n",
|
||||
cmd.arg("--files-with-matches").arg("test").arg("foo").stdout()
|
||||
);
|
||||
|
||||
let mut cmd = dir.command();
|
||||
eqnice!(
|
||||
"foo\n",
|
||||
cmd.arg("--files-without-match").arg("nada").arg("foo").stdout()
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1159
|
||||
rgtest!(r1159_invalid_flag, |_: Dir, mut cmd: TestCommand| {
|
||||
cmd.arg("--wat").assert_exit_code(2);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1159
|
||||
rgtest!(r1159_exit_status, |dir: Dir, _: TestCommand| {
|
||||
dir.create("foo", "test");
|
||||
|
||||
// search with a match gets 0 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("test").assert_exit_code(0);
|
||||
|
||||
// search with --quiet and a match gets 0 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("-q").arg("test").assert_exit_code(0);
|
||||
|
||||
// search with a match and an error gets 2 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("test").arg("no-file").assert_exit_code(2);
|
||||
|
||||
// search with a match in --quiet mode and an error gets 0 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("-q").arg("test").arg("foo").arg("no-file").assert_exit_code(0);
|
||||
|
||||
// search with no match gets 1 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("nada").assert_exit_code(1);
|
||||
|
||||
// search with --quiet and no match gets 1 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("-q").arg("nada").assert_exit_code(1);
|
||||
|
||||
// search with no match and an error gets 2 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("nada").arg("no-file").assert_exit_code(2);
|
||||
|
||||
// search with no match in --quiet mode and an error gets 2 exit status.
|
||||
let mut cmd = dir.command();
|
||||
cmd.arg("-q").arg("nada").arg("foo").arg("no-file").assert_exit_code(2);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1163
|
||||
rgtest!(r1163, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("bom.txt", "\u{FEFF}test123\ntest123");
|
||||
eqnice!(
|
||||
"bom.txt:test123\nbom.txt:test123\n",
|
||||
cmd.arg("^test123").stdout()
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1164
|
||||
rgtest!(r1164, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_dir(".git");
|
||||
dir.create(".gitignore", "myfile");
|
||||
dir.create("MYFILE", "test");
|
||||
|
||||
cmd.arg("--ignore-file-case-insensitive").arg("test").assert_err();
|
||||
eqnice!(
|
||||
"MYFILE:test\n",
|
||||
cmd.arg("--no-ignore-file-case-insensitive").stdout()
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1173
|
||||
rgtest!(r1173, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_dir(".git");
|
||||
dir.create(".gitignore", "**");
|
||||
dir.create("foo", "test");
|
||||
cmd.arg("test").assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1174
|
||||
rgtest!(r1174, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_dir(".git");
|
||||
dir.create(".gitignore", "**/**/*");
|
||||
dir.create_dir("a");
|
||||
dir.create("a/foo", "test");
|
||||
cmd.arg("test").assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1176
|
||||
rgtest!(r1176_literal_file, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("patterns", "foo(bar\n");
|
||||
dir.create("test", "foo(bar");
|
||||
|
||||
eqnice!(
|
||||
"foo(bar\n",
|
||||
cmd.arg("-F").arg("-f").arg("patterns").arg("test").stdout()
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1176
|
||||
rgtest!(r1176_line_regex, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("patterns", "foo\n");
|
||||
dir.create("test", "foobar\nfoo\nbarfoo\n");
|
||||
|
||||
eqnice!(
|
||||
"foo\n",
|
||||
cmd.arg("-x").arg("-f").arg("patterns").arg("test").stdout()
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1203
|
||||
rgtest!(r1203_reverse_suffix_literal, |dir: Dir, _: TestCommand| {
|
||||
dir.create("test", "153.230000\n");
|
||||
|
||||
let mut cmd = dir.command();
|
||||
eqnice!("153.230000\n", cmd.arg(r"\d\d\d00").arg("test").stdout());
|
||||
|
||||
let mut cmd = dir.command();
|
||||
eqnice!("153.230000\n", cmd.arg(r"\d\d\d000").arg("test").stdout());
|
||||
});
|
||||
|
@@ -1,8 +1,3 @@
|
||||
extern crate serde;
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
extern crate serde_json;
|
||||
|
||||
// Macros useful for testing.
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
@@ -5,12 +5,12 @@ use std::fs::{self, File};
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{self, Command};
|
||||
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
static TEST_DIR: &'static str = "ripgrep-tests";
|
||||
static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;
|
||||
static NEXT_ID: AtomicUsize = AtomicUsize::new(0);
|
||||
|
||||
/// Setup an empty work directory and return a command pointing to the ripgrep
|
||||
/// executable whose CWD is set to the work directory.
|
||||
|
Reference in New Issue
Block a user