mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 02:01:58 -07:00
Compare commits
171 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
18943b9317 | ||
|
68427b5b79 | ||
|
4ca15a8a51 | ||
|
2daef51fe5 | ||
|
43ed91dc5c | ||
|
dada75d2a7 | ||
|
76b9f01ad2 | ||
|
8baa0e56b7 | ||
|
301ee6d3f5 | ||
|
77ad7588ae | ||
|
58aca2efb2 | ||
|
351eddc17e | ||
|
277dda544c | ||
|
8c869cbd87 | ||
|
598b162fea | ||
|
0222e024fe | ||
|
5bd0edbbe1 | ||
|
4368913d8f | ||
|
02de97b8ce | ||
|
32db773d51 | ||
|
b272be25fa | ||
|
1aeae3e22d | ||
|
60d537c43d | ||
|
ef5c07476b | ||
|
4f6f34307c | ||
|
7cf560d27c | ||
|
15b263ff55 | ||
|
53121e0733 | ||
|
404785f950 | ||
|
103c4c953c | ||
|
82abf883c5 | ||
|
a2315d5ee5 | ||
|
201d0cb8c1 | ||
|
6f45478a7d | ||
|
9c2c569624 | ||
|
a1e4e0f85c | ||
|
caf31a769b | ||
|
920112e640 | ||
|
a84ffe603b | ||
|
e4f83f3161 | ||
|
fbca4a0332 | ||
|
65c7df1c25 | ||
|
18237da9b2 | ||
|
f147f3aa39 | ||
|
599c4fc3f3 | ||
|
d85a6dd5c8 | ||
|
40abade8ee | ||
|
fca4fdf6ea | ||
|
16975797fe | ||
|
6507a48f97 | ||
|
c8e2fa1869 | ||
|
f728708ce9 | ||
|
c302995d05 | ||
|
4a77cc8100 | ||
|
dc86666044 | ||
|
6b038511c7 | ||
|
c767bccade | ||
|
a075a462fa | ||
|
24f753c306 | ||
|
1aae2759ad | ||
|
91646f6cca | ||
|
031ace209d | ||
|
942e9c4743 | ||
|
12c9656b18 | ||
|
8bf3760cdb | ||
|
c96623e66a | ||
|
36f949633b | ||
|
811fcc1fe8 | ||
|
79a8d0ab3f | ||
|
fbf8265cde | ||
|
d79add341b | ||
|
12b2b1f624 | ||
|
3aaf550ca5 | ||
|
d4876cd064 | ||
|
867a57e176 | ||
|
ec4904df33 | ||
|
c4ea157cb7 | ||
|
0156967f4c | ||
|
3238707b0b | ||
|
31fbae597f | ||
|
f2e1711781 | ||
|
94d600e6e1 | ||
|
b904c5d9dc | ||
|
5a29417796 | ||
|
f694800768 | ||
|
bd1c9e9499 | ||
|
f04b0dd95c | ||
|
5487dffefa | ||
|
1fc6787648 | ||
|
11e164aec9 | ||
|
1c1331d926 | ||
|
cbe94823d2 | ||
|
d8712daf27 | ||
|
7cbbef019f | ||
|
4d29d886e5 | ||
|
247a9398f4 | ||
|
4c3025ab1c | ||
|
4981991a6e | ||
|
51440f59cd | ||
|
7b8a8d77d0 | ||
|
4737326ed3 | ||
|
a3537aa32a | ||
|
d3e118a786 | ||
|
4e52059ad6 | ||
|
60c016c243 | ||
|
4665128f25 | ||
|
dde5bd5a80 | ||
|
762ad44f71 | ||
|
705386934d | ||
|
97bbc6ef11 | ||
|
27a980c1bc | ||
|
e8645dc8ae | ||
|
e96d93034a | ||
|
bc5accc035 | ||
|
c9d0ca8257 | ||
|
45fe4aab96 | ||
|
97f981fbcb | ||
|
59329dcc61 | ||
|
604da8eb86 | ||
|
1c964372ad | ||
|
50a961960e | ||
|
7481c5fe29 | ||
|
3ae37b0937 | ||
|
4ee6dbe422 | ||
|
cd4bdcf810 | ||
|
175406df01 | ||
|
89811d43d4 | ||
|
f0053682c0 | ||
|
35045d6105 | ||
|
95f552fc06 | ||
|
48353bea17 | ||
|
703d5b558e | ||
|
47efea234f | ||
|
ca0d8998a2 | ||
|
fdf24317ac | ||
|
b9d5f22a4d | ||
|
67bb4f040f | ||
|
cee2f09a6d | ||
|
ced777e91f | ||
|
e9d9083898 | ||
|
46dff8f4be | ||
|
7aa6e87952 | ||
|
925d0db9f0 | ||
|
316ffd87b3 | ||
|
5943b1effe | ||
|
c42f97b4da | ||
|
0d9bba7816 | ||
|
3550f2e29a | ||
|
babe80d498 | ||
|
3e892a7a80 | ||
|
1df3f0b793 | ||
|
b3935935cb | ||
|
67abbf6f22 | ||
|
7b9f7d7dc6 | ||
|
7ab29a91d0 | ||
|
9fa38c6232 | ||
|
de79be2db2 | ||
|
416b69bae5 | ||
|
3e78fce3a3 | ||
|
7a3fd1f23f | ||
|
d306403440 | ||
|
ebabe1df6a | ||
|
f27aa3ff6f | ||
|
20ccd441f2 | ||
|
104d740f76 | ||
|
2da0eab2b8 | ||
|
b8c7864a02 | ||
|
ec26995655 | ||
|
a41235a3b5 | ||
|
1a91b900e7 | ||
|
2b15832655 |
2
.gitignore
vendored
2
.gitignore
vendored
@@ -2,3 +2,5 @@
|
||||
tags
|
||||
target
|
||||
/grep/Cargo.lock
|
||||
/globset/Cargo.lock
|
||||
/ignore/Cargo.lock
|
||||
|
@@ -15,9 +15,6 @@ matrix:
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=i686-apple-darwin
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-apple-darwin
|
||||
|
166
CHANGELOG.md
166
CHANGELOG.md
@@ -1,3 +1,169 @@
|
||||
0.2.8
|
||||
=====
|
||||
Bug fixes:
|
||||
|
||||
* Fixed a bug with the SIMD/AVX features for using bytecount in commit
|
||||
`4ca15a`.
|
||||
|
||||
|
||||
0.2.7
|
||||
=====
|
||||
Performance improvements:
|
||||
|
||||
* [PERF #223](https://github.com/BurntSushi/ripgrep/pull/223):
|
||||
Added a parallel recursive directory iterator. This results in major
|
||||
performance improvements on large repositories.
|
||||
* [PERF #11](https://github.com/BurntSushi/ripgrep/pull/11):
|
||||
ripgrep now uses the `bytecount` library for counting new lines. In some
|
||||
cases, ripgrep runs twice as fast. Use
|
||||
`RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel'`
|
||||
to get the fastest possible binary.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Agda, Tex, Taskpaper, Markdown,
|
||||
asciidoc, textile, rdoc, org, creole, wiki, pod, C#, PDF, C, C++.
|
||||
* [FEATURE #149](https://github.com/BurntSushi/ripgrep/issues/149):
|
||||
Add a new `--no-messages` flag that suppresses error messages.
|
||||
Note that `rg foo 2> /dev/null` also works.
|
||||
* [FEATURE #159](https://github.com/BurntSushi/ripgrep/issues/159):
|
||||
Add a new `-m/--max-count` flag that limits the total number of matches
|
||||
printed for each file searched.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #199](https://github.com/BurntSushi/ripgrep/issues/199):
|
||||
Fixed a bug where `-S/--smart-case` wasn't being applied correctly to
|
||||
literal optimizations.
|
||||
* [BUG #203](https://github.com/BurntSushi/ripgrep/issues/203):
|
||||
Mention the full name, ripgrep, in more places. It now appears in
|
||||
the output of `--help` and `--version`. The repository URL is now also
|
||||
in the output of `--help` and the man page.
|
||||
* [BUG #215](https://github.com/BurntSushi/ripgrep/issues/215):
|
||||
Include a small note about how to search for a pattern that starts with a `-`.
|
||||
|
||||
|
||||
0.2.6
|
||||
=====
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Fish.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #206](https://github.com/BurntSushi/ripgrep/issues/206):
|
||||
Fixed a regression with `-g/--glob` flag in `0.2.5`.
|
||||
|
||||
|
||||
0.2.5
|
||||
=====
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Groovy, Handlebars, Tcl, zsh and
|
||||
Python.
|
||||
* [FEATURE #9](https://github.com/BurntSushi/ripgrep/issues/9):
|
||||
Support global gitignore config and `.git/info/exclude` files.
|
||||
* [FEATURE #45](https://github.com/BurntSushi/ripgrep/issues/45):
|
||||
Add --ignore-file flag for specifying additional ignore files.
|
||||
* [FEATURE #202](https://github.com/BurntSushi/ripgrep/pull/202):
|
||||
Introduce a new
|
||||
[`ignore`](https://github.com/BurntSushi/ripgrep/tree/master/ignore)
|
||||
crate that encapsulates all of ripgrep's gitignore matching logic.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #44](https://github.com/BurntSushi/ripgrep/issues/44):
|
||||
ripgrep runs slowly when given lots of positional arguments that are
|
||||
directories.
|
||||
* [BUG #119](https://github.com/BurntSushi/ripgrep/issues/119):
|
||||
ripgrep didn't reset terminal colors if it was interrupted by `^C`.
|
||||
Fixed in [PR #187](https://github.com/BurntSushi/ripgrep/pull/187).
|
||||
* [BUG #184](https://github.com/BurntSushi/ripgrep/issues/184):
|
||||
Fixed a bug related to interpreting gitignore files in parent directories.
|
||||
|
||||
|
||||
0.2.4
|
||||
=====
|
||||
SKIPPED.
|
||||
|
||||
|
||||
0.2.3
|
||||
=====
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #164](https://github.com/BurntSushi/ripgrep/issues/164):
|
||||
Fixes a segfault on macos builds.
|
||||
* [BUG #167](https://github.com/BurntSushi/ripgrep/issues/167):
|
||||
Clarify documentation for --threads.
|
||||
|
||||
|
||||
0.2.2
|
||||
=====
|
||||
Packaging updates:
|
||||
|
||||
* `ripgrep` is now in homebrew-core. `brew install ripgrep` will do the trick
|
||||
on a Mac.
|
||||
* `ripgrep` is now in the Archlinux community repository.
|
||||
`pacman -S ripgrep` will do the trick on Archlinux.
|
||||
* Support has been discontinued for i686-darwin.
|
||||
* Glob matching has been moved out into its own crate:
|
||||
[`globset`](https://crates.io/crates/globset).
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for CMake, config, Jinja, Markdown,
|
||||
Spark.
|
||||
* [FEATURE #109](https://github.com/BurntSushi/ripgrep/issues/109):
|
||||
Add a --max-depth flag for directory traversal.
|
||||
* [FEATURE #124](https://github.com/BurntSushi/ripgrep/issues/124):
|
||||
Add -s/--case-sensitive flag. Overrides --smart-case.
|
||||
* [FEATURE #139](https://github.com/BurntSushi/ripgrep/pull/139):
|
||||
The `ripgrep` repo is now a Homebrew tap. This is useful for installing
|
||||
SIMD accelerated binaries, which aren't available in homebrew-core.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #87](https://github.com/BurntSushi/ripgrep/issues/87),
|
||||
[BUG #127](https://github.com/BurntSushi/ripgrep/issues/127),
|
||||
[BUG #131](https://github.com/BurntSushi/ripgrep/issues/131):
|
||||
Various issues related to glob matching.
|
||||
* [BUG #116](https://github.com/BurntSushi/ripgrep/issues/116):
|
||||
--quiet should stop search after first match.
|
||||
* [BUG #121](https://github.com/BurntSushi/ripgrep/pull/121):
|
||||
--color always should show colors, even when --vimgrep is used.
|
||||
* [BUG #122](https://github.com/BurntSushi/ripgrep/pull/122):
|
||||
Colorize file path at beginning of line.
|
||||
* [BUG #134](https://github.com/BurntSushi/ripgrep/issues/134):
|
||||
Processing a large ignore file (thousands of globs) was very slow.
|
||||
* [BUG #137](https://github.com/BurntSushi/ripgrep/issues/137):
|
||||
Always follow symlinks when given as an explicit argument.
|
||||
* [BUG #147](https://github.com/BurntSushi/ripgrep/issues/147):
|
||||
Clarify documentation for --replace.
|
||||
|
||||
|
||||
0.2.1
|
||||
=====
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Clojure and SystemVerilog.
|
||||
* [FEATURE #89](https://github.com/BurntSushi/ripgrep/issues/89):
|
||||
Add a --null flag that outputs a NUL byte after every file path.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #98](https://github.com/BurntSushi/ripgrep/issues/98):
|
||||
Fix a bug in single threaded mode where, if opening a file failed, ripgrep
|
||||
quit instead of continuing the search.
|
||||
* [BUG #99](https://github.com/BurntSushi/ripgrep/issues/99):
|
||||
Fix another bug in single threaded mode where empty lines were being printed
|
||||
by mistake.
|
||||
* [BUG #105](https://github.com/BurntSushi/ripgrep/issues/105):
|
||||
Fix an off-by-one error with --column.
|
||||
* [BUG #106](https://github.com/BurntSushi/ripgrep/issues/106):
|
||||
Fix a bug where a whitespace only line in a gitignore file caused ripgrep
|
||||
to panic (i.e., crash).
|
||||
|
||||
|
||||
0.2.0
|
||||
=====
|
||||
Feature enhancements:
|
||||
|
170
Cargo.lock
generated
170
Cargo.lock
generated
@@ -1,24 +1,23 @@
|
||||
[root]
|
||||
name = "ripgrep"
|
||||
version = "0.2.0"
|
||||
version = "0.2.8"
|
||||
dependencies = [
|
||||
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"docopt 0.6.85 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bytecount 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"ctrlc 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep 0.1.3",
|
||||
"grep 0.1.4",
|
||||
"ignore 0.1.4",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@@ -31,20 +30,35 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "deque"
|
||||
version = "0.3.1"
|
||||
name = "bytecount"
|
||||
version = "0.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam"
|
||||
version = "0.2.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "ctrlc"
|
||||
version = "2.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docopt"
|
||||
version = "0.6.85"
|
||||
version = "0.6.86"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
@@ -55,7 +69,7 @@ version = "0.3.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -65,28 +79,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "fs2"
|
||||
version = "0.2.5"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.2.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
name = "globset"
|
||||
version = "0.1.2"
|
||||
dependencies = [
|
||||
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.1.3"
|
||||
version = "0.1.4"
|
||||
dependencies = [
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.1.4"
|
||||
dependencies = [
|
||||
"crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.1.2",
|
||||
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -105,7 +140,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.16"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
@@ -118,17 +153,17 @@ name = "memchr"
|
||||
version = "0.1.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memmap"
|
||||
version = "0.2.3"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
@@ -137,25 +172,17 @@ name = "num_cpus"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "rand"
|
||||
version = "0.3.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "0.1.77"
|
||||
version = "0.1.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -163,7 +190,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.3.5"
|
||||
version = "0.3.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
@@ -196,7 +223,16 @@ version = "2.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread-id"
|
||||
version = "3.0.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -207,14 +243,36 @@ dependencies = [
|
||||
"thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unreachable"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "utf8-ranges"
|
||||
version = "0.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "void"
|
||||
version = "1.0.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "0.1.8"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@@ -233,29 +291,33 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[metadata]
|
||||
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
|
||||
"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
|
||||
"checksum docopt 0.6.85 (registry+https://github.com/rust-lang/crates.io-index)" = "1b88d783674021c5570e7238e17985b9b8c7141d90f33de49031b8d56e7f0bf9"
|
||||
"checksum bytecount 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49e3c21915578e2300b08d3c174a8ac887e0c6421dff86fdc4d741dc29e5d413"
|
||||
"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
|
||||
"checksum ctrlc 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "77f98bb69e3fefadcc5ca80a1368a55251f70295168203e01165bcaecb270891"
|
||||
"checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9"
|
||||
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
|
||||
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
|
||||
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
|
||||
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
|
||||
"checksum fs2 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "640001e1bd865c7c32806292822445af576a6866175b5225aa2087ca5e3de551"
|
||||
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
|
||||
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
|
||||
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"
|
||||
"checksum libc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "044d1360593a78f5c8e5e710beccdc24ab71d1f01bc19a29bcacdba22e8475d8"
|
||||
"checksum log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ab83497bf8bf4ed2a74259c1c802351fcd67a65baa86394b6ba73c36f4838054"
|
||||
"checksum memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)" = "d8b629fb514376c675b98c1421e80b151d3817ac42d7c667717d282761418d20"
|
||||
"checksum memmap 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f20f72ed93291a72e22e8b16bb18762183bb4943f0f483da5b8be1a9e8192752"
|
||||
"checksum memmap 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "065ce59af31c18ea2c419100bda6247dd4ec3099423202b12f0bd32e529fabd2"
|
||||
"checksum num_cpus 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "8890e6084723d57d0df8d2720b0d60c6ee67d6c93e7169630e4371e88765dcad"
|
||||
"checksum rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)" = "2791d88c6defac799c3f20d74f094ca33b9332612d9aef9078519c82e4fe04a5"
|
||||
"checksum regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)" = "64b03446c466d35b42f2a8b203c8e03ed8b91c0f17b56e1f84f7210a257aa665"
|
||||
"checksum regex-syntax 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "279401017ae31cf4e15344aa3f085d0e2e5c1e70067289ef906906fdbe92c8fd"
|
||||
"checksum regex 0.1.80 (registry+https://github.com/rust-lang/crates.io-index)" = "4fd4ace6a8cf7860714a2c2280d6c1f7e6a413486c13298bbc86fd3da019402f"
|
||||
"checksum regex-syntax 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "f9ec002c35e86791825ed294b50008eea9ddfc8def4420124fbc6b08db834957"
|
||||
"checksum rustc-serialize 0.3.19 (registry+https://github.com/rust-lang/crates.io-index)" = "6159e4e6e559c81bd706afe9c8fd68f547d3e851ce12e76b1de7914bab61691b"
|
||||
"checksum simd 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "63b5847c2d766ca7ce7227672850955802fabd779ba616aeabead4c2c3877023"
|
||||
"checksum strsim 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "50c069df92e4b01425a8bf3576d5d417943a6a7272fbabaf5bd80b1aaa76442e"
|
||||
"checksum term 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "3deff8a2b3b6607d6d7cc32ac25c0b33709453ca9cceac006caac51e963cf94a"
|
||||
"checksum thread-id 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a9539db560102d1cef46b8b78ce737ff0bb64e7e18d35b2a5688f7d097d0ff03"
|
||||
"checksum thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4437c97558c70d129e40629a5b385b3fb1ffac301e63941335e4d354081ec14a"
|
||||
"checksum thread_local 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "8576dbbfcaef9641452d5cf0df9b0e7eeab7694956dd33bb61515fb8f18cfdd5"
|
||||
"checksum thread_local 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "50057ca52c629a39aed52d8eb253800cb727875fa6fc7c4b1445f0ac3b50c27c"
|
||||
"checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91"
|
||||
"checksum utf8-ranges 0.1.3 (registry+https://github.com/rust-lang/crates.io-index)" = "a1ca13c08c41c9c3e04224ed9ff80461d97e121589ff27c753a16cb10830ae0f"
|
||||
"checksum walkdir 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "c66c0b9792f0a765345452775f3adbd28dde9d33f30d13e5dcc5ae17cf6f3780"
|
||||
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
|
||||
"checksum walkdir 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "98da26f00240118fbb7a06fa29579d1b39d34cd6e0505ea5c125b26d5260a967"
|
||||
"checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a"
|
||||
"checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc"
|
||||
|
19
Cargo.toml
19
Cargo.toml
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ripgrep"
|
||||
version = "0.2.0" #:version
|
||||
version = "0.2.8" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Line oriented search tool using Rust's regex library. Combines the raw
|
||||
@@ -12,6 +12,7 @@ repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
exclude = ["HomebrewFormula"]
|
||||
|
||||
[[bin]]
|
||||
bench = false
|
||||
@@ -23,31 +24,29 @@ name = "integration"
|
||||
path = "tests/tests.rs"
|
||||
|
||||
[dependencies]
|
||||
deque = "0.3"
|
||||
bytecount = "0.1.4"
|
||||
ctrlc = "2.0"
|
||||
docopt = "0.6"
|
||||
env_logger = "0.3"
|
||||
fnv = "1.0"
|
||||
grep = { version = "0.1.3", path = "grep" }
|
||||
grep = { version = "0.1.4", path = "grep" }
|
||||
ignore = { version = "0.1.4", path = "ignore" }
|
||||
lazy_static = "0.2"
|
||||
libc = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
memmap = "0.2"
|
||||
memmap = "0.5"
|
||||
num_cpus = "1"
|
||||
regex = "0.1.77"
|
||||
rustc-serialize = "0.3"
|
||||
term = "0.4"
|
||||
walkdir = "0.1"
|
||||
|
||||
[target.'cfg(windows)'.dependencies]
|
||||
kernel32-sys = "0.2"
|
||||
winapi = "0.2"
|
||||
|
||||
[features]
|
||||
simd-accel = ["regex/simd-accel"]
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2"
|
||||
avx-accel = ["bytecount/avx-accel"]
|
||||
simd-accel = ["bytecount/simd-accel", "regex/simd-accel"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
1
HomebrewFormula
Symbolic link
1
HomebrewFormula
Symbolic link
@@ -0,0 +1 @@
|
||||
pkg/brew
|
54
README.md
54
README.md
@@ -5,7 +5,7 @@ Silver Searcher (an `ack` clone) with the raw speed of GNU grep. `ripgrep` has
|
||||
first class support on Windows, Mac and Linux, with binary downloads available
|
||||
for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
|
||||
[Build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[ripgrep on crates.io](https://crates.io/crates/ripgrep)
|
||||
|
||||
@@ -30,7 +30,7 @@ for a very detailed comparison with more benchmarks and analysis.
|
||||
| ripgrep | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.245s** |
|
||||
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.753s |
|
||||
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.823s |
|
||||
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s |
|
||||
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s |
|
||||
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.656s |
|
||||
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 12.369s |
|
||||
| [ack](http://beyondgrep.com/) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 16.952s |
|
||||
@@ -97,18 +97,53 @@ but you'll need to have the
|
||||
[Microsoft VC++ 2015 redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
|
||||
installed.
|
||||
|
||||
If you're a **Homebrew** user, then you can install it with a custom formula
|
||||
(N.B. `ripgrep` isn't actually in Homebrew yet. This just installs the binary
|
||||
directly):
|
||||
If you're a **Mac OS X Homebrew** user, then you can install ripgrep either
|
||||
from homebrew-core (compiled with rust stable, no SIMD):
|
||||
|
||||
```
|
||||
$ brew install https://raw.githubusercontent.com/BurntSushi/ripgrep/master/pkg/brew/ripgrep.rb
|
||||
$ brew install ripgrep
|
||||
```
|
||||
|
||||
or you can install a binary compiled with rust nightly (including SIMD and all
|
||||
optimizations) by utilizing a custom tap:
|
||||
|
||||
```
|
||||
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
|
||||
$ brew install burntsushi/ripgrep/ripgrep-bin
|
||||
```
|
||||
|
||||
If you're an **Arch Linux** user, then you can install `ripgrep` from the official repos:
|
||||
|
||||
```
|
||||
$ pacman -Syu ripgrep
|
||||
$ pacman -S ripgrep
|
||||
```
|
||||
|
||||
If you're a **Gentoo** user, you can install `ripgrep` from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
|
||||
|
||||
```
|
||||
$ emerge ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora 24+** user, you can install `ripgrep` from [copr](https://copr.fedorainfracloud.org/coprs/carlgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ dnf copr enable carlgeorge/ripgrep
|
||||
$ dnf install ripgrep
|
||||
```
|
||||
|
||||
If you're a **RHEL/CentOS 7** user, you can install `ripgrep` from [copr](https://copr.fedorainfracloud.org/coprs/carlgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlgeorge/ripgrep/repo/epel-7/carlgeorge-ripgrep-epel-7.repo
|
||||
$ yum install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Nix** user, you can install `ripgrep` from
|
||||
[nixpkgs](https://github.com/NixOS/nixpkgs/blob/master/pkgs/tools/text/ripgrep/default.nix):
|
||||
|
||||
```
|
||||
$ nix-env --install ripgrep
|
||||
$ # (Or using the attribute name, which is also `ripgrep`.)
|
||||
```
|
||||
|
||||
If you're a **Rust programmer**, `ripgrep` can be installed with `cargo`:
|
||||
@@ -214,10 +249,11 @@ $ rg -Tjs foobar
|
||||
```
|
||||
|
||||
To see a list of types supported, run `rg --type-list`. To add a new type, use
|
||||
`--type-add`:
|
||||
`--type-add`, which must be accompanied by a pattern for searching (`rg` won't
|
||||
persist your type settings):
|
||||
|
||||
```
|
||||
$ rg --type-add 'foo:*.foo,*.foobar'
|
||||
$ rg --type-add 'foo:*.{foo,foobar}' -tfoo bar
|
||||
```
|
||||
|
||||
The type `foo` will now match any file ending with the `.foo` or `.foobar`
|
||||
|
11
appveyor.yml
11
appveyor.yml
@@ -28,6 +28,9 @@ build: false
|
||||
# TODO modify this phase as you see fit
|
||||
test_script:
|
||||
- cargo test --verbose
|
||||
- cargo test --verbose --manifest-path grep/Cargo.toml
|
||||
- cargo test --verbose --manifest-path globset/Cargo.toml
|
||||
- cargo test --verbose --manifest-path ignore/Cargo.toml
|
||||
|
||||
before_deploy:
|
||||
# Generate artifacts for release
|
||||
@@ -41,7 +44,7 @@ before_deploy:
|
||||
- appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip
|
||||
|
||||
deploy:
|
||||
description: 'Windows release'
|
||||
description: 'Automatically deployed release'
|
||||
# All the zipped artifacts will be deployed
|
||||
artifact: /.*\.zip/
|
||||
auth_token:
|
||||
@@ -57,7 +60,9 @@ deploy:
|
||||
|
||||
branches:
|
||||
only:
|
||||
- appveyor
|
||||
- /\d+\.\d+\.\d+/
|
||||
except:
|
||||
- master
|
||||
# - appveyor
|
||||
# - /\d+\.\d+\.\d+/
|
||||
# except:
|
||||
# - master
|
||||
|
@@ -1,5 +0,0 @@
|
||||
These are internal microbenchmarks for tracking the performance of individual
|
||||
components inside of ripgrep. At the moment, they aren't heavily used.
|
||||
|
||||
For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
|
||||
directory.
|
@@ -19,6 +19,12 @@ run_test_suite() {
|
||||
cargo clean --target $TARGET --verbose
|
||||
cargo build --target $TARGET --verbose
|
||||
cargo test --target $TARGET --verbose
|
||||
cargo build --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml
|
||||
cargo build --target $TARGET --verbose --manifest-path ignore/Cargo.toml
|
||||
cargo test --target $TARGET --verbose --manifest-path ignore/Cargo.toml
|
||||
|
||||
# sanity check the file type
|
||||
file target/$TARGET/debug/rg
|
||||
|
6
compile
6
compile
@@ -1,5 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
export RUSTFLAGS="-C target-feature=+ssse3"
|
||||
# export RUSTFLAGS="-C target-cpu=native"
|
||||
cargo build --release --features simd-accel
|
||||
# export RUSTFLAGS="-C target-feature=+ssse3"
|
||||
export RUSTFLAGS="-C target-cpu=native"
|
||||
cargo build --release --features 'simd-accel avx-accel'
|
||||
|
62
doc/rg.1
62
doc/rg.1
@@ -1,4 +1,4 @@
|
||||
.\" Automatically generated by Pandoc 1.17.2
|
||||
.\" Automatically generated by Pandoc 1.18
|
||||
.\"
|
||||
.TH "rg" "1"
|
||||
.hy
|
||||
@@ -21,8 +21,10 @@ rg [\f[I]options\f[]] \-\-help
|
||||
rg [\f[I]options\f[]] \-\-version
|
||||
.SH DESCRIPTION
|
||||
.PP
|
||||
rg (ripgrep) combines the usability of The Silver Searcher (an ack
|
||||
ripgrep (rg) combines the usability of The Silver Searcher (an ack
|
||||
clone) with the raw speed of grep.
|
||||
.PP
|
||||
Project home page: https://github.com/BurntSushi/ripgrep
|
||||
.SH COMMON OPTIONS
|
||||
.TP
|
||||
.B \-a, \-\-text
|
||||
@@ -46,6 +48,7 @@ Valid values are never, always or auto.
|
||||
Use PATTERN to search.
|
||||
This option can be provided multiple times, where all patterns given are
|
||||
searched.
|
||||
This is also useful when searching for patterns that start with a dash.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
@@ -70,6 +73,7 @@ Show this usage message.
|
||||
.TP
|
||||
.B \-i, \-\-ignore\-case
|
||||
Case insensitive search.
|
||||
Overridden by \-\-case\-sensitive.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
@@ -90,12 +94,6 @@ If a match is found in a file, stop searching that file.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-r, \-\-replace \f[I]ARG\f[]
|
||||
Replace every match with the string given.
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-t, \-\-type \f[I]TYPE\f[] ...
|
||||
Only search files matching TYPE.
|
||||
Multiple type flags may be provided.
|
||||
@@ -209,6 +207,17 @@ Follow symlinks.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-m, \-\-max\-count NUM
|
||||
Limit the number of matching lines per file searched to NUM.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-maxdepth \f[I]NUM\f[]
|
||||
Descend at most NUM directories below the command line arguments.
|
||||
A value of zero searches only the starting\-points themselves.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-mmap
|
||||
Search using memory maps when possible.
|
||||
This is enabled by default when ripgrep thinks it will be faster.
|
||||
@@ -217,6 +226,11 @@ context related options.)
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-messages
|
||||
Suppress all error messages.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-no\-mmap
|
||||
Never use memory maps, even when they might be faster.
|
||||
.RS
|
||||
@@ -239,20 +253,44 @@ Note that .ignore files will continue to be respected.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-null
|
||||
Whenever a file name is printed, follow it with a NUL byte.
|
||||
This includes printing filenames before matches, and when printing a
|
||||
list of matching files such as with \-\-count, \-\-files\-with\-matches
|
||||
and \-\-files.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-p, \-\-pretty
|
||||
Alias for \-\-color=always \-\-heading \-n.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-r, \-\-replace \f[I]ARG\f[]
|
||||
Replace every match with the string given when printing search results.
|
||||
Neither this flag nor any other flag will modify your files.
|
||||
.RS
|
||||
.PP
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported in
|
||||
the replacement string.
|
||||
.RE
|
||||
.TP
|
||||
.B \-s, \-\-case\-sensitive
|
||||
Search case sensitively.
|
||||
This overrides \-\-ignore\-case and \-\-smart\-case.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-S, \-\-smart\-case
|
||||
Search case insensitively if the pattern is all lowercase.
|
||||
Search case sensitively otherwise.
|
||||
This is overridden by either \-\-case\-sensitive or \-\-ignore\-case.
|
||||
.RS
|
||||
.RE
|
||||
.TP
|
||||
.B \-j, \-\-threads \f[I]ARG\f[]
|
||||
The number of threads to use.
|
||||
Defaults to the number of logical CPUs (capped at 6).
|
||||
0 means use the number of logical CPUs (capped at 6).
|
||||
[default: 0]
|
||||
.RS
|
||||
.RE
|
||||
@@ -283,10 +321,12 @@ Multiple \-\-type\-add flags can be provided.
|
||||
Unless \-\-type\-clear is used, globs are added to any existing globs
|
||||
inside of ripgrep.
|
||||
Note that this must be passed to every invocation of rg.
|
||||
Type settings are NOT persisted.
|
||||
.RS
|
||||
.RE
|
||||
.PP
|
||||
Example: \f[C]\-\-type\-add\ html:*.html\f[]
|
||||
Example:
|
||||
\f[C]rg\ \-\-type\-add\ \[aq]foo:*.foo\[aq]\ \-tfoo\ PATTERN\f[]
|
||||
.RE
|
||||
.TP
|
||||
.B \-\-type\-clear \f[I]TYPE\f[] ...
|
||||
Clear the file type globs previously defined for TYPE.
|
||||
|
49
doc/rg.1.md
49
doc/rg.1.md
@@ -18,9 +18,11 @@ rg [*options*] --version
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
rg (ripgrep) combines the usability of The Silver Searcher (an ack clone) with
|
||||
ripgrep (rg) combines the usability of The Silver Searcher (an ack clone) with
|
||||
the raw speed of grep.
|
||||
|
||||
Project home page: https://github.com/BurntSushi/ripgrep
|
||||
|
||||
# COMMON OPTIONS
|
||||
|
||||
-a, --text
|
||||
@@ -35,7 +37,8 @@ the raw speed of grep.
|
||||
|
||||
-e, --regexp *PATTERN* ...
|
||||
: Use PATTERN to search. This option can be provided multiple times, where all
|
||||
patterns given are searched.
|
||||
patterns given are searched. This is also useful when searching for patterns
|
||||
that start with a dash.
|
||||
|
||||
-F, --fixed-strings
|
||||
: Treat the pattern as a literal string instead of a regular expression.
|
||||
@@ -49,7 +52,7 @@ the raw speed of grep.
|
||||
: Show this usage message.
|
||||
|
||||
-i, --ignore-case
|
||||
: Case insensitive search.
|
||||
: Case insensitive search. Overridden by --case-sensitive.
|
||||
|
||||
-n, --line-number
|
||||
: Show line numbers (1-based). This is enabled by default at a tty.
|
||||
@@ -61,10 +64,6 @@ the raw speed of grep.
|
||||
: Do not print anything to stdout. If a match is found in a file, stop
|
||||
searching that file.
|
||||
|
||||
-r, --replace *ARG*
|
||||
: Replace every match with the string given. Capture group indices (e.g., $5)
|
||||
and names (e.g., $foo) are supported.
|
||||
|
||||
-t, --type *TYPE* ...
|
||||
: Only search files matching TYPE. Multiple type flags may be provided. Use the
|
||||
--type-list flag to list all available types.
|
||||
@@ -136,11 +135,21 @@ the raw speed of grep.
|
||||
-L, --follow
|
||||
: Follow symlinks.
|
||||
|
||||
-m, --max-count NUM
|
||||
: Limit the number of matching lines per file searched to NUM.
|
||||
|
||||
--maxdepth *NUM*
|
||||
: Descend at most NUM directories below the command line arguments.
|
||||
A value of zero searches only the starting-points themselves.
|
||||
|
||||
--mmap
|
||||
: Search using memory maps when possible. This is enabled by default
|
||||
when ripgrep thinks it will be faster. (Note that mmap searching
|
||||
doesn't currently support the various context related options.)
|
||||
|
||||
--no-messages
|
||||
: Suppress all error messages.
|
||||
|
||||
--no-mmap
|
||||
: Never use memory maps, even when they might be faster.
|
||||
|
||||
@@ -155,15 +164,32 @@ the raw speed of grep.
|
||||
: Don't respect version control ignore files (e.g., .gitignore).
|
||||
Note that .ignore files will continue to be respected.
|
||||
|
||||
--null
|
||||
: Whenever a file name is printed, follow it with a NUL byte.
|
||||
This includes printing filenames before matches, and when printing
|
||||
a list of matching files such as with --count, --files-with-matches
|
||||
and --files.
|
||||
|
||||
-p, --pretty
|
||||
: Alias for --color=always --heading -n.
|
||||
|
||||
-r, --replace *ARG*
|
||||
: Replace every match with the string given when printing search results.
|
||||
Neither this flag nor any other flag will modify your files.
|
||||
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported
|
||||
in the replacement string.
|
||||
|
||||
-s, --case-sensitive
|
||||
: Search case sensitively. This overrides --ignore-case and --smart-case.
|
||||
|
||||
-S, --smart-case
|
||||
: Search case insensitively if the pattern is all lowercase.
|
||||
Search case sensitively otherwise.
|
||||
Search case sensitively otherwise. This is overridden by either
|
||||
--case-sensitive or --ignore-case.
|
||||
|
||||
-j, --threads *ARG*
|
||||
: The number of threads to use. Defaults to the number of logical CPUs
|
||||
: The number of threads to use. 0 means use the number of logical CPUs
|
||||
(capped at 6). [default: 0]
|
||||
|
||||
--version
|
||||
@@ -183,9 +209,10 @@ the raw speed of grep.
|
||||
: Add a new glob for a particular file type. Only one glob can be added
|
||||
at a time. Multiple --type-add flags can be provided. Unless --type-clear
|
||||
is used, globs are added to any existing globs inside of ripgrep. Note that
|
||||
this must be passed to every invocation of rg.
|
||||
this must be passed to every invocation of rg. Type settings are NOT
|
||||
persisted.
|
||||
|
||||
Example: `--type-add html:*.html`
|
||||
Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN`
|
||||
|
||||
--type-clear *TYPE* ...
|
||||
: Clear the file type globs previously defined for TYPE. This only clears
|
||||
|
33
globset/Cargo.toml
Normal file
33
globset/Cargo.toml
Normal file
@@ -0,0 +1,33 @@
|
||||
[package]
|
||||
name = "globset"
|
||||
version = "0.1.2" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
process of matching one or more glob patterns against a single candidate path
|
||||
simultaneously, and returning all of the globs that matched.
|
||||
"""
|
||||
documentation = "https://docs.rs/globset"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "glob", "multiple", "set", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[lib]
|
||||
name = "globset"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.5.3"
|
||||
fnv = "1.0"
|
||||
lazy_static = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
regex = "0.1.77"
|
||||
|
||||
[dev-dependencies]
|
||||
glob = "0.2"
|
||||
|
||||
[features]
|
||||
simd-accel = ["regex/simd-accel"]
|
122
globset/README.md
Normal file
122
globset/README.md
Normal file
@@ -0,0 +1,122 @@
|
||||
globset
|
||||
=======
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
process of matching one or more glob patterns against a single candidate path
|
||||
simultaneously, and returning all of the globs that matched.
|
||||
|
||||
[](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[](https://crates.io/crates/globset)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/globset](https://docs.rs/globset)
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
globset = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate globset;
|
||||
```
|
||||
|
||||
### Example: one glob
|
||||
|
||||
This example shows how to match a single glob against a single file path.
|
||||
|
||||
```rust
|
||||
use globset::Glob;
|
||||
|
||||
let glob = try!(Glob::new("*.rs")).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(glob.is_match("foo/bar.rs"));
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
```
|
||||
|
||||
### Example: configuring a glob matcher
|
||||
|
||||
This example shows how to use a `GlobBuilder` to configure aspects of match
|
||||
semantics. In this example, we prevent wildcards from matching path separators.
|
||||
|
||||
```rust
|
||||
use globset::GlobBuilder;
|
||||
|
||||
let glob = try!(GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
```
|
||||
|
||||
### Example: match multiple globs at once
|
||||
|
||||
This example shows how to match multiple glob patterns at once.
|
||||
|
||||
```rust
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
// A GlobBuilder can be used to configure each glob's match semantics
|
||||
// independently.
|
||||
builder.add(try!(Glob::new("*.rs")));
|
||||
builder.add(try!(Glob::new("src/lib.rs")));
|
||||
builder.add(try!(Glob::new("src/**/foo.rs")));
|
||||
let set = try!(builder.build());
|
||||
|
||||
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
```
|
||||
|
||||
### Performance
|
||||
|
||||
This crate implements globs by converting them to regular expressions, and
|
||||
executing them with the
|
||||
[`regex`](https://github.com/rust-lang-nursery/regex)
|
||||
crate.
|
||||
|
||||
For single glob matching, performance of this crate should be roughly on par
|
||||
with the performance of the
|
||||
[`glob`](https://github.com/rust-lang-nursery/glob)
|
||||
crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
|
||||
correspond to benchmarks for the `glob` library.)
|
||||
Optimizations in the `regex` crate may propel this library past `glob`,
|
||||
particularly when matching longer paths.
|
||||
|
||||
```
|
||||
test ext_glob ... bench: 425 ns/iter (+/- 21)
|
||||
test ext_regex ... bench: 175 ns/iter (+/- 10)
|
||||
test long_glob ... bench: 182 ns/iter (+/- 11)
|
||||
test long_regex ... bench: 173 ns/iter (+/- 10)
|
||||
test short_glob ... bench: 69 ns/iter (+/- 4)
|
||||
test short_regex ... bench: 83 ns/iter (+/- 2)
|
||||
```
|
||||
|
||||
The primary performance advantage of this crate is when matching multiple
|
||||
globs against a single path. With the `glob` crate, one must match each glob
|
||||
sequentially, one after the other. In this crate, many can be matched
|
||||
simultaneously. For example:
|
||||
|
||||
```
|
||||
test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
|
||||
test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
|
||||
```
|
||||
|
||||
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
|
||||
|
||||
* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
|
||||
* Can match non-UTF-8 file paths correctly.
|
||||
* Supports matching multiple globs at once.
|
||||
* Doesn't provide a recursive directory iterator of matching file paths,
|
||||
although I believe this crate should grow one eventually.
|
||||
* Supports case insensitive and require-literal-separator match options, but
|
||||
**doesn't** support the require-literal-leading-dot option.
|
@@ -5,37 +5,53 @@ tool itself, see the benchsuite directory.
|
||||
#![feature(test)]
|
||||
|
||||
extern crate glob;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
|
||||
|
||||
const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
|
||||
const EXT_PAT: &'static str = "*.txt";
|
||||
|
||||
const SHORT: &'static str = "some/needle.txt";
|
||||
const SHORT_PAT: &'static str = "some/**/needle.txt";
|
||||
|
||||
const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
|
||||
const LONG_PAT: &'static str = "some/**/needle.txt";
|
||||
|
||||
#[allow(dead_code, unused_variables)]
|
||||
#[path = "../src/glob.rs"]
|
||||
mod reglob;
|
||||
|
||||
fn new_glob(pat: &str) -> glob::Pattern {
|
||||
glob::Pattern::new(pat).unwrap()
|
||||
}
|
||||
|
||||
fn new_reglob(pat: &str) -> reglob::Set {
|
||||
let mut builder = reglob::SetBuilder::new();
|
||||
builder.add(pat).unwrap();
|
||||
fn new_reglob(pat: &str) -> GlobMatcher {
|
||||
Glob::new(pat).unwrap().compile_matcher()
|
||||
}
|
||||
|
||||
fn new_reglob_many(pats: &[&str]) -> GlobSet {
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
for pat in pats {
|
||||
builder.add(Glob::new(pat).unwrap());
|
||||
}
|
||||
builder.build().unwrap()
|
||||
}
|
||||
|
||||
fn new_reglob_many(pats: &[&str]) -> reglob::Set {
|
||||
let mut builder = reglob::SetBuilder::new();
|
||||
for pat in pats {
|
||||
builder.add(pat).unwrap();
|
||||
}
|
||||
builder.build().unwrap()
|
||||
#[bench]
|
||||
fn ext_glob(b: &mut test::Bencher) {
|
||||
let pat = new_glob(EXT_PAT);
|
||||
b.iter(|| assert!(pat.matches(EXT)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
fn ext_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(EXT_PAT);
|
||||
let cand = Candidate::new(EXT);
|
||||
b.iter(|| assert!(set.is_match_candidate(&cand)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
@@ -47,7 +63,8 @@ fn short_glob(b: &mut test::Bencher) {
|
||||
#[bench]
|
||||
fn short_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(SHORT_PAT);
|
||||
b.iter(|| assert!(set.is_match(SHORT)));
|
||||
let cand = Candidate::new(SHORT);
|
||||
b.iter(|| assert!(set.is_match_candidate(&cand)));
|
||||
}
|
||||
|
||||
#[bench]
|
||||
@@ -59,7 +76,8 @@ fn long_glob(b: &mut test::Bencher) {
|
||||
#[bench]
|
||||
fn long_regex(b: &mut test::Bencher) {
|
||||
let set = new_reglob(LONG_PAT);
|
||||
b.iter(|| assert!(set.is_match(LONG)));
|
||||
let cand = Candidate::new(LONG);
|
||||
b.iter(|| assert!(set.is_match_candidate(&cand)));
|
||||
}
|
||||
|
||||
const MANY_SHORT_GLOBS: &'static [&'static str] = &[
|
||||
@@ -101,26 +119,3 @@ fn many_short_regex_set(b: &mut test::Bencher) {
|
||||
let set = new_reglob_many(MANY_SHORT_GLOBS);
|
||||
b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
|
||||
}
|
||||
|
||||
// This is the fastest on my system (beating many_glob by about 2x). This
|
||||
// suggests that a RegexSet needs quite a few regexes (or a larger haystack)
|
||||
// in order for it to scale.
|
||||
//
|
||||
// TODO(burntsushi): come up with a benchmark that uses more complex patterns
|
||||
// or a longer haystack.
|
||||
#[bench]
|
||||
fn many_short_regex_pattern(b: &mut test::Bencher) {
|
||||
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| {
|
||||
let pat = reglob::Pattern::new(s).unwrap();
|
||||
regex::Regex::new(&pat.to_regex()).unwrap()
|
||||
}).collect();
|
||||
b.iter(|| {
|
||||
let mut count = 0;
|
||||
for pat in &pats {
|
||||
if pat.is_match(MANY_SHORT_SEARCH) {
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
assert_eq!(2, count);
|
||||
})
|
||||
}
|
1300
globset/src/glob.rs
Normal file
1300
globset/src/glob.rs
Normal file
File diff suppressed because it is too large
Load Diff
791
globset/src/lib.rs
Normal file
791
globset/src/lib.rs
Normal file
@@ -0,0 +1,791 @@
|
||||
/*!
|
||||
The globset crate provides cross platform single glob and glob set matching.
|
||||
|
||||
Glob set matching is the process of matching one or more glob patterns against
|
||||
a single candidate path simultaneously, and returning all of the globs that
|
||||
matched. For example, given this set of globs:
|
||||
|
||||
```ignore
|
||||
*.rs
|
||||
src/lib.rs
|
||||
src/**/foo.rs
|
||||
```
|
||||
|
||||
and a path `src/bar/baz/foo.rs`, then the set would report the first and third
|
||||
globs as matching.
|
||||
|
||||
Single glob matching is also provided and is done by converting globs to regular expressions.
|
||||
|
||||
# Example: one glob
|
||||
|
||||
This example shows how to match a single glob against a single file path.
|
||||
|
||||
```
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::Glob;
|
||||
|
||||
let glob = try!(Glob::new("*.rs")).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(glob.is_match("foo/bar.rs"));
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
# Ok(()) } example().unwrap();
|
||||
```
|
||||
|
||||
# Example: configuring a glob matcher
|
||||
|
||||
This example shows how to use a `GlobBuilder` to configure aspects of match
|
||||
semantics. In this example, we prevent wildcards from matching path separators.
|
||||
|
||||
```
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::GlobBuilder;
|
||||
|
||||
let glob = try!(GlobBuilder::new("*.rs")
|
||||
.literal_separator(true).build()).compile_matcher();
|
||||
|
||||
assert!(glob.is_match("foo.rs"));
|
||||
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
|
||||
assert!(!glob.is_match("Cargo.toml"));
|
||||
# Ok(()) } example().unwrap();
|
||||
```
|
||||
|
||||
# Example: match multiple globs at once
|
||||
|
||||
This example shows how to match multiple glob patterns at once.
|
||||
|
||||
```
|
||||
# fn example() -> Result<(), globset::Error> {
|
||||
use globset::{Glob, GlobSetBuilder};
|
||||
|
||||
let mut builder = GlobSetBuilder::new();
|
||||
// A GlobBuilder can be used to configure each glob's match semantics
|
||||
// independently.
|
||||
builder.add(try!(Glob::new("*.rs")));
|
||||
builder.add(try!(Glob::new("src/lib.rs")));
|
||||
builder.add(try!(Glob::new("src/**/foo.rs")));
|
||||
let set = try!(builder.build());
|
||||
|
||||
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
# Ok(()) } example().unwrap();
|
||||
```
|
||||
|
||||
# Syntax
|
||||
|
||||
Standard Unix-style glob syntax is supported:
|
||||
|
||||
* `?` matches any single character. (If the `literal_separator` option is
|
||||
enabled, then `?` can never match a path separator.)
|
||||
* `*` matches zero or more characters. (If the `literal_separator` option is
|
||||
enabled, then `*` can never match a path separator.)
|
||||
* `**` recursively matches directories but is only legal in three situations.
|
||||
First, if the glob starts with <code>\*\*/</code>, then it matches
|
||||
all directories. For example, <code>\*\*/foo</code> matches `foo`
|
||||
and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
|
||||
<code>/\*\*</code>, then it matches all sub-entries. For example,
|
||||
<code>foo/\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
|
||||
Thirdly, if the glob contains <code>/\*\*/</code> anywhere within
|
||||
the pattern, then it matches zero or more directories. Using `**` anywhere
|
||||
else is illegal (N.B. the glob `**` is allowed and means "match everything").
|
||||
* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
|
||||
(N.B. Nesting `{...}` is not currently allowed.)
|
||||
* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
|
||||
`[!ab]` to match any character except for `a` and `b`.
|
||||
* Metacharacters such as `*` and `?` can be escaped with character class
|
||||
notation. e.g., `[*]` matches `*`.
|
||||
|
||||
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
|
||||
or to enable case insensitive matching.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate fnv;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fmt;
|
||||
use std::hash;
|
||||
use std::path::Path;
|
||||
use std::str;
|
||||
|
||||
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use pathutil::{
|
||||
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
|
||||
};
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
|
||||
mod glob;
|
||||
mod pathutil;
|
||||
|
||||
/// Represents an error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub enum Error {
|
||||
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
|
||||
/// adjacent to a path separator, or the beginning/end of a glob.
|
||||
InvalidRecursive,
|
||||
/// Occurs when a character class (e.g., `[abc]`) is not closed.
|
||||
UnclosedClass,
|
||||
/// Occurs when a range in a character class (e.g., `[a-z]`) is invalid. For
|
||||
/// example, if the range starts with a lexicographically larger character
|
||||
/// than it ends with.
|
||||
InvalidRange(char, char),
|
||||
/// Occurs when a `}` is found without a matching `{`.
|
||||
UnopenedAlternates,
|
||||
/// Occurs when a `{` is found without a matching `}`.
|
||||
UnclosedAlternates,
|
||||
/// Occurs when an alternating group is nested inside another alternating
|
||||
/// group, e.g., `{{a,b},{c,d}}`.
|
||||
NestedAlternates,
|
||||
/// An error associated with parsing or compiling a regex.
|
||||
Regex(String),
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::InvalidRecursive => {
|
||||
"invalid use of **; must be one path component"
|
||||
}
|
||||
Error::UnclosedClass => {
|
||||
"unclosed character class; missing ']'"
|
||||
}
|
||||
Error::InvalidRange(_, _) => {
|
||||
"invalid character range"
|
||||
}
|
||||
Error::UnopenedAlternates => {
|
||||
"unopened alternate group; missing '{' \
|
||||
(maybe escape '}' with '[}]'?)"
|
||||
}
|
||||
Error::UnclosedAlternates => {
|
||||
"unclosed alternate group; missing '}' \
|
||||
(maybe escape '{' with '[{]'?)"
|
||||
}
|
||||
Error::NestedAlternates => {
|
||||
"nested alternate groups are not allowed"
|
||||
}
|
||||
Error::Regex(ref err) => err,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::InvalidRecursive
|
||||
| Error::UnclosedClass
|
||||
| Error::UnopenedAlternates
|
||||
| Error::UnclosedAlternates
|
||||
| Error::NestedAlternates
|
||||
| Error::Regex(_) => {
|
||||
write!(f, "{}", self.description())
|
||||
}
|
||||
Error::InvalidRange(s, e) => {
|
||||
write!(f, "invalid range; '{}' > '{}'", s, e)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn new_regex(pat: &str) -> Result<Regex, Error> {
|
||||
RegexBuilder::new(pat)
|
||||
.dot_matches_new_line(true)
|
||||
.size_limit(10 * (1 << 20))
|
||||
.dfa_size_limit(10 * (1 << 20))
|
||||
.compile()
|
||||
.map_err(|err| Error::Regex(err.to_string()))
|
||||
}
|
||||
|
||||
fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
|
||||
where S: AsRef<str>, I: IntoIterator<Item=S> {
|
||||
RegexSet::new(pats).map_err(|err| Error::Regex(err.to_string()))
|
||||
}
|
||||
|
||||
/// Hasher state for the extension-keyed hash maps in this file, built on
/// `fnv::FnvHasher` rather than the standard library's default hasher.
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
|
||||
|
||||
/// GlobSet represents a group of globs that can be matched together in a
|
||||
/// single pass.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct GlobSet {
|
||||
len: usize,
|
||||
strats: Vec<GlobSetMatchStrategy>,
|
||||
}
|
||||
|
||||
impl GlobSet {
|
||||
/// Returns true if this set is empty, and therefore matches nothing.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len == 0
|
||||
}
|
||||
|
||||
/// Returns the number of globs in this set.
|
||||
pub fn len(&self) -> usize {
|
||||
self.len
|
||||
}
|
||||
|
||||
/// Returns true if any glob in this set matches the path given.
|
||||
pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
|
||||
self.is_match_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
|
||||
/// Returns true if any glob in this set matches the path given.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
if self.is_empty() {
|
||||
return false;
|
||||
}
|
||||
for strat in &self.strats {
|
||||
if strat.is_match(path) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns the sequence number of every glob pattern that matches the
|
||||
/// given path.
|
||||
pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
|
||||
self.matches_candidate(&Candidate::new(path.as_ref()))
|
||||
}
|
||||
|
||||
/// Returns the sequence number of every glob pattern that matches the
|
||||
/// given path.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
|
||||
let mut into = vec![];
|
||||
if self.is_empty() {
|
||||
return into;
|
||||
}
|
||||
self.matches_candidate_into(path, &mut into);
|
||||
into
|
||||
}
|
||||
|
||||
/// Adds the sequence number of every glob pattern that matches the given
|
||||
/// path to the vec given.
|
||||
///
|
||||
/// `into` is is cleared before matching begins, and contains the set of
|
||||
/// sequence numbers (in ascending order) after matching ends. If no globs
|
||||
/// were matched, then `into` will be empty.
|
||||
pub fn matches_into<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
self.matches_candidate_into(&Candidate::new(path.as_ref()), into);
|
||||
}
|
||||
|
||||
/// Adds the sequence number of every glob pattern that matches the given
|
||||
/// path to the vec given.
|
||||
///
|
||||
/// `into` is is cleared before matching begins, and contains the set of
|
||||
/// sequence numbers (in ascending order) after matching ends. If no globs
|
||||
/// were matched, then `into` will be empty.
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate_into(
|
||||
&self,
|
||||
path: &Candidate,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
into.clear();
|
||||
if self.is_empty() {
|
||||
return;
|
||||
}
|
||||
for strat in &self.strats {
|
||||
strat.matches_into(path, into);
|
||||
}
|
||||
into.sort();
|
||||
into.dedup();
|
||||
}
|
||||
|
||||
fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
|
||||
if pats.is_empty() {
|
||||
return Ok(GlobSet { len: 0, strats: vec![] });
|
||||
}
|
||||
let mut lits = LiteralStrategy::new();
|
||||
let mut base_lits = BasenameLiteralStrategy::new();
|
||||
let mut exts = ExtensionStrategy::new();
|
||||
let mut prefixes = MultiStrategyBuilder::new();
|
||||
let mut suffixes = MultiStrategyBuilder::new();
|
||||
let mut required_exts = RequiredExtensionStrategyBuilder::new();
|
||||
let mut regexes = MultiStrategyBuilder::new();
|
||||
for (i, p) in pats.iter().enumerate() {
|
||||
match MatchStrategy::new(p) {
|
||||
MatchStrategy::Literal(lit) => {
|
||||
lits.add(i, lit);
|
||||
}
|
||||
MatchStrategy::BasenameLiteral(lit) => {
|
||||
base_lits.add(i, lit);
|
||||
}
|
||||
MatchStrategy::Extension(ext) => {
|
||||
exts.add(i, ext);
|
||||
}
|
||||
MatchStrategy::Prefix(prefix) => {
|
||||
prefixes.add(i, prefix);
|
||||
}
|
||||
MatchStrategy::Suffix { suffix, component } => {
|
||||
if component {
|
||||
lits.add(i, suffix[1..].to_string());
|
||||
}
|
||||
suffixes.add(i, suffix);
|
||||
}
|
||||
MatchStrategy::RequiredExtension(ext) => {
|
||||
required_exts.add(i, ext, p.regex().to_owned());
|
||||
}
|
||||
MatchStrategy::Regex => {
|
||||
debug!("glob converted to regex: {:?}", p);
|
||||
regexes.add(i, p.regex().to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
debug!("built glob set; {} literals, {} basenames, {} extensions, \
|
||||
{} prefixes, {} suffixes, {} required extensions, {} regexes",
|
||||
lits.0.len(), base_lits.0.len(), exts.0.len(),
|
||||
prefixes.literals.len(), suffixes.literals.len(),
|
||||
required_exts.0.len(), regexes.literals.len());
|
||||
Ok(GlobSet {
|
||||
len: pats.len(),
|
||||
strats: vec![
|
||||
GlobSetMatchStrategy::Extension(exts),
|
||||
GlobSetMatchStrategy::BasenameLiteral(base_lits),
|
||||
GlobSetMatchStrategy::Literal(lits),
|
||||
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
|
||||
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
|
||||
GlobSetMatchStrategy::RequiredExtension(
|
||||
try!(required_exts.build())),
|
||||
GlobSetMatchStrategy::Regex(try!(regexes.regex_set())),
|
||||
],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
/// GlobSetBuilder builds a group of patterns that can be used to
|
||||
/// simultaneously match a file path.
|
||||
pub struct GlobSetBuilder {
|
||||
pats: Vec<Glob>,
|
||||
}
|
||||
|
||||
impl GlobSetBuilder {
|
||||
/// Create a new GlobSetBuilder. A GlobSetBuilder can be used to add new
|
||||
/// patterns. Once all patterns have been added, `build` should be called
|
||||
/// to produce a `GlobSet`, which can then be used for matching.
|
||||
pub fn new() -> GlobSetBuilder {
|
||||
GlobSetBuilder { pats: vec![] }
|
||||
}
|
||||
|
||||
/// Builds a new matcher from all of the glob patterns added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new patterns can be added to it.
|
||||
pub fn build(&self) -> Result<GlobSet, Error> {
|
||||
GlobSet::new(&self.pats)
|
||||
}
|
||||
|
||||
/// Add a new pattern to this set.
|
||||
#[allow(dead_code)]
|
||||
pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
|
||||
self.pats.push(pat);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A candidate path for matching.
|
||||
///
|
||||
/// All glob matching in this crate operates on `Candidate` values.
|
||||
/// Constructing candidates has a very small cost associated with it, so
|
||||
/// callers may find it beneficial to amortize that cost when matching a single
|
||||
/// path against multiple globs or sets of globs.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Candidate<'a> {
|
||||
path: Cow<'a, [u8]>,
|
||||
basename: Cow<'a, [u8]>,
|
||||
ext: &'a OsStr,
|
||||
}
|
||||
|
||||
impl<'a> Candidate<'a> {
|
||||
/// Create a new candidate for matching from the given path.
|
||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
let path = path.as_ref();
|
||||
let basename = file_name(path).unwrap_or(OsStr::new(""));
|
||||
Candidate {
|
||||
path: normalize_path(path_bytes(path)),
|
||||
basename: os_str_bytes(basename),
|
||||
ext: file_name_ext(basename).unwrap_or(OsStr::new("")),
|
||||
}
|
||||
}
|
||||
|
||||
fn path_prefix(&self, max: usize) -> &[u8] {
|
||||
if self.path.len() <= max {
|
||||
&*self.path
|
||||
} else {
|
||||
&self.path[..max]
|
||||
}
|
||||
}
|
||||
|
||||
fn path_suffix(&self, max: usize) -> &[u8] {
|
||||
if self.path.len() <= max {
|
||||
&*self.path
|
||||
} else {
|
||||
&self.path[self.path.len() - max..]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobSetMatchStrategy {
|
||||
Literal(LiteralStrategy),
|
||||
BasenameLiteral(BasenameLiteralStrategy),
|
||||
Extension(ExtensionStrategy),
|
||||
Prefix(PrefixStrategy),
|
||||
Suffix(SuffixStrategy),
|
||||
RequiredExtension(RequiredExtensionStrategy),
|
||||
Regex(RegexSetStrategy),
|
||||
}
|
||||
|
||||
impl GlobSetMatchStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.is_match(candidate),
|
||||
BasenameLiteral(ref s) => s.is_match(candidate),
|
||||
Extension(ref s) => s.is_match(candidate),
|
||||
Prefix(ref s) => s.is_match(candidate),
|
||||
Suffix(ref s) => s.is_match(candidate),
|
||||
RequiredExtension(ref s) => s.is_match(candidate),
|
||||
Regex(ref s) => s.is_match(candidate),
|
||||
}
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.matches_into(candidate, matches),
|
||||
BasenameLiteral(ref s) => s.matches_into(candidate, matches),
|
||||
Extension(ref s) => s.matches_into(candidate, matches),
|
||||
Prefix(ref s) => s.matches_into(candidate, matches),
|
||||
Suffix(ref s) => s.matches_into(candidate, matches),
|
||||
RequiredExtension(ref s) => s.matches_into(candidate, matches),
|
||||
Regex(ref s) => s.matches_into(candidate, matches),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
|
||||
|
||||
impl LiteralStrategy {
|
||||
fn new() -> LiteralStrategy {
|
||||
LiteralStrategy(BTreeMap::new())
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, lit: String) {
|
||||
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.0.contains_key(&*candidate.path)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if let Some(hits) = self.0.get(&*candidate.path) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);
|
||||
|
||||
impl BasenameLiteralStrategy {
|
||||
fn new() -> BasenameLiteralStrategy {
|
||||
BasenameLiteralStrategy(BTreeMap::new())
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, lit: String) {
|
||||
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.basename.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(&*candidate.basename)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if candidate.basename.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(&*candidate.basename) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct ExtensionStrategy(HashMap<OsString, Vec<usize>, Fnv>);
|
||||
|
||||
impl ExtensionStrategy {
|
||||
fn new() -> ExtensionStrategy {
|
||||
ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, ext: OsString) {
|
||||
self.0.entry(ext).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
self.0.contains_key(candidate.ext)
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(hits) = self.0.get(candidate.ext) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct PrefixStrategy {
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
|
||||
impl PrefixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.start == 0 {
|
||||
matches.push(self.map[m.pati]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct SuffixStrategy {
|
||||
matcher: FullAcAutomaton<Vec<u8>>,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
|
||||
impl SuffixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping(path) {
|
||||
if m.end == path.len() {
|
||||
matches.push(self.map[m.pati]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct RequiredExtensionStrategy(HashMap<OsString, Vec<(usize, Regex)>, Fnv>);
|
||||
|
||||
impl RequiredExtensionStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
match self.0.get(candidate.ext) {
|
||||
None => false,
|
||||
Some(regexes) => {
|
||||
for &(_, ref re) in regexes {
|
||||
if re.is_match(&*candidate.path) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
if let Some(regexes) = self.0.get(candidate.ext) {
|
||||
for &(global_index, ref re) in regexes {
|
||||
if re.is_match(&*candidate.path) {
|
||||
matches.push(global_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct RegexSetStrategy {
|
||||
matcher: RegexSet,
|
||||
map: Vec<usize>,
|
||||
}
|
||||
|
||||
impl RegexSetStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.matcher.is_match(&*candidate.path)
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
for i in self.matcher.matches(&*candidate.path) {
|
||||
matches.push(self.map[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct MultiStrategyBuilder {
|
||||
literals: Vec<String>,
|
||||
map: Vec<usize>,
|
||||
longest: usize,
|
||||
}
|
||||
|
||||
impl MultiStrategyBuilder {
|
||||
fn new() -> MultiStrategyBuilder {
|
||||
MultiStrategyBuilder {
|
||||
literals: vec![],
|
||||
map: vec![],
|
||||
longest: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, literal: String) {
|
||||
if literal.len() > self.longest {
|
||||
self.longest = literal.len();
|
||||
}
|
||||
self.map.push(global_index);
|
||||
self.literals.push(literal);
|
||||
}
|
||||
|
||||
fn prefix(self) -> PrefixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
PrefixStrategy {
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
}
|
||||
|
||||
fn suffix(self) -> SuffixStrategy {
|
||||
let it = self.literals.into_iter().map(|s| s.into_bytes());
|
||||
SuffixStrategy {
|
||||
matcher: AcAutomaton::new(it).into_full(),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
}
|
||||
|
||||
fn regex_set(self) -> Result<RegexSetStrategy, Error> {
|
||||
Ok(RegexSetStrategy {
|
||||
matcher: try!(new_regex_set(self.literals)),
|
||||
map: self.map,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct RequiredExtensionStrategyBuilder(
|
||||
HashMap<OsString, Vec<(usize, String)>>,
|
||||
);
|
||||
|
||||
impl RequiredExtensionStrategyBuilder {
|
||||
fn new() -> RequiredExtensionStrategyBuilder {
|
||||
RequiredExtensionStrategyBuilder(HashMap::new())
|
||||
}
|
||||
|
||||
fn add(&mut self, global_index: usize, ext: OsString, regex: String) {
|
||||
self.0.entry(ext).or_insert(vec![]).push((global_index, regex));
|
||||
}
|
||||
|
||||
fn build(self) -> Result<RequiredExtensionStrategy, Error> {
|
||||
let mut exts = HashMap::with_hasher(Fnv::default());
|
||||
for (ext, regexes) in self.0.into_iter() {
|
||||
exts.insert(ext.clone(), vec![]);
|
||||
for (global_index, regex) in regexes {
|
||||
let compiled = try!(new_regex(®ex));
|
||||
exts.get_mut(&ext).unwrap().push((global_index, compiled));
|
||||
}
|
||||
}
|
||||
Ok(RequiredExtensionStrategy(exts))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::GlobSetBuilder;
    use glob::Glob;

    /// A three glob set matches the expected paths and reports the indices
    /// of the matching globs in ascending order.
    #[test]
    fn set_works() {
        let mut builder = GlobSetBuilder::new();
        for &pattern in &["src/**/*.rs", "*.c", "src/lib.rs"] {
            builder.add(Glob::new(pattern).unwrap());
        }
        let set = builder.build().unwrap();

        for &path in &["foo.c", "src/foo.c", "src/foo.rs", "src/grep/src/main.rs"] {
            assert!(set.is_match(path));
        }
        for &path in &["foo.rs", "tests/foo.rs"] {
            assert!(!set.is_match(path));
        }

        let matches = set.matches("src/lib.rs");
        assert_eq!(vec![0, 2], matches);
    }

    /// A set built from no globs matches nothing, not even the empty path.
    #[test]
    fn empty_set_works() {
        let set = GlobSetBuilder::new().build().unwrap();
        for &path in &["", "a"] {
            assert!(!set.is_match(path));
        }
    }
}
|
178
globset/src/pathutil.rs
Normal file
178
globset/src/pathutil.rs
Normal file
@@ -0,0 +1,178 @@
|
||||
use std::borrow::Cow;
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, as reported by
/// `std::path::Path::file_name`.
///
/// This is the variant compiled on non-Unix targets.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let path: &'a Path = path.as_ref();
    Path::file_name(path)
}
|
||||
|
||||
/// Return a file extension given a path's file name.
///
/// This deliberately differs from std::path::Path::extension: the extension
/// returned here includes the leading `.`, and more names are considered to
/// have one. Specifically, the extension is:
///
/// * None, if the file name given is empty;
/// * None, if the file name contains no `.`;
/// * Otherwise, the part of the file name starting at its final `.`.
///
/// e.g., A file name of `.rs` has an extension `.rs`.
///
/// N.B. This makes certain glob match optimizations easier. A pattern like
/// `*.rs` is obviously meant for files with a `rs` extension, but it also
/// matches a file named `.rs`, which has no extension according to
/// std::path::Path::extension.
pub fn file_name_ext(name: &OsStr) -> Option<&OsStr> {
    // These two helpers reach through the std::ffi abstraction barrier. They
    // rely on an OsStr's encoding being *ASCII compatible*. That clearly
    // holds on Unix, and it holds on Windows too because an OsStr uses WTF-8
    // internally: https://simonsapin.github.io/wtf-8/
    //
    // The other path utility functions could be given the same treatment.
    // For now they stay within the abstraction, and Windows users pay for
    // it.
    //
    // Got any better ideas that don't cost anything? Hit me up. ---AG
    unsafe fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
        ::std::mem::transmute(s)
    }
    unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
        ::std::mem::transmute(s)
    }

    if name.is_empty() {
        return None;
    }
    let bytes = unsafe { os_str_as_u8_slice(name) };
    // Search from the back so that the final `.` wins.
    bytes
        .iter()
        .rposition(|&byte| byte == b'.')
        .map(|dot| unsafe { u8_slice_as_os_str(&bytes[dot..]) })
}
|
||||
|
||||
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
|
||||
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
|
||||
os_str_bytes(path.as_os_str())
|
||||
}
|
||||
|
||||
/// Return the raw bytes of the given OS string.
///
/// This is the Unix variant: the bytes are borrowed directly from `s`, with
/// no copying or transcoding.
#[cfg(unix)]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = OsStrExt::as_bytes(s);
    Cow::Borrowed(raw)
}
|
||||
|
||||
/// Return the bytes of the given OS string after a lossy conversion to
/// UTF-8.
///
/// This is the variant compiled on non-Unix targets. The bytes are borrowed
/// when the lossy conversion borrows, and owned otherwise.
#[cfg(not(unix))]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
    // TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
    // of UTF-8, so even if we could get at the raw bytes, they wouldn't
    // be useful. We *must* convert to UTF-8 before doing path matching.
    // Unfortunate, but necessary.
    let text = s.to_string_lossy();
    match text {
        Cow::Borrowed(borrowed) => Cow::Borrowed(borrowed.as_bytes()),
        Cow::Owned(owned) => Cow::Owned(owned.into_bytes()),
    }
}
|
||||
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
///
/// This is the variant compiled on Unix targets; it hands `path` back
/// unchanged.
|
||||
#[cfg(unix)]
|
||||
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
// UNIX only uses /, so we're good.
|
||||
path
|
||||
}
|
||||
|
||||
/// Rewrites every byte that `std::path::is_separator` accepts to `/`, so
/// that `/` is the only separator in the result.
///
/// This is the variant compiled on non-Unix targets. The input is only
/// copied (via `Cow::to_mut`) if some byte actually needs rewriting.
#[cfg(not(unix))]
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
    use std::path::is_separator;

    for idx in 0..path.len() {
        let byte = path[idx];
        if byte != b'/' && is_separator(byte as char) {
            path.to_mut()[idx] = b'/';
        }
    }
    path
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::borrow::Cow;
    use std::ffi::OsStr;

    use super::{file_name_ext, normalize_path};

    // Defines a test named `$name` asserting that `file_name_ext` maps
    // `$file_name` to `$ext`.
    macro_rules! ext {
        ($name:ident, $file_name:expr, $ext:expr) => {
            #[test]
            fn $name() {
                let expected: Option<&OsStr> = $ext.map(OsStr::new);
                assert_eq!(expected, file_name_ext(OsStr::new($file_name)));
            }
        };
    }

    ext!(ext1, "foo.rs", Some(".rs"));
    ext!(ext2, ".rs", Some(".rs"));
    ext!(ext3, "..rs", Some(".rs"));
    ext!(ext4, "", None::<&str>);
    ext!(ext5, "foo", None::<&str>);

    // Defines a test named `$name` asserting that `normalize_path` maps the
    // bytes `$path` to the bytes `$expected`.
    macro_rules! normalize {
        ($name:ident, $path:expr, $expected:expr) => {
            #[test]
            fn $name() {
                let input: Vec<u8> = $path.to_vec();
                let output = normalize_path(Cow::Owned(input)).into_owned();
                assert_eq!($expected.to_vec(), output);
            }
        };
    }

    normalize!(normal1, b"foo", b"foo");
    normalize!(normal2, b"foo/bar", b"foo/bar");
    #[cfg(unix)]
    normalize!(normal3, b"foo\\bar", b"foo\\bar");
    #[cfg(not(unix))]
    normalize!(normal3, b"foo\\bar", b"foo/bar");
    #[cfg(unix)]
    normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz");
    #[cfg(not(unix))]
    normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz");
}
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.1.3" #:version
|
||||
version = "0.1.4" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -15,6 +15,6 @@ license = "Unlicense/MIT"
|
||||
[dependencies]
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
memmap = "0.2"
|
||||
memmap = "0.5"
|
||||
regex = "0.1.77"
|
||||
regex-syntax = "0.3.5"
|
||||
|
@@ -9,7 +9,7 @@ principled.
|
||||
*/
|
||||
use std::cmp;
|
||||
|
||||
use regex::bytes::Regex;
|
||||
use regex::bytes::RegexBuilder;
|
||||
use syntax::{
|
||||
Expr, Literals, Lit,
|
||||
ByteClass, ByteRange, CharClass, ClassRange, Repeater,
|
||||
@@ -33,7 +33,7 @@ impl LiteralSets {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_regex(&self) -> Option<Regex> {
|
||||
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan.
|
||||
@@ -79,14 +79,12 @@ impl LiteralSets {
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> =
|
||||
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
|
||||
// Literals always compile.
|
||||
Some(Regex::new(&alts.join("|")).unwrap())
|
||||
Some(RegexBuilder::new(&alts.join("|")))
|
||||
} else if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
// Literals always compile.
|
||||
debug!("required literal found: {:?}", show(lit));
|
||||
Some(Regex::new(&bytes_to_regex(lit)).unwrap())
|
||||
Some(RegexBuilder::new(&bytes_to_regex(lit)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -144,14 +144,19 @@ impl GrepBuilder {
|
||||
let expr = try!(self.parse());
|
||||
let literals = LiteralSets::create(&expr);
|
||||
let re = try!(self.regex(&expr));
|
||||
let required = literals.to_regex().or_else(|| {
|
||||
let expr = match strip_unicode_word_boundaries(&expr) {
|
||||
None => return None,
|
||||
Some(expr) => expr,
|
||||
};
|
||||
debug!("Stripped Unicode word boundaries. New AST:\n{:?}", expr);
|
||||
self.regex(&expr).ok()
|
||||
});
|
||||
let required = match literals.to_regex_builder() {
|
||||
Some(builder) => Some(try!(self.regex_build(builder))),
|
||||
None => {
|
||||
match strip_unicode_word_boundaries(&expr) {
|
||||
None => None,
|
||||
Some(expr) => {
|
||||
debug!("Stripped Unicode word boundaries. \
|
||||
New AST:\n{:?}", expr);
|
||||
self.regex(&expr).ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(Grep {
|
||||
re: re,
|
||||
required: required,
|
||||
@@ -162,11 +167,12 @@ impl GrepBuilder {
|
||||
/// Creates a new regex from the given expression with the current
|
||||
/// configuration.
|
||||
fn regex(&self, expr: &Expr) -> Result<Regex> {
|
||||
let casei =
|
||||
self.opts.case_insensitive
|
||||
|| (self.opts.case_smart && !has_uppercase_literal(expr));
|
||||
RegexBuilder::new(&expr.to_string())
|
||||
.case_insensitive(casei)
|
||||
self.regex_build(RegexBuilder::new(&expr.to_string()))
|
||||
}
|
||||
|
||||
/// Builds a new regex from the given builder using the caller's settings.
|
||||
fn regex_build(&self, builder: RegexBuilder) -> Result<Regex> {
|
||||
builder
|
||||
.multi_line(true)
|
||||
.unicode(true)
|
||||
.size_limit(self.opts.size_limit)
|
||||
@@ -182,10 +188,29 @@ impl GrepBuilder {
|
||||
try!(syntax::ExprBuilder::new()
|
||||
.allow_bytes(true)
|
||||
.unicode(true)
|
||||
.case_insensitive(self.opts.case_insensitive)
|
||||
.case_insensitive(try!(self.is_case_insensitive()))
|
||||
.parse(&self.pattern));
|
||||
let expr = try!(nonl::remove(expr, self.opts.line_terminator));
|
||||
debug!("regex ast:\n{:#?}", expr);
|
||||
Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// Determines whether the case insensitive flag should be enabled or not.
|
||||
///
|
||||
/// An error is returned if the regex could not be parsed.
|
||||
fn is_case_insensitive(&self) -> Result<bool> {
|
||||
if self.opts.case_insensitive {
|
||||
return Ok(true);
|
||||
}
|
||||
if !self.opts.case_smart {
|
||||
return Ok(false);
|
||||
}
|
||||
let expr =
|
||||
try!(syntax::ExprBuilder::new()
|
||||
.allow_bytes(true)
|
||||
.unicode(true)
|
||||
.parse(&self.pattern));
|
||||
Ok(!has_uppercase_literal(&expr))
|
||||
}
|
||||
}
|
||||
|
||||
|
37
ignore/Cargo.toml
Normal file
37
ignore/Cargo.toml
Normal file
@@ -0,0 +1,37 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.1.4" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
against file paths.
|
||||
"""
|
||||
documentation = "https://docs.rs/ignore"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/ignore"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/ignore"
|
||||
readme = "README.md"
|
||||
keywords = ["glob", "ignore", "gitignore", "pattern", "file"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[lib]
|
||||
name = "ignore"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
crossbeam = "0.2"
|
||||
globset = { version = "0.1.2", path = "../globset" }
|
||||
lazy_static = "0.2"
|
||||
log = "0.3"
|
||||
memchr = "0.1"
|
||||
regex = "0.1.77"
|
||||
thread_local = "0.3.0"
|
||||
walkdir = "1"
|
||||
|
||||
[dev-dependencies]
|
||||
tempdir = "0.3.5"
|
||||
|
||||
[features]
|
||||
simd-accel = ["globset/simd-accel"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
66
ignore/README.md
Normal file
66
ignore/README.md
Normal file
@@ -0,0 +1,66 @@
|
||||
ignore
|
||||
======
|
||||
The ignore crate provides a fast recursive directory iterator that respects
|
||||
various filters such as globs, file types and `.gitignore` files. This crate
|
||||
also provides lower level direct access to gitignore and file type matchers.
|
||||
|
||||
[Linux build status](https://travis-ci.org/BurntSushi/ripgrep)
|
||||
[Windows build status](https://ci.appveyor.com/project/BurntSushi/ripgrep)
|
||||
[ignore on crates.io](https://crates.io/crates/ignore)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
||||
|
||||
### Documentation
|
||||
|
||||
[https://docs.rs/ignore](https://docs.rs/ignore)
|
||||
|
||||
### Usage
|
||||
|
||||
Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
ignore = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate ignore;
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
recursively traverse the current directory while automatically filtering out
|
||||
files and directories according to ignore globs found in files like
|
||||
`.ignore` and `.gitignore`:
|
||||
|
||||
|
||||
```rust,no_run
|
||||
use ignore::Walk;
|
||||
|
||||
for result in Walk::new("./") {
|
||||
// Each item yielded by the iterator is either a directory entry or an
|
||||
// error, so either print the path or the error.
|
||||
match result {
|
||||
Ok(entry) => println!("{}", entry.path().display()),
|
||||
Err(err) => println!("ERROR: {}", err),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
### Example: advanced
|
||||
|
||||
By default, the recursive directory iterator will ignore hidden files and
|
||||
directories. This can be disabled by building the iterator with `WalkBuilder`:
|
||||
|
||||
```rust,no_run
|
||||
use ignore::WalkBuilder;
|
||||
|
||||
for result in WalkBuilder::new("./").hidden(false).build() {
|
||||
println!("{:?}", result);
|
||||
}
|
||||
```
|
||||
|
||||
See the documentation for `WalkBuilder` for many other options.
|
92
ignore/examples/walk.rs
Normal file
92
ignore/examples/walk.rs
Normal file
@@ -0,0 +1,92 @@
|
||||
#![allow(dead_code, unused_imports, unused_mut, unused_variables)]
|
||||
|
||||
extern crate crossbeam;
|
||||
extern crate ignore;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::thread;
|
||||
|
||||
use crossbeam::sync::MsQueue;
|
||||
use ignore::WalkBuilder;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
fn main() {
|
||||
let mut path = env::args().nth(1).unwrap();
|
||||
let mut parallel = false;
|
||||
let mut simple = false;
|
||||
let queue: Arc<MsQueue<Option<DirEntry>>> = Arc::new(MsQueue::new());
|
||||
if path == "parallel" {
|
||||
path = env::args().nth(2).unwrap();
|
||||
parallel = true;
|
||||
} else if path == "walkdir" {
|
||||
path = env::args().nth(2).unwrap();
|
||||
simple = true;
|
||||
}
|
||||
|
||||
let stdout_queue = queue.clone();
|
||||
let stdout_thread = thread::spawn(move || {
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
while let Some(dent) = stdout_queue.pop() {
|
||||
write_path(&mut stdout, dent.path());
|
||||
}
|
||||
});
|
||||
|
||||
if parallel {
|
||||
let walker = WalkBuilder::new(path).threads(6).build_parallel();
|
||||
walker.run(|| {
|
||||
let queue = queue.clone();
|
||||
Box::new(move |result| {
|
||||
use ignore::WalkState::*;
|
||||
|
||||
queue.push(Some(DirEntry::Y(result.unwrap())));
|
||||
Continue
|
||||
})
|
||||
});
|
||||
} else if simple {
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
let walker = WalkDir::new(path);
|
||||
for result in walker {
|
||||
queue.push(Some(DirEntry::X(result.unwrap())));
|
||||
}
|
||||
} else {
|
||||
let mut stdout = io::BufWriter::new(io::stdout());
|
||||
let walker = WalkBuilder::new(path).build();
|
||||
for result in walker {
|
||||
queue.push(Some(DirEntry::Y(result.unwrap())));
|
||||
}
|
||||
}
|
||||
queue.push(None);
|
||||
stdout_thread.join().unwrap();
|
||||
}
|
||||
|
||||
enum DirEntry {
|
||||
X(walkdir::DirEntry),
|
||||
Y(ignore::DirEntry),
|
||||
}
|
||||
|
||||
impl DirEntry {
|
||||
fn path(&self) -> &Path {
|
||||
match *self {
|
||||
DirEntry::X(ref x) => x.path(),
|
||||
DirEntry::Y(ref y) => y.path(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Writes `path` followed by a single `\n` to `wtr`.
///
/// On Unix the raw bytes of the path are written as-is, without any
/// UTF-8 conversion.
///
/// `write_all` is used rather than `write` because `write` may accept only
/// part of the buffer; ignoring its return value would silently truncate
/// the output.
///
/// # Panics
///
/// Panics if the underlying writer reports an error.
#[cfg(unix)]
fn write_path<W: Write>(mut wtr: W, path: &Path) {
    use std::os::unix::ffi::OsStrExt;

    wtr.write_all(path.as_os_str().as_bytes()).unwrap();
    wtr.write_all(b"\n").unwrap();
}
|
||||
|
||||
/// Writes `path` followed by a single `\n` to `wtr`.
///
/// On non-Unix platforms the path is converted with `to_string_lossy`
/// before being written.
///
/// `write_all` is used rather than `write` because `write` may accept only
/// part of the buffer; ignoring its return value would silently truncate
/// the output.
///
/// # Panics
///
/// Panics if the underlying writer reports an error.
#[cfg(not(unix))]
fn write_path<W: Write>(mut wtr: W, path: &Path) {
    wtr.write_all(path.to_string_lossy().as_bytes()).unwrap();
    wtr.write_all(b"\n").unwrap();
}
|
811
ignore/src/dir.rs
Normal file
811
ignore/src/dir.rs
Normal file
@@ -0,0 +1,811 @@
|
||||
// This module provides a data structure, `Ignore`, that connects "directory
|
||||
// traversal" with "ignore matchers." Specifically, it knows about gitignore
|
||||
// semantics and precedence, and is organized based on directory hierarchy.
|
||||
// Namely, every matcher logically corresponds to ignore rules from a single
|
||||
// directory, and points to the matcher for its corresponding parent directory.
|
||||
// In this sense, `Ignore` is a *persistent* data structure.
|
||||
//
|
||||
// This design was specifically chosen to make it possible to use this data
|
||||
// structure in a parallel directory iterator.
|
||||
//
|
||||
// My initial intention was to expose this module as part of this crate's
|
||||
// public API, but I think the data structure's public API is too complicated
|
||||
// with non-obvious failure modes. Alas, such things haven't been documented
|
||||
// well.
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::ffi::OsString;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use overrides::{self, Override};
|
||||
use types::{self, Types};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
/// the `Ignore` matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IgnoreMatch<'a>(IgnoreMatchInner<'a>);
|
||||
|
||||
/// IgnoreMatchInner describes precisely where the match information came from.
|
||||
/// This is private to allow expansion to more matchers in the future.
|
||||
#[derive(Clone, Debug)]
|
||||
enum IgnoreMatchInner<'a> {
|
||||
Override(overrides::Glob<'a>),
|
||||
Gitignore(&'a gitignore::Glob),
|
||||
Types(types::Glob<'a>),
|
||||
Hidden,
|
||||
}
|
||||
|
||||
impl<'a> IgnoreMatch<'a> {
|
||||
fn overrides(x: overrides::Glob<'a>) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Override(x))
|
||||
}
|
||||
|
||||
fn gitignore(x: &'a gitignore::Glob) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Gitignore(x))
|
||||
}
|
||||
|
||||
fn types(x: types::Glob<'a>) -> IgnoreMatch<'a> {
|
||||
IgnoreMatch(IgnoreMatchInner::Types(x))
|
||||
}
|
||||
|
||||
fn hidden() -> IgnoreMatch<'static> {
|
||||
IgnoreMatch(IgnoreMatchInner::Hidden)
|
||||
}
|
||||
}
|
||||
|
||||
/// Configuration flags for ignore matching. The same set of flags is held
/// by both the builder and the matcher it produces.
#[derive(Clone, Copy, Debug)]
struct IgnoreOptions {
    /// When true, hidden file paths are ignored.
    hidden: bool,
    /// When true, `.ignore` files are read.
    ignore: bool,
    /// When true, git's global gitignore file is read.
    git_global: bool,
    /// When true, `.gitignore` files are read.
    git_ignore: bool,
    /// When true, `.git/info/exclude` files are read.
    git_exclude: bool,
}
|
||||
|
||||
impl IgnoreOptions {
|
||||
/// Returns true if at least one type of ignore rules should be matched.
|
||||
fn should_ignores(&self) -> bool {
|
||||
self.ignore || self.git_global || self.git_ignore || self.git_exclude
|
||||
}
|
||||
}
|
||||
|
||||
/// Ignore is a matcher useful for recursively walking one or more directories.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Ignore(Arc<IgnoreInner>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct IgnoreInner {
|
||||
/// A map of all existing directories that have already been
|
||||
/// compiled into matchers.
|
||||
///
|
||||
/// Note that this is never used during matching, only when adding new
|
||||
/// parent directory matchers. This avoids needing to rebuild glob sets for
|
||||
/// parent directories if many paths are being searched.
|
||||
compiled: Arc<RwLock<HashMap<OsString, Ignore>>>,
|
||||
/// The path to the directory that this matcher was built from.
|
||||
dir: PathBuf,
|
||||
/// An override matcher (default is empty).
|
||||
overrides: Arc<Override>,
|
||||
/// A file type matcher.
|
||||
types: Arc<Types>,
|
||||
/// The parent directory to match next.
|
||||
///
|
||||
/// If this is the root directory or there are otherwise no more
|
||||
/// directories to match, then `parent` is `None`.
|
||||
parent: Option<Ignore>,
|
||||
/// Whether this is an absolute parent matcher, as added by add_parent.
|
||||
is_absolute_parent: bool,
|
||||
/// The absolute base path of this matcher. Populated only if parent
|
||||
/// directories are added.
|
||||
absolute_base: Option<Arc<PathBuf>>,
|
||||
/// Explicit ignore matchers specified by the caller.
|
||||
explicit_ignores: Arc<Vec<Gitignore>>,
|
||||
/// The matcher for .ignore files.
|
||||
ignore_matcher: Gitignore,
|
||||
/// A global gitignore matcher, usually from $XDG_CONFIG_HOME/git/ignore.
|
||||
git_global_matcher: Arc<Gitignore>,
|
||||
/// The matcher for .gitignore files.
|
||||
git_ignore_matcher: Gitignore,
|
||||
/// Special matcher for `.git/info/exclude` files.
|
||||
git_exclude_matcher: Gitignore,
|
||||
/// Whether this directory contains a .git sub-directory.
|
||||
has_git: bool,
|
||||
/// Ignore config.
|
||||
opts: IgnoreOptions,
|
||||
}
|
||||
|
||||
impl Ignore {
|
||||
/// Return the directory path of this matcher.
|
||||
#[allow(dead_code)]
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.0.dir
|
||||
}
|
||||
|
||||
/// Return true if this matcher has no parent.
|
||||
pub fn is_root(&self) -> bool {
|
||||
self.0.parent.is_none()
|
||||
}
|
||||
|
||||
/// Returns true if this matcher was added via the `add_parents` method.
|
||||
pub fn is_absolute_parent(&self) -> bool {
|
||||
self.0.is_absolute_parent
|
||||
}
|
||||
|
||||
/// Return this matcher's parent, if one exists.
|
||||
pub fn parent(&self) -> Option<Ignore> {
|
||||
self.0.parent.clone()
|
||||
}
|
||||
|
||||
/// Create a new `Ignore` matcher with the parent directories of `dir`.
|
||||
///
|
||||
/// Note that this can only be called on an `Ignore` matcher with no
|
||||
/// parents (i.e., `is_root` returns `true`). This will panic otherwise.
|
||||
pub fn add_parents<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
if !self.is_root() {
|
||||
panic!("Ignore::add_parents called on non-root matcher");
|
||||
}
|
||||
let absolute_base = match path.as_ref().canonicalize() {
|
||||
Ok(path) => Arc::new(path),
|
||||
Err(_) => {
|
||||
// There's not much we can do here, so just return our
|
||||
// existing matcher. We drop the error to be consistent
|
||||
// with our general pattern of ignoring I/O errors when
|
||||
// processing ignore files.
|
||||
return (self.clone(), None);
|
||||
}
|
||||
};
|
||||
// List of parents, from child to root.
|
||||
let mut parents = vec![];
|
||||
let mut path = &**absolute_base;
|
||||
while let Some(parent) = path.parent() {
|
||||
parents.push(parent);
|
||||
path = parent;
|
||||
}
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let mut ig = self.clone();
|
||||
for parent in parents.into_iter().rev() {
|
||||
let mut compiled = self.0.compiled.write().unwrap();
|
||||
if let Some(prebuilt) = compiled.get(parent.as_os_str()) {
|
||||
ig = prebuilt.clone();
|
||||
continue;
|
||||
}
|
||||
let (mut igtmp, err) = ig.add_child_path(parent);
|
||||
errs.maybe_push(err);
|
||||
igtmp.is_absolute_parent = true;
|
||||
igtmp.absolute_base = Some(absolute_base.clone());
|
||||
ig = Ignore(Arc::new(igtmp));
|
||||
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
||||
}
|
||||
(ig, errs.into_error_option())
|
||||
}
|
||||
|
||||
/// Create a new `Ignore` matcher for the given child directory.
|
||||
///
|
||||
/// Since building the matcher may require reading from multiple
|
||||
/// files, it's possible that this method partially succeeds. Therefore,
|
||||
/// a matcher is always returned (which may match nothing) and an error is
|
||||
/// returned if it exists.
|
||||
///
|
||||
/// Note that all I/O errors are completely ignored.
|
||||
pub fn add_child<P: AsRef<Path>>(
|
||||
&self,
|
||||
dir: P,
|
||||
) -> (Ignore, Option<Error>) {
|
||||
let (ig, err) = self.add_child_path(dir.as_ref());
|
||||
(Ignore(Arc::new(ig)), err)
|
||||
}
|
||||
|
||||
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
||||
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
||||
static IG_NAMES: &'static [&'static str] = &[".rgignore", ".ignore"];
|
||||
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
let ig_matcher =
|
||||
if !self.0.opts.ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, IG_NAMES);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_matcher =
|
||||
if !self.0.opts.git_ignore {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".gitignore"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let gi_exclude_matcher =
|
||||
if !self.0.opts.git_exclude {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (m, err) = create_gitignore(&dir, &[".git/info/exclude"]);
|
||||
errs.maybe_push(err);
|
||||
m
|
||||
};
|
||||
let ig = IgnoreInner {
|
||||
compiled: self.0.compiled.clone(),
|
||||
dir: dir.to_path_buf(),
|
||||
overrides: self.0.overrides.clone(),
|
||||
types: self.0.types.clone(),
|
||||
parent: Some(self.clone()),
|
||||
is_absolute_parent: false,
|
||||
absolute_base: self.0.absolute_base.clone(),
|
||||
explicit_ignores: self.0.explicit_ignores.clone(),
|
||||
ignore_matcher: ig_matcher,
|
||||
git_global_matcher: self.0.git_global_matcher.clone(),
|
||||
git_ignore_matcher: gi_matcher,
|
||||
git_exclude_matcher: gi_exclude_matcher,
|
||||
has_git: dir.join(".git").is_dir(),
|
||||
opts: self.0.opts,
|
||||
};
|
||||
(ig, errs.into_error_option())
|
||||
}
|
||||
|
||||
/// Returns a match indicating whether the given file path should be
|
||||
/// ignored or not.
|
||||
///
|
||||
/// The match contains information about its origin.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
// We need to be careful with our path. If it has a leading ./, then
|
||||
// strip it because it causes nothing but trouble.
|
||||
let mut path = path.as_ref();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Match against the override patterns. If an override matches
|
||||
// regardless of whether it's whitelist/ignore, then we quit and
|
||||
// return that result immediately. Overrides have the highest
|
||||
// precedence.
|
||||
if !self.0.overrides.is_empty() {
|
||||
let mat =
|
||||
self.0.overrides.matched(path, is_dir)
|
||||
.map(IgnoreMatch::overrides);
|
||||
if !mat.is_none() {
|
||||
return mat;
|
||||
}
|
||||
}
|
||||
let mut whitelisted = Match::None;
|
||||
if self.0.opts.should_ignores() {
|
||||
let mat = self.matched_ignore(path, is_dir);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if !self.0.types.is_empty() {
|
||||
let mat =
|
||||
self.0.types.matched(path, is_dir).map(IgnoreMatch::types);
|
||||
if mat.is_ignore() {
|
||||
return mat;
|
||||
} else if mat.is_whitelist() {
|
||||
whitelisted = mat;
|
||||
}
|
||||
}
|
||||
if whitelisted.is_none() && self.0.opts.hidden && is_hidden(path) {
|
||||
return Match::Ignore(IgnoreMatch::hidden());
|
||||
}
|
||||
whitelisted
|
||||
}
|
||||
|
||||
/// Performs matching only on the ignore files for this directory and
|
||||
/// all parent directories.
|
||||
fn matched_ignore<'a>(
|
||||
&'a self,
|
||||
path: &Path,
|
||||
is_dir: bool,
|
||||
) -> Match<IgnoreMatch<'a>> {
|
||||
let (mut m_ignore, mut m_gi, mut m_gi_exclude, mut m_explicit) =
|
||||
(Match::None, Match::None, Match::None, Match::None);
|
||||
let mut saw_git = false;
|
||||
for ig in self.parents().take_while(|ig| !ig.0.is_absolute_parent) {
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
if let Some(abs_parent_path) = self.absolute_base() {
|
||||
let path = abs_parent_path.join(path);
|
||||
for ig in self.parents().skip_while(|ig|!ig.0.is_absolute_parent) {
|
||||
if m_ignore.is_none() {
|
||||
m_ignore =
|
||||
ig.0.ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi.is_none() {
|
||||
m_gi =
|
||||
ig.0.git_ignore_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
if !saw_git && m_gi_exclude.is_none() {
|
||||
m_gi_exclude =
|
||||
ig.0.git_exclude_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
}
|
||||
saw_git = saw_git || ig.0.has_git;
|
||||
}
|
||||
}
|
||||
for gi in self.0.explicit_ignores.iter().rev() {
|
||||
if !m_explicit.is_none() {
|
||||
break;
|
||||
}
|
||||
m_explicit = gi.matched(&path, is_dir).map(IgnoreMatch::gitignore);
|
||||
}
|
||||
let m_global = self.0.git_global_matcher.matched(&path, is_dir)
|
||||
.map(IgnoreMatch::gitignore);
|
||||
if !m_ignore.is_none() {
|
||||
m_ignore
|
||||
} else if !m_gi.is_none() {
|
||||
m_gi
|
||||
} else if !m_gi_exclude.is_none() {
|
||||
m_gi_exclude
|
||||
} else if !m_global.is_none() {
|
||||
m_global
|
||||
} else if !m_explicit.is_none() {
|
||||
m_explicit
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns an iterator over parent ignore matchers, including this one.
|
||||
pub fn parents(&self) -> Parents {
|
||||
Parents(Some(self))
|
||||
}
|
||||
|
||||
/// Returns the first absolute path of the first absolute parent, if
|
||||
/// one exists.
|
||||
fn absolute_base(&self) -> Option<&Path> {
|
||||
self.0.absolute_base.as_ref().map(|p| &***p)
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over all parents of an ignore matcher, including itself.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the initial `Ignore` matcher.
|
||||
pub struct Parents<'a>(Option<&'a Ignore>);
|
||||
|
||||
impl<'a> Iterator for Parents<'a> {
|
||||
type Item = &'a Ignore;
|
||||
|
||||
fn next(&mut self) -> Option<&'a Ignore> {
|
||||
match self.0.take() {
|
||||
None => None,
|
||||
Some(ig) => {
|
||||
self.0 = ig.0.parent.as_ref();
|
||||
Some(ig)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A builder for creating an Ignore matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct IgnoreBuilder {
|
||||
/// The root directory path for this ignore matcher.
|
||||
dir: PathBuf,
|
||||
/// An override matcher (default is empty).
|
||||
overrides: Arc<Override>,
|
||||
/// A type matcher (default is empty).
|
||||
types: Arc<Types>,
|
||||
/// Explicit ignore matchers.
|
||||
explicit_ignores: Vec<Gitignore>,
|
||||
/// Ignore config.
|
||||
opts: IgnoreOptions,
|
||||
}
|
||||
|
||||
impl IgnoreBuilder {
|
||||
/// Create a new builder for an `Ignore` matcher.
|
||||
///
|
||||
/// All relative file paths are resolved with respect to the current
|
||||
/// working directory.
|
||||
pub fn new() -> IgnoreBuilder {
|
||||
IgnoreBuilder {
|
||||
dir: Path::new("").to_path_buf(),
|
||||
overrides: Arc::new(Override::empty()),
|
||||
types: Arc::new(Types::empty()),
|
||||
explicit_ignores: vec![],
|
||||
opts: IgnoreOptions {
|
||||
hidden: true,
|
||||
ignore: true,
|
||||
git_global: true,
|
||||
git_ignore: true,
|
||||
git_exclude: true,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new `Ignore` matcher.
|
||||
///
|
||||
/// The matcher returned won't match anything until ignore rules from
|
||||
/// directories are added to it.
|
||||
pub fn build(&self) -> Ignore {
|
||||
let git_global_matcher =
|
||||
if !self.opts.git_global {
|
||||
Gitignore::empty()
|
||||
} else {
|
||||
let (gi, err) = Gitignore::global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
Ignore(Arc::new(IgnoreInner {
|
||||
compiled: Arc::new(RwLock::new(HashMap::new())),
|
||||
dir: self.dir.clone(),
|
||||
overrides: self.overrides.clone(),
|
||||
types: self.types.clone(),
|
||||
parent: None,
|
||||
is_absolute_parent: true,
|
||||
absolute_base: None,
|
||||
explicit_ignores: Arc::new(self.explicit_ignores.clone()),
|
||||
ignore_matcher: Gitignore::empty(),
|
||||
git_global_matcher: Arc::new(git_global_matcher),
|
||||
git_ignore_matcher: Gitignore::empty(),
|
||||
git_exclude_matcher: Gitignore::empty(),
|
||||
has_git: false,
|
||||
opts: self.opts,
|
||||
}))
|
||||
}
|
||||
|
||||
/// Add an override matcher.
|
||||
///
|
||||
/// By default, no override matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn overrides(&mut self, overrides: Override) -> &mut IgnoreBuilder {
|
||||
self.overrides = Arc::new(overrides);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher.
|
||||
///
|
||||
/// By default, no file type matcher is used.
|
||||
///
|
||||
/// This overrides any previous setting.
|
||||
pub fn types(&mut self, types: Types) -> &mut IgnoreBuilder {
|
||||
self.types = Arc::new(types);
|
||||
self
|
||||
}
|
||||
|
||||
/// Adds a new global ignore matcher from the ignore file path given.
|
||||
pub fn add_ignore(&mut self, ig: Gitignore) -> &mut IgnoreBuilder {
|
||||
self.explicit_ignores.push(ig);
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables ignoring hidden files.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn hidden(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.hidden = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.ignore` files.
|
||||
///
|
||||
/// `.ignore` files have the same semantics as `gitignore` files and are
|
||||
/// supported by search tools such as ripgrep and The Silver Searcher.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a global gitignore matcher.
|
||||
///
|
||||
/// Its precedence is lower than both normal `.gitignore` files and
|
||||
/// `.git/info/exclude` files.
|
||||
///
|
||||
/// This overwrites any previous global gitignore setting.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_global(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_global = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.gitignore` files.
|
||||
///
|
||||
/// `.gitignore` files have match semantics as described in the `gitignore`
|
||||
/// man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_ignore(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enables reading `.git/info/exclude` files.
|
||||
///
|
||||
/// `.git/info/exclude` files have match semantics as described in the
|
||||
/// `gitignore` man page.
|
||||
///
|
||||
/// This is enabled by default.
|
||||
pub fn git_exclude(&mut self, yes: bool) -> &mut IgnoreBuilder {
|
||||
self.opts.git_exclude = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher for the directory given.
|
||||
///
|
||||
/// Ignore globs are extracted from each of the file names in `dir` in the
|
||||
/// order given (earlier names have lower precedence than later names).
|
||||
///
|
||||
/// I/O errors are ignored.
|
||||
pub fn create_gitignore(
|
||||
dir: &Path,
|
||||
names: &[&str],
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let mut builder = GitignoreBuilder::new(dir);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
for name in names {
|
||||
let gipath = dir.join(name);
|
||||
errs.maybe_push_ignore_io(builder.add(gipath));
|
||||
}
|
||||
let gi = match builder.build() {
|
||||
Ok(gi) => gi,
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
GitignoreBuilder::new(dir).build().unwrap()
|
||||
}
|
||||
};
|
||||
(gi, errs.into_error_option())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::fs::{self, File};
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use tempdir::TempDir;
|
||||
|
||||
use dir::IgnoreBuilder;
|
||||
use gitignore::Gitignore;
|
||||
use Error;
|
||||
|
||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||
let mut file = File::create(path).unwrap();
|
||||
file.write_all(contents.as_bytes()).unwrap();
|
||||
}
|
||||
|
||||
fn mkdirp<P: AsRef<Path>>(path: P) {
|
||||
fs::create_dir_all(path).unwrap();
|
||||
}
|
||||
|
||||
fn partial(err: Error) -> Vec<Error> {
|
||||
match err {
|
||||
Error::Partial(errs) => errs,
|
||||
_ => panic!("expected partial error but got {:?}", err),
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn explicit_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join("not-an-ignore"), "foo\n!bar");
|
||||
|
||||
let (gi, err) = Gitignore::new(td.path().join("not-an-ignore"));
|
||||
assert!(err.is_none());
|
||||
let (ig, err) = IgnoreBuilder::new()
|
||||
.add_ignore(gi).build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_ignore());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
assert!(ig.matched("baz", false).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn git_exclude() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
mkdirp(td.path().join(".git/info"));
|
||||
wfile(td.path().join(".git/info/exclude"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_ignore());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
assert!(ig.matched("baz", false).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn gitignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_ignore());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
assert!(ig.matched("baz", false).is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".ignore"), "foo\n!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_ignore());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
assert!(ig.matched("baz", false).is_none());
|
||||
}
|
||||
|
||||
// Tests that an .ignore will override a .gitignore.
|
||||
#[test]
|
||||
fn ignore_over_gitignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
wfile(td.path().join(".ignore"), "!foo");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("foo", false).is_whitelist());
|
||||
}
|
||||
|
||||
// Tests that exclude has lower precedent than both .ignore and .gitignore.
|
||||
#[test]
|
||||
fn exclude_lowest() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "!foo");
|
||||
wfile(td.path().join(".ignore"), "!bar");
|
||||
mkdirp(td.path().join(".git/info"));
|
||||
wfile(td.path().join(".git/info/exclude"), "foo\nbar\nbaz");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
assert!(ig.matched("baz", false).is_ignore());
|
||||
assert!(ig.matched("foo", false).is_whitelist());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errored() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errored_both() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo");
|
||||
wfile(td.path().join(".ignore"), "fo**o");
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert_eq!(2, partial(err.expect("an error")).len());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errored_partial() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo\nbar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
assert!(ig.matched("bar", false).is_ignore());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn errored_partial_and_ignore() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
wfile(td.path().join(".gitignore"), "f**oo\nbar");
|
||||
wfile(td.path().join(".ignore"), "!bar");
|
||||
|
||||
let (ig, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_some());
|
||||
assert!(ig.matched("bar", false).is_whitelist());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn not_present_empty() {
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
|
||||
let (_, err) = IgnoreBuilder::new().build().add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn stops_at_git_dir() {
|
||||
// This tests that .gitignore files beyond a .git barrier aren't
|
||||
// matched, but .ignore files are.
|
||||
let td = TempDir::new("ignore-test-").unwrap();
|
||||
mkdirp(td.path().join(".git"));
|
||||
mkdirp(td.path().join("foo/.git"));
|
||||
wfile(td.path().join(".gitignore"), "foo");
|
||||
wfile(td.path().join(".ignore"), "bar");
|
||||
|
||||
let ig0 = IgnoreBuilder::new().build();
|
||||
let (ig1, err) = ig0.add_child(td.path());
|
||||
assert!(err.is_none());
|
||||
let (ig2, err) = ig1.add_child(ig1.path().join("foo"));
|
||||
assert!(err.is_none());
|
||||
|
||||
assert!(ig1.matched("foo", false).is_ignore());
|
||||
assert!(ig2.matched("foo", false).is_none());
|
||||
|
||||
assert!(ig1.matched("bar", false).is_ignore());
|
||||
assert!(ig2.matched("bar", false).is_ignore());
|
||||
}
|
||||
|
||||
/// A `.gitignore` in a parent directory is only honoured for a child once
/// the parents have been registered with `add_parents`.
#[test]
fn absolute_parent() {
    let tmp = TempDir::new("ignore-test-").unwrap();
    mkdirp(tmp.path().join(".git"));
    mkdirp(tmp.path().join("foo"));
    wfile(tmp.path().join(".gitignore"), "bar");

    // Baseline: without adding the parent, its gitignore file is not
    // consulted for the child directory.
    {
        let base = IgnoreBuilder::new().build();
        let (child, err) = base.add_child(tmp.path().join("foo"));
        assert!(err.is_none());
        assert!(child.matched("bar", false).is_none());
    }

    // With the parents added first, the parent's rule applies to the child.
    {
        let base = IgnoreBuilder::new().build();
        let (with_parents, err) = base.add_parents(tmp.path().join("foo"));
        assert!(err.is_none());
        let (child, err) = with_parents.add_child(tmp.path().join("foo"));
        assert!(err.is_none());
        assert!(child.matched("bar", false).is_ignore());
    }
}
|
||||
|
||||
/// An anchored rule (`/llvm/`) in a parent `.gitignore` must not match a
/// same-named directory below `src`, whereas the unanchored `foo` rule must
/// match at any depth.
#[test]
fn absolute_parent_anchored() {
    let tmp = TempDir::new("ignore-test-").unwrap();
    let root = tmp.path();
    mkdirp(root.join(".git"));
    mkdirp(root.join("src/llvm"));
    wfile(root.join(".gitignore"), "/llvm/\nfoo");

    let base = IgnoreBuilder::new().build();
    let (parents, parents_err) = base.add_parents(root.join("src"));
    assert!(parents_err.is_none());
    let (src, src_err) = parents.add_child("src");
    assert!(src_err.is_none());

    // The anchored directory rule does not apply beneath `src`.
    assert!(parents.matched("llvm", true).is_none());
    assert!(src.matched("llvm", true).is_none());
    assert!(src.matched("src/llvm", true).is_none());
    // The unanchored rule applies regardless of how the path is spelled.
    assert!(src.matched("foo", false).is_ignore());
    assert!(src.matched("src/foo", false).is_ignore());
}
|
||||
}
|
607
ignore/src/gitignore.rs
Normal file
607
ignore/src/gitignore.rs
Normal file
@@ -0,0 +1,607 @@
|
||||
/*!
|
||||
The gitignore module provides a way to match globs from a gitignore file
|
||||
against file paths.
|
||||
|
||||
Note that this module implements the specification as described in the
|
||||
`gitignore` man page from scratch. That is, this module does *not* shell out to
|
||||
the `git` command line tool.
|
||||
*/
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::env;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead, Read};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str;
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex::bytes::Regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// A single glob pattern taken from a gitignore file.
///
/// Matchers hand back a reference to the highest precedent `Glob` that
/// matched, so callers can report which pattern (and which file) decided
/// the outcome.
#[derive(Clone, Debug)]
pub struct Glob {
    /// Path of the file this glob was read from, if it came from a file.
    from: Option<PathBuf>,
    /// The glob text as it appeared in the gitignore file.
    original: String,
    /// The rewritten glob text that is actually compiled for matching.
    actual: String,
    /// True when the glob was negated (whitelisted) rather than an ignore.
    is_whitelist: bool,
    /// True when the glob may only match directories.
    is_only_dir: bool,
}
|
||||
|
||||
impl Glob {
|
||||
/// Returns the file path that defined this glob.
|
||||
pub fn from(&self) -> Option<&Path> {
|
||||
self.from.as_ref().map(|p| &**p)
|
||||
}
|
||||
|
||||
/// The original glob as it was defined in a gitignore file.
|
||||
pub fn original(&self) -> &str {
|
||||
&self.original
|
||||
}
|
||||
|
||||
/// The actual glob that was compiled to respect gitignore
|
||||
/// semantics.
|
||||
pub fn actual(&self) -> &str {
|
||||
&self.actual
|
||||
}
|
||||
|
||||
/// Whether this was a whitelisted glob or not.
|
||||
pub fn is_whitelist(&self) -> bool {
|
||||
self.is_whitelist
|
||||
}
|
||||
|
||||
/// Whether this glob must match a directory or not.
|
||||
pub fn is_only_dir(&self) -> bool {
|
||||
self.is_only_dir
|
||||
}
|
||||
}
|
||||
|
||||
/// Gitignore is a matcher for the globs in one or more gitignore files
|
||||
/// in the same directory.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: GlobSet,
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
num_ignores: u64,
|
||||
num_whitelists: u64,
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
impl Gitignore {
|
||||
/// Creates a new gitignore matcher from the gitignore file path given.
|
||||
///
|
||||
/// If it's desirable to include multiple gitignore files in a single
|
||||
/// matcher, or read gitignore globs from a different source, then
|
||||
/// use `GitignoreBuilder`.
|
||||
///
|
||||
/// This always returns a valid matcher, even if it's empty. In particular,
|
||||
/// a Gitignore file can be partially valid, e.g., when one glob is invalid
|
||||
/// but the rest aren't.
|
||||
///
|
||||
/// Note that I/O errors are ignored. For more granular control over
|
||||
/// errors, use `GitignoreBuilder`.
|
||||
pub fn new<P: AsRef<Path>>(
|
||||
gitignore_path: P,
|
||||
) -> (Gitignore, Option<Error>) {
|
||||
let path = gitignore_path.as_ref();
|
||||
let parent = path.parent().unwrap_or(Path::new("/"));
|
||||
let mut builder = GitignoreBuilder::new(parent);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
errs.maybe_push_ignore_io(builder.add(path));
|
||||
match builder.build() {
|
||||
Ok(gi) => (gi, errs.into_error_option()),
|
||||
Err(err) => {
|
||||
errs.push(err);
|
||||
(Gitignore::empty(), errs.into_error_option())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new gitignore matcher from the global ignore file, if one
|
||||
/// exists.
|
||||
///
|
||||
/// The global config file path is specified by git's `core.excludesFile`
|
||||
/// config option.
|
||||
///
|
||||
/// Git's config file location is `$HOME/.gitconfig`. If `$HOME/.gitconfig`
|
||||
/// does not exist or does not specify `core.excludesFile`, then
|
||||
/// `$XDG_CONFIG_HOME/git/ignore` is read. If `$XDG_CONFIG_HOME` is not
|
||||
/// set or is empty, then `$HOME/.config/git/ignore` is used instead.
|
||||
pub fn global() -> (Gitignore, Option<Error>) {
|
||||
match gitconfig_excludes_path() {
|
||||
None => (Gitignore::empty(), None),
|
||||
Some(path) => {
|
||||
if !path.is_file() {
|
||||
(Gitignore::empty(), None)
|
||||
} else {
|
||||
Gitignore::new(path)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new empty gitignore matcher that never matches anything.
|
||||
///
|
||||
/// Its path is empty.
|
||||
pub fn empty() -> Gitignore {
|
||||
GitignoreBuilder::new("").build().unwrap()
|
||||
}
|
||||
|
||||
/// Returns the directory containing this gitignore matcher.
|
||||
///
|
||||
/// All matches are done relative to this path.
|
||||
pub fn path(&self) -> &Path {
|
||||
&*self.root
|
||||
}
|
||||
|
||||
/// Returns true if and only if this gitignore has zero globs, and
|
||||
/// therefore never matches any file path.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.set.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the total number of globs, which should be equivalent to
|
||||
/// `num_ignores + num_whitelists`.
|
||||
pub fn len(&self) -> usize {
|
||||
self.set.len()
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore globs.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.num_ignores
|
||||
}
|
||||
|
||||
/// Returns the total number of whitelisted globs.
|
||||
pub fn num_whitelists(&self) -> u64 {
|
||||
self.num_whitelists
|
||||
}
|
||||
|
||||
/// Returns whether the given file path matched a pattern in this gitignore
|
||||
/// matcher.
|
||||
///
|
||||
/// `is_dir` should be true if the path refers to a directory and false
|
||||
/// otherwise.
|
||||
///
|
||||
/// The given path is matched relative to the path given when building
|
||||
/// the matcher. Specifically, before matching `path`, its prefix (as
|
||||
/// determined by a common suffix of the directory containing this
|
||||
/// gitignore) is stripped. If there is no common suffix/prefix overlap,
|
||||
/// then `path` is assumed to be relative to this matcher.
|
||||
pub fn matched<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<&Glob> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
self.matched_stripped(self.strip(path.as_ref()), is_dir)
|
||||
}
|
||||
|
||||
/// Like matched, but takes a path that has already been stripped.
|
||||
fn matched_stripped<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<&Glob> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
let path = path.as_ref();
|
||||
let _matches = self.matches.get_default();
|
||||
let mut matches = _matches.borrow_mut();
|
||||
let candidate = Candidate::new(path);
|
||||
self.set.matches_candidate_into(&candidate, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let glob = &self.globs[i];
|
||||
if !glob.is_only_dir() || is_dir {
|
||||
return if glob.is_whitelist() {
|
||||
Match::Whitelist(glob)
|
||||
} else {
|
||||
Match::Ignore(glob)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
}
|
||||
|
||||
/// Strips the given path such that it's suitable for matching with this
|
||||
/// gitignore matcher.
|
||||
fn strip<'a, P: 'a + AsRef<Path> + ?Sized>(
|
||||
&'a self,
|
||||
path: &'a P,
|
||||
) -> &'a Path {
|
||||
let mut path = path.as_ref();
|
||||
// A leading ./ is completely superfluous. We also strip it from
|
||||
// our gitignore root path, so we need to strip it from our candidate
|
||||
// path too.
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Strip any common prefix between the candidate path and the root
|
||||
// of the gitignore, to make sure we get relative matching right.
|
||||
// BUT, a file name might not have any directory components to it,
|
||||
// in which case, we don't want to accidentally strip any part of the
|
||||
// file name.
|
||||
if !is_file_name(path) {
|
||||
if let Some(p) = strip_prefix(&self.root, path) {
|
||||
path = p;
|
||||
// If we're left with a leading slash, get rid of it.
|
||||
if let Some(p) = strip_prefix("/", path) {
|
||||
path = p;
|
||||
}
|
||||
}
|
||||
}
|
||||
path
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a matcher for a single set of globs from a .gitignore file.
|
||||
pub struct GitignoreBuilder {
|
||||
builder: GlobSetBuilder,
|
||||
root: PathBuf,
|
||||
globs: Vec<Glob>,
|
||||
}
|
||||
|
||||
impl GitignoreBuilder {
|
||||
/// Create a new builder for a gitignore file.
|
||||
///
|
||||
/// The path given should be the path at which the globs for this gitignore
|
||||
/// file should be matched. Note that paths are always matched relative
|
||||
/// to the root path given here. Generally, the root path should correspond
|
||||
/// to the *directory* containing a `.gitignore` file.
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||
let root = root.as_ref();
|
||||
GitignoreBuilder {
|
||||
builder: GlobSetBuilder::new(),
|
||||
root: strip_prefix("./", root).unwrap_or(root).to_path_buf(),
|
||||
globs: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new matcher from the globs added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new globs can be added to it.
|
||||
pub fn build(&self) -> Result<Gitignore, Error> {
|
||||
let nignore = self.globs.iter().filter(|g| !g.is_whitelist()).count();
|
||||
let nwhite = self.globs.iter().filter(|g| g.is_whitelist()).count();
|
||||
let set = try!(
|
||||
self.builder.build().map_err(|err| Error::Glob(err.to_string())));
|
||||
Ok(Gitignore {
|
||||
set: set,
|
||||
root: self.root.clone(),
|
||||
globs: self.globs.clone(),
|
||||
num_ignores: nignore as u64,
|
||||
num_whitelists: nwhite as u64,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Add each glob from the file path given.
|
||||
///
|
||||
/// The file given should be formatted as a `gitignore` file.
|
||||
///
|
||||
/// Note that partial errors can be returned. For example, if there was
|
||||
/// a problem adding one glob, an error for that will be returned, but
|
||||
/// all other valid globs will still be added.
|
||||
pub fn add<P: AsRef<Path>>(&mut self, path: P) -> Option<Error> {
|
||||
let path = path.as_ref();
|
||||
let file = match File::open(path) {
|
||||
Err(err) => return Some(Error::Io(err).with_path(path)),
|
||||
Ok(file) => file,
|
||||
};
|
||||
let rdr = io::BufReader::new(file);
|
||||
let mut errs = PartialErrorBuilder::default();
|
||||
for (i, line) in rdr.lines().enumerate() {
|
||||
let lineno = (i + 1) as u64;
|
||||
let line = match line {
|
||||
Ok(line) => line,
|
||||
Err(err) => {
|
||||
errs.push(Error::Io(err).tagged(path, lineno));
|
||||
continue;
|
||||
}
|
||||
};
|
||||
if let Err(err) = self.add_line(Some(path.to_path_buf()), &line) {
|
||||
errs.push(err.tagged(path, lineno));
|
||||
}
|
||||
}
|
||||
errs.into_error_option()
|
||||
}
|
||||
|
||||
/// Add each glob line from the string given.
|
||||
///
|
||||
/// If this string came from a particular `gitignore` file, then its path
|
||||
/// should be provided here.
|
||||
///
|
||||
/// The string given should be formatted as a `gitignore` file.
|
||||
#[cfg(test)]
|
||||
fn add_str(
|
||||
&mut self,
|
||||
from: Option<PathBuf>,
|
||||
gitignore: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
for line in gitignore.lines() {
|
||||
try!(self.add_line(from.clone(), line));
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Add a line from a gitignore file to this builder.
|
||||
///
|
||||
/// If this line came from a particular `gitignore` file, then its path
|
||||
/// should be provided here.
|
||||
///
|
||||
/// If the line could not be parsed as a glob, then an error is returned.
|
||||
pub fn add_line(
|
||||
&mut self,
|
||||
from: Option<PathBuf>,
|
||||
mut line: &str,
|
||||
) -> Result<&mut GitignoreBuilder, Error> {
|
||||
if line.starts_with("#") {
|
||||
return Ok(self);
|
||||
}
|
||||
if !line.ends_with("\\ ") {
|
||||
line = line.trim_right();
|
||||
}
|
||||
if line.is_empty() {
|
||||
return Ok(self);
|
||||
}
|
||||
let mut glob = Glob {
|
||||
from: from,
|
||||
original: line.to_string(),
|
||||
actual: String::new(),
|
||||
is_whitelist: false,
|
||||
is_only_dir: false,
|
||||
};
|
||||
let mut literal_separator = false;
|
||||
let has_slash = line.chars().any(|c| c == '/');
|
||||
let is_absolute = line.chars().nth(0).unwrap() == '/';
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
} else {
|
||||
if line.starts_with("!") {
|
||||
glob.is_whitelist = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
if line.starts_with("/") {
|
||||
// `man gitignore` says that if a glob starts with a slash,
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
literal_separator = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
glob.actual = line.to_string();
|
||||
if has_slash {
|
||||
literal_separator = true;
|
||||
}
|
||||
// If there was a leading slash, then this is a glob that must
|
||||
// match the entire path name. Otherwise, we should let it match
|
||||
// anywhere, so use a **/ prefix.
|
||||
if !is_absolute {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !glob.actual.starts_with("**/") {
|
||||
glob.actual = format!("**/{}", glob.actual);
|
||||
}
|
||||
}
|
||||
// If the glob ends with `/**`, then we should only match everything
|
||||
// inside a directory, but not the directory itself. Standard globs
|
||||
// will match the directory. So we add `/*` to force the issue.
|
||||
if glob.actual.ends_with("/**") {
|
||||
glob.actual = format!("{}/*", glob.actual);
|
||||
}
|
||||
let parsed = try!(
|
||||
GlobBuilder::new(&glob.actual)
|
||||
.literal_separator(literal_separator)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob(err.to_string())));
|
||||
self.builder.add(parsed);
|
||||
self.globs.push(glob);
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the file path of the current environment's global gitignore file.
|
||||
///
|
||||
/// Note that the file path returned may not exist.
|
||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||
gitconfig_contents()
|
||||
.and_then(|data| parse_excludes_file(&data))
|
||||
.or_else(excludes_file_default)
|
||||
}
|
||||
|
||||
/// Reads the raw bytes of `$HOME/.gitconfig`.
///
/// Returns `None` when `HOME` is not set, or when the file cannot be opened
/// or read in full.
fn gitconfig_contents() -> Option<Vec<u8>> {
    env::var_os("HOME")
        .map(|home| PathBuf::from(home).join(".gitconfig"))
        .and_then(|config_path| File::open(config_path).ok())
        .and_then(|handle| {
            let mut bytes = vec![];
            io::BufReader::new(handle)
                .read_to_end(&mut bytes)
                .ok()
                .map(|_| bytes)
        })
}
|
||||
|
||||
/// Returns the default file path for a global .gitignore file.
///
/// This is `$XDG_CONFIG_HOME/git/ignore` when `XDG_CONFIG_HOME` is set and
/// non-empty, and `$HOME/.config/git/ignore` otherwise. Returns `None` when
/// neither variable provides a base directory.
///
/// Note that the file path returned may not exist.
fn excludes_file_default() -> Option<PathBuf> {
    env::var_os("XDG_CONFIG_HOME")
        .and_then(|x| if x.is_empty() { None } else { Some(PathBuf::from(x)) })
        // Without XDG_CONFIG_HOME the config directory is `$HOME/.config`,
        // not `$HOME` itself.
        .or_else(|| {
            env::var_os("HOME").map(|home| PathBuf::from(home).join(".config"))
        })
        .map(|base| base.join("git/ignore"))
}
|
||||
|
||||
/// Extract git's `core.excludesfile` config setting from the raw file contents
|
||||
/// given.
|
||||
fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
||||
// N.B. This is the lazy approach, and isn't technically correct, but
|
||||
// probably works in more circumstances. I guess we would ideally have
|
||||
// a full INI parser. Yuck.
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(
|
||||
r"(?ium)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
||||
};
|
||||
let caps = match RE.captures(data) {
|
||||
None => return None,
|
||||
Some(caps) => caps,
|
||||
};
|
||||
str::from_utf8(&caps[1]).ok().map(|s| PathBuf::from(expand_tilde(s)))
|
||||
}
|
||||
|
||||
/// Expands a leading `~` in a file path to the value of `$HOME`.
///
/// Only a path that is exactly `~` or that starts with `~/` is expanded; a
/// tilde anywhere else (e.g. `/backup~old/ignore`) is an ordinary character
/// and is left untouched. If `HOME` is unset or not valid Unicode, the path
/// is returned unchanged.
fn expand_tilde(path: &str) -> String {
    let home = match env::var("HOME") {
        Err(_) => return path.to_string(),
        Ok(home) => home,
    };
    if path == "~" {
        home
    } else if path.starts_with("~/") {
        // Keep the slash and everything after it.
        format!("{}{}", home, &path[1..])
    } else {
        path.to_string()
    }
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;
    use super::{Gitignore, GitignoreBuilder};

    /// Compiles gitignore-formatted text into a matcher rooted at `root`,
    /// panicking if any line or the final glob set is invalid.
    fn gi_from_str<P: AsRef<Path>>(root: P, s: &str) -> Gitignore {
        let mut b = GitignoreBuilder::new(root);
        b.add_str(None, s).unwrap();
        b.build().unwrap()
    }

    /// Declares a test asserting that `$path` IS ignored by the gitignore
    /// text `$gi` rooted at `$root`. `$is_dir` defaults to `false`.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                let matcher = gi_from_str($root, $gi);
                let verdict = matcher.matched($path, $is_dir);
                assert!(verdict.is_ignore());
            }
        };
    }

    /// Declares a test asserting that `$path` is NOT ignored by the
    /// gitignore text `$gi` rooted at `$root`. `$is_dir` defaults to
    /// `false`.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                let matcher = gi_from_str($root, $gi);
                let verdict = matcher.matched($path, $is_dir);
                assert!(!verdict.is_ignore());
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
    ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
    ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
    ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
    ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
    not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
    not_ignored!(
        ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
        "./third_party/protobuf/csharp/src/packages/repositories.config");

    /// Copies the UTF-8 bytes of `s` into an owned buffer.
    fn bytes(s: &str) -> Vec<u8> {
        s.as_bytes().to_vec()
    }

    /// Renders a path as an owned string, panicking if it is not valid
    /// Unicode.
    fn path_string<P: AsRef<Path>>(path: P) -> String {
        path.as_ref().to_str().unwrap().to_owned()
    }

    #[test]
    fn parse_excludes_file1() {
        let config = bytes("[core]\nexcludesFile = /foo/bar");
        let parsed = super::parse_excludes_file(&config).unwrap();
        assert_eq!(path_string(parsed), "/foo/bar");
    }

    #[test]
    fn parse_excludes_file2() {
        let config = bytes("[core]\nexcludesFile = ~/foo/bar");
        let parsed = super::parse_excludes_file(&config).unwrap();
        assert_eq!(path_string(parsed), super::expand_tilde("~/foo/bar"));
    }

    #[test]
    fn parse_excludes_file3() {
        // `excludeFile` (no `s`) is not the setting being looked for.
        let config = bytes("[core]\nexcludeFile = /foo/bar");
        assert!(super::parse_excludes_file(&config).is_none());
    }

    // See: https://github.com/BurntSushi/ripgrep/issues/106
    #[test]
    fn regression_106() {
        // Building from a whitespace-only line must not panic.
        gi_from_str("/", " ");
    }
}
|
360
ignore/src/lib.rs
Normal file
360
ignore/src/lib.rs
Normal file
@@ -0,0 +1,360 @@
|
||||
/*!
|
||||
The ignore crate provides a fast recursive directory iterator that respects
|
||||
various filters such as globs, file types and `.gitignore` files. The precise
|
||||
matching rules and precedence are explained in the documentation for
|
||||
`WalkBuilder`.
|
||||
|
||||
Secondarily, this crate exposes gitignore and file type matchers for use cases
|
||||
that demand more fine-grained control.
|
||||
|
||||
# Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
recursively traverse the current directory while automatically filtering out
|
||||
files and directories according to ignore globs found in files like
|
||||
`.ignore` and `.gitignore`:
|
||||
|
||||
|
||||
```rust,no_run
|
||||
use ignore::Walk;
|
||||
|
||||
for result in Walk::new("./") {
|
||||
// Each item yielded by the iterator is either a directory entry or an
|
||||
// error, so either print the path or the error.
|
||||
match result {
|
||||
Ok(entry) => println!("{}", entry.path().display()),
|
||||
Err(err) => println!("ERROR: {}", err),
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
# Example: advanced
|
||||
|
||||
By default, the recursive directory iterator will ignore hidden files and
|
||||
directories. This can be disabled by building the iterator with `WalkBuilder`:
|
||||
|
||||
```rust,no_run
|
||||
use ignore::WalkBuilder;
|
||||
|
||||
for result in WalkBuilder::new("./").hidden(false).build() {
|
||||
println!("{:?}", result);
|
||||
}
|
||||
```
|
||||
|
||||
See the documentation for `WalkBuilder` for many other options.
|
||||
*/
|
||||
|
||||
extern crate crossbeam;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
#[cfg(test)]
|
||||
extern crate tempdir;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub use walk::{DirEntry, Walk, WalkBuilder, WalkParallel, WalkState};
|
||||
|
||||
mod dir;
|
||||
pub mod gitignore;
|
||||
mod pathutil;
|
||||
pub mod overrides;
|
||||
pub mod types;
|
||||
mod walk;
|
||||
|
||||
/// The errors reported by this crate, e.g. while parsing a gitignore file or
/// walking a directory.
///
/// The `With*` variants wrap another error and tag it with extra context.
#[derive(Debug)]
pub enum Error {
    /// Several "soft" errors gathered together, as happens when an ignore
    /// file could only be added in part.
    Partial(Vec<Error>),
    /// The wrapped error, tagged with the line number it occurred on.
    WithLineNumber { line: u64, err: Box<Error> },
    /// The wrapped error, tagged with the file path it relates to.
    WithPath { path: PathBuf, err: Box<Error> },
    /// The wrapped error, tagged with the directory depth at which it
    /// occurred during a recursive directory walk.
    WithDepth { depth: usize, err: Box<Error> },
    /// A file loop was detected while following symbolic links.
    Loop { ancestor: PathBuf, child: PathBuf },
    /// An I/O operation failed, such as reading an ignore file.
    Io(io::Error),
    /// A glob could not be parsed; the payload is the error message.
    Glob(String),
    /// A file type was selected that has no definition.
    UnrecognizedFileType(String),
    /// A user supplied file type definition could not be parsed.
    InvalidDefinition,
}
|
||||
|
||||
impl Error {
|
||||
/// Returns true if this is a partial error.
|
||||
///
|
||||
/// A partial error occurs when only some operations failed while others
|
||||
/// may have succeeded. For example, an ignore file may contain an invalid
|
||||
/// glob among otherwise valid globs.
|
||||
pub fn is_partial(&self) -> bool {
|
||||
match *self {
|
||||
Error::Partial(_) => true,
|
||||
Error::WithLineNumber { ref err, .. } => err.is_partial(),
|
||||
Error::WithPath { ref err, .. } => err.is_partial(),
|
||||
Error::WithDepth { ref err, .. } => err.is_partial(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this error is exclusively an I/O error.
|
||||
pub fn is_io(&self) -> bool {
|
||||
match *self {
|
||||
Error::Partial(ref errs) => errs.len() == 1 && errs[0].is_io(),
|
||||
Error::WithLineNumber { ref err, .. } => err.is_io(),
|
||||
Error::WithPath { ref err, .. } => err.is_io(),
|
||||
Error::WithDepth { ref err, .. } => err.is_io(),
|
||||
Error::Loop { .. } => false,
|
||||
Error::Io(_) => true,
|
||||
Error::Glob(_) => false,
|
||||
Error::UnrecognizedFileType(_) => false,
|
||||
Error::InvalidDefinition => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a depth associated with recursively walking a directory (if
|
||||
/// this error was generated from a recursive directory iterator).
|
||||
pub fn depth(&self) -> Option<usize> {
|
||||
match *self {
|
||||
Error::WithPath { ref err, .. } => err.depth(),
|
||||
Error::WithDepth { depth, .. } => Some(depth),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn an error into a tagged error with the given file path.
|
||||
fn with_path<P: AsRef<Path>>(self, path: P) -> Error {
|
||||
Error::WithPath {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
err: Box::new(self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn an error into a tagged error with the given depth.
|
||||
fn with_depth(self, depth: usize) -> Error {
|
||||
Error::WithDepth {
|
||||
depth: depth,
|
||||
err: Box::new(self),
|
||||
}
|
||||
}
|
||||
|
||||
/// Turn an error into a tagged error with the given file path and line
|
||||
/// number. If path is empty, then it is omitted from the error.
|
||||
fn tagged<P: AsRef<Path>>(self, path: P, lineno: u64) -> Error {
|
||||
let errline = Error::WithLineNumber {
|
||||
line: lineno,
|
||||
err: Box::new(self),
|
||||
};
|
||||
if path.as_ref().as_os_str().is_empty() {
|
||||
return errline;
|
||||
}
|
||||
errline.with_path(path)
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::Partial(_) => "partial error",
|
||||
Error::WithLineNumber { ref err, .. } => err.description(),
|
||||
Error::WithPath { ref err, .. } => err.description(),
|
||||
Error::WithDepth { ref err, .. } => err.description(),
|
||||
Error::Loop { .. } => "file system loop found",
|
||||
Error::Io(ref err) => err.description(),
|
||||
Error::Glob(ref msg) => msg,
|
||||
Error::UnrecognizedFileType(_) => "unrecognized file type",
|
||||
Error::InvalidDefinition => "invalid definition",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Partial(ref errs) => {
|
||||
let msgs: Vec<String> =
|
||||
errs.iter().map(|err| err.to_string()).collect();
|
||||
write!(f, "{}", msgs.join("\n"))
|
||||
}
|
||||
Error::WithLineNumber { line, ref err } => {
|
||||
write!(f, "line {}: {}", line, err)
|
||||
}
|
||||
Error::WithPath { ref path, ref err } => {
|
||||
write!(f, "{}: {}", path.display(), err)
|
||||
}
|
||||
Error::WithDepth { ref err, .. } => err.fmt(f),
|
||||
Error::Loop { ref ancestor, ref child } => {
|
||||
write!(f, "File system loop found: \
|
||||
{} points to an ancestor {}",
|
||||
child.display(), ancestor.display())
|
||||
}
|
||||
Error::Io(ref err) => err.fmt(f),
|
||||
Error::Glob(ref msg) => write!(f, "{}", msg),
|
||||
Error::UnrecognizedFileType(ref ty) => {
|
||||
write!(f, "unrecognized file type: {}", ty)
|
||||
}
|
||||
Error::InvalidDefinition => {
|
||||
write!(f, "invalid definition (format is type:glob, e.g., \
|
||||
html:*.html)")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(err: io::Error) -> Error {
|
||||
Error::Io(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<walkdir::Error> for Error {
|
||||
fn from(err: walkdir::Error) -> Error {
|
||||
let depth = err.depth();
|
||||
if let (Some(anc), Some(child)) = (err.loop_ancestor(), err.path()) {
|
||||
return Error::WithDepth {
|
||||
depth: depth,
|
||||
err: Box::new(Error::Loop {
|
||||
ancestor: anc.to_path_buf(),
|
||||
child: child.to_path_buf(),
|
||||
}),
|
||||
};
|
||||
}
|
||||
let path = err.path().map(|p| p.to_path_buf());
|
||||
let mut ig_err = Error::Io(io::Error::from(err));
|
||||
if let Some(path) = path {
|
||||
ig_err = Error::WithPath {
|
||||
path: path,
|
||||
err: Box::new(ig_err),
|
||||
};
|
||||
}
|
||||
ig_err
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
struct PartialErrorBuilder(Vec<Error>);
|
||||
|
||||
impl PartialErrorBuilder {
|
||||
fn push(&mut self, err: Error) {
|
||||
self.0.push(err);
|
||||
}
|
||||
|
||||
fn push_ignore_io(&mut self, err: Error) {
|
||||
if !err.is_io() {
|
||||
self.push(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_push(&mut self, err: Option<Error>) {
|
||||
if let Some(err) = err {
|
||||
self.push(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn maybe_push_ignore_io(&mut self, err: Option<Error>) {
|
||||
if let Some(err) = err {
|
||||
self.push_ignore_io(err);
|
||||
}
|
||||
}
|
||||
|
||||
fn into_error_option(mut self) -> Option<Error> {
|
||||
if self.0.is_empty() {
|
||||
None
|
||||
} else if self.0.len() == 1 {
|
||||
Some(self.0.pop().unwrap())
|
||||
} else {
|
||||
Some(Error::Partial(self.0))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The outcome of matching a path against a set of globs.
///
/// The type parameter `T` typically carries extra detail about the match,
/// such as which gitignore file and which glob pattern were responsible.
#[derive(Clone, Debug)]
pub enum Match<T> {
    /// No glob matched the path.
    None,
    /// The highest precedent matching glob says the path should be ignored.
    Ignore(T),
    /// The highest precedent matching glob says the path should be
    /// whitelisted.
    Whitelist(T),
}

impl<T> Match<T> {
    /// Returns true if no glob matched.
    pub fn is_none(&self) -> bool {
        self.inner().is_none()
    }

    /// Returns true if the result says the path should be ignored.
    pub fn is_ignore(&self) -> bool {
        match *self {
            Match::Ignore(_) => true,
            Match::Whitelist(_) | Match::None => false,
        }
    }

    /// Returns true if the result says the path should be whitelisted.
    pub fn is_whitelist(&self) -> bool {
        match *self {
            Match::Whitelist(_) => true,
            Match::Ignore(_) | Match::None => false,
        }
    }

    /// Swaps `Ignore` and `Whitelist`, keeping the payload. `None` is
    /// returned unchanged.
    pub fn invert(self) -> Match<T> {
        match self {
            Match::Whitelist(payload) => Match::Ignore(payload),
            Match::Ignore(payload) => Match::Whitelist(payload),
            Match::None => Match::None,
        }
    }

    /// Borrows the payload of this match, if it has one.
    pub fn inner(&self) -> Option<&T> {
        match *self {
            Match::Ignore(ref payload) | Match::Whitelist(ref payload) => {
                Some(payload)
            }
            Match::None => None,
        }
    }

    /// Transforms the payload with `f`, keeping the kind of match.
    ///
    /// A `None` match has no payload, so `f` is not called for it.
    pub fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Match<U> {
        match self {
            Match::Whitelist(payload) => Match::Whitelist(f(payload)),
            Match::Ignore(payload) => Match::Ignore(f(payload)),
            Match::None => Match::None,
        }
    }
}
|
217
ignore/src/overrides.rs
Normal file
217
ignore/src/overrides.rs
Normal file
@@ -0,0 +1,217 @@
|
||||
/*!
|
||||
The overrides module provides a way to specify a set of override globs.
|
||||
This provides functionality similar to `--include` or `--exclude` in command
|
||||
line tools.
|
||||
*/
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use {Error, Match};
|
||||
|
||||
/// Glob represents a single glob in an override matcher.
|
||||
///
|
||||
/// This is used to report information about the highest precedent glob
|
||||
/// that matched.
|
||||
///
|
||||
/// Note that not all matches necessarily correspond to a specific glob. For
|
||||
/// example, if there are one or more whitelist globs and a file path doesn't
|
||||
/// match any glob in the set, then the file path is considered to be ignored.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the matcher that produced
|
||||
/// this glob.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Glob<'a>(GlobInner<'a>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobInner<'a> {
|
||||
/// No glob matched, but the file path should still be ignored.
|
||||
UnmatchedIgnore,
|
||||
/// A glob matched.
|
||||
Matched(&'a gitignore::Glob),
|
||||
}
|
||||
|
||||
impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a set of overrides provided explicitly by the end user.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Override(Gitignore);
|
||||
|
||||
impl Override {
|
||||
/// Returns an empty matcher that never matches any file path.
|
||||
pub fn empty() -> Override {
|
||||
Override(Gitignore::empty())
|
||||
}
|
||||
|
||||
/// Returns the directory of this override set.
|
||||
///
|
||||
/// All matches are done relative to this path.
|
||||
pub fn path(&self) -> &Path {
|
||||
self.0.path()
|
||||
}
|
||||
|
||||
/// Returns true if and only if this matcher is empty.
|
||||
///
|
||||
/// When a matcher is empty, it will never match any file path.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.0.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore globs.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.0.num_whitelists()
|
||||
}
|
||||
|
||||
/// Returns the total number of whitelisted globs.
|
||||
pub fn num_whitelists(&self) -> u64 {
|
||||
self.0.num_ignores()
|
||||
}
|
||||
|
||||
/// Returns whether the given file path matched a pattern in this override
|
||||
/// matcher.
|
||||
///
|
||||
/// `is_dir` should be true if the path refers to a directory and false
|
||||
/// otherwise.
|
||||
///
|
||||
/// If there are no overrides, then this always returns `Match::None`.
|
||||
///
|
||||
/// If there is at least one whitelist override and `is_dir` is false, then
|
||||
/// this never returns `Match::None`, since non-matches are interpreted as
|
||||
/// ignored.
|
||||
///
|
||||
/// The given path is matched to the globs relative to the path given
|
||||
/// when building the override matcher. Specifically, before matching
|
||||
/// `path`, its prefix (as determined by a common suffix of the directory
|
||||
/// given) is stripped. If there is no common suffix/prefix overlap, then
|
||||
/// `path` is assumed to reside in the same directory as the root path for
|
||||
/// this set of overrides.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<Glob<'a>> {
|
||||
if self.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
let mat = self.0.matched(path, is_dir).invert();
|
||||
if mat.is_none() && self.num_whitelists() > 0 && !is_dir {
|
||||
return Match::Ignore(Glob::unmatched());
|
||||
}
|
||||
mat.map(move |giglob| Glob(GlobInner::Matched(giglob)))
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a matcher for a set of glob overrides.
|
||||
pub struct OverrideBuilder {
|
||||
builder: GitignoreBuilder,
|
||||
}
|
||||
|
||||
impl OverrideBuilder {
|
||||
/// Create a new override builder.
|
||||
///
|
||||
/// Matching is done relative to the directory path provided.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> OverrideBuilder {
|
||||
OverrideBuilder {
|
||||
builder: GitignoreBuilder::new(path),
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new override matcher from the globs added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new globs can be added to it.
|
||||
pub fn build(&self) -> Result<Override, Error> {
|
||||
Ok(Override(try!(self.builder.build())))
|
||||
}
|
||||
|
||||
/// Add a glob to the set of overrides.
|
||||
///
|
||||
/// Globs provided here have precisely the same semantics as a single
|
||||
/// line in a `gitignore` file, where the meaning of `!` is inverted:
|
||||
/// namely, `!` at the beginning of a glob will ignore a file. Without `!`,
|
||||
/// all matches of the glob provided are treated as whitelist matches.
|
||||
pub fn add(&mut self, glob: &str) -> Result<&mut OverrideBuilder, Error> {
|
||||
try!(self.builder.add_line(None, glob));
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::{Override, OverrideBuilder};

    const ROOT: &'static str = "/home/andrew/foo";

    /// Builds an override matcher rooted at `ROOT` from `globs`, panicking
    /// if any glob is rejected.
    fn ov(globs: &[&str]) -> Override {
        let mut builder = OverrideBuilder::new(ROOT);
        for glob in globs {
            builder.add(glob).unwrap();
        }
        builder.build().unwrap()
    }

    #[test]
    fn empty() {
        let set = ov(&[]);
        assert!(set.matched("a.foo", false).is_none());
        assert!(set.matched("a", false).is_none());
        assert!(set.matched("", false).is_none());
    }

    #[test]
    fn simple() {
        let set = ov(&["*.foo", "!*.bar"]);
        assert!(set.matched("a.foo", false).is_whitelist());
        assert!(set.matched("a.foo", true).is_whitelist());
        assert!(set.matched("a.rs", false).is_ignore());
        assert!(set.matched("a.rs", true).is_none());
        assert!(set.matched("a.bar", false).is_ignore());
        assert!(set.matched("a.bar", true).is_ignore());
    }

    #[test]
    fn only_ignores() {
        let set = ov(&["!*.bar"]);
        assert!(set.matched("a.rs", false).is_none());
        assert!(set.matched("a.rs", true).is_none());
        assert!(set.matched("a.bar", false).is_ignore());
        assert!(set.matched("a.bar", true).is_ignore());
    }

    #[test]
    fn precedence() {
        let set = ov(&["*.foo", "!*.bar.foo"]);
        assert!(set.matched("a.foo", false).is_whitelist());
        assert!(set.matched("a.baz", false).is_ignore());
        assert!(set.matched("a.bar.foo", false).is_ignore());
    }

    #[test]
    fn gitignore() {
        let set = ov(&["/foo", "bar/*.rs", "baz/**"]);
        assert!(set.matched("bar/wat/lib.rs", false).is_ignore());
        assert!(set.matched("wat/bar/lib.rs", false).is_whitelist());
        assert!(set.matched("foo", false).is_whitelist());
        assert!(set.matched("wat/foo", false).is_ignore());
        assert!(set.matched("baz", false).is_ignore());
        assert!(set.matched("baz/a", false).is_whitelist());
        assert!(set.matched("baz/a/b", false).is_whitelist());
    }

    #[test]
    fn allow_directories() {
        // Unmatched directories must NOT be ignored, unlike unmatched files.
        let set = ov(&["*.rs"]);
        assert!(set.matched("foo.rs", false).is_whitelist());
        assert!(set.matched("foo.c", false).is_ignore());
        assert!(set.matched("foo", false).is_ignore());
        assert!(set.matched("foo", true).is_none());
        assert!(set.matched("src/foo.rs", false).is_whitelist());
        assert!(set.matched("src/foo.c", false).is_ignore());
        assert!(set.matched("src/foo", false).is_ignore());
        assert!(set.matched("src/foo", true).is_none());
    }
}
|
108
ignore/src/pathutil.rs
Normal file
108
ignore/src/pathutil.rs
Normal file
@@ -0,0 +1,108 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
///
/// A path is hidden when `file_name` finds a final component for it, that
/// component is valid Unicode, and it starts with `.`.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    match file_name(path.as_ref()).and_then(|name| name.to_str()) {
        Some(name) => name.starts_with("."),
        None => false,
    }
}
|
||||
|
||||
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
///
/// The comparison is done on raw bytes rather than on path components, so
/// stripping `/foo` from `/foobar` yields `bar`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    use std::os::unix::ffi::OsStrExt;

    let prefix_bytes = prefix.as_ref().as_os_str().as_bytes();
    let path_bytes = path.as_os_str().as_bytes();
    if path_bytes.starts_with(prefix_bytes) {
        let rest = &path_bytes[prefix_bytes.len()..];
        Some(Path::new(OsStr::from_bytes(rest)))
    } else {
        None
    }
}
|
||||
|
||||
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
///
/// This variant defers to the standard library's `Path::strip_prefix`.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    match path.strip_prefix(prefix) {
        Ok(rest) => Some(rest),
        Err(_) => None,
    }
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
|
||||
/// the empty string.
|
||||
#[cfg(unix)]
|
||||
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
memchr(b'/', path).is_none()
|
||||
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
/// the empty string.
///
/// A path with no parent at all is not considered a file name.
#[cfg(not(unix))]
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
    match path.as_ref().parent() {
        Some(parent) => parent.as_os_str().is_empty(),
        None => false,
    }
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in `.` or `..`, or consists solely of a root or
/// prefix, `file_name` will return `None`.
///
/// This variant defers to the standard library's `Path::file_name`.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let path: &'a Path = path.as_ref();
    path.file_name()
}
|
581
ignore/src/types.rs
Normal file
581
ignore/src/types.rs
Normal file
@@ -0,0 +1,581 @@
|
||||
/*!
|
||||
The types module provides a way of associating globs on file names to file
|
||||
types.
|
||||
|
||||
This can be used to match specific types of files. For example, among
|
||||
the default file types provided, the Rust file type is defined to be `*.rs`
|
||||
with name `rust`. Similarly, the C file type is defined to be `*.{c,h}` with
|
||||
name `c`.
|
||||
|
||||
Note that the set of default types may change over time.
|
||||
|
||||
# Example
|
||||
|
||||
This shows how to create and use a simple file type matcher using the default
|
||||
file types defined in this crate.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.select("rust");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("foo.rs", false).is_whitelist());
|
||||
assert!(matcher.matched("foo.c", false).is_ignore());
|
||||
```
|
||||
|
||||
# Example: negation
|
||||
|
||||
This is like the previous example, but shows how negating a file type works.
|
||||
That is, this will let us match file paths that *don't* correspond to a
|
||||
particular file type.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.negate("c");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("foo.rs", false).is_none());
|
||||
assert!(matcher.matched("foo.c", false).is_ignore());
|
||||
```
|
||||
|
||||
# Example: custom file type definitions
|
||||
|
||||
This shows how to extend this library's default file type definitions with
|
||||
your own.
|
||||
|
||||
```
|
||||
use ignore::types::TypesBuilder;
|
||||
|
||||
let mut builder = TypesBuilder::new();
|
||||
builder.add_defaults();
|
||||
builder.add("foo", "*.foo");
|
||||
// Another way of adding a file type definition.
|
||||
// This is useful when accepting input from an end user.
|
||||
builder.add_def("bar:*.bar");
|
||||
// Note: we only select `foo`, not `bar`.
|
||||
builder.select("foo");
|
||||
let matcher = builder.build().unwrap();
|
||||
|
||||
assert!(matcher.matched("x.foo", false).is_whitelist());
|
||||
// This is ignored because we only selected the `foo` file type.
|
||||
assert!(matcher.matched("x.bar", false).is_ignore());
|
||||
```
|
||||
*/
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::file_name;
|
||||
use {Error, Match};
|
||||
|
||||
// The built-in file type definitions. Each entry pairs a type name with the
// globs that recognize files of that type. Several names may share globs
// (e.g. `markdown` and `md`), and one glob may appear under several names
// (e.g. `*.h`).
const DEFAULT_TYPES: &'static [(&'static str, &'static [&'static str])] = &[
    ("agda", &["*.agda", "*.lagda"]),
    ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
    ("asm", &["*.asm", "*.s", "*.S"]),
    ("awk", &["*.awk"]),
    ("c", &["*.c", "*.h", "*.H"]),
    ("cbor", &["*.cbor"]),
    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
    ("cmake", &["*.cmake", "CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("creole", &["*.creole"]),
    ("config", &["*.config"]),
    (
        "cpp",
        &["*.C", "*.cc", "*.cpp", "*.cxx", "*.h", "*.H", "*.hh", "*.hpp"],
    ),
    ("cs", &["*.cs"]),
    ("csharp", &["*.cs"]),
    ("css", &["*.css"]),
    ("cython", &["*.pyx"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("elisp", &["*.el"]),
    ("erlang", &["*.erl", "*.hrl"]),
    ("fish", &["*.fish"]),
    (
        "fortran",
        &[
            "*.f", "*.F", "*.f77", "*.F77", "*.pfo", "*.f90", "*.F90",
            "*.f95", "*.F95",
        ],
    ),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("go", &["*.go"]),
    ("groovy", &["*.groovy", "*.gradle"]),
    ("h", &["*.h", "*.hpp"]),
    ("hbs", &["*.hbs"]),
    ("haskell", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html"]),
    ("java", &["*.java"]),
    ("jinja", &["*.jinja", "*.jinja2"]),
    ("js", &["*.js", "*.jsx", "*.vue"]),
    ("json", &["*.json"]),
    ("jsonl", &["*.jsonl"]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lua", &["*.lua"]),
    ("m4", &["*.ac", "*.m4"]),
    (
        "make",
        &[
            "gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk",
            "*.mak",
        ],
    ),
    ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("nim", &["*.nim"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("org", &["*.org"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
    ("pdf", &["*.pdf"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("pod", &["*.pod"]),
    ("py", &["*.py"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rdoc", &["*.rdoc"]),
    ("rst", &["*.rst"]),
    ("ruby", &["*.rb"]),
    ("rust", &["*.rs"]),
    ("scala", &["*.scala"]),
    ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
    ("spark", &["*.spark"]),
    ("sql", &["*.sql"]),
    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
    ("swift", &["*.swift"]),
    ("taskpaper", &["*.taskpaper"]),
    ("tcl", &["*.tcl"]),
    ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib"]),
    ("textile", &["*.textile"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("vimscript", &["*.vim"]),
    ("wiki", &["*.mediawiki", "*.wiki"]),
    ("xml", &["*.xml"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
];
|
||||
|
||||
/// Glob represents a single glob in a set of file type definitions.
|
||||
///
|
||||
/// There may be more than one glob for a particular file type.
|
||||
///
|
||||
/// This is used to report information about the highest precedent glob
|
||||
/// that matched.
|
||||
///
|
||||
/// Note that not all matches necessarily correspond to a specific glob.
|
||||
/// For example, if there are one or more selections and a file path doesn't
|
||||
/// match any of those selections, then the file path is considered to be
|
||||
/// ignored.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the underlying file type
|
||||
/// definition, which corresponds to the lifetime of the file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Glob<'a>(GlobInner<'a>);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum GlobInner<'a> {
|
||||
/// No glob matched, but the file path should still be ignored.
|
||||
UnmatchedIgnore,
|
||||
/// A glob matched.
|
||||
Matched {
|
||||
/// The file type definition which provided the glob.
|
||||
def: &'a FileTypeDef,
|
||||
/// The index of the glob that matched inside the file type definition.
|
||||
which: usize,
|
||||
/// Whether the selection was negated or not.
|
||||
negated: bool,
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Glob<'a> {
|
||||
fn unmatched() -> Glob<'a> {
|
||||
Glob(GlobInner::UnmatchedIgnore)
|
||||
}
|
||||
}
|
||||
|
||||
/// A single file type definition: a name plus the globs that recognize it.
///
/// File type definitions can be retrieved in aggregate from a file type
/// matcher. A definition is also reported when it is responsible for a
/// match.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
    /// The name of the file type, e.g. `rust`.
    name: String,
    /// The globs that recognize files of this type.
    globs: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the globs used to recognize this file type.
    pub fn globs(&self) -> &[String] {
        self.globs.as_slice()
    }
}
|
||||
|
||||
/// Types is a file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
/// All of the file type definitions, sorted lexicographically by name.
|
||||
defs: Vec<FileTypeDef>,
|
||||
/// All of the selections made by the user.
|
||||
selections: Vec<Selection<FileTypeDef>>,
|
||||
/// Whether there is at least one Selection::Select in our selections.
|
||||
/// When this is true, a Match::None is converted to Match::Ignore.
|
||||
has_selected: bool,
|
||||
/// A mapping from glob index in the set to two indices. The first is an
|
||||
/// index into `selections` and the second is an index into the
|
||||
/// corresponding file type definition's list of globs.
|
||||
glob_to_selection: Vec<(usize, usize)>,
|
||||
/// The set of all glob selections, used for actual matching.
|
||||
set: GlobSet,
|
||||
/// Temporary storage for globs that match.
|
||||
matches: Arc<ThreadLocal<RefCell<Vec<usize>>>>,
|
||||
}
|
||||
|
||||
/// How a particular file type was selected: positively or by negation.
///
/// Both variants carry the file type name and a payload of type `T`.
#[derive(Clone, Debug)]
enum Selection<T> {
    /// Files of the named type are wanted.
    Select(String, T),
    /// Files of the named type are unwanted.
    Negate(String, T),
}

impl<T> Selection<T> {
    /// Returns true for `Negate` and false for `Select`.
    fn is_negated(&self) -> bool {
        match *self {
            Selection::Negate(..) => true,
            Selection::Select(..) => false,
        }
    }

    /// The name of the file type this selection refers to.
    fn name(&self) -> &str {
        match *self {
            Selection::Select(ref name, _)
            | Selection::Negate(ref name, _) => name,
        }
    }

    /// Transforms the payload with `f`, keeping the variant and the name.
    fn map<U, F: FnOnce(T) -> U>(self, f: F) -> Selection<U> {
        match self {
            Selection::Negate(name, payload) => {
                Selection::Negate(name, f(payload))
            }
            Selection::Select(name, payload) => {
                Selection::Select(name, f(payload))
            }
        }
    }

    /// Borrows the payload.
    fn inner(&self) -> &T {
        match *self {
            Selection::Select(_, ref payload)
            | Selection::Negate(_, ref payload) => payload,
        }
    }
}
|
||||
|
||||
impl Types {
|
||||
/// Creates a new file type matcher that never matches any path and
|
||||
/// contains no file type definitions.
|
||||
pub fn empty() -> Types {
|
||||
Types {
|
||||
defs: vec![],
|
||||
selections: vec![],
|
||||
has_selected: false,
|
||||
glob_to_selection: vec![],
|
||||
set: GlobSetBuilder::new().build().unwrap(),
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if this matcher has zero selections.
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.selections.is_empty()
|
||||
}
|
||||
|
||||
/// Returns the number of selections used in this matcher.
|
||||
pub fn len(&self) -> usize {
|
||||
self.selections.len()
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
///
|
||||
/// Definitions and globs are sorted.
|
||||
pub fn definitions(&self) -> &[FileTypeDef] {
|
||||
&self.defs
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this file type matcher.
|
||||
///
|
||||
/// The path is considered whitelisted if it matches a selected file type.
|
||||
/// The path is considered ignored if it matches a negated file type.
|
||||
/// If at least one file type is selected and `path` doesn't match, then
|
||||
/// the path is also considered ignored.
|
||||
pub fn matched<'a, P: AsRef<Path>>(
|
||||
&'a self,
|
||||
path: P,
|
||||
is_dir: bool,
|
||||
) -> Match<Glob<'a>> {
|
||||
// File types don't apply to directories, and we can't do anything
|
||||
// if our glob set is empty.
|
||||
if is_dir || self.set.is_empty() {
|
||||
return Match::None;
|
||||
}
|
||||
// We only want to match against the file name, so extract it.
|
||||
// If one doesn't exist, then we can't match it.
|
||||
let name = match file_name(path.as_ref()) {
|
||||
Some(name) => name,
|
||||
None if self.has_selected => {
|
||||
return Match::Ignore(Glob::unmatched());
|
||||
}
|
||||
None => {
|
||||
return Match::None;
|
||||
}
|
||||
};
|
||||
let mut matches = self.matches.get_default().borrow_mut();
|
||||
self.set.matches_into(name, &mut *matches);
|
||||
// The highest precedent match is the last one.
|
||||
if let Some(&i) = matches.last() {
|
||||
let (isel, iglob) = self.glob_to_selection[i];
|
||||
let sel = &self.selections[isel];
|
||||
let glob = Glob(GlobInner::Matched {
|
||||
def: sel.inner(),
|
||||
which: iglob,
|
||||
negated: sel.is_negated(),
|
||||
});
|
||||
return if sel.is_negated() {
|
||||
Match::Ignore(glob)
|
||||
} else {
|
||||
Match::Whitelist(glob)
|
||||
};
|
||||
}
|
||||
if self.has_selected {
|
||||
Match::Ignore(Glob::unmatched())
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// TypesBuilder builds a type matcher from a set of file type definitions and
|
||||
/// a set of file type selections.
|
||||
pub struct TypesBuilder {
|
||||
types: HashMap<String, FileTypeDef>,
|
||||
selections: Vec<Selection<()>>,
|
||||
}
|
||||
|
||||
impl TypesBuilder {
|
||||
/// Create a new builder for a file type matcher.
|
||||
///
|
||||
/// The builder contains *no* type definitions to start with. A set
|
||||
/// of default type definitions can be added with `add_defaults`, and
|
||||
/// additional type definitions can be added with `select` and `negate`.
|
||||
pub fn new() -> TypesBuilder {
|
||||
TypesBuilder {
|
||||
types: HashMap::new(),
|
||||
selections: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the current set of file type definitions *and* selections into
|
||||
/// a file type matcher.
|
||||
pub fn build(&self) -> Result<Types, Error> {
|
||||
let defs = self.definitions();
|
||||
let has_selected = self.selections.iter().any(|s| !s.is_negated());
|
||||
|
||||
let mut selections = vec![];
|
||||
let mut glob_to_selection = vec![];
|
||||
let mut build_set = GlobSetBuilder::new();
|
||||
for (isel, selection) in self.selections.iter().enumerate() {
|
||||
let def = match self.types.get(selection.name()) {
|
||||
Some(def) => def.clone(),
|
||||
None => {
|
||||
let name = selection.name().to_string();
|
||||
return Err(Error::UnrecognizedFileType(name));
|
||||
}
|
||||
};
|
||||
for (iglob, glob) in def.globs.iter().enumerate() {
|
||||
build_set.add(try!(
|
||||
GlobBuilder::new(glob)
|
||||
.literal_separator(true)
|
||||
.build()
|
||||
.map_err(|err| Error::Glob(err.to_string()))));
|
||||
glob_to_selection.push((isel, iglob));
|
||||
}
|
||||
selections.push(selection.clone().map(move |_| def));
|
||||
}
|
||||
let set = try!(build_set.build().map_err(|err| {
|
||||
Error::Glob(err.to_string())
|
||||
}));
|
||||
Ok(Types {
|
||||
defs: defs,
|
||||
selections: selections,
|
||||
has_selected: has_selected,
|
||||
glob_to_selection: glob_to_selection,
|
||||
set: set,
|
||||
matches: Arc::new(ThreadLocal::default()),
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
///
|
||||
/// Definitions and globs are sorted.
|
||||
pub fn definitions(&self) -> Vec<FileTypeDef> {
|
||||
let mut defs = vec![];
|
||||
for def in self.types.values() {
|
||||
let mut def = def.clone();
|
||||
def.globs.sort();
|
||||
defs.push(def);
|
||||
}
|
||||
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
|
||||
defs
|
||||
}
|
||||
|
||||
/// Select the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types currently defined are selected.
|
||||
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selections.push(Selection::Select(name.to_string(), ()));
|
||||
}
|
||||
} else {
|
||||
self.selections.push(Selection::Select(name.to_string(), ()));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Ignore the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types currently defined are negated.
|
||||
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selections.push(Selection::Negate(name.to_string(), ()));
|
||||
}
|
||||
} else {
|
||||
self.selections.push(Selection::Negate(name.to_string(), ()));
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Clear any file type definitions for the type name given.
|
||||
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
self.types.remove(name);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition. `name` can be arbitrary and `pat`
|
||||
/// should be a glob recognizing file paths belonging to the `name` type.
|
||||
///
|
||||
/// If `name` is `all` or otherwise contains a `:`, then an error is
|
||||
/// returned.
|
||||
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
|
||||
if name == "all" || name.contains(':') {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
let (key, glob) = (name.to_string(), glob.to_string());
|
||||
self.types.entry(key).or_insert_with(|| {
|
||||
FileTypeDef { name: name.to_string(), globs: vec![] }
|
||||
}).globs.push(glob);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a new file type definition specified in string form. The format
|
||||
/// is `name:glob`. Names may not include a colon.
|
||||
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
|
||||
let name: String = def.chars().take_while(|&c| c != ':').collect();
|
||||
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
|
||||
if name.is_empty() || pat.is_empty() {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
self.add(&name, &pat)
|
||||
}
|
||||
|
||||
/// Add a set of default file type definitions.
|
||||
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
|
||||
static MSG: &'static str = "adding a default type should never fail";
|
||||
for &(name, exts) in DEFAULT_TYPES {
|
||||
for ext in exts {
|
||||
self.add(name, ext).expect(MSG);
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation.
    //
    // Arguments: test name, type definitions (`name:glob` strings), types to
    // select, types to negate, and the path to check. A leading `not` flips
    // the expectation from "path is not ignored" to "path is ignored".
    //
    // The arm order matters: the `not` arm must come before the six-argument
    // arm, otherwise `not` would be parsed as the test name.
    macro_rules! matched {
        ($name:ident, $types:expr, $sel:expr, $selnot:expr, $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, true);
        };
        (not, $name:ident, $types:expr, $sel:expr, $selnot:expr, $path:expr) => {
            matched!($name, $types, $sel, $selnot, $path, false);
        };
        ($name:ident, $types:expr, $sel:expr, $selnot:expr,
         $path:expr, $matched:expr) => {
            #[test]
            fn $name() {
                let mut builder = TypesBuilder::new();
                for def in $types {
                    builder.add_def(def).unwrap();
                }
                for ty in $sel {
                    builder.select(ty);
                }
                for ty in $selnot {
                    builder.negate(ty);
                }
                let built = builder.build().unwrap();
                let verdict = built.matched($path, false);
                assert_eq!($matched, !verdict.is_ignore());
            }
        };
    }

    // Type definitions shared by every test below. Note that `foo` overlaps
    // with `rust` on `*.rs`.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
            "foo:*.{rs,foo}",
        ]
    }

    // Paths that must not be ignored.
    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");
    matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");

    // Paths that must be ignored.
    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
    matched!(not, matchnot3, types(), vec!["foo"], vec!["rust"], "main.rs");
    matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
    matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
}
|
1371
ignore/src/walk.rs
Normal file
1371
ignore/src/walk.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
||||
# Contributor: Andrew Gallant <jamslam@gmail.com>
|
||||
# Maintainer: Andrew Gallant
|
||||
pkgname=ripgrep
|
||||
pkgver=0.1.16
|
||||
pkgver=0.2.3
|
||||
pkgrel=1
|
||||
pkgdesc="A search tool that combines the usability of The Silver Searcher with the raw speed of grep."
|
||||
arch=('i686' 'x86_64')
|
||||
@@ -9,7 +9,7 @@ url="https://github.com/BurntSushi/ripgrep"
|
||||
license=('UNLICENSE')
|
||||
makedepends=('cargo')
|
||||
source=("https://github.com/BurntSushi/$pkgname/archive/$pkgver.tar.gz")
|
||||
sha256sums=('6f877018742c9a7557102ccebeedb40d7c779b470a5910a7bdab50ca2ce21532')
|
||||
sha256sums=('a88531558d2023df76190ea2e52bee50d739eabece8a57df29abbad0c6bdb917')
|
||||
|
||||
build() {
|
||||
cd "$pkgname-$pkgver"
|
||||
@@ -29,8 +29,9 @@ package() {
|
||||
|
||||
install -Dm755 "target/release/rg" "$pkgdir/usr/bin/rg"
|
||||
install -Dm644 "doc/rg.1" "$pkgdir/usr/share/man/man1/rg.1"
|
||||
install -Dm644 "README-NEW.md" "$pkgdir/usr/share/doc/ripgrep/README.md"
|
||||
install -Dm644 "README.md" "$pkgdir/usr/share/doc/ripgrep/README.md"
|
||||
install -Dm644 "COPYING" "$pkgdir/usr/share/doc/ripgrep/COPYING"
|
||||
install -Dm644 "LICENSE-MIT" "$pkgdir/usr/share/doc/ripgrep/LICENSE-MIT"
|
||||
install -Dm644 "UNLICENSE" "$pkgdir/usr/share/doc/ripgrep/UNLICENSE"
|
||||
install -Dm644 "CHANGELOG.md" "$pkgdir/usr/share/doc/ripgrep/CHANGELOG.md"
|
||||
}
|
||||
|
14
pkg/brew/ripgrep-bin.rb
Normal file
14
pkg/brew/ripgrep-bin.rb
Normal file
@@ -0,0 +1,14 @@
|
||||
# Homebrew formula that installs the prebuilt ripgrep release archive for
# x86_64 macOS (no compilation from source).
class RipgrepBin < Formula
  # Declared before `url` because the URL interpolates `version`.
  version "0.2.5"
  desc "Search tool like grep and The Silver Searcher."
  homepage "https://github.com/BurntSushi/ripgrep"
  url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
  sha256 "c6775a50c6f769de2ee66892a700961ec60a85219aa414ef6880dfcc17bf2467"

  # Declared as conflicting with the formula named "ripgrep".
  conflicts_with "ripgrep"

  # Install the `rg` binary and its man page from the unpacked archive.
  def install
    bin.install "rg"
    man1.install "rg.1"
  end
end
|
@@ -1,19 +0,0 @@
|
||||
require 'formula'
|
||||
# Homebrew formula that installs a prebuilt ripgrep release archive, picking
# the 64-bit or 32-bit macOS build based on the host CPU.
class Ripgrep < Formula
  # Declared before `url` because both URLs interpolate `version`.
  version "0.1.17"
  desc "Search tool like grep and The Silver Searcher."
  homepage "https://github.com/BurntSushi/ripgrep"

  # Each architecture has its own archive and therefore its own checksum.
  if Hardware::CPU.is_64_bit?
    url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
    sha256 "cb7b551a08849cef6ef8f17229224f094299692981976a3c5873c93f68c8fa1a"
  else
    url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-i686-apple-darwin.tar.gz"
    sha256 "0e936874b9f3fd661c5566e7f8fe18343baa5e9371e57d8d71000e9234fc376b"
  end

  # Install the `rg` binary and its man page from the unpacked archive.
  def install
    bin.install "rg"
    man1.install "rg.1"
  end
end
|
305
src/args.rs
305
src/args.rs
@@ -1,9 +1,9 @@
|
||||
use std::cmp;
|
||||
use std::env;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process;
|
||||
|
||||
use docopt::Docopt;
|
||||
use docopt::{self, Docopt};
|
||||
use env_logger;
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use log;
|
||||
@@ -14,19 +14,16 @@ use term::Terminal;
|
||||
use term;
|
||||
#[cfg(windows)]
|
||||
use term::WinConsole;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use atty;
|
||||
use gitignore::{Gitignore, GitignoreBuilder};
|
||||
use ignore::Ignore;
|
||||
use ignore::overrides::{Override, OverrideBuilder};
|
||||
use ignore::types::{FileTypeDef, Types, TypesBuilder};
|
||||
use ignore;
|
||||
use out::{Out, ColoredTerminal};
|
||||
use printer::Printer;
|
||||
use search_buffer::BufferSearcher;
|
||||
use search_stream::{InputBuffer, Searcher};
|
||||
#[cfg(windows)]
|
||||
use terminal_win::WindowsBuffer;
|
||||
use types::{FileTypeDef, Types, TypesBuilder};
|
||||
use walk;
|
||||
use worker::{Worker, WorkerBuilder};
|
||||
|
||||
use Result;
|
||||
|
||||
@@ -42,7 +39,9 @@ Usage: rg [options] -e PATTERN ... [<path> ...]
|
||||
rg [options] --help
|
||||
rg [options] --version
|
||||
|
||||
rg recursively searches your current directory for a regex pattern.
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
|
||||
Project home page: https://github.com/BurntSushi/ripgrep
|
||||
|
||||
Common options:
|
||||
-a, --text Search binary files as if they were text.
|
||||
@@ -52,7 +51,8 @@ Common options:
|
||||
[default: auto]
|
||||
-e, --regexp PATTERN ... Use PATTERN to search. This option can be
|
||||
provided multiple times, where all patterns
|
||||
given are searched.
|
||||
given are searched. This is also useful when
|
||||
searching for a pattern that starts with a dash.
|
||||
-F, --fixed-strings Treat the pattern as a literal string instead of
|
||||
a regular expression.
|
||||
-g, --glob GLOB ... Include or exclude files for searching that
|
||||
@@ -62,14 +62,12 @@ Common options:
|
||||
Precede a glob with a '!' to exclude it.
|
||||
-h, --help Show this usage message.
|
||||
-i, --ignore-case Case insensitive search.
|
||||
Overridden by --case-sensitive.
|
||||
-n, --line-number Show line numbers (1-based). This is enabled
|
||||
by default at a tty.
|
||||
-N, --no-line-number Suppress line numbers.
|
||||
-q, --quiet Do not print anything to stdout. If a match is
|
||||
found in a file, stop searching that file.
|
||||
-r, --replace ARG Replace every match with the string given.
|
||||
Capture group indices (e.g., $5) and names
|
||||
(e.g., $foo) are supported.
|
||||
-t, --type TYPE ... Only search files matching TYPE. Multiple type
|
||||
flags may be provided. Use the --type-list flag
|
||||
to list all available types.
|
||||
@@ -133,14 +131,31 @@ Less common options:
|
||||
Search hidden directories and files. (Hidden directories and files are
|
||||
skipped by default.)
|
||||
|
||||
--ignore-file FILE ...
|
||||
Specify additional ignore files for filtering file paths. Ignore files
|
||||
should be in the gitignore format and are matched relative to the
|
||||
current working directory. These ignore files have lower precedence
|
||||
than all other ignore file types. When specifying multiple ignore
|
||||
files, earlier files have lower precedence than later files.
|
||||
|
||||
-L, --follow
|
||||
Follow symlinks.
|
||||
|
||||
-m, --max-count NUM
|
||||
Limit the number of matching lines per file searched to NUM.
|
||||
|
||||
--maxdepth NUM
|
||||
Descend at most NUM directories below the command line arguments.
|
||||
A value of zero only searches the starting-points themselves.
|
||||
|
||||
--mmap
|
||||
Search using memory maps when possible. This is enabled by default
|
||||
when ripgrep thinks it will be faster. (Note that mmap searching
|
||||
doesn't currently support the various context related options.)
|
||||
|
||||
--no-messages
|
||||
Suppress all error messages.
|
||||
|
||||
--no-mmap
|
||||
Never use memory maps, even when they might be faster.
|
||||
|
||||
@@ -155,15 +170,32 @@ Less common options:
|
||||
Don't respect version control ignore files (e.g., .gitignore).
|
||||
Note that .ignore files will continue to be respected.
|
||||
|
||||
--null
|
||||
Whenever a file name is printed, follow it with a NUL byte.
|
||||
This includes printing filenames before matches, and when printing
|
||||
a list of matching files such as with --count, --files-with-matches
|
||||
and --files.
|
||||
|
||||
-p, --pretty
|
||||
Alias for --color=always --heading -n.
|
||||
|
||||
-r, --replace ARG
|
||||
Replace every match with the string given when printing search results.
|
||||
Neither this flag nor any other flag will modify your files.
|
||||
|
||||
Capture group indices (e.g., $5) and names (e.g., $foo) are supported
|
||||
in the replacement string.
|
||||
|
||||
-s, --case-sensitive
|
||||
Search case sensitively. This overrides --ignore-case and --smart-case.
|
||||
|
||||
-S, --smart-case
|
||||
Search case insensitively if the pattern is all lowercase.
|
||||
Search case sensitively otherwise.
|
||||
Search case sensitively otherwise. This is overridden by
|
||||
either --case-sensitive or --ignore-case.
|
||||
|
||||
-j, --threads ARG
|
||||
The number of threads to use. Defaults to the number of logical CPUs
|
||||
The number of threads to use. 0 means use the number of logical CPUs
|
||||
(capped at 6). [default: 0]
|
||||
|
||||
--version
|
||||
@@ -179,12 +211,13 @@ File type management options:
|
||||
Show all supported file types and their associated globs.
|
||||
|
||||
--type-add ARG ...
|
||||
Add a new glob for a particular file type. Only one glob can be added
|
||||
at a time. Multiple type-add flags can be provided. Unless type-clear
|
||||
is used, globs are added to any existing globs inside of ripgrep. Note
|
||||
that this must be passed to every invocation of rg.
|
||||
Add a new glob for a particular file type. Only one glob can be
|
||||
added at a time. Multiple --type-add flags can be provided.
|
||||
Unless --type-clear is used, globs are added to any existing globs
|
||||
inside of ripgrep. Note that this must be passed to every invocation of
|
||||
rg. Type settings are NOT persisted.
|
||||
|
||||
Example: `--type-add html:*.html`
|
||||
Example: `rg --type-add 'foo:*.foo' -tfoo PATTERN`
|
||||
|
||||
--type-clear TYPE ...
|
||||
Clear the file type globs previously defined for TYPE. This only clears
|
||||
@@ -200,6 +233,7 @@ pub struct RawArgs {
|
||||
arg_path: Vec<String>,
|
||||
flag_after_context: usize,
|
||||
flag_before_context: usize,
|
||||
flag_case_sensitive: bool,
|
||||
flag_color: String,
|
||||
flag_column: bool,
|
||||
flag_context: usize,
|
||||
@@ -213,17 +247,22 @@ pub struct RawArgs {
|
||||
flag_heading: bool,
|
||||
flag_hidden: bool,
|
||||
flag_ignore_case: bool,
|
||||
flag_ignore_file: Vec<String>,
|
||||
flag_invert_match: bool,
|
||||
flag_line_number: bool,
|
||||
flag_fixed_strings: bool,
|
||||
flag_max_count: Option<usize>,
|
||||
flag_maxdepth: Option<usize>,
|
||||
flag_mmap: bool,
|
||||
flag_no_heading: bool,
|
||||
flag_no_ignore: bool,
|
||||
flag_no_ignore_parent: bool,
|
||||
flag_no_ignore_vcs: bool,
|
||||
flag_no_line_number: bool,
|
||||
flag_no_messages: bool,
|
||||
flag_no_mmap: bool,
|
||||
flag_no_filename: bool,
|
||||
flag_null: bool,
|
||||
flag_pretty: bool,
|
||||
flag_quiet: bool,
|
||||
flag_regexp: Vec<String>,
|
||||
@@ -245,7 +284,6 @@ pub struct RawArgs {
|
||||
/// Args are transformed/normalized from RawArgs.
|
||||
#[derive(Debug)]
|
||||
pub struct Args {
|
||||
pattern: String,
|
||||
paths: Vec<PathBuf>,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
@@ -257,23 +295,27 @@ pub struct Args {
|
||||
eol: u8,
|
||||
files: bool,
|
||||
follow: bool,
|
||||
glob_overrides: Option<Gitignore>,
|
||||
glob_overrides: Override,
|
||||
grep: Grep,
|
||||
heading: bool,
|
||||
hidden: bool,
|
||||
ignore_case: bool,
|
||||
ignore_files: Vec<PathBuf>,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
line_per_match: bool,
|
||||
max_count: Option<u64>,
|
||||
maxdepth: Option<usize>,
|
||||
mmap: bool,
|
||||
no_ignore: bool,
|
||||
no_ignore_parent: bool,
|
||||
no_ignore_vcs: bool,
|
||||
no_messages: bool,
|
||||
null: bool,
|
||||
quiet: bool,
|
||||
replace: Option<Vec<u8>>,
|
||||
text: bool,
|
||||
threads: usize,
|
||||
type_defs: Vec<FileTypeDef>,
|
||||
type_list: bool,
|
||||
types: Types,
|
||||
with_filename: bool,
|
||||
@@ -282,7 +324,6 @@ pub struct Args {
|
||||
impl RawArgs {
|
||||
/// Convert arguments parsed into a configuration used by ripgrep.
|
||||
fn to_args(&self) -> Result<Args> {
|
||||
let pattern = self.pattern();
|
||||
let paths =
|
||||
if self.arg_path.is_empty() {
|
||||
if atty::on_stdin()
|
||||
@@ -312,7 +353,7 @@ impl RawArgs {
|
||||
} else if cfg!(windows) {
|
||||
// On Windows, memory maps appear faster than read calls. Neat.
|
||||
true
|
||||
} else if cfg!(darwin) {
|
||||
} else if cfg!(target_os = "macos") {
|
||||
// On Mac, memory maps appear to suck. Neat.
|
||||
false
|
||||
} else {
|
||||
@@ -325,30 +366,30 @@ impl RawArgs {
|
||||
}
|
||||
let glob_overrides =
|
||||
if self.flag_glob.is_empty() {
|
||||
None
|
||||
Override::empty()
|
||||
} else {
|
||||
let cwd = try!(env::current_dir());
|
||||
let mut bgi = GitignoreBuilder::new(cwd);
|
||||
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
|
||||
for pat in &self.flag_glob {
|
||||
try!(bgi.add("<argv>", pat));
|
||||
try!(ovr.add(pat));
|
||||
}
|
||||
Some(try!(bgi.build()))
|
||||
try!(ovr.build())
|
||||
};
|
||||
let threads =
|
||||
if self.flag_threads == 0 {
|
||||
cmp::min(8, num_cpus::get())
|
||||
num_cpus::get()
|
||||
} else {
|
||||
self.flag_threads
|
||||
};
|
||||
let color =
|
||||
if self.flag_vimgrep {
|
||||
if self.flag_color == "always" {
|
||||
true
|
||||
} else if self.flag_vimgrep {
|
||||
false
|
||||
} else if self.flag_color == "auto" {
|
||||
atty::on_stdout() || self.flag_pretty
|
||||
} else {
|
||||
self.flag_color == "always"
|
||||
false
|
||||
};
|
||||
let eol = b'\n';
|
||||
|
||||
let mut with_filename = self.flag_with_filename;
|
||||
if !with_filename {
|
||||
@@ -356,22 +397,13 @@ impl RawArgs {
|
||||
}
|
||||
with_filename = with_filename && !self.flag_no_filename;
|
||||
|
||||
let mut btypes = TypesBuilder::new();
|
||||
btypes.add_defaults();
|
||||
try!(self.add_types(&mut btypes));
|
||||
let types = try!(btypes.build());
|
||||
let grep = try!(
|
||||
GrepBuilder::new(&pattern)
|
||||
.case_smart(self.flag_smart_case)
|
||||
.case_insensitive(self.flag_ignore_case)
|
||||
.line_terminator(eol)
|
||||
.build()
|
||||
);
|
||||
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
|
||||
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
|
||||
let text = self.flag_text || self.flag_unrestricted >= 3;
|
||||
let ignore_files: Vec<_> = self.flag_ignore_file.iter().map(|p| {
|
||||
Path::new(p).to_path_buf()
|
||||
}).collect();
|
||||
let mut args = Args {
|
||||
pattern: pattern,
|
||||
paths: paths,
|
||||
after_context: after_context,
|
||||
before_context: before_context,
|
||||
@@ -380,17 +412,20 @@ impl RawArgs {
|
||||
context_separator: unescape(&self.flag_context_separator),
|
||||
count: self.flag_count,
|
||||
files_with_matches: self.flag_files_with_matches,
|
||||
eol: eol,
|
||||
eol: self.eol(),
|
||||
files: self.flag_files,
|
||||
follow: self.flag_follow,
|
||||
glob_overrides: glob_overrides,
|
||||
grep: grep,
|
||||
grep: try!(self.grep()),
|
||||
heading: !self.flag_no_heading && self.flag_heading,
|
||||
hidden: hidden,
|
||||
ignore_case: self.flag_ignore_case,
|
||||
ignore_files: ignore_files,
|
||||
invert_match: self.flag_invert_match,
|
||||
line_number: !self.flag_no_line_number && self.flag_line_number,
|
||||
line_per_match: self.flag_vimgrep,
|
||||
max_count: self.flag_max_count.map(|max| max as u64),
|
||||
maxdepth: self.flag_maxdepth,
|
||||
mmap: mmap,
|
||||
no_ignore: no_ignore,
|
||||
no_ignore_parent:
|
||||
@@ -399,13 +434,14 @@ impl RawArgs {
|
||||
no_ignore_vcs:
|
||||
// --no-ignore implies --no-ignore-vcs
|
||||
self.flag_no_ignore_vcs || no_ignore,
|
||||
no_messages: self.flag_no_messages,
|
||||
null: self.flag_null,
|
||||
quiet: self.flag_quiet,
|
||||
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
|
||||
text: text,
|
||||
threads: threads,
|
||||
type_defs: btypes.definitions(),
|
||||
type_list: self.flag_type_list,
|
||||
types: types,
|
||||
types: try!(self.types()),
|
||||
with_filename: with_filename,
|
||||
};
|
||||
// If stdout is a tty, then apply some special default options.
|
||||
@@ -424,20 +460,22 @@ impl RawArgs {
|
||||
Ok(args)
|
||||
}
|
||||
|
||||
fn add_types(&self, types: &mut TypesBuilder) -> Result<()> {
|
||||
fn types(&self) -> Result<Types> {
|
||||
let mut btypes = TypesBuilder::new();
|
||||
btypes.add_defaults();
|
||||
for ty in &self.flag_type_clear {
|
||||
types.clear(ty);
|
||||
btypes.clear(ty);
|
||||
}
|
||||
for def in &self.flag_type_add {
|
||||
try!(types.add_def(def));
|
||||
try!(btypes.add_def(def));
|
||||
}
|
||||
for ty in &self.flag_type {
|
||||
types.select(ty);
|
||||
btypes.select(ty);
|
||||
}
|
||||
for ty in &self.flag_type_not {
|
||||
types.negate(ty);
|
||||
btypes.negate(ty);
|
||||
}
|
||||
Ok(())
|
||||
btypes.build().map_err(From::from)
|
||||
}
|
||||
|
||||
fn pattern(&self) -> String {
|
||||
@@ -467,6 +505,27 @@ impl RawArgs {
|
||||
s
|
||||
}
|
||||
}
|
||||
|
||||
fn eol(&self) -> u8 {
|
||||
// We might want to make this configurable.
|
||||
b'\n'
|
||||
}
|
||||
|
||||
fn grep(&self) -> Result<Grep> {
|
||||
let smart =
|
||||
self.flag_smart_case
|
||||
&& !self.flag_ignore_case
|
||||
&& !self.flag_case_sensitive;
|
||||
let casei =
|
||||
self.flag_ignore_case
|
||||
&& !self.flag_case_sensitive;
|
||||
GrepBuilder::new(&self.pattern())
|
||||
.case_smart(smart)
|
||||
.case_insensitive(casei)
|
||||
.line_terminator(self.eol())
|
||||
.build()
|
||||
.map_err(From::from)
|
||||
}
|
||||
}
|
||||
|
||||
impl Args {
|
||||
@@ -494,7 +553,15 @@ impl Args {
|
||||
let mut raw: RawArgs =
|
||||
Docopt::new(USAGE)
|
||||
.and_then(|d| d.argv(argv).version(Some(version())).decode())
|
||||
.unwrap_or_else(|e| e.exit());
|
||||
.unwrap_or_else(|e| {
|
||||
match e {
|
||||
docopt::Error::Version(ref v) => {
|
||||
println!("ripgrep {}", v);
|
||||
process::exit(0);
|
||||
}
|
||||
e => e.exit(),
|
||||
}
|
||||
});
|
||||
|
||||
let mut logb = env_logger::LogBuilder::new();
|
||||
if raw.flag_debug {
|
||||
@@ -531,16 +598,9 @@ impl Args {
|
||||
self.grep.clone()
|
||||
}
|
||||
|
||||
/// Creates a new input buffer that is used in searching.
|
||||
pub fn input_buffer(&self) -> InputBuffer {
|
||||
let mut inp = InputBuffer::new();
|
||||
inp.eol(self.eol);
|
||||
inp
|
||||
}
|
||||
|
||||
/// Whether we should prefer memory maps for searching or not.
|
||||
pub fn mmap(&self) -> bool {
|
||||
self.mmap
|
||||
/// Whether ripgrep should be quiet or not.
|
||||
pub fn quiet(&self) -> bool {
|
||||
self.quiet
|
||||
}
|
||||
|
||||
/// Create a new printer of individual search results that writes to the
|
||||
@@ -552,7 +612,7 @@ impl Args {
|
||||
.eol(self.eol)
|
||||
.heading(self.heading)
|
||||
.line_per_match(self.line_per_match)
|
||||
.quiet(self.quiet)
|
||||
.null(self.null)
|
||||
.with_filename(self.with_filename);
|
||||
if let Some(ref rep) = self.replace {
|
||||
p = p.replace(rep.clone());
|
||||
@@ -581,6 +641,11 @@ impl Args {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the given arguments are known to never produce a match.
|
||||
pub fn never_match(&self) -> bool {
|
||||
self.max_count == Some(0)
|
||||
}
|
||||
|
||||
/// Create a new buffer for use with searching.
|
||||
#[cfg(not(windows))]
|
||||
pub fn outbuf(&self) -> ColoredTerminal<term::TerminfoTerminal<Vec<u8>>> {
|
||||
@@ -612,18 +677,16 @@ impl Args {
|
||||
&self.paths
|
||||
}
|
||||
|
||||
/// Create a new line based searcher whose configuration is taken from the
|
||||
/// command line. This searcher supports a dizzying array of features:
|
||||
/// inverted matching, line counting, context control and more.
|
||||
pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
|
||||
&self,
|
||||
inp: &'a mut InputBuffer,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
rdr: R,
|
||||
) -> Searcher<'a, R, W> {
|
||||
Searcher::new(inp, printer, grep, path, rdr)
|
||||
/// Returns true if there is exactly one file path given to search.
|
||||
pub fn is_one_path(&self) -> bool {
|
||||
self.paths.len() == 1
|
||||
&& (self.paths[0] == Path::new("-") || self.paths[0].is_file())
|
||||
}
|
||||
|
||||
/// Create a worker whose configuration is taken from the
|
||||
/// command line.
|
||||
pub fn worker(&self) -> Worker {
|
||||
WorkerBuilder::new(self.grep())
|
||||
.after_context(self.after_context)
|
||||
.before_context(self.before_context)
|
||||
.count(self.count)
|
||||
@@ -631,26 +694,11 @@ impl Args {
|
||||
.eol(self.eol)
|
||||
.line_number(self.line_number)
|
||||
.invert_match(self.invert_match)
|
||||
.max_count(self.max_count)
|
||||
.mmap(self.mmap)
|
||||
.quiet(self.quiet)
|
||||
.text(self.text)
|
||||
}
|
||||
|
||||
/// Create a new line based searcher whose configuration is taken from the
|
||||
/// command line. This search operates on an entire file all once (which
|
||||
/// may have been memory mapped).
|
||||
pub fn searcher_buffer<'a, W: Send + Terminal>(
|
||||
&self,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
) -> BufferSearcher<'a, W> {
|
||||
BufferSearcher::new(printer, grep, path, buf)
|
||||
.count(self.count)
|
||||
.files_with_matches(self.files_with_matches)
|
||||
.eol(self.eol)
|
||||
.line_number(self.line_number)
|
||||
.invert_match(self.invert_match)
|
||||
.text(self.text)
|
||||
.build()
|
||||
}
|
||||
|
||||
/// Returns the number of worker search threads that should be used.
|
||||
@@ -660,7 +708,7 @@ impl Args {
|
||||
|
||||
/// Returns a list of type definitions currently loaded.
|
||||
pub fn type_defs(&self) -> &[FileTypeDef] {
|
||||
&self.type_defs
|
||||
self.types.definitions()
|
||||
}
|
||||
|
||||
/// Returns true if ripgrep should print the type definitions currently
|
||||
@@ -669,21 +717,48 @@ impl Args {
|
||||
self.type_list
|
||||
}
|
||||
|
||||
/// Create a new recursive directory iterator at the path given.
|
||||
pub fn walker(&self, path: &Path) -> Result<walk::Iter> {
|
||||
let wd = WalkDir::new(path).follow_links(self.follow);
|
||||
let mut ig = Ignore::new();
|
||||
ig.ignore_hidden(!self.hidden);
|
||||
ig.no_ignore(self.no_ignore);
|
||||
ig.no_ignore_vcs(self.no_ignore_vcs);
|
||||
ig.add_types(self.types.clone());
|
||||
if !self.no_ignore_parent {
|
||||
try!(ig.push_parents(path));
|
||||
/// Returns true if error messages should be suppressed.
|
||||
pub fn no_messages(&self) -> bool {
|
||||
self.no_messages
|
||||
}
|
||||
|
||||
/// Create a new recursive directory iterator over the paths in argv.
|
||||
pub fn walker(&self) -> ignore::Walk {
|
||||
self.walker_builder().build()
|
||||
}
|
||||
|
||||
/// Create a new parallel recursive directory iterator over the paths
|
||||
/// in argv.
|
||||
pub fn walker_parallel(&self) -> ignore::WalkParallel {
|
||||
self.walker_builder().build_parallel()
|
||||
}
|
||||
|
||||
fn walker_builder(&self) -> ignore::WalkBuilder {
|
||||
let paths = self.paths();
|
||||
let mut wd = ignore::WalkBuilder::new(&paths[0]);
|
||||
for path in &paths[1..] {
|
||||
wd.add(path);
|
||||
}
|
||||
if let Some(ref overrides) = self.glob_overrides {
|
||||
ig.add_override(overrides.clone());
|
||||
for path in &self.ignore_files {
|
||||
if let Some(err) = wd.add_ignore(path) {
|
||||
if !self.no_messages {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(walk::Iter::new(ig, wd))
|
||||
|
||||
wd.follow_links(self.follow);
|
||||
wd.hidden(!self.hidden);
|
||||
wd.max_depth(self.maxdepth);
|
||||
wd.overrides(self.glob_overrides.clone());
|
||||
wd.types(self.types.clone());
|
||||
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
|
||||
wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
|
||||
wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
|
||||
wd.ignore(!self.no_ignore);
|
||||
wd.parents(!self.no_ignore_parent);
|
||||
wd.threads(self.threads());
|
||||
wd
|
||||
}
|
||||
}
|
||||
|
||||
@@ -709,7 +784,7 @@ enum State {
|
||||
Literal,
|
||||
}
|
||||
|
||||
/// Unescapes a string given on the command line. It supports a limit set of
|
||||
/// Unescapes a string given on the command line. It supports a limited set of
|
||||
/// escape sequences:
|
||||
///
|
||||
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
|
||||
|
429
src/gitignore.rs
429
src/gitignore.rs
@@ -1,429 +0,0 @@
|
||||
/*!
|
||||
The gitignore module provides a way of reading a gitignore file and applying
|
||||
it to a particular file name to determine whether it should be ignored or not.
|
||||
The motivation for this submodule is performance and portability:
|
||||
|
||||
1. There is a gitignore crate on crates.io, but it uses the standard `glob`
|
||||
crate and checks patterns one-by-one. This is a reasonable implementation,
|
||||
but not suitable for the performance we need here.
|
||||
2. We could shell out to a `git` sub-command like ls-files or status, but it
|
||||
seems better to not rely on the existence of external programs for a search
|
||||
tool. Besides, we need to implement this logic anyway to support things like
|
||||
an .ignore file.
|
||||
|
||||
The key implementation detail here is that a single gitignore file is compiled
|
||||
into a single RegexSet, which can be used to report which globs match a
|
||||
particular file name. We can then do a quick post-processing step to implement
|
||||
additional rules such as whitelists (prefix of `!`) or directory-only globs
|
||||
(suffix of `/`).
|
||||
*/
|
||||
|
||||
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
|
||||
// use this exact implementation because hgignore files are different.
|
||||
|
||||
use std::cell::RefCell;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::fs::File;
|
||||
use std::io::{self, BufRead};
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use regex;
|
||||
|
||||
use glob;
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Glob(glob::Error),
|
||||
Regex(regex::Error),
|
||||
Io(io::Error),
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::Glob(ref err) => err.description(),
|
||||
Error::Regex(ref err) => err.description(),
|
||||
Error::Io(ref err) => err.description(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Glob(ref err) => err.fmt(f),
|
||||
Error::Regex(ref err) => err.fmt(f),
|
||||
Error::Io(ref err) => err.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<glob::Error> for Error {
|
||||
fn from(err: glob::Error) -> Error {
|
||||
Error::Glob(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<regex::Error> for Error {
|
||||
fn from(err: regex::Error) -> Error {
|
||||
Error::Regex(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<io::Error> for Error {
|
||||
fn from(err: io::Error) -> Error {
|
||||
Error::Io(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Gitignore {
|
||||
set: glob::Set,
|
||||
root: PathBuf,
|
||||
patterns: Vec<Pattern>,
|
||||
num_ignores: u64,
|
||||
num_whitelist: u64,
|
||||
}
|
||||
|
||||
impl Gitignore {
|
||||
/// Create a new gitignore glob matcher from the given root directory and
|
||||
/// string containing the contents of a gitignore file.
|
||||
#[allow(dead_code)]
|
||||
fn from_str<P: AsRef<Path>>(
|
||||
root: P,
|
||||
gitignore: &str,
|
||||
) -> Result<Gitignore, Error> {
|
||||
let mut builder = GitignoreBuilder::new(root);
|
||||
try!(builder.add_str(gitignore));
|
||||
builder.build()
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be ignored
|
||||
/// according to the globs in this gitignore. `is_dir` should be true if
|
||||
/// the path refers to a directory and false otherwise.
|
||||
///
|
||||
/// Before matching path, its prefix (as determined by a common suffix
|
||||
/// of the directory containing this gitignore) is stripped. If there is
|
||||
/// no common suffix/prefix overlap, then path is assumed to reside in the
|
||||
/// same directory as this gitignore file.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
let mut path = path.as_ref();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
// Strip any common prefix between the candidate path and the root
|
||||
// of the gitignore, to make sure we get relative matching right.
|
||||
// BUT, a file name might not have any directory components to it,
|
||||
// in which case, we don't want to accidentally strip any part of the
|
||||
// file name.
|
||||
if !is_file_name(path) {
|
||||
if let Some(p) = strip_prefix(&self.root, path) {
|
||||
path = p;
|
||||
}
|
||||
}
|
||||
if let Some(p) = strip_prefix("/", path) {
|
||||
path = p;
|
||||
}
|
||||
self.matched_stripped(path, is_dir)
|
||||
}
|
||||
|
||||
/// Like matched, but takes a path that has already been stripped.
|
||||
pub fn matched_stripped(&self, path: &Path, is_dir: bool) -> Match {
|
||||
thread_local! {
|
||||
static MATCHES: RefCell<Vec<usize>> = {
|
||||
RefCell::new(vec![])
|
||||
}
|
||||
};
|
||||
MATCHES.with(|matches| {
|
||||
let mut matches = matches.borrow_mut();
|
||||
self.set.matches_into(path, &mut *matches);
|
||||
for &i in matches.iter().rev() {
|
||||
let pat = &self.patterns[i];
|
||||
if !pat.only_dir || is_dir {
|
||||
return if pat.whitelist {
|
||||
Match::Whitelist(pat)
|
||||
} else {
|
||||
Match::Ignored(pat)
|
||||
};
|
||||
}
|
||||
}
|
||||
Match::None
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns the total number of ignore patterns.
|
||||
pub fn num_ignores(&self) -> u64 {
|
||||
self.num_ignores
|
||||
}
|
||||
}
|
||||
|
||||
/// The result of a glob match.
|
||||
///
|
||||
/// The lifetime `'a` refers to the lifetime of the pattern that resulted in
|
||||
/// a match (whether ignored or whitelisted).
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum Match<'a> {
|
||||
/// The path didn't match any glob in the gitignore file.
|
||||
None,
|
||||
/// The last glob matched indicates the path should be ignored.
|
||||
Ignored(&'a Pattern),
|
||||
/// The last glob matched indicates the path should be whitelisted.
|
||||
Whitelist(&'a Pattern),
|
||||
}
|
||||
|
||||
impl<'a> Match<'a> {
|
||||
/// Returns true if the match result implies the path should be ignored.
|
||||
#[allow(dead_code)]
|
||||
pub fn is_ignored(&self) -> bool {
|
||||
match *self {
|
||||
Match::Ignored(_) => true,
|
||||
Match::None | Match::Whitelist(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the match result didn't match any globs.
|
||||
pub fn is_none(&self) -> bool {
|
||||
match *self {
|
||||
Match::None => true,
|
||||
Match::Ignored(_) | Match::Whitelist(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Inverts the match so that Ignored becomes Whitelisted and Whitelisted
|
||||
/// becomes Ignored. A non-match remains the same.
|
||||
pub fn invert(self) -> Match<'a> {
|
||||
match self {
|
||||
Match::None => Match::None,
|
||||
Match::Ignored(pat) => Match::Whitelist(pat),
|
||||
Match::Whitelist(pat) => Match::Ignored(pat),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// GitignoreBuilder constructs a matcher for a single set of globs from a
|
||||
/// .gitignore file.
|
||||
pub struct GitignoreBuilder {
|
||||
builder: glob::SetBuilder,
|
||||
root: PathBuf,
|
||||
patterns: Vec<Pattern>,
|
||||
}
|
||||
|
||||
/// Pattern describes one pattern line of a gitignore file. It cannot do glob
/// matching by itself; it carries the extra options attached to the pattern,
/// such as whether it is a whitelist pattern.
#[derive(Clone, Debug)]
pub struct Pattern {
    /// The file path this pattern was read from (may be empty).
    pub from: PathBuf,
    /// The glob pattern as written in the source.
    pub original: String,
    /// The glob pattern actually compiled, after gitignore-specific rewriting.
    pub pat: String,
    /// True if this pattern whitelists rather than ignores.
    pub whitelist: bool,
    /// True if this pattern may only match directories.
    pub only_dir: bool,
}
|
||||
|
||||
impl GitignoreBuilder {
|
||||
/// Create a new builder for a gitignore file.
|
||||
///
|
||||
/// The path given should be the path at which the globs for this gitignore
|
||||
/// file should be matched.
|
||||
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
|
||||
GitignoreBuilder {
|
||||
builder: glob::SetBuilder::new(),
|
||||
root: root.to_path_buf(),
|
||||
patterns: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Builds a new matcher from the glob patterns added so far.
|
||||
///
|
||||
/// Once a matcher is built, no new glob patterns can be added to it.
|
||||
pub fn build(self) -> Result<Gitignore, Error> {
|
||||
let nignores = self.patterns.iter().filter(|p| !p.whitelist).count();
|
||||
let nwhitelist = self.patterns.iter().filter(|p| p.whitelist).count();
|
||||
Ok(Gitignore {
|
||||
set: try!(self.builder.build()),
|
||||
root: self.root,
|
||||
patterns: self.patterns,
|
||||
num_ignores: nignores as u64,
|
||||
num_whitelist: nwhitelist as u64,
|
||||
})
|
||||
}
|
||||
|
||||
/// Add each pattern line from the file path given.
|
||||
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
let rdr = io::BufReader::new(try!(File::open(&path)));
|
||||
for line in rdr.lines() {
|
||||
try!(self.add(&path, &try!(line)));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add each pattern line from the string given.
|
||||
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
|
||||
for line in gitignore.lines() {
|
||||
try!(self.add("", line));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a line from a gitignore file to this builder.
|
||||
///
|
||||
/// If the line could not be parsed as a glob, then an error is returned.
|
||||
pub fn add<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
from: P,
|
||||
mut line: &str,
|
||||
) -> Result<(), Error> {
|
||||
if line.is_empty() || line.starts_with("#") {
|
||||
return Ok(());
|
||||
}
|
||||
if !line.ends_with("\\ ") {
|
||||
line = line.trim_right();
|
||||
}
|
||||
let mut pat = Pattern {
|
||||
from: from.as_ref().to_path_buf(),
|
||||
original: line.to_string(),
|
||||
pat: String::new(),
|
||||
whitelist: false,
|
||||
only_dir: false,
|
||||
};
|
||||
let mut opts = glob::MatchOptions::default();
|
||||
let has_slash = line.chars().any(|c| c == '/');
|
||||
let is_absolute = line.chars().nth(0).unwrap() == '/';
|
||||
if line.starts_with("\\!") || line.starts_with("\\#") {
|
||||
line = &line[1..];
|
||||
} else {
|
||||
if line.starts_with("!") {
|
||||
pat.whitelist = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
if line.starts_with("/") {
|
||||
// `man gitignore` says that if a glob starts with a slash,
|
||||
// then the glob can only match the beginning of a path
|
||||
// (relative to the location of gitignore). We achieve this by
|
||||
// simply banning wildcards from matching /.
|
||||
opts.require_literal_separator = true;
|
||||
line = &line[1..];
|
||||
}
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
pat.only_dir = true;
|
||||
line = &line[..i];
|
||||
}
|
||||
}
|
||||
// If there is a literal slash, then we note that so that globbing
|
||||
// doesn't let wildcards match slashes.
|
||||
pat.pat = line.to_string();
|
||||
if has_slash {
|
||||
opts.require_literal_separator = true;
|
||||
}
|
||||
// If there was a leading slash, then this is a pattern that must
|
||||
// match the entire path name. Otherwise, we should let it match
|
||||
// anywhere, so use a **/ prefix.
|
||||
if !is_absolute {
|
||||
// ... but only if we don't already have a **/ prefix.
|
||||
if !pat.pat.starts_with("**/") {
|
||||
pat.pat = format!("**/{}", pat.pat);
|
||||
}
|
||||
}
|
||||
// If the pattern ends with `/**`, then we should only match everything
|
||||
// inside a directory, but not the directory itself. Standard globs
|
||||
// will match the directory. So we add `/*` to force the issue.
|
||||
if pat.pat.ends_with("/**") {
|
||||
pat.pat = format!("{}/*", pat.pat);
|
||||
}
|
||||
try!(self.builder.add_with(&pat.pat, &opts));
|
||||
self.patterns.push(pat);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::Gitignore;

    /// Builds a matcher from the gitignore text `gi` rooted at `root` and
    /// reports whether it ignores `path`.
    fn is_ignored(root: &str, gi: &str, path: &str, is_dir: bool) -> bool {
        let matcher = Gitignore::from_str(root, gi).unwrap();
        let verdict = matcher.matched(path, is_dir).is_ignored();
        verdict
    }

    // Declares a test asserting that the path IS ignored. `is_dir` defaults
    // to false when omitted.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(is_ignored($root, $gi, $path, $is_dir));
            }
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
    }

    // Declares a test asserting that the path is NOT ignored. `is_dir`
    // defaults to false when omitted.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(!is_ignored($root, $gi, $path, $is_dir));
            }
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");
    ignored!(ig25, ROOT, "Cargo.lock", "./tabwriter-bin/Cargo.lock");
    ignored!(ig26, ROOT, "/foo/bar/baz", "./foo/bar/baz");
    ignored!(ig27, ROOT, "foo/", "xyz/foo", true);
    ignored!(ig28, ROOT, "src/*.rs", "src/grep/src/main.rs");
    ignored!(ig29, "./src", "/llvm/", "./src/llvm", true);
    ignored!(ig30, ROOT, "node_modules/ ", "node_modules", true);

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
    not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
}
|
1170
src/glob.rs
1170
src/glob.rs
File diff suppressed because it is too large
Load Diff
490
src/ignore.rs
490
src/ignore.rs
@@ -1,490 +0,0 @@
|
||||
/*!
|
||||
The ignore module is responsible for managing the state required to determine
|
||||
whether a *single* file path should be searched or not.
|
||||
|
||||
In general, there are two ways to ignore a particular file:
|
||||
|
||||
1. Specify an ignore rule in some "global" configuration, such as a
|
||||
$HOME/.ignore or on the command line.
|
||||
2. A specific ignore file (like .gitignore) found during directory traversal.
|
||||
|
||||
The `IgnoreDir` type handles ignore patterns for any one particular directory
|
||||
(including "global" ignore patterns), while the `Ignore` type handles a stack
|
||||
of `IgnoreDir`s for use during directory traversal.
|
||||
*/
|
||||
|
||||
use std::error::Error as StdError;
|
||||
use std::ffi::OsString;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder, Match, Pattern};
|
||||
use pathutil::{file_name, is_hidden};
|
||||
use types::Types;
|
||||
|
||||
/// The ignore file names looked for in each directory.
///
/// The order is significant: when these names are loaded into one matcher,
/// names later in the list take precedence over earlier ones.
const IGNORE_NAMES: &'static [&'static str] =
    &[".gitignore", ".ignore", ".rgignore"];
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
Gitignore(gitignore::Error),
|
||||
Io {
|
||||
path: PathBuf,
|
||||
err: io::Error,
|
||||
},
|
||||
}
|
||||
|
||||
impl Error {
|
||||
fn from_io<P: AsRef<Path>>(path: P, err: io::Error) -> Error {
|
||||
Error::Io { path: path.as_ref().to_path_buf(), err: err }
|
||||
}
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::Gitignore(ref err) => err.description(),
|
||||
Error::Io { ref err, .. } => err.description(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Gitignore(ref err) => err.fmt(f),
|
||||
Error::Io { ref path, ref err } => {
|
||||
write!(f, "{}: {}", path.display(), err)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<gitignore::Error> for Error {
|
||||
fn from(err: gitignore::Error) -> Error {
|
||||
Error::Gitignore(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// Ignore represents a collection of ignore patterns organized by directory.
|
||||
/// In particular, a stack is maintained, where the top of the stack
|
||||
/// corresponds to the current directory being searched and the bottom of the
|
||||
/// stack represents the root of a search. Ignore patterns at the top of the
|
||||
/// stack take precedence over ignore patterns at the bottom of the stack.
|
||||
pub struct Ignore {
|
||||
/// A stack of ignore patterns at each directory level of traversal.
|
||||
/// A directory that contributes no ignore patterns is `None`.
|
||||
stack: Vec<IgnoreDir>,
|
||||
/// A stack of parent directories above the root of the current search.
|
||||
parent_stack: Vec<IgnoreDir>,
|
||||
/// A set of override globs that are always checked first. A match (whether
|
||||
/// it's whitelist or blacklist) trumps anything in stack.
|
||||
overrides: Overrides,
|
||||
/// A file type matcher.
|
||||
types: Types,
|
||||
/// Whether to ignore hidden files or not.
|
||||
ignore_hidden: bool,
|
||||
/// When true, don't look at .gitignore or .ignore files for ignore
|
||||
/// rules.
|
||||
no_ignore: bool,
|
||||
/// When true, don't look at .gitignore files for ignore rules.
|
||||
no_ignore_vcs: bool,
|
||||
}
|
||||
|
||||
impl Ignore {
|
||||
/// Create an empty set of ignore patterns.
|
||||
pub fn new() -> Ignore {
|
||||
Ignore {
|
||||
stack: vec![],
|
||||
parent_stack: vec![],
|
||||
overrides: Overrides::new(None),
|
||||
types: Types::empty(),
|
||||
ignore_hidden: true,
|
||||
no_ignore: false,
|
||||
no_ignore_vcs: true,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set whether hidden files/folders should be ignored (defaults to true).
|
||||
pub fn ignore_hidden(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.ignore_hidden = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, ignore files are ignored.
|
||||
pub fn no_ignore(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.no_ignore = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, VCS ignore files are ignored.
|
||||
pub fn no_ignore_vcs(&mut self, yes: bool) -> &mut Ignore {
|
||||
self.no_ignore_vcs = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a set of globs that overrides all other match logic.
|
||||
pub fn add_override(&mut self, gi: Gitignore) -> &mut Ignore {
|
||||
self.overrides = Overrides::new(Some(gi));
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a file type matcher. The file type matcher has the lowest
|
||||
/// precedence.
|
||||
pub fn add_types(&mut self, types: Types) -> &mut Ignore {
|
||||
self.types = types;
|
||||
self
|
||||
}
|
||||
|
||||
/// Push parent directories of `path` on to the stack.
|
||||
pub fn push_parents<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
path: P,
|
||||
) -> Result<(), Error> {
|
||||
let path = try!(path.as_ref().canonicalize().map_err(|err| {
|
||||
Error::from_io(path.as_ref(), err)
|
||||
}));
|
||||
let mut path = &*path;
|
||||
let mut saw_git = path.join(".git").is_dir();
|
||||
let mut ignore_names = IGNORE_NAMES.to_vec();
|
||||
if self.no_ignore_vcs {
|
||||
ignore_names.retain(|&name| name != ".gitignore");
|
||||
}
|
||||
let mut ignore_dir_results = vec![];
|
||||
while let Some(parent) = path.parent() {
|
||||
if self.no_ignore {
|
||||
ignore_dir_results.push(Ok(IgnoreDir::empty(parent)));
|
||||
} else {
|
||||
if saw_git {
|
||||
ignore_names.retain(|&name| name != ".gitignore");
|
||||
} else {
|
||||
saw_git = parent.join(".git").is_dir();
|
||||
}
|
||||
let ignore_dir_result =
|
||||
IgnoreDir::with_ignore_names(parent, ignore_names.iter());
|
||||
ignore_dir_results.push(ignore_dir_result);
|
||||
}
|
||||
path = parent;
|
||||
}
|
||||
|
||||
for ignore_dir_result in ignore_dir_results.into_iter().rev() {
|
||||
self.parent_stack.push(try!(ignore_dir_result));
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a directory to the stack.
|
||||
///
|
||||
/// Note that even if this returns an error, the directory is added to the
|
||||
/// stack (and therefore should be popped).
|
||||
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||
if self.no_ignore {
|
||||
self.stack.push(IgnoreDir::empty(path));
|
||||
Ok(())
|
||||
} else if self.no_ignore_vcs {
|
||||
self.push_ignore_dir(IgnoreDir::without_vcs(path))
|
||||
} else {
|
||||
self.push_ignore_dir(IgnoreDir::new(path))
|
||||
}
|
||||
}
|
||||
|
||||
/// Pushes the result of building a directory matcher on to the stack.
|
||||
///
|
||||
/// If the result given contains an error, then it is returned.
|
||||
pub fn push_ignore_dir(
|
||||
&mut self,
|
||||
result: Result<IgnoreDir, Error>,
|
||||
) -> Result<(), Error> {
|
||||
match result {
|
||||
Ok(id) => {
|
||||
self.stack.push(id);
|
||||
Ok(())
|
||||
}
|
||||
Err(err) => {
|
||||
// Don't leave the stack in an inconsistent state.
|
||||
self.stack.push(IgnoreDir::empty("error"));
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Pop a directory from the stack.
|
||||
///
|
||||
/// This panics if the stack is empty.
|
||||
pub fn pop(&mut self) {
|
||||
self.stack.pop().expect("non-empty stack");
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be ignored.
|
||||
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
|
||||
let path = path.as_ref();
|
||||
let mat = self.overrides.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
return is_ignored;
|
||||
}
|
||||
let mut whitelisted = false;
|
||||
if !self.no_ignore {
|
||||
for id in self.stack.iter().rev() {
|
||||
let mat = id.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
// If this path is whitelisted by an ignore, then
|
||||
// fallthrough and let the file type matcher have a say.
|
||||
whitelisted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If the file has been whitelisted, then we have to stop checking
|
||||
// parent directories. The only thing that can override a whitelist
|
||||
// at this point is a type filter.
|
||||
if !whitelisted {
|
||||
let mut path = path.to_path_buf();
|
||||
for id in self.parent_stack.iter().rev() {
|
||||
if let Some(ref dirname) = id.name {
|
||||
path = Path::new(dirname).join(path);
|
||||
}
|
||||
let mat = id.matched(&*path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(&*path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
// If this path is whitelisted by an ignore, then
|
||||
// fallthrough and let the file type matcher have a
|
||||
// say.
|
||||
whitelisted = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
let mat = self.types.matched(path, is_dir);
|
||||
if let Some(is_ignored) = self.ignore_match(path, mat) {
|
||||
if is_ignored {
|
||||
return true;
|
||||
}
|
||||
whitelisted = true;
|
||||
}
|
||||
if !whitelisted && self.ignore_hidden && is_hidden(&path) {
|
||||
debug!("{} ignored because it is hidden", path.display());
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
|
||||
/// Returns true if the given match says the given pattern should be
|
||||
/// ignored or false if the given pattern should be explicitly whitelisted.
|
||||
/// Returns None otherwise.
|
||||
pub fn ignore_match<P: AsRef<Path>>(
|
||||
&self,
|
||||
path: P,
|
||||
mat: Match,
|
||||
) -> Option<bool> {
|
||||
let path = path.as_ref();
|
||||
match mat {
|
||||
Match::Whitelist(ref pat) => {
|
||||
debug!("{} whitelisted by {:?}", path.display(), pat);
|
||||
Some(false)
|
||||
}
|
||||
Match::Ignored(ref pat) => {
|
||||
debug!("{} ignored by {:?}", path.display(), pat);
|
||||
Some(true)
|
||||
}
|
||||
Match::None => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// IgnoreDir represents a set of ignore patterns retrieved from a single
|
||||
/// directory.
|
||||
#[derive(Debug)]
|
||||
pub struct IgnoreDir {
|
||||
/// The path to this directory as given.
|
||||
path: PathBuf,
|
||||
/// The directory name, if one exists.
|
||||
name: Option<OsString>,
|
||||
/// A single accumulation of glob patterns for this directory, matched
|
||||
/// using gitignore semantics.
|
||||
///
|
||||
/// This will include patterns from rgignore as well. The patterns are
|
||||
/// ordered so that precedence applies automatically (e.g., rgignore
|
||||
/// patterns procede gitignore patterns).
|
||||
gi: Option<Gitignore>,
|
||||
// TODO(burntsushi): Matching other types of glob patterns that don't
|
||||
// conform to gitignore will probably require refactoring this approach.
|
||||
}
|
||||
|
||||
impl IgnoreDir {
|
||||
/// Create a new matcher for the given directory.
|
||||
pub fn new<P: AsRef<Path>>(path: P) -> Result<IgnoreDir, Error> {
|
||||
IgnoreDir::with_ignore_names(path, IGNORE_NAMES.iter())
|
||||
}
|
||||
|
||||
/// Create a new matcher for the given directory.
|
||||
///
|
||||
/// Don't respect VCS ignore files.
|
||||
pub fn without_vcs<P: AsRef<Path>>(path: P) -> Result<IgnoreDir, Error> {
|
||||
let names = IGNORE_NAMES.iter().filter(|name| **name != ".gitignore");
|
||||
IgnoreDir::with_ignore_names(path, names)
|
||||
}
|
||||
|
||||
/// Create a new IgnoreDir that never matches anything with the given path.
|
||||
pub fn empty<P: AsRef<Path>>(path: P) -> IgnoreDir {
|
||||
IgnoreDir {
|
||||
path: path.as_ref().to_path_buf(),
|
||||
name: file_name(path.as_ref()).map(|s| s.to_os_string()),
|
||||
gi: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new matcher for the given directory using only the ignore
|
||||
/// patterns found in the file names given.
|
||||
///
|
||||
/// If no ignore glob patterns could be found in the directory then `None`
|
||||
/// is returned.
|
||||
///
|
||||
/// Note that the order of the names given is meaningful. Names appearing
|
||||
/// later in the list have precedence over names appearing earlier in the
|
||||
/// list.
|
||||
pub fn with_ignore_names<P: AsRef<Path>, S, I>(
|
||||
path: P,
|
||||
names: I,
|
||||
) -> Result<IgnoreDir, Error>
|
||||
where P: AsRef<Path>, S: AsRef<str>, I: Iterator<Item=S> {
|
||||
let mut id = IgnoreDir::empty(path);
|
||||
let mut ok = false;
|
||||
let mut builder = GitignoreBuilder::new(&id.path);
|
||||
// The ordering here is important. Later globs have higher precedence.
|
||||
for name in names {
|
||||
ok = builder.add_path(id.path.join(name.as_ref())).is_ok() || ok;
|
||||
}
|
||||
if !ok {
|
||||
return Ok(id);
|
||||
}
|
||||
id.gi = Some(try!(builder.build()));
|
||||
Ok(id)
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be ignored
|
||||
/// according to the globs in this directory. `is_dir` should be true if
|
||||
/// the path refers to a directory and false otherwise.
|
||||
///
|
||||
/// Before matching path, its prefix (as determined by a common suffix
|
||||
/// of this directory) is stripped. If there is
|
||||
/// no common suffix/prefix overlap, then path is assumed to reside
|
||||
/// directly in this directory.
|
||||
///
|
||||
/// If the given path has a `./` prefix then it is stripped before
|
||||
/// matching.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
self.gi.as_ref()
|
||||
.map(|gi| gi.matched(path, is_dir))
|
||||
.unwrap_or(Match::None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Manages a set of overrides provided explicitly by the end user.
|
||||
struct Overrides {
|
||||
gi: Option<Gitignore>,
|
||||
unmatched_pat: Pattern,
|
||||
}
|
||||
|
||||
impl Overrides {
|
||||
/// Creates a new set of overrides from the gitignore matcher provided.
|
||||
/// If no matcher is provided, then the resulting overrides have no effect.
|
||||
fn new(gi: Option<Gitignore>) -> Overrides {
|
||||
Overrides {
|
||||
gi: gi,
|
||||
unmatched_pat: Pattern {
|
||||
from: Path::new("<argv>").to_path_buf(),
|
||||
original: "<none>".to_string(),
|
||||
pat: "<none>".to_string(),
|
||||
whitelist: false,
|
||||
only_dir: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this set of overrides.
|
||||
///
|
||||
/// If there are no overrides, then this always returns Match::None.
|
||||
///
|
||||
/// If there is at least one positive override, then this never returns
|
||||
/// Match::None (and interpreting non-matches as ignored) unless is_dir
|
||||
/// is true.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
let path = path.as_ref();
|
||||
self.gi.as_ref()
|
||||
.map(|gi| {
|
||||
let mat = gi.matched_stripped(path, is_dir).invert();
|
||||
if mat.is_none() && !is_dir {
|
||||
if gi.num_ignores() > 0 {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
}
|
||||
}
|
||||
mat
|
||||
})
|
||||
.unwrap_or(Match::None)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;
    use gitignore::GitignoreBuilder;
    use super::IgnoreDir;

    /// Builds an `IgnoreDir` for `root` from two pattern sources, `gi` added
    /// first and `xi` second, and reports whether it ignores the file `path`.
    fn dir_ignores(root: &str, gi: &str, xi: &str, path: &str) -> bool {
        let mut builder = GitignoreBuilder::new(&root);
        builder.add_str(gi).unwrap();
        builder.add_str(xi).unwrap();
        let matcher = builder.build().unwrap();
        let dir = Path::new(root);
        let id = IgnoreDir {
            path: dir.to_path_buf(),
            name: dir.file_name().map(|s| s.to_os_string()),
            gi: Some(matcher),
        };
        let verdict = id.matched(path, false).is_ignored();
        verdict
    }

    // Declares a test asserting that the path IS ignored.
    macro_rules! ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                assert!(dir_ignores($root, $gi, $xi, $path));
            }
        };
    }

    // Declares a test asserting that the path is NOT ignored.
    macro_rules! not_ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                assert!(!dir_ignores($root, $gi, $xi, $path));
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/rg";

    ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
    ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
    ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs");

    not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs");
}
|
512
src/main.rs
512
src/main.rs
@@ -1,8 +1,8 @@
|
||||
extern crate deque;
|
||||
extern crate ctrlc;
|
||||
extern crate docopt;
|
||||
extern crate env_logger;
|
||||
extern crate fnv;
|
||||
extern crate grep;
|
||||
extern crate ignore;
|
||||
#[cfg(windows)]
|
||||
extern crate kernel32;
|
||||
#[macro_use]
|
||||
@@ -16,33 +16,24 @@ extern crate num_cpus;
|
||||
extern crate regex;
|
||||
extern crate rustc_serialize;
|
||||
extern crate term;
|
||||
extern crate walkdir;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi;
|
||||
|
||||
use std::error::Error;
|
||||
use std::fs::File;
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::io;
|
||||
use std::io::Write;
|
||||
use std::process;
|
||||
use std::result;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
use std::cmp;
|
||||
|
||||
use deque::{Stealer, Stolen};
|
||||
use grep::Grep;
|
||||
use memmap::{Mmap, Protection};
|
||||
use term::Terminal;
|
||||
use walkdir::DirEntry;
|
||||
|
||||
use args::Args;
|
||||
use out::{ColoredTerminal, Out};
|
||||
use pathutil::strip_prefix;
|
||||
use printer::Printer;
|
||||
use search_stream::InputBuffer;
|
||||
#[cfg(windows)]
|
||||
use terminal_win::WindowsBuffer;
|
||||
use worker::Work;
|
||||
|
||||
macro_rules! errored {
|
||||
($($tt:tt)*) => {
|
||||
@@ -59,9 +50,6 @@ macro_rules! eprintln {
|
||||
|
||||
mod args;
|
||||
mod atty;
|
||||
mod gitignore;
|
||||
mod glob;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod pathutil;
|
||||
mod printer;
|
||||
@@ -69,13 +57,12 @@ mod search_buffer;
|
||||
mod search_stream;
|
||||
#[cfg(windows)]
|
||||
mod terminal_win;
|
||||
mod types;
|
||||
mod walk;
|
||||
mod worker;
|
||||
|
||||
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
||||
|
||||
fn main() {
|
||||
match Args::parse().and_then(run) {
|
||||
match Args::parse().map(Arc::new).and_then(run) {
|
||||
Ok(count) if count == 0 => process::exit(1),
|
||||
Ok(_) => process::exit(0),
|
||||
Err(err) => {
|
||||
@@ -85,150 +72,173 @@ fn main() {
|
||||
}
|
||||
}
|
||||
|
||||
fn run(args: Args) -> Result<u64> {
|
||||
let args = Arc::new(args);
|
||||
let paths = args.paths();
|
||||
fn run(args: Arc<Args>) -> Result<u64> {
|
||||
if args.never_match() {
|
||||
return Ok(0);
|
||||
}
|
||||
{
|
||||
let args = args.clone();
|
||||
ctrlc::set_handler(move || {
|
||||
let stdout = io::stdout();
|
||||
let mut stdout = stdout.lock();
|
||||
|
||||
let _ = args.stdout().reset();
|
||||
let _ = stdout.flush();
|
||||
|
||||
process::exit(1);
|
||||
});
|
||||
}
|
||||
let threads = cmp::max(1, args.threads() - 1);
|
||||
if args.files() {
|
||||
return run_files(args.clone());
|
||||
}
|
||||
if args.type_list() {
|
||||
return run_types(args.clone());
|
||||
}
|
||||
if paths.len() == 1 && (paths[0] == Path::new("-") || paths[0].is_file()) {
|
||||
return run_one(args.clone(), &paths[0]);
|
||||
}
|
||||
if threads == 1 {
|
||||
return run_one_thread(args.clone());
|
||||
}
|
||||
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let mut workers = vec![];
|
||||
|
||||
let workq = {
|
||||
let (workq, stealer) = deque::new();
|
||||
for _ in 0..threads {
|
||||
let worker = MultiWorker {
|
||||
chan_work: stealer.clone(),
|
||||
out: out.clone(),
|
||||
outbuf: Some(args.outbuf()),
|
||||
worker: Worker {
|
||||
args: args.clone(),
|
||||
inpbuf: args.input_buffer(),
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
},
|
||||
};
|
||||
workers.push(thread::spawn(move || worker.run()));
|
||||
}
|
||||
workq
|
||||
};
|
||||
let mut paths_searched: u64 = 0;
|
||||
for p in paths {
|
||||
if p == Path::new("-") {
|
||||
paths_searched += 1;
|
||||
workq.push(Work::Stdin);
|
||||
if threads == 1 || args.is_one_path() {
|
||||
run_files_one_thread(args)
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
paths_searched += 1;
|
||||
workq.push(Work::File(ent));
|
||||
run_files_parallel(args)
|
||||
}
|
||||
} else if args.type_list() {
|
||||
run_types(args)
|
||||
} else if threads == 1 || args.is_one_path() {
|
||||
run_one_thread(args)
|
||||
} else {
|
||||
run_parallel(args)
|
||||
}
|
||||
}
|
||||
|
||||
fn run_parallel(args: Arc<Args>) -> Result<u64> {
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let quiet_matched = QuietMatched::new(args.quiet());
|
||||
let paths_searched = Arc::new(AtomicUsize::new(0));
|
||||
let match_count = Arc::new(AtomicUsize::new(0));
|
||||
|
||||
args.walker_parallel().run(|| {
|
||||
let args = args.clone();
|
||||
let quiet_matched = quiet_matched.clone();
|
||||
let paths_searched = paths_searched.clone();
|
||||
let match_count = match_count.clone();
|
||||
let out = out.clone();
|
||||
let mut outbuf = args.outbuf();
|
||||
let mut worker = args.worker();
|
||||
Box::new(move |result| {
|
||||
use ignore::WalkState::*;
|
||||
|
||||
if quiet_matched.has_match() {
|
||||
return Quit;
|
||||
}
|
||||
let dent = match get_or_log_dir_entry(result, args.no_messages()) {
|
||||
None => return Continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
paths_searched.fetch_add(1, Ordering::SeqCst);
|
||||
outbuf.clear();
|
||||
{
|
||||
// This block actually executes the search and prints the
|
||||
// results into outbuf.
|
||||
let mut printer = args.printer(&mut outbuf);
|
||||
let count =
|
||||
if dent.is_stdin() {
|
||||
worker.run(&mut printer, Work::Stdin)
|
||||
} else {
|
||||
worker.run(&mut printer, Work::DirEntry(dent))
|
||||
};
|
||||
match_count.fetch_add(count as usize, Ordering::SeqCst);
|
||||
if quiet_matched.set_match(count > 0) {
|
||||
return Quit;
|
||||
}
|
||||
}
|
||||
if !outbuf.get_ref().is_empty() {
|
||||
// This should be the only mutex in all of ripgrep. Since the
|
||||
// common case is to report a small number of matches relative
|
||||
// to the corpus, this really shouldn't matter much.
|
||||
//
|
||||
// Still, it'd be nice to send this on a channel, but then we'd
|
||||
// need to manage a pool of outbufs, which would complicate the
|
||||
// code.
|
||||
let mut out = out.lock().unwrap();
|
||||
out.write(&outbuf);
|
||||
}
|
||||
Continue
|
||||
})
|
||||
});
|
||||
if !args.paths().is_empty() && paths_searched.load(Ordering::SeqCst) == 0 {
|
||||
if !args.no_messages() {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
}
|
||||
Ok(match_count.load(Ordering::SeqCst) as u64)
|
||||
}
|
||||
|
||||
fn run_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
let mut worker = args.worker();
|
||||
let mut term = args.stdout();
|
||||
let mut paths_searched: u64 = 0;
|
||||
let mut match_count = 0;
|
||||
for result in args.walker() {
|
||||
let dent = match get_or_log_dir_entry(result, args.no_messages()) {
|
||||
None => continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
let mut printer = args.printer(&mut term);
|
||||
if match_count > 0 {
|
||||
if args.quiet() {
|
||||
break;
|
||||
}
|
||||
if let Some(sep) = args.file_separator() {
|
||||
printer = printer.file_separator(sep);
|
||||
}
|
||||
}
|
||||
paths_searched += 1;
|
||||
match_count +=
|
||||
if dent.is_stdin() {
|
||||
worker.run(&mut printer, Work::Stdin)
|
||||
} else {
|
||||
worker.run(&mut printer, Work::DirEntry(dent))
|
||||
};
|
||||
}
|
||||
if !paths.is_empty() && paths_searched == 0 {
|
||||
eprintln!("No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect. \
|
||||
Try running again with --debug.");
|
||||
}
|
||||
for _ in 0..workers.len() {
|
||||
workq.push(Work::Quit);
|
||||
}
|
||||
let mut match_count = 0;
|
||||
for worker in workers {
|
||||
match_count += worker.join().unwrap();
|
||||
if !args.paths().is_empty() && paths_searched == 0 {
|
||||
if !args.no_messages() {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
}
|
||||
Ok(match_count)
|
||||
}
|
||||
|
||||
fn run_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
let mut worker = Worker {
|
||||
args: args.clone(),
|
||||
inpbuf: args.input_buffer(),
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
};
|
||||
let paths = args.paths();
|
||||
let filesep = args.file_separator();
|
||||
let mut term = args.stdout();
|
||||
|
||||
let mut paths_searched: u64 = 0;
|
||||
for p in paths {
|
||||
if p == Path::new("-") {
|
||||
if worker.match_count > 0 {
|
||||
if let Some(ref sep) = filesep {
|
||||
let _ = term.write_all(sep);
|
||||
let _ = term.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
paths_searched += 1;
|
||||
let mut printer = args.printer(&mut term);
|
||||
worker.do_work(&mut printer, WorkReady::Stdin);
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
if worker.match_count > 0 {
|
||||
if let Some(ref sep) = filesep {
|
||||
let _ = term.write_all(sep);
|
||||
let _ = term.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
paths_searched += 1;
|
||||
let mut printer = args.printer(&mut term);
|
||||
let file = try!(File::open(ent.path()));
|
||||
worker.do_work(&mut printer, WorkReady::DirFile(ent, file));
|
||||
}
|
||||
fn run_files_parallel(args: Arc<Args>) -> Result<u64> {
|
||||
let print_args = args.clone();
|
||||
let (tx, rx) = mpsc::channel::<ignore::DirEntry>();
|
||||
let print_thread = thread::spawn(move || {
|
||||
let term = print_args.stdout();
|
||||
let mut printer = print_args.printer(term);
|
||||
let mut file_count = 0;
|
||||
for dent in rx.iter() {
|
||||
printer.path(dent.path());
|
||||
file_count += 1;
|
||||
}
|
||||
}
|
||||
if !paths.is_empty() && paths_searched == 0 {
|
||||
eprintln!("No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect. \
|
||||
Try running again with --debug.");
|
||||
}
|
||||
Ok(worker.match_count)
|
||||
file_count
|
||||
});
|
||||
let no_messages = args.no_messages();
|
||||
args.walker_parallel().run(move || {
|
||||
let tx = tx.clone();
|
||||
Box::new(move |result| {
|
||||
if let Some(dent) = get_or_log_dir_entry(result, no_messages) {
|
||||
tx.send(dent).unwrap();
|
||||
}
|
||||
ignore::WalkState::Continue
|
||||
})
|
||||
});
|
||||
Ok(print_thread.join().unwrap())
|
||||
}
|
||||
|
||||
fn run_one(args: Arc<Args>, path: &Path) -> Result<u64> {
|
||||
let mut worker = Worker {
|
||||
args: args.clone(),
|
||||
inpbuf: args.input_buffer(),
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
};
|
||||
let term = args.stdout();
|
||||
let mut printer = args.printer(term);
|
||||
let work =
|
||||
if path == Path::new("-") {
|
||||
WorkReady::Stdin
|
||||
} else {
|
||||
WorkReady::PathFile(path.to_path_buf(), try!(File::open(path)))
|
||||
};
|
||||
worker.do_work(&mut printer, work);
|
||||
Ok(worker.match_count)
|
||||
}
|
||||
|
||||
fn run_files(args: Arc<Args>) -> Result<u64> {
|
||||
fn run_files_one_thread(args: Arc<Args>) -> Result<u64> {
|
||||
let term = args.stdout();
|
||||
let mut printer = args.printer(term);
|
||||
let mut file_count = 0;
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
printer.path(&Path::new("<stdin>"));
|
||||
file_count += 1;
|
||||
} else {
|
||||
for ent in try!(args.walker(p)) {
|
||||
printer.path(ent.path());
|
||||
file_count += 1;
|
||||
}
|
||||
}
|
||||
for result in args.walker() {
|
||||
let dent = match get_or_log_dir_entry(result, args.no_messages()) {
|
||||
None => continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
printer.path(dent.path());
|
||||
file_count += 1;
|
||||
}
|
||||
Ok(file_count)
|
||||
}
|
||||
@@ -244,147 +254,73 @@ fn run_types(args: Arc<Args>) -> Result<u64> {
|
||||
Ok(ty_count)
|
||||
}
|
||||
|
||||
/// A message placed on the work queue that `MultiWorker::run` steals from.
enum Work {
|
||||
/// Search standard input.
Stdin,
|
||||
/// Search the file behind this directory entry; the worker opens it.
File(DirEntry),
|
||||
/// Tells the receiving worker to leave its loop and return.
Quit,
|
||||
}
|
||||
|
||||
/// Work whose input is ready to be read, as consumed by `Worker::do_work`.
enum WorkReady {
|
||||
/// Read from standard input.
Stdin,
|
||||
/// An opened file together with the directory entry it was opened from.
DirFile(DirEntry, File),
|
||||
/// An opened file together with the explicit path it was opened from.
PathFile(PathBuf, File),
|
||||
}
|
||||
|
||||
/// One search thread's state: it steals `Work` from a shared queue, searches
/// it with `worker`, and writes buffered output to the shared `out`.
struct MultiWorker {
|
||||
/// Stealing end of the work queue polled by `run`.
chan_work: Stealer<Work>,
|
||||
/// Output sink shared by all workers; locked only to write a non-empty
/// buffer.
out: Arc<Mutex<Out>>,
|
||||
/// Reusable output buffer. `run` takes it for each job and puts it back
/// afterwards.
#[cfg(not(windows))]
|
||||
outbuf: Option<ColoredTerminal<term::TerminfoTerminal<Vec<u8>>>>,
|
||||
/// Reusable output buffer (Windows variant). `run` takes it for each job
/// and puts it back afterwards.
#[cfg(windows)]
|
||||
outbuf: Option<ColoredTerminal<WindowsBuffer>>,
|
||||
/// The searcher state that does the work and accumulates the match count.
worker: Worker,
|
||||
}
|
||||
|
||||
/// State needed to search one input at a time and tally matches.
struct Worker {
|
||||
/// Shared arguments; used to build printers and searchers.
args: Arc<Args>,
|
||||
/// Input buffer handed mutably to each streaming searcher.
inpbuf: InputBuffer,
|
||||
/// The matcher handed by reference to each searcher.
grep: Grep,
|
||||
/// Running total of the counts returned by successful searches.
match_count: u64,
|
||||
}
|
||||
|
||||
impl MultiWorker {
|
||||
fn run(mut self) -> u64 {
|
||||
loop {
|
||||
let work = match self.chan_work.steal() {
|
||||
Stolen::Empty | Stolen::Abort => continue,
|
||||
Stolen::Data(Work::Quit) => break,
|
||||
Stolen::Data(Work::Stdin) => WorkReady::Stdin,
|
||||
Stolen::Data(Work::File(ent)) => {
|
||||
match File::open(ent.path()) {
|
||||
Ok(file) => WorkReady::DirFile(ent, file),
|
||||
Err(err) => {
|
||||
eprintln!("{}: {}", ent.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
let mut outbuf = self.outbuf.take().unwrap();
|
||||
outbuf.clear();
|
||||
let mut printer = self.worker.args.printer(outbuf);
|
||||
self.worker.do_work(&mut printer, work);
|
||||
let outbuf = printer.into_inner();
|
||||
if !outbuf.get_ref().is_empty() {
|
||||
let mut out = self.out.lock().unwrap();
|
||||
out.write(&outbuf);
|
||||
}
|
||||
self.outbuf = Some(outbuf);
|
||||
}
|
||||
self.worker.match_count
|
||||
}
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
fn do_work<W: Terminal + Send>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
work: WorkReady,
|
||||
) {
|
||||
let result = match work {
|
||||
WorkReady::Stdin => {
|
||||
let stdin = io::stdin();
|
||||
let stdin = stdin.lock();
|
||||
self.search(printer, &Path::new("<stdin>"), stdin)
|
||||
}
|
||||
WorkReady::DirFile(ent, file) => {
|
||||
let mut path = ent.path();
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
if self.args.mmap() {
|
||||
self.search_mmap(printer, path, &file)
|
||||
} else {
|
||||
self.search(printer, path, file)
|
||||
}
|
||||
}
|
||||
WorkReady::PathFile(path, file) => {
|
||||
let mut path = &*path;
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
if self.args.mmap() {
|
||||
self.search_mmap(printer, path, &file)
|
||||
} else {
|
||||
self.search(printer, path, file)
|
||||
}
|
||||
}
|
||||
};
|
||||
match result {
|
||||
Ok(count) => {
|
||||
self.match_count += count;
|
||||
}
|
||||
Err(err) => {
|
||||
fn get_or_log_dir_entry(
|
||||
result: result::Result<ignore::DirEntry, ignore::Error>,
|
||||
no_messages: bool,
|
||||
) -> Option<ignore::DirEntry> {
|
||||
match result {
|
||||
Err(err) => {
|
||||
if !no_messages {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn search<R: io::Read, W: Terminal + Send>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
rdr: R,
|
||||
) -> Result<u64> {
|
||||
self.args.searcher(
|
||||
&mut self.inpbuf,
|
||||
printer,
|
||||
&self.grep,
|
||||
path,
|
||||
rdr,
|
||||
).run().map_err(From::from)
|
||||
}
|
||||
|
||||
fn search_mmap<W: Terminal + Send>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
file: &File,
|
||||
) -> Result<u64> {
|
||||
if try!(file.metadata()).len() == 0 {
|
||||
// Opening a memory map with an empty file results in an error.
|
||||
// However, this may not actually be an empty file! For example,
|
||||
// /proc/cpuinfo reports itself as an empty file, but it can
|
||||
// produce data when it's read from. Therefore, we fall back to
|
||||
// regular read calls.
|
||||
return self.search(printer, path, file);
|
||||
Ok(dent) => {
|
||||
if let Some(err) = dent.error() {
|
||||
if !no_messages {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
}
|
||||
if !dent.file_type().map_or(true, |x| x.is_file()) {
|
||||
None
|
||||
} else {
|
||||
Some(dent)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints a hint to stderr explaining that no files were searched.
fn eprint_nothing_searched() {
    const NOTHING_SEARCHED: &'static str =
        "No files were searched, which means ripgrep probably \
         applied a filter you didn't expect. \
         Try running again with --debug.";
    eprintln!("{}", NOTHING_SEARCHED);
}
|
||||
|
||||
/// A simple thread safe abstraction for determining whether a search should
|
||||
/// stop if the user has requested quiet mode.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct QuietMatched(Arc<Option<AtomicBool>>);
|
||||
|
||||
impl QuietMatched {
|
||||
/// Create a new QuietMatched value.
|
||||
///
|
||||
/// If quiet is true, then set_match and has_match will reflect whether
|
||||
/// a search should quit or not because it found a match.
|
||||
///
|
||||
/// If quiet is false, then set_match is always a no-op and has_match
|
||||
/// always returns false.
|
||||
pub fn new(quiet: bool) -> QuietMatched {
|
||||
let atomic = if quiet { Some(AtomicBool::new(false)) } else { None };
|
||||
QuietMatched(Arc::new(atomic))
|
||||
}
|
||||
|
||||
/// Returns true if and only if quiet mode is enabled and a match has
|
||||
/// occurred.
|
||||
pub fn has_match(&self) -> bool {
|
||||
match *self.0 {
|
||||
None => false,
|
||||
Some(ref matched) => matched.load(Ordering::SeqCst),
|
||||
}
|
||||
}
|
||||
|
||||
/// Sets whether a match has occurred or not.
|
||||
///
|
||||
/// If quiet mode is disabled, then this is a no-op.
|
||||
pub fn set_match(&self, yes: bool) -> bool {
|
||||
match *self.0 {
|
||||
None => false,
|
||||
Some(_) if !yes => false,
|
||||
Some(ref m) => { m.store(true, Ordering::SeqCst); true }
|
||||
}
|
||||
let mmap = try!(Mmap::open(file, Protection::Read));
|
||||
Ok(self.args.searcher_buffer(
|
||||
printer,
|
||||
&self.grep,
|
||||
path,
|
||||
unsafe { mmap.as_slice() },
|
||||
).run())
|
||||
}
|
||||
}
|
||||
|
@@ -48,8 +48,6 @@ impl Out {
|
||||
|
||||
/// If set, the separator is printed between matches from different files.
|
||||
/// By default, no separator is printed.
|
||||
///
|
||||
/// If sep is empty, then no file separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
|
||||
self.file_separator = Some(sep);
|
||||
self
|
||||
|
@@ -8,7 +8,6 @@ with the raw bytes directly.
|
||||
On large repositories (like chromium), this can have a ~25% performance
|
||||
improvement on just listing the files to search (!).
|
||||
*/
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
/// Strip `prefix` from the `path` and return the remainder.
|
||||
@@ -19,6 +18,7 @@ pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
|
||||
prefix: &'a P,
|
||||
path: &'a Path,
|
||||
) -> Option<&'a Path> {
|
||||
use std::ffi::OsStr;
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
let prefix = prefix.as_ref().as_os_str().as_bytes();
|
||||
@@ -40,79 +40,3 @@ pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
|
||||
) -> Option<&'a Path> {
|
||||
path.strip_prefix(prefix).ok()
|
||||
}
|
||||
|
||||
/// The final component of the path, if it is a normal file.
|
||||
///
|
||||
/// If the path terminates in ., .., or consists solely of a root of prefix,
|
||||
/// file_name will return None.
|
||||
#[cfg(unix)]
|
||||
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
|
||||
path: &'a P,
|
||||
) -> Option<&'a OsStr> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memrchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
if path.is_empty() {
|
||||
return None;
|
||||
} else if path.len() == 1 && path[0] == b'.' {
|
||||
return None;
|
||||
} else if path.last() == Some(&b'.') {
|
||||
return None;
|
||||
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
|
||||
return None;
|
||||
}
|
||||
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
|
||||
Some(OsStr::from_bytes(&path[last_slash..]))
|
||||
}
|
||||
|
||||
/// Returns the final component of the path by deferring to the standard
/// library's `Path::file_name`.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    let as_path: &'a Path = path.as_ref();
    as_path.file_name()
}
|
||||
|
||||
/// Returns true if and only if this file path is considered to be hidden.
|
||||
#[cfg(unix)]
|
||||
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
if let Some(name) = file_name(path.as_ref()) {
|
||||
name.as_bytes().get(0) == Some(&b'.')
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if `file_name` yields a name for this path, the
/// name is valid UTF-8, and it begins with a `.`.
#[cfg(not(unix))]
pub fn is_hidden<P: AsRef<Path>>(path: P) -> bool {
    match file_name(path.as_ref()).and_then(|name| name.to_str()) {
        Some(name) => name.starts_with("."),
        None => false,
    }
}
|
||||
|
||||
/// Returns true if this file path is just a file name. i.e., Its parent is
|
||||
/// the empty string.
|
||||
#[cfg(unix)]
|
||||
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
use memchr::memchr;
|
||||
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
memchr(b'/', path).is_none()
|
||||
}
|
||||
|
||||
/// Returns true if this path is a bare file name, i.e. it has a parent and
/// that parent is the empty path.
#[cfg(not(unix))]
pub fn is_file_name<P: AsRef<Path>>(path: P) -> bool {
    match path.as_ref().parent() {
        Some(parent) => parent.as_os_str().is_empty(),
        None => false,
    }
}
|
||||
|
132
src/printer.rs
132
src/printer.rs
@@ -5,7 +5,7 @@ use term::{Attr, Terminal};
|
||||
use term::color;
|
||||
|
||||
use pathutil::strip_prefix;
|
||||
use types::FileTypeDef;
|
||||
use ignore::types::FileTypeDef;
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
@@ -25,18 +25,49 @@ pub struct Printer<W> {
|
||||
/// printed via the match directly, but occasionally we need to insert them
|
||||
/// ourselves (for example, to print a context separator).
|
||||
eol: u8,
|
||||
/// A file separator to show before any matches are printed.
|
||||
file_separator: Option<Vec<u8>>,
|
||||
/// Whether to show file name as a heading or not.
|
||||
///
|
||||
/// N.B. If with_filename is false, then this setting has no effect.
|
||||
heading: bool,
|
||||
/// Whether to show every match on its own line.
|
||||
line_per_match: bool,
|
||||
/// Whether to suppress all output.
|
||||
quiet: bool,
|
||||
/// Whether to print NUL bytes after a file path instead of new lines
|
||||
/// or `:`.
|
||||
null: bool,
|
||||
/// A string to use as a replacement of each match in a matching line.
|
||||
replace: Option<Vec<u8>>,
|
||||
/// Whether to prefix each match with the corresponding file name.
|
||||
with_filename: bool,
|
||||
/// The choice of colors.
|
||||
color_choice: ColorChoice
|
||||
}
|
||||
|
||||
struct ColorChoice {
|
||||
matched_line: color::Color,
|
||||
heading: color::Color,
|
||||
line_number: color::Color
|
||||
}
|
||||
|
||||
impl ColorChoice {
|
||||
#[cfg(unix)]
|
||||
pub fn new() -> ColorChoice {
|
||||
ColorChoice {
|
||||
matched_line: color::RED,
|
||||
heading: color::GREEN,
|
||||
line_number: color::BLUE
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
pub fn new() -> ColorChoice {
|
||||
ColorChoice {
|
||||
matched_line: color::BRIGHT_RED,
|
||||
heading: color::BRIGHT_GREEN,
|
||||
line_number: color::BRIGHT_BLUE
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Terminal + Send> Printer<W> {
|
||||
@@ -48,11 +79,13 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
column: false,
|
||||
context_separator: "--".to_string().into_bytes(),
|
||||
eol: b'\n',
|
||||
file_separator: None,
|
||||
heading: false,
|
||||
line_per_match: false,
|
||||
quiet: false,
|
||||
null: false,
|
||||
replace: None,
|
||||
with_filename: false,
|
||||
color_choice: ColorChoice::new()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -75,6 +108,13 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
self
|
||||
}
|
||||
|
||||
/// If set, the separator is printed before any matches. By default, no
|
||||
/// separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Printer<W> {
|
||||
self.file_separator = Some(sep);
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to show file name as a heading or not.
|
||||
///
|
||||
/// N.B. If with_filename is false, then this setting has no effect.
|
||||
@@ -89,9 +129,10 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, all output is suppressed.
|
||||
pub fn quiet(mut self, yes: bool) -> Printer<W> {
|
||||
self.quiet = yes;
|
||||
/// Whether to cause NUL bytes to follow file paths instead of other
|
||||
/// visual separators (like `:`, `-` and `\n`).
|
||||
pub fn null(mut self, yes: bool) -> Printer<W> {
|
||||
self.null = yes;
|
||||
self
|
||||
}
|
||||
|
||||
@@ -116,12 +157,8 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
self.has_printed
|
||||
}
|
||||
|
||||
/// Returns true if the printer has been configured to be quiet.
|
||||
pub fn is_quiet(&self) -> bool {
|
||||
self.quiet
|
||||
}
|
||||
|
||||
/// Flushes the underlying writer and returns it.
|
||||
#[allow(dead_code)]
|
||||
pub fn into_inner(mut self) -> W {
|
||||
let _ = self.wtr.flush();
|
||||
self.wtr
|
||||
@@ -132,11 +169,11 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
self.write(def.name().as_bytes());
|
||||
self.write(b": ");
|
||||
let mut first = true;
|
||||
for pat in def.patterns() {
|
||||
for glob in def.globs() {
|
||||
if !first {
|
||||
self.write(b", ");
|
||||
}
|
||||
self.write(pat.as_bytes());
|
||||
self.write(glob.as_bytes());
|
||||
first = false;
|
||||
}
|
||||
self.write_eol();
|
||||
@@ -146,14 +183,22 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
|
||||
let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref());
|
||||
self.write_path(path);
|
||||
self.write_eol();
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.write_eol();
|
||||
}
|
||||
}
|
||||
|
||||
/// Prints the given path and a count of the number of matches found.
|
||||
pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
|
||||
if self.with_filename {
|
||||
self.write_path(path);
|
||||
self.write(b":");
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.write(b":");
|
||||
}
|
||||
}
|
||||
self.write(count.to_string().as_bytes());
|
||||
self.write_eol();
|
||||
@@ -162,9 +207,6 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
/// Prints the context separator.
|
||||
pub fn context_separate(&mut self) {
|
||||
// N.B. We can't use `write` here because of borrowing restrictions.
|
||||
if self.quiet {
|
||||
return;
|
||||
}
|
||||
if self.context_separator.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -186,7 +228,7 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
let column =
|
||||
if self.column {
|
||||
Some(re.find(&buf[start..end])
|
||||
.map(|(s, _)| s + 1).unwrap_or(0) as u64)
|
||||
.map(|(s, _)| s).unwrap_or(0) as u64)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
@@ -211,10 +253,10 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
column: Option<u64>,
|
||||
) {
|
||||
if self.heading && self.with_filename && !self.has_printed {
|
||||
self.write_file_sep();
|
||||
self.write_heading(path.as_ref());
|
||||
} else if !self.heading && self.with_filename {
|
||||
self.write_path(path.as_ref());
|
||||
self.write(b":");
|
||||
self.write_non_heading_path(path.as_ref());
|
||||
}
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b':');
|
||||
@@ -243,7 +285,7 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
let mut last_written = 0;
|
||||
for (s, e) in re.find_iter(buf) {
|
||||
self.write(&buf[last_written..s]);
|
||||
let _ = self.wtr.fg(color::BRIGHT_RED);
|
||||
let _ = self.wtr.fg(self.color_choice.matched_line);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
self.write(&buf[s..e]);
|
||||
let _ = self.wtr.reset();
|
||||
@@ -261,10 +303,15 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
line_number: Option<u64>,
|
||||
) {
|
||||
if self.heading && self.with_filename && !self.has_printed {
|
||||
self.write_file_sep();
|
||||
self.write_heading(path.as_ref());
|
||||
} else if !self.heading && self.with_filename {
|
||||
self.write_path(path.as_ref());
|
||||
self.write(b"-");
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.write(b"-");
|
||||
}
|
||||
}
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b'-');
|
||||
@@ -277,19 +324,39 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
|
||||
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_GREEN);
|
||||
let _ = self.wtr.fg(self.color_choice.heading);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write_path(path.as_ref());
|
||||
self.write_eol();
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.write_eol();
|
||||
}
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
}
|
||||
|
||||
fn write_non_heading_path<P: AsRef<Path>>(&mut self, path: P) {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(self.color_choice.heading);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write_path(path.as_ref());
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.write(b":");
|
||||
}
|
||||
}
|
||||
|
||||
fn line_number(&mut self, n: u64, sep: u8) {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_BLUE);
|
||||
let _ = self.wtr.fg(self.color_choice.line_number);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(n.to_string().as_bytes());
|
||||
@@ -313,9 +380,6 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
}
|
||||
|
||||
fn write(&mut self, buf: &[u8]) {
|
||||
if self.quiet {
|
||||
return;
|
||||
}
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(buf);
|
||||
}
|
||||
@@ -324,4 +388,12 @@ impl<W: Terminal + Send> Printer<W> {
|
||||
let eol = self.eol;
|
||||
self.write(&[eol]);
|
||||
}
|
||||
|
||||
fn write_file_sep(&mut self) {
|
||||
if let Some(ref sep) = self.file_separator {
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(sep);
|
||||
let _ = self.wtr.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -81,6 +81,21 @@ impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
|
||||
self
|
||||
}
|
||||
|
||||
/// Limit the number of matches to the given count.
|
||||
///
|
||||
/// The default is None, which corresponds to no limit.
|
||||
pub fn max_count(mut self, count: Option<u64>) -> Self {
|
||||
self.opts.max_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, don't show any output and quit searching after the first
|
||||
/// match is found.
|
||||
pub fn quiet(mut self, yes: bool) -> Self {
|
||||
self.opts.quiet = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
@@ -104,11 +119,11 @@ impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
|
||||
self.print_match(m.start(), m.end());
|
||||
}
|
||||
last_end = m.end();
|
||||
if self.printer.is_quiet() || self.opts.files_with_matches {
|
||||
if self.opts.terminate(self.match_count) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if self.opts.invert_match {
|
||||
if self.opts.invert_match && !self.opts.terminate(self.match_count) {
|
||||
let upto = self.buf.len();
|
||||
self.print_inverted_matches(last_end, upto);
|
||||
}
|
||||
@@ -139,6 +154,9 @@ impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, start);
|
||||
while let Some((s, e)) = it.next(&self.buf[..end]) {
|
||||
if self.opts.terminate(self.match_count) {
|
||||
return;
|
||||
}
|
||||
self.print_match(s, e);
|
||||
}
|
||||
}
|
||||
@@ -259,6 +277,26 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(out, "/baz.rs\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_count() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
|
||||
assert_eq!(1, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match_max_count() {
|
||||
let (count, out) = search(
|
||||
"zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
|
||||
assert_eq!(1, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match() {
|
||||
let (count, out) = search(
|
||||
|
@@ -4,6 +4,8 @@ printing matches. In particular, it searches the file in a streaming fashion
|
||||
using `read` calls and a (roughly) fixed size buffer.
|
||||
*/
|
||||
|
||||
extern crate bytecount;
|
||||
|
||||
use std::cmp;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
@@ -84,6 +86,8 @@ pub struct Options {
|
||||
pub eol: u8,
|
||||
pub invert_match: bool,
|
||||
pub line_number: bool,
|
||||
pub max_count: Option<u64>,
|
||||
pub quiet: bool,
|
||||
pub text: bool,
|
||||
}
|
||||
|
||||
@@ -97,6 +101,8 @@ impl Default for Options {
|
||||
eol: b'\n',
|
||||
invert_match: false,
|
||||
line_number: false,
|
||||
max_count: None,
|
||||
quiet: false,
|
||||
text: false,
|
||||
}
|
||||
}
|
||||
@@ -104,10 +110,27 @@ impl Default for Options {
|
||||
}
|
||||
|
||||
impl Options {
|
||||
/// Both --count and --files-with-matches options imply that we should not
|
||||
/// display matches at all.
|
||||
/// Several options (--quiet, --count, --files-with-matches) imply that
|
||||
/// we shouldn't ever display matches.
|
||||
pub fn skip_matches(&self) -> bool {
|
||||
return self.count || self.files_with_matches;
|
||||
self.count || self.files_with_matches || self.quiet
|
||||
}
|
||||
|
||||
/// Some options (--quiet, --files-with-matches) imply that we can stop
|
||||
/// searching after the first match.
|
||||
pub fn stop_after_first_match(&self) -> bool {
|
||||
self.files_with_matches || self.quiet
|
||||
}
|
||||
|
||||
/// Returns true if the search should terminate based on the match count.
|
||||
pub fn terminate(&self, match_count: u64) -> bool {
|
||||
if match_count > 0 && self.stop_after_first_match() {
|
||||
return true;
|
||||
}
|
||||
if self.max_count.map_or(false, |max| match_count >= max) {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
@@ -197,6 +220,21 @@ impl<'a, R: io::Read, W: Terminal + Send> Searcher<'a, R, W> {
|
||||
self
|
||||
}
|
||||
|
||||
/// Limit the number of matches to the given count.
|
||||
///
|
||||
/// The default is None, which corresponds to no limit.
|
||||
pub fn max_count(mut self, count: Option<u64>) -> Self {
|
||||
self.opts.max_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, don't show any output and quit searching after the first
|
||||
/// match is found.
|
||||
pub fn quiet(mut self, yes: bool) -> Self {
|
||||
self.opts.quiet = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
@@ -265,8 +303,7 @@ impl<'a, R: io::Read, W: Terminal + Send> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn terminate(&self) -> bool {
|
||||
self.match_count > 0
|
||||
&& (self.printer.is_quiet() || self.opts.files_with_matches)
|
||||
self.opts.terminate(self.match_count)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@@ -303,6 +340,9 @@ impl<'a, R: io::Read, W: Terminal + Send> Searcher<'a, R, W> {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, self.inp.pos);
|
||||
while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
|
||||
if self.terminate() {
|
||||
return;
|
||||
}
|
||||
self.print_match(start, end);
|
||||
self.inp.pos = end;
|
||||
}
|
||||
@@ -568,89 +608,9 @@ pub fn is_binary(buf: &[u8]) -> bool {
|
||||
}
|
||||
|
||||
/// Count the number of lines in the given buffer.
|
||||
#[inline(never)]
|
||||
|
||||
#[inline(never)]
|
||||
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
|
||||
// This was adapted from code in the memchr crate. The specific benefit
|
||||
// here is that we can avoid a branch in the inner loop because all we're
|
||||
// doing is counting.
|
||||
|
||||
// The technique to count EOL bytes was adapted from:
|
||||
// http://bits.stephan-brumme.com/null.html
|
||||
const LO_U64: u64 = 0x0101010101010101;
|
||||
const HI_U64: u64 = 0x8080808080808080;
|
||||
|
||||
// use truncation
|
||||
const LO_USIZE: usize = LO_U64 as usize;
|
||||
const HI_USIZE: usize = HI_U64 as usize;
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
const USIZE_BYTES: usize = 4;
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
const USIZE_BYTES: usize = 8;
|
||||
|
||||
fn count_eol(eol: usize) -> u64 {
|
||||
// Ideally, this would compile down to a POPCNT instruction, but
|
||||
// it looks like you need to set RUSTFLAGS="-C target-cpu=native"
|
||||
// (or target-feature=+popcnt) to get that to work. Bummer.
|
||||
(eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
fn repeat_byte(b: u8) -> usize {
|
||||
let mut rep = (b as usize) << 8 | b as usize;
|
||||
rep = rep << 16 | rep;
|
||||
rep
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn repeat_byte(b: u8) -> usize {
|
||||
let mut rep = (b as usize) << 8 | b as usize;
|
||||
rep = rep << 16 | rep;
|
||||
rep = rep << 32 | rep;
|
||||
rep
|
||||
}
|
||||
|
||||
fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 {
|
||||
let mut count = 0;
|
||||
while let Some(pos) = memchr(eol, buf) {
|
||||
count += 1;
|
||||
buf = &buf[pos + 1..];
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
let len = buf.len();
|
||||
let ptr = buf.as_ptr();
|
||||
let mut count = 0;
|
||||
|
||||
// Search up to an aligned boundary...
|
||||
let align = (ptr as usize) & (USIZE_BYTES - 1);
|
||||
let mut i = 0;
|
||||
if align > 0 {
|
||||
i = cmp::min(USIZE_BYTES - align, len);
|
||||
count += count_lines_slow(&buf[..i], eol);
|
||||
}
|
||||
|
||||
// ... and search the rest.
|
||||
let repeated_eol = repeat_byte(eol);
|
||||
|
||||
if len >= 2 * USIZE_BYTES {
|
||||
while i <= len - (2 * USIZE_BYTES) {
|
||||
unsafe {
|
||||
let u = *(ptr.offset(i as isize) as *const usize);
|
||||
let v = *(ptr.offset((i + USIZE_BYTES) as isize)
|
||||
as *const usize);
|
||||
|
||||
count += count_eol(u ^ repeated_eol);
|
||||
count += count_eol(v ^ repeated_eol);
|
||||
}
|
||||
i += USIZE_BYTES * 2;
|
||||
}
|
||||
}
|
||||
count += count_lines_slow(&buf[i..], eol);
|
||||
count
|
||||
bytecount::count(buf, eol) as u64
|
||||
}
|
||||
|
||||
/// Replaces a with b in buf.
|
||||
@@ -1026,6 +986,26 @@ fn main() {
|
||||
assert_eq!(out, "/baz.rs\n");
|
||||
}
|
||||
|
||||
#[test]
fn max_count() {
    // With a limit of one match, only the first matching line is reported.
    let expected = "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
";
    let (count, out) = search_smallcap(
        "Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
    assert_eq!(1, count);
    assert_eq!(out, expected);
}
|
||||
|
||||
#[test]
fn invert_match_max_count() {
    // "zzzz" is searched with matching inverted and a limit of one, so the
    // expected output is a single line.
    let expected = "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
";
    let (count, out) = search(
        "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
    assert_eq!(1, count);
    assert_eq!(out, expected);
}
|
||||
|
||||
#[test]
|
||||
fn invert_match() {
|
||||
let (count, out) = search_smallcap(
|
||||
|
435
src/types.rs
435
src/types.rs
@@ -1,435 +0,0 @@
|
||||
/*!
|
||||
The types module provides a way of associating glob patterns on file names to
|
||||
file types.
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
|
||||
use regex;
|
||||
|
||||
use gitignore::{Match, Pattern};
|
||||
use glob::{self, MatchOptions};
|
||||
|
||||
/// The built-in file type definitions.
///
/// Each entry pairs a file type name with the glob patterns that recognize
/// files of that type. `TypesBuilder::add_defaults` registers every
/// (name, pattern) pair found in this table.
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
    ("asm", &["*.asm", "*.s", "*.S"]),
    ("awk", &["*.awk"]),
    ("c", &["*.c", "*.h", "*.H"]),
    ("cbor", &["*.cbor"]),
    ("clojure", &["*.clj", "*.cljs"]),
    ("cmake", &["CMakeLists.txt"]),
    ("coffeescript", &["*.coffee"]),
    ("cpp", &["*.C", "*.cc", "*.cpp", "*.cxx", "*.h", "*.H", "*.hh", "*.hpp"]),
    ("csharp", &["*.cs"]),
    ("css", &["*.css"]),
    ("cython", &["*.pyx"]),
    ("dart", &["*.dart"]),
    ("d", &["*.d"]),
    ("elisp", &["*.el"]),
    ("erlang", &["*.erl", "*.hrl"]),
    (
        "fortran",
        &["*.f", "*.F", "*.f77", "*.F77", "*.pfo", "*.f90", "*.F90", "*.f95", "*.F95"],
    ),
    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
    ("go", &["*.go"]),
    ("groovy", &["*.groovy"]),
    ("haskell", &["*.hs", "*.lhs"]),
    ("html", &["*.htm", "*.html"]),
    ("java", &["*.java"]),
    ("js", &["*.js", "*.jsx", "*.vue"]),
    ("json", &["*.json"]),
    ("jsonl", &["*.jsonl"]),
    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
    ("lua", &["*.lua"]),
    ("m4", &["*.ac", "*.m4"]),
    ("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]),
    ("markdown", &["*.md"]),
    ("matlab", &["*.m"]),
    ("mk", &["mkfile"]),
    ("ml", &["*.ml"]),
    ("nim", &["*.nim"]),
    ("objc", &["*.h", "*.m"]),
    ("objcpp", &["*.h", "*.mm"]),
    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm"]),
    ("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
    ("py", &["*.py"]),
    ("readme", &["README*", "*README"]),
    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
    ("rst", &["*.rst"]),
    ("ruby", &["*.rb"]),
    ("rust", &["*.rs"]),
    ("scala", &["*.scala"]),
    ("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
    ("sql", &["*.sql"]),
    ("swift", &["*.swift"]),
    ("tex", &["*.tex", "*.cls", "*.sty"]),
    ("ts", &["*.ts", "*.tsx"]),
    ("txt", &["*.txt"]),
    ("toml", &["*.toml", "Cargo.lock"]),
    ("vala", &["*.vala"]),
    ("vb", &["*.vb"]),
    ("vimscript", &["*.vim"]),
    ("xml", &["*.xml"]),
    ("yacc", &["*.y"]),
    ("yaml", &["*.yaml", "*.yml"]),
];
|
||||
|
||||
/// Describes all the possible failure conditions for building a file type
|
||||
/// matcher.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// We tried to select (or negate) a file type that is not defined.
|
||||
UnrecognizedFileType(String),
|
||||
/// A user specified file type definition could not be parsed.
|
||||
InvalidDefinition,
|
||||
/// There was an error building the matcher (probably a bad glob).
|
||||
Glob(glob::Error),
|
||||
/// There was an error compiling a glob as a regex.
|
||||
Regex(regex::Error),
|
||||
}
|
||||
|
||||
impl StdError for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::UnrecognizedFileType(_) => "unrecognized file type",
|
||||
Error::InvalidDefinition => "invalid definition",
|
||||
Error::Glob(ref err) => err.description(),
|
||||
Error::Regex(ref err) => err.description(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::UnrecognizedFileType(ref ty) => {
|
||||
write!(f, "unrecognized file type: {}", ty)
|
||||
}
|
||||
Error::InvalidDefinition => {
|
||||
write!(f, "invalid definition (format is type:glob, e.g., \
|
||||
html:*.html)")
|
||||
}
|
||||
Error::Glob(ref err) => err.fmt(f),
|
||||
Error::Regex(ref err) => err.fmt(f),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<glob::Error> for Error {
|
||||
fn from(err: glob::Error) -> Error {
|
||||
Error::Glob(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<regex::Error> for Error {
|
||||
fn from(err: regex::Error) -> Error {
|
||||
Error::Regex(err)
|
||||
}
|
||||
}
|
||||
|
||||
/// A single file type definition.
#[derive(Clone, Debug)]
pub struct FileTypeDef {
    /// The name of the file type.
    name: String,
    /// The glob patterns that recognize files of this type.
    pats: Vec<String>,
}

impl FileTypeDef {
    /// Return the name of this file type.
    pub fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Return the glob patterns used to recognize this file type.
    pub fn patterns(&self) -> &[String] {
        self.pats.as_slice()
    }
}
|
||||
|
||||
/// Types is a file type matcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Types {
|
||||
selected: Option<glob::SetYesNo>,
|
||||
negated: Option<glob::SetYesNo>,
|
||||
has_selected: bool,
|
||||
unmatched_pat: Pattern,
|
||||
}
|
||||
|
||||
impl Types {
|
||||
/// Creates a new file type matcher from the given Gitignore matcher. If
|
||||
/// not Gitignore matcher is provided, then the file type matcher has no
|
||||
/// effect.
|
||||
///
|
||||
/// If has_selected is true, then at least one file type was selected.
|
||||
/// Therefore, any non-matches should be ignored.
|
||||
fn new(
|
||||
selected: Option<glob::SetYesNo>,
|
||||
negated: Option<glob::SetYesNo>,
|
||||
has_selected: bool,
|
||||
) -> Types {
|
||||
Types {
|
||||
selected: selected,
|
||||
negated: negated,
|
||||
has_selected: has_selected,
|
||||
unmatched_pat: Pattern {
|
||||
from: Path::new("<filetype>").to_path_buf(),
|
||||
original: "<N/A>".to_string(),
|
||||
pat: "<N/A>".to_string(),
|
||||
whitelist: false,
|
||||
only_dir: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new file type matcher that never matches.
|
||||
pub fn empty() -> Types {
|
||||
Types::new(None, None, false)
|
||||
}
|
||||
|
||||
/// Returns a match for the given path against this file type matcher.
|
||||
///
|
||||
/// The path is considered whitelisted if it matches a selected file type.
|
||||
/// The path is considered ignored if it matched a negated file type.
|
||||
/// If at least one file type is selected and path doesn't match, then
|
||||
/// the path is also considered ignored.
|
||||
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||
// If we don't have any matcher, then we can't do anything.
|
||||
if self.negated.is_none() && self.selected.is_none() {
|
||||
return Match::None;
|
||||
}
|
||||
// File types don't apply to directories.
|
||||
if is_dir {
|
||||
return Match::None;
|
||||
}
|
||||
let path = path.as_ref();
|
||||
let name = match path.file_name() {
|
||||
Some(name) => name.to_string_lossy(),
|
||||
None if self.has_selected => {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
}
|
||||
None => {
|
||||
return Match::None;
|
||||
}
|
||||
};
|
||||
if self.negated.as_ref().map(|s| s.is_match(&*name)).unwrap_or(false) {
|
||||
return Match::Ignored(&self.unmatched_pat);
|
||||
}
|
||||
if self.selected.as_ref().map(|s|s.is_match(&*name)).unwrap_or(false) {
|
||||
return Match::Whitelist(&self.unmatched_pat);
|
||||
}
|
||||
if self.has_selected {
|
||||
Match::Ignored(&self.unmatched_pat)
|
||||
} else {
|
||||
Match::None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// TypesBuilder builds a type matcher from a set of file type definitions and
|
||||
/// a set of file type selections.
|
||||
pub struct TypesBuilder {
|
||||
types: HashMap<String, Vec<String>>,
|
||||
selected: Vec<String>,
|
||||
negated: Vec<String>,
|
||||
}
|
||||
|
||||
impl TypesBuilder {
|
||||
/// Create a new builder for a file type matcher.
|
||||
pub fn new() -> TypesBuilder {
|
||||
TypesBuilder {
|
||||
types: HashMap::new(),
|
||||
selected: vec![],
|
||||
negated: vec![],
|
||||
}
|
||||
}
|
||||
|
||||
/// Build the current set of file type definitions *and* selections into
|
||||
/// a file type matcher.
|
||||
pub fn build(&self) -> Result<Types, Error> {
|
||||
let opts = MatchOptions {
|
||||
require_literal_separator: true, ..MatchOptions::default()
|
||||
};
|
||||
let selected_globs =
|
||||
if self.selected.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut bset = glob::SetBuilder::new();
|
||||
for name in &self.selected {
|
||||
let globs = match self.types.get(name) {
|
||||
Some(globs) => globs,
|
||||
None => {
|
||||
let msg = name.to_string();
|
||||
return Err(Error::UnrecognizedFileType(msg));
|
||||
}
|
||||
};
|
||||
for glob in globs {
|
||||
try!(bset.add_with(glob, &opts));
|
||||
}
|
||||
}
|
||||
Some(try!(bset.build_yesno()))
|
||||
};
|
||||
let negated_globs =
|
||||
if self.negated.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let mut bset = glob::SetBuilder::new();
|
||||
for name in &self.negated {
|
||||
let globs = match self.types.get(name) {
|
||||
Some(globs) => globs,
|
||||
None => {
|
||||
let msg = name.to_string();
|
||||
return Err(Error::UnrecognizedFileType(msg));
|
||||
}
|
||||
};
|
||||
for glob in globs {
|
||||
try!(bset.add_with(glob, &opts));
|
||||
}
|
||||
}
|
||||
Some(try!(bset.build_yesno()))
|
||||
};
|
||||
Ok(Types::new(
|
||||
selected_globs, negated_globs, !self.selected.is_empty()))
|
||||
}
|
||||
|
||||
/// Return the set of current file type definitions.
|
||||
pub fn definitions(&self) -> Vec<FileTypeDef> {
|
||||
let mut defs = vec![];
|
||||
for (ref name, ref pats) in &self.types {
|
||||
let mut pats = pats.to_vec();
|
||||
pats.sort();
|
||||
defs.push(FileTypeDef {
|
||||
name: name.to_string(),
|
||||
pats: pats,
|
||||
});
|
||||
}
|
||||
defs.sort_by(|def1, def2| def1.name().cmp(def2.name()));
|
||||
defs
|
||||
}
|
||||
|
||||
/// Select the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types are selected.
|
||||
pub fn select(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.selected.push(name.to_string());
|
||||
}
|
||||
} else {
|
||||
self.selected.push(name.to_string());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Ignore the file type given by `name`.
|
||||
///
|
||||
/// If `name` is `all`, then all file types are negated.
|
||||
pub fn negate(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
if name == "all" {
|
||||
for name in self.types.keys() {
|
||||
self.negated.push(name.to_string());
|
||||
}
|
||||
} else {
|
||||
self.negated.push(name.to_string());
|
||||
}
|
||||
self
|
||||
}
|
||||
|
||||
/// Clear any file type definitions for the type given.
|
||||
pub fn clear(&mut self, name: &str) -> &mut TypesBuilder {
|
||||
self.types.remove(name);
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition. `name` can be arbitrary and `pat`
|
||||
/// should be a glob recognizing file paths belonging to the `name` type.
|
||||
pub fn add(&mut self, name: &str, pat: &str) -> &mut TypesBuilder {
|
||||
self.types.entry(name.to_string())
|
||||
.or_insert(vec![]).push(pat.to_string());
|
||||
self
|
||||
}
|
||||
|
||||
/// Add a new file type definition specified in string form. The format
|
||||
/// is `name:glob`. Names may not include a colon.
|
||||
pub fn add_def(&mut self, def: &str) -> Result<(), Error> {
|
||||
let name: String = def.chars().take_while(|&c| c != ':').collect();
|
||||
let pat: String = def.chars().skip(name.chars().count() + 1).collect();
|
||||
if name.is_empty() || pat.is_empty() {
|
||||
return Err(Error::InvalidDefinition);
|
||||
}
|
||||
self.add(&name, &pat);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Add a set of default file type definitions.
|
||||
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
|
||||
for &(name, exts) in TYPE_EXTENSIONS {
|
||||
for ext in exts {
|
||||
self.add(name, ext);
|
||||
}
|
||||
}
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::TypesBuilder;

    // Generates one `#[test]` function per invocation. The test builds a
    // matcher from the given definitions, selections and negations, and then
    // checks whether the given path is (or, with a leading `not`, is not)
    // accepted, i.e., not ignored.
    macro_rules! matched {
        ($test:ident, $defs:expr, $select:expr, $negate:expr,
         $file:expr) => {
            matched!($test, $defs, $select, $negate, $file, true);
        };
        (not, $test:ident, $defs:expr, $select:expr, $negate:expr,
         $file:expr) => {
            matched!($test, $defs, $select, $negate, $file, false);
        };
        ($test:ident, $defs:expr, $select:expr, $negate:expr,
         $file:expr, $expected:expr) => {
            #[test]
            fn $test() {
                let mut builder = TypesBuilder::new();
                for def in $defs {
                    builder.add_def(def).unwrap();
                }
                for ty in $select {
                    builder.select(ty);
                }
                for ty in $negate {
                    builder.negate(ty);
                }
                let matcher = builder.build().unwrap();
                let ignored = matcher.matched($file, false).is_ignored();
                assert_eq!($expected, !ignored);
            }
        };
    }

    // The file type definitions shared by all tests below.
    fn types() -> Vec<&'static str> {
        vec![
            "html:*.html",
            "html:*.htm",
            "rust:*.rs",
            "js:*.js",
        ]
    }

    matched!(match1, types(), vec!["rust"], vec![], "lib.rs");
    matched!(match2, types(), vec!["html"], vec![], "index.html");
    matched!(match3, types(), vec!["html"], vec![], "index.htm");
    matched!(match4, types(), vec!["html", "rust"], vec![], "main.rs");
    matched!(match5, types(), vec![], vec![], "index.html");
    matched!(match6, types(), vec![], vec!["rust"], "index.html");

    matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
    matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
}
|
140
src/walk.rs
140
src/walk.rs
@@ -1,140 +0,0 @@
|
||||
/*!
|
||||
The walk module implements a recursive directory iterator (using the `walkdir`
|
||||
crate) that can efficiently skip and ignore files and directories specified in
|
||||
a user's ignore patterns.
|
||||
*/
|
||||
|
||||
use walkdir::{self, DirEntry, WalkDir, WalkDirIterator};
|
||||
|
||||
use ignore::Ignore;
|
||||
|
||||
/// Iter is a recursive directory iterator over file paths in a directory.
|
||||
/// Only file paths should be searched are yielded.
|
||||
pub struct Iter {
|
||||
ig: Ignore,
|
||||
it: WalkEventIter,
|
||||
}
|
||||
|
||||
impl Iter {
|
||||
/// Create a new recursive directory iterator using the ignore patterns
|
||||
/// and walkdir iterator given.
|
||||
pub fn new(ig: Ignore, wd: WalkDir) -> Iter {
|
||||
Iter {
|
||||
ig: ig,
|
||||
it: WalkEventIter::from(wd),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if this entry should be skipped.
|
||||
#[inline(always)]
|
||||
fn skip_entry(&self, ent: &DirEntry) -> bool {
|
||||
if ent.depth() == 0 {
|
||||
// Never skip the root directory.
|
||||
return false;
|
||||
}
|
||||
if self.ig.ignored(ent.path(), ent.file_type().is_dir()) {
|
||||
return true;
|
||||
}
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for Iter {
|
||||
type Item = DirEntry;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<DirEntry> {
|
||||
while let Some(ev) = self.it.next() {
|
||||
match ev {
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
Ok(WalkEvent::Exit) => {
|
||||
self.ig.pop();
|
||||
}
|
||||
Ok(WalkEvent::Dir(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
self.it.it.skip_current_dir();
|
||||
// Still need to push this on the stack because we'll
|
||||
// get a WalkEvent::Exit event for this dir. We don't
|
||||
// care if it errors though.
|
||||
let _ = self.ig.push(ent.path());
|
||||
continue;
|
||||
}
|
||||
if let Err(err) = self.ig.push(ent.path()) {
|
||||
eprintln!("{}", err);
|
||||
self.it.it.skip_current_dir();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Ok(WalkEvent::File(ent)) => {
|
||||
if self.skip_entry(&ent) {
|
||||
continue;
|
||||
}
|
||||
// If this isn't actually a file (e.g., a symlink), then
|
||||
// skip it.
|
||||
if !ent.file_type().is_file() {
|
||||
continue;
|
||||
}
|
||||
return Some(ent);
|
||||
}
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
||||
/// accurately describes the directory tree. Namely, it emits events that are
|
||||
/// one of three types: directory, file or "exit." An "exit" event means that
|
||||
/// the entire contents of a directory have been enumerated.
|
||||
struct WalkEventIter {
|
||||
/// Depth bookkeeping used to detect that a directory has been left: an
/// "exit" event is emitted whenever the next entry's depth is smaller than
/// this value.
depth: usize,
|
||||
/// The underlying walkdir iterator.
it: walkdir::Iter,
|
||||
/// An entry that was already read from `it` but held back because an "exit"
/// event had to be emitted first.
next: Option<Result<DirEntry, walkdir::Error>>,
|
||||
}
|
||||
|
||||
/// The events emitted by `WalkEventIter`.
#[derive(Debug)]
|
||||
enum WalkEvent {
|
||||
/// A directory entry was encountered.
Dir(DirEntry),
|
||||
/// A non-directory entry was encountered.
File(DirEntry),
|
||||
/// The entire contents of a directory have been enumerated.
Exit,
|
||||
}
|
||||
|
||||
impl From<WalkDir> for WalkEventIter {
|
||||
fn from(it: WalkDir) -> WalkEventIter {
|
||||
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for WalkEventIter {
|
||||
type Item = walkdir::Result<WalkEvent>;
|
||||
|
||||
#[inline(always)]
|
||||
fn next(&mut self) -> Option<walkdir::Result<WalkEvent>> {
|
||||
let dent = self.next.take().or_else(|| self.it.next());
|
||||
let depth = match dent {
|
||||
None => 0,
|
||||
Some(Ok(ref dent)) => dent.depth(),
|
||||
Some(Err(ref err)) => err.depth(),
|
||||
};
|
||||
if depth < self.depth {
|
||||
self.depth -= 1;
|
||||
self.next = dent;
|
||||
return Some(Ok(WalkEvent::Exit));
|
||||
}
|
||||
self.depth = depth;
|
||||
match dent {
|
||||
None => None,
|
||||
Some(Err(err)) => Some(Err(err)),
|
||||
Some(Ok(dent)) => {
|
||||
if dent.file_type().is_dir() {
|
||||
self.depth += 1;
|
||||
Some(Ok(WalkEvent::Dir(dent)))
|
||||
} else {
|
||||
Some(Ok(WalkEvent::File(dent)))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
271
src/worker.rs
Normal file
271
src/worker.rs
Normal file
@@ -0,0 +1,271 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use grep::Grep;
|
||||
use ignore::DirEntry;
|
||||
use memmap::{Mmap, Protection};
|
||||
use term::Terminal;
|
||||
|
||||
use pathutil::strip_prefix;
|
||||
use printer::Printer;
|
||||
use search_buffer::BufferSearcher;
|
||||
use search_stream::{InputBuffer, Searcher};
|
||||
|
||||
use Result;
|
||||
|
||||
/// A unit of work that can be handed to `Worker::run`.
pub enum Work {
|
||||
/// Search standard input.
Stdin,
|
||||
/// Search the file at the path of this directory entry.
DirEntry(DirEntry),
|
||||
}
|
||||
|
||||
/// WorkerBuilder configures and creates a `Worker`.
pub struct WorkerBuilder {
|
||||
/// The matcher handed over to the worker on `build`.
grep: Grep,
|
||||
/// The search options collected by the builder's setters.
opts: Options,
|
||||
}
|
||||
|
||||
/// The search options of a worker, as collected by `WorkerBuilder`.
#[derive(Clone, Debug)]
struct Options {
    /// Try to use memory maps for searching if possible.
    mmap: bool,
    /// The number of contextual lines to show after each match.
    after_context: usize,
    /// The number of contextual lines to show before each match.
    before_context: usize,
    /// Print a count instead of each match.
    count: bool,
    /// Print the path instead of each match.
    files_with_matches: bool,
    /// The end-of-line byte.
    eol: u8,
    /// Treat lines that *don't* match the pattern as matches.
    invert_match: bool,
    /// Compute line numbers and prefix each line of output with them.
    line_number: bool,
    /// The maximum number of matches; `None` means no limit.
    max_count: Option<u64>,
    /// Suppress the error messages printed by the worker.
    no_messages: bool,
    /// Don't show any output and quit after the first match.
    quiet: bool,
    /// Search binary files as if they were text.
    text: bool,
}

impl Default for Options {
    /// Everything is disabled, context sizes are zero, there is no match
    /// limit, and lines end with `\n`.
    fn default() -> Options {
        Options {
            mmap: false,
            after_context: 0,
            before_context: 0,
            count: false,
            files_with_matches: false,
            eol: b'\n',
            invert_match: false,
            line_number: false,
            max_count: None,
            no_messages: false,
            quiet: false,
            text: false,
        }
    }
}
|
||||
|
||||
impl WorkerBuilder {
|
||||
/// Create a new builder for a worker.
|
||||
///
|
||||
/// A reusable input buffer and a grep matcher are required, but there
|
||||
/// are numerous additional options that can be configured on this builder.
|
||||
pub fn new(grep: Grep) -> WorkerBuilder {
|
||||
WorkerBuilder {
|
||||
grep: grep,
|
||||
opts: Options::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the worker from this builder.
|
||||
pub fn build(self) -> Worker {
|
||||
let mut inpbuf = InputBuffer::new();
|
||||
inpbuf.eol(self.opts.eol);
|
||||
Worker {
|
||||
grep: self.grep,
|
||||
inpbuf: inpbuf,
|
||||
opts: self.opts,
|
||||
}
|
||||
}
|
||||
|
||||
/// The number of contextual lines to show after each match. The default
|
||||
/// is zero.
|
||||
pub fn after_context(mut self, count: usize) -> Self {
|
||||
self.opts.after_context = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// The number of contextual lines to show before each match. The default
|
||||
/// is zero.
|
||||
pub fn before_context(mut self, count: usize) -> Self {
|
||||
self.opts.before_context = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.opts.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the path instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn files_with_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.files_with_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line byte used by this searcher.
|
||||
pub fn eol(mut self, eol: u8) -> Self {
|
||||
self.opts.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.opts.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.opts.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Limit the number of matches to the given count.
|
||||
///
|
||||
/// The default is None, which corresponds to no limit.
|
||||
pub fn max_count(mut self, count: Option<u64>) -> Self {
|
||||
self.opts.max_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, try to use memory maps for searching if possible.
|
||||
pub fn mmap(mut self, yes: bool) -> Self {
|
||||
self.opts.mmap = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, don't show any output and quit searching after the first
|
||||
/// match is found.
|
||||
pub fn quiet(mut self, yes: bool) -> Self {
|
||||
self.opts.quiet = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Worker is responsible for executing searches on file paths, while choosing
|
||||
/// streaming search or memory map search as appropriate.
|
||||
pub struct Worker {
|
||||
/// The input buffer handed to every streaming search.
inpbuf: InputBuffer,
|
||||
/// The matcher used for every search.
grep: Grep,
|
||||
/// The search options configured on the builder.
opts: Options,
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
/// Execute the worker with the given printer and work item.
|
||||
///
|
||||
/// A work item can either be stdin or a file path.
|
||||
pub fn run<W: Terminal + Send>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
work: Work,
|
||||
) -> u64 {
|
||||
let result = match work {
|
||||
Work::Stdin => {
|
||||
let stdin = io::stdin();
|
||||
let stdin = stdin.lock();
|
||||
self.search(printer, &Path::new("<stdin>"), stdin)
|
||||
}
|
||||
Work::DirEntry(dent) => {
|
||||
let mut path = dent.path();
|
||||
let file = match File::open(path) {
|
||||
Ok(file) => file,
|
||||
Err(err) => {
|
||||
if !self.opts.no_messages {
|
||||
eprintln!("{}: {}", path.display(), err);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
if self.opts.mmap {
|
||||
self.search_mmap(printer, path, &file)
|
||||
} else {
|
||||
self.search(printer, path, file)
|
||||
}
|
||||
}
|
||||
};
|
||||
match result {
|
||||
Ok(count) => {
|
||||
count
|
||||
}
|
||||
Err(err) => {
|
||||
if !self.opts.no_messages {
|
||||
eprintln!("{}", err);
|
||||
}
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn search<R: io::Read, W: Terminal + Send>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
rdr: R,
|
||||
) -> Result<u64> {
|
||||
let searcher = Searcher::new(
|
||||
&mut self.inpbuf, printer, &self.grep, path, rdr);
|
||||
searcher
|
||||
.after_context(self.opts.after_context)
|
||||
.before_context(self.opts.before_context)
|
||||
.count(self.opts.count)
|
||||
.files_with_matches(self.opts.files_with_matches)
|
||||
.eol(self.opts.eol)
|
||||
.line_number(self.opts.line_number)
|
||||
.invert_match(self.opts.invert_match)
|
||||
.max_count(self.opts.max_count)
|
||||
.quiet(self.opts.quiet)
|
||||
.text(self.opts.text)
|
||||
.run()
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
fn search_mmap<W: Terminal + Send>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
file: &File,
|
||||
) -> Result<u64> {
|
||||
if try!(file.metadata()).len() == 0 {
|
||||
// Opening a memory map with an empty file results in an error.
|
||||
// However, this may not actually be an empty file! For example,
|
||||
// /proc/cpuinfo reports itself as an empty file, but it can
|
||||
// produce data when it's read from. Therefore, we fall back to
|
||||
// regular read calls.
|
||||
return self.search(printer, path, file);
|
||||
}
|
||||
let mmap = try!(Mmap::open(file, Protection::Read));
|
||||
let searcher = BufferSearcher::new(
|
||||
printer, &self.grep, path, unsafe { mmap.as_slice() });
|
||||
Ok(searcher
|
||||
.count(self.opts.count)
|
||||
.files_with_matches(self.opts.files_with_matches)
|
||||
.eol(self.opts.eol)
|
||||
.line_number(self.opts.line_number)
|
||||
.invert_match(self.opts.invert_match)
|
||||
.max_count(self.opts.max_count)
|
||||
.quiet(self.opts.quiet)
|
||||
.text(self.opts.text)
|
||||
.run())
|
||||
}
|
||||
}
|
372
tests/tests.rs
372
tests/tests.rs
@@ -54,6 +54,27 @@ fn path(unix: &str) -> String {
|
||||
}
|
||||
}
|
||||
|
||||
fn paths(unix: &[&str]) -> Vec<String> {
|
||||
let mut xs: Vec<_> = unix.iter().map(|s| path(s)).collect();
|
||||
xs.sort();
|
||||
xs
|
||||
}
|
||||
|
||||
// Extracts the part before the first ':' from every line of `stdout` (the
// whole line if there is no ':') and returns those prefixes in sorted order.
fn paths_from_stdout(stdout: String) -> Vec<String> {
    let mut found = Vec::new();
    for line in stdout.lines() {
        let prefix = match line.find(':') {
            Some(colon) => &line[..colon],
            None => line,
        };
        found.push(prefix.to_string());
    }
    found.sort();
    found
}
|
||||
|
||||
// Sorts the lines of `lines` (after trimming surrounding whitespace) and
// joins them again, always ending the result with a single newline.
fn sort_lines(lines: &str) -> String {
    let mut sorted: Vec<&str> = lines.trim().lines().collect();
    sorted.sort();
    let mut out = sorted.join("\n");
    out.push('\n');
    out
}
|
||||
|
||||
sherlock!(single_file, |wd: WorkDir, mut cmd| {
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = "\
|
||||
@@ -86,8 +107,8 @@ sherlock!(columns, |wd: WorkDir, mut cmd: Command| {
|
||||
cmd.arg("--column");
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = "\
|
||||
58:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
50:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
57:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
49:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
@@ -535,7 +556,7 @@ sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.remove("sherlock");
|
||||
wd.create_dir("foo");
|
||||
wd.create_dir("foo/bar");
|
||||
wd.link("foo/baz", "foo/bar/baz");
|
||||
wd.link_dir("foo/baz", "foo/bar/baz");
|
||||
wd.create_dir("foo/baz");
|
||||
wd.create("foo/baz/sherlock", hay::SHERLOCK);
|
||||
cmd.current_dir(wd.path().join("foo/bar"));
|
||||
@@ -548,7 +569,7 @@ sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create_dir("foo/bar");
|
||||
wd.create_dir("foo/baz");
|
||||
wd.create("foo/baz/sherlock", hay::SHERLOCK);
|
||||
wd.link("foo/baz", "foo/bar/baz");
|
||||
wd.link_dir("foo/baz", "foo/bar/baz");
|
||||
cmd.arg("-L");
|
||||
cmd.current_dir(wd.path().join("foo/bar"));
|
||||
|
||||
@@ -585,17 +606,6 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
#[cfg(not(windows))]
|
||||
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
|
||||
cmd.arg("-uuu");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
|
||||
});
|
||||
|
||||
// On Windows, this test uses memory maps, so the NUL bytes don't get replaced.
|
||||
#[cfg(windows)]
|
||||
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
|
||||
cmd.arg("-uuu");
|
||||
@@ -652,7 +662,6 @@ clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
}
|
||||
wd.create_dir("vendor");
|
||||
wd.create("vendor/manifest", "test");
|
||||
cmd.arg("--debug");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
let expected = path("vendor/manifest:test\n");
|
||||
@@ -698,6 +707,13 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
assert_eq!(lines, path("dir/bar:test\n"));
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/87
|
||||
clean!(regression_87, "test", ".", |wd: WorkDir, mut cmd: Command| {
    // `foo` is the only file containing the pattern, and the `.gitignore`
    // written here names it (followed by a `**no-vcs**` line).
    wd.create("foo", "test");
    wd.create(".gitignore", "foo\n**no-vcs**");
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/90
|
||||
clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".gitignore", "!.foo");
|
||||
@@ -716,8 +732,170 @@ clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".",
|
||||
assert_eq!(lines, "foo:192.168.1.1\n");
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/99
|
||||
clean!(regression_99, "test", ".",
|wd: WorkDir, mut cmd: Command| {
    wd.create("foo1", "test");
    wd.create("foo2", "zzz");
    wd.create("bar", "test");
    cmd.arg("-j1");
    cmd.arg("--heading");

    // Both sides go through `sort_lines`, so the comparison is on the set
    // of output lines rather than on their order.
    let output: String = wd.stdout(&mut cmd);
    let want = sort_lines("bar\ntest\n\nfoo1\ntest\n");
    assert_eq!(sort_lines(&output), want);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/105
|
||||
clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--vimgrep");
    // The match starts at the third byte of the only line.
    wd.create("foo", "zztest");

    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "foo:1:3:zztest\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/105
|
||||
clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--column");
    // The match starts at the third byte of the only line.
    wd.create("foo", "zztest");

    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "foo:3:zztest\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/127
|
||||
clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    // Files created below:
    //
    //   .gitignore     (contains `foo/sherlock`)
    //   foo/sherlock
    //   foo/watson
    //
    // Only 'foo/watson' is expected in the results, because 'foo/sherlock'
    // is ignored. The bug behind this test was that on Windows both files
    // showed up.
    wd.create_dir("foo");
    wd.create("foo/watson", hay::SHERLOCK);
    wd.create("foo/sherlock", hay::SHERLOCK);
    wd.create(".gitignore", "foo/sherlock\n");

    let output: String = wd.stdout(&mut cmd);
    let watson = path("foo/watson");
    let expected = format!("\
{path}:For the Doctor Watsons of this world, as opposed to the Sherlock
{path}:be, to a very large extent, the result of luck. Sherlock Holmes
", path=watson);
    assert_eq!(output, expected);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/128
|
||||
clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-n");
    // Four `\x0b\n` pairs precede the final `x`, so with line numbers on
    // the match is reported on line 5.
    wd.create_bytes("foo", b"01234567\x0b\n\x0b\n\x0b\n\x0b\nx");

    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "foo:5:x\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/131
|
||||
//
|
||||
// TODO(burntsushi): Darwin doesn't like this test for some reason.
|
||||
#[cfg(not(target_os = "macos"))]
|
||||
clean!(regression_131, "test", ".", |wd: WorkDir, mut cmd: Command| {
    // The non-ASCII file name is listed verbatim in `.gitignore`.
    wd.create("TopÑapa", "test");
    wd.create(".gitignore", "TopÑapa");
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/137
|
||||
//
|
||||
// TODO(burntsushi): Figure out why Windows gives "access denied" errors
|
||||
// when trying to create a file symlink. For now, disable test on Windows.
|
||||
#[cfg(not(windows))]
|
||||
sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    // Two file symlinks to `sherlock`, both passed as extra arguments.
    wd.link_file("sherlock", "sym1");
    wd.link_file("sherlock", "sym2");
    cmd.arg("sym1").arg("sym2").arg("-j1");

    let output: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
sym1:For the Doctor Watsons of this world, as opposed to the Sherlock
sym1:be, to a very large extent, the result of luck. Sherlock Holmes
sym2:For the Doctor Watsons of this world, as opposed to the Sherlock
sym2:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(output, path(expected));
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/156
|
||||
clean!(
    regression_156,
    r#"#(?:parse|include)\s*\(\s*(?:"|')[./A-Za-z_-]+(?:"|')"#,
    "testcase.txt",
    |wd: WorkDir, mut cmd: Command| {
        // The searched file and the expected output are the same text:
        // with `-N`, every line is asserted to come back unchanged.
        const TESTCASE: &'static str = r#"#parse('widgets/foo_bar_macros.vm')
#parse ( 'widgets/mobile/foo_bar_macros.vm' )
#parse ("widgets/foobarhiddenformfields.vm")
#parse ( "widgets/foo_bar_legal.vm" )
#include( 'widgets/foo_bar_tips.vm' )
#include('widgets/mobile/foo_bar_macros.vm')
#include ("widgets/mobile/foo_bar_resetpw.vm")
#parse('widgets/foo-bar-macros.vm')
#parse ( 'widgets/mobile/foo-bar-macros.vm' )
#parse ("widgets/foo-bar-hiddenformfields.vm")
#parse ( "widgets/foo-bar-legal.vm" )
#include( 'widgets/foo-bar-tips.vm' )
#include('widgets/mobile/foo-bar-macros.vm')
#include ("widgets/mobile/foo-bar-resetpw.vm")
"#;
        cmd.arg("-N");
        wd.create("testcase.txt", TESTCASE);

        let output: String = wd.stdout(&mut cmd);
        assert_eq!(output, TESTCASE);
    });
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/184
|
||||
clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".gitignore", ".*");
    wd.create_dir("foo/bar");
    wd.create("foo/bar/baz", "test");

    // First run: the command exactly as handed to this closure.
    let first_run: String = wd.stdout(&mut cmd);
    let want = format!("{}:test\n", path("foo/bar/baz"));
    assert_eq!(first_run, want);

    // Second run: same command, started from inside `foo/bar`.
    cmd.current_dir(wd.path().join("./foo/bar"));
    let second_run: String = wd.stdout(&mut cmd);
    assert_eq!(second_run, "baz:test\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/199
|
||||
clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--smart-case");
    // Mixed-case haystack against an all-lowercase pattern.
    wd.create("foo", "tEsT");

    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "foo:tEsT\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/206
|
||||
clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create_dir("foo");
    wd.create("foo/bar.txt", "test");
    // The glob is given without a directory part; the file sits in `foo/`.
    cmd.arg("-g");
    cmd.arg("*.txt");

    let output: String = wd.stdout(&mut cmd);
    let want = format!("{}:test\n", path("foo/bar.txt"));
    assert_eq!(output, want);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/20
|
||||
sherlock!(feature_20, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
sherlock!(feature_20_no_filename, "Sherlock", ".",
|
||||
|wd: WorkDir, mut cmd: Command| {
|
||||
cmd.arg("--no-filename");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
@@ -728,8 +906,76 @@ be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/45
|
||||
sherlock!(feature_45_relative_cwd, "test", ".",
|wd: WorkDir, mut cmd: Command| {
    wd.create(".not-an-ignore", "foo\n/bar");
    wd.create_dir("bar");
    wd.create_dir("baz/bar");
    wd.create_dir("baz/baz/bar");
    wd.create("bar/test", "test");
    wd.create("baz/bar/test", "test");
    wd.create("baz/baz/bar/test", "test");
    wd.create("baz/foo", "test");
    wd.create("baz/test", "test");
    wd.create("foo", "test");
    wd.create("test", "test");

    // Baseline: no ignore file is passed, so every file is listed.
    let baseline = paths_from_stdout(wd.stdout(&mut cmd));
    let everything = paths(&[
        "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo",
        "baz/test", "foo", "test",
    ]);
    assert_eq!(baseline, everything);

    // Same command again, this time with the ignore file switched on.
    cmd.arg("--ignore-file");
    cmd.arg(".not-an-ignore");
    let filtered = paths_from_stdout(wd.stdout(&mut cmd));
    let kept = paths(&[
        "baz/bar/test", "baz/baz/bar/test", "baz/test", "test",
    ]);
    assert_eq!(filtered, kept);

    // Finally, a fresh command run from inside `baz`. A manually specified
    // ignore file is interpreted relative to the CWD, so the anchored
    // `/bar` pattern now filters out `baz/bar`. That is a subtle difference
    // from how a true parent ignore file behaves.
    let mut cmd = wd.command();
    cmd.arg("test").arg(".");
    cmd.arg("--ignore-file").arg("../.not-an-ignore");
    cmd.current_dir(wd.path().join("baz"));
    let from_baz = paths_from_stdout(wd.stdout(&mut cmd));
    let remaining = paths(&["baz/bar/test", "test"]);
    assert_eq!(from_baz, remaining);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/45
|
||||
sherlock!(feature_45_precedence_with_others, "test", ".",
|wd: WorkDir, mut cmd: Command| {
    // The explicit ignore file excludes every `*.log`; the `.ignore` file
    // whitelists `imp.log`, and only that file is expected in the output.
    wd.create(".not-an-ignore", "*.log");
    wd.create(".ignore", "!imp.log");
    wd.create("imp.log", "test");
    wd.create("wat.log", "test");

    cmd.arg("--ignore-file");
    cmd.arg(".not-an-ignore");
    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "imp.log:test\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/45
|
||||
sherlock!(feature_45_precedence_internal, "test", ".",
|wd: WorkDir, mut cmd: Command| {
    // Two explicit ignore files: the first excludes `*.log`, the second
    // whitelists `imp.log`. Only `imp.log` is expected in the output.
    wd.create(".not-an-ignore1", "*.log");
    wd.create(".not-an-ignore2", "!imp.log");
    wd.create("imp.log", "test");
    wd.create("wat.log", "test");

    for ignore_file in &[".not-an-ignore1", ".not-an-ignore2"] {
        cmd.arg("--ignore-file").arg(ignore_file);
    }
    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "imp.log:test\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/68
|
||||
clean!(feature_68, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
wd.create(".gitignore", "foo");
|
||||
wd.create(".ignore", "bar");
|
||||
wd.create("foo", "test");
|
||||
@@ -741,7 +987,8 @@ clean!(feature_68, "test", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/70
|
||||
sherlock!(feature_70, "sherlock", ".", |wd: WorkDir, mut cmd: Command| {
|
||||
sherlock!(feature_70_smart_case, "sherlock", ".",
|
||||
|wd: WorkDir, mut cmd: Command| {
|
||||
cmd.arg("--smart-case");
|
||||
|
||||
let lines: String = wd.stdout(&mut cmd);
|
||||
@@ -752,6 +999,93 @@ sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
assert_eq!(lines, expected);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/89
|
||||
sherlock!(feature_89_files_with_matches, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    cmd.arg("--null");
    cmd.arg("--files-with-matches");

    // The file name is expected to end in a NUL byte, with no newline.
    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "sherlock\x00");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/89
|
||||
sherlock!(feature_89_count, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    cmd.arg("--null");
    cmd.arg("--count");

    // A NUL byte separates the file name from the count.
    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "sherlock\x002\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/89
|
||||
sherlock!(feature_89_files, "NADA", ".",
|wd: WorkDir, mut cmd: Command| {
    cmd.arg("--null");
    cmd.arg("--files");

    // The listed file name is expected to end in a NUL byte.
    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "sherlock\x00");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/89
|
||||
sherlock!(feature_89_match, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    cmd.arg("--null");
    cmd.arg("-C1");

    // Every printed line, context lines included, is expected to have a
    // NUL byte between the file name and the text.
    let output: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock\x00Holmeses, success in the province of detective work must always
sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes
sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash;
";
    assert_eq!(output, expected);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/109
|
||||
clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| {
    // Matching files at two depths; only the shallower one is expected.
    wd.create_dir("one");
    wd.create("one/pass", "far");
    wd.create_dir("one/too");
    wd.create("one/too/many", "far");

    cmd.arg("--maxdepth");
    cmd.arg("2");

    let output: String = wd.stdout(&mut cmd);
    let want = path("one/pass:far\n");
    assert_eq!(output, want);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/124
|
||||
clean!(feature_109_case_sensitive_part1, "test", ".",
|wd: WorkDir, mut cmd: Command| {
    // `--case-sensitive` comes after `--smart-case`; the mixed-case file
    // content must then not produce a successful search.
    cmd.arg("--smart-case");
    cmd.arg("--case-sensitive");
    wd.create("foo", "tEsT");
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/124
|
||||
clean!(feature_109_case_sensitive_part2, "test", ".",
|wd: WorkDir, mut cmd: Command| {
    // `--case-sensitive` comes after `--ignore-case`; the mixed-case file
    // content must then not produce a successful search.
    cmd.arg("--ignore-case");
    cmd.arg("--case-sensitive");
    wd.create("foo", "tEsT");
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/159
|
||||
clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-m1");
    // Two matching lines in the file, but only one is expected back.
    wd.create("foo", "test\ntest");

    let output: String = wd.stdout(&mut cmd);
    assert_eq!(output, "foo:test\n");
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/159
|
||||
clean!(feature_159_zero_max, "test", ".", |wd: WorkDir, mut cmd: Command| {
    // With a maximum of zero matches the command is asserted to fail, even
    // though the file contains the pattern.
    cmd.arg("-m0");
    wd.create("foo", "test\ntest");
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
#[test]
|
||||
fn binary_nosearch() {
|
||||
let wd = WorkDir::new("binary_nosearch");
|
||||
|
@@ -43,9 +43,14 @@ impl WorkDir {
|
||||
|
||||
/// Create a new file with the given name and contents in this directory.
|
||||
pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
|
||||
self.create_bytes(name, contents.as_bytes());
|
||||
}
|
||||
|
||||
/// Create a new file with the given name and contents in this directory.
|
||||
pub fn create_bytes<P: AsRef<Path>>(&self, name: P, contents: &[u8]) {
|
||||
let path = self.dir.join(name);
|
||||
let mut file = nice_err(&path, File::create(&path));
|
||||
nice_err(&path, file.write_all(contents.as_bytes()));
|
||||
nice_err(&path, file.write_all(contents));
|
||||
nice_err(&path, file.flush());
|
||||
}
|
||||
|
||||
@@ -83,7 +88,7 @@ impl WorkDir {
|
||||
/// Creates a directory symlink to the src with the given target name
|
||||
/// in this directory.
|
||||
#[cfg(not(windows))]
|
||||
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
|
||||
pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
|
||||
use std::os::unix::fs::symlink;
|
||||
let src = self.dir.join(src);
|
||||
let target = self.dir.join(target);
|
||||
@@ -91,8 +96,10 @@ impl WorkDir {
|
||||
nice_err(&target, symlink(&src, &target));
|
||||
}
|
||||
|
||||
/// Creates a directory symlink to the src with the given target name
|
||||
/// in this directory.
|
||||
#[cfg(windows)]
|
||||
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
|
||||
pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
|
||||
use std::os::windows::fs::symlink_dir;
|
||||
let src = self.dir.join(src);
|
||||
let target = self.dir.join(target);
|
||||
@@ -100,6 +107,32 @@ impl WorkDir {
|
||||
nice_err(&target, symlink_dir(&src, &target));
|
||||
}
|
||||
|
||||
/// Creates a file symlink to the src with the given target name
|
||||
/// in this directory.
|
||||
#[cfg(not(windows))]
|
||||
pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
|
||||
&self,
|
||||
src: S,
|
||||
target: T,
|
||||
) {
|
||||
self.link_dir(src, target);
|
||||
}
|
||||
|
||||
/// Creates a file symlink to the src with the given target name
|
||||
/// in this directory.
|
||||
#[cfg(windows)]
|
||||
pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
|
||||
&self,
|
||||
src: S,
|
||||
target: T,
|
||||
) {
|
||||
use std::os::windows::fs::symlink_file;
|
||||
let src = self.dir.join(src);
|
||||
let target = self.dir.join(target);
|
||||
let _ = fs::remove_file(&target);
|
||||
nice_err(&target, symlink_file(&src, &target));
|
||||
}
|
||||
|
||||
/// Runs and captures the stdout of the given command.
|
||||
///
|
||||
/// If the return type could not be created from a string, then this
|
||||
|
Reference in New Issue
Block a user