Compare commits

..

1 Commits

Author SHA1 Message Date
Andrew Gallant
a3c4326139 0.1.18 2016-09-25 22:30:24 -04:00
31 changed files with 1445 additions and 3110 deletions

1
.gitignore vendored
View File

@@ -2,4 +2,3 @@
tags
target
/grep/Cargo.lock
/globset/Cargo.lock

View File

@@ -15,6 +15,9 @@ matrix:
- os: linux
rust: nightly
env: TARGET=x86_64-unknown-linux-musl
- os: osx
rust: nightly
env: TARGET=i686-apple-darwin
- os: osx
rust: nightly
env: TARGET=x86_64-apple-darwin

View File

@@ -1,26 +1,3 @@
0.2.1
=====
Feature enhancements:
* Added or improved file type filtering for Clojure and SystemVerilog.
* [FEATURE #89](https://github.com/BurntSushi/ripgrep/issues/89):
Add a --null flag that outputs a NUL byte after every file path.
Bug fixes:
* [BUG #98](https://github.com/BurntSushi/ripgrep/issues/98):
Fix a bug in single threaded mode where, if opening a file failed, ripgrep
quit instead of continuing the search.
* [BUG #99](https://github.com/BurntSushi/ripgrep/issues/99):
Fix another bug in single threaded mode where empty lines were being printed
by mistake.
* [BUG #105](https://github.com/BurntSushi/ripgrep/issues/105):
Fix an off-by-one error with --column.
* [BUG #106](https://github.com/BurntSushi/ripgrep/issues/106):
Fix a bug where a whitespace only line in a gitignore file caused ripgrep
to panic (i.e., crash).
0.2.0
=====
Feature enhancements:

25
Cargo.lock generated
View File

@@ -1,11 +1,12 @@
[root]
name = "ripgrep"
version = "0.2.1"
version = "0.1.18"
dependencies = [
"deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.6.85 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.1.0",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"grep 0.1.3",
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -39,7 +40,7 @@ dependencies = [
[[package]]
name = "docopt"
version = "0.6.86"
version = "0.6.85"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -73,16 +74,9 @@ dependencies = [
]
[[package]]
name = "globset"
version = "0.1.0"
dependencies = [
"aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 0.1.11 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 0.1.77 (registry+https://github.com/rust-lang/crates.io-index)",
]
name = "glob"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "grep"
@@ -240,10 +234,11 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[metadata]
"checksum aho-corasick 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ca972c2ea5f742bfce5687b9aef75506a764f61d37f8f649047846a9686ddb66"
"checksum deque 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1614659040e711785ed8ea24219140654da1729f3ec8a47a9719d041112fe7bf"
"checksum docopt 0.6.86 (registry+https://github.com/rust-lang/crates.io-index)" = "4a7ef30445607f6fc8720f0a0a2c7442284b629cf0d049286860fae23e71c4d9"
"checksum docopt 0.6.85 (registry+https://github.com/rust-lang/crates.io-index)" = "1b88d783674021c5570e7238e17985b9b8c7141d90f33de49031b8d56e7f0bf9"
"checksum env_logger 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "15abd780e45b3ea4f76b4e9a26ff4843258dd8a3eed2775a0e7368c2e7936c2f"
"checksum fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "6cc484842f1e2884faf56f529f960cc12ad8c71ce96cc7abba0a067c98fee344"
"checksum fs2 0.2.5 (registry+https://github.com/rust-lang/crates.io-index)" = "bcd414e5a1a979b931bb92f41b7a54106d3f6d2e6c253e9ce943b7cd468251ef"
"checksum glob 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "8be18de09a56b60ed0edf84bc9df007e30040691af7acd1c41874faac5895bfb"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
"checksum lazy_static 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "49247ec2a285bb3dcb23cbd9c35193c025e7251bfce77c1d5da97e6362dffe7f"
"checksum libc 0.2.16 (registry+https://github.com/rust-lang/crates.io-index)" = "408014cace30ee0f767b1c4517980646a573ec61a57957aeeabcac8ac0a02e8d"

View File

@@ -1,6 +1,6 @@
[package]
name = "ripgrep"
version = "0.2.1" #:version
version = "0.1.18" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Line oriented search tool using Rust's regex library. Combines the raw
@@ -26,7 +26,7 @@ path = "tests/tests.rs"
deque = "0.3"
docopt = "0.6"
env_logger = "0.3"
globset = { version = "0.1.0", path = "globset" }
fnv = "1.0"
grep = { version = "0.1.3", path = "grep" }
lazy_static = "0.2"
libc = "0.2"
@@ -46,5 +46,8 @@ winapi = "0.2"
[features]
simd-accel = ["regex/simd-accel"]
[dev-dependencies]
glob = "0.2"
[profile.release]
debug = true

View File

@@ -1 +0,0 @@
pkg/brew

View File

@@ -30,7 +30,7 @@ for a very detailed comparison with more benchmarks and analysis.
| ripgrep | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.245s** |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.753s |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.823s |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.880s |
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.656s |
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 12.369s |
| [ack](http://beyondgrep.com/) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 16.952s |
@@ -97,25 +97,18 @@ but you'll need to have the
[Microsoft VC++ 2015 redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
installed.
If you're a **Mac OS X Homebrew** user, then you can install ripgrep either
from homebrew-core, (compiled with rust stable, no SIMD):
If you're a **Homebrew** user, then you can install it with a custom formula
(N.B. `ripgrep` isn't actually in Homebrew yet. This just installs the binary
directly):
```
$ brew install ripgrep
```
or you can install a binary compiled with rust nightly (including SIMD and all
optimizations) by utilizing a custom tap:
```
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
$ brew install burntsushi/ripgrep/ripgrep-bin
$ brew install https://raw.githubusercontent.com/BurntSushi/ripgrep/master/pkg/brew/ripgrep.rb
```
If you're an **Arch Linux** user, then you can install `ripgrep` from the official repos:
```
$ pacman -S ripgrep
$ pacman -Syu ripgrep
```
If you're a **Rust programmer**, `ripgrep` can be installed with `cargo`:
@@ -221,11 +214,10 @@ $ rg -Tjs foobar
```
To see a list of types supported, run `rg --type-list`. To add a new type, use
`--type-add`, which must be accompanied by a pattern for searching (`rg` won't
persist your type settings):
`--type-add`:
```
$ rg --type-add 'foo:*.{foo,foobar}' -tfoo bar
$ rg --type-add 'foo:*.foo,*.foobar'
```
The type `foo` will now match any file ending with the `.foo` or `.foobar`

View File

@@ -28,8 +28,6 @@ build: false
# TODO modify this phase as you see fit
test_script:
- cargo test --verbose
- cargo test --verbose --manifest-path grep/Cargo.toml
- cargo test --verbose --manifest-path globset/Cargo.toml
before_deploy:
# Generate artifacts for release
@@ -43,7 +41,7 @@ before_deploy:
- appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip
deploy:
description: 'Automatically deployed release'
description: 'Windows release'
# All the zipped artifacts will be deployed
artifact: /.*\.zip/
auth_token:

5
benches/README.md Normal file
View File

@@ -0,0 +1,5 @@
These are internal microbenchmarks for tracking the performance of individual
components inside of ripgrep. At the moment, they aren't heavily used.
For performance benchmarks of ripgrep proper, see the sibling `benchsuite`
directory.

View File

@@ -5,50 +5,37 @@ tool itself, see the benchsuite directory.
#![feature(test)]
extern crate glob;
extern crate globset;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate test;
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
const EXT_PAT: &'static str = "*.txt";
const SHORT: &'static str = "some/needle.txt";
const SHORT_PAT: &'static str = "some/**/needle.txt";
const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
const LONG_PAT: &'static str = "some/**/needle.txt";
#[allow(dead_code, unused_variables)]
#[path = "../src/glob.rs"]
mod reglob;
fn new_glob(pat: &str) -> glob::Pattern {
glob::Pattern::new(pat).unwrap()
}
fn new_reglob(pat: &str) -> GlobMatcher {
Glob::new(pat).unwrap().compile_matcher()
}
fn new_reglob_many(pats: &[&str]) -> GlobSet {
let mut builder = GlobSetBuilder::new();
for pat in pats {
builder.add(Glob::new(pat).unwrap());
}
fn new_reglob(pat: &str) -> reglob::Set {
let mut builder = reglob::SetBuilder::new();
builder.add(pat).unwrap();
builder.build().unwrap()
}
#[bench]
fn ext_glob(b: &mut test::Bencher) {
let pat = new_glob(EXT_PAT);
b.iter(|| assert!(pat.matches(EXT)));
}
#[bench]
fn ext_regex(b: &mut test::Bencher) {
let set = new_reglob(EXT_PAT);
let cand = Candidate::new(EXT);
b.iter(|| assert!(set.is_match_candidate(&cand)));
fn new_reglob_many(pats: &[&str]) -> reglob::Set {
let mut builder = reglob::SetBuilder::new();
for pat in pats {
builder.add(pat).unwrap();
}
builder.build().unwrap()
}
#[bench]
@@ -60,8 +47,7 @@ fn short_glob(b: &mut test::Bencher) {
#[bench]
fn short_regex(b: &mut test::Bencher) {
let set = new_reglob(SHORT_PAT);
let cand = Candidate::new(SHORT);
b.iter(|| assert!(set.is_match_candidate(&cand)));
b.iter(|| assert!(set.is_match(SHORT)));
}
#[bench]
@@ -73,8 +59,7 @@ fn long_glob(b: &mut test::Bencher) {
#[bench]
fn long_regex(b: &mut test::Bencher) {
let set = new_reglob(LONG_PAT);
let cand = Candidate::new(LONG);
b.iter(|| assert!(set.is_match_candidate(&cand)));
b.iter(|| assert!(set.is_match(LONG)));
}
const MANY_SHORT_GLOBS: &'static [&'static str] = &[
@@ -116,3 +101,26 @@ fn many_short_regex_set(b: &mut test::Bencher) {
let set = new_reglob_many(MANY_SHORT_GLOBS);
b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
}
// This is the fastest on my system (beating many_glob by about 2x). This
// suggests that a RegexSet needs quite a few regexes (or a larger haystack)
// in order for it to scale.
//
// TODO(burntsushi): come up with a benchmark that uses more complex patterns
// or a longer haystack.
#[bench]
fn many_short_regex_pattern(b: &mut test::Bencher) {
    // Compile every glob to its own standalone regex up front so that only
    // matching is measured below.
    let mut regexes = Vec::with_capacity(MANY_SHORT_GLOBS.len());
    for &glob in MANY_SHORT_GLOBS {
        let pattern = reglob::Pattern::new(glob).unwrap();
        regexes.push(regex::Regex::new(&pattern.to_regex()).unwrap());
    }
    b.iter(|| {
        // Try each regex in turn and count how many accept the haystack.
        let mut hits = 0;
        for re in &regexes {
            if re.is_match(MANY_SHORT_SEARCH) {
                hits += 1;
            }
        }
        assert_eq!(2, hits);
    })
}

View File

@@ -19,10 +19,6 @@ run_test_suite() {
cargo clean --target $TARGET --verbose
cargo build --target $TARGET --verbose
cargo test --target $TARGET --verbose
cargo build --target $TARGET --verbose --manifest-path grep/Cargo.toml
cargo test --target $TARGET --verbose --manifest-path grep/Cargo.toml
cargo build --target $TARGET --verbose --manifest-path globset/Cargo.toml
cargo test --target $TARGET --verbose --manifest-path globset/Cargo.toml
# sanity check the file type
file target/$TARGET/debug/rg

View File

@@ -70,7 +70,6 @@ Show this usage message.
.TP
.B \-i, \-\-ignore\-case
Case insensitive search.
Overridden by \-\-case\-sensitive.
.RS
.RE
.TP
@@ -210,12 +209,6 @@ Follow symlinks.
.RS
.RE
.TP
.B \-\-maxdepth \f[I]NUM\f[]
Descend at most NUM directories below the command line arguments.
A value of zero searches only the starting\-points themselves.
.RS
.RE
.TP
.B \-\-mmap
Search using memory maps when possible.
This is enabled by default when ripgrep thinks it will be faster.
@@ -246,29 +239,14 @@ Note that .ignore files will continue to be respected.
.RS
.RE
.TP
.B \-\-null
Whenever a file name is printed, follow it with a NUL byte.
This includes printing filenames before matches, and when printing a
list of matching files such as with \-\-count, \-\-files\-with\-matches
and \-\-files.
.RS
.RE
.TP
.B \-p, \-\-pretty
Alias for \-\-color=always \-\-heading \-n.
.RS
.RE
.TP
.B \-s, \-\-case\-sensitive
Search case sensitively.
This overrides \-\-ignore\-case and \-\-smart\-case.
.RS
.RE
.TP
.B \-S, \-\-smart\-case
Search case insensitively if the pattern is all lowercase.
Search case sensitively otherwise.
This is overridden by either \-\-case\-sensitive or \-\-ignore\-case.
.RS
.RE
.TP

View File

@@ -49,7 +49,7 @@ the raw speed of grep.
: Show this usage message.
-i, --ignore-case
: Case insensitive search. Overridden by --case-sensitive.
: Case insensitive search.
-n, --line-number
: Show line numbers (1-based). This is enabled by default at a tty.
@@ -136,10 +136,6 @@ the raw speed of grep.
-L, --follow
: Follow symlinks.
--maxdepth *NUM*
: Descend at most NUM directories below the command line arguments.
A value of zero searches only the starting-points themselves.
--mmap
: Search using memory maps when possible. This is enabled by default
when ripgrep thinks it will be faster. (Note that mmap searching
@@ -159,22 +155,12 @@ the raw speed of grep.
: Don't respect version control ignore files (e.g., .gitignore).
Note that .ignore files will continue to be respected.
--null
: Whenever a file name is printed, follow it with a NUL byte.
This includes printing filenames before matches, and when printing
a list of matching files such as with --count, --files-with-matches
and --files.
-p, --pretty
: Alias for --color=always --heading -n.
-s, --case-sensitive
: Search case sensitively. This overrides --ignore-case and --smart-case.
-S, --smart-case
: Search case insensitively if the pattern is all lowercase.
Search case sensitively otherwise. This is overridden by either
--case-sensitive or --ignore-case.
Search case sensitively otherwise.
-j, --threads *ARG*
: The number of threads to use. Defaults to the number of logical CPUs

View File

@@ -1,30 +0,0 @@
[package]
name = "globset"
version = "0.1.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Cross platform single glob and glob set matching. Glob set matching is the
process of matching one or more glob patterns against a single candidate path
simultaneously, and returning all of the globs that matched.
"""
documentation = "https://docs.rs/globset"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
readme = "README.md"
keywords = ["regex", "glob", "multiple", "set", "pattern"]
license = "Unlicense/MIT"
[lib]
name = "globset"
bench = false
[dependencies]
aho-corasick = "0.5.3"
fnv = "1.0"
lazy_static = "0.2"
log = "0.3"
memchr = "0.1"
regex = "0.1.77"
[dev-dependencies]
glob = "0.2"

View File

@@ -1,122 +0,0 @@
globset
=======
Cross platform single glob and glob set matching. Glob set matching is the
process of matching one or more glob patterns against a single candidate path
simultaneously, and returning all of the globs that matched.
[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset)
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### Documentation
[https://docs.rs/globset](https://docs.rs/globset)
### Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
globset = "0.1"
```
and this to your crate root:
```rust
extern crate globset;
```
### Example: one glob
This example shows how to match a single glob against a single file path.
```rust
use globset::Glob;
let glob = try!(Glob::new("*.rs")).compile_matcher();
assert!(glob.is_match("foo.rs"));
assert!(glob.is_match("foo/bar.rs"));
assert!(!glob.is_match("Cargo.toml"));
```
### Example: configuring a glob matcher
This example shows how to use a `GlobBuilder` to configure aspects of match
semantics. In this example, we prevent wildcards from matching path separators.
```rust
use globset::GlobBuilder;
let glob = try!(GlobBuilder::new("*.rs")
.literal_separator(true).build()).compile_matcher();
assert!(glob.is_match("foo.rs"));
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
assert!(!glob.is_match("Cargo.toml"));
```
### Example: match multiple globs at once
This example shows how to match multiple glob patterns at once.
```rust
use globset::{Glob, GlobSetBuilder};
let mut builder = GlobSetBuilder::new();
// A GlobBuilder can be used to configure each glob's match semantics
// independently.
builder.add(try!(Glob::new("*.rs")));
builder.add(try!(Glob::new("src/lib.rs")));
builder.add(try!(Glob::new("src/**/foo.rs")));
let set = try!(builder.build());
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
```
### Performance
This crate implements globs by converting them to regular expressions, and
executing them with the
[`regex`](https://github.com/rust-lang-nursery/regex)
crate.
For single glob matching, performance of this crate should be roughly on par
with the performance of the
[`glob`](https://github.com/rust-lang-nursery/glob)
crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
correspond to benchmarks for the `glob` library.)
Optimizations in the `regex` crate may propel this library past `glob`,
particularly when matching longer paths.
```
test ext_glob ... bench: 425 ns/iter (+/- 21)
test ext_regex ... bench: 175 ns/iter (+/- 10)
test long_glob ... bench: 182 ns/iter (+/- 11)
test long_regex ... bench: 173 ns/iter (+/- 10)
test short_glob ... bench: 69 ns/iter (+/- 4)
test short_regex ... bench: 83 ns/iter (+/- 2)
```
The primary performance advantage of this crate is when matching multiple
globs against a single path. With the `glob` crate, one must match each glob
synchronously, one after the other. In this crate, many can be matched
simultaneously. For example:
```
test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
```
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
* Can match non-UTF-8 file paths correctly.
* Supports matching multiple globs at once.
* Doesn't provide a recursive directory iterator of matching file paths,
although I believe this crate should grow one eventually.
* Supports case insensitive and require-literal-separator match options, but
**doesn't** support the require-literal-leading-dot option.

File diff suppressed because it is too large Load Diff

View File

@@ -1,753 +0,0 @@
/*!
The globset crate provides cross platform single glob and glob set matching.
Glob set matching is the process of matching one or more glob patterns against
a single candidate path simultaneously, and returning all of the globs that
matched. For example, given this set of globs:
```ignore
*.rs
src/lib.rs
src/**/foo.rs
```
and a path `src/bar/baz/foo.rs`, then the set would report the first and third
globs as matching.
Single glob matching is also provided and is done by converting globs to
regular expressions.
# Example: one glob
This example shows how to match a single glob against a single file path.
```
# fn example() -> Result<(), globset::Error> {
use globset::Glob;
let glob = try!(Glob::new("*.rs")).compile_matcher();
assert!(glob.is_match("foo.rs"));
assert!(glob.is_match("foo/bar.rs"));
assert!(!glob.is_match("Cargo.toml"));
# Ok(()) } example().unwrap();
```
# Example: configuring a glob matcher
This example shows how to use a `GlobBuilder` to configure aspects of match
semantics. In this example, we prevent wildcards from matching path separators.
```
# fn example() -> Result<(), globset::Error> {
use globset::GlobBuilder;
let glob = try!(GlobBuilder::new("*.rs")
.literal_separator(true).build()).compile_matcher();
assert!(glob.is_match("foo.rs"));
assert!(!glob.is_match("foo/bar.rs")); // no longer matches
assert!(!glob.is_match("Cargo.toml"));
# Ok(()) } example().unwrap();
```
# Example: match multiple globs at once
This example shows how to match multiple glob patterns at once.
```
# fn example() -> Result<(), globset::Error> {
use globset::{Glob, GlobSetBuilder};
let mut builder = GlobSetBuilder::new();
// A GlobBuilder can be used to configure each glob's match semantics
// independently.
builder.add(try!(Glob::new("*.rs")));
builder.add(try!(Glob::new("src/lib.rs")));
builder.add(try!(Glob::new("src/**/foo.rs")));
let set = try!(builder.build());
assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
# Ok(()) } example().unwrap();
```
# Syntax
Standard Unix-style glob syntax is supported:
* `?` matches any single character. (If the `literal_separator` option is
enabled, then `?` can never match a path separator.)
* `*` matches zero or more characters. (If the `literal_separator` option is
enabled, then `*` can never match a path separator.)
* `**` recursively matches directories but is only legal in three situations.
First, if the glob starts with <code>\*\*&#x2F;</code>, then it matches
all directories. For example, <code>\*\*&#x2F;foo</code> matches `foo`
and `bar/foo` but not `foo/bar`. Secondly, if the glob ends with
<code>&#x2F;\*\*</code>, then it matches all sub-entries. For example,
<code>foo&#x2F;\*\*</code> matches `foo/a` and `foo/a/b`, but not `foo`.
Thirdly, if the glob contains <code>&#x2F;\*\*&#x2F;</code> anywhere within
the pattern, then it matches zero or more directories. Using `**` anywhere
else is illegal (N.B. the glob `**` is allowed and means "match everything").
* `{a,b}` matches `a` or `b` where `a` and `b` are arbitrary glob patterns.
(N.B. Nesting `{...}` is not currently allowed.)
* `[ab]` matches `a` or `b` where `a` and `b` are characters. Use
`[!ab]` to match any character except for `a` and `b`.
* Metacharacters such as `*` and `?` can be escaped with character class
notation. e.g., `[*]` matches `*`.
A `GlobBuilder` can be used to prevent wildcards from matching path separators,
or to enable case insensitive matching.
*/
#![deny(missing_docs)]
extern crate aho_corasick;
extern crate fnv;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::error::Error as StdError;
use std::ffi::{OsStr, OsString};
use std::fmt;
use std::hash;
use std::path::Path;
use std::str;
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
use regex::bytes::{Regex, RegexBuilder, RegexSet};
use pathutil::{
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
};
use glob::MatchStrategy;
pub use glob::{Glob, GlobBuilder, GlobMatcher};
mod glob;
mod pathutil;
// Writes a formatted line to stderr, accepting the same arguments as
// `writeln!`. A failed write is deliberately ignored.
macro_rules! eprintln {
    ($($arg:tt)*) => {{
        use std::io::Write;
        let mut stderr = ::std::io::stderr();
        let _ = writeln!(&mut stderr, $($arg)*);
    }}
}
/// The ways in which parsing or compiling a glob pattern can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
    /// `**` was used somewhere it is not permitted. It may only appear
    /// next to a path separator, or at the beginning/end of a glob.
    InvalidRecursive,
    /// A character class (e.g., `[abc]`) was opened but never closed.
    UnclosedClass,
    /// A range inside a character class (e.g., `[a-z]`) is invalid, for
    /// instance because its start is lexicographically larger than its end.
    /// The two characters are the start and end of the offending range.
    InvalidRange(char, char),
    /// A `}` was found without a matching `{`.
    UnopenedAlternates,
    /// A `{` was found without a matching `}`.
    UnclosedAlternates,
    /// An alternating group was nested inside another alternating group,
    /// e.g., `{{a,b},{c,d}}`.
    NestedAlternates,
    /// Parsing or compiling a regex failed; carries the error's message.
    Regex(String),
}

impl StdError for Error {
    /// Returns a fixed, human readable summary of this error. For
    /// `Error::Regex`, the stored message is returned as is.
    fn description(&self) -> &str {
        match *self {
            Error::InvalidRecursive =>
                "invalid use of **; must be one path component",
            Error::UnclosedClass =>
                "unclosed character class; missing ']'",
            Error::InvalidRange(..) =>
                "invalid character range",
            Error::UnopenedAlternates =>
                "unopened alternate group; missing '{' \
                 (maybe escape '}' with '[}]'?)",
            Error::UnclosedAlternates =>
                "unclosed alternate group; missing '}' \
                 (maybe escape '{' with '[{]'?)",
            Error::NestedAlternates =>
                "nested alternate groups are not allowed",
            Error::Regex(ref msg) => msg,
        }
    }
}

impl fmt::Display for Error {
    /// Formats the error for display. An invalid range names both of its
    /// end points; every other variant prints its `description`.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Error::InvalidRange(start, end) => {
                write!(f, "invalid range; '{}' > '{}'", start, end)
            }
            Error::InvalidRecursive
            | Error::UnclosedClass
            | Error::UnopenedAlternates
            | Error::UnclosedAlternates
            | Error::NestedAlternates
            | Error::Regex(_) => f.write_str(self.description()),
        }
    }
}
fn new_regex(pat: &str) -> Result<Regex, Error> {
RegexBuilder::new(pat)
.dot_matches_new_line(true)
.size_limit(10 * (1 << 20))
.dfa_size_limit(10 * (1 << 20))
.compile()
.map_err(|err| Error::Regex(err.to_string()))
}
fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
where S: AsRef<str>, I: IntoIterator<Item=S> {
RegexSet::new(pats).map_err(|err| Error::Regex(err.to_string()))
}
/// A `BuildHasher` that produces default-constructed `fnv::FnvHasher`s.
// NOTE(review): presumably plugged into the hash maps used further down in
// this file; the uses are outside the visible section -- confirm.
type Fnv = hash::BuildHasherDefault<fnv::FnvHasher>;
/// GlobSet is a group of globs that can be matched against a path together,
/// in a single pass.
#[derive(Clone, Debug)]
pub struct GlobSet {
    /// The matching strategies making up this set, in the order in which
    /// they are consulted.
    strats: Vec<GlobSetMatchStrategy>,
}

impl GlobSet {
    /// Returns true if any glob in this set matches the path given.
    pub fn is_match<P: AsRef<Path>>(&self, path: P) -> bool {
        let candidate = Candidate::new(path.as_ref());
        self.is_match_candidate(&candidate)
    }

    /// Returns true if any glob in this set matches the path given.
    ///
    /// This takes a Candidate as input, which can be used to amortize the
    /// cost of preparing a path for matching.
    pub fn is_match_candidate(&self, path: &Candidate) -> bool {
        self.strats.iter().any(|strat| strat.is_match(path))
    }

    /// Returns the sequence number of every glob pattern that matches the
    /// given path.
    ///
    /// The path is converted to a `Candidate` on every call; use
    /// `matches_candidate` to pay for that conversion only once.
    pub fn matches<P: AsRef<Path>>(&self, path: P) -> Vec<usize> {
        let candidate = Candidate::new(path.as_ref());
        self.matches_candidate(&candidate)
    }

    /// Returns the sequence number of every glob pattern that matches the
    /// given path.
    ///
    /// This takes a Candidate as input, which can be used to amortize the
    /// cost of preparing a path for matching.
    pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
        let mut indices = Vec::new();
        self.matches_candidate_into(path, &mut indices);
        indices
    }

    /// Adds the sequence number of every glob pattern that matches the given
    /// path to the vec given.
    ///
    /// `into` is cleared before matching begins, and contains the set of
    /// sequence numbers (in ascending order, without duplicates) after
    /// matching ends. If no globs were matched, then `into` will be empty.
    pub fn matches_candidate_into(
        &self,
        path: &Candidate,
        into: &mut Vec<usize>,
    ) {
        into.clear();
        for strat in self.strats.iter() {
            strat.matches_into(path, into);
        }
        // More than one strategy may report the same glob, so normalize.
        into.sort();
        into.dedup();
    }

    /// Builds a set from the given globs. Each glob is routed to a
    /// strategy according to `MatchStrategy::new`, keyed by its position in
    /// `pats`.
    ///
    /// Returns an error if building the required-extension strategy or the
    /// regex set fails.
    fn new(pats: &[Glob]) -> Result<GlobSet, Error> {
        let mut literals = LiteralStrategy::new();
        let mut basenames = BasenameLiteralStrategy::new();
        let mut extensions = ExtensionStrategy::new();
        let mut prefixes = MultiStrategyBuilder::new();
        let mut suffixes = MultiStrategyBuilder::new();
        let mut required_exts = RequiredExtensionStrategyBuilder::new();
        let mut regexes = MultiStrategyBuilder::new();

        for (index, glob) in pats.iter().enumerate() {
            match MatchStrategy::new(glob) {
                MatchStrategy::Literal(lit) => {
                    literals.add(index, lit);
                }
                MatchStrategy::BasenameLiteral(lit) => {
                    basenames.add(index, lit);
                }
                MatchStrategy::Extension(ext) => {
                    extensions.add(index, ext);
                }
                MatchStrategy::Prefix(prefix) => {
                    prefixes.add(index, prefix);
                }
                MatchStrategy::Suffix { suffix, component } => {
                    // A component suffix is additionally registered as a
                    // literal with its first byte dropped. This must come
                    // first, since `suffix` is moved below.
                    if component {
                        literals.add(index, suffix[1..].to_string());
                    }
                    suffixes.add(index, suffix);
                }
                MatchStrategy::RequiredExtension(ext) => {
                    required_exts.add(index, ext, glob.regex().to_owned());
                }
                MatchStrategy::Regex => {
                    debug!("glob converted to regex: {:?}", glob);
                    regexes.add(index, glob.regex().to_owned());
                }
            }
        }
        debug!("built glob set; {} literals, {} basenames, {} extensions, \
                {} prefixes, {} suffixes, {} required extensions, {} regexes",
               literals.0.len(), basenames.0.len(), extensions.0.len(),
               prefixes.literals.len(), suffixes.literals.len(),
               required_exts.0.len(), regexes.literals.len());

        // Finish the builders in the same order the strategies are listed
        // below, so that the first failure encountered is the one reported.
        let suffix_strat = suffixes.suffix();
        let prefix_strat = prefixes.prefix();
        let required_ext_strat = try!(required_exts.build());
        let regex_strat = try!(regexes.regex_set());
        Ok(GlobSet {
            strats: vec![
                GlobSetMatchStrategy::Extension(extensions),
                GlobSetMatchStrategy::BasenameLiteral(basenames),
                GlobSetMatchStrategy::Literal(literals),
                GlobSetMatchStrategy::Suffix(suffix_strat),
                GlobSetMatchStrategy::Prefix(prefix_strat),
                GlobSetMatchStrategy::RequiredExtension(required_ext_strat),
                GlobSetMatchStrategy::Regex(regex_strat),
            ],
        })
    }
}
/// GlobSetBuilder collects glob patterns and turns them into a `GlobSet`
/// that matches all of them against a file path simultaneously.
pub struct GlobSetBuilder {
    /// The globs added so far, in insertion order.
    pats: Vec<Glob>,
}

impl GlobSetBuilder {
    /// Create a new, empty GlobSetBuilder. Add patterns with `add`, then
    /// call `build` to produce a `GlobSet` that can be used for matching.
    pub fn new() -> GlobSetBuilder {
        GlobSetBuilder { pats: Vec::new() }
    }

    /// Builds a new matcher from all of the glob patterns added so far.
    ///
    /// Once a matcher is built, no new patterns can be added to it.
    pub fn build(&self) -> Result<GlobSet, Error> {
        GlobSet::new(self.pats.as_slice())
    }

    /// Add a new pattern to this set. Returns the builder so that calls can
    /// be chained.
    #[allow(dead_code)]
    pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
        self.pats.push(pat);
        self
    }
}
/// A candidate path for matching.
///
/// All glob matching in this crate operates on `Candidate` values.
/// Constructing candidates has a very small cost associated with it, so
/// callers may find it beneficial to amortize that cost when matching a single
/// path against multiple globs or sets of globs.
#[derive(Clone, Debug)]
pub struct Candidate<'a> {
    /// The bytes of the whole path, after normalization.
    path: Cow<'a, [u8]>,
    /// The bytes of the path's file name; empty if it has none.
    basename: Cow<'a, [u8]>,
    /// The extension of the file name; empty if it has none.
    ext: &'a OsStr,
}

impl<'a> Candidate<'a> {
    /// Create a new candidate for matching from the given path.
    pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
        let full = path.as_ref();
        let name = file_name(full).unwrap_or(OsStr::new(""));
        let path_bytes = normalize_path(path_bytes(full));
        let name_bytes = os_str_bytes(name);
        let extension = file_name_ext(name).unwrap_or(OsStr::new(""));
        Candidate {
            path: path_bytes,
            basename: name_bytes,
            ext: extension,
        }
    }

    /// Returns the first `max` bytes of the path, or the whole path if it
    /// is not longer than `max`.
    fn path_prefix(&self, max: usize) -> &[u8] {
        let end = ::std::cmp::min(max, self.path.len());
        &self.path[..end]
    }

    /// Returns the last `max` bytes of the path, or the whole path if it
    /// is not longer than `max`.
    fn path_suffix(&self, max: usize) -> &[u8] {
        let start = self.path.len().saturating_sub(max);
        &self.path[start..]
    }
}
/// One of the matching strategies held by a `GlobSet`. Each variant wraps
/// the concrete strategy that does the work; this type only dispatches.
#[derive(Clone, Debug)]
enum GlobSetMatchStrategy {
    Literal(LiteralStrategy),
    BasenameLiteral(BasenameLiteralStrategy),
    Extension(ExtensionStrategy),
    Prefix(PrefixStrategy),
    Suffix(SuffixStrategy),
    RequiredExtension(RequiredExtensionStrategy),
    Regex(RegexSetStrategy),
}

impl GlobSetMatchStrategy {
    /// Returns true if the wrapped strategy matches the candidate.
    fn is_match(&self, candidate: &Candidate) -> bool {
        match *self {
            GlobSetMatchStrategy::Literal(ref strat) => {
                strat.is_match(candidate)
            }
            GlobSetMatchStrategy::BasenameLiteral(ref strat) => {
                strat.is_match(candidate)
            }
            GlobSetMatchStrategy::Extension(ref strat) => {
                strat.is_match(candidate)
            }
            GlobSetMatchStrategy::Prefix(ref strat) => {
                strat.is_match(candidate)
            }
            GlobSetMatchStrategy::Suffix(ref strat) => {
                strat.is_match(candidate)
            }
            GlobSetMatchStrategy::RequiredExtension(ref strat) => {
                strat.is_match(candidate)
            }
            GlobSetMatchStrategy::Regex(ref strat) => {
                strat.is_match(candidate)
            }
        }
    }

    /// Forwards to the wrapped strategy's `matches_into`, passing it the
    /// candidate and the `matches` vec to record its hits in.
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        match *self {
            GlobSetMatchStrategy::Literal(ref strat) => {
                strat.matches_into(candidate, matches)
            }
            GlobSetMatchStrategy::BasenameLiteral(ref strat) => {
                strat.matches_into(candidate, matches)
            }
            GlobSetMatchStrategy::Extension(ref strat) => {
                strat.matches_into(candidate, matches)
            }
            GlobSetMatchStrategy::Prefix(ref strat) => {
                strat.matches_into(candidate, matches)
            }
            GlobSetMatchStrategy::Suffix(ref strat) => {
                strat.matches_into(candidate, matches)
            }
            GlobSetMatchStrategy::RequiredExtension(ref strat) => {
                strat.matches_into(candidate, matches)
            }
            GlobSetMatchStrategy::Regex(ref strat) => {
                strat.matches_into(candidate, matches)
            }
        }
    }
}
/// Matches candidates whose entire path equals a stored literal.
///
/// Maps each literal (as bytes) to the indices registered for it via `add`.
#[derive(Clone, Debug)]
struct LiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);

impl LiteralStrategy {
    /// Creates a strategy with no literals.
    fn new() -> LiteralStrategy {
        LiteralStrategy(BTreeMap::new())
    }

    /// Records `global_index` under the literal `lit`.
    fn add(&mut self, global_index: usize, lit: String) {
        let key = lit.into_bytes();
        self.0.entry(key).or_insert_with(Vec::new).push(global_index);
    }

    /// True when the candidate's full path is one of the stored literals.
    fn is_match(&self, candidate: &Candidate) -> bool {
        self.0.get(&*candidate.path).is_some()
    }

    /// Appends every index registered for the candidate's full path.
    #[inline(never)]
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        match self.0.get(&*candidate.path) {
            Some(hits) => matches.extend(hits.iter().cloned()),
            None => {}
        }
    }
}
/// Matches candidates whose base name equals a stored literal.
///
/// Maps each literal (as bytes) to the indices registered for it via `add`.
/// A candidate with an empty base name never matches.
#[derive(Clone, Debug)]
struct BasenameLiteralStrategy(BTreeMap<Vec<u8>, Vec<usize>>);

impl BasenameLiteralStrategy {
    /// Creates a strategy with no literals.
    fn new() -> BasenameLiteralStrategy {
        BasenameLiteralStrategy(BTreeMap::new())
    }

    /// Records `global_index` under the literal `lit`.
    fn add(&mut self, global_index: usize, lit: String) {
        let key = lit.into_bytes();
        self.0.entry(key).or_insert_with(Vec::new).push(global_index);
    }

    /// True when the candidate has a non-empty base name that is one of the
    /// stored literals.
    fn is_match(&self, candidate: &Candidate) -> bool {
        let name = &*candidate.basename;
        !name.is_empty() && self.0.contains_key(name)
    }

    /// Appends every index registered for the candidate's base name.
    #[inline(never)]
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        let name = &*candidate.basename;
        if !name.is_empty() {
            match self.0.get(name) {
                Some(hits) => matches.extend(hits.iter().cloned()),
                None => {}
            }
        }
    }
}
/// Matches candidates by their extension alone.
///
/// Maps each extension to the indices registered for it via `add`. A
/// candidate with an empty extension never matches.
#[derive(Clone, Debug)]
struct ExtensionStrategy(HashMap<OsString, Vec<usize>, Fnv>);

impl ExtensionStrategy {
    /// Creates a strategy with no extensions.
    fn new() -> ExtensionStrategy {
        ExtensionStrategy(HashMap::with_hasher(Fnv::default()))
    }

    /// Records `global_index` under the extension `ext`.
    fn add(&mut self, global_index: usize, ext: OsString) {
        self.0.entry(ext).or_insert_with(Vec::new).push(global_index);
    }

    /// True when the candidate has a non-empty extension that is stored
    /// in the map.
    fn is_match(&self, candidate: &Candidate) -> bool {
        let ext = candidate.ext;
        !ext.is_empty() && self.0.contains_key(ext)
    }

    /// Appends every index registered for the candidate's extension.
    #[inline(never)]
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        let ext = candidate.ext;
        if ext.is_empty() {
            return;
        }
        match self.0.get(ext) {
            Some(hits) => matches.extend(hits.iter().cloned()),
            None => {}
        }
    }
}
/// Matches candidates whose path starts with one of a set of literals.
#[derive(Clone, Debug)]
struct PrefixStrategy {
    /// Automaton built over all prefix literals.
    matcher: FullAcAutomaton<Vec<u8>>,
    /// Translates the automaton's pattern index into the index reported to
    /// the caller.
    map: Vec<usize>,
    /// Length in bytes of the longest literal; no more than this many
    /// leading bytes of a path are ever scanned.
    longest: usize,
}

impl PrefixStrategy {
    /// True when some literal occurs at offset 0 of the candidate's path.
    fn is_match(&self, candidate: &Candidate) -> bool {
        let head = candidate.path_prefix(self.longest);
        self.matcher.find_overlapping(head).any(|m| m.start == 0)
    }

    /// Appends the mapped index of every literal that occurs at offset 0 of
    /// the candidate's path.
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        let head = candidate.path_prefix(self.longest);
        let anchored = self.matcher
            .find_overlapping(head)
            .filter(|m| m.start == 0)
            .map(|m| self.map[m.pati]);
        matches.extend(anchored);
    }
}
/// Matches candidates whose path ends with one of a set of literals.
#[derive(Clone, Debug)]
struct SuffixStrategy {
    /// Automaton built over all suffix literals.
    matcher: FullAcAutomaton<Vec<u8>>,
    /// Translates the automaton's pattern index into the index reported to
    /// the caller.
    map: Vec<usize>,
    /// Length in bytes of the longest literal; no more than this many
    /// trailing bytes of a path are ever scanned.
    longest: usize,
}

impl SuffixStrategy {
    /// True when some literal occurrence ends exactly at the end of the
    /// candidate's path.
    fn is_match(&self, candidate: &Candidate) -> bool {
        let tail = candidate.path_suffix(self.longest);
        let end = tail.len();
        self.matcher.find_overlapping(tail).any(|m| m.end == end)
    }

    /// Appends the mapped index of every literal whose occurrence ends
    /// exactly at the end of the candidate's path.
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        let tail = candidate.path_suffix(self.longest);
        let end = tail.len();
        let anchored = self.matcher
            .find_overlapping(tail)
            .filter(|m| m.end == end)
            .map(|m| self.map[m.pati]);
        matches.extend(anchored);
    }
}
#[derive(Clone, Debug)]
struct RequiredExtensionStrategy(HashMap<OsString, Vec<(usize, Regex)>, Fnv>);
impl RequiredExtensionStrategy {
fn is_match(&self, candidate: &Candidate) -> bool {
if candidate.ext.is_empty() {
return false;
}
match self.0.get(candidate.ext) {
None => false,
Some(regexes) => {
for &(_, ref re) in regexes {
if re.is_match(&*candidate.path) {
return true;
}
}
false
}
}
}
#[inline(never)]
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
if candidate.ext.is_empty() {
return;
}
if let Some(regexes) = self.0.get(candidate.ext) {
for &(global_index, ref re) in regexes {
if re.is_match(&*candidate.path) {
matches.push(global_index);
}
}
}
}
}
/// Runs a whole regex set against the candidate's full path.
#[derive(Clone, Debug)]
struct RegexSetStrategy {
    /// The compiled set of regexes.
    matcher: RegexSet,
    /// Translates an index within the regex set into the index reported to
    /// the caller.
    map: Vec<usize>,
}

impl RegexSetStrategy {
    /// True when at least one regex in the set matches the candidate's path.
    fn is_match(&self, candidate: &Candidate) -> bool {
        let path = &*candidate.path;
        self.matcher.is_match(path)
    }

    /// Appends the mapped index of every regex in the set that matches the
    /// candidate's path.
    fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
        let hits = self.matcher.matches(&*candidate.path);
        matches.extend(hits.into_iter().map(|i| self.map[i]));
    }
}
/// Collects literals together with their caller-visible indices, and can be
/// turned into a prefix, suffix or regex-set strategy.
#[derive(Clone, Debug)]
struct MultiStrategyBuilder {
    /// The literals, in insertion order.
    literals: Vec<String>,
    /// `map[i]` is the index that was supplied alongside `literals[i]`.
    map: Vec<usize>,
    /// Byte length of the longest literal added so far.
    longest: usize,
}

impl MultiStrategyBuilder {
    /// Creates a builder with no literals.
    fn new() -> MultiStrategyBuilder {
        MultiStrategyBuilder {
            literals: Vec::new(),
            map: Vec::new(),
            longest: 0,
        }
    }

    /// Appends `literal`, remembering `global_index` for it and keeping
    /// `longest` up to date.
    fn add(&mut self, global_index: usize, literal: String) {
        self.longest = ::std::cmp::max(self.longest, literal.len());
        self.map.push(global_index);
        self.literals.push(literal);
    }

    /// Consumes the builder and produces a `PrefixStrategy` whose automaton
    /// is built from the collected literals.
    fn prefix(self) -> PrefixStrategy {
        let MultiStrategyBuilder { literals, map, longest } = self;
        let patterns = literals.into_iter().map(String::into_bytes);
        PrefixStrategy {
            matcher: AcAutomaton::new(patterns).into_full(),
            map: map,
            longest: longest,
        }
    }

    /// Consumes the builder and produces a `SuffixStrategy` whose automaton
    /// is built from the collected literals.
    fn suffix(self) -> SuffixStrategy {
        let MultiStrategyBuilder { literals, map, longest } = self;
        let patterns = literals.into_iter().map(String::into_bytes);
        SuffixStrategy {
            matcher: AcAutomaton::new(patterns).into_full(),
            map: map,
            longest: longest,
        }
    }

    /// Consumes the builder and hands the collected strings to
    /// `new_regex_set`, returning its error if compilation fails.
    fn regex_set(self) -> Result<RegexSetStrategy, Error> {
        let set = try!(new_regex_set(self.literals));
        Ok(RegexSetStrategy {
            matcher: set,
            map: self.map,
        })
    }
}
/// Collects `(index, regex source)` pairs grouped by file extension, to be
/// compiled into a `RequiredExtensionStrategy` by `build`.
#[derive(Clone, Debug)]
struct RequiredExtensionStrategyBuilder(
    HashMap<OsString, Vec<(usize, String)>>,
);

impl RequiredExtensionStrategyBuilder {
    /// Creates a builder with no entries.
    fn new() -> RequiredExtensionStrategyBuilder {
        RequiredExtensionStrategyBuilder(HashMap::new())
    }

    /// Records the regex source `regex`, reported as `global_index`, under
    /// the extension `ext`.
    fn add(&mut self, global_index: usize, ext: OsString, regex: String) {
        self.0
            .entry(ext)
            .or_insert_with(Vec::new)
            .push((global_index, regex));
    }

    /// Compiles every recorded regex and returns the finished strategy.
    ///
    /// Returns the first error produced by `new_regex`; nothing is returned
    /// for the extensions processed before the failure.
    fn build(self) -> Result<RequiredExtensionStrategy, Error> {
        let mut exts = HashMap::with_hasher(Fnv::default());
        for (ext, regexes) in self.0 {
            // Compile into a local vector and insert once. This avoids
            // cloning the extension key and re-hashing it (plus an
            // `unwrap`) for every single regex.
            let mut compiled = Vec::with_capacity(regexes.len());
            for (global_index, regex) in regexes {
                compiled.push((global_index, try!(new_regex(&regex))));
            }
            exts.insert(ext, compiled);
        }
        Ok(RequiredExtensionStrategy(exts))
    }
}
#[cfg(test)]
mod tests {
use super::GlobSetBuilder;
use glob::Glob;
// Builds a set from three globs and checks both the boolean `is_match`
// answer and the indices returned by `matches`.
#[test]
fn set_works() {
let mut builder = GlobSetBuilder::new();
// Indices follow insertion order: 0, 1, 2.
builder.add(Glob::new("src/**/*.rs").unwrap());
builder.add(Glob::new("*.c").unwrap());
builder.add(Glob::new("src/lib.rs").unwrap());
let set = builder.build().unwrap();
assert!(set.is_match("foo.c"));
assert!(set.is_match("src/foo.c"));
assert!(!set.is_match("foo.rs"));
assert!(!set.is_match("tests/foo.rs"));
assert!(set.is_match("src/foo.rs"));
assert!(set.is_match("src/grep/src/main.rs"));
// `src/lib.rs` is expected to hit the first and third globs, reported
// in ascending index order.
let matches = set.matches("src/lib.rs");
assert_eq!(2, matches.len());
assert_eq!(0, matches[0]);
assert_eq!(2, matches[1]);
}
}

View File

@@ -1,180 +0,0 @@
use std::borrow::Cow;
use std::ffi::OsStr;
use std::path::Path;
/// Returns the bytes following the last `/` in the path, or the whole path
/// when it contains no `/`.
///
/// `None` is returned when the path is empty or when its last byte is `.`.
/// That single rule covers the paths `.` and `..`, paths ending in `/.` or
/// `/..`, and also any file name that merely ends with a dot.
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
    path: &'a P,
) -> Option<&'a OsStr> {
    use std::os::unix::ffi::OsStrExt;
    use memchr::memrchr;

    let bytes = path.as_ref().as_os_str().as_bytes();
    match bytes.last() {
        // Empty path, or a path whose final byte is a dot.
        None | Some(&b'.') => return None,
        Some(_) => {}
    }
    let start = match memrchr(b'/', bytes) {
        Some(slash) => slash + 1,
        None => 0,
    };
    Some(OsStr::from_bytes(&bytes[start..]))
}
/// The final component of the path, as reported by the standard library.
///
/// On non-Unix targets this simply delegates to `std::path::Path::file_name`;
/// see its documentation for the exact cases in which `None` is returned.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
path.as_ref().file_name()
}
/// Returns the extension of a file name, dot included.
///
/// This deliberately differs from `std::path::Path::extension`: the result
/// keeps the leading `.` and the rule is more liberal. The extension is:
///
/// * `None` when the file name is empty;
/// * `None` when the file name contains no `.`;
/// * otherwise, everything from the final `.` to the end of the name.
///
/// For example, the file name `.rs` has the extension `.rs`.
///
/// N.B. The liberal rule keeps certain glob optimizations simple. A pattern
/// such as `*.rs` is clearly aimed at files with an `rs` extension, yet it
/// also matches a file named `.rs`, which has no extension as far as
/// `std::path::Path::extension` is concerned.
pub fn file_name_ext(name: &OsStr) -> Option<&OsStr> {
    // These two helpers reach through the std::ffi abstraction. What they
    // rely on is that an OsStr's encoding is *ASCII compatible*. That is
    // plainly the case on Unix, and it also holds on Windows because OsStr
    // is WTF-8 internally: https://simonsapin.github.io/wtf-8/
    //
    // The other path utilities could be given the same treatment. They
    // currently stay inside the abstraction, and Windows users pay for it.
    //
    // Got any better ideas that don't cost anything? Hit me up. ---AG
    unsafe fn os_str_as_u8_slice(s: &OsStr) -> &[u8] {
        ::std::mem::transmute(s)
    }
    unsafe fn u8_slice_as_os_str(s: &[u8]) -> &OsStr {
        ::std::mem::transmute(s)
    }

    if name.is_empty() {
        return None;
    }
    let bytes = unsafe { os_str_as_u8_slice(name) };
    // Search from the back so the *final* dot wins.
    bytes
        .iter()
        .rposition(|&b| b == b'.')
        .map(|dot| unsafe { u8_slice_as_os_str(&bytes[dot..]) })
}
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
///
/// This is `os_str_bytes` applied to the path's underlying OS string, so
/// whether the result borrows or allocates is decided there.
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
os_str_bytes(path.as_os_str())
}
/// Returns the bytes of an OS string.
///
/// On Unix an OS string already is a byte string, so the result always
/// borrows from `s` and nothing is transcoded or allocated.
#[cfg(unix)]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
    use std::os::unix::ffi::OsStrExt;

    let raw: &[u8] = s.as_bytes();
    Cow::Borrowed(raw)
}
/// Return the raw bytes of the given OS string, transcoded to UTF-8 if
/// necessary.
///
/// On non-Unix targets the conversion goes through `to_string_lossy`, so the
/// result borrows when the lossy conversion borrows and owns a new buffer
/// when it had to allocate.
#[cfg(not(unix))]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
// be useful. We *must* convert to UTF-8 before doing path matching.
// Unfortunate, but necessary.
match s.to_string_lossy() {
// Re-wrap each case so the Cow carries bytes instead of str/String.
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
}
}
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
///
/// On Unix this is the identity function: the input is returned unchanged.
#[cfg(unix)]
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
// UNIX only uses /, so we're good.
path
}
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
///
/// Every byte that `std::path::is_separator` accepts (other than `/` itself)
/// is overwritten with `/`; all other bytes are left as they are.
#[cfg(not(unix))]
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
use std::path::is_separator;
for i in 0..path.len() {
// Skip bytes that are already `/` or are not separators at all.
if path[i] == b'/' || !is_separator(path[i] as char) {
continue;
}
// `to_mut` is only reached when a byte actually has to change, so a
// borrowed path with nothing to rewrite is returned still borrowed.
path.to_mut()[i] = b'/';
}
path
}
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use std::ffi::OsStr;
use super::{file_name_ext, normalize_path};
// Generates a test named `$name` asserting that `file_name_ext($file_name)`
// equals `$ext` (an `Option` of a string, converted to `Option<&OsStr>`).
macro_rules! ext {
($name:ident, $file_name:expr, $ext:expr) => {
#[test]
fn $name() {
let got = file_name_ext(OsStr::new($file_name));
assert_eq!($ext.map(OsStr::new), got);
}
};
}
ext!(ext1, "foo.rs", Some(".rs"));
ext!(ext2, ".rs", Some(".rs"));
ext!(ext3, "..rs", Some(".rs"));
ext!(ext4, "", None::<&str>);
ext!(ext5, "foo", None::<&str>);
// Generates a test named `$name` asserting that `normalize_path` turns the
// byte string `$path` into `$expected`.
macro_rules! normalize {
($name:ident, $path:expr, $expected:expr) => {
#[test]
fn $name() {
let got = normalize_path(Cow::Owned($path.to_vec()));
assert_eq!($expected.to_vec(), got.into_owned());
}
};
}
normalize!(normal1, b"foo", b"foo");
normalize!(normal2, b"foo/bar", b"foo/bar");
// Backslashes are expected to survive on Unix and to become `/` elsewhere,
// so the same test name is defined once per platform.
#[cfg(unix)]
normalize!(normal3, b"foo\\bar", b"foo\\bar");
#[cfg(not(unix))]
normalize!(normal3, b"foo\\bar", b"foo/bar");
#[cfg(unix)]
normalize!(normal4, b"foo\\bar/baz", b"foo\\bar/baz");
#[cfg(not(unix))]
normalize!(normal4, b"foo\\bar/baz", b"foo/bar/baz");
}

View File

@@ -1,14 +0,0 @@
# Homebrew formula that installs the `rg` executable and its man page from the
# x86_64 macOS release archive published on GitHub.
class RipgrepBin < Formula
version '0.2.1'
desc "Search tool like grep and The Silver Searcher."
homepage "https://github.com/BurntSushi/ripgrep"
# `version` is interpolated into both the release tag and the archive name.
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
sha256 "f8b208239b988708da2e58f848a75bf70ad144e201b3ed99cd323cc5a699625f"
# Declared as conflicting with the formula named "ripgrep".
conflicts_with "ripgrep"
# Installs the two files straight from the archive; no build step appears here.
def install
bin.install "rg"
man1.install "rg.1"
end
end

19
pkg/brew/ripgrep.rb Normal file
View File

@@ -0,0 +1,19 @@
require 'formula'
# Homebrew formula that installs the `rg` executable and its man page from a
# macOS release archive published on GitHub, choosing the archive by CPU width.
class Ripgrep < Formula
version '0.1.17'
desc "Search tool like grep and The Silver Searcher."
homepage "https://github.com/BurntSushi/ripgrep"
# 64-bit CPUs get the x86_64 archive, everything else the i686 archive; each
# archive has its own checksum. `version` is interpolated into both URLs.
if Hardware::CPU.is_64_bit?
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-x86_64-apple-darwin.tar.gz"
sha256 "cb7b551a08849cef6ef8f17229224f094299692981976a3c5873c93f68c8fa1a"
else
url "https://github.com/BurntSushi/ripgrep/releases/download/#{version}/ripgrep-#{version}-i686-apple-darwin.tar.gz"
sha256 "0e936874b9f3fd661c5566e7f8fe18343baa5e9371e57d8d71000e9234fc376b"
end
# Installs the two files straight from the archive; no build step appears here.
def install
bin.install "rg"
man1.install "rg.1"
end
end

View File

@@ -62,7 +62,6 @@ Common options:
Precede a glob with a '!' to exclude it.
-h, --help Show this usage message.
-i, --ignore-case Case insensitive search.
Overridden by --case-sensitive.
-n, --line-number Show line numbers (1-based). This is enabled
by default at a tty.
-N, --no-line-number Suppress line numbers.
@@ -137,10 +136,6 @@ Less common options:
-L, --follow
Follow symlinks.
--maxdepth NUM
Descend at most NUM directories below the command line arguments.
A value of zero only searches the starting-points themselves.
--mmap
Search using memory maps when possible. This is enabled by default
when ripgrep thinks it will be faster. (Note that mmap searching
@@ -160,22 +155,12 @@ Less common options:
Don't respect version control ignore files (e.g., .gitignore).
Note that .ignore files will continue to be respected.
--null
Whenever a file name is printed, follow it with a NUL byte.
This includes printing filenames before matches, and when printing
a list of matching files such as with --count, --files-with-matches
and --files.
-p, --pretty
Alias for --color=always --heading -n.
-s, --case-sensitive
Search case sensitively. This overrides --ignore-case and --smart-case.
-S, --smart-case
Search case insensitively if the pattern is all lowercase.
Search case sensitively otherwise. This is overridden by
either --case-sensitive or --ignore-case.
Search case sensitively otherwise.
-j, --threads ARG
The number of threads to use. Defaults to the number of logical CPUs
@@ -215,7 +200,6 @@ pub struct RawArgs {
arg_path: Vec<String>,
flag_after_context: usize,
flag_before_context: usize,
flag_case_sensitive: bool,
flag_color: String,
flag_column: bool,
flag_context: usize,
@@ -232,7 +216,6 @@ pub struct RawArgs {
flag_invert_match: bool,
flag_line_number: bool,
flag_fixed_strings: bool,
flag_maxdepth: Option<usize>,
flag_mmap: bool,
flag_no_heading: bool,
flag_no_ignore: bool,
@@ -241,7 +224,6 @@ pub struct RawArgs {
flag_no_line_number: bool,
flag_no_mmap: bool,
flag_no_filename: bool,
flag_null: bool,
flag_pretty: bool,
flag_quiet: bool,
flag_regexp: Vec<String>,
@@ -263,6 +245,7 @@ pub struct RawArgs {
/// Args are transformed/normalized from RawArgs.
#[derive(Debug)]
pub struct Args {
pattern: String,
paths: Vec<PathBuf>,
after_context: usize,
before_context: usize,
@@ -282,16 +265,15 @@ pub struct Args {
invert_match: bool,
line_number: bool,
line_per_match: bool,
maxdepth: Option<usize>,
mmap: bool,
no_ignore: bool,
no_ignore_parent: bool,
no_ignore_vcs: bool,
null: bool,
quiet: bool,
replace: Option<Vec<u8>>,
text: bool,
threads: usize,
type_defs: Vec<FileTypeDef>,
type_list: bool,
types: Types,
with_filename: bool,
@@ -300,6 +282,7 @@ pub struct Args {
impl RawArgs {
/// Convert arguments parsed into a configuration used by ripgrep.
fn to_args(&self) -> Result<Args> {
let pattern = self.pattern();
let paths =
if self.arg_path.is_empty() {
if atty::on_stdin()
@@ -358,15 +341,14 @@ impl RawArgs {
self.flag_threads
};
let color =
if self.flag_color == "always" {
true
} else if self.flag_vimgrep {
if self.flag_vimgrep {
false
} else if self.flag_color == "auto" {
atty::on_stdout() || self.flag_pretty
} else {
false
self.flag_color == "always"
};
let eol = b'\n';
let mut with_filename = self.flag_with_filename;
if !with_filename {
@@ -374,10 +356,22 @@ impl RawArgs {
}
with_filename = with_filename && !self.flag_no_filename;
let mut btypes = TypesBuilder::new();
btypes.add_defaults();
try!(self.add_types(&mut btypes));
let types = try!(btypes.build());
let grep = try!(
GrepBuilder::new(&pattern)
.case_smart(self.flag_smart_case)
.case_insensitive(self.flag_ignore_case)
.line_terminator(eol)
.build()
);
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
let text = self.flag_text || self.flag_unrestricted >= 3;
let mut args = Args {
pattern: pattern,
paths: paths,
after_context: after_context,
before_context: before_context,
@@ -386,18 +380,17 @@ impl RawArgs {
context_separator: unescape(&self.flag_context_separator),
count: self.flag_count,
files_with_matches: self.flag_files_with_matches,
eol: self.eol(),
eol: eol,
files: self.flag_files,
follow: self.flag_follow,
glob_overrides: glob_overrides,
grep: try!(self.grep()),
grep: grep,
heading: !self.flag_no_heading && self.flag_heading,
hidden: hidden,
ignore_case: self.flag_ignore_case,
invert_match: self.flag_invert_match,
line_number: !self.flag_no_line_number && self.flag_line_number,
line_per_match: self.flag_vimgrep,
maxdepth: self.flag_maxdepth,
mmap: mmap,
no_ignore: no_ignore,
no_ignore_parent:
@@ -406,13 +399,13 @@ impl RawArgs {
no_ignore_vcs:
// --no-ignore implies --no-ignore-vcs
self.flag_no_ignore_vcs || no_ignore,
null: self.flag_null,
quiet: self.flag_quiet,
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
text: text,
threads: threads,
type_defs: btypes.definitions(),
type_list: self.flag_type_list,
types: try!(self.types()),
types: types,
with_filename: with_filename,
};
// If stdout is a tty, then apply some special default options.
@@ -431,22 +424,20 @@ impl RawArgs {
Ok(args)
}
fn types(&self) -> Result<Types> {
let mut btypes = TypesBuilder::new();
btypes.add_defaults();
fn add_types(&self, types: &mut TypesBuilder) -> Result<()> {
for ty in &self.flag_type_clear {
btypes.clear(ty);
types.clear(ty);
}
for def in &self.flag_type_add {
try!(btypes.add_def(def));
try!(types.add_def(def));
}
for ty in &self.flag_type {
btypes.select(ty);
types.select(ty);
}
for ty in &self.flag_type_not {
btypes.negate(ty);
types.negate(ty);
}
btypes.build().map_err(From::from)
Ok(())
}
fn pattern(&self) -> String {
@@ -476,27 +467,6 @@ impl RawArgs {
s
}
}
fn eol(&self) -> u8 {
// We might want to make this configurable.
b'\n'
}
/// Builds the `Grep` matcher for the configured pattern.
///
/// Case handling is resolved here: `flag_case_sensitive` switches off both
/// case-insensitive and smart-case matching, and `flag_ignore_case` switches
/// off smart-case matching.
fn grep(&self) -> Result<Grep> {
    let sensitive = self.flag_case_sensitive;
    let casei = !sensitive && self.flag_ignore_case;
    let smart = !(sensitive || self.flag_ignore_case) && self.flag_smart_case;
    let pattern = self.pattern();
    let built = GrepBuilder::new(&pattern)
        .case_smart(smart)
        .case_insensitive(casei)
        .line_terminator(self.eol())
        .build();
    built.map_err(From::from)
}
}
impl Args {
@@ -573,11 +543,6 @@ impl Args {
self.mmap
}
/// Whether ripgrep should be quiet or not.
pub fn quiet(&self) -> bool {
self.quiet
}
/// Create a new printer of individual search results that writes to the
/// writer given.
pub fn printer<W: Terminal + Send>(&self, wtr: W) -> Printer<W> {
@@ -587,7 +552,7 @@ impl Args {
.eol(self.eol)
.heading(self.heading)
.line_per_match(self.line_per_match)
.null(self.null)
.quiet(self.quiet)
.with_filename(self.with_filename);
if let Some(ref rep) = self.replace {
p = p.replace(rep.clone());
@@ -666,7 +631,6 @@ impl Args {
.eol(self.eol)
.line_number(self.line_number)
.invert_match(self.invert_match)
.quiet(self.quiet)
.text(self.text)
}
@@ -686,7 +650,6 @@ impl Args {
.eol(self.eol)
.line_number(self.line_number)
.invert_match(self.invert_match)
.quiet(self.quiet)
.text(self.text)
}
@@ -697,7 +660,7 @@ impl Args {
/// Returns a list of type definitions currently loaded.
pub fn type_defs(&self) -> &[FileTypeDef] {
self.types.definitions()
&self.type_defs
}
/// Returns true if ripgrep should print the type definitions currently
@@ -708,27 +671,17 @@ impl Args {
/// Create a new recursive directory iterator at the path given.
pub fn walker(&self, path: &Path) -> Result<walk::Iter> {
// Always follow symlinks for explicitly specified files.
let mut wd = WalkDir::new(path).follow_links(
self.follow || path.is_file());
if let Some(maxdepth) = self.maxdepth {
wd = wd.max_depth(maxdepth);
}
let wd = WalkDir::new(path).follow_links(self.follow);
let mut ig = Ignore::new();
// Only register ignore rules if this is a directory. If it's a file,
// then it was explicitly given by the end user, so we always search
// it.
if path.is_dir() {
ig.ignore_hidden(!self.hidden);
ig.no_ignore(self.no_ignore);
ig.no_ignore_vcs(self.no_ignore_vcs);
ig.add_types(self.types.clone());
if !self.no_ignore_parent {
try!(ig.push_parents(path));
}
if let Some(ref overrides) = self.glob_overrides {
ig.add_override(overrides.clone());
}
ig.ignore_hidden(!self.hidden);
ig.no_ignore(self.no_ignore);
ig.no_ignore_vcs(self.no_ignore_vcs);
ig.add_types(self.types.clone());
if !self.no_ignore_parent {
try!(ig.push_parents(path));
}
if let Some(ref overrides) = self.glob_overrides {
ig.add_override(overrides.clone());
}
Ok(walk::Iter::new(ig, wd))
}

View File

@@ -28,15 +28,15 @@ use std::fs::File;
use std::io::{self, BufRead};
use std::path::{Path, PathBuf};
use globset::{self, Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
use regex;
use glob;
use pathutil::{is_file_name, strip_prefix};
/// Represents an error that can occur when parsing a gitignore file.
#[derive(Debug)]
pub enum Error {
Glob(globset::Error),
Glob(glob::Error),
Regex(regex::Error),
Io(io::Error),
}
@@ -61,8 +61,8 @@ impl fmt::Display for Error {
}
}
impl From<globset::Error> for Error {
fn from(err: globset::Error) -> Error {
impl From<glob::Error> for Error {
fn from(err: glob::Error) -> Error {
Error::Glob(err)
}
}
@@ -82,7 +82,7 @@ impl From<io::Error> for Error {
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
#[derive(Clone, Debug)]
pub struct Gitignore {
set: GlobSet,
set: glob::Set,
root: PathBuf,
patterns: Vec<Pattern>,
num_ignores: u64,
@@ -140,8 +140,7 @@ impl Gitignore {
};
MATCHES.with(|matches| {
let mut matches = matches.borrow_mut();
let candidate = Candidate::new(path);
self.set.matches_candidate_into(&candidate, &mut *matches);
self.set.matches_into(path, &mut *matches);
for &i in matches.iter().rev() {
let pat = &self.patterns[i];
if !pat.only_dir || is_dir {
@@ -208,7 +207,7 @@ impl<'a> Match<'a> {
/// GitignoreBuilder constructs a matcher for a single set of globs from a
/// .gitignore file.
pub struct GitignoreBuilder {
builder: GlobSetBuilder,
builder: glob::SetBuilder,
root: PathBuf,
patterns: Vec<Pattern>,
}
@@ -238,7 +237,7 @@ impl GitignoreBuilder {
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
let root = strip_prefix("./", root.as_ref()).unwrap_or(root.as_ref());
GitignoreBuilder {
builder: GlobSetBuilder::new(),
builder: glob::SetBuilder::new(),
root: root.to_path_buf(),
patterns: vec![],
}
@@ -262,19 +261,8 @@ impl GitignoreBuilder {
/// Add each pattern line from the file path given.
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
let rdr = io::BufReader::new(try!(File::open(&path)));
debug!("gitignore: {}", path.as_ref().display());
for (i, line) in rdr.lines().enumerate() {
let line = match line {
Ok(line) => line,
Err(err) => {
debug!("error reading line {} in {}: {}",
i, path.as_ref().display(), err);
continue;
}
};
if let Err(err) = self.add(&path, &line) {
debug!("error adding gitignore pattern: '{}': {}", line, err);
}
for line in rdr.lines() {
try!(self.add(&path, &try!(line)));
}
Ok(())
}
@@ -295,15 +283,12 @@ impl GitignoreBuilder {
from: P,
mut line: &str,
) -> Result<(), Error> {
if line.starts_with("#") {
if line.is_empty() || line.starts_with("#") {
return Ok(());
}
if !line.ends_with("\\ ") {
line = line.trim_right();
}
if line.is_empty() {
return Ok(());
}
let mut pat = Pattern {
from: from.as_ref().to_path_buf(),
original: line.to_string(),
@@ -311,7 +296,7 @@ impl GitignoreBuilder {
whitelist: false,
only_dir: false,
};
let mut literal_separator = false;
let mut opts = glob::MatchOptions::default();
let has_slash = line.chars().any(|c| c == '/');
let is_absolute = line.chars().nth(0).unwrap() == '/';
if line.starts_with("\\!") || line.starts_with("\\#") {
@@ -326,7 +311,7 @@ impl GitignoreBuilder {
// then the glob can only match the beginning of a path
// (relative to the location of gitignore). We achieve this by
// simply banning wildcards from matching /.
literal_separator = true;
opts.require_literal_separator = true;
line = &line[1..];
}
}
@@ -342,7 +327,7 @@ impl GitignoreBuilder {
// doesn't let wildcards match slashes.
pat.pat = line.to_string();
if has_slash {
literal_separator = true;
opts.require_literal_separator = true;
}
// If there was a leading slash, then this is a pattern that must
// match the entire path name. Otherwise, we should let it match
@@ -359,11 +344,7 @@ impl GitignoreBuilder {
if pat.pat.ends_with("/**") {
pat.pat = format!("{}/*", pat.pat);
}
let parsed = try!(
GlobBuilder::new(&pat.pat)
.literal_separator(literal_separator)
.build());
self.builder.add(parsed);
try!(self.builder.add_with(&pat.pat, &opts));
self.patterns.push(pat);
Ok(())
}
@@ -445,13 +426,4 @@ mod tests {
not_ignored!(ignot11, ROOT, "#foo", "#foo");
not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
not_ignored!(ignot13, ROOT, "foo/**", "foo", true);
not_ignored!(
ignot14, "./third_party/protobuf", "m4/ltoptions.m4",
"./third_party/protobuf/csharp/src/packages/repositories.config");
// See: https://github.com/BurntSushi/ripgrep/issues/106
#[test]
fn regression_106() {
Gitignore::from_str("/", " ").unwrap();
}
}

1170
src/glob.rs Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,7 +1,7 @@
extern crate deque;
extern crate docopt;
extern crate env_logger;
extern crate globset;
extern crate fnv;
extern crate grep;
#[cfg(windows)]
extern crate kernel32;
@@ -22,12 +22,11 @@ extern crate winapi;
use std::error::Error;
use std::fs::File;
use std::io;
use std::path::Path;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process;
use std::result;
use std::sync::{Arc, Mutex};
use std::sync::atomic::{AtomicBool, Ordering};
use std::thread;
use std::cmp;
@@ -61,6 +60,7 @@ macro_rules! eprintln {
mod args;
mod atty;
mod gitignore;
mod glob;
mod ignore;
mod out;
mod pathutil;
@@ -89,20 +89,20 @@ fn run(args: Args) -> Result<u64> {
let args = Arc::new(args);
let paths = args.paths();
let threads = cmp::max(1, args.threads() - 1);
let isone =
paths.len() == 1 && (paths[0] == Path::new("-") || paths[0].is_file());
if args.files() {
return run_files(args.clone());
}
if args.type_list() {
return run_types(args.clone());
}
if threads == 1 || isone {
if paths.len() == 1 && (paths[0] == Path::new("-") || paths[0].is_file()) {
return run_one(args.clone(), &paths[0]);
}
if threads == 1 {
return run_one_thread(args.clone());
}
let out = Arc::new(Mutex::new(args.out()));
let quiet_matched = QuietMatched::new(args.quiet());
let mut workers = vec![];
let workq = {
@@ -110,7 +110,6 @@ fn run(args: Args) -> Result<u64> {
for _ in 0..threads {
let worker = MultiWorker {
chan_work: stealer.clone(),
quiet_matched: quiet_matched.clone(),
out: out.clone(),
outbuf: Some(args.outbuf()),
worker: Worker {
@@ -126,17 +125,11 @@ fn run(args: Args) -> Result<u64> {
};
let mut paths_searched: u64 = 0;
for p in paths {
if quiet_matched.has_match() {
break;
}
if p == Path::new("-") {
paths_searched += 1;
workq.push(Work::Stdin);
} else {
for ent in try!(args.walker(p)) {
if quiet_matched.has_match() {
break;
}
paths_searched += 1;
workq.push(Work::File(ent));
}
@@ -165,41 +158,32 @@ fn run_one_thread(args: Arc<Args>) -> Result<u64> {
match_count: 0,
};
let paths = args.paths();
let filesep = args.file_separator();
let mut term = args.stdout();
let mut paths_searched: u64 = 0;
for p in paths {
if args.quiet() && worker.match_count > 0 {
break;
}
if p == Path::new("-") {
paths_searched += 1;
let mut printer = args.printer(&mut term);
if worker.match_count > 0 {
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
if let Some(ref sep) = filesep {
let _ = term.write_all(sep);
let _ = term.write_all(b"\n");
}
}
paths_searched += 1;
let mut printer = args.printer(&mut term);
worker.do_work(&mut printer, WorkReady::Stdin);
} else {
for ent in try!(args.walker(p)) {
paths_searched += 1;
let mut printer = args.printer(&mut term);
if worker.match_count > 0 {
if args.quiet() {
break;
}
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
if let Some(ref sep) = filesep {
let _ = term.write_all(sep);
let _ = term.write_all(b"\n");
}
}
let file = match File::open(ent.path()) {
Ok(file) => file,
Err(err) => {
eprintln!("{}: {}", ent.path().display(), err);
continue;
}
};
paths_searched += 1;
let mut printer = args.printer(&mut term);
let file = try!(File::open(ent.path()));
worker.do_work(&mut printer, WorkReady::DirFile(ent, file));
}
}
@@ -212,6 +196,25 @@ fn run_one_thread(args: Arc<Args>) -> Result<u64> {
Ok(worker.match_count)
}
/// Searches exactly one input and returns the worker's match count.
///
/// The input is stdin when `path` is `-`, otherwise the file at `path`. An
/// error from opening that file is returned to the caller.
fn run_one(args: Arc<Args>, path: &Path) -> Result<u64> {
let mut worker = Worker {
args: args.clone(),
inpbuf: args.input_buffer(),
grep: args.grep(),
match_count: 0,
};
// The printer takes ownership of the stdout handle obtained from `args`.
let term = args.stdout();
let mut printer = args.printer(term);
let work =
if path == Path::new("-") {
WorkReady::Stdin
} else {
WorkReady::PathFile(path.to_path_buf(), try!(File::open(path)))
};
worker.do_work(&mut printer, work);
Ok(worker.match_count)
}
fn run_files(args: Arc<Args>) -> Result<u64> {
let term = args.stdout();
let mut printer = args.printer(term);
@@ -250,11 +253,11 @@ enum Work {
enum WorkReady {
Stdin,
DirFile(DirEntry, File),
PathFile(PathBuf, File),
}
struct MultiWorker {
chan_work: Stealer<Work>,
quiet_matched: QuietMatched,
out: Arc<Mutex<Out>>,
#[cfg(not(windows))]
outbuf: Option<ColoredTerminal<term::TerminfoTerminal<Vec<u8>>>>,
@@ -273,9 +276,6 @@ struct Worker {
impl MultiWorker {
fn run(mut self) -> u64 {
loop {
if self.quiet_matched.has_match() {
break;
}
let work = match self.chan_work.steal() {
Stolen::Empty | Stolen::Abort => continue,
Stolen::Data(Work::Quit) => break,
@@ -294,9 +294,6 @@ impl MultiWorker {
outbuf.clear();
let mut printer = self.worker.args.printer(outbuf);
self.worker.do_work(&mut printer, work);
if self.quiet_matched.set_match(self.worker.match_count > 0) {
break;
}
let outbuf = printer.into_inner();
if !outbuf.get_ref().is_empty() {
let mut out = self.out.lock().unwrap();
@@ -331,6 +328,17 @@ impl Worker {
self.search(printer, path, file)
}
}
WorkReady::PathFile(path, file) => {
let mut path = &*path;
if let Some(p) = strip_prefix("./", path) {
path = p;
}
if self.args.mmap() {
self.search_mmap(printer, path, &file)
} else {
self.search(printer, path, file)
}
}
};
match result {
Ok(count) => {
@@ -380,28 +388,3 @@ impl Worker {
).run())
}
}
/// A cloneable handle to an optional "a match was seen" flag.
///
/// The flag only exists when the handle was created with `quiet` set to
/// true; otherwise every query reports `false` and setting is a no-op.
#[derive(Clone, Debug)]
struct QuietMatched(Arc<Option<AtomicBool>>);

impl QuietMatched {
    /// Creates a handle; the underlying flag is allocated (initially unset)
    /// only when `quiet` is true.
    fn new(quiet: bool) -> QuietMatched {
        let flag = match quiet {
            true => Some(AtomicBool::new(false)),
            false => None,
        };
        QuietMatched(Arc::new(flag))
    }

    /// Returns true if the flag exists and has been set.
    fn has_match(&self) -> bool {
        (*self.0)
            .as_ref()
            .map_or(false, |flag| flag.load(Ordering::SeqCst))
    }

    /// Sets the flag when `yes` is true and the flag exists.
    ///
    /// Returns true only when the flag was actually set by this call.
    fn set_match(&self, yes: bool) -> bool {
        if !yes {
            return false;
        }
        if let Some(ref flag) = *self.0 {
            flag.store(true, Ordering::SeqCst);
            true
        } else {
            false
        }
    }
}

View File

@@ -48,6 +48,8 @@ impl Out {
/// If set, the separator is printed between matches from different files.
/// By default, no separator is printed.
///
/// If sep is empty, then no file separator is printed.
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
self.file_separator = Some(sep);
self

View File

@@ -25,49 +25,18 @@ pub struct Printer<W> {
/// printed via the match directly, but occasionally we need to insert them
/// ourselves (for example, to print a context separator).
eol: u8,
/// A file separator to show before any matches are printed.
file_separator: Option<Vec<u8>>,
/// Whether to show file name as a heading or not.
///
/// N.B. If with_filename is false, then this setting has no effect.
heading: bool,
/// Whether to show every match on its own line.
line_per_match: bool,
/// Whether to print NUL bytes after a file path instead of new lines
/// or `:`.
null: bool,
/// Whether to suppress all output.
quiet: bool,
/// A string to use as a replacement of each match in a matching line.
replace: Option<Vec<u8>>,
/// Whether to prefix each match with the corresponding file name.
with_filename: bool,
/// The choice of colors.
color_choice: ColorChoice
}
/// The colors applied to the different parts of printed output.
struct ColorChoice {
    /// Foreground color for the matched portion of a line.
    matched_line: color::Color,
    /// Foreground color for file paths.
    heading: color::Color,
    /// Foreground color for line numbers.
    line_number: color::Color,
}

impl ColorChoice {
    /// Builds the palette for the platform being compiled for: the plain
    /// colors on unix, the bright variants everywhere else.
    pub fn new() -> ColorChoice {
        let (matched_line, heading, line_number) = ColorChoice::platform_colors();
        ColorChoice {
            matched_line: matched_line,
            heading: heading,
            line_number: line_number,
        }
    }

    /// Returns `(matched_line, heading, line_number)` for unix targets.
    #[cfg(unix)]
    fn platform_colors() -> (color::Color, color::Color, color::Color) {
        (color::RED, color::GREEN, color::BLUE)
    }

    /// Returns `(matched_line, heading, line_number)` for non-unix targets.
    #[cfg(not(unix))]
    fn platform_colors() -> (color::Color, color::Color, color::Color) {
        (color::BRIGHT_RED, color::BRIGHT_GREEN, color::BRIGHT_BLUE)
    }
}
impl<W: Terminal + Send> Printer<W> {
@@ -79,13 +48,11 @@ impl<W: Terminal + Send> Printer<W> {
column: false,
context_separator: "--".to_string().into_bytes(),
eol: b'\n',
file_separator: None,
heading: false,
line_per_match: false,
null: false,
quiet: false,
replace: None,
with_filename: false,
color_choice: ColorChoice::new()
}
}
@@ -108,13 +75,6 @@ impl<W: Terminal + Send> Printer<W> {
self
}
/// If set, the separator is printed before any matches. By default, no
/// separator is printed.
pub fn file_separator(mut self, sep: Vec<u8>) -> Printer<W> {
self.file_separator = Some(sep);
self
}
/// Whether to show file name as a heading or not.
///
/// N.B. If with_filename is false, then this setting has no effect.
@@ -129,10 +89,9 @@ impl<W: Terminal + Send> Printer<W> {
self
}
/// Whether to cause NUL bytes to follow file paths instead of other
/// visual separators (like `:`, `-` and `\n`).
pub fn null(mut self, yes: bool) -> Printer<W> {
self.null = yes;
/// When set, all output is suppressed.
pub fn quiet(mut self, yes: bool) -> Printer<W> {
self.quiet = yes;
self
}
@@ -157,6 +116,11 @@ impl<W: Terminal + Send> Printer<W> {
self.has_printed
}
/// Returns true if the printer has been configured to be quiet.
///
/// This simply reports the value of the `quiet` setting; it does not say
/// whether anything has been printed so far.
pub fn is_quiet(&self) -> bool {
self.quiet
}
/// Flushes the underlying writer and returns it.
pub fn into_inner(mut self) -> W {
let _ = self.wtr.flush();
@@ -182,22 +146,14 @@ impl<W: Terminal + Send> Printer<W> {
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref());
self.write_path(path);
if self.null {
self.write(b"\x00");
} else {
self.write_eol();
}
self.write_eol();
}
/// Prints the given path and a count of the number of matches found.
pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
if self.with_filename {
self.write_path(path);
if self.null {
self.write(b"\x00");
} else {
self.write(b":");
}
self.write(b":");
}
self.write(count.to_string().as_bytes());
self.write_eol();
@@ -206,6 +162,9 @@ impl<W: Terminal + Send> Printer<W> {
/// Prints the context separator.
pub fn context_separate(&mut self) {
// N.B. We can't use `write` here because of borrowing restrictions.
if self.quiet {
return;
}
if self.context_separator.is_empty() {
return;
}
@@ -227,7 +186,7 @@ impl<W: Terminal + Send> Printer<W> {
let column =
if self.column {
Some(re.find(&buf[start..end])
.map(|(s, _)| s).unwrap_or(0) as u64)
.map(|(s, _)| s + 1).unwrap_or(0) as u64)
} else {
None
};
@@ -252,10 +211,10 @@ impl<W: Terminal + Send> Printer<W> {
column: Option<u64>,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
self.write_heading(path.as_ref());
} else if !self.heading && self.with_filename {
self.write_non_heading_path(path.as_ref());
self.write_path(path.as_ref());
self.write(b":");
}
if let Some(line_number) = line_number {
self.line_number(line_number, b':');
@@ -284,7 +243,7 @@ impl<W: Terminal + Send> Printer<W> {
let mut last_written = 0;
for (s, e) in re.find_iter(buf) {
self.write(&buf[last_written..s]);
let _ = self.wtr.fg(self.color_choice.matched_line);
let _ = self.wtr.fg(color::BRIGHT_RED);
let _ = self.wtr.attr(Attr::Bold);
self.write(&buf[s..e]);
let _ = self.wtr.reset();
@@ -302,15 +261,10 @@ impl<W: Terminal + Send> Printer<W> {
line_number: Option<u64>,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
self.write_heading(path.as_ref());
} else if !self.heading && self.with_filename {
self.write_path(path.as_ref());
if self.null {
self.write(b"\x00");
} else {
self.write(b"-");
}
self.write(b"-");
}
if let Some(line_number) = line_number {
self.line_number(line_number, b'-');
@@ -323,39 +277,19 @@ impl<W: Terminal + Send> Printer<W> {
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
if self.wtr.supports_color() {
let _ = self.wtr.fg(self.color_choice.heading);
let _ = self.wtr.fg(color::BRIGHT_GREEN);
let _ = self.wtr.attr(Attr::Bold);
}
self.write_path(path.as_ref());
if self.null {
self.write(b"\x00");
} else {
self.write_eol();
}
self.write_eol();
if self.wtr.supports_color() {
let _ = self.wtr.reset();
}
}
/// Writes `path` as a per-line prefix (as opposed to a heading), followed
/// by a separator: a NUL byte when `null` is set, `:` otherwise.
///
/// When the writer supports color, the path is written in bold using the
/// heading color and the styling is reset before the separator is emitted.
fn write_non_heading_path<P: AsRef<Path>>(&mut self, path: P) {
    let file_path = path.as_ref();
    if self.wtr.supports_color() {
        let heading_color = self.color_choice.heading;
        let _ = self.wtr.fg(heading_color);
        let _ = self.wtr.attr(Attr::Bold);
    }
    self.write_path(file_path);
    if self.wtr.supports_color() {
        let _ = self.wtr.reset();
    }
    let separator: &[u8] = if self.null { b"\x00" } else { b":" };
    self.write(separator);
}
fn line_number(&mut self, n: u64, sep: u8) {
if self.wtr.supports_color() {
let _ = self.wtr.fg(self.color_choice.line_number);
let _ = self.wtr.fg(color::BRIGHT_BLUE);
let _ = self.wtr.attr(Attr::Bold);
}
self.write(n.to_string().as_bytes());
@@ -379,6 +313,9 @@ impl<W: Terminal + Send> Printer<W> {
}
/// Writes `buf` to the underlying writer unless the printer is quiet.
///
/// The printer is marked as having printed before the write is attempted,
/// and the result of the write itself is deliberately ignored.
fn write(&mut self, buf: &[u8]) {
// Quiet mode: emit nothing and leave `has_printed` untouched.
if self.quiet {
return;
}
self.has_printed = true;
let _ = self.wtr.write_all(buf);
}
@@ -387,12 +324,4 @@ impl<W: Terminal + Send> Printer<W> {
let eol = self.eol;
self.write(&[eol]);
}
/// Writes the configured file separator followed by a newline.
///
/// Does nothing when no separator has been configured. Otherwise the
/// printer is marked as having printed and the bytes go straight to the
/// underlying writer; write errors are ignored.
fn write_file_sep(&mut self) {
    let separator = match self.file_separator {
        Some(ref sep) => sep,
        None => return,
    };
    self.has_printed = true;
    let _ = self.wtr.write_all(separator);
    let _ = self.wtr.write_all(b"\n");
}
}

View File

@@ -81,13 +81,6 @@ impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
self
}
/// If enabled, don't show any output and quit searching after the first
/// match is found.
pub fn quiet(mut self, yes: bool) -> Self {
self.opts.quiet = yes;
self
}
/// If enabled, search binary files as if they were text.
pub fn text(mut self, yes: bool) -> Self {
self.opts.text = yes;
@@ -111,7 +104,7 @@ impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
self.print_match(m.start(), m.end());
}
last_end = m.end();
if self.opts.stop_after_first_match() {
if self.printer.is_quiet() || self.opts.files_with_matches {
break;
}
}

View File

@@ -84,7 +84,6 @@ pub struct Options {
pub eol: u8,
pub invert_match: bool,
pub line_number: bool,
pub quiet: bool,
pub text: bool,
}
@@ -98,7 +97,6 @@ impl Default for Options {
eol: b'\n',
invert_match: false,
line_number: false,
quiet: false,
text: false,
}
}
@@ -106,16 +104,10 @@ impl Default for Options {
}
impl Options {
/// Several options (--quiet, --count, --files-with-matches) imply that
/// we shouldn't ever display matches.
/// Both --count and --files-with-matches options imply that we should not
/// display matches at all.
pub fn skip_matches(&self) -> bool {
self.count || self.files_with_matches || self.quiet
}
/// Some options (--quiet, --files-with-matches) imply that we can stop
/// searching after the first match.
pub fn stop_after_first_match(&self) -> bool {
self.files_with_matches || self.quiet
return self.count || self.files_with_matches;
}
}
@@ -205,13 +197,6 @@ impl<'a, R: io::Read, W: Terminal + Send> Searcher<'a, R, W> {
self
}
/// If enabled, don't show any output and quit searching after the first
/// match is found.
pub fn quiet(mut self, yes: bool) -> Self {
self.opts.quiet = yes;
self
}
/// If enabled, search binary files as if they were text.
pub fn text(mut self, yes: bool) -> Self {
self.opts.text = yes;
@@ -280,7 +265,8 @@ impl<'a, R: io::Read, W: Terminal + Send> Searcher<'a, R, W> {
#[inline(always)]
fn terminate(&self) -> bool {
self.match_count > 0 && self.opts.stop_after_first_match()
self.match_count > 0
&& (self.printer.is_quiet() || self.opts.files_with_matches)
}
#[inline(always)]

View File

@@ -11,17 +11,16 @@ use std::path::Path;
use regex;
use gitignore::{Match, Pattern};
use globset::{self, GlobBuilder, GlobSet, GlobSetBuilder};
use glob::{self, MatchOptions};
const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("asm", &["*.asm", "*.s", "*.S"]),
("awk", &["*.awk"]),
("c", &["*.c", "*.h", "*.H"]),
("cbor", &["*.cbor"]),
("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
("cmake", &["*.cmake", "CMakeLists.txt"]),
("clojure", &["*.clj", "*.cljs"]),
("cmake", &["CMakeLists.txt"]),
("coffeescript", &["*.coffee"]),
("config", &["*.config"]),
("cpp", &[
"*.C", "*.cc", "*.cpp", "*.cxx",
"*.h", "*.H", "*.hh", "*.hpp",
@@ -43,7 +42,6 @@ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("haskell", &["*.hs", "*.lhs"]),
("html", &["*.htm", "*.html"]),
("java", &["*.java"]),
("jinja", &["*.jinja", "*.jinja2"]),
("js", &[
"*.js", "*.jsx", "*.vue",
]),
@@ -54,7 +52,6 @@ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("m4", &["*.ac", "*.m4"]),
("make", &["gnumakefile", "Gnumakefile", "makefile", "Makefile", "*.mk"]),
("markdown", &["*.md"]),
("md", &["*.md"]),
("matlab", &["*.m"]),
("mk", &["mkfile"]),
("ml", &["*.ml"]),
@@ -72,9 +69,7 @@ const TYPE_EXTENSIONS: &'static [(&'static str, &'static [&'static str])] = &[
("rust", &["*.rs"]),
("scala", &["*.scala"]),
("sh", &["*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh"]),
("spark", &["*.spark"]),
("sql", &["*.sql"]),
("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
("swift", &["*.swift"]),
("tex", &["*.tex", "*.cls", "*.sty"]),
("ts", &["*.ts", "*.tsx"]),
@@ -97,7 +92,7 @@ pub enum Error {
/// A user specified file type definition could not be parsed.
InvalidDefinition,
/// There was an error building the matcher (probably a bad glob).
Glob(globset::Error),
Glob(glob::Error),
/// There was an error compiling a glob as a regex.
Regex(regex::Error),
}
@@ -129,8 +124,8 @@ impl fmt::Display for Error {
}
}
impl From<globset::Error> for Error {
fn from(err: globset::Error) -> Error {
impl From<glob::Error> for Error {
fn from(err: glob::Error) -> Error {
Error::Glob(err)
}
}
@@ -163,9 +158,8 @@ impl FileTypeDef {
/// Types is a file type matcher.
#[derive(Clone, Debug)]
pub struct Types {
defs: Vec<FileTypeDef>,
selected: Option<GlobSet>,
negated: Option<GlobSet>,
selected: Option<glob::SetYesNo>,
negated: Option<glob::SetYesNo>,
has_selected: bool,
unmatched_pat: Pattern,
}
@@ -178,13 +172,11 @@ impl Types {
/// If has_selected is true, then at least one file type was selected.
/// Therefore, any non-matches should be ignored.
fn new(
selected: Option<GlobSet>,
negated: Option<GlobSet>,
selected: Option<glob::SetYesNo>,
negated: Option<glob::SetYesNo>,
has_selected: bool,
defs: Vec<FileTypeDef>,
) -> Types {
Types {
defs: defs,
selected: selected,
negated: negated,
has_selected: has_selected,
@@ -200,7 +192,7 @@ impl Types {
/// Creates a new file type matcher that never matches.
pub fn empty() -> Types {
Types::new(None, None, false, vec![])
Types::new(None, None, false)
}
/// Returns a match for the given path against this file type matcher.
@@ -240,11 +232,6 @@ impl Types {
Match::None
}
}
/// Return the set of current file type definitions.
pub fn definitions(&self) -> &[FileTypeDef] {
&self.defs
}
}
/// TypesBuilder builds a type matcher from a set of file type definitions and
@@ -268,11 +255,14 @@ impl TypesBuilder {
/// Build the current set of file type definitions *and* selections into
/// a file type matcher.
pub fn build(&self) -> Result<Types, Error> {
let opts = MatchOptions {
require_literal_separator: true, ..MatchOptions::default()
};
let selected_globs =
if self.selected.is_empty() {
None
} else {
let mut bset = GlobSetBuilder::new();
let mut bset = glob::SetBuilder::new();
for name in &self.selected {
let globs = match self.types.get(name) {
Some(globs) => globs,
@@ -282,19 +272,16 @@ impl TypesBuilder {
}
};
for glob in globs {
let pat = try!(
GlobBuilder::new(glob)
.literal_separator(true).build());
bset.add(pat);
try!(bset.add_with(glob, &opts));
}
}
Some(try!(bset.build()))
Some(try!(bset.build_yesno()))
};
let negated_globs =
if self.negated.is_empty() {
None
} else {
let mut bset = GlobSetBuilder::new();
let mut bset = glob::SetBuilder::new();
for name in &self.negated {
let globs = match self.types.get(name) {
Some(globs) => globs,
@@ -304,20 +291,13 @@ impl TypesBuilder {
}
};
for glob in globs {
let pat = try!(
GlobBuilder::new(glob)
.literal_separator(true).build());
bset.add(pat);
try!(bset.add_with(glob, &opts));
}
}
Some(try!(bset.build()))
Some(try!(bset.build_yesno()))
};
Ok(Types::new(
selected_globs,
negated_globs,
!self.selected.is_empty(),
self.definitions(),
))
selected_globs, negated_globs, !self.selected.is_empty()))
}
/// Return the set of current file type definitions.

View File

@@ -54,13 +54,6 @@ fn path(unix: &str) -> String {
}
}
/// Returns the lines of `lines` in sorted order, joined with `\n` and
/// terminated by a single trailing newline.
///
/// Leading and trailing whitespace of the whole input is trimmed before it
/// is split into lines, so empty input yields just `"\n"`.
fn sort_lines(lines: &str) -> String {
    let mut sorted: Vec<&str> = lines.trim().lines().collect();
    sorted.sort();
    let mut joined = sorted.join("\n");
    joined.push('\n');
    joined
}
sherlock!(single_file, |wd: WorkDir, mut cmd| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
@@ -93,8 +86,8 @@ sherlock!(columns, |wd: WorkDir, mut cmd: Command| {
cmd.arg("--column");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
57:For the Doctor Watsons of this world, as opposed to the Sherlock
49:be, to a very large extent, the result of luck. Sherlock Holmes
58:For the Doctor Watsons of this world, as opposed to the Sherlock
50:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@@ -542,7 +535,7 @@ sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create_dir("foo");
wd.create_dir("foo/bar");
wd.link_dir("foo/baz", "foo/bar/baz");
wd.link("foo/baz", "foo/bar/baz");
wd.create_dir("foo/baz");
wd.create("foo/baz/sherlock", hay::SHERLOCK);
cmd.current_dir(wd.path().join("foo/bar"));
@@ -555,7 +548,7 @@ sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create_dir("foo/bar");
wd.create_dir("foo/baz");
wd.create("foo/baz/sherlock", hay::SHERLOCK);
wd.link_dir("foo/baz", "foo/bar/baz");
wd.link("foo/baz", "foo/bar/baz");
cmd.arg("-L");
cmd.current_dir(wd.path().join("foo/bar"));
@@ -592,6 +585,17 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, expected);
});
#[cfg(not(windows))]
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
cmd.arg("-uuu");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
});
// On Windows, this test uses memory maps, so the NUL bytes don't get replaced.
#[cfg(windows)]
sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
cmd.arg("-uuu");
@@ -648,6 +652,7 @@ clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| {
}
wd.create_dir("vendor");
wd.create("vendor/manifest", "test");
cmd.arg("--debug");
let lines: String = wd.stdout(&mut cmd);
let expected = path("vendor/manifest:test\n");
@@ -693,13 +698,6 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| {
assert_eq!(lines, path("dir/bar:test\n"));
});
// See: https://github.com/BurntSushi/ripgrep/issues/87
clean!(regression_87, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".gitignore", "foo\n**no-vcs**");
wd.create("foo", "test");
wd.assert_err(&mut cmd);
});
// See: https://github.com/BurntSushi/ripgrep/issues/90
clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".gitignore", "!.foo");
@@ -718,97 +716,8 @@ clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".",
assert_eq!(lines, "foo:192.168.1.1\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/99
clean!(regression_99, "test", ".",
|wd: WorkDir, mut cmd: Command| {
wd.create("foo1", "test");
wd.create("foo2", "zzz");
wd.create("bar", "test");
cmd.arg("-j1").arg("--heading");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(sort_lines(&lines), sort_lines("bar\ntest\n\nfoo1\ntest\n"));
});
// See: https://github.com/BurntSushi/ripgrep/issues/105
clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("foo", "zztest");
cmd.arg("--vimgrep");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:1:3:zztest\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/105
clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("foo", "zztest");
cmd.arg("--column");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:3:zztest\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/127
clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
// Set up a directory hierarchy like this:
//
// .gitignore
// foo/
// sherlock
// watson
//
// Where `.gitignore` contains `foo/sherlock`.
//
// ripgrep should ignore 'foo/sherlock' giving us results only from
// 'foo/watson' but on Windows ripgrep will include both 'foo/sherlock' and
// 'foo/watson' in the search results.
wd.create(".gitignore", "foo/sherlock\n");
wd.create_dir("foo");
wd.create("foo/sherlock", hay::SHERLOCK);
wd.create("foo/watson", hay::SHERLOCK);
let lines: String = wd.stdout(&mut cmd);
let expected = format!("\
{path}:For the Doctor Watsons of this world, as opposed to the Sherlock
{path}:be, to a very large extent, the result of luck. Sherlock Holmes
", path=path("foo/watson"));
assert_eq!(lines, expected);
});
// See: https://github.com/BurntSushi/ripgrep/issues/131
clean!(regression_131, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".gitignore", "TopÑapa");
wd.create("TopÑapa", "test");
wd.assert_err(&mut cmd);
});
// See: https://github.com/BurntSushi/ripgrep/issues/137
//
// TODO(burntsushi): Figure out why Windows gives "access denied" errors
// when trying to create a file symlink. For now, disable test on Windows.
#[cfg(not(windows))]
sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.link_file("sherlock", "sym1");
wd.link_file("sherlock", "sym2");
cmd.arg("sym1");
cmd.arg("sym2");
cmd.arg("-j1");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
sym1:For the Doctor Watsons of this world, as opposed to the Sherlock
sym1:be, to a very large extent, the result of luck. Sherlock Holmes
sym2:For the Doctor Watsons of this world, as opposed to the Sherlock
sym2:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, path(expected));
});
// See: https://github.com/BurntSushi/ripgrep/issues/20
sherlock!(feature_20_no_filename, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
sherlock!(feature_20, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--no-filename");
let lines: String = wd.stdout(&mut cmd);
@@ -820,7 +729,7 @@ be, to a very large extent, the result of luck. Sherlock Holmes
});
// See: https://github.com/BurntSushi/ripgrep/issues/68
clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| {
clean!(feature_68, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".gitignore", "foo");
wd.create(".ignore", "bar");
wd.create("foo", "test");
@@ -832,8 +741,7 @@ clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| {
});
// See: https://github.com/BurntSushi/ripgrep/issues/70
sherlock!(feature_70_smart_case, "sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
sherlock!(feature_70, "sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--smart-case");
let lines: String = wd.stdout(&mut cmd);
@@ -844,78 +752,6 @@ sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
assert_eq!(lines, expected);
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
sherlock!(feature_89_files_with_matches, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("--null").arg("--files-with-matches");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x00");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
sherlock!(feature_89_count, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("--null").arg("--count");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x002\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
sherlock!(feature_89_files, "NADA", ".",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("--null").arg("--files");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x00");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
sherlock!(feature_89_match, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("--null").arg("-C1");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock\x00Holmeses, success in the province of detective work must always
sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes
sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash;
";
assert_eq!(lines, expected);
});
// See: https://github.com/BurntSushi/ripgrep/issues/109
clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| {
wd.create_dir("one");
wd.create("one/pass", "far");
wd.create_dir("one/too");
wd.create("one/too/many", "far");
cmd.arg("--maxdepth").arg("2");
let lines: String = wd.stdout(&mut cmd);
let expected = path("one/pass:far\n");
assert_eq!(lines, expected);
});
// See: https://github.com/BurntSushi/ripgrep/issues/124
clean!(feature_109_case_sensitive_part1, "test", ".",
|wd: WorkDir, mut cmd: Command| {
wd.create("foo", "tEsT");
cmd.arg("--smart-case").arg("--case-sensitive");
wd.assert_err(&mut cmd);
});
// See: https://github.com/BurntSushi/ripgrep/issues/124
clean!(feature_109_case_sensitive_part2, "test", ".",
|wd: WorkDir, mut cmd: Command| {
wd.create("foo", "tEsT");
cmd.arg("--ignore-case").arg("--case-sensitive");
wd.assert_err(&mut cmd);
});
#[test]
fn binary_nosearch() {
let wd = WorkDir::new("binary_nosearch");

View File

@@ -83,7 +83,7 @@ impl WorkDir {
/// Creates a directory symlink to the src with the given target name
/// in this directory.
#[cfg(not(windows))]
pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
use std::os::unix::fs::symlink;
let src = self.dir.join(src);
let target = self.dir.join(target);
@@ -91,10 +91,8 @@ impl WorkDir {
nice_err(&target, symlink(&src, &target));
}
/// Creates a directory symlink to the src with the given target name
/// in this directory.
#[cfg(windows)]
pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
use std::os::windows::fs::symlink_dir;
let src = self.dir.join(src);
let target = self.dir.join(target);
@@ -102,32 +100,6 @@ impl WorkDir {
nice_err(&target, symlink_dir(&src, &target));
}
/// Creates a file symlink to the src with the given target name
/// in this directory.
///
/// On non-Windows platforms this simply forwards to `link_dir`.
#[cfg(not(windows))]
pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
&self,
src: S,
target: T,
) {
self.link_dir(src, target);
}
/// Creates a file symlink to the src with the given target name
/// in this directory.
///
/// Both `src` and `target` are joined onto this work directory. Any
/// existing file at the target location is removed first (a failure to
/// remove it is ignored), and the result of creating the symlink is passed
/// to `nice_err` along with the target path.
#[cfg(windows)]
pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
&self,
src: S,
target: T,
) {
use std::os::windows::fs::symlink_file;
let src = self.dir.join(src);
let target = self.dir.join(target);
// Best-effort cleanup of a previous link; the error is intentionally dropped.
let _ = fs::remove_file(&target);
nice_err(&target, symlink_file(&src, &target));
}
/// Runs and captures the stdout of the given command.
///
/// If the return type could not be created from a string, then this