mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-28 18:51:58 -07:00
Compare commits
9 Commits
globset-0.
...
grep-regex
Author | SHA1 | Date | |
---|---|---|---|
|
2cae30e399 | ||
|
8e57989cd2 | ||
|
b9f5835534 | ||
|
e70778e89d | ||
|
87c4a2b4b1 | ||
|
0aa31676e3 | ||
|
9f0e88bcb1 | ||
|
eb4b389846 | ||
|
dc337bab0a |
12
.github/workflows/ci.yml
vendored
12
.github/workflows/ci.yml
vendored
@@ -98,11 +98,9 @@ jobs:
|
||||
ci/macos-install-packages
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
profile: minimal
|
||||
override: true
|
||||
|
||||
- name: Use Cross
|
||||
if: matrix.target != ''
|
||||
@@ -185,11 +183,9 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
override: true
|
||||
profile: minimal
|
||||
components: rustfmt
|
||||
- name: Check formatting
|
||||
run: |
|
||||
@@ -202,11 +198,9 @@ jobs:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
override: true
|
||||
- name: Check documentation
|
||||
env:
|
||||
RUSTDOCFLAGS: -D warnings
|
||||
|
4
.github/workflows/release.yml
vendored
4
.github/workflows/release.yml
vendored
@@ -112,11 +112,9 @@ jobs:
|
||||
ci/macos-install-packages
|
||||
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@v1
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
profile: minimal
|
||||
override: true
|
||||
target: ${{ matrix.target }}
|
||||
|
||||
- name: Use Cross
|
||||
|
@@ -1,5 +1,5 @@
|
||||
13.0.1
|
||||
======
|
||||
TBD
|
||||
===
|
||||
Unreleased changes. Release notes have not yet been written.
|
||||
|
||||
Bug fixes:
|
||||
@@ -8,6 +8,8 @@ Bug fixes:
|
||||
Fix bug when using `-w` with a regex that can match the empty string.
|
||||
* [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911):
|
||||
Disable mmap searching in all non-64-bit environments.
|
||||
* [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236):
|
||||
Fix gitignore parsing bug where a trailing `\/` resulted in an error.
|
||||
|
||||
|
||||
13.0.0 (2021-06-12)
|
||||
|
2
Cargo.lock
generated
2
Cargo.lock
generated
@@ -211,7 +211,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep-regex"
|
||||
version = "0.1.9"
|
||||
version = "0.1.10"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
|
@@ -16,7 +16,7 @@ edition = "2018"
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bstr = "0.2.0"
|
||||
globset = { version = "0.4.7", path = "../globset" }
|
||||
globset = { version = "0.4.9", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
|
@@ -382,7 +382,7 @@ impl DecompressionReader {
|
||||
///
|
||||
/// `close` is also called in `drop` as a last line of defense against
|
||||
/// resource leakage. Any error from the child process is then printed as a
|
||||
/// warning to stderr. This can be avoided by explictly calling `close`
|
||||
/// warning to stderr. This can be avoided by explicitly calling `close`
|
||||
/// before the CommandReader is dropped.
|
||||
pub fn close(&mut self) -> io::Result<()> {
|
||||
match self.rdr {
|
||||
|
@@ -8,7 +8,7 @@ use regex::Regex;
|
||||
/// An error that occurs when parsing a human readable size description.
|
||||
///
|
||||
/// This error provides an end user friendly message describing why the
|
||||
/// description coudln't be parsed and what the expected format is.
|
||||
/// description couldn't be parsed and what the expected format is.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct ParseSizeError {
|
||||
original: String,
|
||||
|
@@ -212,13 +212,13 @@ pub fn is_readable_stdin() -> bool {
|
||||
!is_tty_stdin() && imp()
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdin is believed to be connectted to a tty
|
||||
/// Returns true if and only if stdin is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stdin() -> bool {
|
||||
atty::is(atty::Stream::Stdin)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdout is believed to be connectted to a tty
|
||||
/// Returns true if and only if stdout is believed to be connected to a tty
|
||||
/// or a console.
|
||||
///
|
||||
/// This is useful for when you want your command line program to produce
|
||||
@@ -230,7 +230,7 @@ pub fn is_tty_stdout() -> bool {
|
||||
atty::is(atty::Stream::Stdout)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stderr is believed to be connectted to a tty
|
||||
/// Returns true if and only if stderr is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stderr() -> bool {
|
||||
atty::is(atty::Stream::Stderr)
|
||||
|
@@ -221,7 +221,7 @@ impl CommandReader {
|
||||
///
|
||||
/// `close` is also called in `drop` as a last line of defense against
|
||||
/// resource leakage. Any error from the child process is then printed as a
|
||||
/// warning to stderr. This can be avoided by explictly calling `close`
|
||||
/// warning to stderr. This can be avoided by explicitly calling `close`
|
||||
/// before the CommandReader is dropped.
|
||||
pub fn close(&mut self) -> io::Result<()> {
|
||||
// Dropping stdout closes the underlying file descriptor, which should
|
||||
|
@@ -104,7 +104,7 @@ struct ArgsImp {
|
||||
///
|
||||
/// It's important that this is only built once, since building this goes
|
||||
/// through regex compilation and various types of analyses. That is, if
|
||||
/// you need many of theses (one per thread, for example), it is better to
|
||||
/// you need many of these (one per thread, for example), it is better to
|
||||
/// build it once and then clone it.
|
||||
matcher: PatternMatcher,
|
||||
/// The paths provided at the command line. This is guaranteed to be
|
||||
|
@@ -67,7 +67,7 @@ impl SubjectBuilder {
|
||||
if subj.is_file() {
|
||||
return Some(subj);
|
||||
}
|
||||
// We got nothin. Emit a debug message, but only if this isn't a
|
||||
// We got nothing. Emit a debug message, but only if this isn't a
|
||||
// directory. Otherwise, emitting messages for directories is just
|
||||
// noisy.
|
||||
if !subj.is_dir() {
|
||||
|
@@ -19,7 +19,7 @@ Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
globset = "0.3"
|
||||
globset = "0.4"
|
||||
```
|
||||
|
||||
### Features
|
||||
@@ -78,12 +78,12 @@ assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
|
||||
This crate implements globs by converting them to regular expressions, and
|
||||
executing them with the
|
||||
[`regex`](https://github.com/rust-lang-nursery/regex)
|
||||
[`regex`](https://github.com/rust-lang/regex)
|
||||
crate.
|
||||
|
||||
For single glob matching, performance of this crate should be roughly on par
|
||||
with the performance of the
|
||||
[`glob`](https://github.com/rust-lang-nursery/glob)
|
||||
[`glob`](https://github.com/rust-lang/glob)
|
||||
crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
|
||||
correspond to benchmarks for the `glob` library.)
|
||||
Optimizations in the `regex` crate may propel this library past `glob`,
|
||||
@@ -108,7 +108,7 @@ test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
|
||||
test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
|
||||
```
|
||||
|
||||
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
|
||||
### Comparison with the [`glob`](https://github.com/rust-lang/glob) crate
|
||||
|
||||
* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
|
||||
* Can match non-UTF-8 file paths correctly.
|
||||
|
@@ -20,7 +20,7 @@ bench = false
|
||||
|
||||
[dependencies]
|
||||
crossbeam-utils = "0.8.0"
|
||||
globset = { version = "0.4.7", path = "../globset" }
|
||||
globset = { version = "0.4.9", path = "../globset" }
|
||||
lazy_static = "1.1"
|
||||
log = "0.4.5"
|
||||
memchr = "2.1"
|
||||
|
@@ -57,6 +57,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("dhall", &["*.dhall"]),
|
||||
("diff", &["*.patch", "*.diff"]),
|
||||
("docker", &["*Dockerfile*"]),
|
||||
("dts", &["*.dts", "*.dtsi"]),
|
||||
("dvc", &["Dvcfile", "*.dvc"]),
|
||||
("ebuild", &["*.ebuild"]),
|
||||
("edn", &["*.edn"]),
|
||||
|
@@ -474,10 +474,13 @@ impl GitignoreBuilder {
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..i];
|
||||
if line.as_bytes().last() == Some(&b'/') {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..line.len() - 1];
|
||||
// If the slash was escaped, then remove the escape.
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2236
|
||||
if line.as_bytes().last() == Some(&b'\\') {
|
||||
line = &line[..line.len() - 1];
|
||||
}
|
||||
}
|
||||
glob.actual = line.to_string();
|
||||
|
@@ -147,7 +147,7 @@ impl JSONBuilder {
|
||||
/// is not limited to UTF-8 exclusively, which in turn implies that matches
|
||||
/// may be reported that contain invalid UTF-8. Moreover, this printer may
|
||||
/// also print file paths, and the encoding of file paths is itself not
|
||||
/// guarnateed to be valid UTF-8. Therefore, this printer must deal with the
|
||||
/// guaranteed to be valid UTF-8. Therefore, this printer must deal with the
|
||||
/// presence of invalid UTF-8 somehow. The printer could silently ignore such
|
||||
/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8
|
||||
/// by replacing all invalid sequences with the Unicode replacement character.
|
||||
|
@@ -1594,7 +1594,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// multiple lines.
|
||||
///
|
||||
/// Note that this doesn't just return whether the searcher is in multi
|
||||
/// line mode, but also checks if the mater can match over multiple lines.
|
||||
/// line mode, but also checks if the matcher can match over multiple lines.
|
||||
/// If it can't, then we don't need multi line handling, even if the
|
||||
/// searcher has multi line mode enabled.
|
||||
fn multi_line(&self) -> bool {
|
||||
|
@@ -508,7 +508,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
|
||||
/// multiple lines.
|
||||
///
|
||||
/// Note that this doesn't just return whether the searcher is in multi
|
||||
/// line mode, but also checks if the mater can match over multiple lines.
|
||||
/// line mode, but also checks if the matcher can match over multiple lines.
|
||||
/// If it can't, then we don't need multi line handling, even if the
|
||||
/// searcher has multi line mode enabled.
|
||||
fn multi_line(&self, searcher: &Searcher) -> bool {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-regex"
|
||||
version = "0.1.9" #:version
|
||||
version = "0.1.10" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use Rust's regex library with the 'grep' crate.
|
||||
|
@@ -175,6 +175,36 @@ impl ConfiguredHIR {
|
||||
self.config.crlf && self.expr.is_line_anchored_end()
|
||||
}
|
||||
|
||||
/// Returns the line terminator configured on this expression.
|
||||
///
|
||||
/// When we have beginning/end anchors (NOT line anchors), the fast line
|
||||
/// searching path isn't quite correct. Or at least, doesn't match the
|
||||
/// slow path. Namely, the slow path strips line terminators while the
|
||||
/// fast path does not. Since '$' (when multi-line mode is disabled)
|
||||
/// doesn't match at line boundaries, the existence of a line terminator
|
||||
/// might cause it to not match when it otherwise would with the line
|
||||
/// terminator stripped.
|
||||
///
|
||||
/// Since searching with text anchors is exceptionally rare in the
|
||||
/// context of line oriented searching (multi-line mode is basically
|
||||
/// always enabled), we just disable this optimization when there are
|
||||
/// text anchors. We disable it by not returning a line terminator, since
|
||||
/// without a line terminator, the fast search path can't be executed.
|
||||
///
|
||||
/// See: https://github.com/BurntSushi/ripgrep/issues/2260
|
||||
pub fn line_terminator(&self) -> Option<LineTerminator> {
|
||||
if self.is_any_anchored() {
|
||||
None
|
||||
} else {
|
||||
self.config.line_terminator
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if the underlying HIR has any text anchors.
|
||||
fn is_any_anchored(&self) -> bool {
|
||||
self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
|
||||
}
|
||||
|
||||
/// Builds a regular expression from this HIR expression.
|
||||
pub fn regex(&self) -> Result<Regex, Error> {
|
||||
self.pattern_to_regex(&self.expr.to_string())
|
||||
|
@@ -52,8 +52,12 @@ impl RegexMatcherBuilder {
|
||||
|
||||
let matcher = RegexMatcherImpl::new(&chir)?;
|
||||
log::trace!("final regex: {:?}", matcher.regex());
|
||||
let mut config = self.config.clone();
|
||||
// We override the line terminator in case the configured expr doesn't
|
||||
// support it.
|
||||
config.line_terminator = chir.line_terminator();
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
config,
|
||||
matcher,
|
||||
fast_line_regex,
|
||||
non_matching_bytes,
|
||||
|
@@ -108,7 +108,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
|
||||
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
if self.is_line_by_line_fast() {
|
||||
if dbg!(self.is_line_by_line_fast()) {
|
||||
self.match_by_line_fast(buf)
|
||||
} else {
|
||||
self.match_by_line_slow(buf)
|
||||
|
@@ -1512,4 +1512,31 @@ and exhibited clearly, with a label attached.\
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2260
|
||||
#[test]
|
||||
fn regression_2260() {
|
||||
use grep_regex::RegexMatcherBuilder;
|
||||
|
||||
use crate::SearcherBuilder;
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.line_terminator(Some(b'\n'))
|
||||
.build(r"^\w+$")
|
||||
.unwrap();
|
||||
let mut searcher = SearcherBuilder::new().line_number(true).build();
|
||||
|
||||
let mut matched = false;
|
||||
searcher
|
||||
.search_slice(
|
||||
&matcher,
|
||||
b"GATC\n",
|
||||
crate::sinks::UTF8(|_, _| {
|
||||
matched = true;
|
||||
Ok(true)
|
||||
}),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(matched);
|
||||
}
|
||||
}
|
||||
|
@@ -1118,3 +1118,11 @@ pipc () { # [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compi
|
||||
let expected = " [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compile-arg>...]\n";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2236
|
||||
rgtest!(r2236, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create(".ignore", r"foo\/");
|
||||
dir.create_dir("foo");
|
||||
dir.create("foo/bar", "test\n");
|
||||
cmd.args(&["test"]).assert_err();
|
||||
});
|
||||
|
Reference in New Issue
Block a user