Compare commits

..

17 Commits

Author SHA1 Message Date
Andrew Gallant
7efa2e46d3 grep-0.2.10 2022-07-15 10:06:53 -04:00
Andrew Gallant
db0b92b62d grep: bump grep-searcher to 0.1.10
This was a result of leaving a stray 'dbg!'.
2022-07-15 10:06:31 -04:00
Andrew Gallant
33b81cac48 grep-searcher-0.1.10 2022-07-15 10:05:46 -04:00
Andrew Gallant
6a13a4f64d searcher: remove stray 'dbg!' 2022-07-15 10:05:20 -04:00
Andrew Gallant
b13d835d95 grep-0.2.9 2022-07-15 10:03:06 -04:00
Andrew Gallant
d53506b7f7 grep: bump 'grep-regex' and 'grep-searcher'
To 0.1.10 and 0.1.9, respectively.
2022-07-15 10:02:41 -04:00
Andrew Gallant
78a35d4d43 grep-searcher-0.1.9 2022-07-15 10:02:24 -04:00
Andrew Gallant
a933d0bc90 searcher: bump grep-regex dep to 0.1.10 2022-07-15 10:02:06 -04:00
Andrew Gallant
2cae30e399 grep-regex-0.1.10 2022-07-15 10:01:42 -04:00
Andrew Gallant
8e57989cd2 regex: fix matching bug when text anchors are used
It turns out that if there are text anchors (that is, \A or \z, or ^/$
when multi-line is disabled), then the "fast" line searching path isn't
quite correct. Since searching without multi-line mode is exceptionally
rare, we just look for the presence of text anchors and specifically
disable the line terminator option in 'grep-regex'. This in turn
inhibits the "fast" line searching path.

Fixes #2260
2022-07-15 09:53:39 -04:00
Andrew Gallant
b9f5835534 ci: switch to dtolnay/rust-toolchain
The actions-rs/toolchain project appears dead. dtolnay's also seems more
sustainable given its simplicity, but it does enough to suit our needs.
2022-07-14 13:48:14 -04:00
tleb
e70778e89d ignore/types: add dts to default types
See: https://devicetree-specification.readthedocs.io/en/v0.3/source-language.html

PR #2255
2022-07-07 12:24:12 -04:00
zhimoe
87c4a2b4b1 doc: fix typo
PR #2248
2022-06-26 18:49:54 -04:00
Kian-Meng Ang
0aa31676e3 doc: fix typos
PR #2245
2022-06-24 09:58:20 -04:00
Andrew Gallant
9f0e88bcb1 ignore: fix gitignore parsing bug for trailing \/
When a glob pattern ended with a \/, and since we permit backslash
escapes, the glob parser gave a "dangling escape" error. Which is weird,
because the \ is clearly not dangling.

The issue is that the layer above the glob parser, the gitignore parser,
was stripping the trailing / so that it wouldn't be part of the matching
logic. Of course, stripping the trailing / while it is escaped without
removing the backslash escape is wrong. So we do that here.

Fixes #2236
2022-06-14 10:40:37 -04:00
Alex Touchet
eb4b389846 globset/readme: update version number and some links
PR #2232
2022-06-11 14:17:32 -04:00
Andrew Gallant
dc337bab0a deps: update to globset 0.4.9 2022-06-10 14:11:20 -04:00
25 changed files with 112 additions and 45 deletions

View File

@@ -98,11 +98,9 @@ jobs:
ci/macos-install-packages ci/macos-install-packages
- name: Install Rust - name: Install Rust
uses: actions-rs/toolchain@v1 uses: dtolnay/rust-toolchain@v1
with: with:
toolchain: ${{ matrix.rust }} toolchain: ${{ matrix.rust }}
profile: minimal
override: true
- name: Use Cross - name: Use Cross
if: matrix.target != '' if: matrix.target != ''
@@ -185,11 +183,9 @@ jobs:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Install Rust - name: Install Rust
uses: actions-rs/toolchain@v1 uses: dtolnay/rust-toolchain@v1
with: with:
toolchain: stable toolchain: stable
override: true
profile: minimal
components: rustfmt components: rustfmt
- name: Check formatting - name: Check formatting
run: | run: |
@@ -202,11 +198,9 @@ jobs:
- name: Checkout repository - name: Checkout repository
uses: actions/checkout@v2 uses: actions/checkout@v2
- name: Install Rust - name: Install Rust
uses: actions-rs/toolchain@v1 uses: dtolnay/rust-toolchain@v1
with: with:
toolchain: stable toolchain: stable
profile: minimal
override: true
- name: Check documentation - name: Check documentation
env: env:
RUSTDOCFLAGS: -D warnings RUSTDOCFLAGS: -D warnings

View File

@@ -112,11 +112,9 @@ jobs:
ci/macos-install-packages ci/macos-install-packages
- name: Install Rust - name: Install Rust
uses: actions-rs/toolchain@v1 uses: dtolnay/rust-toolchain@v1
with: with:
toolchain: ${{ matrix.rust }} toolchain: ${{ matrix.rust }}
profile: minimal
override: true
target: ${{ matrix.target }} target: ${{ matrix.target }}
- name: Use Cross - name: Use Cross

View File

@@ -1,5 +1,5 @@
13.0.1 TBD
====== ===
Unreleased changes. Release notes have not yet been written. Unreleased changes. Release notes have not yet been written.
Bug fixes: Bug fixes:
@@ -8,6 +8,8 @@ Bug fixes:
Fix bug when using `-w` with a regex that can match the empty string. Fix bug when using `-w` with a regex that can match the empty string.
* [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911): * [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911):
Disable mmap searching in all non-64-bit environments. Disable mmap searching in all non-64-bit environments.
* [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236):
Fix gitignore parsing bug where a trailing `\/` resulted in an error.
13.0.0 (2021-06-12) 13.0.0 (2021-06-12)

6
Cargo.lock generated
View File

@@ -152,7 +152,7 @@ dependencies = [
[[package]] [[package]]
name = "grep" name = "grep"
version = "0.2.8" version = "0.2.10"
dependencies = [ dependencies = [
"grep-cli", "grep-cli",
"grep-matcher", "grep-matcher",
@@ -211,7 +211,7 @@ dependencies = [
[[package]] [[package]]
name = "grep-regex" name = "grep-regex"
version = "0.1.9" version = "0.1.10"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"bstr", "bstr",
@@ -224,7 +224,7 @@ dependencies = [
[[package]] [[package]]
name = "grep-searcher" name = "grep-searcher"
version = "0.1.8" version = "0.1.10"
dependencies = [ dependencies = [
"bstr", "bstr",
"bytecount", "bytecount",

View File

@@ -16,7 +16,7 @@ edition = "2018"
[dependencies] [dependencies]
atty = "0.2.11" atty = "0.2.11"
bstr = "0.2.0" bstr = "0.2.0"
globset = { version = "0.4.7", path = "../globset" } globset = { version = "0.4.9", path = "../globset" }
lazy_static = "1.1.0" lazy_static = "1.1.0"
log = "0.4.5" log = "0.4.5"
regex = "1.1" regex = "1.1"

View File

@@ -382,7 +382,7 @@ impl DecompressionReader {
/// ///
/// `close` is also called in `drop` as a last line of defense against /// `close` is also called in `drop` as a last line of defense against
/// resource leakage. Any error from the child process is then printed as a /// resource leakage. Any error from the child process is then printed as a
/// warning to stderr. This can be avoided by explictly calling `close` /// warning to stderr. This can be avoided by explicitly calling `close`
/// before the CommandReader is dropped. /// before the CommandReader is dropped.
pub fn close(&mut self) -> io::Result<()> { pub fn close(&mut self) -> io::Result<()> {
match self.rdr { match self.rdr {

View File

@@ -8,7 +8,7 @@ use regex::Regex;
/// An error that occurs when parsing a human readable size description. /// An error that occurs when parsing a human readable size description.
/// ///
/// This error provides an end user friendly message describing why the /// This error provides an end user friendly message describing why the
/// description coudln't be parsed and what the expected format is. /// description couldn't be parsed and what the expected format is.
#[derive(Clone, Debug, Eq, PartialEq)] #[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseSizeError { pub struct ParseSizeError {
original: String, original: String,

View File

@@ -212,13 +212,13 @@ pub fn is_readable_stdin() -> bool {
!is_tty_stdin() && imp() !is_tty_stdin() && imp()
} }
/// Returns true if and only if stdin is believed to be connectted to a tty /// Returns true if and only if stdin is believed to be connected to a tty
/// or a console. /// or a console.
pub fn is_tty_stdin() -> bool { pub fn is_tty_stdin() -> bool {
atty::is(atty::Stream::Stdin) atty::is(atty::Stream::Stdin)
} }
/// Returns true if and only if stdout is believed to be connectted to a tty /// Returns true if and only if stdout is believed to be connected to a tty
/// or a console. /// or a console.
/// ///
/// This is useful for when you want your command line program to produce /// This is useful for when you want your command line program to produce
@@ -230,7 +230,7 @@ pub fn is_tty_stdout() -> bool {
atty::is(atty::Stream::Stdout) atty::is(atty::Stream::Stdout)
} }
/// Returns true if and only if stderr is believed to be connectted to a tty /// Returns true if and only if stderr is believed to be connected to a tty
/// or a console. /// or a console.
pub fn is_tty_stderr() -> bool { pub fn is_tty_stderr() -> bool {
atty::is(atty::Stream::Stderr) atty::is(atty::Stream::Stderr)

View File

@@ -221,7 +221,7 @@ impl CommandReader {
/// ///
/// `close` is also called in `drop` as a last line of defense against /// `close` is also called in `drop` as a last line of defense against
/// resource leakage. Any error from the child process is then printed as a /// resource leakage. Any error from the child process is then printed as a
/// warning to stderr. This can be avoided by explictly calling `close` /// warning to stderr. This can be avoided by explicitly calling `close`
/// before the CommandReader is dropped. /// before the CommandReader is dropped.
pub fn close(&mut self) -> io::Result<()> { pub fn close(&mut self) -> io::Result<()> {
// Dropping stdout closes the underlying file descriptor, which should // Dropping stdout closes the underlying file descriptor, which should

View File

@@ -104,7 +104,7 @@ struct ArgsImp {
/// ///
/// It's important that this is only built once, since building this goes /// It's important that this is only built once, since building this goes
/// through regex compilation and various types of analyses. That is, if /// through regex compilation and various types of analyses. That is, if
/// you need many of theses (one per thread, for example), it is better to /// you need many of these (one per thread, for example), it is better to
/// build it once and then clone it. /// build it once and then clone it.
matcher: PatternMatcher, matcher: PatternMatcher,
/// The paths provided at the command line. This is guaranteed to be /// The paths provided at the command line. This is guaranteed to be

View File

@@ -67,7 +67,7 @@ impl SubjectBuilder {
if subj.is_file() { if subj.is_file() {
return Some(subj); return Some(subj);
} }
// We got nothin. Emit a debug message, but only if this isn't a // We got nothing. Emit a debug message, but only if this isn't a
// directory. Otherwise, emitting messages for directories is just // directory. Otherwise, emitting messages for directories is just
// noisy. // noisy.
if !subj.is_dir() { if !subj.is_dir() {

View File

@@ -19,7 +19,7 @@ Add this to your `Cargo.toml`:
```toml ```toml
[dependencies] [dependencies]
globset = "0.3" globset = "0.4"
``` ```
### Features ### Features
@@ -78,12 +78,12 @@ assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
This crate implements globs by converting them to regular expressions, and This crate implements globs by converting them to regular expressions, and
executing them with the executing them with the
[`regex`](https://github.com/rust-lang-nursery/regex) [`regex`](https://github.com/rust-lang/regex)
crate. crate.
For single glob matching, performance of this crate should be roughly on par For single glob matching, performance of this crate should be roughly on par
with the performance of the with the performance of the
[`glob`](https://github.com/rust-lang-nursery/glob) [`glob`](https://github.com/rust-lang/glob)
crate. (`*_regex` correspond to benchmarks for this library while `*_glob` crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
correspond to benchmarks for the `glob` library.) correspond to benchmarks for the `glob` library.)
Optimizations in the `regex` crate may propel this library past `glob`, Optimizations in the `regex` crate may propel this library past `glob`,
@@ -108,7 +108,7 @@ test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
test many_short_regex_set ... bench: 186 ns/iter (+/- 11) test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
``` ```
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate ### Comparison with the [`glob`](https://github.com/rust-lang/glob) crate
* Supports alternate "or" globs, e.g., `*.{foo,bar}`. * Supports alternate "or" globs, e.g., `*.{foo,bar}`.
* Can match non-UTF-8 file paths correctly. * Can match non-UTF-8 file paths correctly.

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "grep" name = "grep"
version = "0.2.8" #:version version = "0.2.10" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"] authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """ description = """
Fast line oriented regex searching as a library. Fast line oriented regex searching as a library.
@@ -18,8 +18,8 @@ grep-cli = { version = "0.1.6", path = "../cli" }
grep-matcher = { version = "0.1.5", path = "../matcher" } grep-matcher = { version = "0.1.5", path = "../matcher" }
grep-pcre2 = { version = "0.1.5", path = "../pcre2", optional = true } grep-pcre2 = { version = "0.1.5", path = "../pcre2", optional = true }
grep-printer = { version = "0.1.6", path = "../printer" } grep-printer = { version = "0.1.6", path = "../printer" }
grep-regex = { version = "0.1.9", path = "../regex" } grep-regex = { version = "0.1.10", path = "../regex" }
grep-searcher = { version = "0.1.8", path = "../searcher" } grep-searcher = { version = "0.1.10", path = "../searcher" }
[dev-dependencies] [dev-dependencies]
termcolor = "1.0.4" termcolor = "1.0.4"

View File

@@ -20,7 +20,7 @@ bench = false
[dependencies] [dependencies]
crossbeam-utils = "0.8.0" crossbeam-utils = "0.8.0"
globset = { version = "0.4.7", path = "../globset" } globset = { version = "0.4.9", path = "../globset" }
lazy_static = "1.1" lazy_static = "1.1"
log = "0.4.5" log = "0.4.5"
memchr = "2.1" memchr = "2.1"

View File

@@ -57,6 +57,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
("dhall", &["*.dhall"]), ("dhall", &["*.dhall"]),
("diff", &["*.patch", "*.diff"]), ("diff", &["*.patch", "*.diff"]),
("docker", &["*Dockerfile*"]), ("docker", &["*Dockerfile*"]),
("dts", &["*.dts", "*.dtsi"]),
("dvc", &["Dvcfile", "*.dvc"]), ("dvc", &["Dvcfile", "*.dvc"]),
("ebuild", &["*.ebuild"]), ("ebuild", &["*.ebuild"]),
("edn", &["*.edn"]), ("edn", &["*.edn"]),

View File

@@ -474,10 +474,13 @@ impl GitignoreBuilder {
} }
// If it ends with a slash, then this should only match directories, // If it ends with a slash, then this should only match directories,
// but the slash should otherwise not be used while globbing. // but the slash should otherwise not be used while globbing.
if let Some((i, c)) = line.char_indices().rev().nth(0) { if line.as_bytes().last() == Some(&b'/') {
if c == '/' {
glob.is_only_dir = true; glob.is_only_dir = true;
line = &line[..i]; line = &line[..line.len() - 1];
// If the slash was escaped, then remove the escape.
// See: https://github.com/BurntSushi/ripgrep/issues/2236
if line.as_bytes().last() == Some(&b'\\') {
line = &line[..line.len() - 1];
} }
} }
glob.actual = line.to_string(); glob.actual = line.to_string();

View File

@@ -147,7 +147,7 @@ impl JSONBuilder {
/// is not limited to UTF-8 exclusively, which in turn implies that matches /// is not limited to UTF-8 exclusively, which in turn implies that matches
/// may be reported that contain invalid UTF-8. Moreover, this printer may /// may be reported that contain invalid UTF-8. Moreover, this printer may
/// also print file paths, and the encoding of file paths is itself not /// also print file paths, and the encoding of file paths is itself not
/// guarnateed to be valid UTF-8. Therefore, this printer must deal with the /// guaranteed to be valid UTF-8. Therefore, this printer must deal with the
/// presence of invalid UTF-8 somehow. The printer could silently ignore such /// presence of invalid UTF-8 somehow. The printer could silently ignore such
/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8 /// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8
/// by replacing all invalid sequences with the Unicode replacement character. /// by replacing all invalid sequences with the Unicode replacement character.

View File

@@ -1594,7 +1594,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
/// multiple lines. /// multiple lines.
/// ///
/// Note that this doesn't just return whether the searcher is in multi /// Note that this doesn't just return whether the searcher is in multi
/// line mode, but also checks if the mater can match over multiple lines. /// line mode, but also checks if the matcher can match over multiple lines.
/// If it can't, then we don't need multi line handling, even if the /// If it can't, then we don't need multi line handling, even if the
/// searcher has multi line mode enabled. /// searcher has multi line mode enabled.
fn multi_line(&self) -> bool { fn multi_line(&self) -> bool {

View File

@@ -508,7 +508,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
/// multiple lines. /// multiple lines.
/// ///
/// Note that this doesn't just return whether the searcher is in multi /// Note that this doesn't just return whether the searcher is in multi
/// line mode, but also checks if the mater can match over multiple lines. /// line mode, but also checks if the matcher can match over multiple lines.
/// If it can't, then we don't need multi line handling, even if the /// If it can't, then we don't need multi line handling, even if the
/// searcher has multi line mode enabled. /// searcher has multi line mode enabled.
fn multi_line(&self, searcher: &Searcher) -> bool { fn multi_line(&self, searcher: &Searcher) -> bool {

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "grep-regex" name = "grep-regex"
version = "0.1.9" #:version version = "0.1.10" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"] authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """ description = """
Use Rust's regex library with the 'grep' crate. Use Rust's regex library with the 'grep' crate.

View File

@@ -175,6 +175,36 @@ impl ConfiguredHIR {
self.config.crlf && self.expr.is_line_anchored_end() self.config.crlf && self.expr.is_line_anchored_end()
} }
/// Returns the line terminator configured on this expression.
///
/// When we have beginning/end anchors (NOT line anchors), the fast line
/// searching path isn't quite correct. Or at least, doesn't match the
/// slow path. Namely, the slow path strips line terminators while the
/// fast path does not. Since '$' (when multi-line mode is disabled)
/// doesn't match at line boundaries, the existence of a line terminator
/// might cause it to not match when it otherwise would with the line
/// terminator stripped.
///
/// Since searching with text anchors is exceptionally rare in the
/// context of line oriented searching (multi-line mode is basically
/// always enabled), we just disable this optimization when there are
/// text anchors. We disable it by not returning a line terminator, since
/// without a line terminator, the fast search path can't be executed.
///
/// See: https://github.com/BurntSushi/ripgrep/issues/2260
pub fn line_terminator(&self) -> Option<LineTerminator> {
if self.is_any_anchored() {
None
} else {
self.config.line_terminator
}
}
/// Returns true if and only if the underlying HIR has any text anchors.
fn is_any_anchored(&self) -> bool {
self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
}
/// Builds a regular expression from this HIR expression. /// Builds a regular expression from this HIR expression.
pub fn regex(&self) -> Result<Regex, Error> { pub fn regex(&self) -> Result<Regex, Error> {
self.pattern_to_regex(&self.expr.to_string()) self.pattern_to_regex(&self.expr.to_string())

View File

@@ -52,8 +52,12 @@ impl RegexMatcherBuilder {
let matcher = RegexMatcherImpl::new(&chir)?; let matcher = RegexMatcherImpl::new(&chir)?;
log::trace!("final regex: {:?}", matcher.regex()); log::trace!("final regex: {:?}", matcher.regex());
let mut config = self.config.clone();
// We override the line terminator in case the configured expr doesn't
// support it.
config.line_terminator = chir.line_terminator();
Ok(RegexMatcher { Ok(RegexMatcher {
config: self.config.clone(), config,
matcher, matcher,
fast_line_regex, fast_line_regex,
non_matching_bytes, non_matching_bytes,

View File

@@ -1,6 +1,6 @@
[package] [package]
name = "grep-searcher" name = "grep-searcher"
version = "0.1.8" #:version version = "0.1.10" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"] authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """ description = """
Fast line oriented regex searching as a library. Fast line oriented regex searching as a library.
@@ -23,7 +23,7 @@ log = "0.4.5"
memmap = { package = "memmap2", version = "0.5.3" } memmap = { package = "memmap2", version = "0.5.3" }
[dev-dependencies] [dev-dependencies]
grep-regex = { version = "0.1.9", path = "../regex" } grep-regex = { version = "0.1.10", path = "../regex" }
regex = "1.1" regex = "1.1"
[features] [features]

View File

@@ -1512,4 +1512,31 @@ and exhibited clearly, with a label attached.\
) )
.unwrap(); .unwrap();
} }
// See: https://github.com/BurntSushi/ripgrep/issues/2260
#[test]
fn regression_2260() {
use grep_regex::RegexMatcherBuilder;
use crate::SearcherBuilder;
let matcher = RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(r"^\w+$")
.unwrap();
let mut searcher = SearcherBuilder::new().line_number(true).build();
let mut matched = false;
searcher
.search_slice(
&matcher,
b"GATC\n",
crate::sinks::UTF8(|_, _| {
matched = true;
Ok(true)
}),
)
.unwrap();
assert!(matched);
}
} }

View File

@@ -1118,3 +1118,11 @@ pipc () { # [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compi
let expected = " [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compile-arg>...]\n"; let expected = " [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compile-arg>...]\n";
eqnice!(expected, cmd.stdout()); eqnice!(expected, cmd.stdout());
}); });
// See: https://github.com/BurntSushi/ripgrep/issues/2236
rgtest!(r2236, |dir: Dir, mut cmd: TestCommand| {
dir.create(".ignore", r"foo\/");
dir.create_dir("foo");
dir.create("foo/bar", "test\n");
cmd.args(&["test"]).assert_err();
});