Compare commits

...

11 Commits

Author SHA1 Message Date
Andrew Gallant
78a35d4d43 grep-searcher-0.1.9 2022-07-15 10:02:24 -04:00
Andrew Gallant
a933d0bc90 searcher: bump grep-regex dep to 0.1.10 2022-07-15 10:02:06 -04:00
Andrew Gallant
2cae30e399 grep-regex-0.1.10 2022-07-15 10:01:42 -04:00
Andrew Gallant
8e57989cd2 regex: fix matching bug when text anchors are used
It turns out that if there are text anchors (that is, \A or \z, or ^/$
when multi-line is disabled), then the "fast" line searching path isn't
quite correct. Since searching without multi-line mode is exceptionally
rare, we just look for the presence of text anchors and specifically
disable the line terminator option in 'grep-regex'. This in turn
inhibits the "fast" line searching path.

Fixes #2260
2022-07-15 09:53:39 -04:00
Andrew Gallant
b9f5835534 ci: switch to dtolnay/rust-toolchain
The actions-rs/toolchain project appears dead. dtolnay's also seems more
sustainable given its simplicity, but it does enough to suit our needs.
2022-07-14 13:48:14 -04:00
tleb
e70778e89d ignore/types: add dts to default types
See: https://devicetree-specification.readthedocs.io/en/v0.3/source-language.html

PR #2255
2022-07-07 12:24:12 -04:00
zhimoe
87c4a2b4b1 doc: fix typo
PR #2248
2022-06-26 18:49:54 -04:00
Kian-Meng Ang
0aa31676e3 doc: fix typos
PR #2245
2022-06-24 09:58:20 -04:00
Andrew Gallant
9f0e88bcb1 ignore: fix gitignore parsing bug for trailing \/
When a glob pattern ended with a \/, and since we permit backslash
escapes, the glob parser gave a "dangling escape" error. Which is weird,
because the \ is clearly not dangling.

The issue is that the layer above the glob parser, the gitignore parser,
was stripping the trailing / so that it wouldn't be part of the matching
logic. Of course, stripping the trailing / while it is escaped without
removing the backslash escape is wrong. So we do that here.

Fixes #2236
2022-06-14 10:40:37 -04:00
Alex Touchet
eb4b389846 globset/readme: update version number and some links
PR #2232
2022-06-11 14:17:32 -04:00
Andrew Gallant
dc337bab0a deps: update to globset 0.4.9 2022-06-10 14:11:20 -04:00
25 changed files with 109 additions and 42 deletions

View File

@@ -98,11 +98,9 @@ jobs:
ci/macos-install-packages
- name: Install Rust
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
- name: Use Cross
if: matrix.target != ''
@@ -185,11 +183,9 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Rust
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
override: true
profile: minimal
components: rustfmt
- name: Check formatting
run: |
@@ -202,11 +198,9 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Rust
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@v1
with:
toolchain: stable
profile: minimal
override: true
- name: Check documentation
env:
RUSTDOCFLAGS: -D warnings

View File

@@ -112,11 +112,9 @@ jobs:
ci/macos-install-packages
- name: Install Rust
uses: actions-rs/toolchain@v1
uses: dtolnay/rust-toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
target: ${{ matrix.target }}
- name: Use Cross

View File

@@ -1,5 +1,5 @@
13.0.1
======
TBD
===
Unreleased changes. Release notes have not yet been written.
Bug fixes:
@@ -8,6 +8,8 @@ Bug fixes:
Fix bug when using `-w` with a regex that can match the empty string.
* [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911):
Disable mmap searching in all non-64-bit environments.
* [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236):
Fix gitignore parsing bug where a trailing `\/` resulted in an error.
13.0.0 (2021-06-12)

4
Cargo.lock generated
View File

@@ -211,7 +211,7 @@ dependencies = [
[[package]]
name = "grep-regex"
version = "0.1.9"
version = "0.1.10"
dependencies = [
"aho-corasick",
"bstr",
@@ -224,7 +224,7 @@ dependencies = [
[[package]]
name = "grep-searcher"
version = "0.1.8"
version = "0.1.9"
dependencies = [
"bstr",
"bytecount",

View File

@@ -16,7 +16,7 @@ edition = "2018"
[dependencies]
atty = "0.2.11"
bstr = "0.2.0"
globset = { version = "0.4.7", path = "../globset" }
globset = { version = "0.4.9", path = "../globset" }
lazy_static = "1.1.0"
log = "0.4.5"
regex = "1.1"

View File

@@ -382,7 +382,7 @@ impl DecompressionReader {
///
/// `close` is also called in `drop` as a last line of defense against
/// resource leakage. Any error from the child process is then printed as a
/// warning to stderr. This can be avoided by explictly calling `close`
/// warning to stderr. This can be avoided by explicitly calling `close`
/// before the CommandReader is dropped.
pub fn close(&mut self) -> io::Result<()> {
match self.rdr {

View File

@@ -8,7 +8,7 @@ use regex::Regex;
/// An error that occurs when parsing a human readable size description.
///
/// This error provides an end user friendly message describing why the
/// description coudln't be parsed and what the expected format is.
/// description couldn't be parsed and what the expected format is.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ParseSizeError {
original: String,

View File

@@ -212,13 +212,13 @@ pub fn is_readable_stdin() -> bool {
!is_tty_stdin() && imp()
}
/// Returns true if and only if stdin is believed to be connectted to a tty
/// Returns true if and only if stdin is believed to be connected to a tty
/// or a console.
pub fn is_tty_stdin() -> bool {
atty::is(atty::Stream::Stdin)
}
/// Returns true if and only if stdout is believed to be connectted to a tty
/// Returns true if and only if stdout is believed to be connected to a tty
/// or a console.
///
/// This is useful for when you want your command line program to produce
@@ -230,7 +230,7 @@ pub fn is_tty_stdout() -> bool {
atty::is(atty::Stream::Stdout)
}
/// Returns true if and only if stderr is believed to be connectted to a tty
/// Returns true if and only if stderr is believed to be connected to a tty
/// or a console.
pub fn is_tty_stderr() -> bool {
atty::is(atty::Stream::Stderr)

View File

@@ -221,7 +221,7 @@ impl CommandReader {
///
/// `close` is also called in `drop` as a last line of defense against
/// resource leakage. Any error from the child process is then printed as a
/// warning to stderr. This can be avoided by explictly calling `close`
/// warning to stderr. This can be avoided by explicitly calling `close`
/// before the CommandReader is dropped.
pub fn close(&mut self) -> io::Result<()> {
// Dropping stdout closes the underlying file descriptor, which should

View File

@@ -104,7 +104,7 @@ struct ArgsImp {
///
/// It's important that this is only built once, since building this goes
/// through regex compilation and various types of analyses. That is, if
/// you need many of theses (one per thread, for example), it is better to
/// you need many of these (one per thread, for example), it is better to
/// build it once and then clone it.
matcher: PatternMatcher,
/// The paths provided at the command line. This is guaranteed to be

View File

@@ -67,7 +67,7 @@ impl SubjectBuilder {
if subj.is_file() {
return Some(subj);
}
// We got nothin. Emit a debug message, but only if this isn't a
// We got nothing. Emit a debug message, but only if this isn't a
// directory. Otherwise, emitting messages for directories is just
// noisy.
if !subj.is_dir() {

View File

@@ -19,7 +19,7 @@ Add this to your `Cargo.toml`:
```toml
[dependencies]
globset = "0.3"
globset = "0.4"
```
### Features
@@ -78,12 +78,12 @@ assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
This crate implements globs by converting them to regular expressions, and
executing them with the
[`regex`](https://github.com/rust-lang-nursery/regex)
[`regex`](https://github.com/rust-lang/regex)
crate.
For single glob matching, performance of this crate should be roughly on par
with the performance of the
[`glob`](https://github.com/rust-lang-nursery/glob)
[`glob`](https://github.com/rust-lang/glob)
crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
correspond to benchmarks for the `glob` library.)
Optimizations in the `regex` crate may propel this library past `glob`,
@@ -108,7 +108,7 @@ test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
```
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
### Comparison with the [`glob`](https://github.com/rust-lang/glob) crate
* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
* Can match non-UTF-8 file paths correctly.

View File

@@ -20,7 +20,7 @@ bench = false
[dependencies]
crossbeam-utils = "0.8.0"
globset = { version = "0.4.7", path = "../globset" }
globset = { version = "0.4.9", path = "../globset" }
lazy_static = "1.1"
log = "0.4.5"
memchr = "2.1"

View File

@@ -57,6 +57,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
("dhall", &["*.dhall"]),
("diff", &["*.patch", "*.diff"]),
("docker", &["*Dockerfile*"]),
("dts", &["*.dts", "*.dtsi"]),
("dvc", &["Dvcfile", "*.dvc"]),
("ebuild", &["*.ebuild"]),
("edn", &["*.edn"]),

View File

@@ -474,10 +474,13 @@ impl GitignoreBuilder {
}
// If it ends with a slash, then this should only match directories,
// but the slash should otherwise not be used while globbing.
if let Some((i, c)) = line.char_indices().rev().nth(0) {
if c == '/' {
glob.is_only_dir = true;
line = &line[..i];
if line.as_bytes().last() == Some(&b'/') {
glob.is_only_dir = true;
line = &line[..line.len() - 1];
// If the slash was escaped, then remove the escape.
// See: https://github.com/BurntSushi/ripgrep/issues/2236
if line.as_bytes().last() == Some(&b'\\') {
line = &line[..line.len() - 1];
}
}
glob.actual = line.to_string();

View File

@@ -147,7 +147,7 @@ impl JSONBuilder {
/// is not limited to UTF-8 exclusively, which in turn implies that matches
/// may be reported that contain invalid UTF-8. Moreover, this printer may
/// also print file paths, and the encoding of file paths is itself not
/// guarnateed to be valid UTF-8. Therefore, this printer must deal with the
/// guaranteed to be valid UTF-8. Therefore, this printer must deal with the
/// presence of invalid UTF-8 somehow. The printer could silently ignore such
/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8
/// by replacing all invalid sequences with the Unicode replacement character.

View File

@@ -1594,7 +1594,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
/// multiple lines.
///
/// Note that this doesn't just return whether the searcher is in multi
/// line mode, but also checks if the mater can match over multiple lines.
/// line mode, but also checks if the matter can match over multiple lines.
/// If it can't, then we don't need multi line handling, even if the
/// searcher has multi line mode enabled.
fn multi_line(&self) -> bool {

View File

@@ -508,7 +508,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
/// multiple lines.
///
/// Note that this doesn't just return whether the searcher is in multi
/// line mode, but also checks if the mater can match over multiple lines.
/// line mode, but also checks if the matter can match over multiple lines.
/// If it can't, then we don't need multi line handling, even if the
/// searcher has multi line mode enabled.
fn multi_line(&self, searcher: &Searcher) -> bool {

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-regex"
version = "0.1.9" #:version
version = "0.1.10" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use Rust's regex library with the 'grep' crate.

View File

@@ -175,6 +175,36 @@ impl ConfiguredHIR {
self.config.crlf && self.expr.is_line_anchored_end()
}
/// Returns the line terminator configured on this expression.
///
/// When we have beginning/end anchors (NOT line anchors), the fast line
/// searching path isn't quite correct. Or at least, doesn't match the
/// slow path. Namely, the slow path strips line terminators while the
/// fast path does not. Since '$' (when multi-line mode is disabled)
/// doesn't match at line boundaries, the existence of a line terminator
/// might cause it to not match when it otherwise would with the line
/// terminator stripped.
///
/// Since searching with text anchors is exceptionally rare in the
/// context of line oriented searching (multi-line mode is basically
/// always enabled), we just disable this optimization when there are
/// text anchors. We disable it by not returning a line terminator, since
/// without a line terminator, the fast search path can't be executed.
///
/// See: https://github.com/BurntSushi/ripgrep/issues/2260
pub fn line_terminator(&self) -> Option<LineTerminator> {
if self.is_any_anchored() {
None
} else {
self.config.line_terminator
}
}
/// Returns true if and only if the underlying HIR has any text anchors.
fn is_any_anchored(&self) -> bool {
self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
}
/// Builds a regular expression from this HIR expression.
pub fn regex(&self) -> Result<Regex, Error> {
self.pattern_to_regex(&self.expr.to_string())

View File

@@ -52,8 +52,12 @@ impl RegexMatcherBuilder {
let matcher = RegexMatcherImpl::new(&chir)?;
log::trace!("final regex: {:?}", matcher.regex());
let mut config = self.config.clone();
// We override the line terminator in case the configured expr doesn't
// support it.
config.line_terminator = chir.line_terminator();
Ok(RegexMatcher {
config: self.config.clone(),
config,
matcher,
fast_line_regex,
non_matching_bytes,

View File

@@ -1,6 +1,6 @@
[package]
name = "grep-searcher"
version = "0.1.8" #:version
version = "0.1.9" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@@ -23,7 +23,7 @@ log = "0.4.5"
memmap = { package = "memmap2", version = "0.5.3" }
[dev-dependencies]
grep-regex = { version = "0.1.9", path = "../regex" }
grep-regex = { version = "0.1.10", path = "../regex" }
regex = "1.1"
[features]

View File

@@ -108,7 +108,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
if dbg!(self.is_line_by_line_fast()) {
self.match_by_line_fast(buf)
} else {
self.match_by_line_slow(buf)

View File

@@ -1512,4 +1512,31 @@ and exhibited clearly, with a label attached.\
)
.unwrap();
}
// See: https://github.com/BurntSushi/ripgrep/issues/2260
#[test]
fn regression_2260() {
use grep_regex::RegexMatcherBuilder;
use crate::SearcherBuilder;
let matcher = RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(r"^\w+$")
.unwrap();
let mut searcher = SearcherBuilder::new().line_number(true).build();
let mut matched = false;
searcher
.search_slice(
&matcher,
b"GATC\n",
crate::sinks::UTF8(|_, _| {
matched = true;
Ok(true)
}),
)
.unwrap();
assert!(matched);
}
}

View File

@@ -1118,3 +1118,11 @@ pipc () { # [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compi
let expected = " [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compile-arg>...]\n";
eqnice!(expected, cmd.stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/2236
rgtest!(r2236, |dir: Dir, mut cmd: TestCommand| {
dir.create(".ignore", r"foo\/");
dir.create_dir("foo");
dir.create("foo/bar", "test\n");
cmd.args(&["test"]).assert_err();
});