Compare commits

...

6 Commits

Author SHA1 Message Date
Andrew Gallant
2cfb338530 globset-0.4.9 2022-06-10 14:10:34 -04:00
Sergio Benitez
48646e3451 globset: make 'log' an optional feature
PR #1910
2022-06-10 14:10:09 -04:00
Andrew Gallant
985394a19e deps: update to packed_simd_2 0.3.8
It broke on latest nightly. I'm *very* close to just removing the
'simd-accel' feature altogether.

Fixes #2230
2022-06-10 09:39:17 -04:00
jgart
ec36f8c3ff ignore/types: add pants
See: https://www.pantsbuild.org/

PR #2228
2022-06-08 13:29:17 -04:00
jpe90
a726d03641 ignore/types: add hare to default types
PR #2219
2022-05-22 20:08:45 -04:00
Andrew Gallant
91afd4214a printer: fix duplicative replacement in multiline mode
This furthers our kludge of dealing with PCRE2's look-around in the
printer. Because of our bad abstraction boundaries, we added a kludge to
deal with PCRE2 look-around by extending the bytes we search by a fixed
amount to hopefully permit any look-around to operate. But because of
that kludge, we wind up over extending ourselves in some cases and
dragging along those extra bytes.

We had fixed this for simple searching by simply rejecting any matches
past the end point. But we didn't do the same for replacements. So this
commit extends our kludge to replacements.

Thanks to @sonohgong for diagnosing the problem and proposing a fix. I
mostly went with their solution, but adding the new replacement routine
as an internal helper rather than a new APIn in the 'grep-matcher'
crate.

Fixes #2095, Fixes #2208
2022-05-11 14:44:58 -04:00
6 changed files with 146 additions and 27 deletions

8
Cargo.lock generated
View File

@@ -1,5 +1,7 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "aho-corasick"
version = "0.7.18"
@@ -135,7 +137,7 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
[[package]]
name = "globset"
version = "0.4.8"
version = "0.4.9"
dependencies = [
"aho-corasick",
"bstr",
@@ -357,9 +359,9 @@ checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
[[package]]
name = "packed_simd_2"
version = "0.3.7"
version = "0.3.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "defdcfef86dcc44ad208f71d9ff4ce28df6537a4e0d6b0e8e845cb8ca10059a6"
checksum = "a1914cd452d8fccd6f9db48147b29fd4ae05bea9dc5d9ad578509f72415de282"
dependencies = [
"cfg-if",
"libm",

View File

@@ -1,6 +1,6 @@
[package]
name = "globset"
version = "0.4.8" #:version
version = "0.4.9" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Cross platform single glob and glob set matching. Glob set matching is the
@@ -23,7 +23,7 @@ bench = false
aho-corasick = "0.7.3"
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
fnv = "1.0.6"
log = "0.4.5"
log = { version = "0.4.5", optional = true }
regex = { version = "1.1.5", default-features = false, features = ["perf", "std"] }
serde = { version = "1.0.104", optional = true }
@@ -33,5 +33,6 @@ lazy_static = "1"
serde_json = "1.0.45"
[features]
default = ["log"]
simd-accel = []
serde1 = ["serde"]

View File

@@ -125,6 +125,16 @@ mod pathutil;
#[cfg(feature = "serde1")]
mod serde_impl;
#[cfg(feature = "log")]
macro_rules! debug {
($($token:tt)*) => (::log::debug!($($token)*);)
}
#[cfg(not(feature = "log"))]
macro_rules! debug {
($($token:tt)*) => {};
}
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
@@ -413,12 +423,12 @@ impl GlobSet {
required_exts.add(i, ext, p.regex().to_owned());
}
MatchStrategy::Regex => {
log::debug!("glob converted to regex: {:?}", p);
debug!("glob converted to regex: {:?}", p);
regexes.add(i, p.regex().to_owned());
}
}
}
log::debug!(
debug!(
"built glob set; {} literals, {} basenames, {} extensions, \
{} prefixes, {} suffixes, {} required extensions, {} regexes",
lits.0.len(),

View File

@@ -83,6 +83,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
("gzip", &["*.gz", "*.tgz"]),
("h", &["*.h", "*.hh", "*.hpp"]),
("haml", &["*.haml"]),
("hare", &["*.ha"]),
("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
("hbs", &["*.hbs"]),
("hs", &["*.hs", "*.lhs"]),
@@ -161,6 +162,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
("objcpp", &["*.h", "*.mm"]),
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
("org", &["*.org", "*.org_archive"]),
("pants", &["BUILD"]),
("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
("pdf", &["*.pdf"]),
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),

View File

@@ -82,10 +82,10 @@ impl<M: Matcher> Replacer<M> {
dst.clear();
matches.clear();
matcher
.replace_with_captures_at(
replace_with_captures_in_context(
matcher,
subject,
range.start,
range.clone(),
caps,
dst,
|caps, dst| {
@@ -458,3 +458,33 @@ pub fn trim_line_terminator(
*line = line.with_end(end);
}
}
/// Like `Matcher::replace_with_captures_at`, but accepts an end bound.
///
/// See also: `find_iter_at_in_context` for why we need this.
fn replace_with_captures_in_context<M, F>(
matcher: M,
bytes: &[u8],
range: std::ops::Range<usize>,
caps: &mut M::Captures,
dst: &mut Vec<u8>,
mut append: F,
) -> Result<(), M::Error>
where
M: Matcher,
F: FnMut(&M::Captures, &mut Vec<u8>) -> bool,
{
let mut last_match = range.start;
matcher.captures_iter_at(bytes, range.start, caps, |caps| {
let m = caps.get(0).unwrap();
if m.start() >= range.end {
return false;
}
dst.extend(&bytes[last_match..m.start()]);
last_match = m.end();
append(caps, dst)
})?;
let end = std::cmp::min(bytes.len(), range.end);
dst.extend(&bytes[last_match..end]);
Ok(())
}

View File

@@ -1044,3 +1044,77 @@ rgtest!(r1891, |dir: Dir, mut cmd: TestCommand| {
// happen when each match needs to be detected.
eqnice!("1:\n2:\n2:\n", cmd.args(&["-won", "", "test"]).stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/2095
rgtest!(r2095, |dir: Dir, mut cmd: TestCommand| {
dir.create(
"test",
"#!/usr/bin/env bash
zero=one
a=one
if true; then
a=(
a
b
c
)
true
fi
a=two
b=one
});
",
);
cmd.args(&[
"--line-number",
"--multiline",
"--only-matching",
"--replace",
"${value}",
r"^(?P<indent>\s*)a=(?P<value>(?ms:[(].*?[)])|.*?)$",
"test",
]);
let expected = "4:one
8:(
9: a
10: b
11: c
12: )
15:two
";
eqnice!(expected, cmd.stdout());
});
// See: https://github.com/BurntSushi/ripgrep/issues/2208
rgtest!(r2208, |dir: Dir, mut cmd: TestCommand| {
dir.create("test", "# Compile requirements.txt files from all found or specified requirements.in files (compile).
# Use -h to include hashes, -u dep1,dep2... to upgrade specific dependencies, and -U to upgrade all.
pipc () { # [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compile-arg>...]
emulate -L zsh
unset REPLY
if [[ $1 == --help ]] { zpy $0; return }
[[ $ZPY_PROCS ]] || return
local gen_hashes upgrade upgrade_csv
while [[ $1 == -[hUu] ]] {
if [[ $1 == -h ]] { gen_hashes=--generate-hashes; shift }
if [[ $1 == -U ]] { upgrade=1; shift }
if [[ $1 == -u ]] { upgrade=1; upgrade_csv=$2; shift 2 }
}
}
");
cmd.args(&[
"-N",
"-U",
"-r", "$usage",
r#"^(?P<predoc>\n?(# .*\n)*)(alias (?P<aname>pipc)="[^"]+"|(?P<fname>pipc) \(\) \{)( #(?P<usage> .+))?"#,
"test",
]);
let expected = " [-h] [-U|-u <pkgspec>[,<pkgspec>...]] [<reqs-in>...] [-- <pip-compile-arg>...]\n";
eqnice!(expected, cmd.stdout());
});