Mirror of https://github.com/BurntSushi/ripgrep.git, synced 2025-07-30 11:41:57 -07:00

Compare commits: grep-searc... → globset-0. (93 commits)

Commits (93):

1d35859861
601e122e9f
efb2e8ce1e
8d464e5c78
d67809d6c4
6abb962f0d
6d95c130d5
4782ebd5e0
4993d29a16
23adbd6795
9df8ab42b1
cb7501ff11
3b66f37a31
3eccb7c363
f30a30867e
7313dca472
99bf2b01dc
ee1360cc07
db6bb21a62
da7c81fb96
a4e3d56de1
7c83b90f95
97b5b7769c
2708f9e81d
f3241fd657
cfe357188d
792451e331
7dafd58a32
b92550b67b
383d3b336b
fc7e634395
c9584b035b
f34fd5c4b6
d51c6c005a
ea05881319
1d4e3df19c
0f6181d309
e902e2fef4
07cbfee225
d675844510
54e609d657
43bbcca06f
ad9bfdd981
36194c2742
0c1cbd99f3
96cfc0ed13
da8ecddce9
545a7dc759
16f783832e
f4d07b9cbd
0b6eccf4d3
3ac4541e9f
7b72e982f2
a68db3ac02
b12905daca
ca740d9ace
e80c102dee
8ac66a9e04
04dde9a4eb
81341702af
d34c5c88a7
4b8aa91ae5
a775b493fd
a6dbff502f
51480d57a6
d9bd261be8
9d62eb997a
e028ea3792
1035f6b1ff
a7f1276021
4fcb1b2202
949092fd22
4a7e7094ad
fc0d9b90a9
335aa4937a
803c447845
c5415adbe8
251376597f
e593f5b7ee
6b19be2477
041544853c
a7ae9e4043
595e7845b8
44fb9fce2c
339c46a6ed
fe97c0a152
826f3fad5b
bc55049327
d58e9353fc
ca60fef4db
a25307d6c8
b80947a8b3
ad793a0d8f

.github/dependabot.yml (vendored, 6 changed lines)
@@ -1,6 +0,0 @@
-version: 2
-updates:
-  - package-ecosystem: "github-actions"
-    directory: "/"
-    schedule:
-      interval: "weekly"

.github/workflows/ci.yml (vendored, 28 changed lines)
@@ -42,31 +42,31 @@ jobs:
         - win-gnu
         include:
         - build: pinned
-          os: ubuntu-22.04
+          os: ubuntu-latest
-          rust: 1.65.0
+          rust: 1.70.0
         - build: stable
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: stable
         - build: beta
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: beta
         - build: nightly
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: nightly
         - build: nightly-musl
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: nightly
           target: x86_64-unknown-linux-musl
         - build: nightly-32
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: nightly
           target: i686-unknown-linux-gnu
         - build: nightly-mips
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: nightly
           target: mips64-unknown-linux-gnuabi64
         - build: nightly-arm
-          os: ubuntu-22.04
+          os: ubuntu-latest
           rust: nightly
           # For stripping release binaries:
           # docker run --rm -v $PWD/target:/target:Z \
@@ -75,7 +75,7 @@ jobs:
           # /target/arm-unknown-linux-gnueabihf/debug/rg
           target: arm-unknown-linux-gnueabihf
         - build: macos
-          os: macos-12
+          os: macos-latest
           rust: nightly
         - build: win-msvc
           os: windows-2022
@@ -88,12 +88,12 @@ jobs:
       uses: actions/checkout@v3

     - name: Install packages (Ubuntu)
-      if: matrix.os == 'ubuntu-22.04'
+      if: matrix.os == 'ubuntu-latest'
       run: |
         ci/ubuntu-install-packages

     - name: Install packages (macOS)
-      if: matrix.os == 'macos-12'
+      if: matrix.os == 'macos-latest'
       run: |
         ci/macos-install-packages

@@ -178,7 +178,7 @@ jobs:

   rustfmt:
     name: rustfmt
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
     - name: Checkout repository
       uses: actions/checkout@v3
@@ -192,7 +192,7 @@ jobs:

   docs:
     name: Docs
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     steps:
     - name: Checkout repository
       uses: actions/checkout@v3

.github/workflows/release.yml (vendored, 52 changed lines)
@@ -24,31 +24,24 @@ on:
 jobs:
   create-release:
     name: create-release
-    runs-on: ubuntu-22.04
+    runs-on: ubuntu-latest
     # env:
       # Set to force version number, e.g., when no tag exists.
       # RG_VERSION: TEST-0.0.0
     outputs:
-      upload_url: ${{ steps.release.outputs.upload_url }}
       rg_version: ${{ env.RG_VERSION }}
     steps:
+    - uses: actions/checkout@v3
     - name: Get the release version from the tag
       shell: bash
       if: env.RG_VERSION == ''
       run: |
-        # Apparently, this is the right way to get a tag name. Really?
-        #
-        # See: https://github.community/t5/GitHub-Actions/How-to-get-just-the-tag-name/m-p/32167/highlight/true#M1027
-        echo "RG_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
+        echo "RG_VERSION=$GITHUB_REF_NAME" >> $GITHUB_ENV
         echo "version is: ${{ env.RG_VERSION }}"
     - name: Create GitHub release
-      id: release
-      uses: actions/create-release@v1
       env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GH_TOKEN: ${{ github.token }}
-      with:
-        tag_name: ${{ env.RG_VERSION }}
-        release_name: ${{ env.RG_VERSION }}
+      run: gh release create ${{ env.RG_VERSION }}

   build-release:
     name: build-release
@@ -71,27 +64,27 @@ jobs:
       build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc]
       include:
       - build: linux
-        os: ubuntu-22.04
+        os: ubuntu-latest
         rust: nightly
         target: x86_64-unknown-linux-musl
       - build: linux-arm
-        os: ubuntu-22.04
+        os: ubuntu-latest
         rust: nightly
         target: arm-unknown-linux-gnueabihf
      - build: macos
-        os: macos-12
+        os: macos-latest
         rust: nightly
         target: x86_64-apple-darwin
       - build: win-msvc
-        os: windows-2022
+        os: windows-latest
         rust: nightly
         target: x86_64-pc-windows-msvc
       - build: win-gnu
-        os: windows-2022
+        os: windows-latest
         rust: nightly-x86_64-gnu
         target: x86_64-pc-windows-gnu
       - build: win32-msvc
-        os: windows-2022
+        os: windows-latest
         rust: nightly
         target: i686-pc-windows-msvc

@@ -100,12 +93,12 @@ jobs:
       uses: actions/checkout@v3

     - name: Install packages (Ubuntu)
-      if: matrix.os == 'ubuntu-22.04'
+      if: matrix.os == 'ubuntu-latest'
       run: |
         ci/ubuntu-install-packages

     - name: Install packages (macOS)
-      if: matrix.os == 'macos-12'
+      if: matrix.os == 'macos-latest'
       run: |
         ci/macos-install-packages

@@ -132,8 +125,8 @@ jobs:
     - name: Build release binary
       run: ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}

-    - name: Strip release binary (linux and macos)
-      if: matrix.build == 'linux' || matrix.build == 'macos'
+    - name: Strip release binary (linux, macos and macos-arm)
+      if: matrix.build == 'linux' || matrix.os == 'macos'
       run: strip "target/${{ matrix.target }}/release/rg"

     - name: Strip release binary (arm)
@@ -157,24 +150,23 @@ jobs:
         cp "$outdir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
         cp complete/_rg "$staging/complete/"

-        if [ "${{ matrix.os }}" = "windows-2022" ]; then
+        if [ "${{ matrix.os }}" = "windows-latest" ]; then
           cp "target/${{ matrix.target }}/release/rg.exe" "$staging/"
           7z a "$staging.zip" "$staging"
+          certutil -hashfile "$staging.zip" SHA256 > "$staging.zip.sha256"
           echo "ASSET=$staging.zip" >> $GITHUB_ENV
+          echo "ASSET_SUM=$staging.zip.sha256" >> $GITHUB_ENV
         else
           # The man page is only generated on Unix systems. ¯\_(ツ)_/¯
           cp "$outdir"/rg.1 "$staging/doc/"
           cp "target/${{ matrix.target }}/release/rg" "$staging/"
           tar czf "$staging.tar.gz" "$staging"
+          shasum -a 256 "$staging.tar.gz" > "$staging.tar.gz.sha256"
           echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV
+          echo "ASSET_SUM=$staging.tar.gz.sha256" >> $GITHUB_ENV
         fi

     - name: Upload release archive
-      uses: actions/upload-release-asset@v1.0.2
       env:
-        GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        GH_TOKEN: ${{ github.token }}
-      with:
-        upload_url: ${{ needs.create-release.outputs.upload_url }}
-        asset_path: ${{ env.ASSET }}
-        asset_name: ${{ env.ASSET }}
-        asset_content_type: application/octet-stream
+      run: gh release upload ${{ needs.create-release.outputs.rg_version }} ${{ env.ASSET }} ${{ env.ASSET_SUM }}
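
Note: the workflow change above swaps the archived `actions/create-release` and `actions/upload-release-asset` actions for the `gh` CLI that GitHub runners ship with. A minimal sketch of the same flow run by hand, assuming a hypothetical tag `v0.0.0-test` and asset `rg-test.tar.gz`:

```
$ gh release create v0.0.0-test
$ shasum -a 256 rg-test.tar.gz > rg-test.tar.gz.sha256
$ gh release upload v0.0.0-test rg-test.tar.gz rg-test.tar.gz.sha256
```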

CHANGELOG.md (28 changed lines)
@@ -2,14 +2,42 @@ TBD
 ===
 Unreleased changes. Release notes have not yet been written.

+**BREAKING CHANGES**
+
+* `rg -C1 -A2` used to be equivalent to `rg -A2`, but now it is equivalent to
+  `rg -B1 -A2`. That is, `-A` and `-B` no longer completely override `-C`.
+  Instead, they only partially override `-C`.
+
+Feature enhancements:
+
+* Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V
+* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790):
+  Add new `--stop-on-nonmatch` flag.
+* [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195):
+  When `extra-verbose` mode is enabled in zsh, show extra file type info.
+* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409):
+  Added installation instructions for `winget`.
+
 Bug fixes:

 * [BUG #1891](https://github.com/BurntSushi/ripgrep/issues/1891):
   Fix bug when using `-w` with a regex that can match the empty string.
 * [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911):
   Disable mmap searching in all non-64-bit environments.
+* [BUG #2108](https://github.com/BurntSushi/ripgrep/issues/2108):
+  Improve docs for `-r/--replace` syntax.
+* [BUG #2198](https://github.com/BurntSushi/ripgrep/issues/2198):
+  Fix bug where `--no-ignore-dot` would not ignore `.rgignore`.
+* [BUG #2288](https://github.com/BurntSushi/ripgrep/issues/2288):
+  `-A` and `-B` now only each partially override `-C`.
 * [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236):
   Fix gitignore parsing bug where a trailing `\/` resulted in an error.
+* [BUG #2243](https://github.com/BurntSushi/ripgrep/issues/2243):
+  Fix `--sort` flag for values other than `path`.
+* [BUG #2480](https://github.com/BurntSushi/ripgrep/issues/2480):
+  Fix bug when using inline regex flags with `-e/--regexp`.
+* [BUG #2523](https://github.com/BurntSushi/ripgrep/issues/2523):
+  Make executable searching take `.com` into account on Windows.


 13.0.0 (2021-06-12)
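
A quick illustration of the breaking context-flag change noted above; the pattern is a placeholder, and the session is illustrative rather than taken from the source:

```
# Old behavior: a later -A2 completely replaced -C1, so no leading context.
$ rg -C1 -A2 PATTERN    # previously equivalent to: rg -A2 PATTERN
# New behavior: -A only overrides the "after" half of -C.
$ rg -C1 -A2 PATTERN    # now equivalent to: rg -B1 -A2 PATTERN
```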

Cargo.lock (generated, 187 changed lines)
@@ -4,24 +4,13 @@ version = 3

 [[package]]
 name = "aho-corasick"
-version = "0.7.20"
+version = "1.0.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
 dependencies = [
  "memchr",
 ]

-[[package]]
-name = "atty"
-version = "0.2.14"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
-dependencies = [
- "hermit-abi",
- "libc",
- "winapi",
-]
-
 [[package]]
 name = "base64"
 version = "0.20.0"
@@ -36,12 +25,11 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"

 [[package]]
 name = "bstr"
-version = "1.1.0"
+version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b45ea9b00a7b3f2988e9a65ad3917e62123c38dba709b666506207be96d1790b"
+checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
 dependencies = [
  "memchr",
- "once_cell",
  "regex-automata",
  "serde",
 ]
@@ -54,9 +42,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"

 [[package]]
 name = "cc"
-version = "1.0.78"
+version = "1.0.79"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a20104e2335ce8a659d6dd92a51a767a0c062599c73b343fd152cb401e828c3d"
+checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
 dependencies = [
  "jobserver",
 ]
@@ -81,9 +69,9 @@ dependencies = [

 [[package]]
 name = "crossbeam-channel"
-version = "0.5.6"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c2dd04ddaf88237dc3b8d8f9a3c1004b506b54b3313403944054d23c0870c521"
+checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
 dependencies = [
  "cfg-if",
  "crossbeam-utils",
@@ -91,18 +79,18 @@ dependencies = [

 [[package]]
 name = "crossbeam-utils"
-version = "0.8.14"
+version = "0.8.16"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
+checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
 dependencies = [
  "cfg-if",
 ]

 [[package]]
 name = "encoding_rs"
-version = "0.8.31"
+version = "0.8.32"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9852635589dc9f9ea1b6fe9f05b50ef208c85c834a562f0c6abb1c475736ec2b"
+checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
 dependencies = [
  "cfg-if",
  "packed_simd_2",
@@ -123,21 +111,15 @@ version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"

-[[package]]
-name = "fs_extra"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
-
 [[package]]
 name = "glob"
-version = "0.3.0"
+version = "0.3.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
+checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"

 [[package]]
 name = "globset"
-version = "0.4.10"
+version = "0.4.11"
 dependencies = [
  "aho-corasick",
  "bstr",
@@ -152,7 +134,7 @@ dependencies = [

 [[package]]
 name = "grep"
-version = "0.2.10"
+version = "0.2.12"
 dependencies = [
  "grep-cli",
  "grep-matcher",
@@ -166,9 +148,8 @@ dependencies = [

 [[package]]
 name = "grep-cli"
-version = "0.1.7"
+version = "0.1.8"
 dependencies = [
- "atty",
  "bstr",
  "globset",
  "lazy_static",
@@ -192,12 +173,13 @@ name = "grep-pcre2"
 version = "0.1.6"
 dependencies = [
  "grep-matcher",
+ "log",
  "pcre2",
 ]

 [[package]]
 name = "grep-printer"
-version = "0.1.6"
+version = "0.1.7"
 dependencies = [
  "base64",
  "bstr",
@@ -217,9 +199,8 @@ dependencies = [
  "bstr",
  "grep-matcher",
  "log",
- "regex",
+ "regex-automata",
  "regex-syntax",
- "thread_local",
 ]

 [[package]]
@@ -237,18 +218,9 @@ dependencies = [
  "regex",
 ]

-[[package]]
-name = "hermit-abi"
-version = "0.1.19"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
-dependencies = [
- "libc",
-]
-
 [[package]]
 name = "ignore"
-version = "0.4.19"
+version = "0.4.20"
 dependencies = [
  "crossbeam-channel",
  "globset",
@@ -264,18 +236,17 @@ dependencies = [

 [[package]]
 name = "itoa"
-version = "1.0.5"
+version = "1.0.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fad582f4b9e86b6caa621cabeb0963332d92eea04729ab12892c2533951e6440"
+checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"

 [[package]]
 name = "jemalloc-sys"
-version = "0.5.2+5.3.0-patched"
+version = "0.5.3+5.3.0-patched"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "134163979b6eed9564c98637b710b40979939ba351f59952708234ea11b5f3f8"
+checksum = "f9bd5d616ea7ed58b571b2e209a65759664d7fb021a0819d7a790afc67e47ca1"
 dependencies = [
  "cc",
- "fs_extra",
  "libc",
 ]

@@ -291,9 +262,9 @@ dependencies = [

 [[package]]
 name = "jobserver"
-version = "0.1.25"
+version = "0.1.26"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "068b1ee6743e4d11fb9c6a1e6064b3693a1b600e7f5f5988047d98b3dc9fb90b"
+checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
 dependencies = [
  "libc",
 ]
@@ -306,9 +277,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"

 [[package]]
 name = "libc"
-version = "0.2.139"
+version = "0.2.147"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
+checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"

 [[package]]
 name = "libm"
@@ -318,12 +289,9 @@ checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a"

 [[package]]
 name = "log"
-version = "0.4.17"
+version = "0.4.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "abb12e687cfb44aa40f41fc3978ef76448f9b6038cad6aef4259d3c095a2382e"
+checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
-dependencies = [
- "cfg-if",
-]

 [[package]]
 name = "memchr"
@@ -333,18 +301,18 @@ checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"

 [[package]]
 name = "memmap2"
-version = "0.5.8"
+version = "0.5.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4b182332558b18d807c4ce1ca8ca983b34c3ee32765e47b3f0f69b90355cc1dc"
+checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
 dependencies = [
  "libc",
 ]

 [[package]]
 name = "once_cell"
-version = "1.17.0"
+version = "1.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
+checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"

 [[package]]
 name = "packed_simd_2"
@@ -358,9 +326,9 @@ dependencies = [

 [[package]]
 name = "pcre2"
-version = "0.2.3"
+version = "0.2.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "85b30f2f69903b439dd9dc9e824119b82a55bf113b29af8d70948a03c1b11ab1"
+checksum = "486aca7e74edb8cab09a48d461177f450a5cca3b55e61d139f7552190e2bbcf5"
 dependencies = [
  "libc",
  "log",
@@ -370,9 +338,9 @@ dependencies = [

 [[package]]
 name = "pcre2-sys"
-version = "0.2.5"
+version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dec30e5e9ec37eb8fbf1dea5989bc957fd3df56fbee5061aa7b7a99dbb37b722"
+checksum = "ae234f441970dbd52d4e29bee70f3b56ca83040081cb2b55b7df772b16e0b06e"
 dependencies = [
  "cc",
  "libc",
@@ -381,50 +349,56 @@ dependencies = [

 [[package]]
 name = "pkg-config"
-version = "0.3.26"
+version = "0.3.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6ac9a59f73473f1b8d852421e59e64809f025994837ef743615c6d0c5b305160"
+checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"

 [[package]]
 name = "proc-macro2"
-version = "1.0.49"
+version = "1.0.63"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "57a8eca9f9c4ffde41714334dee777596264c7825420f521abc92b5b5deb63a5"
+checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
 dependencies = [
  "unicode-ident",
 ]

 [[package]]
 name = "quote"
-version = "1.0.23"
+version = "1.0.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
+checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
 dependencies = [
  "proc-macro2",
 ]

 [[package]]
 name = "regex"
-version = "1.7.0"
+version = "1.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e076559ef8e241f2ae3479e36f97bd5741c0330689e217ad51ce2c76808b868a"
+checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
 dependencies = [
  "aho-corasick",
  "memchr",
  "regex-syntax",
 ]

-[[package]]
-name = "regex-automata"
-version = "0.1.10"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
-
 [[package]]
 name = "regex-syntax"
-version = "0.6.28"
+version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"

 [[package]]
 name = "ripgrep"
@@ -437,7 +411,6 @@ dependencies = [
  "jemallocator",
  "lazy_static",
  "log",
- "regex",
  "serde",
  "serde_derive",
  "serde_json",
@@ -447,9 +420,9 @@ dependencies = [

 [[package]]
 name = "ryu"
-version = "1.0.12"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7b4b9743ed687d4b4bcedf9ff5eaa7398495ae14e61cba0a295704edbc7decde"
+checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"

 [[package]]
 name = "same-file"
@@ -462,18 +435,18 @@ dependencies = [

 [[package]]
 name = "serde"
-version = "1.0.152"
+version = "1.0.166"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bb7d1f0d3021d347a83e556fc4683dea2ea09d87bccdf88ff5c12545d89d5efb"
+checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
 dependencies = [
  "serde_derive",
 ]

 [[package]]
 name = "serde_derive"
-version = "1.0.152"
+version = "1.0.166"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "af487d118eecd09402d70a5d72551860e788df87b464af30e5ea6a38c75c541e"
+checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -482,9 +455,9 @@ dependencies = [

 [[package]]
 name = "serde_json"
-version = "1.0.91"
+version = "1.0.100"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "877c235533714907a8c2464236f5c4b2a17262ef1bd71f38f35ea592c8da6883"
+checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
 dependencies = [
  "itoa",
  "ryu",
@@ -499,9 +472,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"

 [[package]]
 name = "syn"
-version = "1.0.107"
+version = "2.0.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
+checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -510,9 +483,9 @@ dependencies = [

 [[package]]
 name = "termcolor"
-version = "1.1.3"
+version = "1.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
+checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
 dependencies = [
  "winapi-util",
 ]
@@ -528,18 +501,19 @@ dependencies = [

 [[package]]
 name = "thread_local"
-version = "1.1.4"
+version = "1.1.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
+checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
 dependencies = [
+ "cfg-if",
  "once_cell",
 ]

 [[package]]
 name = "unicode-ident"
-version = "1.0.6"
+version = "1.0.10"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
+checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"

 [[package]]
 name = "unicode-width"
@@ -549,12 +523,11 @@ checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"

 [[package]]
 name = "walkdir"
-version = "2.3.2"
+version = "2.3.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
+checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
 dependencies = [
  "same-file",
- "winapi",
  "winapi-util",
 ]

Cargo.toml (16 changed lines)
@@ -13,11 +13,18 @@ repository = "https://github.com/BurntSushi/ripgrep"
 keywords = ["regex", "grep", "egrep", "search", "pattern"]
 categories = ["command-line-utilities", "text-processing"]
 license = "Unlicense OR MIT"
-exclude = ["HomebrewFormula"]
+exclude = [
+  "HomebrewFormula",
+  "/.github/",
+  "/ci/",
+  "/pkg/",
+  "/benchsuite/",
+  "/scripts/",
+]
 build = "build.rs"
 autotests = false
 edition = "2018"
-rust-version = "1.65"
+rust-version = "1.70"

 [[bin]]
 bench = false
@@ -42,12 +49,11 @@ members = [
 ]

 [dependencies]
-bstr = "1.1.0"
+bstr = "1.6.0"
-grep = { version = "0.2.8", path = "crates/grep" }
+grep = { version = "0.2.12", path = "crates/grep" }
 ignore = { version = "0.4.19", path = "crates/ignore" }
 lazy_static = "1.1.0"
 log = "0.4.5"
-regex = "1.3.5"
 serde_json = "1.0.23"
 termcolor = "1.1.0"

@@ -6,6 +6,7 @@ image = "burntsushi/cross:i686-unknown-linux-gnu"

 [target.mips64-unknown-linux-gnuabi64]
 image = "burntsushi/cross:mips64-unknown-linux-gnuabi64"
+build-std = true

 [target.arm-unknown-linux-gnueabihf]
 image = "burntsushi/cross:arm-unknown-linux-gnueabihf"

GUIDE.md (7 changed lines)
@@ -567,12 +567,15 @@ $ cat $HOME/.ripgreprc
 --type-add
 web:*.{html,css,js}*

+# Search hidden files / directories (e.g. dotfiles) by default
+--hidden
+
 # Using glob patterns to include/exclude files or folders
---glob=!git/*
+--glob=!.git/*

 # or
 --glob
-!git/*
+!.git/*

 # Set the colors.
 --colors=line:none
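
As the guide documents, a ripgreprc like the one above only takes effect when the `RIPGREP_CONFIG_PATH` environment variable points at it; the pattern below is a placeholder:

```
$ export RIPGREP_CONFIG_PATH=$HOME/.ripgreprc
$ rg somepattern   # --hidden and the .git/ exclusion now apply by default
```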

README.md (55 changed lines)
@@ -228,17 +228,25 @@ If you're a **Windows Scoop** user, then you can install ripgrep from the
 $ scoop install ripgrep
 ```

+If you're a **Windows Winget** user, then you can install ripgrep from the
+[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep)
+repository:
+
+```
+$ winget install BurntSushi.ripgrep.MSVC
+```
+
 If you're an **Arch Linux** user, then you can install ripgrep from the official repos:

 ```
-$ pacman -S ripgrep
+$ sudo pacman -S ripgrep
 ```

 If you're a **Gentoo** user, you can install ripgrep from the
 [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):

 ```
-$ emerge sys-apps/ripgrep
+$ sudo emerge sys-apps/ripgrep
 ```

 If you're a **Fedora** user, you can install ripgrep from official
@@ -259,6 +267,7 @@ If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from
 [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):

 ```
+$ sudo yum install -y yum-utils
 $ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
 $ sudo yum install ripgrep
 ```
@@ -268,14 +277,13 @@ If you're a **Nix** user, you can install ripgrep from

 ```
 $ nix-env --install ripgrep
-$ # (Or using the attribute name, which is also ripgrep.)
 ```

 If you're a **Guix** user, you can install ripgrep from the official
 package collection:

 ```
-$ guix install ripgrep
+$ sudo guix install ripgrep
 ```

 If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
@@ -287,8 +295,10 @@ $ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgre
 $ sudo dpkg -i ripgrep_13.0.0_amd64.deb
 ```

-If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
-[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
+If you run Debian stable, ripgrep is [officially maintained by
+Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may
+be older than the `deb` package available in the previous step.

 ```
 $ sudo apt-get install ripgrep
 ```
@@ -306,11 +316,18 @@ seem to work right and generate a number of very strange bug reports that I
 don't know how to fix and don't have the time to fix. Therefore, it is no
 longer a recommended installation option.)

+If you're an **ALT** user, you can install ripgrep from the
+[official repo](https://packages.altlinux.org/en/search?name=ripgrep):
+
+```
+$ sudo apt-get install ripgrep
+```
+
 If you're a **FreeBSD** user, then you can install ripgrep from the
 [official ports](https://www.freshports.org/textproc/ripgrep/):

 ```
-# pkg install ripgrep
+$ sudo pkg install ripgrep
 ```

 If you're an **OpenBSD** user, then you can install ripgrep from the
@@ -324,26 +341,26 @@ If you're a **NetBSD** user, then you can install ripgrep from
 [pkgsrc](https://pkgsrc.se/textproc/ripgrep):

 ```
-# pkgin install ripgrep
+$ sudo pkgin install ripgrep
 ```

 If you're a **Haiku x86_64** user, then you can install ripgrep from the
 [official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):

 ```
-$ pkgman install ripgrep
+$ sudo pkgman install ripgrep
 ```

 If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
 same port as Haiku x86_64 using the x86 secondary architecture build:

 ```
-$ pkgman install ripgrep_x86
+$ sudo pkgman install ripgrep_x86
 ```

 If you're a **Rust programmer**, ripgrep can be installed with `cargo`.

-* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
+* Note that the minimum supported version of Rust for ripgrep is **1.70.0**,
   although ripgrep may work with older versions.
 * Note that the binary may be bigger than expected because it contains debug
   symbols. This is intentional. To remove debug symbols and therefore reduce
@@ -358,7 +375,7 @@ $ cargo install ripgrep

 ripgrep is written in Rust, so you'll need to grab a
 [Rust installation](https://www.rust-lang.org/) in order to compile it.
-ripgrep compiles with Rust 1.65.0 (stable) or newer. In general, ripgrep tracks
+ripgrep compiles with Rust 1.70.0 (stable) or newer. In general, ripgrep tracks
 the latest stable release of the Rust compiler.

 To build ripgrep:
@@ -430,12 +447,20 @@ $ cargo test --all
 from the repository root.


+### Related tools
+
+* [delta](https://github.com/dandavison/delta) is a syntax highlighting
+  pager that supports the `rg --json` output format. So all you need to do to
+  make it work is `rg --json pattern | delta`. See [delta's manual section on
+  grep](https://dandavison.github.io/delta/grep.html) for more details.
+
+
 ### Vulnerability reporting

 For reporting a security vulnerability, please
-[contact Andrew Gallant](https://blog.burntsushi.net/about/),
-which has my email address and PGP public key if you wish to send an encrypted
-message.
+[contact Andrew Gallant](https://blog.burntsushi.net/about/).
+The contact page has my email address and PGP public key if you wish to send an
+encrypted message.


 ### Translations

build.rs (28 changed lines)
@@ -48,6 +48,34 @@ fn main() {
     if let Some(rev) = git_revision_hash() {
         println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev);
     }
+    // Embed a Windows manifest and set some linker options. The main reason
+    // for this is to enable long path support on Windows. This still, I
+    // believe, requires enabling long path support in the registry. But if
+    // that's enabled, then this will let ripgrep use C:\... style paths that
+    // are longer than 260 characters.
+    set_windows_exe_options();
+}
+
+fn set_windows_exe_options() {
+    static MANIFEST: &str = "pkg/windows/Manifest.xml";
+
+    let Ok(target_os) = env::var("CARGO_CFG_TARGET_OS") else { return };
+    let Ok(target_env) = env::var("CARGO_CFG_TARGET_ENV") else { return };
+    if !(target_os == "windows" && target_env == "msvc") {
+        return;
+    }
+
+    let Ok(mut manifest) = env::current_dir() else { return };
+    manifest.push(MANIFEST);
+    let Some(manifest) = manifest.to_str() else { return };
+
+    println!("cargo:rerun-if-changed={}", MANIFEST);
+    // Embed the Windows application manifest file.
+    println!("cargo:rustc-link-arg-bin=rg=/MANIFEST:EMBED");
+    println!("cargo:rustc-link-arg-bin=rg=/MANIFESTINPUT:{manifest}");
+    // Turn linker warnings into errors. Helps debugging, otherwise the
+    // warnings get squashed (I believe).
+    println!("cargo:rustc-link-arg-bin=rg=/WX");
 }

 fn git_revision_hash() -> Option<String> {

complete/_rg (11 changed lines)
@@ -30,7 +30,7 @@ _rg() {
     [[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] ||
     # (--[imnp]* => --ignore*, --messages, --no-*, --pcre2-unicode)
     [[ $PREFIX$SUFFIX == --[imnp]* ]] ||
-    zstyle -t ":complete:$curcontext:*" complete-all
+    zstyle -t ":completion:${curcontext}:" complete-all
   then
     no=
   fi
@@ -319,6 +319,7 @@ _rg() {
     '(-q --quiet)'{-q,--quiet}'[suppress normal output]'
     '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
     '*'{-u,--unrestricted}'[reduce level of "smart" searching]'
+    '--stop-on-nonmatch[stop on first non-matching line after a matching one]'

     + operand # Operands
     '(--files --type-list file regexp)1: :_guard "^-*" pattern'
@@ -432,9 +433,13 @@ _rg_types() {
   local -a expl
   local -aU _types

-  _types=( ${(@)${(f)"$( _call_program types rg --type-list )"}%%:*} )
+  _types=( ${(@)${(f)"$( _call_program types $words[1] --type-list )"}//:[[:space:]]##/:} )

-  _wanted types expl 'file type' compadd -a "$@" - _types
+  if zstyle -t ":completion:${curcontext}:types" extra-verbose; then
+    _describe -t types 'file type' _types
+  else
+    _wanted types expl 'file type' compadd "$@" - ${(@)_types%%:*}
+  fi
 }

 _rg "$@"
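
To opt into the `extra-verbose` file-type descriptions that this completion change enables, a zsh user sets the style in their `.zshrc`; one plausible incantation (the exact context pattern depends on the user's completion setup):

```
# Show file type descriptions when completing rg --type values.
zstyle ':completion:*:*:rg:*:types' extra-verbose true
```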

@@ -1,6 +1,6 @@
 [package]
 name = "grep-cli"
-version = "0.1.7" #:version
+version = "0.1.8" #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Utilities for search oriented command line applications.
@@ -14,8 +14,7 @@ license = "Unlicense OR MIT"
 edition = "2018"

 [dependencies]
-atty = "0.2.11"
-bstr = "1.1.0"
+bstr = "1.6.0"
 globset = { version = "0.4.10", path = "../globset" }
 lazy_static = "1.1.0"
 log = "0.4.5"
@@ -18,7 +18,7 @@ pub struct DecompressionMatcherBuilder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// A representation of a single command for decompressing data
|
/// A representation of a single command for decompressing data
|
||||||
/// out-of-proccess.
|
/// out-of-process.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
struct DecompressionCommand {
|
struct DecompressionCommand {
|
||||||
/// The glob that matches this command.
|
/// The glob that matches this command.
|
||||||
@@ -132,7 +132,7 @@ impl DecompressionMatcherBuilder {
|
|||||||
A: AsRef<OsStr>,
|
A: AsRef<OsStr>,
|
||||||
{
|
{
|
||||||
let glob = glob.to_string();
|
let glob = glob.to_string();
|
||||||
let bin = resolve_binary(Path::new(program.as_ref()))?;
|
let bin = try_resolve_binary(Path::new(program.as_ref()))?;
|
||||||
let args =
|
let args =
|
||||||
args.into_iter().map(|a| a.as_ref().to_os_string()).collect();
|
args.into_iter().map(|a| a.as_ref().to_os_string()).collect();
|
||||||
self.commands.push(DecompressionCommand { glob, bin, args });
|
self.commands.push(DecompressionCommand { glob, bin, args });
|
||||||
@@ -421,6 +421,34 @@ impl io::Read for DecompressionReader {
|
|||||||
/// On non-Windows, this is a no-op.
|
/// On non-Windows, this is a no-op.
|
||||||
pub fn resolve_binary<P: AsRef<Path>>(
|
pub fn resolve_binary<P: AsRef<Path>>(
|
||||||
prog: P,
|
prog: P,
|
||||||
|
) -> Result<PathBuf, CommandError> {
|
||||||
|
if !cfg!(windows) {
|
||||||
|
return Ok(prog.as_ref().to_path_buf());
|
||||||
|
}
|
||||||
|
try_resolve_binary(prog)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resolves a path to a program to a path by searching for the program in
|
||||||
|
/// `PATH`.
|
||||||
|
///
|
||||||
|
/// If the program could not be resolved, then an error is returned.
|
||||||
|
///
|
||||||
|
/// The purpose of doing this instead of passing the path to the program
|
||||||
|
/// directly to Command::new is that Command::new will hand relative paths
|
||||||
|
/// to CreateProcess on Windows, which will implicitly search the current
|
||||||
|
/// working directory for the executable. This could be undesirable for
|
||||||
|
/// security reasons. e.g., running ripgrep with the -z/--search-zip flag on an
|
||||||
|
/// untrusted directory tree could result in arbitrary programs executing on
|
||||||
|
/// Windows.
|
||||||
|
///
|
||||||
|
/// Note that this could still return a relative path if PATH contains a
|
||||||
|
/// relative path. We permit this since it is assumed that the user has set
|
||||||
|
/// this explicitly, and thus, desires this behavior.
|
||||||
|
///
|
||||||
|
/// If `check_exists` is false or the path is already an absolute path this
|
||||||
|
/// will return immediately.
|
||||||
|
fn try_resolve_binary<P: AsRef<Path>>(
|
||||||
|
prog: P,
|
||||||
) -> Result<PathBuf, CommandError> {
|
) -> Result<PathBuf, CommandError> {
|
||||||
use std::env;
|
use std::env;
|
||||||
|
|
||||||
@@ -433,7 +461,7 @@ pub fn resolve_binary<P: AsRef<Path>>(
     }

     let prog = prog.as_ref();
-    if !cfg!(windows) || prog.is_absolute() {
+    if prog.is_absolute() {
         return Ok(prog.to_path_buf());
     }
     let syspaths = match env::var_os("PATH") {
@@ -455,9 +483,11 @@ pub fn resolve_binary<P: AsRef<Path>>(
             return Ok(abs_prog.to_path_buf());
         }
         if abs_prog.extension().is_none() {
-            let abs_prog = abs_prog.with_extension("exe");
-            if is_exe(&abs_prog) {
-                return Ok(abs_prog.to_path_buf());
+            for extension in ["com", "exe"] {
+                let abs_prog = abs_prog.with_extension(extension);
+                if is_exe(&abs_prog) {
+                    return Ok(abs_prog.to_path_buf());
+                }
             }
         }
     }
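Note: the two hunks above split the Windows-only work into `try_resolve_binary` and teach the `PATH` probe to try both `com` and `exe` extensions. A minimal, self-contained sketch of the same probing idea follows; it is not ripgrep's actual code: `find_in_path` is a hypothetical name and `is_exe` here is a simplified stand-in for the real executability check.

    use std::path::{Path, PathBuf};

    // Simplified stand-in for a real executability check.
    fn is_exe(path: &Path) -> bool {
        path.is_file()
    }

    fn find_in_path(prog: &str) -> Option<PathBuf> {
        let syspaths = std::env::var_os("PATH")?;
        for dir in std::env::split_paths(&syspaths) {
            let abs_prog = dir.join(prog);
            if is_exe(&abs_prog) {
                return Some(abs_prog);
            }
            // As in the hunk above, probe Windows executable extensions
            // in order when the name has none.
            if abs_prog.extension().is_none() {
                for extension in ["com", "exe"] {
                    let candidate = abs_prog.with_extension(extension);
                    if is_exe(&candidate) {
                        return Some(candidate);
                    }
                }
            }
        }
        None
    }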
@@ -165,6 +165,8 @@ mod pattern;
 mod process;
 mod wtr;

+use std::io::IsTerminal;
+
 pub use crate::decompress::{
     resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
     DecompressionReader, DecompressionReaderBuilder,
@@ -215,7 +217,7 @@ pub fn is_readable_stdin() -> bool {
 /// Returns true if and only if stdin is believed to be connected to a tty
 /// or a console.
 pub fn is_tty_stdin() -> bool {
-    atty::is(atty::Stream::Stdin)
+    std::io::stdin().is_terminal()
 }

 /// Returns true if and only if stdout is believed to be connected to a tty
@@ -227,11 +229,11 @@ pub fn is_tty_stdin() -> bool {
 /// implementations of `ls` will often show one item per line when stdout is
 /// redirected, but will condensed output when printing to a tty.
 pub fn is_tty_stdout() -> bool {
-    atty::is(atty::Stream::Stdout)
+    std::io::stdout().is_terminal()
 }

 /// Returns true if and only if stderr is believed to be connected to a tty
 /// or a console.
 pub fn is_tty_stderr() -> bool {
-    atty::is(atty::Stream::Stderr)
+    std::io::stderr().is_terminal()
 }
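Note: these hunks replace the `atty` crate with the standard library's `IsTerminal` trait, stabilized in Rust 1.70. A small sketch of how the trait is used on any standard stream:

    use std::io::IsTerminal;

    fn main() {
        // Each standard stream can report whether it is connected to a
        // terminal, with no external dependency.
        if std::io::stdout().is_terminal() {
            println!("stdout is a tty; enable line numbers and colors");
        } else {
            println!("stdout is redirected; use plain output");
        }
    }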
@@ -632,6 +632,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     flag_sort(&mut args);
     flag_sortr(&mut args);
     flag_stats(&mut args);
+    flag_stop_on_nonmatch(&mut args);
    flag_text(&mut args);
     flag_threads(&mut args);
     flag_trim(&mut args);
@@ -698,7 +699,7 @@ fn flag_after_context(args: &mut Vec<RGArg>) {
         "\
 Show NUM lines after each match.

-This overrides the --context and --passthru flags.
+This overrides the --passthru flag and partially overrides --context.
 "
     );
     let arg = RGArg::flag("after-context", "NUM")
@@ -706,8 +707,7 @@ This overrides the --context and --passthru flags.
         .help(SHORT)
         .long_help(LONG)
         .number()
-        .overrides("passthru")
-        .overrides("context");
+        .overrides("passthru");
     args.push(arg);
 }

@@ -768,7 +768,7 @@ fn flag_before_context(args: &mut Vec<RGArg>) {
         "\
 Show NUM lines before each match.

-This overrides the --context and --passthru flags.
+This overrides the --passthru flag and partially overrides --context.
 "
     );
     let arg = RGArg::flag("before-context", "NUM")
@@ -776,8 +776,7 @@ This overrides the --context and --passthru flags.
         .help(SHORT)
         .long_help(LONG)
         .number()
-        .overrides("passthru")
-        .overrides("context");
+        .overrides("passthru");
     args.push(arg);
 }

@@ -1009,8 +1008,7 @@ fn flag_context(args: &mut Vec<RGArg>) {
 Show NUM lines before and after each match. This is equivalent to providing
 both the -B/--before-context and -A/--after-context flags with the same value.

-This overrides both the -B/--before-context and -A/--after-context flags,
-in addition to the --passthru flag.
+This overrides the --passthru flag.
 "
     );
     let arg = RGArg::flag("context", "NUM")
@@ -1018,9 +1016,7 @@ in addition to the --passthru flag.
         .help(SHORT)
         .long_help(LONG)
         .number()
-        .overrides("passthru")
-        .overrides("before-context")
-        .overrides("after-context");
+        .overrides("passthru");
     args.push(arg);
 }

@@ -1711,6 +1707,8 @@ fn flag_line_number(args: &mut Vec<RGArg>) {
         "\
 Show line numbers (1-based). This is enabled by default when searching in a
 terminal.
+
+This flag overrides --no-line-number.
 "
     );
     let arg = RGArg::switch("line-number")
@@ -1725,6 +1723,8 @@ terminal.
         "\
 Suppress line numbers. This is enabled by default when not searching in a
 terminal.
+
+This flag overrides --line-number.
 "
     );
     let arg = RGArg::switch("no-line-number")
@@ -1927,13 +1927,16 @@ Nevertheless, if you only care about matches spanning at most one line, then it
 is always better to disable multiline mode.

 This flag can be disabled with --no-multiline.
+
+This overrides the --stop-on-nonmatch flag.
 "
     );
     let arg = RGArg::switch("multiline")
         .short("U")
         .help(SHORT)
         .long_help(LONG)
-        .overrides("no-multiline");
+        .overrides("no-multiline")
+        .overrides("stop-on-nonmatch");
     args.push(arg);

     let arg = RGArg::switch("no-multiline").hidden().overrides("multiline");
@@ -2583,8 +2586,8 @@ Do not print anything to stdout. If a match is found in a file, then ripgrep
 will stop searching. This is useful when ripgrep is used only for its exit
 code (which will be an error if no matches are found).

-When --files is used, then ripgrep will stop finding files after finding the
-first file that matches all ignore rules.
+When --files is used, ripgrep will stop finding files after finding the
+first file that does not match any ignore rules.
 "
     );
     let arg = RGArg::switch("quiet").short("q").help(SHORT).long_help(LONG);
@@ -2647,6 +2650,17 @@ replacement string. Capture group indices are numbered based on the position of
 the opening parenthesis of the group, where the leftmost such group is $1. The
 special $0 group corresponds to the entire match.

+The name of a group is formed by taking the longest string of letters, numbers
+and underscores (i.e. [_0-9A-Za-z]) after the $. For example, $1a will be
+replaced with the group named '1a', not the group at index 1. If the group's
+name contains characters that aren't letters, numbers or underscores, or you
+want to immediately follow the group with another string, the name should be
+put inside braces. For example, ${1}a will take the content of the group at
+index 1 and append 'a' to the end of it.
+
+If an index or name does not refer to a valid capture group, it will be
+replaced with an empty string.
+
 In shells such as Bash and zsh, you should wrap the pattern in single quotes
 instead of double quotes. Otherwise, capture group indices will be replaced by
 expanded shell variables which will most likely be empty.
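Note: the help text added above documents the capture-group expansion rules of Rust's `regex` crate, which ripgrep uses for replacements. A quick demonstration of the `$1a` versus `${1}a` distinction, using the `regex` crate directly as a sketch rather than ripgrep's own code path:

    use regex::Regex;

    fn main() {
        let re = Regex::new(r"(\w+)@example\.com").unwrap();
        // `$1a` names the group "1a", which doesn't exist, so it expands
        // to the empty string...
        assert_eq!(re.replace("alice@example.com", "$1a"), "");
        // ...while `${1}a` takes group 1 and appends a literal "a".
        assert_eq!(re.replace("alice@example.com", "${1}a"), "alicea");
    }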
@@ -2844,6 +2858,25 @@ This flag can be disabled with --no-stats.
     args.push(arg);
 }

+fn flag_stop_on_nonmatch(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Stop searching after a non-match.";
+    const LONG: &str = long!(
+        "\
+Enabling this option will cause ripgrep to stop reading a file once it
+encounters a non-matching line after it has encountered a matching line.
+This is useful if it is expected that all matches in a given file will be on
+sequential lines, for example due to the lines being sorted.
+
+This overrides the -U/--multiline flag.
+"
+    );
+    let arg = RGArg::switch("stop-on-nonmatch")
+        .help(SHORT)
+        .long_help(LONG)
+        .overrides("multiline");
+    args.push(arg);
+}
+
 fn flag_text(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Search binary files as if they were text.";
     const LONG: &str = long!(
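Note: the new flag is plumbed through to the line searcher later in this diff (see the `.stop_on_nonmatch(...)` builder call below). A hedged sketch of the behavior using the grep-regex and grep-searcher crates, assuming versions that expose the `stop_on_nonmatch` builder knob introduced alongside this flag:

    use grep_regex::RegexMatcher;
    use grep_searcher::{sinks::UTF8, SearcherBuilder};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let matcher = RegexMatcher::new(r"^b")?;
        let haystack = "banana\nberry\ncherry\nblueberry\n";
        let mut hits = vec![];
        SearcherBuilder::new()
            .stop_on_nonmatch(true)
            .build()
            .search_slice(
                &matcher,
                haystack.as_bytes(),
                UTF8(|line_number, _line| {
                    hits.push(line_number);
                    Ok(true)
                }),
            )?;
        // "blueberry" is never reached: the non-matching "cherry" line
        // ends the search once a match has already been seen.
        assert_eq!(hits, vec![1, 2]);
        Ok(())
    }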
@@ -31,7 +31,6 @@ use ignore::overrides::{Override, OverrideBuilder};
 use ignore::types::{FileTypeDef, Types, TypesBuilder};
 use ignore::{Walk, WalkBuilder, WalkParallel};
 use log;
-use regex;
 use termcolor::{BufferWriter, ColorChoice, WriteColor};

 use crate::app;
@@ -42,7 +41,7 @@ use crate::path_printer::{PathPrinter, PathPrinterBuilder};
 use crate::search::{
     PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder,
 };
-use crate::subject::SubjectBuilder;
+use crate::subject::{Subject, SubjectBuilder};
 use crate::Result;

 /// The command that ripgrep should execute based on the command line
@@ -325,6 +324,46 @@ impl Args {
             .build())
     }

+    /// Returns true if and only if `stat`-related sorting is required
+    pub fn needs_stat_sort(&self) -> bool {
+        return self.matches().sort_by().map_or(
+            false,
+            |sort_by| match sort_by.kind {
+                SortByKind::LastModified
+                | SortByKind::Created
+                | SortByKind::LastAccessed => sort_by.check().is_ok(),
+                _ => false,
+            },
+        );
+    }
+
+    /// Sort subjects if a sorter is specified, but only if the sort requires
+    /// stat calls. Non-stat related sorts are handled during file traversal
+    ///
+    /// This function assumes that it is known that a stat-related sort is
+    /// required, and does not check for it again.
+    ///
+    /// It is important that that precondition is fulfilled, since this function
+    /// consumes the subjects iterator, and is therefore a blocking function.
+    pub fn sort_by_stat<I>(&self, subjects: I) -> Vec<Subject>
+    where
+        I: Iterator<Item = Subject>,
+    {
+        let sorter = match self.matches().sort_by() {
+            Ok(v) => v,
+            Err(_) => return subjects.collect(),
+        };
+        use SortByKind::*;
+        let mut keyed = match sorter.kind {
+            LastModified => load_timestamps(subjects, |m| m.modified()),
+            LastAccessed => load_timestamps(subjects, |m| m.accessed()),
+            Created => load_timestamps(subjects, |m| m.created()),
+            _ => return subjects.collect(),
+        };
+        keyed.sort_by(|a, b| sort_by_option(&a.0, &b.0, sorter.reverse));
+        keyed.into_iter().map(|v| v.1).collect()
+    }
+
     /// Return a parallel walker that may use additional threads.
     pub fn walker_parallel(&self) -> Result<WalkParallel> {
         Ok(self
@@ -405,44 +444,23 @@ impl SortBy {
         Ok(())
     }

-    fn configure_walk_builder(self, builder: &mut WalkBuilder) {
-        // This isn't entirely optimal. In particular, we will wind up issuing
-        // a stat for many files redundantly. Aside from having potentially
-        // inconsistent results with respect to sorting, this is also slow.
-        // We could fix this here at the expense of memory by caching stat
-        // calls. A better fix would be to find a way to push this down into
-        // directory traversal itself, but that's a somewhat nasty change.
+    /// Load sorters only if they are applicable at the walk stage.
+    ///
+    /// In particular, sorts that involve `stat` calls are not loaded because
+    /// the walk inherently assumes that parent directories are aware of all its
+    /// decendent properties, but `stat` does not work that way.
+    fn configure_builder_sort(self, builder: &mut WalkBuilder) {
+        use SortByKind::*;
         match self.kind {
-            SortByKind::None => {}
-            SortByKind::Path => {
-                if self.reverse {
-                    builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
-                } else {
-                    builder.sort_by_file_name(|a, b| a.cmp(b));
-                }
+            Path if self.reverse => {
+                builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
             }
-            SortByKind::LastModified => {
-                builder.sort_by_file_path(move |a, b| {
-                    sort_by_metadata_time(a, b, self.reverse, |md| {
-                        md.modified()
-                    })
-                });
+            Path => {
+                builder.sort_by_file_name(|a, b| a.cmp(b));
             }
-            SortByKind::LastAccessed => {
-                builder.sort_by_file_path(move |a, b| {
-                    sort_by_metadata_time(a, b, self.reverse, |md| {
-                        md.accessed()
-                    })
-                });
-            }
-            SortByKind::Created => {
-                builder.sort_by_file_path(move |a, b| {
-                    sort_by_metadata_time(a, b, self.reverse, |md| {
-                        md.created()
-                    })
-                });
-            }
-        }
+            // these use `stat` calls and will be sorted in Args::sort_by_stat()
+            LastModified | LastAccessed | Created | None => {}
+        };
     }
 }
@@ -472,24 +490,6 @@ enum EncodingMode {
     Disabled,
 }

-impl EncodingMode {
-    /// Checks if an explicit encoding has been set. Returns false for
-    /// automatic BOM sniffing and no sniffing.
-    ///
-    /// This is only used to determine whether PCRE2 needs to have its own
-    /// UTF-8 checking enabled. If we have an explicit encoding set, then
-    /// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
-    /// Otherwise, we have no such guarantee, and must enable PCRE2' UTF-8
-    /// check.
-    #[cfg(feature = "pcre2")]
-    fn has_explicit_encoding(&self) -> bool {
-        match self {
-            EncodingMode::Some(_) => true,
-            _ => false,
-        }
-    }
-}
-
 impl ArgMatches {
     /// Create an ArgMatches from clap's parse result.
     fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
@@ -671,6 +671,8 @@ impl ArgMatches {
             .multi_line(true)
             .unicode(self.unicode())
             .octal(false)
+            .fixed_strings(self.is_present("fixed-strings"))
+            .whole_line(self.is_present("line-regexp"))
             .word(self.is_present("word-regexp"));
         if self.is_present("multiline") {
             builder.dot_matches_new_line(self.is_present("multiline-dotall"));
@@ -697,12 +699,7 @@ impl ArgMatches {
         if let Some(limit) = self.dfa_size_limit()? {
             builder.dfa_size_limit(limit);
         }
-        let res = if self.is_present("fixed-strings") {
-            builder.build_literals(patterns)
-        } else {
-            builder.build(&patterns.join("|"))
-        };
-        match res {
+        match builder.build_many(patterns) {
             Ok(m) => Ok(m),
             Err(err) => Err(From::from(suggest_multiline(err.to_string()))),
         }
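Note: both matcher builders now delegate to `build_many`, which takes the pattern list directly instead of hand-joining with `|` and special-casing `-F`. A hedged usage sketch for the Rust-regex side, assuming a grep-regex version where the `build_many` and `fixed_strings` builder methods shown in these hunks are available:

    use grep_regex::RegexMatcherBuilder;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // One matcher from several patterns; with fixed_strings(true) the
        // patterns are treated as literals, as with ripgrep's -F flag.
        let matcher = RegexMatcherBuilder::new()
            .fixed_strings(true)
            .build_many(&["foo.bar", "baz*"])?;
        let _ = matcher; // use with a Searcher as usual
        Ok(())
    }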
@@ -719,6 +716,8 @@ impl ArgMatches {
             .case_smart(self.case_smart())
             .caseless(self.case_insensitive())
             .multi_line(true)
+            .fixed_strings(self.is_present("fixed-strings"))
+            .whole_line(self.is_present("line-regexp"))
             .word(self.is_present("word-regexp"));
         // For whatever reason, the JIT craps out during regex compilation with
         // a "no more memory" error on 32 bit systems. So don't use it there.
@@ -732,14 +731,6 @@ impl ArgMatches {
         }
         if self.unicode() {
             builder.utf(true).ucp(true);
-            if self.encoding()?.has_explicit_encoding() {
-                // SAFETY: If an encoding was specified, then we're guaranteed
-                // to get valid UTF-8, so we can disable PCRE2's UTF checking.
-                // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
-                unsafe {
-                    builder.disable_utf_check();
-                }
-            }
         }
         if self.is_present("multiline") {
             builder.dotall(self.is_present("multiline-dotall"));
@@ -747,7 +738,7 @@ impl ArgMatches {
         if self.is_present("crlf") {
             builder.crlf(true);
         }
-        Ok(builder.build(&patterns.join("|"))?)
+        Ok(builder.build_many(patterns)?)
     }

     /// Build a JSON printer that writes results to the given writer.
@@ -849,7 +840,8 @@ impl ArgMatches {
             .before_context(ctx_before)
             .after_context(ctx_after)
             .passthru(self.is_present("passthru"))
-            .memory_map(self.mmap_choice(paths));
+            .memory_map(self.mmap_choice(paths))
+            .stop_on_nonmatch(self.is_present("stop-on-nonmatch"));
         match self.encoding()? {
             EncodingMode::Some(enc) => {
                 builder.encoding(Some(enc));
@@ -900,12 +892,10 @@ impl ArgMatches {
             .git_exclude(!self.no_ignore_vcs() && !self.no_ignore_exclude())
             .require_git(!self.is_present("no-require-git"))
             .ignore_case_insensitive(self.ignore_file_case_insensitive());
-        if !self.no_ignore() {
+        if !self.no_ignore() && !self.no_ignore_dot() {
             builder.add_custom_ignore_filename(".rgignore");
         }
-        let sortby = self.sort_by()?;
-        sortby.check()?;
-        sortby.configure_walk_builder(&mut builder);
+        self.sort_by()?.configure_builder_sort(&mut builder);
         Ok(builder)
     }
 }
@@ -1020,10 +1010,10 @@ impl ArgMatches {
     /// If there was a problem parsing the values from the user as an integer,
     /// then an error is returned.
     fn contexts(&self) -> Result<(usize, usize)> {
-        let after = self.usize_of("after-context")?.unwrap_or(0);
-        let before = self.usize_of("before-context")?.unwrap_or(0);
         let both = self.usize_of("context")?.unwrap_or(0);
-        Ok(if both > 0 { (both, both) } else { (before, after) })
+        let after = self.usize_of("after-context")?.unwrap_or(both);
+        let before = self.usize_of("before-context")?.unwrap_or(both);
+        Ok((before, after))
     }

     /// Returns the unescaped context separator in UTF-8 bytes.
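Note: this rewrite of `contexts` is what "partially overrides --context" means in the help text earlier in the diff: each directional flag now falls back to the `-C` value instead of `-C` clobbering both directions. The resolution rule in isolation, as a hypothetical stand-alone function mirroring the hunk:

    fn contexts(
        after: Option<usize>,
        before: Option<usize>,
        both: Option<usize>,
    ) -> (usize, usize) {
        let both = both.unwrap_or(0);
        (before.unwrap_or(both), after.unwrap_or(both))
    }

    fn main() {
        // `-C 3` alone: three lines on each side.
        assert_eq!(contexts(None, None, Some(3)), (3, 3));
        // `-C 3 -A 1`: still three before, but only one after.
        assert_eq!(contexts(Some(1), None, Some(3)), (3, 1));
    }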
@@ -1080,7 +1070,6 @@ impl ArgMatches {
         }

         let label = match self.value_of_lossy("encoding") {
-            None if self.pcre2_unicode() => "utf-8".to_string(),
             None => return Ok(EncodingMode::Auto),
             Some(label) => label,
         };
@@ -1412,11 +1401,6 @@ impl ArgMatches {
     /// Get a sequence of all available patterns from the command line.
     /// This includes reading the -e/--regexp and -f/--file flags.
     ///
-    /// Note that if -F/--fixed-strings is set, then all patterns will be
-    /// escaped. If -x/--line-regexp is set, then all patterns are surrounded
-    /// by `^...$`. Other things, such as --word-regexp, are handled by the
-    /// regex matcher itself.
-    ///
     /// If any pattern is invalid UTF-8, then an error is returned.
     fn patterns(&self) -> Result<Vec<String>> {
         if self.is_present("files") || self.is_present("type-list") {
@@ -1457,16 +1441,6 @@ impl ArgMatches {
         Ok(pats)
     }

-    /// Returns a pattern that is guaranteed to produce an empty regular
-    /// expression that is valid in any position.
-    fn pattern_empty(&self) -> String {
-        // This would normally just be an empty string, which works on its
-        // own, but if the patterns are joined in a set of alternations, then
-        // you wind up with `foo|`, which is currently invalid in Rust's regex
-        // engine.
-        "(?:z{0})*".to_string()
-    }
-
     /// Converts an OsStr pattern to a String pattern. The pattern is escaped
     /// if -F/--fixed-strings is set.
     ///
@@ -1485,30 +1459,12 @@ impl ArgMatches {
     /// Applies additional processing on the given pattern if necessary
     /// (such as escaping meta characters or turning it into a line regex).
     fn pattern_from_string(&self, pat: String) -> String {
-        let pat = self.pattern_line(self.pattern_literal(pat));
         if pat.is_empty() {
-            self.pattern_empty()
-        } else {
-            pat
-        }
-    }
-
-    /// Returns the given pattern as a line pattern if the -x/--line-regexp
-    /// flag is set. Otherwise, the pattern is returned unchanged.
-    fn pattern_line(&self, pat: String) -> String {
-        if self.is_present("line-regexp") {
-            format!(r"^(?:{})$", pat)
-        } else {
-            pat
-        }
-    }
-
-    /// Returns the given pattern as a literal pattern if the
-    /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
-    /// unchanged.
-    fn pattern_literal(&self, pat: String) -> String {
-        if self.is_present("fixed-strings") {
-            regex::escape(&pat)
+            // This would normally just be an empty string, which works on its
+            // own, but if the patterns are joined in a set of alternations,
+            // then you wind up with `foo|`, which is currently invalid in
+            // Rust's regex engine.
+            "(?:)".to_string()
         } else {
             pat
         }
@@ -1641,12 +1597,6 @@ impl ArgMatches {
         !(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode"))
     }

-    /// Returns true if and only if PCRE2 is enabled and its Unicode mode is
-    /// enabled.
-    fn pcre2_unicode(&self) -> bool {
-        self.is_present("pcre2") && self.unicode()
-    }
-
     /// Returns true if and only if file names containing each match should
     /// be emitted.
     fn with_filename(&self, paths: &[PathBuf]) -> bool {
@@ -1807,32 +1757,18 @@ fn u64_to_usize(arg_name: &str, value: Option<u64>) -> Result<Option<usize>> {
     }
 }

-/// Builds a comparator for sorting two files according to a system time
-/// extracted from the file's metadata.
-///
-/// If there was a problem extracting the metadata or if the time is not
-/// available, then both entries compare equal.
-fn sort_by_metadata_time<G>(
-    p1: &Path,
-    p2: &Path,
+/// Sorts by an optional parameter.
+//
+/// If parameter is found to be `None`, both entries compare equal.
+fn sort_by_option<T: Ord>(
+    p1: &Option<T>,
+    p2: &Option<T>,
     reverse: bool,
-    get_time: G,
-) -> cmp::Ordering
-where
-    G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
-{
-    let t1 = match p1.metadata().and_then(|md| get_time(&md)) {
-        Ok(t) => t,
-        Err(_) => return cmp::Ordering::Equal,
-    };
-    let t2 = match p2.metadata().and_then(|md| get_time(&md)) {
-        Ok(t) => t,
-        Err(_) => return cmp::Ordering::Equal,
-    };
-    if reverse {
-        t1.cmp(&t2).reverse()
-    } else {
-        t1.cmp(&t2)
+) -> cmp::Ordering {
+    match (p1, p2, reverse) {
+        (Some(p1), Some(p2), true) => p1.cmp(&p2).reverse(),
+        (Some(p1), Some(p2), false) => p1.cmp(&p2),
+        _ => cmp::Ordering::Equal,
     }
 }

@@ -1886,3 +1822,17 @@ fn current_dir() -> Result<PathBuf> {
         )
         .into())
 }
+
+/// Tries to assign a timestamp to every `Subject` in the vector to help with
+/// sorting Subjects by time.
+fn load_timestamps<G>(
+    subjects: impl Iterator<Item = Subject>,
+    get_time: G,
+) -> Vec<(Option<SystemTime>, Subject)>
+where
+    G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
+{
+    subjects
+        .map(|s| (s.path().metadata().and_then(|m| get_time(&m)).ok(), s))
+        .collect()
+}
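Note: together, `load_timestamps` and `sort_by_option` implement the two-phase stat sort: fetch an optional timestamp key per subject once, then sort, treating missing keys as equal. The same pattern in a self-contained sketch:

    use std::cmp;

    // Attach an optional key to each item once, then sort; items whose key
    // could not be obtained compare equal to everything.
    fn sort_by_option<T: Ord>(
        p1: &Option<T>,
        p2: &Option<T>,
        reverse: bool,
    ) -> cmp::Ordering {
        match (p1, p2, reverse) {
            (Some(p1), Some(p2), true) => p1.cmp(p2).reverse(),
            (Some(p1), Some(p2), false) => p1.cmp(p2),
            _ => cmp::Ordering::Equal,
        }
    }

    fn main() {
        let mut keyed: Vec<(Option<u64>, &str)> =
            vec![(Some(2), "b"), (None, "x"), (Some(1), "a")];
        keyed.sort_by(|a, b| sort_by_option(&a.0, &b.0, false));
        let names: Vec<&str> = keyed.into_iter().map(|v| v.1).collect();
        // Keyed items are ordered; the unkeyed one compares equal to both.
        println!("{:?}", names);
    }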
@@ -33,7 +33,7 @@ impl Log for Logger {
     fn log(&self, record: &log::Record<'_>) {
         match (record.file(), record.line()) {
             (Some(file), Some(line)) => {
-                eprintln!(
+                eprintln_locked!(
                     "{}|{}|{}:{}: {}",
                     record.level(),
                     record.target(),
@@ -43,7 +43,7 @@ impl Log for Logger {
                 );
             }
             (Some(file), None) => {
-                eprintln!(
+                eprintln_locked!(
                     "{}|{}|{}: {}",
                     record.level(),
                     record.target(),
@@ -52,7 +52,7 @@ impl Log for Logger {
                 );
             }
             _ => {
-                eprintln!(
+                eprintln_locked!(
                     "{}|{}: {}",
                     record.level(),
                     record.target(),
@@ -63,6 +63,6 @@ impl Log for Logger {
     }

     fn flush(&self) {
-        // We use eprintln! which is flushed on every call.
+        // We use eprintln_locked! which is flushed on every call.
     }
 }
@@ -47,7 +47,7 @@ type Result<T> = ::std::result::Result<T, Box<dyn error::Error>>;

 fn main() {
     if let Err(err) = Args::parse().and_then(try_main) {
-        eprintln!("{}", err);
+        eprintln_locked!("{}", err);
         process::exit(2);
     }
 }
@@ -77,53 +77,70 @@ fn try_main(args: Args) -> Result<()> {
 /// steps through the file list (current directory by default) and searches
 /// each file sequentially.
 fn search(args: &Args) -> Result<bool> {
-    let started_at = Instant::now();
-    let quit_after_match = args.quit_after_match()?;
-    let subject_builder = args.subject_builder();
-    let mut stats = args.stats()?;
-    let mut searcher = args.search_worker(args.stdout())?;
-    let mut matched = false;
-    let mut searched = false;
+    /// The meat of the routine is here. This lets us call the same iteration
+    /// code over each file regardless of whether we stream over the files
+    /// as they're produced by the underlying directory traversal or whether
+    /// they've been collected and sorted (for example) first.
+    fn iter(
+        args: &Args,
+        subjects: impl Iterator<Item = Subject>,
+        started_at: std::time::Instant,
+    ) -> Result<bool> {
+        let quit_after_match = args.quit_after_match()?;
+        let mut stats = args.stats()?;
+        let mut searcher = args.search_worker(args.stdout())?;
+        let mut matched = false;
+        let mut searched = false;

-    for result in args.walker()? {
-        let subject = match subject_builder.build_from_result(result) {
-            Some(subject) => subject,
-            None => continue,
-        };
-        searched = true;
-        let search_result = match searcher.search(&subject) {
-            Ok(search_result) => search_result,
-            Err(err) => {
-                // A broken pipe means graceful termination.
-                if err.kind() == io::ErrorKind::BrokenPipe {
-                    break;
-                }
-                err_message!("{}: {}", subject.path().display(), err);
-                continue;
+        for subject in subjects {
+            searched = true;
+            let search_result = match searcher.search(&subject) {
+                Ok(search_result) => search_result,
+                // A broken pipe means graceful termination.
+                Err(err) if err.kind() == io::ErrorKind::BrokenPipe => break,
+                Err(err) => {
+                    err_message!("{}: {}", subject.path().display(), err);
+                    continue;
+                }
+            };
+            matched |= search_result.has_match();
+            if let Some(ref mut stats) = stats {
+                *stats += search_result.stats().unwrap();
+            }
+            if matched && quit_after_match {
+                break;
             }
-        };
-        matched = matched || search_result.has_match();
-        if let Some(ref mut stats) = stats {
-            *stats += search_result.stats().unwrap();
         }
-        if matched && quit_after_match {
-            break;
+        if args.using_default_path() && !searched {
+            eprint_nothing_searched();
         }
+        if let Some(ref stats) = stats {
+            let elapsed = Instant::now().duration_since(started_at);
+            // We don't care if we couldn't print this successfully.
+            let _ = searcher.print_stats(elapsed, stats);
+        }
+        Ok(matched)
     }
-    if args.using_default_path() && !searched {
-        eprint_nothing_searched();
-    }
-    if let Some(ref stats) = stats {
-        let elapsed = Instant::now().duration_since(started_at);
-        // We don't care if we couldn't print this successfully.
-        let _ = searcher.print_stats(elapsed, stats);
-    }
-    Ok(matched)
+
+    let started_at = Instant::now();
+    let subject_builder = args.subject_builder();
+    let subjects = args
+        .walker()?
+        .filter_map(|result| subject_builder.build_from_result(result));
+    if args.needs_stat_sort() {
+        let subjects = args.sort_by_stat(subjects).into_iter();
+        iter(args, subjects, started_at)
+    } else {
+        iter(args, subjects, started_at)
+    }
 }

 /// The top-level entry point for multi-threaded search. The parallelism is
 /// itself achieved by the recursive directory traversal. All we need to do is
 /// feed it a worker for performing a search on each file.
+///
+/// Requesting a sorted output from ripgrep (such as with `--sort path`) will
+/// automatically disable parallelism and hence sorting is not handled here.
 fn search_parallel(args: &Args) -> Result<bool> {
     use std::sync::atomic::AtomicBool;
     use std::sync::atomic::Ordering::SeqCst;
@@ -214,35 +231,54 @@ fn eprint_nothing_searched() {
 /// recursively steps through the file list (current directory by default) and
 /// prints each path sequentially using a single thread.
 fn files(args: &Args) -> Result<bool> {
-    let quit_after_match = args.quit_after_match()?;
-    let subject_builder = args.subject_builder();
-    let mut matched = false;
-    let mut path_printer = args.path_printer(args.stdout())?;
-    for result in args.walker()? {
-        let subject = match subject_builder.build_from_result(result) {
-            Some(subject) => subject,
-            None => continue,
-        };
-        matched = true;
-        if quit_after_match {
-            break;
-        }
-        if let Err(err) = path_printer.write_path(subject.path()) {
-            // A broken pipe means graceful termination.
-            if err.kind() == io::ErrorKind::BrokenPipe {
-                break;
+    /// The meat of the routine is here. This lets us call the same iteration
+    /// code over each file regardless of whether we stream over the files
+    /// as they're produced by the underlying directory traversal or whether
+    /// they've been collected and sorted (for example) first.
+    fn iter(
+        args: &Args,
+        subjects: impl Iterator<Item = Subject>,
+    ) -> Result<bool> {
+        let quit_after_match = args.quit_after_match()?;
+        let mut matched = false;
+        let mut path_printer = args.path_printer(args.stdout())?;

+        for subject in subjects {
+            matched = true;
+            if quit_after_match {
+                break;
+            }
+            if let Err(err) = path_printer.write_path(subject.path()) {
+                // A broken pipe means graceful termination.
+                if err.kind() == io::ErrorKind::BrokenPipe {
+                    break;
+                }
+                // Otherwise, we have some other error that's preventing us from
+                // writing to stdout, so we should bubble it up.
+                return Err(err.into());
             }
-            // Otherwise, we have some other error that's preventing us from
-            // writing to stdout, so we should bubble it up.
-            return Err(err.into());
         }
+        Ok(matched)
+    }
+
+    let subject_builder = args.subject_builder();
+    let subjects = args
+        .walker()?
+        .filter_map(|result| subject_builder.build_from_result(result));
+    if args.needs_stat_sort() {
+        let subjects = args.sort_by_stat(subjects).into_iter();
+        iter(args, subjects)
+    } else {
+        iter(args, subjects)
     }
-    Ok(matched)
 }

 /// The top-level entry point for listing files without searching them. This
 /// recursively steps through the file list (current directory by default) and
 /// prints each path sequentially using multiple threads.
+///
+/// Requesting a sorted output from ripgrep (such as with `--sort path`) will
+/// automatically disable parallelism and hence sorting is not handled here.
 fn files_parallel(args: &Args) -> Result<bool> {
     use std::sync::atomic::AtomicBool;
     use std::sync::atomic::Ordering::SeqCst;
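Note: `search` and `files` now share one shape: an inner `iter` function accepts any subject iterator and is fed either the streaming walker or a collected-and-sorted vector. The pattern in miniature, with toy types rather than ripgrep's:

    // One inner function takes any iterator, fed either the streaming
    // source or a collected, sorted Vec.
    fn run(items: impl Iterator<Item = u32>) -> u32 {
        items.sum()
    }

    fn main() {
        let stream = (1u32..=4).filter(|n| n % 2 == 0);
        let needs_sort = true; // stand-in for args.needs_stat_sort()
        let total = if needs_sort {
            let collected: Vec<u32> = stream.collect();
            // a real caller would sort `collected` here before iterating
            run(collected.into_iter())
        } else {
            run(stream)
        };
        assert_eq!(total, 6);
    }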
@@ -4,12 +4,28 @@ static MESSAGES: AtomicBool = AtomicBool::new(false);
 static IGNORE_MESSAGES: AtomicBool = AtomicBool::new(false);
 static ERRORED: AtomicBool = AtomicBool::new(false);

+/// Like eprintln, but locks STDOUT to prevent interleaving lines.
+#[macro_export]
+macro_rules! eprintln_locked {
+    ($($tt:tt)*) => {{
+        {
+            // This is a bit of an abstraction violation because we explicitly
+            // lock STDOUT before printing to STDERR. This avoids interleaving
+            // lines within ripgrep because `search_parallel` uses `termcolor`,
+            // which accesses the same STDOUT lock when writing lines.
+            let stdout = std::io::stdout();
+            let _handle = stdout.lock();
+            eprintln!($($tt)*);
+        }
+    }}
+}
+
 /// Emit a non-fatal error message, unless messages were disabled.
 #[macro_export]
 macro_rules! message {
     ($($tt:tt)*) => {
         if crate::messages::messages() {
-            eprintln!($($tt)*);
+            eprintln_locked!($($tt)*);
         }
     }
 }
@@ -30,7 +46,7 @@ macro_rules! err_message {
 macro_rules! ignore_message {
     ($($tt:tt)*) => {
         if crate::messages::messages() && crate::messages::ignore_messages() {
-            eprintln!($($tt)*);
+            eprintln_locked!($($tt)*);
        }
     }
 }
@@ -1,6 +1,6 @@
 [package]
 name = "globset"
-version = "0.4.10" #:version
+version = "0.4.11" #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Cross platform single glob and glob set matching. Glob set matching is the
@@ -20,11 +20,11 @@ name = "globset"
 bench = false

 [dependencies]
-aho-corasick = "0.7.3"
+aho-corasick = "1.0.2"
-bstr = { version = "1.1.0", default-features = false, features = ["std"] }
+bstr = { version = "1.6.0", default-features = false, features = ["std"] }
 fnv = "1.0.6"
 log = { version = "0.4.5", optional = true }
-regex = { version = "1.1.5", default-features = false, features = ["perf", "std"] }
+regex = { version = "1.8.3", default-features = false, features = ["perf", "std"] }
 serde = { version = "1.0.104", optional = true }

 [dev-dependencies]
@@ -208,6 +208,9 @@ struct GlobOptions {
     /// Whether or not to use `\` to escape special characters.
     /// e.g., when enabled, `\*` will match a literal `*`.
     backslash_escape: bool,
+    /// Whether or not an empty case in an alternate will be removed.
+    /// e.g., when enabled, `{,a}` will match "" and "a".
+    empty_alternates: bool,
 }

 impl GlobOptions {
@@ -216,6 +219,7 @@ impl GlobOptions {
             case_insensitive: false,
             literal_separator: false,
             backslash_escape: !is_separator('\\'),
+            empty_alternates: false,
         }
     }
 }
@@ -633,6 +637,16 @@ impl<'a> GlobBuilder<'a> {
         self.opts.backslash_escape = yes;
         self
     }
+
+    /// Toggle whether an empty pattern in a list of alternates is accepted.
+    ///
+    /// For example, if this is set then the glob `foo{,.txt}` will match both `foo` and `foo.txt`.
+    ///
+    /// By default this is false.
+    pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
+        self.opts.empty_alternates = yes;
+        self
+    }
 }

 impl Tokens {
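Note: a usage sketch for the new `empty_alternates` option added above (available from globset 0.4.11):

    use globset::GlobBuilder;

    fn main() -> Result<(), globset::Error> {
        // With empty_alternates(true), the empty case in `{,.txt}` is kept,
        // so the glob matches both "foo" and "foo.txt".
        let glob = GlobBuilder::new("foo{,.txt}")
            .empty_alternates(true)
            .build()?
            .compile_matcher();
        assert!(glob.is_match("foo"));
        assert!(glob.is_match("foo.txt"));
        Ok(())
    }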
@@ -714,7 +728,7 @@ impl Tokens {
         for pat in patterns {
             let mut altre = String::new();
             self.tokens_to_regex(options, &pat, &mut altre);
-            if !altre.is_empty() {
+            if !altre.is_empty() || options.empty_alternates {
                 parts.push(altre);
             }
         }
@@ -1020,6 +1034,7 @@ mod tests {
         casei: Option<bool>,
         litsep: Option<bool>,
         bsesc: Option<bool>,
+        ealtre: Option<bool>,
     }

     macro_rules! syntax {
@@ -1059,6 +1074,9 @@ mod tests {
                 if let Some(bsesc) = $options.bsesc {
                     builder.backslash_escape(bsesc);
                 }
+                if let Some(ealtre) = $options.ealtre {
+                    builder.empty_alternates(ealtre);
+                }
                 let pat = builder.build().unwrap();
                 assert_eq!(format!("(?-u){}", $re), pat.regex());
             }
@@ -1082,6 +1100,9 @@ mod tests {
                 if let Some(bsesc) = $options.bsesc {
                     builder.backslash_escape(bsesc);
                 }
+                if let Some(ealtre) = $options.ealtre {
+                    builder.empty_alternates(ealtre);
+                }
                 let pat = builder.build().unwrap();
                 let matcher = pat.compile_matcher();
                 let strategic = pat.compile_strategic_matcher();
@@ -1110,6 +1131,9 @@ mod tests {
                 if let Some(bsesc) = $options.bsesc {
                     builder.backslash_escape(bsesc);
                 }
+                if let Some(ealtre) = $options.ealtre {
+                    builder.empty_alternates(ealtre);
+                }
                 let pat = builder.build().unwrap();
                 let matcher = pat.compile_matcher();
                 let strategic = pat.compile_strategic_matcher();
@@ -1195,13 +1219,23 @@ mod tests {
     syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));

     const CASEI: Options =
-        Options { casei: Some(true), litsep: None, bsesc: None };
+        Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
     const SLASHLIT: Options =
-        Options { casei: None, litsep: Some(true), bsesc: None };
+        Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
-    const NOBSESC: Options =
-        Options { casei: None, litsep: None, bsesc: Some(false) };
+    const NOBSESC: Options = Options {
+        casei: None,
+        litsep: None,
+        bsesc: Some(false),
+        ealtre: None,
+    };
     const BSESC: Options =
-        Options { casei: None, litsep: None, bsesc: Some(true) };
+        Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
+    const EALTRE: Options = Options {
+        casei: None,
+        litsep: None,
+        bsesc: Some(true),
+        ealtre: Some(true),
+    };

     toregex!(re_casei, "a", "(?i)^a$", &CASEI);

@@ -1326,6 +1360,9 @@ mod tests {
     matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
     matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
     matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
+    matches!(matchalt14, "foo{,.txt}", "foo.txt");
+    nmatches!(matchalt15, "foo{,.txt}", "foo");
+    matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);

     matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
     #[cfg(unix)]
@@ -1425,6 +1462,9 @@ mod tests {
                 if let Some(bsesc) = $options.bsesc {
                     builder.backslash_escape(bsesc);
                 }
+                if let Some(ealtre) = $options.ealtre {
+                    builder.empty_alternates(ealtre);
+                }
                 let pat = builder.build().unwrap();
                 assert_eq!($expect, pat.$which());
             }
@@ -498,13 +498,23 @@ impl GlobSetBuilder {
 /// Constructing candidates has a very small cost associated with it, so
 /// callers may find it beneficial to amortize that cost when matching a single
 /// path against multiple globs or sets of globs.
-#[derive(Clone, Debug)]
+#[derive(Clone)]
 pub struct Candidate<'a> {
     path: Cow<'a, [u8]>,
     basename: Cow<'a, [u8]>,
     ext: Cow<'a, [u8]>,
 }

+impl<'a> std::fmt::Debug for Candidate<'a> {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.debug_struct("Candidate")
+            .field("path", &self.path.as_bstr())
+            .field("basename", &self.basename.as_bstr())
+            .field("ext", &self.ext.as_bstr())
+            .finish()
+    }
+}
+
 impl<'a> Candidate<'a> {
     /// Create a new candidate for matching from the given path.
     pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
@@ -818,7 +828,7 @@ impl MultiStrategyBuilder {

     fn prefix(self) -> PrefixStrategy {
         PrefixStrategy {
-            matcher: AhoCorasick::new_auto_configured(&self.literals),
+            matcher: AhoCorasick::new(&self.literals).unwrap(),
             map: self.map,
             longest: self.longest,
         }
@@ -826,7 +836,7 @@ impl MultiStrategyBuilder {

     fn suffix(self) -> SuffixStrategy {
         SuffixStrategy {
-            matcher: AhoCorasick::new_auto_configured(&self.literals),
+            matcher: AhoCorasick::new(&self.literals).unwrap(),
             map: self.map,
             longest: self.longest,
         }
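Note: these two hunks track the aho-corasick 1.0 API, where `new_auto_configured` is gone and `new` is fallible. A sketch of the 1.x constructor, assuming aho-corasick 1.0 or later:

    use aho_corasick::AhoCorasick;

    fn main() {
        // `new` now returns a Result; auto-configuration is the default.
        let ac = AhoCorasick::new(["foo/", "bar/"]).unwrap();
        assert!(ac.is_match("a/bar/baz"));
    }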
@@ -870,6 +880,29 @@ impl RequiredExtensionStrategyBuilder {
     }
 }

+/// Escape meta-characters within the given glob pattern.
+///
+/// The escaping works by surrounding meta-characters with brackets. For
+/// example, `*` becomes `[*]`.
+pub fn escape(s: &str) -> String {
+    let mut escaped = String::with_capacity(s.len());
+    for c in s.chars() {
+        match c {
+            // note that ! does not need escaping because it is only special
+            // inside brackets
+            '?' | '*' | '[' | ']' => {
+                escaped.push('[');
+                escaped.push(c);
+                escaped.push(']');
+            }
+            c => {
+                escaped.push(c);
+            }
+        }
+    }
+    escaped
+}
+
 #[cfg(test)]
 mod tests {
     use super::{GlobSet, GlobSetBuilder};
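Note: a usage sketch for the new `globset::escape` helper, pairing it with `Glob` so a user-supplied string can be matched literally:

    fn main() {
        assert_eq!(globset::escape("src/**/*.rs"), "src/[*][*]/[*].rs");
        // An escaped pattern matches its original input literally.
        let glob = globset::Glob::new(&globset::escape("a*b")).unwrap();
        assert!(glob.compile_matcher().is_match("a*b"));
    }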
@@ -909,4 +942,16 @@ mod tests {
         assert!(!set.is_match(""));
         assert!(!set.is_match("a"));
     }
+
+    #[test]
+    fn escape() {
+        use super::escape;
+        assert_eq!("foo", escape("foo"));
+        assert_eq!("foo[*]", escape("foo*"));
+        assert_eq!("[[][]]", escape("[]"));
+        assert_eq!("[*][?]", escape("*?"));
+        assert_eq!("src/[*][*]/[*].rs", escape("src/**/*.rs"));
+        assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz"));
+        assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz"));
+    }
 }
@@ -27,7 +27,7 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
 ///
 /// Note that this does NOT match the semantics of std::path::Path::extension.
 /// Namely, the extension includes the `.` and matching is otherwise more
-/// liberal. Specifically, the extenion is:
+/// liberal. Specifically, the extension is:
 ///
 /// * None, if the file name given is empty;
 /// * None, if there is no embedded `.`;
@@ -1,5 +1,7 @@
-use serde::de::Error;
-use serde::{Deserialize, Deserializer, Serialize, Serializer};
+use serde::{
+    de::{Error, Visitor},
+    {Deserialize, Deserializer, Serialize, Serializer},
+};

 use crate::Glob;

@@ -12,18 +14,66 @@ impl Serialize for Glob {
     }
 }

+struct GlobVisitor;
+
+impl<'a> Visitor<'a> for GlobVisitor {
+    type Value = Glob;
+
+    fn expecting(
+        &self,
+        formatter: &mut std::fmt::Formatter,
+    ) -> std::fmt::Result {
+        formatter.write_str("a glob pattern")
+    }
+
+    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
+    where
+        E: Error,
+    {
+        Glob::new(v).map_err(serde::de::Error::custom)
+    }
+}
+
 impl<'de> Deserialize<'de> for Glob {
     fn deserialize<D: Deserializer<'de>>(
         deserializer: D,
     ) -> Result<Self, D::Error> {
-        let glob = <&str as Deserialize>::deserialize(deserializer)?;
-        Glob::new(glob).map_err(D::Error::custom)
+        deserializer.deserialize_str(GlobVisitor)
     }
 }

 #[cfg(test)]
 mod tests {
-    use Glob;
+    use std::collections::HashMap;

+    use crate::Glob;
+
+    #[test]
+    fn glob_deserialize_borrowed() {
+        let string = r#"{"markdown": "*.md"}"#;
+
+        let map: HashMap<String, Glob> =
+            serde_json::from_str(&string).unwrap();
+        assert_eq!(map["markdown"], Glob::new("*.md").unwrap());
+    }
+
+    #[test]
+    fn glob_deserialize_owned() {
+        let string = r#"{"markdown": "*.md"}"#;
+
+        let v: serde_json::Value = serde_json::from_str(&string).unwrap();
+        let map: HashMap<String, Glob> = serde_json::from_value(v).unwrap();
+        assert_eq!(map["markdown"], Glob::new("*.md").unwrap());
+    }
+
+    #[test]
+    fn glob_deserialize_error() {
+        let string = r#"{"error": "["}"#;
+
+        let map = serde_json::from_str::<HashMap<String, Glob>>(&string);
+
+        assert!(map.is_err());
+    }

     #[test]
     fn glob_json_works() {
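A short sketch of what the visitor-based `Deserialize` impl enables (a hypothetical config struct; assumes globset is built with its serde feature and `serde_json` is available):

```rust
use std::collections::HashMap;

use globset::Glob;
use serde::Deserialize;

#[derive(Deserialize)]
struct Config {
    // Globs deserialize directly from strings, whether the input is
    // borrowed (from_str) or owned (from_value).
    types: HashMap<String, Glob>,
}

fn main() {
    let config: Config =
        serde_json::from_str(r#"{"types": {"markdown": "*.md"}}"#).unwrap();
    assert_eq!(config.types["markdown"], Glob::new("*.md").unwrap());
}
```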
@@ -1,6 +1,6 @@
 [package]
 name = "grep"
-version = "0.2.10" #:version
+version = "0.2.12" #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 Fast line oriented regex searching as a library.
@@ -17,9 +17,9 @@ edition = "2018"
 grep-cli = { version = "0.1.7", path = "../cli" }
 grep-matcher = { version = "0.1.6", path = "../matcher" }
 grep-pcre2 = { version = "0.1.6", path = "../pcre2", optional = true }
-grep-printer = { version = "0.1.6", path = "../printer" }
+grep-printer = { version = "0.1.7", path = "../printer" }
 grep-regex = { version = "0.1.11", path = "../regex" }
-grep-searcher = { version = "0.1.10", path = "../searcher" }
+grep-searcher = { version = "0.1.11", path = "../searcher" }

 [dev-dependencies]
 termcolor = "1.0.4"
@@ -12,8 +12,6 @@ are sparse.
 A cookbook and a guide are planned.
 */

-#![deny(missing_docs)]
-
 pub extern crate grep_cli as cli;
 pub extern crate grep_matcher as matcher;
 #[cfg(feature = "pcre2")]
@@ -1,6 +1,6 @@
 [package]
 name = "ignore"
-version = "0.4.19" #:version
+version = "0.4.20" #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 A fast library for efficiently matching ignore files such as `.gitignore`
@@ -22,8 +22,8 @@ bench = false
 globset = { version = "0.4.10", path = "../globset" }
 lazy_static = "1.1"
 log = "0.4.5"
-memchr = "2.1"
-regex = "1.1"
+memchr = "2.5"
+regex = { version = "1.9.0", default-features = false, features = ["perf", "std", "unicode-gencat"] }
 same-file = "1.0.4"
 thread_local = "1"
 walkdir = "2.2.7"
@@ -9,104 +9,113 @@
 /// Please try to keep this list sorted lexicographically and wrapped to 79
 /// columns (inclusive).
 #[rustfmt::skip]
-pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
-    ("agda", &["*.agda", "*.lagda"]),
-    ("aidl", &["*.aidl"]),
-    ("amake", &["*.mk", "*.bp"]),
-    ("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
-    ("asm", &["*.asm", "*.s", "*.S"]),
-    ("asp", &[
+pub const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
+    (&["ada"], &["*.adb", "*.ads"]),
+    (&["agda"], &["*.agda", "*.lagda"]),
+    (&["aidl"], &["*.aidl"]),
+    (&["alire"], &["alire.toml"]),
+    (&["amake"], &["*.mk", "*.bp"]),
+    (&["asciidoc"], &["*.adoc", "*.asc", "*.asciidoc"]),
+    (&["asm"], &["*.asm", "*.s", "*.S"]),
+    (&["asp"], &[
         "*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs",
         "*.ascx.vb", "*.asp"
     ]),
-    ("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
-    ("avro", &["*.avdl", "*.avpr", "*.avsc"]),
-    ("awk", &["*.awk"]),
-    ("bazel", &[
+    (&["ats"], &["*.ats", "*.dats", "*.sats", "*.hats"]),
+    (&["avro"], &["*.avdl", "*.avpr", "*.avsc"]),
+    (&["awk"], &["*.awk"]),
+    (&["bat", "batch"], &["*.bat"]),
+    (&["bazel"], &[
         "*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel",
         "WORKSPACE", "WORKSPACE.bazel",
     ]),
-    ("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
-    ("brotli", &["*.br"]),
-    ("buildstream", &["*.bst"]),
-    ("bzip2", &["*.bz2", "*.tbz2"]),
-    ("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
-    ("cabal", &["*.cabal"]),
-    ("candid", &["*.did"]),
-    ("carp", &["*.carp"]),
-    ("cbor", &["*.cbor"]),
-    ("ceylon", &["*.ceylon"]),
-    ("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
-    ("cmake", &["*.cmake", "CMakeLists.txt"]),
-    ("coffeescript", &["*.coffee"]),
-    ("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
-    ("coq", &["*.v"]),
-    ("cpp", &[
+    (&["bitbake"], &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
+    (&["brotli"], &["*.br"]),
+    (&["buildstream"], &["*.bst"]),
+    (&["bzip2"], &["*.bz2", "*.tbz2"]),
+    (&["c"], &["*.[chH]", "*.[chH].in", "*.cats"]),
+    (&["cabal"], &["*.cabal"]),
+    (&["candid"], &["*.did"]),
+    (&["carp"], &["*.carp"]),
+    (&["cbor"], &["*.cbor"]),
+    (&["ceylon"], &["*.ceylon"]),
+    (&["clojure"], &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
+    (&["cmake"], &["*.cmake", "CMakeLists.txt"]),
+    (&["cmd"], &["*.bat", "*.cmd"]),
+    (&["cml"], &["*.cml"]),
+    (&["coffeescript"], &["*.coffee"]),
+    (&["config"], &["*.cfg", "*.conf", "*.config", "*.ini"]),
+    (&["coq"], &["*.v"]),
+    (&["cpp"], &[
         "*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
         "*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
     ]),
-    ("creole", &["*.creole"]),
-    ("crystal", &["Projectfile", "*.cr", "*.ecr", "shard.yml"]),
-    ("cs", &["*.cs"]),
-    ("csharp", &["*.cs"]),
-    ("cshtml", &["*.cshtml"]),
-    ("css", &["*.css", "*.scss"]),
-    ("csv", &["*.csv"]),
-    ("cuda", &["*.cu", "*.cuh"]),
-    ("cython", &["*.pyx", "*.pxi", "*.pxd"]),
-    ("d", &["*.d"]),
-    ("dart", &["*.dart"]),
-    ("devicetree", &["*.dts", "*.dtsi"]),
-    ("dhall", &["*.dhall"]),
-    ("diff", &["*.patch", "*.diff"]),
-    ("docker", &["*Dockerfile*"]),
-    ("dts", &["*.dts", "*.dtsi"]),
-    ("dvc", &["Dvcfile", "*.dvc"]),
-    ("ebuild", &["*.ebuild"]),
-    ("edn", &["*.edn"]),
-    ("elisp", &["*.el"]),
-    ("elixir", &["*.ex", "*.eex", "*.exs"]),
-    ("elm", &["*.elm"]),
-    ("erb", &["*.erb"]),
-    ("erlang", &["*.erl", "*.hrl"]),
-    ("fennel", &["*.fnl"]),
-    ("fidl", &["*.fidl"]),
-    ("fish", &["*.fish"]),
-    ("flatbuffers", &["*.fbs"]),
-    ("fortran", &[
+    (&["creole"], &["*.creole"]),
+    (&["crystal"], &["Projectfile", "*.cr", "*.ecr", "shard.yml"]),
+    (&["cs"], &["*.cs"]),
+    (&["csharp"], &["*.cs"]),
+    (&["cshtml"], &["*.cshtml"]),
+    (&["css"], &["*.css", "*.scss"]),
+    (&["csv"], &["*.csv"]),
+    (&["cuda"], &["*.cu", "*.cuh"]),
+    (&["cython"], &["*.pyx", "*.pxi", "*.pxd"]),
+    (&["d"], &["*.d"]),
+    (&["dart"], &["*.dart"]),
+    (&["devicetree"], &["*.dts", "*.dtsi"]),
+    (&["dhall"], &["*.dhall"]),
+    (&["diff"], &["*.patch", "*.diff"]),
+    (&["dita"], &["*.dita", "*.ditamap", "*.ditaval"]),
+    (&["docker"], &["*Dockerfile*"]),
+    (&["dockercompose"], &["docker-compose.yml", "docker-compose.*.yml"]),
+    (&["dts"], &["*.dts", "*.dtsi"]),
+    (&["dvc"], &["Dvcfile", "*.dvc"]),
+    (&["ebuild"], &["*.ebuild", "*.eclass"]),
+    (&["edn"], &["*.edn"]),
+    (&["elisp"], &["*.el"]),
+    (&["elixir"], &["*.ex", "*.eex", "*.exs", "*.heex", "*.leex", "*.livemd"]),
+    (&["elm"], &["*.elm"]),
+    (&["erb"], &["*.erb"]),
+    (&["erlang"], &["*.erl", "*.hrl"]),
+    (&["fennel"], &["*.fnl"]),
+    (&["fidl"], &["*.fidl"]),
+    (&["fish"], &["*.fish"]),
+    (&["flatbuffers"], &["*.fbs"]),
+    (&["fortran"], &[
         "*.f", "*.F", "*.f77", "*.F77", "*.pfo",
         "*.f90", "*.F90", "*.f95", "*.F95",
     ]),
-    ("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
-    ("fut", &["*.fut"]),
-    ("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
-    ("gn", &["*.gn", "*.gni"]),
-    ("go", &["*.go"]),
-    ("gradle", &["*.gradle"]),
-    ("groovy", &["*.groovy", "*.gradle"]),
-    ("gzip", &["*.gz", "*.tgz"]),
-    ("h", &["*.h", "*.hh", "*.hpp"]),
-    ("haml", &["*.haml"]),
-    ("hare", &["*.ha"]),
-    ("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
-    ("hbs", &["*.hbs"]),
-    ("hs", &["*.hs", "*.lhs"]),
-    ("html", &["*.htm", "*.html", "*.ejs"]),
-    ("hy", &["*.hy"]),
-    ("idris", &["*.idr", "*.lidr"]),
-    ("janet", &["*.janet"]),
-    ("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
-    ("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
-    ("jl", &["*.jl"]),
-    ("js", &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]),
-    ("json", &["*.json", "composer.lock"]),
-    ("jsonl", &["*.jsonl"]),
-    ("julia", &["*.jl"]),
-    ("jupyter", &["*.ipynb", "*.jpynb"]),
-    ("k", &["*.k"]),
-    ("kotlin", &["*.kt", "*.kts"]),
-    ("less", &["*.less"]),
-    ("license", &[
+    (&["fsharp"], &["*.fs", "*.fsx", "*.fsi"]),
+    (&["fut"], &["*.fut"]),
+    (&["gap"], &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
+    (&["gn"], &["*.gn", "*.gni"]),
+    (&["go"], &["*.go"]),
+    (&["gprbuild"], &["*.gpr"]),
+    (&["gradle"], &["*.gradle"]),
+    (&["graphql"], &["*.graphql", "*.graphqls"]),
+    (&["groovy"], &["*.groovy", "*.gradle"]),
+    (&["gzip"], &["*.gz", "*.tgz"]),
+    (&["h"], &["*.h", "*.hh", "*.hpp"]),
+    (&["haml"], &["*.haml"]),
+    (&["hare"], &["*.ha"]),
+    (&["haskell"], &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
+    (&["hbs"], &["*.hbs"]),
+    (&["hs"], &["*.hs", "*.lhs"]),
+    (&["html"], &["*.htm", "*.html", "*.ejs"]),
+    (&["hy"], &["*.hy"]),
+    (&["idris"], &["*.idr", "*.lidr"]),
+    (&["janet"], &["*.janet"]),
+    (&["java"], &["*.java", "*.jsp", "*.jspx", "*.properties"]),
+    (&["jinja"], &["*.j2", "*.jinja", "*.jinja2"]),
+    (&["jl"], &["*.jl"]),
+    (&["js"], &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]),
+    (&["json"], &["*.json", "composer.lock"]),
+    (&["jsonl"], &["*.jsonl"]),
+    (&["julia"], &["*.jl"]),
+    (&["jupyter"], &["*.ipynb", "*.jpynb"]),
+    (&["k"], &["*.k"]),
+    (&["kotlin"], &["*.kt", "*.kts"]),
+    (&["less"], &["*.less"]),
+    (&["license"], &[
         // General
         "COPYING", "COPYING[.-]*",
         "COPYRIGHT", "COPYRIGHT[.-]*",
@@ -133,79 +142,91 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
         "MPL-*[0-9]*",
         "OFL-*[0-9]*",
     ]),
-    ("lilypond", &["*.ly", "*.ily"]),
-    ("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
-    ("lock", &["*.lock", "package-lock.json"]),
-    ("log", &["*.log"]),
-    ("lua", &["*.lua"]),
-    ("lz4", &["*.lz4"]),
-    ("lzma", &["*.lzma"]),
-    ("m4", &["*.ac", "*.m4"]),
-    ("make", &[
+    (&["lilypond"], &["*.ly", "*.ily"]),
+    (&["lisp"], &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
+    (&["lock"], &["*.lock", "package-lock.json"]),
+    (&["log"], &["*.log"]),
+    (&["lua"], &["*.lua"]),
+    (&["lz4"], &["*.lz4"]),
+    (&["lzma"], &["*.lzma"]),
+    (&["m4"], &["*.ac", "*.m4"]),
+    (&["make"], &[
         "[Gg][Nn][Uu]makefile", "[Mm]akefile",
         "[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
         "[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
         "*.mk", "*.mak"
     ]),
-    ("mako", &["*.mako", "*.mao"]),
-    ("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
-    ("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
-    ("matlab", &["*.m"]),
-    ("md", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
-    ("meson", &["meson.build", "meson_options.txt"]),
-    ("minified", &["*.min.html", "*.min.css", "*.min.js"]),
-    ("mint", &["*.mint"]),
-    ("mk", &["mkfile"]),
-    ("ml", &["*.ml"]),
-    ("motoko", &["*.mo"]),
-    ("msbuild", &[
-        "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
+    (&["mako"], &["*.mako", "*.mao"]),
+    (&["man"], &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
+    (&["markdown", "md"], &[
+        "*.markdown",
+        "*.md",
+        "*.mdown",
+        "*.mdwn",
+        "*.mkd",
+        "*.mkdn",
+        "*.mdx",
     ]),
-    ("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
-    ("nix", &["*.nix"]),
-    ("objc", &["*.h", "*.m"]),
-    ("objcpp", &["*.h", "*.mm"]),
-    ("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
-    ("org", &["*.org", "*.org_archive"]),
-    ("pants", &["BUILD"]),
-    ("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
-    ("pdf", &["*.pdf"]),
-    ("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
-    ("php", &[
+    (&["matlab"], &["*.m"]),
+    (&["meson"], &["meson.build", "meson_options.txt"]),
+    (&["minified"], &["*.min.html", "*.min.css", "*.min.js"]),
+    (&["mint"], &["*.mint"]),
+    (&["mk"], &["mkfile"]),
+    (&["ml"], &["*.ml"]),
+    (&["motoko"], &["*.mo"]),
+    (&["msbuild"], &[
+        "*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
+        "*.sln",
+    ]),
+    (&["nim"], &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
+    (&["nix"], &["*.nix"]),
+    (&["objc"], &["*.h", "*.m"]),
+    (&["objcpp"], &["*.h", "*.mm"]),
+    (&["ocaml"], &["*.ml", "*.mli", "*.mll", "*.mly"]),
+    (&["org"], &["*.org", "*.org_archive"]),
+    (&["pants"], &["BUILD"]),
+    (&["pascal"], &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
+    (&["pdf"], &["*.pdf"]),
+    (&["perl"], &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
+    (&["php"], &[
         // note that PHP 6 doesn't exist
         // See: https://wiki.php.net/rfc/php6
         "*.php", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8",
         "*.pht", "*.phtml"
     ]),
-    ("po", &["*.po"]),
-    ("pod", &["*.pod"]),
-    ("postscript", &["*.eps", "*.ps"]),
-    ("protobuf", &["*.proto"]),
-    ("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
-    ("puppet", &["*.epp", "*.erb", "*.pp", "*.rb"]),
-    ("purs", &["*.purs"]),
-    ("py", &["*.py"]),
-    ("qmake", &["*.pro", "*.pri", "*.prf"]),
-    ("qml", &["*.qml"]),
-    ("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
-    ("racket", &["*.rkt"]),
-    ("rdoc", &["*.rdoc"]),
-    ("readme", &["README*", "*README"]),
-    ("reasonml", &["*.re", "*.rei"]),
-    ("red", &["*.r", "*.red", "*.reds"]),
-    ("rescript", &["*.res", "*.resi"]),
-    ("robot", &["*.robot"]),
-    ("rst", &["*.rst"]),
-    ("ruby", &[
+    (&["po"], &["*.po"]),
+    (&["pod"], &["*.pod"]),
+    (&["postscript"], &["*.eps", "*.ps"]),
+    (&["protobuf"], &["*.proto"]),
+    (&["ps"], &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
+    (&["puppet"], &["*.epp", "*.erb", "*.pp", "*.rb"]),
+    (&["purs"], &["*.purs"]),
+    (&["py", "python"], &["*.py", "*.pyi"]),
+    (&["qmake"], &["*.pro", "*.pri", "*.prf"]),
+    (&["qml"], &["*.qml"]),
+    (&["r"], &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
+    (&["racket"], &["*.rkt"]),
+    (&["raku"], &[
+        "*.raku", "*.rakumod", "*.rakudoc", "*.rakutest",
+        "*.p6", "*.pl6", "*.pm6"
+    ]),
+    (&["rdoc"], &["*.rdoc"]),
+    (&["readme"], &["README*", "*README"]),
+    (&["reasonml"], &["*.re", "*.rei"]),
+    (&["red"], &["*.r", "*.red", "*.reds"]),
+    (&["rescript"], &["*.res", "*.resi"]),
+    (&["robot"], &["*.robot"]),
+    (&["rst"], &["*.rst"]),
+    (&["ruby"], &[
         // Idiomatic files
         "config.ru", "Gemfile", ".irbrc", "Rakefile",
         // Extensions
         "*.gemspec", "*.rb", "*.rbw"
     ]),
-    ("rust", &["*.rs"]),
-    ("sass", &["*.sass", "*.scss"]),
-    ("scala", &["*.scala", "*.sbt"]),
-    ("sh", &[
+    (&["rust"], &["*.rs"]),
+    (&["sass"], &["*.sass", "*.scss"]),
+    (&["scala"], &["*.scala", "*.sbt"]),
+    (&["sh"], &[
         // Portable/misc. init files
         ".login", ".logout", ".profile", "profile",
         // bash-specific init files
@@ -228,60 +249,66 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
         // Extensions
         "*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
     ]),
-    ("slim", &["*.skim", "*.slim", "*.slime"]),
-    ("smarty", &["*.tpl"]),
-    ("sml", &["*.sml", "*.sig"]),
-    ("solidity", &["*.sol"]),
-    ("soy", &["*.soy"]),
-    ("spark", &["*.spark"]),
-    ("spec", &["*.spec"]),
-    ("sql", &["*.sql", "*.psql"]),
-    ("stylus", &["*.styl"]),
-    ("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
-    ("svg", &["*.svg"]),
-    ("swift", &["*.swift"]),
-    ("swig", &["*.def", "*.i"]),
-    ("systemd", &[
+    (&["slim"], &["*.skim", "*.slim", "*.slime"]),
+    (&["smarty"], &["*.tpl"]),
+    (&["sml"], &["*.sml", "*.sig"]),
+    (&["solidity"], &["*.sol"]),
+    (&["soy"], &["*.soy"]),
+    (&["spark"], &["*.spark"]),
+    (&["spec"], &["*.spec"]),
+    (&["sql"], &["*.sql", "*.psql"]),
+    (&["stylus"], &["*.styl"]),
+    (&["sv"], &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
+    (&["svg"], &["*.svg"]),
+    (&["swift"], &["*.swift"]),
+    (&["swig"], &["*.def", "*.i"]),
+    (&["systemd"], &[
         "*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
         "*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
         "*.timer",
     ]),
-    ("taskpaper", &["*.taskpaper"]),
-    ("tcl", &["*.tcl"]),
-    ("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
-    ("texinfo", &["*.texi"]),
-    ("textile", &["*.textile"]),
-    ("tf", &["*.tf"]),
-    ("thrift", &["*.thrift"]),
-    ("toml", &["*.toml", "Cargo.lock"]),
-    ("ts", &["*.ts", "*.tsx", "*.cts", "*.mts"]),
-    ("twig", &["*.twig"]),
-    ("txt", &["*.txt"]),
-    ("typoscript", &["*.typoscript", "*.ts"]),
-    ("vala", &["*.vala"]),
-    ("vb", &["*.vb"]),
-    ("vcl", &["*.vcl"]),
-    ("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
-    ("vhdl", &["*.vhd", "*.vhdl"]),
-    ("vim", &[
+    (&["taskpaper"], &["*.taskpaper"]),
+    (&["tcl"], &["*.tcl"]),
+    (&["tex"], &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
+    (&["texinfo"], &["*.texi"]),
+    (&["textile"], &["*.textile"]),
+    (&["tf"], &[
+        "*.tf", "*.auto.tfvars", "terraform.tfvars", "*.tf.json",
+        "*.auto.tfvars.json", "terraform.tfvars.json", "*.terraformrc",
+        "terraform.rc", "*.tfrc", "*.terraform.lock.hcl",
+    ]),
+    (&["thrift"], &["*.thrift"]),
+    (&["toml"], &["*.toml", "Cargo.lock"]),
+    (&["ts", "typescript"], &["*.ts", "*.tsx", "*.cts", "*.mts"]),
+    (&["twig"], &["*.twig"]),
+    (&["txt"], &["*.txt"]),
+    (&["typoscript"], &["*.typoscript", "*.ts"]),
+    (&["usd"], &["*.usd", "*.usda", "*.usdc"]),
+    (&["v"], &["*.v"]),
+    (&["vala"], &["*.vala"]),
+    (&["vb"], &["*.vb"]),
+    (&["vcl"], &["*.vcl"]),
+    (&["verilog"], &["*.v", "*.vh", "*.sv", "*.svh"]),
+    (&["vhdl"], &["*.vhd", "*.vhdl"]),
+    (&["vim"], &[
         "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
     ]),
-    ("vimscript", &[
+    (&["vimscript"], &[
         "*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
     ]),
-    ("webidl", &["*.idl", "*.webidl", "*.widl"]),
-    ("wiki", &["*.mediawiki", "*.wiki"]),
-    ("xml", &[
+    (&["webidl"], &["*.idl", "*.webidl", "*.widl"]),
+    (&["wiki"], &["*.mediawiki", "*.wiki"]),
+    (&["xml"], &[
         "*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
         "*.rng", "*.sch", "*.xhtml",
     ]),
-    ("xz", &["*.xz", "*.txz"]),
-    ("yacc", &["*.y"]),
-    ("yaml", &["*.yaml", "*.yml"]),
-    ("yang", &["*.yang"]),
-    ("z", &["*.Z"]),
-    ("zig", &["*.zig"]),
-    ("zsh", &[
+    (&["xz"], &["*.xz", "*.txz"]),
+    (&["yacc"], &["*.y"]),
+    (&["yaml"], &["*.yaml", "*.yml"]),
+    (&["yang"], &["*.yang"]),
+    (&["z"], &["*.Z"]),
+    (&["zig"], &["*.zig"]),
+    (&["zsh"], &[
         ".zshenv", "zshenv",
         ".zlogin", "zlogin",
         ".zlogout", "zlogout",
@@ -289,7 +316,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
         ".zshrc", "zshrc",
         "*.zsh",
     ]),
-    ("zstd", &["*.zst", "*.zstd"]),
+    (&["zstd"], &["*.zst", "*.zstd"]),
 ];

 #[cfg(test)]
@@ -298,10 +325,8 @@ mod tests {

     #[test]
     fn default_types_are_sorted() {
-        let mut names = DEFAULT_TYPES.iter().map(|(name, _exts)| name);
+        let mut names = DEFAULT_TYPES.iter().map(|(aliases, _)| aliases[0]);

         let Some(mut previous_name) = names.next() else { return; };

         for name in names {
             assert!(
                 name > previous_name,
@@ -309,7 +334,6 @@ mod tests {
                 name,
                 previous_name
             );

             previous_name = name;
         }
     }
|
|||||||
/// Return the file path of the current environment's global gitignore file.
|
/// Return the file path of the current environment's global gitignore file.
|
||||||
///
|
///
|
||||||
/// Note that the file path returned may not exist.
|
/// Note that the file path returned may not exist.
|
||||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
pub fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||||
// git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably,
|
// git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably,
|
||||||
// both can be active at the same time, where $HOME/.gitconfig takes
|
// both can be active at the same time, where $HOME/.gitconfig takes
|
||||||
// precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then
|
// precedent. So if $HOME/.gitconfig defines a `core.excludesFile`, then
|
||||||
@@ -596,8 +596,13 @@ fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
|||||||
// probably works in more circumstances. I guess we would ideally have
|
// probably works in more circumstances. I guess we would ideally have
|
||||||
// a full INI parser. Yuck.
|
// a full INI parser. Yuck.
|
||||||
lazy_static::lazy_static! {
|
lazy_static::lazy_static! {
|
||||||
static ref RE: Regex =
|
static ref RE: Regex = Regex::new(
|
||||||
Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
r"(?xim-u)
|
||||||
|
^[[:space:]]*excludesfile[[:space:]]*
|
||||||
|
=
|
||||||
|
[[:space:]]*(.+)[[:space:]]*$
|
||||||
|
"
|
||||||
|
).unwrap();
|
||||||
};
|
};
|
||||||
let caps = match RE.captures(data) {
|
let caps = match RE.captures(data) {
|
||||||
None => return None,
|
None => return None,
|
||||||
|
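A small illustrative check of the verbose regex above, using the same `regex::bytes` API that gitignore.rs relies on (the sample gitconfig content is made up):

```rust
use regex::bytes::Regex;

fn main() {
    // (?x) ignores the literal whitespace in the pattern, (?i) matches
    // `excludesFile` case-insensitively, (?m) anchors ^/$ per line, and
    // -u keeps the character classes byte-oriented.
    let re = Regex::new(
        r"(?xim-u)
        ^[[:space:]]*excludesfile[[:space:]]*
        =
        [[:space:]]*(.+)[[:space:]]*$
        ",
    )
    .unwrap();
    let data = b"[core]\n\texcludesFile = ~/.gitignore_global\n";
    let caps = re.captures(data).unwrap();
    assert_eq!(&caps[1], b"~/.gitignore_global".as_slice());
}
```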
@@ -106,6 +106,7 @@ impl Override {
 }

 /// Builds a matcher for a set of glob overrides.
+#[derive(Clone, Debug)]
 pub struct OverrideBuilder {
     builder: GitignoreBuilder,
 }
@@ -488,9 +488,11 @@ impl TypesBuilder {
     /// Add a set of default file type definitions.
     pub fn add_defaults(&mut self) -> &mut TypesBuilder {
         static MSG: &'static str = "adding a default type should never fail";
-        for &(name, exts) in DEFAULT_TYPES {
-            for ext in exts {
-                self.add(name, ext).expect(MSG);
+        for &(names, exts) in DEFAULT_TYPES {
+            for name in names {
+                for ext in exts {
+                    self.add(name, ext).expect(MSG);
+                }
             }
         }
         self
@@ -537,6 +539,8 @@ mod tests {
             "html:*.htm",
             "rust:*.rs",
             "js:*.js",
+            "py:*.py",
+            "python:*.py",
             "foo:*.{rs,foo}",
             "combo:include:html,rust",
         ]
@@ -551,6 +555,8 @@ mod tests {
     matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
     matched!(match8, types(), vec!["combo"], vec![], "index.html");
     matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
+    matched!(match10, types(), vec!["py"], vec![], "main.py");
+    matched!(match11, types(), vec!["python"], vec![], "main.py");

     matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
     matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
@@ -558,6 +564,8 @@ mod tests {
     matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
     matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
     matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
+    matched!(not, matchnot7, types(), vec!["py"], vec![], "index.html");
+    matched!(not, matchnot8, types(), vec!["python"], vec![], "doc.md");

     #[test]
     fn test_invalid_defs() {
@@ -569,7 +577,7 @@ mod tests {
         let original_defs = btypes.definitions();
         let bad_defs = vec![
             // Reference to type that does not exist
-            "combo:include:html,python",
+            "combo:include:html,qwerty",
             // Bad format
             "combo:foobar:html,rust",
             "",
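A minimal sketch of what the alias support above buys at the API level: `py` and `python` (and likewise `md`/`markdown`, `ts`/`typescript`, `bat`/`batch`) now name the same definition.

```rust
use ignore::types::TypesBuilder;

fn main() {
    let mut builder = TypesBuilder::new();
    builder.add_defaults();
    // Selecting the alias behaves exactly like selecting "py".
    builder.select("python");
    let types = builder.build().unwrap();
    assert!(types.matched("main.py", false).is_whitelist());
    assert!(types.matched("doc.md", false).is_ignore());
}
```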
@@ -1681,7 +1681,7 @@ impl<'s> Worker<'s> {
         stack.pop()
     }

-    /// Signal that work has been received.
+    /// Signal that work has been finished.
     fn work_done(&self) {
         self.num_pending.fetch_sub(1, Ordering::SeqCst);
     }
@@ -15,4 +15,5 @@ edition = "2018"

 [dependencies]
 grep-matcher = { version = "0.1.6", path = "../matcher" }
-pcre2 = "0.2.3"
+log = "0.4.19"
+pcre2 = "0.2.4"
@@ -11,6 +11,8 @@ pub struct RegexMatcherBuilder {
     builder: RegexBuilder,
     case_smart: bool,
     word: bool,
+    fixed_strings: bool,
+    whole_line: bool,
 }

 impl RegexMatcherBuilder {
@@ -20,6 +22,8 @@ impl RegexMatcherBuilder {
             builder: RegexBuilder::new(),
             case_smart: false,
             word: false,
+            fixed_strings: false,
+            whole_line: false,
         }
     }

@@ -29,17 +33,40 @@ impl RegexMatcherBuilder {
     /// If there was a problem compiling the pattern, then an error is
     /// returned.
     pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
+        self.build_many(&[pattern])
+    }
+
+    /// Compile all of the given patterns into a single regex that matches when
+    /// at least one of the patterns matches.
+    ///
+    /// If there was a problem building the regex, then an error is returned.
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<RegexMatcher, Error> {
         let mut builder = self.builder.clone();
-        if self.case_smart && !has_uppercase_literal(pattern) {
+        let mut pats = Vec::with_capacity(patterns.len());
+        for p in patterns.iter() {
+            pats.push(if self.fixed_strings {
+                format!("(?:{})", pcre2::escape(p.as_ref()))
+            } else {
+                format!("(?:{})", p.as_ref())
+            });
+        }
+        let mut singlepat = pats.join("|");
+        if self.case_smart && !has_uppercase_literal(&singlepat) {
             builder.caseless(true);
         }
-        let res = if self.word {
-            let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
-            builder.build(&pattern)
-        } else {
-            builder.build(pattern)
-        };
-        res.map_err(Error::regex).map(|regex| {
+        if self.whole_line {
+            singlepat = format!(r"(?m:^)(?:{})(?m:$)", singlepat);
+        } else if self.word {
+            // We make this option exclusive with whole_line because when
+            // whole_line is enabled, all matches necessary fall on word
+            // boundaries. So this extra goop is strictly redundant.
+            singlepat = format!(r"(?<!\w)(?:{})(?!\w)", singlepat);
+        }
+        log::trace!("final regex: {:?}", singlepat);
+        builder.build(&singlepat).map_err(Error::regex).map(|regex| {
             let mut names = HashMap::new();
             for (i, name) in regex.capture_names().iter().enumerate() {
                 if let Some(ref name) = *name {
@@ -144,6 +171,21 @@ impl RegexMatcherBuilder {
         self
     }

+    /// Whether the patterns should be treated as literal strings or not. When
+    /// this is active, all characters, including ones that would normally be
+    /// special regex meta characters, are matched literally.
+    pub fn fixed_strings(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+        self.fixed_strings = yes;
+        self
+    }
+
+    /// Whether each pattern should match the entire line or not. This is
+    /// equivalent to surrounding the pattern with `(?m:^)` and `(?m:$)`.
+    pub fn whole_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+        self.whole_line = yes;
+        self
+    }
+
     /// Enable Unicode matching mode.
     ///
     /// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
@@ -178,23 +220,22 @@ impl RegexMatcherBuilder {
         self
     }

-    /// When UTF matching mode is enabled, this will disable the UTF checking
-    /// that PCRE2 will normally perform automatically. If UTF matching mode
-    /// is not enabled, then this has no effect.
+    /// This is now deprecated and is a no-op.
     ///
-    /// UTF checking is enabled by default when UTF matching mode is enabled.
-    /// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
-    /// will return an error if you attempt to search a subject string that is
-    /// not valid UTF-8.
+    /// Previously, this option permitted disabling PCRE2's UTF-8 validity
+    /// check, which could result in undefined behavior if the haystack was
+    /// not valid UTF-8. But PCRE2 introduced a new option, `PCRE2_MATCH_INVALID_UTF`,
+    /// in 10.34 which this crate always sets. When this option is enabled,
+    /// PCRE2 claims to not have undefined behavior when the haystack is
+    /// invalid UTF-8.
     ///
-    /// # Safety
-    ///
-    /// It is undefined behavior to disable the UTF check in UTF matching mode
-    /// and search a subject string that is not valid UTF-8. When the UTF check
-    /// is disabled, callers must guarantee that the subject string is valid
-    /// UTF-8.
-    pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
-        self.builder.disable_utf_check();
+    /// Therefore, disabling the UTF-8 check is not something that is exposed
+    /// by this crate.
+    #[deprecated(
+        since = "0.2.4",
+        note = "now a no-op due to new PCRE2 features"
+    )]
+    pub fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
         self
     }
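A sketch of the new multi-pattern entry point above, combined with the new `fixed_strings` and `whole_line` knobs (setup code is illustrative):

```rust
use grep_matcher::Matcher;
use grep_pcre2::RegexMatcherBuilder;

fn main() {
    // Each pattern is escaped (fixed_strings), joined with `|`, and the
    // whole alternation is wrapped in (?m:^)...(?m:$) (whole_line).
    let matcher = RegexMatcherBuilder::new()
        .fixed_strings(true)
        .whole_line(true)
        .build_many(&["foo.bar", "quux"])
        .unwrap();
    assert!(matcher.is_match(b"foo.bar").unwrap());
    // The dot is literal, so this haystack does not match.
    assert!(!matcher.is_match(b"fooXbar").unwrap());
}
```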
@@ -1,6 +1,6 @@
 [package]
 name = "grep-printer"
-version = "0.1.6" #:version
+version = "0.1.7" #:version
 authors = ["Andrew Gallant <jamslam@gmail.com>"]
 description = """
 An implementation of the grep crate's Sink trait that provides standard
@@ -20,9 +20,9 @@ serde1 = ["base64", "serde", "serde_json"]

 [dependencies]
 base64 = { version = "0.20.0", optional = true }
-bstr = "1.1.0"
+bstr = "1.6.0"
 grep-matcher = { version = "0.1.6", path = "../matcher" }
-grep-searcher = { version = "0.1.8", path = "../searcher" }
+grep-searcher = { version = "0.1.11", path = "../searcher" }
 termcolor = "1.0.4"
 serde = { version = "1.0.77", optional = true, features = ["derive"] }
 serde_json = { version = "1.0.27", optional = true }
@@ -11,13 +11,12 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
 readme = "README.md"
 keywords = ["regex", "grep", "search", "pattern", "line"]
 license = "Unlicense OR MIT"
-edition = "2018"
+edition = "2021"

 [dependencies]
-aho-corasick = "0.7.3"
-bstr = "1.1.0"
+aho-corasick = "1.0.2"
+bstr = "1.6.0"
 grep-matcher = { version = "0.1.6", path = "../matcher" }
-log = "0.4.5"
-regex = "1.1"
-regex-syntax = "0.6.5"
-thread_local = "1.1.2"
+log = "0.4.19"
+regex-automata = { version = "0.3.0" }
+regex-syntax = "0.7.2"
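Since grep-regex now builds on regex-automata's meta regex engine instead of the `regex` crate, a tiny sketch of that API (version 0.3 as pinned above):

```rust
use regex_automata::meta::Regex;

fn main() {
    // meta::Regex searches byte or string haystacks directly, which is why
    // the separate regex (bytes API) and thread_local dependencies can go.
    let re = Regex::new(r"\w+light").unwrap();
    assert!(re.is_match("a flashlight"));
}
```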
@@ -1,17 +1,13 @@
-use regex_syntax::ast::parse::Parser;
 use regex_syntax::ast::{self, Ast};

 /// The results of analyzing AST of a regular expression (e.g., for supporting
 /// smart case).
 #[derive(Clone, Debug)]
-pub struct AstAnalysis {
+pub(crate) struct AstAnalysis {
     /// True if and only if a literal uppercase character occurs in the regex.
     any_uppercase: bool,
     /// True if and only if the regex contains any literal at all.
     any_literal: bool,
-    /// True if and only if the regex consists entirely of a literal and no
-    /// other special regex characters.
-    all_verbatim_literal: bool,
 }

 impl AstAnalysis {
@@ -19,16 +15,16 @@ impl AstAnalysis {
     ///
     /// If `pattern` is not a valid regular expression, then `None` is
     /// returned.
-    #[allow(dead_code)]
-    pub fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
-        Parser::new()
+    #[cfg(test)]
+    pub(crate) fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
+        regex_syntax::ast::parse::Parser::new()
             .parse(pattern)
             .map(|ast| AstAnalysis::from_ast(&ast))
             .ok()
     }

     /// Perform an AST analysis given the AST.
-    pub fn from_ast(ast: &Ast) -> AstAnalysis {
+    pub(crate) fn from_ast(ast: &Ast) -> AstAnalysis {
         let mut analysis = AstAnalysis::new();
         analysis.from_ast_impl(ast);
         analysis
@@ -40,7 +36,7 @@ impl AstAnalysis {
     /// For example, a pattern like `\pL` contains no uppercase literals,
     /// even though `L` is uppercase and the `\pL` class contains uppercase
     /// characters.
-    pub fn any_uppercase(&self) -> bool {
+    pub(crate) fn any_uppercase(&self) -> bool {
         self.any_uppercase
     }

@@ -48,32 +44,13 @@ impl AstAnalysis {
     ///
     /// For example, a pattern like `\pL` reports `false`, but a pattern like
     /// `\pLfoo` reports `true`.
-    pub fn any_literal(&self) -> bool {
+    pub(crate) fn any_literal(&self) -> bool {
         self.any_literal
     }

-    /// Returns true if and only if the entire pattern is a verbatim literal
-    /// with no special meta characters.
-    ///
-    /// When this is true, then the pattern satisfies the following law:
-    /// `escape(pattern) == pattern`. Notable examples where this returns
-    /// `false` include patterns like `a\u0061` even though `\u0061` is just
-    /// a literal `a`.
-    ///
-    /// The purpose of this flag is to determine whether the patterns can be
-    /// given to non-regex substring search algorithms as-is.
-    #[allow(dead_code)]
-    pub fn all_verbatim_literal(&self) -> bool {
-        self.all_verbatim_literal
-    }
-
     /// Creates a new `AstAnalysis` value with an initial configuration.
     fn new() -> AstAnalysis {
-        AstAnalysis {
-            any_uppercase: false,
-            any_literal: false,
-            all_verbatim_literal: true,
-        }
+        AstAnalysis { any_uppercase: false, any_literal: false }
     }

     fn from_ast_impl(&mut self, ast: &Ast) {
@@ -86,26 +63,20 @@ impl AstAnalysis {
             | Ast::Dot(_)
             | Ast::Assertion(_)
             | Ast::Class(ast::Class::Unicode(_))
-            | Ast::Class(ast::Class::Perl(_)) => {
-                self.all_verbatim_literal = false;
-            }
+            | Ast::Class(ast::Class::Perl(_)) => {}
             Ast::Literal(ref x) => {
                 self.from_ast_literal(x);
             }
             Ast::Class(ast::Class::Bracketed(ref x)) => {
-                self.all_verbatim_literal = false;
                 self.from_ast_class_set(&x.kind);
             }
             Ast::Repetition(ref x) => {
-                self.all_verbatim_literal = false;
                 self.from_ast_impl(&x.ast);
             }
             Ast::Group(ref x) => {
-                self.all_verbatim_literal = false;
                 self.from_ast_impl(&x.ast);
             }
             Ast::Alternation(ref alt) => {
-                self.all_verbatim_literal = false;
                 for x in &alt.asts {
                     self.from_ast_impl(x);
                 }
@@ -161,9 +132,6 @@ impl AstAnalysis {
     }

     fn from_ast_literal(&mut self, ast: &ast::Literal) {
-        if ast.kind != ast::LiteralKind::Verbatim {
-            self.all_verbatim_literal = false;
-        }
         self.any_literal = true;
         self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
     }
@@ -171,7 +139,7 @@ impl AstAnalysis {
     /// Returns true if and only if the attributes can never change no matter
     /// what other AST it might see.
     fn done(&self) -> bool {
-        self.any_uppercase && self.any_literal && !self.all_verbatim_literal
+        self.any_uppercase && self.any_literal
     }
 }

@@ -188,76 +156,61 @@ mod tests {
         let x = analysis("");
         assert!(!x.any_uppercase);
         assert!(!x.any_literal);
-        assert!(x.all_verbatim_literal);

         let x = analysis("foo");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(x.all_verbatim_literal);

         let x = analysis("Foo");
         assert!(x.any_uppercase);
         assert!(x.any_literal);
-        assert!(x.all_verbatim_literal);

         let x = analysis("foO");
         assert!(x.any_uppercase);
         assert!(x.any_literal);
-        assert!(x.all_verbatim_literal);

         let x = analysis(r"foo\\");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo\w");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo\S");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo\p{Ll}");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo[a-z]");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo[A-Z]");
         assert!(x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo[\S\t]");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"foo\\S");
         assert!(x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"\p{Ll}");
         assert!(!x.any_uppercase);
         assert!(!x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"aBc\w");
         assert!(x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);

         let x = analysis(r"a\u0061");
         assert!(!x.any_uppercase);
         assert!(x.any_literal);
-        assert!(!x.all_verbatim_literal);
     }
 }
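The `AstAnalysis` changes above feed smart case. A sketch of the observable behavior through grep-regex's public builder (assuming the crate versions in this diff):

```rust
use grep_matcher::Matcher;
use grep_regex::RegexMatcherBuilder;

fn main() {
    // An all-lowercase pattern is searched case-insensitively...
    let m =
        RegexMatcherBuilder::new().case_smart(true).build("foo").unwrap();
    assert!(m.is_match(b"FOO").unwrap());

    // ...but an uppercase literal in the pattern restores case sensitivity.
    let m =
        RegexMatcherBuilder::new().case_smart(true).build("Foo").unwrap();
    assert!(!m.is_match(b"FOO").unwrap());
}
```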
@@ -1,15 +1,16 @@
|
|||||||
use grep_matcher::{ByteSet, LineTerminator};
|
use {
|
||||||
use regex::bytes::{Regex, RegexBuilder};
|
grep_matcher::{ByteSet, LineTerminator},
|
||||||
use regex_syntax::ast::{self, Ast};
|
regex_automata::meta::Regex,
|
||||||
use regex_syntax::hir::{self, Hir};
|
regex_syntax::{
|
||||||
|
ast,
|
||||||
|
hir::{self, Hir, HirKind},
|
||||||
|
},
|
||||||
|
};
|
||||||
|
|
||||||
use crate::ast::AstAnalysis;
|
use crate::{
|
||||||
use crate::crlf::crlfify;
|
ast::AstAnalysis, error::Error, non_matching::non_matching_bytes,
|
||||||
use crate::error::Error;
|
strip::strip_from_match,
|
||||||
use crate::literal::LiteralSets;
|
};
|
||||||
use crate::multi::alternation_literals;
|
|
||||||
use crate::non_matching::non_matching_bytes;
|
|
||||||
use crate::strip::strip_from_match;
|
|
||||||
|
|
||||||
/// Config represents the configuration of a regex matcher in this crate.
|
/// Config represents the configuration of a regex matcher in this crate.
|
||||||
/// The configuration is itself a rough combination of the knobs found in
|
/// The configuration is itself a rough combination of the knobs found in
|
||||||
@@ -21,21 +22,23 @@ use crate::strip::strip_from_match;
|
|||||||
/// configuration which generated it, and provides transformation on that HIR
|
/// configuration which generated it, and provides transformation on that HIR
|
||||||
/// such that the configuration is preserved.
|
/// such that the configuration is preserved.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Config {
|
pub(crate) struct Config {
|
||||||
pub case_insensitive: bool,
|
pub(crate) case_insensitive: bool,
|
||||||
pub case_smart: bool,
|
pub(crate) case_smart: bool,
|
||||||
pub multi_line: bool,
|
pub(crate) multi_line: bool,
|
||||||
pub dot_matches_new_line: bool,
|
pub(crate) dot_matches_new_line: bool,
|
||||||
pub swap_greed: bool,
|
pub(crate) swap_greed: bool,
|
||||||
pub ignore_whitespace: bool,
|
pub(crate) ignore_whitespace: bool,
|
||||||
pub unicode: bool,
|
pub(crate) unicode: bool,
|
||||||
pub octal: bool,
|
pub(crate) octal: bool,
|
||||||
pub size_limit: usize,
|
pub(crate) size_limit: usize,
|
||||||
pub dfa_size_limit: usize,
|
pub(crate) dfa_size_limit: usize,
|
||||||
pub nest_limit: u32,
|
pub(crate) nest_limit: u32,
|
||||||
pub line_terminator: Option<LineTerminator>,
|
pub(crate) line_terminator: Option<LineTerminator>,
|
||||||
pub crlf: bool,
|
pub(crate) crlf: bool,
|
||||||
pub word: bool,
|
pub(crate) word: bool,
|
||||||
|
pub(crate) fixed_strings: bool,
|
||||||
|
pub(crate) whole_line: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Config {
|
impl Default for Config {
|
||||||
@@ -50,47 +53,28 @@ impl Default for Config {
|
|||||||
unicode: true,
|
unicode: true,
|
||||||
octal: false,
|
octal: false,
|
||||||
// These size limits are much bigger than what's in the regex
|
// These size limits are much bigger than what's in the regex
|
||||||
// crate.
|
// crate by default.
|
||||||
size_limit: 100 * (1 << 20),
|
size_limit: 100 * (1 << 20),
|
||||||
dfa_size_limit: 1000 * (1 << 20),
|
dfa_size_limit: 1000 * (1 << 20),
|
||||||
nest_limit: 250,
|
nest_limit: 250,
|
||||||
line_terminator: None,
|
line_terminator: None,
|
||||||
crlf: false,
|
crlf: false,
|
||||||
word: false,
|
word: false,
|
||||||
|
fixed_strings: false,
|
||||||
|
whole_line: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Config {
|
impl Config {
|
||||||
/// Parse the given pattern and returned its HIR expression along with
|
/// Use this configuration to build an HIR from the given patterns. The HIR
|
||||||
/// the current configuration.
|
/// returned corresponds to a single regex that is an alternation of the
|
||||||
///
|
/// patterns given.
|
||||||
/// If there was a problem parsing the given expression then an error
|
pub(crate) fn build_many<P: AsRef<str>>(
|
||||||
/// is returned.
|
&self,
|
||||||
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
patterns: &[P],
|
||||||
let ast = self.ast(pattern)?;
|
) -> Result<ConfiguredHIR, Error> {
|
||||||
let analysis = self.analysis(&ast)?;
|
ConfiguredHIR::new(self.clone(), patterns)
|
||||||
let expr = hir::translate::TranslatorBuilder::new()
|
|
||||||
.allow_invalid_utf8(true)
|
|
||||||
.case_insensitive(self.is_case_insensitive(&analysis))
|
|
||||||
.multi_line(self.multi_line)
|
|
||||||
.dot_matches_new_line(self.dot_matches_new_line)
|
|
||||||
.swap_greed(self.swap_greed)
|
|
||||||
.unicode(self.unicode)
|
|
||||||
.build()
|
|
||||||
.translate(pattern, &ast)
|
|
||||||
.map_err(Error::regex)?;
|
|
||||||
let expr = match self.line_terminator {
|
|
||||||
None => expr,
|
|
||||||
Some(line_term) => strip_from_match(expr, line_term)?,
|
|
||||||
};
|
|
||||||
Ok(ConfiguredHIR {
|
|
||||||
original: pattern.to_string(),
|
|
||||||
config: self.clone(),
|
|
||||||
analysis,
|
|
||||||
// If CRLF mode is enabled, replace `$` with `(?:\r?$)`.
|
|
||||||
expr: if self.crlf { crlfify(expr) } else { expr },
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Accounting for the `smart_case` config knob, return true if and only if
|
/// Accounting for the `smart_case` config knob, return true if and only if
|
||||||
@@ -105,35 +89,55 @@ impl Config {
         analysis.any_literal() && !analysis.any_uppercase()
     }

-    /// Returns true if and only if this config is simple enough such that
-    /// if the pattern is a simple alternation of literals, then it can be
-    /// constructed via a plain Aho-Corasick automaton.
-    ///
-    /// Note that it is OK to return true even when settings like `multi_line`
-    /// are enabled, since if multi-line can impact the match semantics of a
-    /// regex, then it is by definition not a simple alternation of literals.
-    pub fn can_plain_aho_corasick(&self) -> bool {
-        !self.word && !self.case_insensitive && !self.case_smart
-    }
-
-    /// Perform analysis on the AST of this pattern.
-    ///
-    /// This returns an error if the given pattern failed to parse.
-    fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
-        Ok(AstAnalysis::from_ast(ast))
-    }
-
-    /// Parse the given pattern into its abstract syntax.
-    ///
-    /// This returns an error if the given pattern failed to parse.
-    fn ast(&self, pattern: &str) -> Result<Ast, Error> {
-        ast::parse::ParserBuilder::new()
-            .nest_limit(self.nest_limit)
-            .octal(self.octal)
-            .ignore_whitespace(self.ignore_whitespace)
-            .build()
-            .parse(pattern)
-            .map_err(Error::regex)
+    /// Returns whether the given patterns should be treated as "fixed strings"
+    /// literals. This is different from just querying the `fixed_strings` knob
+    /// in that if the knob is false, this will still return true in some cases
+    /// if the patterns are themselves indistinguishable from literals.
+    ///
+    /// The main idea here is that if this returns true, then it is safe
+    /// to build an `regex_syntax::hir::Hir` value directly from the given
+    /// patterns as an alternation of `hir::Literal` values.
+    fn is_fixed_strings<P: AsRef<str>>(&self, patterns: &[P]) -> bool {
+        // When these are enabled, we really need to parse the patterns and
+        // let them go through the standard HIR translation process in order
+        // for case folding transforms to be applied.
+        if self.case_insensitive || self.case_smart {
+            return false;
+        }
+        // Even if whole_line or word is enabled, both of those things can
+        // be implemented by wrapping the Hir generated by an alternation of
+        // fixed string literals. So for here at least, we don't care about the
+        // word or whole_line settings.
+        if self.fixed_strings {
+            // ... but if any literal contains a line terminator, then we've
+            // got to bail out because this will ultimately result in an error.
+            if let Some(lineterm) = self.line_terminator {
+                for p in patterns.iter() {
+                    if has_line_terminator(lineterm, p.as_ref()) {
+                        return false;
+                    }
+                }
+            }
+            return true;
+        }
+        // In this case, the only way we can hand construct the Hir is if none
+        // of the patterns contain meta characters. If they do, then we need to
+        // send them through the standard parsing/translation process.
+        for p in patterns.iter() {
+            let p = p.as_ref();
+            if p.chars().any(regex_syntax::is_meta_character) {
+                return false;
+            }
+            // Same deal as when fixed_strings is set above. If the pattern has
+            // a line terminator anywhere, then we need to bail out and let
+            // an error occur.
+            if let Some(lineterm) = self.line_terminator {
+                if has_line_terminator(lineterm, p) {
+                    return false;
+                }
+            }
+        }
+        true
     }
 }

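A minimal standalone sketch (not part of this change) of the literal check above: `regex_syntax::is_meta_character` is the predicate that decides whether a pattern can skip the parser entirely. The patterns here are illustrative.

    use regex_syntax::is_meta_character;

    fn main() {
        // No meta characters: the pattern is indistinguishable from a
        // literal and can be assembled into an Hir directly.
        assert!(!"foobar".chars().any(is_meta_character));
        // `fo+` contains `+`, so it must take the parse/translate path.
        assert!("fo+".chars().any(is_meta_character));
    }
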
@@ -149,170 +153,268 @@ impl Config {
 /// size limits set on the configured HIR will be propagated out to any
 /// subsequently constructed HIR or regular expression.
 #[derive(Clone, Debug)]
-pub struct ConfiguredHIR {
-    original: String,
+pub(crate) struct ConfiguredHIR {
     config: Config,
-    analysis: AstAnalysis,
-    expr: Hir,
+    hir: Hir,
 }

 impl ConfiguredHIR {
-    /// Return the configuration for this HIR expression.
-    pub fn config(&self) -> &Config {
+    /// Parse the given patterns into a single HIR expression that represents
+    /// an alternation of the patterns given.
+    fn new<P: AsRef<str>>(
+        config: Config,
+        patterns: &[P],
+    ) -> Result<ConfiguredHIR, Error> {
+        let hir = if config.is_fixed_strings(patterns) {
+            let mut alts = vec![];
+            for p in patterns.iter() {
+                alts.push(Hir::literal(p.as_ref().as_bytes()));
+            }
+            log::debug!(
+                "assembling HIR from {} fixed string literals",
+                alts.len()
+            );
+            let hir = Hir::alternation(alts);
+            hir
+        } else {
+            let mut alts = vec![];
+            for p in patterns.iter() {
+                alts.push(if config.fixed_strings {
+                    format!("(?:{})", regex_syntax::escape(p.as_ref()))
+                } else {
+                    format!("(?:{})", p.as_ref())
+                });
+            }
+            let pattern = alts.join("|");
+            let ast = ast::parse::ParserBuilder::new()
+                .nest_limit(config.nest_limit)
+                .octal(config.octal)
+                .ignore_whitespace(config.ignore_whitespace)
+                .build()
+                .parse(&pattern)
+                .map_err(Error::generic)?;
+            let analysis = AstAnalysis::from_ast(&ast);
+            let mut hir = hir::translate::TranslatorBuilder::new()
+                .utf8(false)
+                .case_insensitive(config.is_case_insensitive(&analysis))
+                .multi_line(config.multi_line)
+                .dot_matches_new_line(config.dot_matches_new_line)
+                .crlf(config.crlf)
+                .swap_greed(config.swap_greed)
+                .unicode(config.unicode)
+                .build()
+                .translate(&pattern, &ast)
+                .map_err(Error::generic)?;
+            // We don't need to do this for the fixed-strings case above
+            // because is_fixed_strings will return false if any pattern
+            // contains a line terminator. Therefore, we don't need to strip
+            // it.
+            //
+            // We go to some pains to avoid doing this in the fixed-strings
+            // case because this can result in building a new HIR when ripgrep
+            // is given a huge set of literals to search for. And this can
+            // actually take a little time. It's not huge, but it's noticeable.
+            hir = match config.line_terminator {
+                None => hir,
+                Some(line_term) => strip_from_match(hir, line_term)?,
+            };
+            hir
+        };
+        Ok(ConfiguredHIR { config, hir })
+    }
+
+    /// Return a reference to the underlying configuration.
+    pub(crate) fn config(&self) -> &Config {
         &self.config
     }

-    /// Compute the set of non-matching bytes for this HIR expression.
-    pub fn non_matching_bytes(&self) -> ByteSet {
-        non_matching_bytes(&self.expr)
+    /// Return a reference to the underyling HIR.
+    pub(crate) fn hir(&self) -> &Hir {
+        &self.hir
     }

-    /// Returns true if and only if this regex needs to have its match offsets
-    /// tweaked because of CRLF support. Specifically, this occurs when the
-    /// CRLF hack is enabled and the regex is line anchored at the end. In
-    /// this case, matches that end with a `\r` have the `\r` stripped.
-    pub fn needs_crlf_stripped(&self) -> bool {
-        self.config.crlf && self.expr.is_line_anchored_end()
+    /// Convert this HIR to a regex that can be used for matching.
+    pub(crate) fn to_regex(&self) -> Result<Regex, Error> {
+        let meta = Regex::config()
+            .utf8_empty(false)
+            .nfa_size_limit(Some(self.config.size_limit))
+            // We don't expose a knob for this because the one-pass DFA is
+            // usually not a perf bottleneck for ripgrep. But we give it some
+            // extra room than the default.
+            .onepass_size_limit(Some(10 * (1 << 20)))
+            // Same deal here. The default limit for full DFAs is VERY small,
+            // but with ripgrep we can afford to spend a bit more time on
+            // building them I think.
+            .dfa_size_limit(Some(1 * (1 << 20)))
+            .dfa_state_limit(Some(1_000))
+            .hybrid_cache_capacity(self.config.dfa_size_limit);
+        Regex::builder()
+            .configure(meta)
+            .build_from_hir(&self.hir)
+            .map_err(Error::regex)
+    }
+
+    /// Compute the set of non-matching bytes for this HIR expression.
+    pub(crate) fn non_matching_bytes(&self) -> ByteSet {
+        non_matching_bytes(&self.hir)
     }

     /// Returns the line terminator configured on this expression.
     ///
     /// When we have beginning/end anchors (NOT line anchors), the fast line
-    /// searching path isn't quite correct. Or at least, doesn't match the
-    /// slow path. Namely, the slow path strips line terminators while the
-    /// fast path does not. Since '$' (when multi-line mode is disabled)
-    /// doesn't match at line boundaries, the existence of a line terminator
-    /// might cause it to not match when it otherwise would with the line
-    /// terminator stripped.
+    /// searching path isn't quite correct. Or at least, doesn't match the slow
+    /// path. Namely, the slow path strips line terminators while the fast path
+    /// does not. Since '$' (when multi-line mode is disabled) doesn't match at
+    /// line boundaries, the existence of a line terminator might cause it to
+    /// not match when it otherwise would with the line terminator stripped.
     ///
-    /// Since searching with text anchors is exceptionally rare in the
-    /// context of line oriented searching (multi-line mode is basically
-    /// always enabled), we just disable this optimization when there are
-    /// text anchors. We disable it by not returning a line terminator, since
+    /// Since searching with text anchors is exceptionally rare in the context
+    /// of line oriented searching (multi-line mode is basically always
+    /// enabled), we just disable this optimization when there are text
+    /// anchors. We disable it by not returning a line terminator, since
     /// without a line terminator, the fast search path can't be executed.
     ///
+    /// Actually, the above is no longer quite correct. Later on, another
+    /// optimization was added where if the line terminator was in the set of
+    /// bytes that was guaranteed to never be part of a match, then the higher
+    /// level search infrastructure assumes that the fast line-by-line search
+    /// path can still be taken. This optimization applies when multi-line
+    /// search (not multi-line mode) is enabled. In that case, there is no
+    /// configured line terminator since the regex is permitted to match a
+    /// line terminator. But if the regex is guaranteed to never match across
+    /// multiple lines despite multi-line search being requested, we can still
+    /// do the faster and more flexible line-by-line search. This is why the
+    /// non-matching extraction routine removes `\n` when `\A` and `\z` are
+    /// present even though that's not quite correct...
+    ///
     /// See: <https://github.com/BurntSushi/ripgrep/issues/2260>
-    pub fn line_terminator(&self) -> Option<LineTerminator> {
-        if self.is_any_anchored() {
+    pub(crate) fn line_terminator(&self) -> Option<LineTerminator> {
+        if self.hir.properties().look_set().contains_anchor_haystack() {
             None
         } else {
             self.config.line_terminator
         }
     }

-    /// Returns true if and only if the underlying HIR has any text anchors.
-    fn is_any_anchored(&self) -> bool {
-        self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
-    }
-
-    /// Builds a regular expression from this HIR expression.
-    pub fn regex(&self) -> Result<Regex, Error> {
-        self.pattern_to_regex(&self.expr.to_string())
-    }
-
-    /// If this HIR corresponds to an alternation of literals with no
-    /// capturing groups, then this returns those literals.
-    pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
-        if !self.config.can_plain_aho_corasick() {
-            return None;
-        }
-        alternation_literals(&self.expr)
-    }
-
-    /// Applies the given function to the concrete syntax of this HIR and then
-    /// generates a new HIR based on the result of the function in a way that
-    /// preserves the configuration.
+    /// Turns this configured HIR into one that only matches when both sides of
+    /// the match correspond to a word boundary.
     ///
-    /// For example, this can be used to wrap a user provided regular
-    /// expression with additional semantics. e.g., See the `WordMatcher`.
-    pub fn with_pattern<F: FnMut(&str) -> String>(
-        &self,
-        mut f: F,
-    ) -> Result<ConfiguredHIR, Error> {
-        self.pattern_to_hir(&f(&self.expr.to_string()))
+    /// Note that the HIR returned is like turning `pat` into
+    /// `(?m:^|\W)(pat)(?m:$|\W)`. That is, the true match is at capture group
+    /// `1` and not `0`.
+    pub(crate) fn into_word(self) -> Result<ConfiguredHIR, Error> {
+        // In theory building the HIR for \W should never fail, but there are
+        // likely some pathological cases (particularly with respect to certain
+        // values of limits) where it could in theory fail.
+        let non_word = {
+            let mut config = self.config.clone();
+            config.fixed_strings = false;
+            ConfiguredHIR::new(config, &[r"\W"])?
+        };
+        let line_anchor_start = Hir::look(self.line_anchor_start());
+        let line_anchor_end = Hir::look(self.line_anchor_end());
+        let hir = Hir::concat(vec![
+            Hir::alternation(vec![line_anchor_start, non_word.hir.clone()]),
+            Hir::capture(hir::Capture {
+                index: 1,
+                name: None,
+                sub: Box::new(renumber_capture_indices(self.hir)?),
+            }),
+            Hir::alternation(vec![non_word.hir, line_anchor_end]),
+        ]);
+        Ok(ConfiguredHIR { config: self.config, hir })
     }

-    /// If the current configuration has a line terminator set and if useful
-    /// literals could be extracted, then a regular expression matching those
-    /// literals is returned. If no line terminator is set, then `None` is
-    /// returned.
-    ///
-    /// If compiling the resulting regular expression failed, then an error
-    /// is returned.
-    ///
-    /// This method only returns something when a line terminator is set
-    /// because matches from this regex are generally candidates that must be
-    /// confirmed before reporting a match. When performing a line oriented
-    /// search, confirmation is easy: just extend the candidate match to its
-    /// respective line boundaries and then re-search that line for a full
-    /// match. This only works when the line terminator is set because the line
-    /// terminator setting guarantees that the regex itself can never match
-    /// through the line terminator byte.
-    pub fn fast_line_regex(&self) -> Result<Option<Regex>, Error> {
-        if self.config.line_terminator.is_none() {
-            return Ok(None);
-        }
-        match LiteralSets::new(&self.expr).one_regex(self.config.word) {
-            None => Ok(None),
-            Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
-        }
-    }
+    /// Turns this configured HIR into an equivalent one, but where it must
+    /// match at the start and end of a line.
+    pub(crate) fn into_whole_line(self) -> ConfiguredHIR {
+        let line_anchor_start = Hir::look(self.line_anchor_start());
+        let line_anchor_end = Hir::look(self.line_anchor_end());
+        let hir =
+            Hir::concat(vec![line_anchor_start, self.hir, line_anchor_end]);
+        ConfiguredHIR { config: self.config, hir }
+    }

-    /// Create a regex from the given pattern using this HIR's configuration.
-    fn pattern_to_regex(&self, pattern: &str) -> Result<Regex, Error> {
-        // The settings we explicitly set here are intentionally a subset
-        // of the settings we have. The key point here is that our HIR
-        // expression is computed with the settings in mind, such that setting
-        // them here could actually lead to unintended behavior. For example,
-        // consider the pattern `(?U)a+`. This will get folded into the HIR
-        // as a non-greedy repetition operator which will in turn get printed
-        // to the concrete syntax as `a+?`, which is correct. But if we
-        // set the `swap_greed` option again, then we'll wind up with `(?U)a+?`
-        // which is equal to `a+` which is not the same as what we were given.
-        //
-        // We also don't need to apply `case_insensitive` since this gets
-        // folded into the HIR and would just cause us to do redundant work.
-        //
-        // Finally, we don't need to set `ignore_whitespace` since the concrete
-        // syntax emitted by the HIR printer never needs it.
-        //
-        // We set the rest of the options. Some of them are important, such as
-        // the size limit, and some of them are necessary to preserve the
-        // intention of the original pattern. For example, the Unicode flag
-        // will impact how the WordMatcher functions, namely, whether its
-        // word boundaries are Unicode aware or not.
-        RegexBuilder::new(&pattern)
-            .nest_limit(self.config.nest_limit)
-            .octal(self.config.octal)
-            .multi_line(self.config.multi_line)
-            .dot_matches_new_line(self.config.dot_matches_new_line)
-            .unicode(self.config.unicode)
-            .size_limit(self.config.size_limit)
-            .dfa_size_limit(self.config.dfa_size_limit)
-            .build()
-            .map_err(Error::regex)
-    }
+    /// Turns this configured HIR into an equivalent one, but where it must
+    /// match at the start and end of the haystack.
+    pub(crate) fn into_anchored(self) -> ConfiguredHIR {
+        let hir = Hir::concat(vec![
+            Hir::look(hir::Look::Start),
+            self.hir,
+            Hir::look(hir::Look::End),
+        ]);
+        ConfiguredHIR { config: self.config, hir }
+    }

-    /// Create an HIR expression from the given pattern using this HIR's
-    /// configuration.
-    fn pattern_to_hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
-        // See `pattern_to_regex` comment for explanation of why we only set
-        // a subset of knobs here. e.g., `swap_greed` is explicitly left out.
-        let expr = ::regex_syntax::ParserBuilder::new()
-            .nest_limit(self.config.nest_limit)
-            .octal(self.config.octal)
-            .allow_invalid_utf8(true)
-            .multi_line(self.config.multi_line)
-            .dot_matches_new_line(self.config.dot_matches_new_line)
-            .unicode(self.config.unicode)
-            .build()
-            .parse(pattern)
-            .map_err(Error::regex)?;
-        Ok(ConfiguredHIR {
-            original: self.original.clone(),
-            config: self.config.clone(),
-            analysis: self.analysis.clone(),
-            expr,
-        })
+    /// Returns the "start line" anchor for this configuration.
+    fn line_anchor_start(&self) -> hir::Look {
+        if self.config.crlf {
+            hir::Look::StartCRLF
+        } else {
+            hir::Look::StartLF
+        }
+    }
+
+    /// Returns the "end line" anchor for this configuration.
+    fn line_anchor_end(&self) -> hir::Look {
+        if self.config.crlf {
+            hir::Look::EndCRLF
+        } else {
+            hir::Look::EndLF
+        }
     }
 }
+
+/// This increments the index of every capture group in the given hir by 1. If
+/// any increment results in an overflow, then an error is returned.
+fn renumber_capture_indices(hir: Hir) -> Result<Hir, Error> {
+    Ok(match hir.into_kind() {
+        HirKind::Empty => Hir::empty(),
+        HirKind::Literal(hir::Literal(lit)) => Hir::literal(lit),
+        HirKind::Class(cls) => Hir::class(cls),
+        HirKind::Look(x) => Hir::look(x),
+        HirKind::Repetition(mut x) => {
+            x.sub = Box::new(renumber_capture_indices(*x.sub)?);
+            Hir::repetition(x)
+        }
+        HirKind::Capture(mut cap) => {
+            cap.index = match cap.index.checked_add(1) {
+                Some(index) => index,
+                None => {
+                    // This error message kind of sucks, but it's probably
+                    // impossible for it to happen. The only way a capture
+                    // index can overflow addition is if the regex is huge
+                    // (or something else has gone horribly wrong).
+                    let msg = "could not renumber capture index, too big";
+                    return Err(Error::any(msg));
+                }
+            };
+            cap.sub = Box::new(renumber_capture_indices(*cap.sub)?);
+            Hir::capture(cap)
+        }
+        HirKind::Concat(subs) => {
+            let subs = subs
+                .into_iter()
+                .map(|sub| renumber_capture_indices(sub))
+                .collect::<Result<Vec<Hir>, Error>>()?;
+            Hir::concat(subs)
+        }
+        HirKind::Alternation(subs) => {
+            let subs = subs
+                .into_iter()
+                .map(|sub| renumber_capture_indices(sub))
+                .collect::<Result<Vec<Hir>, Error>>()?;
+            Hir::alternation(subs)
+        }
+    })
+}
+
+/// Returns true if the given literal string contains any byte from the line
+/// terminator given.
+fn has_line_terminator(lineterm: LineTerminator, literal: &str) -> bool {
+    if lineterm.is_crlf() {
+        literal.as_bytes().iter().copied().any(|b| b == b'\r' || b == b'\n')
+    } else {
+        literal.as_bytes().iter().copied().any(|b| b == lineterm.as_byte())
+    }
+}

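A standalone sketch (not part of this diff; it uses the same regex-automata and regex-syntax APIs the change itself relies on) of what the fixed-strings path in `ConfiguredHIR::new` does: assemble an `Hir` alternation of literals by hand and pass it to `build_from_hir`, so meta characters in the patterns stay literal.

    use regex_automata::meta::Regex;
    use regex_syntax::hir::Hir;

    fn main() {
        let alts = vec![
            Hir::literal("foo".as_bytes()),
            Hir::literal("bar*".as_bytes()),
        ];
        let hir = Hir::alternation(alts);
        let re = Regex::builder().build_from_hir(&hir).unwrap();
        // The `*` in `bar*` is matched literally, not as a repetition.
        assert!(re.is_match("xx bar* yy"));
    }
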
@@ -1,189 +0,0 @@
-use std::collections::HashMap;
-
-use grep_matcher::{Match, Matcher, NoError};
-use regex::bytes::Regex;
-use regex_syntax::hir::{self, Hir, HirKind};
-
-use crate::config::ConfiguredHIR;
-use crate::error::Error;
-use crate::matcher::RegexCaptures;
-
-/// A matcher for implementing "word match" semantics.
-#[derive(Clone, Debug)]
-pub struct CRLFMatcher {
-    /// The regex.
-    regex: Regex,
-    /// A map from capture group name to capture group index.
-    names: HashMap<String, usize>,
-}
-
-impl CRLFMatcher {
-    /// Create a new matcher from the given pattern that strips `\r` from the
-    /// end of every match.
-    ///
-    /// This panics if the given expression doesn't need its CRLF stripped.
-    pub fn new(expr: &ConfiguredHIR) -> Result<CRLFMatcher, Error> {
-        assert!(expr.needs_crlf_stripped());
-
-        let regex = expr.regex()?;
-        let mut names = HashMap::new();
-        for (i, optional_name) in regex.capture_names().enumerate() {
-            if let Some(name) = optional_name {
-                names.insert(name.to_string(), i.checked_sub(1).unwrap());
-            }
-        }
-        Ok(CRLFMatcher { regex, names })
-    }
-
-    /// Return the underlying regex used by this matcher.
-    pub fn regex(&self) -> &Regex {
-        &self.regex
-    }
-}
-
-impl Matcher for CRLFMatcher {
-    type Captures = RegexCaptures;
-    type Error = NoError;
-
-    fn find_at(
-        &self,
-        haystack: &[u8],
-        at: usize,
-    ) -> Result<Option<Match>, NoError> {
-        let m = match self.regex.find_at(haystack, at) {
-            None => return Ok(None),
-            Some(m) => Match::new(m.start(), m.end()),
-        };
-        Ok(Some(adjust_match(haystack, m)))
-    }
-
-    fn new_captures(&self) -> Result<RegexCaptures, NoError> {
-        Ok(RegexCaptures::new(self.regex.capture_locations()))
-    }
-
-    fn capture_count(&self) -> usize {
-        self.regex.captures_len().checked_sub(1).unwrap()
-    }
-
-    fn capture_index(&self, name: &str) -> Option<usize> {
-        self.names.get(name).map(|i| *i)
-    }
-
-    fn captures_at(
-        &self,
-        haystack: &[u8],
-        at: usize,
-        caps: &mut RegexCaptures,
-    ) -> Result<bool, NoError> {
-        caps.strip_crlf(false);
-        let r =
-            self.regex.captures_read_at(caps.locations_mut(), haystack, at);
-        if !r.is_some() {
-            return Ok(false);
-        }
-
-        // If the end of our match includes a `\r`, then strip it from all
-        // capture groups ending at the same location.
-        let end = caps.locations().get(0).unwrap().1;
-        if end > 0 && haystack.get(end - 1) == Some(&b'\r') {
-            caps.strip_crlf(true);
-        }
-        Ok(true)
-    }
-
-    // We specifically do not implement other methods like find_iter or
-    // captures_iter. Namely, the iter methods are guaranteed to be correct
-    // by virtue of implementing find_at and captures_at above.
-}
-
-/// If the given match ends with a `\r`, then return a new match that ends
-/// immediately before the `\r`.
-pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
-    if m.end() > 0 && haystack.get(m.end() - 1) == Some(&b'\r') {
-        m.with_end(m.end() - 1)
-    } else {
-        m
-    }
-}
-
-/// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`.
-///
-/// This does not preserve the exact semantics of the given expression,
-/// however, it does have the useful property that anything that matched the
-/// given expression will also match the returned expression. The difference is
-/// that the returned expression can match possibly other things as well.
-///
-/// The principle reason why we do this is because the underlying regex engine
-/// doesn't support CRLF aware `$` look-around. It's planned to fix it at that
-/// level, but we perform this kludge in the mean time.
-///
-/// Note that while the match preserving semantics are nice and neat, the
-/// match position semantics are quite a bit messier. Namely, `$` only ever
-/// matches the position between characters where as `\r??` can match a
-/// character and change the offset. This is regretable, but works out pretty
-/// nicely in most cases, especially when a match is limited to a single line.
-pub fn crlfify(expr: Hir) -> Hir {
-    match expr.into_kind() {
-        HirKind::Anchor(hir::Anchor::EndLine) => {
-            let concat = Hir::concat(vec![
-                Hir::repetition(hir::Repetition {
-                    kind: hir::RepetitionKind::ZeroOrOne,
-                    greedy: false,
-                    hir: Box::new(Hir::literal(hir::Literal::Unicode('\r'))),
-                }),
-                Hir::anchor(hir::Anchor::EndLine),
-            ]);
-            Hir::group(hir::Group {
-                kind: hir::GroupKind::NonCapturing,
-                hir: Box::new(concat),
-            })
-        }
-        HirKind::Empty => Hir::empty(),
-        HirKind::Literal(x) => Hir::literal(x),
-        HirKind::Class(x) => Hir::class(x),
-        HirKind::Anchor(x) => Hir::anchor(x),
-        HirKind::WordBoundary(x) => Hir::word_boundary(x),
-        HirKind::Repetition(mut x) => {
-            x.hir = Box::new(crlfify(*x.hir));
-            Hir::repetition(x)
-        }
-        HirKind::Group(mut x) => {
-            x.hir = Box::new(crlfify(*x.hir));
-            Hir::group(x)
-        }
-        HirKind::Concat(xs) => {
-            Hir::concat(xs.into_iter().map(crlfify).collect())
-        }
-        HirKind::Alternation(xs) => {
-            Hir::alternation(xs.into_iter().map(crlfify).collect())
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::crlfify;
-    use regex_syntax::Parser;
-
-    fn roundtrip(pattern: &str) -> String {
-        let expr1 = Parser::new().parse(pattern).unwrap();
-        let expr2 = crlfify(expr1);
-        expr2.to_string()
-    }
-
-    #[test]
-    fn various() {
-        assert_eq!(roundtrip(r"(?m)$"), "(?:\r??(?m:$))");
-        assert_eq!(roundtrip(r"(?m)$$"), "(?:\r??(?m:$))(?:\r??(?m:$))");
-        assert_eq!(
-            roundtrip(r"(?m)(?:foo$|bar$)"),
-            "(?:foo(?:\r??(?m:$))|bar(?:\r??(?m:$)))"
-        );
-        assert_eq!(roundtrip(r"(?m)$a"), "(?:\r??(?m:$))a");
-
-        // Not a multiline `$`, so no crlfifying occurs.
-        assert_eq!(roundtrip(r"$"), "\\z");
-        // It's a literal, derp.
-        assert_eq!(roundtrip(r"\$"), "\\$");
-    }
-}
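For context on why this whole module could be deleted: the `crlfify` rewrite above existed only because the regex engine had no CRLF-aware `$`. A sketch contrasting the old kludge with the engine-level CRLF mode that replaces it (assumes a regex-automata version with the `R` flag, as used elsewhere in this compare):

    use regex_automata::meta::Regex;

    fn main() {
        // Without CRLF awareness, `(?m)foo$` fails on `\r\n` lines...
        let plain = Regex::new(r"(?m)foo$").unwrap();
        assert!(!plain.is_match("foo\r\n"));
        // ...which the deleted crlfify rewrite papered over:
        let rewritten = Regex::new(r"(?m)foo(?:\r??$)").unwrap();
        assert!(rewritten.is_match("foo\r\n"));
        // The replacement is CRLF mode built into the engine:
        let crlf = Regex::new(r"(?mR)foo$").unwrap();
        assert!(crlf.is_match("foo\r\n"));
    }
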
@@ -1,8 +1,3 @@
-use std::error;
-use std::fmt;
-
-use crate::util;
-
 /// An error that can occur in this crate.
 ///
 /// Generally, this error corresponds to problems building a regular
@@ -18,10 +13,27 @@ impl Error {
         Error { kind }
     }

-    pub(crate) fn regex<E: error::Error>(err: E) -> Error {
+    pub(crate) fn regex(err: regex_automata::meta::BuildError) -> Error {
+        if let Some(size_limit) = err.size_limit() {
+            let kind = ErrorKind::Regex(format!(
+                "compiled regex exceeds size limit of {size_limit}",
+            ));
+            Error { kind }
+        } else if let Some(ref err) = err.syntax_error() {
+            Error::generic(err)
+        } else {
+            Error::generic(err)
+        }
+    }
+
+    pub(crate) fn generic<E: std::error::Error>(err: E) -> Error {
         Error { kind: ErrorKind::Regex(err.to_string()) }
     }

+    pub(crate) fn any<E: ToString>(msg: E) -> Error {
+        Error { kind: ErrorKind::Regex(msg.to_string()) }
+    }
+
     /// Return the kind of this error.
     pub fn kind(&self) -> &ErrorKind {
         &self.kind
@@ -30,6 +42,7 @@ impl Error {

 /// The kind of an error that can occur.
 #[derive(Clone, Debug)]
+#[non_exhaustive]
 pub enum ErrorKind {
     /// An error that occurred as a result of parsing a regular expression.
     /// This can be a syntax error or an error that results from attempting to
@@ -51,38 +64,26 @@ pub enum ErrorKind {
     ///
     /// The invalid byte is included in this error.
     InvalidLineTerminator(u8),
-    /// Hints that destructuring should not be exhaustive.
-    ///
-    /// This enum may grow additional variants, so this makes sure clients
-    /// don't count on exhaustive matching. (Otherwise, adding a new variant
-    /// could break existing code.)
-    #[doc(hidden)]
-    __Nonexhaustive,
 }

-impl error::Error for Error {
-    fn description(&self) -> &str {
-        match self.kind {
-            ErrorKind::Regex(_) => "regex error",
-            ErrorKind::NotAllowed(_) => "literal not allowed",
-            ErrorKind::InvalidLineTerminator(_) => "invalid line terminator",
-            ErrorKind::__Nonexhaustive => unreachable!(),
-        }
-    }
-}
-
-impl fmt::Display for Error {
-    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+impl std::error::Error for Error {}
+
+impl std::fmt::Display for Error {
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        use bstr::ByteSlice;
+
         match self.kind {
             ErrorKind::Regex(ref s) => write!(f, "{}", s),
             ErrorKind::NotAllowed(ref lit) => {
-                write!(f, "the literal '{:?}' is not allowed in a regex", lit)
+                write!(f, "the literal {:?} is not allowed in a regex", lit)
             }
             ErrorKind::InvalidLineTerminator(byte) => {
-                let x = util::show_bytes(&[byte]);
-                write!(f, "line terminators must be ASCII, but '{}' is not", x)
+                write!(
+                    f,
+                    "line terminators must be ASCII, but {} is not",
+                    [byte].as_bstr()
+                )
             }
-            ErrorKind::__Nonexhaustive => unreachable!(),
         }
     }
 }
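A sketch of which branch the new `Error::regex` constructor takes on a size-limit failure; `BuildError::size_limit` is the API used in the diff above, and the tiny limit here is contrived to force the error:

    use regex_automata::meta::Regex;

    fn main() {
        let config = Regex::config().nfa_size_limit(Some(10));
        let err =
            Regex::builder().configure(config).build(r"\w{100}").unwrap_err();
        // On size-limit failures, size_limit() is Some and the message
        // names the limit; otherwise Error::generic is used.
        if let Some(limit) = err.size_limit() {
            println!("compiled regex exceeds size limit of {limit}");
        }
    }
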
@@ -8,12 +8,9 @@ pub use crate::matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};

 mod ast;
 mod config;
-mod crlf;
 mod error;
 mod literal;
 mod matcher;
-mod multi;
 mod non_matching;
 mod strip;
-mod util;
 mod word;
[File diff suppressed because it is too large]
@@ -1,15 +1,22 @@
-use std::collections::HashMap;
+use std::sync::Arc;

-use grep_matcher::{
-    ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError,
+use {
+    grep_matcher::{
+        ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher,
+        NoError,
+    },
+    regex_automata::{
+        meta::Regex, util::captures::Captures as AutomataCaptures, Input,
+        PatternID,
+    },
 };
-use regex::bytes::{CaptureLocations, Regex};

-use crate::config::{Config, ConfiguredHIR};
-use crate::crlf::CRLFMatcher;
-use crate::error::Error;
-use crate::multi::MultiLiteralMatcher;
-use crate::word::WordMatcher;
+use crate::{
+    config::{Config, ConfiguredHIR},
+    error::Error,
+    literal::InnerLiterals,
+    word::WordMatcher,
+};

 /// A builder for constructing a `Matcher` using regular expressions.
 ///
@@ -43,18 +50,37 @@ impl RegexMatcherBuilder {
     /// The syntax supported is documented as part of the regex crate:
     /// <https://docs.rs/regex/#syntax>.
     pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
-        let chir = self.config.hir(pattern)?;
-        let fast_line_regex = chir.fast_line_regex()?;
-        let non_matching_bytes = chir.non_matching_bytes();
-        if let Some(ref re) = fast_line_regex {
-            log::debug!("extracted fast line regex: {:?}", re);
-        }
+        self.build_many(&[pattern])
+    }

-        let matcher = RegexMatcherImpl::new(&chir)?;
-        log::trace!("final regex: {:?}", matcher.regex());
-        let mut config = self.config.clone();
-        // We override the line terminator in case the configured expr doesn't
+    /// Build a new matcher using the current configuration for the provided
+    /// patterns. The resulting matcher behaves as if all of the patterns
+    /// given are joined together into a single alternation. That is, it
+    /// reports matches where at least one of the given patterns matches.
+    pub fn build_many<P: AsRef<str>>(
+        &self,
+        patterns: &[P],
+    ) -> Result<RegexMatcher, Error> {
+        let chir = self.config.build_many(patterns)?;
+        let matcher = RegexMatcherImpl::new(chir)?;
+        let (chir, re) = (matcher.chir(), matcher.regex());
+        log::trace!("final regex: {:?}", chir.hir().to_string());
+
+        let non_matching_bytes = chir.non_matching_bytes();
+        // If we can pick out some literals from the regex, then we might be
+        // able to build a faster regex that quickly identifies candidate
+        // matching lines. The regex engine will do what it can on its own, but
+        // we can specifically do a little more when a line terminator is set.
+        // For example, for a regex like `\w+foo\w+`, we can look for `foo`,
+        // and when a match is found, look for the line containing `foo` and
+        // then run the original regex on only that line. (In this case, the
+        // regex engine is likely to handle this case for us since it's so
+        // simple, but the idea applies.)
+        let fast_line_regex = InnerLiterals::new(chir, re).one_regex()?;
+
+        // We override the line terminator in case the configured HIR doesn't
         // support it.
+        let mut config = self.config.clone();
         config.line_terminator = chir.line_terminator();
         Ok(RegexMatcher {
             config,
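A usage sketch of the `build_many` API added here (together with the `fixed_strings` knob added further down in this diff):

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcherBuilder;

    fn main() {
        // Multiple patterns compile into one alternation.
        let matcher = RegexMatcherBuilder::new()
            .fixed_strings(true)
            .build_many(&["foo", "bar"])
            .unwrap();
        assert!(matcher.is_match(b"xxx bar yyy").unwrap());
    }
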
@@ -73,39 +99,7 @@ impl RegexMatcherBuilder {
         &self,
         literals: &[B],
     ) -> Result<RegexMatcher, Error> {
-        let mut has_escape = false;
-        let mut slices = vec![];
-        for lit in literals {
-            slices.push(lit.as_ref());
-            has_escape = has_escape || lit.as_ref().contains('\\');
-        }
-        // Even when we have a fixed set of literals, we might still want to
-        // use the regex engine. Specifically, if any string has an escape
-        // in it, then we probably can't feed it to Aho-Corasick without
-        // removing the escape. Additionally, if there are any particular
-        // special match semantics we need to honor, that Aho-Corasick isn't
-        // enough. Finally, the regex engine can do really well with a small
-        // number of literals (at time of writing, this is changing soon), so
-        // we use it when there's a small set.
-        //
-        // Yes, this is one giant hack. Ideally, this entirely separate literal
-        // matcher that uses Aho-Corasick would be pushed down into the regex
-        // engine.
-        if has_escape
-            || !self.config.can_plain_aho_corasick()
-            || literals.len() < 40
-        {
-            return self.build(&slices.join("|"));
-        }
-
-        let matcher = MultiLiteralMatcher::new(&slices)?;
-        let imp = RegexMatcherImpl::MultiLiteral(matcher);
-        Ok(RegexMatcher {
-            config: self.config.clone(),
-            matcher: imp,
-            fast_line_regex: None,
-            non_matching_bytes: ByteSet::empty(),
-        })
+        self.build_many(literals)
     }

     /// Set the value for the case insensitive (`i`) flag.
@@ -306,20 +300,15 @@ impl RegexMatcherBuilder {
     /// 1. It causes the line terminator for the matcher to be `\r\n`. Namely,
     /// this prevents the matcher from ever producing a match that contains
     /// a `\r` or `\n`.
-    /// 2. It translates all instances of `$` in the pattern to `(?:\r??$)`.
-    /// This works around the fact that the regex engine does not support
-    /// matching CRLF as a line terminator when using `$`.
+    /// 2. It enables CRLF mode for `^` and `$`. This means that line anchors
+    /// will treat both `\r` and `\n` as line terminators, but will never
+    /// match between a `\r` and `\n`.
     ///
-    /// In particular, because of (2), the matches produced by the matcher may
-    /// be slightly different than what one would expect given the pattern.
-    /// This is the trade off made: in many cases, `$` will "just work" in the
-    /// presence of `\r\n` line terminators, but matches may require some
-    /// trimming to faithfully represent the intended match.
-    ///
-    /// Note that if you do not wish to set the line terminator but would still
-    /// like `$` to match `\r\n` line terminators, then it is valid to call
-    /// `crlf(true)` followed by `line_terminator(None)`. Ordering is
-    /// important, since `crlf` and `line_terminator` override each other.
+    /// Note that if you do not wish to set the line terminator but would
+    /// still like `$` to match `\r\n` line terminators, then it is valid to
+    /// call `crlf(true)` followed by `line_terminator(None)`. Ordering is
+    /// important, since `crlf` sets the line terminator, but `line_terminator`
+    /// does not touch the `crlf` setting.
     pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
         if yes {
             self.config.line_terminator = Some(LineTerminator::crlf());
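A sketch of the documented guarantee that CRLF mode never matches between a `\r` and a `\n` (assumes the `R` flag in the underlying engine):

    use regex_automata::meta::Regex;

    fn main() {
        let re = Regex::new(r"(?mR)^").unwrap();
        // Line starts in "a\r\nb" are at 0 and 3: after the full `\r\n`,
        // never between the `\r` and the `\n` (offset 2).
        let starts: Vec<usize> =
            re.find_iter("a\r\nb").map(|m| m.start()).collect();
        assert_eq!(starts, vec![0, 3]);
    }
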
@@ -345,6 +334,21 @@ impl RegexMatcherBuilder {
         self.config.word = yes;
         self
     }
+
+    /// Whether the patterns should be treated as literal strings or not. When
+    /// this is active, all characters, including ones that would normally be
+    /// special regex meta characters, are matched literally.
+    pub fn fixed_strings(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+        self.config.fixed_strings = yes;
+        self
+    }
+
+    /// Whether each pattern should match the entire line or not. This is
+    /// equivalent to surrounding the pattern with `(?m:^)` and `(?m:$)`.
+    pub fn whole_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
+        self.config.whole_line = yes;
+        self
+    }
 }

 /// An implementation of the `Matcher` trait using Rust's standard regex
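A usage sketch of the new `whole_line` knob; per its docs it is equivalent to wrapping the pattern in `(?m:^)` and `(?m:$)`:

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcherBuilder;

    fn main() {
        let matcher =
            RegexMatcherBuilder::new().whole_line(true).build("fo+").unwrap();
        assert!(matcher.is_match(b"foo\n").unwrap());
        // A partial line does not match.
        assert!(!matcher.is_match(b"xfoo\n").unwrap());
    }
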
@@ -374,10 +378,10 @@ impl RegexMatcher {
     /// Create a new matcher from the given pattern using the default
     /// configuration, but matches lines terminated by `\n`.
     ///
-    /// This is meant to be a convenience constructor for using a
-    /// `RegexMatcherBuilder` and setting its
-    /// [`line_terminator`](struct.RegexMatcherBuilder.html#method.line_terminator)
-    /// to `\n`. The purpose of using this constructor is to permit special
+    /// This is meant to be a convenience constructor for
+    /// using a `RegexMatcherBuilder` and setting its
+    /// [`line_terminator`](RegexMatcherBuilder::method.line_terminator) to
+    /// `\n`. The purpose of using this constructor is to permit special
     /// optimizations that help speed up line oriented search. These types of
     /// optimizations are only appropriate when matches span no more than one
     /// line. For this reason, this constructor will return an error if the
@@ -393,13 +397,6 @@ impl RegexMatcher {
 enum RegexMatcherImpl {
     /// The standard matcher used for all regular expressions.
     Standard(StandardMatcher),
-    /// A matcher for an alternation of plain literals.
-    MultiLiteral(MultiLiteralMatcher),
-    /// A matcher that strips `\r` from the end of matches.
-    ///
-    /// This is only used when the CRLF hack is enabled and the regex is line
-    /// anchored at the end.
-    CRLF(CRLFMatcher),
     /// A matcher that only matches at word boundaries. This transforms the
     /// regex to `(^|\W)(...)($|\W)` instead of the more intuitive `\b(...)\b`.
     /// Because of this, the WordMatcher provides its own implementation of
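A sketch of the word-matching transformation described above: the wrapped pattern puts the true match in capture group 1, not group 0 (the pattern and haystack here are illustrative):

    use regex_automata::meta::Regex;

    fn main() {
        let re = Regex::new(r"(?m:^|\W)(foo)(?m:$|\W)").unwrap();
        let mut caps = re.create_captures();
        re.captures(" foo ", &mut caps);
        // Group 0 includes the surrounding \W; group 1 is just `foo`.
        let span = caps.get_group(1).unwrap();
        assert_eq!(&b"foo"[..], &b" foo "[span.range()]);
    }
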
@@ -411,29 +408,33 @@ enum RegexMatcherImpl {
 impl RegexMatcherImpl {
     /// Based on the configuration, create a new implementation of the
     /// `Matcher` trait.
-    fn new(expr: &ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
-        if expr.config().word {
-            Ok(RegexMatcherImpl::Word(WordMatcher::new(expr)?))
-        } else if expr.needs_crlf_stripped() {
-            Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
+    fn new(mut chir: ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
+        // When whole_line is set, we don't use a word matcher even if word
+        // matching was requested. Why? Because `(?m:^)(pat)(?m:$)` implies
+        // word matching.
+        Ok(if chir.config().word && !chir.config().whole_line {
+            RegexMatcherImpl::Word(WordMatcher::new(chir)?)
         } else {
-            if let Some(lits) = expr.alternation_literals() {
-                if lits.len() >= 40 {
-                    let matcher = MultiLiteralMatcher::new(&lits)?;
-                    return Ok(RegexMatcherImpl::MultiLiteral(matcher));
-                }
+            if chir.config().whole_line {
+                chir = chir.into_whole_line();
             }
-            Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
-        }
+            RegexMatcherImpl::Standard(StandardMatcher::new(chir)?)
+        })
     }

     /// Return the underlying regex object used.
-    fn regex(&self) -> String {
+    fn regex(&self) -> &Regex {
         match *self {
-            RegexMatcherImpl::Word(ref x) => x.regex().to_string(),
-            RegexMatcherImpl::CRLF(ref x) => x.regex().to_string(),
-            RegexMatcherImpl::MultiLiteral(_) => "<N/A>".to_string(),
-            RegexMatcherImpl::Standard(ref x) => x.regex.to_string(),
+            RegexMatcherImpl::Word(ref x) => x.regex(),
+            RegexMatcherImpl::Standard(ref x) => &x.regex,
+        }
+    }
+
+    /// Return the underlying HIR of the regex used for searching.
+    fn chir(&self) -> &ConfiguredHIR {
+        match *self {
+            RegexMatcherImpl::Word(ref x) => x.chir(),
+            RegexMatcherImpl::Standard(ref x) => &x.chir,
         }
     }
 }
@@ -453,8 +454,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.find_at(haystack, at),
-            MultiLiteral(ref m) => m.find_at(haystack, at),
-            CRLF(ref m) => m.find_at(haystack, at),
             Word(ref m) => m.find_at(haystack, at),
         }
     }
@@ -463,8 +462,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.new_captures(),
-            MultiLiteral(ref m) => m.new_captures(),
-            CRLF(ref m) => m.new_captures(),
             Word(ref m) => m.new_captures(),
         }
     }
@@ -473,8 +470,6 @@ impl Matcher for RegexMatcher {
        use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.capture_count(),
-            MultiLiteral(ref m) => m.capture_count(),
-            CRLF(ref m) => m.capture_count(),
             Word(ref m) => m.capture_count(),
         }
     }
@@ -483,8 +478,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.capture_index(name),
-            MultiLiteral(ref m) => m.capture_index(name),
-            CRLF(ref m) => m.capture_index(name),
             Word(ref m) => m.capture_index(name),
         }
     }
@@ -493,8 +486,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.find(haystack),
-            MultiLiteral(ref m) => m.find(haystack),
-            CRLF(ref m) => m.find(haystack),
             Word(ref m) => m.find(haystack),
         }
     }
@@ -506,8 +497,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.find_iter(haystack, matched),
-            MultiLiteral(ref m) => m.find_iter(haystack, matched),
-            CRLF(ref m) => m.find_iter(haystack, matched),
             Word(ref m) => m.find_iter(haystack, matched),
         }
     }
@@ -523,8 +512,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.try_find_iter(haystack, matched),
-            MultiLiteral(ref m) => m.try_find_iter(haystack, matched),
-            CRLF(ref m) => m.try_find_iter(haystack, matched),
             Word(ref m) => m.try_find_iter(haystack, matched),
         }
     }
@@ -537,8 +524,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.captures(haystack, caps),
-            MultiLiteral(ref m) => m.captures(haystack, caps),
-            CRLF(ref m) => m.captures(haystack, caps),
             Word(ref m) => m.captures(haystack, caps),
         }
     }
@@ -555,8 +540,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.captures_iter(haystack, caps, matched),
-            MultiLiteral(ref m) => m.captures_iter(haystack, caps, matched),
-            CRLF(ref m) => m.captures_iter(haystack, caps, matched),
             Word(ref m) => m.captures_iter(haystack, caps, matched),
         }
     }
@@ -573,10 +556,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
-            MultiLiteral(ref m) => {
-                m.try_captures_iter(haystack, caps, matched)
-            }
-            CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
             Word(ref m) => m.try_captures_iter(haystack, caps, matched),
         }
     }
@@ -590,8 +569,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.captures_at(haystack, at, caps),
-            MultiLiteral(ref m) => m.captures_at(haystack, at, caps),
-            CRLF(ref m) => m.captures_at(haystack, at, caps),
             Word(ref m) => m.captures_at(haystack, at, caps),
         }
     }
@@ -608,8 +585,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.replace(haystack, dst, append),
-            MultiLiteral(ref m) => m.replace(haystack, dst, append),
-            CRLF(ref m) => m.replace(haystack, dst, append),
             Word(ref m) => m.replace(haystack, dst, append),
         }
     }
@@ -629,12 +604,6 @@ impl Matcher for RegexMatcher {
             Standard(ref m) => {
                 m.replace_with_captures(haystack, caps, dst, append)
             }
-            MultiLiteral(ref m) => {
-                m.replace_with_captures(haystack, caps, dst, append)
-            }
-            CRLF(ref m) => {
-                m.replace_with_captures(haystack, caps, dst, append)
-            }
             Word(ref m) => {
                 m.replace_with_captures(haystack, caps, dst, append)
             }
@@ -645,8 +614,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.is_match(haystack),
-            MultiLiteral(ref m) => m.is_match(haystack),
-            CRLF(ref m) => m.is_match(haystack),
             Word(ref m) => m.is_match(haystack),
         }
     }
@@ -659,8 +626,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.is_match_at(haystack, at),
-            MultiLiteral(ref m) => m.is_match_at(haystack, at),
-            CRLF(ref m) => m.is_match_at(haystack, at),
             Word(ref m) => m.is_match_at(haystack, at),
         }
     }
@@ -672,8 +637,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.shortest_match(haystack),
-            MultiLiteral(ref m) => m.shortest_match(haystack),
-            CRLF(ref m) => m.shortest_match(haystack),
             Word(ref m) => m.shortest_match(haystack),
         }
     }
@@ -686,8 +649,6 @@ impl Matcher for RegexMatcher {
         use self::RegexMatcherImpl::*;
         match self.matcher {
             Standard(ref m) => m.shortest_match_at(haystack, at),
-            MultiLiteral(ref m) => m.shortest_match_at(haystack, at),
-            CRLF(ref m) => m.shortest_match_at(haystack, at),
             Word(ref m) => m.shortest_match_at(haystack, at),
         }
     }
@@ -706,7 +667,10 @@ impl Matcher for RegexMatcher {
|
|||||||
) -> Result<Option<LineMatchKind>, NoError> {
|
) -> Result<Option<LineMatchKind>, NoError> {
|
||||||
Ok(match self.fast_line_regex {
|
Ok(match self.fast_line_regex {
|
||||||
Some(ref regex) => {
|
Some(ref regex) => {
|
||||||
regex.shortest_match(haystack).map(LineMatchKind::Candidate)
|
let input = Input::new(haystack);
|
||||||
|
regex
|
||||||
|
.search_half(&input)
|
||||||
|
.map(|hm| LineMatchKind::Candidate(hm.offset()))
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)
|
self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)
|
||||||
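
Aside: the replacement above leans on regex-automata's half-match API, where a search reports only the end offset of a match. A minimal, self-contained sketch of that API (the pattern and haystack are invented for illustration; assumes regex-automata 0.3 with its default `meta` engine):

    use regex_automata::{meta::Regex, Input};

    fn main() {
        // `search_half` finds only where a match ends; that end offset is
        // exactly what `LineMatchKind::Candidate` needs above.
        let re = Regex::new(r"\w+ly").unwrap();
        let input = Input::new("the fox jumped quickly");
        if let Some(hm) = re.search_half(&input) {
            println!("candidate match ends at byte offset {}", hm.offset());
        }
    }
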
@@ -721,20 +685,19 @@ struct StandardMatcher {
/// The regular expression compiled from the pattern provided by the
/// caller.
regex: Regex,
/// A map from capture group name to its corresponding index.
/// The HIR that produced this regex.
names: HashMap<String, usize>,
///
/// We put this in an `Arc` because by the time it gets here, it won't
/// change. And because cloning and dropping an `Hir` is somewhat expensive
/// due to its deep recursive representation.
chir: Arc<ConfiguredHIR>,
}

impl StandardMatcher {
fn new(expr: &ConfiguredHIR) -> Result<StandardMatcher, Error> {
fn new(chir: ConfiguredHIR) -> Result<StandardMatcher, Error> {
let regex = expr.regex()?;
let chir = Arc::new(chir);
let mut names = HashMap::new();
let regex = chir.to_regex()?;
for (i, optional_name) in regex.capture_names().enumerate() {
Ok(StandardMatcher { regex, chir })
if let Some(name) = optional_name {
names.insert(name.to_string(), i);
}
}
Ok(StandardMatcher { regex, names })
}
}

@@ -747,14 +710,12 @@ impl Matcher for StandardMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, NoError> {
Ok(self
let input = Input::new(haystack).span(at..haystack.len());
.regex
Ok(self.regex.find(input).map(|m| Match::new(m.start(), m.end())))
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}

fn new_captures(&self) -> Result<RegexCaptures, NoError> {
Ok(RegexCaptures::new(self.regex.capture_locations()))
Ok(RegexCaptures::new(self.regex.create_captures()))
}

fn capture_count(&self) -> usize {
@@ -762,7 +723,7 @@ impl Matcher for StandardMatcher {
}

fn capture_index(&self, name: &str) -> Option<usize> {
self.names.get(name).map(|i| *i)
self.regex.group_info().to_index(PatternID::ZERO, name)
}

fn try_find_iter<F, E>(
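
The `group_info()` call that replaces the hand-rolled `names` map resolves capture-group names through the regex itself. A small sketch of that lookup, again assuming regex-automata 0.3 (the date pattern is illustrative):

    use regex_automata::{meta::Regex, PatternID};

    fn main() {
        let re = Regex::new(r"(?P<year>\d{4})-(?P<month>\d{2})").unwrap();
        let info = re.group_info();
        // Group 0 is the implicit overall match, so named groups start at 1.
        assert_eq!(info.to_index(PatternID::ZERO, "year"), Some(1));
        assert_eq!(info.to_index(PatternID::ZERO, "month"), Some(2));
        assert_eq!(info.to_index(PatternID::ZERO, "day"), None);
    }
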
@@ -789,10 +750,10 @@ impl Matcher for StandardMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
Ok(self
let input = Input::new(haystack).span(at..haystack.len());
.regex
let caps = caps.captures_mut();
.captures_read_at(&mut caps.locations_mut(), haystack, at)
self.regex.search_captures(&input, caps);
.is_some())
Ok(caps.is_match())
}

fn shortest_match_at(
@@ -800,7 +761,8 @@ impl Matcher for StandardMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<usize>, NoError> {
Ok(self.regex.shortest_match_at(haystack, at))
let input = Input::new(haystack).span(at..haystack.len());
Ok(self.regex.search_half(&input).map(|hm| hm.offset()))
}
}

@@ -819,137 +781,51 @@ impl Matcher for StandardMatcher {
/// index of the group using the corresponding matcher's `capture_index`
/// method, and then use that index with `RegexCaptures::get`.
#[derive(Clone, Debug)]
pub struct RegexCaptures(RegexCapturesImp);
pub struct RegexCaptures {
/// Where the captures are stored.
#[derive(Clone, Debug)]
caps: AutomataCaptures,
enum RegexCapturesImp {
/// These captures behave as if the capturing groups begin at the given
AhoCorasick {
/// offset. When set to `0`, this has no affect and capture groups are
/// The start and end of the match, corresponding to capture group 0.
/// indexed like normal.
mat: Option<Match>,
///
},
/// This is useful when building matchers that wrap arbitrary regular
Regex {
/// expressions. For example, `WordMatcher` takes an existing regex
/// Where the locations are stored.
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
locs: CaptureLocations,
/// the regex has been wrapped from the caller. In order to do this,
/// These captures behave as if the capturing groups begin at the given
/// the matcher and the capturing groups must behave as if `(re)` is
/// offset. When set to `0`, this has no affect and capture groups are
/// the `0`th capture group.
/// indexed like normal.
offset: usize,
///
/// This is useful when building matchers that wrap arbitrary regular
/// expressions. For example, `WordMatcher` takes an existing regex
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
/// the regex has been wrapped from the caller. In order to do this,
/// the matcher and the capturing groups must behave as if `(re)` is
/// the `0`th capture group.
offset: usize,
/// When enable, the end of a match has `\r` stripped from it, if one
/// exists.
strip_crlf: bool,
},
}

impl Captures for RegexCaptures {
fn len(&self) -> usize {
match self.0 {
self.caps
RegexCapturesImp::AhoCorasick { .. } => 1,
.group_info()
RegexCapturesImp::Regex { ref locs, offset, .. } => {
.all_group_len()
locs.len().checked_sub(offset).unwrap()
.checked_sub(self.offset)
}
.unwrap()
}
}

fn get(&self, i: usize) -> Option<Match> {
match self.0 {
let actual = i.checked_add(self.offset).unwrap();
RegexCapturesImp::AhoCorasick { mat, .. } => {
self.caps.get_group(actual).map(|sp| Match::new(sp.start, sp.end))
if i == 0 {
mat
} else {
None
}
}
RegexCapturesImp::Regex { ref locs, offset, strip_crlf } => {
if !strip_crlf {
let actual = i.checked_add(offset).unwrap();
return locs.pos(actual).map(|(s, e)| Match::new(s, e));
}

// currently don't support capture offsetting with CRLF
// stripping
assert_eq!(offset, 0);
let m = match locs.pos(i).map(|(s, e)| Match::new(s, e)) {
None => return None,
Some(m) => m,
};
// If the end position of this match corresponds to the end
// position of the overall match, then we apply our CRLF
// stripping. Otherwise, we cannot assume stripping is correct.
if i == 0 || m.end() == locs.pos(0).unwrap().1 {
Some(m.with_end(m.end() - 1))
} else {
Some(m)
}
}
}
}
}

impl RegexCaptures {
pub(crate) fn simple() -> RegexCaptures {
pub(crate) fn new(caps: AutomataCaptures) -> RegexCaptures {
RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None })
RegexCaptures::with_offset(caps, 0)
}

pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
RegexCaptures::with_offset(locs, 0)
}

pub(crate) fn with_offset(
locs: CaptureLocations,
caps: AutomataCaptures,
offset: usize,
) -> RegexCaptures {
RegexCaptures(RegexCapturesImp::Regex {
RegexCaptures { caps, offset }
locs,
offset,
strip_crlf: false,
})
}

pub(crate) fn locations(&self) -> &CaptureLocations {
pub(crate) fn captures_mut(&mut self) -> &mut AutomataCaptures {
match self.0 {
&mut self.caps
RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid")
}
RegexCapturesImp::Regex { ref locs, .. } => locs,
}
}

pub(crate) fn locations_mut(&mut self) -> &mut CaptureLocations {
match self.0 {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid")
}
RegexCapturesImp::Regex { ref mut locs, .. } => locs,
}
}

pub(crate) fn strip_crlf(&mut self, yes: bool) {
match self.0 {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("setting strip_crlf for simple captures is invalid")
}
RegexCapturesImp::Regex { ref mut strip_crlf, .. } => {
*strip_crlf = yes;
}
}
}

pub(crate) fn set_simple(&mut self, one: Option<Match>) {
match self.0 {
RegexCapturesImp::AhoCorasick { ref mut mat } => {
*mat = one;
}
RegexCapturesImp::Regex { .. } => {
panic!("setting simple captures for regex is invalid")
}
}
}
}

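To make the `offset` field's contract concrete: with `offset` set to 1, a caller asking `RegexCaptures` for group 0 is transparently handed group 1, i.e. the original pattern inside the `(?:^|\W)(re)(?:$|\W)` wrapper. A hedged sketch of the underlying `Captures` calls (pattern and haystack invented for illustration, regex-automata 0.3 assumed):

    use regex_automata::{meta::Regex, Input};

    fn main() {
        // The word matcher wraps the user's pattern in an extra group.
        let re = Regex::new(r"(?:^|\W)(ban)(?:$|\W)").unwrap();
        let mut caps = re.create_captures();
        re.search_captures(&Input::new("a ban here"), &mut caps);
        assert!(caps.is_match());
        let outer = caps.get_group(0).unwrap(); // includes the surrounding \W
        let inner = caps.get_group(1).unwrap(); // just "ban"
        assert!(outer.start <= inner.start && inner.end <= outer.end);
    }
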
@@ -1036,7 +912,9 @@ mod tests {
}

// Test that finding candidate lines works as expected.
// FIXME: Re-enable this test once inner literal extraction works.
#[test]
#[ignore]
fn candidate_lines() {
fn is_confirmed(m: LineMatchKind) -> bool {
match m {
@@ -1,6 +1,6 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use aho_corasick::{AhoCorasick, MatchKind};
use grep_matcher::{Match, Matcher, NoError};
use regex_syntax::hir::Hir;
use regex_syntax::hir::{Hir, HirKind};

use crate::error::Error;
use crate::matcher::RegexCaptures;
@@ -23,11 +23,10 @@ impl MultiLiteralMatcher {
pub fn new<B: AsRef<[u8]>>(
literals: &[B],
) -> Result<MultiLiteralMatcher, Error> {
let ac = AhoCorasickBuilder::new()
let ac = AhoCorasick::builder()
.match_kind(MatchKind::LeftmostFirst)
.auto_configure(literals)
.build(literals)
.build_with_size::<usize, _, _>(literals)
.map_err(Error::generic)?;
.map_err(Error::regex)?;
Ok(MultiLiteralMatcher { ac })
}
}
@@ -79,13 +78,11 @@ impl Matcher for MultiLiteralMatcher {
/// Alternation literals checks if the given HIR is a simple alternation of
/// literals, and if so, returns them. Otherwise, this returns None.
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
use regex_syntax::hir::{HirKind, Literal};

// This is pretty hacky, but basically, if `is_alternation_literal` is
// true, then we can make several assumptions about the structure of our
// HIR. This is what justifies the `unreachable!` statements below.

if !expr.is_alternation_literal() {
if !expr.properties().is_alternation_literal() {
return None;
}
let alts = match *expr.kind() {
@@ -93,26 +90,16 @@ pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
_ => return None, // one literal isn't worth it
};

let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
Literal::Unicode(c) => {
let mut buf = [0; 4];
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
}
Literal::Byte(b) => {
dst.push(b);
}
};

let mut lits = vec![];
for alt in alts {
let mut lit = vec![];
match *alt.kind() {
HirKind::Empty => {}
HirKind::Literal(ref x) => extendlit(x, &mut lit),
HirKind::Literal(ref x) => lit.extend_from_slice(&x.0),
HirKind::Concat(ref exprs) => {
for e in exprs {
match *e.kind() {
HirKind::Literal(ref x) => extendlit(x, &mut lit),
HirKind::Literal(ref x) => lit.extend_from_slice(&x.0),
_ => unreachable!("expected literal, got {:?}", e),
}
}
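
For context, the `is_alternation_literal` check above is what keeps the Aho-Corasick fast path sound: it only fires when the whole pattern is a plain alternation of literals. A quick sketch using regex-syntax directly (0.7-era API; the patterns are illustrative):

    use regex_syntax::ParserBuilder;

    fn main() {
        let hir = ParserBuilder::new().build().parse("foo|bar|quux").unwrap();
        // Only a pure alternation of literals qualifies for the
        // MultiLiteralMatcher; anything fancier falls back to the regex
        // engine.
        assert!(hir.properties().is_alternation_literal());

        let fancy = ParserBuilder::new().build().parse("foo|ba+r").unwrap();
        assert!(!fancy.properties().is_alternation_literal());
    }
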
@@ -1,9 +1,13 @@
use grep_matcher::ByteSet;
use {
use regex_syntax::hir::{self, Hir, HirKind};
grep_matcher::ByteSet,
use regex_syntax::utf8::Utf8Sequences;
regex_syntax::{
hir::{self, Hir, HirKind, Look},
utf8::Utf8Sequences,
},
};

/// Return a confirmed set of non-matching bytes from the given expression.
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
pub(crate) fn non_matching_bytes(expr: &Hir) -> ByteSet {
let mut set = ByteSet::full();
remove_matching_bytes(expr, &mut set);
set
@@ -13,18 +17,27 @@ pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
/// the given expression.
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
match *expr.kind() {
HirKind::Empty | HirKind::WordBoundary(_) => {}
HirKind::Empty
HirKind::Anchor(_) => {
| HirKind::Look(Look::WordAscii | Look::WordAsciiNegate)
| HirKind::Look(Look::WordUnicode | Look::WordUnicodeNegate) => {}
HirKind::Look(Look::Start | Look::End) => {
// FIXME: This is wrong, but not doing this leads to incorrect
// results because of how anchored searches are implemented in
// the 'grep-searcher' crate.
set.remove(b'\n');
}
HirKind::Literal(hir::Literal::Unicode(c)) => {
HirKind::Look(Look::StartLF | Look::EndLF) => {
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
set.remove(b'\n');
}
HirKind::Look(Look::StartCRLF | Look::EndCRLF) => {
set.remove(b'\r');
set.remove(b'\n');
}
HirKind::Literal(hir::Literal(ref lit)) => {
for &b in lit.iter() {
set.remove(b);
}
}
HirKind::Literal(hir::Literal::Byte(b)) => {
set.remove(b);
}
HirKind::Class(hir::Class::Unicode(ref cls)) => {
for range in cls.iter() {
// This is presumably faster than encoding every codepoint
@@ -42,10 +55,10 @@ fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
}
}
HirKind::Repetition(ref x) => {
remove_matching_bytes(&x.hir, set);
remove_matching_bytes(&x.sub, set);
}
HirKind::Group(ref x) => {
HirKind::Capture(ref x) => {
remove_matching_bytes(&x.hir, set);
remove_matching_bytes(&x.sub, set);
}
HirKind::Concat(ref xs) => {
for x in xs {
@@ -62,17 +75,13 @@ fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {

#[cfg(test)]
mod tests {
use grep_matcher::ByteSet;
use {grep_matcher::ByteSet, regex_syntax::ParserBuilder};
use regex_syntax::ParserBuilder;

use super::non_matching_bytes;

fn extract(pattern: &str) -> ByteSet {
let expr = ParserBuilder::new()
let expr =
.allow_invalid_utf8(true)
ParserBuilder::new().utf8(false).build().parse(pattern).unwrap();
.build()
.parse(pattern)
.unwrap();
non_matching_bytes(&expr)
}

@@ -131,9 +140,13 @@ mod tests {

#[test]
fn anchor() {
// FIXME: The first four tests below should correspond to a full set
// of bytes for the non-matching bytes I think.
assert_eq!(sparse(&extract(r"^")), sparse_except(&[b'\n']));
assert_eq!(sparse(&extract(r"$")), sparse_except(&[b'\n']));
assert_eq!(sparse(&extract(r"\A")), sparse_except(&[b'\n']));
assert_eq!(sparse(&extract(r"\z")), sparse_except(&[b'\n']));
assert_eq!(sparse(&extract(r"(?m)^")), sparse_except(&[b'\n']));
assert_eq!(sparse(&extract(r"(?m)$")), sparse_except(&[b'\n']));
}
}
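
The `ByteSet` being manipulated here starts full and has every byte the regex could match removed; whatever survives is confirmed non-matching, which lets the searcher skip work. A tiny sketch of just the set operations (the bytes are arbitrary):

    use grep_matcher::ByteSet;

    fn main() {
        let mut set = ByteSet::full();
        // Pretend the regex can match any of the bytes in "abc".
        for &b in b"abc" {
            set.remove(b);
        }
        assert!(!set.contains(b'a')); // might appear in a match
        assert!(set.contains(b'z')); // confirmed non-matching
    }
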
@@ -1,5 +1,7 @@
use grep_matcher::LineTerminator;
use {
use regex_syntax::hir::{self, Hir, HirKind};
grep_matcher::LineTerminator,
regex_syntax::hir::{self, Hir, HirKind},
};

use crate::error::{Error, ErrorKind};

@@ -15,7 +17,26 @@ use crate::error::{Error, ErrorKind};
///
/// If the given line terminator is not ASCII, then this function returns an
/// error.
pub fn strip_from_match(
///
/// Note that as of regex 1.9, this routine could theoretically be implemented
/// without returning an error. Namely, for example, we could turn
/// `foo\nbar` into `foo[a&&b]bar`. That is, replace line terminators with a
/// sub-expression that can never match anything. Thus, ripgrep would accept
/// such regexes and just silently not match anything. Regex versions prior to 1.8
/// don't support such constructs. I ended up deciding to leave the existing
/// behavior of returning an error instead. For example:
///
/// ```text
/// $ echo -n 'foo\nbar\n' | rg 'foo\nbar'
/// the literal '"\n"' is not allowed in a regex
///
/// Consider enabling multiline mode with the --multiline flag (or -U for short).
/// When multiline mode is enabled, new line characters can be matched.
/// ```
///
/// This looks like a good error message to me, and even suggests a flag that
/// the user can use instead.
pub(crate) fn strip_from_match(
expr: Hir,
line_term: LineTerminator,
) -> Result<Hir, Error> {
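
Mechanically, stripping a line terminator from a class is a set difference on the HIR, as the hunks below show. A minimal sketch of that one step with regex-syntax (0.7-era API; the class `[a\n]` mirrors the crate's own tests):

    use regex_syntax::hir::{ClassUnicode, ClassUnicodeRange};

    fn main() {
        // [a\n] minus [\n] leaves just [a], which prints as "a".
        let mut cls = ClassUnicode::new([
            ClassUnicodeRange::new('a', 'a'),
            ClassUnicodeRange::new('\n', '\n'),
        ]);
        let remove = ClassUnicode::new([ClassUnicodeRange::new('\n', '\n')]);
        cls.difference(&remove);
        assert_eq!(cls.ranges().len(), 1);
        assert_eq!(cls.ranges()[0].start(), 'a');
    }
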
@@ -23,40 +44,34 @@ pub fn strip_from_match(
let expr1 = strip_from_match_ascii(expr, b'\r')?;
strip_from_match_ascii(expr1, b'\n')
} else {
let b = line_term.as_byte();
strip_from_match_ascii(expr, line_term.as_byte())
if b > 0x7F {
return Err(Error::new(ErrorKind::InvalidLineTerminator(b)));
}
strip_from_match_ascii(expr, b)
}
}

/// The implementation of strip_from_match. The given byte must be ASCII. This
/// The implementation of strip_from_match. The given byte must be ASCII.
/// function panics otherwise.
/// This function returns an error otherwise. It also returns an error if
/// it couldn't remove `\n` from the given regex without leaving an empty
/// character class in its place.
fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
assert!(byte <= 0x7F);
if !byte.is_ascii() {
let chr = byte as char;
return Err(Error::new(ErrorKind::InvalidLineTerminator(byte)));
assert_eq!(chr.len_utf8(), 1);
}
let ch = char::from(byte);
let invalid = || Err(Error::new(ErrorKind::NotAllowed(chr.to_string())));
let invalid = || Err(Error::new(ErrorKind::NotAllowed(ch.to_string())));

Ok(match expr.into_kind() {
HirKind::Empty => Hir::empty(),
HirKind::Literal(hir::Literal::Unicode(c)) => {
HirKind::Literal(hir::Literal(lit)) => {
if c == chr {
if lit.iter().find(|&&b| b == byte).is_some() {
return invalid();
}
Hir::literal(hir::Literal::Unicode(c))
Hir::literal(lit)
}
HirKind::Literal(hir::Literal::Byte(b)) => {
if b as char == chr {
return invalid();
}
Hir::literal(hir::Literal::Byte(b))
}
HirKind::Class(hir::Class::Unicode(mut cls)) => {
if cls.ranges().is_empty() {
return Ok(Hir::class(hir::Class::Unicode(cls)));
}
let remove = hir::ClassUnicode::new(Some(
hir::ClassUnicodeRange::new(chr, chr),
hir::ClassUnicodeRange::new(ch, ch),
));
cls.difference(&remove);
if cls.ranges().is_empty() {
@@ -65,6 +80,9 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
Hir::class(hir::Class::Unicode(cls))
}
HirKind::Class(hir::Class::Bytes(mut cls)) => {
if cls.ranges().is_empty() {
return Ok(Hir::class(hir::Class::Bytes(cls)));
}
let remove = hir::ClassBytes::new(Some(
hir::ClassBytesRange::new(byte, byte),
));
@@ -74,15 +92,14 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
}
Hir::class(hir::Class::Bytes(cls))
}
HirKind::Anchor(x) => Hir::anchor(x),
HirKind::Look(x) => Hir::look(x),
HirKind::WordBoundary(x) => Hir::word_boundary(x),
HirKind::Repetition(mut x) => {
x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
x.sub = Box::new(strip_from_match_ascii(*x.sub, byte)?);
Hir::repetition(x)
}
HirKind::Group(mut x) => {
HirKind::Capture(mut x) => {
x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
x.sub = Box::new(strip_from_match_ascii(*x.sub, byte)?);
Hir::group(x)
Hir::capture(x)
}
HirKind::Concat(xs) => {
let xs = xs
@@ -131,11 +148,11 @@ mod tests {

#[test]
fn various() {
assert_eq!(roundtrip(r"[a\n]", b'\n'), "[a]");
assert_eq!(roundtrip(r"[a\n]", b'\n'), "a");
assert_eq!(roundtrip(r"[a\n]", b'a'), "[\n]");
assert_eq!(roundtrip(r"[a\n]", b'a'), "\n");
assert_eq!(roundtrip_crlf(r"[a\n]"), "[a]");
assert_eq!(roundtrip_crlf(r"[a\n]"), "a");
assert_eq!(roundtrip_crlf(r"[a\r]"), "[a]");
assert_eq!(roundtrip_crlf(r"[a\r]"), "a");
assert_eq!(roundtrip_crlf(r"[a\r\n]"), "[a]");
assert_eq!(roundtrip_crlf(r"[a\r\n]"), "a");

assert_eq!(roundtrip(r"(?-u)\s", b'a'), r"(?-u:[\x09-\x0D\x20])");
assert_eq!(roundtrip(r"(?-u)\s", b'\n'), r"(?-u:[\x09\x0B-\x0D\x20])");
@@ -1,29 +0,0 @@
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
pub fn bytes_to_regex(bs: &[u8]) -> String {
use regex_syntax::is_meta_character;
use std::fmt::Write;

let mut s = String::with_capacity(bs.len());
for &b in bs {
if b <= 0x7F && !is_meta_character(b as char) {
write!(s, r"{}", b as char).unwrap();
} else {
write!(s, r"\x{:02x}", b).unwrap();
}
}
s
}

/// Converts arbitrary bytes to a nice string.
pub fn show_bytes(bs: &[u8]) -> String {
use std::ascii::escape_default;
use std::str;

let mut nice = String::new();
for &b in bs {
let part: Vec<u8> = escape_default(b).collect();
nice.push_str(str::from_utf8(&part).unwrap());
}
nice
}
@@ -1,39 +1,59 @@
use std::cell::RefCell;
use std::{
use std::collections::HashMap;
collections::HashMap,
use std::sync::Arc;
panic::{RefUnwindSafe, UnwindSafe},
sync::Arc,
};

use grep_matcher::{Match, Matcher, NoError};
use {
use regex::bytes::{CaptureLocations, Regex};
grep_matcher::{Match, Matcher, NoError},
use thread_local::ThreadLocal;
regex_automata::{
meta::Regex, util::captures::Captures, util::pool::Pool, Input,
PatternID,
},
};

use crate::config::ConfiguredHIR;
use crate::{config::ConfiguredHIR, error::Error, matcher::RegexCaptures};
use crate::error::Error;
use crate::matcher::RegexCaptures;
type PoolFn =
Box<dyn Fn() -> Captures + Send + Sync + UnwindSafe + RefUnwindSafe>;

/// A matcher for implementing "word match" semantics.
#[derive(Debug)]
pub struct WordMatcher {
pub(crate) struct WordMatcher {
/// The regex which is roughly `(?:^|\W)(<original pattern>)(?:$|\W)`.
regex: Regex,
/// The HIR that produced the regex above. We don't keep the HIR for the
/// `original` regex.
///
/// We put this in an `Arc` because by the time it gets here, it won't
/// change. And because cloning and dropping an `Hir` is somewhat expensive
/// due to its deep recursive representation.
chir: Arc<ConfiguredHIR>,
/// The original regex supplied by the user, which we use in a fast path
/// to try and detect matches before deferring to slower engines.
original: Regex,
/// A map from capture group name to capture group index.
names: HashMap<String, usize>,
/// A reusable buffer for finding the match location of the inner group.
/// A thread-safe pool of reusable buffers for finding the match offset of
locs: Arc<ThreadLocal<RefCell<CaptureLocations>>>,
/// the inner group.
caps: Arc<Pool<Captures, PoolFn>>,
}

impl Clone for WordMatcher {
fn clone(&self) -> WordMatcher {
// We implement Clone manually so that we get a fresh ThreadLocal such
// We implement Clone manually so that we get a fresh Pool such that it
// that it can set its own thread owner. This permits each thread
// can set its own thread owner. This permits each thread usings `caps`
// usings `locs` to hit the fast path.
// to hit the fast path.
//
// Note that cloning a regex is "cheap" since it uses reference
// counting internally.
let re = self.regex.clone();
WordMatcher {
regex: self.regex.clone(),
chir: Arc::clone(&self.chir),
original: self.original.clone(),
names: self.names.clone(),
locs: Arc::new(ThreadLocal::new()),
caps: Arc::new(Pool::new(Box::new(move || re.create_captures()))),
}
}
}
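
The comment above is the heart of this change: `Pool` from regex-automata replaces `thread_local` so that each clone owns a fresh pool whose first user becomes the owner thread and hits the cheap path. A hedged sketch of the pool idiom, simplified without the boxed `PoolFn` type alias:

    use regex_automata::{meta::Regex, util::pool::Pool, Input};

    fn main() {
        let re = Regex::new(r"\w+").unwrap();
        let pool = Pool::new({
            let re = re.clone();
            move || re.create_captures()
        });
        // The guard hands the Captures back to the pool when dropped.
        let mut caps = pool.get();
        re.search_captures(&Input::new("hello"), &mut caps);
        assert!(caps.is_match());
    }
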
@@ -44,31 +64,38 @@ impl WordMatcher {
///
/// The given options are used to construct the regular expression
/// internally.
pub fn new(expr: &ConfiguredHIR) -> Result<WordMatcher, Error> {
pub(crate) fn new(chir: ConfiguredHIR) -> Result<WordMatcher, Error> {
let original =
let original = chir.clone().into_anchored().to_regex()?;
expr.with_pattern(|pat| format!("^(?:{})$", pat))?.regex()?;
let chir = Arc::new(chir.into_word()?);
let word_expr = expr.with_pattern(|pat| {
let regex = chir.to_regex()?;
let pat = format!(r"(?:(?m:^)|\W)({})(?:\W|(?m:$))", pat);
let caps = Arc::new(Pool::new({
log::debug!("word regex: {:?}", pat);
let regex = regex.clone();
pat
Box::new(move || regex.create_captures()) as PoolFn
})?;
}));
let regex = word_expr.regex()?;
let locs = Arc::new(ThreadLocal::new());

let mut names = HashMap::new();
for (i, optional_name) in regex.capture_names().enumerate() {
let it = regex.group_info().pattern_names(PatternID::ZERO);
for (i, optional_name) in it.enumerate() {
if let Some(name) = optional_name {
names.insert(name.to_string(), i.checked_sub(1).unwrap());
}
}
Ok(WordMatcher { regex, original, names, locs })
Ok(WordMatcher { regex, chir, original, names, caps })
}

/// Return the underlying regex used by this matcher.
/// Return the underlying regex used to match at word boundaries.
pub fn regex(&self) -> &Regex {
///
/// The original regex is in the capture group at index 1.
pub(crate) fn regex(&self) -> &Regex {
&self.regex
}

/// Return the underlying HIR for the regex used to match at word
/// boundaries.
pub(crate) fn chir(&self) -> &ConfiguredHIR {
&self.chir
}

/// Attempt to do a fast confirmation of a word match that covers a subset
/// (but hopefully a big subset) of most cases. Ok(Some(..)) is returned
/// when a match is found. Ok(None) is returned when there is definitively
@@ -79,12 +106,11 @@ impl WordMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, ()> {
// This is a bit hairy. The whole point here is to avoid running an
// This is a bit hairy. The whole point here is to avoid running a
// NFA simulation in the regex engine. Remember, our word regex looks
// slower regex engine to extract capture groups. Remember, our word
// like this:
// regex looks like this:
//
// (^|\W)(<original regex>)($|\W)
// (^|\W)(<original regex>)(\W|$)
// where ^ and $ have multiline mode DISABLED
//
// What we want are the match offsets of <original regex>. So in the
// easy/common case, the original regex will be sandwiched between
@@ -102,7 +128,8 @@ impl WordMatcher {
// The reason why we cannot handle the ^/$ cases here is because we
// can't assume anything about the original pattern. (Try commenting
// out the checks for ^/$ below and run the tests to see examples.)
let mut cand = match self.regex.find_at(haystack, at) {
let input = Input::new(haystack).span(at..haystack.len());
let mut cand = match self.regex.find(input) {
None => return Ok(None),
Some(m) => Match::new(m.start(), m.end()),
};
@@ -145,23 +172,23 @@ impl Matcher for WordMatcher {
//
// OK, well, it turns out that it is worth it! But it is quite tricky.
// See `fast_find` for details. Effectively, this lets us skip running
// the NFA simulation in the regex engine in the vast majority of
// a slower regex engine to extract capture groups in the vast majority
// cases. However, the NFA simulation is required for full correctness.
// of cases. However, the slower engine is I believe required for full
// correctness.
match self.fast_find(haystack, at) {
Ok(Some(m)) => return Ok(Some(m)),
Ok(None) => return Ok(None),
Err(()) => {}
}

let cell =
let input = Input::new(haystack).span(at..haystack.len());
self.locs.get_or(|| RefCell::new(self.regex.capture_locations()));
let mut caps = self.caps.get();
let mut caps = cell.borrow_mut();
self.regex.search_captures(&input, &mut caps);
self.regex.captures_read_at(&mut caps, haystack, at);
Ok(caps.get_group(1).map(|sp| Match::new(sp.start, sp.end)))
Ok(caps.get(1).map(|m| Match::new(m.0, m.1)))
}

fn new_captures(&self) -> Result<RegexCaptures, NoError> {
Ok(RegexCaptures::with_offset(self.regex.capture_locations(), 1))
Ok(RegexCaptures::with_offset(self.regex.create_captures(), 1))
}

fn capture_count(&self) -> usize {
@@ -178,9 +205,10 @@ impl Matcher for WordMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
let r =
let input = Input::new(haystack).span(at..haystack.len());
self.regex.captures_read_at(caps.locations_mut(), haystack, at);
let caps = caps.captures_mut();
Ok(r.is_some())
self.regex.search_captures(&input, caps);
Ok(caps.is_match())
}

// We specifically do not implement other methods like find_iter or
@@ -195,8 +223,8 @@ mod tests {
use grep_matcher::{Captures, Match, Matcher};

fn matcher(pattern: &str) -> WordMatcher {
let chir = Config::default().hir(pattern).unwrap();
let chir = Config::default().build_many(&[pattern]).unwrap();
WordMatcher::new(&chir).unwrap()
WordMatcher::new(chir).unwrap()
}

fn find(pattern: &str, haystack: &str) -> Option<(usize, usize)> {
@@ -14,7 +14,7 @@ license = "Unlicense OR MIT"
edition = "2018"

[dependencies]
bstr = { version = "1.1.0", default-features = false, features = ["std"] }
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
bytecount = "0.6"
encoding_rs = "0.8.14"
encoding_rs_io = "0.1.6"
@@ -10,6 +10,12 @@ use crate::sink::{
};
use grep_matcher::{LineMatchKind, Matcher};

enum FastMatchResult {
Continue,
Stop,
SwitchToSlow,
}

#[derive(Debug)]
pub struct Core<'s, M: 's, S> {
config: &'s Config,
@@ -25,6 +31,7 @@ pub struct Core<'s, M: 's, S> {
last_line_visited: usize,
after_context_left: usize,
has_sunk: bool,
has_matched: bool,
}

impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
@@ -50,6 +57,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
last_line_visited: 0,
after_context_left: 0,
has_sunk: false,
has_matched: false,
};
if !core.searcher.multi_line_with_matcher(&core.matcher) {
if core.is_line_by_line_fast() {
@@ -109,7 +117,11 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {

pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
if self.is_line_by_line_fast() {
self.match_by_line_fast(buf)
match self.match_by_line_fast(buf)? {
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
FastMatchResult::Continue => Ok(true),
FastMatchResult::Stop => Ok(false),
}
} else {
self.match_by_line_slow(buf)
}
@@ -270,7 +282,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
}
};
self.set_pos(line.end());
if matched != self.config.invert_match {
let success = matched != self.config.invert_match;
if success {
self.has_matched = true;
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
}
@@ -286,40 +300,51 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
return Ok(false);
}
}
if self.config.stop_on_nonmatch && !success && self.has_matched {
return Ok(false);
}
}
Ok(true)
}

fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
fn match_by_line_fast(
debug_assert!(!self.config.passthru);
&mut self,
buf: &[u8],
) -> Result<FastMatchResult, S::Error> {
use FastMatchResult::*;

debug_assert!(!self.config.passthru);
while !buf[self.pos()..].is_empty() {
if self.config.stop_on_nonmatch && self.has_matched {
return Ok(SwitchToSlow);
}
if self.config.invert_match {
if !self.match_by_line_fast_invert(buf)? {
return Ok(false);
return Ok(Stop);
}
} else if let Some(line) = self.find_by_line_fast(buf)? {
self.has_matched = true;
if self.config.max_context() > 0 {
if !self.after_context_by_line(buf, line.start())? {
return Ok(false);
return Ok(Stop);
}
if !self.before_context_by_line(buf, line.start())? {
return Ok(false);
return Ok(Stop);
}
}
self.set_pos(line.end());
if !self.sink_matched(buf, &line)? {
return Ok(false);
return Ok(Stop);
}
} else {
break;
}
}
if !self.after_context_by_line(buf, buf.len())? {
return Ok(false);
return Ok(Stop);
}
self.set_pos(buf.len());
Ok(true)
Ok(Continue)
}

#[inline(always)]
@@ -344,6 +369,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if invert_match.is_empty() {
return Ok(true);
}
self.has_matched = true;
if !self.after_context_by_line(buf, invert_match.start())? {
return Ok(false);
}
@@ -577,6 +603,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
if self.config.passthru {
return false;
}
if self.config.stop_on_nonmatch && self.has_matched {
return false;
}
if let Some(line_term) = self.matcher.line_terminator() {
if line_term == self.config.line_term {
return true;
@@ -71,16 +71,6 @@ impl MmapChoice {
if !self.is_enabled() {
return None;
}
if !cfg!(target_pointer_width = "64") {
// For 32-bit systems, it looks like mmap will succeed even if it
// can't address the entire file. This seems to happen at least on
// Windows, even though it uses to work prior to ripgrep 13. The
// only Windows-related change in ripgrep 13, AFAIK, was statically
// linking vcruntime. So maybe that's related? But I'm not sure.
//
// See: https://github.com/BurntSushi/ripgrep/issues/1911
return None;
}
if cfg!(target_os = "macos") {
// I guess memory maps on macOS aren't great. Should re-evaluate.
return None;
@@ -173,6 +173,9 @@ pub struct Config {
encoding: Option<Encoding>,
/// Whether to do automatic transcoding based on a BOM or not.
bom_sniffing: bool,
/// Whether to stop searching when a non-matching line is found after a
/// matching line.
stop_on_nonmatch: bool,
}

impl Default for Config {
@@ -190,6 +193,7 @@ impl Default for Config {
multi_line: false,
encoding: None,
bom_sniffing: true,
stop_on_nonmatch: false,
}
}
}
@@ -555,6 +559,19 @@ impl SearcherBuilder {
self.config.bom_sniffing = yes;
self
}

/// Stop searching a file when a non-matching line is found after a
/// matching line.
///
/// This is useful for searching sorted files where it is expected that all
/// the matches will be on adjacent lines.
pub fn stop_on_nonmatch(
&mut self,
stop_on_nonmatch: bool,
) -> &mut SearcherBuilder {
self.config.stop_on_nonmatch = stop_on_nonmatch;
self
}
}

/// A searcher executes searches over a haystack and writes results to a caller
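
A hedged usage sketch for the new builder knob, mirroring the `--stop-on-nonmatch` test added further down (assumes the `grep-regex` and `grep-searcher` crates as of this change):

    use grep_regex::RegexMatcher;
    use grep_searcher::{sinks::UTF8, SearcherBuilder};

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let matcher = RegexMatcher::new("[235]")?;
        let mut searcher =
            SearcherBuilder::new().stop_on_nonmatch(true).build();
        // Reports lines 2 and 3, then stops at line 4, the first
        // non-matching line after a match.
        searcher.search_slice(
            &matcher,
            b"line1\nline2\nline3\nline4\nline5\n",
            UTF8(|lnum, line| {
                print!("{lnum}:{line}");
                Ok(true)
            }),
        )?;
        Ok(())
    }
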
@@ -838,6 +855,13 @@ impl Searcher {
|
|||||||
self.config.multi_line
|
self.config.multi_line
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if this searcher is configured to stop when in
|
||||||
|
/// finds a non-matching line after a matching one.
|
||||||
|
#[inline]
|
||||||
|
pub fn stop_on_nonmatch(&self) -> bool {
|
||||||
|
self.config.stop_on_nonmatch
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true if and only if this searcher will choose a multi-line
|
/// Returns true if and only if this searcher will choose a multi-line
|
||||||
/// strategy given the provided matcher.
|
/// strategy given the provided matcher.
|
||||||
///
|
///
|
||||||
|
@@ -232,6 +232,16 @@ would behave identically to the following command
|
|||||||
|
|
||||||
rg --glob '!.git' foo
|
rg --glob '!.git' foo
|
||||||
|
|
||||||
|
The bottom line is that every shell argument needs to be on its own line. So
|
||||||
|
for example, a config file containing
|
||||||
|
|
||||||
|
-j 4
|
||||||
|
|
||||||
|
is probably not doing what you intend. Instead, you want
|
||||||
|
|
||||||
|
-j
|
||||||
|
4
|
||||||
|
|
||||||
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
||||||
any and all support for configuration. This includes any future support
|
any and all support for configuration. This includes any future support
|
||||||
for auto-loading configuration files from pre-determined paths.
|
for auto-loading configuration files from pre-determined paths.
|
||||||
|
28
pkg/windows/Manifest.xml
Normal file
28
pkg/windows/Manifest.xml
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||||
|
<!--
|
||||||
|
This is a Windows application manifest file.
|
||||||
|
See: https://docs.microsoft.com/en-us/windows/win32/sbscs/application-manifests
|
||||||
|
-->
|
||||||
|
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0" xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
|
||||||
|
<!-- Versions rustc supports as compiler hosts -->
|
||||||
|
<compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
|
||||||
|
<application>
|
||||||
|
<!-- Windows 7 --><supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
|
||||||
|
<!-- Windows 8 --><supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
|
||||||
|
<!-- Windows 8.1 --><supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
|
||||||
|
<!-- Windows 10 and 11 --><supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
||||||
|
</application>
|
||||||
|
</compatibility>
|
||||||
|
<!-- Use UTF-8 code page -->
|
||||||
|
<asmv3:application>
|
||||||
|
<asmv3:windowsSettings xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">
|
||||||
|
<activeCodePage>UTF-8</activeCodePage>
|
||||||
|
</asmv3:windowsSettings>
|
||||||
|
</asmv3:application>
|
||||||
|
<!-- Remove (most) legacy path limits -->
|
||||||
|
<asmv3:application>
|
||||||
|
<asmv3:windowsSettings xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
|
||||||
|
<ws2:longPathAware>true</ws2:longPathAware>
|
||||||
|
</asmv3:windowsSettings>
|
||||||
|
</asmv3:application>
|
||||||
|
</assembly>
|
15
pkg/windows/README.md
Normal file
15
pkg/windows/README.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
This directory contains a Windows manifest for various Windows-specific
|
||||||
|
settings.
|
||||||
|
|
||||||
|
The main thing we enable here is [`longPathAware`], which permits paths of the
|
||||||
|
form `C:\` to be longer than 260 characters.
|
||||||
|
|
||||||
|
The approach taken here was modeled off of a [similar change for `rustc`][rustc pr].
|
||||||
|
In particular, this manifest gets linked into the final binary. Those linker
|
||||||
|
arguments are applied in `build.rs`.
|
||||||
|
|
||||||
|
This currently only applies to MSVC builds. If there's an easy way to make this
|
||||||
|
apply to GNU builds as well, then patches are welcome.
|
||||||
|
|
||||||
|
[`longPathAware`]: https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests#longpathaware
|
||||||
|
[rustc pr]: https://github.com/rust-lang/rust/pull/96737
|
@@ -787,6 +787,28 @@ rgtest!(f1466_no_ignore_files, |dir: Dir, mut cmd: TestCommand| {
|
|||||||
eqnice!("foo\n", cmd.arg("-u").stdout());
|
eqnice!("foo\n", cmd.arg("-u").stdout());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/pull/2361
|
||||||
|
rgtest!(f2361_sort_nested_files, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
use std::{thread::sleep, time::Duration};
|
||||||
|
|
||||||
|
dir.create("foo", "1");
|
||||||
|
sleep(Duration::from_millis(100));
|
||||||
|
dir.create_dir("dir");
|
||||||
|
sleep(Duration::from_millis(100));
|
||||||
|
dir.create(dir.path().join("dir").join("bar"), "1");
|
||||||
|
|
||||||
|
cmd.arg("--sort").arg("accessed").arg("--files");
|
||||||
|
eqnice!("foo\ndir/bar\n", cmd.stdout());
|
||||||
|
|
||||||
|
dir.create("foo", "2");
|
||||||
|
sleep(Duration::from_millis(100));
|
||||||
|
dir.create(dir.path().join("dir").join("bar"), "2");
|
||||||
|
sleep(Duration::from_millis(100));
|
||||||
|
|
||||||
|
cmd.arg("--sort").arg("accessed").arg("--files");
|
||||||
|
eqnice!("foo\ndir/bar\n", cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
// See: https://github.com/BurntSushi/ripgrep/issues/1404
|
// See: https://github.com/BurntSushi/ripgrep/issues/1404
|
||||||
rgtest!(f1404_nothing_searched_warning, |dir: Dir, mut cmd: TestCommand| {
|
rgtest!(f1404_nothing_searched_warning, |dir: Dir, mut cmd: TestCommand| {
|
||||||
dir.create(".ignore", "ignored-dir/**");
|
dir.create(".ignore", "ignored-dir/**");
|
||||||
@@ -921,6 +943,23 @@ rgtest!(f1842_field_match_separator, |dir: Dir, _: TestCommand| {
|
|||||||
eqnice!(expected, dir.command().args(&args).stdout());
|
eqnice!(expected, dir.command().args(&args).stdout());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/2288
|
||||||
|
rgtest!(f2288_context_partial_override, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create("test", "1\n2\n3\n4\n5\n6\n7\n8\n9\n");
|
||||||
|
cmd.args(&["-C1", "-A2", "5", "test"]);
|
||||||
|
eqnice!("4\n5\n6\n7\n", cmd.stdout());
|
||||||
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/2288
|
||||||
|
rgtest!(
|
||||||
|
f2288_context_partial_override_rev,
|
||||||
|
|dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create("test", "1\n2\n3\n4\n5\n6\n7\n8\n9\n");
|
||||||
|
cmd.args(&["-A2", "-C1", "5", "test"]);
|
||||||
|
eqnice!("4\n5\n6\n7\n", cmd.stdout());
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
rgtest!(no_context_sep, |dir: Dir, mut cmd: TestCommand| {
|
rgtest!(no_context_sep, |dir: Dir, mut cmd: TestCommand| {
|
||||||
dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx");
|
dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx");
|
||||||
cmd.args(&["-A1", "--no-context-separator", "foo", "test"]);
|
cmd.args(&["-A1", "--no-context-separator", "foo", "test"]);
|
||||||
@@ -975,3 +1014,10 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| {
|
|||||||
dir.create("test", "δ");
|
dir.create("test", "δ");
|
||||||
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
|
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// See: https://github.com/BurntSushi/ripgrep/issues/1790
|
||||||
|
rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| {
|
||||||
|
dir.create("test", "line1\nline2\nline3\nline4\nline5");
|
||||||
|
cmd.args(&["--stop-on-nonmatch", "[235]"]);
|
||||||
|
eqnice!("test:line2\ntest:line3\n", cmd.stdout());
|
||||||
|
});
|
||||||
|
@@ -1065,3 +1065,48 @@ rgtest!(type_list, |_: Dir, mut cmd: TestCommand| {
     // This can change over time, so just make sure we print something.
     assert!(!cmd.stdout().is_empty());
 });
+
+// The following series of tests seeks to test all permutations of ripgrep's
+// sorted queries.
+//
+// They all rely on this setup function, which sets up this particular file
+// structure with a particular creation order:
+// ├── a        # 1
+// ├── b        # 4
+// └── dir      # 2
+//     ├── c    # 3
+//     └── d    # 5
+//
+// This order is important when sorting them by system time-stamps.
+fn sort_setup(dir: Dir) {
+    use std::{thread::sleep, time::Duration};
+
+    let sub_dir = dir.path().join("dir");
+    dir.create("a", "test");
+    sleep(Duration::from_millis(100));
+    dir.create_dir(&sub_dir);
+    sleep(Duration::from_millis(100));
+    dir.create(sub_dir.join("c"), "test");
+    sleep(Duration::from_millis(100));
+    dir.create("b", "test");
+    sleep(Duration::from_millis(100));
+    dir.create(sub_dir.join("d"), "test");
+}
+
+rgtest!(sort_files, |dir: Dir, mut cmd: TestCommand| {
+    sort_setup(dir);
+    let expected = "a:test\nb:test\ndir/c:test\ndir/d:test\n";
+    eqnice!(expected, cmd.args(["--sort", "path", "test"]).stdout());
+});
+
+rgtest!(sort_accessed, |dir: Dir, mut cmd: TestCommand| {
+    sort_setup(dir);
+    let expected = "a:test\ndir/c:test\nb:test\ndir/d:test\n";
+    eqnice!(expected, cmd.args(["--sort", "accessed", "test"]).stdout());
+});
+
+rgtest!(sortr_accessed, |dir: Dir, mut cmd: TestCommand| {
+    sort_setup(dir);
+    let expected = "dir/d:test\nb:test\ndir/c:test\na:test\n";
+    eqnice!(expected, cmd.args(["--sortr", "accessed", "test"]).stdout());
+});
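The interleaved creation order (a, dir, dir/c, b, dir/d) is what makes sort_accessed and sortr_accessed meaningful: path order and timestamp order disagree. A quick way to sanity-check the timestamps that sort_setup produces, assuming the filesystem records creation times at all (`creation_order` is a hypothetical helper, not part of the test suite):

use std::{fs, path::Path, time::SystemTime};

// Return the given names sorted by creation time, oldest first. Filesystems
// without creation-time support fall back to UNIX_EPOCH.
fn creation_order(root: &Path, names: &[&str]) -> Vec<String> {
    let mut entries: Vec<(SystemTime, String)> = names
        .iter()
        .map(|name| {
            let created = fs::metadata(root.join(name))
                .and_then(|m| m.created())
                .unwrap_or(SystemTime::UNIX_EPOCH);
            (created, name.to_string())
        })
        .collect();
    entries.sort();
    entries.into_iter().map(|(_, name)| name).collect()
}

// Given sort_setup's sleeps, creation_order(root, &["a", "b", "dir/c", "dir/d"])
// should come back as ["a", "dir/c", "b", "dir/d"].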
@@ -1090,6 +1090,19 @@ b=one
     eqnice!(expected, cmd.stdout());
 });
 
+// See: https://github.com/BurntSushi/ripgrep/issues/2198
+rgtest!(r2198, |dir: Dir, mut cmd: TestCommand| {
+    dir.create(".ignore", "a");
+    dir.create(".rgignore", "b");
+    dir.create("a", "");
+    dir.create("b", "");
+    dir.create("c", "");
+
+    cmd.arg("--files").arg("--sort").arg("path");
+    eqnice!("c\n", cmd.stdout());
+    eqnice!("a\nb\nc\n", cmd.arg("--no-ignore-dot").stdout());
+});
+
 // See: https://github.com/BurntSushi/ripgrep/issues/2208
 rgtest!(r2208, |dir: Dir, mut cmd: TestCommand| {
     dir.create("test", "# Compile requirements.txt files from all found or specified requirements.in files (compile).
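r2198 asserts that `--no-ignore-dot` disables both `.ignore` and `.rgignore`: with the flag, the files hidden by each (a and b) reappear alongside c. In terms of the `ignore` crate that ripgrep builds on, the wiring plausibly looks like the sketch below; the helper and the exact toggles are illustrative, and the test itself is the authority on the flag's semantics.

use ignore::WalkBuilder;

// List files under root, honoring or skipping the dot-ignore files.
fn sorted_files(root: &str, no_ignore_dot: bool) -> Vec<String> {
    let mut builder = WalkBuilder::new(root);
    builder.ignore(!no_ignore_dot); // toggles .ignore handling
    if !no_ignore_dot {
        // .rgignore is registered as an extra ignore filename.
        builder.add_custom_ignore_filename(".rgignore");
    }
    let mut files: Vec<String> = builder
        .build()
        .filter_map(Result::ok)
        .filter(|e| e.file_type().map_or(false, |t| t.is_file()))
        .map(|e| e.path().display().to_string())
        .collect();
    files.sort();
    files
}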
@@ -1126,3 +1139,37 @@ rgtest!(r2236, |dir: Dir, mut cmd: TestCommand| {
     dir.create("foo/bar", "test\n");
     cmd.args(&["test"]).assert_err();
 });
+
+// See: https://github.com/BurntSushi/ripgrep/issues/2480
+rgtest!(r2480, |dir: Dir, mut cmd: TestCommand| {
+    dir.create("file", "FooBar\n");
+
+    // no regression in empty pattern behavior
+    cmd.args(&["-e", "", "file"]);
+    eqnice!("FooBar\n", cmd.stdout());
+
+    // no regression in single pattern behavior
+    let mut cmd = dir.command();
+    cmd.args(&["-e", ")(", "file"]);
+    eqnice!("FooBar\n", cmd.stdout());
+
+    // no regression in multiple patterns behavior
+    let mut cmd = dir.command();
+    cmd.args(&["--only-matching", "-e", "Foo", "-e", "Bar", "file"]);
+    eqnice!("Foo\nBar\n", cmd.stdout());
+
+    // no regression in capture groups behavior
+    let mut cmd = dir.command();
+    cmd.args(&["-e", "Fo(oB)a(r)", "--replace", "${0}_${1}_${2}${3}", "file"]);
+    eqnice!("FooBar_oB_r\n", cmd.stdout()); // note: ${3} expected to be empty
+
+    // flag does not leak into next pattern on match
+    let mut cmd = dir.command();
+    cmd.args(&["--only-matching", "-e", "(?i)foo", "-e", "bar", "file"]);
+    eqnice!("Foo\n", cmd.stdout());
+
+    // flag does not leak into next pattern on mismatch
+    let mut cmd = dir.command();
+    cmd.args(&["--only-matching", "-e", "(?i)notfoo", "-e", "bar", "file"]);
+    cmd.assert_err();
+});
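The flag-leak cases in r2480 come from combining several `-e` patterns into one regex: a naive `pat1|pat2` join lets an inline `(?i)` at the start of the first pattern apply to the second as well. Wrapping each alternate in a non-capturing group is the standard way to scope such flags; a minimal sketch of that technique (plain string joining, not ripgrep's grep-regex internals):

// Join patterns so that inline flags like (?i) stay scoped to their own
// alternate; (?:...) is non-capturing, so ${1}, ${2}, ... keep their indices.
fn join_patterns(patterns: &[&str]) -> String {
    patterns
        .iter()
        .map(|p| format!("(?:{})", p))
        .collect::<Vec<_>>()
        .join("|")
}

// join_patterns(&["(?i)foo", "bar"]) == "(?:(?i)foo)|(?:bar)": "FooBar"
// matches the first alternate case-insensitively, while "bar" stays
// case-sensitive, matching the expectations in the test above.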