Mirror of https://github.com/BurntSushi/ripgrep.git (synced 2025-07-26 17:51:56 -07:00)

Compare commits: ignore-0.4...grep-0.2.1 (202 commits)
Commits in this range, by short SHA:

ca60fef4db  a25307d6c8  b80947a8b3  ad793a0d8f  120e55e7c7  3941a7701d  96e130fbf9  180c4eaf8b
81529288cf  bcc7473a87  bc78c644db  dc7267a0fb  3224324e25  0f61f08eb1  a0e8dbe9df  e95254a86f
2f484d8ce5  364772ddd2  2e207833bc  92b35a65f8  ac8fecbbf2  8596817374  28bff84a0a  61101289fa
13faa39b66  6b61271bbb  1be86392e0  63058453fa  7f23cd63a5  8905d54a9f  25a4eaf5ae  0000157917
65b1b0e38a  c032cda4b7  eab044d829  55e62a4411  5b2f614aad  4386b8e805  6b012d8129  a928ca4221
d1570defbf  b732c23e36  49965703fa  609838aebd  515f120b5c  a66315d232  bdf10ab7c0  a02678800b
387df97d85  a9d97a1dda  3bb71b0cb8  87b33c96c0  5e975c43f8  7efa2e46d3  db0b92b62d  33b81cac48
6a13a4f64d  b13d835d95  d53506b7f7  78a35d4d43  a933d0bc90  2cae30e399  8e57989cd2  b9f5835534
e70778e89d  87c4a2b4b1  0aa31676e3  9f0e88bcb1  eb4b389846  dc337bab0a  2cfb338530  48646e3451
985394a19e  ec36f8c3ff  a726d03641  91afd4214a  4dc6c73c5a  36d03b4101  d161acb0a3  30ee6f08ee
ced5b92aa9  191315a2ea  5370064f00  b6189c659e  0b36942f68  7e05cde008  418d048b27  009dda1488
ba535fb5a3  427aaeeb2e  f5cff746bc  457f53b7ee  eb35f7978e  fc69bd366c  9b01a8f9ae  0ff5dd2360
3c7819301b  699e651db2  9eddb71b8e  abf115228e  fdfc418be5  5bf74362b9  431ea38620  caba5c4348
07f97d42cf  e33d6e73f5  478da4f271  7ce66f73cf  bc76a30c23  5e81c60b35  b3e5ae9d28  a024f14fdd
8c30c8294a  c44d263419  af6b6c543b  1a4fec8b4a  c8d8ab8ded  1d53ed2744  29696d1455  57ce623a57
f1c656de40  dd47582619  9b88cf8b72  6668d7ba8a  008da5dca4  a34df1f690  7f3fd6f7ce  6331a7ac18
cd4386bd9b  cdc20c5685  0cf2b98df2  9efdbf74a1  53cb9a779e  14860b0f16  0eb1a1e7c9  5631e5c7a0
21644408f2  0ee85a89f5  ed9d37959f  9f924ee187  35c5db6d1a  e824531e38  af54069c51  77a9e99964
459a9c5637  e4c4540f6a  5d0f2b0fc0  079a23b515  6e27649af1  df83b8b444  e48a17e189  fbb2cfed28
af8b27ffae  8a4071eea9  ee23ab5173  efd9cfb2fc  656aa12649  fc31aedcf3  578e1992fa  46d0130597
7534d5144f  a28e664abd  0ca96e004c  2295061e80  53c4855517  121e0135c1  c53c4c0ade  4566882521
12dd455ee9  e6cac8b119  0f502a9439  51d2db7f19  b3a6a69f9d  26a29c750e  beda5f70dc  5af7707a35
3f33a83a5f  35b52d33b9  a77b914e7a  2e2af50a4d  229d1a8d41  8ec6ef373f  581a35e568  ba965962fe
94e4b8e301  2af77242c5  3f4c4188c1  ce4b587055  be63122508  92286ad4d2  4ebe8375ec  7923d25228
1c3eebefec  64ac2ebe0f  46fb77c20c  6a1c3253e0  c7730d1f3a  c5ea5a13df  9c8d873a75  7899a4b931
ae55a4e872  3a1780d841
.cargo/config.toml (new file, 8 lines)

@@ -0,0 +1,8 @@
+# On Windows MSVC, statically link the C runtime so that the resulting EXE does
+# not depend on the vcruntime DLL.
+#
+# See: https://github.com/BurntSushi/ripgrep/pull/1613
+[target.x86_64-pc-windows-msvc]
+rustflags = ["-C", "target-feature=+crt-static"]
+[target.i686-pc-windows-msvc]
+rustflags = ["-C", "target-feature=+crt-static"]
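The checked-in config above applies whenever ripgrep is built for an MSVC target from this repository. As a rough, hedged sketch of getting the same effect for a one-off build without the config file (Unix-style shell syntax shown; adjust for cmd.exe or PowerShell), the flag can also be passed through the environment:

```
# One-off equivalent: statically link the MSVC C runtime for just this build.
$ RUSTFLAGS="-C target-feature=+crt-static" cargo build --release --target x86_64-pc-windows-msvc
```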
.github/ISSUE_TEMPLATE/feature_request.md (vendored, +3)

@@ -15,3 +15,6 @@ examples of how ripgrep would be used if your feature request were added.
 If you're not sure what to write here, then try imagining what the ideal
 documentation of your new feature would look like in ripgrep's man page. Then
 try to write it.
+
+If you're requesting the addition or change of default file types, please open
+a PR. We can discuss it there if necessary.
.github/dependabot.yml (vendored, new file, 6 lines)

@@ -0,0 +1,6 @@
+version: 2
+updates:
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "weekly"
.github/workflows/ci.yml (vendored, 74 changed lines)

Hunks: @@ -42,31 +42,31 @@ jobs:, @@ -75,34 +75,32 @@, @@ -118,10 +116,10 @@,
@@ -139,7 +137,7 @@, @@ -147,17 +145,17 @@, @@ -174,23 +172,35 @@.

Summary of the changes:
* Every Linux entry in the build matrix (pinned, stable, beta, nightly, nightly-musl,
  nightly-32, nightly-mips, nightly-arm) moves from `os: ubuntu-18.04` to
  `os: ubuntu-22.04`, and the pinned toolchain moves from `rust: 1.41.0` to `rust: 1.65.0`.
* The macos build moves from `os: macos-latest` to `os: macos-12`; win-msvc and win-gnu
  move from `os: windows-2019` to `os: windows-2022`, and every `matrix.os` condition
  that referenced `ubuntu-18.04`, `macos-latest` or `windows-2019` is updated to match.
* `actions/checkout@v2` is replaced by `actions/checkout@v3`, and `actions-rs/toolchain@v1`
  (with `profile: minimal` and `override: true`) is replaced by
  `dtolnay/rust-toolchain@master` with just `toolchain: ${{ matrix.rust }}`.
* The build and test steps switch their Cargo flag from `--all` to `--workspace`
  (plain builds, PCRE2 builds, and both test steps).
* The rustfmt job also moves to ubuntu-22.04 and the new actions, and its check becomes
  `run: cargo fmt --all --check` instead of a multi-line `cargo fmt --all -- --check`.
* A new `docs` job is added: ubuntu-22.04, checkout@v3, stable Rust via
  dtolnay/rust-toolchain@master, and `cargo doc --no-deps --document-private-items --workspace`
  with `RUSTDOCFLAGS: -D warnings`.
.github/workflows/release.yml (vendored, 97 changed lines)

Hunks: @@ -1,35 +1,39 @@, @@ -37,7 +41,6 @@, @@ -47,18 +50,6 @@, @@ -68,7 +59,7 @@,
@@ -80,56 +71,52 @@, @@ -142,22 +129,6 @@, @@ -178,7 +149,7 @@, @@ -186,7 +157,7 @@,
@@ -199,11 +170,11 @@.

Summary of the changes:
* The leading comment is rewritten: instead of saving the release upload URL as an
  artifact, the create-release job now initializes the GitHub release and exposes
  `upload_url` (and `rg_version`) as job outputs for the build-release job that follows.
  The note that the release is created only once, and the reference link
  (https://eugene-babichenko.github.io/blog/2020/05/09/github-actions-cross-platform-auto-releases/),
  are kept.
* The tag filter switches from single to double quotes: "[0-9]+.[0-9]+.[0-9]+".
* create-release moves from `runs-on: ubuntu-latest` to `ubuntu-22.04` and gains an
  `outputs:` block (`upload_url: ${{ steps.release.outputs.upload_url }}`,
  `rg_version: ${{ env.RG_VERSION }}`). The steps that wrote
  `artifacts/release-upload-url` and `artifacts/release-version` and uploaded them with
  `actions/upload-artifact@v1` are removed.
* In build-release, `TARGET_FLAGS:` becomes `TARGET_FLAGS: ""`; the matrix runners are
  bumped (ubuntu-18.04 → ubuntu-22.04, macos-latest → macos-12, windows-2019 →
  windows-2022); `actions/checkout@v2` (with `fetch-depth: 1`) becomes
  `actions/checkout@v3`; and `actions-rs/toolchain@v1` becomes
  `dtolnay/rust-toolchain@master`, keeping `toolchain` and `target` but dropping
  `profile: minimal` and `override: true`.
* The "Get release download URL" (`actions/download-artifact@v1`) and "Set release
  upload URL and release version" steps are removed; the archive name now uses
  `needs.create-release.outputs.rg_version`, the Windows check now tests for
  `windows-2022`, and the upload step moves to `actions/upload-release-asset@v1.0.2`
  with `upload_url: ${{ needs.create-release.outputs.upload_url }}`.
.gitignore (vendored, +4)

@@ -15,3 +15,7 @@ parts
 *.snap
 *.pyc
 ripgrep*_source.tar.bz2
+
+# Cargo timings
+cargo-timing-*.html
+cargo-timing.html
CHANGELOG.md (124 changed lines)

@@ -2,6 +2,42 @@ TBD
===
Unreleased changes. Release notes have not yet been written.

Bug fixes:

* [BUG #1891](https://github.com/BurntSushi/ripgrep/issues/1891):
  Fix bug when using `-w` with a regex that can match the empty string.
* [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911):
  Disable mmap searching in all non-64-bit environments.
* [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236):
  Fix gitignore parsing bug where a trailing `\/` resulted in an error.


13.0.0 (2021-06-12)
===================
ripgrep 13 is a new major version release of ripgrep that primarily contains
bug fixes, some performance improvements and a few minor breaking changes.
There is also a fix for a security vulnerability on Windows
([CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013)).

Some highlights:

A new short flag, `-.`, has been added. It is an alias for the `--hidden` flag,
which instructs ripgrep to search hidden files and directories.

ripgrep is now using a new
[vectorized implementation of `memmem`](https://github.com/BurntSushi/memchr/pull/82),
which accelerates many common searches. If you notice any performance
regressions (or major improvements), I'd love to hear about them through an
issue report!

Also, for Windows users targeting MSVC, Cargo will now build fully static
executables of ripgrep. The release binaries for ripgrep 13 have been compiled
using this configuration.

**BREAKING CHANGES**:

**Binary detection output has changed slightly.**

In this release, a small tweak has been made to the output format when a binary
file is detected. Previously, it looked like this:

@@ -15,12 +51,100 @@ Now it looks like this:
FOO: binary file matches (found "\0" byte around offset XXX)
```

**vimgrep output in multi-line now only prints the first line for each match.**

See [issue 1866](https://github.com/BurntSushi/ripgrep/issues/1866) for more
discussion on this. Previously, every line in a match was duplicated, even
when it spanned multiple lines. There are no changes to vimgrep output when
multi-line mode is disabled.

**In multi-line mode, --count is now equivalent to --count-matches.**

This appears to match how `pcre2grep` implements `--count`. Previously, ripgrep
would produce outright incorrect counts. Another alternative would be to simply
count the number of lines---even if it's more than the number of matches---but
that seems highly unintuitive.

**FULL LIST OF FIXES AND IMPROVEMENTS:**

Security fixes:

* [CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013):
  Fixes a security hole on Windows where running ripgrep with either the
  `-z/--search-zip` or `--pre` flags can result in running arbitrary
  executables from the current directory.
* [VULN #1773](https://github.com/BurntSushi/ripgrep/issues/1773):
  This is the public facing issue tracking CVE-2021-3013. ripgrep's README
  now contains a section describing how to report a vulnerability.

Performance improvements:

* [PERF #1657](https://github.com/BurntSushi/ripgrep/discussions/1657):
  Check if a file should be ignored first before issuing stat calls.
* [PERF memchr#82](https://github.com/BurntSushi/memchr/pull/82):
  ripgrep now uses a new vectorized implementation of `memmem`.

Feature enhancements:

* Added or improved file type filtering for ASP, Bazel, dvc, FlatBuffers,
  Futhark, minified files, Mint, pofiles (from GNU gettext), Racket, Red, Ruby,
  VCL, Yang.
* [FEATURE #1404](https://github.com/BurntSushi/ripgrep/pull/1404):
  ripgrep now prints a warning if nothing is searched.
* [FEATURE #1613](https://github.com/BurntSushi/ripgrep/pull/1613):
  Cargo will now produce static executables on Windows when using MSVC.
* [FEATURE #1680](https://github.com/BurntSushi/ripgrep/pull/1680):
  Add `-.` as a short flag alias for `--hidden`.
* [FEATURE #1842](https://github.com/BurntSushi/ripgrep/issues/1842):
  Add `--field-{context,match}-separator` for customizing field delimiters.
* [FEATURE #1856](https://github.com/BurntSushi/ripgrep/pull/1856):
  The README now links to a
  [Spanish translation](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep).

Bug fixes:

* [BUG #1277](https://github.com/BurntSushi/ripgrep/issues/1277):
  Document cygwin path translation behavior in the FAQ.
* [BUG #1739](https://github.com/BurntSushi/ripgrep/issues/1739):
  Fix bug where replacements were buggy if the regex matched a line terminator.
* [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311):
  Fix multi-line bug where a search & replace for `\n` didn't work as expected.
* [BUG #1401](https://github.com/BurntSushi/ripgrep/issues/1401):
  Fix buggy interaction between PCRE2 look-around and `-o/--only-matching`.
* [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412):
  Fix multi-line bug with searches using look-around past matching lines.
* [BUG #1577](https://github.com/BurntSushi/ripgrep/issues/1577):
  Fish shell completions will continue to be auto-generated.
* [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642):
  Fixes a bug where using `-m` and `-A` printed more matches than the limit.
* [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703):
  Clarify the function of `-u/--unrestricted`.
* [BUG #1708](https://github.com/BurntSushi/ripgrep/issues/1708):
  Clarify how `-S/--smart-case` works.
* [BUG #1730](https://github.com/BurntSushi/ripgrep/issues/1730):
  Clarify that CLI invocation must always be valid, regardless of config file.
* [BUG #1741](https://github.com/BurntSushi/ripgrep/issues/1741):
  Fix stdin detection when using PowerShell in UNIX environments.
* [BUG #1756](https://github.com/BurntSushi/ripgrep/pull/1756):
  Fix bug where `foo/**` would match `foo`, but it shouldn't.
* [BUG #1765](https://github.com/BurntSushi/ripgrep/issues/1765):
  Fix panic when `--crlf` is used in some cases.
* [BUG #1638](https://github.com/BurntSushi/ripgrep/issues/1638):
  Correctly sniff UTF-8 and do transcoding, like we do for UTF-16.
* [BUG #1816](https://github.com/BurntSushi/ripgrep/issues/1816):
  Add documentation for glob alternate syntax, e.g., `{a,b,..}`.
* [BUG #1847](https://github.com/BurntSushi/ripgrep/issues/1847):
  Clarify how the `--hidden` flag works.
* [BUG #1866](https://github.com/BurntSushi/ripgrep/issues/1866#issuecomment-841635553):
  Fix bug when computing column numbers in `--vimgrep` mode.
* [BUG #1868](https://github.com/BurntSushi/ripgrep/issues/1868):
  Fix bug where `--passthru` and `-A/-B/-C` did not override each other.
* [BUG #1869](https://github.com/BurntSushi/ripgrep/pull/1869):
  Clarify docs for `--files-with-matches` and `--files-without-match`.
* [BUG #1878](https://github.com/BurntSushi/ripgrep/issues/1878):
  Fix bug where `\A` could produce unanchored matches in multiline search.
* [BUG 94e4b8e3](https://github.com/BurntSushi/ripgrep/commit/94e4b8e3):
  Fix column numbers when `--vimgrep` is used with `-U/--multiline`.


12.1.1 (2020-05-29)
Cargo.lock (generated, 241 changed lines)

The lockfile is regenerated (with `version = 3` declared at the top), bumping the
ripgrep package from 12.1.1 to 13.0.0 and updating the dependency graph. Version
changes shown in the diff:

* aho-corasick 0.7.14 → 0.7.20, base64 0.13.0 → 0.20.0, bitflags 1.2.1 → 1.3.2
* bstr 0.2.14 → 1.1.0 (lazy_static is replaced by once_cell among its deps, and
  serde is added)
* bytecount 0.6.0 → 0.6.3, cc 1.0.61 → 1.0.78, clap 2.33.3 → 2.34.0
* crossbeam-channel 0.5.0 → 0.5.6, crossbeam-utils 0.8.0 → 0.8.14 (drops the
  autocfg, const_fn and lazy_static deps), encoding_rs 0.8.26 → 0.8.31
* globset 0.4.6 → 0.4.10, grep 0.2.7 → 0.2.11, grep-cli 0.1.5 → 0.1.7,
  grep-matcher 0.1.4 → 0.1.6, grep-pcre2 0.1.4 → 0.1.6, grep-printer 0.1.5 → 0.1.7,
  grep-regex 0.1.8 → 0.1.11, grep-searcher 0.1.7 → 0.1.11 (memmap → memmap2)
* hermit-abi 0.1.17 → 0.1.19, ignore 0.4.17 → 0.4.19, itoa 0.4.6 → 1.0.5
* jemalloc-sys 0.3.2 → 0.5.2+5.3.0-patched, jemallocator 0.3.2 → 0.5.0,
  jobserver 0.1.21 → 0.1.25, libc 0.2.80 → 0.2.139, log 0.4.11 → 0.4.17
* memchr 2.3.4 → 2.5.0, packed_simd_2 0.3.4 → 0.3.8, pkg-config 0.3.19 → 0.3.26
* proc-macro2 1.0.24 → 1.0.49 and syn 1.0.48 → 1.0.107 (both switch from
  unicode-xid to unicode-ident), quote 1.0.7 → 1.0.23
* regex 1.4.2 → 1.7.0 (drops thread_local), regex-automata 0.1.9 → 0.1.10,
  regex-syntax 0.6.21 → 0.6.28
* ryu 1.0.5 → 1.0.12, serde 1.0.117 → 1.0.152, serde_derive 1.0.117 → 1.0.152,
  serde_json 1.0.59 → 1.0.91, termcolor 1.1.0 → 1.1.3, thread_local 1.0.1 → 1.1.4
  (lazy_static → once_cell)
* unicode-width 0.1.8 → 0.1.10, walkdir 2.3.1 → 2.3.2
* Entries removed: autocfg 1.0.1, byteorder 1.3.4, cfg-if 0.1.10, const_fn 0.4.2,
  memmap 0.7.0, num_cpus 1.13.0, unicode-xid 0.2.1. Entries added: memmap2 0.5.8,
  once_cell 1.17.0, unicode-ident 1.0.6. Checksums are updated accordingly.
Cargo.toml (19 changed lines)

@@ -1,16 +1,15 @@
[package]
name = "ripgrep"
version = "12.1.1" #:version
version = "13.0.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
ripgrep is a line-oriented search tool that recursively searches your current
directory for a regex pattern while respecting your gitignore rules. ripgrep
has first class support on Windows, macOS and Linux.
ripgrep is a line-oriented search tool that recursively searches the current
directory for a regex pattern while respecting gitignore rules. ripgrep has
first class support on Windows, macOS and Linux.
"""
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
categories = ["command-line-utilities", "text-processing"]
license = "Unlicense OR MIT"

@@ -18,6 +17,7 @@ exclude = ["HomebrewFormula"]
build = "build.rs"
autotests = false
edition = "2018"
rust-version = "1.65"

[[bin]]
bench = false

@@ -42,12 +42,11 @@ members = [
]

[dependencies]
bstr = "0.2.12"
grep = { version = "0.2.7", path = "crates/grep" }
ignore = { version = "0.4.16", path = "crates/ignore" }
bstr = "1.1.0"
grep = { version = "0.2.8", path = "crates/grep" }
ignore = { version = "0.4.19", path = "crates/ignore" }
lazy_static = "1.1.0"
log = "0.4.5"
num_cpus = "1.8.0"
regex = "1.3.5"
serde_json = "1.0.23"
termcolor = "1.1.0"

@@ -58,7 +57,7 @@ default-features = false
features = ["suggestions"]

[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
version = "0.3.0"
version = "0.5.0"

[build-dependencies]
lazy_static = "1.1.0"
GUIDE.md (21 changed lines)

@@ -177,16 +177,21 @@ After recursive search, ripgrep's most important feature is what it *doesn't*
search. By default, when you search a directory, ripgrep will ignore all of
the following:

1. Files and directories that match the rules in your `.gitignore` glob
   pattern.
1. Files and directories that match glob patterns in these three categories:
   1. gitignore globs (including global and repo-specific globs).
   2. `.ignore` globs, which take precedence over all gitignore globs
      when there's a conflict.
   3. `.rgignore` globs, which take precedence over all `.ignore` globs
      when there's a conflict.
2. Hidden files and directories.
3. Binary files. (ripgrep considers any file with a `NUL` byte to be binary.)
4. Symbolic links aren't followed.

All of these things can be toggled using various flags provided by ripgrep:

1. You can disable `.gitignore` handling with the `--no-ignore` flag.
2. Hidden files and directories can be searched with the `--hidden` flag.
1. You can disable all ignore-related filtering with the `--no-ignore` flag.
2. Hidden files and directories can be searched with the `--hidden` (`-.` for
   short) flag.
3. Binary files can be searched via the `--text` (`-a` for short) flag.
   Be careful with this flag! Binary files may emit control characters to your
   terminal, which might cause strange behavior.

@@ -644,9 +649,9 @@ given, which is the default:
  they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of
  the file from UTF-16 to UTF-8, and then execute the search on the transcoded
  version of the file. (This incurs a performance penalty since transcoding
  is slower than regex searching.) If the file contains invalid UTF-16, then
  the Unicode replacement codepoint is substituted in place of invalid code
  units.
  is needed in addition to regex searching.) If the file contains invalid
  UTF-16, then the Unicode replacement codepoint is substituted in place of
  invalid code units.
* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits
  you to specify an encoding from the
  [Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).

@@ -988,6 +993,8 @@ used options that will likely impact how you use ripgrep on a regular basis.
* `-S/--smart-case`: This is similar to `--ignore-case`, but disables itself
  if the pattern contains any uppercase letters. Usually this flag is put into
  alias or a config file.
* `-F/--fixed-strings`: Disable regular expression matching and treat the pattern
  as a literal string.
* `-w/--word-regexp`: Require that all matches of the pattern be surrounded
  by word boundaries. That is, given `pattern`, the `--word-regexp` flag will
  cause ripgrep to behave as if `pattern` were actually `\b(?:pattern)\b`.
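The GUIDE excerpt above describes ripgrep's automatic filtering and the flags that relax it. As a quick, hedged illustration (the pattern and paths are placeholders), typical invocations of those documented flags look like this:

```
$ rg --no-ignore 'TODO'         # also search files matched by gitignore/.ignore/.rgignore globs
$ rg -. 'TODO'                  # -. is the short alias for --hidden: include hidden files/dirs
$ rg -a 'TODO' data.bin         # --text: search a binary file (may print control characters)
$ rg -E shift_jis 'TODO' docs/  # -E/--encoding: force a specific text encoding
$ rg -uuu 'TODO'                # disable ignore, hidden-file and binary filtering at once
```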
80
README.md
80
README.md
@@ -1,12 +1,12 @@
|
||||
ripgrep (rg)
|
||||
------------
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern. By default, ripgrep will respect your .gitignore
|
||||
and automatically skip hidden files/directories and binary files. ripgrep
|
||||
has first class support on Windows, macOS and Linux, with binary downloads
|
||||
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher, ack
|
||||
and grep.
|
||||
ripgrep is a line-oriented search tool that recursively searches the current
|
||||
directory for a regex pattern. By default, ripgrep will respect gitignore rules
|
||||
and automatically skip hidden files/directories and binary files. (To disable
|
||||
all automatic filtering by default, use `rg -uuu`.) ripgrep has first class
|
||||
support on Windows, macOS and Linux, with binary downloads available for [every
|
||||
release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to
|
||||
other popular search tools like The Silver Searcher, ack and grep.
|
||||
|
||||
[](https://github.com/BurntSushi/ripgrep/actions)
|
||||
[](https://crates.io/crates/ripgrep)
|
||||
@@ -90,16 +90,16 @@ times are unaffected by the presence or absence of `-n`.
|
||||
because it contains most of their features and is generally faster. (See
|
||||
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
|
||||
replace grep.)
|
||||
* Like other tools specialized to code search, ripgrep defaults to recursive
|
||||
directory search and won't search files ignored by your
|
||||
`.gitignore`/`.ignore`/`.rgignore` files. It also ignores hidden and binary
|
||||
files by default. ripgrep also implements full support for `.gitignore`,
|
||||
whereas there are many bugs related to that functionality in other code
|
||||
search tools claiming to provide the same functionality.
|
||||
* ripgrep can search specific types of files. For example, `rg -tpy foo`
|
||||
limits your search to Python files and `rg -Tjs foo` excludes JavaScript
|
||||
files from your search. ripgrep can be taught about new file types with
|
||||
custom matching rules.
|
||||
* Like other tools specialized to code search, ripgrep defaults to
|
||||
[recursive search](GUIDE.md#recursive-search) and does [automatic
|
||||
filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files
|
||||
ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search
|
||||
hidden files and it won't search binary files. Automatic filtering can be
|
||||
disabled with `rg -uuu`.
|
||||
* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types).
|
||||
For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs
|
||||
foo` excludes JavaScript files from your search. ripgrep can be taught about
|
||||
new file types with custom matching rules.
|
||||
* ripgrep supports many features found in `grep`, such as showing the context
|
||||
of search results, searching multiple patterns, highlighting matches with
|
||||
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
|
||||
@@ -110,16 +110,20 @@ times are unaffected by the presence or absence of `-n`.
|
||||
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
|
||||
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
|
||||
syntax is provided via the `--engine (default|pcre2|auto-hybrid)` option.
|
||||
* ripgrep supports searching files in text encodings other than UTF-8, such
|
||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||
automatically detecting UTF-16 is provided. Other text encodings must be
|
||||
specifically specified with the `-E/--encoding` flag.)
|
||||
* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements),
|
||||
which permit rewriting output based on what was matched.
|
||||
* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding)
|
||||
other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more.
|
||||
(Some support for automatically detecting UTF-16 is provided. Other text
|
||||
encodings must be specifically specified with the `-E/--encoding` flag.)
|
||||
* ripgrep supports searching files compressed in a common format (brotli,
|
||||
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
|
||||
* ripgrep supports
|
||||
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
|
||||
which could be PDF text extraction, less supported decompression, decrypting,
|
||||
automatic encoding detection and so on.
|
||||
* ripgrep can be configured via a
|
||||
[configuration file](GUIDE.md#configuration-file).
|
||||
|
||||
In other words, use ripgrep if you like speed, filtering by default, fewer
|
||||
bugs and Unicode support.
|
||||
@@ -192,15 +196,9 @@ multiline search and opt-in fancy regex support via PCRE2.
The binary name for ripgrep is `rg`.

**[Archives of precompiled binaries for ripgrep are available for Windows,
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Users of
platforms not explicitly mentioned below are advised to download one of these
archives.

Linux binaries are static executables. Windows binaries are available either as
built with MinGW (GNU) or with Microsoft Visual C++ (MSVC). When possible,
prefer MSVC over GNU, but you'll need to have the [Microsoft VC++ 2015
redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
installed.
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and
Windows binaries are static executables. Users of platforms not explicitly
mentioned below are advised to download one of these archives.

If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
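The hunk ends just before the install command itself; for completeness, the Homebrew/Linuxbrew invocation this paragraph leads into is the usual one-liner (as in the full README):

```
$ brew install ripgrep
```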
@@ -273,13 +271,20 @@ $ nix-env --install ripgrep
$ # (Or using the attribute name, which is also ripgrep.)
```

If you're a **Guix** user, you can install ripgrep from the official
package collection:

```
$ guix install ripgrep
```

If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).

```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/12.1.1/ripgrep_12.1.1_amd64.deb
$ sudo dpkg -i ripgrep_12.1.1_amd64.deb
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgrep_13.0.0_amd64.deb
$ sudo dpkg -i ripgrep_13.0.0_amd64.deb
```

If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
@@ -353,7 +358,7 @@ $ cargo install ripgrep

ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
ripgrep compiles with Rust 1.65.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.

To build ripgrep:
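The hunk stops at the heading, so the build commands themselves are not shown here; from the surrounding README, the sequence is roughly:

```
$ git clone https://github.com/BurntSushi/ripgrep
$ cd ripgrep
$ cargo build --release
$ ./target/release/rg --version
```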
@@ -425,9 +430,18 @@ $ cargo test --all
from the repository root.


### Vulnerability reporting

For reporting a security vulnerability, please
[contact Andrew Gallant](https://blog.burntsushi.net/about/),
which has my email address and PGP public key if you wish to send an encrypted
message.


### Translations

The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.

* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep)
RELEASE-CHECKLIST.md

@@ -1,9 +1,11 @@
Release Checklist
-----------------
* Ensure local `master` is up to date with respect to `origin/master`.
* Run `cargo update` and review dependency updates. Commit updated
  `Cargo.lock`.
* Run `cargo outdated` and review semver incompatible updates. Unless there is
  a strong motivation otherwise, review and update every dependency.
  a strong motivation otherwise, review and update every dependency. Also
  run `--aggressive`, but don't update to crates that are still in beta.
* Review changes for every crate in `crates` since the last ripgrep release.
  If the set of changes is non-empty, issue a new release for that crate. Check
  crates in the following order. After updating a crate, ensure minimal
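As a sketch, the dependency-review steps above translate to something like the following (`cargo outdated` is a third-party subcommand installed separately; the commit message is illustrative):

```
$ cargo update                 # refresh Cargo.lock within semver-compatible ranges
$ git commit -m 'deps: update Cargo.lock' Cargo.lock
$ cargo outdated               # review semver-incompatible updates
$ cargo outdated --aggressive  # per the checklist; skip crates still in beta
```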
@@ -24,14 +26,19 @@ Release Checklist
  `cargo update -p ripgrep` so that the `Cargo.lock` is updated. Commit the
  changes and create a new signed tag. Alternatively, use
  `cargo-up --no-push --no-release Cargo.toml {VERSION}` to automate this.
* Push changes to GitHub, NOT including the tag. (But do not publish new
  version of ripgrep to crates.io yet.)
* Once CI for `master` finishes successfully, push the version tag. (Trying to
  do this in one step seems to result in GitHub Actions not seeing the tag
  push and thus not running the release workflow.)
* Wait for CI to finish creating the release. If the release build fails, then
  delete the tag from GitHub, make fixes, re-tag, delete the release and push.
* Copy the relevant section of the CHANGELOG to the tagged release notes.
  Include this blurb describing what ripgrep is:
  > In case you haven't heard of it before, ripgrep is a line-oriented search
  > tool that recursively searches your current directory for a regex pattern.
  > By default, ripgrep will respect your gitignore rules and automatically
  > skip hidden files/directories and binary files.
  > tool that recursively searches the current directory for a regex pattern.
  > By default, ripgrep will respect gitignore rules and automatically skip
  > hidden files/directories and binary files.
* Run `ci/build-deb` locally and manually upload the deb package to the
  release.
* Run `cargo publish`.
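Condensed into commands, the tag-and-release flow described above looks roughly like this (the version number is illustrative):

```
$ git tag -s 13.0.0 -m '13.0.0'   # create the signed tag, but do not push it yet
$ git push origin master          # push the release commit only
$ # wait for CI on master to finish successfully
$ git push origin 13.0.0          # then push the tag to trigger the release workflow
$ ci/build-deb                    # build the .deb locally and upload it to the release
$ cargo publish                   # finally publish to crates.io
```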
benchsuite/benchsuite

@@ -26,15 +26,13 @@ SUBTITLES_DIR = 'subtitles'
SUBTITLES_EN_NAME = 'en.txt'
SUBTITLES_EN_NAME_SAMPLE = 'en.sample.txt'
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
# SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz' # noqa
SUBTITLES_EN_URL = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/en.txt.gz' # noqa
SUBTITLES_RU_NAME = 'ru.txt'
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
# SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz' # noqa
SUBTITLES_RU_URL = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/ru.txt.gz' # noqa

LINUX_DIR = 'linux'
LINUX_CLONE = 'git://github.com/BurntSushi/linux'
LINUX_CLONE = 'https://github.com/BurntSushi/linux'

# Grep takes locale settings from the environment. There is a *substantial*
# performance impact for enabling Unicode, so we need to handle this explicitly
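These constants retire the old `git://` transport and the old opus.lingfil.uu.se download locations in favor of HTTPS; fetching the same corpora by hand with the new endpoints would look like:

```
$ curl -LO https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/en.txt.gz
$ curl -LO https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/ru.txt.gz
$ git clone https://github.com/BurntSushi/linux
```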
@@ -546,7 +544,11 @@ def bench_subtitles_ru_literal(suite_dir):
        Command('rg (lines)', ['rg', '-n', pat, ru]),
        Command('ag (lines)', ['ag', '-s', pat, ru]),
        Command('grep (lines)', ['grep', '-n', pat, ru], env=GREP_ASCII),
        Command('ugrep (lines)', ['ugrep', '-n', pat, ru])
        # ugrep incorrectly identifies this corpus as binary, but it is
        # entirely valid UTF-8. So we tell ugrep to always treat the corpus
        # as text even though this technically gives it an edge over other
        # tools. (It no longer needs to check for binary data.)
        Command('ugrep (lines)', ['ugrep', '-a', '-n', pat, ru])
    ])

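On the command line, the `-a` change above amounts to the difference between these two invocations (the pattern and file name are taken from this suite's Russian-subtitle benchmarks and are illustrative here):

```
$ ugrep -n 'Шерлок Холмс' ru.txt     # without -a, ugrep may treat this valid UTF-8 corpus as binary
$ ugrep -a -n 'Шерлок Холмс' ru.txt  # -a forces the corpus to be searched as text
```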
@@ -564,7 +566,8 @@ def bench_subtitles_ru_literal_casei(suite_dir):
        Command('grep (ASCII)', ['grep', '-E', '-i', pat, ru], env=GREP_ASCII),
        Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
        Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
        Command('ugrep (lines) (ASCII)', ['ugrep', '-n', '-i', pat, ru])
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep (lines) (ASCII)', ['ugrep', '-a', '-n', '-i', pat, ru])
    ])


@@ -588,7 +591,8 @@ def bench_subtitles_ru_literal_word(suite_dir):
        Command('grep (ASCII)', [
            'grep', '-nw', pat, ru,
        ], env=GREP_ASCII),
        Command('ugrep (ASCII)', ['ugrep', '-nw', pat, ru]),
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep (ASCII)', ['ugrep', '-anw', pat, ru]),
        Command('rg', ['rg', '-nw', pat, ru]),
        Command('grep', ['grep', '-nw', pat, ru], env=GREP_UNICODE),
    ])
@@ -612,7 +616,8 @@ def bench_subtitles_ru_alternate(suite_dir):
        Command('rg (lines)', ['rg', '-n', pat, ru]),
        Command('ag (lines)', ['ag', '-s', pat, ru]),
        Command('grep (lines)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
        Command('ugrep (lines)', ['ugrep', '-n', pat, ru]),
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep (lines)', ['ugrep', '-an', pat, ru]),
        Command('rg', ['rg', pat, ru]),
        Command('grep', ['grep', '-E', pat, ru], env=GREP_ASCII),
    ])
@@ -637,7 +642,8 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
        Command('grep (ASCII)', [
            'grep', '-E', '-ni', pat, ru,
        ], env=GREP_ASCII),
        Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, ru]),
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep (ASCII)', ['ugrep', '-ani', pat, ru]),
        Command('rg', ['rg', '-n', '-i', pat, ru]),
        Command('grep', ['grep', '-E', '-ni', pat, ru], env=GREP_UNICODE),
    ])
@@ -654,10 +660,11 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', '-n', pat, ru]),
        Command('grep', ['grep', '-E', '-n', pat, ru], env=GREP_UNICODE),
        Command('ugrep', ['ugrep', '-n', pat, ru]),
        Command('ugrep', ['ugrep', '-an', pat, ru]),
        Command('ag (ASCII)', ['ag', '-s', pat, ru]),
        Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
        Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru]),
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep (ASCII)', ['ugrep', '-a', '-n', '-U', pat, ru]),
    ])


@@ -676,11 +683,13 @@ def bench_subtitles_ru_no_literal(suite_dir):

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', '-n', pat, ru]),
        Command('ugrep', ['ugrep', '-n', pat, ru]),
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep', ['ugrep', '-an', pat, ru]),
        Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
        Command('ag (ASCII)', ['ag', '-s', pat, ru]),
        Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
        Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru])
        # See bench_subtitles_ru_literal for why we use '-a' here.
        Command('ugrep (ASCII)', ['ugrep', '-anU', pat, ru])
    ])

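Several of these benchmarks add an ASCII-only variant by prepending `(?-u)` to the pattern, which turns off Unicode mode in ripgrep's default regex engine. Roughly, the two variants being timed differ like this (pattern and path are illustrative):

```
$ rg -n '\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}' ru.txt        # Unicode-aware \w and \s
$ rg -n '(?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}' ru.txt   # ASCII-only character classes via (?-u)
```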
benchsuite/runs/2022-12-16-archlinux-duff/README.md (new file, 38 lines)
@@ -0,0 +1,38 @@
This directory contains updated benchmarks as of 2022-12-16. They were captured
via the benchsuite script at `benchsuite/benchsuite` from the root of this
repository. The command that was run:

    $ ./benchsuite \
        --dir /dev/shm/benchsuite \
        --raw runs/2022-12-16-archlinux-duff/raw.csv \
        | tee runs/2022-12-16-archlinux-duff/summary

The versions of each tool are as follows:

    $ rg --version
    ripgrep 13.0.0 (rev 87c4a2b4b1)
    -SIMD -AVX (compiled)
    +SIMD +AVX (runtime)

    $ grep -V
    grep (GNU grep) 3.8

    $ ag -V
    ag version 2.2.0

    Features:
    +jit +lzma +zlib

    $ git --version
    git version 2.39.0

    $ ugrep --version
    ugrep 3.9.2 x86_64-pc-linux-gnu +avx2 +pcre2jit +zlib +bzip2 +lzma +lz4 +zstd
    License BSD-3-Clause: <https://opensource.org/licenses/BSD-3-Clause>
    Written by Robert van Engelen and others: <https://github.com/Genivia/ugrep>

The version of ripgrep used was compiled from source on commit 7f23cd63:

    $ cargo build --release --features 'pcre2'

This was run on a machine with an Intel i9-12900K with 128GB of memory.
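The `raw.csv` below records one row per timed run, using the header `benchmark,warmup_iter,iter,name,command,duration,lines,env`. As a sketch, the best-of-three duration per command for a single benchmark can be extracted with a short awk program (this assumes no field contains a comma, which holds for this file):

```
$ awk -F, '$1 == "linux_literal_default" {
      if (!($4 in best) || $6 + 0 < best[$4] + 0) best[$4] = $6
  } END { for (name in best) printf "%-10s %.3fs\n", name, best[name] }' raw.csv
```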
benchsuite/runs/2022-12-16-archlinux-duff/raw.csv (new file, 400 lines)
@@ -0,0 +1,400 @@
benchmark,warmup_iter,iter,name,command,duration,lines,env
|
||||
linux_literal_default,1,3,rg,rg PM_RESUME,0.08678817749023438,39,
|
||||
linux_literal_default,1,3,rg,rg PM_RESUME,0.08307123184204102,39,
|
||||
linux_literal_default,1,3,rg,rg PM_RESUME,0.08347964286804199,39,
|
||||
linux_literal_default,1,3,ag,ag PM_RESUME,0.2955434322357178,39,
|
||||
linux_literal_default,1,3,ag,ag PM_RESUME,0.2954287528991699,39,
|
||||
linux_literal_default,1,3,ag,ag PM_RESUME,0.2938194274902344,39,
|
||||
linux_literal_default,1,3,git grep,git grep PM_RESUME,0.23198556900024414,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,git grep,git grep PM_RESUME,0.22356963157653809,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,git grep,git grep PM_RESUME,0.2189793586730957,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,ugrep,ugrep -r PM_RESUME ./,0.10710000991821289,39,
|
||||
linux_literal_default,1,3,ugrep,ugrep -r PM_RESUME ./,0.10364222526550293,39,
|
||||
linux_literal_default,1,3,ugrep,ugrep -r PM_RESUME ./,0.1052248477935791,39,
|
||||
linux_literal_default,1,3,grep,grep -r PM_RESUME ./,0.9994468688964844,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,grep,grep -r PM_RESUME ./,0.9939279556274414,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,grep,grep -r PM_RESUME ./,0.9957931041717529,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal,1,3,rg,rg -n PM_RESUME,0.08603358268737793,39,
|
||||
linux_literal,1,3,rg,rg -n PM_RESUME,0.0837090015411377,39,
|
||||
linux_literal,1,3,rg,rg -n PM_RESUME,0.08435535430908203,39,
|
||||
linux_literal,1,3,rg (mmap),rg -n --mmap PM_RESUME,0.3215503692626953,39,
|
||||
linux_literal,1,3,rg (mmap),rg -n --mmap PM_RESUME,0.32426929473876953,39,
|
||||
linux_literal,1,3,rg (mmap),rg -n --mmap PM_RESUME,0.3215982913970947,39,
|
||||
linux_literal,1,3,ag (mmap),ag -s PM_RESUME,0.2894856929779053,39,
|
||||
linux_literal,1,3,ag (mmap),ag -s PM_RESUME,0.2892603874206543,39,
|
||||
linux_literal,1,3,ag (mmap),ag -s PM_RESUME,0.29217028617858887,39,
|
||||
linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.206068754196167,39,LC_ALL=C
|
||||
linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.2218036651611328,39,LC_ALL=C
|
||||
linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.20590710639953613,39,LC_ALL=C
|
||||
linux_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n PM_RESUME ./,0.18692874908447266,39,
|
||||
linux_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n PM_RESUME ./,0.19518327713012695,39,
|
||||
linux_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n PM_RESUME ./,0.18577361106872559,39,
|
||||
linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.08709383010864258,536,
|
||||
linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.08861064910888672,536,
|
||||
linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.08769798278808594,536,
|
||||
linux_literal_casei,1,3,rg (mmap),rg -n -i --mmap PM_RESUME,0.3218965530395508,536,
|
||||
linux_literal_casei,1,3,rg (mmap),rg -n -i --mmap PM_RESUME,0.30869364738464355,536,
|
||||
linux_literal_casei,1,3,rg (mmap),rg -n -i --mmap PM_RESUME,0.31044936180114746,536,
|
||||
linux_literal_casei,1,3,ag (mmap),ag -i PM_RESUME,0.2989068031311035,536,
|
||||
linux_literal_casei,1,3,ag (mmap),ag -i PM_RESUME,0.2996039390563965,536,
|
||||
linux_literal_casei,1,3,ag (mmap),ag -i PM_RESUME,0.29817700386047363,536,
|
||||
linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.2122786045074463,536,LC_ALL=C
|
||||
linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.20763754844665527,536,LC_ALL=C
|
||||
linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.220794677734375,536,LC_ALL=C
|
||||
linux_literal_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i PM_RESUME ./,0.17305850982666016,536,
|
||||
linux_literal_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i PM_RESUME ./,0.1745915412902832,536,
|
||||
linux_literal_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i PM_RESUME ./,0.17526865005493164,536,
|
||||
linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.08527851104736328,2160,
|
||||
linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.08487534523010254,2160,
|
||||
linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.0848684310913086,2160,
|
||||
linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.37945985794067383,2160,
|
||||
linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.36303210258483887,2160,
|
||||
linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.36359691619873047,2160,
|
||||
linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.9589834213256836,2160,LC_ALL=C
|
||||
linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.9206984043121338,2160,LC_ALL=C
|
||||
linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.8642933368682861,2160,LC_ALL=C
|
||||
linux_re_literal_suffix,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n [A-Z]+_RESUME ./,0.40503501892089844,2160,
|
||||
linux_re_literal_suffix,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n [A-Z]+_RESUME ./,0.4531714916229248,2160,
|
||||
linux_re_literal_suffix,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n [A-Z]+_RESUME ./,0.4397866725921631,2160,
|
||||
linux_word,1,3,rg,rg -n -w PM_RESUME,0.08639907836914062,9,
|
||||
linux_word,1,3,rg,rg -n -w PM_RESUME,0.08583569526672363,9,
|
||||
linux_word,1,3,rg,rg -n -w PM_RESUME,0.08414363861083984,9,
|
||||
linux_word,1,3,ag,ag -s -w PM_RESUME,0.2853865623474121,9,
|
||||
linux_word,1,3,ag,ag -s -w PM_RESUME,0.2871377468109131,9,
|
||||
linux_word,1,3,ag,ag -s -w PM_RESUME,0.28753662109375,9,
|
||||
linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.20428204536437988,9,LC_ALL=C
|
||||
linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.20490717887878418,9,LC_ALL=C
|
||||
linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.20840072631835938,9,LC_ALL=C
|
||||
linux_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -w PM_RESUME ./,0.18790841102600098,9,
|
||||
linux_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -w PM_RESUME ./,0.18659543991088867,9,
|
||||
linux_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -w PM_RESUME ./,0.19104933738708496,9,
|
||||
linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.19976496696472168,105,
|
||||
linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.20618367195129395,105,
|
||||
linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.19702935218811035,105,
|
||||
linux_unicode_greek,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \p{Greek} ./,0.17758727073669434,105,
|
||||
linux_unicode_greek,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \p{Greek} ./,0.17793798446655273,105,
|
||||
linux_unicode_greek,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \p{Greek} ./,0.1872577667236328,105,
|
||||
linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.19808244705200195,245,
|
||||
linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.1979837417602539,245,
|
||||
linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.1984400749206543,245,
|
||||
linux_unicode_greek_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i \p{Greek} ./,0.1819148063659668,105,
|
||||
linux_unicode_greek_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i \p{Greek} ./,0.17530512809753418,105,
|
||||
linux_unicode_greek_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i \p{Greek} ./,0.17999005317687988,105,
|
||||
linux_unicode_word,1,3,rg,rg -n \wAh,0.08527827262878418,247,
|
||||
linux_unicode_word,1,3,rg,rg -n \wAh,0.08541679382324219,247,
|
||||
linux_unicode_word,1,3,rg,rg -n \wAh,0.08553218841552734,247,
|
||||
linux_unicode_word,1,3,rg (ASCII),rg -n (?-u)\wAh,0.08484745025634766,233,
|
||||
linux_unicode_word,1,3,rg (ASCII),rg -n (?-u)\wAh,0.08466482162475586,233,
|
||||
linux_unicode_word,1,3,rg (ASCII),rg -n (?-u)\wAh,0.08487439155578613,233,
|
||||
linux_unicode_word,1,3,ag (ASCII),ag -s \wAh,0.3061795234680176,233,
|
||||
linux_unicode_word,1,3,ag (ASCII),ag -s \wAh,0.2993617057800293,233,
|
||||
linux_unicode_word,1,3,ag (ASCII),ag -s \wAh,0.29722046852111816,233,
|
||||
linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,4.257144451141357,247,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,3.852163076400757,247,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,3.8293941020965576,247,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,3,git grep (ASCII),git grep -E -I -n \wAh,1.647632122039795,233,LC_ALL=C
|
||||
linux_unicode_word,1,3,git grep (ASCII),git grep -E -I -n \wAh,1.6269629001617432,233,LC_ALL=C
|
||||
linux_unicode_word,1,3,git grep (ASCII),git grep -E -I -n \wAh,1.5847914218902588,233,LC_ALL=C
|
||||
linux_unicode_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \wAh ./,0.1802208423614502,247,
|
||||
linux_unicode_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \wAh ./,0.17564702033996582,247,
|
||||
linux_unicode_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \wAh ./,0.1746981143951416,247,
|
||||
linux_unicode_word,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \wAh ./,0.1799161434173584,233,
|
||||
linux_unicode_word,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \wAh ./,0.18733000755310059,233,
|
||||
linux_unicode_word,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \wAh ./,0.18859529495239258,233,
|
||||
linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.26203155517578125,721,
|
||||
linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.2615540027618408,721,
|
||||
linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.2730247974395752,721,
|
||||
linux_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.19902300834655762,720,
|
||||
linux_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.20034146308898926,720,
|
||||
linux_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.20192813873291016,720,
|
||||
linux_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8269081115722656,1134,
|
||||
linux_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8393104076385498,1134,
|
||||
linux_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8293666839599609,1134,
|
||||
linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},7.334395408630371,721,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},7.338796854019165,721,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},7.36545991897583,721,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,3,git grep (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.1588926315307617,720,LC_ALL=C
|
||||
linux_no_literal,1,3,git grep (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.132209062576294,720,LC_ALL=C
|
||||
linux_no_literal,1,3,git grep (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.1407439708709717,720,LC_ALL=C
|
||||
linux_no_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,3.410162925720215,723,
|
||||
linux_no_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,3.405057668685913,723,
|
||||
linux_no_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,3.3945884704589844,723,
|
||||
linux_no_literal,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,0.23865604400634766,722,
|
||||
linux_no_literal,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,0.23371148109436035,722,
|
||||
linux_no_literal,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,0.2343149185180664,722,
|
||||
linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08691263198852539,140,
|
||||
linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08707070350646973,140,
|
||||
linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08713960647583008,140,
|
||||
linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.32947278022766113,140,
|
||||
linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.33203840255737305,140,
|
||||
linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.3292670249938965,140,
|
||||
linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4576725959777832,140,LC_ALL=C
|
||||
linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.41936421394348145,140,LC_ALL=C
|
||||
linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.3639688491821289,140,LC_ALL=C
|
||||
linux_alternates,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.17806458473205566,140,
|
||||
linux_alternates,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.18224716186523438,140,
|
||||
linux_alternates,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.17795038223266602,140,
|
||||
linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12421393394470215,241,
|
||||
linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12235784530639648,241,
|
||||
linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12151455879211426,241,
|
||||
linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.529585599899292,241,
|
||||
linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5305526256561279,241,
|
||||
linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5311264991760254,241,
|
||||
linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.7589735984802246,241,LC_ALL=C
|
||||
linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.7852108478546143,241,LC_ALL=C
|
||||
linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.8308050632476807,241,LC_ALL=C
|
||||
linux_alternates_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.17955923080444336,241,
|
||||
linux_alternates_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.1745290756225586,241,
|
||||
linux_alternates_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.1773686408996582,241,
|
||||
subtitles_en_literal,1,3,rg,rg Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.1213979721069336,830,
|
||||
subtitles_en_literal,1,3,rg,rg Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.1213991641998291,830,
|
||||
subtitles_en_literal,1,3,rg,rg Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.12620782852172852,830,
|
||||
subtitles_en_literal,1,3,rg (no mmap),rg --no-mmap Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18207263946533203,830,
|
||||
subtitles_en_literal,1,3,rg (no mmap),rg --no-mmap Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17281484603881836,830,
|
||||
subtitles_en_literal,1,3,rg (no mmap),rg --no-mmap Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17368507385253906,830,
|
||||
subtitles_en_literal,1,3,grep,grep Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.560560941696167,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep,grep Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.563499927520752,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep,grep Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.5916609764099121,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,rg (lines),rg -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.19600844383239746,830,
|
||||
subtitles_en_literal,1,3,rg (lines),rg -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18436980247497559,830,
|
||||
subtitles_en_literal,1,3,rg (lines),rg -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18594050407409668,830,
|
||||
subtitles_en_literal,1,3,ag (lines),ag -s Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.871025562286377,830,
|
||||
subtitles_en_literal,1,3,ag (lines),ag -s Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8636960983276367,830,
|
||||
subtitles_en_literal,1,3,ag (lines),ag -s Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8680994510650635,830,
|
||||
subtitles_en_literal,1,3,grep (lines),grep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9978001117706299,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep (lines),grep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9385361671447754,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep (lines),grep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0036489963531494,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,ugrep (lines),ugrep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18918490409851074,830,
|
||||
subtitles_en_literal,1,3,ugrep (lines),ugrep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.1769108772277832,830,
|
||||
subtitles_en_literal,1,3,ugrep (lines),ugrep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18808293342590332,830,
|
||||
subtitles_en_literal_casei,1,3,rg,rg -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.21876287460327148,871,
|
||||
subtitles_en_literal_casei,1,3,rg,rg -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.2044692039489746,871,
|
||||
subtitles_en_literal_casei,1,3,rg,rg -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.2184743881225586,871,
|
||||
subtitles_en_literal_casei,1,3,grep,grep -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,2.224027156829834,871,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,3,grep,grep -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,2.223188877105713,871,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,3,grep,grep -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,2.223966598510742,871,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,3,grep (ASCII),grep -E -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.671149492263794,871,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,3,grep (ASCII),grep -E -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.6705749034881592,871,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,3,grep (ASCII),grep -E -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.6700258255004883,871,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,3,rg (lines),rg -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.2624058723449707,871,
|
||||
subtitles_en_literal_casei,1,3,rg (lines),rg -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.25513339042663574,871,
|
||||
subtitles_en_literal_casei,1,3,rg (lines),rg -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.26088857650756836,871,
|
||||
subtitles_en_literal_casei,1,3,ag (lines) (ASCII),ag -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.9144322872161865,871,
|
||||
subtitles_en_literal_casei,1,3,ag (lines) (ASCII),ag -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.866628885269165,871,
|
||||
subtitles_en_literal_casei,1,3,ag (lines) (ASCII),ag -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.9098389148712158,871,
|
||||
subtitles_en_literal_casei,1,3,ugrep (lines),ugrep -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.7860472202301025,871,
|
||||
subtitles_en_literal_casei,1,3,ugrep (lines),ugrep -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.7858343124389648,871,
|
||||
subtitles_en_literal_casei,1,3,ugrep (lines),ugrep -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.782252311706543,871,
|
||||
subtitles_en_literal_word,1,3,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /dev/shm/benchsuite/subtitles/en.sample.txt,0.18424677848815918,830,
|
||||
subtitles_en_literal_word,1,3,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /dev/shm/benchsuite/subtitles/en.sample.txt,0.19610810279846191,830,
|
||||
subtitles_en_literal_word,1,3,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /dev/shm/benchsuite/subtitles/en.sample.txt,0.18711471557617188,830,
|
||||
subtitles_en_literal_word,1,3,ag (ASCII),ag -sw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8301315307617188,830,
|
||||
subtitles_en_literal_word,1,3,ag (ASCII),ag -sw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8689801692962646,830,
|
||||
subtitles_en_literal_word,1,3,ag (ASCII),ag -sw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8279321193695068,830,
|
||||
subtitles_en_literal_word,1,3,grep (ASCII),grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0036842823028564,830,LC_ALL=C
|
||||
subtitles_en_literal_word,1,3,grep (ASCII),grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.002833604812622,830,LC_ALL=C
|
||||
subtitles_en_literal_word,1,3,grep (ASCII),grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9236147403717041,830,LC_ALL=C
|
||||
subtitles_en_literal_word,1,3,ugrep (ASCII),ugrep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17717313766479492,830,
|
||||
subtitles_en_literal_word,1,3,ugrep (ASCII),ugrep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18994617462158203,830,
|
||||
subtitles_en_literal_word,1,3,ugrep (ASCII),ugrep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17972850799560547,830,
|
||||
subtitles_en_literal_word,1,3,rg,rg -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18804550170898438,830,
|
||||
subtitles_en_literal_word,1,3,rg,rg -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18867778778076172,830,
|
||||
subtitles_en_literal_word,1,3,rg,rg -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.19913530349731445,830,
|
||||
subtitles_en_literal_word,1,3,grep,grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0044364929199219,830,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,3,grep,grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0040032863616943,830,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,3,grep,grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9627983570098877,830,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate,1,3,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.24848055839538574,1094,
|
||||
subtitles_en_alternate,1,3,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.24738383293151855,1094,
|
||||
subtitles_en_alternate,1,3,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.24789118766784668,1094,
|
||||
subtitles_en_alternate,1,3,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.668708562850952,1094,
|
||||
subtitles_en_alternate,1,3,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.57511305809021,1094,
|
||||
subtitles_en_alternate,1,3,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.6714110374450684,1094,
|
||||
subtitles_en_alternate,1,3,grep (lines),grep -E -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.0586187839508057,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep (lines),grep -E -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.0227150917053223,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep (lines),grep -E -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.075378179550171,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,ugrep (lines),ugrep -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7863781452178955,1094,
|
||||
subtitles_en_alternate,1,3,ugrep (lines),ugrep -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7874250411987305,1094,
|
||||
subtitles_en_alternate,1,3,ugrep (lines),ugrep -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7867889404296875,1094,
|
||||
subtitles_en_alternate,1,3,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.18195557594299316,1094,
|
||||
subtitles_en_alternate,1,3,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.18239641189575195,1094,
|
||||
subtitles_en_alternate,1,3,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.1625690460205078,1094,
|
||||
subtitles_en_alternate,1,3,grep,grep -E Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,1.6601614952087402,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep,grep -E Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,1.6617567539215088,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep,grep -E Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,1.6584677696228027,1094,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,4.0028722286224365,1136,
|
||||
subtitles_en_alternate_casei,1,3,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.991217851638794,1136,
|
||||
subtitles_en_alternate_casei,1,3,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,4.00272274017334,1136,
|
||||
subtitles_en_alternate_casei,1,3,grep (ASCII),grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.549154758453369,1136,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,grep (ASCII),grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5468921661376953,1136,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,grep (ASCII),grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5873491764068604,1136,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,ugrep (ASCII),ugrep -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7872169017791748,1136,
|
||||
subtitles_en_alternate_casei,1,3,ugrep (ASCII),ugrep -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.784674882888794,1136,
|
||||
subtitles_en_alternate_casei,1,3,ugrep (ASCII),ugrep -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7882401943206787,1136,
|
||||
subtitles_en_alternate_casei,1,3,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.4785435199737549,1136,
|
||||
subtitles_en_alternate_casei,1,3,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.4940922260284424,1136,
|
||||
subtitles_en_alternate_casei,1,3,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.4774627685546875,1136,
|
||||
subtitles_en_alternate_casei,1,3,grep,grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5677175521850586,1136,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,3,grep,grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.603273391723633,1136,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,3,grep,grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5834741592407227,1136,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20238041877746582,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.2031264305114746,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20475172996520996,278,
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0288453102111816,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.044802188873291,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0432109832763672,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -an \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,43.00765633583069,278,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -an \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,42.832849740982056,278,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -an \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,42.915205240249634,278,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.083683967590332,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0841526985168457,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0850934982299805,,
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0116353034973145,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.9868073463439941,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0224814414978027,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8892502784729004,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8910088539123535,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8897674083709717,,
|
||||
subtitles_en_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.11850643157959,22,
|
||||
subtitles_en_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.1359670162200928,22,
|
||||
subtitles_en_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.103114128112793,22,
|
||||
subtitles_en_no_literal,1,3,ugrep,ugrep -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,13.050881385803223,22,
|
||||
subtitles_en_no_literal,1,3,ugrep,ugrep -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,13.050772190093994,22,
|
||||
subtitles_en_no_literal,1,3,ugrep,ugrep -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,13.05719804763794,22,
|
||||
subtitles_en_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,1.9961926937103271,22,
|
||||
subtitles_en_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.019721508026123,22,
|
||||
subtitles_en_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,1.9965126514434814,22,
|
||||
subtitles_en_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,6.849602222442627,302,
|
||||
subtitles_en_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,6.813834190368652,302,
|
||||
subtitles_en_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,6.8263633251190186,302,
|
||||
subtitles_en_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,4.42924165725708,22,LC_ALL=C
|
||||
subtitles_en_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,4.378557205200195,22,LC_ALL=C
|
||||
subtitles_en_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,4.376646518707275,22,LC_ALL=C
|
||||
subtitles_en_no_literal,1,3,ugrep (ASCII),ugrep -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,3.5110037326812744,22,
|
||||
subtitles_en_no_literal,1,3,ugrep (ASCII),ugrep -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,3.5137360095977783,22,
|
||||
subtitles_en_no_literal,1,3,ugrep (ASCII),ugrep -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,3.5051844120025635,22,
|
||||
subtitles_ru_literal,1,3,rg,rg Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.13207745552062988,583,
|
||||
subtitles_ru_literal,1,3,rg,rg Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.13084721565246582,583,
|
||||
subtitles_ru_literal,1,3,rg,rg Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.13469862937927246,583,
|
||||
subtitles_ru_literal,1,3,rg (no mmap),rg --no-mmap Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.18022370338439941,583,
|
||||
subtitles_ru_literal,1,3,rg (no mmap),rg --no-mmap Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.1801767349243164,583,
|
||||
subtitles_ru_literal,1,3,rg (no mmap),rg --no-mmap Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.17995166778564453,583,
|
||||
subtitles_ru_literal,1,3,grep,grep Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5151040554046631,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep,grep Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5154542922973633,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep,grep Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.49927639961242676,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,rg (lines),rg -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.19464492797851562,583,
|
||||
subtitles_ru_literal,1,3,rg (lines),rg -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.18920588493347168,583,
|
||||
subtitles_ru_literal,1,3,rg (lines),rg -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.19465351104736328,583,
|
||||
subtitles_ru_literal,1,3,ag (lines),ag -s Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,1.9595966339111328,583,
|
||||
subtitles_ru_literal,1,3,ag (lines),ag -s Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,2.0014493465423584,583,
|
||||
subtitles_ru_literal,1,3,ag (lines),ag -s Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,1.9567768573760986,583,
|
||||
subtitles_ru_literal,1,3,grep (lines),grep -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8119180202484131,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep (lines),grep -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8111097812652588,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep (lines),grep -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8006868362426758,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,ugrep (lines),ugrep -a -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.70003342628479,583,
|
||||
subtitles_ru_literal,1,3,ugrep (lines),ugrep -a -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.650275468826294,583,
|
||||
subtitles_ru_literal,1,3,ugrep (lines),ugrep -a -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.689772367477417,583,
|
||||
subtitles_ru_literal_casei,1,3,rg,rg -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.267578125,604,
|
||||
subtitles_ru_literal_casei,1,3,rg,rg -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.2665982246398926,604,
|
||||
subtitles_ru_literal_casei,1,3,rg,rg -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.26861572265625,604,
|
||||
subtitles_ru_literal_casei,1,3,grep,grep -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,4.764627456665039,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,3,grep,grep -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,4.767015695571899,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,3,grep,grep -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,4.7688889503479,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,3,grep (ASCII),grep -E -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5046737194061279,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,3,grep (ASCII),grep -E -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5139875411987305,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,3,grep (ASCII),grep -E -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.4993159770965576,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,3,rg (lines),rg -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.33438658714294434,604,
|
||||
subtitles_ru_literal_casei,1,3,rg (lines),rg -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.3398289680480957,604,
|
||||
subtitles_ru_literal_casei,1,3,rg (lines),rg -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.3298227787017822,604,
|
||||
subtitles_ru_literal_casei,1,3,ag (lines) (ASCII),ag -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.4468214511871338,,
|
||||
subtitles_ru_literal_casei,1,3,ag (lines) (ASCII),ag -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.44559574127197266,,
|
||||
subtitles_ru_literal_casei,1,3,ag (lines) (ASCII),ag -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.47882938385009766,,
|
||||
subtitles_ru_literal_casei,1,3,ugrep (lines) (ASCII),ugrep -a -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7039575576782227,583,
|
||||
subtitles_ru_literal_casei,1,3,ugrep (lines) (ASCII),ugrep -a -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.6490752696990967,583,
|
||||
subtitles_ru_literal_casei,1,3,ugrep (lines) (ASCII),ugrep -a -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8081104755401611,583,
|
||||
subtitles_ru_literal_word,1,3,rg (ASCII),rg -n (?-u:^|\W)Шерлок Холмс(?-u:$|\W) /dev/shm/benchsuite/subtitles/ru.txt,0.20162224769592285,583,
|
||||
subtitles_ru_literal_word,1,3,rg (ASCII),rg -n (?-u:^|\W)Шерлок Холмс(?-u:$|\W) /dev/shm/benchsuite/subtitles/ru.txt,0.18215250968933105,583,
|
||||
subtitles_ru_literal_word,1,3,rg (ASCII),rg -n (?-u:^|\W)Шерлок Холмс(?-u:$|\W) /dev/shm/benchsuite/subtitles/ru.txt,0.20087671279907227,583,
|
||||
subtitles_ru_literal_word,1,3,ag (ASCII),ag -sw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.48624587059020996,,
|
||||
subtitles_ru_literal_word,1,3,ag (ASCII),ag -sw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5212516784667969,,
|
||||
subtitles_ru_literal_word,1,3,ag (ASCII),ag -sw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.520557165145874,,
|
||||
subtitles_ru_literal_word,1,3,grep (ASCII),grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8108196258544922,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,3,grep (ASCII),grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8121066093444824,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,3,grep (ASCII),grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7784581184387207,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,3,ugrep (ASCII),ugrep -anw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7469344139099121,583,
|
||||
subtitles_ru_literal_word,1,3,ugrep (ASCII),ugrep -anw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.6838233470916748,583,
|
||||
subtitles_ru_literal_word,1,3,ugrep (ASCII),ugrep -anw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.6921679973602295,583,
|
||||
subtitles_ru_literal_word,1,3,rg,rg -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.19918251037597656,579,
|
||||
subtitles_ru_literal_word,1,3,rg,rg -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.2046656608581543,579,
|
||||
subtitles_ru_literal_word,1,3,rg,rg -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.1984848976135254,579,
|
||||
subtitles_ru_literal_word,1,3,grep,grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.794173002243042,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,3,grep,grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7715346813201904,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,3,grep,grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8116705417633057,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate,1,3,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6730976104736328,691,
|
||||
subtitles_ru_alternate,1,3,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.7020411491394043,691,
|
||||
subtitles_ru_alternate,1,3,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6693949699401855,691,
|
||||
subtitles_ru_alternate,1,3,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7100515365600586,691,
|
||||
subtitles_ru_alternate,1,3,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7458419799804688,691,
|
||||
subtitles_ru_alternate,1,3,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7115116119384766,691,
|
||||
subtitles_ru_alternate,1,3,grep (lines),grep -E -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.703738451004028,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep (lines),grep -E -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.715883731842041,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep (lines),grep -E -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.712724924087524,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,ugrep (lines),ugrep -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.276995420455933,691,
|
||||
subtitles_ru_alternate,1,3,ugrep (lines),ugrep -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.304608345031738,691,
|
||||
subtitles_ru_alternate,1,3,ugrep (lines),ugrep -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.322760820388794,691,
|
||||
subtitles_ru_alternate,1,3,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6119842529296875,691,
|
||||
subtitles_ru_alternate,1,3,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6368775367736816,691,
|
||||
subtitles_ru_alternate,1,3,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6258070468902588,691,
|
||||
subtitles_ru_alternate,1,3,grep,grep -E Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.4300291538238525,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep,grep -E Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.418199300765991,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep,grep -E Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.425868511199951,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7216460704803467,691,
|
||||
subtitles_ru_alternate_casei,1,3,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7108607292175293,691,
|
||||
subtitles_ru_alternate_casei,1,3,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.747138500213623,691,
|
||||
subtitles_ru_alternate_casei,1,3,grep (ASCII),grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.711230039596558,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,grep (ASCII),grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.709407329559326,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,grep (ASCII),grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.714034557342529,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,ugrep (ASCII),ugrep -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.305904626846313,691,
|
||||
subtitles_ru_alternate_casei,1,3,ugrep (ASCII),ugrep -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.307406187057495,691,
|
||||
subtitles_ru_alternate_casei,1,3,ugrep (ASCII),ugrep -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.288233995437622,691,
|
||||
subtitles_ru_alternate_casei,1,3,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,3.673624277114868,735,
|
||||
subtitles_ru_alternate_casei,1,3,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,3.6759188175201416,735,
|
||||
subtitles_ru_alternate_casei,1,3,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,3.66877818107605,735,
|
||||
subtitles_ru_alternate_casei,1,3,grep,grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.366282224655151,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,3,grep,grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.370524883270264,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,3,grep,grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.342163324356079,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20331382751464844,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.2034592628479004,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20407724380493164,278,
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0436389446258545,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0388383865356445,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0446207523345947,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.29245424270629883,1,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.29168128967285156,1,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.29593825340270996,1,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.085604190826416,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.083526372909546,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.1223819255828857,,
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.9905192852020264,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0222513675689697,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0216262340545654,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8875806331634521,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8861405849456787,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8898241519927979,,
|
||||
subtitles_ru_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.237398147583008,41,
|
||||
subtitles_ru_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.253706693649292,41,
|
||||
subtitles_ru_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.2161178588867188,41,
|
||||
subtitles_ru_no_literal,1,3,ugrep,ugrep -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,28.85959553718567,41,
|
||||
subtitles_ru_no_literal,1,3,ugrep,ugrep -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,28.666419982910156,41,
|
||||
subtitles_ru_no_literal,1,3,ugrep,ugrep -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,28.90555214881897,41,
|
||||
subtitles_ru_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.051813840866089,,
|
||||
subtitles_ru_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.026675224304199,,
|
||||
subtitles_ru_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.027498245239258,,
|
||||
subtitles_ru_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0998010635375977,,
|
||||
subtitles_ru_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0900018215179443,,
|
||||
subtitles_ru_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0901548862457275,,
|
||||
subtitles_ru_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0691263675689697,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0875153541564941,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0997354984283447,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,3,ugrep (ASCII),ugrep -anU \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,0.8329172134399414,,
|
||||
subtitles_ru_no_literal,1,3,ugrep (ASCII),ugrep -anU \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,0.8292679786682129,,
|
||||
subtitles_ru_no_literal,1,3,ugrep (ASCII),ugrep -anU \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,0.8326950073242188,,
|
|
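Each row of the raw data above appears to record a single timed run: benchmark name, warmup count, run count, tool label, the exact command, wall-clock time in seconds, the number of matching lines, and any environment overrides (this reading of the columns is inferred from the data, not documented here). The summary file that follows seems to collapse the three runs per tool into a mean and standard deviation. A minimal sketch of that aggregation under those assumptions:

```rust
/// A minimal sketch: collapse the timed runs for one tool into the
/// "mean +/- stddev" form used by the summary file below. The meaning of the
/// CSV columns is an assumption based on the data above.
fn mean_and_stddev(secs: &[f64]) -> (f64, f64) {
    let n = secs.len() as f64;
    let mean = secs.iter().sum::<f64>() / n;
    let var = secs.iter().map(|&s| (s - mean).powi(2)).sum::<f64>() / (n - 1.0);
    (mean, var.sqrt())
}

fn main() {
    // The three 'rg -nw' runs from subtitles_ru_literal_word above.
    let runs = [0.19918251037597656, 0.2046656608581543, 0.1984848976135254];
    let (mean, dev) = mean_and_stddev(&runs);
    println!("rg {:.3} +/- {:.3} (lines: 579)", mean, dev);
}
```

Applied to the three `rg -nw` rows above, this reproduces the `rg 0.201 +/- 0.003 (lines: 579)` entry that appears in the summary below.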
208
benchsuite/runs/2022-12-16-archlinux-duff/summary
Normal file
@@ -0,0 +1,208 @@
|
||||
linux_literal_default (pattern: PM_RESUME)
|
||||
------------------------------------------
|
||||
rg* 0.084 +/- 0.002 (lines: 39)*
|
||||
ag 0.295 +/- 0.001 (lines: 39)
|
||||
git grep 0.225 +/- 0.007 (lines: 39)
|
||||
ugrep 0.105 +/- 0.002 (lines: 39)
|
||||
grep 0.996 +/- 0.003 (lines: 39)
|
||||
|
||||
linux_literal (pattern: PM_RESUME)
|
||||
----------------------------------
|
||||
rg* 0.085 +/- 0.001 (lines: 39)*
|
||||
rg (mmap) 0.322 +/- 0.002 (lines: 39)
|
||||
ag (mmap) 0.290 +/- 0.002 (lines: 39)
|
||||
git grep 0.211 +/- 0.009 (lines: 39)
|
||||
ugrep 0.189 +/- 0.005 (lines: 39)
|
||||
|
||||
linux_literal_casei (pattern: PM_RESUME)
|
||||
----------------------------------------
|
||||
rg* 0.088 +/- 0.001 (lines: 536)*
|
||||
rg (mmap) 0.314 +/- 0.007 (lines: 536)
|
||||
ag (mmap) 0.299 +/- 0.001 (lines: 536)
|
||||
git grep 0.214 +/- 0.007 (lines: 536)
|
||||
ugrep 0.174 +/- 0.001 (lines: 536)
|
||||
|
||||
linux_re_literal_suffix (pattern: [A-Z]+_RESUME)
|
||||
------------------------------------------------
|
||||
rg* 0.085 +/- 0.000 (lines: 2160)*
|
||||
ag 0.369 +/- 0.009 (lines: 2160)
|
||||
git grep 0.915 +/- 0.048 (lines: 2160)
|
||||
ugrep 0.433 +/- 0.025 (lines: 2160)
|
||||
|
||||
linux_word (pattern: PM_RESUME)
|
||||
-------------------------------
|
||||
rg* 0.085 +/- 0.001 (lines: 9)*
|
||||
ag 0.287 +/- 0.001 (lines: 9)
|
||||
git grep 0.206 +/- 0.002 (lines: 9)
|
||||
ugrep 0.189 +/- 0.002 (lines: 9)
|
||||
|
||||
linux_unicode_greek (pattern: \p{Greek})
|
||||
----------------------------------------
|
||||
rg 0.201 +/- 0.005 (lines: 105)
|
||||
ugrep* 0.181 +/- 0.005 (lines: 105)*
|
||||
|
||||
linux_unicode_greek_casei (pattern: \p{Greek})
|
||||
----------------------------------------------
|
||||
rg 0.198 +/- 0.000 (lines: 245)
|
||||
ugrep* 0.179 +/- 0.003 (lines: 105)*
|
||||
|
||||
linux_unicode_word (pattern: \wAh)
|
||||
----------------------------------
|
||||
rg 0.085 +/- 0.000 (lines: 247)
|
||||
rg (ASCII)* 0.085 +/- 0.000 (lines: 233)*
|
||||
ag (ASCII) 0.301 +/- 0.005 (lines: 233)
|
||||
git grep 3.980 +/- 0.241 (lines: 247)
|
||||
git grep (ASCII) 1.620 +/- 0.032 (lines: 233)
|
||||
ugrep 0.177 +/- 0.003 (lines: 247)
|
||||
ugrep (ASCII) 0.185 +/- 0.005 (lines: 233)
|
||||
|
||||
linux_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
-----------------------------------------------------------------
|
||||
rg 0.266 +/- 0.006 (lines: 721)
|
||||
rg (ASCII)* 0.200 +/- 0.001 (lines: 720)*
|
||||
ag (ASCII) 0.832 +/- 0.007 (lines: 1134)
|
||||
git grep 7.346 +/- 0.017 (lines: 721)
|
||||
git grep (ASCII) 2.144 +/- 0.014 (lines: 720)
|
||||
ugrep 3.403 +/- 0.008 (lines: 723)
|
||||
ugrep (ASCII) 0.236 +/- 0.003 (lines: 722)
|
||||
|
||||
linux_alternates (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------
|
||||
rg* 0.087 +/- 0.000 (lines: 140)*
|
||||
ag 0.330 +/- 0.002 (lines: 140)
|
||||
git grep 0.414 +/- 0.047 (lines: 140)
|
||||
ugrep 0.179 +/- 0.002 (lines: 140)
|
||||
|
||||
linux_alternates_casei (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------------
|
||||
rg* 0.123 +/- 0.001 (lines: 241)*
|
||||
ag 0.530 +/- 0.001 (lines: 241)
|
||||
git grep 0.792 +/- 0.036 (lines: 241)
|
||||
ugrep 0.177 +/- 0.003 (lines: 241)
|
||||
|
||||
subtitles_en_literal (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------
|
||||
rg* 0.123 +/- 0.003 (lines: 830)*
|
||||
rg (no mmap) 0.176 +/- 0.005 (lines: 830)
|
||||
grep 0.572 +/- 0.017 (lines: 830)
|
||||
rg (lines) 0.189 +/- 0.006 (lines: 830)
|
||||
ag (lines) 1.868 +/- 0.004 (lines: 830)
|
||||
grep (lines) 0.980 +/- 0.036 (lines: 830)
|
||||
ugrep (lines) 0.185 +/- 0.007 (lines: 830)
|
||||
|
||||
subtitles_en_literal_casei (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------------
|
||||
rg* 0.214 +/- 0.008 (lines: 871)*
|
||||
grep 2.224 +/- 0.000 (lines: 871)
|
||||
grep (ASCII) 0.671 +/- 0.001 (lines: 871)
|
||||
rg (lines) 0.259 +/- 0.004 (lines: 871)
|
||||
ag (lines) (ASCII) 1.897 +/- 0.026 (lines: 871)
|
||||
ugrep (lines) 0.785 +/- 0.002 (lines: 871)
|
||||
|
||||
subtitles_en_literal_word (pattern: Sherlock Holmes)
|
||||
----------------------------------------------------
|
||||
rg (ASCII) 0.189 +/- 0.006 (lines: 830)
|
||||
ag (ASCII) 1.842 +/- 0.023 (lines: 830)
|
||||
grep (ASCII) 0.977 +/- 0.046 (lines: 830)
|
||||
ugrep (ASCII)* 0.182 +/- 0.007 (lines: 830)*
|
||||
rg 0.192 +/- 0.006 (lines: 830)
|
||||
grep 0.990 +/- 0.024 (lines: 830)
|
||||
|
||||
subtitles_en_alternate (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 0.248 +/- 0.001 (lines: 1094)
|
||||
ag (lines) 2.638 +/- 0.055 (lines: 1094)
|
||||
grep (lines) 2.052 +/- 0.027 (lines: 1094)
|
||||
ugrep (lines) 0.787 +/- 0.001 (lines: 1094)
|
||||
rg* 0.176 +/- 0.011 (lines: 1094)*
|
||||
grep 1.660 +/- 0.002 (lines: 1094)
|
||||
|
||||
subtitles_en_alternate_casei (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII) 3.999 +/- 0.007 (lines: 1136)
|
||||
grep (ASCII) 3.561 +/- 0.023 (lines: 1136)
|
||||
ugrep (ASCII) 0.787 +/- 0.002 (lines: 1136)
|
||||
rg* 0.483 +/- 0.009 (lines: 1136)*
|
||||
grep 3.585 +/- 0.018 (lines: 1136)
|
||||
|
||||
subtitles_en_surrounding_words (pattern: \w+\s+Holmes\s+\w+)
|
||||
------------------------------------------------------------
|
||||
rg 0.200 +/- 0.001 (lines: 483)
|
||||
grep 1.303 +/- 0.040 (lines: 483)
|
||||
ugrep 43.220 +/- 0.047 (lines: 483)
|
||||
rg (ASCII)* 0.197 +/- 0.000 (lines: 483)*
|
||||
ag (ASCII) 5.223 +/- 0.056 (lines: 489)
|
||||
grep (ASCII) 1.316 +/- 0.043 (lines: 483)
|
||||
ugrep (ASCII) 17.647 +/- 0.219 (lines: 483)
|
||||
|
||||
subtitles_en_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 2.119 +/- 0.016 (lines: 22)
|
||||
ugrep 13.053 +/- 0.004 (lines: 22)
|
||||
rg (ASCII)* 2.004 +/- 0.013 (lines: 22)*
|
||||
ag (ASCII) 6.830 +/- 0.018 (lines: 302)
|
||||
grep (ASCII) 4.395 +/- 0.030 (lines: 22)
|
||||
ugrep (ASCII) 3.510 +/- 0.004 (lines: 22)
|
||||
|
||||
subtitles_ru_literal (pattern: Шерлок Холмс)
|
||||
--------------------------------------------
|
||||
rg* 0.133 +/- 0.002 (lines: 583)*
|
||||
rg (no mmap) 0.180 +/- 0.000 (lines: 583)
|
||||
grep 0.510 +/- 0.009 (lines: 583)
|
||||
rg (lines) 0.193 +/- 0.003 (lines: 583)
|
||||
ag (lines) 1.973 +/- 0.025 (lines: 583)
|
||||
grep (lines) 0.808 +/- 0.006 (lines: 583)
|
||||
ugrep (lines) 0.680 +/- 0.026 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_casei (pattern: Шерлок Холмс)
|
||||
--------------------------------------------------
|
||||
rg* 0.268 +/- 0.001 (lines: 604)*
|
||||
grep 4.767 +/- 0.002 (lines: 604)
|
||||
grep (ASCII) 0.506 +/- 0.007 (lines: 583)
|
||||
rg (lines) 0.335 +/- 0.005 (lines: 604)
|
||||
ag (lines) (ASCII) 0.457 +/- 0.019 (lines: 0)
|
||||
ugrep (lines) (ASCII) 0.720 +/- 0.081 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_word (pattern: Шерлок Холмс)
|
||||
-------------------------------------------------
|
||||
rg (ASCII)* 0.195 +/- 0.011 (lines: 583)*
|
||||
ag (ASCII) 0.509 +/- 0.020 (lines: 0)
|
||||
grep (ASCII) 0.800 +/- 0.019 (lines: 583)
|
||||
ugrep (ASCII) 0.708 +/- 0.034 (lines: 583)
|
||||
rg 0.201 +/- 0.003 (lines: 579)
|
||||
grep 0.792 +/- 0.020 (lines: 579)
|
||||
|
||||
subtitles_ru_alternate (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 0.682 +/- 0.018 (lines: 691)
|
||||
ag (lines) 2.722 +/- 0.020 (lines: 691)
|
||||
grep (lines) 5.711 +/- 0.006 (lines: 691)
|
||||
ugrep (lines) 8.301 +/- 0.023 (lines: 691)
|
||||
rg* 0.625 +/- 0.012 (lines: 691)*
|
||||
grep 5.425 +/- 0.006 (lines: 691)
|
||||
|
||||
subtitles_ru_alternate_casei (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII)* 2.727 +/- 0.019 (lines: 691)*
|
||||
grep (ASCII) 5.712 +/- 0.002 (lines: 691)
|
||||
ugrep (ASCII) 8.301 +/- 0.011 (lines: 691)
|
||||
rg 3.673 +/- 0.004 (lines: 735)
|
||||
grep 5.360 +/- 0.015 (lines: 735)
|
||||
|
||||
subtitles_ru_surrounding_words (pattern: \w+\s+Холмс\s+\w+)
|
||||
-----------------------------------------------------------
|
||||
rg* 0.203 +/- 0.001 (lines: 278)*
|
||||
grep 1.039 +/- 0.009 (lines: 278)
|
||||
ugrep 42.919 +/- 0.087 (lines: 278)
|
||||
ag (ASCII) 1.084 +/- 0.001 (lines: 0)
|
||||
grep (ASCII) 1.007 +/- 0.018 (lines: 0)
|
||||
ugrep (ASCII) 0.890 +/- 0.001 (lines: 0)
|
||||
|
||||
subtitles_ru_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 2.236 +/- 0.019 (lines: 41)
|
||||
ugrep 28.811 +/- 0.127 (lines: 41)
|
||||
rg (ASCII) 2.035 +/- 0.014 (lines: 0)
|
||||
ag (ASCII) 1.093 +/- 0.006 (lines: 0)
|
||||
grep (ASCII) 1.085 +/- 0.015 (lines: 0)
|
||||
ugrep (ASCII)* 0.832 +/- 0.002 (lines: 0)*
|
@@ -17,16 +17,21 @@ if ! command -V cargo-deb > /dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -V asciidoctor > /dev/null 2>&1; then
|
||||
echo "asciidoctor command missing" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 'cargo deb' does not seem to provide a way to specify an asset that is
|
||||
# created at build time, such as ripgrep's man page. To work around this,
|
||||
# we force a debug build, copy out the man page (and shell completions)
|
||||
# produced from that build, put it into a predictable location and then build
|
||||
# the deb, which knows where to look.
|
||||
cargo build
|
||||
|
||||
DEPLOY_DIR=deployment/deb
|
||||
OUT_DIR="$("$D"/cargo-out-dir target/debug/)"
|
||||
mkdir -p "$DEPLOY_DIR"
|
||||
cargo build
|
||||
|
||||
# Copy man page and shell completions.
|
||||
cp "$OUT_DIR"/{rg.1,rg.bash,rg.fish} "$DEPLOY_DIR/"
|
||||
@@ -34,4 +39,4 @@ cp complete/_rg "$DEPLOY_DIR/"
|
||||
|
||||
# Since we're distributing the dpkg, we don't know whether the user will have
|
||||
# PCRE2 installed, so just do a static build.
|
||||
PCRE2_SYS_STATIC=1 cargo deb
|
||||
PCRE2_SYS_STATIC=1 cargo deb --target x86_64-unknown-linux-musl
|
||||
|
@@ -44,8 +44,8 @@ main() {
|
||||
# Occasionally we may have to handle some manually, however
|
||||
help_args=( ${(f)"$(
|
||||
$rg --help |
|
||||
$rg -i -- '^\s+--?[a-z0-9]|--[a-z]' |
|
||||
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' |
|
||||
$rg -i -- '^\s+--?[a-z0-9.]|--[a-z]' |
|
||||
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9.]|--[a-z0-9-]+)(,|\\b)' |
|
||||
$rg -v -- --print0 | # False positives
|
||||
sort -u
|
||||
)"} )
|
||||
|
@@ -1,6 +1,16 @@
|
||||
#!/bin/sh
|
||||
|
||||
# This script gets run in weird environments that have been stripped of just
|
||||
# about every inessential thing. In order to keep this script versatile, we
|
||||
# just install 'sudo' and use it like normal if it doesn't exist. If it doesn't
|
||||
# exist, we assume we're root. (Otherwise we ain't doing much of anything
|
||||
# anyway.)
|
||||
if ! command -V sudo; then
|
||||
apt-get update
|
||||
apt-get install -y --no-install-recommends sudo
|
||||
fi
|
||||
sudo apt-get update
|
||||
sudo apt-get install -y --no-install-recommends \
|
||||
asciidoctor \
|
||||
zsh xz-utils liblz4-tool musl-tools
|
||||
zsh xz-utils liblz4-tool musl-tools \
|
||||
brotli zstd
|
||||
|
@@ -121,7 +121,7 @@ _rg() {
|
||||
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
|
||||
|
||||
+ '(hidden)' # Hidden-file options
|
||||
'--hidden[search hidden files and directories]'
|
||||
{-.,--hidden}'[search hidden files and directories]'
|
||||
$no"--no-hidden[don't search hidden files and directories]"
|
||||
|
||||
+ '(hybrid)' # hybrid regex options
|
||||
@@ -303,6 +303,8 @@ _rg() {
|
||||
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
|
||||
$no"--no-context-separator[don't print context separators]"
|
||||
'--debug[show debug messages]'
|
||||
'--field-context-separator[set string to delimit fields in context lines]'
|
||||
'--field-match-separator[set string to delimit fields in matching lines]'
|
||||
'--trace[show more verbose debug messages]'
|
||||
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
|
||||
"(1 stats)--files[show each file that would be searched (but don't search)]"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-cli"
|
||||
version = "0.1.5" #:version
|
||||
version = "0.1.7" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Utilities for search oriented command line applications.
|
||||
@@ -10,12 +10,13 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "cli", "utility", "util"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bstr = "0.2.0"
|
||||
globset = { version = "0.4.5", path = "../globset" }
|
||||
bstr = "1.1.0"
|
||||
globset = { version = "0.4.10", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
|
@@ -29,9 +29,3 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-cli = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_cli;
|
||||
```
|
||||
|
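The README hunk above drops the `extern crate grep_cli;` line, which matches the crate's move to the 2018 edition in the Cargo.toml hunk: with edition 2018, items are pulled in with a plain `use`. A minimal sketch of that usage; `is_tty_stdout` is one of the crate's public helpers and appears further down in this diff:

```rust
// 2018-edition usage: no `extern crate` declaration needed, just `use`.
use grep_cli::is_tty_stdout;

fn main() {
    if is_tty_stdout() {
        println!("stdout looks like a terminal; colored output is reasonable");
    } else {
        println!("stdout is redirected; plain output is reasonable");
    }
}
```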
@@ -1,12 +1,12 @@
|
||||
use std::ffi::{OsStr, OsString};
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::Command;
|
||||
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
|
||||
use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
use crate::process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
|
||||
/// A builder for a matcher that determines which files get decompressed.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -24,7 +24,7 @@ struct DecompressionCommand {
|
||||
/// The glob that matches this command.
|
||||
glob: String,
|
||||
/// The command or binary name.
|
||||
bin: OsString,
|
||||
bin: PathBuf,
|
||||
/// The arguments to invoke with the command.
|
||||
args: Vec<OsString>,
|
||||
}
|
||||
@@ -83,23 +83,60 @@ impl DecompressionMatcherBuilder {
|
||||
///
|
||||
/// The syntax for the glob is documented in the
|
||||
/// [`globset` crate](https://docs.rs/globset/#syntax).
|
||||
///
|
||||
/// The `program` given is resolved with respect to `PATH` and turned
|
||||
/// into an absolute path internally before being executed by the current
|
||||
/// platform. Notably, on Windows, this avoids a security problem where
|
||||
/// passing a relative path to `CreateProcess` will automatically search
|
||||
/// the current directory for a matching program. If the program could
|
||||
/// not be resolved, then it is silently ignored and the association is
|
||||
/// dropped. For this reason, callers should prefer `try_associate`.
|
||||
pub fn associate<P, I, A>(
|
||||
&mut self,
|
||||
glob: &str,
|
||||
program: P,
|
||||
args: I,
|
||||
) -> &mut DecompressionMatcherBuilder
|
||||
where
|
||||
P: AsRef<OsStr>,
|
||||
I: IntoIterator<Item = A>,
|
||||
A: AsRef<OsStr>,
|
||||
{
|
||||
let _ = self.try_associate(glob, program, args);
|
||||
self
|
||||
}
|
||||
|
||||
/// Associates a glob with a command to decompress files matching the glob.
|
||||
///
|
||||
/// If multiple globs match the same file, then the most recently added
|
||||
/// glob takes precedence.
|
||||
///
|
||||
/// The syntax for the glob is documented in the
|
||||
/// [`globset` crate](https://docs.rs/globset/#syntax).
|
||||
///
|
||||
/// The `program` given is resolved with respect to `PATH` and turned
|
||||
/// into an absolute path internally before being executed by the current
|
||||
/// platform. Notably, on Windows, this avoids a security problem where
|
||||
/// passing a relative path to `CreateProcess` will automatically search
|
||||
/// the current directory for a matching program. If the program could not
|
||||
/// be resolved, then an error is returned.
|
||||
pub fn try_associate<P, I, A>(
|
||||
&mut self,
|
||||
glob: &str,
|
||||
program: P,
|
||||
args: I,
|
||||
) -> Result<&mut DecompressionMatcherBuilder, CommandError>
|
||||
where
|
||||
P: AsRef<OsStr>,
|
||||
I: IntoIterator<Item = A>,
|
||||
A: AsRef<OsStr>,
|
||||
{
|
||||
let glob = glob.to_string();
|
||||
let bin = program.as_ref().to_os_string();
|
||||
let bin = resolve_binary(Path::new(program.as_ref()))?;
|
||||
let args =
|
||||
args.into_iter().map(|a| a.as_ref().to_os_string()).collect();
|
||||
self.commands.push(DecompressionCommand { glob, bin, args });
|
||||
self
|
||||
Ok(self)
|
||||
}
|
||||
}
|
||||
|
||||
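The `try_associate` method added above reports resolution failures up front instead of silently dropping the association the way `associate` does. A minimal usage sketch, assuming a hypothetical `lzip` decompressor is available on `PATH` (the `*.lz` glob and the program name are only illustrations):

```rust
use grep_cli::DecompressionMatcherBuilder;

fn example() -> Result<(), Box<dyn std::error::Error>> {
    let mut builder = DecompressionMatcherBuilder::new();
    // Hypothetical association: decompress '*.lz' files with 'lzip -d -c'.
    // Unlike associate(), this errors immediately if 'lzip' cannot be found.
    builder.try_associate("*.lz", "lzip", &["-d", "-c"])?;
    let _matcher = builder.build()?;
    Ok(())
}
```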
@@ -193,7 +230,7 @@ impl DecompressionReaderBuilder {
|
||||
match self.command_builder.build(&mut cmd) {
|
||||
Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }),
|
||||
Err(err) => {
|
||||
debug!(
|
||||
log::debug!(
|
||||
"{}: error spawning command '{:?}': {} \
|
||||
(falling back to uncompressed reader)",
|
||||
path.display(),
|
||||
@@ -329,6 +366,30 @@ impl DecompressionReader {
|
||||
let file = File::open(path)?;
|
||||
Ok(DecompressionReader { rdr: Err(file) })
|
||||
}
|
||||
|
||||
/// Closes this reader, freeing any resources used by its underlying child
|
||||
/// process, if one was used. If the child process exits with a nonzero
|
||||
/// exit code, the returned Err value will include its stderr.
|
||||
///
|
||||
/// `close` is idempotent, meaning it can be safely called multiple times.
|
||||
/// The first call closes the CommandReader and any subsequent calls do
|
||||
/// nothing.
|
||||
///
|
||||
/// This method should be called after partially reading a file to prevent
|
||||
/// resource leakage. However there is no need to call `close` explicitly
|
||||
/// if your code always calls `read` to EOF, as `read` takes care of
|
||||
/// calling `close` in this case.
|
||||
///
|
||||
/// `close` is also called in `drop` as a last line of defense against
|
||||
/// resource leakage. Any error from the child process is then printed as a
|
||||
/// warning to stderr. This can be avoided by explicitly calling `close`
|
||||
/// before the CommandReader is dropped.
|
||||
pub fn close(&mut self) -> io::Result<()> {
|
||||
match self.rdr {
|
||||
Ok(ref mut rdr) => rdr.close(),
|
||||
Err(_) => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for DecompressionReader {
|
||||
@@ -340,6 +401,70 @@ impl io::Read for DecompressionReader {
|
||||
}
|
||||
}
|
||||
|
||||
/// Resolves a path to a program to a path by searching for the program in
|
||||
/// `PATH`.
|
||||
///
|
||||
/// If the program could not be resolved, then an error is returned.
|
||||
///
|
||||
/// The purpose of doing this instead of passing the path to the program
|
||||
/// directly to Command::new is that Command::new will hand relative paths
|
||||
/// to CreateProcess on Windows, which will implicitly search the current
|
||||
/// working directory for the executable. This could be undesirable for
|
||||
/// security reasons. e.g., running ripgrep with the -z/--search-zip flag on an
|
||||
/// untrusted directory tree could result in arbitrary programs executing on
|
||||
/// Windows.
|
||||
///
|
||||
/// Note that this could still return a relative path if PATH contains a
|
||||
/// relative path. We permit this since it is assumed that the user has set
|
||||
/// this explicitly, and thus, desires this behavior.
|
||||
///
|
||||
/// On non-Windows, this is a no-op.
|
||||
pub fn resolve_binary<P: AsRef<Path>>(
|
||||
prog: P,
|
||||
) -> Result<PathBuf, CommandError> {
|
||||
use std::env;
|
||||
|
||||
fn is_exe(path: &Path) -> bool {
|
||||
let md = match path.metadata() {
|
||||
Err(_) => return false,
|
||||
Ok(md) => md,
|
||||
};
|
||||
!md.is_dir()
|
||||
}
|
||||
|
||||
let prog = prog.as_ref();
|
||||
if !cfg!(windows) || prog.is_absolute() {
|
||||
return Ok(prog.to_path_buf());
|
||||
}
|
||||
let syspaths = match env::var_os("PATH") {
|
||||
Some(syspaths) => syspaths,
|
||||
None => {
|
||||
let msg = "system PATH environment variable not found";
|
||||
return Err(CommandError::io(io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
msg,
|
||||
)));
|
||||
}
|
||||
};
|
||||
for syspath in env::split_paths(&syspaths) {
|
||||
if syspath.as_os_str().is_empty() {
|
||||
continue;
|
||||
}
|
||||
let abs_prog = syspath.join(prog);
|
||||
if is_exe(&abs_prog) {
|
||||
return Ok(abs_prog.to_path_buf());
|
||||
}
|
||||
if abs_prog.extension().is_none() {
|
||||
let abs_prog = abs_prog.with_extension("exe");
|
||||
if is_exe(&abs_prog) {
|
||||
return Ok(abs_prog.to_path_buf());
|
||||
}
|
||||
}
|
||||
}
|
||||
let msg = format!("{}: could not find executable in PATH", prog.display());
|
||||
return Err(CommandError::io(io::Error::new(io::ErrorKind::Other, msg)));
|
||||
}
|
||||
|
||||
fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
const ARGS_GZIP: &[&str] = &["gzip", "-d", "-c"];
|
||||
const ARGS_BZIP: &[&str] = &["bzip2", "-d", "-c"];
|
||||
@@ -350,29 +475,36 @@ fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"];
|
||||
const ARGS_UNCOMPRESS: &[&str] = &["uncompress", "-c"];
|
||||
|
||||
fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
|
||||
DecompressionCommand {
|
||||
fn add(glob: &str, args: &[&str], cmds: &mut Vec<DecompressionCommand>) {
|
||||
let bin = match resolve_binary(Path::new(args[0])) {
|
||||
Ok(bin) => bin,
|
||||
Err(err) => {
|
||||
log::debug!("{}", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
cmds.push(DecompressionCommand {
|
||||
glob: glob.to_string(),
|
||||
bin: OsStr::new(&args[0]).to_os_string(),
|
||||
bin,
|
||||
args: args
|
||||
.iter()
|
||||
.skip(1)
|
||||
.map(|s| OsStr::new(s).to_os_string())
|
||||
.collect(),
|
||||
}
|
||||
});
|
||||
}
|
||||
vec![
|
||||
cmd("*.gz", ARGS_GZIP),
|
||||
cmd("*.tgz", ARGS_GZIP),
|
||||
cmd("*.bz2", ARGS_BZIP),
|
||||
cmd("*.tbz2", ARGS_BZIP),
|
||||
cmd("*.xz", ARGS_XZ),
|
||||
cmd("*.txz", ARGS_XZ),
|
||||
cmd("*.lz4", ARGS_LZ4),
|
||||
cmd("*.lzma", ARGS_LZMA),
|
||||
cmd("*.br", ARGS_BROTLI),
|
||||
cmd("*.zst", ARGS_ZSTD),
|
||||
cmd("*.zstd", ARGS_ZSTD),
|
||||
cmd("*.Z", ARGS_UNCOMPRESS),
|
||||
]
|
||||
let mut cmds = vec![];
|
||||
add("*.gz", ARGS_GZIP, &mut cmds);
|
||||
add("*.tgz", ARGS_GZIP, &mut cmds);
|
||||
add("*.bz2", ARGS_BZIP, &mut cmds);
|
||||
add("*.tbz2", ARGS_BZIP, &mut cmds);
|
||||
add("*.xz", ARGS_XZ, &mut cmds);
|
||||
add("*.txz", ARGS_XZ, &mut cmds);
|
||||
add("*.lz4", ARGS_LZ4, &mut cmds);
|
||||
add("*.lzma", ARGS_LZMA, &mut cmds);
|
||||
add("*.br", ARGS_BROTLI, &mut cmds);
|
||||
add("*.zst", ARGS_ZSTD, &mut cmds);
|
||||
add("*.zstd", ARGS_ZSTD, &mut cmds);
|
||||
add("*.Z", ARGS_UNCOMPRESS, &mut cmds);
|
||||
cmds
|
||||
}
|
||||
|
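The `resolve_binary` function added above is exported from the crate root (see the `lib.rs` hunk further down), so callers can perform the same `PATH` lookup themselves. A minimal sketch of calling it; the `gzip` program name is only an illustration:

```rust
use std::path::Path;

use grep_cli::resolve_binary;

fn example() {
    // On Windows this searches PATH (trying an '.exe' extension if needed);
    // on other platforms the given path is returned unchanged.
    match resolve_binary(Path::new("gzip")) {
        Ok(bin) => println!("resolved to {}", bin.display()),
        Err(err) => eprintln!("could not resolve: {}", err),
    }
}
```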
@@ -8,7 +8,7 @@ use regex::Regex;
|
||||
/// An error that occurs when parsing a human readable size description.
|
||||
///
|
||||
/// This error provides an end user friendly message describing why the
|
||||
/// description coudln't be parsed and what the expected format is.
|
||||
/// description couldn't be parsed and what the expected format is.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct ParseSizeError {
|
||||
original: String,
|
||||
@@ -52,7 +52,7 @@ impl error::Error for ParseSizeError {
|
||||
}
|
||||
|
||||
impl fmt::Display for ParseSizeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
use self::ParseSizeErrorKind::*;
|
||||
|
||||
match self.kind {
|
||||
@@ -88,7 +88,7 @@ impl From<ParseSizeError> for io::Error {
|
||||
///
|
||||
/// Additional suffixes may be added over time.
|
||||
pub fn parse_human_readable_size(size: &str) -> Result<u64, ParseSizeError> {
|
||||
lazy_static! {
|
||||
lazy_static::lazy_static! {
|
||||
// Normally I'd just parse something this simple by hand to avoid the
|
||||
// regex dep, but we bring regex in any way for glob matching, so might
|
||||
// as well use it.
|
||||
|
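For reference, `parse_human_readable_size` is part of the crate's public API (re-exported in the `lib.rs` hunk below). A minimal sketch of how it might be called; treating the size suffixes as binary multiples is my assumption, not something stated in this hunk:

```rust
use grep_cli::parse_human_readable_size;

fn main() -> Result<(), grep_cli::ParseSizeError> {
    // A bare number is taken as a byte count.
    assert_eq!(parse_human_readable_size("1024")?, 1024);
    // Assumed: 'M' means a binary megabyte (1 << 20 bytes).
    assert_eq!(parse_human_readable_size("10M")?, 10 * (1u64 << 20));
    Ok(())
}
```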
@@ -158,19 +158,6 @@ error message is crafted that typically tells the user how to fix the problem.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate atty;
|
||||
extern crate bstr;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
extern crate termcolor;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi_util;
|
||||
|
||||
mod decompress;
|
||||
mod escape;
|
||||
mod human;
|
||||
@@ -178,18 +165,18 @@ mod pattern;
|
||||
mod process;
|
||||
mod wtr;
|
||||
|
||||
pub use decompress::{
|
||||
DecompressionMatcher, DecompressionMatcherBuilder, DecompressionReader,
|
||||
DecompressionReaderBuilder,
|
||||
pub use crate::decompress::{
|
||||
resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
|
||||
DecompressionReader, DecompressionReaderBuilder,
|
||||
};
|
||||
pub use escape::{escape, escape_os, unescape, unescape_os};
|
||||
pub use human::{parse_human_readable_size, ParseSizeError};
|
||||
pub use pattern::{
|
||||
pub use crate::escape::{escape, escape_os, unescape, unescape_os};
|
||||
pub use crate::human::{parse_human_readable_size, ParseSizeError};
|
||||
pub use crate::pattern::{
|
||||
pattern_from_bytes, pattern_from_os, patterns_from_path,
|
||||
patterns_from_reader, patterns_from_stdin, InvalidPatternError,
|
||||
};
|
||||
pub use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
pub use wtr::{
|
||||
pub use crate::process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
pub use crate::wtr::{
|
||||
stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
|
||||
};
|
||||
|
||||
@@ -225,13 +212,13 @@ pub fn is_readable_stdin() -> bool {
|
||||
!is_tty_stdin() && imp()
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdin is believed to be connectted to a tty
|
||||
/// Returns true if and only if stdin is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stdin() -> bool {
|
||||
atty::is(atty::Stream::Stdin)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdout is believed to be connectted to a tty
|
||||
/// Returns true if and only if stdout is believed to be connected to a tty
|
||||
/// or a console.
|
||||
///
|
||||
/// This is useful for when you want your command line program to produce
|
||||
@@ -243,7 +230,7 @@ pub fn is_tty_stdout() -> bool {
|
||||
atty::is(atty::Stream::Stdout)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stderr is believed to be connectted to a tty
|
||||
/// Returns true if and only if stderr is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stderr() -> bool {
|
||||
atty::is(atty::Stream::Stderr)
|
||||
|
@@ -8,7 +8,7 @@ use std::str;
|
||||
|
||||
use bstr::io::BufReadExt;
|
||||
|
||||
use escape::{escape, escape_os};
|
||||
use crate::escape::{escape, escape_os};
|
||||
|
||||
/// An error that occurs when a pattern could not be converted to valid UTF-8.
|
||||
///
|
||||
@@ -35,7 +35,7 @@ impl error::Error for InvalidPatternError {
|
||||
}
|
||||
|
||||
impl fmt::Display for InvalidPatternError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"found invalid UTF-8 in pattern at byte offset {}: {} \
|
||||
|
@@ -30,6 +30,14 @@ impl CommandError {
|
||||
pub(crate) fn stderr(bytes: Vec<u8>) -> CommandError {
|
||||
CommandError { kind: CommandErrorKind::Stderr(bytes) }
|
||||
}
|
||||
|
||||
/// Returns true if and only if this error has empty data from stderr.
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
match self.kind {
|
||||
CommandErrorKind::Stderr(ref bytes) => bytes.is_empty(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for CommandError {
|
||||
@@ -39,7 +47,7 @@ impl error::Error for CommandError {
|
||||
}
|
||||
|
||||
impl fmt::Display for CommandError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.kind {
|
||||
CommandErrorKind::Io(ref e) => e.fmt(f),
|
||||
CommandErrorKind::Stderr(ref bytes) => {
|
||||
@@ -107,18 +115,12 @@ impl CommandReaderBuilder {
|
||||
.stdout(process::Stdio::piped())
|
||||
.stderr(process::Stdio::piped())
|
||||
.spawn()?;
|
||||
let stdout = child.stdout.take().unwrap();
|
||||
let stderr = if self.async_stderr {
|
||||
StderrReader::async(child.stderr.take().unwrap())
|
||||
StderrReader::r#async(child.stderr.take().unwrap())
|
||||
} else {
|
||||
StderrReader::sync(child.stderr.take().unwrap())
|
||||
};
|
||||
Ok(CommandReader {
|
||||
child: child,
|
||||
stdout: stdout,
|
||||
stderr: stderr,
|
||||
done: false,
|
||||
})
|
||||
Ok(CommandReader { child, stderr, eof: false })
|
||||
}
|
||||
|
||||
/// When enabled, the reader will asynchronously read the contents of the
|
||||
@@ -175,9 +177,11 @@ impl CommandReaderBuilder {
|
||||
#[derive(Debug)]
|
||||
pub struct CommandReader {
|
||||
child: process::Child,
|
||||
stdout: process::ChildStdout,
|
||||
stderr: StderrReader,
|
||||
done: bool,
|
||||
/// This is set to true once 'read' returns zero bytes. When this isn't
|
||||
/// set and we close the reader, then we anticipate a pipe error when
|
||||
/// reaping the child process and silence it.
|
||||
eof: bool,
|
||||
}
|
||||
|
||||
impl CommandReader {
|
||||
@@ -201,23 +205,73 @@ impl CommandReader {
|
||||
) -> Result<CommandReader, CommandError> {
|
||||
CommandReaderBuilder::new().build(cmd)
|
||||
}
|
||||
|
||||
/// Closes the CommandReader, freeing any resources used by its underlying
|
||||
/// child process. If the child process exits with a nonzero exit code, the
|
||||
/// returned Err value will include its stderr.
|
||||
///
|
||||
/// `close` is idempotent, meaning it can be safely called multiple times.
|
||||
/// The first call closes the CommandReader and any subsequent calls do
|
||||
/// nothing.
|
||||
///
|
||||
/// This method should be called after partially reading a file to prevent
|
||||
/// resource leakage. However there is no need to call `close` explicitly
|
||||
/// if your code always calls `read` to EOF, as `read` takes care of
|
||||
/// calling `close` in this case.
|
||||
///
|
||||
/// `close` is also called in `drop` as a last line of defense against
|
||||
/// resource leakage. Any error from the child process is then printed as a
|
||||
/// warning to stderr. This can be avoided by explicitly calling `close`
|
||||
/// before the CommandReader is dropped.
|
||||
pub fn close(&mut self) -> io::Result<()> {
|
||||
// Dropping stdout closes the underlying file descriptor, which should
|
||||
// cause a well-behaved child process to exit. If child.stdout is None
|
||||
// we assume that close() has already been called and do nothing.
|
||||
let stdout = match self.child.stdout.take() {
|
||||
None => return Ok(()),
|
||||
Some(stdout) => stdout,
|
||||
};
|
||||
drop(stdout);
|
||||
if self.child.wait()?.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
let err = self.stderr.read_to_end();
|
||||
// In the specific case where we haven't consumed the full data
|
||||
// from the child process, then closing stdout above results in
|
||||
// a pipe signal being thrown in most cases. But I don't think
|
||||
// there is any reliable and portable way of detecting it. Instead,
|
||||
// if we know we haven't hit EOF (so we anticipate a broken pipe
|
||||
// error) and if stderr otherwise doesn't have anything on it, then
|
||||
// we assume total success.
|
||||
if !self.eof && err.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
Err(io::Error::from(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CommandReader {
|
||||
fn drop(&mut self) {
|
||||
if let Err(error) = self.close() {
|
||||
log::warn!("{}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for CommandReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
if self.done {
|
||||
return Ok(0);
|
||||
}
|
||||
let nread = self.stdout.read(buf)?;
|
||||
let stdout = match self.child.stdout {
|
||||
None => return Ok(0),
|
||||
Some(ref mut stdout) => stdout,
|
||||
};
|
||||
let nread = stdout.read(buf)?;
|
||||
if nread == 0 {
|
||||
self.done = true;
|
||||
// Reap the child now that we're done reading. If the command
|
||||
// failed, report stderr as an error.
|
||||
if !self.child.wait()?.success() {
|
||||
return Err(io::Error::from(self.stderr.read_to_end()));
|
||||
}
|
||||
self.eof = true;
|
||||
self.close().map(|_| 0)
|
||||
} else {
|
||||
Ok(nread)
|
||||
}
|
||||
Ok(nread)
|
||||
}
|
||||
}
|
||||
|
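The reworked `CommandReader` above reaps the child through `close`, either explicitly or automatically once a read hits EOF. A minimal usage sketch for the partial-read case the new docs describe; the `gzip -d -c some-file.gz` command is only an illustration:

```rust
use std::io::Read;
use std::process::Command;

use grep_cli::CommandReader;

fn example() -> Result<(), Box<dyn std::error::Error>> {
    // Hypothetical command: stream 'gzip -d -c some-file.gz' as a reader.
    let mut cmd = Command::new("gzip");
    cmd.args(["-d", "-c", "some-file.gz"]);
    let mut rdr = CommandReader::new(&mut cmd)?;

    // Read only the first kilobyte...
    let mut start = vec![0u8; 1024];
    let n = rdr.read(&mut start)?;
    start.truncate(n);

    // ...so close() must be called to reap the child. Per the docs above, a
    // broken pipe caused by not reading to EOF is not reported as an error.
    rdr.close()?;
    Ok(())
}
```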
||||
@@ -231,7 +285,7 @@ enum StderrReader {
|
||||
|
||||
impl StderrReader {
|
||||
/// Create a reader for stderr that reads contents asynchronously.
|
||||
fn async(mut stderr: process::ChildStderr) -> StderrReader {
|
||||
fn r#async(mut stderr: process::ChildStderr) -> StderrReader {
|
||||
let handle =
|
||||
thread::spawn(move || stderr_to_command_error(&mut stderr));
|
||||
StderrReader::Async(Some(handle))
|
||||
|
@@ -2,7 +2,7 @@ use std::io;
|
||||
|
||||
use termcolor;
|
||||
|
||||
use is_tty_stdout;
|
||||
use crate::is_tty_stdout;
|
||||
|
||||
/// A writer that supports coloring with either line or block buffering.
|
||||
pub struct StandardStream(StandardStreamKind);
|
||||
|
@@ -13,8 +13,8 @@ use clap::{self, crate_authors, crate_version, App, AppSettings};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
const ABOUT: &str = "
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
ripgrep (rg) recursively searches the current directory for a regex pattern.
|
||||
By default, ripgrep will respect gitignore rules and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
|
||||
Use -h for short descriptions and --help for more details.
|
||||
@@ -568,6 +568,8 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_dfa_size_limit(&mut args);
|
||||
flag_encoding(&mut args);
|
||||
flag_engine(&mut args);
|
||||
flag_field_context_separator(&mut args);
|
||||
flag_field_match_separator(&mut args);
|
||||
flag_file(&mut args);
|
||||
flag_files(&mut args);
|
||||
flag_files_with_matches(&mut args);
|
||||
@@ -696,7 +698,7 @@ fn flag_after_context(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show NUM lines after each match.
|
||||
|
||||
This overrides the --context flag.
|
||||
This overrides the --context and --passthru flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("after-context", "NUM")
|
||||
@@ -704,6 +706,7 @@ This overrides the --context flag.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
@@ -765,7 +768,7 @@ fn flag_before_context(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show NUM lines before each match.
|
||||
|
||||
This overrides the --context flag.
|
||||
This overrides the --context and --passthru flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("before-context", "NUM")
|
||||
@@ -773,6 +776,7 @@ This overrides the --context flag.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
@@ -871,8 +875,8 @@ Print the 0-based byte offset within the input file before each line of output.
|
||||
If -o (--only-matching) is specified, print the offset of the matching part
|
||||
itself.
|
||||
|
||||
If ripgrep does transcoding, then the byte offset is in terms of the the result
|
||||
of transcoding and not the original data. This applies similarly to another
|
||||
If ripgrep does transcoding, then the byte offset is in terms of the result of
|
||||
transcoding and not the original data. This applies similarly to another
|
||||
transformation on the source, such as decompression or a --pre filter. Note
|
||||
that when the PCRE2 regex engine is used, then UTF-8 transcoding is done by
|
||||
default.
|
||||
@@ -966,7 +970,7 @@ or, equivalently,
|
||||
|
||||
rg --colors 'match:bg:0x0,0x80,0xFF'
|
||||
|
||||
Note that the the intense and nointense style flags will have no effect when
|
||||
Note that the intense and nointense style flags will have no effect when
|
||||
used alongside these extended color codes.
|
||||
"
|
||||
);
|
||||
@@ -1005,7 +1009,8 @@ fn flag_context(args: &mut Vec<RGArg>) {
|
||||
Show NUM lines before and after each match. This is equivalent to providing
|
||||
both the -B/--before-context and -A/--after-context flags with the same value.
|
||||
|
||||
This overrides both the -B/--before-context and -A/--after-context flags.
|
||||
This overrides both the -B/--before-context and -A/--after-context flags,
|
||||
in addition to the --passthru flag.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("context", "NUM")
|
||||
@@ -1013,6 +1018,7 @@ This overrides both the -B/--before-context and -A/--after-context flags.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru")
|
||||
.overrides("before-context")
|
||||
.overrides("after-context");
|
||||
args.push(arg);
|
||||
@@ -1051,11 +1057,13 @@ fn flag_count(args: &mut Vec<RGArg>) {
|
||||
This flag suppresses normal output and shows the number of lines that match
|
||||
the given patterns for each file searched. Each file containing a match has its
|
||||
path and count printed on each line. Note that this reports the number of lines
|
||||
that match and not the total number of matches.
|
||||
that match and not the total number of matches, unless -U/--multiline is
|
||||
enabled. In multiline mode, --count is equivalent to --count-matches.
|
||||
|
||||
If only one file is given to ripgrep, then only the count is printed if there
|
||||
is a match. The --with-filename flag can be used to force printing the file
|
||||
path in this case.
|
||||
path in this case. If you need a count to be printed regardless of whether
|
||||
there is a match, then use --include-zero.
|
||||
|
||||
This overrides the --count-matches flag. Note that when --count is combined
|
||||
with --only-matching, then ripgrep behaves as if --count-matches was given.
|
||||
@@ -1209,7 +1217,7 @@ between supported regex engines depending on the features used in a pattern on
|
||||
a best effort basis.
|
||||
|
||||
Note that the 'pcre2' engine is an optional ripgrep feature. If PCRE2 wasn't
|
||||
including in your build of ripgrep, then using this flag will result in ripgrep
|
||||
included in your build of ripgrep, then using this flag will result in ripgrep
|
||||
printing an error message and exiting.
|
||||
|
||||
This overrides previous uses of --pcre2 and --auto-hybrid-regex flags.
|
||||
@@ -1227,6 +1235,38 @@ This overrides previous uses of --pcre2 and --auto-hybrid-regex flags.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_field_context_separator(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Set the field context separator.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Set the field context separator, which is used to delimit file paths, line
|
||||
numbers, columns and the context itself, when printing contextual lines. The
|
||||
separator may be any number of bytes, including zero. Escape sequences like
|
||||
\\x7F or \\t may be used. The '-' character is the default value.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("field-context-separator", "SEPARATOR")
|
||||
.help(SHORT)
|
||||
.long_help(LONG);
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_field_match_separator(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Set the match separator.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Set the field match separator, which is used to delimit file paths, line
|
||||
numbers, columns and the match itself. The separator may be any number of
|
||||
bytes, including zero. Escape sequences like \\x7F or \\t may be used. The ':'
|
||||
character is the default value.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("field-match-separator", "SEPARATOR")
|
||||
.help(SHORT)
|
||||
.long_help(LONG);
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_file(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Search for patterns from the given file.";
|
||||
const LONG: &str = long!(
|
||||
@@ -1266,10 +1306,10 @@ This is useful to determine whether a particular file is being searched or not.
|
||||
}
|
||||
|
||||
fn flag_files_with_matches(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Only print the paths with at least one match.";
|
||||
const SHORT: &str = "Print the paths with at least one match.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Only print the paths with at least one match.
|
||||
Print the paths with at least one match and suppress match contents.
|
||||
|
||||
This overrides --files-without-match.
|
||||
"
|
||||
@@ -1283,11 +1323,11 @@ This overrides --files-without-match.
|
||||
}
|
||||
|
||||
fn flag_files_without_match(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Only print the paths that contain zero matches.";
|
||||
const SHORT: &str = "Print the paths that contain zero matches.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Only print the paths that contain zero matches. This inverts/negates the
|
||||
--files-with-matches flag.
|
||||
Print the paths that contain zero matches and suppress match contents. This
|
||||
inverts/negates the --files-with-matches flag.
|
||||
|
||||
This overrides --files-with-matches.
|
||||
"
|
||||
@@ -1354,6 +1394,13 @@ used. Globbing rules match .gitignore globs. Precede a glob with a ! to exclude
|
||||
it. If multiple globs match a file or directory, the glob given later in the
|
||||
command line takes precedence.
|
||||
|
||||
As an extension, globs support specifying alternatives: *-g ab{c,d}* is
|
||||
equivalent to *-g abc -g abd*. Empty alternatives like *-g ab{,c}* are not
|
||||
currently supported. Note that this syntax extension is also currently enabled
|
||||
in gitignore files, even though this syntax isn't supported by git itself.
|
||||
ripgrep may disable this syntax extension in gitignore files, but it will
|
||||
always remain available via the -g/--glob flag.
|
||||
|
||||
When this flag is set, every file and directory is applied to it to test for
|
||||
a match. So for example, if you only want to search in a particular directory
|
||||
'foo', then *-g foo* is incorrect because 'foo/bar' does not match the glob
|
||||
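The glob-alternative extension described in the help text above (`-g ab{c,d}` behaving like `-g abc -g abd`) is the syntax implemented by the repository's globset crate. A minimal sketch checking that syntax directly against globset; the specific pattern is only an illustration:

```rust
use globset::Glob;

fn main() -> Result<(), globset::Error> {
    // '-g ab{c,d}' is described above as equivalent to '-g abc -g abd'.
    let m = Glob::new("ab{c,d}")?.compile_matcher();
    assert!(m.is_match("abc"));
    assert!(m.is_match("abd"));
    assert!(!m.is_match("abe"));
    Ok(())
}
```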
@@ -1433,10 +1480,15 @@ Search hidden files and directories. By default, hidden files and directories
|
||||
are skipped. Note that if a hidden file or a directory is whitelisted in an
|
||||
ignore file, then it will be searched even if this flag isn't provided.
|
||||
|
||||
A file or directory is considered hidden if its base name starts with a dot
|
||||
character ('.'). On operating systems which support a `hidden` file attribute,
|
||||
like Windows, files with this attribute are also considered hidden.
|
||||
|
||||
This flag can be disabled with --no-hidden.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("hidden")
|
||||
.short(".")
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.overrides("no-hidden");
|
||||
@@ -1496,7 +1548,7 @@ When specifying multiple ignore files, earlier files have lower precedence
|
||||
than later files.
|
||||
|
||||
If you are looking for a way to include or exclude files and directories
|
||||
directly on the command line, then used -g instead.
|
||||
directly on the command line, then use -g instead.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("ignore-file", "PATH")
|
||||
@@ -1971,6 +2023,9 @@ fn flag_no_ignore_dot(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Don't respect .ignore files.
|
||||
|
||||
This does *not* affect whether ripgrep will ignore files and directories
|
||||
whose names begin with a dot. For that, see the -./--hidden flag.
|
||||
|
||||
This flag can be disabled with the --ignore-dot flag.
|
||||
"
|
||||
);
|
||||
@@ -2341,12 +2396,17 @@ the empty string. For example, if you are searching using 'rg foo' then using
|
||||
'rg \"^|foo\"' instead will emit every line in every file searched, but only
|
||||
occurrences of 'foo' will be highlighted. This flag enables the same behavior
|
||||
without needing to modify the pattern.
|
||||
|
||||
This overrides the --context, --after-context and --before-context flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("passthru")
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.alias("passthrough");
|
||||
.alias("passthrough")
|
||||
.overrides("after-context")
|
||||
.overrides("before-context")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
@@ -2967,8 +3027,8 @@ fn flag_unrestricted(args: &mut Vec<RGArg>) {
"\
Reduce the level of \"smart\" searching. A single -u won't respect .gitignore
(etc.) files (--no-ignore). Two -u flags will additionally search hidden files
and directories (--hidden). Three -u flags will additionally search binary files
(--binary).
and directories (-./--hidden). Three -u flags will additionally search binary
files (--binary).

'rg -uuu' is roughly equivalent to 'grep -r'.
"
|
||||
|
@@ -31,7 +31,6 @@ use ignore::overrides::{Override, OverrideBuilder};
|
||||
use ignore::types::{FileTypeDef, Types, TypesBuilder};
|
||||
use ignore::{Walk, WalkBuilder, WalkParallel};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use regex;
|
||||
use termcolor::{BufferWriter, ColorChoice, WriteColor};
|
||||
|
||||
@@ -97,14 +96,17 @@ pub struct Args(Arc<ArgsImp>);
|
||||
struct ArgsImp {
|
||||
/// Mid-to-low level routines for extracting CLI arguments.
|
||||
matches: ArgMatches,
|
||||
/// The patterns provided at the command line and/or via the -f/--file
|
||||
/// flag. This may be empty.
|
||||
patterns: Vec<String>,
|
||||
/// The command we want to execute.
|
||||
command: Command,
|
||||
/// The number of threads to use. This is based in part on available
|
||||
/// threads, in part on the number of threads requested and in part on the
|
||||
/// command we're running.
|
||||
threads: usize,
|
||||
/// A matcher built from the patterns.
|
||||
///
|
||||
/// It's important that this is only built once, since building this goes
|
||||
/// through regex compilation and various types of analyses. That is, if
|
||||
/// you need many of theses (one per thread, for example), it is better to
|
||||
/// you need many of these (one per thread, for example), it is better to
|
||||
/// build it once and then clone it.
|
||||
matcher: PatternMatcher,
|
||||
/// The paths provided at the command line. This is guaranteed to be
|
||||
@@ -165,12 +167,6 @@ impl Args {
|
||||
&self.0.matches
|
||||
}
|
||||
|
||||
/// Return the patterns found in the command line arguments. This includes
|
||||
/// patterns read via the -f/--file flags.
|
||||
fn patterns(&self) -> &[String] {
|
||||
&self.0.patterns
|
||||
}
|
||||
|
||||
/// Return the matcher builder from the patterns.
|
||||
fn matcher(&self) -> &PatternMatcher {
|
||||
&self.0.matcher
|
||||
@@ -186,7 +182,7 @@ impl Args {
|
||||
/// Returns true if and only if `paths` had to be populated with a default
|
||||
/// path, which occurs only when no paths were given as command line
|
||||
/// arguments.
|
||||
fn using_default_path(&self) -> bool {
|
||||
pub fn using_default_path(&self) -> bool {
|
||||
self.0.using_default_path
|
||||
}
|
||||
|
||||
@@ -197,7 +193,7 @@ impl Args {
|
||||
fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> {
|
||||
match self.matches().output_kind() {
|
||||
OutputKind::Standard => {
|
||||
let separator_search = self.command()? == Command::Search;
|
||||
let separator_search = self.command() == Command::Search;
|
||||
self.matches()
|
||||
.printer_standard(self.paths(), wtr, separator_search)
|
||||
.map(Printer::Standard)
|
||||
@@ -225,28 +221,8 @@ impl Args {
|
||||
}
|
||||
|
||||
/// Return the high-level command that ripgrep should run.
|
||||
pub fn command(&self) -> Result<Command> {
|
||||
let is_one_search = self.matches().is_one_search(self.paths());
|
||||
let threads = self.matches().threads()?;
|
||||
let one_thread = is_one_search || threads == 1;
|
||||
|
||||
Ok(if self.matches().is_present("pcre2-version") {
|
||||
Command::PCRE2Version
|
||||
} else if self.matches().is_present("type-list") {
|
||||
Command::Types
|
||||
} else if self.matches().is_present("files") {
|
||||
if one_thread {
|
||||
Command::Files
|
||||
} else {
|
||||
Command::FilesParallel
|
||||
}
|
||||
} else if self.matches().can_never_match(self.patterns()) {
|
||||
Command::SearchNever
|
||||
} else if one_thread {
|
||||
Command::Search
|
||||
} else {
|
||||
Command::SearchParallel
|
||||
})
|
||||
pub fn command(&self) -> Command {
|
||||
self.0.command
|
||||
}
|
||||
|
||||
/// Builder a path printer that can be used for printing just file paths,
|
||||
@@ -290,7 +266,7 @@ impl Args {
|
||||
let mut builder = SearchWorkerBuilder::new();
|
||||
builder
|
||||
.json_stats(matches.is_present("json"))
|
||||
.preprocessor(matches.preprocessor())
|
||||
.preprocessor(matches.preprocessor())?
|
||||
.preprocessor_globs(matches.preprocessor_globs()?)
|
||||
.search_zip(matches.is_present("search-zip"))
|
||||
.binary_detection_implicit(matches.binary_detection_implicit())
|
||||
@@ -304,7 +280,7 @@ impl Args {
|
||||
/// When this returns a `Stats` value, then it is guaranteed that the
|
||||
/// search worker will be configured to track statistics as well.
|
||||
pub fn stats(&self) -> Result<Option<Stats>> {
|
||||
Ok(if self.command()?.is_search() && self.matches().stats() {
|
||||
Ok(if self.command().is_search() && self.matches().stats() {
|
||||
Some(Stats::new())
|
||||
} else {
|
||||
None
|
||||
@@ -343,12 +319,18 @@ impl Args {
|
||||
|
||||
/// Return a walker that never uses additional threads.
|
||||
pub fn walker(&self) -> Result<Walk> {
|
||||
Ok(self.matches().walker_builder(self.paths())?.build())
|
||||
Ok(self
|
||||
.matches()
|
||||
.walker_builder(self.paths(), self.0.threads)?
|
||||
.build())
|
||||
}
|
||||
|
||||
/// Return a parallel walker that may use additional threads.
|
||||
pub fn walker_parallel(&self) -> Result<WalkParallel> {
|
||||
Ok(self.matches().walker_builder(self.paths())?.build_parallel())
|
||||
Ok(self
|
||||
.matches()
|
||||
.walker_builder(self.paths(), self.0.threads)?
|
||||
.build_parallel())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -557,9 +539,36 @@ impl ArgMatches {
|
||||
} else {
|
||||
false
|
||||
};
|
||||
// Now figure out the number of threads we'll use and which
|
||||
// command will run.
|
||||
let is_one_search = self.is_one_search(&paths);
|
||||
let threads = if is_one_search { 1 } else { self.threads()? };
|
||||
if threads == 1 {
|
||||
log::debug!("running in single threaded mode");
|
||||
} else {
|
||||
log::debug!("running with {threads} threads for parallelism");
|
||||
}
|
||||
let command = if self.is_present("pcre2-version") {
|
||||
Command::PCRE2Version
|
||||
} else if self.is_present("type-list") {
|
||||
Command::Types
|
||||
} else if self.is_present("files") {
|
||||
if threads == 1 {
|
||||
Command::Files
|
||||
} else {
|
||||
Command::FilesParallel
|
||||
}
|
||||
} else if self.can_never_match(&patterns) {
|
||||
Command::SearchNever
|
||||
} else if threads == 1 {
|
||||
Command::Search
|
||||
} else {
|
||||
Command::SearchParallel
|
||||
};
|
||||
Ok(Args(Arc::new(ArgsImp {
|
||||
matches: self,
|
||||
patterns,
|
||||
command,
|
||||
threads,
|
||||
matcher,
|
||||
paths,
|
||||
using_default_path,
|
||||
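A simplified sketch of the command-selection logic introduced above (the pcre2-version and type-list branches are omitted, and the names below are stand-ins rather than the crate's actual types):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum Command {
    Files,
    FilesParallel,
    Search,
    SearchParallel,
    SearchNever,
}

// Single-threaded variants are chosen when only one thread is in play,
// and SearchNever short-circuits patterns that can never match.
fn choose(files_only: bool, can_never_match: bool, threads: usize) -> Command {
    if files_only {
        if threads == 1 { Command::Files } else { Command::FilesParallel }
    } else if can_never_match {
        Command::SearchNever
    } else if threads == 1 {
        Command::Search
    } else {
        Command::SearchParallel
    }
}

fn main() {
    assert_eq!(choose(false, false, 1), Command::Search);
    assert_eq!(choose(true, false, 8), Command::FilesParallel);
}
```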
@@ -777,6 +786,7 @@ impl ArgMatches {
|
||||
.path(self.with_filename(paths))
|
||||
.only_matching(self.is_present("only-matching"))
|
||||
.per_match(self.is_present("vimgrep"))
|
||||
.per_match_one_line(true)
|
||||
.replacement(self.replacement())
|
||||
.max_columns(self.max_columns()?)
|
||||
.max_columns_preview(self.max_columns_preview())
|
||||
@@ -786,8 +796,8 @@ impl ArgMatches {
|
||||
.trim_ascii(self.is_present("trim"))
|
||||
.separator_search(None)
|
||||
.separator_context(self.context_separator())
|
||||
.separator_field_match(b":".to_vec())
|
||||
.separator_field_context(b"-".to_vec())
|
||||
.separator_field_match(self.field_match_separator())
|
||||
.separator_field_context(self.field_context_separator())
|
||||
.separator_path(self.path_separator()?)
|
||||
.path_terminator(self.path_terminator());
|
||||
if separator_search {
|
||||
@@ -857,7 +867,11 @@ impl ArgMatches {
|
||||
///
|
||||
/// If there was a problem parsing the CLI arguments necessary for
|
||||
/// constructing the builder, then this returns an error.
|
||||
fn walker_builder(&self, paths: &[PathBuf]) -> Result<WalkBuilder> {
|
||||
fn walker_builder(
|
||||
&self,
|
||||
paths: &[PathBuf],
|
||||
threads: usize,
|
||||
) -> Result<WalkBuilder> {
|
||||
let mut builder = WalkBuilder::new(&paths[0]);
|
||||
for path in &paths[1..] {
|
||||
builder.add(path);
|
||||
@@ -873,7 +887,7 @@ impl ArgMatches {
|
||||
.max_depth(self.usize_of("max-depth")?)
|
||||
.follow_links(self.is_present("follow"))
|
||||
.max_filesize(self.max_file_size()?)
|
||||
.threads(self.threads()?)
|
||||
.threads(threads)
|
||||
.same_file_system(self.is_present("one-file-system"))
|
||||
.skip_stdout(!self.is_present("files"))
|
||||
.overrides(self.overrides()?)
|
||||
@@ -1377,6 +1391,24 @@ impl ArgMatches {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the unescaped field context separator. If one wasn't specified,
|
||||
/// then '-' is used as the default.
|
||||
fn field_context_separator(&self) -> Vec<u8> {
|
||||
match self.value_of_os("field-context-separator") {
|
||||
None => b"-".to_vec(),
|
||||
Some(sep) => cli::unescape_os(&sep),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the unescaped field match separator. If one wasn't specified,
|
||||
/// then ':' is used as the default.
|
||||
fn field_match_separator(&self) -> Vec<u8> {
|
||||
match self.value_of_os("field-match-separator") {
|
||||
None => b":".to_vec(),
|
||||
Some(sep) => cli::unescape_os(&sep),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a sequence of all available patterns from the command line.
|
||||
/// This includes reading the -e/--regexp and -f/--file flags.
|
||||
///
|
||||
@@ -1573,7 +1605,9 @@ impl ArgMatches {
|
||||
return Ok(1);
|
||||
}
|
||||
let threads = self.usize_of("threads")?.unwrap_or(0);
|
||||
Ok(if threads == 0 { cmp::min(12, num_cpus::get()) } else { threads })
|
||||
let available =
|
||||
std::thread::available_parallelism().map_or(1, |n| n.get());
|
||||
Ok(if threads == 0 { cmp::min(12, available) } else { threads })
|
||||
}
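The num_cpus replacement above relies only on the standard library; a small standalone sketch of the same default-thread computation (the cap of 12 mirrors the hunk, the function name is made up):

```rust
use std::cmp;

/// A requested value of 0 means "pick a default", capped at 12.
fn default_threads(requested: usize) -> usize {
    let available = std::thread::available_parallelism().map_or(1, |n| n.get());
    if requested == 0 {
        cmp::min(12, available)
    } else {
        requested
    }
}

fn main() {
    println!("threads: {}", default_threads(0));
}
```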
|
||||
|
||||
/// Builds a file type matcher from the command line flags.
|
||||
@@ -1701,7 +1735,7 @@ impl ArgMatches {
|
||||
self.0.value_of_os(name)
|
||||
}
|
||||
|
||||
fn values_of_os(&self, name: &str) -> Option<clap::OsValues> {
|
||||
fn values_of_os(&self, name: &str) -> Option<clap::OsValues<'_>> {
|
||||
self.0.values_of_os(name)
|
||||
}
|
||||
}
|
||||
|
@@ -28,7 +28,10 @@ pub fn args() -> Vec<OsString> {
|
||||
let (args, errs) = match parse(&config_path) {
|
||||
Ok((args, errs)) => (args, errs),
|
||||
Err(err) => {
|
||||
message!("{}", err);
|
||||
message!(
|
||||
"failed to read the file specified in RIPGREP_CONFIG_PATH: {}",
|
||||
err
|
||||
);
|
||||
return vec![];
|
||||
}
|
||||
};
|
||||
@@ -77,7 +80,7 @@ fn parse<P: AsRef<Path>>(
|
||||
fn parse_reader<R: io::Read>(
|
||||
rdr: R,
|
||||
) -> Result<(Vec<OsString>, Vec<Box<dyn Error>>)> {
|
||||
let bufrdr = io::BufReader::new(rdr);
|
||||
let mut bufrdr = io::BufReader::new(rdr);
|
||||
let (mut args, mut errs) = (vec![], vec![]);
|
||||
let mut line_number = 0;
|
||||
bufrdr.for_byte_line_with_terminator(|line| {
|
||||
|
@@ -24,13 +24,13 @@ impl Logger {
|
||||
}
|
||||
|
||||
impl Log for Logger {
|
||||
fn enabled(&self, _: &log::Metadata) -> bool {
|
||||
fn enabled(&self, _: &log::Metadata<'_>) -> bool {
|
||||
// We set the log level via log::set_max_level, so we don't need to
|
||||
// implement filtering here.
|
||||
true
|
||||
}
|
||||
|
||||
fn log(&self, record: &log::Record) {
|
||||
fn log(&self, record: &log::Record<'_>) {
|
||||
match (record.file(), record.line()) {
|
||||
(Some(file), Some(line)) => {
|
||||
eprintln!(
|
||||
|
@@ -55,7 +55,7 @@ fn main() {
|
||||
fn try_main(args: Args) -> Result<()> {
|
||||
use args::Command::*;
|
||||
|
||||
let matched = match args.command()? {
|
||||
let matched = match args.command() {
|
||||
Search => search(&args),
|
||||
SearchParallel => search_parallel(&args),
|
||||
SearchNever => Ok(false),
|
||||
@@ -83,12 +83,14 @@ fn search(args: &Args) -> Result<bool> {
|
||||
let mut stats = args.stats()?;
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
let mut matched = false;
|
||||
let mut searched = false;
|
||||
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
searched = true;
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
@@ -108,6 +110,9 @@ fn search(args: &Args) -> Result<bool> {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if args.using_default_path() && !searched {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if let Some(ref stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
// We don't care if we couldn't print this successfully.
|
||||
@@ -129,11 +134,13 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
let bufwtr = args.buffer_writer()?;
|
||||
let stats = args.stats()?.map(Mutex::new);
|
||||
let matched = AtomicBool::new(false);
|
||||
let searched = AtomicBool::new(false);
|
||||
let mut searcher_err = None;
|
||||
args.walker_parallel()?.run(|| {
|
||||
let bufwtr = &bufwtr;
|
||||
let stats = &stats;
|
||||
let matched = &matched;
|
||||
let searched = &searched;
|
||||
let subject_builder = &subject_builder;
|
||||
let mut searcher = match args.search_worker(bufwtr.buffer()) {
|
||||
Ok(searcher) => searcher,
|
||||
@@ -148,6 +155,7 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
Some(subject) => subject,
|
||||
None => return WalkState::Continue,
|
||||
};
|
||||
searched.store(true, SeqCst);
|
||||
searcher.printer().get_mut().clear();
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
@@ -181,6 +189,9 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
if let Some(err) = searcher_err.take() {
|
||||
return Err(err);
|
||||
}
|
||||
if args.using_default_path() && !searched.load(SeqCst) {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if let Some(ref locked_stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
let stats = locked_stats.lock().unwrap();
|
||||
@@ -191,6 +202,14 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
Ok(matched.load(SeqCst))
|
||||
}
|
||||
|
||||
fn eprint_nothing_searched() {
|
||||
err_message!(
|
||||
"No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect.\n\
|
||||
Running with --debug will show why files are being skipped."
|
||||
);
|
||||
}
|
||||
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using a single thread.
|
||||
|
@@ -115,9 +115,14 @@ impl SearchWorkerBuilder {
|
||||
pub fn preprocessor(
|
||||
&mut self,
|
||||
cmd: Option<PathBuf>,
|
||||
) -> &mut SearchWorkerBuilder {
|
||||
self.config.preprocessor = cmd;
|
||||
self
|
||||
) -> crate::Result<&mut SearchWorkerBuilder> {
|
||||
if let Some(ref prog) = cmd {
|
||||
let bin = cli::resolve_binary(prog)?;
|
||||
self.config.preprocessor = Some(bin);
|
||||
} else {
|
||||
self.config.preprocessor = None;
|
||||
}
|
||||
Ok(self)
|
||||
}
|
||||
|
||||
/// Set the globs for determining which files should be run through the
|
||||
@@ -325,11 +330,12 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
} else {
|
||||
self.config.binary_implicit.clone()
|
||||
};
|
||||
self.searcher.set_binary_detection(bin);
|
||||
|
||||
let path = subject.path();
|
||||
log::trace!("{}: binary detection: {:?}", path.display(), bin);
|
||||
|
||||
self.searcher.set_binary_detection(bin);
|
||||
if subject.is_stdin() {
|
||||
self.search_reader(path, io::stdin().lock())
|
||||
self.search_reader(path, &mut io::stdin().lock())
|
||||
} else if self.should_preprocess(path) {
|
||||
self.search_preprocessor(path)
|
||||
} else if self.should_decompress(path) {
|
||||
@@ -393,7 +399,7 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
let mut cmd = Command::new(bin);
|
||||
cmd.arg(path).stdin(Stdio::from(File::open(path)?));
|
||||
|
||||
let rdr = self.command_builder.build(&mut cmd).map_err(|err| {
|
||||
let mut rdr = self.command_builder.build(&mut cmd).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!(
|
||||
@@ -402,20 +408,28 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
),
|
||||
)
|
||||
})?;
|
||||
self.search_reader(path, rdr).map_err(|err| {
|
||||
let result = self.search_reader(path, &mut rdr).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("preprocessor command failed: '{:?}': {}", cmd, err),
|
||||
)
|
||||
})
|
||||
});
|
||||
let close_result = rdr.close();
|
||||
let search_result = result?;
|
||||
close_result?;
|
||||
Ok(search_result)
|
||||
}
|
||||
|
||||
/// Attempt to decompress the data at the given file path and search the
|
||||
/// result. If the given file path isn't recognized as a compressed file,
|
||||
/// then search it without doing any decompression.
|
||||
fn search_decompress(&mut self, path: &Path) -> io::Result<SearchResult> {
|
||||
let rdr = self.decomp_builder.build(path)?;
|
||||
self.search_reader(path, rdr)
|
||||
let mut rdr = self.decomp_builder.build(path)?;
|
||||
let result = self.search_reader(path, &mut rdr);
|
||||
let close_result = rdr.close();
|
||||
let search_result = result?;
|
||||
close_result?;
|
||||
Ok(search_result)
|
||||
}
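The ordering used above (run the search, close the reader, then surface the search error before the close error) can be shown in isolation; a generic sketch with hypothetical stand-ins rather than the crate's actual types:

```rust
use std::io;

// Hypothetical stand-ins for the search worker and the decompression reader.
fn run_search() -> io::Result<bool> { Ok(true) }
fn close_reader() -> io::Result<()> { Ok(()) }

fn search_then_close() -> io::Result<bool> {
    // Always attempt the close, even when the search failed...
    let result = run_search();
    let close_result = close_reader();
    // ...but when both fail, the search error wins.
    let search_result = result?;
    close_result?;
    Ok(search_result)
}

fn main() {
    println!("matched: {:?}", search_then_close());
}
```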
|
||||
|
||||
/// Search the contents of the given file path.
|
||||
@@ -442,7 +456,7 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
fn search_reader<R: io::Read>(
|
||||
&mut self,
|
||||
path: &Path,
|
||||
rdr: R,
|
||||
rdr: &mut R,
|
||||
) -> io::Result<SearchResult> {
|
||||
use self::PatternMatcher::*;
|
||||
|
||||
@@ -498,12 +512,12 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
searcher: &mut Searcher,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
rdr: R,
|
||||
mut rdr: R,
|
||||
) -> io::Result<SearchResult> {
|
||||
match *printer {
|
||||
Printer::Standard(ref mut p) => {
|
||||
let mut sink = p.sink_with_path(&matcher, path);
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
@@ -511,7 +525,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
}
|
||||
Printer::Summary(ref mut p) => {
|
||||
let mut sink = p.sink_with_path(&matcher, path);
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
@@ -519,7 +533,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
}
|
||||
Printer::JSON(ref mut p) => {
|
||||
let mut sink = p.sink_with_path(&matcher, path);
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
stats: Some(sink.stats().clone()),
|
||||
|
@@ -67,7 +67,7 @@ impl SubjectBuilder {
|
||||
if subj.is_file() {
|
||||
return Some(subj);
|
||||
}
|
||||
// We got nothin. Emit a debug message, but only if this isn't a
|
||||
// We got nothing. Emit a debug message, but only if this isn't a
|
||||
// directory. Otherwise, emitting messages for directories is just
|
||||
// noisy.
|
||||
if !subj.is_dir() {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "globset"
|
||||
version = "0.4.6" #:version
|
||||
version = "0.4.10" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
@@ -12,7 +12,8 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "glob", "multiple", "set", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "globset"
|
||||
@@ -20,9 +21,9 @@ bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
bstr = { version = "1.1.0", default-features = false, features = ["std"] }
|
||||
fnv = "1.0.6"
|
||||
log = "0.4.5"
|
||||
log = { version = "0.4.5", optional = true }
|
||||
regex = { version = "1.1.5", default-features = false, features = ["perf", "std"] }
|
||||
serde = { version = "1.0.104", optional = true }
|
||||
|
||||
@@ -32,5 +33,6 @@ lazy_static = "1"
|
||||
serde_json = "1.0.45"
|
||||
|
||||
[features]
|
||||
default = ["log"]
|
||||
simd-accel = []
|
||||
serde1 = ["serde"]
|
||||
|
@@ -19,13 +19,7 @@ Add this to your `Cargo.toml`:
|
||||
|
||||
```toml
|
||||
[dependencies]
|
||||
globset = "0.3"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate globset;
|
||||
globset = "0.4"
|
||||
```
|
||||
|
||||
### Features
|
||||
@@ -84,12 +78,12 @@ assert_eq!(set.matches("src/bar/baz/foo.rs"), vec![0, 2]);
|
||||
|
||||
This crate implements globs by converting them to regular expressions, and
|
||||
executing them with the
|
||||
[`regex`](https://github.com/rust-lang-nursery/regex)
|
||||
[`regex`](https://github.com/rust-lang/regex)
|
||||
crate.
|
||||
|
||||
For single glob matching, performance of this crate should be roughly on par
|
||||
with the performance of the
|
||||
[`glob`](https://github.com/rust-lang-nursery/glob)
|
||||
[`glob`](https://github.com/rust-lang/glob)
|
||||
crate. (`*_regex` correspond to benchmarks for this library while `*_glob`
|
||||
correspond to benchmarks for the `glob` library.)
|
||||
Optimizations in the `regex` crate may propel this library past `glob`,
|
||||
@@ -114,7 +108,7 @@ test many_short_glob ... bench: 1,063 ns/iter (+/- 47)
|
||||
test many_short_regex_set ... bench: 186 ns/iter (+/- 11)
|
||||
```
|
||||
|
||||
### Comparison with the [`glob`](https://github.com/rust-lang-nursery/glob) crate
|
||||
### Comparison with the [`glob`](https://github.com/rust-lang/glob) crate
|
||||
|
||||
* Supports alternate "or" globs, e.g., `*.{foo,bar}`.
|
||||
* Can match non-UTF-8 file paths correctly.
|
||||
|
@@ -4,9 +4,6 @@ tool itself, see the benchsuite directory.
|
||||
*/
|
||||
#![feature(test)]
|
||||
|
||||
extern crate glob;
|
||||
extern crate globset;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
|
||||
|
@@ -8,7 +8,7 @@ use std::str;
|
||||
use regex;
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use {new_regex, Candidate, Error, ErrorKind};
|
||||
use crate::{new_regex, Candidate, Error, ErrorKind};
|
||||
|
||||
/// Describes a matching strategy for a particular pattern.
|
||||
///
|
||||
@@ -98,7 +98,7 @@ impl hash::Hash for Glob {
|
||||
}
|
||||
|
||||
impl fmt::Display for Glob {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
self.glob.fmt(f)
|
||||
}
|
||||
}
|
||||
@@ -127,7 +127,7 @@ impl GlobMatcher {
|
||||
}
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
|
||||
self.re.is_match(&path.path)
|
||||
}
|
||||
|
||||
@@ -143,8 +143,6 @@ impl GlobMatcher {
|
||||
struct GlobStrategic {
|
||||
/// The match strategy to use.
|
||||
strategy: MatchStrategy,
|
||||
/// The underlying pattern.
|
||||
pat: Glob,
|
||||
/// The pattern, as a compiled regex.
|
||||
re: Regex,
|
||||
}
|
||||
@@ -157,7 +155,7 @@ impl GlobStrategic {
|
||||
}
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
|
||||
let byte_path = &*candidate.path;
|
||||
|
||||
match self.strategy {
|
||||
@@ -273,7 +271,7 @@ impl Glob {
|
||||
let strategy = MatchStrategy::new(self);
|
||||
let re =
|
||||
new_regex(&self.re).expect("regex compilation shouldn't fail");
|
||||
GlobStrategic { strategy: strategy, pat: self.clone(), re: re }
|
||||
GlobStrategic { strategy: strategy, re: re }
|
||||
}
|
||||
|
||||
/// Returns the original glob pattern used to build this pattern.
|
||||
@@ -403,7 +401,7 @@ impl Glob {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
let end = match self.tokens.last() {
|
||||
let (end, need_sep) = match self.tokens.last() {
|
||||
Some(&Token::ZeroOrMore) => {
|
||||
if self.opts.literal_separator {
|
||||
// If a trailing `*` can't match a `/`, then we can't
|
||||
@@ -414,9 +412,10 @@ impl Glob {
|
||||
// literal prefix.
|
||||
return None;
|
||||
}
|
||||
self.tokens.len() - 1
|
||||
(self.tokens.len() - 1, false)
|
||||
}
|
||||
_ => self.tokens.len(),
|
||||
Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
|
||||
_ => (self.tokens.len(), false),
|
||||
};
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[0..end] {
|
||||
@@ -425,6 +424,9 @@ impl Glob {
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
if need_sep {
|
||||
lit.push('/');
|
||||
}
|
||||
if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
@@ -612,6 +614,8 @@ impl<'a> GlobBuilder<'a> {
|
||||
}
|
||||
|
||||
/// Toggle whether a literal `/` is required to match a path separator.
|
||||
///
|
||||
/// By default this is false: `*` and `?` will match `/`.
|
||||
pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.literal_separator = yes;
|
||||
self
|
||||
@@ -683,7 +687,7 @@ impl Tokens {
|
||||
re.push_str("(?:/?|.*/)");
|
||||
}
|
||||
Token::RecursiveSuffix => {
|
||||
re.push_str("(?:/?|/.*)");
|
||||
re.push_str("/.*");
|
||||
}
|
||||
Token::RecursiveZeroOrMore => {
|
||||
re.push_str("(?:/|/.*/)");
|
||||
@@ -1009,7 +1013,7 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
|
||||
mod tests {
|
||||
use super::Token::*;
|
||||
use super::{Glob, GlobBuilder, Token};
|
||||
use {ErrorKind, GlobSetBuilder};
|
||||
use crate::{ErrorKind, GlobSetBuilder};
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
struct Options {
|
||||
@@ -1222,9 +1226,9 @@ mod tests {
|
||||
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re17, "**/**/**", r"^.*$");
|
||||
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re19, "a/**", r"^a/.*$");
|
||||
toregex!(re20, "a/**/**", r"^a/.*$");
|
||||
toregex!(re21, "a/**/**/**", r"^a/.*$");
|
||||
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
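The new RecursiveSuffix translation can be observed through globset's public API; a minimal sketch, assuming the post-change behavior shown in the tests above:

```rust
use globset::Glob;

fn main() -> Result<(), globset::Error> {
    let m = Glob::new("a/**")?.compile_matcher();
    // `a/**` now requires something after the slash, so `a` alone no longer matches.
    assert!(m.is_match("a/b"));
    assert!(!m.is_match("a"));
    Ok(())
}
```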
@@ -1270,11 +1274,12 @@ mod tests {
|
||||
matches!(matchrec18, "/**/test", "/test");
|
||||
matches!(matchrec19, "**/.*", ".abc");
|
||||
matches!(matchrec20, "**/.*", "abc/.abc");
|
||||
matches!(matchrec21, ".*/**", ".abc");
|
||||
matches!(matchrec21, "**/foo/bar", "foo/bar");
|
||||
matches!(matchrec22, ".*/**", ".abc/abc");
|
||||
matches!(matchrec23, "foo/**", "foo");
|
||||
matches!(matchrec24, "**/foo/bar", "foo/bar");
|
||||
matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");
|
||||
matches!(matchrec23, "test/**", "test/");
|
||||
matches!(matchrec24, "test/**", "test/one");
|
||||
matches!(matchrec25, "test/**", "test/one/two");
|
||||
matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
|
||||
|
||||
matches!(matchrange1, "a[0-9]b", "a0b");
|
||||
matches!(matchrange2, "a[0-9]b", "a9b");
|
||||
@@ -1400,6 +1405,8 @@ mod tests {
|
||||
"some/one/two/three/needle.txt",
|
||||
SLASHLIT
|
||||
);
|
||||
nmatches!(matchrec33, ".*/**", ".abc");
|
||||
nmatches!(matchrec34, "foo/**", "foo");
|
||||
|
||||
macro_rules! extract {
|
||||
($which:ident, $name:ident, $pat:expr, $expect:expr) => {
|
||||
@@ -1504,7 +1511,7 @@ mod tests {
|
||||
prefix!(extract_prefix1, "/foo", Some(s("/foo")));
|
||||
prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
|
||||
prefix!(extract_prefix3, "**/foo", None);
|
||||
prefix!(extract_prefix4, "foo/**", None);
|
||||
prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
|
||||
|
||||
suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
|
||||
suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
|
||||
|
@@ -103,16 +103,6 @@ or to enable case insensitive matching.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate fnv;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error as StdError;
|
||||
@@ -125,9 +115,9 @@ use aho_corasick::AhoCorasick;
|
||||
use bstr::{ByteSlice, ByteVec, B};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
use pathutil::{file_name, file_name_ext, normalize_path};
|
||||
use crate::glob::MatchStrategy;
|
||||
pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
use crate::pathutil::{file_name, file_name_ext, normalize_path};
|
||||
|
||||
mod glob;
|
||||
mod pathutil;
|
||||
@@ -135,6 +125,16 @@ mod pathutil;
|
||||
#[cfg(feature = "serde1")]
|
||||
mod serde_impl;
|
||||
|
||||
#[cfg(feature = "log")]
|
||||
macro_rules! debug {
|
||||
($($token:tt)*) => (::log::debug!($($token)*);)
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "log"))]
|
||||
macro_rules! debug {
|
||||
($($token:tt)*) => {};
|
||||
}
|
||||
|
||||
/// Represents an error that can occur when parsing a glob pattern.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct Error {
|
||||
@@ -228,7 +228,7 @@ impl ErrorKind {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.glob {
|
||||
None => self.kind.fmt(f),
|
||||
Some(ref glob) => {
|
||||
@@ -239,7 +239,7 @@ impl fmt::Display for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for ErrorKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match *self {
|
||||
ErrorKind::InvalidRecursive
|
||||
| ErrorKind::UnclosedClass
|
||||
@@ -317,7 +317,7 @@ impl GlobSet {
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
|
||||
if self.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -340,7 +340,7 @@ impl GlobSet {
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
|
||||
pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
|
||||
let mut into = vec![];
|
||||
if self.is_empty() {
|
||||
return into;
|
||||
@@ -374,7 +374,7 @@ impl GlobSet {
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate_into(
|
||||
&self,
|
||||
path: &Candidate,
|
||||
path: &Candidate<'_>,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
into.clear();
|
||||
@@ -456,6 +456,13 @@ impl GlobSet {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for GlobSet {
|
||||
/// Create a default empty GlobSet.
|
||||
fn default() -> Self {
|
||||
GlobSet::empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// GlobSetBuilder builds a group of patterns that can be used to
|
||||
/// simultaneously match a file path.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -536,7 +543,7 @@ enum GlobSetMatchStrategy {
|
||||
}
|
||||
|
||||
impl GlobSetMatchStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.is_match(candidate),
|
||||
@@ -549,7 +556,11 @@ impl GlobSetMatchStrategy {
|
||||
}
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.matches_into(candidate, matches),
|
||||
@@ -575,12 +586,16 @@ impl LiteralStrategy {
|
||||
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
self.0.contains_key(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
@@ -599,7 +614,7 @@ impl BasenameLiteralStrategy {
|
||||
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
if candidate.basename.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -607,7 +622,11 @@ impl BasenameLiteralStrategy {
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
if candidate.basename.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -629,7 +648,7 @@ impl ExtensionStrategy {
|
||||
self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -637,7 +656,11 @@ impl ExtensionStrategy {
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -655,7 +678,7 @@ struct PrefixStrategy {
|
||||
}
|
||||
|
||||
impl PrefixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
@@ -665,7 +688,11 @@ impl PrefixStrategy {
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
@@ -683,7 +710,7 @@ struct SuffixStrategy {
|
||||
}
|
||||
|
||||
impl SuffixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
@@ -693,7 +720,11 @@ impl SuffixStrategy {
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
@@ -707,7 +738,7 @@ impl SuffixStrategy {
|
||||
struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
|
||||
|
||||
impl RequiredExtensionStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -725,7 +756,11 @@ impl RequiredExtensionStrategy {
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -746,11 +781,15 @@ struct RegexSetStrategy {
|
||||
}
|
||||
|
||||
impl RegexSetStrategy {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
self.matcher.is_match(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
for i in self.matcher.matches(candidate.path.as_bytes()) {
|
||||
matches.push(self.map[i]);
|
||||
}
|
||||
@@ -833,8 +872,8 @@ impl RequiredExtensionStrategyBuilder {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::GlobSetBuilder;
|
||||
use glob::Glob;
|
||||
use super::{GlobSet, GlobSetBuilder};
|
||||
use crate::glob::Glob;
|
||||
|
||||
#[test]
|
||||
fn set_works() {
|
||||
@@ -863,4 +902,11 @@ mod tests {
|
||||
assert!(!set.is_match(""));
|
||||
assert!(!set.is_match("a"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_set_is_empty_works() {
|
||||
let set: GlobSet = Default::default();
|
||||
assert!(!set.is_match(""));
|
||||
assert!(!set.is_match("a"));
|
||||
}
|
||||
}
|
||||
|
@@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(unix)]
|
||||
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
|
||||
// UNIX only uses /, so we're good.
|
||||
path
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use serde::de::Error;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use Glob;
|
||||
use crate::Glob;
|
||||
|
||||
impl Serialize for Glob {
|
||||
fn serialize<S: Serializer>(
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.2.7" #:version
|
||||
version = "0.2.11" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -10,15 +10,16 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-cli = { version = "0.1.5", path = "../cli" }
|
||||
grep-matcher = { version = "0.1.4", path = "../matcher" }
|
||||
grep-pcre2 = { version = "0.1.4", path = "../pcre2", optional = true }
|
||||
grep-printer = { version = "0.1.5", path = "../printer" }
|
||||
grep-regex = { version = "0.1.8", path = "../regex" }
|
||||
grep-searcher = { version = "0.1.7", path = "../searcher" }
|
||||
grep-cli = { version = "0.1.7", path = "../cli" }
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
grep-pcre2 = { version = "0.1.6", path = "../pcre2", optional = true }
|
||||
grep-printer = { version = "0.1.7", path = "../printer" }
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
grep-searcher = { version = "0.1.11", path = "../searcher" }
|
||||
|
||||
[dev-dependencies]
|
||||
termcolor = "1.0.4"
|
||||
|
@@ -26,12 +26,6 @@ Add this to your `Cargo.toml`:
|
||||
grep = "0.2"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep;
|
||||
```
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
|
@@ -1,7 +1,3 @@
|
||||
extern crate grep;
|
||||
extern crate termcolor;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::ffi::OsString;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.4.17" #:version
|
||||
version = "0.4.19" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
@@ -11,15 +11,15 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore"
|
||||
readme = "README.md"
|
||||
keywords = ["glob", "ignore", "gitignore", "pattern", "file"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "ignore"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
crossbeam-utils = "0.8.0"
|
||||
globset = { version = "0.4.5", path = "../globset" }
|
||||
globset = { version = "0.4.10", path = "../globset" }
|
||||
lazy_static = "1.1"
|
||||
log = "0.4.5"
|
||||
memchr = "2.1"
|
||||
|
@@ -22,12 +22,6 @@ Add this to your `Cargo.toml`:
|
||||
ignore = "0.4"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate ignore;
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
|
@@ -1,7 +1,3 @@
|
||||
extern crate crossbeam_channel as channel;
|
||||
extern crate ignore;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
@@ -14,7 +10,7 @@ fn main() {
|
||||
let mut path = env::args().nth(1).unwrap();
|
||||
let mut parallel = false;
|
||||
let mut simple = false;
|
||||
let (tx, rx) = channel::bounded::<DirEntry>(100);
|
||||
let (tx, rx) = crossbeam_channel::bounded::<DirEntry>(100);
|
||||
if path == "parallel" {
|
||||
path = env::args().nth(2).unwrap();
|
||||
parallel = true;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
/// types to each invocation of ripgrep with the '--type-add' flag.
|
||||
///
|
||||
/// If you would like to add or improve this list, please file a PR:
|
||||
/// https://github.com/BurntSushi/ripgrep
|
||||
/// <https://github.com/BurntSushi/ripgrep>.
|
||||
///
|
||||
/// Please try to keep this list sorted lexicographically and wrapped to 79
|
||||
/// columns (inclusive).
|
||||
@@ -16,18 +16,24 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
|
||||
("asm", &["*.asm", "*.s", "*.S"]),
|
||||
("asp", &[
|
||||
"*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb",
|
||||
"*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs",
|
||||
"*.ascx.vb", "*.asp"
|
||||
]),
|
||||
("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
|
||||
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
||||
("awk", &["*.awk"]),
|
||||
("bazel", &["*.bazel", "*.bzl", "BUILD", "WORKSPACE"]),
|
||||
("bazel", &[
|
||||
"*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel",
|
||||
"WORKSPACE", "WORKSPACE.bazel",
|
||||
]),
|
||||
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||
("brotli", &["*.br"]),
|
||||
("buildstream", &["*.bst"]),
|
||||
("bzip2", &["*.bz2", "*.tbz2"]),
|
||||
("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
|
||||
("cabal", &["*.cabal"]),
|
||||
("candid", &["*.did"]),
|
||||
("carp", &["*.carp"]),
|
||||
("cbor", &["*.cbor"]),
|
||||
("ceylon", &["*.ceylon"]),
|
||||
("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
|
||||
@@ -40,18 +46,21 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
"*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
|
||||
]),
|
||||
("creole", &["*.creole"]),
|
||||
("crystal", &["Projectfile", "*.cr"]),
|
||||
("crystal", &["Projectfile", "*.cr", "*.ecr", "shard.yml"]),
|
||||
("cs", &["*.cs"]),
|
||||
("csharp", &["*.cs"]),
|
||||
("cshtml", &["*.cshtml"]),
|
||||
("css", &["*.css", "*.scss"]),
|
||||
("csv", &["*.csv"]),
|
||||
("cuda", &["*.cu", "*.cuh"]),
|
||||
("cython", &["*.pyx", "*.pxi", "*.pxd"]),
|
||||
("d", &["*.d"]),
|
||||
("dart", &["*.dart"]),
|
||||
("devicetree", &["*.dts", "*.dtsi"]),
|
||||
("dhall", &["*.dhall"]),
|
||||
("diff", &["*.patch", "*.diff"]),
|
||||
("docker", &["*Dockerfile*"]),
|
||||
("dts", &["*.dts", "*.dtsi"]),
|
||||
("dvc", &["Dvcfile", "*.dvc"]),
|
||||
("ebuild", &["*.ebuild"]),
|
||||
("edn", &["*.edn"]),
|
||||
@@ -60,6 +69,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("elm", &["*.elm"]),
|
||||
("erb", &["*.erb"]),
|
||||
("erlang", &["*.erl", "*.hrl"]),
|
||||
("fennel", &["*.fnl"]),
|
||||
("fidl", &["*.fidl"]),
|
||||
("fish", &["*.fish"]),
|
||||
("flatbuffers", &["*.fbs"]),
|
||||
@@ -68,24 +78,27 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
"*.f90", "*.F90", "*.f95", "*.F95",
|
||||
]),
|
||||
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
||||
("fut", &[".fut"]),
|
||||
("fut", &["*.fut"]),
|
||||
("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
|
||||
("gn", &["*.gn", "*.gni"]),
|
||||
("go", &["*.go"]),
|
||||
("gradle", &["*.gradle"]),
|
||||
("groovy", &["*.groovy", "*.gradle"]),
|
||||
("gzip", &["*.gz", "*.tgz"]),
|
||||
("h", &["*.h", "*.hpp"]),
|
||||
("h", &["*.h", "*.hh", "*.hpp"]),
|
||||
("haml", &["*.haml"]),
|
||||
("hare", &["*.ha"]),
|
||||
("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
|
||||
("hbs", &["*.hbs"]),
|
||||
("hs", &["*.hs", "*.lhs"]),
|
||||
("html", &["*.htm", "*.html", "*.ejs"]),
|
||||
("hy", &["*.hy"]),
|
||||
("idris", &["*.idr", "*.lidr"]),
|
||||
("janet", &["*.janet"]),
|
||||
("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
|
||||
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
|
||||
("jl", &["*.jl"]),
|
||||
("js", &["*.js", "*.jsx", "*.vue"]),
|
||||
("js", &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]),
|
||||
("json", &["*.json", "composer.lock"]),
|
||||
("jsonl", &["*.jsonl"]),
|
||||
("julia", &["*.jl"]),
|
||||
@@ -120,6 +133,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
"MPL-*[0-9]*",
|
||||
"OFL-*[0-9]*",
|
||||
]),
|
||||
("lilypond", &["*.ly", "*.ily"]),
|
||||
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
||||
("lock", &["*.lock", "package-lock.json"]),
|
||||
("log", &["*.log"]),
|
||||
@@ -135,13 +149,15 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
]),
|
||||
("mako", &["*.mako", "*.mao"]),
|
||||
("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
|
||||
("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
|
||||
("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
|
||||
("matlab", &["*.m"]),
|
||||
("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
|
||||
("md", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
|
||||
("meson", &["meson.build", "meson_options.txt"]),
|
||||
("minified", &["*.min.html", "*.min.css", "*.min.js"]),
|
||||
("mint", &["*.mint"]),
|
||||
("mk", &["mkfile"]),
|
||||
("ml", &["*.ml"]),
|
||||
("motoko", &["*.mo"]),
|
||||
("msbuild", &[
|
||||
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
|
||||
]),
|
||||
@@ -151,15 +167,22 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("objcpp", &["*.h", "*.mm"]),
|
||||
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
|
||||
("org", &["*.org", "*.org_archive"]),
|
||||
("pants", &["BUILD"]),
|
||||
("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
|
||||
("pdf", &["*.pdf"]),
|
||||
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
|
||||
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
|
||||
("php", &[
|
||||
// note that PHP 6 doesn't exist
|
||||
// See: https://wiki.php.net/rfc/php6
|
||||
"*.php", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8",
|
||||
"*.pht", "*.phtml"
|
||||
]),
|
||||
("po", &["*.po"]),
|
||||
("pod", &["*.pod"]),
|
||||
("postscript", &["*.eps", "*.ps"]),
|
||||
("protobuf", &["*.proto"]),
|
||||
("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
|
||||
("puppet", &["*.erb", "*.pp", "*.rb"]),
|
||||
("puppet", &["*.epp", "*.erb", "*.pp", "*.rb"]),
|
||||
("purs", &["*.purs"]),
|
||||
("py", &["*.py"]),
|
||||
("qmake", &["*.pro", "*.pri", "*.prf"]),
|
||||
@@ -168,9 +191,17 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("racket", &["*.rkt"]),
|
||||
("rdoc", &["*.rdoc"]),
|
||||
("readme", &["README*", "*README"]),
|
||||
("reasonml", &["*.re", "*.rei"]),
|
||||
("red", &["*.r", "*.red", "*.reds"]),
|
||||
("rescript", &["*.res", "*.resi"]),
|
||||
("robot", &["*.robot"]),
|
||||
("rst", &["*.rst"]),
|
||||
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
|
||||
("ruby", &[
|
||||
// Idiomatic files
|
||||
"config.ru", "Gemfile", ".irbrc", "Rakefile",
|
||||
// Extensions
|
||||
"*.gemspec", "*.rb", "*.rbw"
|
||||
]),
|
||||
("rust", &["*.rs"]),
|
||||
("sass", &["*.sass", "*.scss"]),
|
||||
("scala", &["*.scala", "*.sbt"]),
|
||||
@@ -200,6 +231,7 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("slim", &["*.skim", "*.slim", "*.slime"]),
|
||||
("smarty", &["*.tpl"]),
|
||||
("sml", &["*.sml", "*.sig"]),
|
||||
("solidity", &["*.sol"]),
|
||||
("soy", &["*.soy"]),
|
||||
("spark", &["*.spark"]),
|
||||
("spec", &["*.spec"]),
|
||||
@@ -217,11 +249,12 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("taskpaper", &["*.taskpaper"]),
|
||||
("tcl", &["*.tcl"]),
|
||||
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
|
||||
("texinfo", &["*.texi"]),
|
||||
("textile", &["*.textile"]),
|
||||
("tf", &["*.tf"]),
|
||||
("thrift", &["*.thrift"]),
|
||||
("toml", &["*.toml", "Cargo.lock"]),
|
||||
("ts", &["*.ts", "*.tsx"]),
|
||||
("ts", &["*.ts", "*.tsx", "*.cts", "*.mts"]),
|
||||
("twig", &["*.twig"]),
|
||||
("txt", &["*.txt"]),
|
||||
("typoscript", &["*.typoscript", "*.ts"]),
|
||||
@@ -230,8 +263,12 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("vcl", &["*.vcl"]),
|
||||
("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
|
||||
("vhdl", &["*.vhd", "*.vhdl"]),
|
||||
("vim", &["*.vim"]),
|
||||
("vimscript", &["*.vim"]),
|
||||
("vim", &[
|
||||
"*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
|
||||
]),
|
||||
("vimscript", &[
|
||||
"*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
|
||||
]),
|
||||
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
||||
("wiki", &["*.mediawiki", "*.wiki"]),
|
||||
("xml", &[
|
||||
@@ -254,3 +291,26 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
]),
|
||||
("zstd", &["*.zst", "*.zstd"]),
|
||||
];
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DEFAULT_TYPES;
|
||||
|
||||
#[test]
|
||||
fn default_types_are_sorted() {
|
||||
let mut names = DEFAULT_TYPES.iter().map(|(name, _exts)| name);
|
||||
|
||||
let Some(mut previous_name) = names.next() else { return; };
|
||||
|
||||
for name in names {
|
||||
assert!(
|
||||
name > previous_name,
|
||||
r#""{}" should be sorted before "{}" in `DEFAULT_TYPES`"#,
|
||||
name,
|
||||
previous_name
|
||||
);
|
||||
|
||||
previous_name = name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -20,12 +20,12 @@ use std::io::{self, BufRead};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use overrides::{self, Override};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use types::{self, Types};
|
||||
use walk::DirEntry;
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
use crate::gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use crate::overrides::{self, Override};
|
||||
use crate::pathutil::{is_hidden, strip_prefix};
|
||||
use crate::types::{self, Types};
|
||||
use crate::walk::DirEntry;
|
||||
use crate::{Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
/// the `Ignore` matcher.
|
||||
@@ -202,11 +202,12 @@ impl Ignore {
|
||||
errs.maybe_push(err);
|
||||
igtmp.is_absolute_parent = true;
|
||||
igtmp.absolute_base = Some(absolute_base.clone());
|
||||
igtmp.has_git = if self.0.opts.git_ignore {
|
||||
parent.join(".git").exists()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
igtmp.has_git =
|
||||
if self.0.opts.require_git && self.0.opts.git_ignore {
|
||||
parent.join(".git").exists()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
ig = Ignore(Arc::new(igtmp));
|
||||
compiled.insert(parent.as_os_str().to_os_string(), ig.clone());
|
||||
}
|
||||
@@ -231,7 +232,9 @@ impl Ignore {
|
||||
|
||||
/// Like add_child, but takes a full path and returns an IgnoreInner.
|
||||
fn add_child_path(&self, dir: &Path) -> (IgnoreInner, Option<Error>) {
|
||||
let git_type = if self.0.opts.git_ignore || self.0.opts.git_exclude {
|
||||
let git_type = if self.0.opts.require_git
|
||||
&& (self.0.opts.git_ignore || self.0.opts.git_exclude)
|
||||
{
|
||||
dir.join(".git").metadata().ok().map(|md| md.file_type())
|
||||
} else {
|
||||
None
|
||||
@@ -495,7 +498,7 @@ impl Ignore {
|
||||
}
|
||||
|
||||
/// Returns an iterator over parent ignore matchers, including this one.
|
||||
pub fn parents(&self) -> Parents {
|
||||
pub fn parents(&self) -> Parents<'_> {
|
||||
Parents(Some(self))
|
||||
}
|
||||
|
||||
@@ -581,7 +584,7 @@ impl IgnoreBuilder {
|
||||
.unwrap();
|
||||
let (gi, err) = builder.build_global();
|
||||
if let Some(err) = err {
|
||||
debug!("{}", err);
|
||||
log::debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
@@ -840,10 +843,10 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use dir::IgnoreBuilder;
|
||||
use gitignore::Gitignore;
|
||||
use tests::TempDir;
|
||||
use Error;
|
||||
use crate::dir::IgnoreBuilder;
|
||||
use crate::gitignore::Gitignore;
|
||||
use crate::tests::TempDir;
|
||||
use crate::Error;
|
||||
|
||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||
let mut file = File::create(path).unwrap();
|
||||
|
@@ -19,8 +19,8 @@ use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex::bytes::Regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
use crate::pathutil::{is_file_name, strip_prefix};
|
||||
use crate::{Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// Glob represents a single glob in a gitignore file.
|
||||
///
|
||||
@@ -474,10 +474,13 @@ impl GitignoreBuilder {
|
||||
}
|
||||
// If it ends with a slash, then this should only match directories,
|
||||
// but the slash should otherwise not be used while globbing.
|
||||
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||
if c == '/' {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..i];
|
||||
if line.as_bytes().last() == Some(&b'/') {
|
||||
glob.is_only_dir = true;
|
||||
line = &line[..line.len() - 1];
|
||||
// If the slash was escaped, then remove the escape.
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2236
|
||||
if line.as_bytes().last() == Some(&b'\\') {
|
||||
line = &line[..line.len() - 1];
|
||||
}
|
||||
}
|
||||
glob.actual = line.to_string();
|
||||
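A standalone sketch of the trailing-slash handling added above (the function name is illustrative; this is not the crate's internal API):

```rust
/// Strip a trailing `/` (the directory-only marker) and, per the change above,
/// also drop the escape backslash when the slash was escaped.
fn split_dir_only(mut line: &str) -> (&str, bool) {
    let mut is_only_dir = false;
    if line.as_bytes().last() == Some(&b'/') {
        is_only_dir = true;
        line = &line[..line.len() - 1];
        if line.as_bytes().last() == Some(&b'\\') {
            line = &line[..line.len() - 1];
        }
    }
    (line, is_only_dir)
}

fn main() {
    assert_eq!(split_dir_only("target/"), ("target", true));
    assert_eq!(split_dir_only("foo"), ("foo", false));
}
```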
@@ -592,7 +595,7 @@ fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
||||
// N.B. This is the lazy approach, and isn't technically correct, but
|
||||
// probably works in more circumstances. I guess we would ideally have
|
||||
// a full INI parser. Yuck.
|
||||
lazy_static! {
|
||||
lazy_static::lazy_static! {
|
||||
static ref RE: Regex =
|
||||
Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
||||
};
|
||||
|
@@ -46,25 +46,12 @@ See the documentation for `WalkBuilder` for many other options.

#![deny(missing_docs)]

extern crate globset;
#[macro_use]
extern crate lazy_static;
#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
extern crate same_file;
extern crate thread_local;
extern crate walkdir;
#[cfg(windows)]
extern crate winapi_util;

use std::error;
use std::fmt;
use std::io;
use std::path::{Path, PathBuf};

pub use walk::{
pub use crate::walk::{
DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder,
WalkParallel, WalkState,
};
@@ -334,7 +321,7 @@ impl error::Error for Error {
}

impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match *self {
Error::Partial(ref errs) => {
let msgs: Vec<String> =
@@ -6,8 +6,8 @@ line tools.

use std::path::Path;

use gitignore::{self, Gitignore, GitignoreBuilder};
use {Error, Match};
use crate::gitignore::{self, Gitignore, GitignoreBuilder};
use crate::{Error, Match};

/// Glob represents a single glob in an override matcher.
///
@@ -1,7 +1,7 @@
use std::ffi::OsStr;
use std::path::Path;

use walk::DirEntry;
use crate::walk::DirEntry;

/// Returns true if and only if this entry is considered to be hidden.
///
@@ -93,9 +93,9 @@ use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
use regex::Regex;
use thread_local::ThreadLocal;

use default_types::DEFAULT_TYPES;
use pathutil::file_name;
use {Error, Match};
use crate::default_types::DEFAULT_TYPES;
use crate::pathutil::file_name;
use crate::{Error, Match};

/// Glob represents a single glob in a set of file type definitions.
///
@@ -122,10 +122,6 @@ enum GlobInner<'a> {
Matched {
/// The file type definition which provided the glob.
def: &'a FileTypeDef,
/// The index of the glob that matched inside the file type definition.
which: usize,
/// Whether the selection was negated or not.
negated: bool,
},
}

@@ -291,13 +287,9 @@ impl Types {
self.set.matches_into(name, &mut *matches);
// The highest precedent match is the last one.
if let Some(&i) = matches.last() {
let (isel, iglob) = self.glob_to_selection[i];
let (isel, _) = self.glob_to_selection[i];
let sel = &self.selections[isel];
let glob = Glob(GlobInner::Matched {
def: sel.inner(),
which: iglob,
negated: sel.is_negated(),
});
let glob = Glob(GlobInner::Matched { def: sel.inner() });
return if sel.is_negated() {
Match::Ignore(glob)
} else {
@@ -427,7 +419,7 @@ impl TypesBuilder {
/// If `name` is `all` or otherwise contains any character that is not a
/// Unicode letter or number, then an error is returned.
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
lazy_static! {
lazy_static::lazy_static! {
static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
};
if name == "all" || !RE.is_match(name) {
@@ -13,11 +13,11 @@ use std::vec;
use same_file::Handle;
use walkdir::{self, WalkDir};

use dir::{Ignore, IgnoreBuilder};
use gitignore::GitignoreBuilder;
use overrides::Override;
use types::Types;
use {Error, PartialErrorBuilder};
use crate::dir::{Ignore, IgnoreBuilder};
use crate::gitignore::GitignoreBuilder;
use crate::overrides::Override;
use crate::types::Types;
use crate::{Error, PartialErrorBuilder};

/// A directory entry with a possible error attached.
///
@@ -252,7 +252,7 @@ struct DirEntryRaw {
}

impl fmt::Debug for DirEntryRaw {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
// Leaving out FileType because it doesn't have a debug impl
// in Rust 1.9. We could add it if we really wanted to by manually
// querying each possibly file type. Meh. ---AG
@@ -504,7 +504,7 @@ enum Sorter {
struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);

impl fmt::Debug for WalkBuilder {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("WalkBuilder")
.field("paths", &self.paths)
.field("ig_builder", &self.ig_builder)
@@ -934,15 +934,23 @@ impl Walk {
if ent.depth() == 0 {
return Ok(false);
}

// We ensure that trivial skipping is done before any other potentially
// expensive operations (stat, filesystem other) are done. This seems
// like an obvious optimization but becomes critical when filesystem
// operations even as simple as stat can result in significant
// overheads; an example of this was a bespoke filesystem layer in
// Windows that hosted files remotely and would download them on-demand
// when particular filesystem operations occurred. Users of this system
// who ensured correct file-type filters were being used could still
// get unnecessary file access resulting in large downloads.
if should_skip_entry(&self.ig, ent) {
return Ok(true);
}
if let Some(ref stdout) = self.skip {
if path_equals(ent, stdout)? {
return Ok(true);
}
}
if should_skip_entry(&self.ig, ent) {
return Ok(true);
}
if self.max_filesize.is_some() && !ent.is_dir() {
return Ok(skip_filesize(
self.max_filesize.unwrap(),
@@ -1218,7 +1226,7 @@ impl WalkParallel {
/// visitor runs on only one thread, this build-up can be done without
/// synchronization. Then, once traversal is complete, all of the results
/// can be merged together into a single data structure.
pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder) {
pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
let threads = self.threads();
let stack = Arc::new(Mutex::new(vec![]));
{
@@ -1274,7 +1282,7 @@ impl WalkParallel {
let quit_now = Arc::new(AtomicBool::new(false));
let num_pending =
Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
crossbeam_utils::thread::scope(|s| {
std::thread::scope(|s| {
let mut handles = vec![];
for _ in 0..threads {
let worker = Worker {
@@ -1288,13 +1296,12 @@ impl WalkParallel {
skip: self.skip.clone(),
filter: self.filter.clone(),
};
handles.push(s.spawn(|_| worker.run()));
handles.push(s.spawn(|| worker.run()));
}
for handle in handles {
handle.join().unwrap();
}
})
.unwrap(); // Pass along panics from threads
});
}

fn threads(&self) -> usize {
@@ -1549,6 +1556,11 @@ impl<'s> Worker<'s> {
}
}
}
// N.B. See analogous call in the single-threaded implementation about
// why it's important for this to come before the checks below.
if should_skip_entry(ig, &dent) {
return WalkState::Continue;
}
if let Some(ref stdout) = self.skip {
let is_stdout = match path_equals(&dent, stdout) {
Ok(is_stdout) => is_stdout,
@@ -1558,7 +1570,6 @@ impl<'s> Worker<'s> {
return WalkState::Continue;
}
}
let should_skip_path = should_skip_entry(ig, &dent);
let should_skip_filesize =
if self.max_filesize.is_some() && !dent.is_dir() {
skip_filesize(
@@ -1575,8 +1586,7 @@ impl<'s> Worker<'s> {
} else {
false
};
if !should_skip_path && !should_skip_filesize && !should_skip_filtered
{
if !should_skip_filesize && !should_skip_filtered {
self.send(Work { dent, ignore: ig.clone(), root_device });
}
WalkState::Continue
@@ -1714,7 +1724,7 @@ fn skip_filesize(

if let Some(fs) = filesize {
if fs > max_filesize {
debug!("ignoring {}: {} bytes", path.display(), fs);
log::debug!("ignoring {}: {} bytes", path.display(), fs);
true
} else {
false
@@ -1727,10 +1737,10 @@ fn skip_filesize(
fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
let m = ig.matched_dir_entry(dent);
if m.is_ignore() {
debug!("ignoring {}: {:?}", dent.path().display(), m);
log::debug!("ignoring {}: {:?}", dent.path().display(), m);
true
} else if m.is_whitelist() {
debug!("whitelisting {}: {:?}", dent.path().display(), m);
log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
false
} else {
false
@@ -1841,7 +1851,7 @@ mod tests {
use std::sync::{Arc, Mutex};

use super::{DirEntry, WalkBuilder, WalkState};
use tests::TempDir;
use crate::tests::TempDir;

fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
let mut file = File::create(path).unwrap();
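The `visit` hunk above swaps `crossbeam_utils::thread::scope` for the standard library's scoped threads (stable since Rust 1.63). A standalone sketch of the same pattern, with placeholder work in place of `Worker::run`:

```rust
fn main() {
    let mut results = Vec::new();
    std::thread::scope(|s| {
        let mut handles = Vec::new();
        for id in 0..4 {
            // Unlike crossbeam's scope, the spawn closure takes no scope argument.
            handles.push(s.spawn(move || id * 2));
        }
        for handle in handles {
            // join() surfaces panics from the worker thread, so no trailing
            // .unwrap() on the scope itself is needed.
            results.push(handle.join().unwrap());
        }
    });
    println!("{:?}", results);
}
```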
@@ -1,5 +1,3 @@
extern crate ignore;

use std::path::Path;

use ignore::gitignore::{Gitignore, GitignoreBuilder};
@@ -1,6 +1,6 @@
[package]
name = "grep-matcher"
version = "0.1.4" #:version
version = "0.1.6" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A trait for regular expressions, with a focus on line oriented search.
@@ -10,8 +10,9 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/matcher"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/matcher"
readme = "README.md"
keywords = ["regex", "pattern", "trait"]
license = "Unlicense/MIT"
license = "Unlicense OR MIT"
autotests = false
edition = "2018"

[dependencies]
memchr = "2.1"
@@ -27,9 +27,3 @@ Add this to your `Cargo.toml`:
[dependencies]
grep-matcher = "0.1"
```

and this to your crate root:

```rust
extern crate grep_matcher;
```
@@ -92,7 +92,7 @@ impl From<usize> for Ref<'static> {
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
let mut i = 0;
if replacement.len() <= 1 || replacement[0] != b'$' {
return None;
@@ -38,14 +38,12 @@ implementations.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate memchr;
|
||||
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::ops;
|
||||
use std::u64;
|
||||
|
||||
use interpolate::interpolate;
|
||||
use crate::interpolate::interpolate;
|
||||
|
||||
mod interpolate;
|
||||
|
||||
@@ -118,7 +116,7 @@ impl Match {
|
||||
/// This method panics if `start > self.end`.
|
||||
#[inline]
|
||||
pub fn with_start(&self, start: usize) -> Match {
|
||||
assert!(start <= self.end);
|
||||
assert!(start <= self.end, "{} is not <= {}", start, self.end);
|
||||
Match { start, ..*self }
|
||||
}
|
||||
|
||||
@@ -130,7 +128,7 @@ impl Match {
|
||||
/// This method panics if `self.start > end`.
|
||||
#[inline]
|
||||
pub fn with_end(&self, end: usize) -> Match {
|
||||
assert!(self.start <= end);
|
||||
assert!(self.start <= end, "{} is not <= {}", self.start, end);
|
||||
Match { end, ..*self }
|
||||
}
|
||||
|
||||
@@ -304,7 +302,7 @@ pub struct ByteSet(BitSet);
|
||||
struct BitSet([u64; 4]);
|
||||
|
||||
impl fmt::Debug for BitSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let mut fmtd = f.debug_set();
|
||||
for b in (0..256).map(|b| b as u8) {
|
||||
if ByteSet(*self).contains(b) {
|
||||
@@ -494,7 +492,7 @@ impl ::std::error::Error for NoError {
|
||||
}
|
||||
|
||||
impl fmt::Display for NoError {
|
||||
fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
panic!("BUG for NoError: an impossible error occurred")
|
||||
}
|
||||
}
|
||||
@@ -618,12 +616,31 @@ pub trait Matcher {
|
||||
fn find_iter<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
self.find_iter_at(haystack, 0, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack`. If no match exists, then the given function is never
|
||||
/// called. If the function returns `false`, then iteration stops.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn find_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
mut matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
self.try_find_iter(haystack, |m| Ok(matched(m)))
|
||||
self.try_find_iter_at(haystack, at, |m| Ok(matched(m)))
|
||||
.map(|r: Result<(), ()>| r.unwrap())
|
||||
}
|
||||
|
||||
@@ -637,12 +654,35 @@ pub trait Matcher {
|
||||
fn try_find_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> Result<bool, E>,
|
||||
{
|
||||
self.try_find_iter_at(haystack, 0, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack`. If no match exists, then the given function is never
|
||||
/// called. If the function returns `false`, then iteration stops.
|
||||
/// Similarly, if the function returns an error then iteration stops and
|
||||
/// the error is yielded. If an error occurs while executing the search,
|
||||
/// then it is converted to
|
||||
/// `E`.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn try_find_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
mut matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> Result<bool, E>,
|
||||
{
|
||||
let mut last_end = 0;
|
||||
let mut last_end = at;
|
||||
let mut last_match = None;
|
||||
|
||||
loop {
|
||||
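The new `*_at` methods above thread a starting offset through iteration instead of slicing the haystack, so context-sensitive assertions like `\A` still see the true start of input. A hedged usage sketch with `grep-regex`'s `RegexMatcher`, which picks these methods up through the trait's default implementations:

```rust
use grep_matcher::Matcher;
use grep_regex::RegexMatcher;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let matcher = RegexMatcher::new(r"\Afoo")?;
    let haystack = b"foo foo";

    // Searching from offset 0: \A matches at the start of the haystack.
    let mut hits = Vec::new();
    matcher.find_iter_at(haystack, 0, |m| {
        hits.push(m);
        true
    })?;
    assert_eq!(hits.len(), 1);

    // Starting at 4, \A can no longer match even though "foo" occurs there;
    // slicing the haystack to &haystack[4..] would (wrongly) report a match.
    let mut hits_at_4 = Vec::new();
    matcher.find_iter_at(haystack, 4, |m| {
        hits_at_4.push(m);
        true
    })?;
    assert!(hits_at_4.is_empty());
    Ok(())
}
```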
@@ -696,12 +736,33 @@ pub trait Matcher {
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> bool,
|
||||
{
|
||||
self.captures_iter_at(haystack, 0, caps, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack` with capture groups extracted from each match. If no
|
||||
/// match exists, then the given function is never called. If the function
|
||||
/// returns `false`, then iteration stops.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn captures_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
mut matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> bool,
|
||||
{
|
||||
self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps)))
|
||||
self.try_captures_iter_at(haystack, at, caps, |caps| Ok(matched(caps)))
|
||||
.map(|r: Result<(), ()>| r.unwrap())
|
||||
}
|
||||
|
||||
@@ -716,12 +777,36 @@ pub trait Matcher {
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> Result<bool, E>,
|
||||
{
|
||||
self.try_captures_iter_at(haystack, 0, caps, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack` with capture groups extracted from each match. If no
|
||||
/// match exists, then the given function is never called. If the function
|
||||
/// returns `false`, then iteration stops. Similarly, if the function
|
||||
/// returns an error then iteration stops and the error is yielded. If
|
||||
/// an error occurs while executing the search, then it is converted to
|
||||
/// `E`.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn try_captures_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
mut matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> Result<bool, E>,
|
||||
{
|
||||
let mut last_end = 0;
|
||||
let mut last_end = at;
|
||||
let mut last_match = None;
|
||||
|
||||
loop {
|
||||
@@ -819,13 +904,35 @@ pub trait Matcher {
|
||||
haystack: &[u8],
|
||||
caps: &mut Self::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
append: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
self.replace_with_captures_at(haystack, 0, caps, dst, append)
|
||||
}
|
||||
|
||||
/// Replaces every match in the given haystack with the result of calling
|
||||
/// `append` with the matching capture groups.
|
||||
///
|
||||
/// If the given `append` function returns `false`, then replacement stops.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn replace_with_captures_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
mut append: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
let mut last_match = 0;
|
||||
self.captures_iter(haystack, caps, |caps| {
|
||||
let mut last_match = at;
|
||||
self.captures_iter_at(haystack, at, caps, |caps| {
|
||||
let m = caps.get(0).unwrap();
|
||||
dst.extend(&haystack[last_match..m.start]);
|
||||
last_match = m.end;
|
||||
@@ -1039,6 +1146,18 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).find_iter(haystack, matched)
|
||||
}
|
||||
|
||||
fn find_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
(*self).find_iter_at(haystack, at, matched)
|
||||
}
|
||||
|
||||
fn try_find_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1050,6 +1169,18 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).try_find_iter(haystack, matched)
|
||||
}
|
||||
|
||||
fn try_find_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> Result<bool, E>,
|
||||
{
|
||||
(*self).try_find_iter_at(haystack, at, matched)
|
||||
}
|
||||
|
||||
fn captures(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1070,6 +1201,19 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).captures_iter(haystack, caps, matched)
|
||||
}
|
||||
|
||||
fn captures_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> bool,
|
||||
{
|
||||
(*self).captures_iter_at(haystack, at, caps, matched)
|
||||
}
|
||||
|
||||
fn try_captures_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1082,6 +1226,19 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).try_captures_iter(haystack, caps, matched)
|
||||
}
|
||||
|
||||
fn try_captures_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> Result<bool, E>,
|
||||
{
|
||||
(*self).try_captures_iter_at(haystack, at, caps, matched)
|
||||
}
|
||||
|
||||
fn replace<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1107,6 +1264,20 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
|
||||
fn replace_with_captures_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
append: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
(*self).replace_with_captures_at(haystack, at, caps, dst, append)
|
||||
}
|
||||
|
||||
fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
|
||||
(*self).is_match(haystack)
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
use crate::util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
|
||||
fn matcher(pattern: &str) -> RegexMatcher {
|
||||
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||||
|
@@ -1,6 +1,3 @@
|
||||
extern crate grep_matcher;
|
||||
extern crate regex;
|
||||
|
||||
mod util;
|
||||
|
||||
mod test_matcher;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.4" #:version
|
||||
version = "0.1.6" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use PCRE2 with the 'grep' crate.
|
||||
@@ -10,8 +10,9 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "pcre", "backreference", "look"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
pcre2 = "0.2.0"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
pcre2 = "0.2.3"
|
||||
|
@@ -30,9 +30,3 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-pcre2 = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_pcre2;
|
||||
```
|
||||
|
@@ -50,7 +50,7 @@ impl error::Error for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
|
@@ -5,11 +5,8 @@ An implementation of `grep-matcher`'s `Matcher` trait for
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate grep_matcher;
|
||||
extern crate pcre2;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
pub use crate::error::{Error, ErrorKind};
|
||||
pub use crate::matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
pub use pcre2::{is_jit_available, version};
|
||||
|
||||
mod error;
|
||||
|
@@ -3,7 +3,7 @@ use std::collections::HashMap;
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};
|
||||
|
||||
use error::Error;
|
||||
use crate::error::Error;
|
||||
|
||||
/// A builder for configuring the compilation of a PCRE2 regex.
|
||||
#[derive(Clone, Debug)]
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-printer"
|
||||
version = "0.1.5" #:version
|
||||
version = "0.1.7" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
An implementation of the grep crate's Sink trait that provides standard
|
||||
@@ -11,21 +11,21 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
|
||||
readme = "README.md"
|
||||
keywords = ["grep", "pattern", "print", "printer", "sink"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[features]
|
||||
default = ["serde1"]
|
||||
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
||||
serde1 = ["base64", "serde", "serde_json"]
|
||||
|
||||
[dependencies]
|
||||
base64 = { version = "0.13.0", optional = true }
|
||||
bstr = "0.2.0"
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
grep-searcher = { version = "0.1.4", path = "../searcher" }
|
||||
base64 = { version = "0.20.0", optional = true }
|
||||
bstr = "1.1.0"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
grep-searcher = { version = "0.1.11", path = "../searcher" }
|
||||
termcolor = "1.0.4"
|
||||
serde = { version = "1.0.77", optional = true }
|
||||
serde_derive = { version = "1.0.77", optional = true }
|
||||
serde = { version = "1.0.77", optional = true, features = ["derive"] }
|
||||
serde_json = { version = "1.0.27", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.3", path = "../regex" }
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
|
@@ -26,9 +26,3 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-printer = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_printer;
|
||||
```
|
||||
|
@@ -60,7 +60,7 @@ impl ColorError {
|
||||
}
|
||||
|
||||
impl fmt::Display for ColorError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match *self {
|
||||
ColorError::UnrecognizedOutType(ref name) => write!(
|
||||
f,
|
||||
@@ -147,9 +147,6 @@ pub struct ColorSpecs {
|
||||
/// A `UserColorSpec` can also be converted to a `termcolor::ColorSpec`:
|
||||
///
|
||||
/// ```rust
|
||||
/// extern crate grep_printer;
|
||||
/// extern crate termcolor;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// use termcolor::{Color, ColorSpec};
|
||||
/// use grep_printer::UserColorSpec;
|
||||
|
@@ -4,14 +4,14 @@ use std::time::Instant;
|
||||
|
||||
use grep_matcher::{Match, Matcher};
|
||||
use grep_searcher::{
|
||||
Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
|
||||
SinkMatch,
|
||||
Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
|
||||
};
|
||||
use serde_json as json;
|
||||
|
||||
use counter::CounterWriter;
|
||||
use jsont;
|
||||
use stats::Stats;
|
||||
use crate::counter::CounterWriter;
|
||||
use crate::jsont;
|
||||
use crate::stats::Stats;
|
||||
use crate::util::find_iter_at_in_context;
|
||||
|
||||
/// The configuration for the JSON printer.
|
||||
///
|
||||
@@ -147,7 +147,7 @@ impl JSONBuilder {
|
||||
/// is not limited to UTF-8 exclusively, which in turn implies that matches
|
||||
/// may be reported that contain invalid UTF-8. Moreover, this printer may
|
||||
/// also print file paths, and the encoding of file paths is itself not
|
||||
/// guarnateed to be valid UTF-8. Therefore, this printer must deal with the
|
||||
/// guaranteed to be valid UTF-8. Therefore, this printer must deal with the
|
||||
/// presence of invalid UTF-8 somehow. The printer could silently ignore such
|
||||
/// things completely, or even lossily transcode invalid UTF-8 to valid UTF-8
|
||||
/// by replacing all invalid sequences with the Unicode replacement character.
|
||||
@@ -507,7 +507,10 @@ impl<W: io::Write> JSON<W> {
|
||||
|
||||
/// Write the given message followed by a new line. The new line is
|
||||
/// determined from the configuration of the given searcher.
|
||||
fn write_message(&mut self, message: &jsont::Message) -> io::Result<()> {
|
||||
fn write_message(
|
||||
&mut self,
|
||||
message: &jsont::Message<'_>,
|
||||
) -> io::Result<()> {
|
||||
if self.config.pretty {
|
||||
json::to_writer_pretty(&mut self.wtr, message)?;
|
||||
} else {
|
||||
@@ -552,7 +555,7 @@ impl<W> JSON<W> {
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct JSONSink<'p, 's, M: Matcher, W: 's> {
|
||||
pub struct JSONSink<'p, 's, M: Matcher, W> {
|
||||
matcher: M,
|
||||
json: &'s mut JSON<W>,
|
||||
path: Option<&'p Path>,
|
||||
@@ -603,7 +606,12 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
|
||||
/// Execute the matcher over the given bytes and record the match
|
||||
/// locations if the current configuration demands match granularity.
|
||||
fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
fn record_matches(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
self.json.matches.clear();
|
||||
// If printing requires knowing the location of each individual match,
|
||||
// then compute and stored those right now for use later. While this
|
||||
@@ -612,12 +620,17 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
// the extent that it's easy to ensure that we never do more than
|
||||
// one search to find the matches.
|
||||
let matches = &mut self.json.matches;
|
||||
self.matcher
|
||||
.find_iter(bytes, |m| {
|
||||
matches.push(m);
|
||||
find_iter_at_in_context(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range.clone(),
|
||||
|m| {
|
||||
let (s, e) = (m.start() - range.start, m.end() - range.start);
|
||||
matches.push(Match::new(s, e));
|
||||
true
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
},
|
||||
)?;
|
||||
// Don't report empty matches appearing at the end of the bytes.
|
||||
if !matches.is_empty()
|
||||
&& matches.last().unwrap().is_empty()
|
||||
@@ -644,6 +657,16 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
self.after_context_remaining == 0
|
||||
}
|
||||
|
||||
/// Returns whether the current match count exceeds the configured limit.
|
||||
/// If there is no limit, then this always returns false.
|
||||
fn match_more_than_limit(&self) -> bool {
|
||||
let limit = match self.json.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
self.match_count > limit
|
||||
}
|
||||
|
||||
/// Write the "begin" message.
|
||||
fn write_begin_message(&mut self) -> io::Result<()> {
|
||||
if self.begin_printed {
|
||||
@@ -662,13 +685,30 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
mat: &SinkMatch<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.write_begin_message()?;
|
||||
|
||||
self.match_count += 1;
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
self.record_matches(mat.bytes())?;
|
||||
// When we've exceeded our match count, then the remaining context
|
||||
// lines should not be reset, but instead, decremented. This avoids a
|
||||
// bug where we display more matches than a configured limit. The main
|
||||
// idea here is that 'matched' might be called again while printing
|
||||
// an after-context line. In that case, we should treat this as a
|
||||
// contextual line rather than a matching line for the purposes of
|
||||
// termination.
|
||||
if self.match_more_than_limit() {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
} else {
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
}
|
||||
|
||||
self.record_matches(
|
||||
searcher,
|
||||
mat.buffer(),
|
||||
mat.bytes_range_in_buffer(),
|
||||
)?;
|
||||
self.stats.add_matches(self.json.matches.len() as u64);
|
||||
self.stats.add_matched_lines(mat.lines().count() as u64);
|
||||
|
||||
@@ -687,7 +727,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
ctx: &SinkContext,
|
||||
ctx: &SinkContext<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.write_begin_message()?;
|
||||
self.json.matches.clear();
|
||||
@@ -697,7 +737,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
}
|
||||
let submatches = if searcher.invert_match() {
|
||||
self.record_matches(ctx.bytes())?;
|
||||
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
SubMatches::new(ctx.bytes(), &self.json.matches)
|
||||
} else {
|
||||
SubMatches::empty()
|
||||
@@ -799,7 +839,7 @@ impl<'a> SubMatches<'a> {
|
||||
}
|
||||
|
||||
/// Return this set of match ranges as a slice.
|
||||
fn as_slice(&self) -> &[jsont::SubMatch] {
|
||||
fn as_slice(&self) -> &[jsont::SubMatch<'_>] {
|
||||
match *self {
|
||||
SubMatches::Empty => &[],
|
||||
SubMatches::Small(ref x) => x,
|
||||
@@ -871,6 +911,38 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_matches_after_context() {
|
||||
let haystack = "\
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
";
|
||||
let matcher = RegexMatcher::new(r"d").unwrap();
|
||||
let mut printer =
|
||||
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.after_context(2)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
haystack.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
|
||||
assert_eq!(got.lines().count(), 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_match() {
|
||||
let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
|
||||
|
@@ -13,7 +13,7 @@ use std::str;
|
||||
use base64;
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
use stats::Stats;
|
||||
use crate::stats::Stats;
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(tag = "type", content = "data")]
|
||||
@@ -90,7 +90,7 @@ enum Data<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Data<'a> {
|
||||
fn from_bytes(bytes: &[u8]) -> Data {
|
||||
fn from_bytes(bytes: &[u8]) -> Data<'_> {
|
||||
match str::from_utf8(bytes) {
|
||||
Ok(text) => Data::Text { text: Cow::Borrowed(text) },
|
||||
Err(_) => Data::Bytes { bytes },
|
||||
@@ -98,7 +98,7 @@ impl<'a> Data<'a> {
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn from_path(path: &Path) -> Data {
|
||||
fn from_path(path: &Path) -> Data<'_> {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
match path.to_str() {
|
||||
|
@@ -27,10 +27,6 @@ contain matches.
|
||||
This example shows how to create a "standard" printer and execute a search.
|
||||
|
||||
```
|
||||
extern crate grep_regex;
|
||||
extern crate grep_printer;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use grep_regex::RegexMatcher;
|
||||
@@ -68,29 +64,26 @@ fn example() -> Result<(), Box<Error>> {
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub use crate::color::{
|
||||
default_color_specs, ColorError, ColorSpecs, UserColorSpec,
|
||||
};
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate base64;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[cfg(test)]
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde;
|
||||
#[cfg(feature = "serde1")]
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde_json;
|
||||
extern crate termcolor;
|
||||
pub use crate::json::{JSONBuilder, JSONSink, JSON};
|
||||
pub use crate::standard::{Standard, StandardBuilder, StandardSink};
|
||||
pub use crate::stats::Stats;
|
||||
pub use crate::summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
|
||||
pub use crate::util::PrinterPath;
|
||||
|
||||
pub use color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec};
|
||||
#[cfg(feature = "serde1")]
|
||||
pub use json::{JSONBuilder, JSONSink, JSON};
|
||||
pub use standard::{Standard, StandardBuilder, StandardSink};
|
||||
pub use stats::Stats;
|
||||
pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
|
||||
pub use util::PrinterPath;
|
||||
// The maximum number of bytes to execute a search to account for look-ahead.
|
||||
//
|
||||
// This is an unfortunate kludge since PCRE2 doesn't provide a way to search
|
||||
// a substring of some input while accounting for look-ahead. In theory, we
|
||||
// could refactor the various 'grep' interfaces to account for it, but it would
|
||||
// be a large change. So for now, we just let PCRE2 go looking a bit for a
|
||||
// match without searching the entire rest of the contents.
|
||||
//
|
||||
// Note that this kludge is only active in multi-line mode.
|
||||
const MAX_LOOK_AHEAD: usize = 128;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
@@ -8,15 +8,18 @@ use std::time::Instant;
|
||||
use bstr::ByteSlice;
|
||||
use grep_matcher::{Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkError,
|
||||
SinkFinish, SinkMatch,
|
||||
LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish,
|
||||
SinkMatch,
|
||||
};
|
||||
use termcolor::{ColorSpec, NoColor, WriteColor};
|
||||
|
||||
use color::ColorSpecs;
|
||||
use counter::CounterWriter;
|
||||
use stats::Stats;
|
||||
use util::{trim_ascii_prefix, PrinterPath, Replacer, Sunk};
|
||||
use crate::color::ColorSpecs;
|
||||
use crate::counter::CounterWriter;
|
||||
use crate::stats::Stats;
|
||||
use crate::util::{
|
||||
find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator,
|
||||
PrinterPath, Replacer, Sunk,
|
||||
};
|
||||
|
||||
/// The configuration for the standard printer.
|
||||
///
|
||||
@@ -31,6 +34,7 @@ struct Config {
|
||||
path: bool,
|
||||
only_matching: bool,
|
||||
per_match: bool,
|
||||
per_match_one_line: bool,
|
||||
replacement: Arc<Option<Vec<u8>>>,
|
||||
max_columns: Option<u64>,
|
||||
max_columns_preview: bool,
|
||||
@@ -55,6 +59,7 @@ impl Default for Config {
|
||||
path: true,
|
||||
only_matching: false,
|
||||
per_match: false,
|
||||
per_match_one_line: false,
|
||||
replacement: Arc::new(None),
|
||||
max_columns: None,
|
||||
max_columns_preview: false,
|
||||
@@ -219,15 +224,36 @@ impl StandardBuilder {
|
||||
/// the `column` option, which will show the starting column number for
|
||||
/// every match on every line.
|
||||
///
|
||||
/// When multi-line mode is enabled, each match and its accompanying lines
|
||||
/// are printed. As with single line matches, if a line contains multiple
|
||||
/// matches (even if only partially), then that line is printed once for
|
||||
/// each match it participates in.
|
||||
/// When multi-line mode is enabled, each match is printed, including every
|
||||
/// line in the match. As with single line matches, if a line contains
|
||||
/// multiple matches (even if only partially), then that line is printed
|
||||
/// once for each match it participates in, assuming it's the first line in
|
||||
/// that match. In multi-line mode, column numbers only indicate the start
|
||||
/// of a match. Subsequent lines in a multi-line match always have a column
|
||||
/// number of `1`.
|
||||
///
|
||||
/// When a match contains multiple lines, enabling `per_match_one_line`
|
||||
/// will cause only the first line each in match to be printed.
|
||||
pub fn per_match(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||
self.config.per_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Print at most one line per match when `per_match` is enabled.
|
||||
///
|
||||
/// By default, every line in each match found is printed when `per_match`
|
||||
/// is enabled. However, this is sometimes undesirable, e.g., when you
|
||||
/// only ever want one line per match.
|
||||
///
|
||||
/// This is only applicable when multi-line matching is enabled, since
|
||||
/// otherwise, matches are guaranteed to span one line.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn per_match_one_line(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||
self.config.per_match_one_line = yes;
|
||||
self
|
||||
}
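A hedged sketch of the new option in use; the pattern and input are illustrative. With `per_match(true)` alone, every line of a multi-line match is printed; adding `per_match_one_line(true)` keeps only the first line of each match, which is what vimgrep-style consumers expect:

```rust
use grep_printer::StandardBuilder;
use grep_regex::RegexMatcher;
use grep_searcher::SearcherBuilder;
use termcolor::NoColor;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let matcher = RegexMatcher::new(r"(?s)foo.+?bar")?;
    let mut printer = StandardBuilder::new()
        .per_match(true)
        .per_match_one_line(true)
        .column(true)
        .build(NoColor::new(vec![]));
    SearcherBuilder::new()
        .multi_line(true)
        .line_number(true)
        .build()
        .search_reader(&matcher, &b"foo\nbar\n"[..], printer.sink(&matcher))?;
    let got = String::from_utf8(printer.into_inner().into_inner())?;
    // The match spans two lines, but only its first line is printed.
    assert!(got.starts_with("1:1:foo"));
    Ok(())
}
```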
|
||||
|
||||
/// Set the bytes that will be used to replace each occurrence of a match
|
||||
/// found.
|
||||
///
|
||||
@@ -292,9 +318,6 @@ impl StandardBuilder {
|
||||
/// Column numbers are computed in terms of bytes from the start of the
|
||||
/// line being printed.
|
||||
///
|
||||
/// For matches that span multiple lines, the column number for each
|
||||
/// matching line is in terms of the first matching line.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn column(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||
self.config.column = yes;
|
||||
@@ -602,7 +625,7 @@ impl<W> Standard<W> {
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct StandardSink<'p, 's, M: Matcher, W: 's> {
|
||||
pub struct StandardSink<'p, 's, M: Matcher, W> {
|
||||
matcher: M,
|
||||
standard: &'s mut Standard<W>,
|
||||
replacer: Replacer<M>,
|
||||
@@ -662,7 +685,12 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
|
||||
/// Execute the matcher over the given bytes and record the match
|
||||
/// locations if the current configuration demands match granularity.
|
||||
fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
fn record_matches(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
self.standard.matches.clear();
|
||||
if !self.needs_match_granularity {
|
||||
return Ok(());
|
||||
@@ -675,16 +703,21 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
// one search to find the matches (well, for replacements, we do one
|
||||
// additional search to perform the actual replacement).
|
||||
let matches = &mut self.standard.matches;
|
||||
self.matcher
|
||||
.find_iter(bytes, |m| {
|
||||
matches.push(m);
|
||||
find_iter_at_in_context(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range.clone(),
|
||||
|m| {
|
||||
let (s, e) = (m.start() - range.start, m.end() - range.start);
|
||||
matches.push(Match::new(s, e));
|
||||
true
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
},
|
||||
)?;
|
||||
// Don't report empty matches appearing at the end of the bytes.
|
||||
if !matches.is_empty()
|
||||
&& matches.last().unwrap().is_empty()
|
||||
&& matches.last().unwrap().start() >= bytes.len()
|
||||
&& matches.last().unwrap().start() >= range.end
|
||||
{
|
||||
matches.pop().unwrap();
|
||||
}
|
||||
@@ -695,14 +728,25 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
/// replacement, lazily allocating memory if necessary.
|
||||
///
|
||||
/// To access the result of a replacement, use `replacer.replacement()`.
|
||||
fn replace(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
fn replace(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
self.replacer.clear();
|
||||
if self.standard.config.replacement.is_some() {
|
||||
let replacement = (*self.standard.config.replacement)
|
||||
.as_ref()
|
||||
.map(|r| &*r)
|
||||
.unwrap();
|
||||
self.replacer.replace_all(&self.matcher, bytes, replacement)?;
|
||||
self.replacer.replace_all(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range,
|
||||
replacement,
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -722,6 +766,16 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
}
|
||||
self.after_context_remaining == 0
|
||||
}
|
||||
|
||||
/// Returns whether the current match count exceeds the configured limit.
|
||||
/// If there is no limit, then this always returns false.
|
||||
fn match_more_than_limit(&self) -> bool {
|
||||
let limit = match self.standard.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
self.match_count > limit
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
@@ -730,13 +784,29 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
mat: &SinkMatch<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.match_count += 1;
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
// When we've exceeded our match count, then the remaining context
|
||||
// lines should not be reset, but instead, decremented. This avoids a
|
||||
// bug where we display more matches than a configured limit. The main
|
||||
// idea here is that 'matched' might be called again while printing
|
||||
// an after-context line. In that case, we should treat this as a
|
||||
// contextual line rather than a matching line for the purposes of
|
||||
// termination.
|
||||
if self.match_more_than_limit() {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
} else {
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
}
|
||||
|
||||
self.record_matches(mat.bytes())?;
|
||||
self.replace(mat.bytes())?;
|
||||
self.record_matches(
|
||||
searcher,
|
||||
mat.buffer(),
|
||||
mat.bytes_range_in_buffer(),
|
||||
)?;
|
||||
self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
|
||||
|
||||
if let Some(ref mut stats) = self.stats {
|
||||
stats.add_matches(self.standard.matches.len() as u64);
|
||||
@@ -755,7 +825,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
ctx: &SinkContext,
|
||||
ctx: &SinkContext<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.standard.matches.clear();
|
||||
self.replacer.clear();
|
||||
@@ -765,8 +835,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
}
|
||||
if searcher.invert_match() {
|
||||
self.record_matches(ctx.bytes())?;
|
||||
self.replace(ctx.bytes())?;
|
||||
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
}
|
||||
if searcher.binary_detection().convert_byte().is_some() {
|
||||
if self.binary_byte_offset.is_some() {
|
||||
@@ -834,7 +904,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
/// A StandardImpl is initialized every time a match or a contextual line is
|
||||
/// reported.
|
||||
#[derive(Debug)]
|
||||
struct StandardImpl<'a, M: 'a + Matcher, W: 'a> {
|
||||
struct StandardImpl<'a, M: Matcher, W> {
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<'a, 'a, M, W>,
|
||||
sunk: Sunk<'a>,
|
||||
@@ -846,7 +916,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// Bundle self with a searcher and return the core implementation of Sink.
|
||||
fn new(
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<M, W>,
|
||||
sink: &'a StandardSink<'_, '_, M, W>,
|
||||
) -> StandardImpl<'a, M, W> {
|
||||
StandardImpl {
|
||||
searcher: searcher,
|
||||
@@ -860,7 +930,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// for use with handling matching lines.
|
||||
fn from_match(
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<M, W>,
|
||||
sink: &'a StandardSink<'_, '_, M, W>,
|
||||
mat: &'a SinkMatch<'a>,
|
||||
) -> StandardImpl<'a, M, W> {
|
||||
let sunk = Sunk::from_sink_match(
|
||||
@@ -875,7 +945,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// for use with handling contextual lines.
|
||||
fn from_context(
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<M, W>,
|
||||
sink: &'a StandardSink<'_, '_, M, W>,
|
||||
ctx: &'a SinkContext<'a>,
|
||||
) -> StandardImpl<'a, M, W> {
|
||||
let sunk = Sunk::from_sink_context(
|
||||
@@ -1090,7 +1160,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
self.write_prelude(
|
||||
self.sunk.absolute_byte_offset() + line.start() as u64,
|
||||
self.sunk.line_number().map(|n| n + count),
|
||||
Some(m.start() as u64 + 1),
|
||||
Some(m.start().saturating_sub(line.start()) as u64 + 1),
|
||||
)?;
|
||||
count += 1;
|
||||
if self.exceeds_max_columns(&bytes[line]) {
|
||||
@@ -1115,6 +1185,15 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
}
|
||||
}
|
||||
self.write_line_term()?;
|
||||
// It turns out that vimgrep really only wants one line per
|
||||
// match, even when a match spans multiple lines. So when
|
||||
// that option is enabled, we just quit after printing the
|
||||
// first line.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1866
|
||||
if self.config().per_match_one_line {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -1469,14 +1548,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
}
|
||||
|
||||
fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) {
|
||||
let lineterm = self.searcher.line_terminator();
|
||||
if lineterm.is_suffix(&buf[*line]) {
|
||||
let mut end = line.end() - 1;
|
||||
if lineterm.is_crlf() && buf[end - 1] == b'\r' {
|
||||
end -= 1;
|
||||
}
|
||||
*line = line.with_end(end);
|
||||
}
|
||||
trim_line_terminator(&self.searcher, buf, line);
|
||||
}
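The method body above is replaced by a call to a shared helper in `util`. Presumably that free function looks roughly like the following sketch, reconstructed from the removed code rather than copied from this diff:

```rust
use grep_matcher::Match;
use grep_searcher::Searcher;

/// Trim the searcher's configured line terminator from the end of `line`,
/// also dropping a preceding '\r' when the terminator is CRLF.
fn trim_line_terminator(searcher: &Searcher, buf: &[u8], line: &mut Match) {
    let lineterm = searcher.line_terminator();
    if lineterm.is_suffix(&buf[*line]) {
        let mut end = line.end() - 1;
        if lineterm.is_crlf() && end > 0 && buf[end - 1] == b'\r' {
            end -= 1;
        }
        *line = line.with_end(end);
    }
}
```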
|
||||
|
||||
fn has_line_terminator(&self, buf: &[u8]) -> bool {
|
||||
@@ -1522,7 +1594,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// multiple lines.
|
||||
///
|
||||
/// Note that this doesn't just return whether the searcher is in multi
|
||||
/// line mode, but also checks if the mater can match over multiple lines.
|
||||
/// line mode, but also checks if the matcher can match over multiple lines.
|
||||
/// If it can't, then we don't need multi line handling, even if the
|
||||
/// searcher has multi line mode enabled.
|
||||
fn multi_line(&self) -> bool {
|
||||
@@ -1545,11 +1617,12 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_matcher::LineTerminator;
|
||||
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
|
||||
use grep_searcher::SearcherBuilder;
|
||||
use termcolor::NoColor;
|
||||
use termcolor::{Ansi, NoColor};
|
||||
|
||||
use super::{Standard, StandardBuilder};
|
||||
use super::{ColorSpecs, Standard, StandardBuilder};
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@@ -1574,6 +1647,10 @@ and exhibited clearly, with a label attached.\
|
||||
String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap()
|
||||
}
|
||||
|
||||
fn printer_contents_ansi(printer: &mut Standard<Ansi<Vec<u8>>>) -> String {
|
||||
String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reports_match() {
|
||||
let matcher = RegexMatcher::new("Sherlock").unwrap();
|
||||
@@ -2990,9 +3067,9 @@ Holmeses, success in the province of detective work must always
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2:16:Holmeses, success in the province of detective work must always
|
||||
2:1:Holmeses, success in the province of detective work must always
|
||||
5:12:but Doctor Watson has to have it taken out for him and dusted,
|
||||
6:12:and exhibited clearly, with a label attached.
|
||||
6:1:and exhibited clearly, with a label attached.
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
@@ -3019,9 +3096,94 @@ Holmeses, success in the province of detective work must always
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2:16:Holmeses, success in the province of detective work must always
|
||||
2:123:Holmeses, success in the province of detective work must always
|
||||
3:123:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
2:1:Holmeses, success in the province of detective work must always
|
||||
2:58:Holmeses, success in the province of detective work must always
|
||||
3:1:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn per_match_multi_line1_only_first_line() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.per_match(true)
|
||||
.per_match_one_line(true)
|
||||
.column(true)
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.line_number(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:9:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
3:49:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn per_match_multi_line2_only_first_line() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.per_match(true)
|
||||
.per_match_one_line(true)
|
||||
.column(true)
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.line_number(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
5:12:but Doctor Watson has to have it taken out for him and dusted,
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn per_match_multi_line3_only_first_line() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s)Watson.+?Holmeses|always.+?be").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.per_match(true)
|
||||
.per_match_one_line(true)
|
||||
.column(true)
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.line_number(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2:58:Holmeses, success in the province of detective work must always
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
@@ -3080,6 +3242,80 @@ Holmeses, success in the province of detective work must always
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
// This is a somewhat weird test that checks the behavior of attempting
|
||||
// to replace a line terminator with something else.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1311
|
||||
#[test]
|
||||
fn replacement_multi_line() {
|
||||
let matcher = RegexMatcher::new(r"\n").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.replacement(Some(b"?".to_vec()))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(true)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"hello\nworld\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "1:hello?world?\n";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replacement_multi_line_diff_line_term() {
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.line_terminator(Some(b'\x00'))
|
||||
.build(r"\n")
|
||||
.unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.replacement(Some(b"?".to_vec()))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_terminator(LineTerminator::byte(b'\x00'))
|
||||
.line_number(true)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"hello\nworld\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "1:hello?world?\x00";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replacement_multi_line_combine_lines() {
|
||||
let matcher = RegexMatcher::new(r"\n(.)?").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.replacement(Some(b"?$1".to_vec()))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(true)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"hello\nworld\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "1:hello?world?\n";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replacement_max_columns() {
|
||||
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
|
||||
@@ -3386,4 +3622,57 @@ and xxx clearly, with a label attached.
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regression_search_empty_with_crlf() {
|
||||
let matcher =
|
||||
RegexMatcherBuilder::new().crlf(true).build(r"x?").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.color_specs(ColorSpecs::default_with_color())
|
||||
.build(Ansi::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_terminator(LineTerminator::crlf())
|
||||
.build()
|
||||
.search_reader(&matcher, &b"\n"[..], printer.sink(&matcher))
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents_ansi(&mut printer);
|
||||
assert!(!got.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regression_after_context_with_match() {
|
||||
let haystack = "\
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
";
|
||||
|
||||
let matcher = RegexMatcherBuilder::new().build(r"d").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(true)
|
||||
.after_context(2)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
haystack.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "4:d\n5-e\n6:d\n";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
}
|
||||
|
@@ -1,14 +1,14 @@
|
||||
use std::ops::{Add, AddAssign};
|
||||
use std::time::Duration;
|
||||
|
||||
use util::NiceDuration;
|
||||
use crate::util::NiceDuration;
|
||||
|
||||
/// Summary statistics produced at the end of a search.
|
||||
///
|
||||
/// When statistics are reported by a printer, they correspond to all searches
|
||||
/// executed with that printer.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "serde1", derive(Serialize))]
|
||||
#[cfg_attr(feature = "serde1", derive(serde::Serialize))]
|
||||
pub struct Stats {
|
||||
elapsed: NiceDuration,
|
||||
searches: u64,
|
||||
|
@@ -8,10 +8,10 @@ use grep_matcher::Matcher;
|
||||
use grep_searcher::{Searcher, Sink, SinkError, SinkFinish, SinkMatch};
|
||||
use termcolor::{ColorSpec, NoColor, WriteColor};
|
||||
|
||||
use color::ColorSpecs;
|
||||
use counter::CounterWriter;
|
||||
use stats::Stats;
|
||||
use util::PrinterPath;
|
||||
use crate::color::ColorSpecs;
|
||||
use crate::counter::CounterWriter;
|
||||
use crate::stats::Stats;
|
||||
use crate::util::{find_iter_at_in_context, PrinterPath};
|
||||
|
||||
/// The configuration for the summary printer.
|
||||
///
|
||||
@@ -457,7 +457,7 @@ impl<W> Summary<W> {
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct SummarySink<'p, 's, M: Matcher, W: 's> {
|
||||
pub struct SummarySink<'p, 's, M: Matcher, W> {
|
||||
matcher: M,
|
||||
summary: &'s mut Summary<W>,
|
||||
path: Option<PrinterPath<'p>>,
|
||||
@@ -504,6 +504,17 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
|
||||
self.stats.as_ref()
|
||||
}
|
||||
|
||||
/// Returns true if and only if the searcher may report matches over
|
||||
/// multiple lines.
|
||||
///
|
||||
/// Note that this doesn't just return whether the searcher is in multi
|
||||
/// line mode, but also checks if the matcher can match over multiple lines.
|
||||
/// If it can't, then we don't need multi line handling, even if the
|
||||
/// searcher has multi line mode enabled.
|
||||
fn multi_line(&self, searcher: &Searcher) -> bool {
|
||||
searcher.multi_line_with_matcher(&self.matcher)
|
||||
}
|
||||
|
||||
/// Returns true if this printer should quit.
|
||||
///
|
||||
/// This implements the logic for handling quitting after seeing a certain
|
||||
@@ -579,32 +590,39 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.match_count += 1;
|
||||
if let Some(ref mut stats) = self.stats {
|
||||
let mut match_count = 0;
|
||||
self.matcher
|
||||
.find_iter(mat.bytes(), |_| {
|
||||
match_count += 1;
|
||||
let is_multi_line = self.multi_line(searcher);
|
||||
let sink_match_count = if self.stats.is_none() && !is_multi_line {
|
||||
1
|
||||
} else {
|
||||
// This gives us as many bytes as the searcher can offer. This
|
||||
// isn't guaranteed to hold the necessary context to get match
|
||||
// detection correct (because of look-around), but it does in
|
||||
// practice.
|
||||
let buf = mat.buffer();
|
||||
let range = mat.bytes_range_in_buffer();
|
||||
let mut count = 0;
|
||||
find_iter_at_in_context(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
buf,
|
||||
range,
|
||||
|_| {
|
||||
count += 1;
|
||||
true
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
if match_count == 0 {
|
||||
// It is possible for the match count to be zero when
|
||||
// look-around is used. Since `SinkMatch` won't necessarily
|
||||
// contain the look-around in its match span, the search here
|
||||
// could fail to find anything.
|
||||
//
|
||||
// It seems likely that setting match_count=1 here is probably
|
||||
// wrong in some cases, but I don't think we can do any
|
||||
// better. (Because this printer cannot assume that subsequent
|
||||
// contents have been loaded into memory, so we have no way of
|
||||
// increasing the search span here.)
|
||||
match_count = 1;
|
||||
}
|
||||
stats.add_matches(match_count);
|
||||
},
|
||||
)?;
|
||||
count
|
||||
};
|
||||
if is_multi_line {
|
||||
self.match_count += sink_match_count;
|
||||
} else {
|
||||
self.match_count += 1;
|
||||
}
|
||||
if let Some(ref mut stats) = self.stats {
|
||||
stats.add_matches(sink_match_count);
|
||||
stats.add_matched_lines(mat.lines().count() as u64);
|
||||
} else if self.summary.config.kind.quit_early() {
|
||||
return Ok(false);
|
||||
|
@@ -7,11 +7,13 @@ use std::time;
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
|
||||
LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch,
|
||||
};
|
||||
#[cfg(feature = "serde1")]
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
use crate::MAX_LOOK_AHEAD;
|
||||
|
||||
/// A type for handling replacements while amortizing allocation.
|
||||
pub struct Replacer<M: Matcher> {
|
||||
space: Option<Space<M>>,
|
||||
@@ -27,7 +29,7 @@ struct Space<M: Matcher> {
|
||||
}
|
||||
|
||||
impl<M: Matcher> fmt::Debug for Replacer<M> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
let (dst, matches) = self.replacement().unwrap_or((&[], &[]));
|
||||
f.debug_struct("Replacer")
|
||||
.field("dst", &dst)
|
||||
@@ -52,18 +54,41 @@ impl<M: Matcher> Replacer<M> {
|
||||
/// This can fail if the underlying matcher reports an error.
|
||||
pub fn replace_all<'a>(
|
||||
&'a mut self,
|
||||
searcher: &Searcher,
|
||||
matcher: &M,
|
||||
subject: &[u8],
|
||||
mut subject: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
replacement: &[u8],
|
||||
) -> io::Result<()> {
|
||||
// See the giant comment in 'find_iter_at_in_context' below for why we
|
||||
// do this dance.
|
||||
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
|
||||
if is_multi_line {
|
||||
if subject[range.end..].len() >= MAX_LOOK_AHEAD {
|
||||
subject = &subject[..range.end + MAX_LOOK_AHEAD];
|
||||
}
|
||||
} else {
|
||||
// When searching a single line, we should remove the line
|
||||
// terminator. Otherwise, it's possible for the regex (via
|
||||
// look-around) to observe the line terminator and not match
|
||||
// because of it.
|
||||
let mut m = Match::new(0, range.end);
|
||||
trim_line_terminator(searcher, subject, &mut m);
|
||||
subject = &subject[..m.end()];
|
||||
}
|
||||
{
|
||||
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
|
||||
self.allocate(matcher)?;
|
||||
dst.clear();
|
||||
matches.clear();
|
||||
|
||||
matcher
|
||||
.replace_with_captures(subject, caps, dst, |caps, dst| {
|
||||
replace_with_captures_in_context(
|
||||
matcher,
|
||||
subject,
|
||||
range.clone(),
|
||||
caps,
|
||||
dst,
|
||||
|caps, dst| {
|
||||
let start = dst.len();
|
||||
caps.interpolate(
|
||||
|name| matcher.capture_index(name),
|
||||
@@ -74,8 +99,9 @@ impl<M: Matcher> Replacer<M> {
|
||||
let end = dst.len();
|
||||
matches.push(Match::new(start, end));
|
||||
true
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
},
|
||||
)
|
||||
.map_err(io::Error::error_message)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -304,7 +330,7 @@ impl<'a> PrinterPath<'a> {
|
||||
pub struct NiceDuration(pub time::Duration);
|
||||
|
||||
impl fmt::Display for NiceDuration {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{:0.6}s", self.fractional_seconds())
|
||||
}
|
||||
}
|
||||
@@ -357,3 +383,108 @@ pub fn trim_ascii_prefix(
|
||||
.count();
|
||||
range.with_start(range.start() + count)
|
||||
}
|
||||
|
||||
pub fn find_iter_at_in_context<M, F>(
|
||||
searcher: &Searcher,
|
||||
matcher: M,
|
||||
mut bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
mut matched: F,
|
||||
) -> io::Result<()>
|
||||
where
|
||||
M: Matcher,
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
// This strange dance is to account for the possibility of look-ahead in
|
||||
// the regex. The problem here is that mat.bytes() doesn't include the
|
||||
// lines beyond the match boundaries in multi-line mode, which means that
|
||||
// when we try to rediscover the full set of matches here, the regex may no
|
||||
// longer match if it required some look-ahead beyond the matching lines.
|
||||
//
|
||||
// PCRE2 (and the grep-matcher interfaces) has no way of specifying an end
|
||||
// bound of the search. So we kludge it and let the regex engine search the
|
||||
// rest of the buffer... But to avoid things getting too crazy, we cap the
|
||||
// buffer.
|
||||
//
|
||||
// If it weren't for multi-line mode, then none of this would be needed.
|
||||
// Alternatively, if we refactored the grep interfaces to pass along the
|
||||
// full set of matches (if available) from the searcher, then that might
|
||||
// also help here. But that winds up paying an upfront unavoidable cost for
|
||||
// the case where matches don't need to be counted. So then you'd have to
|
||||
// introduce a way to pass along matches conditionally, only when needed.
|
||||
// Yikes.
|
||||
//
|
||||
// Maybe the bigger picture thing here is that the searcher should be
|
||||
// responsible for finding matches when necessary, and the printer
|
||||
// shouldn't be involved in this business in the first place. Sigh. Live
|
||||
// and learn. Abstraction boundaries are hard.
|
||||
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
|
||||
if is_multi_line {
|
||||
if bytes[range.end..].len() >= MAX_LOOK_AHEAD {
|
||||
bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
|
||||
}
|
||||
} else {
|
||||
// When searching a single line, we should remove the line terminator.
|
||||
// Otherwise, it's possible for the regex (via look-around) to observe
|
||||
// the line terminator and not match because of it.
|
||||
let mut m = Match::new(0, range.end);
|
||||
trim_line_terminator(searcher, bytes, &mut m);
|
||||
bytes = &bytes[..m.end()];
|
||||
}
|
||||
matcher
|
||||
.find_iter_at(bytes, range.start, |m| {
|
||||
if m.start() >= range.end {
|
||||
return false;
|
||||
}
|
||||
matched(m)
|
||||
})
|
||||
.map_err(io::Error::error_message)
|
||||
}
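The capping logic at the top of `find_iter_at_in_context` can be exercised in isolation. A minimal, hypothetical sketch (the real `MAX_LOOK_AHEAD` is a crate constant; the value below is assumed purely for illustration):

```rust
// Keep some bytes past the reported match range so look-ahead in the regex
// can still succeed, but bound the extra work.
const MAX_LOOK_AHEAD: usize = 128; // assumed value, for illustration only

fn cap_for_look_ahead(bytes: &[u8], range_end: usize) -> &[u8] {
    if bytes[range_end..].len() >= MAX_LOOK_AHEAD {
        &bytes[..range_end + MAX_LOOK_AHEAD]
    } else {
        bytes
    }
}

fn main() {
    let buf = vec![b'a'; 1000];
    // Plenty of trailing bytes: cap at range_end + MAX_LOOK_AHEAD.
    assert_eq!(cap_for_look_ahead(&buf, 100).len(), 228);
    // Near the end of the buffer: nothing to cap.
    assert_eq!(cap_for_look_ahead(&buf, 950).len(), 1000);
}
```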
|
||||
|
||||
/// Given a buf and some bounds, if there is a line terminator at the end of
|
||||
/// the given bounds in buf, then the bounds are trimmed to remove the line
|
||||
/// terminator.
|
||||
pub fn trim_line_terminator(
|
||||
searcher: &Searcher,
|
||||
buf: &[u8],
|
||||
line: &mut Match,
|
||||
) {
|
||||
let lineterm = searcher.line_terminator();
|
||||
if lineterm.is_suffix(&buf[*line]) {
|
||||
let mut end = line.end() - 1;
|
||||
if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
|
||||
end -= 1;
|
||||
}
|
||||
*line = line.with_end(end);
|
||||
}
|
||||
}
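A standalone sketch of the same trimming idea, assuming a plain `\n`/`\r\n` terminator rather than the searcher's `LineTerminator` type: shrink the line span so that look-around in the regex cannot observe the terminator.

```rust
// Trim a trailing "\n" (and a preceding "\r", if present) from the end of a
// line span. This mirrors trim_line_terminator above for the common case.
fn trim_nl(buf: &[u8], mut end: usize) -> usize {
    if end > 0 && buf[end - 1] == b'\n' {
        end -= 1;
        if end > 0 && buf[end - 1] == b'\r' {
            end -= 1;
        }
    }
    end
}

fn main() {
    assert_eq!(trim_nl(b"hello\r\n", 7), 5); // CRLF trimmed
    assert_eq!(trim_nl(b"hello\n", 6), 5);   // LF trimmed
    assert_eq!(trim_nl(b"hello", 5), 5);     // no terminator, unchanged
}
```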
|
||||
|
||||
/// Like `Matcher::replace_with_captures_at`, but accepts an end bound.
|
||||
///
|
||||
/// See also: `find_iter_at_in_context` for why we need this.
|
||||
fn replace_with_captures_in_context<M, F>(
|
||||
matcher: M,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
caps: &mut M::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
mut append: F,
|
||||
) -> Result<(), M::Error>
|
||||
where
|
||||
M: Matcher,
|
||||
F: FnMut(&M::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
let mut last_match = range.start;
|
||||
matcher.captures_iter_at(bytes, range.start, caps, |caps| {
|
||||
let m = caps.get(0).unwrap();
|
||||
if m.start() >= range.end {
|
||||
return false;
|
||||
}
|
||||
dst.extend(&bytes[last_match..m.start()]);
|
||||
last_match = m.end();
|
||||
append(caps, dst)
|
||||
})?;
|
||||
let end = std::cmp::min(bytes.len(), range.end);
|
||||
dst.extend(&bytes[last_match..end]);
|
||||
Ok(())
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-regex"
|
||||
version = "0.1.8" #:version
|
||||
version = "0.1.11" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use Rust's regex library with the 'grep' crate.
|
||||
@@ -10,13 +10,14 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "search", "pattern", "line"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = "0.2.10"
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
bstr = "1.1.0"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
regex-syntax = "0.6.5"
|
||||
thread_local = "1"
|
||||
thread_local = "1.1.2"
|
||||
|
@@ -26,9 +26,3 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-regex = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_regex;
|
||||
```
|
||||
|
@@ -3,13 +3,13 @@ use regex::bytes::{Regex, RegexBuilder};
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
use regex_syntax::hir::{self, Hir};
|
||||
|
||||
use ast::AstAnalysis;
|
||||
use crlf::crlfify;
|
||||
use error::Error;
|
||||
use literal::LiteralSets;
|
||||
use multi::alternation_literals;
|
||||
use non_matching::non_matching_bytes;
|
||||
use strip::strip_from_match;
|
||||
use crate::ast::AstAnalysis;
|
||||
use crate::crlf::crlfify;
|
||||
use crate::error::Error;
|
||||
use crate::literal::LiteralSets;
|
||||
use crate::multi::alternation_literals;
|
||||
use crate::non_matching::non_matching_bytes;
|
||||
use crate::strip::strip_from_match;
|
||||
|
||||
/// Config represents the configuration of a regex matcher in this crate.
|
||||
/// The configuration is itself a rough combination of the knobs found in
|
||||
@@ -175,6 +175,36 @@ impl ConfiguredHIR {
|
||||
self.config.crlf && self.expr.is_line_anchored_end()
|
||||
}
|
||||
|
||||
/// Returns the line terminator configured on this expression.
|
||||
///
|
||||
/// When we have beginning/end anchors (NOT line anchors), the fast line
|
||||
/// searching path isn't quite correct. Or at least, doesn't match the
|
||||
/// slow path. Namely, the slow path strips line terminators while the
|
||||
/// fast path does not. Since '$' (when multi-line mode is disabled)
|
||||
/// doesn't match at line boundaries, the existence of a line terminator
|
||||
/// might cause it to not match when it otherwise would with the line
|
||||
/// terminator stripped.
|
||||
///
|
||||
/// Since searching with text anchors is exceptionally rare in the
|
||||
/// context of line oriented searching (multi-line mode is basically
|
||||
/// always enabled), we just disable this optimization when there are
|
||||
/// text anchors. We disable it by not returning a line terminator, since
|
||||
/// without a line terminator, the fast search path can't be executed.
|
||||
///
|
||||
/// See: <https://github.com/BurntSushi/ripgrep/issues/2260>
|
||||
pub fn line_terminator(&self) -> Option<LineTerminator> {
|
||||
if self.is_any_anchored() {
|
||||
None
|
||||
} else {
|
||||
self.config.line_terminator
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if the underlying HIR has any text anchors.
|
||||
fn is_any_anchored(&self) -> bool {
|
||||
self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
|
||||
}
|
||||
|
||||
/// Builds a regular expression from this HIR expression.
|
||||
pub fn regex(&self) -> Result<Regex, Error> {
|
||||
self.pattern_to_regex(&self.expr.to_string())
|
||||
|
@@ -4,9 +4,9 @@ use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::Regex;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
use crate::config::ConfiguredHIR;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Clone, Debug)]
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
use util;
|
||||
use crate::util;
|
||||
|
||||
/// An error that can occur in this crate.
|
||||
///
|
||||
@@ -72,7 +72,7 @@ impl error::Error for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::NotAllowed(ref lit) => {
|
||||
|
@@ -1,20 +1,10 @@
|
||||
/*!
|
||||
An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate thread_local;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
pub use crate::error::{Error, ErrorKind};
|
||||
pub use crate::matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
|
||||
mod ast;
|
||||
mod config;
|
||||
|
@@ -9,7 +9,7 @@ use bstr::ByteSlice;
|
||||
use regex_syntax::hir::literal::{Literal, Literals};
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use util;
|
||||
use crate::util;
|
||||
|
||||
/// Represents prefix, suffix and inner "required" literals for a regular
|
||||
/// expression.
|
||||
@@ -55,7 +55,7 @@ impl LiteralSets {
|
||||
|
||||
if !word {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
log::debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything. But we only do this
|
||||
// if we aren't doing a word regex, since a word regex adds
|
||||
@@ -106,7 +106,7 @@ impl LiteralSets {
|
||||
&& !any_empty
|
||||
&& !any_white
|
||||
{
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
log::debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> = req_lits
|
||||
.into_iter()
|
||||
.map(|x| util::bytes_to_regex(x))
|
||||
@@ -149,27 +149,27 @@ impl LiteralSets {
|
||||
let lits = match (p_min_len, s_min_len) {
|
||||
(None, None) => return None,
|
||||
(Some(_), None) => {
|
||||
debug!("prefix literals found");
|
||||
log::debug!("prefix literals found");
|
||||
self.prefixes.literals()
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
debug!("suffix literals found");
|
||||
log::debug!("suffix literals found");
|
||||
self.suffixes.literals()
|
||||
}
|
||||
(Some(p), Some(s)) => {
|
||||
if p >= s {
|
||||
debug!("prefix literals found");
|
||||
log::debug!("prefix literals found");
|
||||
self.prefixes.literals()
|
||||
} else {
|
||||
debug!("suffix literals found");
|
||||
log::debug!("suffix literals found");
|
||||
self.suffixes.literals()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
debug!("prefix/suffix literals found: {:?}", lits);
|
||||
log::debug!("prefix/suffix literals found: {:?}", lits);
|
||||
if has_only_whitespace(lits) {
|
||||
debug!("dropping literals because one was whitespace");
|
||||
log::debug!("dropping literals because one was whitespace");
|
||||
return None;
|
||||
}
|
||||
let alts: Vec<String> =
|
||||
@@ -177,9 +177,9 @@ impl LiteralSets {
|
||||
// We're matching raw bytes, so disable Unicode mode.
|
||||
Some(format!("(?-u:{})", alts.join("|")))
|
||||
} else {
|
||||
debug!("required literal found: {:?}", util::show_bytes(lit));
|
||||
log::debug!("required literal found: {:?}", util::show_bytes(lit));
|
||||
if lit.chars().all(|c| c.is_whitespace()) {
|
||||
debug!("dropping literal because one was whitespace");
|
||||
log::debug!("dropping literal because one was whitespace");
|
||||
return None;
|
||||
}
|
||||
Some(format!("(?-u:{})", util::bytes_to_regex(&lit)))
|
||||
|
@@ -5,11 +5,11 @@ use grep_matcher::{
|
||||
};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
use config::{Config, ConfiguredHIR};
|
||||
use crlf::CRLFMatcher;
|
||||
use error::Error;
|
||||
use multi::MultiLiteralMatcher;
|
||||
use word::WordMatcher;
|
||||
use crate::config::{Config, ConfiguredHIR};
|
||||
use crate::crlf::CRLFMatcher;
|
||||
use crate::error::Error;
|
||||
use crate::multi::MultiLiteralMatcher;
|
||||
use crate::word::WordMatcher;
|
||||
|
||||
/// A builder for constructing a `Matcher` using regular expressions.
|
||||
///
|
||||
@@ -19,7 +19,7 @@ use word::WordMatcher;
|
||||
/// types of optimizations.
|
||||
///
|
||||
/// The syntax supported is documented as part of the regex crate:
|
||||
/// https://docs.rs/regex/*/regex/#syntax
|
||||
/// <https://docs.rs/regex/#syntax>.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcherBuilder {
|
||||
config: Config,
|
||||
@@ -41,19 +41,23 @@ impl RegexMatcherBuilder {
|
||||
/// pattern.
|
||||
///
|
||||
/// The syntax supported is documented as part of the regex crate:
|
||||
/// https://docs.rs/regex/*/regex/#syntax
|
||||
/// <https://docs.rs/regex/#syntax>.
|
||||
pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
let chir = self.config.hir(pattern)?;
|
||||
let fast_line_regex = chir.fast_line_regex()?;
|
||||
let non_matching_bytes = chir.non_matching_bytes();
|
||||
if let Some(ref re) = fast_line_regex {
|
||||
debug!("extracted fast line regex: {:?}", re);
|
||||
log::debug!("extracted fast line regex: {:?}", re);
|
||||
}
|
||||
|
||||
let matcher = RegexMatcherImpl::new(&chir)?;
|
||||
trace!("final regex: {:?}", matcher.regex());
|
||||
log::trace!("final regex: {:?}", matcher.regex());
|
||||
let mut config = self.config.clone();
|
||||
// We override the line terminator in case the configured expr doesn't
|
||||
// support it.
|
||||
config.line_terminator = chir.line_terminator();
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
config,
|
||||
matcher,
|
||||
fast_line_regex,
|
||||
non_matching_bytes,
|
||||
|
@@ -2,8 +2,8 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex_syntax::hir::Hir;
|
||||
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for an alternation of literals.
|
||||
///
|
||||
|
@@ -13,7 +13,10 @@ pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
/// the given expression.
|
||||
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty | HirKind::Anchor(_) | HirKind::WordBoundary(_) => {}
|
||||
HirKind::Empty | HirKind::WordBoundary(_) => {}
|
||||
HirKind::Anchor(_) => {
|
||||
set.remove(b'\n');
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
|
||||
set.remove(b);
|
||||
@@ -125,4 +128,12 @@ mod tests {
|
||||
assert_eq!(sparse(&extract(r"\xFF")), sparse_except(&[0xC3, 0xBF]));
|
||||
assert_eq!(sparse(&extract(r"(?-u)\xFF")), sparse_except(&[0xFF]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anchor() {
|
||||
assert_eq!(sparse(&extract(r"^")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"$")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"\A")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"\z")), sparse_except(&[b'\n']));
|
||||
}
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use grep_matcher::LineTerminator;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use error::{Error, ErrorKind};
|
||||
use crate::error::{Error, ErrorKind};
|
||||
|
||||
/// Return an HIR that is guaranteed to never match the given line terminator,
|
||||
/// if possible.
|
||||
@@ -106,7 +106,7 @@ mod tests {
|
||||
use regex_syntax::Parser;
|
||||
|
||||
use super::{strip_from_match, LineTerminator};
|
||||
use error::Error;
|
||||
use crate::error::Error;
|
||||
|
||||
fn roundtrip(pattern: &str, byte: u8) -> String {
|
||||
roundtrip_line_term(pattern, LineTerminator::byte(byte)).unwrap()
|
||||
|
@@ -4,11 +4,11 @@ use std::sync::Arc;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
use thread_local::CachedThreadLocal;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
use crate::config::ConfiguredHIR;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Debug)]
|
||||
@@ -21,19 +21,19 @@ pub struct WordMatcher {
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
/// A reusable buffer for finding the match location of the inner group.
|
||||
locs: Arc<CachedThreadLocal<RefCell<CaptureLocations>>>,
|
||||
locs: Arc<ThreadLocal<RefCell<CaptureLocations>>>,
|
||||
}
|
||||
|
||||
impl Clone for WordMatcher {
|
||||
fn clone(&self) -> WordMatcher {
|
||||
// We implement Clone manually so that we get a fresh CachedThreadLocal
|
||||
// such that it can set its own thread owner. This permits each thread
|
||||
// We implement Clone manually so that we get a fresh ThreadLocal such
|
||||
// that it can set its own thread owner. This permits each thread
|
||||
// using `locs` to hit the fast path.
|
||||
WordMatcher {
|
||||
regex: self.regex.clone(),
|
||||
original: self.original.clone(),
|
||||
names: self.names.clone(),
|
||||
locs: Arc::new(CachedThreadLocal::new()),
|
||||
locs: Arc::new(ThreadLocal::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -48,12 +48,12 @@ impl WordMatcher {
|
||||
let original =
|
||||
expr.with_pattern(|pat| format!("^(?:{})$", pat))?.regex()?;
|
||||
let word_expr = expr.with_pattern(|pat| {
|
||||
let pat = format!(r"(?:(?-m:^)|\W)({})(?:(?-m:$)|\W)", pat);
|
||||
debug!("word regex: {:?}", pat);
|
||||
let pat = format!(r"(?:(?m:^)|\W)({})(?:\W|(?m:$))", pat);
|
||||
log::debug!("word regex: {:?}", pat);
|
||||
pat
|
||||
})?;
|
||||
let regex = word_expr.regex()?;
|
||||
let locs = Arc::new(CachedThreadLocal::new());
|
||||
let locs = Arc::new(ThreadLocal::new());
|
||||
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
@@ -111,8 +111,15 @@ impl WordMatcher {
|
||||
}
|
||||
let (_, slen) = bstr::decode_utf8(&haystack[cand]);
|
||||
let (_, elen) = bstr::decode_last_utf8(&haystack[cand]);
|
||||
cand =
|
||||
cand.with_start(cand.start() + slen).with_end(cand.end() - elen);
|
||||
let new_start = cand.start() + slen;
|
||||
let new_end = cand.end() - elen;
|
||||
// This occurs when the original regex can match the empty string. In this
|
||||
// case, just bail instead of trying to get it right here since it's
|
||||
// likely a pathological case.
|
||||
if new_start > new_end {
|
||||
return Err(());
|
||||
}
|
||||
cand = cand.with_start(new_start).with_end(new_end);
|
||||
if self.original.is_match(&haystack[cand]) {
|
||||
Ok(Some(cand))
|
||||
} else {
|
||||
@@ -184,7 +191,7 @@ impl Matcher for WordMatcher {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::WordMatcher;
|
||||
use config::Config;
|
||||
use crate::config::Config;
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
|
||||
fn matcher(pattern: &str) -> WordMatcher {
|
||||
@@ -237,6 +244,8 @@ mod tests {
|
||||
assert_eq!(Some((2, 5)), find(r"!?foo!?", "a!foo!a"));
|
||||
|
||||
assert_eq!(Some((2, 7)), find(r"!?foo!?", "##!foo!\n"));
|
||||
assert_eq!(Some((3, 8)), find(r"!?foo!?", "##\n!foo!##"));
|
||||
assert_eq!(Some((3, 8)), find(r"!?foo!?", "##\n!foo!\n##"));
|
||||
assert_eq!(Some((3, 7)), find(r"f?oo!?", "##\nfoo!##"));
|
||||
assert_eq!(Some((2, 5)), find(r"(?-u)foo[^a]*", "#!foo☃aaa"));
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-searcher"
|
||||
version = "0.1.7" #:version
|
||||
version = "0.1.11" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -10,19 +10,20 @@ homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
|
||||
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
bstr = { version = "1.1.0", default-features = false, features = ["std"] }
|
||||
bytecount = "0.6"
|
||||
encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.6"
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
memmap = "0.7"
|
||||
memmap = { package = "memmap2", version = "0.5.3" }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.3", path = "../regex" }
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
regex = "1.1"
|
||||
|
||||
[features]
|
||||
|
@@ -28,9 +28,3 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-searcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_searcher;
|
||||
```
|
||||
|
@@ -1,6 +1,3 @@
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::io;
|
||||
|
@@ -48,10 +48,6 @@ using the
|
||||
implementation of `Sink`.
|
||||
|
||||
```
|
||||
extern crate grep_matcher;
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use grep_matcher::Matcher;
|
||||
@@ -99,24 +95,13 @@ searches stdin.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate bstr;
|
||||
extern crate bytecount;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memmap;
|
||||
#[cfg(test)]
|
||||
extern crate regex;
|
||||
|
||||
pub use lines::{LineIter, LineStep};
|
||||
pub use searcher::{
|
||||
pub use crate::lines::{LineIter, LineStep};
|
||||
pub use crate::searcher::{
|
||||
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
|
||||
SearcherBuilder,
|
||||
};
|
||||
pub use sink::sinks;
|
||||
pub use sink::{
|
||||
pub use crate::sink::sinks;
|
||||
pub use crate::sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
};
|
||||
|
||||
|
@@ -4,7 +4,7 @@ use std::io;
|
||||
use bstr::ByteSlice;
|
||||
|
||||
/// The default buffer capacity that we use for the line buffer.
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
|
||||
|
||||
/// The behavior of a searcher in the face of long lines and big contexts.
|
||||
///
|
||||
@@ -481,7 +481,7 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
let roll_len = self.end - self.pos;
|
||||
self.buf.copy_within_str(self.pos..self.end, 0);
|
||||
self.buf.copy_within(self.pos..self.end, 0);
|
||||
self.pos = 0;
|
||||
self.last_lineterm = roll_len;
|
||||
self.end = roll_len;
|
||||
|
@@ -2,13 +2,13 @@ use std::cmp;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use line_buffer::BinaryDetection;
|
||||
use lines::{self, LineStep};
|
||||
use searcher::{Config, Range, Searcher};
|
||||
use sink::{
|
||||
use crate::line_buffer::BinaryDetection;
|
||||
use crate::lines::{self, LineStep};
|
||||
use crate::searcher::{Config, Range, Searcher};
|
||||
use crate::sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
};
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Core<'s, M: 's, S> {
|
||||
@@ -53,9 +53,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
};
|
||||
if !core.searcher.multi_line_with_matcher(&core.matcher) {
|
||||
if core.is_line_by_line_fast() {
|
||||
trace!("searcher core: will use fast line searcher");
|
||||
log::trace!("searcher core: will use fast line searcher");
|
||||
} else {
|
||||
trace!("searcher core: will use slow line searcher");
|
||||
log::trace!("searcher core: will use slow line searcher");
|
||||
}
|
||||
}
|
||||
core
|
||||
@@ -441,6 +441,8 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
bytes: linebuf,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
buffer: buf,
|
||||
bytes_range_in_buffer: range.start()..range.end(),
|
||||
},
|
||||
)?;
|
||||
if !keepgoing {
|
||||
@@ -465,6 +467,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
let keepgoing = self.sink.context(
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
#[cfg(test)]
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
kind: SinkContextKind::Before,
|
||||
@@ -495,6 +498,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
let keepgoing = self.sink.context(
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
#[cfg(test)]
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
kind: SinkContextKind::After,
|
||||
@@ -524,6 +528,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
let keepgoing = self.sink.context(
|
||||
&self.searcher,
|
||||
&SinkContext {
|
||||
#[cfg(test)]
|
||||
line_term: self.config.line_term,
|
||||
bytes: &buf[*range],
|
||||
kind: SinkContextKind::Other,
|
||||
|
@@ -1,16 +1,16 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
|
||||
use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
|
||||
use crate::lines::{self, LineStep};
|
||||
use crate::sink::{Sink, SinkError};
|
||||
use grep_matcher::Matcher;
|
||||
use line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
|
||||
use lines::{self, LineStep};
|
||||
use sink::{Sink, SinkError};
|
||||
|
||||
use searcher::core::Core;
|
||||
use searcher::{Config, Range, Searcher};
|
||||
use crate::searcher::core::Core;
|
||||
use crate::searcher::{Config, Range, Searcher};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ReadByLine<'s, M: 's, R, S> {
|
||||
pub struct ReadByLine<'s, M, R, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
rdr: LineBufferReader<'s, R>,
|
||||
@@ -87,8 +87,7 @@ where
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SliceByLine<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
pub struct SliceByLine<'s, M, S> {
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
}
|
||||
@@ -103,7 +102,6 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
debug_assert!(!searcher.multi_line_with_matcher(&matcher));
|
||||
|
||||
SliceByLine {
|
||||
config: &searcher.config,
|
||||
core: Core::new(searcher, matcher, write_to, true),
|
||||
slice: slice,
|
||||
}
|
||||
@@ -134,7 +132,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MultiLine<'s, M: 's, S> {
|
||||
pub struct MultiLine<'s, M, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
@@ -226,10 +224,19 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
}
|
||||
Some(last_match) => {
|
||||
// If the lines in the previous match overlap with the lines
|
||||
// in this match, then simply grow the match and move on.
|
||||
// This happens when the next match begins on the same line
|
||||
// that the last match ends on.
|
||||
if last_match.end() > line.start() {
|
||||
// in this match, then simply grow the match and move on. This
|
||||
// happens when the next match begins on the same line that the
|
||||
// last match ends on.
|
||||
//
|
||||
// Note that we do not technically require strict overlap here.
|
||||
// Instead, we only require that the lines are adjacent. This
|
||||
// provides larger blocks of lines to the printer, and results
|
||||
// in overall better behavior with respect to how replacements
|
||||
// are handled.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1311
|
||||
// And also the associated commit fixing #1311.
|
||||
if last_match.end() >= line.start() {
|
||||
self.last_match = Some(last_match.with_end(line.end()));
|
||||
Ok(true)
|
||||
} else {
|
||||
@@ -340,8 +347,8 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use searcher::{BinaryDetection, SearcherBuilder};
|
||||
use testutil::{KitchenSink, RegexMatcher, SearcherTester};
|
||||
use crate::searcher::{BinaryDetection, SearcherBuilder};
|
||||
use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -633,7 +640,7 @@ d
|
||||
haystack.push_str("a\n");
|
||||
|
||||
let byte_count = haystack.len();
|
||||
let exp = format!("0:a\n131186:a\n\nbyte count:{}\n", byte_count);
|
||||
let exp = format!("0:a\n1048690:a\n\nbyte count:{}\n", byte_count);
|
||||
|
||||
SearcherTester::new(&haystack, "a")
|
||||
.line_number(false)
|
||||
@@ -714,21 +721,23 @@ d
|
||||
haystack.push_str("zzz\n");
|
||||
}
|
||||
haystack.push_str("a\n");
|
||||
haystack.push_str("zzz\n");
|
||||
haystack.push_str("a\x00a\n");
|
||||
haystack.push_str("zzz\n");
|
||||
haystack.push_str("a\n");
|
||||
|
||||
// The line buffered searcher has slightly different semantics here.
|
||||
// Namely, it will *always* detect binary data in the current buffer
|
||||
// before searching it. Thus, the total number of bytes searched is
|
||||
// smaller than below.
|
||||
let exp = "0:a\n\nbyte count:32770\nbinary offset:32773\n";
|
||||
let exp = "0:a\n\nbyte count:262146\nbinary offset:262153\n";
|
||||
// In contrast, the slice readers (for multi line as well) will only
|
||||
// look for binary data in the initial chunk of bytes. After that
|
||||
// point, it only looks for binary data in matches. Note though that
|
||||
// the binary offset remains the same. (See the binary4 test for a case
|
||||
// where the offset is explicitly different.)
|
||||
let exp_slice =
|
||||
"0:a\n32770:a\n\nbyte count:32773\nbinary offset:32773\n";
|
||||
"0:a\n262146:a\n\nbyte count:262153\nbinary offset:262153\n";
|
||||
|
||||
SearcherTester::new(&haystack, "a")
|
||||
.binary_detection(BinaryDetection::quit(0))
|
||||
@@ -755,12 +764,12 @@ d
|
||||
haystack.push_str("a\x00a\n");
|
||||
haystack.push_str("a\n");
|
||||
|
||||
let exp = "0:a\n\nbyte count:32770\nbinary offset:32773\n";
|
||||
let exp = "0:a\n\nbyte count:262146\nbinary offset:262149\n";
|
||||
// The binary offset for the Slice readers corresponds to the binary
|
||||
// data in `a\x00a\n` since the first line with binary data
|
||||
// (`b\x00b\n`) isn't part of a match, and is therefore undetected.
|
||||
let exp_slice =
|
||||
"0:a\n32770:a\n\nbyte count:32777\nbinary offset:32777\n";
|
||||
"0:a\n262146:a\n\nbyte count:262153\nbinary offset:262153\n";
|
||||
|
||||
SearcherTester::new(&haystack, "a")
|
||||
.binary_detection(BinaryDetection::quit(0))
|
||||
@@ -1477,8 +1486,8 @@ byte count:307
|
||||
|
||||
#[test]
|
||||
fn scratch() {
|
||||
use sinks;
|
||||
use testutil::RegexMatcher;
|
||||
use crate::sinks;
|
||||
use crate::testutil::RegexMatcher;
|
||||
|
||||
const SHERLOCK: &'static [u8] = b"\
|
||||
For the Doctor Wat\xFFsons of this world, as opposed to the Sherlock
|
||||
@@ -1503,4 +1512,31 @@ and exhibited clearly, with a label attached.\
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2260
|
||||
#[test]
|
||||
fn regression_2260() {
|
||||
use grep_regex::RegexMatcherBuilder;
|
||||
|
||||
use crate::SearcherBuilder;
|
||||
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.line_terminator(Some(b'\n'))
|
||||
.build(r"^\w+$")
|
||||
.unwrap();
|
||||
let mut searcher = SearcherBuilder::new().line_number(true).build();
|
||||
|
||||
let mut matched = false;
|
||||
searcher
|
||||
.search_slice(
|
||||
&matcher,
|
||||
b"GATC\n",
|
||||
crate::sinks::UTF8(|_, _| {
|
||||
matched = true;
|
||||
Ok(true)
|
||||
}),
|
||||
)
|
||||
.unwrap();
|
||||
assert!(matched);
|
||||
}
|
||||
}
|
||||
|
@@ -71,6 +71,16 @@ impl MmapChoice {
|
||||
if !self.is_enabled() {
|
||||
return None;
|
||||
}
|
||||
if !cfg!(target_pointer_width = "64") {
|
||||
// For 32-bit systems, it looks like mmap will succeed even if it
|
||||
// can't address the entire file. This seems to happen at least on
|
||||
// Windows, even though it used to work prior to ripgrep 13. The
|
||||
// only Windows-related change in ripgrep 13, AFAIK, was statically
|
||||
// linking vcruntime. So maybe that's related? But I'm not sure.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1911
|
||||
return None;
|
||||
}
|
||||
if cfg!(target_os = "macos") {
|
||||
// I guess memory maps on macOS aren't great. Should re-evaluate.
|
||||
return None;
|
||||
@@ -83,13 +93,13 @@ impl MmapChoice {
|
||||
Ok(mmap) => Some(mmap),
|
||||
Err(err) => {
|
||||
if let Some(path) = path {
|
||||
debug!(
|
||||
log::debug!(
|
||||
"{}: failed to open memory map: {}",
|
||||
path.display(),
|
||||
err
|
||||
);
|
||||
} else {
|
||||
debug!("failed to open memory map: {}", err);
|
||||
log::debug!("failed to open memory map: {}", err);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
@@ -5,15 +5,15 @@ use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
|
||||
use encoding_rs;
|
||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||
use grep_matcher::{LineTerminator, Match, Matcher};
|
||||
use line_buffer::{
|
||||
use crate::line_buffer::{
|
||||
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
|
||||
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
|
||||
};
|
||||
use searcher::glue::{MultiLine, ReadByLine, SliceByLine};
|
||||
use sink::{Sink, SinkError};
|
||||
use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine};
|
||||
use crate::sink::{Sink, SinkError};
|
||||
use encoding_rs;
|
||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||
use grep_matcher::{LineTerminator, Match, Matcher};
|
||||
|
||||
pub use self::mmap::MmapChoice;
|
||||
|
||||
@@ -263,7 +263,7 @@ impl ::std::error::Error for ConfigError {
|
||||
}
|
||||
|
||||
impl fmt::Display for ConfigError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match *self {
|
||||
ConfigError::SearchUnavailable => {
|
||||
write!(f, "grep config error: no available searchers")
|
||||
@@ -659,16 +659,19 @@ impl Searcher {
|
||||
S: Sink,
|
||||
{
|
||||
if let Some(mmap) = self.config.mmap.open(file, path) {
|
||||
trace!("{:?}: searching via memory map", path);
|
||||
log::trace!("{:?}: searching via memory map", path);
|
||||
return self.search_slice(matcher, &mmap, write_to);
|
||||
}
|
||||
// Fast path for multi-line searches of files when memory maps are
|
||||
// not enabled. This pre-allocates a buffer roughly the size of the
|
||||
// file, which isn't possible when searching an arbitrary io::Read.
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
trace!("{:?}: reading entire file on to heap for mulitline", path);
|
||||
log::trace!(
|
||||
"{:?}: reading entire file on to heap for mulitline",
|
||||
path
|
||||
);
|
||||
self.fill_multi_line_buffer_from_file::<S>(file)?;
|
||||
trace!("{:?}: searching via multiline strategy", path);
|
||||
log::trace!("{:?}: searching via multiline strategy", path);
|
||||
MultiLine::new(
|
||||
self,
|
||||
matcher,
|
||||
@@ -677,7 +680,7 @@ impl Searcher {
|
||||
)
|
||||
.run()
|
||||
} else {
|
||||
trace!("{:?}: searching using generic reader", path);
|
||||
log::trace!("{:?}: searching using generic reader", path);
|
||||
self.search_reader(matcher, file, write_to)
|
||||
}
|
||||
}
|
||||
@@ -707,15 +710,17 @@ impl Searcher {
|
||||
self.check_config(&matcher).map_err(S::Error::error_config)?;
|
||||
|
||||
let mut decode_buffer = self.decode_buffer.borrow_mut();
|
||||
let read_from = self
|
||||
let decoder = self
|
||||
.decode_builder
|
||||
.build_with_buffer(read_from, &mut *decode_buffer)
|
||||
.map_err(S::Error::error_io)?;
|
||||
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
trace!("generic reader: reading everything to heap for multiline");
|
||||
self.fill_multi_line_buffer_from_reader::<_, S>(read_from)?;
|
||||
trace!("generic reader: searching via multiline strategy");
|
||||
log::trace!(
|
||||
"generic reader: reading everything to heap for multiline"
|
||||
);
|
||||
self.fill_multi_line_buffer_from_reader::<_, S>(decoder)?;
|
||||
log::trace!("generic reader: searching via multiline strategy");
|
||||
MultiLine::new(
|
||||
self,
|
||||
matcher,
|
||||
@@ -725,8 +730,8 @@ impl Searcher {
|
||||
.run()
|
||||
} else {
|
||||
let mut line_buffer = self.line_buffer.borrow_mut();
|
||||
let rdr = LineBufferReader::new(read_from, &mut *line_buffer);
|
||||
trace!("generic reader: searching via roll buffer strategy");
|
||||
let rdr = LineBufferReader::new(decoder, &mut *line_buffer);
|
||||
log::trace!("generic reader: searching via roll buffer strategy");
|
||||
ReadByLine::new(self, matcher, rdr, write_to).run()
|
||||
}
|
||||
}
|
||||
@@ -747,14 +752,16 @@ impl Searcher {
|
||||
|
||||
// We can search the slice directly, unless we need to do transcoding.
|
||||
if self.slice_needs_transcoding(slice) {
|
||||
trace!("slice reader: needs transcoding, using generic reader");
|
||||
log::trace!(
|
||||
"slice reader: needs transcoding, using generic reader"
|
||||
);
|
||||
return self.search_reader(matcher, slice, write_to);
|
||||
}
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
trace!("slice reader: searching via multiline strategy");
|
||||
log::trace!("slice reader: searching via multiline strategy");
|
||||
MultiLine::new(self, matcher, slice, write_to).run()
|
||||
} else {
|
||||
trace!("slice reader: searching via slice-by-line strategy");
|
||||
log::trace!("slice reader: searching via slice-by-line strategy");
|
||||
SliceByLine::new(self, matcher, slice, write_to).run()
|
||||
}
|
||||
}
|
||||
@@ -788,7 +795,7 @@ impl Searcher {
|
||||
/// Returns true if and only if the given slice needs to be transcoded.
|
||||
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
||||
self.config.encoding.is_some()
|
||||
|| (self.config.bom_sniffing && slice_has_utf16_bom(slice))
|
||||
|| (self.config.bom_sniffing && slice_has_bom(slice))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -973,22 +980,24 @@ impl Searcher {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given slice begins with a UTF-16 BOM.
|
||||
/// Returns true if and only if the given slice begins with a UTF-8 or UTF-16
|
||||
/// BOM.
|
||||
///
|
||||
/// This is used by the searcher to determine if a transcoder is necessary.
|
||||
/// Otherwise, it is advantageous to search the slice directly.
|
||||
fn slice_has_utf16_bom(slice: &[u8]) -> bool {
|
||||
fn slice_has_bom(slice: &[u8]) -> bool {
|
||||
let enc = match encoding_rs::Encoding::for_bom(slice) {
|
||||
None => return false,
|
||||
Some((enc, _)) => enc,
|
||||
};
|
||||
[encoding_rs::UTF_16LE, encoding_rs::UTF_16BE].contains(&enc)
|
||||
[encoding_rs::UTF_16LE, encoding_rs::UTF_16BE, encoding_rs::UTF_8]
|
||||
.contains(&enc)
|
||||
}
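The same check can be exercised directly with `encoding_rs` (already a dependency of grep-searcher); `Encoding::for_bom` returns the detected encoding and the BOM length. A small sketch under that assumption, not the crate's own API:

```rust
use encoding_rs::{Encoding, UTF_16BE, UTF_16LE, UTF_8};

// Returns true if the slice starts with a UTF-8 or UTF-16 BOM, i.e. when the
// searcher would need to route the haystack through a transcoder.
fn has_transcoding_bom(slice: &[u8]) -> bool {
    match Encoding::for_bom(slice) {
        Some((enc, _bom_len)) => enc == UTF_8 || enc == UTF_16LE || enc == UTF_16BE,
        None => false,
    }
}

fn main() {
    assert!(has_transcoding_bom(&[0xef, 0xbb, 0xbf, b'f', b'o', b'o'])); // UTF-8 BOM
    assert!(has_transcoding_bom(&[0xff, 0xfe, b'f', 0x00]));             // UTF-16LE BOM
    assert!(!has_transcoding_bom(b"foo"));
}
```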
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use testutil::{KitchenSink, RegexMatcher};
|
||||
use crate::testutil::{KitchenSink, RegexMatcher};
|
||||
|
||||
#[test]
|
||||
fn config_error_heap_limit() {
|
||||
@@ -1009,4 +1018,21 @@ mod tests {
|
||||
let res = searcher.search_slice(matcher, &[], sink);
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uft8_bom_sniffing() {
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1638
|
||||
// ripgrep must sniff utf-8 BOM, just like it does with utf-16
|
||||
let matcher = RegexMatcher::new("foo");
|
||||
let haystack: &[u8] = &[0xef, 0xbb, 0xbf, 0x66, 0x6f, 0x6f];
|
||||
|
||||
let mut sink = KitchenSink::new();
|
||||
let mut searcher = SearcherBuilder::new().build();
|
||||
|
||||
let res = searcher.search_slice(matcher, haystack, &mut sink);
|
||||
assert!(res.is_ok());
|
||||
|
||||
let sink_output = String::from_utf8(sink.as_bytes().to_vec()).unwrap();
|
||||
assert_eq!(sink_output, "1:0:foo\nbyte count:3\n");
|
||||
}
|
||||
}
|
||||
|