mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 10:11:58 -07:00
Compare commits
1 Commits
grep-cli-0
...
ag/index
Author | SHA1 | Date | |
---|---|---|---|
|
60a1db34a6 |
49
.github/workflows/ci.yml
vendored
49
.github/workflows/ci.yml
vendored
@@ -42,31 +42,31 @@ jobs:
|
||||
- win-gnu
|
||||
include:
|
||||
- build: pinned
|
||||
os: ubuntu-latest
|
||||
rust: 1.70.0
|
||||
os: ubuntu-18.04
|
||||
rust: 1.52.1
|
||||
- build: stable
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: stable
|
||||
- build: beta
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: beta
|
||||
- build: nightly
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
- build: nightly-musl
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
target: x86_64-unknown-linux-musl
|
||||
- build: nightly-32
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
target: i686-unknown-linux-gnu
|
||||
- build: nightly-mips
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
target: mips64-unknown-linux-gnuabi64
|
||||
- build: nightly-arm
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
# For stripping release binaries:
|
||||
# docker run --rm -v $PWD/target:/target:Z \
|
||||
@@ -78,17 +78,17 @@ jobs:
|
||||
os: macos-latest
|
||||
rust: nightly
|
||||
- build: win-msvc
|
||||
os: windows-2022
|
||||
os: windows-2019
|
||||
rust: nightly
|
||||
- build: win-gnu
|
||||
os: windows-2022
|
||||
os: windows-2019
|
||||
rust: nightly-x86_64-gnu
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v2
|
||||
|
||||
- name: Install packages (Ubuntu)
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
if: matrix.os == 'ubuntu-18.04'
|
||||
run: |
|
||||
ci/ubuntu-install-packages
|
||||
|
||||
@@ -98,7 +98,7 @@ jobs:
|
||||
ci/macos-install-packages
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
|
||||
@@ -148,14 +148,14 @@ jobs:
|
||||
run: ${{ env.CARGO }} test --verbose --workspace ${{ env.TARGET_FLAGS }}
|
||||
|
||||
- name: Test for existence of build artifacts (Windows)
|
||||
if: matrix.os == 'windows-2022'
|
||||
if: matrix.os == 'windows-2019'
|
||||
shell: bash
|
||||
run: |
|
||||
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
|
||||
ls "$outdir/_rg.ps1" && file "$outdir/_rg.ps1"
|
||||
|
||||
- name: Test for existence of build artifacts (Unix)
|
||||
if: matrix.os != 'windows-2022'
|
||||
if: matrix.os != 'windows-2019'
|
||||
shell: bash
|
||||
run: |
|
||||
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
|
||||
@@ -172,32 +172,33 @@ jobs:
|
||||
# 'rg' binary (done in test-complete) with qemu, which is a pain and
|
||||
# doesn't really gain us much. If shell completion works in one place,
|
||||
# it probably works everywhere.
|
||||
if: matrix.target == '' && matrix.os != 'windows-2022'
|
||||
if: matrix.target == '' && matrix.os != 'windows-2019'
|
||||
shell: bash
|
||||
run: ci/test-complete
|
||||
|
||||
rustfmt:
|
||||
name: rustfmt
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-18.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v2
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
components: rustfmt
|
||||
- name: Check formatting
|
||||
run: cargo fmt --all --check
|
||||
run: |
|
||||
cargo fmt --all -- --check
|
||||
|
||||
docs:
|
||||
name: Docs
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v2
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
- name: Check documentation
|
||||
|
52
.github/workflows/release.yml
vendored
52
.github/workflows/release.yml
vendored
@@ -29,19 +29,26 @@ jobs:
|
||||
# Set to force version number, e.g., when no tag exists.
|
||||
# RG_VERSION: TEST-0.0.0
|
||||
outputs:
|
||||
upload_url: ${{ steps.release.outputs.upload_url }}
|
||||
rg_version: ${{ env.RG_VERSION }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Get the release version from the tag
|
||||
shell: bash
|
||||
if: env.RG_VERSION == ''
|
||||
run: |
|
||||
echo "RG_VERSION=$GITHUB_REF_NAME" >> $GITHUB_ENV
|
||||
# Apparently, this is the right way to get a tag name. Really?
|
||||
#
|
||||
# See: https://github.community/t5/GitHub-Actions/How-to-get-just-the-tag-name/m-p/32167/highlight/true#M1027
|
||||
echo "RG_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||
echo "version is: ${{ env.RG_VERSION }}"
|
||||
- name: Create GitHub release
|
||||
id: release
|
||||
uses: actions/create-release@v1
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: gh release create ${{ env.RG_VERSION }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
tag_name: ${{ env.RG_VERSION }}
|
||||
release_name: ${{ env.RG_VERSION }}
|
||||
|
||||
build-release:
|
||||
name: build-release
|
||||
@@ -64,11 +71,11 @@ jobs:
|
||||
build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc]
|
||||
include:
|
||||
- build: linux
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
target: x86_64-unknown-linux-musl
|
||||
- build: linux-arm
|
||||
os: ubuntu-latest
|
||||
os: ubuntu-18.04
|
||||
rust: nightly
|
||||
target: arm-unknown-linux-gnueabihf
|
||||
- build: macos
|
||||
@@ -76,24 +83,26 @@ jobs:
|
||||
rust: nightly
|
||||
target: x86_64-apple-darwin
|
||||
- build: win-msvc
|
||||
os: windows-latest
|
||||
os: windows-2019
|
||||
rust: nightly
|
||||
target: x86_64-pc-windows-msvc
|
||||
- build: win-gnu
|
||||
os: windows-latest
|
||||
os: windows-2019
|
||||
rust: nightly-x86_64-gnu
|
||||
target: x86_64-pc-windows-gnu
|
||||
- build: win32-msvc
|
||||
os: windows-latest
|
||||
os: windows-2019
|
||||
rust: nightly
|
||||
target: i686-pc-windows-msvc
|
||||
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v3
|
||||
uses: actions/checkout@v2
|
||||
with:
|
||||
fetch-depth: 1
|
||||
|
||||
- name: Install packages (Ubuntu)
|
||||
if: matrix.os == 'ubuntu-latest'
|
||||
if: matrix.os == 'ubuntu-18.04'
|
||||
run: |
|
||||
ci/ubuntu-install-packages
|
||||
|
||||
@@ -103,7 +112,7 @@ jobs:
|
||||
ci/macos-install-packages
|
||||
|
||||
- name: Install Rust
|
||||
uses: dtolnay/rust-toolchain@master
|
||||
uses: dtolnay/rust-toolchain@v1
|
||||
with:
|
||||
toolchain: ${{ matrix.rust }}
|
||||
target: ${{ matrix.target }}
|
||||
@@ -125,8 +134,8 @@ jobs:
|
||||
- name: Build release binary
|
||||
run: ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}
|
||||
|
||||
- name: Strip release binary (linux, macos and macos-arm)
|
||||
if: matrix.build == 'linux' || matrix.os == 'macos'
|
||||
- name: Strip release binary (linux and macos)
|
||||
if: matrix.build == 'linux' || matrix.build == 'macos'
|
||||
run: strip "target/${{ matrix.target }}/release/rg"
|
||||
|
||||
- name: Strip release binary (arm)
|
||||
@@ -150,23 +159,24 @@ jobs:
|
||||
cp "$outdir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
|
||||
cp complete/_rg "$staging/complete/"
|
||||
|
||||
if [ "${{ matrix.os }}" = "windows-latest" ]; then
|
||||
if [ "${{ matrix.os }}" = "windows-2019" ]; then
|
||||
cp "target/${{ matrix.target }}/release/rg.exe" "$staging/"
|
||||
7z a "$staging.zip" "$staging"
|
||||
certutil -hashfile "$staging.zip" SHA256 > "$staging.zip.sha256"
|
||||
echo "ASSET=$staging.zip" >> $GITHUB_ENV
|
||||
echo "ASSET_SUM=$staging.zip.sha256" >> $GITHUB_ENV
|
||||
else
|
||||
# The man page is only generated on Unix systems. ¯\_(ツ)_/¯
|
||||
cp "$outdir"/rg.1 "$staging/doc/"
|
||||
cp "target/${{ matrix.target }}/release/rg" "$staging/"
|
||||
tar czf "$staging.tar.gz" "$staging"
|
||||
shasum -a 256 "$staging.tar.gz" > "$staging.tar.gz.sha256"
|
||||
echo "ASSET=$staging.tar.gz" >> $GITHUB_ENV
|
||||
echo "ASSET_SUM=$staging.tar.gz.sha256" >> $GITHUB_ENV
|
||||
fi
|
||||
|
||||
- name: Upload release archive
|
||||
uses: actions/upload-release-asset@v1.0.1
|
||||
env:
|
||||
GH_TOKEN: ${{ github.token }}
|
||||
run: gh release upload ${{ needs.create-release.outputs.rg_version }} ${{ env.ASSET }} ${{ env.ASSET_SUM }}
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
upload_url: ${{ needs.create-release.outputs.upload_url }}
|
||||
asset_path: ${{ env.ASSET }}
|
||||
asset_name: ${{ env.ASSET }}
|
||||
asset_content_type: application/octet-stream
|
||||
|
28
CHANGELOG.md
28
CHANGELOG.md
@@ -2,42 +2,14 @@ TBD
|
||||
===
|
||||
Unreleased changes. Release notes have not yet been written.
|
||||
|
||||
**BREAKING CHANGES**
|
||||
|
||||
* `rg -C1 -A2` used to be equivalent to `rg -A2`, but now it is equivalent to
|
||||
`rg -B1 -A2`. That is, `-A` and `-B` no longer completely override `-C`.
|
||||
Instead, they only partially override `-C`.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for Ada, DITA, Elixir, Fuchsia, Gentoo, GraphQL, Markdown, Raku, TypeScript, USD, V
|
||||
* [FEATURE #1790](https://github.com/BurntSushi/ripgrep/issues/1790):
|
||||
Add new `--stop-on-nonmatch` flag.
|
||||
* [FEATURE #2195](https://github.com/BurntSushi/ripgrep/issues/2195):
|
||||
When `extra-verbose` mode is enabled in zsh, show extra file type info.
|
||||
* [FEATURE #2409](https://github.com/BurntSushi/ripgrep/pull/2409):
|
||||
Added installation instructions for `winget`.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #1891](https://github.com/BurntSushi/ripgrep/issues/1891):
|
||||
Fix bug when using `-w` with a regex that can match the empty string.
|
||||
* [BUG #1911](https://github.com/BurntSushi/ripgrep/issues/1911):
|
||||
Disable mmap searching in all non-64-bit environments.
|
||||
* [BUG #2108](https://github.com/BurntSushi/ripgrep/issues/2108):
|
||||
Improve docs for `-r/--replace` syntax.
|
||||
* [BUG #2198](https://github.com/BurntSushi/ripgrep/issues/2198):
|
||||
Fix bug where `--no-ignore-dot` would not ignore `.rgignore`.
|
||||
* [BUG #2288](https://github.com/BurntSushi/ripgrep/issues/2288):
|
||||
`-A` and `-B` now only each partially override `-C`.
|
||||
* [BUG #2236](https://github.com/BurntSushi/ripgrep/issues/2236):
|
||||
Fix gitignore parsing bug where a trailing `\/` resulted in an error.
|
||||
* [BUG #2243](https://github.com/BurntSushi/ripgrep/issues/2243):
|
||||
Fix `--sort` flag for values other than `path`.
|
||||
* [BUG #2480](https://github.com/BurntSushi/ripgrep/issues/2480):
|
||||
Fix bug when using inline regex flags with `-e/--regexp`.
|
||||
* [BUG #2523](https://github.com/BurntSushi/ripgrep/issues/2523):
|
||||
Make executable searching take `.com` into account on Windows.
|
||||
|
||||
|
||||
13.0.0 (2021-06-12)
|
||||
|
243
Cargo.lock
generated
243
Cargo.lock
generated
@@ -4,18 +4,29 @@ version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "1.0.2"
|
||||
version = "0.7.18"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43f6cb1bf222025340178f382c426f13757b2960e89779dfcb319c32542a5a41"
|
||||
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.20.0"
|
||||
name = "atty"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0ea22880d78093b0cbe17c89f64a7d457941e65759157ec6cb31a31d652b05e5"
|
||||
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
|
||||
|
||||
[[package]]
|
||||
name = "bitflags"
|
||||
@@ -25,26 +36,26 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "1.6.0"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6798148dccfbff0fae41c7574d2fa8f1ef3492fba0face179de5d8d447d67b05"
|
||||
checksum = "ba3569f383e8f1598449f1a423e72e99569137b47740b1da11ef19af3d5c3223"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"serde",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.6.3"
|
||||
version = "0.6.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c"
|
||||
checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.79"
|
||||
version = "1.0.73"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "50d30906286121d95be3d479533b458f87493b30a4b5f79a607db8f5d11aa91f"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
@@ -69,9 +80,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.8"
|
||||
version = "0.5.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "a33c2bf77f2df06183c3aa30d1e96c0695a313d4f9c453cc3762a6db39f99200"
|
||||
checksum = "5aaa7bd5fb665c6864b5f963dd9097905c54125909c7aa94c9e18507cdbe6c53"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"crossbeam-utils",
|
||||
@@ -79,18 +90,19 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.16"
|
||||
version = "0.8.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
|
||||
checksum = "0bf124c720b7686e3c2663cf54062ab0f68a88af2fb6a030e87e30bf721fcb38"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.32"
|
||||
version = "0.8.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "071a31f4ee85403370b58aca746f01041ede6f0da2730960ad001edc2b71b394"
|
||||
checksum = "7896dc8abb250ffdda33912550faa54c88ec8b998dec0b2c55ab224921ce11df"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"packed_simd_2",
|
||||
@@ -112,14 +124,20 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.1"
|
||||
name = "fs_extra"
|
||||
version = "1.2.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
|
||||
checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394"
|
||||
|
||||
[[package]]
|
||||
name = "glob"
|
||||
version = "0.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.11"
|
||||
version = "0.4.9"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
@@ -134,7 +152,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.2.12"
|
||||
version = "0.2.10"
|
||||
dependencies = [
|
||||
"grep-cli",
|
||||
"grep-matcher",
|
||||
@@ -148,8 +166,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep-cli"
|
||||
version = "0.1.9"
|
||||
version = "0.1.6"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bstr",
|
||||
"globset",
|
||||
"lazy_static",
|
||||
@@ -160,9 +179,17 @@ dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-index"
|
||||
version = "0.0.1"
|
||||
dependencies = [
|
||||
"bstr 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-matcher"
|
||||
version = "0.1.6"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
"regex",
|
||||
@@ -170,16 +197,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.6"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"grep-matcher",
|
||||
"log",
|
||||
"pcre2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-printer"
|
||||
version = "0.1.7"
|
||||
version = "0.1.6"
|
||||
dependencies = [
|
||||
"base64",
|
||||
"bstr",
|
||||
@@ -193,19 +219,20 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep-regex"
|
||||
version = "0.1.11"
|
||||
version = "0.1.10"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
"grep-matcher",
|
||||
"log",
|
||||
"regex-automata",
|
||||
"regex",
|
||||
"regex-syntax",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep-searcher"
|
||||
version = "0.1.11"
|
||||
version = "0.1.10"
|
||||
dependencies = [
|
||||
"bstr",
|
||||
"bytecount",
|
||||
@@ -218,11 +245,21 @@ dependencies = [
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.19"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.20"
|
||||
version = "0.4.18"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-utils",
|
||||
"globset",
|
||||
"lazy_static",
|
||||
"log",
|
||||
@@ -236,25 +273,26 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "1.0.8"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "62b02a5381cc465bd3041d84623d0fa3b66738b52b8e2fc3bab8ad63ab032f4a"
|
||||
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
|
||||
|
||||
[[package]]
|
||||
name = "jemalloc-sys"
|
||||
version = "0.5.3+5.3.0-patched"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f9bd5d616ea7ed58b571b2e209a65759664d7fb021a0819d7a790afc67e47ca1"
|
||||
checksum = "0d3b9f3f5c9b31aa0f5ed3260385ac205db665baa41d49bb8338008ae94ede45"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"fs_extra",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "jemallocator"
|
||||
version = "0.5.0"
|
||||
version = "0.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "16c2514137880c52b0b4822b563fadd38257c1f380858addb74a400889696ea6"
|
||||
checksum = "43ae63fcfc45e99ab3d1b29a46782ad679e98436c3169d15a167a1108a724b69"
|
||||
dependencies = [
|
||||
"jemalloc-sys",
|
||||
"libc",
|
||||
@@ -262,9 +300,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.26"
|
||||
version = "0.1.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "936cfd212a0155903bcbc060e316fb6cc7cbf2e1907329391ebadc1fe0ce77c2"
|
||||
checksum = "af25a77299a7f711a01975c35a6a424eb6862092cc2d6c72c4ed6cbc56dfc1fa"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
@@ -277,9 +315,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.147"
|
||||
version = "0.2.121"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b4668fb0ea861c1df094127ac5f1da3409a82116a4ba74fca2e58ef927159bb3"
|
||||
checksum = "efaa7b300f3b5fe8eb6bf21ce3895e1751d9665086af2d64b42f19701015ff4f"
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
@@ -289,30 +327,43 @@ checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.19"
|
||||
version = "0.4.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
version = "2.4.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.5.10"
|
||||
version = "0.5.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327"
|
||||
checksum = "057a3db23999c867821a7a59feb06a578fcb03685e983dff90daf9e7d24ac08f"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.18.0"
|
||||
name = "num_cpus"
|
||||
version = "1.13.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d"
|
||||
checksum = "19e64526ebdee182341572e50e9ad03965aa510cd94427a4549448f285e957a1"
|
||||
dependencies = [
|
||||
"hermit-abi",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.10.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "87f3e037eac156d1775da914196f0f37741a274155e34a0b7e427c35d2a2ecb9"
|
||||
|
||||
[[package]]
|
||||
name = "packed_simd_2"
|
||||
@@ -326,9 +377,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pcre2"
|
||||
version = "0.2.4"
|
||||
version = "0.2.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "486aca7e74edb8cab09a48d461177f450a5cca3b55e61d139f7552190e2bbcf5"
|
||||
checksum = "85b30f2f69903b439dd9dc9e824119b82a55bf113b29af8d70948a03c1b11ab1"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
@@ -338,9 +389,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pcre2-sys"
|
||||
version = "0.2.6"
|
||||
version = "0.2.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ae234f441970dbd52d4e29bee70f3b56ca83040081cb2b55b7df772b16e0b06e"
|
||||
checksum = "dec30e5e9ec37eb8fbf1dea5989bc957fd3df56fbee5061aa7b7a99dbb37b722"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
@@ -349,56 +400,50 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.27"
|
||||
version = "0.3.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964"
|
||||
checksum = "58893f751c9b0412871a09abd62ecd2a00298c6c83befa223ef98c52aef40cbe"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.63"
|
||||
version = "1.0.36"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "7b368fba921b0dce7e60f5e04ec15e565b3303972b42bcfde1d0713b881959eb"
|
||||
checksum = "c7342d5883fbccae1cc37a2353b09c87c9b0f3afd73f5fb9bba687a1f733b029"
|
||||
dependencies = [
|
||||
"unicode-ident",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.29"
|
||||
version = "1.0.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "573015e8ab27661678357f27dc26460738fd2b6c86e46f386fde94cb5d913105"
|
||||
checksum = "b4af2ec4714533fcdf07e886f17025ace8b997b9ce51204ee69b6da831c3da57"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.9.0"
|
||||
version = "1.5.5"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "89089e897c013b3deb627116ae56a6955a72b8bed395c9526af31c9fe528b484"
|
||||
checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-automata",
|
||||
"regex-syntax",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.0"
|
||||
version = "0.1.10"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fa250384981ea14565685dea16a9ccc4d1c541a13f82b9c168572264d1df8c56"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
]
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.3"
|
||||
version = "0.6.25"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2ab07dc67230e4a4718e70fd5c20055a4334b121f1f9db8fe63ef39ce9b8c846"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
|
||||
[[package]]
|
||||
name = "ripgrep"
|
||||
@@ -411,6 +456,8 @@ dependencies = [
|
||||
"jemallocator",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"num_cpus",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
@@ -420,9 +467,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "ryu"
|
||||
version = "1.0.14"
|
||||
version = "1.0.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fe232bdf6be8c8de797b22184ee71118d63780ea42ac85b61d1baa6d3b782ae9"
|
||||
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
|
||||
|
||||
[[package]]
|
||||
name = "same-file"
|
||||
@@ -435,18 +482,18 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.166"
|
||||
version = "1.0.136"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d01b7404f9d441d3ad40e6a636a7782c377d2abdbe4fa2440e2edcc2f4f10db8"
|
||||
checksum = "ce31e24b01e1e524df96f1c2fdd054405f8d7376249a5110886fb4b658484789"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.166"
|
||||
version = "1.0.136"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "5dd83d6dde2b6b2d466e14d9d1acce8816dedee94f735eac6395808b3483c6d6"
|
||||
checksum = "08597e7152fcd306f41838ed3e37be9eaeed2b61c42e2117266a554fab4662f9"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -455,9 +502,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.100"
|
||||
version = "1.0.79"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0f1e14e89be7aa4c4b78bdbdc9eb5bf8517829a600ae8eaa39a6e1d960b5185c"
|
||||
checksum = "8e8d9fa5c3b304765ce1fd9c4c8a3de2c8db365a5b91be52f186efc675681d95"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
@@ -472,20 +519,20 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "2.0.23"
|
||||
version = "1.0.89"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "59fb7d6d8281a51045d62b8eb3a7d1ce347b76f312af50cd3dc0af39c87c1737"
|
||||
checksum = "ea297be220d52398dcc07ce15a209fce436d361735ac1db700cab3b6cdfb9f54"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-ident",
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.2.0"
|
||||
version = "1.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
|
||||
checksum = "bab24d30b911b2376f3a13cc2cd443142f0c81dda04c118693e35b3835757755"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
@@ -501,33 +548,33 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.7"
|
||||
version = "1.1.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3fdd6f064ccff2d6567adcb3873ca630700f00b5ad3f060c25b5dcfd9a4ce152"
|
||||
checksum = "5516c27b78311c50bf42c071425c560ac799b11c30b31f87e3081965fe5e0180"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"once_cell",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-ident"
|
||||
version = "1.0.10"
|
||||
name = "unicode-width"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "22049a19f4a68748a168c0fc439f9516686aa045927ff767eca0a85101fb6e73"
|
||||
checksum = "3ed742d4ea2bd1176e236172c8429aaf54486e7ac098db29ffe6529e0ce50973"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-width"
|
||||
version = "0.1.10"
|
||||
name = "unicode-xid"
|
||||
version = "0.2.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
|
||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.3"
|
||||
version = "2.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "36df944cda56c7d8d8b7496af378e6b16de9284591917d307c9b4d313c44e698"
|
||||
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi",
|
||||
"winapi-util",
|
||||
]
|
||||
|
||||
|
21
Cargo.toml
21
Cargo.toml
@@ -13,18 +13,10 @@ repository = "https://github.com/BurntSushi/ripgrep"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
categories = ["command-line-utilities", "text-processing"]
|
||||
license = "Unlicense OR MIT"
|
||||
exclude = [
|
||||
"HomebrewFormula",
|
||||
"/.github/",
|
||||
"/ci/",
|
||||
"/pkg/",
|
||||
"/benchsuite/",
|
||||
"/scripts/",
|
||||
]
|
||||
exclude = ["HomebrewFormula"]
|
||||
build = "build.rs"
|
||||
autotests = false
|
||||
edition = "2018"
|
||||
rust-version = "1.70"
|
||||
|
||||
[[bin]]
|
||||
bench = false
|
||||
@@ -40,6 +32,7 @@ members = [
|
||||
"crates/globset",
|
||||
"crates/grep",
|
||||
"crates/cli",
|
||||
"crates/index",
|
||||
"crates/matcher",
|
||||
"crates/pcre2",
|
||||
"crates/printer",
|
||||
@@ -49,11 +42,13 @@ members = [
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
bstr = "1.6.0"
|
||||
grep = { version = "0.2.12", path = "crates/grep" }
|
||||
ignore = { version = "0.4.19", path = "crates/ignore" }
|
||||
bstr = "0.2.12"
|
||||
grep = { version = "0.2.8", path = "crates/grep" }
|
||||
ignore = { version = "0.4.18", path = "crates/ignore" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
num_cpus = "1.8.0"
|
||||
regex = "1.3.5"
|
||||
serde_json = "1.0.23"
|
||||
termcolor = "1.1.0"
|
||||
|
||||
@@ -63,7 +58,7 @@ default-features = false
|
||||
features = ["suggestions"]
|
||||
|
||||
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
|
||||
version = "0.5.0"
|
||||
version = "0.3.0"
|
||||
|
||||
[build-dependencies]
|
||||
lazy_static = "1.1.0"
|
||||
|
@@ -6,7 +6,6 @@ image = "burntsushi/cross:i686-unknown-linux-gnu"
|
||||
|
||||
[target.mips64-unknown-linux-gnuabi64]
|
||||
image = "burntsushi/cross:mips64-unknown-linux-gnuabi64"
|
||||
build-std = true
|
||||
|
||||
[target.arm-unknown-linux-gnueabihf]
|
||||
image = "burntsushi/cross:arm-unknown-linux-gnueabihf"
|
||||
|
10
GUIDE.md
10
GUIDE.md
@@ -190,8 +190,7 @@ the following:
|
||||
All of these things can be toggled using various flags provided by ripgrep:
|
||||
|
||||
1. You can disable all ignore-related filtering with the `--no-ignore` flag.
|
||||
2. Hidden files and directories can be searched with the `--hidden` (`-.` for
|
||||
short) flag.
|
||||
2. Hidden files and directories can be searched with the `--hidden` flag.
|
||||
3. Binary files can be searched via the `--text` (`-a` for short) flag.
|
||||
Be careful with this flag! Binary files may emit control characters to your
|
||||
terminal, which might cause strange behavior.
|
||||
@@ -567,15 +566,12 @@ $ cat $HOME/.ripgreprc
|
||||
--type-add
|
||||
web:*.{html,css,js}*
|
||||
|
||||
# Search hidden files / directories (e.g. dotfiles) by default
|
||||
--hidden
|
||||
|
||||
# Using glob patterns to include/exclude files or folders
|
||||
--glob=!.git/*
|
||||
--glob=!git/*
|
||||
|
||||
# or
|
||||
--glob
|
||||
!.git/*
|
||||
!git/*
|
||||
|
||||
# Set the colors.
|
||||
--colors=line:none
|
||||
|
102
README.md
102
README.md
@@ -2,11 +2,11 @@ ripgrep (rg)
|
||||
------------
|
||||
ripgrep is a line-oriented search tool that recursively searches the current
|
||||
directory for a regex pattern. By default, ripgrep will respect gitignore rules
|
||||
and automatically skip hidden files/directories and binary files. (To disable
|
||||
all automatic filtering by default, use `rg -uuu`.) ripgrep has first class
|
||||
support on Windows, macOS and Linux, with binary downloads available for [every
|
||||
release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to
|
||||
other popular search tools like The Silver Searcher, ack and grep.
|
||||
and automatically skip hidden files/directories and binary files. ripgrep
|
||||
has first class support on Windows, macOS and Linux, with binary downloads
|
||||
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
ripgrep is similar to other popular search tools like The Silver Searcher, ack
|
||||
and grep.
|
||||
|
||||
[](https://github.com/BurntSushi/ripgrep/actions)
|
||||
[](https://crates.io/crates/ripgrep)
|
||||
@@ -90,16 +90,16 @@ times are unaffected by the presence or absence of `-n`.
|
||||
because it contains most of their features and is generally faster. (See
|
||||
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
|
||||
replace grep.)
|
||||
* Like other tools specialized to code search, ripgrep defaults to
|
||||
[recursive search](GUIDE.md#recursive-search) and does [automatic
|
||||
filtering](GUIDE.md#automatic-filtering). Namely, ripgrep won't search files
|
||||
ignored by your `.gitignore`/`.ignore`/`.rgignore` files, it won't search
|
||||
hidden files and it won't search binary files. Automatic filtering can be
|
||||
disabled with `rg -uuu`.
|
||||
* ripgrep can [search specific types of files](GUIDE.md#manual-filtering-file-types).
|
||||
For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs
|
||||
foo` excludes JavaScript files from your search. ripgrep can be taught about
|
||||
new file types with custom matching rules.
|
||||
* Like other tools specialized to code search, ripgrep defaults to recursive
|
||||
directory search and won't search files ignored by your
|
||||
`.gitignore`/`.ignore`/`.rgignore` files. It also ignores hidden and binary
|
||||
files by default. ripgrep also implements full support for `.gitignore`,
|
||||
whereas there are many bugs related to that functionality in other code
|
||||
search tools claiming to provide the same functionality.
|
||||
* ripgrep can search specific types of files. For example, `rg -tpy foo`
|
||||
limits your search to Python files and `rg -Tjs foo` excludes JavaScript
|
||||
files from your search. ripgrep can be taught about new file types with
|
||||
custom matching rules.
|
||||
* ripgrep supports many features found in `grep`, such as showing the context
|
||||
of search results, searching multiple patterns, highlighting matches with
|
||||
color and full Unicode support. Unlike GNU grep, ripgrep stays fast while
|
||||
@@ -110,20 +110,16 @@ times are unaffected by the presence or absence of `-n`.
|
||||
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
|
||||
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
|
||||
syntax is provided via the `--engine (default|pcre2|auto-hybrid)` option.
|
||||
* ripgrep has [rudimentary support for replacements](GUIDE.md#replacements),
|
||||
which permit rewriting output based on what was matched.
|
||||
* ripgrep supports [searching files in text encodings](GUIDE.md#file-encoding)
|
||||
other than UTF-8, such as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more.
|
||||
(Some support for automatically detecting UTF-16 is provided. Other text
|
||||
encodings must be specifically specified with the `-E/--encoding` flag.)
|
||||
* ripgrep supports searching files in text encodings other than UTF-8, such
|
||||
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
|
||||
automatically detecting UTF-16 is provided. Other text encodings must be
|
||||
specifically specified with the `-E/--encoding` flag.)
|
||||
* ripgrep supports searching files compressed in a common format (brotli,
|
||||
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
|
||||
* ripgrep supports
|
||||
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
|
||||
which could be PDF text extraction, less supported decompression, decrypting,
|
||||
automatic encoding detection and so on.
|
||||
* ripgrep can be configured via a
|
||||
[configuration file](GUIDE.md#configuration-file).
|
||||
|
||||
In other words, use ripgrep if you like speed, filtering by default, fewer
|
||||
bugs and Unicode support.
|
||||
@@ -228,25 +224,17 @@ If you're a **Windows Scoop** user, then you can install ripgrep from the
|
||||
$ scoop install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Windows Winget** user, then you can install ripgrep from the
|
||||
[winget-pkgs](https://github.com/microsoft/winget-pkgs/tree/master/manifests/b/BurntSushi/ripgrep)
|
||||
repository:
|
||||
|
||||
```
|
||||
$ winget install BurntSushi.ripgrep.MSVC
|
||||
```
|
||||
|
||||
If you're an **Arch Linux** user, then you can install ripgrep from the official repos:
|
||||
|
||||
```
|
||||
$ sudo pacman -S ripgrep
|
||||
$ pacman -S ripgrep
|
||||
```
|
||||
|
||||
If you're a **Gentoo** user, you can install ripgrep from the
|
||||
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo emerge sys-apps/ripgrep
|
||||
$ emerge sys-apps/ripgrep
|
||||
```
|
||||
|
||||
If you're a **Fedora** user, you can install ripgrep from official
|
||||
@@ -267,7 +255,6 @@ If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from
|
||||
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
|
||||
|
||||
```
|
||||
$ sudo yum install -y yum-utils
|
||||
$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
|
||||
$ sudo yum install ripgrep
|
||||
```
|
||||
@@ -277,13 +264,7 @@ If you're a **Nix** user, you can install ripgrep from
|
||||
|
||||
```
|
||||
$ nix-env --install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Guix** user, you can install ripgrep from the official
|
||||
package collection:
|
||||
|
||||
```
|
||||
$ sudo guix install ripgrep
|
||||
$ # (Or using the attribute name, which is also ripgrep.)
|
||||
```
|
||||
|
||||
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
|
||||
@@ -295,10 +276,8 @@ $ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/13.0.0/ripgre
|
||||
$ sudo dpkg -i ripgrep_13.0.0_amd64.deb
|
||||
```
|
||||
|
||||
If you run Debian stable, ripgrep is [officially maintained by
|
||||
Debian](https://tracker.debian.org/pkg/rust-ripgrep), although its version may
|
||||
be older than the `deb` package available in the previous step.
|
||||
|
||||
If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
|
||||
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
@@ -316,18 +295,11 @@ seem to work right and generate a number of very strange bug reports that I
|
||||
don't know how to fix and don't have the time to fix. Therefore, it is no
|
||||
longer a recommended installation option.)
|
||||
|
||||
If you're an **ALT** user, you can install ripgrep from the
|
||||
[official repo](https://packages.altlinux.org/en/search?name=ripgrep):
|
||||
|
||||
```
|
||||
$ sudo apt-get install ripgrep
|
||||
```
|
||||
|
||||
If you're a **FreeBSD** user, then you can install ripgrep from the
|
||||
[official ports](https://www.freshports.org/textproc/ripgrep/):
|
||||
|
||||
```
|
||||
$ sudo pkg install ripgrep
|
||||
# pkg install ripgrep
|
||||
```
|
||||
|
||||
If you're an **OpenBSD** user, then you can install ripgrep from the
|
||||
@@ -341,26 +313,26 @@ If you're a **NetBSD** user, then you can install ripgrep from
|
||||
[pkgsrc](https://pkgsrc.se/textproc/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo pkgin install ripgrep
|
||||
# pkgin install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Haiku x86_64** user, then you can install ripgrep from the
|
||||
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
|
||||
|
||||
```
|
||||
$ sudo pkgman install ripgrep
|
||||
$ pkgman install ripgrep
|
||||
```
|
||||
|
||||
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
|
||||
same port as Haiku x86_64 using the x86 secondary architecture build:
|
||||
|
||||
```
|
||||
$ sudo pkgman install ripgrep_x86
|
||||
$ pkgman install ripgrep_x86
|
||||
```
|
||||
|
||||
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
|
||||
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.70.0**,
|
||||
* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
|
||||
although ripgrep may work with older versions.
|
||||
* Note that the binary may be bigger than expected because it contains debug
|
||||
symbols. This is intentional. To remove debug symbols and therefore reduce
|
||||
@@ -375,7 +347,7 @@ $ cargo install ripgrep
|
||||
|
||||
ripgrep is written in Rust, so you'll need to grab a
|
||||
[Rust installation](https://www.rust-lang.org/) in order to compile it.
|
||||
ripgrep compiles with Rust 1.70.0 (stable) or newer. In general, ripgrep tracks
|
||||
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
|
||||
the latest stable release of the Rust compiler.
|
||||
|
||||
To build ripgrep:
|
||||
@@ -447,20 +419,12 @@ $ cargo test --all
|
||||
from the repository root.
|
||||
|
||||
|
||||
### Related tools
|
||||
|
||||
* [delta](https://github.com/dandavison/delta) is a syntax highlighting
|
||||
pager that supports the `rg --json` output format. So all you need to do to
|
||||
make it work is `rg --json pattern | delta`. See [delta's manual section on
|
||||
grep](https://dandavison.github.io/delta/grep.html) for more details.
|
||||
|
||||
|
||||
### Vulnerability reporting
|
||||
|
||||
For reporting a security vulnerability, please
|
||||
[contact Andrew Gallant](https://blog.burntsushi.net/about/).
|
||||
The contact page has my email address and PGP public key if you wish to send an
|
||||
encrypted message.
|
||||
[contact Andrew Gallant](https://blog.burntsushi.net/about/),
|
||||
which has my email address and PGP public key if you wish to send an encrypted
|
||||
message.
|
||||
|
||||
|
||||
### Translations
|
||||
|
@@ -26,13 +26,15 @@ SUBTITLES_DIR = 'subtitles'
|
||||
SUBTITLES_EN_NAME = 'en.txt'
|
||||
SUBTITLES_EN_NAME_SAMPLE = 'en.sample.txt'
|
||||
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
|
||||
# SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz' # noqa
|
||||
SUBTITLES_EN_URL = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/en.txt.gz' # noqa
|
||||
SUBTITLES_RU_NAME = 'ru.txt'
|
||||
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
|
||||
# SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz' # noqa
|
||||
SUBTITLES_RU_URL = 'https://object.pouta.csc.fi/OPUS-OpenSubtitles/v2016/mono/ru.txt.gz' # noqa
|
||||
|
||||
LINUX_DIR = 'linux'
|
||||
LINUX_CLONE = 'https://github.com/BurntSushi/linux'
|
||||
LINUX_CLONE = 'git://github.com/BurntSushi/linux'
|
||||
|
||||
# Grep takes locale settings from the environment. There is a *substantial*
|
||||
# performance impact for enabling Unicode, so we need to handle this explicitly
|
||||
@@ -544,11 +546,7 @@ def bench_subtitles_ru_literal(suite_dir):
|
||||
Command('rg (lines)', ['rg', '-n', pat, ru]),
|
||||
Command('ag (lines)', ['ag', '-s', pat, ru]),
|
||||
Command('grep (lines)', ['grep', '-n', pat, ru], env=GREP_ASCII),
|
||||
# ugrep incorrectly identifies this corpus as binary, but it is
|
||||
# entirely valid UTF-8. So we tell ugrep to always treat the corpus
|
||||
# as text even though this technically gives it an edge over other
|
||||
# tools. (It no longer needs to check for binary data.)
|
||||
Command('ugrep (lines)', ['ugrep', '-a', '-n', pat, ru])
|
||||
Command('ugrep (lines)', ['ugrep', '-n', pat, ru])
|
||||
])
|
||||
|
||||
|
||||
@@ -566,8 +564,7 @@ def bench_subtitles_ru_literal_casei(suite_dir):
|
||||
Command('grep (ASCII)', ['grep', '-E', '-i', pat, ru], env=GREP_ASCII),
|
||||
Command('rg (lines)', ['rg', '-n', '-i', pat, ru]),
|
||||
Command('ag (lines) (ASCII)', ['ag', '-i', pat, ru]),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep (lines) (ASCII)', ['ugrep', '-a', '-n', '-i', pat, ru])
|
||||
Command('ugrep (lines) (ASCII)', ['ugrep', '-n', '-i', pat, ru])
|
||||
])
|
||||
|
||||
|
||||
@@ -591,8 +588,7 @@ def bench_subtitles_ru_literal_word(suite_dir):
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-nw', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep (ASCII)', ['ugrep', '-anw', pat, ru]),
|
||||
Command('ugrep (ASCII)', ['ugrep', '-nw', pat, ru]),
|
||||
Command('rg', ['rg', '-nw', pat, ru]),
|
||||
Command('grep', ['grep', '-nw', pat, ru], env=GREP_UNICODE),
|
||||
])
|
||||
@@ -616,8 +612,7 @@ def bench_subtitles_ru_alternate(suite_dir):
|
||||
Command('rg (lines)', ['rg', '-n', pat, ru]),
|
||||
Command('ag (lines)', ['ag', '-s', pat, ru]),
|
||||
Command('grep (lines)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep (lines)', ['ugrep', '-an', pat, ru]),
|
||||
Command('ugrep (lines)', ['ugrep', '-n', pat, ru]),
|
||||
Command('rg', ['rg', pat, ru]),
|
||||
Command('grep', ['grep', '-E', pat, ru], env=GREP_ASCII),
|
||||
])
|
||||
@@ -642,8 +637,7 @@ def bench_subtitles_ru_alternate_casei(suite_dir):
|
||||
Command('grep (ASCII)', [
|
||||
'grep', '-E', '-ni', pat, ru,
|
||||
], env=GREP_ASCII),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep (ASCII)', ['ugrep', '-ani', pat, ru]),
|
||||
Command('ugrep (ASCII)', ['ugrep', '-n', '-i', pat, ru]),
|
||||
Command('rg', ['rg', '-n', '-i', pat, ru]),
|
||||
Command('grep', ['grep', '-E', '-ni', pat, ru], env=GREP_UNICODE),
|
||||
])
|
||||
@@ -660,11 +654,10 @@ def bench_subtitles_ru_surrounding_words(suite_dir):
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, ru]),
|
||||
Command('grep', ['grep', '-E', '-n', pat, ru], env=GREP_UNICODE),
|
||||
Command('ugrep', ['ugrep', '-an', pat, ru]),
|
||||
Command('ugrep', ['ugrep', '-n', pat, ru]),
|
||||
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
|
||||
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep (ASCII)', ['ugrep', '-a', '-n', '-U', pat, ru]),
|
||||
Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru]),
|
||||
])
|
||||
|
||||
|
||||
@@ -683,13 +676,11 @@ def bench_subtitles_ru_no_literal(suite_dir):
|
||||
|
||||
return Benchmark(pattern=pat, commands=[
|
||||
Command('rg', ['rg', '-n', pat, ru]),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep', ['ugrep', '-an', pat, ru]),
|
||||
Command('ugrep', ['ugrep', '-n', pat, ru]),
|
||||
Command('rg (ASCII)', ['rg', '-n', '(?-u)' + pat, ru]),
|
||||
Command('ag (ASCII)', ['ag', '-s', pat, ru]),
|
||||
Command('grep (ASCII)', ['grep', '-E', '-n', pat, ru], env=GREP_ASCII),
|
||||
# See bench_subtitles_ru_literal for why we use '-a' here.
|
||||
Command('ugrep (ASCII)', ['ugrep', '-anU', pat, ru])
|
||||
Command('ugrep (ASCII)', ['ugrep', '-n', '-U', pat, ru])
|
||||
])
|
||||
|
||||
|
||||
|
@@ -1,38 +0,0 @@
|
||||
This directory contains updated benchmarks as of 2022-12-16. They were captured
|
||||
via the benchsuite script at `benchsuite/benchsuite` from the root of this
|
||||
repository. The command that was run:
|
||||
|
||||
$ ./benchsuite \
|
||||
--dir /dev/shm/benchsuite \
|
||||
--raw runs/2022-12-16-archlinux-duff/raw.csv \
|
||||
| tee runs/2022-12-16-archlinux-duff/summary
|
||||
|
||||
The versions of each tool are as follows:
|
||||
|
||||
$ rg --version
|
||||
ripgrep 13.0.0 (rev 87c4a2b4b1)
|
||||
-SIMD -AVX (compiled)
|
||||
+SIMD +AVX (runtime)
|
||||
|
||||
$ grep -V
|
||||
grep (GNU grep) 3.8
|
||||
|
||||
$ ag -V
|
||||
ag version 2.2.0
|
||||
|
||||
Features:
|
||||
+jit +lzma +zlib
|
||||
|
||||
$ git --version
|
||||
git version 2.39.0
|
||||
|
||||
$ ugrep --version
|
||||
ugrep 3.9.2 x86_64-pc-linux-gnu +avx2 +pcre2jit +zlib +bzip2 +lzma +lz4 +zstd
|
||||
License BSD-3-Clause: <https://opensource.org/licenses/BSD-3-Clause>
|
||||
Written by Robert van Engelen and others: <https://github.com/Genivia/ugrep>
|
||||
|
||||
The version of ripgrep used was compiled from source on commit 7f23cd63:
|
||||
|
||||
$ cargo build --release --features 'pcre2'
|
||||
|
||||
This was run on a machine with an Intel i9-12900K with 128GB of memory.
|
@@ -1,400 +0,0 @@
|
||||
benchmark,warmup_iter,iter,name,command,duration,lines,env
|
||||
linux_literal_default,1,3,rg,rg PM_RESUME,0.08678817749023438,39,
|
||||
linux_literal_default,1,3,rg,rg PM_RESUME,0.08307123184204102,39,
|
||||
linux_literal_default,1,3,rg,rg PM_RESUME,0.08347964286804199,39,
|
||||
linux_literal_default,1,3,ag,ag PM_RESUME,0.2955434322357178,39,
|
||||
linux_literal_default,1,3,ag,ag PM_RESUME,0.2954287528991699,39,
|
||||
linux_literal_default,1,3,ag,ag PM_RESUME,0.2938194274902344,39,
|
||||
linux_literal_default,1,3,git grep,git grep PM_RESUME,0.23198556900024414,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,git grep,git grep PM_RESUME,0.22356963157653809,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,git grep,git grep PM_RESUME,0.2189793586730957,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,ugrep,ugrep -r PM_RESUME ./,0.10710000991821289,39,
|
||||
linux_literal_default,1,3,ugrep,ugrep -r PM_RESUME ./,0.10364222526550293,39,
|
||||
linux_literal_default,1,3,ugrep,ugrep -r PM_RESUME ./,0.1052248477935791,39,
|
||||
linux_literal_default,1,3,grep,grep -r PM_RESUME ./,0.9994468688964844,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,grep,grep -r PM_RESUME ./,0.9939279556274414,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal_default,1,3,grep,grep -r PM_RESUME ./,0.9957931041717529,39,LC_ALL=en_US.UTF-8
|
||||
linux_literal,1,3,rg,rg -n PM_RESUME,0.08603358268737793,39,
|
||||
linux_literal,1,3,rg,rg -n PM_RESUME,0.0837090015411377,39,
|
||||
linux_literal,1,3,rg,rg -n PM_RESUME,0.08435535430908203,39,
|
||||
linux_literal,1,3,rg (mmap),rg -n --mmap PM_RESUME,0.3215503692626953,39,
|
||||
linux_literal,1,3,rg (mmap),rg -n --mmap PM_RESUME,0.32426929473876953,39,
|
||||
linux_literal,1,3,rg (mmap),rg -n --mmap PM_RESUME,0.3215982913970947,39,
|
||||
linux_literal,1,3,ag (mmap),ag -s PM_RESUME,0.2894856929779053,39,
|
||||
linux_literal,1,3,ag (mmap),ag -s PM_RESUME,0.2892603874206543,39,
|
||||
linux_literal,1,3,ag (mmap),ag -s PM_RESUME,0.29217028617858887,39,
|
||||
linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.206068754196167,39,LC_ALL=C
|
||||
linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.2218036651611328,39,LC_ALL=C
|
||||
linux_literal,1,3,git grep,git grep -I -n PM_RESUME,0.20590710639953613,39,LC_ALL=C
|
||||
linux_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n PM_RESUME ./,0.18692874908447266,39,
|
||||
linux_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n PM_RESUME ./,0.19518327713012695,39,
|
||||
linux_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n PM_RESUME ./,0.18577361106872559,39,
|
||||
linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.08709383010864258,536,
|
||||
linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.08861064910888672,536,
|
||||
linux_literal_casei,1,3,rg,rg -n -i PM_RESUME,0.08769798278808594,536,
|
||||
linux_literal_casei,1,3,rg (mmap),rg -n -i --mmap PM_RESUME,0.3218965530395508,536,
|
||||
linux_literal_casei,1,3,rg (mmap),rg -n -i --mmap PM_RESUME,0.30869364738464355,536,
|
||||
linux_literal_casei,1,3,rg (mmap),rg -n -i --mmap PM_RESUME,0.31044936180114746,536,
|
||||
linux_literal_casei,1,3,ag (mmap),ag -i PM_RESUME,0.2989068031311035,536,
|
||||
linux_literal_casei,1,3,ag (mmap),ag -i PM_RESUME,0.2996039390563965,536,
|
||||
linux_literal_casei,1,3,ag (mmap),ag -i PM_RESUME,0.29817700386047363,536,
|
||||
linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.2122786045074463,536,LC_ALL=C
|
||||
linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.20763754844665527,536,LC_ALL=C
|
||||
linux_literal_casei,1,3,git grep,git grep -I -n -i PM_RESUME,0.220794677734375,536,LC_ALL=C
|
||||
linux_literal_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i PM_RESUME ./,0.17305850982666016,536,
|
||||
linux_literal_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i PM_RESUME ./,0.1745915412902832,536,
|
||||
linux_literal_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i PM_RESUME ./,0.17526865005493164,536,
|
||||
linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.08527851104736328,2160,
|
||||
linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.08487534523010254,2160,
|
||||
linux_re_literal_suffix,1,3,rg,rg -n [A-Z]+_RESUME,0.0848684310913086,2160,
|
||||
linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.37945985794067383,2160,
|
||||
linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.36303210258483887,2160,
|
||||
linux_re_literal_suffix,1,3,ag,ag -s [A-Z]+_RESUME,0.36359691619873047,2160,
|
||||
linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.9589834213256836,2160,LC_ALL=C
|
||||
linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.9206984043121338,2160,LC_ALL=C
|
||||
linux_re_literal_suffix,1,3,git grep,git grep -E -I -n [A-Z]+_RESUME,0.8642933368682861,2160,LC_ALL=C
|
||||
linux_re_literal_suffix,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n [A-Z]+_RESUME ./,0.40503501892089844,2160,
|
||||
linux_re_literal_suffix,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n [A-Z]+_RESUME ./,0.4531714916229248,2160,
|
||||
linux_re_literal_suffix,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n [A-Z]+_RESUME ./,0.4397866725921631,2160,
|
||||
linux_word,1,3,rg,rg -n -w PM_RESUME,0.08639907836914062,9,
|
||||
linux_word,1,3,rg,rg -n -w PM_RESUME,0.08583569526672363,9,
|
||||
linux_word,1,3,rg,rg -n -w PM_RESUME,0.08414363861083984,9,
|
||||
linux_word,1,3,ag,ag -s -w PM_RESUME,0.2853865623474121,9,
|
||||
linux_word,1,3,ag,ag -s -w PM_RESUME,0.2871377468109131,9,
|
||||
linux_word,1,3,ag,ag -s -w PM_RESUME,0.28753662109375,9,
|
||||
linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.20428204536437988,9,LC_ALL=C
|
||||
linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.20490717887878418,9,LC_ALL=C
|
||||
linux_word,1,3,git grep,git grep -E -I -n -w PM_RESUME,0.20840072631835938,9,LC_ALL=C
|
||||
linux_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -w PM_RESUME ./,0.18790841102600098,9,
|
||||
linux_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -w PM_RESUME ./,0.18659543991088867,9,
|
||||
linux_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -w PM_RESUME ./,0.19104933738708496,9,
|
||||
linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.19976496696472168,105,
|
||||
linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.20618367195129395,105,
|
||||
linux_unicode_greek,1,3,rg,rg -n \p{Greek},0.19702935218811035,105,
|
||||
linux_unicode_greek,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \p{Greek} ./,0.17758727073669434,105,
|
||||
linux_unicode_greek,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \p{Greek} ./,0.17793798446655273,105,
|
||||
linux_unicode_greek,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \p{Greek} ./,0.1872577667236328,105,
|
||||
linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.19808244705200195,245,
|
||||
linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.1979837417602539,245,
|
||||
linux_unicode_greek_casei,1,3,rg,rg -n -i \p{Greek},0.1984400749206543,245,
|
||||
linux_unicode_greek_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i \p{Greek} ./,0.1819148063659668,105,
|
||||
linux_unicode_greek_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i \p{Greek} ./,0.17530512809753418,105,
|
||||
linux_unicode_greek_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i \p{Greek} ./,0.17999005317687988,105,
|
||||
linux_unicode_word,1,3,rg,rg -n \wAh,0.08527827262878418,247,
|
||||
linux_unicode_word,1,3,rg,rg -n \wAh,0.08541679382324219,247,
|
||||
linux_unicode_word,1,3,rg,rg -n \wAh,0.08553218841552734,247,
|
||||
linux_unicode_word,1,3,rg (ASCII),rg -n (?-u)\wAh,0.08484745025634766,233,
|
||||
linux_unicode_word,1,3,rg (ASCII),rg -n (?-u)\wAh,0.08466482162475586,233,
|
||||
linux_unicode_word,1,3,rg (ASCII),rg -n (?-u)\wAh,0.08487439155578613,233,
|
||||
linux_unicode_word,1,3,ag (ASCII),ag -s \wAh,0.3061795234680176,233,
|
||||
linux_unicode_word,1,3,ag (ASCII),ag -s \wAh,0.2993617057800293,233,
|
||||
linux_unicode_word,1,3,ag (ASCII),ag -s \wAh,0.29722046852111816,233,
|
||||
linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,4.257144451141357,247,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,3.852163076400757,247,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,3,git grep,git grep -E -I -n \wAh,3.8293941020965576,247,LC_ALL=en_US.UTF-8
|
||||
linux_unicode_word,1,3,git grep (ASCII),git grep -E -I -n \wAh,1.647632122039795,233,LC_ALL=C
|
||||
linux_unicode_word,1,3,git grep (ASCII),git grep -E -I -n \wAh,1.6269629001617432,233,LC_ALL=C
|
||||
linux_unicode_word,1,3,git grep (ASCII),git grep -E -I -n \wAh,1.5847914218902588,233,LC_ALL=C
|
||||
linux_unicode_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \wAh ./,0.1802208423614502,247,
|
||||
linux_unicode_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \wAh ./,0.17564702033996582,247,
|
||||
linux_unicode_word,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \wAh ./,0.1746981143951416,247,
|
||||
linux_unicode_word,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \wAh ./,0.1799161434173584,233,
|
||||
linux_unicode_word,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \wAh ./,0.18733000755310059,233,
|
||||
linux_unicode_word,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \wAh ./,0.18859529495239258,233,
|
||||
linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.26203155517578125,721,
|
||||
linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.2615540027618408,721,
|
||||
linux_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.2730247974395752,721,
|
||||
linux_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.19902300834655762,720,
|
||||
linux_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.20034146308898926,720,
|
||||
linux_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.20192813873291016,720,
|
||||
linux_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8269081115722656,1134,
|
||||
linux_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8393104076385498,1134,
|
||||
linux_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},0.8293666839599609,1134,
|
||||
linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},7.334395408630371,721,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},7.338796854019165,721,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,3,git grep,git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},7.36545991897583,721,LC_ALL=en_US.UTF-8
|
||||
linux_no_literal,1,3,git grep (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.1588926315307617,720,LC_ALL=C
|
||||
linux_no_literal,1,3,git grep (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.132209062576294,720,LC_ALL=C
|
||||
linux_no_literal,1,3,git grep (ASCII),git grep -E -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5},2.1407439708709717,720,LC_ALL=C
|
||||
linux_no_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,3.410162925720215,723,
|
||||
linux_no_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,3.405057668685913,723,
|
||||
linux_no_literal,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,3.3945884704589844,723,
|
||||
linux_no_literal,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,0.23865604400634766,722,
|
||||
linux_no_literal,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,0.23371148109436035,722,
|
||||
linux_no_literal,1,3,ugrep (ASCII),ugrep -r --ignore-files --no-hidden -I -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} ./,0.2343149185180664,722,
|
||||
linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08691263198852539,140,
|
||||
linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08707070350646973,140,
|
||||
linux_alternates,1,3,rg,rg -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.08713960647583008,140,
|
||||
linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.32947278022766113,140,
|
||||
linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.33203840255737305,140,
|
||||
linux_alternates,1,3,ag,ag -s ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.3292670249938965,140,
|
||||
linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.4576725959777832,140,LC_ALL=C
|
||||
linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.41936421394348145,140,LC_ALL=C
|
||||
linux_alternates,1,3,git grep,git grep -E -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.3639688491821289,140,LC_ALL=C
|
||||
linux_alternates,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.17806458473205566,140,
|
||||
linux_alternates,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.18224716186523438,140,
|
||||
linux_alternates,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.17795038223266602,140,
|
||||
linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12421393394470215,241,
|
||||
linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12235784530639648,241,
|
||||
linux_alternates_casei,1,3,rg,rg -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.12151455879211426,241,
|
||||
linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.529585599899292,241,
|
||||
linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5305526256561279,241,
|
||||
linux_alternates_casei,1,3,ag,ag -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.5311264991760254,241,
|
||||
linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.7589735984802246,241,LC_ALL=C
|
||||
linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.7852108478546143,241,LC_ALL=C
|
||||
linux_alternates_casei,1,3,git grep,git grep -E -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT,0.8308050632476807,241,LC_ALL=C
|
||||
linux_alternates_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.17955923080444336,241,
|
||||
linux_alternates_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.1745290756225586,241,
|
||||
linux_alternates_casei,1,3,ugrep,ugrep -r --ignore-files --no-hidden -I -n -i ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT ./,0.1773686408996582,241,
|
||||
subtitles_en_literal,1,3,rg,rg Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.1213979721069336,830,
|
||||
subtitles_en_literal,1,3,rg,rg Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.1213991641998291,830,
|
||||
subtitles_en_literal,1,3,rg,rg Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.12620782852172852,830,
|
||||
subtitles_en_literal,1,3,rg (no mmap),rg --no-mmap Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18207263946533203,830,
|
||||
subtitles_en_literal,1,3,rg (no mmap),rg --no-mmap Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17281484603881836,830,
|
||||
subtitles_en_literal,1,3,rg (no mmap),rg --no-mmap Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17368507385253906,830,
|
||||
subtitles_en_literal,1,3,grep,grep Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.560560941696167,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep,grep Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.563499927520752,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep,grep Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.5916609764099121,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,rg (lines),rg -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.19600844383239746,830,
|
||||
subtitles_en_literal,1,3,rg (lines),rg -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18436980247497559,830,
|
||||
subtitles_en_literal,1,3,rg (lines),rg -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18594050407409668,830,
|
||||
subtitles_en_literal,1,3,ag (lines),ag -s Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.871025562286377,830,
|
||||
subtitles_en_literal,1,3,ag (lines),ag -s Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8636960983276367,830,
|
||||
subtitles_en_literal,1,3,ag (lines),ag -s Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8680994510650635,830,
|
||||
subtitles_en_literal,1,3,grep (lines),grep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9978001117706299,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep (lines),grep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9385361671447754,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,grep (lines),grep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0036489963531494,830,LC_ALL=C
|
||||
subtitles_en_literal,1,3,ugrep (lines),ugrep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18918490409851074,830,
|
||||
subtitles_en_literal,1,3,ugrep (lines),ugrep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.1769108772277832,830,
|
||||
subtitles_en_literal,1,3,ugrep (lines),ugrep -n Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18808293342590332,830,
|
||||
subtitles_en_literal_casei,1,3,rg,rg -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.21876287460327148,871,
|
||||
subtitles_en_literal_casei,1,3,rg,rg -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.2044692039489746,871,
|
||||
subtitles_en_literal_casei,1,3,rg,rg -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.2184743881225586,871,
|
||||
subtitles_en_literal_casei,1,3,grep,grep -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,2.224027156829834,871,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,3,grep,grep -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,2.223188877105713,871,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,3,grep,grep -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,2.223966598510742,871,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_casei,1,3,grep (ASCII),grep -E -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.671149492263794,871,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,3,grep (ASCII),grep -E -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.6705749034881592,871,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,3,grep (ASCII),grep -E -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.6700258255004883,871,LC_ALL=C
|
||||
subtitles_en_literal_casei,1,3,rg (lines),rg -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.2624058723449707,871,
|
||||
subtitles_en_literal_casei,1,3,rg (lines),rg -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.25513339042663574,871,
|
||||
subtitles_en_literal_casei,1,3,rg (lines),rg -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.26088857650756836,871,
|
||||
subtitles_en_literal_casei,1,3,ag (lines) (ASCII),ag -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.9144322872161865,871,
|
||||
subtitles_en_literal_casei,1,3,ag (lines) (ASCII),ag -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.866628885269165,871,
|
||||
subtitles_en_literal_casei,1,3,ag (lines) (ASCII),ag -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.9098389148712158,871,
|
||||
subtitles_en_literal_casei,1,3,ugrep (lines),ugrep -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.7860472202301025,871,
|
||||
subtitles_en_literal_casei,1,3,ugrep (lines),ugrep -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.7858343124389648,871,
|
||||
subtitles_en_literal_casei,1,3,ugrep (lines),ugrep -n -i Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.782252311706543,871,
|
||||
subtitles_en_literal_word,1,3,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /dev/shm/benchsuite/subtitles/en.sample.txt,0.18424677848815918,830,
|
||||
subtitles_en_literal_word,1,3,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /dev/shm/benchsuite/subtitles/en.sample.txt,0.19610810279846191,830,
|
||||
subtitles_en_literal_word,1,3,rg (ASCII),rg -n (?-u:\b)Sherlock Holmes(?-u:\b) /dev/shm/benchsuite/subtitles/en.sample.txt,0.18711471557617188,830,
|
||||
subtitles_en_literal_word,1,3,ag (ASCII),ag -sw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8301315307617188,830,
|
||||
subtitles_en_literal_word,1,3,ag (ASCII),ag -sw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8689801692962646,830,
|
||||
subtitles_en_literal_word,1,3,ag (ASCII),ag -sw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.8279321193695068,830,
|
||||
subtitles_en_literal_word,1,3,grep (ASCII),grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0036842823028564,830,LC_ALL=C
|
||||
subtitles_en_literal_word,1,3,grep (ASCII),grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.002833604812622,830,LC_ALL=C
|
||||
subtitles_en_literal_word,1,3,grep (ASCII),grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9236147403717041,830,LC_ALL=C
|
||||
subtitles_en_literal_word,1,3,ugrep (ASCII),ugrep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17717313766479492,830,
|
||||
subtitles_en_literal_word,1,3,ugrep (ASCII),ugrep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18994617462158203,830,
|
||||
subtitles_en_literal_word,1,3,ugrep (ASCII),ugrep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.17972850799560547,830,
|
||||
subtitles_en_literal_word,1,3,rg,rg -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18804550170898438,830,
|
||||
subtitles_en_literal_word,1,3,rg,rg -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.18867778778076172,830,
|
||||
subtitles_en_literal_word,1,3,rg,rg -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.19913530349731445,830,
|
||||
subtitles_en_literal_word,1,3,grep,grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0044364929199219,830,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,3,grep,grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,1.0040032863616943,830,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_literal_word,1,3,grep,grep -nw Sherlock Holmes /dev/shm/benchsuite/subtitles/en.sample.txt,0.9627983570098877,830,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate,1,3,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.24848055839538574,1094,
|
||||
subtitles_en_alternate,1,3,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.24738383293151855,1094,
|
||||
subtitles_en_alternate,1,3,rg (lines),rg -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.24789118766784668,1094,
|
||||
subtitles_en_alternate,1,3,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.668708562850952,1094,
|
||||
subtitles_en_alternate,1,3,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.57511305809021,1094,
|
||||
subtitles_en_alternate,1,3,ag (lines),ag -s Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.6714110374450684,1094,
|
||||
subtitles_en_alternate,1,3,grep (lines),grep -E -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.0586187839508057,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep (lines),grep -E -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.0227150917053223,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep (lines),grep -E -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,2.075378179550171,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,ugrep (lines),ugrep -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7863781452178955,1094,
|
||||
subtitles_en_alternate,1,3,ugrep (lines),ugrep -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7874250411987305,1094,
|
||||
subtitles_en_alternate,1,3,ugrep (lines),ugrep -n Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7867889404296875,1094,
|
||||
subtitles_en_alternate,1,3,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.18195557594299316,1094,
|
||||
subtitles_en_alternate,1,3,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.18239641189575195,1094,
|
||||
subtitles_en_alternate,1,3,rg,rg Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.1625690460205078,1094,
|
||||
subtitles_en_alternate,1,3,grep,grep -E Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,1.6601614952087402,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep,grep -E Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,1.6617567539215088,1094,LC_ALL=C
|
||||
subtitles_en_alternate,1,3,grep,grep -E Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,1.6584677696228027,1094,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,4.0028722286224365,1136,
|
||||
subtitles_en_alternate_casei,1,3,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.991217851638794,1136,
|
||||
subtitles_en_alternate_casei,1,3,ag (ASCII),ag -s -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,4.00272274017334,1136,
|
||||
subtitles_en_alternate_casei,1,3,grep (ASCII),grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.549154758453369,1136,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,grep (ASCII),grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5468921661376953,1136,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,grep (ASCII),grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5873491764068604,1136,LC_ALL=C
|
||||
subtitles_en_alternate_casei,1,3,ugrep (ASCII),ugrep -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7872169017791748,1136,
|
||||
subtitles_en_alternate_casei,1,3,ugrep (ASCII),ugrep -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.784674882888794,1136,
|
||||
subtitles_en_alternate_casei,1,3,ugrep (ASCII),ugrep -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.7882401943206787,1136,
|
||||
subtitles_en_alternate_casei,1,3,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.4785435199737549,1136,
|
||||
subtitles_en_alternate_casei,1,3,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.4940922260284424,1136,
|
||||
subtitles_en_alternate_casei,1,3,rg,rg -n -i Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,0.4774627685546875,1136,
|
||||
subtitles_en_alternate_casei,1,3,grep,grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5677175521850586,1136,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,3,grep,grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.603273391723633,1136,LC_ALL=en_US.UTF-8
|
||||
subtitles_en_alternate_casei,1,3,grep,grep -E -ni Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty /dev/shm/benchsuite/subtitles/en.sample.txt,3.5834741592407227,1136,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20238041877746582,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.2031264305114746,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20475172996520996,278,
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0288453102111816,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.044802188873291,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0432109832763672,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -an \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,43.00765633583069,278,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -an \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,42.832849740982056,278,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -an \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,42.915205240249634,278,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.083683967590332,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0841526985168457,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0850934982299805,,
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0116353034973145,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.9868073463439941,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0224814414978027,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8892502784729004,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8910088539123535,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8897674083709717,,
|
||||
subtitles_en_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.11850643157959,22,
|
||||
subtitles_en_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.1359670162200928,22,
|
||||
subtitles_en_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.103114128112793,22,
|
||||
subtitles_en_no_literal,1,3,ugrep,ugrep -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,13.050881385803223,22,
|
||||
subtitles_en_no_literal,1,3,ugrep,ugrep -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,13.050772190093994,22,
|
||||
subtitles_en_no_literal,1,3,ugrep,ugrep -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,13.05719804763794,22,
|
||||
subtitles_en_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,1.9961926937103271,22,
|
||||
subtitles_en_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,2.019721508026123,22,
|
||||
subtitles_en_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,1.9965126514434814,22,
|
||||
subtitles_en_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,6.849602222442627,302,
|
||||
subtitles_en_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,6.813834190368652,302,
|
||||
subtitles_en_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,6.8263633251190186,302,
|
||||
subtitles_en_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,4.42924165725708,22,LC_ALL=C
|
||||
subtitles_en_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,4.378557205200195,22,LC_ALL=C
|
||||
subtitles_en_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,4.376646518707275,22,LC_ALL=C
|
||||
subtitles_en_no_literal,1,3,ugrep (ASCII),ugrep -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,3.5110037326812744,22,
|
||||
subtitles_en_no_literal,1,3,ugrep (ASCII),ugrep -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,3.5137360095977783,22,
|
||||
subtitles_en_no_literal,1,3,ugrep (ASCII),ugrep -n -U \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/en.sample.txt,3.5051844120025635,22,
|
||||
subtitles_ru_literal,1,3,rg,rg Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.13207745552062988,583,
|
||||
subtitles_ru_literal,1,3,rg,rg Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.13084721565246582,583,
|
||||
subtitles_ru_literal,1,3,rg,rg Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.13469862937927246,583,
|
||||
subtitles_ru_literal,1,3,rg (no mmap),rg --no-mmap Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.18022370338439941,583,
|
||||
subtitles_ru_literal,1,3,rg (no mmap),rg --no-mmap Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.1801767349243164,583,
|
||||
subtitles_ru_literal,1,3,rg (no mmap),rg --no-mmap Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.17995166778564453,583,
|
||||
subtitles_ru_literal,1,3,grep,grep Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5151040554046631,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep,grep Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5154542922973633,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep,grep Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.49927639961242676,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,rg (lines),rg -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.19464492797851562,583,
|
||||
subtitles_ru_literal,1,3,rg (lines),rg -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.18920588493347168,583,
|
||||
subtitles_ru_literal,1,3,rg (lines),rg -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.19465351104736328,583,
|
||||
subtitles_ru_literal,1,3,ag (lines),ag -s Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,1.9595966339111328,583,
|
||||
subtitles_ru_literal,1,3,ag (lines),ag -s Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,2.0014493465423584,583,
|
||||
subtitles_ru_literal,1,3,ag (lines),ag -s Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,1.9567768573760986,583,
|
||||
subtitles_ru_literal,1,3,grep (lines),grep -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8119180202484131,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep (lines),grep -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8111097812652588,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,grep (lines),grep -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8006868362426758,583,LC_ALL=C
|
||||
subtitles_ru_literal,1,3,ugrep (lines),ugrep -a -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.70003342628479,583,
|
||||
subtitles_ru_literal,1,3,ugrep (lines),ugrep -a -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.650275468826294,583,
|
||||
subtitles_ru_literal,1,3,ugrep (lines),ugrep -a -n Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.689772367477417,583,
|
||||
subtitles_ru_literal_casei,1,3,rg,rg -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.267578125,604,
|
||||
subtitles_ru_literal_casei,1,3,rg,rg -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.2665982246398926,604,
|
||||
subtitles_ru_literal_casei,1,3,rg,rg -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.26861572265625,604,
|
||||
subtitles_ru_literal_casei,1,3,grep,grep -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,4.764627456665039,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,3,grep,grep -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,4.767015695571899,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,3,grep,grep -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,4.7688889503479,604,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_casei,1,3,grep (ASCII),grep -E -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5046737194061279,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,3,grep (ASCII),grep -E -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5139875411987305,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,3,grep (ASCII),grep -E -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.4993159770965576,583,LC_ALL=C
|
||||
subtitles_ru_literal_casei,1,3,rg (lines),rg -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.33438658714294434,604,
|
||||
subtitles_ru_literal_casei,1,3,rg (lines),rg -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.3398289680480957,604,
|
||||
subtitles_ru_literal_casei,1,3,rg (lines),rg -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.3298227787017822,604,
|
||||
subtitles_ru_literal_casei,1,3,ag (lines) (ASCII),ag -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.4468214511871338,,
|
||||
subtitles_ru_literal_casei,1,3,ag (lines) (ASCII),ag -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.44559574127197266,,
|
||||
subtitles_ru_literal_casei,1,3,ag (lines) (ASCII),ag -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.47882938385009766,,
|
||||
subtitles_ru_literal_casei,1,3,ugrep (lines) (ASCII),ugrep -a -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7039575576782227,583,
|
||||
subtitles_ru_literal_casei,1,3,ugrep (lines) (ASCII),ugrep -a -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.6490752696990967,583,
|
||||
subtitles_ru_literal_casei,1,3,ugrep (lines) (ASCII),ugrep -a -n -i Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8081104755401611,583,
|
||||
subtitles_ru_literal_word,1,3,rg (ASCII),rg -n (?-u:^|\W)Шерлок Холмс(?-u:$|\W) /dev/shm/benchsuite/subtitles/ru.txt,0.20162224769592285,583,
|
||||
subtitles_ru_literal_word,1,3,rg (ASCII),rg -n (?-u:^|\W)Шерлок Холмс(?-u:$|\W) /dev/shm/benchsuite/subtitles/ru.txt,0.18215250968933105,583,
|
||||
subtitles_ru_literal_word,1,3,rg (ASCII),rg -n (?-u:^|\W)Шерлок Холмс(?-u:$|\W) /dev/shm/benchsuite/subtitles/ru.txt,0.20087671279907227,583,
|
||||
subtitles_ru_literal_word,1,3,ag (ASCII),ag -sw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.48624587059020996,,
|
||||
subtitles_ru_literal_word,1,3,ag (ASCII),ag -sw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.5212516784667969,,
|
||||
subtitles_ru_literal_word,1,3,ag (ASCII),ag -sw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.520557165145874,,
|
||||
subtitles_ru_literal_word,1,3,grep (ASCII),grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8108196258544922,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,3,grep (ASCII),grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8121066093444824,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,3,grep (ASCII),grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7784581184387207,583,LC_ALL=C
|
||||
subtitles_ru_literal_word,1,3,ugrep (ASCII),ugrep -anw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7469344139099121,583,
|
||||
subtitles_ru_literal_word,1,3,ugrep (ASCII),ugrep -anw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.6838233470916748,583,
|
||||
subtitles_ru_literal_word,1,3,ugrep (ASCII),ugrep -anw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.6921679973602295,583,
|
||||
subtitles_ru_literal_word,1,3,rg,rg -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.19918251037597656,579,
|
||||
subtitles_ru_literal_word,1,3,rg,rg -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.2046656608581543,579,
|
||||
subtitles_ru_literal_word,1,3,rg,rg -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.1984848976135254,579,
|
||||
subtitles_ru_literal_word,1,3,grep,grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.794173002243042,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,3,grep,grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.7715346813201904,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_literal_word,1,3,grep,grep -nw Шерлок Холмс /dev/shm/benchsuite/subtitles/ru.txt,0.8116705417633057,579,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate,1,3,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6730976104736328,691,
|
||||
subtitles_ru_alternate,1,3,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.7020411491394043,691,
|
||||
subtitles_ru_alternate,1,3,rg (lines),rg -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6693949699401855,691,
|
||||
subtitles_ru_alternate,1,3,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7100515365600586,691,
|
||||
subtitles_ru_alternate,1,3,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7458419799804688,691,
|
||||
subtitles_ru_alternate,1,3,ag (lines),ag -s Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7115116119384766,691,
|
||||
subtitles_ru_alternate,1,3,grep (lines),grep -E -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.703738451004028,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep (lines),grep -E -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.715883731842041,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep (lines),grep -E -n Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.712724924087524,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,ugrep (lines),ugrep -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.276995420455933,691,
|
||||
subtitles_ru_alternate,1,3,ugrep (lines),ugrep -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.304608345031738,691,
|
||||
subtitles_ru_alternate,1,3,ugrep (lines),ugrep -an Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.322760820388794,691,
|
||||
subtitles_ru_alternate,1,3,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6119842529296875,691,
|
||||
subtitles_ru_alternate,1,3,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6368775367736816,691,
|
||||
subtitles_ru_alternate,1,3,rg,rg Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,0.6258070468902588,691,
|
||||
subtitles_ru_alternate,1,3,grep,grep -E Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.4300291538238525,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep,grep -E Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.418199300765991,691,LC_ALL=C
|
||||
subtitles_ru_alternate,1,3,grep,grep -E Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.425868511199951,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7216460704803467,691,
|
||||
subtitles_ru_alternate_casei,1,3,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.7108607292175293,691,
|
||||
subtitles_ru_alternate_casei,1,3,ag (ASCII),ag -s -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,2.747138500213623,691,
|
||||
subtitles_ru_alternate_casei,1,3,grep (ASCII),grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.711230039596558,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,grep (ASCII),grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.709407329559326,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,grep (ASCII),grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.714034557342529,691,LC_ALL=C
|
||||
subtitles_ru_alternate_casei,1,3,ugrep (ASCII),ugrep -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.305904626846313,691,
|
||||
subtitles_ru_alternate_casei,1,3,ugrep (ASCII),ugrep -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.307406187057495,691,
|
||||
subtitles_ru_alternate_casei,1,3,ugrep (ASCII),ugrep -ani Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,8.288233995437622,691,
|
||||
subtitles_ru_alternate_casei,1,3,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,3.673624277114868,735,
|
||||
subtitles_ru_alternate_casei,1,3,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,3.6759188175201416,735,
|
||||
subtitles_ru_alternate_casei,1,3,rg,rg -n -i Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,3.66877818107605,735,
|
||||
subtitles_ru_alternate_casei,1,3,grep,grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.366282224655151,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,3,grep,grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.370524883270264,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_alternate_casei,1,3,grep,grep -E -ni Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти /dev/shm/benchsuite/subtitles/ru.txt,5.342163324356079,735,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20331382751464844,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.2034592628479004,278,
|
||||
subtitles_ru_surrounding_words,1,3,rg,rg -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.20407724380493164,278,
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0436389446258545,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0388383865356445,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,grep,grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0446207523345947,278,LC_ALL=en_US.UTF-8
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.29245424270629883,1,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.29168128967285156,1,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep,ugrep -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.29593825340270996,1,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.085604190826416,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.083526372909546,,
|
||||
subtitles_ru_surrounding_words,1,3,ag (ASCII),ag -s \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.1223819255828857,,
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.9905192852020264,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0222513675689697,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,grep (ASCII),grep -E -n \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,1.0216262340545654,,LC_ALL=C
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8875806331634521,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8861405849456787,,
|
||||
subtitles_ru_surrounding_words,1,3,ugrep (ASCII),ugrep -a -n -U \w+\s+Холмс\s+\w+ /dev/shm/benchsuite/subtitles/ru.txt,0.8898241519927979,,
|
||||
subtitles_ru_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.237398147583008,41,
|
||||
subtitles_ru_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.253706693649292,41,
|
||||
subtitles_ru_no_literal,1,3,rg,rg -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.2161178588867188,41,
|
||||
subtitles_ru_no_literal,1,3,ugrep,ugrep -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,28.85959553718567,41,
|
||||
subtitles_ru_no_literal,1,3,ugrep,ugrep -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,28.666419982910156,41,
|
||||
subtitles_ru_no_literal,1,3,ugrep,ugrep -an \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,28.90555214881897,41,
|
||||
subtitles_ru_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.051813840866089,,
|
||||
subtitles_ru_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.026675224304199,,
|
||||
subtitles_ru_no_literal,1,3,rg (ASCII),rg -n (?-u)\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,2.027498245239258,,
|
||||
subtitles_ru_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0998010635375977,,
|
||||
subtitles_ru_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0900018215179443,,
|
||||
subtitles_ru_no_literal,1,3,ag (ASCII),ag -s \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0901548862457275,,
|
||||
subtitles_ru_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0691263675689697,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0875153541564941,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,3,grep (ASCII),grep -E -n \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,1.0997354984283447,,LC_ALL=C
|
||||
subtitles_ru_no_literal,1,3,ugrep (ASCII),ugrep -anU \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,0.8329172134399414,,
|
||||
subtitles_ru_no_literal,1,3,ugrep (ASCII),ugrep -anU \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,0.8292679786682129,,
|
||||
subtitles_ru_no_literal,1,3,ugrep (ASCII),ugrep -anU \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5} /dev/shm/benchsuite/subtitles/ru.txt,0.8326950073242188,,
|
|
@@ -1,208 +0,0 @@
|
||||
linux_literal_default (pattern: PM_RESUME)
|
||||
------------------------------------------
|
||||
rg* 0.084 +/- 0.002 (lines: 39)*
|
||||
ag 0.295 +/- 0.001 (lines: 39)
|
||||
git grep 0.225 +/- 0.007 (lines: 39)
|
||||
ugrep 0.105 +/- 0.002 (lines: 39)
|
||||
grep 0.996 +/- 0.003 (lines: 39)
|
||||
|
||||
linux_literal (pattern: PM_RESUME)
|
||||
----------------------------------
|
||||
rg* 0.085 +/- 0.001 (lines: 39)*
|
||||
rg (mmap) 0.322 +/- 0.002 (lines: 39)
|
||||
ag (mmap) 0.290 +/- 0.002 (lines: 39)
|
||||
git grep 0.211 +/- 0.009 (lines: 39)
|
||||
ugrep 0.189 +/- 0.005 (lines: 39)
|
||||
|
||||
linux_literal_casei (pattern: PM_RESUME)
|
||||
----------------------------------------
|
||||
rg* 0.088 +/- 0.001 (lines: 536)*
|
||||
rg (mmap) 0.314 +/- 0.007 (lines: 536)
|
||||
ag (mmap) 0.299 +/- 0.001 (lines: 536)
|
||||
git grep 0.214 +/- 0.007 (lines: 536)
|
||||
ugrep 0.174 +/- 0.001 (lines: 536)
|
||||
|
||||
linux_re_literal_suffix (pattern: [A-Z]+_RESUME)
|
||||
------------------------------------------------
|
||||
rg* 0.085 +/- 0.000 (lines: 2160)*
|
||||
ag 0.369 +/- 0.009 (lines: 2160)
|
||||
git grep 0.915 +/- 0.048 (lines: 2160)
|
||||
ugrep 0.433 +/- 0.025 (lines: 2160)
|
||||
|
||||
linux_word (pattern: PM_RESUME)
|
||||
-------------------------------
|
||||
rg* 0.085 +/- 0.001 (lines: 9)*
|
||||
ag 0.287 +/- 0.001 (lines: 9)
|
||||
git grep 0.206 +/- 0.002 (lines: 9)
|
||||
ugrep 0.189 +/- 0.002 (lines: 9)
|
||||
|
||||
linux_unicode_greek (pattern: \p{Greek})
|
||||
----------------------------------------
|
||||
rg 0.201 +/- 0.005 (lines: 105)
|
||||
ugrep* 0.181 +/- 0.005 (lines: 105)*
|
||||
|
||||
linux_unicode_greek_casei (pattern: \p{Greek})
|
||||
----------------------------------------------
|
||||
rg 0.198 +/- 0.000 (lines: 245)
|
||||
ugrep* 0.179 +/- 0.003 (lines: 105)*
|
||||
|
||||
linux_unicode_word (pattern: \wAh)
|
||||
----------------------------------
|
||||
rg 0.085 +/- 0.000 (lines: 247)
|
||||
rg (ASCII)* 0.085 +/- 0.000 (lines: 233)*
|
||||
ag (ASCII) 0.301 +/- 0.005 (lines: 233)
|
||||
git grep 3.980 +/- 0.241 (lines: 247)
|
||||
git grep (ASCII) 1.620 +/- 0.032 (lines: 233)
|
||||
ugrep 0.177 +/- 0.003 (lines: 247)
|
||||
ugrep (ASCII) 0.185 +/- 0.005 (lines: 233)
|
||||
|
||||
linux_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
-----------------------------------------------------------------
|
||||
rg 0.266 +/- 0.006 (lines: 721)
|
||||
rg (ASCII)* 0.200 +/- 0.001 (lines: 720)*
|
||||
ag (ASCII) 0.832 +/- 0.007 (lines: 1134)
|
||||
git grep 7.346 +/- 0.017 (lines: 721)
|
||||
git grep (ASCII) 2.144 +/- 0.014 (lines: 720)
|
||||
ugrep 3.403 +/- 0.008 (lines: 723)
|
||||
ugrep (ASCII) 0.236 +/- 0.003 (lines: 722)
|
||||
|
||||
linux_alternates (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------
|
||||
rg* 0.087 +/- 0.000 (lines: 140)*
|
||||
ag 0.330 +/- 0.002 (lines: 140)
|
||||
git grep 0.414 +/- 0.047 (lines: 140)
|
||||
ugrep 0.179 +/- 0.002 (lines: 140)
|
||||
|
||||
linux_alternates_casei (pattern: ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT)
|
||||
-------------------------------------------------------------------------------
|
||||
rg* 0.123 +/- 0.001 (lines: 241)*
|
||||
ag 0.530 +/- 0.001 (lines: 241)
|
||||
git grep 0.792 +/- 0.036 (lines: 241)
|
||||
ugrep 0.177 +/- 0.003 (lines: 241)
|
||||
|
||||
subtitles_en_literal (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------
|
||||
rg* 0.123 +/- 0.003 (lines: 830)*
|
||||
rg (no mmap) 0.176 +/- 0.005 (lines: 830)
|
||||
grep 0.572 +/- 0.017 (lines: 830)
|
||||
rg (lines) 0.189 +/- 0.006 (lines: 830)
|
||||
ag (lines) 1.868 +/- 0.004 (lines: 830)
|
||||
grep (lines) 0.980 +/- 0.036 (lines: 830)
|
||||
ugrep (lines) 0.185 +/- 0.007 (lines: 830)
|
||||
|
||||
subtitles_en_literal_casei (pattern: Sherlock Holmes)
|
||||
-----------------------------------------------------
|
||||
rg* 0.214 +/- 0.008 (lines: 871)*
|
||||
grep 2.224 +/- 0.000 (lines: 871)
|
||||
grep (ASCII) 0.671 +/- 0.001 (lines: 871)
|
||||
rg (lines) 0.259 +/- 0.004 (lines: 871)
|
||||
ag (lines) (ASCII) 1.897 +/- 0.026 (lines: 871)
|
||||
ugrep (lines) 0.785 +/- 0.002 (lines: 871)
|
||||
|
||||
subtitles_en_literal_word (pattern: Sherlock Holmes)
|
||||
----------------------------------------------------
|
||||
rg (ASCII) 0.189 +/- 0.006 (lines: 830)
|
||||
ag (ASCII) 1.842 +/- 0.023 (lines: 830)
|
||||
grep (ASCII) 0.977 +/- 0.046 (lines: 830)
|
||||
ugrep (ASCII)* 0.182 +/- 0.007 (lines: 830)*
|
||||
rg 0.192 +/- 0.006 (lines: 830)
|
||||
grep 0.990 +/- 0.024 (lines: 830)
|
||||
|
||||
subtitles_en_alternate (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 0.248 +/- 0.001 (lines: 1094)
|
||||
ag (lines) 2.638 +/- 0.055 (lines: 1094)
|
||||
grep (lines) 2.052 +/- 0.027 (lines: 1094)
|
||||
ugrep (lines) 0.787 +/- 0.001 (lines: 1094)
|
||||
rg* 0.176 +/- 0.011 (lines: 1094)*
|
||||
grep 1.660 +/- 0.002 (lines: 1094)
|
||||
|
||||
subtitles_en_alternate_casei (pattern: Sherlock Holmes|John Watson|Irene Adler|Inspector Lestrade|Professor Moriarty)
|
||||
---------------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII) 3.999 +/- 0.007 (lines: 1136)
|
||||
grep (ASCII) 3.561 +/- 0.023 (lines: 1136)
|
||||
ugrep (ASCII) 0.787 +/- 0.002 (lines: 1136)
|
||||
rg* 0.483 +/- 0.009 (lines: 1136)*
|
||||
grep 3.585 +/- 0.018 (lines: 1136)
|
||||
|
||||
subtitles_en_surrounding_words (pattern: \w+\s+Holmes\s+\w+)
|
||||
------------------------------------------------------------
|
||||
rg 0.200 +/- 0.001 (lines: 483)
|
||||
grep 1.303 +/- 0.040 (lines: 483)
|
||||
ugrep 43.220 +/- 0.047 (lines: 483)
|
||||
rg (ASCII)* 0.197 +/- 0.000 (lines: 483)*
|
||||
ag (ASCII) 5.223 +/- 0.056 (lines: 489)
|
||||
grep (ASCII) 1.316 +/- 0.043 (lines: 483)
|
||||
ugrep (ASCII) 17.647 +/- 0.219 (lines: 483)
|
||||
|
||||
subtitles_en_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 2.119 +/- 0.016 (lines: 22)
|
||||
ugrep 13.053 +/- 0.004 (lines: 22)
|
||||
rg (ASCII)* 2.004 +/- 0.013 (lines: 22)*
|
||||
ag (ASCII) 6.830 +/- 0.018 (lines: 302)
|
||||
grep (ASCII) 4.395 +/- 0.030 (lines: 22)
|
||||
ugrep (ASCII) 3.510 +/- 0.004 (lines: 22)
|
||||
|
||||
subtitles_ru_literal (pattern: Шерлок Холмс)
|
||||
--------------------------------------------
|
||||
rg* 0.133 +/- 0.002 (lines: 583)*
|
||||
rg (no mmap) 0.180 +/- 0.000 (lines: 583)
|
||||
grep 0.510 +/- 0.009 (lines: 583)
|
||||
rg (lines) 0.193 +/- 0.003 (lines: 583)
|
||||
ag (lines) 1.973 +/- 0.025 (lines: 583)
|
||||
grep (lines) 0.808 +/- 0.006 (lines: 583)
|
||||
ugrep (lines) 0.680 +/- 0.026 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_casei (pattern: Шерлок Холмс)
|
||||
--------------------------------------------------
|
||||
rg* 0.268 +/- 0.001 (lines: 604)*
|
||||
grep 4.767 +/- 0.002 (lines: 604)
|
||||
grep (ASCII) 0.506 +/- 0.007 (lines: 583)
|
||||
rg (lines) 0.335 +/- 0.005 (lines: 604)
|
||||
ag (lines) (ASCII) 0.457 +/- 0.019 (lines: 0)
|
||||
ugrep (lines) (ASCII) 0.720 +/- 0.081 (lines: 583)
|
||||
|
||||
subtitles_ru_literal_word (pattern: Шерлок Холмс)
|
||||
-------------------------------------------------
|
||||
rg (ASCII)* 0.195 +/- 0.011 (lines: 583)*
|
||||
ag (ASCII) 0.509 +/- 0.020 (lines: 0)
|
||||
grep (ASCII) 0.800 +/- 0.019 (lines: 583)
|
||||
ugrep (ASCII) 0.708 +/- 0.034 (lines: 583)
|
||||
rg 0.201 +/- 0.003 (lines: 579)
|
||||
grep 0.792 +/- 0.020 (lines: 579)
|
||||
|
||||
subtitles_ru_alternate (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------
|
||||
rg (lines) 0.682 +/- 0.018 (lines: 691)
|
||||
ag (lines) 2.722 +/- 0.020 (lines: 691)
|
||||
grep (lines) 5.711 +/- 0.006 (lines: 691)
|
||||
ugrep (lines) 8.301 +/- 0.023 (lines: 691)
|
||||
rg* 0.625 +/- 0.012 (lines: 691)*
|
||||
grep 5.425 +/- 0.006 (lines: 691)
|
||||
|
||||
subtitles_ru_alternate_casei (pattern: Шерлок Холмс|Джон Уотсон|Ирен Адлер|инспектор Лестрейд|профессор Мориарти)
|
||||
-----------------------------------------------------------------------------------------------------------------
|
||||
ag (ASCII)* 2.727 +/- 0.019 (lines: 691)*
|
||||
grep (ASCII) 5.712 +/- 0.002 (lines: 691)
|
||||
ugrep (ASCII) 8.301 +/- 0.011 (lines: 691)
|
||||
rg 3.673 +/- 0.004 (lines: 735)
|
||||
grep 5.360 +/- 0.015 (lines: 735)
|
||||
|
||||
subtitles_ru_surrounding_words (pattern: \w+\s+Холмс\s+\w+)
|
||||
-----------------------------------------------------------
|
||||
rg* 0.203 +/- 0.001 (lines: 278)*
|
||||
grep 1.039 +/- 0.009 (lines: 278)
|
||||
ugrep 42.919 +/- 0.087 (lines: 278)
|
||||
ag (ASCII) 1.084 +/- 0.001 (lines: 0)
|
||||
grep (ASCII) 1.007 +/- 0.018 (lines: 0)
|
||||
ugrep (ASCII) 0.890 +/- 0.001 (lines: 0)
|
||||
|
||||
subtitles_ru_no_literal (pattern: \w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5})
|
||||
----------------------------------------------------------------------------------------
|
||||
rg 2.236 +/- 0.019 (lines: 41)
|
||||
ugrep 28.811 +/- 0.127 (lines: 41)
|
||||
rg (ASCII) 2.035 +/- 0.014 (lines: 0)
|
||||
ag (ASCII) 1.093 +/- 0.006 (lines: 0)
|
||||
grep (ASCII) 1.085 +/- 0.015 (lines: 0)
|
||||
ugrep (ASCII)* 0.832 +/- 0.002 (lines: 0)*
|
28
build.rs
28
build.rs
@@ -48,34 +48,6 @@ fn main() {
|
||||
if let Some(rev) = git_revision_hash() {
|
||||
println!("cargo:rustc-env=RIPGREP_BUILD_GIT_HASH={}", rev);
|
||||
}
|
||||
// Embed a Windows manifest and set some linker options. The main reason
|
||||
// for this is to enable long path support on Windows. This still, I
|
||||
// believe, requires enabling long path support in the registry. But if
|
||||
// that's enabled, then this will let ripgrep use C:\... style paths that
|
||||
// are longer than 260 characters.
|
||||
set_windows_exe_options();
|
||||
}
|
||||
|
||||
fn set_windows_exe_options() {
|
||||
static MANIFEST: &str = "pkg/windows/Manifest.xml";
|
||||
|
||||
let Ok(target_os) = env::var("CARGO_CFG_TARGET_OS") else { return };
|
||||
let Ok(target_env) = env::var("CARGO_CFG_TARGET_ENV") else { return };
|
||||
if !(target_os == "windows" && target_env == "msvc") {
|
||||
return;
|
||||
}
|
||||
|
||||
let Ok(mut manifest) = env::current_dir() else { return };
|
||||
manifest.push(MANIFEST);
|
||||
let Some(manifest) = manifest.to_str() else { return };
|
||||
|
||||
println!("cargo:rerun-if-changed={}", MANIFEST);
|
||||
// Embed the Windows application manifest file.
|
||||
println!("cargo:rustc-link-arg-bin=rg=/MANIFEST:EMBED");
|
||||
println!("cargo:rustc-link-arg-bin=rg=/MANIFESTINPUT:{manifest}");
|
||||
// Turn linker warnings into errors. Helps debugging, otherwise the
|
||||
// warnings get squashed (I believe).
|
||||
println!("cargo:rustc-link-arg-bin=rg=/WX");
|
||||
}
|
||||
|
||||
fn git_revision_hash() -> Option<String> {
|
||||
|
11
complete/_rg
11
complete/_rg
@@ -30,7 +30,7 @@ _rg() {
|
||||
[[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] ||
|
||||
# (--[imnp]* => --ignore*, --messages, --no-*, --pcre2-unicode)
|
||||
[[ $PREFIX$SUFFIX == --[imnp]* ]] ||
|
||||
zstyle -t ":completion:${curcontext}:" complete-all
|
||||
zstyle -t ":complete:$curcontext:*" complete-all
|
||||
then
|
||||
no=
|
||||
fi
|
||||
@@ -319,7 +319,6 @@ _rg() {
|
||||
'(-q --quiet)'{-q,--quiet}'[suppress normal output]'
|
||||
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'
|
||||
'*'{-u,--unrestricted}'[reduce level of "smart" searching]'
|
||||
'--stop-on-nonmatch[stop on first non-matching line after a matching one]'
|
||||
|
||||
+ operand # Operands
|
||||
'(--files --type-list file regexp)1: :_guard "^-*" pattern'
|
||||
@@ -433,13 +432,9 @@ _rg_types() {
|
||||
local -a expl
|
||||
local -aU _types
|
||||
|
||||
_types=( ${(@)${(f)"$( _call_program types $words[1] --type-list )"}//:[[:space:]]##/:} )
|
||||
_types=( ${(@)${(f)"$( _call_program types rg --type-list )"}%%:*} )
|
||||
|
||||
if zstyle -t ":completion:${curcontext}:types" extra-verbose; then
|
||||
_describe -t types 'file type' _types
|
||||
else
|
||||
_wanted types expl 'file type' compadd "$@" - ${(@)_types%%:*}
|
||||
fi
|
||||
_wanted types expl 'file type' compadd -a "$@" - _types
|
||||
}
|
||||
|
||||
_rg "$@"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-cli"
|
||||
version = "0.1.9" #:version
|
||||
version = "0.1.6" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Utilities for search oriented command line applications.
|
||||
@@ -14,8 +14,9 @@ license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bstr = "1.6.0"
|
||||
globset = { version = "0.4.10", path = "../globset" }
|
||||
atty = "0.2.11"
|
||||
bstr = "0.2.0"
|
||||
globset = { version = "0.4.9", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
|
@@ -18,7 +18,7 @@ pub struct DecompressionMatcherBuilder {
|
||||
}
|
||||
|
||||
/// A representation of a single command for decompressing data
|
||||
/// out-of-process.
|
||||
/// out-of-proccess.
|
||||
#[derive(Clone, Debug)]
|
||||
struct DecompressionCommand {
|
||||
/// The glob that matches this command.
|
||||
@@ -132,7 +132,7 @@ impl DecompressionMatcherBuilder {
|
||||
A: AsRef<OsStr>,
|
||||
{
|
||||
let glob = glob.to_string();
|
||||
let bin = try_resolve_binary(Path::new(program.as_ref()))?;
|
||||
let bin = resolve_binary(Path::new(program.as_ref()))?;
|
||||
let args =
|
||||
args.into_iter().map(|a| a.as_ref().to_os_string()).collect();
|
||||
self.commands.push(DecompressionCommand { glob, bin, args });
|
||||
@@ -421,34 +421,6 @@ impl io::Read for DecompressionReader {
|
||||
/// On non-Windows, this is a no-op.
|
||||
pub fn resolve_binary<P: AsRef<Path>>(
|
||||
prog: P,
|
||||
) -> Result<PathBuf, CommandError> {
|
||||
if !cfg!(windows) {
|
||||
return Ok(prog.as_ref().to_path_buf());
|
||||
}
|
||||
try_resolve_binary(prog)
|
||||
}
|
||||
|
||||
/// Resolves a path to a program to a path by searching for the program in
|
||||
/// `PATH`.
|
||||
///
|
||||
/// If the program could not be resolved, then an error is returned.
|
||||
///
|
||||
/// The purpose of doing this instead of passing the path to the program
|
||||
/// directly to Command::new is that Command::new will hand relative paths
|
||||
/// to CreateProcess on Windows, which will implicitly search the current
|
||||
/// working directory for the executable. This could be undesirable for
|
||||
/// security reasons. e.g., running ripgrep with the -z/--search-zip flag on an
|
||||
/// untrusted directory tree could result in arbitrary programs executing on
|
||||
/// Windows.
|
||||
///
|
||||
/// Note that this could still return a relative path if PATH contains a
|
||||
/// relative path. We permit this since it is assumed that the user has set
|
||||
/// this explicitly, and thus, desires this behavior.
|
||||
///
|
||||
/// If `check_exists` is false or the path is already an absolute path this
|
||||
/// will return immediately.
|
||||
fn try_resolve_binary<P: AsRef<Path>>(
|
||||
prog: P,
|
||||
) -> Result<PathBuf, CommandError> {
|
||||
use std::env;
|
||||
|
||||
@@ -461,7 +433,7 @@ fn try_resolve_binary<P: AsRef<Path>>(
|
||||
}
|
||||
|
||||
let prog = prog.as_ref();
|
||||
if prog.is_absolute() {
|
||||
if !cfg!(windows) || prog.is_absolute() {
|
||||
return Ok(prog.to_path_buf());
|
||||
}
|
||||
let syspaths = match env::var_os("PATH") {
|
||||
@@ -483,11 +455,9 @@ fn try_resolve_binary<P: AsRef<Path>>(
|
||||
return Ok(abs_prog.to_path_buf());
|
||||
}
|
||||
if abs_prog.extension().is_none() {
|
||||
for extension in ["com", "exe"] {
|
||||
let abs_prog = abs_prog.with_extension(extension);
|
||||
if is_exe(&abs_prog) {
|
||||
return Ok(abs_prog.to_path_buf());
|
||||
}
|
||||
let abs_prog = abs_prog.with_extension("exe");
|
||||
if is_exe(&abs_prog) {
|
||||
return Ok(abs_prog.to_path_buf());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -165,8 +165,6 @@ mod pattern;
|
||||
mod process;
|
||||
mod wtr;
|
||||
|
||||
use std::io::IsTerminal;
|
||||
|
||||
pub use crate::decompress::{
|
||||
resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
|
||||
DecompressionReader, DecompressionReaderBuilder,
|
||||
@@ -217,7 +215,7 @@ pub fn is_readable_stdin() -> bool {
|
||||
/// Returns true if and only if stdin is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stdin() -> bool {
|
||||
std::io::stdin().is_terminal()
|
||||
atty::is(atty::Stream::Stdin)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stdout is believed to be connected to a tty
|
||||
@@ -229,11 +227,11 @@ pub fn is_tty_stdin() -> bool {
|
||||
/// implementations of `ls` will often show one item per line when stdout is
|
||||
/// redirected, but will condensed output when printing to a tty.
|
||||
pub fn is_tty_stdout() -> bool {
|
||||
std::io::stdout().is_terminal()
|
||||
atty::is(atty::Stream::Stdout)
|
||||
}
|
||||
|
||||
/// Returns true if and only if stderr is believed to be connected to a tty
|
||||
/// or a console.
|
||||
pub fn is_tty_stderr() -> bool {
|
||||
std::io::stderr().is_terminal()
|
||||
atty::is(atty::Stream::Stderr)
|
||||
}
|
||||
|
@@ -632,7 +632,6 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_sort(&mut args);
|
||||
flag_sortr(&mut args);
|
||||
flag_stats(&mut args);
|
||||
flag_stop_on_nonmatch(&mut args);
|
||||
flag_text(&mut args);
|
||||
flag_threads(&mut args);
|
||||
flag_trim(&mut args);
|
||||
@@ -699,7 +698,7 @@ fn flag_after_context(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show NUM lines after each match.
|
||||
|
||||
This overrides the --passthru flag and partially overrides --context.
|
||||
This overrides the --context and --passthru flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("after-context", "NUM")
|
||||
@@ -707,7 +706,8 @@ This overrides the --passthru flag and partially overrides --context.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru");
|
||||
.overrides("passthru")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
@@ -768,7 +768,7 @@ fn flag_before_context(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show NUM lines before each match.
|
||||
|
||||
This overrides the --passthru flag and partially overrides --context.
|
||||
This overrides the --context and --passthru flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("before-context", "NUM")
|
||||
@@ -776,7 +776,8 @@ This overrides the --passthru flag and partially overrides --context.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru");
|
||||
.overrides("passthru")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
@@ -874,8 +875,8 @@ Print the 0-based byte offset within the input file before each line of output.
|
||||
If -o (--only-matching) is specified, print the offset of the matching part
|
||||
itself.
|
||||
|
||||
If ripgrep does transcoding, then the byte offset is in terms of the result of
|
||||
transcoding and not the original data. This applies similarly to another
|
||||
If ripgrep does transcoding, then the byte offset is in terms of the the result
|
||||
of transcoding and not the original data. This applies similarly to another
|
||||
transformation on the source, such as decompression or a --pre filter. Note
|
||||
that when the PCRE2 regex engine is used, then UTF-8 transcoding is done by
|
||||
default.
|
||||
@@ -969,7 +970,7 @@ or, equivalently,
|
||||
|
||||
rg --colors 'match:bg:0x0,0x80,0xFF'
|
||||
|
||||
Note that the intense and nointense style flags will have no effect when
|
||||
Note that the the intense and nointense style flags will have no effect when
|
||||
used alongside these extended color codes.
|
||||
"
|
||||
);
|
||||
@@ -1008,7 +1009,8 @@ fn flag_context(args: &mut Vec<RGArg>) {
|
||||
Show NUM lines before and after each match. This is equivalent to providing
|
||||
both the -B/--before-context and -A/--after-context flags with the same value.
|
||||
|
||||
This overrides the --passthru flag.
|
||||
This overrides both the -B/--before-context and -A/--after-context flags,
|
||||
in addition to the --passthru flag.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("context", "NUM")
|
||||
@@ -1016,7 +1018,9 @@ This overrides the --passthru flag.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru");
|
||||
.overrides("passthru")
|
||||
.overrides("before-context")
|
||||
.overrides("after-context");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
@@ -1707,8 +1711,6 @@ fn flag_line_number(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show line numbers (1-based). This is enabled by default when searching in a
|
||||
terminal.
|
||||
|
||||
This flag overrides --no-line-number.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("line-number")
|
||||
@@ -1723,8 +1725,6 @@ This flag overrides --no-line-number.
|
||||
"\
|
||||
Suppress line numbers. This is enabled by default when not searching in a
|
||||
terminal.
|
||||
|
||||
This flag overrides --line-number.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("no-line-number")
|
||||
@@ -1927,16 +1927,13 @@ Nevertheless, if you only care about matches spanning at most one line, then it
|
||||
is always better to disable multiline mode.
|
||||
|
||||
This flag can be disabled with --no-multiline.
|
||||
|
||||
This overrides the --stop-on-nonmatch flag.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("multiline")
|
||||
.short("U")
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.overrides("no-multiline")
|
||||
.overrides("stop-on-nonmatch");
|
||||
.overrides("no-multiline");
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("no-multiline").hidden().overrides("multiline");
|
||||
@@ -2586,8 +2583,8 @@ Do not print anything to stdout. If a match is found in a file, then ripgrep
|
||||
will stop searching. This is useful when ripgrep is used only for its exit
|
||||
code (which will be an error if no matches are found).
|
||||
|
||||
When --files is used, ripgrep will stop finding files after finding the
|
||||
first file that does not match any ignore rules.
|
||||
When --files is used, then ripgrep will stop finding files after finding the
|
||||
first file that matches all ignore rules.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("quiet").short("q").help(SHORT).long_help(LONG);
|
||||
@@ -2650,17 +2647,6 @@ replacement string. Capture group indices are numbered based on the position of
|
||||
the opening parenthesis of the group, where the leftmost such group is $1. The
|
||||
special $0 group corresponds to the entire match.
|
||||
|
||||
The name of a group is formed by taking the longest string of letters, numbers
|
||||
and underscores (i.e. [_0-9A-Za-z]) after the $. For example, $1a will be
|
||||
replaced with the group named '1a', not the group at index 1. If the group's
|
||||
name contains characters that aren't letters, numbers or underscores, or you
|
||||
want to immediately follow the group with another string, the name should be
|
||||
put inside braces. For example, ${1}a will take the content of the group at
|
||||
index 1 and append 'a' to the end of it.
|
||||
|
||||
If an index or name does not refer to a valid capture group, it will be
|
||||
replaced with an empty string.
|
||||
|
||||
In shells such as Bash and zsh, you should wrap the pattern in single quotes
|
||||
instead of double quotes. Otherwise, capture group indices will be replaced by
|
||||
expanded shell variables which will most likely be empty.
|
||||
@@ -2858,25 +2844,6 @@ This flag can be disabled with --no-stats.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_stop_on_nonmatch(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Stop searching after a non-match.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Enabling this option will cause ripgrep to stop reading a file once it
|
||||
encounters a non-matching line after it has encountered a matching line.
|
||||
This is useful if it is expected that all matches in a given file will be on
|
||||
sequential lines, for example due to the lines being sorted.
|
||||
|
||||
This overrides the -U/--multiline flag.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("stop-on-nonmatch")
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.overrides("multiline");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_text(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Search binary files as if they were text.";
|
||||
const LONG: &str = long!(
|
||||
|
@@ -31,6 +31,8 @@ use ignore::overrides::{Override, OverrideBuilder};
|
||||
use ignore::types::{FileTypeDef, Types, TypesBuilder};
|
||||
use ignore::{Walk, WalkBuilder, WalkParallel};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use regex;
|
||||
use termcolor::{BufferWriter, ColorChoice, WriteColor};
|
||||
|
||||
use crate::app;
|
||||
@@ -41,7 +43,7 @@ use crate::path_printer::{PathPrinter, PathPrinterBuilder};
|
||||
use crate::search::{
|
||||
PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder,
|
||||
};
|
||||
use crate::subject::{Subject, SubjectBuilder};
|
||||
use crate::subject::SubjectBuilder;
|
||||
use crate::Result;
|
||||
|
||||
/// The command that ripgrep should execute based on the command line
|
||||
@@ -95,12 +97,9 @@ pub struct Args(Arc<ArgsImp>);
|
||||
struct ArgsImp {
|
||||
/// Mid-to-low level routines for extracting CLI arguments.
|
||||
matches: ArgMatches,
|
||||
/// The command we want to execute.
|
||||
command: Command,
|
||||
/// The number of threads to use. This is based in part on available
|
||||
/// threads, in part on the number of threads requested and in part on the
|
||||
/// command we're running.
|
||||
threads: usize,
|
||||
/// The patterns provided at the command line and/or via the -f/--file
|
||||
/// flag. This may be empty.
|
||||
patterns: Vec<String>,
|
||||
/// A matcher built from the patterns.
|
||||
///
|
||||
/// It's important that this is only built once, since building this goes
|
||||
@@ -166,6 +165,12 @@ impl Args {
|
||||
&self.0.matches
|
||||
}
|
||||
|
||||
/// Return the patterns found in the command line arguments. This includes
|
||||
/// patterns read via the -f/--file flags.
|
||||
fn patterns(&self) -> &[String] {
|
||||
&self.0.patterns
|
||||
}
|
||||
|
||||
/// Return the matcher builder from the patterns.
|
||||
fn matcher(&self) -> &PatternMatcher {
|
||||
&self.0.matcher
|
||||
@@ -192,7 +197,7 @@ impl Args {
|
||||
fn printer<W: WriteColor>(&self, wtr: W) -> Result<Printer<W>> {
|
||||
match self.matches().output_kind() {
|
||||
OutputKind::Standard => {
|
||||
let separator_search = self.command() == Command::Search;
|
||||
let separator_search = self.command()? == Command::Search;
|
||||
self.matches()
|
||||
.printer_standard(self.paths(), wtr, separator_search)
|
||||
.map(Printer::Standard)
|
||||
@@ -220,8 +225,28 @@ impl Args {
|
||||
}
|
||||
|
||||
/// Return the high-level command that ripgrep should run.
|
||||
pub fn command(&self) -> Command {
|
||||
self.0.command
|
||||
pub fn command(&self) -> Result<Command> {
|
||||
let is_one_search = self.matches().is_one_search(self.paths());
|
||||
let threads = self.matches().threads()?;
|
||||
let one_thread = is_one_search || threads == 1;
|
||||
|
||||
Ok(if self.matches().is_present("pcre2-version") {
|
||||
Command::PCRE2Version
|
||||
} else if self.matches().is_present("type-list") {
|
||||
Command::Types
|
||||
} else if self.matches().is_present("files") {
|
||||
if one_thread {
|
||||
Command::Files
|
||||
} else {
|
||||
Command::FilesParallel
|
||||
}
|
||||
} else if self.matches().can_never_match(self.patterns()) {
|
||||
Command::SearchNever
|
||||
} else if one_thread {
|
||||
Command::Search
|
||||
} else {
|
||||
Command::SearchParallel
|
||||
})
|
||||
}
|
||||
|
||||
/// Builder a path printer that can be used for printing just file paths,
|
||||
@@ -279,7 +304,7 @@ impl Args {
|
||||
/// When this returns a `Stats` value, then it is guaranteed that the
|
||||
/// search worker will be configured to track statistics as well.
|
||||
pub fn stats(&self) -> Result<Option<Stats>> {
|
||||
Ok(if self.command().is_search() && self.matches().stats() {
|
||||
Ok(if self.command()?.is_search() && self.matches().stats() {
|
||||
Some(Stats::new())
|
||||
} else {
|
||||
None
|
||||
@@ -318,58 +343,12 @@ impl Args {
|
||||
|
||||
/// Return a walker that never uses additional threads.
|
||||
pub fn walker(&self) -> Result<Walk> {
|
||||
Ok(self
|
||||
.matches()
|
||||
.walker_builder(self.paths(), self.0.threads)?
|
||||
.build())
|
||||
}
|
||||
|
||||
/// Returns true if and only if `stat`-related sorting is required
|
||||
pub fn needs_stat_sort(&self) -> bool {
|
||||
return self.matches().sort_by().map_or(
|
||||
false,
|
||||
|sort_by| match sort_by.kind {
|
||||
SortByKind::LastModified
|
||||
| SortByKind::Created
|
||||
| SortByKind::LastAccessed => sort_by.check().is_ok(),
|
||||
_ => false,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
/// Sort subjects if a sorter is specified, but only if the sort requires
|
||||
/// stat calls. Non-stat related sorts are handled during file traversal
|
||||
///
|
||||
/// This function assumes that it is known that a stat-related sort is
|
||||
/// required, and does not check for it again.
|
||||
///
|
||||
/// It is important that that precondition is fulfilled, since this function
|
||||
/// consumes the subjects iterator, and is therefore a blocking function.
|
||||
pub fn sort_by_stat<I>(&self, subjects: I) -> Vec<Subject>
|
||||
where
|
||||
I: Iterator<Item = Subject>,
|
||||
{
|
||||
let sorter = match self.matches().sort_by() {
|
||||
Ok(v) => v,
|
||||
Err(_) => return subjects.collect(),
|
||||
};
|
||||
use SortByKind::*;
|
||||
let mut keyed = match sorter.kind {
|
||||
LastModified => load_timestamps(subjects, |m| m.modified()),
|
||||
LastAccessed => load_timestamps(subjects, |m| m.accessed()),
|
||||
Created => load_timestamps(subjects, |m| m.created()),
|
||||
_ => return subjects.collect(),
|
||||
};
|
||||
keyed.sort_by(|a, b| sort_by_option(&a.0, &b.0, sorter.reverse));
|
||||
keyed.into_iter().map(|v| v.1).collect()
|
||||
Ok(self.matches().walker_builder(self.paths())?.build())
|
||||
}
|
||||
|
||||
/// Return a parallel walker that may use additional threads.
|
||||
pub fn walker_parallel(&self) -> Result<WalkParallel> {
|
||||
Ok(self
|
||||
.matches()
|
||||
.walker_builder(self.paths(), self.0.threads)?
|
||||
.build_parallel())
|
||||
Ok(self.matches().walker_builder(self.paths())?.build_parallel())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -444,23 +423,44 @@ impl SortBy {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Load sorters only if they are applicable at the walk stage.
|
||||
///
|
||||
/// In particular, sorts that involve `stat` calls are not loaded because
|
||||
/// the walk inherently assumes that parent directories are aware of all its
|
||||
/// decendent properties, but `stat` does not work that way.
|
||||
fn configure_builder_sort(self, builder: &mut WalkBuilder) {
|
||||
use SortByKind::*;
|
||||
fn configure_walk_builder(self, builder: &mut WalkBuilder) {
|
||||
// This isn't entirely optimal. In particular, we will wind up issuing
|
||||
// a stat for many files redundantly. Aside from having potentially
|
||||
// inconsistent results with respect to sorting, this is also slow.
|
||||
// We could fix this here at the expense of memory by caching stat
|
||||
// calls. A better fix would be to find a way to push this down into
|
||||
// directory traversal itself, but that's a somewhat nasty change.
|
||||
match self.kind {
|
||||
Path if self.reverse => {
|
||||
builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
|
||||
SortByKind::None => {}
|
||||
SortByKind::Path => {
|
||||
if self.reverse {
|
||||
builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
|
||||
} else {
|
||||
builder.sort_by_file_name(|a, b| a.cmp(b));
|
||||
}
|
||||
}
|
||||
Path => {
|
||||
builder.sort_by_file_name(|a, b| a.cmp(b));
|
||||
SortByKind::LastModified => {
|
||||
builder.sort_by_file_path(move |a, b| {
|
||||
sort_by_metadata_time(a, b, self.reverse, |md| {
|
||||
md.modified()
|
||||
})
|
||||
});
|
||||
}
|
||||
// these use `stat` calls and will be sorted in Args::sort_by_stat()
|
||||
LastModified | LastAccessed | Created | None => {}
|
||||
};
|
||||
SortByKind::LastAccessed => {
|
||||
builder.sort_by_file_path(move |a, b| {
|
||||
sort_by_metadata_time(a, b, self.reverse, |md| {
|
||||
md.accessed()
|
||||
})
|
||||
});
|
||||
}
|
||||
SortByKind::Created => {
|
||||
builder.sort_by_file_path(move |a, b| {
|
||||
sort_by_metadata_time(a, b, self.reverse, |md| {
|
||||
md.created()
|
||||
})
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -490,6 +490,24 @@ enum EncodingMode {
|
||||
Disabled,
|
||||
}
|
||||
|
||||
impl EncodingMode {
|
||||
/// Checks if an explicit encoding has been set. Returns false for
|
||||
/// automatic BOM sniffing and no sniffing.
|
||||
///
|
||||
/// This is only used to determine whether PCRE2 needs to have its own
|
||||
/// UTF-8 checking enabled. If we have an explicit encoding set, then
|
||||
/// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
|
||||
/// Otherwise, we have no such guarantee, and must enable PCRE2' UTF-8
|
||||
/// check.
|
||||
#[cfg(feature = "pcre2")]
|
||||
fn has_explicit_encoding(&self) -> bool {
|
||||
match self {
|
||||
EncodingMode::Some(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgMatches {
|
||||
/// Create an ArgMatches from clap's parse result.
|
||||
fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
|
||||
@@ -539,36 +557,9 @@ impl ArgMatches {
|
||||
} else {
|
||||
false
|
||||
};
|
||||
// Now figure out the number of threads we'll use and which
|
||||
// command will run.
|
||||
let is_one_search = self.is_one_search(&paths);
|
||||
let threads = if is_one_search { 1 } else { self.threads()? };
|
||||
if threads == 1 {
|
||||
log::debug!("running in single threaded mode");
|
||||
} else {
|
||||
log::debug!("running with {threads} threads for parallelism");
|
||||
}
|
||||
let command = if self.is_present("pcre2-version") {
|
||||
Command::PCRE2Version
|
||||
} else if self.is_present("type-list") {
|
||||
Command::Types
|
||||
} else if self.is_present("files") {
|
||||
if threads == 1 {
|
||||
Command::Files
|
||||
} else {
|
||||
Command::FilesParallel
|
||||
}
|
||||
} else if self.can_never_match(&patterns) {
|
||||
Command::SearchNever
|
||||
} else if threads == 1 {
|
||||
Command::Search
|
||||
} else {
|
||||
Command::SearchParallel
|
||||
};
|
||||
Ok(Args(Arc::new(ArgsImp {
|
||||
matches: self,
|
||||
command,
|
||||
threads,
|
||||
patterns,
|
||||
matcher,
|
||||
paths,
|
||||
using_default_path,
|
||||
@@ -671,8 +662,6 @@ impl ArgMatches {
|
||||
.multi_line(true)
|
||||
.unicode(self.unicode())
|
||||
.octal(false)
|
||||
.fixed_strings(self.is_present("fixed-strings"))
|
||||
.whole_line(self.is_present("line-regexp"))
|
||||
.word(self.is_present("word-regexp"));
|
||||
if self.is_present("multiline") {
|
||||
builder.dot_matches_new_line(self.is_present("multiline-dotall"));
|
||||
@@ -699,7 +688,12 @@ impl ArgMatches {
|
||||
if let Some(limit) = self.dfa_size_limit()? {
|
||||
builder.dfa_size_limit(limit);
|
||||
}
|
||||
match builder.build_many(patterns) {
|
||||
let res = if self.is_present("fixed-strings") {
|
||||
builder.build_literals(patterns)
|
||||
} else {
|
||||
builder.build(&patterns.join("|"))
|
||||
};
|
||||
match res {
|
||||
Ok(m) => Ok(m),
|
||||
Err(err) => Err(From::from(suggest_multiline(err.to_string()))),
|
||||
}
|
||||
@@ -716,8 +710,6 @@ impl ArgMatches {
|
||||
.case_smart(self.case_smart())
|
||||
.caseless(self.case_insensitive())
|
||||
.multi_line(true)
|
||||
.fixed_strings(self.is_present("fixed-strings"))
|
||||
.whole_line(self.is_present("line-regexp"))
|
||||
.word(self.is_present("word-regexp"));
|
||||
// For whatever reason, the JIT craps out during regex compilation with
|
||||
// a "no more memory" error on 32 bit systems. So don't use it there.
|
||||
@@ -731,6 +723,14 @@ impl ArgMatches {
|
||||
}
|
||||
if self.unicode() {
|
||||
builder.utf(true).ucp(true);
|
||||
if self.encoding()?.has_explicit_encoding() {
|
||||
// SAFETY: If an encoding was specified, then we're guaranteed
|
||||
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
|
||||
// (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
|
||||
unsafe {
|
||||
builder.disable_utf_check();
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.is_present("multiline") {
|
||||
builder.dotall(self.is_present("multiline-dotall"));
|
||||
@@ -738,7 +738,7 @@ impl ArgMatches {
|
||||
if self.is_present("crlf") {
|
||||
builder.crlf(true);
|
||||
}
|
||||
Ok(builder.build_many(patterns)?)
|
||||
Ok(builder.build(&patterns.join("|"))?)
|
||||
}
|
||||
|
||||
/// Build a JSON printer that writes results to the given writer.
|
||||
@@ -840,8 +840,7 @@ impl ArgMatches {
|
||||
.before_context(ctx_before)
|
||||
.after_context(ctx_after)
|
||||
.passthru(self.is_present("passthru"))
|
||||
.memory_map(self.mmap_choice(paths))
|
||||
.stop_on_nonmatch(self.is_present("stop-on-nonmatch"));
|
||||
.memory_map(self.mmap_choice(paths));
|
||||
match self.encoding()? {
|
||||
EncodingMode::Some(enc) => {
|
||||
builder.encoding(Some(enc));
|
||||
@@ -859,11 +858,7 @@ impl ArgMatches {
|
||||
///
|
||||
/// If there was a problem parsing the CLI arguments necessary for
|
||||
/// constructing the builder, then this returns an error.
|
||||
fn walker_builder(
|
||||
&self,
|
||||
paths: &[PathBuf],
|
||||
threads: usize,
|
||||
) -> Result<WalkBuilder> {
|
||||
fn walker_builder(&self, paths: &[PathBuf]) -> Result<WalkBuilder> {
|
||||
let mut builder = WalkBuilder::new(&paths[0]);
|
||||
for path in &paths[1..] {
|
||||
builder.add(path);
|
||||
@@ -879,7 +874,7 @@ impl ArgMatches {
|
||||
.max_depth(self.usize_of("max-depth")?)
|
||||
.follow_links(self.is_present("follow"))
|
||||
.max_filesize(self.max_file_size()?)
|
||||
.threads(threads)
|
||||
.threads(self.threads()?)
|
||||
.same_file_system(self.is_present("one-file-system"))
|
||||
.skip_stdout(!self.is_present("files"))
|
||||
.overrides(self.overrides()?)
|
||||
@@ -892,10 +887,12 @@ impl ArgMatches {
|
||||
.git_exclude(!self.no_ignore_vcs() && !self.no_ignore_exclude())
|
||||
.require_git(!self.is_present("no-require-git"))
|
||||
.ignore_case_insensitive(self.ignore_file_case_insensitive());
|
||||
if !self.no_ignore() && !self.no_ignore_dot() {
|
||||
if !self.no_ignore() {
|
||||
builder.add_custom_ignore_filename(".rgignore");
|
||||
}
|
||||
self.sort_by()?.configure_builder_sort(&mut builder);
|
||||
let sortby = self.sort_by()?;
|
||||
sortby.check()?;
|
||||
sortby.configure_walk_builder(&mut builder);
|
||||
Ok(builder)
|
||||
}
|
||||
}
|
||||
@@ -1010,10 +1007,10 @@ impl ArgMatches {
|
||||
/// If there was a problem parsing the values from the user as an integer,
|
||||
/// then an error is returned.
|
||||
fn contexts(&self) -> Result<(usize, usize)> {
|
||||
let after = self.usize_of("after-context")?.unwrap_or(0);
|
||||
let before = self.usize_of("before-context")?.unwrap_or(0);
|
||||
let both = self.usize_of("context")?.unwrap_or(0);
|
||||
let after = self.usize_of("after-context")?.unwrap_or(both);
|
||||
let before = self.usize_of("before-context")?.unwrap_or(both);
|
||||
Ok((before, after))
|
||||
Ok(if both > 0 { (both, both) } else { (before, after) })
|
||||
}
|
||||
|
||||
/// Returns the unescaped context separator in UTF-8 bytes.
|
||||
@@ -1070,6 +1067,7 @@ impl ArgMatches {
|
||||
}
|
||||
|
||||
let label = match self.value_of_lossy("encoding") {
|
||||
None if self.pcre2_unicode() => "utf-8".to_string(),
|
||||
None => return Ok(EncodingMode::Auto),
|
||||
Some(label) => label,
|
||||
};
|
||||
@@ -1401,6 +1399,11 @@ impl ArgMatches {
|
||||
/// Get a sequence of all available patterns from the command line.
|
||||
/// This includes reading the -e/--regexp and -f/--file flags.
|
||||
///
|
||||
/// Note that if -F/--fixed-strings is set, then all patterns will be
|
||||
/// escaped. If -x/--line-regexp is set, then all patterns are surrounded
|
||||
/// by `^...$`. Other things, such as --word-regexp, are handled by the
|
||||
/// regex matcher itself.
|
||||
///
|
||||
/// If any pattern is invalid UTF-8, then an error is returned.
|
||||
fn patterns(&self) -> Result<Vec<String>> {
|
||||
if self.is_present("files") || self.is_present("type-list") {
|
||||
@@ -1441,6 +1444,16 @@ impl ArgMatches {
|
||||
Ok(pats)
|
||||
}
|
||||
|
||||
/// Returns a pattern that is guaranteed to produce an empty regular
|
||||
/// expression that is valid in any position.
|
||||
fn pattern_empty(&self) -> String {
|
||||
// This would normally just be an empty string, which works on its
|
||||
// own, but if the patterns are joined in a set of alternations, then
|
||||
// you wind up with `foo|`, which is currently invalid in Rust's regex
|
||||
// engine.
|
||||
"(?:z{0})*".to_string()
|
||||
}
|
||||
|
||||
/// Converts an OsStr pattern to a String pattern. The pattern is escaped
|
||||
/// if -F/--fixed-strings is set.
|
||||
///
|
||||
@@ -1459,12 +1472,30 @@ impl ArgMatches {
|
||||
/// Applies additional processing on the given pattern if necessary
|
||||
/// (such as escaping meta characters or turning it into a line regex).
|
||||
fn pattern_from_string(&self, pat: String) -> String {
|
||||
let pat = self.pattern_line(self.pattern_literal(pat));
|
||||
if pat.is_empty() {
|
||||
// This would normally just be an empty string, which works on its
|
||||
// own, but if the patterns are joined in a set of alternations,
|
||||
// then you wind up with `foo|`, which is currently invalid in
|
||||
// Rust's regex engine.
|
||||
"(?:)".to_string()
|
||||
self.pattern_empty()
|
||||
} else {
|
||||
pat
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the given pattern as a line pattern if the -x/--line-regexp
|
||||
/// flag is set. Otherwise, the pattern is returned unchanged.
|
||||
fn pattern_line(&self, pat: String) -> String {
|
||||
if self.is_present("line-regexp") {
|
||||
format!(r"^(?:{})$", pat)
|
||||
} else {
|
||||
pat
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the given pattern as a literal pattern if the
|
||||
/// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
|
||||
/// unchanged.
|
||||
fn pattern_literal(&self, pat: String) -> String {
|
||||
if self.is_present("fixed-strings") {
|
||||
regex::escape(&pat)
|
||||
} else {
|
||||
pat
|
||||
}
|
||||
@@ -1561,9 +1592,7 @@ impl ArgMatches {
|
||||
return Ok(1);
|
||||
}
|
||||
let threads = self.usize_of("threads")?.unwrap_or(0);
|
||||
let available =
|
||||
std::thread::available_parallelism().map_or(1, |n| n.get());
|
||||
Ok(if threads == 0 { cmp::min(12, available) } else { threads })
|
||||
Ok(if threads == 0 { cmp::min(12, num_cpus::get()) } else { threads })
|
||||
}
|
||||
|
||||
/// Builds a file type matcher from the command line flags.
|
||||
@@ -1597,6 +1626,12 @@ impl ArgMatches {
|
||||
!(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode"))
|
||||
}
|
||||
|
||||
/// Returns true if and only if PCRE2 is enabled and its Unicode mode is
|
||||
/// enabled.
|
||||
fn pcre2_unicode(&self) -> bool {
|
||||
self.is_present("pcre2") && self.unicode()
|
||||
}
|
||||
|
||||
/// Returns true if and only if file names containing each match should
|
||||
/// be emitted.
|
||||
fn with_filename(&self, paths: &[PathBuf]) -> bool {
|
||||
@@ -1757,18 +1792,32 @@ fn u64_to_usize(arg_name: &str, value: Option<u64>) -> Result<Option<usize>> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Sorts by an optional parameter.
|
||||
//
|
||||
/// If parameter is found to be `None`, both entries compare equal.
|
||||
fn sort_by_option<T: Ord>(
|
||||
p1: &Option<T>,
|
||||
p2: &Option<T>,
|
||||
/// Builds a comparator for sorting two files according to a system time
|
||||
/// extracted from the file's metadata.
|
||||
///
|
||||
/// If there was a problem extracting the metadata or if the time is not
|
||||
/// available, then both entries compare equal.
|
||||
fn sort_by_metadata_time<G>(
|
||||
p1: &Path,
|
||||
p2: &Path,
|
||||
reverse: bool,
|
||||
) -> cmp::Ordering {
|
||||
match (p1, p2, reverse) {
|
||||
(Some(p1), Some(p2), true) => p1.cmp(&p2).reverse(),
|
||||
(Some(p1), Some(p2), false) => p1.cmp(&p2),
|
||||
_ => cmp::Ordering::Equal,
|
||||
get_time: G,
|
||||
) -> cmp::Ordering
|
||||
where
|
||||
G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
|
||||
{
|
||||
let t1 = match p1.metadata().and_then(|md| get_time(&md)) {
|
||||
Ok(t) => t,
|
||||
Err(_) => return cmp::Ordering::Equal,
|
||||
};
|
||||
let t2 = match p2.metadata().and_then(|md| get_time(&md)) {
|
||||
Ok(t) => t,
|
||||
Err(_) => return cmp::Ordering::Equal,
|
||||
};
|
||||
if reverse {
|
||||
t1.cmp(&t2).reverse()
|
||||
} else {
|
||||
t1.cmp(&t2)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1822,17 +1871,3 @@ fn current_dir() -> Result<PathBuf> {
|
||||
)
|
||||
.into())
|
||||
}
|
||||
|
||||
/// Tries to assign a timestamp to every `Subject` in the vector to help with
|
||||
/// sorting Subjects by time.
|
||||
fn load_timestamps<G>(
|
||||
subjects: impl Iterator<Item = Subject>,
|
||||
get_time: G,
|
||||
) -> Vec<(Option<SystemTime>, Subject)>
|
||||
where
|
||||
G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
|
||||
{
|
||||
subjects
|
||||
.map(|s| (s.path().metadata().and_then(|m| get_time(&m)).ok(), s))
|
||||
.collect()
|
||||
}
|
||||
|
@@ -80,7 +80,7 @@ fn parse<P: AsRef<Path>>(
|
||||
fn parse_reader<R: io::Read>(
|
||||
rdr: R,
|
||||
) -> Result<(Vec<OsString>, Vec<Box<dyn Error>>)> {
|
||||
let mut bufrdr = io::BufReader::new(rdr);
|
||||
let bufrdr = io::BufReader::new(rdr);
|
||||
let (mut args, mut errs) = (vec![], vec![]);
|
||||
let mut line_number = 0;
|
||||
bufrdr.for_byte_line_with_terminator(|line| {
|
||||
|
@@ -33,7 +33,7 @@ impl Log for Logger {
|
||||
fn log(&self, record: &log::Record<'_>) {
|
||||
match (record.file(), record.line()) {
|
||||
(Some(file), Some(line)) => {
|
||||
eprintln_locked!(
|
||||
eprintln!(
|
||||
"{}|{}|{}:{}: {}",
|
||||
record.level(),
|
||||
record.target(),
|
||||
@@ -43,7 +43,7 @@ impl Log for Logger {
|
||||
);
|
||||
}
|
||||
(Some(file), None) => {
|
||||
eprintln_locked!(
|
||||
eprintln!(
|
||||
"{}|{}|{}: {}",
|
||||
record.level(),
|
||||
record.target(),
|
||||
@@ -52,7 +52,7 @@ impl Log for Logger {
|
||||
);
|
||||
}
|
||||
_ => {
|
||||
eprintln_locked!(
|
||||
eprintln!(
|
||||
"{}|{}: {}",
|
||||
record.level(),
|
||||
record.target(),
|
||||
@@ -63,6 +63,6 @@ impl Log for Logger {
|
||||
}
|
||||
|
||||
fn flush(&self) {
|
||||
// We use eprintln_locked! which is flushed on every call.
|
||||
// We use eprintln! which is flushed on every call.
|
||||
}
|
||||
}
|
||||
|
@@ -47,7 +47,7 @@ type Result<T> = ::std::result::Result<T, Box<dyn error::Error>>;
|
||||
|
||||
fn main() {
|
||||
if let Err(err) = Args::parse().and_then(try_main) {
|
||||
eprintln_locked!("{}", err);
|
||||
eprintln!("{}", err);
|
||||
process::exit(2);
|
||||
}
|
||||
}
|
||||
@@ -55,7 +55,7 @@ fn main() {
|
||||
fn try_main(args: Args) -> Result<()> {
|
||||
use args::Command::*;
|
||||
|
||||
let matched = match args.command() {
|
||||
let matched = match args.command()? {
|
||||
Search => search(&args),
|
||||
SearchParallel => search_parallel(&args),
|
||||
SearchNever => Ok(false),
|
||||
@@ -77,70 +77,53 @@ fn try_main(args: Args) -> Result<()> {
|
||||
/// steps through the file list (current directory by default) and searches
|
||||
/// each file sequentially.
|
||||
fn search(args: &Args) -> Result<bool> {
|
||||
/// The meat of the routine is here. This lets us call the same iteration
|
||||
/// code over each file regardless of whether we stream over the files
|
||||
/// as they're produced by the underlying directory traversal or whether
|
||||
/// they've been collected and sorted (for example) first.
|
||||
fn iter(
|
||||
args: &Args,
|
||||
subjects: impl Iterator<Item = Subject>,
|
||||
started_at: std::time::Instant,
|
||||
) -> Result<bool> {
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let mut stats = args.stats()?;
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
let mut matched = false;
|
||||
let mut searched = false;
|
||||
|
||||
for subject in subjects {
|
||||
searched = true;
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
// A broken pipe means graceful termination.
|
||||
Err(err) if err.kind() == io::ErrorKind::BrokenPipe => break,
|
||||
Err(err) => {
|
||||
err_message!("{}: {}", subject.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
matched |= search_result.has_match();
|
||||
if let Some(ref mut stats) = stats {
|
||||
*stats += search_result.stats().unwrap();
|
||||
}
|
||||
if matched && quit_after_match {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if args.using_default_path() && !searched {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if let Some(ref stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
// We don't care if we couldn't print this successfully.
|
||||
let _ = searcher.print_stats(elapsed, stats);
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
let started_at = Instant::now();
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let subjects = args
|
||||
.walker()?
|
||||
.filter_map(|result| subject_builder.build_from_result(result));
|
||||
if args.needs_stat_sort() {
|
||||
let subjects = args.sort_by_stat(subjects).into_iter();
|
||||
iter(args, subjects, started_at)
|
||||
} else {
|
||||
iter(args, subjects, started_at)
|
||||
let mut stats = args.stats()?;
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
let mut matched = false;
|
||||
let mut searched = false;
|
||||
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
searched = true;
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
err_message!("{}: {}", subject.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
matched = matched || search_result.has_match();
|
||||
if let Some(ref mut stats) = stats {
|
||||
*stats += search_result.stats().unwrap();
|
||||
}
|
||||
if matched && quit_after_match {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if args.using_default_path() && !searched {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if let Some(ref stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
// We don't care if we couldn't print this successfully.
|
||||
let _ = searcher.print_stats(elapsed, stats);
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
/// The top-level entry point for multi-threaded search. The parallelism is
|
||||
/// itself achieved by the recursive directory traversal. All we need to do is
|
||||
/// feed it a worker for performing a search on each file.
|
||||
///
|
||||
/// Requesting a sorted output from ripgrep (such as with `--sort path`) will
|
||||
/// automatically disable parallelism and hence sorting is not handled here.
|
||||
fn search_parallel(args: &Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
@@ -231,54 +214,35 @@ fn eprint_nothing_searched() {
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using a single thread.
|
||||
fn files(args: &Args) -> Result<bool> {
|
||||
/// The meat of the routine is here. This lets us call the same iteration
|
||||
/// code over each file regardless of whether we stream over the files
|
||||
/// as they're produced by the underlying directory traversal or whether
|
||||
/// they've been collected and sorted (for example) first.
|
||||
fn iter(
|
||||
args: &Args,
|
||||
subjects: impl Iterator<Item = Subject>,
|
||||
) -> Result<bool> {
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let mut matched = false;
|
||||
let mut path_printer = args.path_printer(args.stdout())?;
|
||||
|
||||
for subject in subjects {
|
||||
matched = true;
|
||||
if quit_after_match {
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let mut matched = false;
|
||||
let mut path_printer = args.path_printer(args.stdout())?;
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
matched = true;
|
||||
if quit_after_match {
|
||||
break;
|
||||
}
|
||||
if let Err(err) = path_printer.write_path(subject.path()) {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
if let Err(err) = path_printer.write_path(subject.path()) {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
// Otherwise, we have some other error that's preventing us from
|
||||
// writing to stdout, so we should bubble it up.
|
||||
return Err(err.into());
|
||||
}
|
||||
// Otherwise, we have some other error that's preventing us from
|
||||
// writing to stdout, so we should bubble it up.
|
||||
return Err(err.into());
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
let subject_builder = args.subject_builder();
|
||||
let subjects = args
|
||||
.walker()?
|
||||
.filter_map(|result| subject_builder.build_from_result(result));
|
||||
if args.needs_stat_sort() {
|
||||
let subjects = args.sort_by_stat(subjects).into_iter();
|
||||
iter(args, subjects)
|
||||
} else {
|
||||
iter(args, subjects)
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using multiple threads.
|
||||
///
|
||||
/// Requesting a sorted output from ripgrep (such as with `--sort path`) will
|
||||
/// automatically disable parallelism and hence sorting is not handled here.
|
||||
fn files_parallel(args: &Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
|
@@ -4,28 +4,12 @@ static MESSAGES: AtomicBool = AtomicBool::new(false);
|
||||
static IGNORE_MESSAGES: AtomicBool = AtomicBool::new(false);
|
||||
static ERRORED: AtomicBool = AtomicBool::new(false);
|
||||
|
||||
/// Like eprintln, but locks STDOUT to prevent interleaving lines.
|
||||
#[macro_export]
|
||||
macro_rules! eprintln_locked {
|
||||
($($tt:tt)*) => {{
|
||||
{
|
||||
// This is a bit of an abstraction violation because we explicitly
|
||||
// lock STDOUT before printing to STDERR. This avoids interleaving
|
||||
// lines within ripgrep because `search_parallel` uses `termcolor`,
|
||||
// which accesses the same STDOUT lock when writing lines.
|
||||
let stdout = std::io::stdout();
|
||||
let _handle = stdout.lock();
|
||||
eprintln!($($tt)*);
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
/// Emit a non-fatal error message, unless messages were disabled.
|
||||
#[macro_export]
|
||||
macro_rules! message {
|
||||
($($tt:tt)*) => {
|
||||
if crate::messages::messages() {
|
||||
eprintln_locked!($($tt)*);
|
||||
eprintln!($($tt)*);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -46,7 +30,7 @@ macro_rules! err_message {
|
||||
macro_rules! ignore_message {
|
||||
($($tt:tt)*) => {
|
||||
if crate::messages::messages() && crate::messages::ignore_messages() {
|
||||
eprintln_locked!($($tt)*);
|
||||
eprintln!($($tt)*);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "globset"
|
||||
version = "0.4.11" #:version
|
||||
version = "0.4.9" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
@@ -20,11 +20,11 @@ name = "globset"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "1.0.2"
|
||||
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
fnv = "1.0.6"
|
||||
log = { version = "0.4.5", optional = true }
|
||||
regex = { version = "1.8.3", default-features = false, features = ["perf", "std"] }
|
||||
regex = { version = "1.1.5", default-features = false, features = ["perf", "std"] }
|
||||
serde = { version = "1.0.104", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@@ -208,9 +208,6 @@ struct GlobOptions {
|
||||
/// Whether or not to use `\` to escape special characters.
|
||||
/// e.g., when enabled, `\*` will match a literal `*`.
|
||||
backslash_escape: bool,
|
||||
/// Whether or not an empty case in an alternate will be removed.
|
||||
/// e.g., when enabled, `{,a}` will match "" and "a".
|
||||
empty_alternates: bool,
|
||||
}
|
||||
|
||||
impl GlobOptions {
|
||||
@@ -219,7 +216,6 @@ impl GlobOptions {
|
||||
case_insensitive: false,
|
||||
literal_separator: false,
|
||||
backslash_escape: !is_separator('\\'),
|
||||
empty_alternates: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -637,16 +633,6 @@ impl<'a> GlobBuilder<'a> {
|
||||
self.opts.backslash_escape = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Toggle whether an empty pattern in a list of alternates is accepted.
|
||||
///
|
||||
/// For example, if this is set then the glob `foo{,.txt}` will match both `foo` and `foo.txt`.
|
||||
///
|
||||
/// By default this is false.
|
||||
pub fn empty_alternates(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.empty_alternates = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
impl Tokens {
|
||||
@@ -728,7 +714,7 @@ impl Tokens {
|
||||
for pat in patterns {
|
||||
let mut altre = String::new();
|
||||
self.tokens_to_regex(options, &pat, &mut altre);
|
||||
if !altre.is_empty() || options.empty_alternates {
|
||||
if !altre.is_empty() {
|
||||
parts.push(altre);
|
||||
}
|
||||
}
|
||||
@@ -1034,7 +1020,6 @@ mod tests {
|
||||
casei: Option<bool>,
|
||||
litsep: Option<bool>,
|
||||
bsesc: Option<bool>,
|
||||
ealtre: Option<bool>,
|
||||
}
|
||||
|
||||
macro_rules! syntax {
|
||||
@@ -1074,9 +1059,6 @@ mod tests {
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
if let Some(ealtre) = $options.ealtre {
|
||||
builder.empty_alternates(ealtre);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
assert_eq!(format!("(?-u){}", $re), pat.regex());
|
||||
}
|
||||
@@ -1100,9 +1082,6 @@ mod tests {
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
if let Some(ealtre) = $options.ealtre {
|
||||
builder.empty_alternates(ealtre);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
let matcher = pat.compile_matcher();
|
||||
let strategic = pat.compile_strategic_matcher();
|
||||
@@ -1131,9 +1110,6 @@ mod tests {
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
if let Some(ealtre) = $options.ealtre {
|
||||
builder.empty_alternates(ealtre);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
let matcher = pat.compile_matcher();
|
||||
let strategic = pat.compile_strategic_matcher();
|
||||
@@ -1219,23 +1195,13 @@ mod tests {
|
||||
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
|
||||
|
||||
const CASEI: Options =
|
||||
Options { casei: Some(true), litsep: None, bsesc: None, ealtre: None };
|
||||
Options { casei: Some(true), litsep: None, bsesc: None };
|
||||
const SLASHLIT: Options =
|
||||
Options { casei: None, litsep: Some(true), bsesc: None, ealtre: None };
|
||||
const NOBSESC: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(false),
|
||||
ealtre: None,
|
||||
};
|
||||
Options { casei: None, litsep: Some(true), bsesc: None };
|
||||
const NOBSESC: Options =
|
||||
Options { casei: None, litsep: None, bsesc: Some(false) };
|
||||
const BSESC: Options =
|
||||
Options { casei: None, litsep: None, bsesc: Some(true), ealtre: None };
|
||||
const EALTRE: Options = Options {
|
||||
casei: None,
|
||||
litsep: None,
|
||||
bsesc: Some(true),
|
||||
ealtre: Some(true),
|
||||
};
|
||||
Options { casei: None, litsep: None, bsesc: Some(true) };
|
||||
|
||||
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
|
||||
|
||||
@@ -1360,9 +1326,6 @@ mod tests {
|
||||
matches!(matchalt11, "{*.foo,*.bar,*.wat}", "test.foo");
|
||||
matches!(matchalt12, "{*.foo,*.bar,*.wat}", "test.bar");
|
||||
matches!(matchalt13, "{*.foo,*.bar,*.wat}", "test.wat");
|
||||
matches!(matchalt14, "foo{,.txt}", "foo.txt");
|
||||
nmatches!(matchalt15, "foo{,.txt}", "foo");
|
||||
matches!(matchalt16, "foo{,.txt}", "foo", EALTRE);
|
||||
|
||||
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
||||
#[cfg(unix)]
|
||||
@@ -1462,9 +1425,6 @@ mod tests {
|
||||
if let Some(bsesc) = $options.bsesc {
|
||||
builder.backslash_escape(bsesc);
|
||||
}
|
||||
if let Some(ealtre) = $options.ealtre {
|
||||
builder.empty_alternates(ealtre);
|
||||
}
|
||||
let pat = builder.build().unwrap();
|
||||
assert_eq!($expect, pat.$which());
|
||||
}
|
||||
|
@@ -498,23 +498,13 @@ impl GlobSetBuilder {
|
||||
/// Constructing candidates has a very small cost associated with it, so
|
||||
/// callers may find it beneficial to amortize that cost when matching a single
|
||||
/// path against multiple globs or sets of globs.
|
||||
#[derive(Clone)]
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Candidate<'a> {
|
||||
path: Cow<'a, [u8]>,
|
||||
basename: Cow<'a, [u8]>,
|
||||
ext: Cow<'a, [u8]>,
|
||||
}
|
||||
|
||||
impl<'a> std::fmt::Debug for Candidate<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
f.debug_struct("Candidate")
|
||||
.field("path", &self.path.as_bstr())
|
||||
.field("basename", &self.basename.as_bstr())
|
||||
.field("ext", &self.ext.as_bstr())
|
||||
.finish()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Candidate<'a> {
|
||||
/// Create a new candidate for matching from the given path.
|
||||
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
|
||||
@@ -828,7 +818,7 @@ impl MultiStrategyBuilder {
|
||||
|
||||
fn prefix(self) -> PrefixStrategy {
|
||||
PrefixStrategy {
|
||||
matcher: AhoCorasick::new(&self.literals).unwrap(),
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
@@ -836,7 +826,7 @@ impl MultiStrategyBuilder {
|
||||
|
||||
fn suffix(self) -> SuffixStrategy {
|
||||
SuffixStrategy {
|
||||
matcher: AhoCorasick::new(&self.literals).unwrap(),
|
||||
matcher: AhoCorasick::new_auto_configured(&self.literals),
|
||||
map: self.map,
|
||||
longest: self.longest,
|
||||
}
|
||||
@@ -880,29 +870,6 @@ impl RequiredExtensionStrategyBuilder {
|
||||
}
|
||||
}
|
||||
|
||||
/// Escape meta-characters within the given glob pattern.
|
||||
///
|
||||
/// The escaping works by surrounding meta-characters with brackets. For
|
||||
/// example, `*` becomes `[*]`.
|
||||
pub fn escape(s: &str) -> String {
|
||||
let mut escaped = String::with_capacity(s.len());
|
||||
for c in s.chars() {
|
||||
match c {
|
||||
// note that ! does not need escaping because it is only special
|
||||
// inside brackets
|
||||
'?' | '*' | '[' | ']' => {
|
||||
escaped.push('[');
|
||||
escaped.push(c);
|
||||
escaped.push(']');
|
||||
}
|
||||
c => {
|
||||
escaped.push(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
escaped
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{GlobSet, GlobSetBuilder};
|
||||
@@ -942,16 +909,4 @@ mod tests {
|
||||
assert!(!set.is_match(""));
|
||||
assert!(!set.is_match("a"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn escape() {
|
||||
use super::escape;
|
||||
assert_eq!("foo", escape("foo"));
|
||||
assert_eq!("foo[*]", escape("foo*"));
|
||||
assert_eq!("[[][]]", escape("[]"));
|
||||
assert_eq!("[*][?]", escape("*?"));
|
||||
assert_eq!("src/[*][*]/[*].rs", escape("src/**/*.rs"));
|
||||
assert_eq!("bar[[]ab[]]baz", escape("bar[ab]baz"));
|
||||
assert_eq!("bar[[]!![]]!baz", escape("bar[!!]!baz"));
|
||||
}
|
||||
}
|
||||
|
@@ -27,7 +27,7 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||
///
|
||||
/// Note that this does NOT match the semantics of std::path::Path::extension.
|
||||
/// Namely, the extension includes the `.` and matching is otherwise more
|
||||
/// liberal. Specifically, the extension is:
|
||||
/// liberal. Specifically, the extenion is:
|
||||
///
|
||||
/// * None, if the file name given is empty;
|
||||
/// * None, if there is no embedded `.`;
|
||||
|
@@ -1,7 +1,5 @@
|
||||
use serde::{
|
||||
de::{Error, Visitor},
|
||||
{Deserialize, Deserializer, Serialize, Serializer},
|
||||
};
|
||||
use serde::de::Error;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
|
||||
use crate::Glob;
|
||||
|
||||
@@ -14,66 +12,18 @@ impl Serialize for Glob {
|
||||
}
|
||||
}
|
||||
|
||||
struct GlobVisitor;
|
||||
|
||||
impl<'a> Visitor<'a> for GlobVisitor {
|
||||
type Value = Glob;
|
||||
|
||||
fn expecting(
|
||||
&self,
|
||||
formatter: &mut std::fmt::Formatter,
|
||||
) -> std::fmt::Result {
|
||||
formatter.write_str("a glob pattern")
|
||||
}
|
||||
|
||||
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
|
||||
where
|
||||
E: Error,
|
||||
{
|
||||
Glob::new(v).map_err(serde::de::Error::custom)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'de> Deserialize<'de> for Glob {
|
||||
fn deserialize<D: Deserializer<'de>>(
|
||||
deserializer: D,
|
||||
) -> Result<Self, D::Error> {
|
||||
deserializer.deserialize_str(GlobVisitor)
|
||||
let glob = <&str as Deserialize>::deserialize(deserializer)?;
|
||||
Glob::new(glob).map_err(D::Error::custom)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::Glob;
|
||||
|
||||
#[test]
|
||||
fn glob_deserialize_borrowed() {
|
||||
let string = r#"{"markdown": "*.md"}"#;
|
||||
|
||||
let map: HashMap<String, Glob> =
|
||||
serde_json::from_str(&string).unwrap();
|
||||
assert_eq!(map["markdown"], Glob::new("*.md").unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn glob_deserialize_owned() {
|
||||
let string = r#"{"markdown": "*.md"}"#;
|
||||
|
||||
let v: serde_json::Value = serde_json::from_str(&string).unwrap();
|
||||
let map: HashMap<String, Glob> = serde_json::from_value(v).unwrap();
|
||||
assert_eq!(map["markdown"], Glob::new("*.md").unwrap());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn glob_deserialize_error() {
|
||||
let string = r#"{"error": "["}"#;
|
||||
|
||||
let map = serde_json::from_str::<HashMap<String, Glob>>(&string);
|
||||
|
||||
assert!(map.is_err());
|
||||
}
|
||||
use Glob;
|
||||
|
||||
#[test]
|
||||
fn glob_json_works() {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.2.12" #:version
|
||||
version = "0.2.10" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -14,12 +14,12 @@ license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-cli = { version = "0.1.7", path = "../cli" }
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
grep-pcre2 = { version = "0.1.6", path = "../pcre2", optional = true }
|
||||
grep-printer = { version = "0.1.7", path = "../printer" }
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
grep-searcher = { version = "0.1.11", path = "../searcher" }
|
||||
grep-cli = { version = "0.1.6", path = "../cli" }
|
||||
grep-matcher = { version = "0.1.5", path = "../matcher" }
|
||||
grep-pcre2 = { version = "0.1.5", path = "../pcre2", optional = true }
|
||||
grep-printer = { version = "0.1.6", path = "../printer" }
|
||||
grep-regex = { version = "0.1.10", path = "../regex" }
|
||||
grep-searcher = { version = "0.1.10", path = "../searcher" }
|
||||
|
||||
[dev-dependencies]
|
||||
termcolor = "1.0.4"
|
||||
|
@@ -12,6 +12,8 @@ are sparse.
|
||||
A cookbook and a guide are planned.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub extern crate grep_cli as cli;
|
||||
pub extern crate grep_matcher as matcher;
|
||||
#[cfg(feature = "pcre2")]
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.4.20" #:version
|
||||
version = "0.4.18" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
@@ -19,11 +19,12 @@ name = "ignore"
|
||||
bench = false
|
||||
|
||||
[dependencies]
|
||||
globset = { version = "0.4.10", path = "../globset" }
|
||||
crossbeam-utils = "0.8.0"
|
||||
globset = { version = "0.4.9", path = "../globset" }
|
||||
lazy_static = "1.1"
|
||||
log = "0.4.5"
|
||||
memchr = "2.5"
|
||||
regex = { version = "1.9.0", default-features = false, features = ["perf", "std", "unicode-gencat"] }
|
||||
memchr = "2.1"
|
||||
regex = "1.1"
|
||||
same-file = "1.0.4"
|
||||
thread_local = "1"
|
||||
walkdir = "2.2.7"
|
||||
|
@@ -9,113 +9,101 @@
|
||||
/// Please try to keep this list sorted lexicographically and wrapped to 79
|
||||
/// columns (inclusive).
|
||||
#[rustfmt::skip]
|
||||
pub const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
|
||||
(&["ada"], &["*.adb", "*.ads"]),
|
||||
(&["agda"], &["*.agda", "*.lagda"]),
|
||||
(&["aidl"], &["*.aidl"]),
|
||||
(&["alire"], &["alire.toml"]),
|
||||
(&["amake"], &["*.mk", "*.bp"]),
|
||||
(&["asciidoc"], &["*.adoc", "*.asc", "*.asciidoc"]),
|
||||
(&["asm"], &["*.asm", "*.s", "*.S"]),
|
||||
(&["asp"], &[
|
||||
pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("agda", &["*.agda", "*.lagda"]),
|
||||
("aidl", &["*.aidl"]),
|
||||
("amake", &["*.mk", "*.bp"]),
|
||||
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
|
||||
("asm", &["*.asm", "*.s", "*.S"]),
|
||||
("asp", &[
|
||||
"*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs",
|
||||
"*.ascx.vb", "*.asp"
|
||||
]),
|
||||
(&["ats"], &["*.ats", "*.dats", "*.sats", "*.hats"]),
|
||||
(&["avro"], &["*.avdl", "*.avpr", "*.avsc"]),
|
||||
(&["awk"], &["*.awk"]),
|
||||
(&["bat", "batch"], &["*.bat"]),
|
||||
(&["bazel"], &[
|
||||
("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
|
||||
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
||||
("awk", &["*.awk"]),
|
||||
("bazel", &[
|
||||
"*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "MODULE.bazel",
|
||||
"WORKSPACE", "WORKSPACE.bazel",
|
||||
]),
|
||||
(&["bitbake"], &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||
(&["brotli"], &["*.br"]),
|
||||
(&["buildstream"], &["*.bst"]),
|
||||
(&["bzip2"], &["*.bz2", "*.tbz2"]),
|
||||
(&["c"], &["*.[chH]", "*.[chH].in", "*.cats"]),
|
||||
(&["cabal"], &["*.cabal"]),
|
||||
(&["candid"], &["*.did"]),
|
||||
(&["carp"], &["*.carp"]),
|
||||
(&["cbor"], &["*.cbor"]),
|
||||
(&["ceylon"], &["*.ceylon"]),
|
||||
(&["clojure"], &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
|
||||
(&["cmake"], &["*.cmake", "CMakeLists.txt"]),
|
||||
(&["cmd"], &["*.bat", "*.cmd"]),
|
||||
(&["cml"], &["*.cml"]),
|
||||
(&["coffeescript"], &["*.coffee"]),
|
||||
(&["config"], &["*.cfg", "*.conf", "*.config", "*.ini"]),
|
||||
(&["coq"], &["*.v"]),
|
||||
(&["cpp"], &[
|
||||
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||
("brotli", &["*.br"]),
|
||||
("buildstream", &["*.bst"]),
|
||||
("bzip2", &["*.bz2", "*.tbz2"]),
|
||||
("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
|
||||
("cabal", &["*.cabal"]),
|
||||
("cbor", &["*.cbor"]),
|
||||
("ceylon", &["*.ceylon"]),
|
||||
("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
|
||||
("cmake", &["*.cmake", "CMakeLists.txt"]),
|
||||
("coffeescript", &["*.coffee"]),
|
||||
("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
|
||||
("coq", &["*.v"]),
|
||||
("cpp", &[
|
||||
"*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
|
||||
"*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
|
||||
]),
|
||||
(&["creole"], &["*.creole"]),
|
||||
(&["crystal"], &["Projectfile", "*.cr", "*.ecr", "shard.yml"]),
|
||||
(&["cs"], &["*.cs"]),
|
||||
(&["csharp"], &["*.cs"]),
|
||||
(&["cshtml"], &["*.cshtml"]),
|
||||
(&["css"], &["*.css", "*.scss"]),
|
||||
(&["csv"], &["*.csv"]),
|
||||
(&["cuda"], &["*.cu", "*.cuh"]),
|
||||
(&["cython"], &["*.pyx", "*.pxi", "*.pxd"]),
|
||||
(&["d"], &["*.d"]),
|
||||
(&["dart"], &["*.dart"]),
|
||||
(&["devicetree"], &["*.dts", "*.dtsi"]),
|
||||
(&["dhall"], &["*.dhall"]),
|
||||
(&["diff"], &["*.patch", "*.diff"]),
|
||||
(&["dita"], &["*.dita", "*.ditamap", "*.ditaval"]),
|
||||
(&["docker"], &["*Dockerfile*"]),
|
||||
(&["dockercompose"], &["docker-compose.yml", "docker-compose.*.yml"]),
|
||||
(&["dts"], &["*.dts", "*.dtsi"]),
|
||||
(&["dvc"], &["Dvcfile", "*.dvc"]),
|
||||
(&["ebuild"], &["*.ebuild", "*.eclass"]),
|
||||
(&["edn"], &["*.edn"]),
|
||||
(&["elisp"], &["*.el"]),
|
||||
(&["elixir"], &["*.ex", "*.eex", "*.exs", "*.heex", "*.leex", "*.livemd"]),
|
||||
(&["elm"], &["*.elm"]),
|
||||
(&["erb"], &["*.erb"]),
|
||||
(&["erlang"], &["*.erl", "*.hrl"]),
|
||||
(&["fennel"], &["*.fnl"]),
|
||||
(&["fidl"], &["*.fidl"]),
|
||||
(&["fish"], &["*.fish"]),
|
||||
(&["flatbuffers"], &["*.fbs"]),
|
||||
(&["fortran"], &[
|
||||
("creole", &["*.creole"]),
|
||||
("crystal", &["Projectfile", "*.cr", "*.ecr", "shard.yml"]),
|
||||
("cs", &["*.cs"]),
|
||||
("csharp", &["*.cs"]),
|
||||
("cshtml", &["*.cshtml"]),
|
||||
("css", &["*.css", "*.scss"]),
|
||||
("csv", &["*.csv"]),
|
||||
("cuda", &["*.cu", "*.cuh"]),
|
||||
("cython", &["*.pyx", "*.pxi", "*.pxd"]),
|
||||
("d", &["*.d"]),
|
||||
("dart", &["*.dart"]),
|
||||
("dhall", &["*.dhall"]),
|
||||
("diff", &["*.patch", "*.diff"]),
|
||||
("docker", &["*Dockerfile*"]),
|
||||
("dts", &["*.dts", "*.dtsi"]),
|
||||
("dvc", &["Dvcfile", "*.dvc"]),
|
||||
("ebuild", &["*.ebuild"]),
|
||||
("edn", &["*.edn"]),
|
||||
("elisp", &["*.el"]),
|
||||
("elixir", &["*.ex", "*.eex", "*.exs"]),
|
||||
("elm", &["*.elm"]),
|
||||
("erb", &["*.erb"]),
|
||||
("erlang", &["*.erl", "*.hrl"]),
|
||||
("fennel", &["*.fnl"]),
|
||||
("fidl", &["*.fidl"]),
|
||||
("fish", &["*.fish"]),
|
||||
("flatbuffers", &["*.fbs"]),
|
||||
("fortran", &[
|
||||
"*.f", "*.F", "*.f77", "*.F77", "*.pfo",
|
||||
"*.f90", "*.F90", "*.f95", "*.F95",
|
||||
]),
|
||||
(&["fsharp"], &["*.fs", "*.fsx", "*.fsi"]),
|
||||
(&["fut"], &["*.fut"]),
|
||||
(&["gap"], &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
|
||||
(&["gn"], &["*.gn", "*.gni"]),
|
||||
(&["go"], &["*.go"]),
|
||||
(&["gprbuild"], &["*.gpr"]),
|
||||
(&["gradle"], &["*.gradle"]),
|
||||
(&["graphql"], &["*.graphql", "*.graphqls"]),
|
||||
(&["groovy"], &["*.groovy", "*.gradle"]),
|
||||
(&["gzip"], &["*.gz", "*.tgz"]),
|
||||
(&["h"], &["*.h", "*.hh", "*.hpp"]),
|
||||
(&["haml"], &["*.haml"]),
|
||||
(&["hare"], &["*.ha"]),
|
||||
(&["haskell"], &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
|
||||
(&["hbs"], &["*.hbs"]),
|
||||
(&["hs"], &["*.hs", "*.lhs"]),
|
||||
(&["html"], &["*.htm", "*.html", "*.ejs"]),
|
||||
(&["hy"], &["*.hy"]),
|
||||
(&["idris"], &["*.idr", "*.lidr"]),
|
||||
(&["janet"], &["*.janet"]),
|
||||
(&["java"], &["*.java", "*.jsp", "*.jspx", "*.properties"]),
|
||||
(&["jinja"], &["*.j2", "*.jinja", "*.jinja2"]),
|
||||
(&["jl"], &["*.jl"]),
|
||||
(&["js"], &["*.js", "*.jsx", "*.vue", "*.cjs", "*.mjs"]),
|
||||
(&["json"], &["*.json", "composer.lock"]),
|
||||
(&["jsonl"], &["*.jsonl"]),
|
||||
(&["julia"], &["*.jl"]),
|
||||
(&["jupyter"], &["*.ipynb", "*.jpynb"]),
|
||||
(&["k"], &["*.k"]),
|
||||
(&["kotlin"], &["*.kt", "*.kts"]),
|
||||
(&["less"], &["*.less"]),
|
||||
(&["license"], &[
|
||||
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
|
||||
("fut", &["*.fut"]),
|
||||
("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
|
||||
("gn", &["*.gn", "*.gni"]),
|
||||
("go", &["*.go"]),
|
||||
("gradle", &["*.gradle"]),
|
||||
("groovy", &["*.groovy", "*.gradle"]),
|
||||
("gzip", &["*.gz", "*.tgz"]),
|
||||
("h", &["*.h", "*.hh", "*.hpp"]),
|
||||
("haml", &["*.haml"]),
|
||||
("hare", &["*.ha"]),
|
||||
("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
|
||||
("hbs", &["*.hbs"]),
|
||||
("hs", &["*.hs", "*.lhs"]),
|
||||
("html", &["*.htm", "*.html", "*.ejs"]),
|
||||
("hy", &["*.hy"]),
|
||||
("idris", &["*.idr", "*.lidr"]),
|
||||
("janet", &["*.janet"]),
|
||||
("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
|
||||
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
|
||||
("jl", &["*.jl"]),
|
||||
("js", &["*.js", "*.jsx", "*.vue"]),
|
||||
("json", &["*.json", "composer.lock"]),
|
||||
("jsonl", &["*.jsonl"]),
|
||||
("julia", &["*.jl"]),
|
||||
("jupyter", &["*.ipynb", "*.jpynb"]),
|
||||
("k", &["*.k"]),
|
||||
("kotlin", &["*.kt", "*.kts"]),
|
||||
("less", &["*.less"]),
|
||||
("license", &[
|
||||
// General
|
||||
"COPYING", "COPYING[.-]*",
|
||||
"COPYRIGHT", "COPYRIGHT[.-]*",
|
||||
@@ -142,91 +130,76 @@ pub const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
|
||||
"MPL-*[0-9]*",
|
||||
"OFL-*[0-9]*",
|
||||
]),
|
||||
(&["lilypond"], &["*.ly", "*.ily"]),
|
||||
(&["lisp"], &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
||||
(&["lock"], &["*.lock", "package-lock.json"]),
|
||||
(&["log"], &["*.log"]),
|
||||
(&["lua"], &["*.lua"]),
|
||||
(&["lz4"], &["*.lz4"]),
|
||||
(&["lzma"], &["*.lzma"]),
|
||||
(&["m4"], &["*.ac", "*.m4"]),
|
||||
(&["make"], &[
|
||||
("lilypond", &["*.ly", "*.ily"]),
|
||||
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
|
||||
("lock", &["*.lock", "package-lock.json"]),
|
||||
("log", &["*.log"]),
|
||||
("lua", &["*.lua"]),
|
||||
("lz4", &["*.lz4"]),
|
||||
("lzma", &["*.lzma"]),
|
||||
("m4", &["*.ac", "*.m4"]),
|
||||
("make", &[
|
||||
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
|
||||
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
|
||||
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
|
||||
"*.mk", "*.mak"
|
||||
]),
|
||||
(&["mako"], &["*.mako", "*.mao"]),
|
||||
(&["man"], &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
|
||||
(&["markdown", "md"], &[
|
||||
"*.markdown",
|
||||
"*.md",
|
||||
"*.mdown",
|
||||
"*.mdwn",
|
||||
"*.mkd",
|
||||
"*.mkdn",
|
||||
"*.mdx",
|
||||
]),
|
||||
(&["matlab"], &["*.m"]),
|
||||
(&["meson"], &["meson.build", "meson_options.txt"]),
|
||||
(&["minified"], &["*.min.html", "*.min.css", "*.min.js"]),
|
||||
(&["mint"], &["*.mint"]),
|
||||
(&["mk"], &["mkfile"]),
|
||||
(&["ml"], &["*.ml"]),
|
||||
(&["motoko"], &["*.mo"]),
|
||||
(&["msbuild"], &[
|
||||
("mako", &["*.mako", "*.mao"]),
|
||||
("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
|
||||
("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
|
||||
("matlab", &["*.m"]),
|
||||
("md", &["*.markdown", "*.md", "*.mdown", "*.mkd", "*.mkdn"]),
|
||||
("meson", &["meson.build", "meson_options.txt"]),
|
||||
("minified", &["*.min.html", "*.min.css", "*.min.js"]),
|
||||
("mint", &["*.mint"]),
|
||||
("mk", &["mkfile"]),
|
||||
("ml", &["*.ml"]),
|
||||
("msbuild", &[
|
||||
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
|
||||
"*.sln",
|
||||
]),
|
||||
(&["nim"], &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
|
||||
(&["nix"], &["*.nix"]),
|
||||
(&["objc"], &["*.h", "*.m"]),
|
||||
(&["objcpp"], &["*.h", "*.mm"]),
|
||||
(&["ocaml"], &["*.ml", "*.mli", "*.mll", "*.mly"]),
|
||||
(&["org"], &["*.org", "*.org_archive"]),
|
||||
(&["pants"], &["BUILD"]),
|
||||
(&["pascal"], &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
|
||||
(&["pdf"], &["*.pdf"]),
|
||||
(&["perl"], &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
|
||||
(&["php"], &[
|
||||
("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
|
||||
("nix", &["*.nix"]),
|
||||
("objc", &["*.h", "*.m"]),
|
||||
("objcpp", &["*.h", "*.mm"]),
|
||||
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
|
||||
("org", &["*.org", "*.org_archive"]),
|
||||
("pants", &["BUILD"]),
|
||||
("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
|
||||
("pdf", &["*.pdf"]),
|
||||
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
|
||||
("php", &[
|
||||
// note that PHP 6 doesn't exist
|
||||
// See: https://wiki.php.net/rfc/php6
|
||||
"*.php", "*.php3", "*.php4", "*.php5", "*.php7", "*.php8",
|
||||
"*.pht", "*.phtml"
|
||||
]),
|
||||
(&["po"], &["*.po"]),
|
||||
(&["pod"], &["*.pod"]),
|
||||
(&["postscript"], &["*.eps", "*.ps"]),
|
||||
(&["protobuf"], &["*.proto"]),
|
||||
(&["ps"], &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
|
||||
(&["puppet"], &["*.epp", "*.erb", "*.pp", "*.rb"]),
|
||||
(&["purs"], &["*.purs"]),
|
||||
(&["py", "python"], &["*.py", "*.pyi"]),
|
||||
(&["qmake"], &["*.pro", "*.pri", "*.prf"]),
|
||||
(&["qml"], &["*.qml"]),
|
||||
(&["r"], &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
|
||||
(&["racket"], &["*.rkt"]),
|
||||
(&["raku"], &[
|
||||
"*.raku", "*.rakumod", "*.rakudoc", "*.rakutest",
|
||||
"*.p6", "*.pl6", "*.pm6"
|
||||
]),
|
||||
(&["rdoc"], &["*.rdoc"]),
|
||||
(&["readme"], &["README*", "*README"]),
|
||||
(&["reasonml"], &["*.re", "*.rei"]),
|
||||
(&["red"], &["*.r", "*.red", "*.reds"]),
|
||||
(&["rescript"], &["*.res", "*.resi"]),
|
||||
(&["robot"], &["*.robot"]),
|
||||
(&["rst"], &["*.rst"]),
|
||||
(&["ruby"], &[
|
||||
("po", &["*.po"]),
|
||||
("pod", &["*.pod"]),
|
||||
("postscript", &["*.eps", "*.ps"]),
|
||||
("protobuf", &["*.proto"]),
|
||||
("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
|
||||
("puppet", &["*.erb", "*.pp", "*.rb"]),
|
||||
("purs", &["*.purs"]),
|
||||
("py", &["*.py"]),
|
||||
("qmake", &["*.pro", "*.pri", "*.prf"]),
|
||||
("qml", &["*.qml"]),
|
||||
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
|
||||
("racket", &["*.rkt"]),
|
||||
("rdoc", &["*.rdoc"]),
|
||||
("readme", &["README*", "*README"]),
|
||||
("red", &["*.r", "*.red", "*.reds"]),
|
||||
("robot", &["*.robot"]),
|
||||
("rst", &["*.rst"]),
|
||||
("ruby", &[
|
||||
// Idiomatic files
|
||||
"config.ru", "Gemfile", ".irbrc", "Rakefile",
|
||||
// Extensions
|
||||
"*.gemspec", "*.rb", "*.rbw"
|
||||
]),
|
||||
(&["rust"], &["*.rs"]),
|
||||
(&["sass"], &["*.sass", "*.scss"]),
|
||||
(&["scala"], &["*.scala", "*.sbt"]),
|
||||
(&["sh"], &[
|
||||
("rust", &["*.rs"]),
|
||||
("sass", &["*.sass", "*.scss"]),
|
||||
("scala", &["*.scala", "*.sbt"]),
|
||||
("sh", &[
|
||||
// Portable/misc. init files
|
||||
".login", ".logout", ".profile", "profile",
|
||||
// bash-specific init files
|
||||
@@ -249,66 +222,59 @@ pub const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
|
||||
// Extensions
|
||||
"*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
|
||||
]),
|
||||
(&["slim"], &["*.skim", "*.slim", "*.slime"]),
|
||||
(&["smarty"], &["*.tpl"]),
|
||||
(&["sml"], &["*.sml", "*.sig"]),
|
||||
(&["solidity"], &["*.sol"]),
|
||||
(&["soy"], &["*.soy"]),
|
||||
(&["spark"], &["*.spark"]),
|
||||
(&["spec"], &["*.spec"]),
|
||||
(&["sql"], &["*.sql", "*.psql"]),
|
||||
(&["stylus"], &["*.styl"]),
|
||||
(&["sv"], &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
|
||||
(&["svg"], &["*.svg"]),
|
||||
(&["swift"], &["*.swift"]),
|
||||
(&["swig"], &["*.def", "*.i"]),
|
||||
(&["systemd"], &[
|
||||
("slim", &["*.skim", "*.slim", "*.slime"]),
|
||||
("smarty", &["*.tpl"]),
|
||||
("sml", &["*.sml", "*.sig"]),
|
||||
("soy", &["*.soy"]),
|
||||
("spark", &["*.spark"]),
|
||||
("spec", &["*.spec"]),
|
||||
("sql", &["*.sql", "*.psql"]),
|
||||
("stylus", &["*.styl"]),
|
||||
("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
|
||||
("svg", &["*.svg"]),
|
||||
("swift", &["*.swift"]),
|
||||
("swig", &["*.def", "*.i"]),
|
||||
("systemd", &[
|
||||
"*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
|
||||
"*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
|
||||
"*.timer",
|
||||
]),
|
||||
(&["taskpaper"], &["*.taskpaper"]),
|
||||
(&["tcl"], &["*.tcl"]),
|
||||
(&["tex"], &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
|
||||
(&["texinfo"], &["*.texi"]),
|
||||
(&["textile"], &["*.textile"]),
|
||||
(&["tf"], &[
|
||||
"*.tf", "*.auto.tfvars", "terraform.tfvars", "*.tf.json",
|
||||
"*.auto.tfvars.json", "terraform.tfvars.json", "*.terraformrc",
|
||||
"terraform.rc", "*.tfrc", "*.terraform.lock.hcl",
|
||||
]),
|
||||
(&["thrift"], &["*.thrift"]),
|
||||
(&["toml"], &["*.toml", "Cargo.lock"]),
|
||||
(&["ts", "typescript"], &["*.ts", "*.tsx", "*.cts", "*.mts"]),
|
||||
(&["twig"], &["*.twig"]),
|
||||
(&["txt"], &["*.txt"]),
|
||||
(&["typoscript"], &["*.typoscript", "*.ts"]),
|
||||
(&["usd"], &["*.usd", "*.usda", "*.usdc"]),
|
||||
(&["v"], &["*.v"]),
|
||||
(&["vala"], &["*.vala"]),
|
||||
(&["vb"], &["*.vb"]),
|
||||
(&["vcl"], &["*.vcl"]),
|
||||
(&["verilog"], &["*.v", "*.vh", "*.sv", "*.svh"]),
|
||||
(&["vhdl"], &["*.vhd", "*.vhdl"]),
|
||||
(&["vim"], &[
|
||||
("taskpaper", &["*.taskpaper"]),
|
||||
("tcl", &["*.tcl"]),
|
||||
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
|
||||
("texinfo", &["*.texi"]),
|
||||
("textile", &["*.textile"]),
|
||||
("tf", &["*.tf"]),
|
||||
("thrift", &["*.thrift"]),
|
||||
("toml", &["*.toml", "Cargo.lock"]),
|
||||
("ts", &["*.ts", "*.tsx"]),
|
||||
("twig", &["*.twig"]),
|
||||
("txt", &["*.txt"]),
|
||||
("typoscript", &["*.typoscript", "*.ts"]),
|
||||
("vala", &["*.vala"]),
|
||||
("vb", &["*.vb"]),
|
||||
("vcl", &["*.vcl"]),
|
||||
("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
|
||||
("vhdl", &["*.vhd", "*.vhdl"]),
|
||||
("vim", &[
|
||||
"*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
|
||||
]),
|
||||
(&["vimscript"], &[
|
||||
("vimscript", &[
|
||||
"*.vim", ".vimrc", ".gvimrc", "vimrc", "gvimrc", "_vimrc", "_gvimrc",
|
||||
]),
|
||||
(&["webidl"], &["*.idl", "*.webidl", "*.widl"]),
|
||||
(&["wiki"], &["*.mediawiki", "*.wiki"]),
|
||||
(&["xml"], &[
|
||||
("webidl", &["*.idl", "*.webidl", "*.widl"]),
|
||||
("wiki", &["*.mediawiki", "*.wiki"]),
|
||||
("xml", &[
|
||||
"*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
|
||||
"*.rng", "*.sch", "*.xhtml",
|
||||
]),
|
||||
(&["xz"], &["*.xz", "*.txz"]),
|
||||
(&["yacc"], &["*.y"]),
|
||||
(&["yaml"], &["*.yaml", "*.yml"]),
|
||||
(&["yang"], &["*.yang"]),
|
||||
(&["z"], &["*.Z"]),
|
||||
(&["zig"], &["*.zig"]),
|
||||
(&["zsh"], &[
|
||||
("xz", &["*.xz", "*.txz"]),
|
||||
("yacc", &["*.y"]),
|
||||
("yaml", &["*.yaml", "*.yml"]),
|
||||
("yang", &["*.yang"]),
|
||||
("z", &["*.Z"]),
|
||||
("zig", &["*.zig"]),
|
||||
("zsh", &[
|
||||
".zshenv", "zshenv",
|
||||
".zlogin", "zlogin",
|
||||
".zlogout", "zlogout",
|
||||
@@ -316,25 +282,5 @@ pub const DEFAULT_TYPES: &[(&[&str], &[&str])] = &[
|
||||
".zshrc", "zshrc",
|
||||
"*.zsh",
|
||||
]),
|
||||
(&["zstd"], &["*.zst", "*.zstd"]),
|
||||
("zstd", &["*.zst", "*.zstd"]),
|
||||
];
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::DEFAULT_TYPES;
|
||||
|
||||
#[test]
|
||||
fn default_types_are_sorted() {
|
||||
let mut names = DEFAULT_TYPES.iter().map(|(aliases, _)| aliases[0]);
|
||||
let Some(mut previous_name) = names.next() else { return; };
|
||||
for name in names {
|
||||
assert!(
|
||||
name > previous_name,
|
||||
r#""{}" should be sorted before "{}" in `DEFAULT_TYPES`"#,
|
||||
name,
|
||||
previous_name
|
||||
);
|
||||
previous_name = name;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -533,7 +533,7 @@ impl GitignoreBuilder {
|
||||
/// Return the file path of the current environment's global gitignore file.
|
||||
///
|
||||
/// Note that the file path returned may not exist.
|
||||
pub fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||
fn gitconfig_excludes_path() -> Option<PathBuf> {
|
||||
// git supports $HOME/.gitconfig and $XDG_CONFIG_HOME/git/config. Notably,
|
||||
// both can be active at the same time, where $HOME/.gitconfig takes
|
||||
// precedence. So if $HOME/.gitconfig defines a `core.excludesFile`, then
|
||||
@@ -596,13 +596,8 @@ fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
||||
// probably works in more circumstances. I guess we would ideally have
|
||||
// a full INI parser. Yuck.
|
||||
lazy_static::lazy_static! {
|
||||
static ref RE: Regex = Regex::new(
|
||||
r"(?xim-u)
|
||||
^[[:space:]]*excludesfile[[:space:]]*
|
||||
=
|
||||
[[:space:]]*(.+)[[:space:]]*$
|
||||
"
|
||||
).unwrap();
|
||||
static ref RE: Regex =
|
||||
Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
||||
};
|
||||
let caps = match RE.captures(data) {
|
||||
None => return None,
|
||||
|
@@ -106,7 +106,6 @@ impl Override {
|
||||
}
|
||||
|
||||
/// Builds a matcher for a set of glob overrides.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct OverrideBuilder {
|
||||
builder: GitignoreBuilder,
|
||||
}
|
||||
|
@@ -488,11 +488,9 @@ impl TypesBuilder {
|
||||
/// Add a set of default file type definitions.
|
||||
pub fn add_defaults(&mut self) -> &mut TypesBuilder {
|
||||
static MSG: &'static str = "adding a default type should never fail";
|
||||
for &(names, exts) in DEFAULT_TYPES {
|
||||
for name in names {
|
||||
for ext in exts {
|
||||
self.add(name, ext).expect(MSG);
|
||||
}
|
||||
for &(name, exts) in DEFAULT_TYPES {
|
||||
for ext in exts {
|
||||
self.add(name, ext).expect(MSG);
|
||||
}
|
||||
}
|
||||
self
|
||||
@@ -539,8 +537,6 @@ mod tests {
|
||||
"html:*.htm",
|
||||
"rust:*.rs",
|
||||
"js:*.js",
|
||||
"py:*.py",
|
||||
"python:*.py",
|
||||
"foo:*.{rs,foo}",
|
||||
"combo:include:html,rust",
|
||||
]
|
||||
@@ -555,8 +551,6 @@ mod tests {
|
||||
matched!(match7, types(), vec!["foo"], vec!["rust"], "main.foo");
|
||||
matched!(match8, types(), vec!["combo"], vec![], "index.html");
|
||||
matched!(match9, types(), vec!["combo"], vec![], "lib.rs");
|
||||
matched!(match10, types(), vec!["py"], vec![], "main.py");
|
||||
matched!(match11, types(), vec!["python"], vec![], "main.py");
|
||||
|
||||
matched!(not, matchnot1, types(), vec!["rust"], vec![], "index.html");
|
||||
matched!(not, matchnot2, types(), vec![], vec!["rust"], "main.rs");
|
||||
@@ -564,8 +558,6 @@ mod tests {
|
||||
matched!(not, matchnot4, types(), vec!["rust"], vec!["foo"], "main.rs");
|
||||
matched!(not, matchnot5, types(), vec!["rust"], vec!["foo"], "main.foo");
|
||||
matched!(not, matchnot6, types(), vec!["combo"], vec![], "leftpad.js");
|
||||
matched!(not, matchnot7, types(), vec!["py"], vec![], "index.html");
|
||||
matched!(not, matchnot8, types(), vec!["python"], vec![], "doc.md");
|
||||
|
||||
#[test]
|
||||
fn test_invalid_defs() {
|
||||
@@ -577,7 +569,7 @@ mod tests {
|
||||
let original_defs = btypes.definitions();
|
||||
let bad_defs = vec![
|
||||
// Reference to type that does not exist
|
||||
"combo:include:html,qwerty",
|
||||
"combo:include:html,python",
|
||||
// Bad format
|
||||
"combo:foobar:html,rust",
|
||||
"",
|
||||
|
@@ -941,7 +941,7 @@ impl Walk {
|
||||
// overheads; an example of this was a bespoke filesystem layer in
|
||||
// Windows that hosted files remotely and would download them on-demand
|
||||
// when particular filesystem operations occurred. Users of this system
|
||||
// who ensured correct file-type filters were being used could still
|
||||
// who ensured correct file-type fileters were being used could still
|
||||
// get unnecessary file access resulting in large downloads.
|
||||
if should_skip_entry(&self.ig, ent) {
|
||||
return Ok(true);
|
||||
@@ -1282,7 +1282,7 @@ impl WalkParallel {
|
||||
let quit_now = Arc::new(AtomicBool::new(false));
|
||||
let num_pending =
|
||||
Arc::new(AtomicUsize::new(stack.lock().unwrap().len()));
|
||||
std::thread::scope(|s| {
|
||||
crossbeam_utils::thread::scope(|s| {
|
||||
let mut handles = vec![];
|
||||
for _ in 0..threads {
|
||||
let worker = Worker {
|
||||
@@ -1296,12 +1296,13 @@ impl WalkParallel {
|
||||
skip: self.skip.clone(),
|
||||
filter: self.filter.clone(),
|
||||
};
|
||||
handles.push(s.spawn(|| worker.run()));
|
||||
handles.push(s.spawn(|_| worker.run()));
|
||||
}
|
||||
for handle in handles {
|
||||
handle.join().unwrap();
|
||||
}
|
||||
});
|
||||
})
|
||||
.unwrap(); // Pass along panics from threads
|
||||
}
|
||||
|
||||
fn threads(&self) -> usize {
|
||||
@@ -1681,7 +1682,7 @@ impl<'s> Worker<'s> {
|
||||
stack.pop()
|
||||
}
|
||||
|
||||
/// Signal that work has been finished.
|
||||
/// Signal that work has been received.
|
||||
fn work_done(&self) {
|
||||
self.num_pending.fetch_sub(1, Ordering::SeqCst);
|
||||
}
|
||||
|
19
crates/index/Cargo.toml
Normal file
19
crates/index/Cargo.toml
Normal file
@@ -0,0 +1,19 @@
|
||||
[package]
|
||||
publish = false
|
||||
name = "grep-index"
|
||||
version = "0.0.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Grep, but with an index.
|
||||
"""
|
||||
documentation = "https://docs.rs/grep-index"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "search", "index", "ngram"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bstr = "0.2"
|
||||
regex-syntax = "0.6.14"
|
21
crates/index/LICENSE-MIT
Normal file
21
crates/index/LICENSE-MIT
Normal file
@@ -0,0 +1,21 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
8
crates/index/README.md
Normal file
8
crates/index/README.md
Normal file
@@ -0,0 +1,8 @@
|
||||
grep-index
|
||||
----------
|
||||
WIP.
|
||||
|
||||
[Build status](https://github.com/BurntSushi/ripgrep/actions)
|
||||
[grep-index on crates.io](https://crates.io/crates/grep-index)
|
||||
|
||||
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
|
24
crates/index/UNLICENSE
Normal file
24
crates/index/UNLICENSE
Normal file
@@ -0,0 +1,24 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
7
crates/index/src/lib.rs
Normal file
7
crates/index/src/lib.rs
Normal file
@@ -0,0 +1,7 @@
|
||||
/*!
|
||||
TODO
|
||||
*/
|
||||
|
||||
#![allow(warnings)]
|
||||
|
||||
mod literal;
|
1040
crates/index/src/literal.rs
Normal file
1040
crates/index/src/literal.rs
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-matcher"
|
||||
version = "0.1.6" #:version
|
||||
version = "0.1.5" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A trait for regular expressions, with a focus on line oriented search.
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-pcre2"
|
||||
version = "0.1.6" #:version
|
||||
version = "0.1.5" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use PCRE2 with the 'grep' crate.
|
||||
@@ -14,6 +14,5 @@ license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
log = "0.4.19"
|
||||
pcre2 = "0.2.4"
|
||||
grep-matcher = { version = "0.1.5", path = "../matcher" }
|
||||
pcre2 = "0.2.3"
|
||||
|
@@ -11,8 +11,6 @@ pub struct RegexMatcherBuilder {
|
||||
builder: RegexBuilder,
|
||||
case_smart: bool,
|
||||
word: bool,
|
||||
fixed_strings: bool,
|
||||
whole_line: bool,
|
||||
}
|
||||
|
||||
impl RegexMatcherBuilder {
|
||||
@@ -22,8 +20,6 @@ impl RegexMatcherBuilder {
|
||||
builder: RegexBuilder::new(),
|
||||
case_smart: false,
|
||||
word: false,
|
||||
fixed_strings: false,
|
||||
whole_line: false,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -33,40 +29,17 @@ impl RegexMatcherBuilder {
|
||||
/// If there was a problem compiling the pattern, then an error is
|
||||
/// returned.
|
||||
pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
self.build_many(&[pattern])
|
||||
}
|
||||
|
||||
/// Compile all of the given patterns into a single regex that matches when
|
||||
/// at least one of the patterns matches.
|
||||
///
|
||||
/// If there was a problem building the regex, then an error is returned.
|
||||
pub fn build_many<P: AsRef<str>>(
|
||||
&self,
|
||||
patterns: &[P],
|
||||
) -> Result<RegexMatcher, Error> {
|
||||
let mut builder = self.builder.clone();
|
||||
let mut pats = Vec::with_capacity(patterns.len());
|
||||
for p in patterns.iter() {
|
||||
pats.push(if self.fixed_strings {
|
||||
format!("(?:{})", pcre2::escape(p.as_ref()))
|
||||
} else {
|
||||
format!("(?:{})", p.as_ref())
|
||||
});
|
||||
}
|
||||
let mut singlepat = pats.join("|");
|
||||
if self.case_smart && !has_uppercase_literal(&singlepat) {
|
||||
if self.case_smart && !has_uppercase_literal(pattern) {
|
||||
builder.caseless(true);
|
||||
}
|
||||
if self.whole_line {
|
||||
singlepat = format!(r"(?m:^)(?:{})(?m:$)", singlepat);
|
||||
} else if self.word {
|
||||
// We make this option exclusive with whole_line because when
|
||||
// whole_line is enabled, all matches necessarily fall on word
|
||||
// boundaries. So this extra goop is strictly redundant.
|
||||
singlepat = format!(r"(?<!\w)(?:{})(?!\w)", singlepat);
|
||||
}
|
||||
log::trace!("final regex: {:?}", singlepat);
|
||||
builder.build(&singlepat).map_err(Error::regex).map(|regex| {
|
||||
let res = if self.word {
|
||||
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
|
||||
builder.build(&pattern)
|
||||
} else {
|
||||
builder.build(pattern)
|
||||
};
|
||||
res.map_err(Error::regex).map(|regex| {
|
||||
let mut names = HashMap::new();
|
||||
for (i, name) in regex.capture_names().iter().enumerate() {
|
||||
if let Some(ref name) = *name {
|
||||
@@ -171,21 +144,6 @@ impl RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether the patterns should be treated as literal strings or not. When
|
||||
/// this is active, all characters, including ones that would normally be
|
||||
/// special regex meta characters, are matched literally.
|
||||
pub fn fixed_strings(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.fixed_strings = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether each pattern should match the entire line or not. This is
|
||||
/// equivalent to surrounding the pattern with `(?m:^)` and `(?m:$)`.
|
||||
pub fn whole_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.whole_line = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable Unicode matching mode.
|
||||
///
|
||||
/// When enabled, the following patterns become Unicode aware: `\b`, `\B`,
|
||||
@@ -220,22 +178,23 @@ impl RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// This is now deprecated and is a no-op.
|
||||
/// When UTF matching mode is enabled, this will disable the UTF checking
|
||||
/// that PCRE2 will normally perform automatically. If UTF matching mode
|
||||
/// is not enabled, then this has no effect.
|
||||
///
|
||||
/// Previously, this option permitted disabling PCRE2's UTF-8 validity
|
||||
/// check, which could result in undefined behavior if the haystack was
|
||||
/// not valid UTF-8. But PCRE2 introduced a new option, `PCRE2_MATCH_INVALID_UTF`,
|
||||
/// in 10.34 which this crate always sets. When this option is enabled,
|
||||
/// PCRE2 claims to not have undefined behavior when the haystack is
|
||||
/// invalid UTF-8.
|
||||
/// UTF checking is enabled by default when UTF matching mode is enabled.
|
||||
/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
|
||||
/// will return an error if you attempt to search a subject string that is
|
||||
/// not valid UTF-8.
|
||||
///
|
||||
/// Therefore, disabling the UTF-8 check is not something that is exposed
|
||||
/// by this crate.
|
||||
#[deprecated(
|
||||
since = "0.2.4",
|
||||
note = "now a no-op due to new PCRE2 features"
|
||||
)]
|
||||
pub fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
/// # Safety
|
||||
///
|
||||
/// It is undefined behavior to disable the UTF check in UTF matching mode
|
||||
/// and search a subject string that is not valid UTF-8. When the UTF check
|
||||
/// is disabled, callers must guarantee that the subject string is valid
|
||||
/// UTF-8.
|
||||
pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self.builder.disable_utf_check();
|
||||
self
|
||||
}
|
||||
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-printer"
|
||||
version = "0.1.7" #:version
|
||||
version = "0.1.6" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
An implementation of the grep crate's Sink trait that provides standard
|
||||
@@ -19,13 +19,13 @@ default = ["serde1"]
|
||||
serde1 = ["base64", "serde", "serde_json"]
|
||||
|
||||
[dependencies]
|
||||
base64 = { version = "0.20.0", optional = true }
|
||||
bstr = "1.6.0"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
grep-searcher = { version = "0.1.11", path = "../searcher" }
|
||||
base64 = { version = "0.13.0", optional = true }
|
||||
bstr = "0.2.0"
|
||||
grep-matcher = { version = "0.1.5", path = "../matcher" }
|
||||
grep-searcher = { version = "0.1.8", path = "../searcher" }
|
||||
termcolor = "1.0.4"
|
||||
serde = { version = "1.0.77", optional = true, features = ["derive"] }
|
||||
serde_json = { version = "1.0.27", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
grep-regex = { version = "0.1.9", path = "../regex" }
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-regex"
|
||||
version = "0.1.11" #:version
|
||||
version = "0.1.10" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Use Rust's regex library with the 'grep' crate.
|
||||
@@ -11,12 +11,13 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "search", "pattern", "line"]
|
||||
license = "Unlicense OR MIT"
|
||||
edition = "2021"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "1.0.2"
|
||||
bstr = "1.6.0"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
log = "0.4.19"
|
||||
regex-automata = { version = "0.3.0" }
|
||||
regex-syntax = "0.7.2"
|
||||
aho-corasick = "0.7.3"
|
||||
bstr = "0.2.10"
|
||||
grep-matcher = { version = "0.1.5", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
regex-syntax = "0.6.5"
|
||||
thread_local = "1.1.2"
|
||||
|
@@ -1,13 +1,17 @@
|
||||
use regex_syntax::ast::parse::Parser;
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
|
||||
/// The results of analyzing the AST of a regular expression (e.g., for supporting
|
||||
/// smart case).
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct AstAnalysis {
|
||||
pub struct AstAnalysis {
|
||||
/// True if and only if a literal uppercase character occurs in the regex.
|
||||
any_uppercase: bool,
|
||||
/// True if and only if the regex contains any literal at all.
|
||||
any_literal: bool,
|
||||
/// True if and only if the regex consists entirely of a literal and no
|
||||
/// other special regex characters.
|
||||
all_verbatim_literal: bool,
|
||||
}
|
||||
|
||||
impl AstAnalysis {
|
||||
@@ -15,16 +19,16 @@ impl AstAnalysis {
|
||||
///
|
||||
/// If `pattern` is not a valid regular expression, then `None` is
|
||||
/// returned.
|
||||
#[cfg(test)]
|
||||
pub(crate) fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
|
||||
regex_syntax::ast::parse::Parser::new()
|
||||
#[allow(dead_code)]
|
||||
pub fn from_pattern(pattern: &str) -> Option<AstAnalysis> {
|
||||
Parser::new()
|
||||
.parse(pattern)
|
||||
.map(|ast| AstAnalysis::from_ast(&ast))
|
||||
.ok()
|
||||
}
|
||||
|
||||
/// Perform an AST analysis given the AST.
|
||||
pub(crate) fn from_ast(ast: &Ast) -> AstAnalysis {
|
||||
pub fn from_ast(ast: &Ast) -> AstAnalysis {
|
||||
let mut analysis = AstAnalysis::new();
|
||||
analysis.from_ast_impl(ast);
|
||||
analysis
|
||||
@@ -36,7 +40,7 @@ impl AstAnalysis {
|
||||
/// For example, a pattern like `\pL` contains no uppercase literals,
|
||||
/// even though `L` is uppercase and the `\pL` class contains uppercase
|
||||
/// characters.
|
||||
pub(crate) fn any_uppercase(&self) -> bool {
|
||||
pub fn any_uppercase(&self) -> bool {
|
||||
self.any_uppercase
|
||||
}
|
||||
|
||||
@@ -44,13 +48,32 @@ impl AstAnalysis {
|
||||
///
|
||||
/// For example, a pattern like `\pL` reports `false`, but a pattern like
|
||||
/// `\pLfoo` reports `true`.
|
||||
pub(crate) fn any_literal(&self) -> bool {
|
||||
pub fn any_literal(&self) -> bool {
|
||||
self.any_literal
|
||||
}
|
||||
|
||||
/// Returns true if and only if the entire pattern is a verbatim literal
|
||||
/// with no special meta characters.
|
||||
///
|
||||
/// When this is true, then the pattern satisfies the following law:
|
||||
/// `escape(pattern) == pattern`. Notable examples where this returns
|
||||
/// `false` include patterns like `a\u0061` even though `\u0061` is just
|
||||
/// a literal `a`.
|
||||
///
|
||||
/// The purpose of this flag is to determine whether the patterns can be
|
||||
/// given to non-regex substring search algorithms as-is.
|
||||
#[allow(dead_code)]
|
||||
pub fn all_verbatim_literal(&self) -> bool {
|
||||
self.all_verbatim_literal
|
||||
}
|
||||
|
||||
/// Creates a new `AstAnalysis` value with an initial configuration.
|
||||
fn new() -> AstAnalysis {
|
||||
AstAnalysis { any_uppercase: false, any_literal: false }
|
||||
AstAnalysis {
|
||||
any_uppercase: false,
|
||||
any_literal: false,
|
||||
all_verbatim_literal: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_impl(&mut self, ast: &Ast) {
|
||||
@@ -63,20 +86,26 @@ impl AstAnalysis {
|
||||
| Ast::Dot(_)
|
||||
| Ast::Assertion(_)
|
||||
| Ast::Class(ast::Class::Unicode(_))
|
||||
| Ast::Class(ast::Class::Perl(_)) => {}
|
||||
| Ast::Class(ast::Class::Perl(_)) => {
|
||||
self.all_verbatim_literal = false;
|
||||
}
|
||||
Ast::Literal(ref x) => {
|
||||
self.from_ast_literal(x);
|
||||
}
|
||||
Ast::Class(ast::Class::Bracketed(ref x)) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_class_set(&x.kind);
|
||||
}
|
||||
Ast::Repetition(ref x) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Group(ref x) => {
|
||||
self.all_verbatim_literal = false;
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Alternation(ref alt) => {
|
||||
self.all_verbatim_literal = false;
|
||||
for x in &alt.asts {
|
||||
self.from_ast_impl(x);
|
||||
}
|
||||
@@ -132,6 +161,9 @@ impl AstAnalysis {
|
||||
}
|
||||
|
||||
fn from_ast_literal(&mut self, ast: &ast::Literal) {
|
||||
if ast.kind != ast::LiteralKind::Verbatim {
|
||||
self.all_verbatim_literal = false;
|
||||
}
|
||||
self.any_literal = true;
|
||||
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
|
||||
}
|
||||
@@ -139,7 +171,7 @@ impl AstAnalysis {
|
||||
/// Returns true if and only if the attributes can never change no matter
|
||||
/// what other AST it might see.
|
||||
fn done(&self) -> bool {
|
||||
self.any_uppercase && self.any_literal
|
||||
self.any_uppercase && self.any_literal && !self.all_verbatim_literal
|
||||
}
|
||||
}
|
||||
|
||||
@@ -156,61 +188,76 @@ mod tests {
|
||||
let x = analysis("");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(!x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis("foo");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis("Foo");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis("foO");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\\");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\w");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\S");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\p{Ll}");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo[a-z]");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo[A-Z]");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo[\S\t]");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"foo\\S");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"\p{Ll}");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(!x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"aBc\w");
|
||||
assert!(x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
|
||||
let x = analysis(r"a\u0061");
|
||||
assert!(!x.any_uppercase);
|
||||
assert!(x.any_literal);
|
||||
assert!(!x.all_verbatim_literal);
|
||||
}
|
||||
}
|
||||
|
@@ -1,16 +1,15 @@
|
||||
use {
|
||||
grep_matcher::{ByteSet, LineTerminator},
|
||||
regex_automata::meta::Regex,
|
||||
regex_syntax::{
|
||||
ast,
|
||||
hir::{self, Hir, HirKind},
|
||||
},
|
||||
};
|
||||
use grep_matcher::{ByteSet, LineTerminator};
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
use regex_syntax::hir::{self, Hir};
|
||||
|
||||
use crate::{
|
||||
ast::AstAnalysis, error::Error, non_matching::non_matching_bytes,
|
||||
strip::strip_from_match,
|
||||
};
|
||||
use crate::ast::AstAnalysis;
|
||||
use crate::crlf::crlfify;
|
||||
use crate::error::Error;
|
||||
use crate::literal::LiteralSets;
|
||||
use crate::multi::alternation_literals;
|
||||
use crate::non_matching::non_matching_bytes;
|
||||
use crate::strip::strip_from_match;
|
||||
|
||||
/// Config represents the configuration of a regex matcher in this crate.
|
||||
/// The configuration is itself a rough combination of the knobs found in
|
||||
@@ -22,23 +21,21 @@ use crate::{
|
||||
/// configuration which generated it, and provides transformation on that HIR
|
||||
/// such that the configuration is preserved.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct Config {
|
||||
pub(crate) case_insensitive: bool,
|
||||
pub(crate) case_smart: bool,
|
||||
pub(crate) multi_line: bool,
|
||||
pub(crate) dot_matches_new_line: bool,
|
||||
pub(crate) swap_greed: bool,
|
||||
pub(crate) ignore_whitespace: bool,
|
||||
pub(crate) unicode: bool,
|
||||
pub(crate) octal: bool,
|
||||
pub(crate) size_limit: usize,
|
||||
pub(crate) dfa_size_limit: usize,
|
||||
pub(crate) nest_limit: u32,
|
||||
pub(crate) line_terminator: Option<LineTerminator>,
|
||||
pub(crate) crlf: bool,
|
||||
pub(crate) word: bool,
|
||||
pub(crate) fixed_strings: bool,
|
||||
pub(crate) whole_line: bool,
|
||||
pub struct Config {
|
||||
pub case_insensitive: bool,
|
||||
pub case_smart: bool,
|
||||
pub multi_line: bool,
|
||||
pub dot_matches_new_line: bool,
|
||||
pub swap_greed: bool,
|
||||
pub ignore_whitespace: bool,
|
||||
pub unicode: bool,
|
||||
pub octal: bool,
|
||||
pub size_limit: usize,
|
||||
pub dfa_size_limit: usize,
|
||||
pub nest_limit: u32,
|
||||
pub line_terminator: Option<LineTerminator>,
|
||||
pub crlf: bool,
|
||||
pub word: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -53,28 +50,47 @@ impl Default for Config {
|
||||
unicode: true,
|
||||
octal: false,
|
||||
// These size limits are much bigger than what's in the regex
|
||||
// crate by default.
|
||||
// crate.
|
||||
size_limit: 100 * (1 << 20),
|
||||
dfa_size_limit: 1000 * (1 << 20),
|
||||
nest_limit: 250,
|
||||
line_terminator: None,
|
||||
crlf: false,
|
||||
word: false,
|
||||
fixed_strings: false,
|
||||
whole_line: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Config {
|
||||
/// Use this configuration to build an HIR from the given patterns. The HIR
|
||||
/// returned corresponds to a single regex that is an alternation of the
|
||||
/// patterns given.
|
||||
pub(crate) fn build_many<P: AsRef<str>>(
|
||||
&self,
|
||||
patterns: &[P],
|
||||
) -> Result<ConfiguredHIR, Error> {
|
||||
ConfiguredHIR::new(self.clone(), patterns)
|
||||
/// Parse the given pattern and return its HIR expression along with
|
||||
/// the current configuration.
|
||||
///
|
||||
/// If there was a problem parsing the given expression then an error
|
||||
/// is returned.
|
||||
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
||||
let ast = self.ast(pattern)?;
|
||||
let analysis = self.analysis(&ast)?;
|
||||
let expr = hir::translate::TranslatorBuilder::new()
|
||||
.allow_invalid_utf8(true)
|
||||
.case_insensitive(self.is_case_insensitive(&analysis))
|
||||
.multi_line(self.multi_line)
|
||||
.dot_matches_new_line(self.dot_matches_new_line)
|
||||
.swap_greed(self.swap_greed)
|
||||
.unicode(self.unicode)
|
||||
.build()
|
||||
.translate(pattern, &ast)
|
||||
.map_err(Error::regex)?;
|
||||
let expr = match self.line_terminator {
|
||||
None => expr,
|
||||
Some(line_term) => strip_from_match(expr, line_term)?,
|
||||
};
|
||||
Ok(ConfiguredHIR {
|
||||
original: pattern.to_string(),
|
||||
config: self.clone(),
|
||||
analysis,
|
||||
// If CRLF mode is enabled, replace `$` with `(?:\r?$)`.
|
||||
expr: if self.crlf { crlfify(expr) } else { expr },
|
||||
})
|
||||
}
|
||||
|
||||
/// Accounting for the `smart_case` config knob, return true if and only if
|
||||
@@ -89,55 +105,35 @@ impl Config {
|
||||
analysis.any_literal() && !analysis.any_uppercase()
|
||||
}
|
||||
|
||||
/// Returns whether the given patterns should be treated as "fixed strings"
|
||||
/// literals. This is different from just querying the `fixed_strings` knob
|
||||
/// in that if the knob is false, this will still return true in some cases
|
||||
/// if the patterns are themselves indistinguishable from literals.
|
||||
/// Returns true if and only if this config is simple enough such that
|
||||
/// if the pattern is a simple alternation of literals, then it can be
|
||||
/// constructed via a plain Aho-Corasick automaton.
|
||||
///
|
||||
/// The main idea here is that if this returns true, then it is safe
|
||||
/// to build a `regex_syntax::hir::Hir` value directly from the given
|
||||
/// patterns as an alternation of `hir::Literal` values.
|
||||
fn is_fixed_strings<P: AsRef<str>>(&self, patterns: &[P]) -> bool {
|
||||
// When these are enabled, we really need to parse the patterns and
|
||||
// let them go through the standard HIR translation process in order
|
||||
// for case folding transforms to be applied.
|
||||
if self.case_insensitive || self.case_smart {
|
||||
return false;
|
||||
}
|
||||
// Even if whole_line or word is enabled, both of those things can
|
||||
// be implemented by wrapping the Hir generated by an alternation of
|
||||
// fixed string literals. So for here at least, we don't care about the
|
||||
// word or whole_line settings.
|
||||
if self.fixed_strings {
|
||||
// ... but if any literal contains a line terminator, then we've
|
||||
// got to bail out because this will ultimately result in an error.
|
||||
if let Some(lineterm) = self.line_terminator {
|
||||
for p in patterns.iter() {
|
||||
if has_line_terminator(lineterm, p.as_ref()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
// In this case, the only way we can hand construct the Hir is if none
|
||||
// of the patterns contain meta characters. If they do, then we need to
|
||||
// send them through the standard parsing/translation process.
|
||||
for p in patterns.iter() {
|
||||
let p = p.as_ref();
|
||||
if p.chars().any(regex_syntax::is_meta_character) {
|
||||
return false;
|
||||
}
|
||||
// Same deal as when fixed_strings is set above. If the pattern has
|
||||
// a line terminator anywhere, then we need to bail out and let
|
||||
// an error occur.
|
||||
if let Some(lineterm) = self.line_terminator {
|
||||
if has_line_terminator(lineterm, p) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
true
|
||||
/// Note that it is OK to return true even when settings like `multi_line`
|
||||
/// are enabled, since if multi-line can impact the match semantics of a
|
||||
/// regex, then it is by definition not a simple alternation of literals.
|
||||
pub fn can_plain_aho_corasick(&self) -> bool {
|
||||
!self.word && !self.case_insensitive && !self.case_smart
|
||||
}
|
||||
|
||||
/// Perform analysis on the AST of this pattern.
|
||||
///
|
||||
/// This returns an error if the given pattern failed to parse.
|
||||
fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
|
||||
Ok(AstAnalysis::from_ast(ast))
|
||||
}
|
||||
|
||||
/// Parse the given pattern into its abstract syntax.
|
||||
///
|
||||
/// This returns an error if the given pattern failed to parse.
|
||||
fn ast(&self, pattern: &str) -> Result<Ast, Error> {
|
||||
ast::parse::ParserBuilder::new()
|
||||
.nest_limit(self.nest_limit)
|
||||
.octal(self.octal)
|
||||
.ignore_whitespace(self.ignore_whitespace)
|
||||
.build()
|
||||
.parse(pattern)
|
||||
.map_err(Error::regex)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -153,268 +149,170 @@ impl Config {
|
||||
/// size limits set on the configured HIR will be propagated out to any
|
||||
/// subsequently constructed HIR or regular expression.
|
||||
#[derive(Clone, Debug)]
|
||||
pub(crate) struct ConfiguredHIR {
|
||||
pub struct ConfiguredHIR {
|
||||
original: String,
|
||||
config: Config,
|
||||
hir: Hir,
|
||||
analysis: AstAnalysis,
|
||||
expr: Hir,
|
||||
}
|
||||
|
||||
impl ConfiguredHIR {
|
||||
/// Parse the given patterns into a single HIR expression that represents
|
||||
/// an alternation of the patterns given.
|
||||
fn new<P: AsRef<str>>(
|
||||
config: Config,
|
||||
patterns: &[P],
|
||||
) -> Result<ConfiguredHIR, Error> {
|
||||
let hir = if config.is_fixed_strings(patterns) {
|
||||
let mut alts = vec![];
|
||||
for p in patterns.iter() {
|
||||
alts.push(Hir::literal(p.as_ref().as_bytes()));
|
||||
}
|
||||
log::debug!(
|
||||
"assembling HIR from {} fixed string literals",
|
||||
alts.len()
|
||||
);
|
||||
let hir = Hir::alternation(alts);
|
||||
hir
|
||||
} else {
|
||||
let mut alts = vec![];
|
||||
for p in patterns.iter() {
|
||||
alts.push(if config.fixed_strings {
|
||||
format!("(?:{})", regex_syntax::escape(p.as_ref()))
|
||||
} else {
|
||||
format!("(?:{})", p.as_ref())
|
||||
});
|
||||
}
|
||||
let pattern = alts.join("|");
|
||||
let ast = ast::parse::ParserBuilder::new()
|
||||
.nest_limit(config.nest_limit)
|
||||
.octal(config.octal)
|
||||
.ignore_whitespace(config.ignore_whitespace)
|
||||
.build()
|
||||
.parse(&pattern)
|
||||
.map_err(Error::generic)?;
|
||||
let analysis = AstAnalysis::from_ast(&ast);
|
||||
let mut hir = hir::translate::TranslatorBuilder::new()
|
||||
.utf8(false)
|
||||
.case_insensitive(config.is_case_insensitive(&analysis))
|
||||
.multi_line(config.multi_line)
|
||||
.dot_matches_new_line(config.dot_matches_new_line)
|
||||
.crlf(config.crlf)
|
||||
.swap_greed(config.swap_greed)
|
||||
.unicode(config.unicode)
|
||||
.build()
|
||||
.translate(&pattern, &ast)
|
||||
.map_err(Error::generic)?;
|
||||
// We don't need to do this for the fixed-strings case above
|
||||
// because is_fixed_strings will return false if any pattern
|
||||
// contains a line terminator. Therefore, we don't need to strip
|
||||
// it.
|
||||
//
|
||||
// We go to some pains to avoid doing this in the fixed-strings
|
||||
// case because this can result in building a new HIR when ripgrep
|
||||
// is given a huge set of literals to search for. And this can
|
||||
// actually take a little time. It's not huge, but it's noticeable.
|
||||
hir = match config.line_terminator {
|
||||
None => hir,
|
||||
Some(line_term) => strip_from_match(hir, line_term)?,
|
||||
};
|
||||
hir
|
||||
};
|
||||
Ok(ConfiguredHIR { config, hir })
|
||||
}
|
||||
|
||||
/// Return a reference to the underlying configuration.
|
||||
pub(crate) fn config(&self) -> &Config {
|
||||
/// Return the configuration for this HIR expression.
|
||||
pub fn config(&self) -> &Config {
|
||||
&self.config
|
||||
}
|
||||
|
||||
/// Return a reference to the underlying HIR.
|
||||
pub(crate) fn hir(&self) -> &Hir {
|
||||
&self.hir
|
||||
}
|
||||
|
||||
/// Convert this HIR to a regex that can be used for matching.
|
||||
pub(crate) fn to_regex(&self) -> Result<Regex, Error> {
|
||||
let meta = Regex::config()
|
||||
.utf8_empty(false)
|
||||
.nfa_size_limit(Some(self.config.size_limit))
|
||||
// We don't expose a knob for this because the one-pass DFA is
|
||||
// usually not a perf bottleneck for ripgrep. But we give it some
|
||||
// more room than the default.
|
||||
.onepass_size_limit(Some(10 * (1 << 20)))
|
||||
// Same deal here. The default limit for full DFAs is VERY small,
|
||||
// but with ripgrep we can afford to spend a bit more time on
|
||||
// building them I think.
|
||||
.dfa_size_limit(Some(1 * (1 << 20)))
|
||||
.dfa_state_limit(Some(1_000))
|
||||
.hybrid_cache_capacity(self.config.dfa_size_limit);
|
||||
Regex::builder()
|
||||
.configure(meta)
|
||||
.build_from_hir(&self.hir)
|
||||
.map_err(Error::regex)
|
||||
}
|
||||
|
||||
/// Compute the set of non-matching bytes for this HIR expression.
|
||||
pub(crate) fn non_matching_bytes(&self) -> ByteSet {
|
||||
non_matching_bytes(&self.hir)
|
||||
pub fn non_matching_bytes(&self) -> ByteSet {
|
||||
non_matching_bytes(&self.expr)
|
||||
}
|
||||
|
||||
/// Returns true if and only if this regex needs to have its match offsets
|
||||
/// tweaked because of CRLF support. Specifically, this occurs when the
|
||||
/// CRLF hack is enabled and the regex is line anchored at the end. In
|
||||
/// this case, matches that end with a `\r` have the `\r` stripped.
|
||||
pub fn needs_crlf_stripped(&self) -> bool {
|
||||
self.config.crlf && self.expr.is_line_anchored_end()
|
||||
}
|
||||
|
||||
/// Returns the line terminator configured on this expression.
|
||||
///
|
||||
/// When we have beginning/end anchors (NOT line anchors), the fast line
|
||||
/// searching path isn't quite correct. Or at least, doesn't match the slow
|
||||
/// path. Namely, the slow path strips line terminators while the fast path
|
||||
/// does not. Since '$' (when multi-line mode is disabled) doesn't match at
|
||||
/// line boundaries, the existence of a line terminator might cause it to
|
||||
/// not match when it otherwise would with the line terminator stripped.
|
||||
/// searching path isn't quite correct. Or at least, doesn't match the
|
||||
/// slow path. Namely, the slow path strips line terminators while the
|
||||
/// fast path does not. Since '$' (when multi-line mode is disabled)
|
||||
/// doesn't match at line boundaries, the existence of a line terminator
|
||||
/// might cause it to not match when it otherwise would with the line
|
||||
/// terminator stripped.
|
||||
///
|
||||
/// Since searching with text anchors is exceptionally rare in the context
|
||||
/// of line oriented searching (multi-line mode is basically always
|
||||
/// enabled), we just disable this optimization when there are text
|
||||
/// anchors. We disable it by not returning a line terminator, since
|
||||
/// Since searching with text anchors is exceptionally rare in the
|
||||
/// context of line oriented searching (multi-line mode is basically
|
||||
/// always enabled), we just disable this optimization when there are
|
||||
/// text anchors. We disable it by not returning a line terminator, since
|
||||
/// without a line terminator, the fast search path can't be executed.
|
||||
///
|
||||
/// Actually, the above is no longer quite correct. Later on, another
|
||||
/// optimization was added where if the line terminator was in the set of
|
||||
/// bytes that was guaranteed to never be part of a match, then the higher
|
||||
/// level search infrastructure assumes that the fast line-by-line search
|
||||
/// path can still be taken. This optimization applies when multi-line
|
||||
/// search (not multi-line mode) is enabled. In that case, there is no
|
||||
/// configured line terminator since the regex is permitted to match a
|
||||
/// line terminator. But if the regex is guaranteed to never match across
|
||||
/// multiple lines despite multi-line search being requested, we can still
|
||||
/// do the faster and more flexible line-by-line search. This is why the
|
||||
/// non-matching extraction routine removes `\n` when `\A` and `\z` are
|
||||
/// present even though that's not quite correct...
|
||||
///
|
||||
/// See: <https://github.com/BurntSushi/ripgrep/issues/2260>
|
||||
pub(crate) fn line_terminator(&self) -> Option<LineTerminator> {
|
||||
if self.hir.properties().look_set().contains_anchor_haystack() {
|
||||
pub fn line_terminator(&self) -> Option<LineTerminator> {
|
||||
if self.is_any_anchored() {
|
||||
None
|
||||
} else {
|
||||
self.config.line_terminator
|
||||
}
|
||||
}
|
||||
|
||||
/// Turns this configured HIR into one that only matches when both sides of
|
||||
/// the match correspond to a word boundary.
|
||||
/// Returns true if and only if the underlying HIR has any text anchors.
|
||||
fn is_any_anchored(&self) -> bool {
|
||||
self.expr.is_any_anchored_start() || self.expr.is_any_anchored_end()
|
||||
}
|
||||
|
||||
/// Builds a regular expression from this HIR expression.
|
||||
pub fn regex(&self) -> Result<Regex, Error> {
|
||||
self.pattern_to_regex(&self.expr.to_string())
|
||||
}
|
||||
|
||||
/// If this HIR corresponds to an alternation of literals with no
|
||||
/// capturing groups, then this returns those literals.
|
||||
pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
|
||||
if !self.config.can_plain_aho_corasick() {
|
||||
return None;
|
||||
}
|
||||
alternation_literals(&self.expr)
|
||||
}
|
||||
|
||||
/// Applies the given function to the concrete syntax of this HIR and then
|
||||
/// generates a new HIR based on the result of the function in a way that
|
||||
/// preserves the configuration.
|
||||
///
|
||||
/// Note that the HIR returned is like turning `pat` into
|
||||
/// `(?m:^|\W)(pat)(?m:$|\W)`. That is, the true match is at capture group
|
||||
/// `1` and not `0`.
|
||||
pub(crate) fn into_word(self) -> Result<ConfiguredHIR, Error> {
|
||||
// In theory building the HIR for \W should never fail, but there are
|
||||
// likely some pathological cases (particularly with respect to certain
|
||||
// values of limits) where it could in theory fail.
|
||||
let non_word = {
|
||||
let mut config = self.config.clone();
|
||||
config.fixed_strings = false;
|
||||
ConfiguredHIR::new(config, &[r"\W"])?
|
||||
};
|
||||
let line_anchor_start = Hir::look(self.line_anchor_start());
|
||||
let line_anchor_end = Hir::look(self.line_anchor_end());
|
||||
let hir = Hir::concat(vec![
|
||||
Hir::alternation(vec![line_anchor_start, non_word.hir.clone()]),
|
||||
Hir::capture(hir::Capture {
|
||||
index: 1,
|
||||
name: None,
|
||||
sub: Box::new(renumber_capture_indices(self.hir)?),
|
||||
}),
|
||||
Hir::alternation(vec![non_word.hir, line_anchor_end]),
|
||||
]);
|
||||
Ok(ConfiguredHIR { config: self.config, hir })
|
||||
/// For example, this can be used to wrap a user provided regular
|
||||
/// expression with additional semantics. e.g., See the `WordMatcher`.
|
||||
pub fn with_pattern<F: FnMut(&str) -> String>(
|
||||
&self,
|
||||
mut f: F,
|
||||
) -> Result<ConfiguredHIR, Error> {
|
||||
self.pattern_to_hir(&f(&self.expr.to_string()))
|
||||
}
|
||||
|
||||
/// Turns this configured HIR into an equivalent one, but where it must
|
||||
/// match at the start and end of a line.
|
||||
pub(crate) fn into_whole_line(self) -> ConfiguredHIR {
|
||||
let line_anchor_start = Hir::look(self.line_anchor_start());
|
||||
let line_anchor_end = Hir::look(self.line_anchor_end());
|
||||
let hir =
|
||||
Hir::concat(vec![line_anchor_start, self.hir, line_anchor_end]);
|
||||
ConfiguredHIR { config: self.config, hir }
|
||||
}
|
||||
|
||||
/// Turns this configured HIR into an equivalent one, but where it must
|
||||
/// match at the start and end of the haystack.
|
||||
pub(crate) fn into_anchored(self) -> ConfiguredHIR {
|
||||
let hir = Hir::concat(vec![
|
||||
Hir::look(hir::Look::Start),
|
||||
self.hir,
|
||||
Hir::look(hir::Look::End),
|
||||
]);
|
||||
ConfiguredHIR { config: self.config, hir }
|
||||
}
|
||||
|
||||
/// Returns the "start line" anchor for this configuration.
|
||||
fn line_anchor_start(&self) -> hir::Look {
|
||||
if self.config.crlf {
|
||||
hir::Look::StartCRLF
|
||||
} else {
|
||||
hir::Look::StartLF
|
||||
/// If the current configuration has a line terminator set and if useful
|
||||
/// literals could be extracted, then a regular expression matching those
|
||||
/// literals is returned. If no line terminator is set, then `None` is
|
||||
/// returned.
|
||||
///
|
||||
/// If compiling the resulting regular expression failed, then an error
|
||||
/// is returned.
|
||||
///
|
||||
/// This method only returns something when a line terminator is set
|
||||
/// because matches from this regex are generally candidates that must be
|
||||
/// confirmed before reporting a match. When performing a line oriented
|
||||
/// search, confirmation is easy: just extend the candidate match to its
|
||||
/// respective line boundaries and then re-search that line for a full
|
||||
/// match. This only works when the line terminator is set because the line
|
||||
/// terminator setting guarantees that the regex itself can never match
|
||||
/// through the line terminator byte.
|
||||
pub fn fast_line_regex(&self) -> Result<Option<Regex>, Error> {
|
||||
if self.config.line_terminator.is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
match LiteralSets::new(&self.expr).one_regex(self.config.word) {
|
||||
None => Ok(None),
|
||||
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the "end line" anchor for this configuration.
|
||||
fn line_anchor_end(&self) -> hir::Look {
|
||||
if self.config.crlf {
|
||||
hir::Look::EndCRLF
|
||||
} else {
|
||||
hir::Look::EndLF
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// This increments the index of every capture group in the given hir by 1. If
|
||||
/// any increment results in an overflow, then an error is returned.
|
||||
fn renumber_capture_indices(hir: Hir) -> Result<Hir, Error> {
|
||||
Ok(match hir.into_kind() {
|
||||
HirKind::Empty => Hir::empty(),
|
||||
HirKind::Literal(hir::Literal(lit)) => Hir::literal(lit),
|
||||
HirKind::Class(cls) => Hir::class(cls),
|
||||
HirKind::Look(x) => Hir::look(x),
|
||||
HirKind::Repetition(mut x) => {
|
||||
x.sub = Box::new(renumber_capture_indices(*x.sub)?);
|
||||
Hir::repetition(x)
|
||||
}
|
||||
HirKind::Capture(mut cap) => {
|
||||
cap.index = match cap.index.checked_add(1) {
|
||||
Some(index) => index,
|
||||
None => {
|
||||
// This error message kind of sucks, but it's probably
|
||||
// impossible for it to happen. The only way a capture
|
||||
// index can overflow addition is if the regex is huge
|
||||
// (or something else has gone horribly wrong).
|
||||
let msg = "could not renumber capture index, too big";
|
||||
return Err(Error::any(msg));
|
||||
}
|
||||
};
|
||||
cap.sub = Box::new(renumber_capture_indices(*cap.sub)?);
|
||||
Hir::capture(cap)
|
||||
}
|
||||
HirKind::Concat(subs) => {
|
||||
let subs = subs
|
||||
.into_iter()
|
||||
.map(|sub| renumber_capture_indices(sub))
|
||||
.collect::<Result<Vec<Hir>, Error>>()?;
|
||||
Hir::concat(subs)
|
||||
}
|
||||
HirKind::Alternation(subs) => {
|
||||
let subs = subs
|
||||
.into_iter()
|
||||
.map(|sub| renumber_capture_indices(sub))
|
||||
.collect::<Result<Vec<Hir>, Error>>()?;
|
||||
Hir::alternation(subs)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns true if the given literal string contains any byte from the line
|
||||
/// terminator given.
|
||||
fn has_line_terminator(lineterm: LineTerminator, literal: &str) -> bool {
|
||||
if lineterm.is_crlf() {
|
||||
literal.as_bytes().iter().copied().any(|b| b == b'\r' || b == b'\n')
|
||||
} else {
|
||||
literal.as_bytes().iter().copied().any(|b| b == lineterm.as_byte())
|
||||
/// Create a regex from the given pattern using this HIR's configuration.
|
||||
fn pattern_to_regex(&self, pattern: &str) -> Result<Regex, Error> {
|
||||
// The settings we explicitly set here are intentionally a subset
|
||||
// of the settings we have. The key point here is that our HIR
|
||||
// expression is computed with the settings in mind, such that setting
|
||||
// them here could actually lead to unintended behavior. For example,
|
||||
// consider the pattern `(?U)a+`. This will get folded into the HIR
|
||||
// as a non-greedy repetition operator which will in turn get printed
|
||||
// to the concrete syntax as `a+?`, which is correct. But if we
|
||||
// set the `swap_greed` option again, then we'll wind up with `(?U)a+?`
|
||||
// which is equal to `a+` which is not the same as what we were given.
|
||||
//
|
||||
// We also don't need to apply `case_insensitive` since this gets
|
||||
// folded into the HIR and would just cause us to do redundant work.
|
||||
//
|
||||
// Finally, we don't need to set `ignore_whitespace` since the concrete
|
||||
// syntax emitted by the HIR printer never needs it.
|
||||
//
|
||||
// We set the rest of the options. Some of them are important, such as
|
||||
// the size limit, and some of them are necessary to preserve the
|
||||
// intention of the original pattern. For example, the Unicode flag
|
||||
// will impact how the WordMatcher functions, namely, whether its
|
||||
// word boundaries are Unicode aware or not.
|
||||
RegexBuilder::new(&pattern)
|
||||
.nest_limit(self.config.nest_limit)
|
||||
.octal(self.config.octal)
|
||||
.multi_line(self.config.multi_line)
|
||||
.dot_matches_new_line(self.config.dot_matches_new_line)
|
||||
.unicode(self.config.unicode)
|
||||
.size_limit(self.config.size_limit)
|
||||
.dfa_size_limit(self.config.dfa_size_limit)
|
||||
.build()
|
||||
.map_err(Error::regex)
|
||||
}
|
||||
|
||||
/// Create an HIR expression from the given pattern using this HIR's
|
||||
/// configuration.
|
||||
fn pattern_to_hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
|
||||
// See `pattern_to_regex` comment for explanation of why we only set
|
||||
// a subset of knobs here. e.g., `swap_greed` is explicitly left out.
|
||||
let expr = ::regex_syntax::ParserBuilder::new()
|
||||
.nest_limit(self.config.nest_limit)
|
||||
.octal(self.config.octal)
|
||||
.allow_invalid_utf8(true)
|
||||
.multi_line(self.config.multi_line)
|
||||
.dot_matches_new_line(self.config.dot_matches_new_line)
|
||||
.unicode(self.config.unicode)
|
||||
.build()
|
||||
.parse(pattern)
|
||||
.map_err(Error::regex)?;
|
||||
Ok(ConfiguredHIR {
|
||||
original: self.original.clone(),
|
||||
config: self.config.clone(),
|
||||
analysis: self.analysis.clone(),
|
||||
expr,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
189
crates/regex/src/crlf.rs
Normal file
189
crates/regex/src/crlf.rs
Normal file
@@ -0,0 +1,189 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::Regex;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use crate::config::ConfiguredHIR;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
|
||||
/// A matcher that strips a trailing `\r` from the end of every match.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CRLFMatcher {
|
||||
/// The regex.
|
||||
regex: Regex,
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl CRLFMatcher {
|
||||
/// Create a new matcher from the given pattern that strips `\r` from the
|
||||
/// end of every match.
|
||||
///
|
||||
/// This panics if the given expression doesn't need its CRLF stripped.
|
||||
pub fn new(expr: &ConfiguredHIR) -> Result<CRLFMatcher, Error> {
|
||||
assert!(expr.needs_crlf_stripped());
|
||||
|
||||
let regex = expr.regex()?;
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i.checked_sub(1).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(CRLFMatcher { regex, names })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
}
|
||||
|
||||
impl Matcher for CRLFMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
let m = match self.regex.find_at(haystack, at) {
|
||||
None => return Ok(None),
|
||||
Some(m) => Match::new(m.start(), m.end()),
|
||||
};
|
||||
Ok(Some(adjust_match(haystack, m)))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.regex.captures_len().checked_sub(1).unwrap()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
caps.strip_crlf(false);
|
||||
let r =
|
||||
self.regex.captures_read_at(caps.locations_mut(), haystack, at);
|
||||
if !r.is_some() {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// If the end of our match includes a `\r`, then strip it from all
|
||||
// capture groups ending at the same location.
|
||||
let end = caps.locations().get(0).unwrap().1;
|
||||
if end > 0 && haystack.get(end - 1) == Some(&b'\r') {
|
||||
caps.strip_crlf(true);
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter or
|
||||
// captures_iter. Namely, the iter methods are guaranteed to be correct
|
||||
// by virtue of implementing find_at and captures_at above.
|
||||
}
|
||||
|
||||
/// If the given match ends with a `\r`, then return a new match that ends
|
||||
/// immediately before the `\r`.
|
||||
pub fn adjust_match(haystack: &[u8], m: Match) -> Match {
|
||||
if m.end() > 0 && haystack.get(m.end() - 1) == Some(&b'\r') {
|
||||
m.with_end(m.end() - 1)
|
||||
} else {
|
||||
m
|
||||
}
|
||||
}
|
||||
|
||||
/// Substitutes all occurrences of multi-line enabled `$` with `(?:\r?$)`.
|
||||
///
|
||||
/// This does not preserve the exact semantics of the given expression,
|
||||
/// however, it does have the useful property that anything that matched the
|
||||
/// given expression will also match the returned expression. The difference is
|
||||
/// that the returned expression can match possibly other things as well.
|
||||
///
|
||||
/// The principle reason why we do this is because the underlying regex engine
|
||||
/// doesn't support CRLF aware `$` look-around. It's planned to fix it at that
|
||||
/// level, but we perform this kludge in the mean time.
|
||||
///
|
||||
/// Note that while the match preserving semantics are nice and neat, the
|
||||
/// match position semantics are quite a bit messier. Namely, `$` only ever
|
||||
/// matches the position between characters where as `\r??` can match a
|
||||
/// character and change the offset. This is regretable, but works out pretty
|
||||
/// nicely in most cases, especially when a match is limited to a single line.
|
||||
pub fn crlfify(expr: Hir) -> Hir {
|
||||
match expr.into_kind() {
|
||||
HirKind::Anchor(hir::Anchor::EndLine) => {
|
||||
let concat = Hir::concat(vec![
|
||||
Hir::repetition(hir::Repetition {
|
||||
kind: hir::RepetitionKind::ZeroOrOne,
|
||||
greedy: false,
|
||||
hir: Box::new(Hir::literal(hir::Literal::Unicode('\r'))),
|
||||
}),
|
||||
Hir::anchor(hir::Anchor::EndLine),
|
||||
]);
|
||||
Hir::group(hir::Group {
|
||||
kind: hir::GroupKind::NonCapturing,
|
||||
hir: Box::new(concat),
|
||||
})
|
||||
}
|
||||
HirKind::Empty => Hir::empty(),
|
||||
HirKind::Literal(x) => Hir::literal(x),
|
||||
HirKind::Class(x) => Hir::class(x),
|
||||
HirKind::Anchor(x) => Hir::anchor(x),
|
||||
HirKind::WordBoundary(x) => Hir::word_boundary(x),
|
||||
HirKind::Repetition(mut x) => {
|
||||
x.hir = Box::new(crlfify(*x.hir));
|
||||
Hir::repetition(x)
|
||||
}
|
||||
HirKind::Group(mut x) => {
|
||||
x.hir = Box::new(crlfify(*x.hir));
|
||||
Hir::group(x)
|
||||
}
|
||||
HirKind::Concat(xs) => {
|
||||
Hir::concat(xs.into_iter().map(crlfify).collect())
|
||||
}
|
||||
HirKind::Alternation(xs) => {
|
||||
Hir::alternation(xs.into_iter().map(crlfify).collect())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::crlfify;
    use regex_syntax::Parser;

    /// Parse `pattern`, run it through `crlfify` and print the result back
    /// as concrete syntax.
    fn roundtrip(pattern: &str) -> String {
        let parsed = Parser::new().parse(pattern).unwrap();
        crlfify(parsed).to_string()
    }

    #[test]
    fn various() {
        // (input pattern, expected printed form after crlfify)
        let cases: &[(&str, &str)] = &[
            (r"(?m)$", "(?:\r??(?m:$))"),
            (r"(?m)$$", "(?:\r??(?m:$))(?:\r??(?m:$))"),
            (
                r"(?m)(?:foo$|bar$)",
                "(?:foo(?:\r??(?m:$))|bar(?:\r??(?m:$)))",
            ),
            (r"(?m)$a", "(?:\r??(?m:$))a"),
            // Not a multiline `$`, so no crlfifying occurs.
            (r"$", "\\z"),
            // It's a literal, derp.
            (r"\$", "\\$"),
        ];
        for &(pattern, expected) in cases {
            assert_eq!(roundtrip(pattern), expected);
        }
    }
}
|
@@ -1,3 +1,8 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
use crate::util;
|
||||
|
||||
/// An error that can occur in this crate.
|
||||
///
|
||||
/// Generally, this error corresponds to problems building a regular
|
||||
@@ -13,27 +18,10 @@ impl Error {
|
||||
Error { kind }
|
||||
}
|
||||
|
||||
pub(crate) fn regex(err: regex_automata::meta::BuildError) -> Error {
|
||||
if let Some(size_limit) = err.size_limit() {
|
||||
let kind = ErrorKind::Regex(format!(
|
||||
"compiled regex exceeds size limit of {size_limit}",
|
||||
));
|
||||
Error { kind }
|
||||
} else if let Some(ref err) = err.syntax_error() {
|
||||
Error::generic(err)
|
||||
} else {
|
||||
Error::generic(err)
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn generic<E: std::error::Error>(err: E) -> Error {
|
||||
pub(crate) fn regex<E: error::Error>(err: E) -> Error {
|
||||
Error { kind: ErrorKind::Regex(err.to_string()) }
|
||||
}
|
||||
|
||||
pub(crate) fn any<E: ToString>(msg: E) -> Error {
|
||||
Error { kind: ErrorKind::Regex(msg.to_string()) }
|
||||
}
|
||||
|
||||
/// Return the kind of this error.
|
||||
pub fn kind(&self) -> &ErrorKind {
|
||||
&self.kind
|
||||
@@ -42,7 +30,6 @@ impl Error {
|
||||
|
||||
/// The kind of an error that can occur.
|
||||
#[derive(Clone, Debug)]
|
||||
#[non_exhaustive]
|
||||
pub enum ErrorKind {
|
||||
/// An error that occurred as a result of parsing a regular expression.
|
||||
/// This can be a syntax error or an error that results from attempting to
|
||||
@@ -64,26 +51,38 @@ pub enum ErrorKind {
|
||||
///
|
||||
/// The invalid byte is included in this error.
|
||||
InvalidLineTerminator(u8),
|
||||
/// Hints that destructuring should not be exhaustive.
|
||||
///
|
||||
/// This enum may grow additional variants, so this makes sure clients
|
||||
/// don't count on exhaustive matching. (Otherwise, adding a new variant
|
||||
/// could break existing code.)
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl std::error::Error for Error {}
|
||||
|
||||
impl std::fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
|
||||
use bstr::ByteSlice;
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::NotAllowed(ref lit) => {
|
||||
write!(f, "the literal {:?} is not allowed in a regex", lit)
|
||||
}
|
||||
ErrorKind::InvalidLineTerminator(byte) => {
|
||||
write!(
|
||||
f,
|
||||
"line terminators must be ASCII, but {} is not",
|
||||
[byte].as_bstr()
|
||||
)
|
||||
}
|
||||
ErrorKind::Regex(_) => "regex error",
|
||||
ErrorKind::NotAllowed(_) => "literal not allowed",
|
||||
ErrorKind::InvalidLineTerminator(_) => "invalid line terminator",
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::NotAllowed(ref lit) => {
|
||||
write!(f, "the literal '{:?}' is not allowed in a regex", lit)
|
||||
}
|
||||
ErrorKind::InvalidLineTerminator(byte) => {
|
||||
let x = util::show_bytes(&[byte]);
|
||||
write!(f, "line terminators must be ASCII, but '{}' is not", x)
|
||||
}
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -8,9 +8,12 @@ pub use crate::matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
|
||||
mod ast;
|
||||
mod config;
|
||||
mod crlf;
|
||||
mod error;
|
||||
mod literal;
|
||||
mod matcher;
|
||||
mod multi;
|
||||
mod non_matching;
|
||||
mod strip;
|
||||
mod util;
|
||||
mod word;
|
||||
|
File diff suppressed because it is too large
Load Diff
@@ -1,22 +1,15 @@
|
||||
use std::sync::Arc;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use {
|
||||
grep_matcher::{
|
||||
ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher,
|
||||
NoError,
|
||||
},
|
||||
regex_automata::{
|
||||
meta::Regex, util::captures::Captures as AutomataCaptures, Input,
|
||||
PatternID,
|
||||
},
|
||||
use grep_matcher::{
|
||||
ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError,
|
||||
};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
use crate::{
|
||||
config::{Config, ConfiguredHIR},
|
||||
error::Error,
|
||||
literal::InnerLiterals,
|
||||
word::WordMatcher,
|
||||
};
|
||||
use crate::config::{Config, ConfiguredHIR};
|
||||
use crate::crlf::CRLFMatcher;
|
||||
use crate::error::Error;
|
||||
use crate::multi::MultiLiteralMatcher;
|
||||
use crate::word::WordMatcher;
|
||||
|
||||
/// A builder for constructing a `Matcher` using regular expressions.
|
||||
///
|
||||
@@ -50,37 +43,18 @@ impl RegexMatcherBuilder {
|
||||
/// The syntax supported is documented as part of the regex crate:
|
||||
/// <https://docs.rs/regex/#syntax>.
|
||||
pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
self.build_many(&[pattern])
|
||||
}
|
||||
|
||||
/// Build a new matcher using the current configuration for the provided
|
||||
/// patterns. The resulting matcher behaves as if all of the patterns
|
||||
/// given are joined together into a single alternation. That is, it
|
||||
/// reports matches where at least one of the given patterns matches.
|
||||
pub fn build_many<P: AsRef<str>>(
|
||||
&self,
|
||||
patterns: &[P],
|
||||
) -> Result<RegexMatcher, Error> {
|
||||
let chir = self.config.build_many(patterns)?;
|
||||
let matcher = RegexMatcherImpl::new(chir)?;
|
||||
let (chir, re) = (matcher.chir(), matcher.regex());
|
||||
log::trace!("final regex: {:?}", chir.hir().to_string());
|
||||
|
||||
let chir = self.config.hir(pattern)?;
|
||||
let fast_line_regex = chir.fast_line_regex()?;
|
||||
let non_matching_bytes = chir.non_matching_bytes();
|
||||
// If we can pick out some literals from the regex, then we might be
|
||||
// able to build a faster regex that quickly identifies candidate
|
||||
// matching lines. The regex engine will do what it can on its own, but
|
||||
// we can specifically do a little more when a line terminator is set.
|
||||
// For example, for a regex like `\w+foo\w+`, we can look for `foo`,
|
||||
// and when a match is found, look for the line containing `foo` and
|
||||
// then run the original regex on only that line. (In this case, the
|
||||
// regex engine is likely to handle this case for us since it's so
|
||||
// simple, but the idea applies.)
|
||||
let fast_line_regex = InnerLiterals::new(chir, re).one_regex()?;
|
||||
if let Some(ref re) = fast_line_regex {
|
||||
log::debug!("extracted fast line regex: {:?}", re);
|
||||
}
|
||||
|
||||
// We override the line terminator in case the configured HIR doesn't
|
||||
// support it.
|
||||
let matcher = RegexMatcherImpl::new(&chir)?;
|
||||
log::trace!("final regex: {:?}", matcher.regex());
|
||||
let mut config = self.config.clone();
|
||||
// We override the line terminator in case the configured expr doesn't
|
||||
// support it.
|
||||
config.line_terminator = chir.line_terminator();
|
||||
Ok(RegexMatcher {
|
||||
config,
|
||||
@@ -99,7 +73,39 @@ impl RegexMatcherBuilder {
|
||||
&self,
|
||||
literals: &[B],
|
||||
) -> Result<RegexMatcher, Error> {
|
||||
self.build_many(literals)
|
||||
let mut has_escape = false;
|
||||
let mut slices = vec![];
|
||||
for lit in literals {
|
||||
slices.push(lit.as_ref());
|
||||
has_escape = has_escape || lit.as_ref().contains('\\');
|
||||
}
|
||||
// Even when we have a fixed set of literals, we might still want to
|
||||
// use the regex engine. Specifically, if any string has an escape
|
||||
// in it, then we probably can't feed it to Aho-Corasick without
|
||||
// removing the escape. Additionally, if there are any particular
|
||||
// special match semantics we need to honor, that Aho-Corasick isn't
|
||||
// enough. Finally, the regex engine can do really well with a small
|
||||
// number of literals (at time of writing, this is changing soon), so
|
||||
// we use it when there's a small set.
|
||||
//
|
||||
// Yes, this is one giant hack. Ideally, this entirely separate literal
|
||||
// matcher that uses Aho-Corasick would be pushed down into the regex
|
||||
// engine.
|
||||
if has_escape
|
||||
|| !self.config.can_plain_aho_corasick()
|
||||
|| literals.len() < 40
|
||||
{
|
||||
return self.build(&slices.join("|"));
|
||||
}
|
||||
|
||||
let matcher = MultiLiteralMatcher::new(&slices)?;
|
||||
let imp = RegexMatcherImpl::MultiLiteral(matcher);
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
matcher: imp,
|
||||
fast_line_regex: None,
|
||||
non_matching_bytes: ByteSet::empty(),
|
||||
})
|
||||
}
|
||||
|
||||
/// Set the value for the case insensitive (`i`) flag.
|
||||
@@ -300,15 +306,20 @@ impl RegexMatcherBuilder {
|
||||
/// 1. It causes the line terminator for the matcher to be `\r\n`. Namely,
|
||||
/// this prevents the matcher from ever producing a match that contains
|
||||
/// a `\r` or `\n`.
|
||||
/// 2. It enables CRLF mode for `^` and `$`. This means that line anchors
|
||||
/// will treat both `\r` and `\n` as line terminators, but will never
|
||||
/// match between a `\r` and `\n`.
|
||||
/// 2. It translates all instances of `$` in the pattern to `(?:\r??$)`.
|
||||
/// This works around the fact that the regex engine does not support
|
||||
/// matching CRLF as a line terminator when using `$`.
|
||||
///
|
||||
/// Note that if you do not wish to set the line terminator but would
|
||||
/// still like `$` to match `\r\n` line terminators, then it is valid to
|
||||
/// call `crlf(true)` followed by `line_terminator(None)`. Ordering is
|
||||
/// important, since `crlf` sets the line terminator, but `line_terminator`
|
||||
/// does not touch the `crlf` setting.
|
||||
/// In particular, because of (2), the matches produced by the matcher may
|
||||
/// be slightly different than what one would expect given the pattern.
|
||||
/// This is the trade off made: in many cases, `$` will "just work" in the
|
||||
/// presence of `\r\n` line terminators, but matches may require some
|
||||
/// trimming to faithfully represent the intended match.
|
||||
///
|
||||
/// Note that if you do not wish to set the line terminator but would still
|
||||
/// like `$` to match `\r\n` line terminators, then it is valid to call
|
||||
/// `crlf(true)` followed by `line_terminator(None)`. Ordering is
|
||||
/// important, since `crlf` and `line_terminator` override each other.
|
||||
pub fn crlf(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
if yes {
|
||||
self.config.line_terminator = Some(LineTerminator::crlf());
|
||||
@@ -334,21 +345,6 @@ impl RegexMatcherBuilder {
|
||||
self.config.word = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether the patterns should be treated as literal strings or not. When
|
||||
/// this is active, all characters, including ones that would normally be
|
||||
/// special regex meta characters, are matched literally.
|
||||
pub fn fixed_strings(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.config.fixed_strings = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether each pattern should match the entire line or not. This is
|
||||
/// equivalent to surrounding the pattern with `(?m:^)` and `(?m:$)`.
|
||||
pub fn whole_line(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
|
||||
self.config.whole_line = yes;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// An implementation of the `Matcher` trait using Rust's standard regex
|
||||
@@ -378,10 +374,10 @@ impl RegexMatcher {
|
||||
/// Create a new matcher from the given pattern using the default
|
||||
/// configuration, but matches lines terminated by `\n`.
|
||||
///
|
||||
/// This is meant to be a convenience constructor for
|
||||
/// using a `RegexMatcherBuilder` and setting its
|
||||
/// [`line_terminator`](RegexMatcherBuilder::method.line_terminator) to
|
||||
/// `\n`. The purpose of using this constructor is to permit special
|
||||
/// This is meant to be a convenience constructor for using a
|
||||
/// `RegexMatcherBuilder` and setting its
|
||||
/// [`line_terminator`](struct.RegexMatcherBuilder.html#method.line_terminator)
|
||||
/// to `\n`. The purpose of using this constructor is to permit special
|
||||
/// optimizations that help speed up line oriented search. These types of
|
||||
/// optimizations are only appropriate when matches span no more than one
|
||||
/// line. For this reason, this constructor will return an error if the
|
||||
@@ -397,6 +393,13 @@ impl RegexMatcher {
|
||||
enum RegexMatcherImpl {
|
||||
/// The standard matcher used for all regular expressions.
|
||||
Standard(StandardMatcher),
|
||||
/// A matcher for an alternation of plain literals.
|
||||
MultiLiteral(MultiLiteralMatcher),
|
||||
/// A matcher that strips `\r` from the end of matches.
|
||||
///
|
||||
/// This is only used when the CRLF hack is enabled and the regex is line
|
||||
/// anchored at the end.
|
||||
CRLF(CRLFMatcher),
|
||||
/// A matcher that only matches at word boundaries. This transforms the
|
||||
/// regex to `(^|\W)(...)($|\W)` instead of the more intuitive `\b(...)\b`.
|
||||
/// Because of this, the WordMatcher provides its own implementation of
|
||||
@@ -408,33 +411,29 @@ enum RegexMatcherImpl {
|
||||
impl RegexMatcherImpl {
|
||||
/// Based on the configuration, create a new implementation of the
|
||||
/// `Matcher` trait.
|
||||
fn new(mut chir: ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
|
||||
// When whole_line is set, we don't use a word matcher even if word
|
||||
// matching was requested. Why? Because `(?m:^)(pat)(?m:$)` implies
|
||||
// word matching.
|
||||
Ok(if chir.config().word && !chir.config().whole_line {
|
||||
RegexMatcherImpl::Word(WordMatcher::new(chir)?)
|
||||
fn new(expr: &ConfiguredHIR) -> Result<RegexMatcherImpl, Error> {
|
||||
if expr.config().word {
|
||||
Ok(RegexMatcherImpl::Word(WordMatcher::new(expr)?))
|
||||
} else if expr.needs_crlf_stripped() {
|
||||
Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
|
||||
} else {
|
||||
if chir.config().whole_line {
|
||||
chir = chir.into_whole_line();
|
||||
if let Some(lits) = expr.alternation_literals() {
|
||||
if lits.len() >= 40 {
|
||||
let matcher = MultiLiteralMatcher::new(&lits)?;
|
||||
return Ok(RegexMatcherImpl::MultiLiteral(matcher));
|
||||
}
|
||||
}
|
||||
RegexMatcherImpl::Standard(StandardMatcher::new(chir)?)
|
||||
})
|
||||
}
|
||||
|
||||
/// Return the underlying regex object used.
|
||||
fn regex(&self) -> &Regex {
|
||||
match *self {
|
||||
RegexMatcherImpl::Word(ref x) => x.regex(),
|
||||
RegexMatcherImpl::Standard(ref x) => &x.regex,
|
||||
Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the underlying HIR of the regex used for searching.
|
||||
fn chir(&self) -> &ConfiguredHIR {
|
||||
/// Return the underlying regex object used.
|
||||
fn regex(&self) -> String {
|
||||
match *self {
|
||||
RegexMatcherImpl::Word(ref x) => x.chir(),
|
||||
RegexMatcherImpl::Standard(ref x) => &x.chir,
|
||||
RegexMatcherImpl::Word(ref x) => x.regex().to_string(),
|
||||
RegexMatcherImpl::CRLF(ref x) => x.regex().to_string(),
|
||||
RegexMatcherImpl::MultiLiteral(_) => "<N/A>".to_string(),
|
||||
RegexMatcherImpl::Standard(ref x) => x.regex.to_string(),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -454,6 +453,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find_at(haystack, at),
|
||||
MultiLiteral(ref m) => m.find_at(haystack, at),
|
||||
CRLF(ref m) => m.find_at(haystack, at),
|
||||
Word(ref m) => m.find_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -462,6 +463,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.new_captures(),
|
||||
MultiLiteral(ref m) => m.new_captures(),
|
||||
CRLF(ref m) => m.new_captures(),
|
||||
Word(ref m) => m.new_captures(),
|
||||
}
|
||||
}
|
||||
@@ -470,6 +473,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.capture_count(),
|
||||
MultiLiteral(ref m) => m.capture_count(),
|
||||
CRLF(ref m) => m.capture_count(),
|
||||
Word(ref m) => m.capture_count(),
|
||||
}
|
||||
}
|
||||
@@ -478,6 +483,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.capture_index(name),
|
||||
MultiLiteral(ref m) => m.capture_index(name),
|
||||
CRLF(ref m) => m.capture_index(name),
|
||||
Word(ref m) => m.capture_index(name),
|
||||
}
|
||||
}
|
||||
@@ -486,6 +493,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find(haystack),
|
||||
MultiLiteral(ref m) => m.find(haystack),
|
||||
CRLF(ref m) => m.find(haystack),
|
||||
Word(ref m) => m.find(haystack),
|
||||
}
|
||||
}
|
||||
@@ -497,6 +506,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.find_iter(haystack, matched),
|
||||
MultiLiteral(ref m) => m.find_iter(haystack, matched),
|
||||
CRLF(ref m) => m.find_iter(haystack, matched),
|
||||
Word(ref m) => m.find_iter(haystack, matched),
|
||||
}
|
||||
}
|
||||
@@ -512,6 +523,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.try_find_iter(haystack, matched),
|
||||
MultiLiteral(ref m) => m.try_find_iter(haystack, matched),
|
||||
CRLF(ref m) => m.try_find_iter(haystack, matched),
|
||||
Word(ref m) => m.try_find_iter(haystack, matched),
|
||||
}
|
||||
}
|
||||
@@ -524,6 +537,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures(haystack, caps),
|
||||
MultiLiteral(ref m) => m.captures(haystack, caps),
|
||||
CRLF(ref m) => m.captures(haystack, caps),
|
||||
Word(ref m) => m.captures(haystack, caps),
|
||||
}
|
||||
}
|
||||
@@ -540,6 +555,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
MultiLiteral(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
CRLF(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
Word(ref m) => m.captures_iter(haystack, caps, matched),
|
||||
}
|
||||
}
|
||||
@@ -556,6 +573,10 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
MultiLiteral(ref m) => {
|
||||
m.try_captures_iter(haystack, caps, matched)
|
||||
}
|
||||
CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
Word(ref m) => m.try_captures_iter(haystack, caps, matched),
|
||||
}
|
||||
}
|
||||
@@ -569,6 +590,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.captures_at(haystack, at, caps),
|
||||
MultiLiteral(ref m) => m.captures_at(haystack, at, caps),
|
||||
CRLF(ref m) => m.captures_at(haystack, at, caps),
|
||||
Word(ref m) => m.captures_at(haystack, at, caps),
|
||||
}
|
||||
}
|
||||
@@ -585,6 +608,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.replace(haystack, dst, append),
|
||||
MultiLiteral(ref m) => m.replace(haystack, dst, append),
|
||||
CRLF(ref m) => m.replace(haystack, dst, append),
|
||||
Word(ref m) => m.replace(haystack, dst, append),
|
||||
}
|
||||
}
|
||||
@@ -604,6 +629,12 @@ impl Matcher for RegexMatcher {
|
||||
Standard(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
MultiLiteral(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
CRLF(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
Word(ref m) => {
|
||||
m.replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
@@ -614,6 +645,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.is_match(haystack),
|
||||
MultiLiteral(ref m) => m.is_match(haystack),
|
||||
CRLF(ref m) => m.is_match(haystack),
|
||||
Word(ref m) => m.is_match(haystack),
|
||||
}
|
||||
}
|
||||
@@ -626,6 +659,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.is_match_at(haystack, at),
|
||||
MultiLiteral(ref m) => m.is_match_at(haystack, at),
|
||||
CRLF(ref m) => m.is_match_at(haystack, at),
|
||||
Word(ref m) => m.is_match_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -637,6 +672,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.shortest_match(haystack),
|
||||
MultiLiteral(ref m) => m.shortest_match(haystack),
|
||||
CRLF(ref m) => m.shortest_match(haystack),
|
||||
Word(ref m) => m.shortest_match(haystack),
|
||||
}
|
||||
}
|
||||
@@ -649,6 +686,8 @@ impl Matcher for RegexMatcher {
|
||||
use self::RegexMatcherImpl::*;
|
||||
match self.matcher {
|
||||
Standard(ref m) => m.shortest_match_at(haystack, at),
|
||||
MultiLiteral(ref m) => m.shortest_match_at(haystack, at),
|
||||
CRLF(ref m) => m.shortest_match_at(haystack, at),
|
||||
Word(ref m) => m.shortest_match_at(haystack, at),
|
||||
}
|
||||
}
|
||||
@@ -667,10 +706,7 @@ impl Matcher for RegexMatcher {
|
||||
) -> Result<Option<LineMatchKind>, NoError> {
|
||||
Ok(match self.fast_line_regex {
|
||||
Some(ref regex) => {
|
||||
let input = Input::new(haystack);
|
||||
regex
|
||||
.search_half(&input)
|
||||
.map(|hm| LineMatchKind::Candidate(hm.offset()))
|
||||
regex.shortest_match(haystack).map(LineMatchKind::Candidate)
|
||||
}
|
||||
None => {
|
||||
self.shortest_match(haystack)?.map(LineMatchKind::Confirmed)
|
||||
@@ -685,19 +721,20 @@ struct StandardMatcher {
|
||||
/// The regular expression compiled from the pattern provided by the
|
||||
/// caller.
|
||||
regex: Regex,
|
||||
/// The HIR that produced this regex.
|
||||
///
|
||||
/// We put this in an `Arc` because by the time it gets here, it won't
|
||||
/// change. And because cloning and dropping an `Hir` is somewhat expensive
|
||||
/// due to its deep recursive representation.
|
||||
chir: Arc<ConfiguredHIR>,
|
||||
/// A map from capture group name to its corresponding index.
|
||||
names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl StandardMatcher {
|
||||
fn new(chir: ConfiguredHIR) -> Result<StandardMatcher, Error> {
|
||||
let chir = Arc::new(chir);
|
||||
let regex = chir.to_regex()?;
|
||||
Ok(StandardMatcher { regex, chir })
|
||||
fn new(expr: &ConfiguredHIR) -> Result<StandardMatcher, Error> {
|
||||
let regex = expr.regex()?;
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
Ok(StandardMatcher { regex, names })
|
||||
}
|
||||
}
|
||||
|
||||
@@ -710,12 +747,14 @@ impl Matcher for StandardMatcher {
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, NoError> {
|
||||
let input = Input::new(haystack).span(at..haystack.len());
|
||||
Ok(self.regex.find(input).map(|m| Match::new(m.start(), m.end())))
|
||||
Ok(self
|
||||
.regex
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::new(self.regex.create_captures()))
|
||||
Ok(RegexCaptures::new(self.regex.capture_locations()))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
@@ -723,7 +762,7 @@ impl Matcher for StandardMatcher {
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.regex.group_info().to_index(PatternID::ZERO, name)
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
fn try_find_iter<F, E>(
|
||||
@@ -750,10 +789,10 @@ impl Matcher for StandardMatcher {
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
let input = Input::new(haystack).span(at..haystack.len());
|
||||
let caps = caps.captures_mut();
|
||||
self.regex.search_captures(&input, caps);
|
||||
Ok(caps.is_match())
|
||||
Ok(self
|
||||
.regex
|
||||
.captures_read_at(&mut caps.locations_mut(), haystack, at)
|
||||
.is_some())
|
||||
}
|
||||
|
||||
fn shortest_match_at(
|
||||
@@ -761,8 +800,7 @@ impl Matcher for StandardMatcher {
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<usize>, NoError> {
|
||||
let input = Input::new(haystack).span(at..haystack.len());
|
||||
Ok(self.regex.search_half(&input).map(|hm| hm.offset()))
|
||||
Ok(self.regex.shortest_match_at(haystack, at))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -781,51 +819,137 @@ impl Matcher for StandardMatcher {
|
||||
/// index of the group using the corresponding matcher's `capture_index`
|
||||
/// method, and then use that index with `RegexCaptures::get`.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures {
|
||||
/// Where the captures are stored.
|
||||
caps: AutomataCaptures,
|
||||
/// These captures behave as if the capturing groups begin at the given
|
||||
/// offset. When set to `0`, this has no effect and capture groups are
|
||||
/// indexed like normal.
|
||||
///
|
||||
/// This is useful when building matchers that wrap arbitrary regular
|
||||
/// expressions. For example, `WordMatcher` takes an existing regex
|
||||
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
|
||||
/// the regex has been wrapped from the caller. In order to do this,
|
||||
/// the matcher and the capturing groups must behave as if `(re)` is
|
||||
/// the `0`th capture group.
|
||||
offset: usize,
|
||||
pub struct RegexCaptures(RegexCapturesImp);
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
enum RegexCapturesImp {
|
||||
AhoCorasick {
|
||||
/// The start and end of the match, corresponding to capture group 0.
|
||||
mat: Option<Match>,
|
||||
},
|
||||
Regex {
|
||||
/// Where the locations are stored.
|
||||
locs: CaptureLocations,
|
||||
/// These captures behave as if the capturing groups begin at the given
|
||||
/// offset. When set to `0`, this has no effect and capture groups are
|
||||
/// indexed like normal.
|
||||
///
|
||||
/// This is useful when building matchers that wrap arbitrary regular
|
||||
/// expressions. For example, `WordMatcher` takes an existing regex
|
||||
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
|
||||
/// the regex has been wrapped from the caller. In order to do this,
|
||||
/// the matcher and the capturing groups must behave as if `(re)` is
|
||||
/// the `0`th capture group.
|
||||
offset: usize,
|
||||
/// When enabled, the end of a match has `\r` stripped from it, if one
|
||||
/// exists.
|
||||
strip_crlf: bool,
|
||||
},
|
||||
}
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.caps
|
||||
.group_info()
|
||||
.all_group_len()
|
||||
.checked_sub(self.offset)
|
||||
.unwrap()
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => 1,
|
||||
RegexCapturesImp::Regex { ref locs, offset, .. } => {
|
||||
locs.len().checked_sub(offset).unwrap()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
let actual = i.checked_add(self.offset).unwrap();
|
||||
self.caps.get_group(actual).map(|sp| Match::new(sp.start, sp.end))
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { mat, .. } => {
|
||||
if i == 0 {
|
||||
mat
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
RegexCapturesImp::Regex { ref locs, offset, strip_crlf } => {
|
||||
if !strip_crlf {
|
||||
let actual = i.checked_add(offset).unwrap();
|
||||
return locs.pos(actual).map(|(s, e)| Match::new(s, e));
|
||||
}
|
||||
|
||||
// currently don't support capture offsetting with CRLF
|
||||
// stripping
|
||||
assert_eq!(offset, 0);
|
||||
let m = match locs.pos(i).map(|(s, e)| Match::new(s, e)) {
|
||||
None => return None,
|
||||
Some(m) => m,
|
||||
};
|
||||
// If the end position of this match corresponds to the end
|
||||
// position of the overall match, then we apply our CRLF
|
||||
// stripping. Otherwise, we cannot assume stripping is correct.
|
||||
if i == 0 || m.end() == locs.pos(0).unwrap().1 {
|
||||
Some(m.with_end(m.end() - 1))
|
||||
} else {
|
||||
Some(m)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl RegexCaptures {
|
||||
pub(crate) fn new(caps: AutomataCaptures) -> RegexCaptures {
|
||||
RegexCaptures::with_offset(caps, 0)
|
||||
pub(crate) fn simple() -> RegexCaptures {
|
||||
RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None })
|
||||
}
|
||||
|
||||
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
|
||||
RegexCaptures::with_offset(locs, 0)
|
||||
}
|
||||
|
||||
pub(crate) fn with_offset(
|
||||
caps: AutomataCaptures,
|
||||
locs: CaptureLocations,
|
||||
offset: usize,
|
||||
) -> RegexCaptures {
|
||||
RegexCaptures { caps, offset }
|
||||
RegexCaptures(RegexCapturesImp::Regex {
|
||||
locs,
|
||||
offset,
|
||||
strip_crlf: false,
|
||||
})
|
||||
}
|
||||
|
||||
pub(crate) fn captures_mut(&mut self) -> &mut AutomataCaptures {
|
||||
&mut self.caps
|
||||
pub(crate) fn locations(&self) -> &CaptureLocations {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => {
|
||||
panic!("getting locations for simple captures is invalid")
|
||||
}
|
||||
RegexCapturesImp::Regex { ref locs, .. } => locs,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn locations_mut(&mut self) -> &mut CaptureLocations {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => {
|
||||
panic!("getting locations for simple captures is invalid")
|
||||
}
|
||||
RegexCapturesImp::Regex { ref mut locs, .. } => locs,
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn strip_crlf(&mut self, yes: bool) {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { .. } => {
|
||||
panic!("setting strip_crlf for simple captures is invalid")
|
||||
}
|
||||
RegexCapturesImp::Regex { ref mut strip_crlf, .. } => {
|
||||
*strip_crlf = yes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn set_simple(&mut self, one: Option<Match>) {
|
||||
match self.0 {
|
||||
RegexCapturesImp::AhoCorasick { ref mut mat } => {
|
||||
*mat = one;
|
||||
}
|
||||
RegexCapturesImp::Regex { .. } => {
|
||||
panic!("setting simple captures for regex is invalid")
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -912,9 +1036,7 @@ mod tests {
|
||||
}
|
||||
|
||||
// Test that finding candidate lines works as expected.
|
||||
// FIXME: Re-enable this test once inner literal extraction works.
|
||||
#[test]
|
||||
#[ignore]
|
||||
fn candidate_lines() {
|
||||
fn is_confirmed(m: LineMatchKind) -> bool {
|
||||
match m {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
use aho_corasick::{AhoCorasick, MatchKind};
|
||||
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex_syntax::hir::{Hir, HirKind};
|
||||
use regex_syntax::hir::Hir;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
@@ -23,10 +23,11 @@ impl MultiLiteralMatcher {
|
||||
pub fn new<B: AsRef<[u8]>>(
|
||||
literals: &[B],
|
||||
) -> Result<MultiLiteralMatcher, Error> {
|
||||
let ac = AhoCorasick::builder()
|
||||
let ac = AhoCorasickBuilder::new()
|
||||
.match_kind(MatchKind::LeftmostFirst)
|
||||
.build(literals)
|
||||
.map_err(Error::generic)?;
|
||||
.auto_configure(literals)
|
||||
.build_with_size::<usize, _, _>(literals)
|
||||
.map_err(Error::regex)?;
|
||||
Ok(MultiLiteralMatcher { ac })
|
||||
}
|
||||
}
|
||||
@@ -78,11 +79,13 @@ impl Matcher for MultiLiteralMatcher {
|
||||
/// Alternation literals checks if the given HIR is a simple alternation of
|
||||
/// literals, and if so, returns them. Otherwise, this returns None.
|
||||
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
|
||||
use regex_syntax::hir::{HirKind, Literal};
|
||||
|
||||
// This is pretty hacky, but basically, if `is_alternation_literal` is
|
||||
// true, then we can make several assumptions about the structure of our
|
||||
// HIR. This is what justifies the `unreachable!` statements below.
|
||||
|
||||
if !expr.properties().is_alternation_literal() {
|
||||
if !expr.is_alternation_literal() {
|
||||
return None;
|
||||
}
|
||||
let alts = match *expr.kind() {
|
||||
@@ -90,16 +93,26 @@ pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
|
||||
_ => return None, // one literal isn't worth it
|
||||
};
|
||||
|
||||
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
|
||||
Literal::Unicode(c) => {
|
||||
let mut buf = [0; 4];
|
||||
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
|
||||
}
|
||||
Literal::Byte(b) => {
|
||||
dst.push(b);
|
||||
}
|
||||
};
|
||||
|
||||
let mut lits = vec![];
|
||||
for alt in alts {
|
||||
let mut lit = vec![];
|
||||
match *alt.kind() {
|
||||
HirKind::Empty => {}
|
||||
HirKind::Literal(ref x) => lit.extend_from_slice(&x.0),
|
||||
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||
HirKind::Concat(ref exprs) => {
|
||||
for e in exprs {
|
||||
match *e.kind() {
|
||||
HirKind::Literal(ref x) => lit.extend_from_slice(&x.0),
|
||||
HirKind::Literal(ref x) => extendlit(x, &mut lit),
|
||||
_ => unreachable!("expected literal, got {:?}", e),
|
||||
}
|
||||
}
|
||||
|
@@ -1,13 +1,9 @@
|
||||
use {
|
||||
grep_matcher::ByteSet,
|
||||
regex_syntax::{
|
||||
hir::{self, Hir, HirKind, Look},
|
||||
utf8::Utf8Sequences,
|
||||
},
|
||||
};
|
||||
use grep_matcher::ByteSet;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
use regex_syntax::utf8::Utf8Sequences;
|
||||
|
||||
/// Return a confirmed set of non-matching bytes from the given expression.
|
||||
pub(crate) fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
let mut set = ByteSet::full();
|
||||
remove_matching_bytes(expr, &mut set);
|
||||
set
|
||||
@@ -17,27 +13,18 @@ pub(crate) fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
/// the given expression.
|
||||
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty
|
||||
| HirKind::Look(Look::WordAscii | Look::WordAsciiNegate)
|
||||
| HirKind::Look(Look::WordUnicode | Look::WordUnicodeNegate) => {}
|
||||
HirKind::Look(Look::Start | Look::End) => {
|
||||
// FIXME: This is wrong, but not doing this leads to incorrect
|
||||
// results because of how anchored searches are implemented in
|
||||
// the 'grep-searcher' crate.
|
||||
HirKind::Empty | HirKind::WordBoundary(_) => {}
|
||||
HirKind::Anchor(_) => {
|
||||
set.remove(b'\n');
|
||||
}
|
||||
HirKind::Look(Look::StartLF | Look::EndLF) => {
|
||||
set.remove(b'\n');
|
||||
}
|
||||
HirKind::Look(Look::StartCRLF | Look::EndCRLF) => {
|
||||
set.remove(b'\r');
|
||||
set.remove(b'\n');
|
||||
}
|
||||
HirKind::Literal(hir::Literal(ref lit)) => {
|
||||
for &b in lit.iter() {
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
|
||||
set.remove(b);
|
||||
}
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
set.remove(b);
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
||||
for range in cls.iter() {
|
||||
// This is presumably faster than encoding every codepoint
|
||||
@@ -55,10 +42,10 @@ fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
|
||||
}
|
||||
}
|
||||
HirKind::Repetition(ref x) => {
|
||||
remove_matching_bytes(&x.sub, set);
|
||||
remove_matching_bytes(&x.hir, set);
|
||||
}
|
||||
HirKind::Capture(ref x) => {
|
||||
remove_matching_bytes(&x.sub, set);
|
||||
HirKind::Group(ref x) => {
|
||||
remove_matching_bytes(&x.hir, set);
|
||||
}
|
||||
HirKind::Concat(ref xs) => {
|
||||
for x in xs {
|
||||
@@ -75,13 +62,17 @@ fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use {grep_matcher::ByteSet, regex_syntax::ParserBuilder};
|
||||
use grep_matcher::ByteSet;
|
||||
use regex_syntax::ParserBuilder;
|
||||
|
||||
use super::non_matching_bytes;
|
||||
|
||||
fn extract(pattern: &str) -> ByteSet {
|
||||
let expr =
|
||||
ParserBuilder::new().utf8(false).build().parse(pattern).unwrap();
|
||||
let expr = ParserBuilder::new()
|
||||
.allow_invalid_utf8(true)
|
||||
.build()
|
||||
.parse(pattern)
|
||||
.unwrap();
|
||||
non_matching_bytes(&expr)
|
||||
}
|
||||
|
||||
@@ -140,13 +131,9 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn anchor() {
|
||||
// FIXME: The first four tests below should correspond to a full set
|
||||
// of bytes for the non-matching bytes I think.
|
||||
assert_eq!(sparse(&extract(r"^")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"$")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"\A")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"\z")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"(?m)^")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"(?m)$")), sparse_except(&[b'\n']));
|
||||
}
|
||||
}
|
||||
|
@@ -1,7 +1,5 @@
|
||||
use {
|
||||
grep_matcher::LineTerminator,
|
||||
regex_syntax::hir::{self, Hir, HirKind},
|
||||
};
|
||||
use grep_matcher::LineTerminator;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use crate::error::{Error, ErrorKind};
|
||||
|
||||
@@ -17,26 +15,7 @@ use crate::error::{Error, ErrorKind};
|
||||
///
|
||||
/// If the given line terminator is not ASCII, then this function returns an
|
||||
/// error.
|
||||
///
|
||||
/// Note that as of regex 1.9, this routine could theoretically be implemented
|
||||
/// without returning an error. Namely, for example, we could turn
|
||||
/// `foo\nbar` into `foo[a&&b]bar`. That is, replace line terminators with a
|
||||
/// sub-expression that can never match anything. Thus, ripgrep would accept
|
||||
/// such regexes and just silently not match anything. Regex versions prior to 1.8
|
||||
/// don't support such constructs. I ended up deciding to leave the existing
|
||||
/// behavior of returning an error instead. For example:
|
||||
///
|
||||
/// ```text
|
||||
/// $ echo -n 'foo\nbar\n' | rg 'foo\nbar'
|
||||
/// the literal '"\n"' is not allowed in a regex
|
||||
///
|
||||
/// Consider enabling multiline mode with the --multiline flag (or -U for short).
|
||||
/// When multiline mode is enabled, new line characters can be matched.
|
||||
/// ```
|
||||
///
|
||||
/// This looks like a good error message to me, and even suggests a flag that
|
||||
/// the user can use instead.
|
||||
pub(crate) fn strip_from_match(
|
||||
pub fn strip_from_match(
|
||||
expr: Hir,
|
||||
line_term: LineTerminator,
|
||||
) -> Result<Hir, Error> {
|
||||
@@ -44,34 +23,40 @@ pub(crate) fn strip_from_match(
|
||||
let expr1 = strip_from_match_ascii(expr, b'\r')?;
|
||||
strip_from_match_ascii(expr1, b'\n')
|
||||
} else {
|
||||
strip_from_match_ascii(expr, line_term.as_byte())
|
||||
let b = line_term.as_byte();
|
||||
if b > 0x7F {
|
||||
return Err(Error::new(ErrorKind::InvalidLineTerminator(b)));
|
||||
}
|
||||
strip_from_match_ascii(expr, b)
|
||||
}
|
||||
}
|
||||
|
||||
/// The implementation of strip_from_match. The given byte must be ASCII.
|
||||
/// This function returns an error otherwise. It also returns an error if
|
||||
/// it couldn't remove `\n` from the given regex without leaving an empty
|
||||
/// character class in its place.
|
||||
/// The implementation of strip_from_match. The given byte must be ASCII. This
|
||||
/// function panics otherwise.
|
||||
fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
|
||||
if !byte.is_ascii() {
|
||||
return Err(Error::new(ErrorKind::InvalidLineTerminator(byte)));
|
||||
}
|
||||
let ch = char::from(byte);
|
||||
let invalid = || Err(Error::new(ErrorKind::NotAllowed(ch.to_string())));
|
||||
assert!(byte <= 0x7F);
|
||||
let chr = byte as char;
|
||||
assert_eq!(chr.len_utf8(), 1);
|
||||
|
||||
let invalid = || Err(Error::new(ErrorKind::NotAllowed(chr.to_string())));
|
||||
|
||||
Ok(match expr.into_kind() {
|
||||
HirKind::Empty => Hir::empty(),
|
||||
HirKind::Literal(hir::Literal(lit)) => {
|
||||
if lit.iter().find(|&&b| b == byte).is_some() {
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
if c == chr {
|
||||
return invalid();
|
||||
}
|
||||
Hir::literal(lit)
|
||||
Hir::literal(hir::Literal::Unicode(c))
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
if b as char == chr {
|
||||
return invalid();
|
||||
}
|
||||
Hir::literal(hir::Literal::Byte(b))
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(mut cls)) => {
|
||||
if cls.ranges().is_empty() {
|
||||
return Ok(Hir::class(hir::Class::Unicode(cls)));
|
||||
}
|
||||
let remove = hir::ClassUnicode::new(Some(
|
||||
hir::ClassUnicodeRange::new(ch, ch),
|
||||
hir::ClassUnicodeRange::new(chr, chr),
|
||||
));
|
||||
cls.difference(&remove);
|
||||
if cls.ranges().is_empty() {
|
||||
@@ -80,9 +65,6 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
|
||||
Hir::class(hir::Class::Unicode(cls))
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(mut cls)) => {
|
||||
if cls.ranges().is_empty() {
|
||||
return Ok(Hir::class(hir::Class::Bytes(cls)));
|
||||
}
|
||||
let remove = hir::ClassBytes::new(Some(
|
||||
hir::ClassBytesRange::new(byte, byte),
|
||||
));
|
||||
@@ -92,14 +74,15 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
|
||||
}
|
||||
Hir::class(hir::Class::Bytes(cls))
|
||||
}
|
||||
HirKind::Look(x) => Hir::look(x),
|
||||
HirKind::Anchor(x) => Hir::anchor(x),
|
||||
HirKind::WordBoundary(x) => Hir::word_boundary(x),
|
||||
HirKind::Repetition(mut x) => {
|
||||
x.sub = Box::new(strip_from_match_ascii(*x.sub, byte)?);
|
||||
x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
|
||||
Hir::repetition(x)
|
||||
}
|
||||
HirKind::Capture(mut x) => {
|
||||
x.sub = Box::new(strip_from_match_ascii(*x.sub, byte)?);
|
||||
Hir::capture(x)
|
||||
HirKind::Group(mut x) => {
|
||||
x.hir = Box::new(strip_from_match_ascii(*x.hir, byte)?);
|
||||
Hir::group(x)
|
||||
}
|
||||
HirKind::Concat(xs) => {
|
||||
let xs = xs
|
||||
@@ -148,11 +131,11 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn various() {
|
||||
assert_eq!(roundtrip(r"[a\n]", b'\n'), "a");
|
||||
assert_eq!(roundtrip(r"[a\n]", b'a'), "\n");
|
||||
assert_eq!(roundtrip_crlf(r"[a\n]"), "a");
|
||||
assert_eq!(roundtrip_crlf(r"[a\r]"), "a");
|
||||
assert_eq!(roundtrip_crlf(r"[a\r\n]"), "a");
|
||||
assert_eq!(roundtrip(r"[a\n]", b'\n'), "[a]");
|
||||
assert_eq!(roundtrip(r"[a\n]", b'a'), "[\n]");
|
||||
assert_eq!(roundtrip_crlf(r"[a\n]"), "[a]");
|
||||
assert_eq!(roundtrip_crlf(r"[a\r]"), "[a]");
|
||||
assert_eq!(roundtrip_crlf(r"[a\r\n]"), "[a]");
|
||||
|
||||
assert_eq!(roundtrip(r"(?-u)\s", b'a'), r"(?-u:[\x09-\x0D\x20])");
|
||||
assert_eq!(roundtrip(r"(?-u)\s", b'\n'), r"(?-u:[\x09\x0B-\x0D\x20])");
|
||||
|
29
crates/regex/src/util.rs
Normal file
29
crates/regex/src/util.rs
Normal file
@@ -0,0 +1,29 @@
|
||||
/// Converts an arbitrary sequence of bytes to a literal suitable for building
|
||||
/// a regular expression.
|
||||
pub fn bytes_to_regex(bs: &[u8]) -> String {
|
||||
use regex_syntax::is_meta_character;
|
||||
use std::fmt::Write;
|
||||
|
||||
let mut s = String::with_capacity(bs.len());
|
||||
for &b in bs {
|
||||
if b <= 0x7F && !is_meta_character(b as char) {
|
||||
write!(s, r"{}", b as char).unwrap();
|
||||
} else {
|
||||
write!(s, r"\x{:02x}", b).unwrap();
|
||||
}
|
||||
}
|
||||
s
|
||||
}
|
||||
|
||||
/// Converts arbitrary bytes to a nice string.
///
/// Each byte is passed through `std::ascii::escape_default`, so printable
/// ASCII is kept as-is while everything else is rendered as an escape
/// sequence (e.g. `\n`, `\\`, `\xff`).
pub fn show_bytes(bs: &[u8]) -> String {
    use std::ascii::escape_default;

    // Each input byte produces at least one output character.
    let mut nice = String::with_capacity(bs.len());
    for &b in bs {
        // The escape iterator yields ASCII bytes only, so each one maps
        // directly to a `char`; no intermediate buffer or UTF-8 validation
        // is needed.
        nice.extend(escape_default(b).map(char::from));
    }
    nice
}
|
@@ -1,59 +1,39 @@
|
||||
use std::{
|
||||
collections::HashMap,
|
||||
panic::{RefUnwindSafe, UnwindSafe},
|
||||
sync::Arc,
|
||||
};
|
||||
use std::cell::RefCell;
|
||||
use std::collections::HashMap;
|
||||
use std::sync::Arc;
|
||||
|
||||
use {
|
||||
grep_matcher::{Match, Matcher, NoError},
|
||||
regex_automata::{
|
||||
meta::Regex, util::captures::Captures, util::pool::Pool, Input,
|
||||
PatternID,
|
||||
},
|
||||
};
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use crate::{config::ConfiguredHIR, error::Error, matcher::RegexCaptures};
|
||||
|
||||
type PoolFn =
|
||||
Box<dyn Fn() -> Captures + Send + Sync + UnwindSafe + RefUnwindSafe>;
|
||||
use crate::config::ConfiguredHIR;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct WordMatcher {
|
||||
pub struct WordMatcher {
|
||||
/// The regex which is roughly `(?:^|\W)(<original pattern>)(?:$|\W)`.
|
||||
regex: Regex,
|
||||
/// The HIR that produced the regex above. We don't keep the HIR for the
|
||||
/// `original` regex.
|
||||
///
|
||||
/// We put this in an `Arc` because by the time it gets here, it won't
|
||||
/// change. And because cloning and dropping an `Hir` is somewhat expensive
|
||||
/// due to its deep recursive representation.
|
||||
chir: Arc<ConfiguredHIR>,
|
||||
/// The original regex supplied by the user, which we use in a fast path
|
||||
/// to try and detect matches before deferring to slower engines.
|
||||
original: Regex,
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
/// A thread-safe pool of reusable buffers for finding the match offset of
|
||||
/// the inner group.
|
||||
caps: Arc<Pool<Captures, PoolFn>>,
|
||||
/// A reusable buffer for finding the match location of the inner group.
|
||||
locs: Arc<ThreadLocal<RefCell<CaptureLocations>>>,
|
||||
}
|
||||
|
||||
impl Clone for WordMatcher {
|
||||
fn clone(&self) -> WordMatcher {
|
||||
// We implement Clone manually so that we get a fresh Pool such that it
|
||||
// can set its own thread owner. This permits each thread using `caps`
|
||||
// to hit the fast path.
|
||||
//
|
||||
// Note that cloning a regex is "cheap" since it uses reference
|
||||
// counting internally.
|
||||
let re = self.regex.clone();
|
||||
// We implement Clone manually so that we get a fresh ThreadLocal such
|
||||
// that it can set its own thread owner. This permits each thread
|
||||
// using `locs` to hit the fast path.
|
||||
WordMatcher {
|
||||
regex: self.regex.clone(),
|
||||
chir: Arc::clone(&self.chir),
|
||||
original: self.original.clone(),
|
||||
names: self.names.clone(),
|
||||
caps: Arc::new(Pool::new(Box::new(move || re.create_captures()))),
|
||||
locs: Arc::new(ThreadLocal::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -64,38 +44,31 @@ impl WordMatcher {
|
||||
///
|
||||
/// The given options are used to construct the regular expression
|
||||
/// internally.
|
||||
pub(crate) fn new(chir: ConfiguredHIR) -> Result<WordMatcher, Error> {
|
||||
let original = chir.clone().into_anchored().to_regex()?;
|
||||
let chir = Arc::new(chir.into_word()?);
|
||||
let regex = chir.to_regex()?;
|
||||
let caps = Arc::new(Pool::new({
|
||||
let regex = regex.clone();
|
||||
Box::new(move || regex.create_captures()) as PoolFn
|
||||
}));
|
||||
pub fn new(expr: &ConfiguredHIR) -> Result<WordMatcher, Error> {
|
||||
let original =
|
||||
expr.with_pattern(|pat| format!("^(?:{})$", pat))?.regex()?;
|
||||
let word_expr = expr.with_pattern(|pat| {
|
||||
let pat = format!(r"(?:(?m:^)|\W)({})(?:\W|(?m:$))", pat);
|
||||
log::debug!("word regex: {:?}", pat);
|
||||
pat
|
||||
})?;
|
||||
let regex = word_expr.regex()?;
|
||||
let locs = Arc::new(ThreadLocal::new());
|
||||
|
||||
let mut names = HashMap::new();
|
||||
let it = regex.group_info().pattern_names(PatternID::ZERO);
|
||||
for (i, optional_name) in it.enumerate() {
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i.checked_sub(1).unwrap());
|
||||
}
|
||||
}
|
||||
Ok(WordMatcher { regex, chir, original, names, caps })
|
||||
Ok(WordMatcher { regex, original, names, locs })
|
||||
}
|
||||
|
||||
/// Return the underlying regex used to match at word boundaries.
|
||||
///
|
||||
/// The original regex is in the capture group at index 1.
|
||||
pub(crate) fn regex(&self) -> &Regex {
|
||||
/// Return the underlying regex used by this matcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.regex
|
||||
}
|
||||
|
||||
/// Return the underlying HIR for the regex used to match at word
|
||||
/// boundaries.
|
||||
pub(crate) fn chir(&self) -> &ConfiguredHIR {
|
||||
&self.chir
|
||||
}
|
||||
|
||||
/// Attempt to do a fast confirmation of a word match that covers a subset
|
||||
/// (but hopefully a big subset) of most cases. Ok(Some(..)) is returned
|
||||
/// when a match is found. Ok(None) is returned when there is definitively
|
||||
@@ -106,11 +79,12 @@ impl WordMatcher {
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>, ()> {
|
||||
// This is a bit hairy. The whole point here is to avoid running a
|
||||
// slower regex engine to extract capture groups. Remember, our word
|
||||
// regex looks like this:
|
||||
// This is a bit hairy. The whole point here is to avoid running an
|
||||
// NFA simulation in the regex engine. Remember, our word regex looks
|
||||
// like this:
|
||||
//
|
||||
// (^|\W)(<original regex>)(\W|$)
|
||||
// (^|\W)(<original regex>)($|\W)
|
||||
// where ^ and $ have multiline mode DISABLED
|
||||
//
|
||||
// What we want are the match offsets of <original regex>. So in the
|
||||
// easy/common case, the original regex will be sandwiched between
|
||||
@@ -128,8 +102,7 @@ impl WordMatcher {
|
||||
// The reason why we cannot handle the ^/$ cases here is because we
|
||||
// can't assume anything about the original pattern. (Try commenting
|
||||
// out the checks for ^/$ below and run the tests to see examples.)
|
||||
let input = Input::new(haystack).span(at..haystack.len());
|
||||
let mut cand = match self.regex.find(input) {
|
||||
let mut cand = match self.regex.find_at(haystack, at) {
|
||||
None => return Ok(None),
|
||||
Some(m) => Match::new(m.start(), m.end()),
|
||||
};
|
||||
@@ -172,23 +145,23 @@ impl Matcher for WordMatcher {
|
||||
//
|
||||
// OK, well, it turns out that it is worth it! But it is quite tricky.
|
||||
// See `fast_find` for details. Effectively, this lets us skip running
|
||||
// a slower regex engine to extract capture groups in the vast majority
|
||||
// of cases. However, the slower engine is I believe required for full
|
||||
// correctness.
|
||||
// the NFA simulation in the regex engine in the vast majority of
|
||||
// cases. However, the NFA simulation is required for full correctness.
|
||||
match self.fast_find(haystack, at) {
|
||||
Ok(Some(m)) => return Ok(Some(m)),
|
||||
Ok(None) => return Ok(None),
|
||||
Err(()) => {}
|
||||
}
|
||||
|
||||
let input = Input::new(haystack).span(at..haystack.len());
|
||||
let mut caps = self.caps.get();
|
||||
self.regex.search_captures(&input, &mut caps);
|
||||
Ok(caps.get_group(1).map(|sp| Match::new(sp.start, sp.end)))
|
||||
let cell =
|
||||
self.locs.get_or(|| RefCell::new(self.regex.capture_locations()));
|
||||
let mut caps = cell.borrow_mut();
|
||||
self.regex.captures_read_at(&mut caps, haystack, at);
|
||||
Ok(caps.get(1).map(|m| Match::new(m.0, m.1)))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
|
||||
Ok(RegexCaptures::with_offset(self.regex.create_captures(), 1))
|
||||
Ok(RegexCaptures::with_offset(self.regex.capture_locations(), 1))
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
@@ -205,10 +178,9 @@ impl Matcher for WordMatcher {
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool, NoError> {
|
||||
let input = Input::new(haystack).span(at..haystack.len());
|
||||
let caps = caps.captures_mut();
|
||||
self.regex.search_captures(&input, caps);
|
||||
Ok(caps.is_match())
|
||||
let r =
|
||||
self.regex.captures_read_at(caps.locations_mut(), haystack, at);
|
||||
Ok(r.is_some())
|
||||
}
|
||||
|
||||
// We specifically do not implement other methods like find_iter or
|
||||
@@ -223,8 +195,8 @@ mod tests {
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
|
||||
fn matcher(pattern: &str) -> WordMatcher {
|
||||
let chir = Config::default().build_many(&[pattern]).unwrap();
|
||||
WordMatcher::new(chir).unwrap()
|
||||
let chir = Config::default().hir(pattern).unwrap();
|
||||
WordMatcher::new(&chir).unwrap()
|
||||
}
|
||||
|
||||
fn find(pattern: &str, haystack: &str) -> Option<(usize, usize)> {
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-searcher"
|
||||
version = "0.1.11" #:version
|
||||
version = "0.1.10" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@@ -14,16 +14,16 @@ license = "Unlicense OR MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bstr = { version = "1.6.0", default-features = false, features = ["std"] }
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
bytecount = "0.6"
|
||||
encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.6"
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
grep-matcher = { version = "0.1.5", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
memmap = { package = "memmap2", version = "0.5.3" }
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.11", path = "../regex" }
|
||||
grep-regex = { version = "0.1.10", path = "../regex" }
|
||||
regex = "1.1"
|
||||
|
||||
[features]
|
||||
|
@@ -481,7 +481,7 @@ impl LineBuffer {
|
||||
}
|
||||
|
||||
let roll_len = self.end - self.pos;
|
||||
self.buf.copy_within(self.pos..self.end, 0);
|
||||
self.buf.copy_within_str(self.pos..self.end, 0);
|
||||
self.pos = 0;
|
||||
self.last_lineterm = roll_len;
|
||||
self.end = roll_len;
|
||||
|
@@ -10,12 +10,6 @@ use crate::sink::{
|
||||
};
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
|
||||
enum FastMatchResult {
|
||||
Continue,
|
||||
Stop,
|
||||
SwitchToSlow,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Core<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
@@ -31,7 +25,6 @@ pub struct Core<'s, M: 's, S> {
|
||||
last_line_visited: usize,
|
||||
after_context_left: usize,
|
||||
has_sunk: bool,
|
||||
has_matched: bool,
|
||||
}
|
||||
|
||||
impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
@@ -57,7 +50,6 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
last_line_visited: 0,
|
||||
after_context_left: 0,
|
||||
has_sunk: false,
|
||||
has_matched: false,
|
||||
};
|
||||
if !core.searcher.multi_line_with_matcher(&core.matcher) {
|
||||
if core.is_line_by_line_fast() {
|
||||
@@ -117,11 +109,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
|
||||
pub fn match_by_line(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
if self.is_line_by_line_fast() {
|
||||
match self.match_by_line_fast(buf)? {
|
||||
FastMatchResult::SwitchToSlow => self.match_by_line_slow(buf),
|
||||
FastMatchResult::Continue => Ok(true),
|
||||
FastMatchResult::Stop => Ok(false),
|
||||
}
|
||||
self.match_by_line_fast(buf)
|
||||
} else {
|
||||
self.match_by_line_slow(buf)
|
||||
}
|
||||
@@ -282,9 +270,7 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
}
|
||||
};
|
||||
self.set_pos(line.end());
|
||||
let success = matched != self.config.invert_match;
|
||||
if success {
|
||||
self.has_matched = true;
|
||||
if matched != self.config.invert_match {
|
||||
if !self.before_context_by_line(buf, line.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
@@ -300,51 +286,40 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
if self.config.stop_on_nonmatch && !success && self.has_matched {
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
fn match_by_line_fast(
|
||||
&mut self,
|
||||
buf: &[u8],
|
||||
) -> Result<FastMatchResult, S::Error> {
|
||||
use FastMatchResult::*;
|
||||
|
||||
fn match_by_line_fast(&mut self, buf: &[u8]) -> Result<bool, S::Error> {
|
||||
debug_assert!(!self.config.passthru);
|
||||
|
||||
while !buf[self.pos()..].is_empty() {
|
||||
if self.config.stop_on_nonmatch && self.has_matched {
|
||||
return Ok(SwitchToSlow);
|
||||
}
|
||||
if self.config.invert_match {
|
||||
if !self.match_by_line_fast_invert(buf)? {
|
||||
return Ok(Stop);
|
||||
return Ok(false);
|
||||
}
|
||||
} else if let Some(line) = self.find_by_line_fast(buf)? {
|
||||
self.has_matched = true;
|
||||
if self.config.max_context() > 0 {
|
||||
if !self.after_context_by_line(buf, line.start())? {
|
||||
return Ok(Stop);
|
||||
return Ok(false);
|
||||
}
|
||||
if !self.before_context_by_line(buf, line.start())? {
|
||||
return Ok(Stop);
|
||||
return Ok(false);
|
||||
}
|
||||
}
|
||||
self.set_pos(line.end());
|
||||
if !self.sink_matched(buf, &line)? {
|
||||
return Ok(Stop);
|
||||
return Ok(false);
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if !self.after_context_by_line(buf, buf.len())? {
|
||||
return Ok(Stop);
|
||||
return Ok(false);
|
||||
}
|
||||
self.set_pos(buf.len());
|
||||
Ok(Continue)
|
||||
Ok(true)
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
@@ -369,7 +344,6 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
if invert_match.is_empty() {
|
||||
return Ok(true);
|
||||
}
|
||||
self.has_matched = true;
|
||||
if !self.after_context_by_line(buf, invert_match.start())? {
|
||||
return Ok(false);
|
||||
}
|
||||
@@ -603,9 +577,6 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
if self.config.passthru {
|
||||
return false;
|
||||
}
|
||||
if self.config.stop_on_nonmatch && self.has_matched {
|
||||
return false;
|
||||
}
|
||||
if let Some(line_term) = self.matcher.line_terminator() {
|
||||
if line_term == self.config.line_term {
|
||||
return true;
|
||||
|
@@ -71,6 +71,16 @@ impl MmapChoice {
|
||||
if !self.is_enabled() {
|
||||
return None;
|
||||
}
|
||||
if !cfg!(target_pointer_width = "64") {
|
||||
// For 32-bit systems, it looks like mmap will succeed even if it
|
||||
// can't address the entire file. This seems to happen at least on
|
||||
// Windows, even though it used to work prior to ripgrep 13. The
|
||||
// only Windows-related change in ripgrep 13, AFAIK, was statically
|
||||
// linking vcruntime. So maybe that's related? But I'm not sure.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1911
|
||||
return None;
|
||||
}
|
||||
if cfg!(target_os = "macos") {
|
||||
// I guess memory maps on macOS aren't great. Should re-evaluate.
|
||||
return None;
|
||||
|
@@ -173,9 +173,6 @@ pub struct Config {
|
||||
encoding: Option<Encoding>,
|
||||
/// Whether to do automatic transcoding based on a BOM or not.
|
||||
bom_sniffing: bool,
|
||||
/// Whether to stop searching when a non-matching line is found after a
|
||||
/// matching line.
|
||||
stop_on_nonmatch: bool,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
@@ -193,7 +190,6 @@ impl Default for Config {
|
||||
multi_line: false,
|
||||
encoding: None,
|
||||
bom_sniffing: true,
|
||||
stop_on_nonmatch: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -559,19 +555,6 @@ impl SearcherBuilder {
|
||||
self.config.bom_sniffing = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Stop searching a file when a non-matching line is found after a
|
||||
/// matching line.
|
||||
///
|
||||
/// This is useful for searching sorted files where it is expected that all
|
||||
/// the matches will be on adjacent lines.
|
||||
pub fn stop_on_nonmatch(
|
||||
&mut self,
|
||||
stop_on_nonmatch: bool,
|
||||
) -> &mut SearcherBuilder {
|
||||
self.config.stop_on_nonmatch = stop_on_nonmatch;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// A searcher executes searches over a haystack and writes results to a caller
|
||||
@@ -855,13 +838,6 @@ impl Searcher {
|
||||
self.config.multi_line
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher is configured to stop when it
|
||||
/// finds a non-matching line after a matching one.
|
||||
#[inline]
|
||||
pub fn stop_on_nonmatch(&self) -> bool {
|
||||
self.config.stop_on_nonmatch
|
||||
}
|
||||
|
||||
/// Returns true if and only if this searcher will choose a multi-line
|
||||
/// strategy given the provided matcher.
|
||||
///
|
||||
|
@@ -232,16 +232,6 @@ would behave identically to the following command
|
||||
|
||||
rg --glob '!.git' foo
|
||||
|
||||
The bottom line is that every shell argument needs to be on its own line. So
|
||||
for example, a config file containing
|
||||
|
||||
-j 4
|
||||
|
||||
is probably not doing what you intend. Instead, you want
|
||||
|
||||
-j
|
||||
4
|
||||
|
||||
ripgrep also provides a flag, *--no-config*, that when present will suppress
|
||||
any and all support for configuration. This includes any future support
|
||||
for auto-loading configuration files from pre-determined paths.
|
||||
|
@@ -1,28 +0,0 @@
|
||||
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
|
||||
<!--
|
||||
This is a Windows application manifest file.
|
||||
See: https://docs.microsoft.com/en-us/windows/win32/sbscs/application-manifests
|
||||
-->
|
||||
<assembly xmlns="urn:schemas-microsoft-com:asm.v1" manifestVersion="1.0" xmlns:asmv3="urn:schemas-microsoft-com:asm.v3">
|
||||
<!-- Versions rustc supports as compiler hosts -->
|
||||
<compatibility xmlns="urn:schemas-microsoft-com:compatibility.v1">
|
||||
<application>
|
||||
<!-- Windows 7 --><supportedOS Id="{35138b9a-5d96-4fbd-8e2d-a2440225f93a}"/>
|
||||
<!-- Windows 8 --><supportedOS Id="{4a2f28e3-53b9-4441-ba9c-d69d4a4a6e38}"/>
|
||||
<!-- Windows 8.1 --><supportedOS Id="{1f676c76-80e1-4239-95bb-83d0f6d0da78}"/>
|
||||
<!-- Windows 10 and 11 --><supportedOS Id="{8e0f7a12-bfb3-4fe8-b9a5-48fd50a15a9a}"/>
|
||||
</application>
|
||||
</compatibility>
|
||||
<!-- Use UTF-8 code page -->
|
||||
<asmv3:application>
|
||||
<asmv3:windowsSettings xmlns="http://schemas.microsoft.com/SMI/2019/WindowsSettings">
|
||||
<activeCodePage>UTF-8</activeCodePage>
|
||||
</asmv3:windowsSettings>
|
||||
</asmv3:application>
|
||||
<!-- Remove (most) legacy path limits -->
|
||||
<asmv3:application>
|
||||
<asmv3:windowsSettings xmlns:ws2="http://schemas.microsoft.com/SMI/2016/WindowsSettings">
|
||||
<ws2:longPathAware>true</ws2:longPathAware>
|
||||
</asmv3:windowsSettings>
|
||||
</asmv3:application>
|
||||
</assembly>
|
@@ -1,15 +0,0 @@
|
||||
This directory contains a Windows manifest for various Windows-specific
|
||||
settings.
|
||||
|
||||
The main thing we enable here is [`longPathAware`], which permits paths of the
|
||||
form `C:\` to be longer than 260 characters.
|
||||
|
||||
The approach taken here was modeled off of a [similar change for `rustc`][rustc pr].
|
||||
In particular, this manifest gets linked into the final binary. Those linker
|
||||
arguments are applied in `build.rs`.
|
||||
|
||||
This currently only applies to MSVC builds. If there's an easy way to make this
|
||||
apply to GNU builds as well, then patches are welcome.
|
||||
|
||||
[`longPathAware`]: https://learn.microsoft.com/en-us/windows/win32/sbscs/application-manifests#longpathaware
|
||||
[rustc pr]: https://github.com/rust-lang/rust/pull/96737
|
@@ -787,28 +787,6 @@ rgtest!(f1466_no_ignore_files, |dir: Dir, mut cmd: TestCommand| {
|
||||
eqnice!("foo\n", cmd.arg("-u").stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/pull/2361
|
||||
rgtest!(f2361_sort_nested_files, |dir: Dir, mut cmd: TestCommand| {
|
||||
use std::{thread::sleep, time::Duration};
|
||||
|
||||
dir.create("foo", "1");
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create_dir("dir");
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create(dir.path().join("dir").join("bar"), "1");
|
||||
|
||||
cmd.arg("--sort").arg("accessed").arg("--files");
|
||||
eqnice!("foo\ndir/bar\n", cmd.stdout());
|
||||
|
||||
dir.create("foo", "2");
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create(dir.path().join("dir").join("bar"), "2");
|
||||
sleep(Duration::from_millis(100));
|
||||
|
||||
cmd.arg("--sort").arg("accessed").arg("--files");
|
||||
eqnice!("foo\ndir/bar\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1404
|
||||
rgtest!(f1404_nothing_searched_warning, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create(".ignore", "ignored-dir/**");
|
||||
@@ -943,23 +921,6 @@ rgtest!(f1842_field_match_separator, |dir: Dir, _: TestCommand| {
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2288
|
||||
rgtest!(f2288_context_partial_override, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "1\n2\n3\n4\n5\n6\n7\n8\n9\n");
|
||||
cmd.args(&["-C1", "-A2", "5", "test"]);
|
||||
eqnice!("4\n5\n6\n7\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2288
|
||||
rgtest!(
|
||||
f2288_context_partial_override_rev,
|
||||
|dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "1\n2\n3\n4\n5\n6\n7\n8\n9\n");
|
||||
cmd.args(&["-A2", "-C1", "5", "test"]);
|
||||
eqnice!("4\n5\n6\n7\n", cmd.stdout());
|
||||
}
|
||||
);
|
||||
|
||||
rgtest!(no_context_sep, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx");
|
||||
cmd.args(&["-A1", "--no-context-separator", "foo", "test"]);
|
||||
@@ -1014,10 +975,3 @@ rgtest!(no_unicode, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "δ");
|
||||
cmd.arg("-i").arg("--no-unicode").arg("Δ").assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1790
|
||||
rgtest!(stop_on_nonmatch, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "line1\nline2\nline3\nline4\nline5");
|
||||
cmd.args(&["--stop-on-nonmatch", "[235]"]);
|
||||
eqnice!("test:line2\ntest:line3\n", cmd.stdout());
|
||||
});
|
||||
|
@@ -1065,48 +1065,3 @@ rgtest!(type_list, |_: Dir, mut cmd: TestCommand| {
|
||||
// This can change over time, so just make sure we print something.
|
||||
assert!(!cmd.stdout().is_empty());
|
||||
});
|
||||
|
||||
// The following series of tests seeks to test all permutations of ripgrep's
|
||||
// sorted queries.
|
||||
//
|
||||
// They all rely on this setup function, which sets up this particular file
|
||||
// structure with a particular creation order:
|
||||
// ├── a # 1
|
||||
// ├── b # 4
|
||||
// └── dir # 2
|
||||
// ├── c # 3
|
||||
// └── d # 5
|
||||
//
|
||||
// This order is important when sorting them by system time-stamps.
|
||||
fn sort_setup(dir: Dir) {
|
||||
use std::{thread::sleep, time::Duration};
|
||||
|
||||
let sub_dir = dir.path().join("dir");
|
||||
dir.create("a", "test");
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create_dir(&sub_dir);
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create(sub_dir.join("c"), "test");
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create("b", "test");
|
||||
sleep(Duration::from_millis(100));
|
||||
dir.create(sub_dir.join("d"), "test");
|
||||
}
|
||||
|
||||
rgtest!(sort_files, |dir: Dir, mut cmd: TestCommand| {
|
||||
sort_setup(dir);
|
||||
let expected = "a:test\nb:test\ndir/c:test\ndir/d:test\n";
|
||||
eqnice!(expected, cmd.args(["--sort", "path", "test"]).stdout());
|
||||
});
|
||||
|
||||
rgtest!(sort_accessed, |dir: Dir, mut cmd: TestCommand| {
|
||||
sort_setup(dir);
|
||||
let expected = "a:test\ndir/c:test\nb:test\ndir/d:test\n";
|
||||
eqnice!(expected, cmd.args(["--sort", "accessed", "test"]).stdout());
|
||||
});
|
||||
|
||||
rgtest!(sortr_accessed, |dir: Dir, mut cmd: TestCommand| {
|
||||
sort_setup(dir);
|
||||
let expected = "dir/d:test\nb:test\ndir/c:test\na:test\n";
|
||||
eqnice!(expected, cmd.args(["--sortr", "accessed", "test"]).stdout());
|
||||
});
|
||||
|
@@ -1090,19 +1090,6 @@ b=one
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2198
|
||||
rgtest!(r2198, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create(".ignore", "a");
|
||||
dir.create(".rgignore", "b");
|
||||
dir.create("a", "");
|
||||
dir.create("b", "");
|
||||
dir.create("c", "");
|
||||
|
||||
cmd.arg("--files").arg("--sort").arg("path");
|
||||
eqnice!("c\n", cmd.stdout());
|
||||
eqnice!("a\nb\nc\n", cmd.arg("--no-ignore-dot").stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2208
|
||||
rgtest!(r2208, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "# Compile requirements.txt files from all found or specified requirements.in files (compile).
|
||||
@@ -1139,37 +1126,3 @@ rgtest!(r2236, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("foo/bar", "test\n");
|
||||
cmd.args(&["test"]).assert_err();
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/2480
|
||||
rgtest!(r2480, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("file", "FooBar\n");
|
||||
|
||||
// no regression in empty pattern behavior
|
||||
cmd.args(&["-e", "", "file"]);
|
||||
eqnice!("FooBar\n", cmd.stdout());
|
||||
|
||||
// no regression in single pattern behavior
|
||||
let mut cmd = dir.command();
|
||||
cmd.args(&["-e", ")(", "file"]);
|
||||
eqnice!("FooBar\n", cmd.stdout());
|
||||
|
||||
// no regression in multiple patterns behavior
|
||||
let mut cmd = dir.command();
|
||||
cmd.args(&["--only-matching", "-e", "Foo", "-e", "Bar", "file"]);
|
||||
eqnice!("Foo\nBar\n", cmd.stdout());
|
||||
|
||||
// no regression in capture groups behavior
|
||||
let mut cmd = dir.command();
|
||||
cmd.args(&["-e", "Fo(oB)a(r)", "--replace", "${0}_${1}_${2}${3}", "file"]);
|
||||
eqnice!("FooBar_oB_r\n", cmd.stdout()); // note: ${3} expected to be empty
|
||||
|
||||
// flag does not leak into next pattern on match
|
||||
let mut cmd = dir.command();
|
||||
cmd.args(&["--only-matching", "-e", "(?i)foo", "-e", "bar", "file"]);
|
||||
eqnice!("Foo\n", cmd.stdout());
|
||||
|
||||
// flag does not leak into next pattern on mismatch
|
||||
let mut cmd = dir.command();
|
||||
cmd.args(&["--only-matching", "-e", "(?i)notfoo", "-e", "bar", "file"]);
|
||||
cmd.assert_err();
|
||||
});
|
||||
|
Reference in New Issue
Block a user