mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 18:21:57 -07:00
Compare commits
1 Commits
grep-cli-0
...
ag/fix-cve
Author | SHA1 | Date | |
---|---|---|---|
|
aecc0ea126 |
@@ -1,8 +0,0 @@
|
||||
# On Windows MSVC, statically link the C runtime so that the resulting EXE does
|
||||
# not depend on the vcruntime DLL.
|
||||
#
|
||||
# See: https://github.com/BurntSushi/ripgrep/pull/1613
|
||||
[target.x86_64-pc-windows-msvc]
|
||||
rustflags = ["-C", "target-feature=+crt-static"]
|
||||
[target.i686-pc-windows-msvc]
|
||||
rustflags = ["-C", "target-feature=+crt-static"]
|
3
.github/ISSUE_TEMPLATE/feature_request.md
vendored
3
.github/ISSUE_TEMPLATE/feature_request.md
vendored
@@ -15,6 +15,3 @@ examples of how ripgrep would be used if your feature request were added.
|
||||
If you're not sure what to write here, then try imagining what the ideal
|
||||
documentation of your new feature would look like in ripgrep's man page. Then
|
||||
try to write it.
|
||||
|
||||
If you're requesting the addition or change of default file types, please open
|
||||
a PR. We can discuss it there if necessary.
|
||||
|
27
.github/workflows/ci.yml
vendored
27
.github/workflows/ci.yml
vendored
@@ -43,7 +43,7 @@ jobs:
|
||||
include:
|
||||
- build: pinned
|
||||
os: ubuntu-18.04
|
||||
rust: 1.52.1
|
||||
rust: 1.41.0
|
||||
- build: stable
|
||||
os: ubuntu-18.04
|
||||
rust: stable
|
||||
@@ -118,10 +118,10 @@ jobs:
|
||||
echo "target flag is: ${{ env.TARGET_FLAGS }}"
|
||||
|
||||
- name: Build ripgrep and all crates
|
||||
run: ${{ env.CARGO }} build --verbose --workspace ${{ env.TARGET_FLAGS }}
|
||||
run: ${{ env.CARGO }} build --verbose --all ${{ env.TARGET_FLAGS }}
|
||||
|
||||
- name: Build ripgrep with PCRE2
|
||||
run: ${{ env.CARGO }} build --verbose --workspace --features pcre2 ${{ env.TARGET_FLAGS }}
|
||||
run: ${{ env.CARGO }} build --verbose --all --features pcre2 ${{ env.TARGET_FLAGS }}
|
||||
|
||||
# This is useful for debugging problems when the expected build artifacts
|
||||
# (like shell completions and man pages) aren't generated.
|
||||
@@ -139,7 +139,7 @@ jobs:
|
||||
|
||||
- name: Run tests with PCRE2 (sans cross)
|
||||
if: matrix.target == ''
|
||||
run: ${{ env.CARGO }} test --verbose --workspace --features pcre2 ${{ env.TARGET_FLAGS }}
|
||||
run: ${{ env.CARGO }} test --verbose --all --features pcre2 ${{ env.TARGET_FLAGS }}
|
||||
|
||||
- name: Run tests without PCRE2 (with cross)
|
||||
# These tests should actually work, but they almost double the runtime.
|
||||
@@ -147,7 +147,7 @@ jobs:
|
||||
# enabled, every integration test is run twice: one with the default
|
||||
# regex engine and once with PCRE2.
|
||||
if: matrix.target != ''
|
||||
run: ${{ env.CARGO }} test --verbose --workspace ${{ env.TARGET_FLAGS }}
|
||||
run: ${{ env.CARGO }} test --verbose --all ${{ env.TARGET_FLAGS }}
|
||||
|
||||
- name: Test for existence of build artifacts (Windows)
|
||||
if: matrix.os == 'windows-2019'
|
||||
@@ -194,20 +194,3 @@ jobs:
|
||||
- name: Check formatting
|
||||
run: |
|
||||
cargo fmt --all -- --check
|
||||
|
||||
docs:
|
||||
name: Docs
|
||||
runs-on: ubuntu-20.04
|
||||
steps:
|
||||
- name: Checkout repository
|
||||
uses: actions/checkout@v2
|
||||
- name: Install Rust
|
||||
uses: actions-rs/toolchain@v1
|
||||
with:
|
||||
toolchain: stable
|
||||
profile: minimal
|
||||
override: true
|
||||
- name: Check documentation
|
||||
env:
|
||||
RUSTDOCFLAGS: -D warnings
|
||||
run: cargo doc --no-deps --document-private-items --workspace
|
||||
|
67
.github/workflows/release.yml
vendored
67
.github/workflows/release.yml
vendored
@@ -1,26 +1,23 @@
|
||||
# The way this works is the following:
|
||||
# The way this works is a little weird. But basically, the create-release job
|
||||
# runs purely to initialize the GitHub release itself. Once done, the upload
|
||||
# URL of the release is saved as an artifact.
|
||||
#
|
||||
# The create-release job runs purely to initialize the GitHub release itself
|
||||
# and to output upload_url for the following job.
|
||||
#
|
||||
# The build-release job runs only once create-release is finished. It gets the
|
||||
# release upload URL from create-release job outputs, then builds the release
|
||||
# executables for each supported platform and attaches them as release assets
|
||||
# to the previously created release.
|
||||
# The build-release job runs only once create-release is finished. It gets
|
||||
# the release upload URL by downloading the corresponding artifact (which was
|
||||
# uploaded by create-release). It then builds the release executables for each
|
||||
# supported platform and attaches them as release assets to the previously
|
||||
# created release.
|
||||
#
|
||||
# The key here is that we create the release only once.
|
||||
#
|
||||
# Reference:
|
||||
# https://eugene-babichenko.github.io/blog/2020/05/09/github-actions-cross-platform-auto-releases/
|
||||
|
||||
name: release
|
||||
on:
|
||||
push:
|
||||
# Enable when testing release infrastructure on a branch.
|
||||
# branches:
|
||||
# - ag/work
|
||||
# - ag/release
|
||||
tags:
|
||||
- "[0-9]+.[0-9]+.[0-9]+"
|
||||
- '[0-9]+.[0-9]+.[0-9]+'
|
||||
jobs:
|
||||
create-release:
|
||||
name: create-release
|
||||
@@ -28,12 +25,11 @@ jobs:
|
||||
# env:
|
||||
# Set to force version number, e.g., when no tag exists.
|
||||
# RG_VERSION: TEST-0.0.0
|
||||
outputs:
|
||||
upload_url: ${{ steps.release.outputs.upload_url }}
|
||||
rg_version: ${{ env.RG_VERSION }}
|
||||
steps:
|
||||
- name: Create artifacts directory
|
||||
run: mkdir artifacts
|
||||
|
||||
- name: Get the release version from the tag
|
||||
shell: bash
|
||||
if: env.RG_VERSION == ''
|
||||
run: |
|
||||
# Apparently, this is the right way to get a tag name. Really?
|
||||
@@ -41,6 +37,7 @@ jobs:
|
||||
# See: https://github.community/t5/GitHub-Actions/How-to-get-just-the-tag-name/m-p/32167/highlight/true#M1027
|
||||
echo "RG_VERSION=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||
echo "version is: ${{ env.RG_VERSION }}"
|
||||
|
||||
- name: Create GitHub release
|
||||
id: release
|
||||
uses: actions/create-release@v1
|
||||
@@ -50,6 +47,18 @@ jobs:
|
||||
tag_name: ${{ env.RG_VERSION }}
|
||||
release_name: ${{ env.RG_VERSION }}
|
||||
|
||||
- name: Save release upload URL to artifact
|
||||
run: echo "${{ steps.release.outputs.upload_url }}" > artifacts/release-upload-url
|
||||
|
||||
- name: Save version number to artifact
|
||||
run: echo "${{ env.RG_VERSION }}" > artifacts/release-version
|
||||
|
||||
- name: Upload artifacts
|
||||
uses: actions/upload-artifact@v1
|
||||
with:
|
||||
name: artifacts
|
||||
path: artifacts
|
||||
|
||||
build-release:
|
||||
name: build-release
|
||||
needs: ['create-release']
|
||||
@@ -59,7 +68,7 @@ jobs:
|
||||
# systems.
|
||||
CARGO: cargo
|
||||
# When CARGO is set to CROSS, this is set to `--target matrix.target`.
|
||||
TARGET_FLAGS: ""
|
||||
TARGET_FLAGS:
|
||||
# When CARGO is set to CROSS, TARGET_DIR includes matrix.target.
|
||||
TARGET_DIR: ./target
|
||||
# Emit backtraces on panics.
|
||||
@@ -120,7 +129,7 @@ jobs:
|
||||
target: ${{ matrix.target }}
|
||||
|
||||
- name: Use Cross
|
||||
shell: bash
|
||||
# if: matrix.os != 'windows-2019'
|
||||
run: |
|
||||
cargo install cross
|
||||
echo "CARGO=cross" >> $GITHUB_ENV
|
||||
@@ -133,6 +142,22 @@ jobs:
|
||||
echo "target flag is: ${{ env.TARGET_FLAGS }}"
|
||||
echo "target dir is: ${{ env.TARGET_DIR }}"
|
||||
|
||||
- name: Get release download URL
|
||||
uses: actions/download-artifact@v1
|
||||
with:
|
||||
name: artifacts
|
||||
path: artifacts
|
||||
|
||||
- name: Set release upload URL and release version
|
||||
shell: bash
|
||||
run: |
|
||||
release_upload_url="$(cat artifacts/release-upload-url)"
|
||||
echo "RELEASE_UPLOAD_URL=$release_upload_url" >> $GITHUB_ENV
|
||||
echo "release upload url: $RELEASE_UPLOAD_URL"
|
||||
release_version="$(cat artifacts/release-version)"
|
||||
echo "RELEASE_VERSION=$release_version" >> $GITHUB_ENV
|
||||
echo "release version: $RELEASE_VERSION"
|
||||
|
||||
- name: Build release binary
|
||||
run: ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}
|
||||
|
||||
@@ -153,7 +178,7 @@ jobs:
|
||||
shell: bash
|
||||
run: |
|
||||
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
|
||||
staging="ripgrep-${{ needs.create-release.outputs.rg_version }}-${{ matrix.target }}"
|
||||
staging="ripgrep-${{ env.RELEASE_VERSION }}-${{ matrix.target }}"
|
||||
mkdir -p "$staging"/{complete,doc}
|
||||
|
||||
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$staging/"
|
||||
@@ -178,7 +203,7 @@ jobs:
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
upload_url: ${{ needs.create-release.outputs.upload_url }}
|
||||
upload_url: ${{ env.RELEASE_UPLOAD_URL }}
|
||||
asset_path: ${{ env.ASSET }}
|
||||
asset_name: ${{ env.ASSET }}
|
||||
asset_content_type: application/octet-stream
|
||||
|
4
.gitignore
vendored
4
.gitignore
vendored
@@ -15,7 +15,3 @@ parts
|
||||
*.snap
|
||||
*.pyc
|
||||
ripgrep*_source.tar.bz2
|
||||
|
||||
# Cargo timings
|
||||
cargo-timing-*.html
|
||||
cargo-timing.html
|
||||
|
110
CHANGELOG.md
110
CHANGELOG.md
@@ -1,26 +1,6 @@
|
||||
TBD
|
||||
===
|
||||
ripgrep 13 is a new major version release of ripgrep that primarily contains
|
||||
bug fixes. There is also a fix for a security vulnerability on Windows
|
||||
([CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013)),
|
||||
some performance improvements and some minor breaking changes.
|
||||
|
||||
A new short flag, `-.`, has been added. It is an alias for the `--hidden` flag,
|
||||
which instructs ripgrep to search hidden files and directories.
|
||||
|
||||
ripgrep is now using a new
|
||||
[vectorized implementation of `memmem`](https://github.com/BurntSushi/memchr/pull/82),
|
||||
which accelerates many common searches. If you notice any performance
|
||||
regressions (or major improvements), I'd love to hear about them through an
|
||||
issue report!
|
||||
|
||||
Also, for Windows users targeting MSVC, Cargo will now build fully static
|
||||
executables of ripgrep. The release binaries for ripgrep 13 have been compiled
|
||||
using this configuration.
|
||||
|
||||
**BREAKING CHANGES**:
|
||||
|
||||
**Binary detection output has changed slightly.**
|
||||
Unreleased changes. Release notes have not yet been written.
|
||||
|
||||
In this release, a small tweak has been made to the output format when a binary
|
||||
file is detected. Previously, it looked like this:
|
||||
@@ -35,100 +15,12 @@ Now it looks like this:
|
||||
FOO: binary file matches (found "\0" byte around offset XXX)
|
||||
```
|
||||
|
||||
**vimgrep output in multi-line now only prints the first line for each match.**
|
||||
|
||||
See [issue 1866](https://github.com/BurntSushi/ripgrep/issues/1866) for more
|
||||
discussion on this. Previously, every line in a match was duplicated, even
|
||||
when it spanned multiple lines. There are no changes to vimgrep output when
|
||||
multi-line mode is disabled.
|
||||
|
||||
**In multi-line mode, --count is now equivalent to --count-matches.**
|
||||
|
||||
This appears to match how `pcre2grep` implements `--count`. Previously, ripgrep
|
||||
would produce outright incorrect counts. Another alternative would be to simply
|
||||
count the number of lines---even if it's more than the number of matches---but
|
||||
that seems highly unintuitive.
|
||||
|
||||
**FULL LIST OF FIXES AND IMPROVEMENTS:**
|
||||
|
||||
Security fixes:
|
||||
|
||||
* [CVE-2021-3013](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3013):
|
||||
Fixes a security hole on Windows where running ripgrep with either the
|
||||
`-z/--search-zip` or `--pre` flags can result in running arbitrary
|
||||
executables from the current directory.
|
||||
* [VULN #1773](https://github.com/BurntSushi/ripgrep/issues/1773):
|
||||
This is the public facing issue tracking CVE-2021-3013. ripgrep's README
|
||||
now contains a section describing how to report a vulnerability.
|
||||
|
||||
Performance improvements:
|
||||
|
||||
* [PERF #1657](https://github.com/BurntSushi/ripgrep/discussions/1657):
|
||||
Check if a file should be ignored first before issuing stat calls.
|
||||
* [PERF memchr#82](https://github.com/BurntSushi/memchr/pull/82):
|
||||
ripgrep now uses a new vectorized implementation of `memmem`.
|
||||
|
||||
Feature enhancements:
|
||||
|
||||
* Added or improved file type filtering for ASP, Bazel, dvc, FlatBuffers,
|
||||
Futhark, minified files, Mint, pofiles (from GNU gettext), Racket, Red, Ruby,
|
||||
VCL, Yang.
|
||||
* [FEATURE #1404](https://github.com/BurntSushi/ripgrep/pull/1404):
|
||||
ripgrep now prints a warning if nothing is searched.
|
||||
* [FEATURE #1613](https://github.com/BurntSushi/ripgrep/pull/1613):
|
||||
Cargo will now produce static executables on Windows when using MSVC.
|
||||
* [FEATURE #1680](https://github.com/BurntSushi/ripgrep/pull/1680):
|
||||
Add `-.` as a short flag alias for `--hidden`.
|
||||
* [FEATURE #1842](https://github.com/BurntSushi/ripgrep/issues/1842):
|
||||
Add `--field-{context,match}-separator` for customizing field delimiters.
|
||||
* [FEATURE #1856](https://github.com/BurntSushi/ripgrep/pull/1856):
|
||||
The README now links to a
|
||||
[Spanish translation](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep).
|
||||
|
||||
Bug fixes:
|
||||
|
||||
* [BUG #1277](https://github.com/BurntSushi/ripgrep/issues/1277):
|
||||
Document cygwin path translation behavior in the FAQ.
|
||||
* [BUG #1739](https://github.com/BurntSushi/ripgrep/issues/1739):
|
||||
Fix bug where replacements were incorrect if the regex matched a line terminator.
|
||||
* [BUG #1311](https://github.com/BurntSushi/ripgrep/issues/1311):
|
||||
Fix multi-line bug where a search & replace for `\n` didn't work as expected.
|
||||
* [BUG #1401](https://github.com/BurntSushi/ripgrep/issues/1401):
|
||||
Fix buggy interaction between PCRE2 look-around and `-o/--only-matching`.
|
||||
* [BUG #1412](https://github.com/BurntSushi/ripgrep/issues/1412):
|
||||
Fix multi-line bug with searches using look-around past matching lines.
|
||||
* [BUG #1577](https://github.com/BurntSushi/ripgrep/issues/1577):
|
||||
Fish shell completions will continue to be auto-generated.
|
||||
* [BUG #1642](https://github.com/BurntSushi/ripgrep/issues/1642):
|
||||
Fixes a bug where using `-m` and `-A` printed more matches than the limit.
|
||||
* [BUG #1703](https://github.com/BurntSushi/ripgrep/issues/1703):
|
||||
Clarify the function of `-u/--unrestricted`.
|
||||
* [BUG #1708](https://github.com/BurntSushi/ripgrep/issues/1708):
|
||||
Clarify how `-S/--smart-case` works.
|
||||
* [BUG #1730](https://github.com/BurntSushi/ripgrep/issues/1730):
|
||||
Clarify that CLI invocation must always be valid, regardless of config file.
|
||||
* [BUG #1741](https://github.com/BurntSushi/ripgrep/issues/1741):
|
||||
Fix stdin detection when using PowerShell in UNIX environments.
|
||||
* [BUG #1756](https://github.com/BurntSushi/ripgrep/pull/1756):
|
||||
Fix bug where `foo/**` would match `foo`, but it shouldn't.
|
||||
* [BUG #1765](https://github.com/BurntSushi/ripgrep/issues/1765):
|
||||
Fix panic when `--crlf` is used in some cases.
|
||||
* [BUG #1638](https://github.com/BurntSushi/ripgrep/issues/1638):
|
||||
Correctly sniff UTF-8 and do transcoding, like we do for UTF-16.
|
||||
* [BUG #1816](https://github.com/BurntSushi/ripgrep/issues/1816):
|
||||
Add documentation for glob alternate syntax, e.g., `{a,b,..}`.
|
||||
* [BUG #1847](https://github.com/BurntSushi/ripgrep/issues/1847):
|
||||
Clarify how the `--hidden` flag works.
|
||||
* [BUG #1866](https://github.com/BurntSushi/ripgrep/issues/1866#issuecomment-841635553):
|
||||
Fix bug when computing column numbers in `--vimgrep` mode.
|
||||
* [BUG #1868](https://github.com/BurntSushi/ripgrep/issues/1868):
|
||||
Fix bug where `--passthru` and `-A/-B/-C` did not override each other.
|
||||
* [BUG #1869](https://github.com/BurntSushi/ripgrep/pull/1869):
|
||||
Clarify docs for `--files-with-matches` and `--files-without-match`.
|
||||
* [BUG #1878](https://github.com/BurntSushi/ripgrep/issues/1878):
|
||||
Fix bug where `\A` could produce unanchored matches in multiline search.
|
||||
* [BUG 94e4b8e3](https://github.com/BurntSushi/ripgrep/commit/94e4b8e3):
|
||||
Fix column numbers when `--vimgrep` is used with `-U/--multiline`.
|
||||
|
||||
|
||||
12.1.1 (2020-05-29)
|
||||
|
165
Cargo.lock
generated
165
Cargo.lock
generated
@@ -1,12 +1,10 @@
|
||||
# This file is automatically @generated by Cargo.
|
||||
# It is not intended for manual editing.
|
||||
version = 3
|
||||
|
||||
[[package]]
|
||||
name = "aho-corasick"
|
||||
version = "0.7.18"
|
||||
version = "0.7.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||
checksum = "b476ce7103678b0c6d3d395dbbae31d48ff910bd28be979ba5d48c6351131d0d"
|
||||
dependencies = [
|
||||
"memchr",
|
||||
]
|
||||
@@ -22,6 +20,12 @@ dependencies = [
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "autocfg"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
|
||||
|
||||
[[package]]
|
||||
name = "base64"
|
||||
version = "0.13.0"
|
||||
@@ -36,9 +40,9 @@ checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
|
||||
|
||||
[[package]]
|
||||
name = "bstr"
|
||||
version = "0.2.16"
|
||||
version = "0.2.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "90682c8d613ad3373e66de8c6411e0ae2ab2571e879d2efbf73558cc66f21279"
|
||||
checksum = "473fc6b38233f9af7baa94fb5852dca389e3d95b8e21c8e3719301462c5d9faf"
|
||||
dependencies = [
|
||||
"lazy_static",
|
||||
"memchr",
|
||||
@@ -47,15 +51,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "bytecount"
|
||||
version = "0.6.2"
|
||||
version = "0.6.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "72feb31ffc86498dacdbd0fcebb56138e7177a8cc5cea4516031d15ae85a742e"
|
||||
checksum = "b0017894339f586ccb943b01b9555de56770c11cda818e7e3d8bd93f4ed7f46e"
|
||||
|
||||
[[package]]
|
||||
name = "byteorder"
|
||||
version = "1.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.68"
|
||||
version = "1.0.61"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787"
|
||||
checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d"
|
||||
dependencies = [
|
||||
"jobserver",
|
||||
]
|
||||
@@ -85,10 +95,16 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.1"
|
||||
name = "const_fn"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
|
||||
checksum = "ce90df4c658c62f12d78f7508cf92f9173e5184a539c10bfe54a3107b3ffd0f2"
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-channel"
|
||||
version = "0.5.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dca26ee1f8d361640700bde38b2c37d8c22b3ce2d360e1fc1c74ea4b0aa7d775"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"crossbeam-utils",
|
||||
@@ -96,19 +112,21 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.5"
|
||||
version = "0.8.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
|
||||
checksum = "ec91540d98355f690a86367e566ecad2e9e579f230230eb7c21398372be73ea5"
|
||||
dependencies = [
|
||||
"autocfg",
|
||||
"cfg-if 1.0.0",
|
||||
"const_fn",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "encoding_rs"
|
||||
version = "0.8.28"
|
||||
version = "0.8.26"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "80df024fbc5ac80f87dfef0d9f5209a252f2a497f7f42944cff24d8253cac065"
|
||||
checksum = "801bbab217d7f79c0062f4f7205b5d4427c6d1a7bd7aafdd1475f7c59d62b283"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"packed_simd_2",
|
||||
@@ -143,7 +161,7 @@ checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
|
||||
|
||||
[[package]]
|
||||
name = "globset"
|
||||
version = "0.4.7"
|
||||
version = "0.4.6"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"bstr",
|
||||
@@ -172,7 +190,7 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep-cli"
|
||||
version = "0.1.6"
|
||||
version = "0.1.5"
|
||||
dependencies = [
|
||||
"atty",
|
||||
"bstr",
|
||||
@@ -211,6 +229,7 @@ dependencies = [
|
||||
"grep-regex",
|
||||
"grep-searcher",
|
||||
"serde",
|
||||
"serde_derive",
|
||||
"serde_json",
|
||||
"termcolor",
|
||||
]
|
||||
@@ -239,22 +258,22 @@ dependencies = [
|
||||
"grep-matcher",
|
||||
"grep-regex",
|
||||
"log",
|
||||
"memmap2",
|
||||
"memmap",
|
||||
"regex",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "hermit-abi"
|
||||
version = "0.1.18"
|
||||
version = "0.1.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c"
|
||||
checksum = "5aca5565f760fb5b220e499d72710ed156fdb74e631659e99377d9ebfbd13ae8"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.18"
|
||||
version = "0.4.17"
|
||||
dependencies = [
|
||||
"crossbeam-channel",
|
||||
"crossbeam-utils",
|
||||
@@ -271,9 +290,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "itoa"
|
||||
version = "0.4.7"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
|
||||
checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
|
||||
|
||||
[[package]]
|
||||
name = "jemalloc-sys"
|
||||
@@ -298,9 +317,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "jobserver"
|
||||
version = "0.1.22"
|
||||
version = "0.1.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "972f5ae5d1cb9c6ae417789196c803205313edde988685da5e3aae0827b9e7fd"
|
||||
checksum = "5c71313ebb9439f74b00d9d2dcec36440beaf57a6aa0623068441dd7cd81a7f2"
|
||||
dependencies = [
|
||||
"libc",
|
||||
]
|
||||
@@ -313,9 +332,9 @@ checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.97"
|
||||
version = "0.2.80"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "12b8adadd720df158f4d70dfe7ccc6adb0472d7c55ca83445f6a5ab3e36f8fb6"
|
||||
checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
|
||||
|
||||
[[package]]
|
||||
name = "libm"
|
||||
@@ -325,26 +344,27 @@ checksum = "7fc7aa29613bd6a620df431842069224d8bc9011086b1db4c0e0cd47fa03ec9a"
|
||||
|
||||
[[package]]
|
||||
name = "log"
|
||||
version = "0.4.14"
|
||||
version = "0.4.11"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
|
||||
checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
|
||||
dependencies = [
|
||||
"cfg-if 1.0.0",
|
||||
"cfg-if 0.1.10",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.4.0"
|
||||
version = "2.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
|
||||
checksum = "0ee1c47aaa256ecabcaea351eae4a9b01ef39ed810004e298d2511ed284b1525"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
version = "0.3.0"
|
||||
name = "memmap"
|
||||
version = "0.7.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "20ff203f7bdc401350b1dbaa0355135777d25f41c0bbc601851bbd6cf61e8ff5"
|
||||
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"winapi",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -357,17 +377,11 @@ dependencies = [
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "once_cell"
|
||||
version = "1.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
|
||||
|
||||
[[package]]
|
||||
name = "packed_simd_2"
|
||||
version = "0.3.5"
|
||||
version = "0.3.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "0e64858a2d3733fdd61adfdd6da89aa202f7ff0e741d2fc7ed1e452ba9dc99d7"
|
||||
checksum = "3278e0492f961fd4ae70909f56b2723a7e8d01a228427294e19cdfdebda89a17"
|
||||
dependencies = [
|
||||
"cfg-if 0.1.10",
|
||||
"libm",
|
||||
@@ -404,44 +418,48 @@ checksum = "3831453b3449ceb48b6d9c7ad7c96d5ea673e9b470a1dc578c2ce6521230884c"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.27"
|
||||
version = "1.0.24"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038"
|
||||
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.9"
|
||||
version = "1.0.7"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
|
||||
checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.5.4"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
|
||||
checksum = "38cf2c13ed4745de91a5eb834e11c00bcc3709e773173b2ce4c56c9fbde04b9c"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
"regex-syntax",
|
||||
"thread_local",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.1.10"
|
||||
version = "0.1.9"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132"
|
||||
checksum = "ae1ded71d66a4a97f5e961fd0cb25a5f366a42a41570d16a763a69c092c26ae4"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.6.25"
|
||||
version = "0.6.21"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||
checksum = "3b181ba2dcf07aaccad5448e8ead58db5b742cf85dfe035e2227f137a539a189"
|
||||
|
||||
[[package]]
|
||||
name = "ripgrep"
|
||||
@@ -480,18 +498,15 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "1.0.126"
|
||||
version = "1.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
|
||||
dependencies = [
|
||||
"serde_derive",
|
||||
]
|
||||
checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
|
||||
|
||||
[[package]]
|
||||
name = "serde_derive"
|
||||
version = "1.0.126"
|
||||
version = "1.0.117"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
|
||||
checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -500,9 +515,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.64"
|
||||
version = "1.0.59"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79"
|
||||
checksum = "dcac07dbffa1c65e7f816ab9eba78eb142c6d44410f4eeba1e26e4f5dfa56b95"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
@@ -517,9 +532,9 @@ checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.73"
|
||||
version = "1.0.48"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f71489ff30030d2ae598524f61326b902466f72a0fb1a8564c001cc63425bcc7"
|
||||
checksum = "cc371affeffc477f42a221a1e4297aedcea33d47d19b61455588bd9d8f6b19ac"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -528,9 +543,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "termcolor"
|
||||
version = "1.1.2"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
|
||||
checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f"
|
||||
dependencies = [
|
||||
"winapi-util",
|
||||
]
|
||||
@@ -546,11 +561,11 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "thread_local"
|
||||
version = "1.1.3"
|
||||
version = "1.0.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
|
||||
checksum = "d40c6d1b69745a6ec6fb1ca717914848da4b44ae29d9b3080cbee91d72a69b14"
|
||||
dependencies = [
|
||||
"once_cell",
|
||||
"lazy_static",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -561,15 +576,15 @@ checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.2"
|
||||
version = "0.2.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
|
||||
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||
|
||||
[[package]]
|
||||
name = "walkdir"
|
||||
version = "2.3.2"
|
||||
version = "2.3.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
|
||||
checksum = "777182bc735b6424e1a57516d35ed72cb8019d85c8c9bf536dccb3445c1a2f7d"
|
||||
dependencies = [
|
||||
"same-file",
|
||||
"winapi",
|
||||
|
@@ -3,13 +3,14 @@ name = "ripgrep"
|
||||
version = "12.1.1" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
ripgrep is a line-oriented search tool that recursively searches the current
|
||||
directory for a regex pattern while respecting gitignore rules. ripgrep has
|
||||
first class support on Windows, macOS and Linux.
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern while respecting your gitignore rules. ripgrep
|
||||
has first class support on Windows, macOS and Linux.
|
||||
"""
|
||||
documentation = "https://github.com/BurntSushi/ripgrep"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
categories = ["command-line-utilities", "text-processing"]
|
||||
license = "Unlicense OR MIT"
|
||||
@@ -43,7 +44,7 @@ members = [
|
||||
[dependencies]
|
||||
bstr = "0.2.12"
|
||||
grep = { version = "0.2.7", path = "crates/grep" }
|
||||
ignore = { version = "0.4.18", path = "crates/ignore" }
|
||||
ignore = { version = "0.4.16", path = "crates/ignore" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
num_cpus = "1.8.0"
|
||||
|
10
GUIDE.md
10
GUIDE.md
@@ -177,19 +177,15 @@ After recursive search, ripgrep's most important feature is what it *doesn't*
|
||||
search. By default, when you search a directory, ripgrep will ignore all of
|
||||
the following:
|
||||
|
||||
1. Files and directories that match glob patterns in these three categories:
|
||||
1. gitignore globs (including global and repo-specific globs).
|
||||
2. `.ignore` globs, which take precedence over all gitignore globs when
|
||||
there's a conflict.
|
||||
3. `.rgignore` globs, which take precedence over all `.ignore` globs when
|
||||
there's a conflict.
|
||||
1. Files and directories that match the rules in your `.gitignore` glob
|
||||
pattern.
|
||||
2. Hidden files and directories.
|
||||
3. Binary files. (ripgrep considers any file with a `NUL` byte to be binary.)
|
||||
4. Symbolic links aren't followed.
|
||||
|
||||
All of these things can be toggled using various flags provided by ripgrep:
|
||||
|
||||
1. You can disable all ignore-related filtering with the `--no-ignore` flag.
|
||||
1. You can disable `.gitignore` handling with the `--no-ignore` flag.
|
||||
2. Hidden files and directories can be searched with the `--hidden` flag.
|
||||
3. Binary files can be searched via the `--text` (`-a` for short) flag.
|
||||
Be careful with this flag! Binary files may emit control characters to your
|
||||
|
25
README.md
25
README.md
@@ -1,7 +1,7 @@
|
||||
ripgrep (rg)
|
||||
------------
|
||||
ripgrep is a line-oriented search tool that recursively searches the current
|
||||
directory for a regex pattern. By default, ripgrep will respect gitignore rules
|
||||
ripgrep is a line-oriented search tool that recursively searches your current
|
||||
directory for a regex pattern. By default, ripgrep will respect your .gitignore
|
||||
and automatically skip hidden files/directories and binary files. ripgrep
|
||||
has first class support on Windows, macOS and Linux, with binary downloads
|
||||
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
|
||||
@@ -192,9 +192,15 @@ multiline search and opt-in fancy regex support via PCRE2.
|
||||
The binary name for ripgrep is `rg`.
|
||||
|
||||
**[Archives of precompiled binaries for ripgrep are available for Windows,
|
||||
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Linux and
|
||||
Windows binaries are static executables. Users of platforms not explicitly
|
||||
mentioned below are advised to download one of these archives.
|
||||
macOS and Linux.](https://github.com/BurntSushi/ripgrep/releases)** Users of
|
||||
platforms not explicitly mentioned below are advised to download one of these
|
||||
archives.
|
||||
|
||||
Linux binaries are static executables. Windows binaries are available either as
|
||||
built with MinGW (GNU) or with Microsoft Visual C++ (MSVC). When possible,
|
||||
prefer MSVC over GNU, but you'll need to have the [Microsoft VC++ 2015
|
||||
redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
|
||||
installed.
|
||||
|
||||
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
|
||||
ripgrep from homebrew-core:
|
||||
@@ -419,18 +425,9 @@ $ cargo test --all
|
||||
from the repository root.
|
||||
|
||||
|
||||
### Vulnerability reporting
|
||||
|
||||
For reporting a security vulnerability, please
|
||||
[contact Andrew Gallant](https://blog.burntsushi.net/about/),
|
||||
which has my email address and PGP public key if you wish to send an encrypted
|
||||
message.
|
||||
|
||||
|
||||
### Translations
|
||||
|
||||
The following is a list of known translations of ripgrep's documentation. These
|
||||
are unofficially maintained and may not be up to date.
|
||||
|
||||
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
|
||||
* [Spanish](https://github.com/UltiRequiem/traducciones/tree/master/ripgrep)
|
||||
|
@@ -1,11 +1,9 @@
|
||||
Release Checklist
|
||||
-----------------
|
||||
* Ensure local `master` is up to date with respect to `origin/master`.
|
||||
* Run `cargo update` and review dependency updates. Commit updated
|
||||
`Cargo.lock`.
|
||||
* Run `cargo outdated` and review semver incompatible updates. Unless there is
|
||||
a strong motivation otherwise, review and update every dependency. Also
|
||||
run `--aggressive`, but don't update to crates that are still in beta.
|
||||
a strong motivation otherwise, review and update every dependency.
|
||||
* Review changes for every crate in `crates` since the last ripgrep release.
|
||||
If the set of changes is non-empty, issue a new release for that crate. Check
|
||||
crates in the following order. After updating a crate, ensure minimal
|
||||
@@ -31,9 +29,9 @@ Release Checklist
|
||||
* Copy the relevant section of the CHANGELOG to the tagged release notes.
|
||||
Include this blurb describing what ripgrep is:
|
||||
> In case you haven't heard of it before, ripgrep is a line-oriented search
|
||||
> tool that recursively searches the current directory for a regex pattern.
|
||||
> By default, ripgrep will respect gitignore rules and automatically skip
|
||||
> hidden files/directories and binary files.
|
||||
> tool that recursively searches your current directory for a regex pattern.
|
||||
> By default, ripgrep will respect your gitignore rules and automatically
|
||||
> skip hidden files/directories and binary files.
|
||||
* Run `ci/build-deb` locally and manually upload the deb package to the
|
||||
release.
|
||||
* Run `cargo publish`.
|
||||
|
@@ -17,21 +17,16 @@ if ! command -V cargo-deb > /dev/null 2>&1; then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if ! command -V asciidoctor > /dev/null 2>&1; then
|
||||
echo "asciidoctor command missing" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# 'cargo deb' does not seem to provide a way to specify an asset that is
|
||||
# created at build time, such as ripgrep's man page. To work around this,
|
||||
# we force a debug build, copy out the man page (and shell completions)
|
||||
# produced from that build, put it into a predictable location and then build
|
||||
# the deb, which knows where to look.
|
||||
cargo build
|
||||
|
||||
DEPLOY_DIR=deployment/deb
|
||||
OUT_DIR="$("$D"/cargo-out-dir target/debug/)"
|
||||
mkdir -p "$DEPLOY_DIR"
|
||||
cargo build
|
||||
|
||||
# Copy man page and shell completions.
|
||||
cp "$OUT_DIR"/{rg.1,rg.bash,rg.fish} "$DEPLOY_DIR/"
|
||||
|
@@ -44,8 +44,8 @@ main() {
|
||||
# Occasionally we may have to handle some manually, however
|
||||
help_args=( ${(f)"$(
|
||||
$rg --help |
|
||||
$rg -i -- '^\s+--?[a-z0-9.]|--[a-z]' |
|
||||
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9.]|--[a-z0-9-]+)(,|\\b)' |
|
||||
$rg -i -- '^\s+--?[a-z0-9]|--[a-z]' |
|
||||
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' |
|
||||
$rg -v -- --print0 | # False positives
|
||||
sort -u
|
||||
)"} )
|
||||
|
@@ -121,7 +121,7 @@ _rg() {
|
||||
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
|
||||
|
||||
+ '(hidden)' # Hidden-file options
|
||||
{-.,--hidden}'[search hidden files and directories]'
|
||||
'--hidden[search hidden files and directories]'
|
||||
$no"--no-hidden[don't search hidden files and directories]"
|
||||
|
||||
+ '(hybrid)' # hybrid regex options
|
||||
@@ -303,8 +303,6 @@ _rg() {
|
||||
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
|
||||
$no"--no-context-separator[don't print context separators]"
|
||||
'--debug[show debug messages]'
|
||||
'--field-context-separator[set string to delimit fields in context lines]'
|
||||
'--field-match-separator[set string to delimit fields in matching lines]'
|
||||
'--trace[show more verbose debug messages]'
|
||||
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
|
||||
"(1 stats)--files[show each file that would be searched (but don't search)]"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep-cli"
|
||||
version = "0.1.6" #:version
|
||||
version = "0.1.5" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Utilities for search oriented command line applications.
|
||||
@@ -11,12 +11,11 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "cli", "utility", "util"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bstr = "0.2.0"
|
||||
globset = { version = "0.4.7", path = "../globset" }
|
||||
globset = { version = "0.4.5", path = "../globset" }
|
||||
lazy_static = "1.1.0"
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
|
@@ -29,3 +29,9 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-cli = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_cli;
|
||||
```
|
||||
|
@@ -6,7 +6,7 @@ use std::process::Command;
|
||||
|
||||
use globset::{Glob, GlobSet, GlobSetBuilder};
|
||||
|
||||
use crate::process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
|
||||
/// A builder for a matcher that determines which files get decompressed.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -230,7 +230,7 @@ impl DecompressionReaderBuilder {
|
||||
match self.command_builder.build(&mut cmd) {
|
||||
Ok(cmd_reader) => Ok(DecompressionReader { rdr: Ok(cmd_reader) }),
|
||||
Err(err) => {
|
||||
log::debug!(
|
||||
debug!(
|
||||
"{}: error spawning command '{:?}': {} \
|
||||
(falling back to uncompressed reader)",
|
||||
path.display(),
|
||||
@@ -366,30 +366,6 @@ impl DecompressionReader {
|
||||
let file = File::open(path)?;
|
||||
Ok(DecompressionReader { rdr: Err(file) })
|
||||
}
|
||||
|
||||
/// Closes this reader, freeing any resources used by its underlying child
|
||||
/// process, if one was used. If the child process exits with a nonzero
|
||||
/// exit code, the returned Err value will include its stderr.
|
||||
///
|
||||
/// `close` is idempotent, meaning it can be safely called multiple times.
|
||||
/// The first call closes the CommandReader and any subsequent calls do
|
||||
/// nothing.
|
||||
///
|
||||
/// This method should be called after partially reading a file to prevent
|
||||
/// resource leakage. However there is no need to call `close` explicitly
|
||||
/// if your code always calls `read` to EOF, as `read` takes care of
|
||||
/// calling `close` in this case.
|
||||
///
|
||||
/// `close` is also called in `drop` as a last line of defense against
|
||||
/// resource leakage. Any error from the child process is then printed as a
|
||||
/// warning to stderr. This can be avoided by explicitly calling `close`
|
||||
/// before the CommandReader is dropped.
|
||||
pub fn close(&mut self) -> io::Result<()> {
|
||||
match self.rdr {
|
||||
Ok(ref mut rdr) => rdr.close(),
|
||||
Err(_) => Ok(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for DecompressionReader {
|
||||
@@ -479,7 +455,7 @@ fn default_decompression_commands() -> Vec<DecompressionCommand> {
|
||||
let bin = match resolve_binary(Path::new(args[0])) {
|
||||
Ok(bin) => bin,
|
||||
Err(err) => {
|
||||
log::debug!("{}", err);
|
||||
debug!("{}", err);
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
@@ -52,7 +52,7 @@ impl error::Error for ParseSizeError {
|
||||
}
|
||||
|
||||
impl fmt::Display for ParseSizeError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
use self::ParseSizeErrorKind::*;
|
||||
|
||||
match self.kind {
|
||||
@@ -88,7 +88,7 @@ impl From<ParseSizeError> for io::Error {
|
||||
///
|
||||
/// Additional suffixes may be added over time.
|
||||
pub fn parse_human_readable_size(size: &str) -> Result<u64, ParseSizeError> {
|
||||
lazy_static::lazy_static! {
|
||||
lazy_static! {
|
||||
// Normally I'd just parse something this simple by hand to avoid the
|
||||
// regex dep, but we bring regex in anyway for glob matching, so might
|
||||
// as well use it.
|
||||
|
@@ -158,6 +158,19 @@ error message is crafted that typically tells the user how to fix the problem.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate atty;
|
||||
extern crate bstr;
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
extern crate termcolor;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi_util;
|
||||
|
||||
mod decompress;
|
||||
mod escape;
|
||||
mod human;
|
||||
@@ -165,18 +178,18 @@ mod pattern;
|
||||
mod process;
|
||||
mod wtr;
|
||||
|
||||
pub use crate::decompress::{
|
||||
pub use decompress::{
|
||||
resolve_binary, DecompressionMatcher, DecompressionMatcherBuilder,
|
||||
DecompressionReader, DecompressionReaderBuilder,
|
||||
};
|
||||
pub use crate::escape::{escape, escape_os, unescape, unescape_os};
|
||||
pub use crate::human::{parse_human_readable_size, ParseSizeError};
|
||||
pub use crate::pattern::{
|
||||
pub use escape::{escape, escape_os, unescape, unescape_os};
|
||||
pub use human::{parse_human_readable_size, ParseSizeError};
|
||||
pub use pattern::{
|
||||
pattern_from_bytes, pattern_from_os, patterns_from_path,
|
||||
patterns_from_reader, patterns_from_stdin, InvalidPatternError,
|
||||
};
|
||||
pub use crate::process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
pub use crate::wtr::{
|
||||
pub use process::{CommandError, CommandReader, CommandReaderBuilder};
|
||||
pub use wtr::{
|
||||
stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
|
||||
};
|
||||
|
||||
|
@@ -8,7 +8,7 @@ use std::str;
|
||||
|
||||
use bstr::io::BufReadExt;
|
||||
|
||||
use crate::escape::{escape, escape_os};
|
||||
use escape::{escape, escape_os};
|
||||
|
||||
/// An error that occurs when a pattern could not be converted to valid UTF-8.
|
||||
///
|
||||
@@ -35,7 +35,7 @@ impl error::Error for InvalidPatternError {
|
||||
}
|
||||
|
||||
impl fmt::Display for InvalidPatternError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(
|
||||
f,
|
||||
"found invalid UTF-8 in pattern at byte offset {}: {} \
|
||||
|
@@ -30,14 +30,6 @@ impl CommandError {
|
||||
pub(crate) fn stderr(bytes: Vec<u8>) -> CommandError {
|
||||
CommandError { kind: CommandErrorKind::Stderr(bytes) }
|
||||
}
|
||||
|
||||
/// Returns true if and only if this error has empty data from stderr.
|
||||
pub(crate) fn is_empty(&self) -> bool {
|
||||
match self.kind {
|
||||
CommandErrorKind::Stderr(ref bytes) => bytes.is_empty(),
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl error::Error for CommandError {
|
||||
@@ -47,7 +39,7 @@ impl error::Error for CommandError {
|
||||
}
|
||||
|
||||
impl fmt::Display for CommandError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
CommandErrorKind::Io(ref e) => e.fmt(f),
|
||||
CommandErrorKind::Stderr(ref bytes) => {
|
||||
@@ -115,12 +107,18 @@ impl CommandReaderBuilder {
|
||||
.stdout(process::Stdio::piped())
|
||||
.stderr(process::Stdio::piped())
|
||||
.spawn()?;
|
||||
let stdout = child.stdout.take().unwrap();
|
||||
let stderr = if self.async_stderr {
|
||||
StderrReader::r#async(child.stderr.take().unwrap())
|
||||
StderrReader::async(child.stderr.take().unwrap())
|
||||
} else {
|
||||
StderrReader::sync(child.stderr.take().unwrap())
|
||||
};
|
||||
Ok(CommandReader { child, stderr, eof: false })
|
||||
Ok(CommandReader {
|
||||
child: child,
|
||||
stdout: stdout,
|
||||
stderr: stderr,
|
||||
done: false,
|
||||
})
|
||||
}
|
||||
|
||||
/// When enabled, the reader will asynchronously read the contents of the
|
||||
@@ -177,11 +175,9 @@ impl CommandReaderBuilder {
|
||||
#[derive(Debug)]
|
||||
pub struct CommandReader {
|
||||
child: process::Child,
|
||||
stdout: process::ChildStdout,
|
||||
stderr: StderrReader,
|
||||
/// This is set to true once 'read' returns zero bytes. When this isn't
|
||||
/// set and we close the reader, then we anticipate a pipe error when
|
||||
/// reaping the child process and silence it.
|
||||
eof: bool,
|
||||
done: bool,
|
||||
}
|
||||
|
||||
impl CommandReader {
|
||||
@@ -205,73 +201,23 @@ impl CommandReader {
|
||||
) -> Result<CommandReader, CommandError> {
|
||||
CommandReaderBuilder::new().build(cmd)
|
||||
}
|
||||
|
||||
/// Closes the CommandReader, freeing any resources used by its underlying
|
||||
/// child process. If the child process exits with a nonzero exit code, the
|
||||
/// returned Err value will include its stderr.
|
||||
///
|
||||
/// `close` is idempotent, meaning it can be safely called multiple times.
|
||||
/// The first call closes the CommandReader and any subsequent calls do
|
||||
/// nothing.
|
||||
///
|
||||
/// This method should be called after partially reading a file to prevent
|
||||
/// resource leakage. However there is no need to call `close` explicitly
|
||||
/// if your code always calls `read` to EOF, as `read` takes care of
|
||||
/// calling `close` in this case.
|
||||
///
|
||||
/// `close` is also called in `drop` as a last line of defense against
|
||||
/// resource leakage. Any error from the child process is then printed as a
|
||||
/// warning to stderr. This can be avoided by explicitly calling `close`
|
||||
/// before the CommandReader is dropped.
|
||||
pub fn close(&mut self) -> io::Result<()> {
|
||||
// Dropping stdout closes the underlying file descriptor, which should
|
||||
// cause a well-behaved child process to exit. If child.stdout is None
|
||||
// we assume that close() has already been called and do nothing.
|
||||
let stdout = match self.child.stdout.take() {
|
||||
None => return Ok(()),
|
||||
Some(stdout) => stdout,
|
||||
};
|
||||
drop(stdout);
|
||||
if self.child.wait()?.success() {
|
||||
Ok(())
|
||||
} else {
|
||||
let err = self.stderr.read_to_end();
|
||||
// In the specific case where we haven't consumed the full data
|
||||
// from the child process, then closing stdout above results in
|
||||
// a pipe signal being thrown in most cases. But I don't think
|
||||
// there is any reliable and portable way of detecting it. Instead,
|
||||
// if we know we haven't hit EOF (so we anticipate a broken pipe
|
||||
// error) and if stderr otherwise doesn't have anything on it, then
|
||||
// we assume total success.
|
||||
if !self.eof && err.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
Err(io::Error::from(err))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Drop for CommandReader {
|
||||
fn drop(&mut self) {
|
||||
if let Err(error) = self.close() {
|
||||
log::warn!("{}", error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Read for CommandReader {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
let stdout = match self.child.stdout {
|
||||
None => return Ok(0),
|
||||
Some(ref mut stdout) => stdout,
|
||||
};
|
||||
let nread = stdout.read(buf)?;
|
||||
if nread == 0 {
|
||||
self.eof = true;
|
||||
self.close().map(|_| 0)
|
||||
} else {
|
||||
Ok(nread)
|
||||
if self.done {
|
||||
return Ok(0);
|
||||
}
|
||||
let nread = self.stdout.read(buf)?;
|
||||
if nread == 0 {
|
||||
self.done = true;
|
||||
// Reap the child now that we're done reading. If the command
|
||||
// failed, report stderr as an error.
|
||||
if !self.child.wait()?.success() {
|
||||
return Err(io::Error::from(self.stderr.read_to_end()));
|
||||
}
|
||||
}
|
||||
Ok(nread)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -285,7 +231,7 @@ enum StderrReader {
|
||||
|
||||
impl StderrReader {
|
||||
/// Create a reader for stderr that reads contents asynchronously.
|
||||
fn r#async(mut stderr: process::ChildStderr) -> StderrReader {
|
||||
fn async(mut stderr: process::ChildStderr) -> StderrReader {
|
||||
let handle =
|
||||
thread::spawn(move || stderr_to_command_error(&mut stderr));
|
||||
StderrReader::Async(Some(handle))
|
||||
|
@@ -2,7 +2,7 @@ use std::io;
|
||||
|
||||
use termcolor;
|
||||
|
||||
use crate::is_tty_stdout;
|
||||
use is_tty_stdout;
|
||||
|
||||
/// A writer that supports coloring with either line or block buffering.
|
||||
pub struct StandardStream(StandardStreamKind);
|
||||
|
@@ -13,8 +13,8 @@ use clap::{self, crate_authors, crate_version, App, AppSettings};
|
||||
use lazy_static::lazy_static;
|
||||
|
||||
const ABOUT: &str = "
|
||||
ripgrep (rg) recursively searches the current directory for a regex pattern.
|
||||
By default, ripgrep will respect gitignore rules and automatically skip hidden
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
|
||||
Use -h for short descriptions and --help for more details.
|
||||
@@ -568,8 +568,6 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_dfa_size_limit(&mut args);
|
||||
flag_encoding(&mut args);
|
||||
flag_engine(&mut args);
|
||||
flag_field_context_separator(&mut args);
|
||||
flag_field_match_separator(&mut args);
|
||||
flag_file(&mut args);
|
||||
flag_files(&mut args);
|
||||
flag_files_with_matches(&mut args);
|
||||
@@ -698,7 +696,7 @@ fn flag_after_context(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show NUM lines after each match.
|
||||
|
||||
This overrides the --context and --passthru flags.
|
||||
This overrides the --context flag.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("after-context", "NUM")
|
||||
@@ -706,7 +704,6 @@ This overrides the --context and --passthru flags.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
@@ -768,7 +765,7 @@ fn flag_before_context(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Show NUM lines before each match.
|
||||
|
||||
This overrides the --context and --passthru flags.
|
||||
This overrides the --context flag.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("before-context", "NUM")
|
||||
@@ -776,7 +773,6 @@ This overrides the --context and --passthru flags.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru")
|
||||
.overrides("context");
|
||||
args.push(arg);
|
||||
}
|
||||
@@ -1009,8 +1005,7 @@ fn flag_context(args: &mut Vec<RGArg>) {
|
||||
Show NUM lines before and after each match. This is equivalent to providing
|
||||
both the -B/--before-context and -A/--after-context flags with the same value.
|
||||
|
||||
This overrides both the -B/--before-context and -A/--after-context flags,
|
||||
in addition to the --passthru flag.
|
||||
This overrides both the -B/--before-context and -A/--after-context flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("context", "NUM")
|
||||
@@ -1018,7 +1013,6 @@ in addition to the --passthru flag.
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.number()
|
||||
.overrides("passthru")
|
||||
.overrides("before-context")
|
||||
.overrides("after-context");
|
||||
args.push(arg);
|
||||
@@ -1057,13 +1051,11 @@ fn flag_count(args: &mut Vec<RGArg>) {
|
||||
This flag suppresses normal output and shows the number of lines that match
|
||||
the given patterns for each file searched. Each file containing a match has its
|
||||
path and count printed on each line. Note that this reports the number of lines
|
||||
that match and not the total number of matches, unless -U/--multiline is
|
||||
enabled. In multiline mode, --count is equivalent to --count-matches.
|
||||
that match and not the total number of matches.
|
||||
|
||||
If only one file is given to ripgrep, then only the count is printed if there
|
||||
is a match. The --with-filename flag can be used to force printing the file
|
||||
path in this case. If you need a count to be printed regardless of whether
|
||||
there is a match, then use --include-zero.
|
||||
path in this case.
|
||||
|
||||
This overrides the --count-matches flag. Note that when --count is combined
|
||||
with --only-matching, then ripgrep behaves as if --count-matches was given.
|
||||
@@ -1217,7 +1209,7 @@ between supported regex engines depending on the features used in a pattern on
|
||||
a best effort basis.
|
||||
|
||||
Note that the 'pcre2' engine is an optional ripgrep feature. If PCRE2 wasn't
|
||||
included in your build of ripgrep, then using this flag will result in ripgrep
|
||||
including in your build of ripgrep, then using this flag will result in ripgrep
|
||||
printing an error message and exiting.
|
||||
|
||||
This overrides previous uses of --pcre2 and --auto-hybrid-regex flags.
|
||||
@@ -1235,38 +1227,6 @@ This overrides previous uses of --pcre2 and --auto-hybrid-regex flags.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_field_context_separator(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Set the field context separator.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Set the field context separator, which is used to delimit file paths, line
|
||||
numbers, columns and the context itself, when printing contextual lines. The
|
||||
separator may be any number of bytes, including zero. Escape sequences like
|
||||
\\x7F or \\t may be used. The default value is -.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("field-context-separator", "SEPARATOR")
|
||||
.help(SHORT)
|
||||
.long_help(LONG);
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_field_match_separator(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Set the match separator.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Set the field match separator, which is used to delimit file paths, line
|
||||
numbers, columns and the match itself. The separator may be any number of
|
||||
bytes, including zero. Escape sequences like \\x7F or \\t may be used. The
|
||||
default value is -.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::flag("field-match-separator", "SEPARATOR")
|
||||
.help(SHORT)
|
||||
.long_help(LONG);
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_file(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Search for patterns from the given file.";
|
||||
const LONG: &str = long!(
|
||||
@@ -1306,10 +1266,10 @@ This is useful to determine whether a particular file is being searched or not.
|
||||
}
|
||||
|
||||
fn flag_files_with_matches(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Print the paths with at least one match.";
|
||||
const SHORT: &str = "Only print the paths with at least one match.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Print the paths with at least one match and suppress match contents.
|
||||
Only print the paths with at least one match.
|
||||
|
||||
This overrides --files-without-match.
|
||||
"
|
||||
@@ -1323,11 +1283,11 @@ This overrides --files-without-match.
|
||||
}
|
||||
|
||||
fn flag_files_without_match(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Print the paths that contain zero matches.";
|
||||
const SHORT: &str = "Only print the paths that contain zero matches.";
|
||||
const LONG: &str = long!(
|
||||
"\
|
||||
Print the paths that contain zero matches and suppress match contents. This
|
||||
inverts/negates the --files-with-matches flag.
|
||||
Only print the paths that contain zero matches. This inverts/negates the
|
||||
--files-with-matches flag.
|
||||
|
||||
This overrides --files-with-matches.
|
||||
"
|
||||
@@ -1394,13 +1354,6 @@ used. Globbing rules match .gitignore globs. Precede a glob with a ! to exclude
|
||||
it. If multiple globs match a file or directory, the glob given later in the
|
||||
command line takes precedence.
|
||||
|
||||
As an extension, globs support specifying alternatives: *-g ab{c,d}* is
|
||||
equivalent to *-g abc -g abd*. Empty alternatives like *-g ab{,c}* are not
|
||||
currently supported. Note that this syntax extension is also currently enabled
|
||||
in gitignore files, even though this syntax isn't supported by git itself.
|
||||
ripgrep may disable this syntax extension in gitignore files, but it will
|
||||
always remain available via the -g/--glob flag.
|
||||
|
||||
When this flag is set, every file and directory is applied to it to test for
|
||||
a match. So for example, if you only want to search in a particular directory
|
||||
'foo', then *-g foo* is incorrect because 'foo/bar' does not match the glob
|
||||
@@ -1480,15 +1433,10 @@ Search hidden files and directories. By default, hidden files and directories
|
||||
are skipped. Note that if a hidden file or a directory is whitelisted in an
|
||||
ignore file, then it will be searched even if this flag isn't provided.
|
||||
|
||||
A file or directory is considered hidden if its base name starts with a dot
|
||||
character ('.'). On operating systems which support a `hidden` file attribute,
|
||||
like Windows, files with this attribute are also considered hidden.
|
||||
|
||||
This flag can be disabled with --no-hidden.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("hidden")
|
||||
.short(".")
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.overrides("no-hidden");
|
||||
@@ -2023,9 +1971,6 @@ fn flag_no_ignore_dot(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Don't respect .ignore files.
|
||||
|
||||
This does *not* affect whether ripgrep will ignore files and directories
|
||||
whose names begin with a dot. For that, see the -./--hidden flag.
|
||||
|
||||
This flag can be disabled with the --ignore-dot flag.
|
||||
"
|
||||
);
|
||||
@@ -2396,17 +2341,12 @@ the empty string. For example, if you are searching using 'rg foo' then using
|
||||
'rg \"^|foo\"' instead will emit every line in every file searched, but only
|
||||
occurrences of 'foo' will be highlighted. This flag enables the same behavior
|
||||
without needing to modify the pattern.
|
||||
|
||||
This overrides the --context, --after-context and --before-context flags.
|
||||
"
|
||||
);
|
||||
let arg = RGArg::switch("passthru")
|
||||
.help(SHORT)
|
||||
.long_help(LONG)
|
||||
.alias("passthrough")
|
||||
.overrides("after-context")
|
||||
.overrides("before-context")
|
||||
.overrides("context");
|
||||
.alias("passthrough");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
@@ -3027,8 +2967,8 @@ fn flag_unrestricted(args: &mut Vec<RGArg>) {
|
||||
"\
|
||||
Reduce the level of \"smart\" searching. A single -u won't respect .gitignore
|
||||
(etc.) files (--no-ignore). Two -u flags will additionally search hidden files
|
||||
and directories (-./--hidden). Three -u flags will additionally search binary
|
||||
files (--binary).
|
||||
and directories (--hidden). Three -u flags will additionally search binary files
|
||||
(--binary).
|
||||
|
||||
'rg -uuu' is roughly equivalent to 'grep -r'.
|
||||
"
|
||||
|
@@ -186,7 +186,7 @@ impl Args {
|
||||
/// Returns true if and only if `paths` had to be populated with a default
|
||||
/// path, which occurs only when no paths were given as command line
|
||||
/// arguments.
|
||||
pub fn using_default_path(&self) -> bool {
|
||||
fn using_default_path(&self) -> bool {
|
||||
self.0.using_default_path
|
||||
}
|
||||
|
||||
@@ -777,7 +777,6 @@ impl ArgMatches {
|
||||
.path(self.with_filename(paths))
|
||||
.only_matching(self.is_present("only-matching"))
|
||||
.per_match(self.is_present("vimgrep"))
|
||||
.per_match_one_line(true)
|
||||
.replacement(self.replacement())
|
||||
.max_columns(self.max_columns()?)
|
||||
.max_columns_preview(self.max_columns_preview())
|
||||
@@ -787,8 +786,8 @@ impl ArgMatches {
|
||||
.trim_ascii(self.is_present("trim"))
|
||||
.separator_search(None)
|
||||
.separator_context(self.context_separator())
|
||||
.separator_field_match(self.field_match_separator())
|
||||
.separator_field_context(self.field_context_separator())
|
||||
.separator_field_match(b":".to_vec())
|
||||
.separator_field_context(b"-".to_vec())
|
||||
.separator_path(self.path_separator()?)
|
||||
.path_terminator(self.path_terminator());
|
||||
if separator_search {
|
||||
@@ -1378,24 +1377,6 @@ impl ArgMatches {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the unescaped field context separator. If one wasn't specified,
|
||||
/// then '-' is used as the default.
|
||||
fn field_context_separator(&self) -> Vec<u8> {
|
||||
match self.value_of_os("field-context-separator") {
|
||||
None => b"-".to_vec(),
|
||||
Some(sep) => cli::unescape_os(&sep),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the unescaped field match separator. If one wasn't specified,
|
||||
/// then ':' is used as the default.
|
||||
fn field_match_separator(&self) -> Vec<u8> {
|
||||
match self.value_of_os("field-match-separator") {
|
||||
None => b":".to_vec(),
|
||||
Some(sep) => cli::unescape_os(&sep),
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a sequence of all available patterns from the command line.
|
||||
/// This includes reading the -e/--regexp and -f/--file flags.
|
||||
///
|
||||
@@ -1720,7 +1701,7 @@ impl ArgMatches {
|
||||
self.0.value_of_os(name)
|
||||
}
|
||||
|
||||
fn values_of_os(&self, name: &str) -> Option<clap::OsValues<'_>> {
|
||||
fn values_of_os(&self, name: &str) -> Option<clap::OsValues> {
|
||||
self.0.values_of_os(name)
|
||||
}
|
||||
}
|
||||
|
@@ -24,13 +24,13 @@ impl Logger {
|
||||
}
|
||||
|
||||
impl Log for Logger {
|
||||
fn enabled(&self, _: &log::Metadata<'_>) -> bool {
|
||||
fn enabled(&self, _: &log::Metadata) -> bool {
|
||||
// We set the log level via log::set_max_level, so we don't need to
|
||||
// implement filtering here.
|
||||
true
|
||||
}
|
||||
|
||||
fn log(&self, record: &log::Record<'_>) {
|
||||
fn log(&self, record: &log::Record) {
|
||||
match (record.file(), record.line()) {
|
||||
(Some(file), Some(line)) => {
|
||||
eprintln!(
|
||||
|
@@ -83,14 +83,12 @@ fn search(args: &Args) -> Result<bool> {
|
||||
let mut stats = args.stats()?;
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
let mut matched = false;
|
||||
let mut searched = false;
|
||||
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
searched = true;
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
@@ -110,9 +108,6 @@ fn search(args: &Args) -> Result<bool> {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if args.using_default_path() && !searched {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if let Some(ref stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
// We don't care if we couldn't print this successfully.
|
||||
@@ -134,13 +129,11 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
let bufwtr = args.buffer_writer()?;
|
||||
let stats = args.stats()?.map(Mutex::new);
|
||||
let matched = AtomicBool::new(false);
|
||||
let searched = AtomicBool::new(false);
|
||||
let mut searcher_err = None;
|
||||
args.walker_parallel()?.run(|| {
|
||||
let bufwtr = &bufwtr;
|
||||
let stats = &stats;
|
||||
let matched = &matched;
|
||||
let searched = &searched;
|
||||
let subject_builder = &subject_builder;
|
||||
let mut searcher = match args.search_worker(bufwtr.buffer()) {
|
||||
Ok(searcher) => searcher,
|
||||
@@ -155,7 +148,6 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
Some(subject) => subject,
|
||||
None => return WalkState::Continue,
|
||||
};
|
||||
searched.store(true, SeqCst);
|
||||
searcher.printer().get_mut().clear();
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
@@ -189,9 +181,6 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
if let Some(err) = searcher_err.take() {
|
||||
return Err(err);
|
||||
}
|
||||
if args.using_default_path() && !searched.load(SeqCst) {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if let Some(ref locked_stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
let stats = locked_stats.lock().unwrap();
|
||||
@@ -202,14 +191,6 @@ fn search_parallel(args: &Args) -> Result<bool> {
|
||||
Ok(matched.load(SeqCst))
|
||||
}
|
||||
|
||||
fn eprint_nothing_searched() {
|
||||
err_message!(
|
||||
"No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect.\n\
|
||||
Running with --debug will show why files are being skipped."
|
||||
);
|
||||
}
|
||||
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using a single thread.
|
||||
|
@@ -330,12 +330,11 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
} else {
|
||||
self.config.binary_implicit.clone()
|
||||
};
|
||||
let path = subject.path();
|
||||
log::trace!("{}: binary detection: {:?}", path.display(), bin);
|
||||
|
||||
self.searcher.set_binary_detection(bin);
|
||||
|
||||
let path = subject.path();
|
||||
if subject.is_stdin() {
|
||||
self.search_reader(path, &mut io::stdin().lock())
|
||||
self.search_reader(path, io::stdin().lock())
|
||||
} else if self.should_preprocess(path) {
|
||||
self.search_preprocessor(path)
|
||||
} else if self.should_decompress(path) {
|
||||
@@ -399,7 +398,7 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
let mut cmd = Command::new(bin);
|
||||
cmd.arg(path).stdin(Stdio::from(File::open(path)?));
|
||||
|
||||
let mut rdr = self.command_builder.build(&mut cmd).map_err(|err| {
|
||||
let rdr = self.command_builder.build(&mut cmd).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!(
|
||||
@@ -408,28 +407,20 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
),
|
||||
)
|
||||
})?;
|
||||
let result = self.search_reader(path, &mut rdr).map_err(|err| {
|
||||
self.search_reader(path, rdr).map_err(|err| {
|
||||
io::Error::new(
|
||||
io::ErrorKind::Other,
|
||||
format!("preprocessor command failed: '{:?}': {}", cmd, err),
|
||||
)
|
||||
});
|
||||
let close_result = rdr.close();
|
||||
let search_result = result?;
|
||||
close_result?;
|
||||
Ok(search_result)
|
||||
})
|
||||
}
|
||||
|
||||
/// Attempt to decompress the data at the given file path and search the
|
||||
/// result. If the given file path isn't recognized as a compressed file,
|
||||
/// then search it without doing any decompression.
|
||||
fn search_decompress(&mut self, path: &Path) -> io::Result<SearchResult> {
|
||||
let mut rdr = self.decomp_builder.build(path)?;
|
||||
let result = self.search_reader(path, &mut rdr);
|
||||
let close_result = rdr.close();
|
||||
let search_result = result?;
|
||||
close_result?;
|
||||
Ok(search_result)
|
||||
let rdr = self.decomp_builder.build(path)?;
|
||||
self.search_reader(path, rdr)
|
||||
}
|
||||
|
||||
/// Search the contents of the given file path.
|
||||
@@ -456,7 +447,7 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
fn search_reader<R: io::Read>(
|
||||
&mut self,
|
||||
path: &Path,
|
||||
rdr: &mut R,
|
||||
rdr: R,
|
||||
) -> io::Result<SearchResult> {
|
||||
use self::PatternMatcher::*;
|
||||
|
||||
@@ -512,12 +503,12 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
searcher: &mut Searcher,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
mut rdr: R,
|
||||
rdr: R,
|
||||
) -> io::Result<SearchResult> {
|
||||
match *printer {
|
||||
Printer::Standard(ref mut p) => {
|
||||
let mut sink = p.sink_with_path(&matcher, path);
|
||||
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
@@ -525,7 +516,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
}
|
||||
Printer::Summary(ref mut p) => {
|
||||
let mut sink = p.sink_with_path(&matcher, path);
|
||||
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
@@ -533,7 +524,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
}
|
||||
Printer::JSON(ref mut p) => {
|
||||
let mut sink = p.sink_with_path(&matcher, path);
|
||||
searcher.search_reader(&matcher, &mut rdr, &mut sink)?;
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
stats: Some(sink.stats().clone()),
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "globset"
|
||||
version = "0.4.7" #:version
|
||||
version = "0.4.6" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Cross platform single glob and glob set matching. Glob set matching is the
|
||||
@@ -13,7 +13,6 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "glob", "multiple", "set", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "globset"
|
||||
|
@@ -22,6 +22,12 @@ Add this to your `Cargo.toml`:
|
||||
globset = "0.3"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate globset;
|
||||
```
|
||||
|
||||
### Features
|
||||
|
||||
* `serde1`: Enables implementing Serde traits on the `Glob` type.
|
||||
|
@@ -4,6 +4,9 @@ tool itself, see the benchsuite directory.
|
||||
*/
|
||||
#![feature(test)]
|
||||
|
||||
extern crate glob;
|
||||
extern crate globset;
|
||||
extern crate regex;
|
||||
extern crate test;
|
||||
|
||||
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
|
||||
|
@@ -8,7 +8,7 @@ use std::str;
|
||||
use regex;
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use crate::{new_regex, Candidate, Error, ErrorKind};
|
||||
use {new_regex, Candidate, Error, ErrorKind};
|
||||
|
||||
/// Describes a matching strategy for a particular pattern.
|
||||
///
|
||||
@@ -98,7 +98,7 @@ impl hash::Hash for Glob {
|
||||
}
|
||||
|
||||
impl fmt::Display for Glob {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
self.glob.fmt(f)
|
||||
}
|
||||
}
|
||||
@@ -127,7 +127,7 @@ impl GlobMatcher {
|
||||
}
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
self.re.is_match(&path.path)
|
||||
}
|
||||
|
||||
@@ -157,7 +157,7 @@ impl GlobStrategic {
|
||||
}
|
||||
|
||||
/// Tests whether the given path matches this pattern or not.
|
||||
fn is_match_candidate(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
|
||||
let byte_path = &*candidate.path;
|
||||
|
||||
match self.strategy {
|
||||
@@ -403,7 +403,7 @@ impl Glob {
|
||||
if self.opts.case_insensitive {
|
||||
return None;
|
||||
}
|
||||
let (end, need_sep) = match self.tokens.last() {
|
||||
let end = match self.tokens.last() {
|
||||
Some(&Token::ZeroOrMore) => {
|
||||
if self.opts.literal_separator {
|
||||
// If a trailing `*` can't match a `/`, then we can't
|
||||
@@ -414,10 +414,9 @@ impl Glob {
|
||||
// literal prefix.
|
||||
return None;
|
||||
}
|
||||
(self.tokens.len() - 1, false)
|
||||
self.tokens.len() - 1
|
||||
}
|
||||
Some(&Token::RecursiveSuffix) => (self.tokens.len() - 1, true),
|
||||
_ => (self.tokens.len(), false),
|
||||
_ => self.tokens.len(),
|
||||
};
|
||||
let mut lit = String::new();
|
||||
for t in &self.tokens[0..end] {
|
||||
@@ -426,9 +425,6 @@ impl Glob {
|
||||
_ => return None,
|
||||
}
|
||||
}
|
||||
if need_sep {
|
||||
lit.push('/');
|
||||
}
|
||||
if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
@@ -616,8 +612,6 @@ impl<'a> GlobBuilder<'a> {
|
||||
}
|
||||
|
||||
/// Toggle whether a literal `/` is required to match a path separator.
|
||||
///
|
||||
/// By default this is false: `*` and `?` will match `/`.
|
||||
pub fn literal_separator(&mut self, yes: bool) -> &mut GlobBuilder<'a> {
|
||||
self.opts.literal_separator = yes;
|
||||
self
|
||||
@@ -689,7 +683,7 @@ impl Tokens {
|
||||
re.push_str("(?:/?|.*/)");
|
||||
}
|
||||
Token::RecursiveSuffix => {
|
||||
re.push_str("/.*");
|
||||
re.push_str("(?:/?|/.*)");
|
||||
}
|
||||
Token::RecursiveZeroOrMore => {
|
||||
re.push_str("(?:/|/.*/)");
|
||||
@@ -1015,7 +1009,7 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
|
||||
mod tests {
|
||||
use super::Token::*;
|
||||
use super::{Glob, GlobBuilder, Token};
|
||||
use crate::{ErrorKind, GlobSetBuilder};
|
||||
use {ErrorKind, GlobSetBuilder};
|
||||
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
struct Options {
|
||||
@@ -1228,9 +1222,9 @@ mod tests {
|
||||
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re17, "**/**/**", r"^.*$");
|
||||
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
|
||||
toregex!(re19, "a/**", r"^a/.*$");
|
||||
toregex!(re20, "a/**/**", r"^a/.*$");
|
||||
toregex!(re21, "a/**/**/**", r"^a/.*$");
|
||||
toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
|
||||
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
|
||||
@@ -1276,12 +1270,11 @@ mod tests {
|
||||
matches!(matchrec18, "/**/test", "/test");
|
||||
matches!(matchrec19, "**/.*", ".abc");
|
||||
matches!(matchrec20, "**/.*", "abc/.abc");
|
||||
matches!(matchrec21, "**/foo/bar", "foo/bar");
|
||||
matches!(matchrec21, ".*/**", ".abc");
|
||||
matches!(matchrec22, ".*/**", ".abc/abc");
|
||||
matches!(matchrec23, "test/**", "test/");
|
||||
matches!(matchrec24, "test/**", "test/one");
|
||||
matches!(matchrec25, "test/**", "test/one/two");
|
||||
matches!(matchrec26, "some/*/needle.txt", "some/one/needle.txt");
|
||||
matches!(matchrec23, "foo/**", "foo");
|
||||
matches!(matchrec24, "**/foo/bar", "foo/bar");
|
||||
matches!(matchrec25, "some/*/needle.txt", "some/one/needle.txt");
|
||||
|
||||
matches!(matchrange1, "a[0-9]b", "a0b");
|
||||
matches!(matchrange2, "a[0-9]b", "a9b");
|
||||
@@ -1407,8 +1400,6 @@ mod tests {
|
||||
"some/one/two/three/needle.txt",
|
||||
SLASHLIT
|
||||
);
|
||||
nmatches!(matchrec33, ".*/**", ".abc");
|
||||
nmatches!(matchrec34, "foo/**", "foo");
|
||||
|
||||
macro_rules! extract {
|
||||
($which:ident, $name:ident, $pat:expr, $expect:expr) => {
|
||||
@@ -1513,7 +1504,7 @@ mod tests {
|
||||
prefix!(extract_prefix1, "/foo", Some(s("/foo")));
|
||||
prefix!(extract_prefix2, "/foo/*", Some(s("/foo/")));
|
||||
prefix!(extract_prefix3, "**/foo", None);
|
||||
prefix!(extract_prefix4, "foo/**", Some(s("foo/")));
|
||||
prefix!(extract_prefix4, "foo/**", None);
|
||||
|
||||
suffix!(extract_suffix1, "**/foo/bar", Some((s("/foo/bar"), true)));
|
||||
suffix!(extract_suffix2, "*/foo/bar", Some((s("/foo/bar"), false)));
|
||||
|
@@ -103,6 +103,16 @@ or to enable case insensitive matching.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate fnv;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::error::Error as StdError;
|
||||
@@ -115,9 +125,9 @@ use aho_corasick::AhoCorasick;
|
||||
use bstr::{ByteSlice, ByteVec, B};
|
||||
use regex::bytes::{Regex, RegexBuilder, RegexSet};
|
||||
|
||||
use crate::glob::MatchStrategy;
|
||||
pub use crate::glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
use crate::pathutil::{file_name, file_name_ext, normalize_path};
|
||||
use glob::MatchStrategy;
|
||||
pub use glob::{Glob, GlobBuilder, GlobMatcher};
|
||||
use pathutil::{file_name, file_name_ext, normalize_path};
|
||||
|
||||
mod glob;
|
||||
mod pathutil;
|
||||
@@ -218,7 +228,7 @@ impl ErrorKind {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.glob {
|
||||
None => self.kind.fmt(f),
|
||||
Some(ref glob) => {
|
||||
@@ -229,7 +239,7 @@ impl fmt::Display for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for ErrorKind {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
ErrorKind::InvalidRecursive
|
||||
| ErrorKind::UnclosedClass
|
||||
@@ -307,7 +317,7 @@ impl GlobSet {
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn is_match_candidate(&self, path: &Candidate<'_>) -> bool {
|
||||
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
|
||||
if self.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -330,7 +340,7 @@ impl GlobSet {
|
||||
///
|
||||
/// This takes a Candidate as input, which can be used to amortize the
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate(&self, path: &Candidate<'_>) -> Vec<usize> {
|
||||
pub fn matches_candidate(&self, path: &Candidate) -> Vec<usize> {
|
||||
let mut into = vec![];
|
||||
if self.is_empty() {
|
||||
return into;
|
||||
@@ -364,7 +374,7 @@ impl GlobSet {
|
||||
/// cost of preparing a path for matching.
|
||||
pub fn matches_candidate_into(
|
||||
&self,
|
||||
path: &Candidate<'_>,
|
||||
path: &Candidate,
|
||||
into: &mut Vec<usize>,
|
||||
) {
|
||||
into.clear();
|
||||
@@ -413,12 +423,12 @@ impl GlobSet {
|
||||
required_exts.add(i, ext, p.regex().to_owned());
|
||||
}
|
||||
MatchStrategy::Regex => {
|
||||
log::debug!("glob converted to regex: {:?}", p);
|
||||
debug!("glob converted to regex: {:?}", p);
|
||||
regexes.add(i, p.regex().to_owned());
|
||||
}
|
||||
}
|
||||
}
|
||||
log::debug!(
|
||||
debug!(
|
||||
"built glob set; {} literals, {} basenames, {} extensions, \
|
||||
{} prefixes, {} suffixes, {} required extensions, {} regexes",
|
||||
lits.0.len(),
|
||||
@@ -446,13 +456,6 @@ impl GlobSet {
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for GlobSet {
|
||||
/// Create a default empty GlobSet.
|
||||
fn default() -> Self {
|
||||
GlobSet::empty()
|
||||
}
|
||||
}
|
||||
|
||||
/// GlobSetBuilder builds a group of patterns that can be used to
|
||||
/// simultaneously match a file path.
|
||||
#[derive(Clone, Debug)]
|
||||
@@ -533,7 +536,7 @@ enum GlobSetMatchStrategy {
|
||||
}
|
||||
|
||||
impl GlobSetMatchStrategy {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.is_match(candidate),
|
||||
@@ -546,11 +549,7 @@ impl GlobSetMatchStrategy {
|
||||
}
|
||||
}
|
||||
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
use self::GlobSetMatchStrategy::*;
|
||||
match *self {
|
||||
Literal(ref s) => s.matches_into(candidate, matches),
|
||||
@@ -576,16 +575,12 @@ impl LiteralStrategy {
|
||||
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.0.contains_key(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
|
||||
matches.extend(hits);
|
||||
}
|
||||
@@ -604,7 +599,7 @@ impl BasenameLiteralStrategy {
|
||||
self.0.entry(lit.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.basename.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -612,11 +607,7 @@ impl BasenameLiteralStrategy {
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if candidate.basename.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -638,7 +629,7 @@ impl ExtensionStrategy {
|
||||
self.0.entry(ext.into_bytes()).or_insert(vec![]).push(global_index);
|
||||
}
|
||||
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -646,11 +637,7 @@ impl ExtensionStrategy {
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -668,7 +655,7 @@ struct PrefixStrategy {
|
||||
}
|
||||
|
||||
impl PrefixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
@@ -678,11 +665,7 @@ impl PrefixStrategy {
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_prefix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.start() == 0 {
|
||||
@@ -700,7 +683,7 @@ struct SuffixStrategy {
|
||||
}
|
||||
|
||||
impl SuffixStrategy {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
@@ -710,11 +693,7 @@ impl SuffixStrategy {
|
||||
false
|
||||
}
|
||||
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
let path = candidate.path_suffix(self.longest);
|
||||
for m in self.matcher.find_overlapping_iter(path) {
|
||||
if m.end() == path.len() {
|
||||
@@ -728,7 +707,7 @@ impl SuffixStrategy {
|
||||
struct RequiredExtensionStrategy(HashMap<Vec<u8>, Vec<(usize, Regex)>, Fnv>);
|
||||
|
||||
impl RequiredExtensionStrategy {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
if candidate.ext.is_empty() {
|
||||
return false;
|
||||
}
|
||||
@@ -746,11 +725,7 @@ impl RequiredExtensionStrategy {
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
if candidate.ext.is_empty() {
|
||||
return;
|
||||
}
|
||||
@@ -771,15 +746,11 @@ struct RegexSetStrategy {
|
||||
}
|
||||
|
||||
impl RegexSetStrategy {
|
||||
fn is_match(&self, candidate: &Candidate<'_>) -> bool {
|
||||
fn is_match(&self, candidate: &Candidate) -> bool {
|
||||
self.matcher.is_match(candidate.path.as_bytes())
|
||||
}
|
||||
|
||||
fn matches_into(
|
||||
&self,
|
||||
candidate: &Candidate<'_>,
|
||||
matches: &mut Vec<usize>,
|
||||
) {
|
||||
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
|
||||
for i in self.matcher.matches(candidate.path.as_bytes()) {
|
||||
matches.push(self.map[i]);
|
||||
}
|
||||
@@ -862,8 +833,8 @@ impl RequiredExtensionStrategyBuilder {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{GlobSet, GlobSetBuilder};
|
||||
use crate::glob::Glob;
|
||||
use super::GlobSetBuilder;
|
||||
use glob::Glob;
|
||||
|
||||
#[test]
|
||||
fn set_works() {
|
||||
@@ -892,11 +863,4 @@ mod tests {
|
||||
assert!(!set.is_match(""));
|
||||
assert!(!set.is_match("a"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn default_set_is_empty_works() {
|
||||
let set: GlobSet = Default::default();
|
||||
assert!(!set.is_match(""));
|
||||
assert!(!set.is_match("a"));
|
||||
}
|
||||
}
|
||||
|
@@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
|
||||
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
|
||||
/// that recognize other characters as separators.
|
||||
#[cfg(unix)]
|
||||
pub fn normalize_path(path: Cow<'_, [u8]>) -> Cow<'_, [u8]> {
|
||||
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
|
||||
// UNIX only uses /, so we're good.
|
||||
path
|
||||
}
|
||||
|
@@ -11,7 +11,6 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-cli = { version = "0.1.5", path = "../cli" }
|
||||
|
@@ -26,6 +26,12 @@ Add this to your `Cargo.toml`:
|
||||
grep = "0.2"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep;
|
||||
```
|
||||
|
||||
|
||||
### Features
|
||||
|
||||
|
@@ -1,3 +1,7 @@
|
||||
extern crate grep;
|
||||
extern crate termcolor;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::ffi::OsString;
|
||||
|
@@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "ignore"
|
||||
version = "0.4.18" #:version
|
||||
version = "0.4.17" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
A fast library for efficiently matching ignore files such as `.gitignore`
|
||||
@@ -12,7 +12,6 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/ignore"
|
||||
readme = "README.md"
|
||||
keywords = ["glob", "ignore", "gitignore", "pattern", "file"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[lib]
|
||||
name = "ignore"
|
||||
@@ -20,7 +19,7 @@ bench = false
|
||||
|
||||
[dependencies]
|
||||
crossbeam-utils = "0.8.0"
|
||||
globset = { version = "0.4.7", path = "../globset" }
|
||||
globset = { version = "0.4.5", path = "../globset" }
|
||||
lazy_static = "1.1"
|
||||
log = "0.4.5"
|
||||
memchr = "2.1"
|
||||
|
@@ -22,6 +22,12 @@ Add this to your `Cargo.toml`:
|
||||
ignore = "0.4"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate ignore;
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
This example shows the most basic usage of this crate. This code will
|
||||
|
@@ -1,3 +1,7 @@
|
||||
extern crate crossbeam_channel as channel;
|
||||
extern crate ignore;
|
||||
extern crate walkdir;
|
||||
|
||||
use std::env;
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
@@ -10,7 +14,7 @@ fn main() {
|
||||
let mut path = env::args().nth(1).unwrap();
|
||||
let mut parallel = false;
|
||||
let mut simple = false;
|
||||
let (tx, rx) = crossbeam_channel::bounded::<DirEntry>(100);
|
||||
let (tx, rx) = channel::bounded::<DirEntry>(100);
|
||||
if path == "parallel" {
|
||||
path = env::args().nth(2).unwrap();
|
||||
parallel = true;
|
||||
|
@@ -4,7 +4,7 @@
|
||||
/// types to each invocation of ripgrep with the '--type-add' flag.
|
||||
///
|
||||
/// If you would like to add or improve this list, please file a PR:
|
||||
/// <https://github.com/BurntSushi/ripgrep>.
|
||||
/// https://github.com/BurntSushi/ripgrep
|
||||
///
|
||||
/// Please try to keep this list sorted lexicographically and wrapped to 79
|
||||
/// columns (inclusive).
|
||||
@@ -16,12 +16,12 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
|
||||
("asm", &["*.asm", "*.s", "*.S"]),
|
||||
("asp", &[
|
||||
"*.aspx", "*.aspx.cs", "*.aspx.vb", "*.ascx", "*.ascx.cs", "*.ascx.vb",
|
||||
"*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb",
|
||||
]),
|
||||
("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
|
||||
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
|
||||
("awk", &["*.awk"]),
|
||||
("bazel", &["*.bazel", "*.bzl", "*.BUILD", "*.bazelrc", "BUILD", "WORKSPACE"]),
|
||||
("bazel", &["*.bazel", "*.bzl", "BUILD", "WORKSPACE"]),
|
||||
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
|
||||
("brotli", &["*.br"]),
|
||||
("buildstream", &["*.bst"]),
|
||||
@@ -140,7 +140,6 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
|
||||
("meson", &["meson.build", "meson_options.txt"]),
|
||||
("minified", &["*.min.html", "*.min.css", "*.min.js"]),
|
||||
("mint", &["*.mint"]),
|
||||
("mk", &["mkfile"]),
|
||||
("ml", &["*.ml"]),
|
||||
("msbuild", &[
|
||||
@@ -156,7 +155,6 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("pdf", &["*.pdf"]),
|
||||
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
|
||||
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
|
||||
("po", &["*.po"]),
|
||||
("pod", &["*.pod"]),
|
||||
("postscript", &["*.eps", "*.ps"]),
|
||||
("protobuf", &["*.proto"]),
|
||||
@@ -170,15 +168,9 @@ pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
|
||||
("racket", &["*.rkt"]),
|
||||
("rdoc", &["*.rdoc"]),
|
||||
("readme", &["README*", "*README"]),
|
||||
("red", &["*.r", "*.red", "*.reds"]),
|
||||
("robot", &["*.robot"]),
|
||||
("rst", &["*.rst"]),
|
||||
("ruby", &[
|
||||
// Idiomatic files
|
||||
"config.ru", "Gemfile", ".irbrc", "Rakefile",
|
||||
// Extensions
|
||||
"*.gemspec", "*.rb", "*.rbw"
|
||||
]),
|
||||
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
|
||||
("rust", &["*.rs"]),
|
||||
("sass", &["*.sass", "*.scss"]),
|
||||
("scala", &["*.scala", "*.sbt"]),
|
||||
|
@@ -20,12 +20,12 @@ use std::io::{self, BufRead};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
use crate::gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use crate::overrides::{self, Override};
|
||||
use crate::pathutil::{is_hidden, strip_prefix};
|
||||
use crate::types::{self, Types};
|
||||
use crate::walk::DirEntry;
|
||||
use crate::{Error, Match, PartialErrorBuilder};
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use overrides::{self, Override};
|
||||
use pathutil::{is_hidden, strip_prefix};
|
||||
use types::{self, Types};
|
||||
use walk::DirEntry;
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// IgnoreMatch represents information about where a match came from when using
|
||||
/// the `Ignore` matcher.
|
||||
@@ -495,7 +495,7 @@ impl Ignore {
|
||||
}
|
||||
|
||||
/// Returns an iterator over parent ignore matchers, including this one.
|
||||
pub fn parents(&self) -> Parents<'_> {
|
||||
pub fn parents(&self) -> Parents {
|
||||
Parents(Some(self))
|
||||
}
|
||||
|
||||
@@ -581,7 +581,7 @@ impl IgnoreBuilder {
|
||||
.unwrap();
|
||||
let (gi, err) = builder.build_global();
|
||||
if let Some(err) = err {
|
||||
log::debug!("{}", err);
|
||||
debug!("{}", err);
|
||||
}
|
||||
gi
|
||||
};
|
||||
@@ -840,10 +840,10 @@ mod tests {
|
||||
use std::io::Write;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::dir::IgnoreBuilder;
|
||||
use crate::gitignore::Gitignore;
|
||||
use crate::tests::TempDir;
|
||||
use crate::Error;
|
||||
use dir::IgnoreBuilder;
|
||||
use gitignore::Gitignore;
|
||||
use tests::TempDir;
|
||||
use Error;
|
||||
|
||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||
let mut file = File::create(path).unwrap();
|
||||
|
@@ -19,8 +19,8 @@ use globset::{Candidate, GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex::bytes::Regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use crate::pathutil::{is_file_name, strip_prefix};
|
||||
use crate::{Error, Match, PartialErrorBuilder};
|
||||
use pathutil::{is_file_name, strip_prefix};
|
||||
use {Error, Match, PartialErrorBuilder};
|
||||
|
||||
/// Glob represents a single glob in a gitignore file.
|
||||
///
|
||||
@@ -592,7 +592,7 @@ fn parse_excludes_file(data: &[u8]) -> Option<PathBuf> {
|
||||
// N.B. This is the lazy approach, and isn't technically correct, but
|
||||
// probably works in more circumstances. I guess we would ideally have
|
||||
// a full INI parser. Yuck.
|
||||
lazy_static::lazy_static! {
|
||||
lazy_static! {
|
||||
static ref RE: Regex =
|
||||
Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap();
|
||||
};
|
||||
|
@@ -46,12 +46,25 @@ See the documentation for `WalkBuilder` for many other options.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate globset;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
extern crate thread_local;
|
||||
extern crate walkdir;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi_util;
|
||||
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
pub use crate::walk::{
|
||||
pub use walk::{
|
||||
DirEntry, ParallelVisitor, ParallelVisitorBuilder, Walk, WalkBuilder,
|
||||
WalkParallel, WalkState,
|
||||
};
|
||||
@@ -321,7 +334,7 @@ impl error::Error for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Partial(ref errs) => {
|
||||
let msgs: Vec<String> =
|
||||
|
@@ -6,8 +6,8 @@ line tools.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use crate::gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use crate::{Error, Match};
|
||||
use gitignore::{self, Gitignore, GitignoreBuilder};
|
||||
use {Error, Match};
|
||||
|
||||
/// Glob represents a single glob in an override matcher.
|
||||
///
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use std::ffi::OsStr;
|
||||
use std::path::Path;
|
||||
|
||||
use crate::walk::DirEntry;
|
||||
use walk::DirEntry;
|
||||
|
||||
/// Returns true if and only if this entry is considered to be hidden.
|
||||
///
|
||||
|
@@ -93,9 +93,9 @@ use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
|
||||
use regex::Regex;
|
||||
use thread_local::ThreadLocal;
|
||||
|
||||
use crate::default_types::DEFAULT_TYPES;
|
||||
use crate::pathutil::file_name;
|
||||
use crate::{Error, Match};
|
||||
use default_types::DEFAULT_TYPES;
|
||||
use pathutil::file_name;
|
||||
use {Error, Match};
|
||||
|
||||
/// Glob represents a single glob in a set of file type definitions.
|
||||
///
|
||||
@@ -427,7 +427,7 @@ impl TypesBuilder {
|
||||
/// If `name` is `all` or otherwise contains any character that is not a
|
||||
/// Unicode letter or number, then an error is returned.
|
||||
pub fn add(&mut self, name: &str, glob: &str) -> Result<(), Error> {
|
||||
lazy_static::lazy_static! {
|
||||
lazy_static! {
|
||||
static ref RE: Regex = Regex::new(r"^[\pL\pN]+$").unwrap();
|
||||
};
|
||||
if name == "all" || !RE.is_match(name) {
|
||||
|
@@ -13,11 +13,11 @@ use std::vec;
|
||||
use same_file::Handle;
|
||||
use walkdir::{self, WalkDir};
|
||||
|
||||
use crate::dir::{Ignore, IgnoreBuilder};
|
||||
use crate::gitignore::GitignoreBuilder;
|
||||
use crate::overrides::Override;
|
||||
use crate::types::Types;
|
||||
use crate::{Error, PartialErrorBuilder};
|
||||
use dir::{Ignore, IgnoreBuilder};
|
||||
use gitignore::GitignoreBuilder;
|
||||
use overrides::Override;
|
||||
use types::Types;
|
||||
use {Error, PartialErrorBuilder};
|
||||
|
||||
/// A directory entry with a possible error attached.
|
||||
///
|
||||
@@ -252,7 +252,7 @@ struct DirEntryRaw {
|
||||
}
|
||||
|
||||
impl fmt::Debug for DirEntryRaw {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
// Leaving out FileType because it doesn't have a debug impl
|
||||
// in Rust 1.9. We could add it if we really wanted to by manually
|
||||
// querying each possible file type. Meh. ---AG
|
||||
@@ -504,7 +504,7 @@ enum Sorter {
|
||||
struct Filter(Arc<dyn Fn(&DirEntry) -> bool + Send + Sync + 'static>);
|
||||
|
||||
impl fmt::Debug for WalkBuilder {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
f.debug_struct("WalkBuilder")
|
||||
.field("paths", &self.paths)
|
||||
.field("ig_builder", &self.ig_builder)
|
||||
@@ -934,23 +934,15 @@ impl Walk {
|
||||
if ent.depth() == 0 {
|
||||
return Ok(false);
|
||||
}
|
||||
// We ensure that trivial skipping is done before any other potentially
|
||||
// expensive operations (stat, filesystem other) are done. This seems
|
||||
// like an obvious optimization but becomes critical when filesystem
|
||||
// operations even as simple as stat can result in significant
|
||||
// overheads; an example of this was a bespoke filesystem layer in
|
||||
// Windows that hosted files remotely and would download them on-demand
|
||||
// when particular filesystem operations occurred. Users of this system
|
||||
// who ensured correct file-type filters were being used could still
|
||||
// get unnecessary file access resulting in large downloads.
|
||||
if should_skip_entry(&self.ig, ent) {
|
||||
return Ok(true);
|
||||
}
|
||||
|
||||
if let Some(ref stdout) = self.skip {
|
||||
if path_equals(ent, stdout)? {
|
||||
return Ok(true);
|
||||
}
|
||||
}
|
||||
if should_skip_entry(&self.ig, ent) {
|
||||
return Ok(true);
|
||||
}
|
||||
if self.max_filesize.is_some() && !ent.is_dir() {
|
||||
return Ok(skip_filesize(
|
||||
self.max_filesize.unwrap(),
|
||||
@@ -1226,7 +1218,7 @@ impl WalkParallel {
|
||||
/// visitor runs on only one thread, this build-up can be done without
|
||||
/// synchronization. Then, once traversal is complete, all of the results
|
||||
/// can be merged together into a single data structure.
|
||||
pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder<'_>) {
|
||||
pub fn visit(mut self, builder: &mut dyn ParallelVisitorBuilder) {
|
||||
let threads = self.threads();
|
||||
let stack = Arc::new(Mutex::new(vec![]));
|
||||
{
|
||||
@@ -1557,11 +1549,6 @@ impl<'s> Worker<'s> {
|
||||
}
|
||||
}
|
||||
}
|
||||
// N.B. See analogous call in the single-threaded implementation about
|
||||
// why it's important for this to come before the checks below.
|
||||
if should_skip_entry(ig, &dent) {
|
||||
return WalkState::Continue;
|
||||
}
|
||||
if let Some(ref stdout) = self.skip {
|
||||
let is_stdout = match path_equals(&dent, stdout) {
|
||||
Ok(is_stdout) => is_stdout,
|
||||
@@ -1571,6 +1558,7 @@ impl<'s> Worker<'s> {
|
||||
return WalkState::Continue;
|
||||
}
|
||||
}
|
||||
let should_skip_path = should_skip_entry(ig, &dent);
|
||||
let should_skip_filesize =
|
||||
if self.max_filesize.is_some() && !dent.is_dir() {
|
||||
skip_filesize(
|
||||
@@ -1587,7 +1575,8 @@ impl<'s> Worker<'s> {
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if !should_skip_filesize && !should_skip_filtered {
|
||||
if !should_skip_path && !should_skip_filesize && !should_skip_filtered
|
||||
{
|
||||
self.send(Work { dent, ignore: ig.clone(), root_device });
|
||||
}
|
||||
WalkState::Continue
|
||||
@@ -1725,7 +1714,7 @@ fn skip_filesize(
|
||||
|
||||
if let Some(fs) = filesize {
|
||||
if fs > max_filesize {
|
||||
log::debug!("ignoring {}: {} bytes", path.display(), fs);
|
||||
debug!("ignoring {}: {} bytes", path.display(), fs);
|
||||
true
|
||||
} else {
|
||||
false
|
||||
@@ -1738,10 +1727,10 @@ fn skip_filesize(
|
||||
fn should_skip_entry(ig: &Ignore, dent: &DirEntry) -> bool {
|
||||
let m = ig.matched_dir_entry(dent);
|
||||
if m.is_ignore() {
|
||||
log::debug!("ignoring {}: {:?}", dent.path().display(), m);
|
||||
debug!("ignoring {}: {:?}", dent.path().display(), m);
|
||||
true
|
||||
} else if m.is_whitelist() {
|
||||
log::debug!("whitelisting {}: {:?}", dent.path().display(), m);
|
||||
debug!("whitelisting {}: {:?}", dent.path().display(), m);
|
||||
false
|
||||
} else {
|
||||
false
|
||||
@@ -1852,7 +1841,7 @@ mod tests {
|
||||
use std::sync::{Arc, Mutex};
|
||||
|
||||
use super::{DirEntry, WalkBuilder, WalkState};
|
||||
use crate::tests::TempDir;
|
||||
use tests::TempDir;
|
||||
|
||||
fn wfile<P: AsRef<Path>>(path: P, contents: &str) {
|
||||
let mut file = File::create(path).unwrap();
|
||||
|
@@ -1,3 +1,5 @@
|
||||
extern crate ignore;
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
use ignore::gitignore::{Gitignore, GitignoreBuilder};
|
||||
|
@@ -12,7 +12,6 @@ readme = "README.md"
|
||||
keywords = ["regex", "pattern", "trait"]
|
||||
license = "Unlicense/MIT"
|
||||
autotests = false
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
memchr = "2.1"
|
||||
|
@@ -27,3 +27,9 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-matcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_matcher;
|
||||
```
|
||||
|
@@ -92,7 +92,7 @@ impl From<usize> for Ref<'static> {
|
||||
/// starting at the beginning of `replacement`.
|
||||
///
|
||||
/// If no such valid reference could be found, None is returned.
|
||||
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
|
||||
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
|
||||
let mut i = 0;
|
||||
if replacement.len() <= 1 || replacement[0] != b'$' {
|
||||
return None;
|
||||
|
@@ -38,12 +38,14 @@ implementations.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
extern crate memchr;
|
||||
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::ops;
|
||||
use std::u64;
|
||||
|
||||
use crate::interpolate::interpolate;
|
||||
use interpolate::interpolate;
|
||||
|
||||
mod interpolate;
|
||||
|
||||
@@ -302,7 +304,7 @@ pub struct ByteSet(BitSet);
|
||||
struct BitSet([u64; 4]);
|
||||
|
||||
impl fmt::Debug for BitSet {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let mut fmtd = f.debug_set();
|
||||
for b in (0..256).map(|b| b as u8) {
|
||||
if ByteSet(*self).contains(b) {
|
||||
@@ -492,7 +494,7 @@ impl ::std::error::Error for NoError {
|
||||
}
|
||||
|
||||
impl fmt::Display for NoError {
|
||||
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, _: &mut fmt::Formatter) -> fmt::Result {
|
||||
panic!("BUG for NoError: an impossible error occurred")
|
||||
}
|
||||
}
|
||||
@@ -616,31 +618,12 @@ pub trait Matcher {
|
||||
fn find_iter<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
self.find_iter_at(haystack, 0, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack`. If no match exists, then the given function is never
|
||||
/// called. If the function returns `false`, then iteration stops.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn find_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
mut matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
self.try_find_iter_at(haystack, at, |m| Ok(matched(m)))
|
||||
self.try_find_iter(haystack, |m| Ok(matched(m)))
|
||||
.map(|r: Result<(), ()>| r.unwrap())
|
||||
}
|
||||
|
||||
@@ -654,35 +637,12 @@ pub trait Matcher {
|
||||
fn try_find_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> Result<bool, E>,
|
||||
{
|
||||
self.try_find_iter_at(haystack, 0, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack`. If no match exists, then the given function is never
|
||||
/// called. If the function returns `false`, then iteration stops.
|
||||
/// Similarly, if the function returns an error then iteration stops and
|
||||
/// the error is yielded. If an error occurs while executing the search,
|
||||
/// then it is converted to
|
||||
/// `E`.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn try_find_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
mut matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> Result<bool, E>,
|
||||
{
|
||||
let mut last_end = at;
|
||||
let mut last_end = 0;
|
||||
let mut last_match = None;
|
||||
|
||||
loop {
|
||||
@@ -736,33 +696,12 @@ pub trait Matcher {
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> bool,
|
||||
{
|
||||
self.captures_iter_at(haystack, 0, caps, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack` with capture groups extracted from each match. If no
|
||||
/// match exists, then the given function is never called. If the function
|
||||
/// returns `false`, then iteration stops.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn captures_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
mut matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> bool,
|
||||
{
|
||||
self.try_captures_iter_at(haystack, at, caps, |caps| Ok(matched(caps)))
|
||||
self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps)))
|
||||
.map(|r: Result<(), ()>| r.unwrap())
|
||||
}
|
||||
|
||||
@@ -777,36 +716,12 @@ pub trait Matcher {
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> Result<bool, E>,
|
||||
{
|
||||
self.try_captures_iter_at(haystack, 0, caps, matched)
|
||||
}
|
||||
|
||||
/// Executes the given function over successive non-overlapping matches
|
||||
/// in `haystack` with capture groups extracted from each match. If no
|
||||
/// match exists, then the given function is never called. If the function
|
||||
/// returns `false`, then iteration stops. Similarly, if the function
|
||||
/// returns an error then iteration stops and the error is yielded. If
|
||||
/// an error occurs while executing the search, then it is converted to
|
||||
/// `E`.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn try_captures_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
mut matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> Result<bool, E>,
|
||||
{
|
||||
let mut last_end = at;
|
||||
let mut last_end = 0;
|
||||
let mut last_match = None;
|
||||
|
||||
loop {
|
||||
@@ -904,35 +819,13 @@ pub trait Matcher {
|
||||
haystack: &[u8],
|
||||
caps: &mut Self::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
append: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
self.replace_with_captures_at(haystack, 0, caps, dst, append)
|
||||
}
|
||||
|
||||
/// Replaces every match in the given haystack with the result of calling
|
||||
/// `append` with the matching capture groups.
|
||||
///
|
||||
/// If the given `append` function returns `false`, then replacement stops.
|
||||
///
|
||||
/// The significance of the starting point is that it takes the surrounding
|
||||
/// context into consideration. For example, the `\A` anchor can only
|
||||
/// match when `at == 0`.
|
||||
fn replace_with_captures_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
mut append: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
let mut last_match = at;
|
||||
self.captures_iter_at(haystack, at, caps, |caps| {
|
||||
let mut last_match = 0;
|
||||
self.captures_iter(haystack, caps, |caps| {
|
||||
let m = caps.get(0).unwrap();
|
||||
dst.extend(&haystack[last_match..m.start]);
|
||||
last_match = m.end;
|
||||
@@ -1146,18 +1039,6 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).find_iter(haystack, matched)
|
||||
}
|
||||
|
||||
fn find_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
(*self).find_iter_at(haystack, at, matched)
|
||||
}
|
||||
|
||||
fn try_find_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1169,18 +1050,6 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).try_find_iter(haystack, matched)
|
||||
}
|
||||
|
||||
fn try_find_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(Match) -> Result<bool, E>,
|
||||
{
|
||||
(*self).try_find_iter_at(haystack, at, matched)
|
||||
}
|
||||
|
||||
fn captures(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1201,19 +1070,6 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).captures_iter(haystack, caps, matched)
|
||||
}
|
||||
|
||||
fn captures_iter_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> bool,
|
||||
{
|
||||
(*self).captures_iter_at(haystack, at, caps, matched)
|
||||
}
|
||||
|
||||
fn try_captures_iter<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1226,19 +1082,6 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).try_captures_iter(haystack, caps, matched)
|
||||
}
|
||||
|
||||
fn try_captures_iter_at<F, E>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
matched: F,
|
||||
) -> Result<Result<(), E>, Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures) -> Result<bool, E>,
|
||||
{
|
||||
(*self).try_captures_iter_at(haystack, at, caps, matched)
|
||||
}
|
||||
|
||||
fn replace<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
@@ -1264,20 +1107,6 @@ impl<'a, M: Matcher> Matcher for &'a M {
|
||||
(*self).replace_with_captures(haystack, caps, dst, append)
|
||||
}
|
||||
|
||||
fn replace_with_captures_at<F>(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut Self::Captures,
|
||||
dst: &mut Vec<u8>,
|
||||
append: F,
|
||||
) -> Result<(), Self::Error>
|
||||
where
|
||||
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
|
||||
{
|
||||
(*self).replace_with_captures_at(haystack, at, caps, dst, append)
|
||||
}
|
||||
|
||||
fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
|
||||
(*self).is_match(haystack)
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use crate::util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
|
||||
fn matcher(pattern: &str) -> RegexMatcher {
|
||||
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||||
|
@@ -1,3 +1,6 @@
|
||||
extern crate grep_matcher;
|
||||
extern crate regex;
|
||||
|
||||
mod util;
|
||||
|
||||
mod test_matcher;
|
||||
|
@@ -11,7 +11,6 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "pcre", "backreference", "look"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
|
@@ -30,3 +30,9 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-pcre2 = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_pcre2;
|
||||
```
|
||||
|
@@ -50,7 +50,7 @@ impl error::Error for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::__Nonexhaustive => unreachable!(),
|
||||
|
@@ -5,8 +5,11 @@ An implementation of `grep-matcher`'s `Matcher` trait for
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub use crate::error::{Error, ErrorKind};
|
||||
pub use crate::matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
extern crate grep_matcher;
|
||||
extern crate pcre2;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
pub use pcre2::{is_jit_available, version};
|
||||
|
||||
mod error;
|
||||
|
@@ -3,7 +3,7 @@ use std::collections::HashMap;
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use pcre2::bytes::{CaptureLocations, Regex, RegexBuilder};
|
||||
|
||||
use crate::error::Error;
|
||||
use error::Error;
|
||||
|
||||
/// A builder for configuring the compilation of a PCRE2 regex.
|
||||
#[derive(Clone, Debug)]
|
||||
|
@@ -12,11 +12,10 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
|
||||
readme = "README.md"
|
||||
keywords = ["grep", "pattern", "print", "printer", "sink"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[features]
|
||||
default = ["serde1"]
|
||||
serde1 = ["base64", "serde", "serde_json"]
|
||||
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
|
||||
|
||||
[dependencies]
|
||||
base64 = { version = "0.13.0", optional = true }
|
||||
@@ -24,7 +23,8 @@ bstr = "0.2.0"
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
grep-searcher = { version = "0.1.4", path = "../searcher" }
|
||||
termcolor = "1.0.4"
|
||||
serde = { version = "1.0.77", optional = true, features = ["derive"] }
|
||||
serde = { version = "1.0.77", optional = true }
|
||||
serde_derive = { version = "1.0.77", optional = true }
|
||||
serde_json = { version = "1.0.27", optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
|
@@ -26,3 +26,9 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-printer = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_printer;
|
||||
```
|
||||
|
@@ -60,7 +60,7 @@ impl ColorError {
|
||||
}
|
||||
|
||||
impl fmt::Display for ColorError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
ColorError::UnrecognizedOutType(ref name) => write!(
|
||||
f,
|
||||
@@ -147,6 +147,9 @@ pub struct ColorSpecs {
|
||||
/// A `UserColorSpec` can also be converted to a `termcolor::ColorSpec`:
|
||||
///
|
||||
/// ```rust
|
||||
/// extern crate grep_printer;
|
||||
/// extern crate termcolor;
|
||||
///
|
||||
/// # fn main() {
|
||||
/// use termcolor::{Color, ColorSpec};
|
||||
/// use grep_printer::UserColorSpec;
|
||||
|
@@ -4,14 +4,14 @@ use std::time::Instant;
|
||||
|
||||
use grep_matcher::{Match, Matcher};
|
||||
use grep_searcher::{
|
||||
Searcher, Sink, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
|
||||
Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
|
||||
SinkMatch,
|
||||
};
|
||||
use serde_json as json;
|
||||
|
||||
use crate::counter::CounterWriter;
|
||||
use crate::jsont;
|
||||
use crate::stats::Stats;
|
||||
use crate::util::find_iter_at_in_context;
|
||||
use counter::CounterWriter;
|
||||
use jsont;
|
||||
use stats::Stats;
|
||||
|
||||
/// The configuration for the JSON printer.
|
||||
///
|
||||
@@ -507,10 +507,7 @@ impl<W: io::Write> JSON<W> {
|
||||
|
||||
/// Write the given message followed by a new line. The new line is
|
||||
/// determined from the configuration of the given searcher.
|
||||
fn write_message(
|
||||
&mut self,
|
||||
message: &jsont::Message<'_>,
|
||||
) -> io::Result<()> {
|
||||
fn write_message(&mut self, message: &jsont::Message) -> io::Result<()> {
|
||||
if self.config.pretty {
|
||||
json::to_writer_pretty(&mut self.wtr, message)?;
|
||||
} else {
|
||||
@@ -555,7 +552,7 @@ impl<W> JSON<W> {
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct JSONSink<'p, 's, M: Matcher, W> {
|
||||
pub struct JSONSink<'p, 's, M: Matcher, W: 's> {
|
||||
matcher: M,
|
||||
json: &'s mut JSON<W>,
|
||||
path: Option<&'p Path>,
|
||||
@@ -606,12 +603,7 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
|
||||
/// Execute the matcher over the given bytes and record the match
|
||||
/// locations if the current configuration demands match granularity.
|
||||
fn record_matches(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
self.json.matches.clear();
|
||||
// If printing requires knowing the location of each individual match,
|
||||
// then compute and stored those right now for use later. While this
|
||||
@@ -620,17 +612,12 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
// the extent that it's easy to ensure that we never do more than
|
||||
// one search to find the matches.
|
||||
let matches = &mut self.json.matches;
|
||||
find_iter_at_in_context(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range.clone(),
|
||||
|m| {
|
||||
let (s, e) = (m.start() - range.start, m.end() - range.start);
|
||||
matches.push(Match::new(s, e));
|
||||
self.matcher
|
||||
.find_iter(bytes, |m| {
|
||||
matches.push(m);
|
||||
true
|
||||
},
|
||||
)?;
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
// Don't report empty matches appearing at the end of the bytes.
|
||||
if !matches.is_empty()
|
||||
&& matches.last().unwrap().is_empty()
|
||||
@@ -657,16 +644,6 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
self.after_context_remaining == 0
|
||||
}
|
||||
|
||||
/// Returns whether the current match count exceeds the configured limit.
|
||||
/// If there is no limit, then this always returns false.
|
||||
fn match_more_than_limit(&self) -> bool {
|
||||
let limit = match self.json.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
self.match_count > limit
|
||||
}
|
||||
|
||||
/// Write the "begin" message.
|
||||
fn write_begin_message(&mut self) -> io::Result<()> {
|
||||
if self.begin_printed {
|
||||
@@ -685,30 +662,13 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.write_begin_message()?;
|
||||
|
||||
self.match_count += 1;
|
||||
// When we've exceeded our match count, then the remaining context
|
||||
// lines should not be reset, but instead, decremented. This avoids a
|
||||
// bug where we display more matches than a configured limit. The main
|
||||
// idea here is that 'matched' might be called again while printing
|
||||
// an after-context line. In that case, we should treat this as a
|
||||
// contextual line rather than a matching line for the purposes of
|
||||
// termination.
|
||||
if self.match_more_than_limit() {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
} else {
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
}
|
||||
|
||||
self.record_matches(
|
||||
searcher,
|
||||
mat.buffer(),
|
||||
mat.bytes_range_in_buffer(),
|
||||
)?;
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
self.record_matches(mat.bytes())?;
|
||||
self.stats.add_matches(self.json.matches.len() as u64);
|
||||
self.stats.add_matched_lines(mat.lines().count() as u64);
|
||||
|
||||
@@ -727,7 +687,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
ctx: &SinkContext<'_>,
|
||||
ctx: &SinkContext,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.write_begin_message()?;
|
||||
self.json.matches.clear();
|
||||
@@ -737,7 +697,7 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
}
|
||||
let submatches = if searcher.invert_match() {
|
||||
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
self.record_matches(ctx.bytes())?;
|
||||
SubMatches::new(ctx.bytes(), &self.json.matches)
|
||||
} else {
|
||||
SubMatches::empty()
|
||||
@@ -839,7 +799,7 @@ impl<'a> SubMatches<'a> {
|
||||
}
|
||||
|
||||
/// Return this set of match ranges as a slice.
|
||||
fn as_slice(&self) -> &[jsont::SubMatch<'_>] {
|
||||
fn as_slice(&self) -> &[jsont::SubMatch] {
|
||||
match *self {
|
||||
SubMatches::Empty => &[],
|
||||
SubMatches::Small(ref x) => x,
|
||||
@@ -911,38 +871,6 @@ and exhibited clearly, with a label attached.\
|
||||
assert_eq!(got.lines().count(), 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn max_matches_after_context() {
|
||||
let haystack = "\
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
";
|
||||
let matcher = RegexMatcher::new(r"d").unwrap();
|
||||
let mut printer =
|
||||
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
|
||||
SearcherBuilder::new()
|
||||
.after_context(2)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
haystack.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
let got = printer_contents(&mut printer);
|
||||
|
||||
assert_eq!(got.lines().count(), 5);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn no_match() {
|
||||
let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
|
||||
|
@@ -13,7 +13,7 @@ use std::str;
|
||||
use base64;
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
use crate::stats::Stats;
|
||||
use stats::Stats;
|
||||
|
||||
#[derive(Serialize)]
|
||||
#[serde(tag = "type", content = "data")]
|
||||
@@ -90,7 +90,7 @@ enum Data<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Data<'a> {
|
||||
fn from_bytes(bytes: &[u8]) -> Data<'_> {
|
||||
fn from_bytes(bytes: &[u8]) -> Data {
|
||||
match str::from_utf8(bytes) {
|
||||
Ok(text) => Data::Text { text: Cow::Borrowed(text) },
|
||||
Err(_) => Data::Bytes { bytes },
|
||||
@@ -98,7 +98,7 @@ impl<'a> Data<'a> {
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn from_path(path: &Path) -> Data<'_> {
|
||||
fn from_path(path: &Path) -> Data {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
|
||||
match path.to_str() {
|
||||
|
@@ -27,6 +27,10 @@ contain matches.
|
||||
This example shows how to create a "standard" printer and execute a search.
|
||||
|
||||
```
|
||||
extern crate grep_regex;
|
||||
extern crate grep_printer;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use grep_regex::RegexMatcher;
|
||||
@@ -64,26 +68,29 @@ fn example() -> Result<(), Box<Error>> {
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub use crate::color::{
|
||||
default_color_specs, ColorError, ColorSpecs, UserColorSpec,
|
||||
};
|
||||
#[cfg(feature = "serde1")]
|
||||
pub use crate::json::{JSONBuilder, JSONSink, JSON};
|
||||
pub use crate::standard::{Standard, StandardBuilder, StandardSink};
|
||||
pub use crate::stats::Stats;
|
||||
pub use crate::summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
|
||||
pub use crate::util::PrinterPath;
|
||||
extern crate base64;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[cfg(test)]
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde;
|
||||
#[cfg(feature = "serde1")]
|
||||
#[macro_use]
|
||||
extern crate serde_derive;
|
||||
#[cfg(feature = "serde1")]
|
||||
extern crate serde_json;
|
||||
extern crate termcolor;
|
||||
|
||||
// The maximum number of bytes to execute a search to account for look-ahead.
|
||||
//
|
||||
// This is an unfortunate kludge since PCRE2 doesn't provide a way to search
|
||||
// a substring of some input while accounting for look-ahead. In theory, we
|
||||
// could refactor the various 'grep' interfaces to account for it, but it would
|
||||
// be a large change. So for now, we just let PCRE2 go looking a bit for a
|
||||
// match without searching the entire rest of the contents.
|
||||
//
|
||||
// Note that this kludge is only active in multi-line mode.
|
||||
const MAX_LOOK_AHEAD: usize = 128;
|
||||
pub use color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec};
|
||||
#[cfg(feature = "serde1")]
|
||||
pub use json::{JSONBuilder, JSONSink, JSON};
|
||||
pub use standard::{Standard, StandardBuilder, StandardSink};
|
||||
pub use stats::Stats;
|
||||
pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};
|
||||
pub use util::PrinterPath;
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
@@ -8,18 +8,15 @@ use std::time::Instant;
|
||||
use bstr::ByteSlice;
|
||||
use grep_matcher::{Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkFinish,
|
||||
SinkMatch,
|
||||
LineStep, Searcher, Sink, SinkContext, SinkContextKind, SinkError,
|
||||
SinkFinish, SinkMatch,
|
||||
};
|
||||
use termcolor::{ColorSpec, NoColor, WriteColor};
|
||||
|
||||
use crate::color::ColorSpecs;
|
||||
use crate::counter::CounterWriter;
|
||||
use crate::stats::Stats;
|
||||
use crate::util::{
|
||||
find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator,
|
||||
PrinterPath, Replacer, Sunk,
|
||||
};
|
||||
use color::ColorSpecs;
|
||||
use counter::CounterWriter;
|
||||
use stats::Stats;
|
||||
use util::{trim_ascii_prefix, PrinterPath, Replacer, Sunk};
|
||||
|
||||
/// The configuration for the standard printer.
|
||||
///
|
||||
@@ -34,7 +31,6 @@ struct Config {
|
||||
path: bool,
|
||||
only_matching: bool,
|
||||
per_match: bool,
|
||||
per_match_one_line: bool,
|
||||
replacement: Arc<Option<Vec<u8>>>,
|
||||
max_columns: Option<u64>,
|
||||
max_columns_preview: bool,
|
||||
@@ -59,7 +55,6 @@ impl Default for Config {
|
||||
path: true,
|
||||
only_matching: false,
|
||||
per_match: false,
|
||||
per_match_one_line: false,
|
||||
replacement: Arc::new(None),
|
||||
max_columns: None,
|
||||
max_columns_preview: false,
|
||||
@@ -224,36 +219,15 @@ impl StandardBuilder {
|
||||
/// the `column` option, which will show the starting column number for
|
||||
/// every match on every line.
|
||||
///
|
||||
/// When multi-line mode is enabled, each match is printed, including every
|
||||
/// line in the match. As with single line matches, if a line contains
|
||||
/// multiple matches (even if only partially), then that line is printed
|
||||
/// once for each match it participates in, assuming it's the first line in
|
||||
/// that match. In multi-line mode, column numbers only indicate the start
|
||||
/// of a match. Subsequent lines in a multi-line match always have a column
|
||||
/// number of `1`.
|
||||
///
|
||||
/// When a match contains multiple lines, enabling `per_match_one_line`
|
||||
/// will cause only the first line in each match to be printed.
|
||||
/// When multi-line mode is enabled, each match and its accompanying lines
|
||||
/// are printed. As with single line matches, if a line contains multiple
|
||||
/// matches (even if only partially), then that line is printed once for
|
||||
/// each match it participates in.
|
||||
pub fn per_match(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||
self.config.per_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Print at most one line per match when `per_match` is enabled.
|
||||
///
|
||||
/// By default, every line in each match found is printed when `per_match`
|
||||
/// is enabled. However, this is sometimes undesirable, e.g., when you
|
||||
/// only ever want one line per match.
|
||||
///
|
||||
/// This is only applicable when multi-line matching is enabled, since
|
||||
/// otherwise, matches are guaranteed to span one line.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn per_match_one_line(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||
self.config.per_match_one_line = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the bytes that will be used to replace each occurrence of a match
|
||||
/// found.
|
||||
///
|
||||
@@ -318,6 +292,9 @@ impl StandardBuilder {
|
||||
/// Column numbers are computed in terms of bytes from the start of the
|
||||
/// line being printed.
|
||||
///
|
||||
/// For matches that span multiple lines, the column number for each
|
||||
/// matching line is in terms of the first matching line.
|
||||
///
|
||||
/// This is disabled by default.
|
||||
pub fn column(&mut self, yes: bool) -> &mut StandardBuilder {
|
||||
self.config.column = yes;
|
||||
@@ -625,7 +602,7 @@ impl<W> Standard<W> {
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct StandardSink<'p, 's, M: Matcher, W> {
|
||||
pub struct StandardSink<'p, 's, M: Matcher, W: 's> {
|
||||
matcher: M,
|
||||
standard: &'s mut Standard<W>,
|
||||
replacer: Replacer<M>,
|
||||
@@ -685,12 +662,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
|
||||
/// Execute the matcher over the given bytes and record the match
|
||||
/// locations if the current configuration demands match granularity.
|
||||
fn record_matches(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
fn record_matches(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
self.standard.matches.clear();
|
||||
if !self.needs_match_granularity {
|
||||
return Ok(());
|
||||
@@ -703,21 +675,16 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
// one search to find the matches (well, for replacements, we do one
|
||||
// additional search to perform the actual replacement).
|
||||
let matches = &mut self.standard.matches;
|
||||
find_iter_at_in_context(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range.clone(),
|
||||
|m| {
|
||||
let (s, e) = (m.start() - range.start, m.end() - range.start);
|
||||
matches.push(Match::new(s, e));
|
||||
self.matcher
|
||||
.find_iter(bytes, |m| {
|
||||
matches.push(m);
|
||||
true
|
||||
},
|
||||
)?;
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
// Don't report empty matches appearing at the end of the bytes.
|
||||
if !matches.is_empty()
|
||||
&& matches.last().unwrap().is_empty()
|
||||
&& matches.last().unwrap().start() >= range.end
|
||||
&& matches.last().unwrap().start() >= bytes.len()
|
||||
{
|
||||
matches.pop().unwrap();
|
||||
}
|
||||
@@ -728,25 +695,14 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
/// replacement, lazily allocating memory if necessary.
|
||||
///
|
||||
/// To access the result of a replacement, use `replacer.replacement()`.
|
||||
fn replace(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
fn replace(&mut self, bytes: &[u8]) -> io::Result<()> {
|
||||
self.replacer.clear();
|
||||
if self.standard.config.replacement.is_some() {
|
||||
let replacement = (*self.standard.config.replacement)
|
||||
.as_ref()
|
||||
.map(|r| &*r)
|
||||
.unwrap();
|
||||
self.replacer.replace_all(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range,
|
||||
replacement,
|
||||
)?;
|
||||
self.replacer.replace_all(&self.matcher, bytes, replacement)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
@@ -766,16 +722,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> StandardSink<'p, 's, M, W> {
|
||||
}
|
||||
self.after_context_remaining == 0
|
||||
}
|
||||
|
||||
/// Returns whether the current match count exceeds the configured limit.
|
||||
/// If there is no limit, then this always returns false.
|
||||
fn match_more_than_limit(&self) -> bool {
|
||||
let limit = match self.standard.config.max_matches {
|
||||
None => return false,
|
||||
Some(limit) => limit,
|
||||
};
|
||||
self.match_count > limit
|
||||
}
|
||||
}
|
||||
|
||||
impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
@@ -784,29 +730,13 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.match_count += 1;
|
||||
// When we've exceeded our match count, then the remaining context
|
||||
// lines should not be reset, but instead, decremented. This avoids a
|
||||
// bug where we display more matches than a configured limit. The main
|
||||
// idea here is that 'matched' might be called again while printing
|
||||
// an after-context line. In that case, we should treat this as a
|
||||
// contextual line rather than a matching line for the purposes of
|
||||
// termination.
|
||||
if self.match_more_than_limit() {
|
||||
self.after_context_remaining =
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
} else {
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
}
|
||||
self.after_context_remaining = searcher.after_context() as u64;
|
||||
|
||||
self.record_matches(
|
||||
searcher,
|
||||
mat.buffer(),
|
||||
mat.bytes_range_in_buffer(),
|
||||
)?;
|
||||
self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
|
||||
self.record_matches(mat.bytes())?;
|
||||
self.replace(mat.bytes())?;
|
||||
|
||||
if let Some(ref mut stats) = self.stats {
|
||||
stats.add_matches(self.standard.matches.len() as u64);
|
||||
@@ -825,7 +755,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
ctx: &SinkContext<'_>,
|
||||
ctx: &SinkContext,
|
||||
) -> Result<bool, io::Error> {
|
||||
self.standard.matches.clear();
|
||||
self.replacer.clear();
|
||||
@@ -835,8 +765,8 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
self.after_context_remaining.saturating_sub(1);
|
||||
}
|
||||
if searcher.invert_match() {
|
||||
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
self.record_matches(ctx.bytes())?;
|
||||
self.replace(ctx.bytes())?;
|
||||
}
|
||||
if searcher.binary_detection().convert_byte().is_some() {
|
||||
if self.binary_byte_offset.is_some() {
|
||||
@@ -904,7 +834,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for StandardSink<'p, 's, M, W> {
|
||||
/// A StandardImpl is initialized every time a match or a contextual line is
|
||||
/// reported.
|
||||
#[derive(Debug)]
|
||||
struct StandardImpl<'a, M: Matcher, W> {
|
||||
struct StandardImpl<'a, M: 'a + Matcher, W: 'a> {
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<'a, 'a, M, W>,
|
||||
sunk: Sunk<'a>,
|
||||
@@ -916,7 +846,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// Bundle self with a searcher and return the core implementation of Sink.
|
||||
fn new(
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<'_, '_, M, W>,
|
||||
sink: &'a StandardSink<M, W>,
|
||||
) -> StandardImpl<'a, M, W> {
|
||||
StandardImpl {
|
||||
searcher: searcher,
|
||||
@@ -930,7 +860,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// for use with handling matching lines.
|
||||
fn from_match(
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<'_, '_, M, W>,
|
||||
sink: &'a StandardSink<M, W>,
|
||||
mat: &'a SinkMatch<'a>,
|
||||
) -> StandardImpl<'a, M, W> {
|
||||
let sunk = Sunk::from_sink_match(
|
||||
@@ -945,7 +875,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
/// for use with handling contextual lines.
|
||||
fn from_context(
|
||||
searcher: &'a Searcher,
|
||||
sink: &'a StandardSink<'_, '_, M, W>,
|
||||
sink: &'a StandardSink<M, W>,
|
||||
ctx: &'a SinkContext<'a>,
|
||||
) -> StandardImpl<'a, M, W> {
|
||||
let sunk = Sunk::from_sink_context(
|
||||
@@ -1160,7 +1090,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
self.write_prelude(
|
||||
self.sunk.absolute_byte_offset() + line.start() as u64,
|
||||
self.sunk.line_number().map(|n| n + count),
|
||||
Some(m.start().saturating_sub(line.start()) as u64 + 1),
|
||||
Some(m.start() as u64 + 1),
|
||||
)?;
|
||||
count += 1;
|
||||
if self.exceeds_max_columns(&bytes[line]) {
|
||||
@@ -1185,15 +1115,6 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
}
|
||||
}
|
||||
self.write_line_term()?;
|
||||
// It turns out that vimgrep really only wants one line per
|
||||
// match, even when a match spans multiple lines. So when
|
||||
// that option is enabled, we just quit after printing the
|
||||
// first line.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1866
|
||||
if self.config().per_match_one_line {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -1548,7 +1469,14 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
}
|
||||
|
||||
fn trim_line_terminator(&self, buf: &[u8], line: &mut Match) {
|
||||
trim_line_terminator(&self.searcher, buf, line);
|
||||
let lineterm = self.searcher.line_terminator();
|
||||
if lineterm.is_suffix(&buf[*line]) {
|
||||
let mut end = line.end() - 1;
|
||||
if lineterm.is_crlf() && buf[end - 1] == b'\r' {
|
||||
end -= 1;
|
||||
}
|
||||
*line = line.with_end(end);
|
||||
}
|
||||
}
|
||||
|
||||
fn has_line_terminator(&self, buf: &[u8]) -> bool {
|
||||
@@ -1617,12 +1545,11 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use grep_matcher::LineTerminator;
|
||||
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
|
||||
use grep_regex::RegexMatcher;
|
||||
use grep_searcher::SearcherBuilder;
|
||||
use termcolor::{Ansi, NoColor};
|
||||
use termcolor::NoColor;
|
||||
|
||||
use super::{ColorSpecs, Standard, StandardBuilder};
|
||||
use super::{Standard, StandardBuilder};
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@@ -1647,10 +1574,6 @@ and exhibited clearly, with a label attached.\
|
||||
String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap()
|
||||
}
|
||||
|
||||
fn printer_contents_ansi(printer: &mut Standard<Ansi<Vec<u8>>>) -> String {
|
||||
String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reports_match() {
|
||||
let matcher = RegexMatcher::new("Sherlock").unwrap();
|
||||
@@ -3067,9 +2990,9 @@ Holmeses, success in the province of detective work must always
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2:1:Holmeses, success in the province of detective work must always
|
||||
2:16:Holmeses, success in the province of detective work must always
|
||||
5:12:but Doctor Watson has to have it taken out for him and dusted,
|
||||
6:1:and exhibited clearly, with a label attached.
|
||||
6:12:and exhibited clearly, with a label attached.
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
@@ -3096,94 +3019,9 @@ Holmeses, success in the province of detective work must always
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2:1:Holmeses, success in the province of detective work must always
|
||||
2:58:Holmeses, success in the province of detective work must always
|
||||
3:1:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn per_match_multi_line1_only_first_line() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s:.{0})(Doctor Watsons|Sherlock)").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.per_match(true)
|
||||
.per_match_one_line(true)
|
||||
.column(true)
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.line_number(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:9:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
3:49:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn per_match_multi_line2_only_first_line() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s)Watson.+?(Holmeses|clearly)").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.per_match(true)
|
||||
.per_match_one_line(true)
|
||||
.column(true)
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.line_number(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
5:12:but Doctor Watson has to have it taken out for him and dusted,
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn per_match_multi_line3_only_first_line() {
|
||||
let matcher =
|
||||
RegexMatcher::new(r"(?s)Watson.+?Holmeses|always.+?be").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.per_match(true)
|
||||
.per_match_one_line(true)
|
||||
.column(true)
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.multi_line(true)
|
||||
.line_number(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
SHERLOCK.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "\
|
||||
1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2:58:Holmeses, success in the province of detective work must always
|
||||
2:16:Holmeses, success in the province of detective work must always
|
||||
2:123:Holmeses, success in the province of detective work must always
|
||||
3:123:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
@@ -3242,80 +3080,6 @@ Holmeses, success in the province of detective work must always
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
// This is a somewhat weird test that checks the behavior of attempting
|
||||
// to replace a line terminator with something else.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1311
|
||||
#[test]
|
||||
fn replacement_multi_line() {
|
||||
let matcher = RegexMatcher::new(r"\n").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.replacement(Some(b"?".to_vec()))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(true)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"hello\nworld\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "1:hello?world?\n";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replacement_multi_line_diff_line_term() {
|
||||
let matcher = RegexMatcherBuilder::new()
|
||||
.line_terminator(Some(b'\x00'))
|
||||
.build(r"\n")
|
||||
.unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.replacement(Some(b"?".to_vec()))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_terminator(LineTerminator::byte(b'\x00'))
|
||||
.line_number(true)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"hello\nworld\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "1:hello?world?\x00";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replacement_multi_line_combine_lines() {
|
||||
let matcher = RegexMatcher::new(r"\n(.)?").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.replacement(Some(b"?$1".to_vec()))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(true)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
"hello\nworld\n".as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "1:hello?world?\n";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn replacement_max_columns() {
|
||||
let matcher = RegexMatcher::new(r"Sherlock|Doctor (\w+)").unwrap();
|
||||
@@ -3622,57 +3386,4 @@ and xxx clearly, with a label attached.
|
||||
";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regression_search_empty_with_crlf() {
|
||||
let matcher =
|
||||
RegexMatcherBuilder::new().crlf(true).build(r"x?").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.color_specs(ColorSpecs::default_with_color())
|
||||
.build(Ansi::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_terminator(LineTerminator::crlf())
|
||||
.build()
|
||||
.search_reader(&matcher, &b"\n"[..], printer.sink(&matcher))
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents_ansi(&mut printer);
|
||||
assert!(!got.is_empty());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn regression_after_context_with_match() {
|
||||
let haystack = "\
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
";
|
||||
|
||||
let matcher = RegexMatcherBuilder::new().build(r"d").unwrap();
|
||||
let mut printer = StandardBuilder::new()
|
||||
.max_matches(Some(1))
|
||||
.build(NoColor::new(vec![]));
|
||||
SearcherBuilder::new()
|
||||
.line_number(true)
|
||||
.after_context(2)
|
||||
.build()
|
||||
.search_reader(
|
||||
&matcher,
|
||||
haystack.as_bytes(),
|
||||
printer.sink(&matcher),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let got = printer_contents(&mut printer);
|
||||
let expected = "4:d\n5-e\n6:d\n";
|
||||
assert_eq_printed!(expected, got);
|
||||
}
|
||||
}
|
||||
|
@@ -1,14 +1,14 @@
|
||||
use std::ops::{Add, AddAssign};
|
||||
use std::time::Duration;
|
||||
|
||||
use crate::util::NiceDuration;
|
||||
use util::NiceDuration;
|
||||
|
||||
/// Summary statistics produced at the end of a search.
|
||||
///
|
||||
/// When statistics are reported by a printer, they correspond to all searches
|
||||
/// executed with that printer.
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
#[cfg_attr(feature = "serde1", derive(serde::Serialize))]
|
||||
#[cfg_attr(feature = "serde1", derive(Serialize))]
|
||||
pub struct Stats {
|
||||
elapsed: NiceDuration,
|
||||
searches: u64,
|
||||
|
@@ -8,10 +8,10 @@ use grep_matcher::Matcher;
|
||||
use grep_searcher::{Searcher, Sink, SinkError, SinkFinish, SinkMatch};
|
||||
use termcolor::{ColorSpec, NoColor, WriteColor};
|
||||
|
||||
use crate::color::ColorSpecs;
|
||||
use crate::counter::CounterWriter;
|
||||
use crate::stats::Stats;
|
||||
use crate::util::{find_iter_at_in_context, PrinterPath};
|
||||
use color::ColorSpecs;
|
||||
use counter::CounterWriter;
|
||||
use stats::Stats;
|
||||
use util::PrinterPath;
|
||||
|
||||
/// The configuration for the summary printer.
|
||||
///
|
||||
@@ -457,7 +457,7 @@ impl<W> Summary<W> {
|
||||
/// * `W` refers to the underlying writer that this printer is writing its
|
||||
/// output to.
|
||||
#[derive(Debug)]
|
||||
pub struct SummarySink<'p, 's, M: Matcher, W> {
|
||||
pub struct SummarySink<'p, 's, M: Matcher, W: 's> {
|
||||
matcher: M,
|
||||
summary: &'s mut Summary<W>,
|
||||
path: Option<PrinterPath<'p>>,
|
||||
@@ -504,17 +504,6 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
|
||||
self.stats.as_ref()
|
||||
}
|
||||
|
||||
/// Returns true if and only if the searcher may report matches over
|
||||
/// multiple lines.
|
||||
///
|
||||
/// Note that this doesn't just return whether the searcher is in multi
|
||||
/// line mode, but also checks if the matcher can match over multiple lines.
|
||||
/// If it can't, then we don't need multi line handling, even if the
|
||||
/// searcher has multi line mode enabled.
|
||||
fn multi_line(&self, searcher: &Searcher) -> bool {
|
||||
searcher.multi_line_with_matcher(&self.matcher)
|
||||
}
|
||||
|
||||
/// Returns true if this printer should quit.
|
||||
///
|
||||
/// This implements the logic for handling quitting after seeing a certain
|
||||
@@ -590,39 +579,32 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
|
||||
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
let is_multi_line = self.multi_line(searcher);
|
||||
let sink_match_count = if self.stats.is_none() && !is_multi_line {
|
||||
1
|
||||
} else {
|
||||
// This gives us as many bytes as the searcher can offer. This
|
||||
// isn't guaranteed to hold the necessary context to get match
|
||||
// detection correct (because of look-around), but it does in
|
||||
// practice.
|
||||
let buf = mat.buffer();
|
||||
let range = mat.bytes_range_in_buffer();
|
||||
let mut count = 0;
|
||||
find_iter_at_in_context(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
buf,
|
||||
range,
|
||||
|_| {
|
||||
count += 1;
|
||||
true
|
||||
},
|
||||
)?;
|
||||
count
|
||||
};
|
||||
if is_multi_line {
|
||||
self.match_count += sink_match_count;
|
||||
} else {
|
||||
self.match_count += 1;
|
||||
}
|
||||
self.match_count += 1;
|
||||
if let Some(ref mut stats) = self.stats {
|
||||
stats.add_matches(sink_match_count);
|
||||
let mut match_count = 0;
|
||||
self.matcher
|
||||
.find_iter(mat.bytes(), |_| {
|
||||
match_count += 1;
|
||||
true
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
if match_count == 0 {
|
||||
// It is possible for the match count to be zero when
|
||||
// look-around is used. Since `SinkMatch` won't necessarily
|
||||
// contain the look-around in its match span, the search here
|
||||
// could fail to find anything.
|
||||
//
|
||||
// It seems likely that setting match_count=1 here is probably
|
||||
// wrong in some cases, but I don't think we can do any
|
||||
// better. (Because this printer cannot assume that subsequent
|
||||
// contents have been loaded into memory, so we have no way of
|
||||
// increasing the search span here.)
|
||||
match_count = 1;
|
||||
}
|
||||
stats.add_matches(match_count);
|
||||
stats.add_matched_lines(mat.lines().count() as u64);
|
||||
} else if self.summary.config.kind.quit_early() {
|
||||
return Ok(false);
|
||||
|
@@ -7,13 +7,11 @@ use std::time;
|
||||
use bstr::{ByteSlice, ByteVec};
|
||||
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
|
||||
use grep_searcher::{
|
||||
LineIter, Searcher, SinkContext, SinkContextKind, SinkError, SinkMatch,
|
||||
LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
|
||||
};
|
||||
#[cfg(feature = "serde1")]
|
||||
use serde::{Serialize, Serializer};
|
||||
|
||||
use crate::MAX_LOOK_AHEAD;
|
||||
|
||||
/// A type for handling replacements while amortizing allocation.
|
||||
pub struct Replacer<M: Matcher> {
|
||||
space: Option<Space<M>>,
|
||||
@@ -29,7 +27,7 @@ struct Space<M: Matcher> {
|
||||
}
|
||||
|
||||
impl<M: Matcher> fmt::Debug for Replacer<M> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
let (dst, matches) = self.replacement().unwrap_or((&[], &[]));
|
||||
f.debug_struct("Replacer")
|
||||
.field("dst", &dst)
|
||||
@@ -54,28 +52,10 @@ impl<M: Matcher> Replacer<M> {
|
||||
/// This can fail if the underlying matcher reports an error.
|
||||
pub fn replace_all<'a>(
|
||||
&'a mut self,
|
||||
searcher: &Searcher,
|
||||
matcher: &M,
|
||||
mut subject: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
subject: &[u8],
|
||||
replacement: &[u8],
|
||||
) -> io::Result<()> {
|
||||
// See the giant comment in 'find_iter_at_in_context' below for why we
|
||||
// do this dance.
|
||||
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
|
||||
if is_multi_line {
|
||||
if subject[range.end..].len() >= MAX_LOOK_AHEAD {
|
||||
subject = &subject[..range.end + MAX_LOOK_AHEAD];
|
||||
}
|
||||
} else {
|
||||
// When searching a single line, we should remove the line
|
||||
// terminator. Otherwise, it's possible for the regex (via
|
||||
// look-around) to observe the line terminator and not match
|
||||
// because of it.
|
||||
let mut m = Match::new(0, range.end);
|
||||
trim_line_terminator(searcher, subject, &mut m);
|
||||
subject = &subject[..m.end()];
|
||||
}
|
||||
{
|
||||
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
|
||||
self.allocate(matcher)?;
|
||||
@@ -83,24 +63,18 @@ impl<M: Matcher> Replacer<M> {
|
||||
matches.clear();
|
||||
|
||||
matcher
|
||||
.replace_with_captures_at(
|
||||
subject,
|
||||
range.start,
|
||||
caps,
|
||||
dst,
|
||||
|caps, dst| {
|
||||
let start = dst.len();
|
||||
caps.interpolate(
|
||||
|name| matcher.capture_index(name),
|
||||
subject,
|
||||
replacement,
|
||||
dst,
|
||||
);
|
||||
let end = dst.len();
|
||||
matches.push(Match::new(start, end));
|
||||
true
|
||||
},
|
||||
)
|
||||
.replace_with_captures(subject, caps, dst, |caps, dst| {
|
||||
let start = dst.len();
|
||||
caps.interpolate(
|
||||
|name| matcher.capture_index(name),
|
||||
subject,
|
||||
replacement,
|
||||
dst,
|
||||
);
|
||||
let end = dst.len();
|
||||
matches.push(Match::new(start, end));
|
||||
true
|
||||
})
|
||||
.map_err(io::Error::error_message)?;
|
||||
}
|
||||
Ok(())
|
||||
@@ -330,7 +304,7 @@ impl<'a> PrinterPath<'a> {
|
||||
pub struct NiceDuration(pub time::Duration);
|
||||
|
||||
impl fmt::Display for NiceDuration {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "{:0.6}s", self.fractional_seconds())
|
||||
}
|
||||
}
|
||||
@@ -383,78 +357,3 @@ pub fn trim_ascii_prefix(
|
||||
.count();
|
||||
range.with_start(range.start() + count)
|
||||
}
|
||||
|
||||
pub fn find_iter_at_in_context<M, F>(
|
||||
searcher: &Searcher,
|
||||
matcher: M,
|
||||
mut bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
mut matched: F,
|
||||
) -> io::Result<()>
|
||||
where
|
||||
M: Matcher,
|
||||
F: FnMut(Match) -> bool,
|
||||
{
|
||||
// This strange dance is to account for the possibility of look-ahead in
|
||||
// the regex. The problem here is that mat.bytes() doesn't include the
|
||||
// lines beyond the match boundaries in mulit-line mode, which means that
|
||||
// when we try to rediscover the full set of matches here, the regex may no
|
||||
// longer match if it required some look-ahead beyond the matching lines.
|
||||
//
|
||||
// PCRE2 (and the grep-matcher interfaces) has no way of specifying an end
|
||||
// bound of the search. So we kludge it and let the regex engine search the
|
||||
// rest of the buffer... But to avoid things getting too crazy, we cap the
|
||||
// buffer.
|
||||
//
|
||||
// If it weren't for multi-line mode, then none of this would be needed.
|
||||
// Alternatively, if we refactored the grep interfaces to pass along the
|
||||
// full set of matches (if available) from the searcher, then that might
|
||||
// also help here. But that winds up paying an upfront unavoidable cost for
|
||||
// the case where matches don't need to be counted. So then you'd have to
|
||||
// introduce a way to pass along matches conditionally, only when needed.
|
||||
// Yikes.
|
||||
//
|
||||
// Maybe the bigger picture thing here is that the searcher should be
|
||||
// responsible for finding matches when necessary, and the printer
|
||||
// shouldn't be involved in this business in the first place. Sigh. Live
|
||||
// and learn. Abstraction boundaries are hard.
|
||||
let is_multi_line = searcher.multi_line_with_matcher(&matcher);
|
||||
if is_multi_line {
|
||||
if bytes[range.end..].len() >= MAX_LOOK_AHEAD {
|
||||
bytes = &bytes[..range.end + MAX_LOOK_AHEAD];
|
||||
}
|
||||
} else {
|
||||
// When searching a single line, we should remove the line terminator.
|
||||
// Otherwise, it's possible for the regex (via look-around) to observe
|
||||
// the line terminator and not match because of it.
|
||||
let mut m = Match::new(0, range.end);
|
||||
trim_line_terminator(searcher, bytes, &mut m);
|
||||
bytes = &bytes[..m.end()];
|
||||
}
|
||||
matcher
|
||||
.find_iter_at(bytes, range.start, |m| {
|
||||
if m.start() >= range.end {
|
||||
return false;
|
||||
}
|
||||
matched(m)
|
||||
})
|
||||
.map_err(io::Error::error_message)
|
||||
}
|
||||
|
||||
/// Given a buf and some bounds, if there is a line terminator at the end of
|
||||
/// the given bounds in buf, then the bounds are trimmed to remove the line
|
||||
/// terminator.
|
||||
pub fn trim_line_terminator(
|
||||
searcher: &Searcher,
|
||||
buf: &[u8],
|
||||
line: &mut Match,
|
||||
) {
|
||||
let lineterm = searcher.line_terminator();
|
||||
if lineterm.is_suffix(&buf[*line]) {
|
||||
let mut end = line.end() - 1;
|
||||
if lineterm.is_crlf() && end > 0 && buf.get(end - 1) == Some(&b'\r') {
|
||||
end -= 1;
|
||||
}
|
||||
*line = line.with_end(end);
|
||||
}
|
||||
}
|
||||
|
@@ -11,7 +11,6 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "search", "pattern", "line"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
aho-corasick = "0.7.3"
|
||||
@@ -20,4 +19,4 @@ grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
regex = "1.1"
|
||||
regex-syntax = "0.6.5"
|
||||
thread_local = "1.1.2"
|
||||
thread_local = "1"
|
||||
|
@@ -26,3 +26,9 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-regex = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_regex;
|
||||
```
|
||||
|
@@ -3,13 +3,13 @@ use regex::bytes::{Regex, RegexBuilder};
|
||||
use regex_syntax::ast::{self, Ast};
|
||||
use regex_syntax::hir::{self, Hir};
|
||||
|
||||
use crate::ast::AstAnalysis;
|
||||
use crate::crlf::crlfify;
|
||||
use crate::error::Error;
|
||||
use crate::literal::LiteralSets;
|
||||
use crate::multi::alternation_literals;
|
||||
use crate::non_matching::non_matching_bytes;
|
||||
use crate::strip::strip_from_match;
|
||||
use ast::AstAnalysis;
|
||||
use crlf::crlfify;
|
||||
use error::Error;
|
||||
use literal::LiteralSets;
|
||||
use multi::alternation_literals;
|
||||
use non_matching::non_matching_bytes;
|
||||
use strip::strip_from_match;
|
||||
|
||||
/// Config represents the configuration of a regex matcher in this crate.
|
||||
/// The configuration is itself a rough combination of the knobs found in
|
||||
|
@@ -4,9 +4,9 @@ use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::Regex;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use crate::config::ConfiguredHIR;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Clone, Debug)]
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
|
||||
use crate::util;
|
||||
use util;
|
||||
|
||||
/// An error that can occur in this crate.
|
||||
///
|
||||
@@ -72,7 +72,7 @@ impl error::Error for Error {
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self.kind {
|
||||
ErrorKind::Regex(ref s) => write!(f, "{}", s),
|
||||
ErrorKind::NotAllowed(ref lit) => {
|
||||
|
@@ -1,10 +1,20 @@
|
||||
/*!
|
||||
An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine.
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub use crate::error::{Error, ErrorKind};
|
||||
pub use crate::matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
extern crate aho_corasick;
|
||||
extern crate bstr;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax;
|
||||
extern crate thread_local;
|
||||
|
||||
pub use error::{Error, ErrorKind};
|
||||
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
|
||||
|
||||
mod ast;
|
||||
mod config;
|
||||
|
@@ -9,7 +9,7 @@ use bstr::ByteSlice;
|
||||
use regex_syntax::hir::literal::{Literal, Literals};
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use crate::util;
|
||||
use util;
|
||||
|
||||
/// Represents prefix, suffix and inner "required" literals for a regular
|
||||
/// expression.
|
||||
@@ -55,7 +55,7 @@ impl LiteralSets {
|
||||
|
||||
if !word {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
log::debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan,
|
||||
// so we don't need to return anything. But we only do this
|
||||
// if we aren't doing a word regex, since a word regex adds
|
||||
@@ -106,7 +106,7 @@ impl LiteralSets {
|
||||
&& !any_empty
|
||||
&& !any_white
|
||||
{
|
||||
log::debug!("required literals found: {:?}", req_lits);
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> = req_lits
|
||||
.into_iter()
|
||||
.map(|x| util::bytes_to_regex(x))
|
||||
@@ -149,27 +149,27 @@ impl LiteralSets {
|
||||
let lits = match (p_min_len, s_min_len) {
|
||||
(None, None) => return None,
|
||||
(Some(_), None) => {
|
||||
log::debug!("prefix literals found");
|
||||
debug!("prefix literals found");
|
||||
self.prefixes.literals()
|
||||
}
|
||||
(None, Some(_)) => {
|
||||
log::debug!("suffix literals found");
|
||||
debug!("suffix literals found");
|
||||
self.suffixes.literals()
|
||||
}
|
||||
(Some(p), Some(s)) => {
|
||||
if p >= s {
|
||||
log::debug!("prefix literals found");
|
||||
debug!("prefix literals found");
|
||||
self.prefixes.literals()
|
||||
} else {
|
||||
log::debug!("suffix literals found");
|
||||
debug!("suffix literals found");
|
||||
self.suffixes.literals()
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
log::debug!("prefix/suffix literals found: {:?}", lits);
|
||||
debug!("prefix/suffix literals found: {:?}", lits);
|
||||
if has_only_whitespace(lits) {
|
||||
log::debug!("dropping literals because one was whitespace");
|
||||
debug!("dropping literals because one was whitespace");
|
||||
return None;
|
||||
}
|
||||
let alts: Vec<String> =
|
||||
@@ -177,9 +177,9 @@ impl LiteralSets {
|
||||
// We're matching raw bytes, so disable Unicode mode.
|
||||
Some(format!("(?-u:{})", alts.join("|")))
|
||||
} else {
|
||||
log::debug!("required literal found: {:?}", util::show_bytes(lit));
|
||||
debug!("required literal found: {:?}", util::show_bytes(lit));
|
||||
if lit.chars().all(|c| c.is_whitespace()) {
|
||||
log::debug!("dropping literal because one was whitespace");
|
||||
debug!("dropping literal because one was whitespace");
|
||||
return None;
|
||||
}
|
||||
Some(format!("(?-u:{})", util::bytes_to_regex(&lit)))
|
||||
|
@@ -5,11 +5,11 @@ use grep_matcher::{
|
||||
};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
use crate::config::{Config, ConfiguredHIR};
|
||||
use crate::crlf::CRLFMatcher;
|
||||
use crate::error::Error;
|
||||
use crate::multi::MultiLiteralMatcher;
|
||||
use crate::word::WordMatcher;
|
||||
use config::{Config, ConfiguredHIR};
|
||||
use crlf::CRLFMatcher;
|
||||
use error::Error;
|
||||
use multi::MultiLiteralMatcher;
|
||||
use word::WordMatcher;
|
||||
|
||||
/// A builder for constructing a `Matcher` using regular expressions.
|
||||
///
|
||||
@@ -19,7 +19,7 @@ use crate::word::WordMatcher;
|
||||
/// types of optimizations.
|
||||
///
|
||||
/// The syntax supported is documented as part of the regex crate:
|
||||
/// <https://docs.rs/regex/#syntax>.
|
||||
/// https://docs.rs/regex/*/regex/#syntax
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexMatcherBuilder {
|
||||
config: Config,
|
||||
@@ -41,17 +41,17 @@ impl RegexMatcherBuilder {
|
||||
/// pattern.
|
||||
///
|
||||
/// The syntax supported is documented as part of the regex crate:
|
||||
/// <https://docs.rs/regex/#syntax>.
|
||||
/// https://docs.rs/regex/*/regex/#syntax
|
||||
pub fn build(&self, pattern: &str) -> Result<RegexMatcher, Error> {
|
||||
let chir = self.config.hir(pattern)?;
|
||||
let fast_line_regex = chir.fast_line_regex()?;
|
||||
let non_matching_bytes = chir.non_matching_bytes();
|
||||
if let Some(ref re) = fast_line_regex {
|
||||
log::debug!("extracted fast line regex: {:?}", re);
|
||||
debug!("extracted fast line regex: {:?}", re);
|
||||
}
|
||||
|
||||
let matcher = RegexMatcherImpl::new(&chir)?;
|
||||
log::trace!("final regex: {:?}", matcher.regex());
|
||||
trace!("final regex: {:?}", matcher.regex());
|
||||
Ok(RegexMatcher {
|
||||
config: self.config.clone(),
|
||||
matcher,
|
||||
|
@@ -2,8 +2,8 @@ use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex_syntax::hir::Hir;
|
||||
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for an alternation of literals.
|
||||
///
|
||||
|
@@ -13,10 +13,7 @@ pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
|
||||
/// the given expression.
|
||||
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
|
||||
match *expr.kind() {
|
||||
HirKind::Empty | HirKind::WordBoundary(_) => {}
|
||||
HirKind::Anchor(_) => {
|
||||
set.remove(b'\n');
|
||||
}
|
||||
HirKind::Empty | HirKind::Anchor(_) | HirKind::WordBoundary(_) => {}
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
|
||||
set.remove(b);
|
||||
@@ -128,12 +125,4 @@ mod tests {
|
||||
assert_eq!(sparse(&extract(r"\xFF")), sparse_except(&[0xC3, 0xBF]));
|
||||
assert_eq!(sparse(&extract(r"(?-u)\xFF")), sparse_except(&[0xFF]));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn anchor() {
|
||||
assert_eq!(sparse(&extract(r"^")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"$")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"\A")), sparse_except(&[b'\n']));
|
||||
assert_eq!(sparse(&extract(r"\z")), sparse_except(&[b'\n']));
|
||||
}
|
||||
}
|
||||
|
@@ -1,7 +1,7 @@
|
||||
use grep_matcher::LineTerminator;
|
||||
use regex_syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use crate::error::{Error, ErrorKind};
|
||||
use error::{Error, ErrorKind};
|
||||
|
||||
/// Return an HIR that is guaranteed to never match the given line terminator,
|
||||
/// if possible.
|
||||
@@ -106,7 +106,7 @@ mod tests {
|
||||
use regex_syntax::Parser;
|
||||
|
||||
use super::{strip_from_match, LineTerminator};
|
||||
use crate::error::Error;
|
||||
use error::Error;
|
||||
|
||||
fn roundtrip(pattern: &str, byte: u8) -> String {
|
||||
roundtrip_line_term(pattern, LineTerminator::byte(byte)).unwrap()
|
||||
|
@@ -4,11 +4,11 @@ use std::sync::Arc;
|
||||
|
||||
use grep_matcher::{Match, Matcher, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
use thread_local::ThreadLocal;
|
||||
use thread_local::CachedThreadLocal;
|
||||
|
||||
use crate::config::ConfiguredHIR;
|
||||
use crate::error::Error;
|
||||
use crate::matcher::RegexCaptures;
|
||||
use config::ConfiguredHIR;
|
||||
use error::Error;
|
||||
use matcher::RegexCaptures;
|
||||
|
||||
/// A matcher for implementing "word match" semantics.
|
||||
#[derive(Debug)]
|
||||
@@ -21,19 +21,19 @@ pub struct WordMatcher {
|
||||
/// A map from capture group name to capture group index.
|
||||
names: HashMap<String, usize>,
|
||||
/// A reusable buffer for finding the match location of the inner group.
|
||||
locs: Arc<ThreadLocal<RefCell<CaptureLocations>>>,
|
||||
locs: Arc<CachedThreadLocal<RefCell<CaptureLocations>>>,
|
||||
}
|
||||
|
||||
impl Clone for WordMatcher {
|
||||
fn clone(&self) -> WordMatcher {
|
||||
// We implement Clone manually so that we get a fresh ThreadLocal such
|
||||
// that it can set its own thread owner. This permits each thread
|
||||
// We implement Clone manually so that we get a fresh CachedThreadLocal
|
||||
// such that it can set its own thread owner. This permits each thread
|
||||
// usings `locs` to hit the fast path.
|
||||
WordMatcher {
|
||||
regex: self.regex.clone(),
|
||||
original: self.original.clone(),
|
||||
names: self.names.clone(),
|
||||
locs: Arc::new(ThreadLocal::new()),
|
||||
locs: Arc::new(CachedThreadLocal::new()),
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -48,12 +48,12 @@ impl WordMatcher {
|
||||
let original =
|
||||
expr.with_pattern(|pat| format!("^(?:{})$", pat))?.regex()?;
|
||||
let word_expr = expr.with_pattern(|pat| {
|
||||
let pat = format!(r"(?:(?m:^)|\W)({})(?:\W|(?m:$))", pat);
|
||||
log::debug!("word regex: {:?}", pat);
|
||||
let pat = format!(r"(?:(?-m:^)|\W)({})(?:(?-m:$)|\W)", pat);
|
||||
debug!("word regex: {:?}", pat);
|
||||
pat
|
||||
})?;
|
||||
let regex = word_expr.regex()?;
|
||||
let locs = Arc::new(ThreadLocal::new());
|
||||
let locs = Arc::new(CachedThreadLocal::new());
|
||||
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in regex.capture_names().enumerate() {
|
||||
@@ -184,7 +184,7 @@ impl Matcher for WordMatcher {
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::WordMatcher;
|
||||
use crate::config::Config;
|
||||
use config::Config;
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
|
||||
fn matcher(pattern: &str) -> WordMatcher {
|
||||
@@ -237,8 +237,6 @@ mod tests {
|
||||
assert_eq!(Some((2, 5)), find(r"!?foo!?", "a!foo!a"));
|
||||
|
||||
assert_eq!(Some((2, 7)), find(r"!?foo!?", "##!foo!\n"));
|
||||
assert_eq!(Some((3, 8)), find(r"!?foo!?", "##\n!foo!##"));
|
||||
assert_eq!(Some((3, 8)), find(r"!?foo!?", "##\n!foo!\n##"));
|
||||
assert_eq!(Some((3, 7)), find(r"f?oo!?", "##\nfoo!##"));
|
||||
assert_eq!(Some((2, 5)), find(r"(?-u)foo[^a]*", "#!foo☃aaa"));
|
||||
}
|
||||
|
@@ -11,7 +11,6 @@ repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
|
||||
@@ -20,7 +19,7 @@ encoding_rs = "0.8.14"
|
||||
encoding_rs_io = "0.1.6"
|
||||
grep-matcher = { version = "0.1.2", path = "../matcher" }
|
||||
log = "0.4.5"
|
||||
memmap = { package = "memmap2", version = "0.3.0" }
|
||||
memmap = "0.7"
|
||||
|
||||
[dev-dependencies]
|
||||
grep-regex = { version = "0.1.3", path = "../regex" }
|
||||
|
@@ -28,3 +28,9 @@ Add this to your `Cargo.toml`:
|
||||
[dependencies]
|
||||
grep-searcher = "0.1"
|
||||
```
|
||||
|
||||
and this to your crate root:
|
||||
|
||||
```rust
|
||||
extern crate grep_searcher;
|
||||
```
|
||||
|
@@ -1,3 +1,6 @@
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::env;
|
||||
use std::error::Error;
|
||||
use std::io;
|
||||
|
@@ -48,6 +48,10 @@ using the
|
||||
implementation of `Sink`.
|
||||
|
||||
```
|
||||
extern crate grep_matcher;
|
||||
extern crate grep_regex;
|
||||
extern crate grep_searcher;
|
||||
|
||||
use std::error::Error;
|
||||
|
||||
use grep_matcher::Matcher;
|
||||
@@ -95,13 +99,24 @@ searches stdin.
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub use crate::lines::{LineIter, LineStep};
|
||||
pub use crate::searcher::{
|
||||
extern crate bstr;
|
||||
extern crate bytecount;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate grep_matcher;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memmap;
|
||||
#[cfg(test)]
|
||||
extern crate regex;
|
||||
|
||||
pub use lines::{LineIter, LineStep};
|
||||
pub use searcher::{
|
||||
BinaryDetection, ConfigError, Encoding, MmapChoice, Searcher,
|
||||
SearcherBuilder,
|
||||
};
|
||||
pub use crate::sink::sinks;
|
||||
pub use crate::sink::{
|
||||
pub use sink::sinks;
|
||||
pub use sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
};
|
||||
|
||||
|
@@ -4,7 +4,7 @@ use std::io;
|
||||
use bstr::ByteSlice;
|
||||
|
||||
/// The default buffer capacity that we use for the line buffer.
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 64 * (1 << 10); // 64 KB
|
||||
pub(crate) const DEFAULT_BUFFER_CAPACITY: usize = 8 * (1 << 10); // 8 KB
|
||||
|
||||
/// The behavior of a searcher in the face of long lines and big contexts.
|
||||
///
|
||||
|
@@ -2,13 +2,13 @@ use std::cmp;
|
||||
|
||||
use bstr::ByteSlice;
|
||||
|
||||
use crate::line_buffer::BinaryDetection;
|
||||
use crate::lines::{self, LineStep};
|
||||
use crate::searcher::{Config, Range, Searcher};
|
||||
use crate::sink::{
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
use line_buffer::BinaryDetection;
|
||||
use lines::{self, LineStep};
|
||||
use searcher::{Config, Range, Searcher};
|
||||
use sink::{
|
||||
Sink, SinkContext, SinkContextKind, SinkError, SinkFinish, SinkMatch,
|
||||
};
|
||||
use grep_matcher::{LineMatchKind, Matcher};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Core<'s, M: 's, S> {
|
||||
@@ -53,9 +53,9 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
};
|
||||
if !core.searcher.multi_line_with_matcher(&core.matcher) {
|
||||
if core.is_line_by_line_fast() {
|
||||
log::trace!("searcher core: will use fast line searcher");
|
||||
trace!("searcher core: will use fast line searcher");
|
||||
} else {
|
||||
log::trace!("searcher core: will use slow line searcher");
|
||||
trace!("searcher core: will use slow line searcher");
|
||||
}
|
||||
}
|
||||
core
|
||||
@@ -441,8 +441,6 @@ impl<'s, M: Matcher, S: Sink> Core<'s, M, S> {
|
||||
bytes: linebuf,
|
||||
absolute_byte_offset: offset,
|
||||
line_number: self.line_number,
|
||||
buffer: buf,
|
||||
bytes_range_in_buffer: range.start()..range.end(),
|
||||
},
|
||||
)?;
|
||||
if !keepgoing {
|
||||
|
@@ -1,16 +1,16 @@
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
|
||||
use crate::line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
|
||||
use crate::lines::{self, LineStep};
|
||||
use crate::sink::{Sink, SinkError};
|
||||
use grep_matcher::Matcher;
|
||||
use line_buffer::{LineBufferReader, DEFAULT_BUFFER_CAPACITY};
|
||||
use lines::{self, LineStep};
|
||||
use sink::{Sink, SinkError};
|
||||
|
||||
use crate::searcher::core::Core;
|
||||
use crate::searcher::{Config, Range, Searcher};
|
||||
use searcher::core::Core;
|
||||
use searcher::{Config, Range, Searcher};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct ReadByLine<'s, M, R, S> {
|
||||
pub struct ReadByLine<'s, M: 's, R, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
rdr: LineBufferReader<'s, R>,
|
||||
@@ -87,7 +87,7 @@ where
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct SliceByLine<'s, M, S> {
|
||||
pub struct SliceByLine<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
@@ -134,7 +134,7 @@ impl<'s, M: Matcher, S: Sink> SliceByLine<'s, M, S> {
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct MultiLine<'s, M, S> {
|
||||
pub struct MultiLine<'s, M: 's, S> {
|
||||
config: &'s Config,
|
||||
core: Core<'s, M, S>,
|
||||
slice: &'s [u8],
|
||||
@@ -226,19 +226,10 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
}
|
||||
Some(last_match) => {
|
||||
// If the lines in the previous match overlap with the lines
|
||||
// in this match, then simply grow the match and move on. This
|
||||
// happens when the next match begins on the same line that the
|
||||
// last match ends on.
|
||||
//
|
||||
// Note that we do not technically require strict overlap here.
|
||||
// Instead, we only require that the lines are adjacent. This
|
||||
// provides larger blocks of lines to the printer, and results
|
||||
// in overall better behavior with respect to how replacements
|
||||
// are handled.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1311
|
||||
// And also the associated commit fixing #1311.
|
||||
if last_match.end() >= line.start() {
|
||||
// in this match, then simply grow the match and move on.
|
||||
// This happens when the next match begins on the same line
|
||||
// that the last match ends on.
|
||||
if last_match.end() > line.start() {
|
||||
self.last_match = Some(last_match.with_end(line.end()));
|
||||
Ok(true)
|
||||
} else {
|
||||
@@ -349,8 +340,8 @@ impl<'s, M: Matcher, S: Sink> MultiLine<'s, M, S> {
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use crate::searcher::{BinaryDetection, SearcherBuilder};
|
||||
use crate::testutil::{KitchenSink, RegexMatcher, SearcherTester};
|
||||
use searcher::{BinaryDetection, SearcherBuilder};
|
||||
use testutil::{KitchenSink, RegexMatcher, SearcherTester};
|
||||
|
||||
use super::*;
|
||||
|
||||
@@ -642,7 +633,7 @@ d
|
||||
haystack.push_str("a\n");
|
||||
|
||||
let byte_count = haystack.len();
|
||||
let exp = format!("0:a\n1048690:a\n\nbyte count:{}\n", byte_count);
|
||||
let exp = format!("0:a\n131186:a\n\nbyte count:{}\n", byte_count);
|
||||
|
||||
SearcherTester::new(&haystack, "a")
|
||||
.line_number(false)
|
||||
@@ -723,23 +714,21 @@ d
|
||||
haystack.push_str("zzz\n");
|
||||
}
|
||||
haystack.push_str("a\n");
|
||||
haystack.push_str("zzz\n");
|
||||
haystack.push_str("a\x00a\n");
|
||||
haystack.push_str("zzz\n");
|
||||
haystack.push_str("a\n");
|
||||
|
||||
// The line buffered searcher has slightly different semantics here.
|
||||
// Namely, it will *always* detect binary data in the current buffer
|
||||
// before searching it. Thus, the total number of bytes searched is
|
||||
// smaller than below.
|
||||
let exp = "0:a\n\nbyte count:262146\nbinary offset:262153\n";
|
||||
let exp = "0:a\n\nbyte count:32770\nbinary offset:32773\n";
|
||||
// In contrast, the slice readers (for multi line as well) will only
|
||||
// look for binary data in the initial chunk of bytes. After that
|
||||
// point, it only looks for binary data in matches. Note though that
|
||||
// the binary offset remains the same. (See the binary4 test for a case
|
||||
// where the offset is explicitly different.)
|
||||
let exp_slice =
|
||||
"0:a\n262146:a\n\nbyte count:262153\nbinary offset:262153\n";
|
||||
"0:a\n32770:a\n\nbyte count:32773\nbinary offset:32773\n";
|
||||
|
||||
SearcherTester::new(&haystack, "a")
|
||||
.binary_detection(BinaryDetection::quit(0))
|
||||
@@ -766,12 +755,12 @@ d
|
||||
haystack.push_str("a\x00a\n");
|
||||
haystack.push_str("a\n");
|
||||
|
||||
let exp = "0:a\n\nbyte count:262146\nbinary offset:262149\n";
|
||||
let exp = "0:a\n\nbyte count:32770\nbinary offset:32773\n";
|
||||
// The binary offset for the Slice readers corresponds to the binary
|
||||
// data in `a\x00a\n` since the first line with binary data
|
||||
// (`b\x00b\n`) isn't part of a match, and is therefore undetected.
|
||||
let exp_slice =
|
||||
"0:a\n262146:a\n\nbyte count:262153\nbinary offset:262153\n";
|
||||
"0:a\n32770:a\n\nbyte count:32777\nbinary offset:32777\n";
|
||||
|
||||
SearcherTester::new(&haystack, "a")
|
||||
.binary_detection(BinaryDetection::quit(0))
|
||||
@@ -1488,8 +1477,8 @@ byte count:307
|
||||
|
||||
#[test]
|
||||
fn scratch() {
|
||||
use crate::sinks;
|
||||
use crate::testutil::RegexMatcher;
|
||||
use sinks;
|
||||
use testutil::RegexMatcher;
|
||||
|
||||
const SHERLOCK: &'static [u8] = b"\
|
||||
For the Doctor Wat\xFFsons of this world, as opposed to the Sherlock
|
||||
|
@@ -83,13 +83,13 @@ impl MmapChoice {
|
||||
Ok(mmap) => Some(mmap),
|
||||
Err(err) => {
|
||||
if let Some(path) = path {
|
||||
log::debug!(
|
||||
debug!(
|
||||
"{}: failed to open memory map: {}",
|
||||
path.display(),
|
||||
err
|
||||
);
|
||||
} else {
|
||||
log::debug!("failed to open memory map: {}", err);
|
||||
debug!("failed to open memory map: {}", err);
|
||||
}
|
||||
None
|
||||
}
|
||||
|
@@ -5,15 +5,15 @@ use std::fs::File;
|
||||
use std::io::{self, Read};
|
||||
use std::path::Path;
|
||||
|
||||
use crate::line_buffer::{
|
||||
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
|
||||
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
|
||||
};
|
||||
use crate::searcher::glue::{MultiLine, ReadByLine, SliceByLine};
|
||||
use crate::sink::{Sink, SinkError};
|
||||
use encoding_rs;
|
||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||
use grep_matcher::{LineTerminator, Match, Matcher};
|
||||
use line_buffer::{
|
||||
self, alloc_error, BufferAllocation, LineBuffer, LineBufferBuilder,
|
||||
LineBufferReader, DEFAULT_BUFFER_CAPACITY,
|
||||
};
|
||||
use searcher::glue::{MultiLine, ReadByLine, SliceByLine};
|
||||
use sink::{Sink, SinkError};
|
||||
|
||||
pub use self::mmap::MmapChoice;
|
||||
|
||||
@@ -263,7 +263,7 @@ impl ::std::error::Error for ConfigError {
|
||||
}
|
||||
|
||||
impl fmt::Display for ConfigError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
ConfigError::SearchUnavailable => {
|
||||
write!(f, "grep config error: no available searchers")
|
||||
@@ -659,19 +659,16 @@ impl Searcher {
|
||||
S: Sink,
|
||||
{
|
||||
if let Some(mmap) = self.config.mmap.open(file, path) {
|
||||
log::trace!("{:?}: searching via memory map", path);
|
||||
trace!("{:?}: searching via memory map", path);
|
||||
return self.search_slice(matcher, &mmap, write_to);
|
||||
}
|
||||
// Fast path for multi-line searches of files when memory maps are
|
||||
// not enabled. This pre-allocates a buffer roughly the size of the
|
||||
// file, which isn't possible when searching an arbitrary io::Read.
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
log::trace!(
|
||||
"{:?}: reading entire file on to heap for mulitline",
|
||||
path
|
||||
);
|
||||
trace!("{:?}: reading entire file on to heap for mulitline", path);
|
||||
self.fill_multi_line_buffer_from_file::<S>(file)?;
|
||||
log::trace!("{:?}: searching via multiline strategy", path);
|
||||
trace!("{:?}: searching via multiline strategy", path);
|
||||
MultiLine::new(
|
||||
self,
|
||||
matcher,
|
||||
@@ -680,7 +677,7 @@ impl Searcher {
|
||||
)
|
||||
.run()
|
||||
} else {
|
||||
log::trace!("{:?}: searching using generic reader", path);
|
||||
trace!("{:?}: searching using generic reader", path);
|
||||
self.search_reader(matcher, file, write_to)
|
||||
}
|
||||
}
|
||||
@@ -710,17 +707,15 @@ impl Searcher {
|
||||
self.check_config(&matcher).map_err(S::Error::error_config)?;
|
||||
|
||||
let mut decode_buffer = self.decode_buffer.borrow_mut();
|
||||
let decoder = self
|
||||
let read_from = self
|
||||
.decode_builder
|
||||
.build_with_buffer(read_from, &mut *decode_buffer)
|
||||
.map_err(S::Error::error_io)?;
|
||||
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
log::trace!(
|
||||
"generic reader: reading everything to heap for multiline"
|
||||
);
|
||||
self.fill_multi_line_buffer_from_reader::<_, S>(decoder)?;
|
||||
log::trace!("generic reader: searching via multiline strategy");
|
||||
trace!("generic reader: reading everything to heap for multiline");
|
||||
self.fill_multi_line_buffer_from_reader::<_, S>(read_from)?;
|
||||
trace!("generic reader: searching via multiline strategy");
|
||||
MultiLine::new(
|
||||
self,
|
||||
matcher,
|
||||
@@ -730,8 +725,8 @@ impl Searcher {
|
||||
.run()
|
||||
} else {
|
||||
let mut line_buffer = self.line_buffer.borrow_mut();
|
||||
let rdr = LineBufferReader::new(decoder, &mut *line_buffer);
|
||||
log::trace!("generic reader: searching via roll buffer strategy");
|
||||
let rdr = LineBufferReader::new(read_from, &mut *line_buffer);
|
||||
trace!("generic reader: searching via roll buffer strategy");
|
||||
ReadByLine::new(self, matcher, rdr, write_to).run()
|
||||
}
|
||||
}
|
||||
@@ -752,16 +747,14 @@ impl Searcher {
|
||||
|
||||
// We can search the slice directly, unless we need to do transcoding.
|
||||
if self.slice_needs_transcoding(slice) {
|
||||
log::trace!(
|
||||
"slice reader: needs transcoding, using generic reader"
|
||||
);
|
||||
trace!("slice reader: needs transcoding, using generic reader");
|
||||
return self.search_reader(matcher, slice, write_to);
|
||||
}
|
||||
if self.multi_line_with_matcher(&matcher) {
|
||||
log::trace!("slice reader: searching via multiline strategy");
|
||||
trace!("slice reader: searching via multiline strategy");
|
||||
MultiLine::new(self, matcher, slice, write_to).run()
|
||||
} else {
|
||||
log::trace!("slice reader: searching via slice-by-line strategy");
|
||||
trace!("slice reader: searching via slice-by-line strategy");
|
||||
SliceByLine::new(self, matcher, slice, write_to).run()
|
||||
}
|
||||
}
|
||||
@@ -795,7 +788,7 @@ impl Searcher {
|
||||
/// Returns true if and only if the given slice needs to be transcoded.
|
||||
fn slice_needs_transcoding(&self, slice: &[u8]) -> bool {
|
||||
self.config.encoding.is_some()
|
||||
|| (self.config.bom_sniffing && slice_has_bom(slice))
|
||||
|| (self.config.bom_sniffing && slice_has_utf16_bom(slice))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -980,24 +973,22 @@ impl Searcher {
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given slice begins with a UTF-8 or UTF-16
|
||||
/// BOM.
|
||||
/// Returns true if and only if the given slice begins with a UTF-16 BOM.
|
||||
///
|
||||
/// This is used by the searcher to determine if a transcoder is necessary.
|
||||
/// Otherwise, it is advantageous to search the slice directly.
|
||||
fn slice_has_bom(slice: &[u8]) -> bool {
|
||||
fn slice_has_utf16_bom(slice: &[u8]) -> bool {
|
||||
let enc = match encoding_rs::Encoding::for_bom(slice) {
|
||||
None => return false,
|
||||
Some((enc, _)) => enc,
|
||||
};
|
||||
[encoding_rs::UTF_16LE, encoding_rs::UTF_16BE, encoding_rs::UTF_8]
|
||||
.contains(&enc)
|
||||
[encoding_rs::UTF_16LE, encoding_rs::UTF_16BE].contains(&enc)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::testutil::{KitchenSink, RegexMatcher};
|
||||
use testutil::{KitchenSink, RegexMatcher};
|
||||
|
||||
#[test]
|
||||
fn config_error_heap_limit() {
|
||||
@@ -1018,21 +1009,4 @@ mod tests {
|
||||
let res = searcher.search_slice(matcher, &[], sink);
|
||||
assert!(res.is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn uft8_bom_sniffing() {
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1638
|
||||
// ripgrep must sniff utf-8 BOM, just like it does with utf-16
|
||||
let matcher = RegexMatcher::new("foo");
|
||||
let haystack: &[u8] = &[0xef, 0xbb, 0xbf, 0x66, 0x6f, 0x6f];
|
||||
|
||||
let mut sink = KitchenSink::new();
|
||||
let mut searcher = SearcherBuilder::new().build();
|
||||
|
||||
let res = searcher.search_slice(matcher, haystack, &mut sink);
|
||||
assert!(res.is_ok());
|
||||
|
||||
let sink_output = String::from_utf8(sink.as_bytes().to_vec()).unwrap();
|
||||
assert_eq!(sink_output, "1:0:foo\nbyte count:3\n");
|
||||
}
|
||||
}
|
||||
|
@@ -4,8 +4,8 @@ use std::io;
|
||||
|
||||
use grep_matcher::LineTerminator;
|
||||
|
||||
use crate::lines::LineIter;
|
||||
use crate::searcher::{ConfigError, Searcher};
|
||||
use lines::LineIter;
|
||||
use searcher::{ConfigError, Searcher};
|
||||
|
||||
/// A trait that describes errors that can be reported by searchers and
|
||||
/// implementations of `Sink`.
|
||||
@@ -121,7 +121,7 @@ pub trait Sink {
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_mat: &SinkMatch<'_>,
|
||||
_mat: &SinkMatch,
|
||||
) -> Result<bool, Self::Error>;
|
||||
|
||||
/// This method is called whenever a context line is found, and is optional
|
||||
@@ -140,7 +140,7 @@ pub trait Sink {
|
||||
fn context(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
_context: &SinkContext<'_>,
|
||||
_context: &SinkContext,
|
||||
) -> Result<bool, Self::Error> {
|
||||
Ok(true)
|
||||
}
|
||||
@@ -226,7 +226,7 @@ impl<'a, S: Sink> Sink for &'a mut S {
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).matched(searcher, mat)
|
||||
}
|
||||
@@ -235,7 +235,7 @@ impl<'a, S: Sink> Sink for &'a mut S {
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
context: &SinkContext<'_>,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context(searcher, context)
|
||||
}
|
||||
@@ -279,7 +279,7 @@ impl<S: Sink + ?Sized> Sink for Box<S> {
|
||||
fn matched(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).matched(searcher, mat)
|
||||
}
|
||||
@@ -288,7 +288,7 @@ impl<S: Sink + ?Sized> Sink for Box<S> {
|
||||
fn context(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
context: &SinkContext<'_>,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, S::Error> {
|
||||
(**self).context(searcher, context)
|
||||
}
|
||||
@@ -365,8 +365,6 @@ pub struct SinkMatch<'b> {
|
||||
pub(crate) bytes: &'b [u8],
|
||||
pub(crate) absolute_byte_offset: u64,
|
||||
pub(crate) line_number: Option<u64>,
|
||||
pub(crate) buffer: &'b [u8],
|
||||
pub(crate) bytes_range_in_buffer: std::ops::Range<usize>,
|
||||
}
|
||||
|
||||
impl<'b> SinkMatch<'b> {
|
||||
@@ -407,18 +405,6 @@ impl<'b> SinkMatch<'b> {
|
||||
pub fn line_number(&self) -> Option<u64> {
|
||||
self.line_number
|
||||
}
|
||||
|
||||
/// TODO
|
||||
#[inline]
|
||||
pub fn buffer(&self) -> &'b [u8] {
|
||||
self.buffer
|
||||
}
|
||||
|
||||
/// TODO
|
||||
#[inline]
|
||||
pub fn bytes_range_in_buffer(&self) -> std::ops::Range<usize> {
|
||||
self.bytes_range_in_buffer.clone()
|
||||
}
|
||||
}
|
||||
|
||||
/// The type of context reported by a searcher.
|
||||
@@ -514,7 +500,7 @@ pub mod sinks {
|
||||
use std::str;
|
||||
|
||||
use super::{Sink, SinkError, SinkMatch};
|
||||
use crate::searcher::Searcher;
|
||||
use searcher::Searcher;
|
||||
|
||||
/// A sink that provides line numbers and matches as strings while ignoring
|
||||
/// everything else.
|
||||
@@ -545,7 +531,7 @@ pub mod sinks {
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
let matched = match str::from_utf8(mat.bytes()) {
|
||||
Ok(matched) => matched,
|
||||
@@ -593,7 +579,7 @@ pub mod sinks {
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
use std::borrow::Cow;
|
||||
|
||||
@@ -643,7 +629,7 @@ pub mod sinks {
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
let line_number = match mat.line_number() {
|
||||
Some(line_number) => line_number,
|
||||
|
@@ -7,8 +7,8 @@ use grep_matcher::{
|
||||
};
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use crate::searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
use crate::sink::{Sink, SinkContext, SinkFinish, SinkMatch};
|
||||
use searcher::{BinaryDetection, Searcher, SearcherBuilder};
|
||||
use sink::{Sink, SinkContext, SinkFinish, SinkMatch};
|
||||
|
||||
/// A simple regex matcher.
|
||||
///
|
||||
@@ -129,7 +129,7 @@ impl Sink for KitchenSink {
|
||||
fn matched(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
mat: &SinkMatch<'_>,
|
||||
mat: &SinkMatch,
|
||||
) -> Result<bool, io::Error> {
|
||||
assert!(!mat.bytes().is_empty());
|
||||
assert!(mat.lines().count() >= 1);
|
||||
@@ -152,7 +152,7 @@ impl Sink for KitchenSink {
|
||||
fn context(
|
||||
&mut self,
|
||||
_searcher: &Searcher,
|
||||
context: &SinkContext<'_>,
|
||||
context: &SinkContext,
|
||||
) -> Result<bool, io::Error> {
|
||||
assert!(!context.bytes().is_empty());
|
||||
assert!(context.lines().count() == 1);
|
||||
|
@@ -3,7 +3,7 @@ rg(1)
|
||||
|
||||
Name
|
||||
----
|
||||
rg - recursively search the current directory for lines matching a pattern
|
||||
rg - recursively search current directory for lines matching a pattern
|
||||
|
||||
|
||||
Synopsis
|
||||
@@ -27,7 +27,7 @@ Synopsis
|
||||
|
||||
DESCRIPTION
|
||||
-----------
|
||||
ripgrep (rg) recursively searches the current directory for a regex pattern.
|
||||
ripgrep (rg) recursively searches your current directory for a regex pattern.
|
||||
By default, ripgrep will respect your .gitignore and automatically skip hidden
|
||||
files/directories and binary files.
|
||||
|
||||
@@ -82,10 +82,10 @@ _PATH_::
|
||||
|
||||
OPTIONS
|
||||
-------
|
||||
Note that many options can be disabled via flags. In some cases, those flags
|
||||
are not listed in a first class way below. For example, the *--column*
|
||||
flag (listed below) enables column numbers in ripgrep's output, but the
|
||||
*--no-column* flag (not listed below) disables them. The reverse can also
|
||||
Note that for many options, there exist flags to disable them. In some cases,
|
||||
those flags are not listed in a first class way below. For example, the
|
||||
*--column* flag (listed below) enables column numbers in ripgrep's output, but
|
||||
the *--no-column* flag (not listed below) disables them. The reverse can also
|
||||
exist. For example, the *--no-ignore* flag (listed below) disables ripgrep's
|
||||
*gitignore* logic, but the *--ignore* flag (not listed below) enables it. These
|
||||
flags are useful for overriding a ripgrep configuration file on the command
|
||||
@@ -166,7 +166,7 @@ Each of the types of filtering can be configured via command line flags:
|
||||
* There are several flags starting with '--no-ignore' that toggle which,
|
||||
if any, ignore rules are respected. '--no-ignore' by itself will disable
|
||||
all of them.
|
||||
* '-./--hidden' will force ripgrep to search hidden files and directories.
|
||||
* '--hidden' will force ripgrep to search hidden files and directories.
|
||||
* '--binary' will force ripgrep to search binary files.
|
||||
* '-L/--follow' will force ripgrep to follow symlinks.
|
||||
|
||||
|
@@ -5,17 +5,17 @@ use crate::util::{Dir, TestCommand};
|
||||
// bug report: https://github.com/BurntSushi/ripgrep/issues/306
|
||||
|
||||
// Our haystack is the first 500 lines of Gutenberg's copy of "A Study in
|
||||
// Scarlet," with a NUL byte at line 1898: `abcdef\x00`.
|
||||
// Scarlet," with a NUL byte at line 237: `abcdef\x00`.
|
||||
//
|
||||
// The position and size of the haystack is, unfortunately, significant. In
|
||||
// particular, the NUL byte is specifically inserted at some point *after* the
|
||||
// first 65,536 bytes, which corresponds to the initial capacity of the buffer
|
||||
// first 8192 bytes, which corresponds to the initial capacity of the buffer
|
||||
// that ripgrep uses to read files. (grep for DEFAULT_BUFFER_CAPACITY.) The
|
||||
// position of the NUL byte ensures that we can execute some search on the
|
||||
// initial buffer contents without ever detecting any binary data. Moreover,
|
||||
// when using a memory map for searching, only the first 65,536 bytes are
|
||||
// when using a memory map for searching, only the first 8192 bytes are
|
||||
// scanned for a NUL byte, so no binary bytes are detected at all when using
|
||||
// a memory map (unless our query matches line 1898).
|
||||
// a memory map (unless our query matches line 237).
|
||||
//
|
||||
// One last note: in the tests below, we use --no-mmap heavily because binary
|
||||
// detection with memory maps is a bit different. Namely, NUL bytes are only
|
||||
@@ -40,7 +40,7 @@ rgtest!(after_match1_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
let expected = "\
|
||||
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041)
|
||||
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -53,7 +53,7 @@ rgtest!(after_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
let expected = "\
|
||||
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
binary file matches (found \"\\0\" byte around offset 77041)
|
||||
binary file matches (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -64,7 +64,7 @@ rgtest!(after_match1_stdin, |_: Dir, mut cmd: TestCommand| {
|
||||
|
||||
let expected = "\
|
||||
1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
binary file matches (found \"\\0\" byte around offset 77041)
|
||||
binary file matches (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.pipe(HAY));
|
||||
});
|
||||
@@ -85,7 +85,7 @@ rgtest!(after_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
let expected = "\
|
||||
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
hay: binary file matches (found \"\\0\" byte around offset 77041)
|
||||
hay: binary file matches (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -200,7 +200,7 @@ rgtest!(after_match2_implicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
let expected = "\
|
||||
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 77041)
|
||||
hay: WARNING: stopped searching binary file after match (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -220,7 +220,7 @@ rgtest!(after_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
let expected = "\
|
||||
hay:1:The Project Gutenberg EBook of A Study In Scarlet, by Arthur Conan Doyle
|
||||
hay:1867:\"And yet you say he is not a medical student?\"
|
||||
hay:236:\"And yet you say he is not a medical student?\"
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -240,7 +240,7 @@ rgtest!(before_match1_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
cmd.args(&["--no-mmap", "-n", "Heaven", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
binary file matches (found \"\\0\" byte around offset 77041)
|
||||
binary file matches (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -253,7 +253,7 @@ rgtest!(before_match1_implicit_binary, |dir: Dir, mut cmd: TestCommand| {
|
||||
cmd.args(&["--no-mmap", "-n", "--binary", "Heaven", "-g", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
hay: binary file matches (found \"\\0\" byte around offset 77041)
|
||||
hay: binary file matches (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -265,7 +265,7 @@ rgtest!(before_match1_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||
cmd.args(&["--no-mmap", "-n", "--text", "Heaven", "-g", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
hay:1871:\"No. Heaven knows what the objects of his studies are. But here we
|
||||
hay:238:\"No. Heaven knows what the objects of his studies are. But here we
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -288,7 +288,7 @@ rgtest!(before_match2_explicit, |dir: Dir, mut cmd: TestCommand| {
|
||||
cmd.args(&["--no-mmap", "-n", "a medical student", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
binary file matches (found \"\\0\" byte around offset 77041)
|
||||
binary file matches (found \"\\0\" byte around offset 9741)
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
@@ -300,7 +300,7 @@ rgtest!(before_match2_implicit_text, |dir: Dir, mut cmd: TestCommand| {
|
||||
cmd.args(&["--no-mmap", "-n", "--text", "a medical student", "-g", "hay"]);
|
||||
|
||||
let expected = "\
|
||||
hay:1867:\"And yet you say he is not a medical student?\"
|
||||
hay:236:\"And yet you say he is not a medical student?\"
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
File diff suppressed because it is too large
Load Diff
134
tests/feature.rs
134
tests/feature.rs
@@ -787,140 +787,6 @@ rgtest!(f1466_no_ignore_files, |dir: Dir, mut cmd: TestCommand| {
|
||||
eqnice!("foo\n", cmd.arg("-u").stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1404
|
||||
rgtest!(f1404_nothing_searched_warning, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create(".ignore", "ignored-dir/**");
|
||||
dir.create_dir("ignored-dir");
|
||||
dir.create("ignored-dir/foo", "needle");
|
||||
|
||||
// Test that, if ripgrep searches only ignored folders/files, then there
|
||||
// is a non-zero exit code.
|
||||
cmd.arg("needle");
|
||||
cmd.assert_err();
|
||||
|
||||
// Test that we actually get an error message that we expect.
|
||||
let output = cmd.cmd().output().unwrap();
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let expected = "\
|
||||
No files were searched, which means ripgrep probably applied \
|
||||
a filter you didn't expect.\n\
|
||||
Running with --debug will show why files are being skipped.\n\
|
||||
";
|
||||
eqnice!(expected, stderr);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1404
|
||||
rgtest!(f1404_nothing_searched_ignored, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create(".ignore", "ignored-dir/**");
|
||||
dir.create_dir("ignored-dir");
|
||||
dir.create("ignored-dir/foo", "needle");
|
||||
|
||||
// Test that, if ripgrep searches only ignored folders/files, then there
|
||||
// is a non-zero exit code.
|
||||
cmd.arg("--no-messages").arg("needle");
|
||||
cmd.assert_err();
|
||||
|
||||
// But since --no-messages is given, there should not be any error message
|
||||
// printed.
|
||||
let output = cmd.cmd().output().unwrap();
|
||||
let stderr = String::from_utf8_lossy(&output.stderr);
|
||||
let expected = "";
|
||||
eqnice!(expected, stderr);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1842
|
||||
rgtest!(f1842_field_context_separator, |dir: Dir, _: TestCommand| {
|
||||
dir.create("sherlock", SHERLOCK);
|
||||
|
||||
// Test the default.
|
||||
let base = &["-n", "-A1", "Doctor Watsons", "sherlock"];
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2-Holmeses, success in the province of detective work must always
|
||||
";
|
||||
eqnice!(expected, dir.command().args(base).stdout());
|
||||
|
||||
// Test that it can be overridden.
|
||||
let mut args = vec!["--field-context-separator", "!"];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2!Holmeses, success in the province of detective work must always
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
|
||||
// Test that it can use multiple bytes.
|
||||
let mut args = vec!["--field-context-separator", "!!"];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2!!Holmeses, success in the province of detective work must always
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
|
||||
// Test that unescaping works.
|
||||
let mut args = vec!["--field-context-separator", r"\x7F"];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2\x7FHolmeses, success in the province of detective work must always
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
|
||||
// Test that an empty separator is OK.
|
||||
let mut args = vec!["--field-context-separator", r""];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
2Holmeses, success in the province of detective work must always
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1842
|
||||
rgtest!(f1842_field_match_separator, |dir: Dir, _: TestCommand| {
|
||||
dir.create("sherlock", SHERLOCK);
|
||||
|
||||
// Test the default.
|
||||
let base = &["-n", "Doctor Watsons", "sherlock"];
|
||||
let expected = "\
|
||||
1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
";
|
||||
eqnice!(expected, dir.command().args(base).stdout());
|
||||
|
||||
// Test that it can be overridden.
|
||||
let mut args = vec!["--field-match-separator", "!"];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1!For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
|
||||
// Test that it can use multiple bytes.
|
||||
let mut args = vec!["--field-match-separator", "!!"];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1!!For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
|
||||
// Test that unescaping works.
|
||||
let mut args = vec!["--field-match-separator", r"\x7F"];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1\x7FFor the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
|
||||
// Test that an empty separator is OK.
|
||||
let mut args = vec!["--field-match-separator", r""];
|
||||
args.extend(base);
|
||||
let expected = "\
|
||||
1For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
";
|
||||
eqnice!(expected, dir.command().args(&args).stdout());
|
||||
});
|
||||
|
||||
rgtest!(no_context_sep, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "foo\nctx\nbar\nctx\nfoo\nctx");
|
||||
cmd.args(&["-A1", "--no-context-separator", "foo", "test"]);
|
||||
|
@@ -323,38 +323,24 @@ rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
// Check without --crlf flag.
|
||||
let msgs = json_decode(&cmd.arg("-U").arg("--json").arg("\n").stdout());
|
||||
assert_eq!(msgs.len(), 4);
|
||||
assert_eq!(msgs.len(), 5);
|
||||
|
||||
let m = msgs[1].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("test\r\n\n"));
|
||||
assert_eq!(m.lines, Data::text("test\r\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
|
||||
let m = msgs[2].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
assert_eq!(m.submatches[1].m, Data::text("\n"));
|
||||
|
||||
// Now check with --crlf flag.
|
||||
let msgs = json_decode(&cmd.arg("--crlf").stdout());
|
||||
assert_eq!(msgs.len(), 4);
|
||||
|
||||
let m = msgs[1].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("test\r\n\n"));
|
||||
assert_eq!(m.lines, Data::text("test\r\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
|
||||
let m = msgs[2].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("\n"));
|
||||
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
||||
assert_eq!(m.submatches[1].m, Data::text("\n"));
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1412
|
||||
rgtest!(r1412_look_behind_match_missing, |dir: Dir, mut cmd: TestCommand| {
|
||||
// Only PCRE2 supports look-around.
|
||||
if !dir.is_pcre2() {
|
||||
return;
|
||||
}
|
||||
|
||||
dir.create("test", "foo\nbar\n");
|
||||
|
||||
let msgs = json_decode(
|
||||
&cmd.arg("-U").arg("--json").arg(r"(?<=foo\n)bar").stdout(),
|
||||
);
|
||||
assert_eq!(msgs.len(), 4);
|
||||
|
||||
let m = msgs[1].unwrap_match();
|
||||
assert_eq!(m.lines, Data::text("bar\n"));
|
||||
assert_eq!(m.submatches.len(), 1);
|
||||
});
|
||||
|
@@ -64,11 +64,6 @@ rgtest!(only_matching, |dir: Dir, mut cmd: TestCommand| {
|
||||
});
|
||||
|
||||
// Tests that --vimgrep works in multiline mode.
|
||||
//
|
||||
// In particular, we test that only the first line of each match is printed,
|
||||
// even when a match spans multiple lines.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1866
|
||||
rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("sherlock", SHERLOCK);
|
||||
cmd.args(&[
|
||||
@@ -82,6 +77,7 @@ rgtest!(vimgrep, |dir: Dir, mut cmd: TestCommand| {
|
||||
let expected = "\
|
||||
sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
sherlock:2:57:Holmeses, success in the province of detective work must always
|
||||
sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted,
|
||||
";
|
||||
|
@@ -392,9 +392,7 @@ rgtest!(r428_color_context_path, |dir: Dir, mut cmd: TestCommand| {
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/428
|
||||
rgtest!(r428_unrecognized_style, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("file.txt", "Sherlock");
|
||||
|
||||
rgtest!(r428_unrecognized_style, |_: Dir, mut cmd: TestCommand| {
|
||||
cmd.arg("--colors=match:style:").arg("Sherlock");
|
||||
cmd.assert_err();
|
||||
|
||||
@@ -744,15 +742,6 @@ rgtest!(r1259_drop_last_byte_nonl, |dir: Dir, mut cmd: TestCommand| {
|
||||
eqnice!("fz\n", cmd.arg("-f").arg("patterns-nl").arg("test").stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1311
|
||||
rgtest!(r1311_multi_line_term_replace, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("input", "hello\nworld\n");
|
||||
eqnice!(
|
||||
"1:hello?world?\n",
|
||||
cmd.args(&["-U", "-r?", "-n", "\n", "input"]).stdout()
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1319
|
||||
rgtest!(r1319, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("input", "CCAGCTACTCGGGAGGCTGAGGCTGGAGGATCGCTTGAGTCCAGGAGTTC");
|
||||
@@ -772,28 +761,6 @@ rgtest!(r1334_crazy_literals, |dir: Dir, mut cmd: TestCommand| {
|
||||
);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1380
|
||||
rgtest!(r1380, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create(
|
||||
"foo",
|
||||
"\
|
||||
a
|
||||
b
|
||||
c
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
d
|
||||
e
|
||||
",
|
||||
);
|
||||
|
||||
eqnice!("d\ne\nd\n", cmd.args(&["-A2", "-m1", "d", "foo"]).stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1389
|
||||
rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_dir("mydir");
|
||||
@@ -806,44 +773,6 @@ rgtest!(r1389_bad_symlinks_no_biscuit, |dir: Dir, mut cmd: TestCommand| {
|
||||
eqnice!("mylink/file.txt:test\n", stdout);
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1401
|
||||
rgtest!(r1401_look_ahead_only_matching_1, |dir: Dir, mut cmd: TestCommand| {
|
||||
// Only PCRE2 supports look-around.
|
||||
if !dir.is_pcre2() {
|
||||
return;
|
||||
}
|
||||
dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\n");
|
||||
cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]);
|
||||
eqnice!("xo\ncat\tdo\n", cmd.stdout());
|
||||
|
||||
let mut cmd = dir.command();
|
||||
cmd.args(&["-No", r".*o(?!.*[ \t])", "ip.txt"]);
|
||||
eqnice!("xo\ncat\tdo\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1401
|
||||
rgtest!(r1401_look_ahead_only_matching_2, |dir: Dir, mut cmd: TestCommand| {
|
||||
// Only PCRE2 supports look-around.
|
||||
if !dir.is_pcre2() {
|
||||
return;
|
||||
}
|
||||
dir.create("ip.txt", "foo 42\nxoyz\ncat\tdog\nfoo");
|
||||
cmd.args(&["-No", r".*o(?!.*\s)", "ip.txt"]);
|
||||
eqnice!("xo\ncat\tdo\nfoo\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1412
|
||||
rgtest!(r1412_look_behind_no_replacement, |dir: Dir, mut cmd: TestCommand| {
|
||||
// Only PCRE2 supports look-around.
|
||||
if !dir.is_pcre2() {
|
||||
return;
|
||||
}
|
||||
|
||||
dir.create("test", "foo\nbar\n");
|
||||
cmd.args(&["-nU", "-rquux", r"(?<=foo\n)bar", "test"]);
|
||||
eqnice!("2:quux\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/pull/1446
|
||||
rgtest!(
|
||||
r1446_respect_excludes_in_worktree,
|
||||
@@ -935,97 +864,3 @@ use B;
|
||||
]);
|
||||
eqnice!("2\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1638
|
||||
//
|
||||
// Tests if UTF-8 BOM is sniffed, then the column index is correct.
|
||||
rgtest!(r1638, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create_bytes("foo", b"\xef\xbb\xbfx");
|
||||
|
||||
eqnice!("foo:1:1:x\n", cmd.arg("--column").arg("x").stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1739
|
||||
rgtest!(r1739_replacement_lineterm_match, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "a\n");
|
||||
cmd.args(&[r"-r${0}f", r".*", "test"]);
|
||||
eqnice!("af\n", cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1765
|
||||
rgtest!(r1765, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "\n");
|
||||
// We need to add --color=always here to force the failure, since the bad
|
||||
// code path is only triggered when colors are enabled.
|
||||
cmd.args(&[r"x?", "--crlf", "--color", "always"]);
|
||||
|
||||
assert!(!cmd.stdout().is_empty());
|
||||
});
|
||||
|
||||
rgtest!(r1866, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("test", "foobar\nfoobar\nfoo quux");
|
||||
cmd.args(&[
|
||||
"--multiline",
|
||||
"--vimgrep",
|
||||
r"foobar\nfoobar\nfoo|quux",
|
||||
"test",
|
||||
]);
|
||||
|
||||
// vimgrep only wants the first line of each match, even when a match
|
||||
// spans multiple lines.
|
||||
//
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1866
|
||||
let expected = "\
|
||||
test:1:1:foobar
|
||||
test:3:5:foo quux
|
||||
";
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// See: https://github.com/BurntSushi/ripgrep/issues/1868
|
||||
rgtest!(r1868_context_passthru_override, |dir: Dir, _: TestCommand| {
|
||||
dir.create("test", "foo\nbar\nbaz\nquux\n");
|
||||
|
||||
let args = &["-C1", "bar", "test"];
|
||||
eqnice!("foo\nbar\nbaz\n", dir.command().args(args).stdout());
|
||||
let args = &["--passthru", "bar", "test"];
|
||||
eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout());
|
||||
|
||||
let args = &["--passthru", "-C1", "bar", "test"];
|
||||
eqnice!("foo\nbar\nbaz\n", dir.command().args(args).stdout());
|
||||
let args = &["-C1", "--passthru", "bar", "test"];
|
||||
eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout());
|
||||
|
||||
let args = &["--passthru", "-B1", "bar", "test"];
|
||||
eqnice!("foo\nbar\n", dir.command().args(args).stdout());
|
||||
let args = &["-B1", "--passthru", "bar", "test"];
|
||||
eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout());
|
||||
|
||||
let args = &["--passthru", "-A1", "bar", "test"];
|
||||
eqnice!("bar\nbaz\n", dir.command().args(args).stdout());
|
||||
let args = &["-A1", "--passthru", "bar", "test"];
|
||||
eqnice!("foo\nbar\nbaz\nquux\n", dir.command().args(args).stdout());
|
||||
});
|
||||
|
||||
rgtest!(r1878, |dir: Dir, _: TestCommand| {
|
||||
dir.create("test", "a\nbaz\nabc\n");
|
||||
|
||||
// Since ripgrep enables (?m) by default, '^' will match at the beginning
|
||||
// of a line, even when -U/--multiline is used.
|
||||
let args = &["-U", "--no-mmap", r"^baz", "test"];
|
||||
eqnice!("baz\n", dir.command().args(args).stdout());
|
||||
let args = &["-U", "--mmap", r"^baz", "test"];
|
||||
eqnice!("baz\n", dir.command().args(args).stdout());
|
||||
|
||||
// But when (?-m) is disabled, or when \A is used, then there should be no
|
||||
// matches that aren't anchored to the beginning of the file.
|
||||
let args = &["-U", "--no-mmap", r"(?-m)^baz", "test"];
|
||||
dir.command().args(args).assert_err();
|
||||
let args = &["-U", "--mmap", r"(?-m)^baz", "test"];
|
||||
dir.command().args(args).assert_err();
|
||||
|
||||
let args = &["-U", "--no-mmap", r"\Abaz", "test"];
|
||||
dir.command().args(args).assert_err();
|
||||
let args = &["-U", "--mmap", r"\Abaz", "test"];
|
||||
dir.command().args(args).assert_err();
|
||||
});
|
||||
|
Reference in New Issue
Block a user