Compare commits


2 Commits

Author SHA1 Message Date
Andrew Gallant
ebc80b01fa deps: update to clap 2.33 2019-04-06 09:37:33 -04:00
lesnyrumcajs
b00cd69a40 searcher: add option to disable BOM sniffing
This commit adds a new encoding feature where the -E/--encoding flag
will now accept a value of 'none'. When given this value, all encoding
related machinery is disabled and ripgrep will search the raw bytes of
the file, including the BOM if it's present.

Closes #1207, Closes #1208
2019-04-06 09:36:32 -04:00
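As a quick illustration of the flag described in the commit message (a usage
sketch; the pattern and file name are placeholders):

```
# Search the raw bytes with all transcoding disabled; a leading BOM,
# if present, is searched rather than stripped.
$ rg --encoding none 'pattern' file-with-bom

# Short form of the same thing.
$ rg -E none 'pattern' file-with-bom
```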
162 changed files with 4314 additions and 8069 deletions

.github/workflows/ci.yml

@@ -1,195 +0,0 @@
name: ci
on:
pull_request:
push:
branches:
- master
schedule:
- cron: '00 01 * * *'
jobs:
test:
name: test
env:
# For some builds, we use cross to test on 32-bit and big-endian
# systems.
CARGO: cargo
# When CARGO is set to CROSS, this is set to `--target matrix.target`.
TARGET_FLAGS:
# When CARGO is set to CROSS, TARGET_DIR includes matrix.target.
TARGET_DIR: ./target
# Emit backtraces on panics.
RUST_BACKTRACE: 1
runs-on: ${{ matrix.os }}
strategy:
matrix:
build:
# We test ripgrep on a pinned version of Rust, along with the moving
# targets of 'stable' and 'beta' for good measure.
- pinned
- stable
- beta
# Our release builds are generated by a nightly compiler to take
# advantage of the latest optimizations/compile time improvements. So
# we test all of them here. (We don't do mips releases, but test on
# mips for big-endian coverage.)
- nightly
- nightly-musl
- nightly-32
- nightly-mips
- nightly-arm
- macos
- win-msvc
- win-gnu
include:
- build: pinned
os: ubuntu-18.04
rust: 1.41.0
- build: stable
os: ubuntu-18.04
rust: stable
- build: beta
os: ubuntu-18.04
rust: beta
- build: nightly
os: ubuntu-18.04
rust: nightly
- build: nightly-musl
os: ubuntu-18.04
rust: nightly
target: x86_64-unknown-linux-musl
- build: nightly-32
os: ubuntu-18.04
rust: nightly
target: i686-unknown-linux-gnu
- build: nightly-mips
os: ubuntu-18.04
rust: nightly
target: mips64-unknown-linux-gnuabi64
- build: nightly-arm
os: ubuntu-18.04
rust: nightly
# For stripping release binaries:
# docker run --rm -v $PWD/target:/target:Z \
# rustembedded/cross:arm-unknown-linux-gnueabihf \
# arm-linux-gnueabihf-strip \
# /target/arm-unknown-linux-gnueabihf/debug/rg
target: arm-unknown-linux-gnueabihf
- build: macos
os: macos-latest
rust: nightly
- build: win-msvc
os: windows-2019
rust: nightly
- build: win-gnu
os: windows-2019
rust: nightly-x86_64-gnu
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Install packages (Ubuntu)
if: matrix.os == 'ubuntu-18.04'
run: |
ci/ubuntu-install-packages
- name: Install packages (macOS)
if: matrix.os == 'macos-latest'
run: |
ci/macos-install-packages
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
- name: Use Cross
if: matrix.target != ''
run: |
# FIXME: to work around bugs in latest cross release, install master.
# See: https://github.com/rust-embedded/cross/issues/357
cargo install --git https://github.com/rust-embedded/cross
echo "::set-env name=CARGO::cross"
echo "::set-env name=TARGET_FLAGS::--target ${{ matrix.target }}"
echo "::set-env name=TARGET_DIR::./target/${{ matrix.target }}"
- name: Show command used for Cargo
run: |
echo "cargo command is: ${{ env.CARGO }}"
echo "target flag is: ${{ env.TARGET_FLAGS }}"
- name: Build ripgrep and all crates
run: ${{ env.CARGO }} build --verbose --all ${{ env.TARGET_FLAGS }}
- name: Build ripgrep with PCRE2
run: ${{ env.CARGO }} build --verbose --all --features pcre2 ${{ env.TARGET_FLAGS }}
# This is useful for debugging problems when the expected build artifacts
# (like shell completions and man pages) aren't generated.
- name: Show build.rs stderr
shell: bash
run: |
set +x
stderr="$(find "${{ env.TARGET_DIR }}/debug" -name stderr -print0 | xargs -0 ls -t | head -n1)"
if [ -s "$stderr" ]; then
echo "===== $stderr ===== "
cat "$stderr"
echo "====="
fi
set -x
- name: Run tests with PCRE2 (sans cross)
if: matrix.target == ''
run: ${{ env.CARGO }} test --verbose --all --features pcre2 ${{ env.TARGET_FLAGS }}
- name: Run tests without PCRE2 (with cross)
# These tests should actually work, but they almost double the runtime.
# Every integration test spins up qemu to run 'rg', and when PCRE2 is
# enabled, every integration test is run twice: one with the default
# regex engine and once with PCRE2.
if: matrix.target != ''
run: ${{ env.CARGO }} test --verbose --all ${{ env.TARGET_FLAGS }}
- name: Test for existence of build artifacts (Windows)
if: matrix.os == 'windows-2019'
shell: bash
run: |
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
ls "$outdir/_rg.ps1" && file "$outdir/_rg.ps1"
- name: Test for existence of build artifacts (Unix)
if: matrix.os != 'windows-2019'
shell: bash
run: |
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
for f in rg.bash rg.fish rg.1; do
# We could use file -E here, but it isn't supported on macOS.
ls "$outdir/$f" && file "$outdir/$f"
done
- name: Test zsh shell completions (Unix, sans cross)
# We could test this when using Cross, but we'd have to execute the
# 'rg' binary (done in test-complete) with qemu, which is a pain and
# doesn't really gain us much. If shell completion works in one place,
# it probably works everywhere.
if: matrix.target == '' && matrix.os != 'windows-2019'
shell: bash
run: ci/test-complete
rustfmt:
name: rustfmt
runs-on: ubuntu-18.04
steps:
- name: Checkout repository
uses: actions/checkout@v1
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
profile: minimal
components: rustfmt
- name: Check formatting
run: |
cargo fmt --all -- --check
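A side note on the `::set-env` workflow commands used above: GitHub later
deprecated them in favor of appending to the `$GITHUB_ENV` file. A sketch of
the equivalent shell, should this workflow ever be revived:

```
# Same effect as the ::set-env calls above, via the $GITHUB_ENV file.
echo "CARGO=cross" >> "$GITHUB_ENV"
echo "TARGET_FLAGS=--target ${{ matrix.target }}" >> "$GITHUB_ENV"
echo "TARGET_DIR=./target/${{ matrix.target }}" >> "$GITHUB_ENV"
```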

.github/workflows/release.yml

@@ -1,211 +0,0 @@
# The way this works is a little weird. But basically, the create-release job
# runs purely to initialize the GitHub release itself. Once done, the upload
# URL of the release is saved as an artifact.
#
# The build-release job runs only once create-release is finished. It gets
# the release upload URL by downloading the corresponding artifact (which was
# uploaded by create-release). It then builds the release executables for each
# supported platform and attaches them as release assets to the previously
# created release.
#
# The key here is that we create the release only once.
name: release
on:
push:
# Enable when testing release infrastructure on a branch.
# branches:
# - ag/release
tags:
- '[0-9]+.[0-9]+.[0-9]+'
jobs:
create-release:
name: create-release
runs-on: ubuntu-latest
# env:
# Set to force version number, e.g., when no tag exists.
# RG_VERSION: TEST-0.0.0
steps:
- name: Create artifacts directory
run: mkdir artifacts
- name: Get the release version from the tag
if: env.RG_VERSION == ''
run: |
# Apparently, this is the right way to get a tag name. Really?
#
# See: https://github.community/t5/GitHub-Actions/How-to-get-just-the-tag-name/m-p/32167/highlight/true#M1027
echo "::set-env name=RG_VERSION::${GITHUB_REF#refs/tags/}"
echo "version is: ${{ env.RG_VERSION }}"
- name: Create GitHub release
id: release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ env.RG_VERSION }}
release_name: ${{ env.RG_VERSION }}
- name: Save release upload URL to artifact
run: echo "${{ steps.release.outputs.upload_url }}" > artifacts/release-upload-url
- name: Save version number to artifact
run: echo "${{ env.RG_VERSION }}" > artifacts/release-version
- name: Upload artifacts
uses: actions/upload-artifact@v1
with:
name: artifacts
path: artifacts
build-release:
name: build-release
needs: ['create-release']
runs-on: ${{ matrix.os }}
env:
# For some builds, we use cross to test on 32-bit and big-endian
# systems.
CARGO: cargo
# When CARGO is set to CROSS, this is set to `--target matrix.target`.
TARGET_FLAGS:
# When CARGO is set to CROSS, TARGET_DIR includes matrix.target.
TARGET_DIR: ./target
# Emit backtraces on panics.
RUST_BACKTRACE: 1
# Build static releases with PCRE2.
PCRE2_SYS_STATIC: 1
strategy:
matrix:
build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc]
include:
- build: linux
os: ubuntu-18.04
rust: nightly
target: x86_64-unknown-linux-musl
- build: linux-arm
os: ubuntu-18.04
rust: nightly
target: arm-unknown-linux-gnueabihf
- build: macos
os: macos-latest
rust: nightly
target: x86_64-apple-darwin
- build: win-msvc
os: windows-2019
rust: nightly
target: x86_64-pc-windows-msvc
- build: win-gnu
os: windows-2019
rust: nightly-x86_64-gnu
target: x86_64-pc-windows-gnu
- build: win32-msvc
os: windows-2019
rust: nightly
target: i686-pc-windows-msvc
steps:
- name: Checkout repository
uses: actions/checkout@v1
with:
fetch-depth: 1
- name: Install packages (Ubuntu)
if: matrix.os == 'ubuntu-18.04'
run: |
ci/ubuntu-install-packages
- name: Install packages (macOS)
if: matrix.os == 'macos-latest'
run: |
ci/macos-install-packages
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
target: ${{ matrix.target }}
- name: Use Cross
# if: matrix.os != 'windows-2019'
run: |
# FIXME: to work around bugs in latest cross release, install master.
# See: https://github.com/rust-embedded/cross/issues/357
cargo install --git https://github.com/rust-embedded/cross
echo "::set-env name=CARGO::cross"
echo "::set-env name=TARGET_FLAGS::--target ${{ matrix.target }}"
echo "::set-env name=TARGET_DIR::./target/${{ matrix.target }}"
- name: Show command used for Cargo
run: |
echo "cargo command is: ${{ env.CARGO }}"
echo "target flag is: ${{ env.TARGET_FLAGS }}"
echo "target dir is: ${{ env.TARGET_DIR }}"
- name: Get release download URL
uses: actions/download-artifact@v1
with:
name: artifacts
path: artifacts
- name: Set release upload URL and release version
shell: bash
run: |
release_upload_url="$(cat artifacts/release-upload-url)"
echo "::set-env name=RELEASE_UPLOAD_URL::$release_upload_url"
echo "release upload url: $RELEASE_UPLOAD_URL"
release_version="$(cat artifacts/release-version)"
echo "::set-env name=RELEASE_VERSION::$release_version"
echo "release version: $RELEASE_VERSION"
- name: Build release binary
run: ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}
- name: Strip release binary (linux and macos)
if: matrix.build == 'linux' || matrix.build == 'macos'
run: strip "target/${{ matrix.target }}/release/rg"
- name: Strip release binary (arm)
if: matrix.build == 'linux-arm'
run: |
docker run --rm -v \
"$PWD/target:/target:Z" \
rustembedded/cross:arm-unknown-linux-gnueabihf \
arm-linux-gnueabihf-strip \
/target/arm-unknown-linux-gnueabihf/release/rg
- name: Build archive
shell: bash
run: |
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
staging="ripgrep-${{ env.RELEASE_VERSION }}-${{ matrix.target }}"
mkdir -p "$staging"/{complete,doc}
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$staging/"
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/"
cp "$outdir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
cp complete/_rg "$staging/complete/"
if [ "${{ matrix.os }}" = "windows-2019" ]; then
cp "target/${{ matrix.target }}/release/rg.exe" "$staging/"
7z a "$staging.zip" "$staging"
echo "::set-env name=ASSET::$staging.zip"
else
# The man page is only generated on Unix systems. ¯\_(ツ)_/¯
cp "$outdir"/rg.1 "$staging/doc/"
cp "target/${{ matrix.target }}/release/rg" "$staging/"
tar czf "$staging.tar.gz" "$staging"
echo "::set-env name=ASSET::$staging.tar.gz"
fi
- name: Upload release archive
uses: actions/upload-release-asset@v1.0.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ env.RELEASE_UPLOAD_URL }}
asset_path: ${{ env.ASSET }}
asset_name: ${{ env.ASSET }}
asset_content_type: application/octet-stream
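Condensed, the handoff described in the comments at the top of this workflow
is just a file round trip through a shared artifact (a sketch; `$upload_url`
and `$version` stand in for the values computed in the steps above):

```
# create-release: persist release metadata as plain files.
echo "$upload_url" > artifacts/release-upload-url
echo "$version" > artifacts/release-version

# build-release: read them back after downloading the 'artifacts' artifact.
release_upload_url="$(cat artifacts/release-upload-url)"
release_version="$(cat artifacts/release-version)"
```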

.travis.yml (new file)

@@ -0,0 +1,110 @@
language: rust
dist: xenial
env:
global:
- PROJECT_NAME: ripgrep
- RUST_BACKTRACE: full
addons:
apt:
packages:
# For generating man page.
- libxslt1-dev
- asciidoc
- docbook-xsl
- xsltproc
- libxml2-utils
# Needed for completion-function test.
- zsh
# Needed for testing decompression search.
- xz-utils
- liblz4-tool
# For building MUSL static builds on Linux.
- musl-tools
matrix:
fast_finish: true
include:
# Nightly channel.
# All *nix releases are done on the nightly channel to take advantage
# of the regex library's multiple pattern SIMD search.
- os: linux
rust: nightly
env: TARGET=i686-unknown-linux-musl
- os: linux
rust: nightly
env: TARGET=x86_64-unknown-linux-musl
- os: osx
rust: nightly
# XML_CATALOG_FILES is apparently necessary for asciidoc on macOS.
env: TARGET=x86_64-apple-darwin XML_CATALOG_FILES=/usr/local/etc/xml/catalog
- os: linux
rust: nightly
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
addons:
apt:
packages:
- gcc-4.8-arm-linux-gnueabihf
- binutils-arm-linux-gnueabihf
- libc6-armhf-cross
- libc6-dev-armhf-cross
# For generating man page.
- libxslt1-dev
- asciidoc
- docbook-xsl
- xsltproc
- libxml2-utils
# Beta channel. We enable these to make sure there are no regressions in
# Rust beta releases.
- os: linux
rust: beta
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: beta
env: TARGET=x86_64-unknown-linux-gnu
# Minimum Rust supported channel. We enable these to make sure ripgrep
# continues to work on the advertised minimum Rust version.
- os: linux
rust: 1.32.0
env: TARGET=x86_64-unknown-linux-gnu
- os: linux
rust: 1.32.0
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: 1.32.0
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
addons:
apt:
packages:
- gcc-4.8-arm-linux-gnueabihf
- binutils-arm-linux-gnueabihf
- libc6-armhf-cross
- libc6-dev-armhf-cross
# For generating man page.
- libxslt1-dev
- asciidoc
- docbook-xsl
- xsltproc
- libxml2-utils
install: ci/install.sh
script: ci/script.sh
before_deploy: ci/before_deploy.sh
deploy:
provider: releases
file_glob: true
file: deployment/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz
skip_cleanup: true
on:
condition: $TRAVIS_RUST_VERSION = nightly
branch: master # i guess we do need this after all?
tags: true
api_key:
secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo="
branches:
only:
# Pushes and PR to the master branch
- master
# Ruby regex to match tags. Required, or travis won't trigger deploys when
# a new tag is pushed.
- /^\d+\.\d+\.\d+.*$/
notifications:
email:
on_success: never

CHANGELOG.md

@@ -1,158 +1,6 @@
12.0.1 (TBD)
0.11.0 (TBD)
============
ripgrep 12.0.1 is a small patch release that includes a couple minor bug fixes.
These bug fixes address regressions introduced in the 12.0.0 release.
Bug fixes:
* [BUG #1520](https://github.com/BurntSushi/ripgrep/issues/1520):
Don't emit spurious error messages in git repositories with submodules.
12.0.0 (2020-03-15)
===================
ripgrep 12 is a new major version release of ripgrep that contains many bug
fixes, several important performance improvements and a few minor new features.
In a near future release, I am hoping to add an
[indexing feature](https://github.com/BurntSushi/ripgrep/issues/1497)
to ripgrep, which will dramatically speed up searching by building an index.
Feedback would very much be appreciated, especially on the user experience
which will be difficult to get right.
This release has no known breaking changes.
Deprecations:
* The `--no-pcre2-unicode` flag is deprecated. Instead, use the `--no-unicode`
flag, which applies to both the default regex engine and PCRE2. For now,
`--no-pcre2-unicode` and `--pcre2-unicode` are aliases to `--no-unicode`
and `--unicode`, respectively. The `--[no-]pcre2-unicode` flags may be
removed in a future release.
* The `--auto-hybrid-regex` flag is deprecated. Instead, use the new `--engine`
flag with the `auto` value.
Performance improvements:
* [PERF #1087](https://github.com/BurntSushi/ripgrep/pull/1087):
ripgrep is smarter when detected literals are whitespace.
* [PERF #1381](https://github.com/BurntSushi/ripgrep/pull/1381):
Directory traversal is sped up with speculative ignore-file existence checks.
* [PERF cd8ec38a](https://github.com/BurntSushi/ripgrep/commit/cd8ec38a):
Improve inner literal detection to cover more cases more effectively.
e.g., ` +Sherlock Holmes +` now has ` Sherlock Holmes ` extracted instead
of ` `.
* [PERF 6a0e0147](https://github.com/BurntSushi/ripgrep/commit/6a0e0147):
Improve literal detection when the `-w/--word-regexp` flag is used.
* [PERF ad97e9c9](https://github.com/BurntSushi/ripgrep/commit/ad97e9c9):
Improve overall performance of the `-w/--word-regexp` flag.
Feature enhancements:
* Added or improved file type filtering for erb, diff, Gradle, HAML, Org,
Postscript, Skim, Slim, Slime, RPM Spec files, Typoscript, xml.
* [FEATURE #1370](https://github.com/BurntSushi/ripgrep/pull/1370):
Add `--include-zero` flag that shows files searched without matches.
* [FEATURE #1390](https://github.com/BurntSushi/ripgrep/pull/1390):
Add `--no-context-separator` flag that always hides context separators.
* [FEATURE #1414](https://github.com/BurntSushi/ripgrep/pull/1414):
Add `--no-require-git` flag to allow ripgrep to respect gitignores anywhere.
* [FEATURE #1420](https://github.com/BurntSushi/ripgrep/pull/1420):
Add `--no-ignore-exclude` to disregard rules in `.git/info/exclude` files.
* [FEATURE #1466](https://github.com/BurntSushi/ripgrep/pull/1466):
Add `--no-ignore-files` flag to disable all `--ignore-file` flags.
* [FEATURE #1488](https://github.com/BurntSushi/ripgrep/pull/1488):
Add '--engine' flag for easier switching between regex engines.
* [FEATURE 75cbe88f](https://github.com/BurntSushi/ripgrep/commit/75cbe88f):
Add `--no-unicode` flag. This works on all supported regex engines.
Bug fixes:
* [BUG #1291](https://github.com/BurntSushi/ripgrep/issues/1291):
ripgrep now works in non-existent directories.
* [BUG #1319](https://github.com/BurntSushi/ripgrep/issues/1319):
Fix match bug due to errant literal detection.
* [**BUG #1335**](https://github.com/BurntSushi/ripgrep/issues/1335):
Fixes a performance bug when searching plain text files with very long lines.
This was a serious performance regression in some cases.
* [BUG #1344](https://github.com/BurntSushi/ripgrep/issues/1344):
Document usage of `--type all`.
* [BUG #1389](https://github.com/BurntSushi/ripgrep/issues/1389):
Fixes a bug where ripgrep would panic when searching a symlinked directory.
* [BUG #1439](https://github.com/BurntSushi/ripgrep/issues/1439):
Improve documentation for ripgrep's automatic stdin detection.
* [BUG #1441](https://github.com/BurntSushi/ripgrep/issues/1441):
Remove CPU features from man page.
* [BUG #1442](https://github.com/BurntSushi/ripgrep/issues/1442),
[BUG #1478](https://github.com/BurntSushi/ripgrep/issues/1478):
Improve documentation of the `-g/--glob` flag.
* [BUG #1445](https://github.com/BurntSushi/ripgrep/issues/1445):
ripgrep now respects ignore rules from .git/info/exclude in worktrees.
* [BUG #1485](https://github.com/BurntSushi/ripgrep/issues/1485):
Fish shell completions from the release Debian package are now installed to
`/usr/share/fish/vendor_completions.d/rg.fish`.
11.0.2 (2019-08-01)
===================
ripgrep 11.0.2 is a new patch release that fixes a few bugs, including a
performance regression and a matching bug when using the `-F/--fixed-strings`
flag.
Feature enhancements:
* [FEATURE #1293](https://github.com/BurntSushi/ripgrep/issues/1293):
Added `--glob-case-insensitive` flag that makes `--glob` behave as `--iglob`.
Bug fixes:
* [BUG #1246](https://github.com/BurntSushi/ripgrep/issues/1246):
Add translations to README, starting with an unofficial Chinese translation.
* [BUG #1259](https://github.com/BurntSushi/ripgrep/issues/1259):
Fix bug where the last byte of a `-f file` was stripped if it wasn't a `\n`.
* [BUG #1261](https://github.com/BurntSushi/ripgrep/issues/1261):
Document that no error is reported when searching for `\n` with `-P/--pcre2`.
* [BUG #1284](https://github.com/BurntSushi/ripgrep/issues/1284):
Mention `.ignore` and `.rgignore` more prominently in the README.
* [BUG #1292](https://github.com/BurntSushi/ripgrep/issues/1292):
Fix bug where `--with-filename` was sometimes enabled incorrectly.
* [BUG #1268](https://github.com/BurntSushi/ripgrep/issues/1268):
Fix major performance regression in GitHub `x86_64-linux` binary release.
* [BUG #1302](https://github.com/BurntSushi/ripgrep/issues/1302):
Show better error messages when a non-existent preprocessor command is given.
* [BUG #1334](https://github.com/BurntSushi/ripgrep/issues/1334):
Fix match regression with `-F` flag when patterns contain meta characters.
11.0.1 (2019-04-16)
===================
ripgrep 11.0.1 is a new patch release that fixes a search regression introduced
in the previous 11.0.0 release. In particular, ripgrep can enter an infinite
loop for some search patterns when searching invalid UTF-8.
Bug fixes:
* [BUG #1247](https://github.com/BurntSushi/ripgrep/issues/1247):
Fix search bug that can cause ripgrep to enter an infinite loop.
11.0.0 (2019-04-15)
===================
ripgrep 11 is a new major version release of ripgrep that contains many bug
fixes, some performance improvements and a few feature enhancements. Notably,
ripgrep's user experience for binary file filtering has been improved. See the
[guide's new section on binary data](GUIDE.md#binary-data) for more details.
This release also marks a change in ripgrep's versioning. Whereas the previous
version was `0.10.0`, this version is `11.0.0`. Moving forward, ripgrep's
major version will be increased a few times per year. ripgrep will continue to
be conservative with respect to backwards compatibility, but may occasionally
introduce breaking changes, which will always be documented in this CHANGELOG.
See [issue 1172](https://github.com/BurntSushi/ripgrep/issues/1172) for a bit
more detail on why this versioning change was made.
This release increases the **minimum supported Rust version** from 1.28.0 to
1.34.0.
TODO.
**BREAKING CHANGES**:
@@ -163,91 +11,45 @@ This release increases the **minimum supported Rust version** from 1.28.0 to
error (e.g., regex syntax error). One exception to this is if ripgrep is run
with `-q/--quiet`. In that case, if an error occurs and a match is found,
then ripgrep will exit with a `0` exit status code.
* Supplying the `-u/--unrestricted` flag three times is now equivalent to
supplying `--no-ignore --hidden --binary`. Previously, `-uuu` was equivalent
to `--no-ignore --hidden --text`. The difference is that `--binary` disables
binary file filtering without potentially dumping binary data into your
terminal. That is, `rg -uuu foo` should now be equivalent to `grep -r foo`.
* The `avx-accel` feature of ripgrep has been removed since it is no longer
necessary. All uses of AVX in ripgrep are now enabled automatically via
runtime CPU feature detection. The `simd-accel` feature does remain available
(only for enabling SIMD for transcoding), however, it does increase
compilation times substantially at the moment.
Performance improvements:
* [PERF #497](https://github.com/BurntSushi/ripgrep/issues/497),
[PERF #838](https://github.com/BurntSushi/ripgrep/issues/838):
Make `rg -F -f dictionary-of-literals` much faster.
runtime CPU feature detection. The `simd-accel` feature does remain
available, however, it does increase compilation times substantially at the
moment.
Feature enhancements:
* Added or improved file type filtering for Apache Thrift, ASP, Bazel, Brotli,
BuildStream, bzip2, C, C++, Cython, gzip, Java, Make, Postscript, QML, Tex,
XML, xz, zig and zstd.
* [FEATURE #855](https://github.com/BurntSushi/ripgrep/issues/855):
Add `--binary` flag for disabling binary file filtering.
* [FEATURE #1078](https://github.com/BurntSushi/ripgrep/pull/1078):
Add `--max-columns-preview` flag for showing a preview of long lines.
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
ripgrep's exit status logic should now match GNU grep. See updated man page.
* [FEATURE #1164](https://github.com/BurntSushi/ripgrep/pull/1164):
Add `--ignore-file-case-insensitive` for case insensitive ignore globs.
* [FEATURE #1185](https://github.com/BurntSushi/ripgrep/pull/1185):
Add `-I` flag as a short option for the `--no-filename` flag.
* [FEATURE #1207](https://github.com/BurntSushi/ripgrep/pull/1207):
Add `none` value to `-E/--encoding` to forcefully disable all transcoding.
* [FEATURE da9d7204](https://github.com/BurntSushi/ripgrep/commit/da9d7204):
Add `--pcre2-version` for showing PCRE2 version information.
* [FEATURE #1170](https://github.com/BurntSushi/ripgrep/pull/1170):
Add `--ignore-file-case-insensitive` for case insensitive .ignore globs.
Bug fixes:
* [BUG #306](https://github.com/BurntSushi/ripgrep/issues/306),
[BUG #855](https://github.com/BurntSushi/ripgrep/issues/855):
Improve the user experience for ripgrep's binary file filtering.
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
`**` is now accepted as valid syntax anywhere in a glob.
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
ripgrep no longer hangs when searching `/proc` with a zombie process present.
* [BUG #1052](https://github.com/BurntSushi/ripgrep/issues/1052):
Fix bug where ripgrep could panic when transcoding UTF-16 files.
* [BUG #1055](https://github.com/BurntSushi/ripgrep/issues/1055):
Suggest `-U/--multiline` when a pattern contains a `\n`.
* [BUG #1063](https://github.com/BurntSushi/ripgrep/issues/1063):
Always strip a BOM if it's present, even for UTF-8.
* [BUG #1064](https://github.com/BurntSushi/ripgrep/issues/1064):
Fix inner literal detection that could lead to incorrect matches.
* [BUG #1079](https://github.com/BurntSushi/ripgrep/issues/1079):
Fixes a bug where the order of globs could result in missing a match.
* [BUG #1089](https://github.com/BurntSushi/ripgrep/issues/1089):
Fix another bug where ripgrep could panic when transcoding UTF-16 files.
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
Add note about inverted flags to the man page.
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
Fix handling of literal slashes in gitignore patterns.
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
Fix corner cases involving the `--crlf` flag.
* [BUG #1101](https://github.com/BurntSushi/ripgrep/issues/1101):
Fix AsciiDoc escaping for man page output.
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
Clarify what `--encoding auto` does.
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
`--files-with-matches` and `--files-without-match` work with one file.
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
Fix handling of literal slashes in gitignore patterns.
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
Fix bug that was triggering Windows antimalware when using the `--files`
flag.
Fix bug that was triggering Windows antimalware when using the --files flag.
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
* [BUG #1144](https://github.com/BurntSushi/ripgrep/issues/1144):
Fixes a bug where line numbers could be wrong on big-endian machines.
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
Windows files with "hidden" attribute are now treated as hidden.
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
@@ -256,12 +58,6 @@ Bug fixes:
Fix handling of repeated `**` patterns in gitignore files.
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
* [BUG #1189](https://github.com/BurntSushi/ripgrep/issues/1189):
Document cases where ripgrep may use a lot of memory.
* [BUG #1203](https://github.com/BurntSushi/ripgrep/issues/1203):
Fix a matching bug related to the suffix literal optimization.
* [BUG 8f14cb18](https://github.com/BurntSushi/ripgrep/commit/8f14cb18):
Increase the default stack size for PCRE2's JIT.
0.10.0 (2018-09-07)

Cargo.lock (generated)

File diff suppressed because it is too large.

Cargo.toml

@@ -1,11 +1,11 @@
[package]
name = "ripgrep"
version = "12.0.0" #:version
version = "0.10.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
ripgrep is a line-oriented search tool that recursively searches your current
directory for a regex pattern while respecting your gitignore rules. ripgrep
has first class support on Windows, macOS and Linux.
has first class support on Windows, macOS and Linux
"""
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
@@ -20,12 +20,12 @@ autotests = false
edition = "2018"
[badges]
# I guess crates.io does not support GitHub Action badges yet.
# Tracking PR: https://github.com/rust-lang/crates.io/pull/1838
travis-ci = { repository = "BurntSushi/ripgrep" }
appveyor = { repository = "BurntSushi/ripgrep" }
[[bin]]
bench = false
path = "crates/core/main.rs"
path = "src/main.rs"
name = "rg"
[[test]]
@@ -34,48 +34,44 @@ path = "tests/tests.rs"
[workspace]
members = [
"crates/globset",
"crates/grep",
"crates/cli",
"crates/matcher",
"crates/pcre2",
"crates/printer",
"crates/regex",
"crates/searcher",
"crates/ignore",
"globset",
"grep",
"grep-cli",
"grep-matcher",
"grep-pcre2",
"grep-printer",
"grep-regex",
"grep-searcher",
"ignore",
]
[dependencies]
bstr = "0.2.12"
grep = { version = "0.2.5", path = "crates/grep" }
ignore = { version = "0.4.12", path = "crates/ignore" }
bstr = "0.1.2"
grep = { version = "0.2.3", path = "grep" }
ignore = { version = "0.4.4", path = "ignore" }
lazy_static = "1.1.0"
log = "0.4.5"
num_cpus = "1.8.0"
regex = "1.3.5"
regex = "1.0.5"
serde_json = "1.0.23"
termcolor = "1.1.0"
termcolor = "1.0.3"
[dependencies.clap]
version = "2.33.0"
version = "2.32.0"
default-features = false
features = ["suggestions"]
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
version = "0.3.0"
[build-dependencies]
lazy_static = "1.1.0"
[build-dependencies.clap]
version = "2.33.0"
version = "2.32.0"
default-features = false
features = ["suggestions"]
[dev-dependencies]
serde = "1.0.77"
serde_derive = "1.0.77"
walkdir = "2"
[features]
simd-accel = ["grep/simd-accel"]
@@ -98,11 +94,11 @@ assets = [
# The man page is automatically generated by ripgrep's build process, so
# this file isn't actually committed. Instead, to create a dpkg, either
# create a deployment/deb directory and copy the man page to it, or use the
# 'ci/build-deb' script.
# 'ci/build_deb.sh' script.
["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"],
# Similarly for shell completions.
["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"],
["deployment/deb/rg.fish", "usr/share/fish/vendor_completions.d/rg.fish", "644"],
["deployment/deb/rg.fish", "usr/share/fish/completions/rg.fish", "644"],
["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"],
]
extended-description = """\

Cross.toml

@@ -1,11 +0,0 @@
[target.x86_64-unknown-linux-musl]
image = "burntsushi/cross:x86_64-unknown-linux-musl"
[target.i686-unknown-linux-gnu]
image = "burntsushi/cross:i686-unknown-linux-gnu"
[target.mips64-unknown-linux-gnuabi64]
image = "burntsushi/cross:mips64-unknown-linux-gnuabi64"
[target.arm-unknown-linux-gnueabihf]
image = "burntsushi/cross:arm-unknown-linux-gnueabihf"

FAQ.md

@@ -25,7 +25,6 @@
* [How is ripgrep licensed?](#license)
* [Can ripgrep replace grep?](#posix4ever)
* [What does the "rip" in ripgrep mean?](#intentcountsforsomething)
* [How can I donate to ripgrep or its maintainers?](#donations)
<h3 name="config">
@@ -51,9 +50,9 @@ ripgrep is a project whose contributors are volunteers. A release schedule
adds undue stress to said volunteers. Therefore, releases are made on a best
effort basis and no dates **will ever be given**.
An exception to this _can be_ high impact bugs. If a ripgrep release contains
a significant regression, then there will generally be a strong push to get a
patch release out with a fix. However, no promises are made.
One exception to this is high impact bugs. If a ripgrep release contains a
significant regression, then there will generally be a strong push to get a
patch release out with a fix.
<h3 name="manpage">
@@ -935,8 +934,8 @@ Here are some cases where you might *not* want to use ripgrep. The same caveats
for the previous section apply.
* Are you writing portable shell scripts intended to work in a variety of
environments? Great, probably not a good idea to use ripgrep! ripgrep has
nowhere near the ubiquity of grep, so if you do use ripgrep, you might need
environments? Great, probably not a good idea to use ripgrep! ripgrep has
nowhere near the ubiquity of grep, so if you do use ripgrep, you might need
to futz with the installation process more than you would with grep.
* Do you care about POSIX compatibility? If so, then you can't use ripgrep
because it never was, isn't and never will be POSIX compatible.
@@ -982,21 +981,3 @@ grep](#posix4ever),
ripgrep is neither actually a "grep killer" nor was it ever intended to be. It
certainly does eat into some of its use cases, but that's nothing that other
tools like ack or The Silver Searcher weren't already doing.
<h3 name="donations">
How can I donate to ripgrep or its maintainers?
</h3>
As of now, you can't. While I believe the various efforts that are being
undertaken to help fund FOSS are extremely important, they aren't a good fit
for me. ripgrep is and I hope will remain a project of love that I develop in
my free time. As such, involving money---even in the form of donations given
without expectations---would severely change that dynamic for me personally.
Instead, I'd recommend donating to something else that is doing work that you
find meaningful. If you would like suggestions, then my favorites are:
* [The Internet Archive](https://archive.org/donate/)
* [Rails Girls](https://railsgirlssummerofcode.org/campaign/)
* [Wikipedia](https://wikimediafoundation.org/support/)

GUIDE.md

@@ -18,7 +18,6 @@ translatable to any command line shell environment.
* [Replacements](#replacements)
* [Configuration file](#configuration-file)
* [File encoding](#file-encoding)
* [Binary data](#binary-data)
* [Common options](#common-options)
@@ -110,7 +109,7 @@ colors, you'll notice that `faster` will be highlighted instead of just the
It is beyond the scope of this guide to provide a full tutorial on regular
expressions, but ripgrep's specific syntax is documented here:
https://docs.rs/regex/*/regex/#syntax
https://docs.rs/regex/0.2.5/regex/#syntax
### Recursive search
@@ -411,21 +410,6 @@ alias rg="rg --type-add 'web:*.{html,css,js}'"
or add `--type-add=web:*.{html,css,js}` to your ripgrep configuration file.
([Configuration files](#configuration-file) are covered in more detail later.)
#### The special `all` file type
A special option supported by the `--type` flag is `all`. `--type all` looks
for a match in any of the supported file types listed by `--type-list`,
including those added on the command line using `--type-add`. It's equivalent
to the command `rg --type agda --type asciidoc --type asm ...`, where `...`
stands for a list of `--type` flags for the rest of the types in `--type-list`.
As an example, let's suppose you have a shell script in your current directory,
`my-shell-script`, which includes a shell library, `my-shell-library.bash`.
Both `rg --type sh` and `rg --type all` would only search for matches in
`my-shell-library.bash`, not `my-shell-script`, because the globs matched
by the `sh` file type don't include files without an extension. On the
other hand, `rg --type-not all` would search `my-shell-script` but not
`my-shell-library.bash`.
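A sketch of the distinction this section draws, using the same hypothetical
files:

```
$ rg --type sh TODO        # searches my-shell-library.bash only
$ rg --type all TODO       # every known type; still skips my-shell-script
$ rg --type-not all TODO   # searches my-shell-script, which matches no known type
```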
### Replacements
@@ -553,9 +537,8 @@ formatting peculiarities:
```
$ cat $HOME/.ripgreprc
# Don't let ripgrep vomit really long lines to my terminal, and show a preview.
# Don't let ripgrep vomit really long lines to my terminal.
--max-columns=150
--max-columns-preview
# Add my 'web' type.
--type-add
@@ -697,76 +680,6 @@ $ rg '\w(?-u:\w)\w'
```
### Binary data
In addition to skipping hidden files and files in your `.gitignore` by default,
ripgrep also attempts to skip binary files. ripgrep does this by default
because binary files (like PDFs or images) are typically not things you want to
search when searching for regex matches. Moreover, if content in a binary file
did match, then it's possible for undesirable binary data to be printed to your
terminal and wreak havoc.
Unfortunately, unlike skipping hidden files and respecting your `.gitignore`
rules, a file cannot as easily be classified as binary. In order to figure out
whether a file is binary, the most effective heuristic that balances
correctness with performance is to simply look for `NUL` bytes. At that point,
the determination is simple: a file is considered "binary" if and only if it
contains a `NUL` byte somewhere in its contents.
The issue is that while most binary files will have a `NUL` byte toward the
beginning of their contents, this is not necessarily true. The `NUL` byte might
be the very last byte in a large file, but that file is still considered
binary. While this leads to a fair amount of complexity inside ripgrep's
implementation, it also results in some unintuitive user experiences.
At a high level, ripgrep operates in three different modes with respect to
binary files:
1. The default mode is to attempt to remove binary files from a search
completely. This is meant to mirror how ripgrep removes hidden files and
files in your `.gitignore` automatically. That is, as soon as a file is
detected as binary, searching stops. If a match was already printed (because
it was detected long before a `NUL` byte), then ripgrep will print a warning
message indicating that the search stopped prematurely. This default mode
**only applies to files searched by ripgrep as a result of recursive
directory traversal**, which is consistent with ripgrep's other automatic
filtering. For example, `rg foo .file` will search `.file` even though it
is hidden. Similarly, `rg foo binary-file` will search `binary-file` in
"binary" mode automatically.
2. Binary mode is similar to the default mode, except it will not always
stop searching after it sees a `NUL` byte. Namely, in this mode, ripgrep
will continue searching a file that is known to be binary until the first
of two conditions is met: 1) the end of the file has been reached or 2) a
match is or has been seen. This means that in binary mode, if ripgrep
reports no matches, then there are no matches in the file. When a match does
occur, ripgrep prints a message similar to one it prints when in its default
mode indicating that the search has stopped prematurely. This mode can be
forcefully enabled for all files with the `--binary` flag. The purpose of
binary mode is to provide a way to discover matches in all files, but to
avoid having binary data dumped into your terminal.
3. Text mode completely disables all binary detection and searches all files
as if they were text. This is useful when searching a file that is
predominantly text but contains a `NUL` byte, or if you are specifically
trying to search binary data. This mode can be enabled with the `-a/--text`
flag. Note that when using this mode on very large binary files, it is
possible for ripgrep to use a lot of memory.
Unfortunately, there is one additional complexity in ripgrep that can make it
difficult to reason about binary files. That is, the way binary detection works
depends on the way that ripgrep searches your files. Specifically:
* When ripgrep uses memory maps, then binary detection is only performed on the
first few kilobytes of the file in addition to every matching line.
* When ripgrep doesn't use memory maps, then binary detection is performed on
all bytes searched.
This means that whether a file is detected as binary or not can change based
on the internal search strategy used by ripgrep. If you prefer to keep
ripgrep's binary file detection consistent, then you can disable memory maps
via the `--no-mmap` flag. (The cost will be a small performance regression when
searching very large files on some platforms.)
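The three modes map onto flags as follows (a summary sketch of the behavior
described above):

```
$ rg foo              # default: binary files dropped during recursive traversal
$ rg --binary foo     # binary mode for all files: report matches, never dump binary data
$ rg -a foo           # text mode: disable binary detection entirely
$ rg --no-mmap foo    # make binary detection independent of the search strategy
```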
### Common options
ripgrep has a lot of flags. Too many to keep in your head at once. This section

README.md

@@ -8,11 +8,11 @@ available for [every release](https://github.com/BurntSushi/ripgrep/releases).
ripgrep is similar to other popular search tools like The Silver Searcher, ack
and grep.
[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges)
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org).
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### CHANGELOG
@@ -28,60 +28,60 @@ Please see the [CHANGELOG](CHANGELOG.md) for a release history.
* [Configuration files](GUIDE.md#configuration-file)
* [Shell completions](FAQ.md#complete)
* [Building](#building)
* [Translations](#translations)
### Screenshot of search results
[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png)
[![A screenshot of a sample search with ripgrep](http://burntsushi.net/stuff/ripgrep1.png)](http://burntsushi.net/stuff/ripgrep1.png)
### Quick examples comparing tools
This example searches the entire
[Linux kernel source tree](https://github.com/BurntSushi/linux)
(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where
all matches must be words. Timings were collected on a system with an Intel
i7-6900K 3.2 GHz.
This example searches the entire Linux kernel source tree (after running
`make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where all matches must be
words. Timings were collected on a system with an Intel i7-6900K 3.2 GHz, and
ripgrep was compiled with SIMD enabled.
Please remember that a single benchmark is never enough! See my
[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/)
[blog post on ripgrep](http://blog.burntsushi.net/ripgrep/)
for a very detailed comparison with more benchmarks and analysis.
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 452 | **0.136s** |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 452 | 0.348s |
| [ugrep (Unicode)](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 452 | 0.506s |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 452 | 1.150s |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 452 | 0.654s |
| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 452 | 4.054s |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 452 | 4.205s |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.106s** |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.553s |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.589s |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.266s |
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.505s |
| [ack](https://github.com/petdance/ack2) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 6.823s |
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 14.208s |
Here's another benchmark on the same corpus as above that disregards gitignore
files and searches with a whitelist instead. The corpus is the same as in the
previous benchmark, and the flags passed to each command ensure that they are
doing equivalent work:
(Yes, `ack` [has](https://github.com/petdance/ack2/issues/445) a
[bug](https://github.com/petdance/ack2/issues/14).)
Here's another benchmark that disregards gitignore files and searches with a
whitelist instead. The corpus is the same as in the previous benchmark, and the
flags passed to each command ensure that they are doing equivalent work:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 388 | **0.096s** |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 388 | 0.493s |
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 388 | 0.806s |
| ripgrep | `rg -L -u -tc -n -w '[A-Z]+_SUSPEND'` | 404 | **0.079s** |
| [ucg](https://github.com/gvansickle/ucg) | `ucg --type=cc -w '[A-Z]+_SUSPEND'` | 390 | 0.163s |
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -R -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 404 | 0.611s |
And finally, a straight-up comparison between ripgrep, ugrep and GNU grep on a
single large file cached in memory
(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz)):
(`ucg` [has slightly different behavior in the presence of symbolic links](https://github.com/gvansickle/ucg/issues/106).)
And finally, a straight-up comparison between ripgrep and GNU grep on a single
large file (~9.3GB,
[`OpenSubtitles2016.raw.en.gz`](http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz)):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **2.769s** |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.802s |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 9.027s |
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 5268 | **2.108s** |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C egrep -w 'Sherlock [A-Z]\w+'` | 5268 | 7.014s |
In the above benchmark, passing the `-n` flag (for showing line numbers)
increases the times to `3.423s` for ripgrep and `13.031s` for GNU grep. ugrep
times are unaffected by the presence or absence of `-n`.
increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
### Why should I use ripgrep?
@@ -91,11 +91,11 @@ times are unaffected by the presence or absence of `-n`.
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to recursive
directory search and won't search files ignored by your
`.gitignore`/`.ignore`/`.rgignore` files. It also ignores hidden and binary
files by default. ripgrep also implements full support for `.gitignore`,
whereas there are many bugs related to that functionality in other code
search tools claiming to provide the same functionality.
directory search and won't search files ignored by your `.gitignore` files.
It also ignores hidden and binary files by default. ripgrep also implements
full support for `.gitignore`, whereas there are many bugs related to that
functionality in other code search tools claiming to provide the same
functionality.
* ripgrep can search specific types of files. For example, `rg -tpy foo`
limits your search to Python files and `rg -Tjs foo` excludes Javascript
files from your search. ripgrep can be taught about new file types with
@@ -107,15 +107,13 @@ times are unaffected by the presence or absence of `-n`.
* ripgrep has optional support for switching its regex engine to use PCRE2.
Among other things, this makes it possible to use look-around and
backreferences in your patterns, which are not supported in ripgrep's default
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
syntax is provided via the `--engine (default|pcre2|auto-hybrid)` option.
regex engine. PCRE2 support is enabled with `-P`.
* ripgrep supports searching files in text encodings other than UTF-8, such
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
automatically detecting UTF-16 is provided. Other text encodings must be
specifically specified with the `-E/--encoding` flag.)
* ripgrep supports searching files compressed in a common format (brotli,
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
* ripgrep supports searching files compressed in a common format (gzip, xz,
lzma, bzip2 or lz4) with the `-z/--search-zip` flag.
* ripgrep supports arbitrary input preprocessing filters which could be PDF
text extraction, less supported decompression, decrypting, automatic encoding
detection and so on.
@@ -149,12 +147,12 @@ or more of the following:
### Is it really faster than everything else?
Generally, yes. A large number of benchmarks with detailed analysis for each is
[available on my blog](https://blog.burntsushi.net/ripgrep/).
[available on my blog](http://blog.burntsushi.net/ripgrep/).
Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang/regex).
[Rust's regex engine](https://github.com/rust-lang-nursery/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
@@ -201,13 +199,22 @@ prefer MSVC over GNU, but you'll need to have the [Microsoft VC++ 2015
redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
installed.
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
If you're a **macOS Homebrew** or a **Linuxbrew** user,
then you can install ripgrep either
from homebrew-core (compiled with rust stable, no SIMD):
```
$ brew install ripgrep
```
or you can install a binary compiled with rust nightly (including SIMD and all
optimizations) by utilizing a custom tap:
```
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
$ brew install ripgrep-bin
```
If you're a **MacPorts** user, then you can install ripgrep from the
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
@@ -223,7 +230,7 @@ $ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
@@ -249,14 +256,23 @@ repositories.
$ sudo dnf install ripgrep
```
If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed**
and **openSUSE Leap** since 15.1.
If you're an **openSUSE Leap 15.0** user, you can install ripgrep from the
[utilities repo](https://build.opensuse.org/package/show/utilities/ripgrep):
```
$ sudo zypper ar https://download.opensuse.org/repositories/utilities/openSUSE_Leap_15.0/utilities.repo
$ sudo zypper install ripgrep
```
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the
[official repo](http://software.opensuse.org/package/ripgrep):
```
$ sudo zypper install ripgrep
```
If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from
If you're a **RHEL/CentOS 7** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
@@ -277,11 +293,11 @@ then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/11.0.2/ripgrep_11.0.2_amd64.deb
$ sudo dpkg -i ripgrep_11.0.2_amd64.deb
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.10.0/ripgrep_0.10.0_amd64.deb
$ sudo dpkg -i ripgrep_0.10.0_amd64.deb
```
If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
```
$ sudo apt-get install ripgrep
@@ -321,23 +337,9 @@ If you're a **NetBSD** user, then you can install ripgrep from
# pkgin install ripgrep
```
If you're a **Haiku x86_64** user, then you can install ripgrep from the
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
```
$ pkgman install ripgrep
```
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
same port as Haiku x86_64 using the x86 secondary architecture build:
```
$ pkgman install ripgrep_x86
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
* Note that the minimum supported version of Rust for ripgrep is **1.32.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
symbols. This is intentional. To remove debug symbols and therefore reduce
@@ -347,12 +349,18 @@ If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
$ cargo install ripgrep
```
When compiling with Rust 1.27 or newer, this will automatically enable SIMD
optimizations for search.
ripgrep isn't currently in any other package repositories.
[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10).
### Building
ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
ripgrep compiles with Rust 1.32.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.
To build ripgrep:
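(The diff elides the unchanged build steps here; for orientation, the README's
standard recipe is roughly the following sketch.)

```
$ git clone https://github.com/BurntSushi/ripgrep
$ cd ripgrep
$ cargo build --release
$ ./target/release/rg --version
```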
@@ -422,11 +430,3 @@ $ cargo test --all
```
from the repository root.
### Translations
The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)

appveyor.yml (new file)

@@ -0,0 +1,81 @@
cache:
- c:\cargo\registry
- c:\cargo\git
init:
- mkdir c:\cargo
- mkdir c:\rustup
- SET PATH=c:\cargo\bin;%PATH%
clone_folder: c:\projects\ripgrep
environment:
CARGO_HOME: "c:\\cargo"
RUSTUP_HOME: "c:\\rustup"
CARGO_TARGET_DIR: "c:\\projects\\ripgrep\\target"
global:
PROJECT_NAME: ripgrep
RUST_BACKTRACE: full
matrix:
- TARGET: x86_64-pc-windows-gnu
CHANNEL: stable
BITS: 64
MSYS2: 1
- TARGET: x86_64-pc-windows-msvc
CHANNEL: stable
BITS: 64
- TARGET: i686-pc-windows-gnu
CHANNEL: stable
BITS: 32
MSYS2: 1
- TARGET: i686-pc-windows-msvc
CHANNEL: stable
BITS: 32
matrix:
fast_finish: true
# Install Rust and Cargo
# (Based on https://github.com/rust-lang/libc/blob/master/appveyor.yml)
install:
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
- rustup-init.exe -y --default-host %TARGET%
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
- if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH%
- rustc -V
- cargo -V
# Hack to work around a harmless warning in Appveyor builds?
build: false
# Equivalent to Travis' `script` phase
test_script:
- cargo test --verbose --all --features pcre2
before_deploy:
# Generate artifacts for release
- cargo build --release --features pcre2
- mkdir staging
- copy target\release\rg.exe staging
- ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging
- cd staging
# release zipfile will look like 'ripgrep-1.2.3-x86_64-pc-windows-msvc'
- 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
- appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip
deploy:
description: 'Automatically deployed release'
# All the zipped artifacts will be deployed
artifact: /.*\.zip/
auth_token:
secure: vv4vBCEosGlyQjaEC1+kraP2P6O4CQSa+Tw50oHWFTGcmuXxaWS0/yEXbxsIRLpw
provider: GitHub
# deploy when a new tag is pushed and only on the stable channel
on:
CHANNEL: stable
appveyor_repo_tag: true
branches:
only:
- /^\d+\.\d+\.\d+$/
- master
@@ -9,7 +9,7 @@ use clap::Shell;
use app::{RGArg, RGArgKind};
#[allow(dead_code)]
#[path = "crates/core/app.rs"]
#[path = "src/app.rs"]
mod app;
fn main() {
@@ -21,8 +21,7 @@ fn main() {
eprintln!(
"OUT_DIR environment variable not defined. \
Please file a bug: \
https://github.com/BurntSushi/ripgrep/issues/new"
);
https://github.com/BurntSushi/ripgrep/issues/new");
process::exit(1);
}
};
@@ -86,15 +85,13 @@ fn generate_man_page<P: AsRef<Path>>(outdir: P) -> io::Result<()> {
let githash = git_revision_hash();
let githash = githash.as_ref().map(|x| &**x);
tpl = tpl.replace("{VERSION}", &app::long_version(githash, false));
tpl = tpl.replace("{VERSION}", &app::long_version(githash));
File::create(&txt_path)?.write_all(tpl.as_bytes())?;
let result = process::Command::new("a2x")
.arg("--no-xmllint")
.arg("--doctype")
.arg("manpage")
.arg("--format")
.arg("manpage")
.arg("--doctype").arg("manpage")
.arg("--format").arg("manpage")
.arg(&txt_path)
.spawn()?
.wait()?;
@@ -117,7 +114,7 @@ fn formatted_options() -> io::Result<String> {
// ripgrep only has two positional arguments, and probably will only
// ever have two positional arguments, so we just hardcode them into
// the template.
if let app::RGArgKind::Positional { .. } = arg.kind {
if let app::RGArgKind::Positional{..} = arg.kind {
continue;
}
formatted.push(formatted_arg(&arg)?);
@@ -127,9 +124,7 @@ fn formatted_options() -> io::Result<String> {
fn formatted_arg(arg: &RGArg) -> io::Result<String> {
match arg.kind {
RGArgKind::Positional { .. } => {
panic!("unexpected positional argument")
}
RGArgKind::Positional{..} => panic!("unexpected positional argument"),
RGArgKind::Switch { long, short, multiple } => {
let mut out = vec![];
@@ -168,8 +163,7 @@ fn formatted_arg(arg: &RGArg) -> io::Result<String> {
}
fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> {
let paragraphs: Vec<String> = arg
.doc_long
let paragraphs: Vec<String> = arg.doc_long
.replace("{", "&#123;")
.replace("}", r"&#125;")
.split("\n\n")
ci/before_deploy.sh Executable file
@@ -0,0 +1,61 @@
#!/bin/bash
# package the build artifacts
set -ex
. "$(dirname $0)/utils.sh"
# Generate artifacts for release
mk_artifacts() {
if is_arm; then
cargo build --target "$TARGET" --release
else
# Technically, MUSL builds will force PCRE2 to get statically compiled,
# but we also want PCRE2 statically built for macOS binaries.
PCRE2_SYS_STATIC=1 cargo build --target "$TARGET" --release --features 'pcre2'
fi
}
mk_tarball() {
# When cross-compiling, use the right `strip` tool on the binary.
local gcc_prefix="$(gcc_prefix)"
# Create a temporary dir that contains our staging area.
# $tmpdir/$name is what eventually ends up as the deployed archive.
local tmpdir="$(mktemp -d)"
local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
local staging="$tmpdir/$name"
mkdir -p "$staging"/{complete,doc}
# The deployment directory is where the final archive will reside.
# This path is known by the .travis.yml configuration.
local out_dir="$(pwd)/deployment"
mkdir -p "$out_dir"
# Find the correct (most recent) Cargo "out" directory. The out directory
# contains shell completion files and the man page.
local cargo_out_dir="$(cargo_out_dir "target/$TARGET")"
# Copy the ripgrep binary and strip it.
cp "target/$TARGET/release/rg" "$staging/rg"
"${gcc_prefix}strip" "$staging/rg"
# Copy the licenses and README.
cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$staging/"
# Copy documentation and man page.
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/"
if command -V a2x 2>&1 > /dev/null; then
# The man page should only exist if we have asciidoc installed.
cp "$cargo_out_dir/rg.1" "$staging/doc/"
fi
# Copy shell completion files.
cp "$cargo_out_dir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
cp complete/_rg "$staging/complete/"
(cd "$tmpdir" && tar czf "$out_dir/$name.tar.gz" "$name")
rm -rf "$tmpdir"
}
main() {
mk_artifacts
mk_tarball
}
main
@@ -1,7 +1,6 @@
#!/bin/bash
set -e
D="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# This script builds a binary dpkg for Debian based distros. It does not
# currently run in CI, and is instead run manually and the resulting dpkg is
@@ -24,13 +23,20 @@ fi
# the deb, which knows where to look.
DEPLOY_DIR=deployment/deb
OUT_DIR="$("$D"/cargo-out-dir target/debug/)"
mkdir -p "$DEPLOY_DIR"
cargo build
# Copy man page and shell completions.
cp "$OUT_DIR"/{rg.1,rg.bash,rg.fish} "$DEPLOY_DIR/"
cp complete/_rg "$DEPLOY_DIR/"
# Find and copy man page.
manpage="$(find ./target/debug -name rg.1 -print0 | xargs -0 ls -t | head -n1)"
cp "$manpage" "$DEPLOY_DIR/"
# Do the same for shell completions.
compbash="$(find ./target/debug -name rg.bash -print0 | xargs -0 ls -t | head -n1)"
cp "$compbash" "$DEPLOY_DIR/"
compfish="$(find ./target/debug -name rg.fish -print0 | xargs -0 ls -t | head -n1)"
cp "$compfish" "$DEPLOY_DIR/"
compzsh="complete/_rg"
cp "$compzsh" "$DEPLOY_DIR/"
# Since we're distributing the dpkg, we don't know whether the user will have
# PCRE2 installed, so just do a static build.
@@ -1,19 +0,0 @@
#!/bin/bash
# Finds Cargo's `OUT_DIR` directory from the most recent build.
#
# This requires one parameter corresponding to the target directory
# to search for the build output.
if [ $# != 1 ]; then
echo "Usage: $(basename "$0") <target-dir>" >&2
exit 2
fi
# This works by finding the most recent stamp file, which is produced by
# every ripgrep build.
target_dir="$1"
find "$target_dir" -name ripgrep-stamp -print0 \
| xargs -0 ls -t \
| head -n1 \
| xargs dirname
@@ -1,24 +0,0 @@
These are Docker images used for cross compilation in CI builds (or locally)
via the [Cross](https://github.com/rust-embedded/cross) tool.
The Cross tool actually provides its own Docker images, and all Docker images
in this directory are derived from one of them. We provide our own in order
to customize the environment. For example, we need to install some things like
`asciidoc` in order to generate man pages. We also install compression tools
like `xz` so that tests for the `-z/--search-zip` flag are run.
If you make a change to a Docker image, then you can re-build it. `cd` into the
directory containing the `Dockerfile` and run:
$ cd x86_64-unknown-linux-musl
$ ./build
At this point, subsequent uses of `cross` will now use your built image since
Docker prefers local images over remote images. In order to make these changes
stick, they need to be pushed to Docker Hub:
$ docker push burntsushi/cross:x86_64-unknown-linux-musl
Of course, only I (BurntSushi) can push to that location. To make `cross` use
a different location, edit `Cross.toml` in the root of this repo to use a
different image name for the desired target.
@@ -1,4 +0,0 @@
FROM rustembedded/cross:arm-unknown-linux-gnueabihf
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:arm-unknown-linux-gnueabihf .
@@ -1,4 +0,0 @@
FROM rustembedded/cross:i686-unknown-linux-gnu
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:i686-unknown-linux-gnu .
@@ -1,4 +0,0 @@
FROM rustembedded/cross:mips64-unknown-linux-gnuabi64
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:mips64-unknown-linux-gnuabi64 .
@@ -1,4 +0,0 @@
FROM rustembedded/cross:x86_64-unknown-linux-musl
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:x86_64-unknown-linux-musl .
ci/install.sh Executable file
@@ -0,0 +1,61 @@
#!/bin/bash
# install stuff needed for the `script` phase
# Where rustup gets installed.
export PATH="$PATH:$HOME/.cargo/bin"
set -ex
. "$(dirname $0)/utils.sh"
install_rustup() {
curl https://sh.rustup.rs -sSf \
| sh -s -- -y --default-toolchain="$TRAVIS_RUST_VERSION"
rustc -V
cargo -V
}
install_targets() {
if [ $(host) != "$TARGET" ]; then
rustup target add $TARGET
fi
}
install_osx_dependencies() {
if ! is_osx; then
return
fi
brew install asciidoc docbook-xsl
}
configure_cargo() {
local prefix=$(gcc_prefix)
if [ -n "${prefix}" ]; then
local gcc_suffix=
if [ -n "$GCC_VERSION" ]; then
gcc_suffix="-$GCC_VERSION"
fi
local gcc="${prefix}gcc${gcc_suffix}"
# information about the cross compiler
"${gcc}" -v
# tell cargo which linker to use for cross compilation
mkdir -p .cargo
cat >>.cargo/config <<EOF
[target.$TARGET]
linker = "${gcc}"
EOF
fi
}
main() {
install_osx_dependencies
install_rustup
install_targets
configure_cargo
}
main
@@ -1,3 +0,0 @@
#!/bin/sh
brew install asciidoc docbook-xsl
ci/script.sh Executable file
@@ -0,0 +1,50 @@
#!/bin/bash
# build, test and generate docs in this phase
set -ex
. "$(dirname $0)/utils.sh"
main() {
# Test a normal debug build.
if is_arm; then
cargo build --target "$TARGET" --verbose
else
cargo build --target "$TARGET" --verbose --all --features 'pcre2'
fi
# Show the output of the most recent build.rs stderr.
set +x
stderr="$(find "target/$TARGET/debug" -name stderr -print0 | xargs -0 ls -t | head -n1)"
if [ -s "$stderr" ]; then
echo "===== $stderr ====="
cat "$stderr"
echo "====="
fi
set -x
# sanity check the file type
file target/"$TARGET"/debug/rg
# Check that we've generated man page and other shell completions.
outdir="$(cargo_out_dir "target/$TARGET/debug")"
file "$outdir/rg.bash"
file "$outdir/rg.fish"
file "$outdir/_rg.ps1"
file "$outdir/rg.1"
# Apparently tests don't work on arm, so just bail now. I guess we provide
# ARM releases on a best effort basis?
if is_arm; then
return 0
fi
# Test that zsh completions are in sync with ripgrep's actual args.
"$(dirname "${0}")/test_complete.sh"
# Run tests for ripgrep and all sub-crates.
cargo test --target "$TARGET" --verbose --all --features 'pcre2'
}
main
@@ -18,7 +18,7 @@ get_comp_args() {
main() {
local diff
local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rg"
local rg="${0:a:h}/../target/${TARGET:-}/release/rg"
local _rg="${0:a:h}/../complete/_rg"
local -a help_args comp_args
@@ -44,7 +44,7 @@ main() {
# Occasionally we may have to handle some manually, however
help_args=( ${(f)"$(
$rg --help |
$rg -i -- '^\s+--?[a-z0-9]|--[a-z]' |
$rg -i -- '^\s+--?[a-z0-9]|--[imnp]' |
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' |
$rg -v -- --print0 | # False positives
sort -u
@@ -1,6 +0,0 @@
#!/bin/sh
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libxslt1-dev asciidoc docbook-xsl xsltproc libxml2-utils \
zsh xz-utils liblz4-tool musl-tools
@@ -55,13 +55,6 @@ gcc_prefix() {
esac
}
is_musl() {
case "$TARGET" in
*-musl) return 0 ;;
*) return 1 ;;
esac
}
is_x86() {
case "$(architecture)" in
amd64|i386) return 0 ;;
@@ -69,13 +62,6 @@ is_x86() {
esac
}
is_x86_64() {
case "$(architecture)" in
amd64) return 0 ;;
*) return 1 ;;
esac
}
is_arm() {
case "$(architecture)" in
armhf) return 0 ;;
@@ -96,14 +82,3 @@ is_osx() {
*) return 1 ;;
esac
}
builder() {
if is_musl && is_x86_64; then
# cargo install cross
# To work around https://github.com/rust-embedded/cross/issues/357
cargo install --git https://github.com/rust-embedded/cross --force
echo "cross"
else
echo "cargo"
fi
}
@@ -3,7 +3,7 @@
##
# zsh completion function for ripgrep
#
# Run ci/test-complete after building to ensure that the options supported by
# Run ci/test_complete.sh after building to ensure that the options supported by
# this function stay in synch with the `rg` binary.
#
# For convenience, a completion reference guide is included at the bottom of
@@ -43,7 +43,6 @@ _rg() {
+ '(exclusive)' # Misc. fully exclusive options
'(: * -)'{-h,--help}'[display help information]'
'(: * -)'{-V,--version}'[display version information]'
'(: * -)'--pcre2-version'[print the version of PCRE2 used by ripgrep, if available]'
+ '(buffered)' # buffering options
'--line-buffered[force line buffering]'
@@ -72,19 +71,11 @@ _rg() {
+ '(count)' # Counting options
{-c,--count}'[only show count of matching lines for each file]'
'--count-matches[only show count of individual matches for each file]'
'--include-zero[include files with zero matches in summary]'
+ '(encoding)' # Encoding options
{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'
$no'--no-encoding[use default text encoding]'
+ '(engine)' # Engine choice options
'--engine=[select which regex engine to use]:when:((
default\:"use default engine"
pcre2\:"identical to --pcre2"
auto\:"identical to --auto-hybrid-regex"
))'
+ file # File-input options
'(1)*'{-f+,--file=}'[specify file containing patterns to search for]: :_files'
@@ -94,7 +85,7 @@ _rg() {
+ '(file-name)' # File-name options
{-H,--with-filename}'[show file name for matches]'
{-I,--no-filename}"[don't show file name for matches]"
"--no-filename[don't show file name for matches]"
+ '(file-system)' # File system options
"--one-file-system[don't descend into directories on other file systems]"
@@ -112,10 +103,6 @@ _rg() {
'*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob'
'*--iglob=[include/exclude files matching specified case-insensitive glob]:glob'
+ '(glob-case-insensitive)' # File-glob case sensitivity options
'--glob-case-insensitive[treat -g/--glob patterns case insensitively]'
$no'--no-glob-case-insensitive[treat -g/--glob patterns case sensitively]'
+ '(heading)' # Heading options
'(pretty-vimgrep)--heading[show matches grouped by file name]'
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
@@ -124,10 +111,6 @@ _rg() {
'--hidden[search hidden files and directories]'
$no"--no-hidden[don't search hidden files and directories]"
+ '(hybrid)' # hybrid regex options
'--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
$no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
+ '(ignore)' # Ignore-file options
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
@@ -136,10 +119,6 @@ _rg() {
'--ignore-file-case-insensitive[process ignore files case insensitively]'
$no'--no-ignore-file-case-insensitive[process ignore files case sensitively]'
+ '(ignore-exclude)' # Local exclude (ignore)-file options
"--no-ignore-exclude[don't respect local exclude (ignore) files]"
$no'--ignore-exclude[respect local exclude (ignore) files]'
+ '(ignore-global)' # Global ignore-file options
"--no-ignore-global[don't respect global ignore files]"
$no'--ignore-global[respect global ignore files]'
@@ -152,18 +131,10 @@ _rg() {
"--no-ignore-vcs[don't respect version control ignore files]"
$no'--ignore-vcs[respect version control ignore files]'
+ '(require-git)' # git specific settings
"--no-require-git[don't require git repository to respect gitignore rules]"
$no'--require-git[require git repository to respect gitignore rules]'
+ '(ignore-dot)' # .ignore options
+ '(ignore-dot)' # .ignore-file options
"--no-ignore-dot[don't respect .ignore files]"
$no'--ignore-dot[respect .ignore files]'
+ '(ignore-files)' # custom global ignore file options
"--no-ignore-files[don't respect --ignore-file flags]"
$no'--ignore-files[respect --ignore-file files]'
+ '(json)' # JSON options
'--json[output results in JSON Lines format]'
$no"--no-json[don't output results in JSON Lines format]"
@@ -177,10 +148,6 @@ _rg() {
$no"--no-crlf[don't use CRLF as line terminator]"
'(text)--null-data[use NUL as line terminator]'
+ '(max-columns-preview)' # max column preview options
'--max-columns-preview[show preview for long lines (with -M)]'
$no"--no-max-columns-preview[don't show preview for long lines (with -M)]"
+ '(max-depth)' # Directory-depth options
'--max-depth=[specify max number of directories to descend]:number of directories'
'!--maxdepth=:number of directories'
@@ -260,8 +227,6 @@ _rg() {
+ '(text)' # Binary-search options
{-a,--text}'[search binary files as if they were text]'
"--binary[search binary files, don't print binary data]"
$no"--no-binary[don't search binary files]"
$no"(--null-data)--no-text[don't search binary files as if they were text]"
+ '(threads)' # Thread-count options
@@ -283,10 +248,6 @@ _rg() {
{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
{-x,--line-regexp}'[only show matches surrounded by line boundaries]'
+ '(unicode)' # Unicode options
$no'--unicode[enable Unicode mode]'
'--no-unicode[disable Unicode mode]'
+ '(zip)' # Compression options
'(--pre)'{-z,--search-zip}'[search in compressed files]'
$no"--no-search-zip[don't search in compressed files]"
@@ -301,9 +262,7 @@ _rg() {
))'
'*--colors=[specify color and style settings]: :->colorspec'
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
$no"--no-context-separator[don't print context separators]"
'--debug[show debug messages]'
'--trace[show more verbose debug messages]'
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
"(1 stats)--files[show each file that would be searched (but don't search)]"
'*--ignore-file=[specify additional ignore file]:ignore file:_files'
@@ -323,7 +282,7 @@ _rg() {
'(--type-list)*: :_files'
)
# This is used with test-complete to verify that there are no options
# This is used with test_complete.sh to verify that there are no options
# listed in the help output that aren't also defined here
[[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] && {
print -rl - $args
@@ -1,15 +0,0 @@
ripgrep core
------------
This is the core ripgrep crate. In particular, `main.rs` is where the `main`
function lives.
Most of ripgrep core consists of two things:
* The definition of the CLI interface, including docs for every flag.
* Glue code that brings the `grep-matcher`, `grep-regex`, `grep-searcher` and
`grep-printer` crates together to actually execute the search.
Currently, there are no plans to make ripgrep core available as an independent
library. However, much of the heavy lifting of ripgrep is done via its
constituent crates, which can be reused independent of ripgrep. Unfortunately,
there is no guide or tutorial to teach folks how to do this yet.
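A minimal sketch of that glue, assuming the public `grep-regex` and
`grep-searcher` APIs of this vintage (error handling and flag parsing
elided):

```
extern crate grep_regex;
extern crate grep_searcher;

use grep_regex::RegexMatcher;
use grep_searcher::Searcher;
use grep_searcher::sinks::UTF8;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Build a matcher for the pattern, then drive a searcher over a
    // file, handing every matching line to a UTF-8 sink.
    let matcher = RegexMatcher::new(r"^fn main")?;
    let mut searcher = Searcher::new();
    searcher.search_path(&matcher, "src/main.rs", UTF8(|line_number, line| {
        print!("{}:{}", line_number, line);
        Ok(true)
    }))?;
    Ok(())
}
```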
@@ -1,38 +0,0 @@
use serde::de::Error;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use Glob;
impl Serialize for Glob {
fn serialize<S: Serializer>(
&self,
serializer: S,
) -> Result<S::Ok, S::Error> {
serializer.serialize_str(self.glob())
}
}
impl<'de> Deserialize<'de> for Glob {
fn deserialize<D: Deserializer<'de>>(
deserializer: D,
) -> Result<Self, D::Error> {
let glob = <&str as Deserialize>::deserialize(deserializer)?;
Glob::new(glob).map_err(D::Error::custom)
}
}
#[cfg(test)]
mod tests {
use Glob;
#[test]
fn glob_json_works() {
let test_glob = Glob::new("src/**/*.rs").unwrap();
let ser = serde_json::to_string(&test_glob).unwrap();
assert_eq!(ser, "\"src/**/*.rs\"");
let de: Glob = serde_json::from_str(&ser).unwrap();
assert_eq!(test_glob, de);
}
}
@@ -1,246 +0,0 @@
/// This list represents the default file types that ripgrep ships with. In
/// general, any file format is fair game, although it should generally be
/// limited to reasonably popular open formats. For other cases, you can add
/// types to each invocation of ripgrep with the '--type-add' flag.
///
/// If you would like to add or improve this list, please file a PR:
/// https://github.com/BurntSushi/ripgrep
///
/// Please try to keep this list sorted lexicographically and wrapped to 79
/// columns (inclusive).
#[rustfmt::skip]
pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
("agda", &["*.agda", "*.lagda"]),
("aidl", &["*.aidl"]),
("amake", &["*.mk", "*.bp"]),
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
("asm", &["*.asm", "*.s", "*.S"]),
("asp", &[
"*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb",
]),
("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
("awk", &["*.awk"]),
("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
("brotli", &["*.br"]),
("buildstream", &["*.bst"]),
("bzip2", &["*.bz2", "*.tbz2"]),
("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
("cabal", &["*.cabal"]),
("cbor", &["*.cbor"]),
("ceylon", &["*.ceylon"]),
("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
("cmake", &["*.cmake", "CMakeLists.txt"]),
("coffeescript", &["*.coffee"]),
("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
("coq", &["*.v"]),
("cpp", &[
"*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
"*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
]),
("creole", &["*.creole"]),
("crystal", &["Projectfile", "*.cr"]),
("cs", &["*.cs"]),
("csharp", &["*.cs"]),
("cshtml", &["*.cshtml"]),
("css", &["*.css", "*.scss"]),
("csv", &["*.csv"]),
("cython", &["*.pyx", "*.pxi", "*.pxd"]),
("d", &["*.d"]),
("dart", &["*.dart"]),
("dhall", &["*.dhall"]),
("diff", &["*.patch", "*.diff"]),
("docker", &["*Dockerfile*"]),
("edn", &["*.edn"]),
("elisp", &["*.el"]),
("elixir", &["*.ex", "*.eex", "*.exs"]),
("elm", &["*.elm"]),
("erb", &["*.erb"]),
("erlang", &["*.erl", "*.hrl"]),
("fidl", &["*.fidl"]),
("fish", &["*.fish"]),
("fortran", &[
"*.f", "*.F", "*.f77", "*.F77", "*.pfo",
"*.f90", "*.F90", "*.f95", "*.F95",
]),
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
("gn", &["*.gn", "*.gni"]),
("go", &["*.go"]),
("gradle", &["*.gradle"]),
("groovy", &["*.groovy", "*.gradle"]),
("gzip", &["*.gz", "*.tgz"]),
("h", &["*.h", "*.hpp"]),
("haml", &["*.haml"]),
("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
("hbs", &["*.hbs"]),
("hs", &["*.hs", "*.lhs"]),
("html", &["*.htm", "*.html", "*.ejs"]),
("idris", &["*.idr", "*.lidr"]),
("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
("jl", &["*.jl"]),
("js", &["*.js", "*.jsx", "*.vue"]),
("json", &["*.json", "composer.lock"]),
("jsonl", &["*.jsonl"]),
("julia", &["*.jl"]),
("jupyter", &["*.ipynb", "*.jpynb"]),
("k", &["*.k"]),
("kotlin", &["*.kt", "*.kts"]),
("less", &["*.less"]),
("license", &[
// General
"COPYING", "COPYING[.-]*",
"COPYRIGHT", "COPYRIGHT[.-]*",
"EULA", "EULA[.-]*",
"licen[cs]e", "licen[cs]e.*",
"LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
"NOTICE", "NOTICE[.-]*",
"PATENTS", "PATENTS[.-]*",
"UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
// GPL (gpl.txt, etc.)
"agpl[.-]*",
"gpl[.-]*",
"lgpl[.-]*",
// Other license-specific (APACHE-2.0.txt, etc.)
"AGPL-*[0-9]*",
"APACHE-*[0-9]*",
"BSD-*[0-9]*",
"CC-BY-*",
"GFDL-*[0-9]*",
"GNU-*[0-9]*",
"GPL-*[0-9]*",
"LGPL-*[0-9]*",
"MIT-*[0-9]*",
"MPL-*[0-9]*",
"OFL-*[0-9]*",
]),
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
("lock", &["*.lock", "package-lock.json"]),
("log", &["*.log"]),
("lua", &["*.lua"]),
("lz4", &["*.lz4"]),
("lzma", &["*.lzma"]),
("m4", &["*.ac", "*.m4"]),
("make", &[
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
"*.mk", "*.mak"
]),
("mako", &["*.mako", "*.mao"]),
("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
("matlab", &["*.m"]),
("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
("mk", &["mkfile"]),
("ml", &["*.ml"]),
("msbuild", &[
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
]),
("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
("nix", &["*.nix"]),
("objc", &["*.h", "*.m"]),
("objcpp", &["*.h", "*.mm"]),
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
("org", &["*.org", "*.org_archive"]),
("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
("pdf", &["*.pdf"]),
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
("pod", &["*.pod"]),
("postscript", &["*.eps", "*.ps"]),
("protobuf", &["*.proto"]),
("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
("puppet", &["*.erb", "*.pp", "*.rb"]),
("purs", &["*.purs"]),
("py", &["*.py"]),
("qmake", &["*.pro", "*.pri", "*.prf"]),
("qml", &["*.qml"]),
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
("rdoc", &["*.rdoc"]),
("readme", &["README*", "*README"]),
("robot", &["*.robot"]),
("rst", &["*.rst"]),
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
("rust", &["*.rs"]),
("sass", &["*.sass", "*.scss"]),
("scala", &["*.scala", "*.sbt"]),
("sh", &[
// Portable/misc. init files
".login", ".logout", ".profile", "profile",
// bash-specific init files
".bash_login", "bash_login",
".bash_logout", "bash_logout",
".bash_profile", "bash_profile",
".bashrc", "bashrc", "*.bashrc",
// csh-specific init files
".cshrc", "*.cshrc",
// ksh-specific init files
".kshrc", "*.kshrc",
// tcsh-specific init files
".tcshrc",
// zsh-specific init files
".zshenv", "zshenv",
".zlogin", "zlogin",
".zlogout", "zlogout",
".zprofile", "zprofile",
".zshrc", "zshrc",
// Extensions
"*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
]),
("slim", &["*.skim", "*.slim", "*.slime"]),
("smarty", &["*.tpl"]),
("sml", &["*.sml", "*.sig"]),
("soy", &["*.soy"]),
("spark", &["*.spark"]),
("spec", &["*.spec"]),
("sql", &["*.sql", "*.psql"]),
("stylus", &["*.styl"]),
("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
("svg", &["*.svg"]),
("swift", &["*.swift"]),
("swig", &["*.def", "*.i"]),
("systemd", &[
"*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
"*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
"*.timer",
]),
("taskpaper", &["*.taskpaper"]),
("tcl", &["*.tcl"]),
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
("textile", &["*.textile"]),
("tf", &["*.tf"]),
("thrift", &["*.thrift"]),
("toml", &["*.toml", "Cargo.lock"]),
("ts", &["*.ts", "*.tsx"]),
("twig", &["*.twig"]),
("txt", &["*.txt"]),
("typoscript", &["*.typoscript", "*.ts"]),
("vala", &["*.vala"]),
("vb", &["*.vb"]),
("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
("vhdl", &["*.vhd", "*.vhdl"]),
("vim", &["*.vim"]),
("vimscript", &["*.vim"]),
("webidl", &["*.idl", "*.webidl", "*.widl"]),
("wiki", &["*.mediawiki", "*.wiki"]),
("xml", &[
"*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
"*.rng", "*.sch", "*.xhtml",
]),
("xz", &["*.xz", "*.txz"]),
("yacc", &["*.y"]),
("yaml", &["*.yaml", "*.yml"]),
("zig", &["*.zig"]),
("zsh", &[
".zshenv", "zshenv",
".zlogin", "zlogin",
".zlogout", "zlogout",
".zprofile", "zprofile",
".zshrc", "zshrc",
"*.zsh",
]),
("zstd", &["*.zst", "*.zstd"]),
];
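For context, this table is what the `ignore` crate's `TypesBuilder` pulls in
via `add_defaults`; a small sketch of selecting one of these types (a sketch
against that public API, not code from this diff):

```
extern crate ignore;

use ignore::types::TypesBuilder;

fn main() {
    // Whitelist only files matching the built-in "rust" definition.
    let mut builder = TypesBuilder::new();
    builder.add_defaults();
    builder.select("rust");
    let types = builder.build().unwrap();

    assert!(types.matched("main.rs", false).is_whitelist());
    assert!(types.matched("main.py", false).is_ignore());
}
```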
@@ -1,125 +0,0 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use grep_matcher::{Match, Matcher, NoError};
use regex_syntax::hir::Hir;
use error::Error;
use matcher::RegexCaptures;
/// A matcher for an alternation of literals.
///
/// Ideally, this optimization would be pushed down into the regex engine, but
/// making this work correctly there would require quite a bit of refactoring.
/// Moreover, doing it one layer above lets us do things like, "if we
/// specifically only want to search for literals, then don't bother with
/// regex parsing at all."
#[derive(Clone, Debug)]
pub struct MultiLiteralMatcher {
/// The Aho-Corasick automaton.
ac: AhoCorasick,
}
impl MultiLiteralMatcher {
/// Create a new multi-literal matcher from the given literals.
pub fn new<B: AsRef<[u8]>>(
literals: &[B],
) -> Result<MultiLiteralMatcher, Error> {
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.auto_configure(literals)
.build_with_size::<usize, _, _>(literals)
.map_err(Error::regex)?;
Ok(MultiLiteralMatcher { ac })
}
}
impl Matcher for MultiLiteralMatcher {
type Captures = RegexCaptures;
type Error = NoError;
fn find_at(
&self,
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, NoError> {
match self.ac.find(&haystack[at..]) {
None => Ok(None),
Some(m) => Ok(Some(Match::new(at + m.start(), at + m.end()))),
}
}
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
Ok(RegexCaptures::simple())
}
fn capture_count(&self) -> usize {
1
}
fn capture_index(&self, _: &str) -> Option<usize> {
None
}
fn captures_at(
&self,
haystack: &[u8],
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
caps.set_simple(None);
let mat = self.find_at(haystack, at)?;
caps.set_simple(mat);
Ok(mat.is_some())
}
// We specifically do not implement other methods like find_iter. Namely,
// the iter methods are guaranteed to be correct by virtue of implementing
// find_at above.
}
/// Alternation literals checks if the given HIR is a simple alternation of
/// literals, and if so, returns them. Otherwise, this returns None.
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
use regex_syntax::hir::{HirKind, Literal};
// This is pretty hacky, but basically, if `is_alternation_literal` is
// true, then we can make several assumptions about the structure of our
// HIR. This is what justifies the `unreachable!` statements below.
if !expr.is_alternation_literal() {
return None;
}
let alts = match *expr.kind() {
HirKind::Alternation(ref alts) => alts,
_ => return None, // one literal isn't worth it
};
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
Literal::Unicode(c) => {
let mut buf = [0; 4];
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
}
Literal::Byte(b) => {
dst.push(b);
}
};
let mut lits = vec![];
for alt in alts {
let mut lit = vec![];
match *alt.kind() {
HirKind::Empty => {}
HirKind::Literal(ref x) => extendlit(x, &mut lit),
HirKind::Concat(ref exprs) => {
for e in exprs {
match *e.kind() {
HirKind::Literal(ref x) => extendlit(x, &mut lit),
_ => unreachable!("expected literal, got {:?}", e),
}
}
}
_ => unreachable!("expected literal or concat, got {:?}", alt),
}
lits.push(lit);
}
Some(lits)
}
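The leftmost-first semantics this matcher leans on can be observed directly
with the `aho-corasick` crate; a standalone sketch (the patterns here are
illustrative, not anything ripgrep builds):

```
extern crate aho_corasick;

use aho_corasick::{AhoCorasickBuilder, MatchKind};

fn main() {
    // Leftmost-first reports the match starting earliest, breaking ties
    // by pattern order, mirroring a regex alternation of literals.
    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(&["Samwise", "Sam"]);
    let mat = ac.find("Samwise the brave").unwrap();
    assert_eq!(0, mat.pattern());
    assert_eq!("Samwise", &"Samwise the brave"[mat.start()..mat.end()]);
}
```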
@@ -41,14 +41,6 @@ configuration file. The file can specify one shell argument per line. Lines
starting with *#* are ignored. For more details, see the man page or the
*README*.
ripgrep will automatically detect if stdin exists and search stdin for a regex
pattern, e.g. *ls | rg foo*. In some environments, stdin may exist when it
shouldn't. To turn off stdin detection, explicitly specify the directory to
search, e.g. *rg foo ./*.
Tip: to disable all smart filtering and make ripgrep behave a bit more like
classical grep, use *rg -uuu*.
REGEX SYNTAX
------------
@@ -109,76 +101,6 @@ was found. In summary:
unable to read a file).
AUTOMATIC FILTERING
-------------------
TL;DR - To disable automatic filtering, use 'rg -uuu'.
One of ripgrep's most important features is its automatic smart filtering.
It is the most apparent differentiating feature between ripgrep and other tools
like 'grep'. As such, its behavior may be surprising to users that aren't
expecting it.
ripgrep does four types of filtering automatically:
1. Files and directories that match ignore rules are not searched.
2. Hidden files and directories are not searched.
3. Binary files (files with a 'NUL' byte) are not searched.
4. Symbolic links are not followed.
The first type of filtering is the most sophisticated. ripgrep will attempt to
respect your gitignore rules as faithfully as possible. In particular, this
includes the following:
* Any global rules, e.g., in '$HOME/.config/git/ignore'.
* Any rules in '.gitignore'.
* Any local rules, e.g., in '.git/info/exclude'.
In some cases, ripgrep and git will not always be in sync in terms of which
files are ignored. For example, a file that is ignored via '.gitignore' but is
tracked by git would not be searched by ripgrep even though git tracks it. This
is unlikely to ever be fixed. Instead, you should either make sure your exclude
rules match the files you track precisely, or otherwise use 'git grep' for
search.
Additional ignore rules can be provided outside of a git context:
* Any rules in '.ignore'.
* Any rules in '.rgignore'.
* Any rules in files specified with the '--ignore-file' flag.
The precedence of ignore rules is as follows, with later items overriding
earlier items:
* Files given by '--ignore-file'.
* Global gitignore rules, e.g., from '$HOME/.config/git/ignore'.
* Local rules from '.git/info/exclude'.
* Rules from '.gitignore'.
* Rules from '.ignore'.
* Rules from '.rgignore'.
So for example, if 'foo' were in a '.gitignore' and '!foo' were in an
'.rgignore', then 'foo' would not be ignored since '.rgignore' takes precedence
over '.gitignore'.
Each of the types of filtering can be configured via command line flags:
* There are several flags starting with '--no-ignore' that toggle which,
if any, ignore rules are respected. '--no-ignore' by itself will disable
all of them.
* '--hidden' will force ripgrep to search hidden files and directories.
* '--binary' will force ripgrep to search binary files.
* '-L/--follow' will force ripgrep to follow symlinks.
As a special shorthand, the `-u` flag can be specified up to three times. Each
additional time incrementally decreases filtering:
* '-u' is equivalent to '--no-ignore'.
* '-uu' is equivalent to '--no-ignore --hidden'.
* '-uuu' is equivalent to '--no-ignore --hidden --binary'.
In particular, 'rg -uuu' should search the same exact content as 'grep -r'.
CONFIGURATION FILES
-------------------
ripgrep supports reading configuration files that change ripgrep's default
@@ -218,16 +140,16 @@ would behave identically to the following command
same with using globs
--glob=!.git
--glob=!git/*
or
--glob
!.git
!git/*
would behave identically to the following command
rg --glob '!.git' foo
rg --glob '!git/*' foo
ripgrep also provides a flag, *--no-config*, that when present will suppress
any and all support for configuration. This includes any future support
@@ -267,21 +189,6 @@ file that is simultaneously truncated. This behavior can be avoided by passing
the *--no-mmap* flag which will forcefully disable the use of memory maps in
all cases.
ripgrep may use a large amount of memory depending on a few factors. Firstly,
if ripgrep uses parallelism for search (the default), then the entire output
for each individual file is buffered into memory in order to prevent
interleaving matches in the output. To avoid this, you can disable parallelism
with the *-j1* flag. Secondly, ripgrep always needs to have at least a single
line in memory in order to execute a search. A file with a very long line can
thus cause ripgrep to use a lot of memory. Generally, this only occurs when
searching binary data with the *-a* flag enabled. (When the *-a* flag isn't
enabled, ripgrep will replace all NUL bytes with line terminators, which
typically prevents exorbitant memory usage.) Thirdly, when ripgrep searches
a large file using a memory map, the process will report its resident memory
usage as the size of the file. However, this does not mean ripgrep actually
needed to use that much memory; the operating system will generally handle this
for you.
VERSION
-------
@@ -1,6 +1,6 @@
[package]
name = "globset"
version = "0.4.5" #:version
version = "0.4.2" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Cross platform single glob and glob set matching. Glob set matching is the
@@ -8,8 +8,8 @@ process of matching one or more glob patterns against a single candidate path
simultaneously, and returning all of the globs that matched.
"""
documentation = "https://docs.rs/globset"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
readme = "README.md"
keywords = ["regex", "glob", "multiple", "set", "pattern"]
license = "Unlicense/MIT"
@@ -20,17 +20,13 @@ bench = false
[dependencies]
aho-corasick = "0.7.3"
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
bstr = { version = "0.1.2", default-features = false, features = ["std"] }
fnv = "1.0.6"
log = "0.4.5"
regex = "1.1.5"
serde = { version = "1.0.104", optional = true }
[dev-dependencies]
glob = "0.3.0"
lazy_static = "1"
serde_json = "1.0.45"
glob = "0.2.11"
[features]
simd-accel = []
serde1 = ["serde"]
@@ -29,10 +29,6 @@ and this to your crate root:
extern crate globset;
```
### Features
* `serde1`: Enables implementing Serde traits on the `Glob` type.
### Example: one glob
This example shows how to match a single glob against a single file path.
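A sketch of such an example against `globset`'s public API:

```
extern crate globset;

use globset::Glob;

fn main() {
    // Compile a single glob and test a few candidate paths against it.
    let glob = Glob::new("*.rs").unwrap().compile_matcher();

    assert!(glob.is_match("foo.rs"));
    assert!(glob.is_match("foo/bar.rs"));
    assert!(!glob.is_match("Cargo.toml"));
}
```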
@@ -6,9 +6,14 @@ tool itself, see the benchsuite directory.
extern crate glob;
extern crate globset;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate test;
use std::ffi::OsStr;
use std::path::Path;
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
@@ -2,13 +2,13 @@ use std::fmt;
use std::hash;
use std::iter;
use std::ops::{Deref, DerefMut};
use std::path::{is_separator, Path};
use std::path::{Path, is_separator};
use std::str;
use regex;
use regex::bytes::Regex;
use {new_regex, Candidate, Error, ErrorKind};
use {Candidate, Error, ErrorKind, new_regex};
/// Describes a matching strategy for a particular pattern.
///
@@ -85,16 +85,16 @@ pub struct Glob {
}
impl PartialEq for Glob {
fn eq(&self, other: &Glob) -> bool {
self.glob == other.glob && self.opts == other.opts
}
fn eq(&self, other: &Glob) -> bool {
self.glob == other.glob && self.opts == other.opts
}
}
impl hash::Hash for Glob {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.glob.hash(state);
self.opts.hash(state);
}
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.glob.hash(state);
self.opts.hash(state);
}
}
impl fmt::Display for Glob {
@@ -103,14 +103,6 @@ impl fmt::Display for Glob {
}
}
impl str::FromStr for Glob {
type Err = Error;
fn from_str(glob: &str) -> Result<Self, Self::Err> {
Self::new(glob)
}
}
/// A matcher for a single pattern.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
@@ -128,12 +120,7 @@ impl GlobMatcher {
/// Tests whether the given path matches this pattern or not.
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
self.re.is_match(&path.path)
}
/// Returns the `Glob` used to compile this matcher.
pub fn glob(&self) -> &Glob {
&self.pat
self.re.is_match(path.path.as_bytes())
}
}
@@ -158,7 +145,7 @@ impl GlobStrategic {
/// Tests whether the given path matches this pattern or not.
fn is_match_candidate(&self, candidate: &Candidate) -> bool {
let byte_path = &*candidate.path;
let byte_path = candidate.path.as_bytes();
match self.strategy {
MatchStrategy::Literal(ref lit) => lit.as_bytes() == byte_path,
@@ -227,15 +214,11 @@ struct Tokens(Vec<Token>);
impl Deref for Tokens {
type Target = Vec<Token>;
fn deref(&self) -> &Vec<Token> {
&self.0
}
fn deref(&self) -> &Vec<Token> { &self.0 }
}
impl DerefMut for Tokens {
fn deref_mut(&mut self) -> &mut Vec<Token> {
&mut self.0
}
fn deref_mut(&mut self) -> &mut Vec<Token> { &mut self.0 }
}
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -246,7 +229,10 @@ enum Token {
RecursivePrefix,
RecursiveSuffix,
RecursiveZeroOrMore,
Class { negated: bool, ranges: Vec<(char, char)> },
Class {
negated: bool,
ranges: Vec<(char, char)>,
},
Alternates(Vec<Tokens>),
}
@@ -258,9 +244,12 @@ impl Glob {
/// Returns a matcher for this pattern.
pub fn compile_matcher(&self) -> GlobMatcher {
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
GlobMatcher { pat: self.clone(), re: re }
let re = new_regex(&self.re)
.expect("regex compilation shouldn't fail");
GlobMatcher {
pat: self.clone(),
re: re,
}
}
/// Returns a strategic matcher.
@@ -271,9 +260,13 @@ impl Glob {
#[cfg(test)]
fn compile_strategic_matcher(&self) -> GlobStrategic {
let strategy = MatchStrategy::new(self);
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
GlobStrategic { strategy: strategy, pat: self.clone(), re: re }
let re = new_regex(&self.re)
.expect("regex compilation shouldn't fail");
GlobStrategic {
strategy: strategy,
pat: self.clone(),
re: re,
}
}
/// Returns the original glob pattern used to build this pattern.
@@ -531,7 +524,7 @@ impl Glob {
| Token::RecursiveZeroOrMore => {
return None;
}
Token::Class { .. } | Token::Alternates(..) => {
Token::Class{..} | Token::Alternates(..) => {
// We *could* be a little smarter here, but either one
// of these is going to prevent our literal optimizations
// anyway, so give up.
@@ -568,7 +561,10 @@ impl<'a> GlobBuilder<'a> {
///
/// The pattern is not compiled until `build` is called.
pub fn new(glob: &'a str) -> GlobBuilder<'a> {
GlobBuilder { glob: glob, opts: GlobOptions::default() }
GlobBuilder {
glob: glob,
opts: GlobOptions::default(),
}
}
/// Parses and builds the pattern.
@@ -866,22 +862,25 @@ impl<'a> Parser<'a> {
return Ok(());
}
}
let is_suffix = match self.peek() {
None => {
assert!(self.bump().is_none());
true
}
Some(',') | Some('}') if self.stack.len() >= 2 => true,
Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
}
_ => {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
};
let is_suffix =
match self.peek() {
None => {
assert!(self.bump().is_none());
true
}
Some(',') | Some('}') if self.stack.len() >= 2 => {
true
}
Some(c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
}
_ => {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
};
match self.pop_token()? {
Token::RecursivePrefix => {
self.push_token(Token::RecursivePrefix)?;
@@ -961,10 +960,7 @@ impl<'a> Parser<'a> {
// invariant: in_range is only set when there is
// already at least one character seen.
add_to_last_range(
&self.glob,
ranges.last_mut().unwrap(),
c,
)?;
&self.glob, ranges.last_mut().unwrap(), c)?;
} else {
ranges.push((c, c));
}
@@ -978,7 +974,10 @@ impl<'a> Parser<'a> {
// it as a literal.
ranges.push(('-', '-'));
}
self.push_token(Token::Class { negated: negated, ranges: ranges })
self.push_token(Token::Class {
negated: negated,
ranges: ranges,
})
}
fn bump(&mut self) -> Option<char> {
@@ -1007,9 +1006,9 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
#[cfg(test)]
mod tests {
use super::Token::*;
use {GlobSetBuilder, ErrorKind};
use super::{Glob, GlobBuilder, Token};
use {ErrorKind, GlobSetBuilder};
use super::Token::*;
#[derive(Clone, Copy, Debug, Default)]
struct Options {
@@ -1025,7 +1024,7 @@ mod tests {
let pat = Glob::new($pat).unwrap();
assert_eq!($tokens, pat.tokens.0);
}
};
}
}
macro_rules! syntaxerr {
@@ -1035,7 +1034,7 @@ mod tests {
let err = Glob::new($pat).unwrap_err();
assert_eq!(&$err, err.kind());
}
};
}
}
macro_rules! toregex {
@@ -1117,9 +1116,7 @@ mod tests {
};
}
fn s(string: &str) -> String {
string.to_string()
}
fn s(string: &str) -> String { string.to_string() }
fn class(s: char, e: char) -> Token {
Class { negated: false, ranges: vec![(s, e)] }
@@ -1143,20 +1140,16 @@ mod tests {
syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
syntax!(seq1, "*", vec![ZeroOrMore]);
syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
syntax!(
seq3,
"*a*b*",
vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
);
syntax!(seq3, "*a*b*", vec![
ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,
]);
syntax!(rseq1, "**", vec![RecursivePrefix]);
syntax!(rseq2, "**/", vec![RecursivePrefix]);
syntax!(rseq3, "/**", vec![RecursiveSuffix]);
syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
syntax!(
rseq5,
"a/**/b",
vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
);
syntax!(rseq5, "a/**/b", vec![
Literal('a'), RecursiveZeroOrMore, Literal('b'),
]);
syntax!(cls1, "[a]", vec![class('a', 'a')]);
syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
@@ -1168,11 +1161,9 @@ mod tests {
syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
syntax!(
cls12,
"[-a-z-]",
vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
);
syntax!(cls12, "[-a-z-]", vec![
rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),
]);
syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
syntax!(cls14, "[--z]", vec![class('-', 'z')]);
syntax!(cls15, "[ --]", vec![class(' ', '-')]);
@@ -1190,14 +1181,26 @@ mod tests {
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
const CASEI: Options =
Options { casei: Some(true), litsep: None, bsesc: None };
const SLASHLIT: Options =
Options { casei: None, litsep: Some(true), bsesc: None };
const NOBSESC: Options =
Options { casei: None, litsep: None, bsesc: Some(false) };
const BSESC: Options =
Options { casei: None, litsep: None, bsesc: Some(true) };
const CASEI: Options = Options {
casei: Some(true),
litsep: None,
bsesc: None,
};
const SLASHLIT: Options = Options {
casei: None,
litsep: Some(true),
bsesc: None,
};
const NOBSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(false),
};
const BSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
};
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
@@ -1295,11 +1298,8 @@ mod tests {
matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
matches!(
matchpat7,
"*some/path/to/hello.txt",
"a/bigger/some/path/to/hello.txt"
);
matches!(matchpat7, "*some/path/to/hello.txt",
"a/bigger/some/path/to/hello.txt");
matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
@@ -1362,44 +1362,28 @@ mod tests {
nmatches!(matchnot15, "[!-]", "-");
nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
nmatches!(
matchnot18,
"*some/path/to/hello.txt",
"some/path/to/hello.txt-and-then-some"
);
nmatches!(
matchnot19,
"*some/path/to/hello.txt",
"some/other/path/to/hello.txt"
);
nmatches!(matchnot18, "*some/path/to/hello.txt",
"some/path/to/hello.txt-and-then-some");
nmatches!(matchnot19, "*some/path/to/hello.txt",
"some/other/path/to/hello.txt");
nmatches!(matchnot20, "a", "foo/a");
nmatches!(matchnot21, "./foo", "foo");
nmatches!(matchnot22, "**/foo", "foofoo");
nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
nmatches!(
matchnot26,
"**/m4/ltoptions.m4",
"csharp/src/packages/repositories.config",
SLASHLIT
);
nmatches!(matchnot26, "**/m4/ltoptions.m4",
"csharp/src/packages/repositories.config", SLASHLIT);
nmatches!(matchnot27, "a[^0-9]b", "a0b");
nmatches!(matchnot28, "a[^0-9]b", "a9b");
nmatches!(matchnot29, "[^-]", "-");
nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
nmatches!(
matchrec31,
"some/*/needle.txt",
"some/one/two/needle.txt",
SLASHLIT
);
"some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT);
nmatches!(
matchrec32,
"some/*/needle.txt",
"some/one/two/three/needle.txt",
SLASHLIT
);
"some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT);
macro_rules! extract {
($which:ident, $name:ident, $pat:expr, $expect:expr) => {
@@ -1461,27 +1445,19 @@ mod tests {
literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
literal!(extract_lit8, "**/foo/bar", None);
basetokens!(
extract_basetoks1,
"**/foo",
Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
);
basetokens!(extract_basetoks1, "**/foo", Some(&*vec![
Literal('f'), Literal('o'), Literal('o'),
]));
basetokens!(extract_basetoks2, "**/foo", None, CASEI);
basetokens!(
extract_basetoks3,
"**/foo",
Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
SLASHLIT
);
basetokens!(extract_basetoks3, "**/foo", Some(&*vec![
Literal('f'), Literal('o'), Literal('o'),
]), SLASHLIT);
basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
basetokens!(extract_basetoks5, "*foo", None);
basetokens!(extract_basetoks6, "**/fo*o", None);
basetokens!(
extract_basetoks7,
"**/fo*o",
Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
SLASHLIT
);
basetokens!(extract_basetoks7, "**/fo*o", Some(&*vec![
Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),
]), SLASHLIT);
ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
ext!(extract_ext2, "**/*.rs.bak", None);
@@ -110,9 +110,6 @@ extern crate fnv;
extern crate log;
extern crate regex;
#[cfg(feature = "serde1")]
extern crate serde;
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::error::Error as StdError;
@@ -122,19 +119,16 @@ use std::path::Path;
use std::str;
use aho_corasick::AhoCorasick;
use bstr::{ByteSlice, ByteVec, B};
use bstr::{B, BStr, BString};
use regex::bytes::{Regex, RegexBuilder, RegexSet};
use pathutil::{file_name, file_name_ext, normalize_path};
use glob::MatchStrategy;
pub use glob::{Glob, GlobBuilder, GlobMatcher};
use pathutil::{file_name, file_name_ext, normalize_path};
mod glob;
mod pathutil;
#[cfg(feature = "serde1")]
mod serde_impl;
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
@@ -208,7 +202,9 @@ impl ErrorKind {
ErrorKind::UnclosedClass => {
"unclosed character class; missing ']'"
}
ErrorKind::InvalidRange(_, _) => "invalid character range",
ErrorKind::InvalidRange(_, _) => {
"invalid character range"
}
ErrorKind::UnopenedAlternates => {
"unopened alternate group; missing '{' \
(maybe escape '}' with '[}]'?)"
@@ -220,7 +216,9 @@ impl ErrorKind {
ErrorKind::NestedAlternates => {
"nested alternate groups are not allowed"
}
ErrorKind::DanglingEscape => "dangling '\\'",
ErrorKind::DanglingEscape => {
"dangling '\\'"
}
ErrorKind::Regex(ref err) => err,
ErrorKind::__Nonexhaustive => unreachable!(),
}
@@ -247,7 +245,9 @@ impl fmt::Display for ErrorKind {
| ErrorKind::UnclosedAlternates
| ErrorKind::NestedAlternates
| ErrorKind::DanglingEscape
| ErrorKind::Regex(_) => write!(f, "{}", self.description()),
| ErrorKind::Regex(_) => {
write!(f, "{}", self.description())
}
ErrorKind::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e)
}
@@ -262,20 +262,21 @@ fn new_regex(pat: &str) -> Result<Regex, Error> {
.size_limit(10 * (1 << 20))
.dfa_size_limit(10 * (1 << 20))
.build()
.map_err(|err| Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
.map_err(|err| {
Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
}
})
}
fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
where
S: AsRef<str>,
I: IntoIterator<Item = S>,
{
RegexSet::new(pats).map_err(|err| Error {
glob: None,
kind: ErrorKind::Regex(err.to_string()),
where S: AsRef<str>, I: IntoIterator<Item=S> {
RegexSet::new(pats).map_err(|err| {
Error {
glob: None,
kind: ErrorKind::Regex(err.to_string()),
}
})
}
@@ -293,7 +294,10 @@ impl GlobSet {
/// Create an empty `GlobSet`. An empty set matches nothing.
#[inline]
pub fn empty() -> GlobSet {
GlobSet { len: 0, strats: vec![] }
GlobSet {
len: 0,
strats: vec![],
}
}
/// Returns true if this set is empty, and therefore matches nothing.
@@ -428,17 +432,11 @@ impl GlobSet {
}
}
}
debug!(
"built glob set; {} literals, {} basenames, {} extensions, \
debug!("built glob set; {} literals, {} basenames, {} extensions, \
{} prefixes, {} suffixes, {} required extensions, {} regexes",
lits.0.len(),
base_lits.0.len(),
exts.0.len(),
prefixes.literals.len(),
suffixes.literals.len(),
required_exts.0.len(),
regexes.literals.len()
);
lits.0.len(), base_lits.0.len(), exts.0.len(),
prefixes.literals.len(), suffixes.literals.len(),
required_exts.0.len(), regexes.literals.len());
Ok(GlobSet {
len: pats.len(),
strats: vec![
@@ -448,8 +446,7 @@ impl GlobSet {
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
GlobSetMatchStrategy::RequiredExtension(
required_exts.build()?,
),
required_exts.build()?),
GlobSetMatchStrategy::Regex(regexes.regex_set()?),
],
})
@@ -493,21 +490,25 @@ impl GlobSetBuilder {
/// path against multiple globs or sets of globs.
#[derive(Clone, Debug)]
pub struct Candidate<'a> {
path: Cow<'a, [u8]>,
basename: Cow<'a, [u8]>,
ext: Cow<'a, [u8]>,
path: Cow<'a, BStr>,
basename: Cow<'a, BStr>,
ext: Cow<'a, BStr>,
}
impl<'a> Candidate<'a> {
/// Create a new candidate for matching from the given path.
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
let path = normalize_path(BString::from_path_lossy(path.as_ref()));
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
Candidate { path: path, basename: basename, ext: ext }
Candidate {
path: path,
basename: basename,
ext: ext,
}
}
fn path_prefix(&self, max: usize) -> &[u8] {
fn path_prefix(&self, max: usize) -> &BStr {
if self.path.len() <= max {
&*self.path
} else {
@@ -515,7 +516,7 @@ impl<'a> Candidate<'a> {
}
}
fn path_suffix(&self, max: usize) -> &[u8] {
fn path_suffix(&self, max: usize) -> &BStr {
if self.path.len() <= max {
&*self.path
} else {
@@ -766,7 +767,11 @@ struct MultiStrategyBuilder {
impl MultiStrategyBuilder {
fn new() -> MultiStrategyBuilder {
MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
MultiStrategyBuilder {
literals: vec![],
map: vec![],
longest: 0,
}
}
fn add(&mut self, global_index: usize, literal: String) {
@@ -1,15 +1,15 @@
use std::borrow::Cow;
use bstr::{ByteSlice, ByteVec};
use bstr::BStr;
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
pub fn file_name<'a>(path: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
if path.is_empty() {
return None;
} else if path.last_byte() == Some(b'.') {
} else if path.last() == Some(b'.') {
return None;
}
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
@@ -39,7 +39,7 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
/// extension, but it also matches files like `.rs`, which doesn't have an
/// extension according to std::path::Path::extension.
pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
pub fn file_name_ext<'a>(name: &Cow<'a, BStr>) -> Option<Cow<'a, BStr>> {
if name.is_empty() {
return None;
}
@@ -60,7 +60,7 @@ pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(unix)]
pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
pub fn normalize_path(path: Cow<BStr>) -> Cow<BStr> {
// UNIX only uses /, so we're good.
path
}
@@ -68,7 +68,7 @@ pub fn normalize_path(path: Cow<[u8]>) -> Cow<[u8]> {
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(not(unix))]
pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
pub fn normalize_path(mut path: Cow<BStr>) -> Cow<BStr> {
use std::path::is_separator;
for i in 0..path.len() {
@@ -84,7 +84,7 @@ pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
mod tests {
use std::borrow::Cow;
use bstr::{ByteVec, B};
use bstr::{B, BString};
use super::{file_name_ext, normalize_path};
@@ -92,7 +92,7 @@ mod tests {
($name:ident, $file_name:expr, $ext:expr) => {
#[test]
fn $name() {
let bs = Vec::from($file_name);
let bs = BString::from($file_name);
let got = file_name_ext(&Cow::Owned(bs));
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
}
@@ -109,7 +109,7 @@ mod tests {
($name:ident, $path:expr, $expected:expr) => {
#[test]
fn $name() {
let bs = Vec::from_slice($path);
let bs = BString::from_slice($path);
let got = normalize_path(Cow::Owned(bs));
assert_eq!($expected.to_vec(), got.into_owned());
}

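To make the `*.rs`-matches-`.rs` behavior described above concrete, here is a sketch in the style of this file's unit tests, using the byte-slice (`Cow<[u8]>`) flavor of the helpers. `file_name_ext` is private to globset, so this is illustrative rather than a public API example:

```rust
use std::borrow::Cow;

use bstr::B;

use super::file_name_ext;

#[test]
fn dotfile_has_extension() {
    // Unlike std::path::Path::extension, a file named `.rs` does have
    // an extension here, so a glob like `*.rs` can match it.
    let dotfile: Cow<[u8]> = Cow::Borrowed(B(".rs"));
    assert_eq!(Some(Cow::Borrowed(B(".rs"))), file_name_ext(&dotfile));

    // Note that the leading `.` is included in the returned extension.
    let name: Cow<[u8]> = Cow::Borrowed(B("foo.rs"));
    assert_eq!(Some(Cow::Borrowed(B(".rs"))), file_name_ext(&name));
}
```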
View File

@@ -1,21 +1,21 @@
[package]
name = "grep-cli"
version = "0.1.4" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Utilities for search oriented command line applications.
"""
documentation = "https://docs.rs/grep-cli"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "cli", "utility", "util"]
license = "Unlicense/MIT"
[dependencies]
atty = "0.2.11"
bstr = "0.2.0"
globset = { version = "0.4.3", path = "../globset" }
bstr = "0.1.2"
globset = { version = "0.4.2", path = "../globset" }
lazy_static = "1.1.0"
log = "0.4.5"
regex = "1.1"

View File

@@ -38,7 +38,10 @@ impl Default for DecompressionMatcherBuilder {
impl DecompressionMatcherBuilder {
/// Create a new builder for configuring a decompression matcher.
pub fn new() -> DecompressionMatcherBuilder {
DecompressionMatcherBuilder { commands: vec![], defaults: true }
DecompressionMatcherBuilder {
commands: vec![],
defaults: true,
}
}
/// Build a matcher for determining how to decompress files.
@@ -46,11 +49,12 @@ impl DecompressionMatcherBuilder {
/// If there was a problem compiling the matcher, then an error is
/// returned.
pub fn build(&self) -> Result<DecompressionMatcher, CommandError> {
let defaults = if !self.defaults {
vec![]
} else {
default_decompression_commands()
};
let defaults =
if !self.defaults {
vec![]
} else {
default_decompression_commands()
};
let mut glob_builder = GlobSetBuilder::new();
let mut commands = vec![];
for decomp_cmd in defaults.iter().chain(&self.commands) {
@@ -89,15 +93,17 @@ impl DecompressionMatcherBuilder {
program: P,
args: I,
) -> &mut DecompressionMatcherBuilder
where
P: AsRef<OsStr>,
I: IntoIterator<Item = A>,
A: AsRef<OsStr>,
where P: AsRef<OsStr>,
I: IntoIterator<Item=A>,
A: AsRef<OsStr>,
{
let glob = glob.to_string();
let bin = program.as_ref().to_os_string();
let args =
args.into_iter().map(|a| a.as_ref().to_os_string()).collect();
let args = args
.into_iter()
.map(|a| a.as_ref().to_os_string())
.collect();
self.commands.push(DecompressionCommand { glob, bin, args });
self
}

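For context, a sketch of how `associate` composes with the built-in defaults. The `lz4` program and its flags are placeholders, and the `DecompressionMatcher::command` accessor is assumed from grep-cli 0.1.x:

```rust
use std::path::Path;

use grep_cli::DecompressionMatcherBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Custom associations are consulted alongside the built-in
    // defaults (gzip, xz, bzip2, ...). The program is only looked up
    // on PATH when a reader is actually spawned, not here.
    let matcher = DecompressionMatcherBuilder::new()
        .associate("*.lz4", "lz4", &["-d", "-c"])
        .build()?;
    assert!(matcher.command(Path::new("archive.lz4")).is_some());
    assert!(matcher.command(Path::new("archive.txt")).is_none());
    Ok(())
}
```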
View File

@@ -1,7 +1,7 @@
use std::ffi::OsStr;
use std::str;
use bstr::{ByteSlice, ByteVec};
use bstr::{BStr, BString};
/// A single state in the state machine used by `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
@@ -38,6 +38,7 @@ enum State {
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
/// ```
pub fn escape(bytes: &[u8]) -> String {
let bytes = BStr::new(bytes);
let mut escaped = String::new();
for (s, e, ch) in bytes.char_indices() {
if ch == '\u{FFFD}' {
@@ -55,7 +56,7 @@ pub fn escape(bytes: &[u8]) -> String {
///
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
pub fn escape_os(string: &OsStr) -> String {
escape(Vec::from_os_str_lossy(string).as_bytes())
escape(BString::from_os_str_lossy(string).as_bytes())
}
/// Unescapes a string.
@@ -95,61 +96,51 @@ pub fn unescape(s: &str) -> Vec<u8> {
let mut state = Literal;
for c in s.chars() {
match state {
Escape => match c {
'\\' => {
bytes.push(b'\\');
state = Literal;
Escape => {
match c {
'\\' => { bytes.push(b'\\'); state = Literal; }
'n' => { bytes.push(b'\n'); state = Literal; }
'r' => { bytes.push(b'\r'); state = Literal; }
't' => { bytes.push(b'\t'); state = Literal; }
'x' => { state = HexFirst; }
c => {
bytes.extend(format!(r"\{}", c).into_bytes());
state = Literal;
}
}
'n' => {
bytes.push(b'\n');
state = Literal;
}
HexFirst => {
match c {
'0'...'9' | 'A'...'F' | 'a'...'f' => {
state = HexSecond(c);
}
c => {
bytes.extend(format!(r"\x{}", c).into_bytes());
state = Literal;
}
}
'r' => {
bytes.push(b'\r');
state = Literal;
}
HexSecond(first) => {
match c {
'0'...'9' | 'A'...'F' | 'a'...'f' => {
let ordinal = format!("{}{}", first, c);
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
bytes.push(byte);
state = Literal;
}
c => {
let original = format!(r"\x{}{}", first, c);
bytes.extend(original.into_bytes());
state = Literal;
}
}
't' => {
bytes.push(b'\t');
state = Literal;
}
Literal => {
match c {
'\\' => { state = Escape; }
c => { bytes.extend(c.to_string().as_bytes()); }
}
'x' => {
state = HexFirst;
}
c => {
bytes.extend(format!(r"\{}", c).into_bytes());
state = Literal;
}
},
HexFirst => match c {
'0'..='9' | 'A'..='F' | 'a'..='f' => {
state = HexSecond(c);
}
c => {
bytes.extend(format!(r"\x{}", c).into_bytes());
state = Literal;
}
},
HexSecond(first) => match c {
'0'..='9' | 'A'..='F' | 'a'..='f' => {
let ordinal = format!("{}{}", first, c);
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
bytes.push(byte);
state = Literal;
}
c => {
let original = format!(r"\x{}{}", first, c);
bytes.extend(original.into_bytes());
state = Literal;
}
},
Literal => match c {
'\\' => {
state = Escape;
}
c => {
bytes.extend(c.to_string().as_bytes());
}
},
}
}
}
match state {
@@ -183,7 +174,7 @@ fn escape_char(cp: char, into: &mut String) {
/// Adds the given byte to the given string, escaping it if necessary.
fn escape_byte(byte: u8, into: &mut String) {
match byte {
0x21..=0x5B | 0x5D..=0x7D => into.push(byte as char),
0x21...0x5B | 0x5D...0x7D => into.push(byte as char),
b'\n' => into.push_str(r"\n"),
b'\r' => into.push_str(r"\r"),
b'\t' => into.push_str(r"\t"),

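The escape/unescape pair above is meant to round-trip arbitrary bytes through a printable string. A minimal sketch using the public grep-cli functions:

```rust
use grep_cli::{escape, unescape};

fn main() {
    // Invalid UTF-8 is rendered with hex escapes, and control
    // characters with the usual \n, \r, \t mnemonics.
    let printable = escape(b"foo\nbar\xFFbaz");
    assert_eq!(r"foo\nbar\xFFbaz", printable);

    // unescape reverses the transformation, so arbitrary bytes can be
    // passed through a UTF-8-only channel such as a config file.
    assert_eq!(unescape(&printable), b"foo\nbar\xFFbaz".to_vec());
}
```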
View File

@@ -46,9 +46,7 @@ impl ParseSizeError {
}
impl error::Error for ParseSizeError {
fn description(&self) -> &str {
"invalid size"
}
fn description(&self) -> &str { "invalid size" }
}
impl fmt::Display for ParseSizeError {
@@ -56,19 +54,26 @@ impl fmt::Display for ParseSizeError {
use self::ParseSizeErrorKind::*;
match self.kind {
InvalidFormat => write!(
f,
"invalid format for size '{}', which should be a sequence \
InvalidFormat => {
write!(
f,
"invalid format for size '{}', which should be a sequence \
of digits followed by an optional 'K', 'M' or 'G' \
suffix",
self.original
),
InvalidInt(ref err) => write!(
f,
"invalid integer found in size '{}': {}",
self.original, err
),
Overflow => write!(f, "size too big in '{}'", self.original),
self.original
)
}
InvalidInt(ref err) => {
write!(
f,
"invalid integer found in size '{}': {}",
self.original,
err
)
}
Overflow => {
write!(f, "size too big in '{}'", self.original)
}
}
}
}
@@ -99,16 +104,17 @@ pub fn parse_human_readable_size(size: &str) -> Result<u64, ParseSizeError> {
Some(caps) => caps,
None => return Err(ParseSizeError::format(size)),
};
let value: u64 =
caps[1].parse().map_err(|err| ParseSizeError::int(size, err))?;
let value: u64 = caps[1].parse().map_err(|err| {
ParseSizeError::int(size, err)
})?;
let suffix = match caps.get(2) {
None => return Ok(value),
Some(cap) => cap.as_str(),
};
let bytes = match suffix {
"K" => value.checked_mul(1 << 10),
"M" => value.checked_mul(1 << 20),
"G" => value.checked_mul(1 << 30),
"K" => value.checked_mul(1<<10),
"M" => value.checked_mul(1<<20),
"G" => value.checked_mul(1<<30),
// If the regex matched this group, it must be one of K, M or G.
_ => unreachable!(),
};
@@ -128,19 +134,19 @@ mod tests {
#[test]
fn suffix_k() {
let x = parse_human_readable_size("123K").unwrap();
assert_eq!(123 * (1 << 10), x);
assert_eq!(123 * (1<<10), x);
}
#[test]
fn suffix_m() {
let x = parse_human_readable_size("123M").unwrap();
assert_eq!(123 * (1 << 20), x);
assert_eq!(123 * (1<<20), x);
}
#[test]
fn suffix_g() {
let x = parse_human_readable_size("123G").unwrap();
assert_eq!(123 * (1 << 30), x);
assert_eq!(123 * (1<<30), x);
}
#[test]

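A sketch of `parse_human_readable_size` as exercised by these tests; note that the suffixes are binary multiples (K = 2^10, M = 2^20, G = 2^30), not powers of ten:

```rust
use grep_cli::parse_human_readable_size;

fn main() {
    // Plain digits pass through unchanged.
    assert_eq!(64, parse_human_readable_size("64").unwrap());
    // K/M/G multiply by 2^10, 2^20 and 2^30, respectively.
    assert_eq!(123 * (1 << 10), parse_human_readable_size("123K").unwrap());
    // Anything else is a ParseSizeError, as is multiplication overflow.
    assert!(parse_human_readable_size("123KB").is_err());
}
```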
View File

@@ -179,18 +179,20 @@ mod process;
mod wtr;
pub use decompress::{
DecompressionMatcher, DecompressionMatcherBuilder, DecompressionReader,
DecompressionReaderBuilder,
DecompressionMatcher, DecompressionMatcherBuilder,
DecompressionReader, DecompressionReaderBuilder,
};
pub use escape::{escape, escape_os, unescape, unescape_os};
pub use human::{parse_human_readable_size, ParseSizeError};
pub use human::{ParseSizeError, parse_human_readable_size};
pub use pattern::{
pattern_from_bytes, pattern_from_os, patterns_from_path,
patterns_from_reader, patterns_from_stdin, InvalidPatternError,
InvalidPatternError,
pattern_from_os, pattern_from_bytes,
patterns_from_path, patterns_from_reader, patterns_from_stdin,
};
pub use process::{CommandError, CommandReader, CommandReaderBuilder};
pub use wtr::{
stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
StandardStream,
stdout, stdout_buffered_line, stdout_buffered_block,
};
/// Returns true if and only if stdin is believed to be readable.
@@ -203,8 +205,8 @@ pub use wtr::{
pub fn is_readable_stdin() -> bool {
#[cfg(unix)]
fn imp() -> bool {
use same_file::Handle;
use std::os::unix::fs::FileTypeExt;
use same_file::Handle;
let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
Err(_) => return false,

View File

@@ -2,12 +2,10 @@ use std::error;
use std::ffi::OsStr;
use std::fmt;
use std::fs::File;
use std::io;
use std::io::{self, BufRead};
use std::path::Path;
use std::str;
use bstr::io::BufReadExt;
use escape::{escape, escape_os};
/// An error that occurs when a pattern could not be converted to valid UTF-8.
@@ -29,9 +27,7 @@ impl InvalidPatternError {
}
impl error::Error for InvalidPatternError {
fn description(&self) -> &str {
"invalid pattern"
}
fn description(&self) -> &str { "invalid pattern" }
}
impl fmt::Display for InvalidPatternError {
@@ -41,7 +37,8 @@ impl fmt::Display for InvalidPatternError {
"found invalid UTF-8 in pattern at byte offset {} \
(use hex escape sequences to match arbitrary bytes \
in a pattern, e.g., \\xFF): '{}'",
self.valid_up_to, self.original,
self.valid_up_to,
self.original,
)
}
}
@@ -80,9 +77,11 @@ pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> {
pub fn pattern_from_bytes(
pattern: &[u8],
) -> Result<&str, InvalidPatternError> {
str::from_utf8(pattern).map_err(|err| InvalidPatternError {
original: escape(pattern),
valid_up_to: err.valid_up_to(),
str::from_utf8(pattern).map_err(|err| {
InvalidPatternError {
original: escape(pattern),
valid_up_to: err.valid_up_to(),
}
})
}
@@ -118,7 +117,10 @@ pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
let stdin = io::stdin();
let locked = stdin.lock();
patterns_from_reader(locked).map_err(|err| {
io::Error::new(io::ErrorKind::Other, format!("<stdin>:{}", err))
io::Error::new(
io::ErrorKind::Other,
format!("<stdin>:{}", err),
)
})
}
@@ -154,20 +156,28 @@ pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
/// ```
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
let mut patterns = vec![];
let mut bufrdr = io::BufReader::new(rdr);
let mut line = vec![];
let mut line_number = 0;
io::BufReader::new(rdr).for_byte_line(|line| {
while {
line.clear();
line_number += 1;
match pattern_from_bytes(line) {
Ok(pattern) => {
patterns.push(pattern.to_string());
Ok(true)
}
Err(err) => Err(io::Error::new(
io::ErrorKind::Other,
format!("{}: {}", line_number, err),
)),
bufrdr.read_until(b'\n', &mut line)? > 0
} {
line.pop().unwrap(); // remove trailing '\n'
if line.last() == Some(&b'\r') {
line.pop().unwrap();
}
})?;
match pattern_from_bytes(&line) {
Ok(pattern) => patterns.push(pattern.to_string()),
Err(err) => {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("{}: {}", line_number, err),
));
}
}
}
Ok(patterns)
}
@@ -185,8 +195,8 @@ mod tests {
#[test]
#[cfg(unix)]
fn os() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
use std::ffi::OsStr;
let pat = OsStr::from_bytes(b"abc\xFFxyz");
let err = pattern_from_os(pat).unwrap_err();

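Both versions of `patterns_from_reader` above implement the same contract: one pattern per line, with LF or CRLF terminators stripped. A minimal sketch:

```rust
use grep_cli::patterns_from_reader;

fn main() -> std::io::Result<()> {
    // Works with any io::Read; a byte slice stands in for a file here.
    let input = &b"foo\r\nbar\nbaz\n"[..];
    let patterns = patterns_from_reader(input)?;
    assert_eq!(patterns, vec!["foo", "bar", "baz"]);
    Ok(())
}
```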
View File

@@ -33,9 +33,7 @@ impl CommandError {
}
impl error::Error for CommandError {
fn description(&self) -> &str {
"command error"
}
fn description(&self) -> &str { "command error" }
}
impl fmt::Display for CommandError {
@@ -48,12 +46,7 @@ impl fmt::Display for CommandError {
write!(f, "<stderr is empty>")
} else {
let div = iter::repeat('-').take(79).collect::<String>();
write!(
f,
"\n{div}\n{msg}\n{div}",
div = div,
msg = msg.trim()
)
write!(f, "\n{div}\n{msg}\n{div}", div=div, msg=msg.trim())
}
}
}
@@ -108,11 +101,12 @@ impl CommandReaderBuilder {
.stderr(process::Stdio::piped())
.spawn()?;
let stdout = child.stdout.take().unwrap();
let stderr = if self.async_stderr {
StderrReader::async(child.stderr.take().unwrap())
} else {
StderrReader::sync(child.stderr.take().unwrap())
};
let stderr =
if self.async_stderr {
StderrReader::async(child.stderr.take().unwrap())
} else {
StderrReader::sync(child.stderr.take().unwrap())
};
Ok(CommandReader {
child: child,
stdout: stdout,
@@ -232,8 +226,9 @@ enum StderrReader {
impl StderrReader {
/// Create a reader for stderr that reads contents asynchronously.
fn async(mut stderr: process::ChildStderr) -> StderrReader {
let handle =
thread::spawn(move || stderr_to_command_error(&mut stderr));
let handle = thread::spawn(move || {
stderr_to_command_error(&mut stderr)
});
StderrReader::Async(Some(handle))
}
@@ -252,7 +247,9 @@ impl StderrReader {
let handle = handle
.take()
.expect("read_to_end cannot be called more than once");
handle.join().expect("stderr reading thread does not panic")
handle
.join()
.expect("stderr reading thread does not panic")
}
StderrReader::Sync(ref mut stderr) => {
stderr_to_command_error(stderr)

View File

@@ -1,13 +1,13 @@
[package]
name = "grep-matcher"
version = "0.1.4" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A trait for regular expressions, with a focus on line oriented search.
"""
documentation = "https://docs.rs/grep-matcher"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/matcher"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/matcher"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "pattern", "trait"]
license = "Unlicense/MIT"

View File

@@ -19,7 +19,7 @@ pub fn interpolate<A, N>(
dst: &mut Vec<u8>,
) where
A: FnMut(usize, &mut Vec<u8>),
N: FnMut(&str) -> Option<usize>,
N: FnMut(&str) -> Option<usize>
{
while !replacement.is_empty() {
match memchr(b'$', replacement) {
@@ -134,14 +134,14 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
/// Returns true if and only if the given byte is allowed in a capture name.
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true,
_ => false,
}
}
#[cfg(test)]
mod tests {
use super::{find_cap_ref, interpolate, CaptureRef};
use super::{CaptureRef, find_cap_ref, interpolate};
macro_rules! find {
($name:ident, $text:expr) => {
@@ -211,7 +211,7 @@ mod tests {
fn $name() {
assert_eq!($expected, interpolate_string($map, $caps, $hay));
}
};
}
}
interp!(

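The `interpolate` helper changed above backs the `Captures::interpolate` trait method. A sketch of the replacement syntax it implements, groups referenced by index (`$1`) or by name (`${name}`), using grep-regex's `RegexMatcher` as a stand-in for any `Matcher`:

```rust
use grep_matcher::{Captures, Matcher};
use grep_regex::RegexMatcher;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let matcher = RegexMatcher::new(r"(?P<first>\w+)\s+(?P<last>\w+)")?;
    let haystack = b"Sherlock Holmes";
    let mut caps = matcher.new_captures()?;
    assert!(matcher.captures(haystack, &mut caps)?);

    // Interpolate the replacement string into dst, resolving group
    // names through the matcher.
    let mut dst = vec![];
    caps.interpolate(
        |name| matcher.capture_index(name),
        haystack,
        b"${last}, $1",
        &mut dst,
    );
    assert_eq!(b"Holmes, Sherlock".to_vec(), dst);
    Ok(())
}
```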
View File

@@ -278,7 +278,7 @@ impl LineTerminator {
}
}
impl Default for LineTerminator {
#[inline]
fn default() -> LineTerminator {
LineTerminator::byte(b'\n')
@@ -439,8 +439,7 @@ pub trait Captures {
haystack: &[u8],
replacement: &[u8],
dst: &mut Vec<u8>,
) where
F: FnMut(&str) -> Option<usize>,
) where F: FnMut(&str) -> Option<usize>
{
interpolate(
replacement,
@@ -464,18 +463,12 @@ pub struct NoCaptures(());
impl NoCaptures {
/// Create an empty set of capturing groups.
pub fn new() -> NoCaptures {
NoCaptures(())
}
pub fn new() -> NoCaptures { NoCaptures(()) }
}
impl Captures for NoCaptures {
fn len(&self) -> usize {
0
}
fn get(&self, _: usize) -> Option<Match> {
None
}
fn len(&self) -> usize { 0 }
fn get(&self, _: usize) -> Option<Match> { None }
}
/// NoError provides an error type for matchers that never produce errors.
@@ -488,9 +481,7 @@ impl Captures for NoCaptures {
pub struct NoError(());
impl ::std::error::Error for NoError {
fn description(&self) -> &str {
"no error"
}
fn description(&self) -> &str { "no error" }
}
impl fmt::Display for NoError {
@@ -608,7 +599,10 @@ pub trait Matcher {
///
/// The text encoding of `haystack` is not strictly specified. Matchers are
/// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
fn find(&self, haystack: &[u8]) -> Result<Option<Match>, Self::Error> {
fn find(
&self,
haystack: &[u8],
) -> Result<Option<Match>, Self::Error> {
self.find_at(haystack, 0)
}
@@ -620,8 +614,7 @@ pub trait Matcher {
haystack: &[u8],
mut matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match) -> bool,
where F: FnMut(Match) -> bool
{
self.try_find_iter(haystack, |m| Ok(matched(m)))
.map(|r: Result<(), ()>| r.unwrap())
@@ -639,8 +632,7 @@ pub trait Matcher {
haystack: &[u8],
mut matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
let mut last_end = 0;
let mut last_match = None;
@@ -698,8 +690,7 @@ pub trait Matcher {
caps: &mut Self::Captures,
mut matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures) -> bool,
where F: FnMut(&Self::Captures) -> bool
{
self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps)))
.map(|r: Result<(), ()>| r.unwrap())
@@ -718,8 +709,7 @@ pub trait Matcher {
caps: &mut Self::Captures,
mut matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(&Self::Captures) -> Result<bool, E>,
where F: FnMut(&Self::Captures) -> Result<bool, E>
{
let mut last_end = 0;
let mut last_match = None;
@@ -797,8 +787,7 @@ pub trait Matcher {
dst: &mut Vec<u8>,
mut append: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match, &mut Vec<u8>) -> bool,
where F: FnMut(Match, &mut Vec<u8>) -> bool
{
let mut last_match = 0;
self.find_iter(haystack, |m| {
@@ -821,8 +810,7 @@ pub trait Matcher {
dst: &mut Vec<u8>,
mut append: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
{
let mut last_match = 0;
self.captures_iter(haystack, caps, |caps| {
@@ -1024,7 +1012,10 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).capture_count()
}
fn find(&self, haystack: &[u8]) -> Result<Option<Match>, Self::Error> {
fn find(
&self,
haystack: &[u8]
) -> Result<Option<Match>, Self::Error> {
(*self).find(haystack)
}
@@ -1033,8 +1024,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
haystack: &[u8],
matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match) -> bool,
where F: FnMut(Match) -> bool
{
(*self).find_iter(haystack, matched)
}
@@ -1044,8 +1034,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
haystack: &[u8],
matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
(*self).try_find_iter(haystack, matched)
}
@@ -1064,8 +1053,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
caps: &mut Self::Captures,
matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures) -> bool,
where F: FnMut(&Self::Captures) -> bool
{
(*self).captures_iter(haystack, caps, matched)
}
@@ -1076,8 +1064,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
caps: &mut Self::Captures,
matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(&Self::Captures) -> Result<bool, E>,
where F: FnMut(&Self::Captures) -> Result<bool, E>
{
(*self).try_captures_iter(haystack, caps, matched)
}
@@ -1088,8 +1075,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match, &mut Vec<u8>) -> bool,
where F: FnMut(Match, &mut Vec<u8>) -> bool
{
(*self).replace(haystack, dst, append)
}
@@ -1101,8 +1087,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
{
(*self).replace_with_captures(haystack, caps, dst, append)
}
@@ -1114,7 +1099,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
fn is_match_at(
&self,
haystack: &[u8],
at: usize,
at: usize
) -> Result<bool, Self::Error> {
(*self).is_match_at(haystack, at)
}

View File

@@ -25,22 +25,18 @@ fn find() {
fn find_iter() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
matcher
.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
true
})
.unwrap();
matcher.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
true
}).unwrap();
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
// Test that find_iter respects short circuiting.
matches.clear();
matcher
.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
false
})
.unwrap();
matcher.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
false
}).unwrap();
assert_eq!(matches, vec![m(0, 5)]);
}
@@ -51,17 +47,14 @@ fn try_find_iter() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
let err = matcher
.try_find_iter(b"aa bb cc dd", |m| {
if matches.is_empty() {
matches.push(m);
Ok(true)
} else {
Err(MyError)
}
})
.unwrap()
.unwrap_err();
let err = matcher.try_find_iter(b"aa bb cc dd", |m| {
if matches.is_empty() {
matches.push(m);
Ok(true)
} else {
Err(MyError)
}
}).unwrap().unwrap_err();
assert_eq!(matches, vec![m(0, 5)]);
assert_eq!(err, MyError);
}
@@ -96,30 +89,28 @@ fn captures_iter() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
matcher
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
true
})
.unwrap();
assert_eq!(
matches,
vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11),]
);
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
true
}).unwrap();
assert_eq!(matches, vec![
m(0, 5), m(0, 2), m(3, 5),
m(6, 11), m(6, 8), m(9, 11),
]);
// Test that captures_iter respects short circuiting.
matches.clear();
matcher
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
false
})
.unwrap();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5),]);
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
false
}).unwrap();
assert_eq!(matches, vec![
m(0, 5), m(0, 2), m(3, 5),
]);
}
#[test]
@@ -130,19 +121,16 @@ fn try_captures_iter() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
let err = matcher
.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
if matches.is_empty() {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
Ok(true)
} else {
Err(MyError)
}
})
.unwrap()
.unwrap_err();
let err = matcher.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
if matches.is_empty() {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
Ok(true)
} else {
Err(MyError)
}
}).unwrap().unwrap_err();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
assert_eq!(err, MyError);
}
@@ -162,12 +150,10 @@ fn no_captures() {
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
let mut called = false;
matcher
.captures_iter(b"homer simpson", &mut caps, |_| {
called = true;
true
})
.unwrap();
matcher.captures_iter(b"homer simpson", &mut caps, |_| {
called = true;
true
}).unwrap();
assert!(!called);
}
@@ -175,22 +161,18 @@ fn no_captures() {
fn replace() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut dst = vec![];
matcher
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
true
})
.unwrap();
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
true
}).unwrap();
assert_eq!(dst, b"z z");
// Test that replacements respect short circuiting.
dst.clear();
matcher
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
false
})
.unwrap();
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
false
}).unwrap();
assert_eq!(dst, b"z cc dd");
}
@@ -200,31 +182,27 @@ fn replace_with_captures() {
let haystack = b"aa bb cc dd";
let mut caps = matcher.new_captures().unwrap();
let mut dst = vec![];
matcher
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
true
})
.unwrap();
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
true
}).unwrap();
assert_eq!(dst, b"bb aa dd cc");
// Test that replacements respect short circuiting.
dst.clear();
matcher
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
false
})
.unwrap();
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
false
}).unwrap();
assert_eq!(dst, b"bb aa cc dd");
}

View File

@@ -18,7 +18,10 @@ impl RegexMatcher {
names.insert(name.to_string(), i);
}
}
RegexMatcher { re: re, names: names }
RegexMatcher {
re: re,
names: names,
}
}
}
@@ -28,9 +31,12 @@ impl Matcher for RegexMatcher {
type Captures = RegexCaptures;
type Error = NoError;
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
Ok(self
.re
fn find_at(
&self,
haystack: &[u8],
at: usize,
) -> Result<Option<Match>> {
Ok(self.re
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}
@@ -69,9 +75,12 @@ impl Matcher for RegexMatcherNoCaps {
type Captures = NoCaptures;
type Error = NoError;
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
Ok(self
.0
fn find_at(
&self,
haystack: &[u8],
at: usize,
) -> Result<Option<Match>> {
Ok(self.0
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}

View File

@@ -1,17 +1,17 @@
[package]
name = "grep-pcre2"
version = "0.1.4" #:version
version = "0.1.2" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use PCRE2 with the 'grep' crate.
"""
documentation = "https://docs.rs/grep-pcre2"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "pcre", "backreference", "look"]
license = "Unlicense/MIT"
[dependencies]
grep-matcher = { version = "0.1.2", path = "../matcher" }
pcre2 = "0.2.0"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
pcre2 = "0.1.1"

View File

@@ -10,7 +10,6 @@ extern crate pcre2;
pub use error::{Error, ErrorKind};
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
pub use pcre2::{is_jit_available, version};
mod error;
mod matcher;

View File

@@ -33,12 +33,13 @@ impl RegexMatcherBuilder {
if self.case_smart && !has_uppercase_literal(pattern) {
builder.caseless(true);
}
let res = if self.word {
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
builder.build(&pattern)
} else {
builder.build(pattern)
};
let res =
if self.word {
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
builder.build(&pattern)
} else {
builder.build(pattern)
};
res.map_err(Error::regex).map(|regex| {
let mut names = HashMap::new();
for (i, name) in regex.capture_names().iter().enumerate() {
@@ -226,27 +227,6 @@ impl RegexMatcherBuilder {
self.builder.jit_if_available(yes);
self
}
/// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
/// not enabled, then this has no effect.
///
/// When `None` is given, no custom JIT stack will be created, and instead,
/// the default JIT stack is used. When the default is used, its maximum
/// size is 32 KB.
///
/// When this is set, then a new JIT stack will be created with the given
/// maximum size as its limit.
///
/// Increasing the stack size can be useful for larger regular expressions.
///
/// By default, this is set to `None`.
pub fn max_jit_stack_size(
&mut self,
bytes: Option<usize>,
) -> &mut RegexMatcherBuilder {
self.builder.max_jit_stack_size(bytes);
self
}
}
/// An implementation of the `Matcher` trait using PCRE2.
@@ -273,8 +253,7 @@ impl Matcher for RegexMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, Error> {
Ok(self
.regex
Ok(self.regex
.find_at(haystack, at)
.map_err(Error::regex)?
.map(|m| Match::new(m.start(), m.end())))
@@ -297,8 +276,7 @@ impl Matcher for RegexMatcher {
haystack: &[u8],
mut matched: F,
) -> Result<Result<(), E>, Error>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
for result in self.regex.find_iter(haystack) {
let m = result.map_err(Error::regex)?;
@@ -317,11 +295,10 @@ impl Matcher for RegexMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, Error> {
Ok(self
.regex
.captures_read_at(&mut caps.locs, haystack, at)
.map_err(Error::regex)?
.is_some())
Ok(self.regex
.captures_read_at(&mut caps.locs, haystack, at)
.map_err(Error::regex)?
.is_some())
}
}
@@ -385,19 +362,23 @@ fn has_uppercase_literal(pattern: &str) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use grep_matcher::{LineMatchKind, Matcher};
use super::*;
// Test that enabling word matches does the right thing and demonstrate
// the difference between it and surrounding the regex with `\b`.
#[test]
fn word() {
let matcher =
RegexMatcherBuilder::new().word(true).build(r"-2").unwrap();
let matcher = RegexMatcherBuilder::new()
.word(true)
.build(r"-2")
.unwrap();
assert!(matcher.is_match(b"abc -2 foo").unwrap());
let matcher =
RegexMatcherBuilder::new().word(false).build(r"\b-2\b").unwrap();
let matcher = RegexMatcherBuilder::new()
.word(false)
.build(r"\b-2\b")
.unwrap();
assert!(!matcher.is_match(b"abc -2 foo").unwrap());
}
@@ -430,12 +411,16 @@ mod tests {
// Test that smart case works.
#[test]
fn case_smart() {
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"abc").unwrap();
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"abc")
.unwrap();
assert!(matcher.is_match(b"ABC").unwrap());
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"aBc").unwrap();
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"aBc")
.unwrap();
assert!(!matcher.is_match(b"ABC").unwrap());
}
@@ -449,7 +434,9 @@ mod tests {
}
}
let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
let matcher = RegexMatcherBuilder::new()
.build(r"\wfoo\s")
.unwrap();
let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
assert!(is_confirmed(m));
}

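To restate the difference exercised in the `word` test above as a standalone sketch, assuming the grep-pcre2 public builder API:

```rust
use grep_matcher::Matcher;
use grep_pcre2::RegexMatcherBuilder;

fn main() -> Result<(), grep_pcre2::Error> {
    // word(true) wraps the pattern in (?<!\w)(?:...)(?!\w). For a
    // pattern that begins with a non-word byte like `-`, this is not
    // the same as \b-2\b: there is no word boundary before the `-`.
    let word = RegexMatcherBuilder::new().word(true).build(r"-2")?;
    assert!(word.is_match(b"abc -2 foo")?);

    let boundary = RegexMatcherBuilder::new().build(r"\b-2\b")?;
    assert!(!boundary.is_match(b"abc -2 foo")?);
    Ok(())
}
```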
View File

@@ -1,14 +1,14 @@
[package]
name = "grep-printer"
version = "0.1.4" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
An implementation of the grep crate's Sink trait that provides standard
printing of search results, similar to grep itself.
"""
documentation = "https://docs.rs/grep-printer"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["grep", "pattern", "print", "printer", "sink"]
license = "Unlicense/MIT"
@@ -18,14 +18,14 @@ default = ["serde1"]
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
[dependencies]
base64 = { version = "0.11.0", optional = true }
bstr = "0.2.0"
grep-matcher = { version = "0.1.2", path = "../matcher" }
grep-searcher = { version = "0.1.4", path = "../searcher" }
base64 = { version = "0.10.0", optional = true }
bstr = "0.1.2"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
grep-searcher = { version = "0.1.1", path = "../grep-searcher" }
termcolor = "1.0.4"
serde = { version = "1.0.77", optional = true }
serde_derive = { version = "1.0.77", optional = true }
serde_json = { version = "1.0.27", optional = true }
[dev-dependencies]
grep-regex = { version = "0.1.3", path = "../regex" }
grep-regex = { version = "0.1.1", path = "../grep-regex" }

View File

@@ -62,32 +62,42 @@ impl ColorError {
impl fmt::Display for ColorError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ColorError::UnrecognizedOutType(ref name) => write!(
f,
"unrecognized output type '{}'. Choose from: \
ColorError::UnrecognizedOutType(ref name) => {
write!(
f,
"unrecognized output type '{}'. Choose from: \
path, line, column, match.",
name,
),
ColorError::UnrecognizedSpecType(ref name) => write!(
f,
"unrecognized spec type '{}'. Choose from: \
name,
)
}
ColorError::UnrecognizedSpecType(ref name) => {
write!(
f,
"unrecognized spec type '{}'. Choose from: \
fg, bg, style, none.",
name,
),
ColorError::UnrecognizedColor(_, ref msg) => write!(f, "{}", msg),
ColorError::UnrecognizedStyle(ref name) => write!(
f,
"unrecognized style attribute '{}'. Choose from: \
name,
)
}
ColorError::UnrecognizedColor(_, ref msg) => {
write!(f, "{}", msg)
}
ColorError::UnrecognizedStyle(ref name) => {
write!(
f,
"unrecognized style attribute '{}'. Choose from: \
nobold, bold, nointense, intense, nounderline, \
underline.",
name,
),
ColorError::InvalidFormat(ref original) => write!(
f,
"invalid color spec format: '{}'. Valid format \
name,
)
}
ColorError::InvalidFormat(ref original) => {
write!(
f,
"invalid color spec format: '{}'. Valid format \
is '(path|line|column|match):(fg|bg|style):(value)'.",
original,
),
original,
)
}
}
}
}
@@ -217,7 +227,7 @@ enum Style {
Intense,
NoIntense,
Underline,
NoUnderline,
NoUnderline
}
impl ColorSpecs {
@@ -278,32 +288,18 @@ impl SpecValue {
fn merge_into(&self, cspec: &mut ColorSpec) {
match *self {
SpecValue::None => cspec.clear(),
SpecValue::Fg(ref color) => {
cspec.set_fg(Some(color.clone()));
SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); }
SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); }
SpecValue::Style(ref style) => {
match *style {
Style::Bold => { cspec.set_bold(true); }
Style::NoBold => { cspec.set_bold(false); }
Style::Intense => { cspec.set_intense(true); }
Style::NoIntense => { cspec.set_intense(false); }
Style::Underline => { cspec.set_underline(true); }
Style::NoUnderline => { cspec.set_underline(false); }
}
}
SpecValue::Bg(ref color) => {
cspec.set_bg(Some(color.clone()));
}
SpecValue::Style(ref style) => match *style {
Style::Bold => {
cspec.set_bold(true);
}
Style::NoBold => {
cspec.set_bold(false);
}
Style::Intense => {
cspec.set_intense(true);
}
Style::NoIntense => {
cspec.set_intense(false);
}
Style::Underline => {
cspec.set_underline(true);
}
Style::NoUnderline => {
cspec.set_underline(false);
}
},
}
}
}
@@ -319,7 +315,10 @@ impl FromStr for UserColorSpec {
let otype: OutType = pieces[0].parse()?;
match pieces[1].parse()? {
SpecType::None => {
Ok(UserColorSpec { ty: otype, value: SpecValue::None })
Ok(UserColorSpec {
ty: otype,
value: SpecValue::None,
})
}
SpecType::Style => {
if pieces.len() < 3 {
@@ -332,16 +331,18 @@ impl FromStr for UserColorSpec {
if pieces.len() < 3 {
return Err(ColorError::InvalidFormat(s.to_string()));
}
let color: Color =
pieces[2].parse().map_err(ColorError::from_parse_error)?;
let color: Color = pieces[2]
.parse()
.map_err(ColorError::from_parse_error)?;
Ok(UserColorSpec { ty: otype, value: SpecValue::Fg(color) })
}
SpecType::Bg => {
if pieces.len() < 3 {
return Err(ColorError::InvalidFormat(s.to_string()));
}
let color: Color =
pieces[2].parse().map_err(ColorError::from_parse_error)?;
let color: Color = pieces[2]
.parse()
.map_err(ColorError::from_parse_error)?;
Ok(UserColorSpec { ty: otype, value: SpecValue::Bg(color) })
}
}

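A sketch of the spec format these error messages describe, `(path|line|column|match):(fg|bg|style):(value)`, assuming grep-printer's public `UserColorSpec` and `ColorSpecs` types:

```rust
use grep_printer::{ColorSpecs, UserColorSpec};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Each spec addresses one output region and one attribute.
    let specs: Vec<UserColorSpec> = vec![
        "path:fg:magenta".parse()?,
        "line:style:bold".parse()?,
        "match:bg:0xff,0x7f,0x00".parse()?, // extended RGB color
    ];
    let _colors = ColorSpecs::new(&specs);

    // A bad region name is rejected with UnrecognizedOutType.
    assert!("matchs:fg:red".parse::<UserColorSpec>().is_err());
    Ok(())
}
```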
View File

@@ -4,8 +4,8 @@ use std::time::Instant;
use grep_matcher::{Match, Matcher};
use grep_searcher::{
Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
SinkMatch,
Searcher,
Sink, SinkError, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
};
use serde_json as json;
@@ -27,7 +27,11 @@ struct Config {
impl Default for Config {
fn default() -> Config {
Config { pretty: false, max_matches: None, always_begin_end: false }
Config {
pretty: false,
max_matches: None,
always_begin_end: false,
}
}
}
@@ -488,9 +492,8 @@ impl<W: io::Write> JSON<W> {
matcher: M,
path: &'p P,
) -> JSONSink<'p, 's, M, W>
where
M: Matcher,
P: ?Sized + AsRef<Path>,
where M: Matcher,
P: ?Sized + AsRef<Path>,
{
JSONSink {
matcher: matcher,
@@ -612,12 +615,10 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
// the extent that it's easy to ensure that we never do more than
// one search to find the matches.
let matches = &mut self.json.matches;
self.matcher
.find_iter(bytes, |m| {
matches.push(m);
true
})
.map_err(io::Error::error_message)?;
self.matcher.find_iter(bytes, |m| {
matches.push(m);
true
}).map_err(io::Error::error_message)?;
// Don't report empty matches appearing at the end of the bytes.
if !matches.is_empty()
&& matches.last().unwrap().is_empty()
@@ -649,7 +650,9 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
if self.begin_printed {
return Ok(());
}
let msg = jsont::Message::Begin(jsont::Begin { path: self.path });
let msg = jsont::Message::Begin(jsont::Begin {
path: self.path,
});
self.json.write_message(&msg)?;
self.begin_printed = true;
Ok(())
@@ -696,12 +699,13 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.after_context_remaining =
self.after_context_remaining.saturating_sub(1);
}
let submatches = if searcher.invert_match() {
self.record_matches(ctx.bytes())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
} else {
SubMatches::empty()
};
let submatches =
if searcher.invert_match() {
self.record_matches(ctx.bytes())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
} else {
SubMatches::empty()
};
let msg = jsont::Message::Context(jsont::Context {
path: self.path,
lines: ctx.bytes(),
@@ -713,7 +717,10 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
Ok(!self.should_quit())
}
fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
fn begin(
&mut self,
_searcher: &Searcher,
) -> Result<bool, io::Error> {
self.json.wtr.reset_count();
self.start_time = Instant::now();
self.match_count = 0;
@@ -772,7 +779,7 @@ enum SubMatches<'a> {
impl<'a> SubMatches<'a> {
/// Create a new set of match ranges from a set of matches and the
/// corresponding bytes that those matches apply to.
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
fn new(bytes: &'a[u8], matches: &[Match]) -> SubMatches<'a> {
if matches.len() == 1 {
let mat = matches[0];
SubMatches::Small([jsont::SubMatch {
@@ -810,11 +817,11 @@ impl<'a> SubMatches<'a> {
#[cfg(test)]
mod tests {
use grep_matcher::LineTerminator;
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
use grep_matcher::LineTerminator;
use grep_searcher::SearcherBuilder;
use super::{JSONBuilder, JSON};
use super::{JSON, JSONBuilder};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -825,7 +832,9 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
fn printer_contents(printer: &mut JSON<Vec<u8>>) -> String {
fn printer_contents(
printer: &mut JSON<Vec<u8>>,
) -> String {
String::from_utf8(printer.get_mut().to_owned()).unwrap()
}
@@ -842,8 +851,11 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = JSONBuilder::new()
.build(vec![]);
SearcherBuilder::new()
.binary_detection(BinaryDetection::quit(b'\x00'))
.heap_limit(Some(80))
@@ -859,9 +871,12 @@ and exhibited clearly, with a label attached.\
#[test]
fn max_matches() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer =
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = JSONBuilder::new()
.max_matches(Some(1))
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
@@ -873,8 +888,11 @@ and exhibited clearly, with a label attached.\
#[test]
fn no_match() {
let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
let matcher = RegexMatcher::new(
r"DOES NOT MATCH"
).unwrap();
let mut printer = JSONBuilder::new()
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
@@ -886,9 +904,12 @@ and exhibited clearly, with a label attached.\
#[test]
fn always_begin_end_no_match() {
let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
let mut printer =
JSONBuilder::new().always_begin_end(true).build(vec![]);
let matcher = RegexMatcher::new(
r"DOES NOT MATCH"
).unwrap();
let mut printer = JSONBuilder::new()
.always_begin_end(true)
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
@@ -903,8 +924,11 @@ and exhibited clearly, with a label attached.\
fn missing_crlf() {
let haystack = "test\r\n".as_bytes();
let matcher = RegexMatcherBuilder::new().build("test").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
let matcher = RegexMatcherBuilder::new()
.build("test")
.unwrap();
let mut printer = JSONBuilder::new()
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, haystack, printer.sink(&matcher))
@@ -917,9 +941,12 @@ and exhibited clearly, with a label attached.\
got.lines().nth(1).unwrap(),
);
let matcher =
RegexMatcherBuilder::new().crlf(true).build("test").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
let matcher = RegexMatcherBuilder::new()
.crlf(true)
.build("test")
.unwrap();
let mut printer = JSONBuilder::new()
.build(vec![]);
SearcherBuilder::new()
.line_terminator(LineTerminator::crlf())
.build()

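For orientation, a sketch of driving the JSON printer end to end with a searcher, in the same shape as the tests above (grep-regex and grep-searcher assumed as dev-dependencies):

```rust
use grep_printer::JSONBuilder;
use grep_regex::RegexMatcher;
use grep_searcher::SearcherBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let matcher = RegexMatcher::new(r"Watson")?;
    let mut printer = JSONBuilder::new().build(vec![]);
    SearcherBuilder::new().build().search_reader(
        &matcher,
        &b"Sherlock Holmes and Doctor Watson\n"[..],
        printer.sink(&matcher),
    )?;
    // Output is JSON Lines: a begin message, match/context messages,
    // then an end message carrying summary statistics.
    let out = String::from_utf8(printer.get_mut().to_owned())?;
    assert!(out.lines().any(|line| line.contains(r#""type":"match""#)));
    Ok(())
}
```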
View File

@@ -80,9 +80,7 @@ pub struct SubMatch<'a> {
#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize)]
#[serde(untagged)]
enum Data<'a> {
Text {
text: Cow<'a, str>,
},
Text { text: Cow<'a, str> },
Bytes {
#[serde(serialize_with = "to_base64")]
bytes: &'a [u8],
@@ -118,26 +116,32 @@ impl<'a> Data<'a> {
}
}
fn to_base64<T, S>(bytes: T, ser: S) -> Result<S::Ok, S::Error>
where
T: AsRef<[u8]>,
S: Serializer,
fn to_base64<T, S>(
bytes: T,
ser: S,
) -> Result<S::Ok, S::Error>
where T: AsRef<[u8]>,
S: Serializer
{
ser.serialize_str(&base64::encode(&bytes))
}
fn ser_bytes<T, S>(bytes: T, ser: S) -> Result<S::Ok, S::Error>
where
T: AsRef<[u8]>,
S: Serializer,
fn ser_bytes<T, S>(
bytes: T,
ser: S,
) -> Result<S::Ok, S::Error>
where T: AsRef<[u8]>,
S: Serializer
{
Data::from_bytes(bytes.as_ref()).serialize(ser)
}
fn ser_path<P, S>(path: &Option<P>, ser: S) -> Result<S::Ok, S::Error>
where
P: AsRef<Path>,
S: Serializer,
fn ser_path<P, S>(
path: &Option<P>,
ser: S,
) -> Result<S::Ok, S::Error>
where P: AsRef<Path>,
S: Serializer
{
path.as_ref().map(|p| Data::from_path(p.as_ref())).serialize(ser)
}

View File

@@ -84,9 +84,9 @@ extern crate serde_derive;
extern crate serde_json;
extern crate termcolor;
pub use color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec};
pub use color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs};
#[cfg(feature = "serde1")]
pub use json::{JSONBuilder, JSONSink, JSON};
pub use json::{JSON, JSONBuilder, JSONSink};
pub use standard::{Standard, StandardBuilder, StandardSink};
pub use stats::Stats;
pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};

View File

@@ -34,8 +34,8 @@ impl<'a> Add<&'a Stats> for Stats {
Stats {
elapsed: NiceDuration(self.elapsed.0 + rhs.elapsed.0),
searches: self.searches + rhs.searches,
searches_with_match: self.searches_with_match
+ rhs.searches_with_match,
searches_with_match:
self.searches_with_match + rhs.searches_with_match,
bytes_searched: self.bytes_searched + rhs.bytes_searched,
bytes_printed: self.bytes_printed + rhs.bytes_printed,
matched_lines: self.matched_lines + rhs.matched_lines,

View File

@@ -168,7 +168,10 @@ impl SummaryBuilder {
///
/// This is a convenience routine for
/// `SummaryBuilder::build(termcolor::NoColor::new(wtr))`.
pub fn build_no_color<W: io::Write>(&self, wtr: W) -> Summary<NoColor<W>> {
pub fn build_no_color<W: io::Write>(
&self,
wtr: W,
) -> Summary<NoColor<W>> {
self.build(NoColor::new(wtr))
}
@@ -199,9 +202,10 @@ impl SummaryBuilder {
/// This completely overrides any previous color specifications. This does
/// not add to any previously provided color specifications on this
/// builder.
///
/// The default color specifications provide no styling.
pub fn color_specs(&mut self, specs: ColorSpecs) -> &mut SummaryBuilder {
pub fn color_specs(
&mut self,
specs: ColorSpecs,
) -> &mut SummaryBuilder {
self.config.colors = specs;
self
}
@@ -252,8 +256,6 @@ impl SummaryBuilder {
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
///
/// This is disabled by default.
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SummaryBuilder {
self.config.max_matches = limit;
self
@@ -264,8 +266,6 @@ impl SummaryBuilder {
/// When enabled and the mode is either `Count` or `CountMatches`, then
/// results are not printed if no matches were found. Otherwise, every
/// search prints a result with a possibly `0` number of matches.
///
/// This is enabled by default.
pub fn exclude_zero(&mut self, yes: bool) -> &mut SummaryBuilder {
self.config.exclude_zero = yes;
self
@@ -275,7 +275,10 @@ impl SummaryBuilder {
/// `CountMatches` modes.
///
/// By default, this is set to `:`.
pub fn separator_field(&mut self, sep: Vec<u8>) -> &mut SummaryBuilder {
pub fn separator_field(
&mut self,
sep: Vec<u8>,
) -> &mut SummaryBuilder {
self.config.separator_field = Arc::new(sep);
self
}
@@ -289,9 +292,10 @@ impl SummaryBuilder {
/// A typical use for this option is to permit cygwin users on Windows to
/// set the path separator to `/` instead of using the system default of
/// `\`.
///
/// This is disabled by default.
pub fn separator_path(&mut self, sep: Option<u8>) -> &mut SummaryBuilder {
pub fn separator_path(
&mut self,
sep: Option<u8>,
) -> &mut SummaryBuilder {
self.config.separator_path = sep;
self
}
@@ -370,11 +374,12 @@ impl<W: WriteColor> Summary<W> {
&'s mut self,
matcher: M,
) -> SummarySink<'static, 's, M, W> {
let stats = if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
let stats =
if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
SummarySink {
matcher: matcher,
summary: self,
@@ -395,22 +400,20 @@ impl<W: WriteColor> Summary<W> {
matcher: M,
path: &'p P,
) -> SummarySink<'p, 's, M, W>
where
M: Matcher,
P: ?Sized + AsRef<Path>,
where M: Matcher,
P: ?Sized + AsRef<Path>,
{
if !self.config.path && !self.config.kind.requires_path() {
return self.sink(matcher);
}
let stats = if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
let stats =
if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
let ppath = PrinterPath::with_separator(
path.as_ref(),
self.config.separator_path,
);
path.as_ref(), self.config.separator_path);
SummarySink {
matcher: matcher,
summary: self,
@@ -585,12 +588,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.match_count += 1;
if let Some(ref mut stats) = self.stats {
let mut match_count = 0;
self.matcher
.find_iter(mat.bytes(), |_| {
match_count += 1;
true
})
.map_err(io::Error::error_message)?;
self.matcher.find_iter(mat.bytes(), |_| {
match_count += 1;
true
}).map_err(io::Error::error_message)?;
stats.add_matches(match_count);
stats.add_matched_lines(mat.lines().count() as u64);
} else if self.summary.config.kind.quit_early() {
@@ -599,7 +600,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
Ok(!self.should_quit())
}
fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
fn begin(
&mut self,
_searcher: &Searcher,
) -> Result<bool, io::Error> {
if self.path.is_none() && self.summary.config.kind.requires_path() {
return Err(io::Error::error_message(format!(
"output kind {:?} requires a file path",
@@ -632,37 +636,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
stats.add_bytes_searched(finish.byte_count());
stats.add_bytes_printed(self.summary.wtr.borrow().count());
}
// If our binary detection method says to quit after seeing binary
// data, then we shouldn't print any results at all, even if we've
// found a match before detecting binary data. The intent here is to
// keep BinaryDetection::quit as a form of filter. Otherwise, we can
// present a matching file with a smaller number of matches than
// there might be, which can be quite misleading.
//
// If our binary detection method is to convert binary data, then we
// don't quit and therefore search the entire contents of the file.
//
// There is an unfortunate inconsistency here. Namely, when using
// Quiet or PathWithMatch, then the printer can quit after the first
// match seen, which could be long before seeing binary data. This
// means that using PathWithMatch can print a path whereas using
// Count might not print it at all because of binary data.
//
// It's not possible to fix this without also potentially significantly
// impacting the performance of Quiet or PathWithMatch, so we accept
// the bug.
if self.binary_byte_offset.is_some()
&& searcher.binary_detection().quit_byte().is_some()
{
// Squash the match count. The statistics reported will still
// contain the match count, but the "official" match count should
// be zero.
self.match_count = 0;
return Ok(());
}
let show_count =
!self.summary.config.exclude_zero || self.match_count > 0;
!self.summary.config.exclude_zero
|| self.match_count > 0;
match self.summary.config.kind {
SummaryKind::Count => {
if show_count {
@@ -673,8 +650,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
}
SummaryKind::CountMatches => {
if show_count {
let stats = self
.stats
let stats = self.stats
.as_ref()
.expect("CountMatches should enable stats tracking");
self.write_path_field()?;
@@ -704,7 +680,7 @@ mod tests {
use grep_searcher::SearcherBuilder;
use termcolor::NoColor;
use super::{Summary, SummaryBuilder, SummaryKind};
use super::{Summary, SummaryKind, SummaryBuilder};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -715,41 +691,45 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
fn printer_contents(printer: &mut Summary<NoColor<Vec<u8>>>) -> String {
fn printer_contents(
printer: &mut Summary<NoColor<Vec<u8>>>,
) -> String {
String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap()
}
#[test]
fn path_with_match_error() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithMatch)
.build_no_color(vec![]);
let res = SearcherBuilder::new().build().search_reader(
&matcher,
SHERLOCK,
printer.sink(&matcher),
);
let res = SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher));
assert!(res.is_err());
}
#[test]
fn path_without_match_error() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithoutMatch)
.build_no_color(vec![]);
let res = SearcherBuilder::new().build().search_reader(
&matcher,
SHERLOCK,
printer.sink(&matcher),
);
let res = SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher));
assert!(res.is_err());
}
#[test]
fn count_no_path() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.build_no_color(vec![]);
@@ -764,7 +744,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_no_path_even_with_path() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.path(false)
@@ -784,7 +766,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.build_no_color(vec![]);
@@ -803,7 +787,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_with_zero() {
let matcher = RegexMatcher::new(r"NO MATCH").unwrap();
let matcher = RegexMatcher::new(
r"NO MATCH"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.exclude_zero(false)
@@ -823,7 +809,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_without_zero() {
let matcher = RegexMatcher::new(r"NO MATCH").unwrap();
let matcher = RegexMatcher::new(
r"NO MATCH"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.exclude_zero(true)
@@ -843,7 +831,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_field_separator() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.separator_field(b"ZZ".to_vec())
@@ -863,7 +853,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_terminator() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.path_terminator(Some(b'\x00'))
@@ -883,7 +875,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_separator() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.separator_path(Some(b'\\'))
@@ -903,7 +897,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_max_matches() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.max_matches(Some(1))
@@ -919,7 +915,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_matches() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let matcher = RegexMatcher::new(
r"Watson|Sherlock"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::CountMatches)
.build_no_color(vec![]);
@@ -938,7 +936,9 @@ and exhibited clearly, with a label attached.
#[test]
fn path_with_match_found() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithMatch)
.build_no_color(vec![]);
@@ -957,7 +957,9 @@ and exhibited clearly, with a label attached.
#[test]
fn path_with_match_not_found() {
let matcher = RegexMatcher::new(r"ZZZZZZZZ").unwrap();
let matcher = RegexMatcher::new(
r"ZZZZZZZZ"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithMatch)
.build_no_color(vec![]);
@@ -974,9 +976,12 @@ and exhibited clearly, with a label attached.
assert_eq_printed!("", got);
}
#[test]
fn path_without_match_found() {
let matcher = RegexMatcher::new(r"ZZZZZZZZZ").unwrap();
let matcher = RegexMatcher::new(
r"ZZZZZZZZZ"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithoutMatch)
.build_no_color(vec![]);
@@ -995,7 +1000,9 @@ and exhibited clearly, with a label attached.
#[test]
fn path_without_match_not_found() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithoutMatch)
.build_no_color(vec![]);
@@ -1014,7 +1021,9 @@ and exhibited clearly, with a label attached.
#[test]
fn quiet() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let matcher = RegexMatcher::new(
r"Watson|Sherlock"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.build_no_color(vec![]);
@@ -1036,7 +1045,9 @@ and exhibited clearly, with a label attached.
#[test]
fn quiet_with_stats() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let matcher = RegexMatcher::new(
r"Watson|Sherlock"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.stats(true)

View File

@@ -4,10 +4,11 @@ use std::io;
use std::path::Path;
use std::time;
use bstr::{ByteSlice, ByteVec};
use bstr::{BStr, BString};
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
use grep_searcher::{
LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
LineIter,
SinkError, SinkContext, SinkContextKind, SinkMatch,
};
#[cfg(feature = "serde1")]
use serde::{Serialize, Serializer};
@@ -57,13 +58,19 @@ impl<M: Matcher> Replacer<M> {
replacement: &[u8],
) -> io::Result<()> {
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
let &mut Space {
ref mut dst,
ref mut caps,
ref mut matches,
} = self.allocate(matcher)?;
dst.clear();
matches.clear();
matcher
.replace_with_captures(subject, caps, dst, |caps, dst| {
matcher.replace_with_captures(
subject,
caps,
dst,
|caps, dst| {
let start = dst.len();
caps.interpolate(
|name| matcher.capture_index(name),
@@ -74,8 +81,8 @@ impl<M: Matcher> Replacer<M> {
let end = dst.len();
matches.push(Match::new(start, end));
true
})
.map_err(io::Error::error_message)?;
},
).map_err(io::Error::error_message)?;
}
Ok(())
}
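For context, a minimal sketch of the capture interpolation used above, driven through the public grep-matcher/grep-regex API rather than the Replacer internals (illustrative only; the pattern and `hay` are ours):

    use grep_matcher::{Captures, Matcher};
    use grep_regex::RegexMatcher;

    fn main() {
        let m = RegexMatcher::new(r"(?P<first>\w+)\s+(?P<last>\w+)").unwrap();
        let mut caps = m.new_captures().unwrap();
        let hay = b"Sherlock Holmes";
        assert!(m.captures(hay, &mut caps).unwrap());
        let mut dst = vec![];
        // Same shape as the call in Replacer::replace: group names are
        // resolved via capture_index, then `$last`/`$first` are interpolated.
        caps.interpolate(|name| m.capture_index(name), hay, b"$last, $first", &mut dst);
        assert_eq!(&dst[..], &b"Holmes, Sherlock"[..]);
    }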
@@ -115,10 +122,14 @@ impl<M: Matcher> Replacer<M> {
/// matcher fails.
fn allocate(&mut self, matcher: &M) -> io::Result<&mut Space<M>> {
if self.space.is_none() {
let caps =
matcher.new_captures().map_err(io::Error::error_message)?;
self.space =
Some(Space { caps: caps, dst: vec![], matches: vec![] });
let caps = matcher
.new_captures()
.map_err(io::Error::error_message)?;
self.space = Some(Space {
caps: caps,
dst: vec![],
matches: vec![],
});
}
Ok(self.space.as_mut().unwrap())
}
@@ -165,8 +176,9 @@ impl<'a> Sunk<'a> {
original_matches: &'a [Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> Sunk<'a> {
let (bytes, matches) =
replacement.unwrap_or_else(|| (sunk.bytes(), original_matches));
let (bytes, matches) = replacement.unwrap_or_else(|| {
(sunk.bytes(), original_matches)
});
Sunk {
bytes: bytes,
absolute_byte_offset: sunk.absolute_byte_offset(),
@@ -183,8 +195,9 @@ impl<'a> Sunk<'a> {
original_matches: &'a [Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> Sunk<'a> {
let (bytes, matches) =
replacement.unwrap_or_else(|| (sunk.bytes(), original_matches));
let (bytes, matches) = replacement.unwrap_or_else(|| {
(sunk.bytes(), original_matches)
});
Sunk {
bytes: bytes,
absolute_byte_offset: sunk.absolute_byte_offset(),
@@ -250,12 +263,12 @@ impl<'a> Sunk<'a> {
/// portability with a small cost: on Windows, paths that are not valid UTF-16
/// will not roundtrip correctly.
#[derive(Clone, Debug)]
pub struct PrinterPath<'a>(Cow<'a, [u8]>);
pub struct PrinterPath<'a>(Cow<'a, BStr>);
impl<'a> PrinterPath<'a> {
/// Create a new path suitable for printing.
pub fn new(path: &'a Path) -> PrinterPath<'a> {
PrinterPath(Vec::from_path_lossy(path))
PrinterPath(BString::from_path_lossy(path))
}
/// Create a new printer path from the given path which can be efficiently
@@ -276,23 +289,19 @@ impl<'a> PrinterPath<'a> {
/// path separators that are both replaced by `new_sep`. In all other
/// environments, only `/` is treated as a path separator.
fn replace_separator(&mut self, new_sep: u8) {
let transformed_path: Vec<u8> = self
.0
.bytes()
.map(|b| {
if b == b'/' || (cfg!(windows) && b == b'\\') {
new_sep
} else {
b
}
})
.collect();
let transformed_path: BString = self.0.bytes().map(|b| {
if b == b'/' || (cfg!(windows) && b == b'\\') {
new_sep
} else {
b
}
}).collect();
self.0 = Cow::Owned(transformed_path);
}
/// Return the raw bytes for this path.
pub fn as_bytes(&self) -> &[u8] {
&self.0
self.0.as_bytes()
}
}
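As a quick illustration of the separator replacement described in the comments above, a sketch (assuming `PrinterPath` is re-exported from grep-printer as in this version):

    use std::path::Path;
    use grep_printer::PrinterPath;

    fn main() {
        // `/` (and `\` on Windows) is rewritten to the given separator.
        let p = PrinterPath::with_separator(Path::new("foo/bar"), Some(b'|'));
        assert_eq!(p.as_bytes(), &b"foo|bar"[..]);
    }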
@@ -337,7 +346,7 @@ impl Serialize for NiceDuration {
///
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
/// terminator.
pub fn trim_ascii_prefix(
pub fn trim_ascii_prefix_range(
line_term: LineTerminator,
slice: &[u8],
range: Match,
@@ -357,3 +366,14 @@ pub fn trim_ascii_prefix(
.count();
range.with_start(range.start() + count)
}
/// Trim prefix ASCII spaces from the given slice and return the corresponding
/// sub-slice.
pub fn trim_ascii_prefix(line_term: LineTerminator, slice: &[u8]) -> &[u8] {
let range = trim_ascii_prefix_range(
line_term,
slice,
Match::new(0, slice.len()),
);
&slice[range]
}
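A small usage sketch for the new slice-returning helper (assumes the util functions above are in scope; this module is internal to grep-printer):

    use grep_matcher::LineTerminator;

    fn demo() {
        let line_term = LineTerminator::byte(b'\n');
        // ASCII space prefixes are trimmed; trimming stops at the line
        // terminator, so the trailing `\n` is kept.
        assert_eq!(trim_ascii_prefix(line_term, b"   foo\n"), &b"foo\n"[..]);
    }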

View File

@@ -1,22 +1,21 @@
[package]
name = "grep-regex"
version = "0.1.7" #:version
version = "0.1.2" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use Rust's regex library with the 'grep' crate.
"""
documentation = "https://docs.rs/grep-regex"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "search", "pattern", "line"]
license = "Unlicense/MIT"
[dependencies]
aho-corasick = "0.7.3"
bstr = "0.2.10"
grep-matcher = { version = "0.1.2", path = "../matcher" }
log = "0.4.5"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
regex = "1.1"
regex-syntax = "0.6.5"
thread_local = "1"
thread_local = "0.3.6"
utf8-ranges = "1.0.1"

View File

@@ -1,5 +1,5 @@
use regex_syntax::ast::parse::Parser;
use regex_syntax::ast::{self, Ast};
use regex_syntax::ast::parse::Parser;
/// The results of analyzing the AST of a regular expression (e.g., for supporting
/// smart case).

View File

@@ -1,13 +1,12 @@
use grep_matcher::{ByteSet, LineTerminator};
use regex::bytes::{Regex, RegexBuilder};
use regex_syntax::ast::{self, Ast};
use regex_syntax::hir::{self, Hir};
use regex_syntax::hir::Hir;
use ast::AstAnalysis;
use crlf::crlfify;
use error::Error;
use literal::LiteralSets;
use multi::alternation_literals;
use non_matching::non_matching_bytes;
use strip::strip_from_match;
@@ -51,8 +50,8 @@ impl Default for Config {
octal: false,
// These size limits are much bigger than what's in the regex
// crate.
size_limit: 100 * (1 << 20),
dfa_size_limit: 1000 * (1 << 20),
size_limit: 100 * (1<<20),
dfa_size_limit: 1000 * (1<<20),
nest_limit: 250,
line_terminator: None,
crlf: false,
@@ -68,17 +67,19 @@ impl Config {
/// If there was a problem parsing the given expression then an error
/// is returned.
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
let ast = self.ast(pattern)?;
let analysis = self.analysis(&ast)?;
let expr = hir::translate::TranslatorBuilder::new()
let analysis = self.analysis(pattern)?;
let expr = ::regex_syntax::ParserBuilder::new()
.nest_limit(self.nest_limit)
.octal(self.octal)
.allow_invalid_utf8(true)
.case_insensitive(self.is_case_insensitive(&analysis))
.ignore_whitespace(self.ignore_whitespace)
.case_insensitive(self.is_case_insensitive(&analysis)?)
.multi_line(self.multi_line)
.dot_matches_new_line(self.dot_matches_new_line)
.swap_greed(self.swap_greed)
.unicode(self.unicode)
.build()
.translate(pattern, &ast)
.parse(pattern)
.map_err(Error::regex)?;
let expr = match self.line_terminator {
None => expr,
@@ -87,7 +88,7 @@ impl Config {
Ok(ConfiguredHIR {
original: pattern.to_string(),
config: self.clone(),
analysis,
analysis: analysis,
// If CRLF mode is enabled, replace `$` with `(?:\r?$)`.
expr: if self.crlf { crlfify(expr) } else { expr },
})
@@ -95,32 +96,24 @@ impl Config {
/// Accounting for the `smart_case` config knob, return true if and only if
/// this pattern should be matched case insensitively.
fn is_case_insensitive(&self, analysis: &AstAnalysis) -> bool {
fn is_case_insensitive(
&self,
analysis: &AstAnalysis,
) -> Result<bool, Error> {
if self.case_insensitive {
return true;
return Ok(true);
}
if !self.case_smart {
return false;
return Ok(false);
}
analysis.any_literal() && !analysis.any_uppercase()
}
/// Returns true if and only if this config is simple enough such that
/// if the pattern is a simple alternation of literals, then it can be
/// constructed via a plain Aho-Corasick automaton.
///
/// Note that it is OK to return true even when settings like `multi_line`
/// are enabled, since if multi-line can impact the match semantics of a
/// regex, then it is by definition not a simple alternation of literals.
pub fn can_plain_aho_corasick(&self) -> bool {
!self.word && !self.case_insensitive && !self.case_smart
Ok(analysis.any_literal() && !analysis.any_uppercase())
}
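The smart-case rule computed here is observable through the public builder; a minimal sketch mirroring the `case_smart` test further down in this diff:

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcherBuilder;

    fn main() {
        // An all-lowercase pattern is matched case insensitively...
        let m = RegexMatcherBuilder::new().case_smart(true).build("abc").unwrap();
        assert!(m.is_match(b"ABC").unwrap());
        // ...but any uppercase literal keeps the pattern case sensitive.
        let m = RegexMatcherBuilder::new().case_smart(true).build("aBc").unwrap();
        assert!(!m.is_match(b"ABC").unwrap());
    }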
/// Perform analysis on the AST of this pattern.
///
/// This returns an error if the given pattern failed to parse.
fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
Ok(AstAnalysis::from_ast(ast))
fn analysis(&self, pattern: &str) -> Result<AstAnalysis, Error> {
Ok(AstAnalysis::from_ast(&self.ast(pattern)?))
}
/// Parse the given pattern into its abstract syntax.
@@ -180,15 +173,6 @@ impl ConfiguredHIR {
self.pattern_to_regex(&self.expr.to_string())
}
/// If this HIR corresponds to an alternation of literals with no
/// capturing groups, then this returns those literals.
pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
if !self.config.can_plain_aho_corasick() {
return None;
}
alternation_literals(&self.expr)
}
/// Applies the given function to the concrete syntax of this HIR and then
/// generates a new HIR based on the result of the function in a way that
/// preserves the configuration.
@@ -198,7 +182,8 @@ impl ConfiguredHIR {
pub fn with_pattern<F: FnMut(&str) -> String>(
&self,
mut f: F,
) -> Result<ConfiguredHIR, Error> {
) -> Result<ConfiguredHIR, Error>
{
self.pattern_to_hir(&f(&self.expr.to_string()))
}
@@ -282,7 +267,7 @@ impl ConfiguredHIR {
original: self.original.clone(),
config: self.config.clone(),
analysis: self.analysis.clone(),
expr,
expr: expr,
})
}
}

View File

@@ -76,8 +76,7 @@ impl Matcher for CRLFMatcher {
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
caps.strip_crlf(false);
let r =
self.regex.captures_read_at(caps.locations_mut(), haystack, at);
let r = self.regex.captures_read_at(caps.locations(), haystack, at);
if !r.is_some() {
return Ok(false);
}
@@ -162,8 +161,8 @@ pub fn crlfify(expr: Hir) -> Hir {
#[cfg(test)]
mod tests {
use super::crlfify;
use regex_syntax::Parser;
use super::crlfify;
fn roundtrip(pattern: &str) -> String {
let expr1 = Parser::new().parse(pattern).unwrap();

View File

@@ -4,14 +4,13 @@ An implementation of `grep-matcher`'s `Matcher` trait for Rust's regex engine.
#![deny(missing_docs)]
extern crate aho_corasick;
extern crate bstr;
extern crate grep_matcher;
#[macro_use]
extern crate log;
extern crate regex;
extern crate regex_syntax;
extern crate thread_local;
extern crate utf8_ranges;
pub use error::{Error, ErrorKind};
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
@@ -22,7 +21,6 @@ mod crlf;
mod error;
mod literal;
mod matcher;
mod multi;
mod non_matching;
mod strip;
mod util;

View File

@@ -5,9 +5,10 @@ the regex engine doesn't look for inner literals. Since we're doing line based
searching, we can use them, so we need to do it ourselves.
*/
use bstr::ByteSlice;
use regex_syntax::hir::literal::{Literal, Literals};
use std::cmp;
use regex_syntax::hir::{self, Hir, HirKind};
use regex_syntax::hir::literal::{Literal, Literals};
use util;
@@ -36,7 +37,7 @@ impl LiteralSets {
LiteralSets {
prefixes: Literals::prefixes(expr),
suffixes: Literals::suffixes(expr),
required,
required: required,
}
}
@@ -100,12 +101,7 @@ impl LiteralSets {
// helps with case insensitive matching, which can generate lots of
// inner required literals.
let any_empty = req_lits.iter().any(|lit| lit.is_empty());
let any_white = has_only_whitespace(&req_lits);
if req.len() > lit.len()
&& req_lits.len() > 1
&& !any_empty
&& !any_white
{
if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
debug!("required literals found: {:?}", req_lits);
let alts: Vec<String> = req_lits
.into_iter()
@@ -114,74 +110,9 @@ impl LiteralSets {
// We're matching raw bytes, so disable Unicode mode.
Some(format!("(?-u:{})", alts.join("|")))
} else if lit.is_empty() {
// If we're here, then we have no LCP. No LCS. And no detected
// inner required literals. In theory this shouldn't happen, but
// the inner literal detector isn't as nice as we hope and doesn't
// actually support returning a set of alternating required
// literals. (Instead, it only returns a set where EVERY literal
// in it is required. It cannot currently express "either P or Q
// is required.")
//
// In this case, it is possible that we still have meaningful
// prefixes or suffixes to use. So we look for the set of literals
// with the highest minimum length and use that to build our "fast"
// regex.
//
// This manifests in fairly common scenarios. e.g.,
//
// rg -w 'foo|bar|baz|quux'
//
// Normally, without the `-w`, the regex engine itself would
// detect the prefix correctly. Unfortunately, the `-w` option
// turns the regex into something like this:
//
// rg '(^|\W)(foo|bar|baz|quux)($|\W)'
//
// Which will defeat all prefix and suffix literal optimizations.
// (Not in theory---it could be better. But the current
// implementation isn't good enough.) ... So we make up for it
// here.
if !word {
return None;
}
let p_min_len = self.prefixes.min_len();
let s_min_len = self.suffixes.min_len();
let lits = match (p_min_len, s_min_len) {
(None, None) => return None,
(Some(_), None) => {
debug!("prefix literals found");
self.prefixes.literals()
}
(None, Some(_)) => {
debug!("suffix literals found");
self.suffixes.literals()
}
(Some(p), Some(s)) => {
if p >= s {
debug!("prefix literals found");
self.prefixes.literals()
} else {
debug!("suffix literals found");
self.suffixes.literals()
}
}
};
debug!("prefix/suffix literals found: {:?}", lits);
if has_only_whitespace(lits) {
debug!("dropping literals because one was whitespace");
return None;
}
let alts: Vec<String> =
lits.into_iter().map(|x| util::bytes_to_regex(x)).collect();
// We're matching raw bytes, so disable Unicode mode.
Some(format!("(?-u:{})", alts.join("|")))
None
} else {
debug!("required literal found: {:?}", util::show_bytes(lit));
if lit.chars().all(|c| c.is_whitespace()) {
debug!("dropping literal because one was whitespace");
return None;
}
Some(format!("(?-u:{})", util::bytes_to_regex(&lit)))
}
}
@@ -209,28 +140,25 @@ fn union_required(expr: &Hir, lits: &mut Literals) {
HirKind::Group(hir::Group { ref hir, .. }) => {
union_required(&**hir, lits);
}
HirKind::Repetition(ref x) => match x.kind {
hir::RepetitionKind::ZeroOrOne => lits.cut(),
hir::RepetitionKind::ZeroOrMore => lits.cut(),
hir::RepetitionKind::OneOrMore => {
union_required(&x.hir, lits);
HirKind::Repetition(ref x) => {
match x.kind {
hir::RepetitionKind::ZeroOrOne => lits.cut(),
hir::RepetitionKind::ZeroOrMore => lits.cut(),
hir::RepetitionKind::OneOrMore => {
union_required(&x.hir, lits);
lits.cut();
}
hir::RepetitionKind::Range(ref rng) => {
let (min, max) = match *rng {
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
hir::RepetitionRange::AtLeast(m) => (m, None),
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
};
repeat_range_literals(
&x.hir, min, max, x.greedy, lits, union_required);
}
}
hir::RepetitionKind::Range(ref rng) => {
let (min, max) = match *rng {
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
hir::RepetitionRange::AtLeast(m) => (m, None),
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
};
repeat_range_literals(
&x.hir,
min,
max,
x.greedy,
lits,
union_required,
);
}
},
}
HirKind::Concat(ref es) if es.is_empty() => {}
HirKind::Concat(ref es) if es.len() == 1 => {
union_required(&es[0], lits)
@@ -265,7 +193,7 @@ fn union_required(expr: &Hir, lits: &mut Literals) {
fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
e: &Hir,
min: u32,
_max: Option<u32>,
max: Option<u32>,
_greedy: bool,
lits: &mut Literals,
mut f: F,
@@ -276,13 +204,19 @@ fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
// just treat it as `e*`.
lits.cut();
} else {
let n = cmp::min(lits.limit_size(), min as usize);
// We only extract literals from a single repetition, even though
// we could do more. e.g., `a{3}` will have `a` extracted instead of
// `aaa`. The reason is that inner literal extraction can't be unioned
// across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}`
// is wrong.
f(e, lits);
lits.cut();
if n < min as usize {
lits.cut();
}
if max.map_or(true, |max| min < max) {
lits.cut();
}
}
}
@@ -329,9 +263,9 @@ fn is_simple(expr: &Hir) -> bool {
| HirKind::Repetition(_)
| HirKind::Concat(_)
| HirKind::Alternation(_) => true,
HirKind::Anchor(_) | HirKind::WordBoundary(_) | HirKind::Group(_) => {
false
}
HirKind::Anchor(_)
| HirKind::WordBoundary(_)
| HirKind::Group(_) => false,
}
}
@@ -345,21 +279,10 @@ fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
}
/// Returns true if and only if any of the literals in the given set is
/// entirely whitespace.
fn has_only_whitespace(lits: &[Literal]) -> bool {
for lit in lits {
if lit.chars().all(|c| c.is_whitespace()) {
return true;
}
}
false
}
#[cfg(test)]
mod tests {
use super::LiteralSets;
use regex_syntax::Parser;
use super::LiteralSets;
fn sets(pattern: &str) -> LiteralSets {
let hir = Parser::new().parse(pattern).unwrap();
@@ -405,15 +328,4 @@ mod tests {
// assert_eq!(one_regex(r"a.*c"), pat("a"));
assert_eq!(one_regex(r"a(.*c)"), pat("a"));
}
#[test]
fn regression_1319() {
// Regression from:
// https://github.com/BurntSushi/ripgrep/issues/1319
assert_eq!(
one_regex(r"TTGAGTCCAGGAG[ATCG]{2}C"),
pat("TTGAGTCCAGGAGA|TTGAGTCCAGGAGC|\
TTGAGTCCAGGAGG|TTGAGTCCAGGAGT")
);
}
}

View File

@@ -1,14 +1,13 @@
use std::collections::HashMap;
use grep_matcher::{
ByteSet, Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError,
Captures, LineMatchKind, LineTerminator, Match, Matcher, NoError, ByteSet,
};
use regex::bytes::{CaptureLocations, Regex};
use config::{Config, ConfiguredHIR};
use crlf::CRLFMatcher;
use error::Error;
use multi::MultiLiteralMatcher;
use word::WordMatcher;
/// A builder for constructing a `Matcher` using regular expressions.
@@ -34,7 +33,9 @@ impl Default for RegexMatcherBuilder {
impl RegexMatcherBuilder {
/// Create a new builder for configuring a regex matcher.
pub fn new() -> RegexMatcherBuilder {
RegexMatcherBuilder { config: Config::default() }
RegexMatcherBuilder {
config: Config::default(),
}
}
/// Build a new matcher using the current configuration for the provided
@@ -47,60 +48,16 @@ impl RegexMatcherBuilder {
let fast_line_regex = chir.fast_line_regex()?;
let non_matching_bytes = chir.non_matching_bytes();
if let Some(ref re) = fast_line_regex {
debug!("extracted fast line regex: {:?}", re);
trace!("extracted fast line regex: {:?}", re);
}
let matcher = RegexMatcherImpl::new(&chir)?;
trace!("final regex: {:?}", matcher.regex());
trace!("final regex: {:?}", matcher.regex().to_string());
Ok(RegexMatcher {
config: self.config.clone(),
matcher,
fast_line_regex,
non_matching_bytes,
})
}
/// Build a new matcher from a plain alternation of literals.
///
/// Depending on the configuration set by the builder, this may be able to
/// build a matcher substantially faster than by joining the patterns with
/// a `|` and calling `build`.
pub fn build_literals<B: AsRef<str>>(
&self,
literals: &[B],
) -> Result<RegexMatcher, Error> {
let mut has_escape = false;
let mut slices = vec![];
for lit in literals {
slices.push(lit.as_ref());
has_escape = has_escape || lit.as_ref().contains('\\');
}
// Even when we have a fixed set of literals, we might still want to
// use the regex engine. Specifically, if any string has an escape
// in it, then we probably can't feed it to Aho-Corasick without
// removing the escape. Additionally, if there are any particular
// special match semantics we need to honor, that Aho-Corasick isn't
// enough. Finally, the regex engine can do really well with a small
// number of literals (at time of writing, this is changing soon), so
// we use it when there's a small set.
//
// Yes, this is one giant hack. Ideally, this entirely separate literal
// matcher that uses Aho-Corasick would be pushed down into the regex
// engine.
if has_escape
|| !self.config.can_plain_aho_corasick()
|| literals.len() < 40
{
return self.build(&slices.join("|"));
}
let matcher = MultiLiteralMatcher::new(&slices)?;
let imp = RegexMatcherImpl::MultiLiteral(matcher);
Ok(RegexMatcher {
config: self.config.clone(),
matcher: imp,
fast_line_regex: None,
non_matching_bytes: ByteSet::empty(),
matcher: matcher,
fast_line_regex: fast_line_regex,
non_matching_bytes: non_matching_bytes,
})
}
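A hedged sketch of calling `build_literals` (the 40-literal threshold and the Aho-Corasick fast path are internal details of this version, so treat them as an optimization, not a guarantee):

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcherBuilder;

    fn main() {
        // Many escape-free literals: eligible for the Aho-Corasick path.
        let lits: Vec<String> = (0..50).map(|i| format!("lit{:02}", i)).collect();
        let m = RegexMatcherBuilder::new().build_literals(&lits).unwrap();
        assert!(m.is_match(b"xyz lit07 xyz").unwrap());
    }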
@@ -380,7 +337,9 @@ impl RegexMatcher {
/// given pattern contains a literal `\n`. Other uses of `\n` (such as in
/// `\s`) are removed transparently.
pub fn new_line_matcher(pattern: &str) -> Result<RegexMatcher, Error> {
RegexMatcherBuilder::new().line_terminator(Some(b'\n')).build(pattern)
RegexMatcherBuilder::new()
.line_terminator(Some(b'\n'))
.build(pattern)
}
}
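For reference, a short sketch of the line-oriented constructor documented above (the non-match mirrors the `line_terminator` test later in this diff):

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcher;

    fn main() {
        let m = RegexMatcher::new_line_matcher(r"abc\sxyz").unwrap();
        // `\n` is removed from `\s`, so a match cannot cross a line boundary.
        assert!(m.is_match(b"abc xyz").unwrap());
        assert!(!m.is_match(b"abc\nxyz").unwrap());
    }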
@@ -389,8 +348,6 @@ impl RegexMatcher {
enum RegexMatcherImpl {
/// The standard matcher used for all regular expressions.
Standard(StandardMatcher),
/// A matcher for an alternation of plain literals.
MultiLiteral(MultiLiteralMatcher),
/// A matcher that strips `\r` from the end of matches.
///
/// This is only used when the CRLF hack is enabled and the regex is line
@@ -413,23 +370,16 @@ impl RegexMatcherImpl {
} else if expr.needs_crlf_stripped() {
Ok(RegexMatcherImpl::CRLF(CRLFMatcher::new(expr)?))
} else {
if let Some(lits) = expr.alternation_literals() {
if lits.len() >= 40 {
let matcher = MultiLiteralMatcher::new(&lits)?;
return Ok(RegexMatcherImpl::MultiLiteral(matcher));
}
}
Ok(RegexMatcherImpl::Standard(StandardMatcher::new(expr)?))
}
}
/// Return the underlying regex object used.
fn regex(&self) -> String {
fn regex(&self) -> &Regex {
match *self {
RegexMatcherImpl::Word(ref x) => x.regex().to_string(),
RegexMatcherImpl::CRLF(ref x) => x.regex().to_string(),
RegexMatcherImpl::MultiLiteral(_) => "<N/A>".to_string(),
RegexMatcherImpl::Standard(ref x) => x.regex.to_string(),
RegexMatcherImpl::Word(ref x) => x.regex(),
RegexMatcherImpl::CRLF(ref x) => x.regex(),
RegexMatcherImpl::Standard(ref x) => &x.regex,
}
}
}
@@ -449,7 +399,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.find_at(haystack, at),
MultiLiteral(ref m) => m.find_at(haystack, at),
CRLF(ref m) => m.find_at(haystack, at),
Word(ref m) => m.find_at(haystack, at),
}
@@ -459,7 +408,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.new_captures(),
MultiLiteral(ref m) => m.new_captures(),
CRLF(ref m) => m.new_captures(),
Word(ref m) => m.new_captures(),
}
@@ -469,7 +417,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.capture_count(),
MultiLiteral(ref m) => m.capture_count(),
CRLF(ref m) => m.capture_count(),
Word(ref m) => m.capture_count(),
}
@@ -479,7 +426,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.capture_index(name),
MultiLiteral(ref m) => m.capture_index(name),
CRLF(ref m) => m.capture_index(name),
Word(ref m) => m.capture_index(name),
}
@@ -489,20 +435,21 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.find(haystack),
MultiLiteral(ref m) => m.find(haystack),
CRLF(ref m) => m.find(haystack),
Word(ref m) => m.find(haystack),
}
}
fn find_iter<F>(&self, haystack: &[u8], matched: F) -> Result<(), NoError>
where
F: FnMut(Match) -> bool,
fn find_iter<F>(
&self,
haystack: &[u8],
matched: F,
) -> Result<(), NoError>
where F: FnMut(Match) -> bool
{
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.find_iter(haystack, matched),
MultiLiteral(ref m) => m.find_iter(haystack, matched),
CRLF(ref m) => m.find_iter(haystack, matched),
Word(ref m) => m.find_iter(haystack, matched),
}
@@ -513,13 +460,11 @@ impl Matcher for RegexMatcher {
haystack: &[u8],
matched: F,
) -> Result<Result<(), E>, NoError>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.try_find_iter(haystack, matched),
MultiLiteral(ref m) => m.try_find_iter(haystack, matched),
CRLF(ref m) => m.try_find_iter(haystack, matched),
Word(ref m) => m.try_find_iter(haystack, matched),
}
@@ -533,7 +478,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.captures(haystack, caps),
MultiLiteral(ref m) => m.captures(haystack, caps),
CRLF(ref m) => m.captures(haystack, caps),
Word(ref m) => m.captures(haystack, caps),
}
@@ -545,13 +489,11 @@ impl Matcher for RegexMatcher {
caps: &mut RegexCaptures,
matched: F,
) -> Result<(), NoError>
where
F: FnMut(&RegexCaptures) -> bool,
where F: FnMut(&RegexCaptures) -> bool
{
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.captures_iter(haystack, caps, matched),
MultiLiteral(ref m) => m.captures_iter(haystack, caps, matched),
CRLF(ref m) => m.captures_iter(haystack, caps, matched),
Word(ref m) => m.captures_iter(haystack, caps, matched),
}
@@ -563,15 +505,11 @@ impl Matcher for RegexMatcher {
caps: &mut RegexCaptures,
matched: F,
) -> Result<Result<(), E>, NoError>
where
F: FnMut(&RegexCaptures) -> Result<bool, E>,
where F: FnMut(&RegexCaptures) -> Result<bool, E>
{
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.try_captures_iter(haystack, caps, matched),
MultiLiteral(ref m) => {
m.try_captures_iter(haystack, caps, matched)
}
CRLF(ref m) => m.try_captures_iter(haystack, caps, matched),
Word(ref m) => m.try_captures_iter(haystack, caps, matched),
}
@@ -586,7 +524,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.captures_at(haystack, at, caps),
MultiLiteral(ref m) => m.captures_at(haystack, at, caps),
CRLF(ref m) => m.captures_at(haystack, at, caps),
Word(ref m) => m.captures_at(haystack, at, caps),
}
@@ -598,13 +535,11 @@ impl Matcher for RegexMatcher {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), NoError>
where
F: FnMut(Match, &mut Vec<u8>) -> bool,
where F: FnMut(Match, &mut Vec<u8>) -> bool
{
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.replace(haystack, dst, append),
MultiLiteral(ref m) => m.replace(haystack, dst, append),
CRLF(ref m) => m.replace(haystack, dst, append),
Word(ref m) => m.replace(haystack, dst, append),
}
@@ -617,17 +552,13 @@ impl Matcher for RegexMatcher {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), NoError>
where
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
{
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => {
m.replace_with_captures(haystack, caps, dst, append)
}
MultiLiteral(ref m) => {
m.replace_with_captures(haystack, caps, dst, append)
}
CRLF(ref m) => {
m.replace_with_captures(haystack, caps, dst, append)
}
@@ -641,7 +572,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.is_match(haystack),
MultiLiteral(ref m) => m.is_match(haystack),
CRLF(ref m) => m.is_match(haystack),
Word(ref m) => m.is_match(haystack),
}
@@ -655,7 +585,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.is_match_at(haystack, at),
MultiLiteral(ref m) => m.is_match_at(haystack, at),
CRLF(ref m) => m.is_match_at(haystack, at),
Word(ref m) => m.is_match_at(haystack, at),
}
@@ -668,7 +597,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.shortest_match(haystack),
MultiLiteral(ref m) => m.shortest_match(haystack),
CRLF(ref m) => m.shortest_match(haystack),
Word(ref m) => m.shortest_match(haystack),
}
@@ -682,7 +610,6 @@ impl Matcher for RegexMatcher {
use self::RegexMatcherImpl::*;
match self.matcher {
Standard(ref m) => m.shortest_match_at(haystack, at),
MultiLiteral(ref m) => m.shortest_match_at(haystack, at),
CRLF(ref m) => m.shortest_match_at(haystack, at),
Word(ref m) => m.shortest_match_at(haystack, at),
}
@@ -743,8 +670,7 @@ impl Matcher for StandardMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, NoError> {
Ok(self
.regex
Ok(self.regex
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}
@@ -766,8 +692,7 @@ impl Matcher for StandardMatcher {
haystack: &[u8],
mut matched: F,
) -> Result<Result<(), E>, NoError>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
for m in self.regex.find_iter(haystack) {
match matched(Match::new(m.start(), m.end())) {
@@ -785,10 +710,7 @@ impl Matcher for StandardMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
Ok(self
.regex
.captures_read_at(&mut caps.locations_mut(), haystack, at)
.is_some())
Ok(self.regex.captures_read_at(&mut caps.locs, haystack, at).is_some())
}
fn shortest_match_at(
@@ -815,84 +737,54 @@ impl Matcher for StandardMatcher {
/// index of the group using the corresponding matcher's `capture_index`
/// method, and then use that index with `RegexCaptures::get`.
#[derive(Clone, Debug)]
pub struct RegexCaptures(RegexCapturesImp);
#[derive(Clone, Debug)]
enum RegexCapturesImp {
AhoCorasick {
/// The start and end of the match, corresponding to capture group 0.
mat: Option<Match>,
},
Regex {
/// Where the locations are stored.
locs: CaptureLocations,
/// These captures behave as if the capturing groups begin at the given
/// offset. When set to `0`, this has no effect and capture groups are
/// indexed like normal.
///
/// This is useful when building matchers that wrap arbitrary regular
/// expressions. For example, `WordMatcher` takes an existing regex
/// `re` and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that
/// the regex has been wrapped from the caller. In order to do this,
/// the matcher and the capturing groups must behave as if `(re)` is
/// the `0`th capture group.
offset: usize,
/// When enabled, the end of a match has `\r` stripped from it, if one
/// exists.
strip_crlf: bool,
},
pub struct RegexCaptures {
/// Where the locations are stored.
locs: CaptureLocations,
/// These captures behave as if the capturing groups begin at the given
/// offset. When set to `0`, this has no effect and capture groups are
/// indexed like normal.
///
/// This is useful when building matchers that wrap arbitrary regular
/// expressions. For example, `WordMatcher` takes an existing regex `re`
/// and creates `(?:^|\W)(re)(?:$|\W)`, but hides the fact that the regex
/// has been wrapped from the caller. In order to do this, the matcher
/// and the capturing groups must behave as if `(re)` is the `0`th capture
/// group.
offset: usize,
/// When enabled, the end of a match has `\r` stripped from it, if one
/// exists.
strip_crlf: bool,
}
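To make the offsetting concrete, here is a sketch using the plain `regex` crate directly (not the grep-regex internals): after WordMatcher-style wrapping, the caller's entire pattern lives in capture group 1, so every reported group index must be shifted by one.

    use regex::bytes::Regex;

    fn main() {
        // The caller asked for `-2`; the wrapper pads it with `(?:^|\W)`.
        let re = Regex::new(r"(?:^|\W)(-2)(?:$|\W)").unwrap();
        let caps = re.captures(b"abc -2 foo").unwrap();
        // Group 0 spans the padding; group 1 is the caller's "group 0".
        assert_eq!(&caps[1], &b"-2"[..]);
    }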
impl Captures for RegexCaptures {
fn len(&self) -> usize {
match self.0 {
RegexCapturesImp::AhoCorasick { .. } => 1,
RegexCapturesImp::Regex { ref locs, offset, .. } => {
locs.len().checked_sub(offset).unwrap()
}
}
self.locs.len().checked_sub(self.offset).unwrap()
}
fn get(&self, i: usize) -> Option<Match> {
match self.0 {
RegexCapturesImp::AhoCorasick { mat, .. } => {
if i == 0 {
mat
} else {
None
}
}
RegexCapturesImp::Regex { ref locs, offset, strip_crlf } => {
if !strip_crlf {
let actual = i.checked_add(offset).unwrap();
return locs.pos(actual).map(|(s, e)| Match::new(s, e));
}
if !self.strip_crlf {
let actual = i.checked_add(self.offset).unwrap();
return self.locs.pos(actual).map(|(s, e)| Match::new(s, e));
}
// currently don't support capture offsetting with CRLF
// stripping
assert_eq!(offset, 0);
let m = match locs.pos(i).map(|(s, e)| Match::new(s, e)) {
None => return None,
Some(m) => m,
};
// If the end position of this match corresponds to the end
// position of the overall match, then we apply our CRLF
// stripping. Otherwise, we cannot assume stripping is correct.
if i == 0 || m.end() == locs.pos(0).unwrap().1 {
Some(m.with_end(m.end() - 1))
} else {
Some(m)
}
}
// currently don't support capture offsetting with CRLF stripping
assert_eq!(self.offset, 0);
let m = match self.locs.pos(i).map(|(s, e)| Match::new(s, e)) {
None => return None,
Some(m) => m,
};
// If the end position of this match corresponds to the end position
// of the overall match, then we apply our CRLF stripping. Otherwise,
// we cannot assume stripping is correct.
if i == 0 || m.end() == self.locs.pos(0).unwrap().1 {
Some(m.with_end(m.end() - 1))
} else {
Some(m)
}
}
}
impl RegexCaptures {
pub(crate) fn simple() -> RegexCaptures {
RegexCaptures(RegexCapturesImp::AhoCorasick { mat: None })
}
pub(crate) fn new(locs: CaptureLocations) -> RegexCaptures {
RegexCaptures::with_offset(locs, 0)
}
@@ -901,69 +793,37 @@ impl RegexCaptures {
locs: CaptureLocations,
offset: usize,
) -> RegexCaptures {
RegexCaptures(RegexCapturesImp::Regex {
locs,
offset,
strip_crlf: false,
})
RegexCaptures { locs, offset, strip_crlf: false }
}
pub(crate) fn locations(&self) -> &CaptureLocations {
match self.0 {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid")
}
RegexCapturesImp::Regex { ref locs, .. } => locs,
}
}
pub(crate) fn locations_mut(&mut self) -> &mut CaptureLocations {
match self.0 {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("getting locations for simple captures is invalid")
}
RegexCapturesImp::Regex { ref mut locs, .. } => locs,
}
pub(crate) fn locations(&mut self) -> &mut CaptureLocations {
&mut self.locs
}
pub(crate) fn strip_crlf(&mut self, yes: bool) {
match self.0 {
RegexCapturesImp::AhoCorasick { .. } => {
panic!("setting strip_crlf for simple captures is invalid")
}
RegexCapturesImp::Regex { ref mut strip_crlf, .. } => {
*strip_crlf = yes;
}
}
}
pub(crate) fn set_simple(&mut self, one: Option<Match>) {
match self.0 {
RegexCapturesImp::AhoCorasick { ref mut mat } => {
*mat = one;
}
RegexCapturesImp::Regex { .. } => {
panic!("setting simple captures for regex is invalid")
}
}
self.strip_crlf = yes;
}
}
#[cfg(test)]
mod tests {
use super::*;
use grep_matcher::{LineMatchKind, Matcher};
use super::*;
// Test that enabling word matches does the right thing and demonstrate
// the difference between it and surrounding the regex in `\b`.
#[test]
fn word() {
let matcher =
RegexMatcherBuilder::new().word(true).build(r"-2").unwrap();
let matcher = RegexMatcherBuilder::new()
.word(true)
.build(r"-2")
.unwrap();
assert!(matcher.is_match(b"abc -2 foo").unwrap());
let matcher =
RegexMatcherBuilder::new().word(false).build(r"\b-2\b").unwrap();
let matcher = RegexMatcherBuilder::new()
.word(false)
.build(r"\b-2\b")
.unwrap();
assert!(!matcher.is_match(b"abc -2 foo").unwrap());
}
@@ -972,7 +832,9 @@ mod tests {
#[test]
fn line_terminator() {
// This works, because there's no line terminator specified.
let matcher = RegexMatcherBuilder::new().build(r"abc\sxyz").unwrap();
let matcher = RegexMatcherBuilder::new()
.build(r"abc\sxyz")
.unwrap();
assert!(matcher.is_match(b"abc\nxyz").unwrap());
// This doesn't.
@@ -1022,12 +884,16 @@ mod tests {
// Test that smart case works.
#[test]
fn case_smart() {
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"abc").unwrap();
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"abc")
.unwrap();
assert!(matcher.is_match(b"ABC").unwrap());
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"aBc").unwrap();
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"aBc")
.unwrap();
assert!(!matcher.is_match(b"ABC").unwrap());
}
@@ -1049,7 +915,9 @@ mod tests {
// With no line terminator set, we can't employ any optimizations,
// so we get a confirmed match.
let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
let matcher = RegexMatcherBuilder::new()
.build(r"\wfoo\s")
.unwrap();
let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
assert!(is_confirmed(m));

View File

@@ -1,6 +1,6 @@
use grep_matcher::ByteSet;
use regex_syntax::hir::{self, Hir, HirKind};
use regex_syntax::utf8::Utf8Sequences;
use utf8_ranges::Utf8Sequences;
/// Return a confirmed set of non-matching bytes from the given expression.
pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
@@ -11,9 +11,14 @@ pub fn non_matching_bytes(expr: &Hir) -> ByteSet {
/// Remove any bytes from the given set that can occur in a matched produced by
/// the given expression.
fn remove_matching_bytes(expr: &Hir, set: &mut ByteSet) {
fn remove_matching_bytes(
expr: &Hir,
set: &mut ByteSet,
) {
match *expr.kind() {
HirKind::Empty | HirKind::Anchor(_) | HirKind::WordBoundary(_) => {}
HirKind::Empty
| HirKind::Anchor(_)
| HirKind::WordBoundary(_) => {}
HirKind::Literal(hir::Literal::Unicode(c)) => {
for &b in c.encode_utf8(&mut [0; 4]).as_bytes() {
set.remove(b);
@@ -100,20 +105,15 @@ mod tests {
#[test]
fn dot() {
assert_eq!(
sparse(&extract(".")),
vec![
b'\n', 192, 193, 245, 246, 247, 248, 249, 250, 251, 252, 253,
254, 255,
]
);
assert_eq!(
sparse(&extract("(?s).")),
vec![
192, 193, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254,
255,
]
);
assert_eq!(sparse(&extract(".")), vec![
b'\n',
192, 193, 245, 246, 247, 248, 249,
250, 251, 252, 253, 254, 255,
]);
assert_eq!(sparse(&extract("(?s).")), vec![
192, 193, 245, 246, 247, 248, 249,
250, 251, 252, 253, 254, 255,
]);
assert_eq!(sparse(&extract("(?-u).")), vec![b'\n']);
assert_eq!(sparse(&extract("(?s-u).")), vec![]);
}

View File

@@ -33,7 +33,10 @@ pub fn strip_from_match(
/// The implementation of strip_from_match. The given byte must be ASCII. This
/// function panics otherwise.
fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
fn strip_from_match_ascii(
expr: Hir,
byte: u8,
) -> Result<Hir, Error> {
assert!(byte <= 0x7F);
let chr = byte as char;
assert_eq!(chr.len_utf8(), 1);
@@ -85,15 +88,13 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
Hir::group(x)
}
HirKind::Concat(xs) => {
let xs = xs
.into_iter()
let xs = xs.into_iter()
.map(|e| strip_from_match_ascii(e, byte))
.collect::<Result<Vec<Hir>, Error>>()?;
Hir::concat(xs)
}
HirKind::Alternation(xs) => {
let xs = xs
.into_iter()
let xs = xs.into_iter()
.map(|e| strip_from_match_ascii(e, byte))
.collect::<Result<Vec<Hir>, Error>>()?;
Hir::alternation(xs)
@@ -105,8 +106,8 @@ fn strip_from_match_ascii(expr: Hir, byte: u8) -> Result<Hir, Error> {
mod tests {
use regex_syntax::Parser;
use super::{strip_from_match, LineTerminator};
use error::Error;
use super::{LineTerminator, strip_from_match};
fn roundtrip(pattern: &str, byte: u8) -> String {
roundtrip_line_term(pattern, LineTerminator::byte(byte)).unwrap()

Some files were not shown because too many files have changed in this diff.