Compare commits


5 Commits

Author SHA1 Message Date
Andrew Gallant
16f0fa6aa6 TRASH: ci: test release build
We test the release process in Travis by setting TRAVIS_TAG explicitly.
We test the release process in Appveyor by switching to a test branch.
2018-09-07 13:30:24 -04:00
Andrew Gallant
b602dbd294 ci: clean up appveyor
Remove some outdated comments and unused config. Also, make the regex for
matching tags a bit more specific.
2018-09-07 13:29:06 -04:00
Andrew Gallant
011aabe477 ci: remove 'branch' condition for deployment
Travis docs[1] say this is ignored when 'tags' is used.

[1] - https://docs.travis-ci.com/user/deployment/#conditional-releases-with-on
2018-09-07 13:29:05 -04:00
Andrew Gallant
c12acd7396 deb: add completions
This commit adds Bash, zsh and fish completions to the Debian binary
package.

Fixes #1032
2018-09-07 13:29:05 -04:00
Andrew Gallant
71fb43e51e deps: update versions for all crates
I don't think every change here is needed, but this ensures we're using
the latest version of every direct dependency.
2018-09-07 13:29:05 -04:00
173 changed files with 5000 additions and 10163 deletions


@@ -1,6 +0,0 @@
blank_issues_enabled: true
contact_links:
- name: Ask a question
about: |
You've come to seek help or want to discuss something related to ripgrep.
url: https://github.com/BurntSushi/ripgrep/discussions/new


@@ -1,17 +0,0 @@
---
name: Feature request
about: Suggest a new feature for ripgrep
title: ''
labels: ''
assignees: ''
---
#### Describe your feature request
Please describe the behavior you want and the motivation. Please also provide
examples of how ripgrep would be used if your feature request were added.
If you're not sure what to write here, then try imagining what the ideal
documentation of your new feature would look like in ripgrep's man page. Then
try to write it.


@@ -1,198 +0,0 @@
name: ci
on:
pull_request:
push:
branches:
- master
schedule:
- cron: '00 01 * * *'
jobs:
test:
name: test
env:
# For some builds, we use cross to test on 32-bit and big-endian
# systems.
CARGO: cargo
# When CARGO is set to CROSS, this is set to `--target matrix.target`.
TARGET_FLAGS:
# When CARGO is set to CROSS, TARGET_DIR includes matrix.target.
TARGET_DIR: ./target
# Emit backtraces on panics.
RUST_BACKTRACE: 1
runs-on: ${{ matrix.os }}
strategy:
matrix:
build:
# We test ripgrep on a pinned version of Rust, along with the moving
# targets of 'stable' and 'beta' for good measure.
- pinned
- stable
- beta
# Our release builds are generated by a nightly compiler to take
# advantage of the latest optimizations/compile time improvements. So
# we test all of them here. (We don't do mips releases, but test on
# mips for big-endian coverage.)
- nightly
- nightly-musl
- nightly-32
- nightly-mips
- nightly-arm
- macos
- win-msvc
- win-gnu
include:
- build: pinned
os: ubuntu-18.04
rust: 1.41.0
- build: stable
os: ubuntu-18.04
rust: stable
- build: beta
os: ubuntu-18.04
rust: beta
- build: nightly
os: ubuntu-18.04
rust: nightly
- build: nightly-musl
os: ubuntu-18.04
rust: nightly
target: x86_64-unknown-linux-musl
- build: nightly-32
os: ubuntu-18.04
rust: nightly
target: i686-unknown-linux-gnu
- build: nightly-mips
os: ubuntu-18.04
rust: nightly
target: mips64-unknown-linux-gnuabi64
- build: nightly-arm
os: ubuntu-18.04
rust: nightly
# For stripping release binaries:
# docker run --rm -v $PWD/target:/target:Z \
# rustembedded/cross:arm-unknown-linux-gnueabihf \
# arm-linux-gnueabihf-strip \
# /target/arm-unknown-linux-gnueabihf/debug/rg
target: arm-unknown-linux-gnueabihf
- build: macos
os: macos-latest
rust: nightly
- build: win-msvc
os: windows-2019
rust: nightly
- build: win-gnu
os: windows-2019
rust: nightly-x86_64-gnu
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install packages (Ubuntu)
if: matrix.os == 'ubuntu-18.04'
run: |
ci/ubuntu-install-packages
- name: Install packages (macOS)
if: matrix.os == 'macos-latest'
run: |
ci/macos-install-packages
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
- name: Use Cross
if: matrix.target != ''
run: |
# FIXME: to work around bugs in latest cross release, install master.
# See: https://github.com/rust-embedded/cross/issues/357
cargo install --git https://github.com/rust-embedded/cross
echo "::set-env name=CARGO::cross"
echo "::set-env name=TARGET_FLAGS::--target ${{ matrix.target }}"
echo "::set-env name=TARGET_DIR::./target/${{ matrix.target }}"
- name: Show command used for Cargo
run: |
echo "cargo command is: ${{ env.CARGO }}"
echo "target flag is: ${{ env.TARGET_FLAGS }}"
- name: Build ripgrep and all crates
run: ${{ env.CARGO }} build --verbose --all ${{ env.TARGET_FLAGS }}
- name: Build ripgrep with PCRE2
run: ${{ env.CARGO }} build --verbose --all --features pcre2 ${{ env.TARGET_FLAGS }}
# This is useful for debugging problems when the expected build artifacts
# (like shell completions and man pages) aren't generated.
- name: Show build.rs stderr
shell: bash
run: |
set +x
stderr="$(find "${{ env.TARGET_DIR }}/debug" -name stderr -print0 | xargs -0 ls -t | head -n1)"
if [ -s "$stderr" ]; then
echo "===== $stderr ===== "
cat "$stderr"
echo "====="
fi
set -x
- name: Run tests with PCRE2 (sans cross)
if: matrix.target == ''
run: ${{ env.CARGO }} test --verbose --all --features pcre2 ${{ env.TARGET_FLAGS }}
- name: Run tests without PCRE2 (with cross)
# These tests should actually work, but they almost double the runtime.
# Every integration test spins up qemu to run 'rg', and when PCRE2 is
# enabled, every integration test is run twice: once with the default
# regex engine and once with PCRE2.
if: matrix.target != ''
run: ${{ env.CARGO }} test --verbose --all ${{ env.TARGET_FLAGS }}
- name: Test for existence of build artifacts (Windows)
if: matrix.os == 'windows-2019'
shell: bash
run: |
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
ls "$outdir/_rg.ps1" && file "$outdir/_rg.ps1"
- name: Test for existence of build artifacts (Unix)
if: matrix.os != 'windows-2019'
shell: bash
run: |
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
# TODO: Check for the man page generation here. For whatever reason,
# it seems to be intermittently failing in CI. No idea why.
# for f in rg.bash rg.fish rg.1; do
for f in rg.bash rg.fish; do
# We could use file -E here, but it isn't supported on macOS.
ls "$outdir/$f" && file "$outdir/$f"
done
- name: Test zsh shell completions (Unix, sans cross)
# We could test this when using Cross, but we'd have to execute the
# 'rg' binary (done in test-complete) with qemu, which is a pain and
# doesn't really gain us much. If shell completion works in one place,
# it probably works everywhere.
if: matrix.target == '' && matrix.os != 'windows-2019'
shell: bash
run: ci/test-complete
rustfmt:
name: rustfmt
runs-on: ubuntu-18.04
steps:
- name: Checkout repository
uses: actions/checkout@v2
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: stable
override: true
profile: minimal
components: rustfmt
- name: Check formatting
run: |
cargo fmt --all -- --check


@@ -1,211 +0,0 @@
# The way this works is a little weird. But basically, the create-release job
# runs purely to initialize the GitHub release itself. Once done, the upload
# URL of the release is saved as an artifact.
#
# The build-release job runs only once create-release is finished. It gets
# the release upload URL by downloading the corresponding artifact (which was
# uploaded by create-release). It then builds the release executables for each
# supported platform and attaches them as release assets to the previously
# created release.
#
# The key here is that we create the release only once.
name: release
on:
push:
# Enable when testing release infrastructure on a branch.
# branches:
# - ag/release
tags:
- '[0-9]+.[0-9]+.[0-9]+'
jobs:
create-release:
name: create-release
runs-on: ubuntu-latest
# env:
# Set to force version number, e.g., when no tag exists.
# RG_VERSION: TEST-0.0.0
steps:
- name: Create artifacts directory
run: mkdir artifacts
- name: Get the release version from the tag
if: env.RG_VERSION == ''
run: |
# Apparently, this is the right way to get a tag name. Really?
#
# See: https://github.community/t5/GitHub-Actions/How-to-get-just-the-tag-name/m-p/32167/highlight/true#M1027
echo "::set-env name=RG_VERSION::${GITHUB_REF#refs/tags/}"
echo "version is: ${{ env.RG_VERSION }}"
- name: Create GitHub release
id: release
uses: actions/create-release@v1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
tag_name: ${{ env.RG_VERSION }}
release_name: ${{ env.RG_VERSION }}
- name: Save release upload URL to artifact
run: echo "${{ steps.release.outputs.upload_url }}" > artifacts/release-upload-url
- name: Save version number to artifact
run: echo "${{ env.RG_VERSION }}" > artifacts/release-version
- name: Upload artifacts
uses: actions/upload-artifact@v1
with:
name: artifacts
path: artifacts
build-release:
name: build-release
needs: ['create-release']
runs-on: ${{ matrix.os }}
env:
# For some builds, we use cross to test on 32-bit and big-endian
# systems.
CARGO: cargo
# When CARGO is set to CROSS, this is set to `--target matrix.target`.
TARGET_FLAGS:
# When CARGO is set to CROSS, TARGET_DIR includes matrix.target.
TARGET_DIR: ./target
# Emit backtraces on panics.
RUST_BACKTRACE: 1
# Build static releases with PCRE2.
PCRE2_SYS_STATIC: 1
strategy:
matrix:
build: [linux, linux-arm, macos, win-msvc, win-gnu, win32-msvc]
include:
- build: linux
os: ubuntu-18.04
rust: nightly
target: x86_64-unknown-linux-musl
- build: linux-arm
os: ubuntu-18.04
rust: nightly
target: arm-unknown-linux-gnueabihf
- build: macos
os: macos-latest
rust: nightly
target: x86_64-apple-darwin
- build: win-msvc
os: windows-2019
rust: nightly
target: x86_64-pc-windows-msvc
- build: win-gnu
os: windows-2019
rust: nightly-x86_64-gnu
target: x86_64-pc-windows-gnu
- build: win32-msvc
os: windows-2019
rust: nightly
target: i686-pc-windows-msvc
steps:
- name: Checkout repository
uses: actions/checkout@v2
with:
fetch-depth: 1
- name: Install packages (Ubuntu)
if: matrix.os == 'ubuntu-18.04'
run: |
ci/ubuntu-install-packages
- name: Install packages (macOS)
if: matrix.os == 'macos-latest'
run: |
ci/macos-install-packages
- name: Install Rust
uses: actions-rs/toolchain@v1
with:
toolchain: ${{ matrix.rust }}
profile: minimal
override: true
target: ${{ matrix.target }}
- name: Use Cross
# if: matrix.os != 'windows-2019'
run: |
# FIXME: to work around bugs in latest cross release, install master.
# See: https://github.com/rust-embedded/cross/issues/357
cargo install --git https://github.com/rust-embedded/cross
echo "::set-env name=CARGO::cross"
echo "::set-env name=TARGET_FLAGS::--target ${{ matrix.target }}"
echo "::set-env name=TARGET_DIR::./target/${{ matrix.target }}"
- name: Show command used for Cargo
run: |
echo "cargo command is: ${{ env.CARGO }}"
echo "target flag is: ${{ env.TARGET_FLAGS }}"
echo "target dir is: ${{ env.TARGET_DIR }}"
- name: Get release download URL
uses: actions/download-artifact@v1
with:
name: artifacts
path: artifacts
- name: Set release upload URL and release version
shell: bash
run: |
release_upload_url="$(cat artifacts/release-upload-url)"
echo "::set-env name=RELEASE_UPLOAD_URL::$release_upload_url"
echo "release upload url: $RELEASE_UPLOAD_URL"
release_version="$(cat artifacts/release-version)"
echo "::set-env name=RELEASE_VERSION::$release_version"
echo "release version: $RELEASE_VERSION"
- name: Build release binary
run: ${{ env.CARGO }} build --verbose --release --features pcre2 ${{ env.TARGET_FLAGS }}
- name: Strip release binary (linux and macos)
if: matrix.build == 'linux' || matrix.build == 'macos'
run: strip "target/${{ matrix.target }}/release/rg"
- name: Strip release binary (arm)
if: matrix.build == 'linux-arm'
run: |
docker run --rm -v \
"$PWD/target:/target:Z" \
rustembedded/cross:arm-unknown-linux-gnueabihf \
arm-linux-gnueabihf-strip \
/target/arm-unknown-linux-gnueabihf/release/rg
- name: Build archive
shell: bash
run: |
outdir="$(ci/cargo-out-dir "${{ env.TARGET_DIR }}")"
staging="ripgrep-${{ env.RELEASE_VERSION }}-${{ matrix.target }}"
mkdir -p "$staging"/{complete,doc}
cp {README.md,COPYING,UNLICENSE,LICENSE-MIT} "$staging/"
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/"
cp "$outdir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
cp complete/_rg "$staging/complete/"
if [ "${{ matrix.os }}" = "windows-2019" ]; then
cp "target/${{ matrix.target }}/release/rg.exe" "$staging/"
7z a "$staging.zip" "$staging"
echo "::set-env name=ASSET::$staging.zip"
else
# The man page is only generated on Unix systems. ¯\_(ツ)_/¯
cp "$outdir"/rg.1 "$staging/doc/"
cp "target/${{ matrix.target }}/release/rg" "$staging/"
tar czf "$staging.tar.gz" "$staging"
echo "::set-env name=ASSET::$staging.tar.gz"
fi
- name: Upload release archive
uses: actions/upload-release-asset@v1.0.1
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
upload_url: ${{ env.RELEASE_UPLOAD_URL }}
asset_path: ${{ env.ASSET }}
asset_name: ${{ env.ASSET }}
asset_content_type: application/octet-stream
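For reference, the release workflow above fires only for pushed tags matching its `on.push.tags` pattern, so cutting a release would hypothetically look like this (tag name illustrative):
```
# Push a version tag matching '[0-9]+.[0-9]+.[0-9]+' to trigger the
# release workflow above.
git tag 12.1.0
git push origin 12.1.0
```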

.travis.yml (new file)

@@ -0,0 +1,110 @@
language: rust
env:
global:
- PROJECT_NAME: ripgrep
- RUST_BACKTRACE: full
- TRAVIS_TAG: testrelease
addons:
apt:
packages:
# For generating man page.
- libxslt1-dev
- asciidoc
- docbook-xsl
- xsltproc
- libxml2-utils
# Needed for completion-function test.
- zsh
# Needed for testing decompression search.
- xz-utils
- liblz4-tool
# For building MUSL static builds on Linux.
- musl-tools
matrix:
fast_finish: true
include:
# Nightly channel.
# All *nix releases are done on the nightly channel to take advantage
# of the regex library's multiple pattern SIMD search.
- os: linux
rust: nightly
env: TARGET=i686-unknown-linux-musl
- os: linux
rust: nightly
env: TARGET=x86_64-unknown-linux-musl
- os: osx
rust: nightly
# XML_CATALOG_FILES is apparently necessary for asciidoc on macOS.
env: TARGET=x86_64-apple-darwin XML_CATALOG_FILES=/usr/local/etc/xml/catalog
- os: linux
rust: nightly
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
addons:
apt:
packages:
- gcc-4.8-arm-linux-gnueabihf
- binutils-arm-linux-gnueabihf
- libc6-armhf-cross
- libc6-dev-armhf-cross
# For generating man page.
- libxslt1-dev
- asciidoc
- docbook-xsl
- xsltproc
- libxml2-utils
# Beta channel. We enable these to make sure there are no regressions in
# Rust beta releases.
- os: linux
rust: beta
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: beta
env: TARGET=x86_64-unknown-linux-gnu
# Minimum Rust supported channel. We enable these to make sure ripgrep
# continues to work on the advertised minimum Rust version.
- os: linux
rust: 1.28.0
env: TARGET=x86_64-unknown-linux-gnu
- os: linux
rust: 1.28.0
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: 1.28.0
env: TARGET=arm-unknown-linux-gnueabihf GCC_VERSION=4.8
addons:
apt:
packages:
- gcc-4.8-arm-linux-gnueabihf
- binutils-arm-linux-gnueabihf
- libc6-armhf-cross
- libc6-dev-armhf-cross
# For generating man page.
- libxslt1-dev
- asciidoc
- docbook-xsl
- xsltproc
- libxml2-utils
install: ci/install.sh
script: ci/script.sh
before_deploy: ci/before_deploy.sh
deploy:
provider: releases
file_glob: true
file: deployment/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz
skip_cleanup: true
on:
condition: $TRAVIS_RUST_VERSION = nightly
tags: true
api_key:
secure: "IbSnsbGkxSydR/sozOf1/SRvHplzwRUHzcTjM7BKnr7GccL86gRPUrsrvD103KjQUGWIc1TnK1YTq5M0Onswg/ORDjqa1JEJPkPdPnVh9ipbF7M2De/7IlB4X4qXLKoApn8+bx2x/mfYXu4G+G1/2QdbaKK2yfXZKyjz0YFx+6CNrVCT2Nk8q7aHvOOzAL58vsG8iPDpupuhxlMDDn/UhyOWVInmPPQ0iJR1ZUJN8xJwXvKvBbfp3AhaBiAzkhXHNLgBR8QC5noWWMXnuVDMY3k4f3ic0V+p/qGUCN/nhptuceLxKFicMCYObSZeUzE5RAI0/OBW7l3z2iCoc+TbAnn+JrX/ObJCfzgAOXAU3tLaBFMiqQPGFKjKg1ltSYXomOFP/F7zALjpvFp4lYTBajRR+O3dqaxA9UQuRjw27vOeUpMcga4ZzL4VXFHzrxZKBHN//XIGjYAVhJ1NSSeGpeJV5/+jYzzWKfwSagRxQyVCzMooYFFXzn8Yxdm3PJlmp3GaAogNkdB9qKcrEvRINCelalzALPi0hD/HUDi8DD2PNTCLLMo6VSYtvc685Zbe+KgNzDV1YyTrRCUW6JotrS0r2ULLwnsh40hSB//nNv3XmwNmC/CmW5QAnIGj8cBMF4S2t6ohADIndojdAfNiptmaZOIT6owK7bWMgPMyopo="
branches:
only:
# Pushes and PR to the master branch
- master
- ag/prepare-0.10.0
# Ruby regex to match tags. Required, or travis won't trigger deploys when
# a new tag is pushed.
- /^\d+\.\d+\.\d+.*$/
notifications:
email:
on_success: never
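The `TRAVIS_TAG: testrelease` override near the top of this file is what the "TRASH: ci: test release build" commit above refers to: forcing the tag exercises the deploy path without pushing a real tag. A rough local sketch, assuming the `ci/*.sh` scripts read the variable from the environment the same way Travis exports it:
```
# Hypothetical dry run of the release path with a forced tag value.
export TRAVIS_TAG=testrelease
ci/install.sh && ci/script.sh && ci/before_deploy.sh
```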


@@ -1,313 +1,5 @@
TBD
===
ripgrep 12.1.0 is a small minor version release that mostly includes bug fixes
and documentation improvements.
**Notices for downstream ripgrep package maintainers:**
* Fish shell completions will be removed in the ripgrep 13 release.
See [#1577](https://github.com/BurntSushi/ripgrep/issues/1577)
for more details.
* ripgrep has switched from `a2x` to `asciidoctor` to generate the man page.
If `asciidoctor` is not present, then ripgrep will currently fall back to
`a2x`. Support for `a2x` will be dropped in the ripgrep 13 release.
See [#1544](https://github.com/BurntSushi/ripgrep/issues/1544)
for more details.
Feature enhancements:
* [FEATURE #1547](https://github.com/BurntSushi/ripgrep/pull/1547):
Support decompressing `.Z` files via `uncompress`.
Bug fixes:
* [BUG #1252](https://github.com/BurntSushi/ripgrep/issues/1252):
Add a section on the `--pre` flag to the GUIDE.
* [BUG #1339](https://github.com/BurntSushi/ripgrep/issues/1339):
Improve error message when a pattern with invalid UTF-8 is provided.
* [BUG #1524](https://github.com/BurntSushi/ripgrep/issues/1524):
Note how to escape a `$` when using `--replace`.
* [BUG #1537](https://github.com/BurntSushi/ripgrep/issues/1537):
Fix match bug caused by inner literal optimization.
* [BUG #1544](https://github.com/BurntSushi/ripgrep/issues/1544):
ripgrep now uses `asciidoctor` instead of `a2x` to generate its man page.
* [BUG #1550](https://github.com/BurntSushi/ripgrep/issues/1550):
Substantially reduce peak memory usage when searching wide directories.
* [BUG #1571](https://github.com/BurntSushi/ripgrep/issues/1571):
Add note about configuration files in `--type-{add,clear}` docs.
* [BUG #1573](https://github.com/BurntSushi/ripgrep/issues/1573):
Fix incorrect `--count-matches` output when using look-around.
12.0.1 (2020-03-29)
===================
ripgrep 12.0.1 is a small patch release that includes a minor bug fix relating
to superfluous error messages when searching git repositories with sub-modules.
This was a regression introduced in the 12.0.0 release.
Bug fixes:
* [BUG #1520](https://github.com/BurntSushi/ripgrep/issues/1520):
Don't emit spurious error messages in git repositories with submodules.
12.0.0 (2020-03-15)
===================
ripgrep 12 is a new major version release of ripgrep that contains many bug
fixes, several important performance improvements and a few minor new features.
In a near future release, I am hoping to add an
[indexing feature](https://github.com/BurntSushi/ripgrep/issues/1497)
to ripgrep, which will dramatically speed up searching by building an index.
Feedback would very much be appreciated, especially on the user experience
which will be difficult to get right.
This release has no known breaking changes.
Deprecations:
* The `--no-pcre2-unicode` flag is deprecated. Instead, use the `--no-unicode`
flag, which applies to both the default regex engine and PCRE2. For now,
`--no-pcre2-unicode` and `--pcre2-unicode` are aliases to `--no-unicode`
and `--unicode`, respectively. The `--[no-]pcre2-unicode` flags may be
removed in a future release.
* The `--auto-hybrid-regex` flag is deprecated. Instead, use the new `--engine`
flag with the `auto` value.
Performance improvements:
* [PERF #1087](https://github.com/BurntSushi/ripgrep/pull/1087):
ripgrep is smarter when detected literals are whitespace.
* [PERF #1381](https://github.com/BurntSushi/ripgrep/pull/1381):
Directory traversal is sped up with speculative ignore-file existence checks.
* [PERF cd8ec38a](https://github.com/BurntSushi/ripgrep/commit/cd8ec38a):
Improve inner literal detection to cover more cases more effectively.
e.g., ` +Sherlock Holmes +` now has ` Sherlock Holmes ` extracted instead
of ` `.
* [PERF 6a0e0147](https://github.com/BurntSushi/ripgrep/commit/6a0e0147):
Improve literal detection when the `-w/--word-regexp` flag is used.
* [PERF ad97e9c9](https://github.com/BurntSushi/ripgrep/commit/ad97e9c9):
Improve overall performance of the `-w/--word-regexp` flag.
Feature enhancements:
* Added or improved file type filtering for erb, diff, Gradle, HAML, Org,
Postscript, Skim, Slim, Slime, RPM Spec files, Typoscript, xml.
* [FEATURE #1370](https://github.com/BurntSushi/ripgrep/pull/1370):
Add `--include-zero` flag that shows files searched without matches.
* [FEATURE #1390](https://github.com/BurntSushi/ripgrep/pull/1390):
Add `--no-context-separator` flag that always hides context separators.
* [FEATURE #1414](https://github.com/BurntSushi/ripgrep/pull/1414):
Add `--no-require-git` flag to allow ripgrep to respect gitignores anywhere.
* [FEATURE #1420](https://github.com/BurntSushi/ripgrep/pull/1420):
Add `--no-ignore-exclude` to disregard rules in `.git/info/exclude` files.
* [FEATURE #1466](https://github.com/BurntSushi/ripgrep/pull/1466):
Add `--no-ignore-files` flag to disable all `--ignore-file` flags.
* [FEATURE #1488](https://github.com/BurntSushi/ripgrep/pull/1488):
Add '--engine' flag for easier switching between regex engines.
* [FEATURE 75cbe88f](https://github.com/BurntSushi/ripgrep/commit/75cbe88f):
Add `--no-unicode` flag. This works on all supported regex engines.
Bug fixes:
* [BUG #1291](https://github.com/BurntSushi/ripgrep/issues/1291):
ripgrep now works in non-existent directories.
* [BUG #1319](https://github.com/BurntSushi/ripgrep/issues/1319):
Fix match bug due to errant literal detection.
* [**BUG #1335**](https://github.com/BurntSushi/ripgrep/issues/1335):
Fixes a performance bug when searching plain text files with very long lines.
This was a serious performance regression in some cases.
* [BUG #1344](https://github.com/BurntSushi/ripgrep/issues/1344):
Document usage of `--type all`.
* [BUG #1389](https://github.com/BurntSushi/ripgrep/issues/1389):
Fixes a bug where ripgrep would panic when searching a symlinked directory.
* [BUG #1439](https://github.com/BurntSushi/ripgrep/issues/1439):
Improve documentation for ripgrep's automatic stdin detection.
* [BUG #1441](https://github.com/BurntSushi/ripgrep/issues/1441):
Remove CPU features from man page.
* [BUG #1442](https://github.com/BurntSushi/ripgrep/issues/1442),
[BUG #1478](https://github.com/BurntSushi/ripgrep/issues/1478):
Improve documentation of the `-g/--glob` flag.
* [BUG #1445](https://github.com/BurntSushi/ripgrep/issues/1445):
ripgrep now respects ignore rules from .git/info/exclude in worktrees.
* [BUG #1485](https://github.com/BurntSushi/ripgrep/issues/1485):
Fish shell completions from the release Debian package are now installed to
`/usr/share/fish/vendor_completions.d/rg.fish`.
11.0.2 (2019-08-01)
===================
ripgrep 11.0.2 is a new patch release that fixes a few bugs, including a
performance regression and a matching bug when using the `-F/--fixed-strings`
flag.
Feature enhancements:
* [FEATURE #1293](https://github.com/BurntSushi/ripgrep/issues/1293):
Added `--glob-case-insensitive` flag that makes `--glob` behave as `--iglob`.
Bug fixes:
* [BUG #1246](https://github.com/BurntSushi/ripgrep/issues/1246):
Add translations to README, starting with an unofficial Chinese translation.
* [BUG #1259](https://github.com/BurntSushi/ripgrep/issues/1259):
Fix bug where the last byte of a `-f file` was stripped if it wasn't a `\n`.
* [BUG #1261](https://github.com/BurntSushi/ripgrep/issues/1261):
Document that no error is reported when searching for `\n` with `-P/--pcre2`.
* [BUG #1284](https://github.com/BurntSushi/ripgrep/issues/1284):
Mention `.ignore` and `.rgignore` more prominently in the README.
* [BUG #1292](https://github.com/BurntSushi/ripgrep/issues/1292):
Fix bug where `--with-filename` was sometimes enabled incorrectly.
* [BUG #1268](https://github.com/BurntSushi/ripgrep/issues/1268):
Fix major performance regression in GitHub `x86_64-linux` binary release.
* [BUG #1302](https://github.com/BurntSushi/ripgrep/issues/1302):
Show better error messages when a non-existent preprocessor command is given.
* [BUG #1334](https://github.com/BurntSushi/ripgrep/issues/1334):
Fix match regression with `-F` flag when patterns contain meta characters.
11.0.1 (2019-04-16)
===================
ripgrep 11.0.1 is a new patch release that fixes a search regression introduced
in the previous 11.0.0 release. In particular, ripgrep can enter an infinite
loop for some search patterns when searching invalid UTF-8.
Bug fixes:
* [BUG #1247](https://github.com/BurntSushi/ripgrep/issues/1247):
Fix search bug that can cause ripgrep to enter an infinite loop.
11.0.0 (2019-04-15)
===================
ripgrep 11 is a new major version release of ripgrep that contains many bug
fixes, some performance improvements and a few feature enhancements. Notably,
ripgrep's user experience for binary file filtering has been improved. See the
[guide's new section on binary data](GUIDE.md#binary-data) for more details.
This release also marks a change in ripgrep's versioning. Whereas the previous
version was `0.10.0`, this version is `11.0.0`. Moving forward, ripgrep's
major version will be increased a few times per year. ripgrep will continue to
be conservative with respect to backwards compatibility, but may occasionally
introduce breaking changes, which will always be documented in this CHANGELOG.
See [issue 1172](https://github.com/BurntSushi/ripgrep/issues/1172) for a bit
more detail on why this versioning change was made.
This release increases the **minimum supported Rust version** from 1.28.0 to
1.34.0.
**BREAKING CHANGES**:
* ripgrep has tweaked its exit status codes to be more like GNU grep's. Namely,
if a non-fatal error occurs during a search, then ripgrep will now always
emit a `2` exit status code, regardless of whether a match is found or not.
Previously, ripgrep would only emit a `2` exit status code for a catastrophic
error (e.g., regex syntax error). One exception to this is if ripgrep is run
with `-q/--quiet`. In that case, if an error occurs and a match is found,
then ripgrep will exit with a `0` exit status code.
* Supplying the `-u/--unrestricted` flag three times is now equivalent to
supplying `--no-ignore --hidden --binary`. Previously, `-uuu` was equivalent
to `--no-ignore --hidden --text`. The difference is that `--binary` disables
binary file filtering without potentially dumping binary data into your
terminal. That is, `rg -uuu foo` should now be equivalent to `grep -r foo`.
* The `avx-accel` feature of ripgrep has been removed since it is no longer
necessary. All uses of AVX in ripgrep are now enabled automatically via
runtime CPU feature detection. The `simd-accel` feature does remain available
(only for enabling SIMD for transcoding), however, it does increase
compilation times substantially at the moment.
Performance improvements:
* [PERF #497](https://github.com/BurntSushi/ripgrep/issues/497),
[PERF #838](https://github.com/BurntSushi/ripgrep/issues/838):
Make `rg -F -f dictionary-of-literals` much faster.
Feature enhancements:
* Added or improved file type filtering for Apache Thrift, ASP, Bazel, Brotli,
BuildStream, bzip2, C, C++, Cython, gzip, Java, Make, Postscript, QML, Tex,
XML, xz, zig and zstd.
* [FEATURE #855](https://github.com/BurntSushi/ripgrep/issues/855):
Add `--binary` flag for disabling binary file filtering.
* [FEATURE #1078](https://github.com/BurntSushi/ripgrep/pull/1078):
Add `--max-columns-preview` flag for showing a preview of long lines.
* [FEATURE #1099](https://github.com/BurntSushi/ripgrep/pull/1099):
Add support for Brotli and Zstd to the `-z/--search-zip` flag.
* [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
Add `--no-ignore-dot` flag for ignoring `.ignore` files.
* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
* [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
ripgrep's exit status logic should now match GNU grep. See updated man page.
* [FEATURE #1164](https://github.com/BurntSushi/ripgrep/pull/1164):
Add `--ignore-file-case-insensitive` for case insensitive ignore globs.
* [FEATURE #1185](https://github.com/BurntSushi/ripgrep/pull/1185):
Add `-I` flag as a short option for the `--no-filename` flag.
* [FEATURE #1207](https://github.com/BurntSushi/ripgrep/pull/1207):
Add `none` value to `-E/--encoding` to forcefully disable all transcoding.
* [FEATURE da9d7204](https://github.com/BurntSushi/ripgrep/commit/da9d7204):
Add `--pcre2-version` for showing PCRE2 version information.
Bug fixes:
* [BUG #306](https://github.com/BurntSushi/ripgrep/issues/306),
[BUG #855](https://github.com/BurntSushi/ripgrep/issues/855):
Improve the user experience for ripgrep's binary file filtering.
* [BUG #373](https://github.com/BurntSushi/ripgrep/issues/373),
[BUG #1098](https://github.com/BurntSushi/ripgrep/issues/1098):
`**` is now accepted as valid syntax anywhere in a glob.
* [BUG #916](https://github.com/BurntSushi/ripgrep/issues/916):
ripgrep no longer hangs when searching `/proc` with a zombie process present.
* [BUG #1052](https://github.com/BurntSushi/ripgrep/issues/1052):
Fix bug where ripgrep could panic when transcoding UTF-16 files.
* [BUG #1055](https://github.com/BurntSushi/ripgrep/issues/1055):
Suggest `-U/--multiline` when a pattern contains a `\n`.
* [BUG #1063](https://github.com/BurntSushi/ripgrep/issues/1063):
Always strip a BOM if it's present, even for UTF-8.
* [BUG #1064](https://github.com/BurntSushi/ripgrep/issues/1064):
Fix inner literal detection that could lead to incorrect matches.
* [BUG #1079](https://github.com/BurntSushi/ripgrep/issues/1079):
Fixes a bug where the order of globs could result in missing a match.
* [BUG #1089](https://github.com/BurntSushi/ripgrep/issues/1089):
Fix another bug where ripgrep could panic when transcoding UTF-16 files.
* [BUG #1091](https://github.com/BurntSushi/ripgrep/issues/1091):
Add note about inverted flags to the man page.
* [BUG #1093](https://github.com/BurntSushi/ripgrep/pull/1093):
Fix handling of literal slashes in gitignore patterns.
* [BUG #1095](https://github.com/BurntSushi/ripgrep/issues/1095):
Fix corner cases involving the `--crlf` flag.
* [BUG #1101](https://github.com/BurntSushi/ripgrep/issues/1101):
Fix AsciiDoc escaping for man page output.
* [BUG #1103](https://github.com/BurntSushi/ripgrep/issues/1103):
Clarify what `--encoding auto` does.
* [BUG #1106](https://github.com/BurntSushi/ripgrep/issues/1106):
`--files-with-matches` and `--files-without-match` work with one file.
* [BUG #1121](https://github.com/BurntSushi/ripgrep/issues/1121):
Fix bug that was triggering Windows antimalware when using the `--files`
flag.
* [BUG #1125](https://github.com/BurntSushi/ripgrep/issues/1125),
[BUG #1159](https://github.com/BurntSushi/ripgrep/issues/1159):
ripgrep shouldn't panic for `rg -h | rg` and should emit correct exit status.
* [BUG #1144](https://github.com/BurntSushi/ripgrep/issues/1144):
Fixes a bug where line numbers could be wrong on big-endian machines.
* [BUG #1154](https://github.com/BurntSushi/ripgrep/issues/1154):
Windows files with "hidden" attribute are now treated as hidden.
* [BUG #1173](https://github.com/BurntSushi/ripgrep/issues/1173):
Fix handling of `**` patterns in gitignore files.
* [BUG #1174](https://github.com/BurntSushi/ripgrep/issues/1174):
Fix handling of repeated `**` patterns in gitignore files.
* [BUG #1176](https://github.com/BurntSushi/ripgrep/issues/1176):
Fix bug where `-F`/`-x` weren't applied to patterns given via `-f`.
* [BUG #1189](https://github.com/BurntSushi/ripgrep/issues/1189):
Document cases where ripgrep may use a lot of memory.
* [BUG #1203](https://github.com/BurntSushi/ripgrep/issues/1203):
Fix a matching bug related to the suffix literal optimization.
* [BUG 8f14cb18](https://github.com/BurntSushi/ripgrep/commit/8f14cb18):
Increase the default stack size for PCRE2's JIT.
0.10.0 (2018-09-07)
===================
0.10.0 (TBD)
============
This is a new minor version release of ripgrep that contains some major new
features, a huge number of bug fixes, and is the first release based on
libripgrep. The entirety of ripgrep's core search and printing code has been

Cargo.lock (generated): file diff suppressed because it is too large.


@@ -1,11 +1,11 @@
[package]
name = "ripgrep"
version = "12.1.0" #:version
version = "0.10.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
ripgrep is a line-oriented search tool that recursively searches your current
directory for a regex pattern while respecting your gitignore rules. ripgrep
has first class support on Windows, macOS and Linux.
has first class support on Windows, macOS and Linux
"""
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
@@ -17,11 +17,14 @@ license = "Unlicense OR MIT"
exclude = ["HomebrewFormula"]
build = "build.rs"
autotests = false
edition = "2018"
[badges]
travis-ci = { repository = "BurntSushi/ripgrep" }
appveyor = { repository = "BurntSushi/ripgrep" }
[[bin]]
bench = false
path = "crates/core/main.rs"
path = "src/main.rs"
name = "rg"
[[test]]
@@ -30,50 +33,46 @@ path = "tests/tests.rs"
[workspace]
members = [
"crates/globset",
"crates/grep",
"crates/cli",
"crates/matcher",
"crates/pcre2",
"crates/printer",
"crates/regex",
"crates/searcher",
"crates/ignore",
"globset",
"grep",
"grep-cli",
"grep-matcher",
"grep-pcre2",
"grep-printer",
"grep-regex",
"grep-searcher",
"ignore",
]
[dependencies]
bstr = "0.2.12"
grep = { version = "0.2.6", path = "crates/grep" }
ignore = { version = "0.4.15", path = "crates/ignore" }
grep = { version = "0.2.2", path = "grep" }
ignore = { version = "0.4.3", path = "ignore" }
lazy_static = "1.1.0"
log = "0.4.5"
num_cpus = "1.8.0"
regex = "1.3.5"
regex = "1.0.5"
serde_json = "1.0.23"
termcolor = "1.1.0"
termcolor = "1.0.3"
[dependencies.clap]
version = "2.33.0"
version = "2.32.0"
default-features = false
features = ["suggestions"]
[target.'cfg(all(target_env = "musl", target_pointer_width = "64"))'.dependencies.jemallocator]
version = "0.3.0"
[build-dependencies]
lazy_static = "1.1.0"
[build-dependencies.clap]
version = "2.33.0"
version = "2.32.0"
default-features = false
features = ["suggestions"]
[dev-dependencies]
serde = "1.0.77"
serde_derive = "1.0.77"
walkdir = "2"
[features]
avx-accel = ["grep/avx-accel"]
simd-accel = ["grep/simd-accel"]
pcre2 = ["grep/pcre2"]
@@ -82,7 +81,6 @@ debug = 1
[package.metadata.deb]
features = ["pcre2"]
section = "utils"
assets = [
["target/release/rg", "usr/bin/", "755"],
["COPYING", "usr/share/doc/ripgrep/", "644"],
@@ -94,11 +92,11 @@ assets = [
# The man page is automatically generated by ripgrep's build process, so
# this file isn't actually committed. Instead, to create a dpkg, either
# create a deployment/deb directory and copy the man page to it, or use the
# 'ci/build-deb' script.
# 'ci/build_deb.sh' script.
["deployment/deb/rg.1", "usr/share/man/man1/rg.1", "644"],
# Similarly for shell completions.
["deployment/deb/rg.bash", "usr/share/bash-completion/completions/rg", "644"],
["deployment/deb/rg.fish", "usr/share/fish/vendor_completions.d/rg.fish", "644"],
["deployment/deb/rg.fish", "usr/share/fish/completions/rg.fish", "644"],
["deployment/deb/_rg", "usr/share/zsh/vendor-completions/", "644"],
]
extended-description = """\


@@ -1,11 +0,0 @@
[target.x86_64-unknown-linux-musl]
image = "burntsushi/cross:x86_64-unknown-linux-musl"
[target.i686-unknown-linux-gnu]
image = "burntsushi/cross:i686-unknown-linux-gnu"
[target.mips64-unknown-linux-gnuabi64]
image = "burntsushi/cross:mips64-unknown-linux-gnuabi64"
[target.arm-unknown-linux-gnueabihf]
image = "burntsushi/cross:arm-unknown-linux-gnueabihf"

FAQ.md

@@ -25,7 +25,6 @@
* [How is ripgrep licensed?](#license)
* [Can ripgrep replace grep?](#posix4ever)
* [What does the "rip" in ripgrep mean?](#intentcountsforsomething)
* [How can I donate to ripgrep or its maintainers?](#donations)
<h3 name="config">
@@ -51,19 +50,18 @@ ripgrep is a project whose contributors are volunteers. A release schedule
adds undue stress to said volunteers. Therefore, releases are made on a best
effort basis and no dates **will ever be given**.
An exception to this _can be_ high impact bugs. If a ripgrep release contains
a significant regression, then there will generally be a strong push to get a
patch release out with a fix. However, no promises are made.
One exception to this is high impact bugs. If a ripgrep release contains a
significant regression, then there will generally be a strong push to get a
patch release out with a fix.
<h3 name="manpage">
Does ripgrep have a man page?
</h3>
Yes! Whenever ripgrep is compiled on a system with `asciidoctor` or `asciidoc`
present, then a man page is generated from ripgrep's argv parser. After
compiling ripgrep, you can find the man page like so from the root of the
repository:
Yes! Whenever ripgrep is compiled on a system with `asciidoc` present, then a
man page is generated from ripgrep's argv parser. After compiling ripgrep, you
can find the man page like so from the root of the repository:
```
$ find ./target -name rg.1 -print0 | xargs -0 ls -t | head -n1
@@ -120,7 +118,7 @@ from run to run of ripgrep.
The only way to make the order of results consistent is to ask ripgrep to
sort the output. Currently, this will disable all parallelism. (On smaller
repositories, you might not notice much of a performance difference!) You
can achieve this with the `--sort path` flag.
can achieve this with the `--sort-files` flag.
There is more discussion on this topic here:
https://github.com/BurntSushi/ripgrep/issues/152
@@ -138,10 +136,10 @@ How do I search compressed files?
</h3>
ripgrep's `-z/--search-zip` flag will cause it to search compressed files
automatically. Currently, this supports gzip, bzip2, xz, lzma, lz4, Brotli and
Zstd. Each of these requires the corresponding `gzip`, `bzip2`, `xz`,
`lz4`, `brotli` and `zstd` binaries to be installed on your system. (That is,
ripgrep does decompression by shelling out to another process.)
automatically. Currently, this supports gzip, bzip2, lzma, lz4 and xz only and
requires the corresponding `gzip`, `bzip2` and `xz` binaries to be installed on
your system. (That is, ripgrep does decompression by shelling out to another
process.)
ripgrep currently does not search archive formats, so `*.tar.gz` files, for
example, are skipped.
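A quick sketch of the flag in action, assuming the `gzip` binary is on `PATH` (file name illustrative):
```
# Compress a log, then search it directly; ripgrep shells out to gzip.
gzip -k some.log
rg -z 'error' some.log.gz
```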
@@ -151,8 +149,9 @@ example, are skipped.
How do I search over multiple lines?
</h3>
The `-U/--multiline` flag enables ripgrep to report results that span over
multiple lines.
This isn't currently possible. ripgrep is fundamentally a line-oriented search
tool. With that said,
[multiline search is a planned opt-in feature](https://github.com/BurntSushi/ripgrep/issues/176).
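A minimal illustration of the flag from the updated answer:
```
# Without -U this prints nothing; with it, the match spans two lines.
printf 'foo\nbar\n' > haystack
rg -U 'foo\nbar' haystack
```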
<h3 name="fancy">
@@ -936,8 +935,8 @@ Here are some cases where you might *not* want to use ripgrep. The same caveats
for the previous section apply.
* Are you writing portable shell scripts intended to work in a variety of
environments? Great, probably not a good idea to use ripgrep! ripgrep has
nowhere near the ubiquity of grep, so if you do use ripgrep, you might need
environments? Great, probably not a good idea to use ripgrep! ripgrep is has
nowhere near the ubquity of grep, so if you do use ripgrep, you might need
to futz with the installation process more than you would with grep.
* Do you care about POSIX compatibility? If so, then you can't use ripgrep
because it never was, isn't and never will be POSIX compatible.
@@ -983,21 +982,3 @@ grep](#posix4ever),
ripgrep is neither actually a "grep killer" nor was it ever intended to be. It
certainly does eat into some of its use cases, but that's nothing that other
tools like ack or The Silver Searcher weren't already doing.
<h3 name="donations">
How can I donate to ripgrep or its maintainers?
</h3>
As of now, you can't. While I believe the various efforts that are being
undertaken to help fund FOSS are extremely important, they aren't a good fit
for me. ripgrep is and I hope will remain a project of love that I develop in
my free time. As such, involving money---even in the form of donations given
without expectations---would severely change that dynamic for me personally.
Instead, I'd recommend donating to something else that is doing work that you
find meaningful. If you would like suggestions, then my favorites are:
* [The Internet Archive](https://archive.org/donate/)
* [Rails Girls](https://railsgirlssummerofcode.org/campaign/)
* [Wikipedia](https://wikimediafoundation.org/support/)

GUIDE.md

@@ -18,8 +18,6 @@ translatable to any command line shell environment.
* [Replacements](#replacements)
* [Configuration file](#configuration-file)
* [File encoding](#file-encoding)
* [Binary data](#binary-data)
* [Preprocessor](#preprocessor)
* [Common options](#common-options)
@@ -111,7 +109,7 @@ colors, you'll notice that `faster` will be highlighted instead of just the
It is beyond the scope of this guide to provide a full tutorial on regular
expressions, but ripgrep's specific syntax is documented here:
https://docs.rs/regex/*/regex/#syntax
https://docs.rs/regex/0.2.5/regex/#syntax
### Recursive search
@@ -237,11 +235,6 @@ Like `.gitignore`, a `.ignore` file can be placed in any directory. Its rules
will be processed with respect to the directory it resides in, just like
`.gitignore`.
To process `.gitignore` and `.ignore` files case insensitively, use the flag
`--ignore-file-case-insensitive`. This is especially useful on case insensitive
file systems like those on Windows and macOS. Note though that this can come
with a significant performance penalty, and is therefore disabled by default.
For a more in depth description of how glob patterns in a `.gitignore` file
are interpreted, please see `man gitignore`.
@@ -412,21 +405,6 @@ alias rg="rg --type-add 'web:*.{html,css,js}'"
or add `--type-add=web:*.{html,css,js}` to your ripgrep configuration file.
([Configuration files](#configuration-file) are covered in more detail later.)
#### The special `all` file type
A special option supported by the `--type` flag is `all`. `--type all` looks
for a match in any of the supported file types listed by `--type-list`,
including those added on the command line using `--type-add`. It's equivalent
to the command `rg --type agda --type asciidoc --type asm ...`, where `...`
stands for a list of `--type` flags for the rest of the types in `--type-list`.
As an example, let's suppose you have a shell script in your current directory,
`my-shell-script`, which includes a shell library, `my-shell-library.bash`.
Both `rg --type sh` and `rg --type all` would only search for matches in
`my-shell-library.bash`, not `my-shell-script`, because the globs matched
by the `sh` file type don't include files without an extension. On the
other hand, `rg --type-not all` would search `my-shell-script` but not
`my-shell-library.bash`.
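A short sketch of that distinction, using the hypothetical files from the example above:
```
# my-shell-script has no extension; my-shell-library.bash matches the
# globs of the 'sh' file type.
rg --type all 'foo'       # searches my-shell-library.bash only
rg --type-not all 'foo'   # searches my-shell-script only
```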
### Replacements
@@ -542,9 +520,9 @@ config file. Once the environment variable is set, open the file and just type
in the flags you want set automatically. There are only two rules for
describing the format of the config file:
1. Every line is a shell argument, after trimming whitespace.
2. Lines starting with `#` (optionally preceded by any amount of whitespace)
are ignored.
1. Every line is a shell argument, after trimming ASCII whitespace.
2. Lines starting with `#` (optionally preceded by any amount of
ASCII whitespace) are ignored.
In particular, there is no escaping. Each line is given to ripgrep as a single
command line argument verbatim.
@@ -554,9 +532,8 @@ formatting peculiarities:
```
$ cat $HOME/.ripgreprc
# Don't let ripgrep vomit really long lines to my terminal, and show a preview.
# Don't let ripgrep vomit really long lines to my terminal.
--max-columns=150
--max-columns-preview
# Add my 'web' type.
--type-add
@@ -621,14 +598,13 @@ topic, but we can try to summarize its relevancy to ripgrep:
* Files are generally just a bundle of bytes. There is no reliable way to know
their encoding.
* Either the encoding of the pattern must match the encoding of the files being
searched, or a form of transcoding must be performed that converts either the
searched, or a form of transcoding must be performed converts either the
pattern or the file to the same encoding as the other.
* ripgrep tends to work best on plain text files, and among plain text files,
the most popular encodings likely consist of ASCII, latin1 or UTF-8. As
a special exception, UTF-16 is prevalent in Windows environments
In light of the above, here is how ripgrep behaves when `--encoding auto` is
given, which is the default:
In light of the above, here is how ripgrep behaves:
* All input is assumed to be ASCII compatible (which means every byte that
corresponds to an ASCII codepoint actually is an ASCII codepoint). This
@@ -644,15 +620,12 @@ given, which is the default:
they correspond to a UTF-16 BOM, then ripgrep will transcode the contents of
the file from UTF-16 to UTF-8, and then execute the search on the transcoded
version of the file. (This incurs a performance penalty since transcoding
is slower than regex searching.) If the file contains invalid UTF-16, then
the Unicode replacement codepoint is substituted in place of invalid code
units.
is slower than regex searching.)
* To handle other cases, ripgrep provides a `-E/--encoding` flag, which permits
you to specify an encoding from the
[Encoding Standard](https://encoding.spec.whatwg.org/#concept-encoding-get).
ripgrep will assume *all* files searched are the encoding specified (unless
the file has a BOM) and will perform a transcoding step just like in the
UTF-16 case described above.
ripgrep will assume *all* files searched are the encoding specified and
will perform a transcoding step just like in the UTF-16 case described above.
By default, ripgrep will not require its input be valid UTF-8. That is, ripgrep
can and will search arbitrary bytes. The key here is that if you're searching
@@ -662,26 +635,9 @@ pattern won't find anything. With all that said, this mode of operation is
important, because it lets you find ASCII or UTF-8 *within* files that are
otherwise arbitrary bytes.
As a special case, the `-E/--encoding` flag supports the value `none`, which
will completely disable all encoding related logic, including BOM sniffing.
When `-E/--encoding` is set to `none`, ripgrep will search the raw bytes of
the underlying file with no transcoding step. For example, here's how you might
search the raw UTF-16 encoding of the string `Шерлок`:
```
$ rg '(?-u)\(\x045\x04@\x04;\x04>\x04:\x04' -E none -a some-utf16-file
```
Of course, that's just an example meant to show how one can drop down into
raw bytes. Namely, the simpler command works as you might expect automatically:
```
$ rg 'Шерлок' some-utf16-file
```
Finally, it is possible to disable ripgrep's Unicode support from within the
regular expression. For example, let's say you wanted `.` to match any byte
rather than any Unicode codepoint. (You might want this while searching a
pattern regular expression. For example, let's say you wanted `.` to match any
byte rather than any Unicode codepoint. (You might want this while searching a
binary file, since `.` by default will not match invalid UTF-8.) You could do
this by disabling Unicode via a regular expression flag:
@@ -698,282 +654,6 @@ $ rg '\w(?-u:\w)\w'
```
### Binary data
In addition to skipping hidden files and files in your `.gitignore` by default,
ripgrep also attempts to skip binary files. ripgrep does this by default
because binary files (like PDFs or images) are typically not things you want to
search when searching for regex matches. Moreover, if content in a binary file
did match, then it's possible for undesirable binary data to be printed to your
terminal and wreak havoc.
Unfortunately, unlike skipping hidden files and respecting your `.gitignore`
rules, a file cannot as easily be classified as binary. In order to figure out
whether a file is binary, the most effective heuristic that balances
correctness with performance is to simply look for `NUL` bytes. At that point,
the determination is simple: a file is considered "binary" if and only if it
contains a `NUL` byte somewhere in its contents.
The issue is that while most binary files will have a `NUL` byte toward the
beginning of their contents, this is not necessarily true. The `NUL` byte might
be the very last byte in a large file, but that file is still considered
binary. While this leads to a fair amount of complexity inside ripgrep's
implementation, it also results in some unintuitive user experiences.
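As a rough shell approximation of that heuristic (not ripgrep's actual implementation), a file is "binary" exactly when stripping `NUL` bytes changes its contents:
```
# Classify "$f" as binary iff it contains at least one NUL byte.
if tr -d '\000' < "$f" | cmp -s - "$f"; then
  echo "text"
else
  echo "binary"
fi
```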
At a high level, ripgrep operates in three different modes with respect to
binary files:
1. The default mode is to attempt to remove binary files from a search
completely. This is meant to mirror how ripgrep removes hidden files and
files in your `.gitignore` automatically. That is, as soon as a file is
detected as binary, searching stops. If a match was already printed (because
it was detected long before a `NUL` byte), then ripgrep will print a warning
message indicating that the search stopped prematurely. This default mode
**only applies to files searched by ripgrep as a result of recursive
directory traversal**, which is consistent with ripgrep's other automatic
filtering. For example, `rg foo .file` will search `.file` even though it
is hidden. Similarly, `rg foo binary-file` will search `binary-file` in
"binary" mode automatically.
2. Binary mode is similar to the default mode, except it will not always
stop searching after it sees a `NUL` byte. Namely, in this mode, ripgrep
will continue searching a file that is known to be binary until the first
of two conditions is met: 1) the end of the file has been reached or 2) a
match is or has been seen. This means that in binary mode, if ripgrep
reports no matches, then there are no matches in the file. When a match does
occur, ripgrep prints a message similar to one it prints when in its default
mode indicating that the search has stopped prematurely. This mode can be
forcefully enabled for all files with the `--binary` flag. The purpose of
binary mode is to provide a way to discover matches in all files, but to
avoid having binary data dumped into your terminal.
3. Text mode completely disables all binary detection and searches all files
as if they were text. This is useful when searching a file that is
predominantly text but contains a `NUL` byte, or if you are specifically
trying to search binary data. This mode can be enabled with the `-a/--text`
flag. Note that when using this mode on very large binary files, it is
possible for ripgrep to use a lot of memory.
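In flag terms, the three modes correspond roughly to the following invocations:
```
rg foo           # default mode: skip binary files found via traversal
rg --binary foo  # binary mode: search them without dumping binary data
rg -a foo        # text mode: disable binary detection entirely
```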
Unfortunately, there is one additional complexity in ripgrep that can make it
difficult to reason about binary files. That is, the way binary detection works
depends on the way that ripgrep searches your files. Specifically:
* When ripgrep uses memory maps, then binary detection is only performed on the
first few kilobytes of the file in addition to every matching line.
* When ripgrep doesn't use memory maps, then binary detection is performed on
all bytes searched.
This means that whether a file is detected as binary or not can change based
on the internal search strategy used by ripgrep. If you prefer to keep
ripgrep's binary file detection consistent, then you can disable memory maps
via the `--no-mmap` flag. (The cost will be a small performance regression when
searching very large files on some platforms.)
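So if consistent binary detection matters more than raw speed:
```
# Disable memory maps so detection is performed on all bytes searched.
rg --no-mmap foo some-large-file
```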
### Preprocessor
In ripgrep, a preprocessor is any type of command that can be run to transform
the input of every file before ripgrep searches it. This makes it possible to
search virtually any kind of content that can be automatically converted to
text without having to teach ripgrep how to read said content.
One common example is searching PDFs. PDFs are first and foremost meant to be
displayed to users. But PDFs often have text streams in them that can be useful
to search. In our case, we want to search Bruce Watson's excellent
dissertation,
[Taxonomies and Toolkits of Regular Language Algorithms](https://burntsushi.net/stuff/1995-watson.pdf).
After downloading it, let's try searching it:
```
$ rg 'The Commentz-Walter algorithm' 1995-watson.pdf
$
```
Surely, a dissertation on regular language algorithms would mention
Commentz-Walter. Indeed it does, but our search isn't picking it up because
PDFs are a binary format, and the text shown in the PDF may not be encoded as
simple contiguous UTF-8. Namely, even passing the `-a/--text` flag to ripgrep
will not make our search work.
One way to fix this is to convert the PDF to plain text first. This won't work
well for all PDFs, but does great in a lot of cases. (Note that the tool we
use, `pdftotext`, is part of the [poppler](https://poppler.freedesktop.org)
PDF rendering library.)
```
$ pdftotext 1995-watson.pdf > 1995-watson.txt
$ rg 'The Commentz-Walter algorithm' 1995-watson.txt
316:The Commentz-Walter algorithms : : : : : : : : : : : : : : :
7165:4.4 The Commentz-Walter algorithms
10062:in input string S , we obtain the Boyer-Moore algorithm. The Commentz-Walter algorithm
17218:The Commentz-Walter algorithm (and its variants) displayed more interesting behaviour,
17249:Aho-Corasick algorithms are used extensively. The Commentz-Walter algorithms are used
17297: The Commentz-Walter algorithms (CW). In all versions of the CW algorithms, a common program skeleton is used with di erent shift functions. The CW algorithms are
```
But having to explicitly convert every file can be a pain, especially when you
have a directory full of PDF files. Instead, we can use ripgrep's preprocessor
feature to search the PDF. ripgrep's `--pre` flag works by taking a single
command name and then executing that command for every file that it searches.
ripgrep passes the file path as the first and only argument to the command and
also sends the contents of the file to stdin. So let's write a simple shell
script that wraps `pdftotext` in a way that conforms to this interface:
```
$ cat preprocess
#!/bin/sh
exec pdftotext - -
```
With `preprocess` in the same directory as `1995-watson.pdf`, we can now use it
to search the PDF:
```
$ rg --pre ./preprocess 'The Commentz-Walter algorithm' 1995-watson.pdf
316:The Commentz-Walter algorithms : : : : : : : : : : : : : : :
7165:4.4 The Commentz-Walter algorithms
10062:in input string S , we obtain the Boyer-Moore algorithm. The Commentz-Walter algorithm
17218:The Commentz-Walter algorithm (and its variants) displayed more interesting behaviour,
17249:Aho-Corasick algorithms are used extensively. The Commentz-Walter algorithms are used
17297: The Commentz-Walter algorithms (CW). In all versions of the CW algorithms, a common program skeleton is used with di erent shift functions. The CW algorithms are
```
Note that `preprocess` must be resolvable to a command that ripgrep can read.
The simplest way to do this is to put your preprocessor command in a directory
that is in your `PATH` (or equivalent), or otherwise use an absolute path.
As a bonus, this turns out to be quite a bit faster than other specialized PDF
grepping tools:
```
$ time rg --pre ./preprocess 'The Commentz-Walter algorithm' 1995-watson.pdf -c
6
real 0.697
user 0.684
sys 0.007
maxmem 16 MB
faults 0
$ time pdfgrep 'The Commentz-Walter algorithm' 1995-watson.pdf -c
6
real 1.336
user 1.310
sys 0.023
maxmem 16 MB
faults 0
```
If you wind up needing to search a lot of PDFs, then ripgrep's parallelism can
make the speed difference even greater.
#### A more robust preprocessor
One of the problems with the aforementioned preprocessor is that it will fail
if you try to search a file that isn't a PDF:
```
$ echo foo > not-a-pdf
$ rg --pre ./preprocess 'The Commentz-Walter algorithm' not-a-pdf
not-a-pdf: preprocessor command failed: '"./preprocess" "not-a-pdf"':
-------------------------------------------------------------------------------
Syntax Warning: May not be a PDF file (continuing anyway)
Syntax Error: Couldn't find trailer dictionary
Syntax Error: Couldn't find trailer dictionary
Syntax Error: Couldn't read xref table
```
To fix this, we can make our preprocessor script a bit more robust by only
running `pdftotext` when we think the input is a non-empty PDF:
```
$ cat preprocessor
#!/bin/sh
case "$1" in
*.pdf)
# The -s flag ensures that the file is non-empty.
if [ -s "$1" ]; then
exec pdftotext - -
else
exec cat
fi
;;
*)
exec cat
;;
esac
```
We can even extend our preprocessor to search other kinds of files. Sometimes
we can't tell the file type from the file name alone, so we can use the `file`
utility to "sniff" the type of the file based on its contents:
```
$ cat preprocess
#!/bin/sh
case "$1" in
*.pdf)
# The -s flag ensures that the file is non-empty.
if [ -s "$1" ]; then
exec pdftotext - -
else
exec cat
fi
;;
*)
case $(file "$1") in
*Zstandard*)
exec pzstd -cdq
;;
*)
exec cat
;;
esac
;;
esac
```
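For example, the same script now transparently handles a Zstandard-compressed
file (a sketch; this assumes `pzstd` is installed and that `some.log` is a
hypothetical log file containing the word `error`):
```
$ pzstd -q some.log
$ rg --pre ./preprocess error some.log.zst
```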
#### Reducing preprocessor overhead
There is one more problem with the above approach: it requires running a
preprocessor for every single file that ripgrep searches. If every file needs
a preprocessor, then this is OK. But if most don't, then this can substantially
slow down searches because of the overhead of launching new processes. You
can avoid this by telling ripgrep to only invoke the preprocessor when the file
path matches a glob. For example, consider the performance difference even when
searching a repository as small as ripgrep's:
```
$ time rg --pre pre-rg 'fn is_empty' -c
crates/globset/src/lib.rs:1
crates/matcher/src/lib.rs:2
crates/ignore/src/overrides.rs:1
crates/ignore/src/gitignore.rs:1
crates/ignore/src/types.rs:1
real 0.138
user 0.485
sys 0.209
maxmem 7 MB
faults 0
$ time rg --pre pre-rg --pre-glob '*.pdf' 'fn is_empty' -c
crates/globset/src/lib.rs:1
crates/ignore/src/types.rs:1
crates/ignore/src/gitignore.rs:1
crates/ignore/src/overrides.rs:1
crates/matcher/src/lib.rs:2
real 0.008
user 0.010
sys 0.002
maxmem 7 MB
faults 0
```
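If you use a setup like this regularly, these flags can also live in a ripgrep
configuration file, which ripgrep reads when the `RIPGREP_CONFIG_PATH`
environment variable points at it (a minimal sketch; `pre-rg` stands in for
your preprocessor script, as in the benchmark above):
```
$ cat ~/.ripgreprc
--pre=pre-rg
--pre-glob=*.pdf
```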
### Common options
ripgrep has a lot of flags. Too many to keep in your head at once. This section
@@ -995,11 +675,10 @@ used options that will likely impact how you use ripgrep on a regular basis.
* `--files`: Print the files that ripgrep *would* search, but don't actually
search them.
* `-a/--text`: Search binary files as if they were plain text.
* `-U/--multiline`: Permit matches to span multiple lines.
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz, lz4,
brotli, zstd). This is disabled by default.
* `-z/--search-zip`: Search compressed files (gzip, bzip2, lzma, xz). This is
disabled by default.
* `-C/--context`: Show the lines surrounding a match.
* `--sort path`: Force ripgrep to sort its output by file name. (This disables
* `--sort-files`: Force ripgrep to sort its output by file name. (This disables
parallelism, so it might be slower.)
* `-L/--follow`: Follow symbolic links while recursively searching.
* `-M/--max-columns`: Limit the length of lines printed by ripgrep.
@@ -1,12 +1,3 @@
---
name: Bug report
about: An issue with ripgrep or any of its crates (ignore, globset, etc.)
title: ''
labels: ''
assignees: ''
---
#### What version of ripgrep are you using?
Replace this text with the output of `rg --version`.
@@ -21,11 +12,18 @@ Github binary releases.
Replace this text with your operating system and version.
#### Describe your bug.
#### Describe your question, feature request, or bug.
Give a high level description of the bug.
If a question, please describe the problem you're trying to solve and give
as much context as possible.
#### What are the steps to reproduce the behavior?
If a feature request, please describe the behavior you want and the motivation.
Please also provide an example of how ripgrep would be used if your feature
request were added.
If a bug, please see below.
#### If this is a bug, what are the steps to reproduce the behavior?
If possible, please include both your search patterns and the corpus on which
you are searching. Unless the bug is very obvious, then it is unlikely that it
@@ -34,7 +32,7 @@ will be fixed if the ripgrep maintainers cannot reproduce it.
If the corpus is too big and you cannot decrease its size, file the bug anyway
and the ripgrep maintainers will help figure out next steps.
#### What is the actual behavior?
#### If this is a bug, what is the actual behavior?
Show the command you ran and the actual output. Include the `--debug` flag in
your invocation of ripgrep.
@@ -50,6 +48,6 @@ goes
here
```
#### What is the expected behavior?
#### If this is a bug, what is the expected behavior?
What do you think ripgrep should have done?

README.md
@@ -1,18 +1,17 @@
ripgrep (rg)
------------
ripgrep is a line-oriented search tool that recursively searches your current
directory for a regex pattern. By default, ripgrep will respect your .gitignore
and automatically skip hidden files/directories and binary files. ripgrep
directory for a regex pattern while respecting your gitignore rules. ripgrep
has first class support on Windows, macOS and Linux, with binary downloads
available for [every release](https://github.com/BurntSushi/ripgrep/releases).
ripgrep is similar to other popular search tools like The Silver Searcher, ack
and grep.
ripgrep is similar to other popular search tools like The Silver Searcher,
ack and grep.
[![Build status](https://github.com/BurntSushi/ripgrep/workflows/ci/badge.svg)](https://github.com/BurntSushi/ripgrep/actions)
[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
[![Packaging status](https://repology.org/badge/tiny-repos/ripgrep.svg)](https://repology.org/project/ripgrep/badges)
Dual-licensed under MIT or the [UNLICENSE](https://unlicense.org).
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### CHANGELOG
@@ -28,60 +27,60 @@ Please see the [CHANGELOG](CHANGELOG.md) for a release history.
* [Configuration files](GUIDE.md#configuration-file)
* [Shell completions](FAQ.md#complete)
* [Building](#building)
* [Translations](#translations)
### Screenshot of search results
[![A screenshot of a sample search with ripgrep](https://burntsushi.net/stuff/ripgrep1.png)](https://burntsushi.net/stuff/ripgrep1.png)
[![A screenshot of a sample search with ripgrep](http://burntsushi.net/stuff/ripgrep1.png)](http://burntsushi.net/stuff/ripgrep1.png)
### Quick examples comparing tools
This example searches the entire
[Linux kernel source tree](https://github.com/BurntSushi/linux)
(after running `make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where
all matches must be words. Timings were collected on a system with an Intel
i7-6900K 3.2 GHz.
This example searches the entire Linux kernel source tree (after running
`make defconfig && make -j8`) for `[A-Z]+_SUSPEND`, where all matches must be
words. Timings were collected on a system with an Intel i7-6900K 3.2 GHz, and
ripgrep was compiled with SIMD enabled.
Please remember that a single benchmark is never enough! See my
[blog post on ripgrep](https://blog.burntsushi.net/ripgrep/)
[blog post on ripgrep](http://blog.burntsushi.net/ripgrep/)
for a very detailed comparison with more benchmarks and analysis.
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 452 | **0.136s** |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `git grep -P -n -w '[A-Z]+_SUSPEND'` | 452 | 0.348s |
| [ugrep (Unicode)](https://github.com/Genivia/ugrep) | `ugrep -r --ignore-files --no-hidden -I -w '[A-Z]+_SUSPEND'` | 452 | 0.506s |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 452 | 0.654s |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 452 | 1.150s |
| [ack](https://github.com/beyondgrep/ack3) | `ack -w '[A-Z]+_SUSPEND'` | 452 | 4.054s |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 452 | 4.205s |
| ripgrep (Unicode) | `rg -n -w '[A-Z]+_SUSPEND'` | 450 | **0.106s** |
| [git grep](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=C git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 0.553s |
| [The Silver Searcher](https://github.com/ggreer/the_silver_searcher) | `ag -w '[A-Z]+_SUSPEND'` | 450 | 0.589s |
| [git grep (Unicode)](https://www.kernel.org/pub/software/scm/git/docs/git-grep.html) | `LC_ALL=en_US.UTF-8 git grep -E -n -w '[A-Z]+_SUSPEND'` | 450 | 2.266s |
| [sift](https://github.com/svent/sift) | `sift --git -n -w '[A-Z]+_SUSPEND'` | 450 | 3.505s |
| [ack](https://github.com/petdance/ack2) | `ack -w '[A-Z]+_SUSPEND'` | 1878 | 6.823s |
| [The Platinum Searcher](https://github.com/monochromegane/the_platinum_searcher) | `pt -w -e '[A-Z]+_SUSPEND'` | 450 | 14.208s |
Here's another benchmark on the same corpus as above that disregards gitignore
files and searches with a whitelist instead. The corpus is the same as in the
previous benchmark, and the flags passed to each command ensure that they are
doing equivalent work:
(Yes, `ack` [has](https://github.com/petdance/ack2/issues/445) a
[bug](https://github.com/petdance/ack2/issues/14).)
Here's another benchmark that disregards gitignore files and searches with a
whitelist instead. The corpus is the same as in the previous benchmark, and the
flags passed to each command ensure that they are doing equivalent work:
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -uuu -tc -n -w '[A-Z]+_SUSPEND'` | 388 | **0.096s** |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 388 | 0.493s |
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -r -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 388 | 0.806s |
| ripgrep | `rg -L -u -tc -n -w '[A-Z]+_SUSPEND'` | 404 | **0.079s** |
| [ucg](https://github.com/gvansickle/ucg) | `ucg --type=cc -w '[A-Z]+_SUSPEND'` | 390 | 0.163s |
| [GNU grep](https://www.gnu.org/software/grep/) | `egrep -R -n --include='*.c' --include='*.h' -w '[A-Z]+_SUSPEND'` | 404 | 0.611s |
And finally, a straight-up comparison between ripgrep, ugrep and GNU grep on a
single large file cached in memory
(~13GB, [`OpenSubtitles.raw.en.gz`](http://opus.nlpl.eu/download.php?f=OpenSubtitles/v2018/mono/OpenSubtitles.raw.en.gz)):
(`ucg` [has slightly different behavior in the presence of symbolic links](https://github.com/gvansickle/ucg/issues/106).)
And finally, a straight-up comparison between ripgrep and GNU grep on a single
large file (~9.3GB,
[`OpenSubtitles2016.raw.en.gz`](http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz)):
| Tool | Command | Line count | Time |
| ---- | ------- | ---------- | ---- |
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 7882 | **2.769s** |
| [ugrep](https://github.com/Genivia/ugrep) | `ugrep -w 'Sherlock [A-Z]\w+'` | 7882 | 6.802s |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=en_US.UTF-8 egrep -w 'Sherlock [A-Z]\w+'` | 7882 | 9.027s |
| ripgrep | `rg -w 'Sherlock [A-Z]\w+'` | 5268 | **2.108s** |
| [GNU grep](https://www.gnu.org/software/grep/) | `LC_ALL=C egrep -w 'Sherlock [A-Z]\w+'` | 5268 | 7.014s |
In the above benchmark, passing the `-n` flag (for showing line numbers)
increases the times to `3.423s` for ripgrep and `13.031s` for GNU grep. ugrep
times are unaffected by the presence or absence of `-n`.
increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
### Why should I use ripgrep?
@@ -91,11 +90,11 @@ times are unaffected by the presence or absence of `-n`.
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to recursive
directory search and won't search files ignored by your
`.gitignore`/`.ignore`/`.rgignore` files. It also ignores hidden and binary
files by default. ripgrep also implements full support for `.gitignore`,
whereas there are many bugs related to that functionality in other code
search tools claiming to provide the same functionality.
directory search and won't search files ignored by your `.gitignore` files.
It also ignores hidden and binary files by default. ripgrep also implements
full support for `.gitignore`, whereas there are many bugs related to that
functionality in other code search tools claiming to provide the same
functionality.
* ripgrep can search specific types of files. For example, `rg -tpy foo`
limits your search to Python files and `rg -Tjs foo` excludes Javascript
files from your search. ripgrep can be taught about new file types with
@@ -106,20 +105,17 @@ times are unaffected by the presence or absence of `-n`.
supporting Unicode (which is always on).
* ripgrep has optional support for switching its regex engine to use PCRE2.
Among other things, this makes it possible to use look-around and
backreferences in your patterns, which are not supported in ripgrep's default
regex engine. PCRE2 support can be enabled with `-P/--pcre2` (use PCRE2
always) or `--auto-hybrid-regex` (use PCRE2 only if needed). An alternative
syntax is provided via the `--engine (default|pcre2|auto-hybrid)` option.
backreferences in your patterns, which are supported in ripgrep's default
regex engine. PCRE2 support is enabled with `-P`.
* ripgrep supports searching files in text encodings other than UTF-8, such
as UTF-16, latin-1, GBK, EUC-JP, Shift_JIS and more. (Some support for
automatically detecting UTF-16 is provided. Other text encodings must be
specifically specified with the `-E/--encoding` flag.)
* ripgrep supports searching files compressed in a common format (brotli,
bzip2, gzip, lz4, lzma, xz, or zstandard) with the `-z/--search-zip` flag.
* ripgrep supports
[arbitrary input preprocessing filters](GUIDE.md#preprocessor)
which could be PDF text extraction, less supported decompression, decrypting,
automatic encoding detection and so on.
* ripgrep supports searching files compressed in a common format (gzip, xz,
lzma, bzip2 or lz4) with the `-z/--search-zip` flag.
* ripgrep supports arbitrary input preprocessing filters which could be PDF
text extraction, less supported decompression, decrypting, automatic encoding
detection and so on.
In other words, use ripgrep if you like speed, filtering by default, fewer
bugs and Unicode support.
@@ -150,12 +146,12 @@ or more of the following:
### Is it really faster than everything else?
Generally, yes. A large number of benchmarks with detailed analysis for each is
[available on my blog](https://blog.burntsushi.net/ripgrep/).
[available on my blog](http://blog.burntsushi.net/ripgrep/).
Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang/regex).
[Rust's regex engine](https://github.com/rust-lang-nursery/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
@@ -202,13 +198,22 @@ prefer MSVC over GNU, but you'll need to have the [Microsoft VC++ 2015
redistributable](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
installed.
If you're a **macOS Homebrew** or a **Linuxbrew** user, then you can install
ripgrep from homebrew-core:
If you're a **macOS Homebrew** or a **Linuxbrew** user,
then you can install ripgrep either
from homebrew-core, (compiled with rust stable, no SIMD):
```
$ brew install ripgrep
```
or you can install a binary compiled with rust nightly (including SIMD and all
optimizations) by utilizing a custom tap:
```
$ brew tap burntsushi/ripgrep https://github.com/BurntSushi/ripgrep.git
$ brew install ripgrep-bin
```
If you're a **MacPorts** user, then you can install ripgrep from the
[official ports](https://www.macports.org/ports.php?by=name&substr=ripgrep):
@@ -224,7 +229,7 @@ $ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/ScoopInstaller/Main/blob/master/bucket/ripgrep.json):
[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
@@ -243,21 +248,29 @@ If you're a **Gentoo** user, you can install ripgrep from the
$ emerge sys-apps/ripgrep
```
If you're a **Fedora** user, you can install ripgrep from official
If you're a **Fedora 27+** user, you can install ripgrep from official
repositories.
```
$ sudo dnf install ripgrep
```
If you're an **openSUSE** user, ripgrep is included in **openSUSE Tumbleweed**
and **openSUSE Leap** since 15.1.
If you're a **Fedora 24+** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
$ sudo dnf copr enable carlwgeorge/ripgrep
$ sudo dnf install ripgrep
```
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the
[official repo](http://software.opensuse.org/package/ripgrep):
```
$ sudo zypper install ripgrep
```
If you're a **RHEL/CentOS 7/8** user, you can install ripgrep from
If you're a **RHEL/CentOS 7** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
@@ -275,27 +288,20 @@ $ # (Or using the attribute name, which is also ripgrep.)
If you're a **Debian** user (or a user of a Debian derivative like **Ubuntu**),
then ripgrep can be installed using a binary `.deb` file provided in each
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases).
[ripgrep release](https://github.com/BurntSushi/ripgrep/releases). Note that
ripgrep is not in the official Debian or Ubuntu repositories.
```
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/11.0.2/ripgrep_11.0.2_amd64.deb
$ sudo dpkg -i ripgrep_11.0.2_amd64.deb
$ curl -LO https://github.com/BurntSushi/ripgrep/releases/download/0.9.0/ripgrep_0.9.0_amd64.deb
$ sudo dpkg -i ripgrep_0.9.0_amd64.deb
```
If you run Debian Buster (currently Debian stable) or Debian sid, ripgrep is
If you run Debian Buster (currently Debian testing) or Debian sid, ripgrep is
[officially maintained by Debian](https://tracker.debian.org/pkg/rust-ripgrep).
```
$ sudo apt-get install ripgrep
```
If you're an **Ubuntu Cosmic (18.10)** (or newer) user, ripgrep is
[available](https://launchpad.net/ubuntu/+source/rust-ripgrep) using the same
packaging as Debian:
```
$ sudo apt-get install ripgrep
```
(N.B. Various snaps for ripgrep on Ubuntu are also available, but none of them
seem to work right and generate a number of very strange bug reports that I
don't know how to fix and don't have the time to fix. Therefore, it is no
@@ -322,23 +328,9 @@ If you're a **NetBSD** user, then you can install ripgrep from
# pkgin install ripgrep
```
If you're a **Haiku x86_64** user, then you can install ripgrep from the
[official ports](https://github.com/haikuports/haikuports/tree/master/sys-apps/ripgrep):
```
$ pkgman install ripgrep
```
If you're a **Haiku x86_gcc2** user, then you can install ripgrep from the
same port as Haiku x86_64 using the x86 secondary architecture build:
```
$ pkgman install ripgrep_x86
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.34.0**,
* Note that the minimum supported version of Rust for ripgrep is **1.28.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
symbols. This is intentional. To remove debug symbols and therefore reduce
@@ -348,12 +340,18 @@ If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
$ cargo install ripgrep
```
When compiling with Rust 1.27 or newer, this will automatically enable SIMD
optimizations for search.
ripgrep isn't currently in any other package repositories.
[I'd like to change that](https://github.com/BurntSushi/ripgrep/issues/10).
### Building
ripgrep is written in Rust, so you'll need to grab a
[Rust installation](https://www.rust-lang.org/) in order to compile it.
ripgrep compiles with Rust 1.34.0 (stable) or newer. In general, ripgrep tracks
ripgrep compiles with Rust 1.28.0 (stable) or newer. In general, ripgrep tracks
the latest stable release of the Rust compiler.
To build ripgrep:
@@ -370,14 +368,18 @@ If you have a Rust nightly compiler and a recent Intel CPU, then you can enable
additional optional SIMD acceleration like so:
```
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel'
RUSTFLAGS="-C target-cpu=native" cargo build --release --features 'simd-accel avx-accel'
```
The `simd-accel` feature enables SIMD support in certain ripgrep dependencies
(responsible for transcoding). They are not necessary to get SIMD optimizations
for search; those are enabled automatically. Hopefully, some day, the
`simd-accel` feature will similarly become unnecessary. **WARNING:** Currently,
enabling this option can increase compilation times dramatically.
If your machine doesn't support AVX instructions, then simply remove
`avx-accel` from the features list. Similarly for SIMD (which corresponds
roughly to SSE instructions).
The `simd-accel` and `avx-accel` features enable SIMD support in certain
ripgrep dependencies (responsible for counting lines and transcoding). They
are not necessary to get SIMD optimizations for search; those are enabled
automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features
will similarly become unnecessary.
Finally, optional PCRE2 support can be built with ripgrep by enabling the
`pcre2` feature:
@@ -386,8 +388,8 @@ Finally, optional PCRE2 support can be built with ripgrep by enabling the
$ cargo build --release --features 'pcre2'
```
(Tip: use `--features 'pcre2 simd-accel'` to also include compile time SIMD
optimizations, which will only work with a nightly compiler.)
(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile
time SIMD optimizations, which will only work with a nightly compiler.)
Enabling the PCRE2 feature works with a stable Rust compiler and will
attempt to automatically find and link with your system's PCRE2 library via
@@ -423,11 +425,3 @@ $ cargo test --all
```
from the repository root.
### Translations
The following is a list of known translations of ripgrep's documentation. These
are unofficially maintained and may not be up to date.
* [Chinese](https://github.com/chinanf-boy/ripgrep-zh#%E6%9B%B4%E6%96%B0-)
@@ -1,35 +0,0 @@
Release Checklist
-----------------
* Run `cargo update` and review dependency updates. Commit updated
`Cargo.lock`.
* Run `cargo outdated` and review semver incompatible updates. Unless there is
a strong motivation otherwise, review and update every dependency.
* Review changes for every crate in `crates` since the last ripgrep release.
If the set of changes is non-empty, issue a new release for that crate. Check
crates in the following order. After updating a crate, ensure minimal
versions are updated as appropriate in dependents. If an update is required,
run `cargo-up --no-push crates/{CRATE}/Cargo.toml`.
* crates/globset
* crates/ignore
* crates/cli
* crates/matcher
* crates/regex
* crates/pcre2
* crates/searcher
* crates/printer
* crates/grep (bump minimal versions as necessary)
* crates/core (do **not** bump version, but update dependencies as needed)
* Edit the `Cargo.toml` to set the new ripgrep version. Run
`cargo update -p ripgrep` so that the `Cargo.lock` is updated. Commit the
changes. Alternatively, use
`cargo-up --no-push --no-release Cargo.toml {VERSION}`.
* Create a new signed tag for the ripgrep release. Push it to GitHub.
* Wait for CI to finish creating the release. If the release build fails, then
delete the tag from GitHub, make fixes, re-tag, delete the release and push.
* Copy the relevant section of the CHANGELOG to the tagged release notes.
* Run `ci/build-deb` locally and manually upload the deb package to the
release.
* Run `cargo publish`.
* Run `ci/sha256-releases >> pkg/brew/ripgrep-bin.rb`. Then edit
`pkg/brew/ripgrep-bin.rb` to update the version numbers and sha256 hashes.
Remove extraneous stuff added by `ci/sha256-releases`. Commit changes.
appveyor.yml Normal file
@@ -0,0 +1,82 @@
cache:
- c:\cargo\registry
- c:\cargo\git
init:
- mkdir c:\cargo
- mkdir c:\rustup
- SET PATH=c:\cargo\bin;%PATH%
clone_folder: c:\projects\ripgrep
environment:
CARGO_HOME: "c:\\cargo"
RUSTUP_HOME: "c:\\rustup"
CARGO_TARGET_DIR: "c:\\projects\\ripgrep\\target"
global:
PROJECT_NAME: ripgrep
RUST_BACKTRACE: full
matrix:
- TARGET: x86_64-pc-windows-gnu
CHANNEL: stable
BITS: 64
MSYS2: 1
- TARGET: x86_64-pc-windows-msvc
CHANNEL: stable
BITS: 64
- TARGET: i686-pc-windows-gnu
CHANNEL: stable
BITS: 32
MSYS2: 1
- TARGET: i686-pc-windows-msvc
CHANNEL: stable
BITS: 32
matrix:
fast_finish: true
# Install Rust and Cargo
# (Based on from https://github.com/rust-lang/libc/blob/master/appveyor.yml)
install:
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
- rustup-init.exe -y --default-host %TARGET%
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
- if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH%
- rustc -V
- cargo -V
# Hack to work around a harmless warning in Appveyor builds?
build: false
# Equivalent to Travis' `script` phase
test_script:
- cargo test --verbose --all --features pcre2
before_deploy:
# Generate artifacts for release
- cargo build --release --features pcre2
- mkdir staging
- copy target\release\rg.exe staging
- ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging
- cd staging
# release zipfile will look like 'ripgrep-1.2.3-x86_64-pc-windows-msvc'
- 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
- appveyor PushArtifact ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip
deploy:
description: 'Automatically deployed release'
# All the zipped artifacts will be deployed
artifact: /.*\.zip/
auth_token:
secure: vv4vBCEosGlyQjaEC1+kraP2P6O4CQSa+Tw50oHWFTGcmuXxaWS0/yEXbxsIRLpw
provider: GitHub
# deploy when a new tag is pushed and only on the stable channel
on:
CHANNEL: stable
branch: ag/prepare-0.10.0
branches:
only:
- /^\d+\.\d+\.\d+$/
- master
- ag/prepare-0.10.0
@@ -1,3 +1,8 @@
#[macro_use]
extern crate clap;
#[macro_use]
extern crate lazy_static;
use std::env;
use std::fs::{self, File};
use std::io::{self, Read, Write};
@@ -9,7 +14,7 @@ use clap::Shell;
use app::{RGArg, RGArgKind};
#[allow(dead_code)]
#[path = "crates/core/app.rs"]
#[path = "src/app.rs"]
mod app;
fn main() {
@@ -21,8 +26,7 @@ fn main() {
eprintln!(
"OUT_DIR environment variable not defined. \
Please file a bug: \
https://github.com/BurntSushi/ripgrep/issues/new"
);
https://github.com/BurntSushi/ripgrep/issues/new");
process::exit(1);
}
};
@@ -65,51 +69,6 @@ fn git_revision_hash() -> Option<String> {
}
fn generate_man_page<P: AsRef<Path>>(outdir: P) -> io::Result<()> {
// If asciidoctor isn't installed, fallback to asciidoc.
if let Err(err) = process::Command::new("asciidoctor").output() {
eprintln!(
"Could not run 'asciidoctor' binary, falling back to 'a2x'."
);
eprintln!("Error from running 'asciidoctor': {}", err);
return legacy_generate_man_page::<P>(outdir);
}
// 1. Read asciidoctor template.
// 2. Interpolate template with auto-generated docs.
// 3. Save interpolation to disk.
// 4. Use asciidoctor to convert to man page.
let outdir = outdir.as_ref();
let cwd = env::current_dir()?;
let tpl_path = cwd.join("doc").join("rg.1.txt.tpl");
let txt_path = outdir.join("rg.1.txt");
let mut tpl = String::new();
File::open(&tpl_path)?.read_to_string(&mut tpl)?;
let options =
formatted_options()?.replace("&#123;", "{").replace("&#125;", "}");
tpl = tpl.replace("{OPTIONS}", &options);
let githash = git_revision_hash();
let githash = githash.as_ref().map(|x| &**x);
tpl = tpl.replace("{VERSION}", &app::long_version(githash, false));
File::create(&txt_path)?.write_all(tpl.as_bytes())?;
let result = process::Command::new("asciidoctor")
.arg("--doctype")
.arg("manpage")
.arg("--backend")
.arg("manpage")
.arg(&txt_path)
.spawn()?
.wait()?;
if !result.success() {
let msg =
format!("'asciidoctor' failed with exit code {:?}", result.code());
return Err(ioerr(msg));
}
Ok(())
}
fn legacy_generate_man_page<P: AsRef<Path>>(outdir: P) -> io::Result<()> {
// If asciidoc isn't installed, then don't do anything.
if let Err(err) = process::Command::new("a2x").output() {
eprintln!("Could not run 'a2x' binary, skipping man page generation.");
@@ -131,15 +90,13 @@ fn legacy_generate_man_page<P: AsRef<Path>>(outdir: P) -> io::Result<()> {
let githash = git_revision_hash();
let githash = githash.as_ref().map(|x| &**x);
tpl = tpl.replace("{VERSION}", &app::long_version(githash, false));
tpl = tpl.replace("{VERSION}", &app::long_version(githash));
File::create(&txt_path)?.write_all(tpl.as_bytes())?;
let result = process::Command::new("a2x")
.arg("--no-xmllint")
.arg("--doctype")
.arg("manpage")
.arg("--format")
.arg("manpage")
.arg("--doctype").arg("manpage")
.arg("--format").arg("manpage")
.arg(&txt_path)
.spawn()?
.wait()?;
@@ -162,7 +119,7 @@ fn formatted_options() -> io::Result<String> {
// ripgrep only has two positional arguments, and probably will only
// ever have two positional arguments, so we just hardcode them into
// the template.
if let app::RGArgKind::Positional { .. } = arg.kind {
if let app::RGArgKind::Positional{..} = arg.kind {
continue;
}
formatted.push(formatted_arg(&arg)?);
@@ -172,9 +129,7 @@ fn formatted_options() -> io::Result<String> {
fn formatted_arg(arg: &RGArg) -> io::Result<String> {
match arg.kind {
RGArgKind::Positional { .. } => {
panic!("unexpected positional argument")
}
RGArgKind::Positional{..} => panic!("unexpected positional argument"),
RGArgKind::Switch { long, short, multiple } => {
let mut out = vec![];
@@ -213,13 +168,7 @@ fn formatted_arg(arg: &RGArg) -> io::Result<String> {
}
fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> {
let paragraphs: Vec<String> = arg
.doc_long
.replace("{", "&#123;")
.replace("}", r"&#125;")
.split("\n\n")
.map(|s| s.to_string())
.collect();
let paragraphs: Vec<&str> = arg.doc_long.split("\n\n").collect();
if paragraphs.is_empty() {
return Err(ioerr(format!("missing docs for --{}", arg.name)));
}
ci/before_deploy.sh Executable file
@@ -0,0 +1,59 @@
#!/bin/bash
# package the build artifacts
set -ex
. "$(dirname $0)/utils.sh"
# Generate artifacts for release
mk_artifacts() {
if is_arm; then
cargo build --target "$TARGET" --release
else
cargo build --target "$TARGET" --release --features 'pcre2'
fi
}
mk_tarball() {
# When cross-compiling, use the right `strip` tool on the binary.
local gcc_prefix="$(gcc_prefix)"
# Create a temporary dir that contains our staging area.
# $tmpdir/$name is what eventually ends up as the deployed archive.
local tmpdir="$(mktemp -d)"
local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
local staging="$tmpdir/$name"
mkdir -p "$staging"/{complete,doc}
# The deployment directory is where the final archive will reside.
# This path is known by the .travis.yml configuration.
local out_dir="$(pwd)/deployment"
mkdir -p "$out_dir"
# Find the correct (most recent) Cargo "out" directory. The out directory
# contains shell completion files and the man page.
local cargo_out_dir="$(cargo_out_dir "target/$TARGET")"
# Copy the ripgrep binary and strip it.
cp "target/$TARGET/release/rg" "$staging/rg"
"${gcc_prefix}strip" "$staging/rg"
# Copy the licenses and README.
cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$staging/"
# Copy documentation and man page.
cp {CHANGELOG.md,FAQ.md,GUIDE.md} "$staging/doc/"
if command -V a2x 2>&1 > /dev/null; then
# The man page should only exist if we have asciidoc installed.
cp "$cargo_out_dir/rg.1" "$staging/doc/"
fi
# Copy shell completion files.
cp "$cargo_out_dir"/{rg.bash,rg.fish,_rg.ps1} "$staging/complete/"
cp complete/_rg "$staging/complete/"
(cd "$tmpdir" && tar czf "$out_dir/$name.tar.gz" "$name")
rm -rf "$tmpdir"
}
main() {
mk_artifacts
mk_tarball
}
main
@@ -1,7 +1,6 @@
#!/bin/bash
set -e
D="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
# This script builds a binary dpkg for Debian based distros. It does not
# currently run in CI, and is instead run manually and the resulting dpkg is
@@ -24,13 +23,20 @@ fi
# the deb, which knows where to look.
DEPLOY_DIR=deployment/deb
OUT_DIR="$("$D"/cargo-out-dir target/debug/)"
mkdir -p "$DEPLOY_DIR"
cargo build
# Copy man page and shell completions.
cp "$OUT_DIR"/{rg.1,rg.bash,rg.fish} "$DEPLOY_DIR/"
cp complete/_rg "$DEPLOY_DIR/"
# Find and copy man page.
manpage="$(find ./target/debug -name rg.1 -print0 | xargs -0 ls -t | head -n1)"
cp "$manpage" "$DEPLOY_DIR/"
# Do the same for shell completions.
compbash="$(find ./target/debug -name rg.bash -print0 | xargs -0 ls -t | head -n1)"
cp "$compbash" "$DEPLOY_DIR/"
compfish="$(find ./target/debug -name rg.fish -print0 | xargs -0 ls -t | head -n1)"
cp "$compfish" "$DEPLOY_DIR/"
compzsh="complete/_rg"
cp "$compzsh" "$DEPLOY_DIR/"
# Since we're distributing the dpkg, we don't know whether the user will have
# PCRE2 installed, so just do a static build.
@@ -1,19 +0,0 @@
#!/bin/bash
# Finds Cargo's `OUT_DIR` directory from the most recent build.
#
# This requires one parameter corresponding to the target directory
# to search for the build output.
if [ $# != 1 ]; then
echo "Usage: $(basename "$0") <target-dir>" >&2
exit 2
fi
# This works by finding the most recent stamp file, which is produced by
# every ripgrep build.
target_dir="$1"
find "$target_dir" -name ripgrep-stamp -print0 \
| xargs -0 ls -t \
| head -n1 \
| xargs dirname
@@ -1,24 +0,0 @@
These are Docker images used for cross compilation in CI builds (or locally)
via the [Cross](https://github.com/rust-embedded/cross) tool.
The Cross tool actually provides its own Docker images, and all Docker images
in this directory are derived from one of them. We provide our own in order
to customize the environment. For example, we need to install some things like
`asciidoctor` in order to generate man pages. We also install compression tools
like `xz` so that tests for the `-z/--search-zip` flag are run.
If you make a change to a Docker image, then you can re-build it. `cd` into the
directory containing the `Dockerfile` and run:
$ cd x86_64-unknown-linux-musl
$ ./build
At this point, subsequent uses of `cross` will now use your built image since
Docker prefers local images over remote images. In order to make these changes
stick, they need to be pushed to Docker Hub:
$ docker push burntsushi/cross:x86_64-unknown-linux-musl
Of course, only I (BurntSushi) can push to that location. To make `cross` use
a different location, then edit `Cross.toml` in the root of this repo to use
a different image name for the desired target.
@@ -1,4 +0,0 @@
FROM rustembedded/cross:arm-unknown-linux-gnueabihf
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:arm-unknown-linux-gnueabihf .
@@ -1,4 +0,0 @@
FROM rustembedded/cross:i686-unknown-linux-gnu
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:i686-unknown-linux-gnu .
@@ -1,4 +0,0 @@
FROM rustembedded/cross:mips64-unknown-linux-gnuabi64
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:mips64-unknown-linux-gnuabi64 .
@@ -1,4 +0,0 @@
FROM rustembedded/cross:x86_64-unknown-linux-musl
COPY stage/ubuntu-install-packages /
RUN /ubuntu-install-packages
@@ -1,5 +0,0 @@
#!/bin/sh
mkdir -p stage
cp ../../ubuntu-install-packages ./stage/
docker build -t burntsushi/cross:x86_64-unknown-linux-musl .
ci/install.sh Executable file
@@ -0,0 +1,61 @@
#!/bin/bash
# install stuff needed for the `script` phase
# Where rustup gets installed.
export PATH="$PATH:$HOME/.cargo/bin"
set -ex
. "$(dirname $0)/utils.sh"
install_rustup() {
curl https://sh.rustup.rs -sSf \
| sh -s -- -y --default-toolchain="$TRAVIS_RUST_VERSION"
rustc -V
cargo -V
}
install_targets() {
if [ $(host) != "$TARGET" ]; then
rustup target add $TARGET
fi
}
install_osx_dependencies() {
if ! is_osx; then
return
fi
brew install asciidoc docbook-xsl
}
configure_cargo() {
local prefix=$(gcc_prefix)
if [ -n "${prefix}" ]; then
local gcc_suffix=
if [ -n "$GCC_VERSION" ]; then
gcc_suffix="-$GCC_VERSION"
fi
local gcc="${prefix}gcc${gcc_suffix}"
# information about the cross compiler
"${gcc}" -v
# tell cargo which linker to use for cross compilation
mkdir -p .cargo
cat >>.cargo/config <<EOF
[target.$TARGET]
linker = "${gcc}"
EOF
fi
}
main() {
install_osx_dependencies
install_rustup
install_targets
configure_cargo
}
main
@@ -1,3 +0,0 @@
#!/bin/sh
brew install asciidoctor
ci/script.sh Executable file
@@ -0,0 +1,50 @@
#!/bin/bash
# build, test and generate docs in this phase
set -ex
. "$(dirname $0)/utils.sh"
main() {
# Test a normal debug build.
if is_arm; then
cargo build --target "$TARGET" --verbose
else
cargo build --target "$TARGET" --verbose --all --features 'pcre2'
fi
# Show the output of the most recent build.rs stderr.
set +x
stderr="$(find "target/$TARGET/debug" -name stderr -print0 | xargs -0 ls -t | head -n1)"
if [ -s "$stderr" ]; then
echo "===== $stderr ====="
cat "$stderr"
echo "====="
fi
set -x
# sanity check the file type
file target/"$TARGET"/debug/rg
# Check that we've generated man page and other shell completions.
outdir="$(cargo_out_dir "target/$TARGET/debug")"
file "$outdir/rg.bash"
file "$outdir/rg.fish"
file "$outdir/_rg.ps1"
file "$outdir/rg.1"
# Apparently tests don't work on arm, so just bail now. I guess we provide
# ARM releases on a best effort basis?
if is_arm; then
return 0
fi
# Test that zsh completions are in sync with ripgrep's actual args.
"$(dirname "${0}")/test_complete.sh"
# Run tests for ripgrep and all sub-crates.
cargo test --target "$TARGET" --verbose --all --features 'pcre2'
}
main
@@ -18,7 +18,7 @@ get_comp_args() {
main() {
local diff
local rg="${0:a:h}/../${TARGET_DIR:-target}/release/rg"
local rg="${0:a:h}/../target/${TARGET:-}/release/rg"
local _rg="${0:a:h}/../complete/_rg"
local -a help_args comp_args
@@ -44,7 +44,7 @@ main() {
# Occasionally we may have to handle some manually, however
help_args=( ${(f)"$(
$rg --help |
$rg -i -- '^\s+--?[a-z0-9]|--[a-z]' |
$rg -i -- '^\s+--?[a-z0-9]|--[imnp]' |
$rg -ior '$1' -- $'[\t /\"\'`.,](-[a-z0-9]|--[a-z0-9-]+)\\b' |
$rg -v -- --print0 | # False positives
sort -u
@@ -1,6 +0,0 @@
#!/bin/sh
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
asciidoctor \
zsh xz-utils liblz4-tool musl-tools
@@ -55,13 +55,6 @@ gcc_prefix() {
esac
}
is_musl() {
case "$TARGET" in
*-musl) return 0 ;;
*) return 1 ;;
esac
}
is_x86() {
case "$(architecture)" in
amd64|i386) return 0 ;;
@@ -69,13 +62,6 @@ is_x86() {
esac
}
is_x86_64() {
case "$(architecture)" in
amd64) return 0 ;;
*) return 1 ;;
esac
}
is_arm() {
case "$(architecture)" in
armhf) return 0 ;;
@@ -96,14 +82,3 @@ is_osx() {
*) return 1 ;;
esac
}
builder() {
if is_musl && is_x86_64; then
# cargo install cross
# To work around https://github.com/rust-embedded/cross/issues/357
cargo install --git https://github.com/rust-embedded/cross --force
echo "cross"
else
echo "cargo"
fi
}
@@ -3,7 +3,7 @@
##
# zsh completion function for ripgrep
#
# Run ci/test-complete after building to ensure that the options supported by
# Run ci/test_complete.sh after building to ensure that the options supported by
# this function stay in synch with the `rg` binary.
#
# For convenience, a completion reference guide is included at the bottom of
@@ -43,7 +43,6 @@ _rg() {
+ '(exclusive)' # Misc. fully exclusive options
'(: * -)'{-h,--help}'[display help information]'
'(: * -)'{-V,--version}'[display version information]'
'(: * -)'--pcre2-version'[print the version of PCRE2 used by ripgrep, if available]'
+ '(buffered)' # buffering options
'--line-buffered[force line buffering]'
@@ -72,19 +71,11 @@ _rg() {
+ '(count)' # Counting options
{-c,--count}'[only show count of matching lines for each file]'
'--count-matches[only show count of individual matches for each file]'
'--include-zero[include files with zero matches in summary]'
+ '(encoding)' # Encoding options
{-E+,--encoding=}'[specify text encoding of files to search]: :_rg_encodings'
$no'--no-encoding[use default text encoding]'
+ '(engine)' # Engine choice options
'--engine=[select which regex engine to use]:when:((
default\:"use default engine"
pcre2\:"identical to --pcre2"
auto\:"identical to --auto-hybrid-regex"
))'
+ file # File-input options
'(1)*'{-f+,--file=}'[specify file containing patterns to search for]: :_files'
@@ -94,7 +85,7 @@ _rg() {
+ '(file-name)' # File-name options
{-H,--with-filename}'[show file name for matches]'
{-I,--no-filename}"[don't show file name for matches]"
"--no-filename[don't show file name for matches]"
+ '(file-system)' # File system options
"--one-file-system[don't descend into directories on other file systems]"
@@ -112,10 +103,6 @@ _rg() {
'*'{-g+,--glob=}'[include/exclude files matching specified glob]:glob'
'*--iglob=[include/exclude files matching specified case-insensitive glob]:glob'
+ '(glob-case-insensitive)' # File-glob case sensitivity options
'--glob-case-insensitive[treat -g/--glob patterns case insensitively]'
$no'--no-glob-case-insensitive[treat -g/--glob patterns case sensitively]'
+ '(heading)' # Heading options
'(pretty-vimgrep)--heading[show matches grouped by file name]'
"(pretty-vimgrep)--no-heading[don't show matches grouped by file name]"
@@ -124,21 +111,9 @@ _rg() {
'--hidden[search hidden files and directories]'
$no"--no-hidden[don't search hidden files and directories]"
+ '(hybrid)' # hybrid regex options
'--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
$no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
+ '(ignore)' # Ignore-file options
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
$no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
+ '(ignore-file-case-insensitive)' # Ignore-file case sensitivity options
'--ignore-file-case-insensitive[process ignore files case insensitively]'
$no'--no-ignore-file-case-insensitive[process ignore files case sensitively]'
+ '(ignore-exclude)' # Local exclude (ignore)-file options
"--no-ignore-exclude[don't respect local exclude (ignore) files]"
$no'--ignore-exclude[respect local exclude (ignore) files]'
"(--no-ignore-global --no-ignore-parent --no-ignore-vcs)--no-ignore[don't respect ignore files]"
$no'(--ignore-global --ignore-parent --ignore-vcs)--ignore[respect ignore files]'
+ '(ignore-global)' # Global ignore-file options
"--no-ignore-global[don't respect global ignore files]"
@@ -152,18 +127,6 @@ _rg() {
"--no-ignore-vcs[don't respect version control ignore files]"
$no'--ignore-vcs[respect version control ignore files]'
+ '(require-git)' # git specific settings
"--no-require-git[don't require git repository to respect gitignore rules]"
$no'--require-git[require git repository to respect gitignore rules]'
+ '(ignore-dot)' # .ignore options
"--no-ignore-dot[don't respect .ignore files]"
$no'--ignore-dot[respect .ignore files]'
+ '(ignore-files)' # custom global ignore file options
"--no-ignore-files[don't respect --ignore-file flags]"
$no'--ignore-files[respect --ignore-file files]'
+ '(json)' # JSON options
'--json[output results in JSON Lines format]'
$no"--no-json[don't output results in JSON Lines format]"
@@ -177,10 +140,6 @@ _rg() {
$no"--no-crlf[don't use CRLF as line terminator]"
'(text)--null-data[use NUL as line terminator]'
+ '(max-columns-preview)' # max column preview options
'--max-columns-preview[show preview for long lines (with -M)]'
$no"--no-max-columns-preview[don't show preview for long lines (with -M)]"
+ '(max-depth)' # Directory-depth options
'--max-depth=[specify max number of directories to descend]:number of directories'
'!--maxdepth=:number of directories'
@@ -260,8 +219,6 @@ _rg() {
+ '(text)' # Binary-search options
{-a,--text}'[search binary files as if they were text]'
"--binary[search binary files, don't print binary data]"
$no"--no-binary[don't search binary files]"
$no"(--null-data)--no-text[don't search binary files as if they were text]"
+ '(threads)' # Thread-count options
@@ -283,10 +240,6 @@ _rg() {
{-w,--word-regexp}'[only show matches surrounded by word boundaries]'
{-x,--line-regexp}'[only show matches surrounded by line boundaries]'
+ '(unicode)' # Unicode options
$no'--unicode[enable Unicode mode]'
'--no-unicode[disable Unicode mode]'
+ '(zip)' # Compression options
'(--pre)'{-z,--search-zip}'[search in compressed files]'
$no"--no-search-zip[don't search in compressed files]"
@@ -301,9 +254,7 @@ _rg() {
))'
'*--colors=[specify color and style settings]: :->colorspec'
'--context-separator=[specify string used to separate non-continuous context lines in output]:separator'
$no"--no-context-separator[don't print context separators]"
'--debug[show debug messages]'
'--trace[show more verbose debug messages]'
'--dfa-size-limit=[specify upper size limit of generated DFA]:DFA size (bytes)'
"(1 stats)--files[show each file that would be searched (but don't search)]"
'*--ignore-file=[specify additional ignore file]:ignore file:_files'
@@ -323,7 +274,7 @@ _rg() {
'(--type-list)*: :_files'
)
# This is used with test-complete to verify that there are no options
# This is used with test_complete.sh to verify that there are no options
# listed in the help output that aren't also defined here
[[ $_RG_COMPLETE_LIST_ARGS == (1|t*|y*) ]] && {
print -rl - $args
@@ -419,7 +370,7 @@ _rg_encodings() {
shift{-,_}jis csshiftjis {,x-}sjis ms_kanji ms932
utf{,-}8 utf-16{,be,le} unicode-1-1-utf-8
windows-{31j,874,949,125{0..8}} dos-874 tis-620 ansi_x3.4-1968
x-user-defined auto none
x-user-defined auto
)
_wanted encodings expl encoding compadd -a "$@" - _encodings
@@ -1,15 +0,0 @@
ripgrep core
------------
This is the core ripgrep crate. In particular, `main.rs` is where the `main`
function lives.
Most of ripgrep core consists of two things:
* The definition of the CLI interface, including docs for every flag.
* Glue code that brings the `grep-matcher`, `grep-regex`, `grep-searcher` and
`grep-printer` crates together to actually execute the search.
Currently, there are no plans to make ripgrep core available as an independent
library. However, much of the heavy lifting of ripgrep is done via its
constituent crates, which can be reused independent of ripgrep. Unfortunately,
there is no guide or tutorial to teach folks how to do this yet.
@@ -1,38 +0,0 @@
use serde::de::Error;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use Glob;
impl Serialize for Glob {
fn serialize<S: Serializer>(
&self,
serializer: S,
) -> Result<S::Ok, S::Error> {
serializer.serialize_str(self.glob())
}
}
impl<'de> Deserialize<'de> for Glob {
fn deserialize<D: Deserializer<'de>>(
deserializer: D,
) -> Result<Self, D::Error> {
let glob = <&str as Deserialize>::deserialize(deserializer)?;
Glob::new(glob).map_err(D::Error::custom)
}
}
#[cfg(test)]
mod tests {
use Glob;
#[test]
fn glob_json_works() {
let test_glob = Glob::new("src/**/*.rs").unwrap();
let ser = serde_json::to_string(&test_glob).unwrap();
assert_eq!(ser, "\"src/**/*.rs\"");
let de: Glob = serde_json::from_str(&ser).unwrap();
assert_eq!(test_glob, de);
}
}
@@ -1,32 +0,0 @@
[package]
name = "grep"
version = "0.2.6" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "http://burntsushi.net/rustdoc/grep/"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/grep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
grep-cli = { version = "0.1.4", path = "../cli" }
grep-matcher = { version = "0.1.4", path = "../matcher" }
grep-pcre2 = { version = "0.1.4", path = "../pcre2", optional = true }
grep-printer = { version = "0.1.5", path = "../printer" }
grep-regex = { version = "0.1.8", path = "../regex" }
grep-searcher = { version = "0.1.7", path = "../searcher" }
[dev-dependencies]
termcolor = "1.0.4"
walkdir = "2.2.7"
[features]
simd-accel = ["grep-searcher/simd-accel"]
pcre2 = ["grep-pcre2"]
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
avx-accel = []
@@ -1,248 +0,0 @@
/// This list represents the default file types that ripgrep ships with. In
/// general, any file format is fair game, although it should generally be
/// limited to reasonably popular open formats. For other cases, you can add
/// types to each invocation of ripgrep with the '--type-add' flag.
///
/// If you would like to add or improve this list, please file a PR:
/// https://github.com/BurntSushi/ripgrep
///
/// Please try to keep this list sorted lexicographically and wrapped to 79
/// columns (inclusive).
#[rustfmt::skip]
pub const DEFAULT_TYPES: &[(&str, &[&str])] = &[
("agda", &["*.agda", "*.lagda"]),
("aidl", &["*.aidl"]),
("amake", &["*.mk", "*.bp"]),
("asciidoc", &["*.adoc", "*.asc", "*.asciidoc"]),
("asm", &["*.asm", "*.s", "*.S"]),
("asp", &[
"*.aspx", "*.aspx.cs", "*.aspx.cs", "*.ascx", "*.ascx.cs", "*.ascx.vb",
]),
("ats", &["*.ats", "*.dats", "*.sats", "*.hats"]),
("avro", &["*.avdl", "*.avpr", "*.avsc"]),
("awk", &["*.awk"]),
("bazel", &["*.bzl", "WORKSPACE", "BUILD", "BUILD.bazel"]),
("bitbake", &["*.bb", "*.bbappend", "*.bbclass", "*.conf", "*.inc"]),
("brotli", &["*.br"]),
("buildstream", &["*.bst"]),
("bzip2", &["*.bz2", "*.tbz2"]),
("c", &["*.[chH]", "*.[chH].in", "*.cats"]),
("cabal", &["*.cabal"]),
("cbor", &["*.cbor"]),
("ceylon", &["*.ceylon"]),
("clojure", &["*.clj", "*.cljc", "*.cljs", "*.cljx"]),
("cmake", &["*.cmake", "CMakeLists.txt"]),
("coffeescript", &["*.coffee"]),
("config", &["*.cfg", "*.conf", "*.config", "*.ini"]),
("coq", &["*.v"]),
("cpp", &[
"*.[ChH]", "*.cc", "*.[ch]pp", "*.[ch]xx", "*.hh", "*.inl",
"*.[ChH].in", "*.cc.in", "*.[ch]pp.in", "*.[ch]xx.in", "*.hh.in",
]),
("creole", &["*.creole"]),
("crystal", &["Projectfile", "*.cr"]),
("cs", &["*.cs"]),
("csharp", &["*.cs"]),
("cshtml", &["*.cshtml"]),
("css", &["*.css", "*.scss"]),
("csv", &["*.csv"]),
("cython", &["*.pyx", "*.pxi", "*.pxd"]),
("d", &["*.d"]),
("dart", &["*.dart"]),
("dhall", &["*.dhall"]),
("diff", &["*.patch", "*.diff"]),
("docker", &["*Dockerfile*"]),
("ebuild", &["*.ebuild"]),
("edn", &["*.edn"]),
("elisp", &["*.el"]),
("elixir", &["*.ex", "*.eex", "*.exs"]),
("elm", &["*.elm"]),
("erb", &["*.erb"]),
("erlang", &["*.erl", "*.hrl"]),
("fidl", &["*.fidl"]),
("fish", &["*.fish"]),
("fortran", &[
"*.f", "*.F", "*.f77", "*.F77", "*.pfo",
"*.f90", "*.F90", "*.f95", "*.F95",
]),
("fsharp", &["*.fs", "*.fsx", "*.fsi"]),
("gap", &["*.g", "*.gap", "*.gi", "*.gd", "*.tst"]),
("gn", &["*.gn", "*.gni"]),
("go", &["*.go"]),
("gradle", &["*.gradle"]),
("groovy", &["*.groovy", "*.gradle"]),
("gzip", &["*.gz", "*.tgz"]),
("h", &["*.h", "*.hpp"]),
("haml", &["*.haml"]),
("haskell", &["*.hs", "*.lhs", "*.cpphs", "*.c2hs", "*.hsc"]),
("hbs", &["*.hbs"]),
("hs", &["*.hs", "*.lhs"]),
("html", &["*.htm", "*.html", "*.ejs"]),
("idris", &["*.idr", "*.lidr"]),
("java", &["*.java", "*.jsp", "*.jspx", "*.properties"]),
("jinja", &["*.j2", "*.jinja", "*.jinja2"]),
("jl", &["*.jl"]),
("js", &["*.js", "*.jsx", "*.vue"]),
("json", &["*.json", "composer.lock"]),
("jsonl", &["*.jsonl"]),
("julia", &["*.jl"]),
("jupyter", &["*.ipynb", "*.jpynb"]),
("k", &["*.k"]),
("kotlin", &["*.kt", "*.kts"]),
("less", &["*.less"]),
("license", &[
// General
"COPYING", "COPYING[.-]*",
"COPYRIGHT", "COPYRIGHT[.-]*",
"EULA", "EULA[.-]*",
"licen[cs]e", "licen[cs]e.*",
"LICEN[CS]E", "LICEN[CS]E[.-]*", "*[.-]LICEN[CS]E*",
"NOTICE", "NOTICE[.-]*",
"PATENTS", "PATENTS[.-]*",
"UNLICEN[CS]E", "UNLICEN[CS]E[.-]*",
// GPL (gpl.txt, etc.)
"agpl[.-]*",
"gpl[.-]*",
"lgpl[.-]*",
// Other license-specific (APACHE-2.0.txt, etc.)
"AGPL-*[0-9]*",
"APACHE-*[0-9]*",
"BSD-*[0-9]*",
"CC-BY-*",
"GFDL-*[0-9]*",
"GNU-*[0-9]*",
"GPL-*[0-9]*",
"LGPL-*[0-9]*",
"MIT-*[0-9]*",
"MPL-*[0-9]*",
"OFL-*[0-9]*",
]),
("lisp", &["*.el", "*.jl", "*.lisp", "*.lsp", "*.sc", "*.scm"]),
("lock", &["*.lock", "package-lock.json"]),
("log", &["*.log"]),
("lua", &["*.lua"]),
("lz4", &["*.lz4"]),
("lzma", &["*.lzma"]),
("m4", &["*.ac", "*.m4"]),
("make", &[
"[Gg][Nn][Uu]makefile", "[Mm]akefile",
"[Gg][Nn][Uu]makefile.am", "[Mm]akefile.am",
"[Gg][Nn][Uu]makefile.in", "[Mm]akefile.in",
"*.mk", "*.mak"
]),
("mako", &["*.mako", "*.mao"]),
("man", &["*.[0-9lnpx]", "*.[0-9][cEFMmpSx]"]),
("markdown", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
("matlab", &["*.m"]),
("md", &["*.markdown", "*.md", "*.mdown", "*.mkdn"]),
("mk", &["mkfile"]),
("ml", &["*.ml"]),
("msbuild", &[
"*.csproj", "*.fsproj", "*.vcxproj", "*.proj", "*.props", "*.targets",
]),
("nim", &["*.nim", "*.nimf", "*.nimble", "*.nims"]),
("nix", &["*.nix"]),
("objc", &["*.h", "*.m"]),
("objcpp", &["*.h", "*.mm"]),
("ocaml", &["*.ml", "*.mli", "*.mll", "*.mly"]),
("org", &["*.org", "*.org_archive"]),
("pascal", &["*.pas", "*.dpr", "*.lpr", "*.pp", "*.inc"]),
("pdf", &["*.pdf"]),
("perl", &["*.perl", "*.pl", "*.PL", "*.plh", "*.plx", "*.pm", "*.t"]),
("php", &["*.php", "*.php3", "*.php4", "*.php5", "*.phtml"]),
("pod", &["*.pod"]),
("postscript", &["*.eps", "*.ps"]),
("protobuf", &["*.proto"]),
("ps", &["*.cdxml", "*.ps1", "*.ps1xml", "*.psd1", "*.psm1"]),
("puppet", &["*.erb", "*.pp", "*.rb"]),
("purs", &["*.purs"]),
("py", &["*.py"]),
("qmake", &["*.pro", "*.pri", "*.prf"]),
("qml", &["*.qml"]),
("r", &["*.R", "*.r", "*.Rmd", "*.Rnw"]),
("rdoc", &["*.rdoc"]),
("readme", &["README*", "*README"]),
("robot", &["*.robot"]),
("rst", &["*.rst"]),
("ruby", &["Gemfile", "*.gemspec", ".irbrc", "Rakefile", "*.rb"]),
("rust", &["*.rs"]),
("sass", &["*.sass", "*.scss"]),
("scala", &["*.scala", "*.sbt"]),
("sh", &[
// Portable/misc. init files
".login", ".logout", ".profile", "profile",
// bash-specific init files
".bash_login", "bash_login",
".bash_logout", "bash_logout",
".bash_profile", "bash_profile",
".bashrc", "bashrc", "*.bashrc",
// csh-specific init files
".cshrc", "*.cshrc",
// ksh-specific init files
".kshrc", "*.kshrc",
// tcsh-specific init files
".tcshrc",
// zsh-specific init files
".zshenv", "zshenv",
".zlogin", "zlogin",
".zlogout", "zlogout",
".zprofile", "zprofile",
".zshrc", "zshrc",
// Extensions
"*.bash", "*.csh", "*.ksh", "*.sh", "*.tcsh", "*.zsh",
]),
("slim", &["*.skim", "*.slim", "*.slime"]),
("smarty", &["*.tpl"]),
("sml", &["*.sml", "*.sig"]),
("soy", &["*.soy"]),
("spark", &["*.spark"]),
("spec", &["*.spec"]),
("sql", &["*.sql", "*.psql"]),
("stylus", &["*.styl"]),
("sv", &["*.v", "*.vg", "*.sv", "*.svh", "*.h"]),
("svg", &["*.svg"]),
("swift", &["*.swift"]),
("swig", &["*.def", "*.i"]),
("systemd", &[
"*.automount", "*.conf", "*.device", "*.link", "*.mount", "*.path",
"*.scope", "*.service", "*.slice", "*.socket", "*.swap", "*.target",
"*.timer",
]),
("taskpaper", &["*.taskpaper"]),
("tcl", &["*.tcl"]),
("tex", &["*.tex", "*.ltx", "*.cls", "*.sty", "*.bib", "*.dtx", "*.ins"]),
("textile", &["*.textile"]),
("tf", &["*.tf"]),
("thrift", &["*.thrift"]),
("toml", &["*.toml", "Cargo.lock"]),
("ts", &["*.ts", "*.tsx"]),
("twig", &["*.twig"]),
("txt", &["*.txt"]),
("typoscript", &["*.typoscript", "*.ts"]),
("vala", &["*.vala"]),
("vb", &["*.vb"]),
("verilog", &["*.v", "*.vh", "*.sv", "*.svh"]),
("vhdl", &["*.vhd", "*.vhdl"]),
("vim", &["*.vim"]),
("vimscript", &["*.vim"]),
("webidl", &["*.idl", "*.webidl", "*.widl"]),
("wiki", &["*.mediawiki", "*.wiki"]),
("xml", &[
"*.xml", "*.xml.dist", "*.dtd", "*.xsl", "*.xslt", "*.xsd", "*.xjb",
"*.rng", "*.sch", "*.xhtml",
]),
("xz", &["*.xz", "*.txz"]),
("yacc", &["*.y"]),
("yaml", &["*.yaml", "*.yml"]),
("z", &["*.Z"]),
("zig", &["*.zig"]),
("zsh", &[
".zshenv", "zshenv",
".zlogin", "zlogin",
".zlogout", "zlogout",
".zprofile", "zprofile",
".zshrc", "zshrc",
"*.zsh",
]),
("zstd", &["*.zst", "*.zstd"]),
];
@@ -1,22 +0,0 @@
[package]
name = "grep-regex"
version = "0.1.8" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use Rust's regex library with the 'grep' crate.
"""
documentation = "https://docs.rs/grep-regex"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/regex"
readme = "README.md"
keywords = ["regex", "grep", "search", "pattern", "line"]
license = "Unlicense/MIT"
[dependencies]
aho-corasick = "0.7.3"
bstr = "0.2.10"
grep-matcher = { version = "0.1.2", path = "../matcher" }
log = "0.4.5"
regex = "1.1"
regex-syntax = "0.6.5"
thread_local = "1"

View File

@@ -1,125 +0,0 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder, MatchKind};
use grep_matcher::{Match, Matcher, NoError};
use regex_syntax::hir::Hir;
use error::Error;
use matcher::RegexCaptures;
/// A matcher for an alternation of literals.
///
/// Ideally, this optimization would be pushed down into the regex engine, but
/// making this work correctly there would require quite a bit of refactoring.
// Moreover, doing it one layer above lets us do things like, "if we
/// specifically only want to search for literals, then don't bother with
/// regex parsing at all."
#[derive(Clone, Debug)]
pub struct MultiLiteralMatcher {
/// The Aho-Corasick automaton.
ac: AhoCorasick,
}
impl MultiLiteralMatcher {
/// Create a new multi-literal matcher from the given literals.
pub fn new<B: AsRef<[u8]>>(
literals: &[B],
) -> Result<MultiLiteralMatcher, Error> {
let ac = AhoCorasickBuilder::new()
.match_kind(MatchKind::LeftmostFirst)
.auto_configure(literals)
.build_with_size::<usize, _, _>(literals)
.map_err(Error::regex)?;
Ok(MultiLiteralMatcher { ac })
}
}
impl Matcher for MultiLiteralMatcher {
type Captures = RegexCaptures;
type Error = NoError;
fn find_at(
&self,
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, NoError> {
match self.ac.find(&haystack[at..]) {
None => Ok(None),
Some(m) => Ok(Some(Match::new(at + m.start(), at + m.end()))),
}
}
fn new_captures(&self) -> Result<RegexCaptures, NoError> {
Ok(RegexCaptures::simple())
}
fn capture_count(&self) -> usize {
1
}
fn capture_index(&self, _: &str) -> Option<usize> {
None
}
fn captures_at(
&self,
haystack: &[u8],
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, NoError> {
caps.set_simple(None);
let mat = self.find_at(haystack, at)?;
caps.set_simple(mat);
Ok(mat.is_some())
}
// We specifically do not implement other methods like find_iter. Namely,
// the iter methods are guaranteed to be correct by virtue of implementing
// find_at above.
}
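For intuition, leftmost-first semantics mimic a regex alternation: among all matches starting at the leftmost possible position, the pattern listed first wins. A minimal sketch against the aho-corasick 0.7 API used above:

    use aho_corasick::{AhoCorasickBuilder, MatchKind};

    let ac = AhoCorasickBuilder::new()
        .match_kind(MatchKind::LeftmostFirst)
        .build(&["foo", "foobar"]);
    // "foo" is listed first, so it beats the longer "foobar" here,
    // exactly as it would in the regex alternation `foo|foobar`.
    let m = ac.find("xfoobar").unwrap();
    assert_eq!((1, 4), (m.start(), m.end()));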
/// Alternation literals checks if the given HIR is a simple alternation of
/// literals, and if so, returns them. Otherwise, this returns None.
pub fn alternation_literals(expr: &Hir) -> Option<Vec<Vec<u8>>> {
use regex_syntax::hir::{HirKind, Literal};
// This is pretty hacky, but basically, if `is_alternation_literal` is
// true, then we can make several assumptions about the structure of our
// HIR. This is what justifies the `unreachable!` statements below.
if !expr.is_alternation_literal() {
return None;
}
let alts = match *expr.kind() {
HirKind::Alternation(ref alts) => alts,
_ => return None, // one literal isn't worth it
};
let extendlit = |lit: &Literal, dst: &mut Vec<u8>| match *lit {
Literal::Unicode(c) => {
let mut buf = [0; 4];
dst.extend_from_slice(c.encode_utf8(&mut buf).as_bytes());
}
Literal::Byte(b) => {
dst.push(b);
}
};
let mut lits = vec![];
for alt in alts {
let mut lit = vec![];
match *alt.kind() {
HirKind::Empty => {}
HirKind::Literal(ref x) => extendlit(x, &mut lit),
HirKind::Concat(ref exprs) => {
for e in exprs {
match *e.kind() {
HirKind::Literal(ref x) => extendlit(x, &mut lit),
_ => unreachable!("expected literal, got {:?}", e),
}
}
}
_ => unreachable!("expected literal or concat, got {:?}", alt),
}
lits.push(lit);
}
Some(lits)
}
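For example (a sketch; the HIR is produced with regex-syntax's parser, which is already a dependency here):

    use regex_syntax::Parser;

    let hir = Parser::new().parse("foo|bar|baz").unwrap();
    assert_eq!(
        Some(vec![b"foo".to_vec(), b"bar".to_vec(), b"baz".to_vec()]),
        alternation_literals(&hir),
    );

    // Anything fancier than a plain alternation of literals yields None.
    let hir = Parser::new().parse("foo|ba?r").unwrap();
    assert_eq!(None, alternation_literals(&hir));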

View File

@@ -1,33 +0,0 @@
[package]
name = "grep-searcher"
version = "0.1.7" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "https://docs.rs/grep-searcher"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/searcher"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
bytecount = "0.6"
encoding_rs = "0.8.14"
encoding_rs_io = "0.1.6"
grep-matcher = { version = "0.1.2", path = "../matcher" }
log = "0.4.5"
memmap = "0.7"
[dev-dependencies]
grep-regex = { version = "0.1.3", path = "../regex" }
regex = "1.1"
[features]
default = ["bytecount/runtime-dispatch-simd"]
simd-accel = ["encoding_rs/simd-accel"]
# This feature is DEPRECATED. Runtime dispatch is used for SIMD now.
avx-accel = []

View File

@@ -34,20 +34,12 @@ files/directories and binary files.
ripgrep's default regex engine uses finite automata and guarantees linear
time searching. Because of this, features like backreferences and arbitrary
look-around are not supported. However, if ripgrep is built with PCRE2, then
the *--pcre2* flag can be used to enable backreferences and look-around.
the --pcre2 flag can be used to enable backreferences and look-around.
ripgrep supports configuration files. Set *RIPGREP_CONFIG_PATH* to a
ripgrep supports configuration files. Set RIPGREP_CONFIG_PATH to a
configuration file. The file can specify one shell argument per line. Lines
starting with *#* are ignored. For more details, see the man page or the
*README*.
ripgrep will automatically detect if stdin exists and search stdin for a regex
pattern, e.g. *ls | rg foo*. In some environments, stdin may exist when it
shouldn't. To turn off stdin detection explicitly specify the directory to
search, e.g. *rg foo ./*.
Tip: to disable all smart filtering and make ripgrep behave a bit more like
classical grep, use *rg -uuu*.
starting with '#' are ignored. For more details, see the man page or the
README.
REGEX SYNTAX
@@ -60,10 +52,10 @@ https://docs.rs/regex/*/regex/bytes/index.html#syntax
To a first approximation, ripgrep uses Perl-like regexes without look-around or
backreferences. This makes them very similar to the "extended" (ERE) regular
expressions supported by *egrep*, but with a few additional features like
expressions supported by `egrep`, but with a few additional features like
Unicode character classes.
If you're using ripgrep with the *--pcre2* flag, then please consult
If you're using ripgrep with the --pcre2 flag, then please consult
https://www.pcre.org or the PCRE2 man pages for documentation on the supported
syntax.
@@ -75,109 +67,19 @@ _PATTERN_::
dash, use the -e/--regexp option.
_PATH_::
A file or directory to search. Directories are searched recursively. File
paths specified explicitly on the command line override glob and ignore
rules.
A file or directory to search. Directories are searched recursively. Paths
specified explicitly on the command line override glob and ignore rules.
OPTIONS
-------
Note that for many options, there exist flags to disable them. In some cases,
those flags are not listed in a first class way below. For example, the
*--column* flag (listed below) enables column numbers in ripgrep's output, but
the *--no-column* flag (not listed below) disables them. The reverse can also
exist. For example, the *--no-ignore* flag (listed below) disables ripgrep's
*gitignore* logic, but the *--ignore* flag (not listed below) enables it. These
flags are useful for overriding a ripgrep configuration file on the command
line. Each flag's documentation notes whether an inverted flag exists. In all
cases, the flag specified last takes precedence.
{OPTIONS}
EXIT STATUS
-----------
If ripgrep finds a match, then the exit status of the program is 0. If no match
could be found, then the exit status is 1. If an error occurred, then the exit
status is always 2 unless ripgrep was run with the *--quiet* flag and a match
was found. In summary:
* `0` exit status occurs only when at least one match was found, and if
no error occurred, unless *--quiet* was given.
* `1` exit status occurs only when no match was found and no error occurred.
* `2` exit status occurs when an error occurred. This is true for both
catastrophic errors (e.g., a regex syntax error) and for soft errors (e.g.,
unable to read a file).
AUTOMATIC FILTERING
-------------------
TL;DR - To disable automatic filtering, use 'rg -uuu'.
One of ripgrep's most important features is its automatic smart filtering.
It is the most apparent differentiating feature between ripgrep and other tools
like 'grep'. As such, its behavior may be surprising to users that aren't
expecting it.
ripgrep does four types of filtering automatically:
1. Files and directories that match ignore rules are not searched.
2. Hidden files and directories are not searched.
3. Binary files (files with a 'NUL' byte) are not searched.
4. Symbolic links are not followed.
The first type of filtering is the most sophisticated. ripgrep will attempt to
respect your gitignore rules as faithfully as possible. In particular, this
includes the following:
* Any global rules, e.g., in '$HOME/.config/git/ignore'.
* Any rules in '.gitignore'.
* Any local rules, e.g., in '.git/info/exclude'.
In some cases, ripgrep and git will not always be in sync in terms of which
files are ignored. For example, a file that is ignored via '.gitignore' but is
tracked by git would not be searched by ripgrep even though git tracks it. This
is unlikely to ever be fixed. Instead, you should either make sure your exclude
rules match the files you track precisely, or otherwise use 'git grep' for
search.
Additional ignore rules can be provided outside of a git context:
* Any rules in '.ignore'.
* Any rules in '.rgignore'.
* Any rules in files specified with the '--ignore-file' flag.
The precedence of ignore rules is as follows, with later items overriding
earlier items:
* Files given by '--ignore-file'.
* Global gitignore rules, e.g., from '$HOME/.config/git/ignore'.
* Local rules from '.git/info/exclude'.
* Rules from '.gitignore'.
* Rules from '.ignore'.
* Rules from '.rgignore'.
So for example, if 'foo' were in a '.gitignore' and '!foo' were in an
'.rgignore', then 'foo' would not be ignored since '.rgignore' takes precedence
over '.gitignore'.
Each of the types of filtering can be configured via command line flags:
* There are several flags starting with '--no-ignore' that toggle which,
if any, ignore rules are respected. '--no-ignore' by itself will disable
all of them.
* '--hidden' will force ripgrep to search hidden files and directories.
* '--binary' will force ripgrep to search binary files.
* '-L/--follow' will force ripgrep to follow symlinks.
As a special shorthand, the `-u` flag can be specified up to three times. Each
additional time incrementally decreases filtering:
* '-u' is equivalent to '--no-ignore'.
* '-uu' is equivalent to '--no-ignore --hidden'.
* '-uuu' is equivalent to '--no-ignore --hidden --binary'.
In particular, 'rg -uuu' should search the same exact content as 'grep -r'.
could be found, then the exit status is non-zero.
CONFIGURATION FILES
@@ -186,12 +88,12 @@ ripgrep supports reading configuration files that change ripgrep's default
behavior. The format of the configuration file is an "rc" style and is very
simple. It is defined by two rules:
1. Every line is a shell argument, after trimming whitespace.
2. Lines starting with *#* (optionally preceded by any amount of
whitespace) are ignored.
1. Every line is a shell argument, after trimming ASCII whitespace.
2. Lines starting with _#_ (optionally preceded by any amount of
ASCII whitespace) are ignored.
ripgrep will look for a single configuration file if and only if the
*RIPGREP_CONFIG_PATH* environment variable is set and is non-empty.
_RIPGREP_CONFIG_PATH_ environment variable is set and is non-empty.
ripgrep will parse shell arguments from this file on startup and will
behave as if the arguments in this file were prepended to any explicit
arguments given to ripgrep on the command line.
@@ -219,16 +121,16 @@ would behave identically to the following command
same with using globs
--glob=!.git
--glob=!git/*
or
--glob
!.git
!git/*
would behave identically to the following command
rg --glob '!.git' foo
rg --glob '!git/*' foo
ripgrep also provides a flag, *--no-config*, that when present will suppress
any and all support for configuration. This includes any future support
@@ -253,35 +155,20 @@ SHELL COMPLETION
Shell completion files are included in the release tarball for Bash, Fish, Zsh
and PowerShell.
For *bash*, move *rg.bash* to *$XDG_CONFIG_HOME/bash_completion*
or */etc/bash_completion.d/*.
For *bash*, move `rg.bash` to `$XDG_CONFIG_HOME/bash_completion`
or `/etc/bash_completion.d/`.
For *fish*, move *rg.fish* to *$HOME/.config/fish/completions*.
For *fish*, move `rg.fish` to `$HOME/.config/fish/completions`.
For *zsh*, move *_rg* to one of your *$fpath* directories.
For *zsh*, move `_rg` to one of your `$fpath` directories.
CAVEATS
-------
ripgrep may abort unexpectedly when using default settings if it searches a
file that is simultaneously truncated. This behavior can be avoided by passing
the *--no-mmap* flag which will forcefully disable the use of memory maps in
all cases.
ripgrep may use a large amount of memory depending on a few factors. Firstly,
if ripgrep uses parallelism for search (the default), then the entire output
for each individual file is buffered into memory in order to prevent
interleaving matches in the output. To avoid this, you can disable parallelism
with the *-j1* flag. Secondly, ripgrep always needs to have at least a single
line in memory in order to execute a search. A file with a very long line can
thus cause ripgrep to use a lot of memory. Generally, this only occurs when
searching binary data with the *-a* flag enabled. (When the *-a* flag isn't
enabled, ripgrep will replace all NUL bytes with line terminators, which
typically prevents exorbitant memory usage.) Thirdly, when ripgrep searches
a large file using a memory map, the process will report its resident memory
usage as the size of the file. However, this does not mean ripgrep actually
needed to use that much memory; the operating system will generally handle this
for you.
the --no-mmap flag which will forcefully disable the use of memory maps in all
cases.
VERSION
@@ -293,11 +180,7 @@ HOMEPAGE
--------
https://github.com/BurntSushi/ripgrep
Please report bugs and feature requests in the issue tracker. Please do your
best to provide a reproducible test case for bugs. This should include the
corpus being searched, the *rg* command, the actual output and the expected
output. Please also include the output of running the same *rg* command but
with the *--debug* flag.
Please report bugs and feature requests in the issue tracker.
AUTHORS

View File

@@ -1,6 +1,6 @@
[package]
name = "globset"
version = "0.4.5" #:version
version = "0.4.2" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Cross platform single glob and glob set matching. Glob set matching is the
@@ -8,8 +8,8 @@ process of matching one or more glob patterns against a single candidate path
simultaneously, and returning all of the globs that matched.
"""
documentation = "https://docs.rs/globset"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/globset"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/globset"
readme = "README.md"
keywords = ["regex", "glob", "multiple", "set", "pattern"]
license = "Unlicense/MIT"
@@ -19,18 +19,14 @@ name = "globset"
bench = false
[dependencies]
aho-corasick = "0.7.3"
bstr = { version = "0.2.0", default-features = false, features = ["std"] }
aho-corasick = "0.6.8"
fnv = "1.0.6"
log = "0.4.5"
regex = "1.1.5"
serde = { version = "1.0.104", optional = true }
memchr = "2.0.2"
regex = "1.0.5"
[dev-dependencies]
glob = "0.3.0"
lazy_static = "1"
serde_json = "1.0.45"
glob = "0.2.11"
[features]
simd-accel = []
serde1 = ["serde"]

View File

@@ -29,10 +29,6 @@ and this to your crate root:
extern crate globset;
```
### Features
* `serde1`: Enables implementing Serde traits on the `Glob` type.
### Example: one glob
This example shows how to match a single glob against a single file path.

View File

@@ -6,9 +6,14 @@ tool itself, see the benchsuite directory.
extern crate glob;
extern crate globset;
#[macro_use]
extern crate lazy_static;
extern crate regex;
extern crate test;
use std::ffi::OsStr;
use std::path::Path;
use globset::{Candidate, Glob, GlobMatcher, GlobSet, GlobSetBuilder};
const EXT: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";

View File

@@ -2,13 +2,13 @@ use std::fmt;
use std::hash;
use std::iter;
use std::ops::{Deref, DerefMut};
use std::path::{is_separator, Path};
use std::path::{Path, is_separator};
use std::str;
use regex;
use regex::bytes::Regex;
use {new_regex, Candidate, Error, ErrorKind};
use {Candidate, Error, ErrorKind, new_regex};
/// Describes a matching strategy for a particular pattern.
///
@@ -85,16 +85,16 @@ pub struct Glob {
}
impl PartialEq for Glob {
fn eq(&self, other: &Glob) -> bool {
self.glob == other.glob && self.opts == other.opts
}
fn eq(&self, other: &Glob) -> bool {
self.glob == other.glob && self.opts == other.opts
}
}
impl hash::Hash for Glob {
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.glob.hash(state);
self.opts.hash(state);
}
fn hash<H: hash::Hasher>(&self, state: &mut H) {
self.glob.hash(state);
self.opts.hash(state);
}
}
impl fmt::Display for Glob {
@@ -103,14 +103,6 @@ impl fmt::Display for Glob {
}
}
impl str::FromStr for Glob {
type Err = Error;
fn from_str(glob: &str) -> Result<Self, Self::Err> {
Self::new(glob)
}
}
/// A matcher for a single pattern.
#[derive(Clone, Debug)]
pub struct GlobMatcher {
@@ -130,11 +122,6 @@ impl GlobMatcher {
pub fn is_match_candidate(&self, path: &Candidate) -> bool {
self.re.is_match(&path.path)
}
/// Returns the `Glob` used to compile this matcher.
pub fn glob(&self) -> &Glob {
&self.pat
}
}
/// A strategic matcher for a single pattern.
@@ -227,15 +214,11 @@ struct Tokens(Vec<Token>);
impl Deref for Tokens {
type Target = Vec<Token>;
fn deref(&self) -> &Vec<Token> {
&self.0
}
fn deref(&self) -> &Vec<Token> { &self.0 }
}
impl DerefMut for Tokens {
fn deref_mut(&mut self) -> &mut Vec<Token> {
&mut self.0
}
fn deref_mut(&mut self) -> &mut Vec<Token> { &mut self.0 }
}
#[derive(Clone, Debug, Eq, PartialEq)]
@@ -246,7 +229,10 @@ enum Token {
RecursivePrefix,
RecursiveSuffix,
RecursiveZeroOrMore,
Class { negated: bool, ranges: Vec<(char, char)> },
Class {
negated: bool,
ranges: Vec<(char, char)>,
},
Alternates(Vec<Tokens>),
}
@@ -258,9 +244,12 @@ impl Glob {
/// Returns a matcher for this pattern.
pub fn compile_matcher(&self) -> GlobMatcher {
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
GlobMatcher { pat: self.clone(), re: re }
let re = new_regex(&self.re)
.expect("regex compilation shouldn't fail");
GlobMatcher {
pat: self.clone(),
re: re,
}
}
/// Returns a strategic matcher.
@@ -271,9 +260,13 @@ impl Glob {
#[cfg(test)]
fn compile_strategic_matcher(&self) -> GlobStrategic {
let strategy = MatchStrategy::new(self);
let re =
new_regex(&self.re).expect("regex compilation shouldn't fail");
GlobStrategic { strategy: strategy, pat: self.clone(), re: re }
let re = new_regex(&self.re)
.expect("regex compilation shouldn't fail");
GlobStrategic {
strategy: strategy,
pat: self.clone(),
re: re,
}
}
/// Returns the original glob pattern used to build this pattern.
@@ -531,7 +524,7 @@ impl Glob {
| Token::RecursiveZeroOrMore => {
return None;
}
Token::Class { .. } | Token::Alternates(..) => {
Token::Class{..} | Token::Alternates(..) => {
// We *could* be a little smarter here, but either one
// of these is going to prevent our literal optimizations
// anyway, so give up.
@@ -568,7 +561,10 @@ impl<'a> GlobBuilder<'a> {
///
/// The pattern is not compiled until `build` is called.
pub fn new(glob: &'a str) -> GlobBuilder<'a> {
GlobBuilder { glob: glob, opts: GlobOptions::default() }
GlobBuilder {
glob: glob,
opts: GlobOptions::default(),
}
}
/// Parses and builds the pattern.
@@ -841,63 +837,40 @@ impl<'a> Parser<'a> {
fn parse_star(&mut self) -> Result<(), Error> {
let prev = self.prev;
if self.peek() != Some('*') {
if self.chars.peek() != Some(&'*') {
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
assert!(self.bump() == Some('*'));
if !self.have_tokens()? {
if !self.peek().map_or(true, is_separator) {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
} else {
self.push_token(Token::RecursivePrefix)?;
assert!(self.bump().map_or(true, is_separator));
self.push_token(Token::RecursivePrefix)?;
let next = self.bump();
if !next.map(is_separator).unwrap_or(true) {
return Err(self.error(ErrorKind::InvalidRecursive));
}
return Ok(());
}
self.pop_token()?;
if !prev.map(is_separator).unwrap_or(false) {
if self.stack.len() <= 1
|| (prev != Some(',') && prev != Some('{'))
{
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
|| (prev != Some(',') && prev != Some('{')) {
return Err(self.error(ErrorKind::InvalidRecursive));
}
}
let is_suffix = match self.peek() {
match self.chars.peek() {
None => {
assert!(self.bump().is_none());
true
self.push_token(Token::RecursiveSuffix)
}
Some(',') | Some('}') if self.stack.len() >= 2 => true,
Some(c) if is_separator(c) => {
Some(&',') | Some(&'}') if self.stack.len() >= 2 => {
self.push_token(Token::RecursiveSuffix)
}
Some(&c) if is_separator(c) => {
assert!(self.bump().map(is_separator).unwrap_or(false));
false
}
_ => {
self.push_token(Token::ZeroOrMore)?;
self.push_token(Token::ZeroOrMore)?;
return Ok(());
}
};
match self.pop_token()? {
Token::RecursivePrefix => {
self.push_token(Token::RecursivePrefix)?;
}
Token::RecursiveSuffix => {
self.push_token(Token::RecursiveSuffix)?;
}
_ => {
if is_suffix {
self.push_token(Token::RecursiveSuffix)?;
} else {
self.push_token(Token::RecursiveZeroOrMore)?;
}
self.push_token(Token::RecursiveZeroOrMore)
}
_ => Err(self.error(ErrorKind::InvalidRecursive)),
}
Ok(())
}
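The practical upshot of this hunk, hedged since the diff interleaves both versions of parse_star: the newer parser degrades a misplaced `**` into two ordinary `*` tokens instead of returning InvalidRecursive. A sketch of that newer behavior:

    use globset::Glob;

    // `**` flanked by separators is recursive...
    let m = Glob::new("a/**/b").unwrap().compile_matcher();
    assert!(m.is_match("a/x/y/b"));

    // ...while `**` glued to other characters now acts as `*` twice
    // (the older parser rejected this glob outright).
    let m = Glob::new("a**b").unwrap().compile_matcher();
    assert!(m.is_match("aXYb"));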
fn parse_class(&mut self) -> Result<(), Error> {
@@ -961,10 +934,7 @@ impl<'a> Parser<'a> {
// invariant: in_range is only set when there is
// already at least one character seen.
add_to_last_range(
&self.glob,
ranges.last_mut().unwrap(),
c,
)?;
&self.glob, ranges.last_mut().unwrap(), c)?;
} else {
ranges.push((c, c));
}
@@ -978,7 +948,10 @@ impl<'a> Parser<'a> {
// it as a literal.
ranges.push(('-', '-'));
}
self.push_token(Token::Class { negated: negated, ranges: ranges })
self.push_token(Token::Class {
negated: negated,
ranges: ranges,
})
}
fn bump(&mut self) -> Option<char> {
@@ -986,10 +959,6 @@ impl<'a> Parser<'a> {
self.cur = self.chars.next();
self.cur
}
fn peek(&mut self) -> Option<char> {
self.chars.peek().map(|&ch| ch)
}
}
#[cfg(test)]
@@ -1007,9 +976,9 @@ fn ends_with(needle: &[u8], haystack: &[u8]) -> bool {
#[cfg(test)]
mod tests {
use super::Token::*;
use {GlobSetBuilder, ErrorKind};
use super::{Glob, GlobBuilder, Token};
use {ErrorKind, GlobSetBuilder};
use super::Token::*;
#[derive(Clone, Copy, Debug, Default)]
struct Options {
@@ -1025,7 +994,7 @@ mod tests {
let pat = Glob::new($pat).unwrap();
assert_eq!($tokens, pat.tokens.0);
}
};
}
}
macro_rules! syntaxerr {
@@ -1035,7 +1004,7 @@ mod tests {
let err = Glob::new($pat).unwrap_err();
assert_eq!(&$err, err.kind());
}
};
}
}
macro_rules! toregex {
@@ -1117,9 +1086,7 @@ mod tests {
};
}
fn s(string: &str) -> String {
string.to_string()
}
fn s(string: &str) -> String { string.to_string() }
fn class(s: char, e: char) -> Token {
Class { negated: false, ranges: vec![(s, e)] }
@@ -1143,20 +1110,16 @@ mod tests {
syntax!(any2, "a?b", vec![Literal('a'), Any, Literal('b')]);
syntax!(seq1, "*", vec![ZeroOrMore]);
syntax!(seq2, "a*b", vec![Literal('a'), ZeroOrMore, Literal('b')]);
syntax!(
seq3,
"*a*b*",
vec![ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,]
);
syntax!(seq3, "*a*b*", vec![
ZeroOrMore, Literal('a'), ZeroOrMore, Literal('b'), ZeroOrMore,
]);
syntax!(rseq1, "**", vec![RecursivePrefix]);
syntax!(rseq2, "**/", vec![RecursivePrefix]);
syntax!(rseq3, "/**", vec![RecursiveSuffix]);
syntax!(rseq4, "/**/", vec![RecursiveZeroOrMore]);
syntax!(
rseq5,
"a/**/b",
vec![Literal('a'), RecursiveZeroOrMore, Literal('b'),]
);
syntax!(rseq5, "a/**/b", vec![
Literal('a'), RecursiveZeroOrMore, Literal('b'),
]);
syntax!(cls1, "[a]", vec![class('a', 'a')]);
syntax!(cls2, "[!a]", vec![classn('a', 'a')]);
syntax!(cls3, "[a-z]", vec![class('a', 'z')]);
@@ -1168,11 +1131,9 @@ mod tests {
syntax!(cls9, "[a-]", vec![rclass(&[('a', 'a'), ('-', '-')])]);
syntax!(cls10, "[-a-z]", vec![rclass(&[('-', '-'), ('a', 'z')])]);
syntax!(cls11, "[a-z-]", vec![rclass(&[('a', 'z'), ('-', '-')])]);
syntax!(
cls12,
"[-a-z-]",
vec![rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),]
);
syntax!(cls12, "[-a-z-]", vec![
rclass(&[('-', '-'), ('a', 'z'), ('-', '-')]),
]);
syntax!(cls13, "[]-z]", vec![class(']', 'z')]);
syntax!(cls14, "[--z]", vec![class('-', 'z')]);
syntax!(cls15, "[ --]", vec![class(' ', '-')]);
@@ -1183,6 +1144,13 @@ mod tests {
syntax!(cls20, "[^a]", vec![classn('a', 'a')]);
syntax!(cls21, "[^a-z]", vec![classn('a', 'z')]);
syntaxerr!(err_rseq1, "a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq2, "**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq3, "a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq4, "***", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq5, "/a**", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq6, "/**a", ErrorKind::InvalidRecursive);
syntaxerr!(err_rseq7, "/a**b", ErrorKind::InvalidRecursive);
syntaxerr!(err_unclosed1, "[", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed2, "[]", ErrorKind::UnclosedClass);
syntaxerr!(err_unclosed3, "[!", ErrorKind::UnclosedClass);
@@ -1190,14 +1158,26 @@ mod tests {
syntaxerr!(err_range1, "[z-a]", ErrorKind::InvalidRange('z', 'a'));
syntaxerr!(err_range2, "[z--]", ErrorKind::InvalidRange('z', '-'));
const CASEI: Options =
Options { casei: Some(true), litsep: None, bsesc: None };
const SLASHLIT: Options =
Options { casei: None, litsep: Some(true), bsesc: None };
const NOBSESC: Options =
Options { casei: None, litsep: None, bsesc: Some(false) };
const BSESC: Options =
Options { casei: None, litsep: None, bsesc: Some(true) };
const CASEI: Options = Options {
casei: Some(true),
litsep: None,
bsesc: None,
};
const SLASHLIT: Options = Options {
casei: None,
litsep: Some(true),
bsesc: None,
};
const NOBSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(false),
};
const BSESC: Options = Options {
casei: None,
litsep: None,
bsesc: Some(true),
};
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
@@ -1214,30 +1194,8 @@ mod tests {
toregex!(re8, "[*]", r"^[\*]$");
toregex!(re9, "[+]", r"^[\+]$");
toregex!(re10, "+", r"^\+$");
toregex!(re11, "", r"^\xe2\x98\x83$");
toregex!(re12, "**", r"^.*$");
toregex!(re13, "**/", r"^.*$");
toregex!(re14, "**/*", r"^(?:/?|.*/).*$");
toregex!(re15, "**/**", r"^.*$");
toregex!(re16, "**/**/*", r"^(?:/?|.*/).*$");
toregex!(re17, "**/**/**", r"^.*$");
toregex!(re18, "**/**/**/*", r"^(?:/?|.*/).*$");
toregex!(re19, "a/**", r"^a(?:/?|/.*)$");
toregex!(re20, "a/**/**", r"^a(?:/?|/.*)$");
toregex!(re21, "a/**/**/**", r"^a(?:/?|/.*)$");
toregex!(re22, "a/**/b", r"^a(?:/|/.*/)b$");
toregex!(re23, "a/**/**/b", r"^a(?:/|/.*/)b$");
toregex!(re24, "a/**/**/**/b", r"^a(?:/|/.*/)b$");
toregex!(re25, "**/b", r"^(?:/?|.*/)b$");
toregex!(re26, "**/**/b", r"^(?:/?|.*/)b$");
toregex!(re27, "**/**/**/b", r"^(?:/?|.*/)b$");
toregex!(re28, "a**", r"^a.*.*$");
toregex!(re29, "**a", r"^.*.*a$");
toregex!(re30, "a**b", r"^a.*.*b$");
toregex!(re31, "***", r"^.*.*.*$");
toregex!(re32, "/a**", r"^/a.*.*$");
toregex!(re33, "/**a", r"^/.*.*a$");
toregex!(re34, "/a**b", r"^/a.*.*b$");
toregex!(re11, "**", r"^.*$");
toregex!(re12, "", r"^\xe2\x98\x83$");
matches!(match1, "a", "a");
matches!(match2, "a*b", "a_b");
@@ -1295,11 +1253,8 @@ mod tests {
matches!(matchpat4, "*hello.txt", "some\\path\\to\\hello.txt");
matches!(matchpat5, "*hello.txt", "/an/absolute/path/to/hello.txt");
matches!(matchpat6, "*some/path/to/hello.txt", "some/path/to/hello.txt");
matches!(
matchpat7,
"*some/path/to/hello.txt",
"a/bigger/some/path/to/hello.txt"
);
matches!(matchpat7, "*some/path/to/hello.txt",
"a/bigger/some/path/to/hello.txt");
matches!(matchescape, "_[[]_[]]_[?]_[*]_!_", "_[_]_?_*_!_");
@@ -1362,44 +1317,28 @@ mod tests {
nmatches!(matchnot15, "[!-]", "-");
nmatches!(matchnot16, "*hello.txt", "hello.txt-and-then-some");
nmatches!(matchnot17, "*hello.txt", "goodbye.txt");
nmatches!(
matchnot18,
"*some/path/to/hello.txt",
"some/path/to/hello.txt-and-then-some"
);
nmatches!(
matchnot19,
"*some/path/to/hello.txt",
"some/other/path/to/hello.txt"
);
nmatches!(matchnot18, "*some/path/to/hello.txt",
"some/path/to/hello.txt-and-then-some");
nmatches!(matchnot19, "*some/path/to/hello.txt",
"some/other/path/to/hello.txt");
nmatches!(matchnot20, "a", "foo/a");
nmatches!(matchnot21, "./foo", "foo");
nmatches!(matchnot22, "**/foo", "foofoo");
nmatches!(matchnot23, "**/foo/bar", "foofoo/bar");
nmatches!(matchnot24, "/*.c", "mozilla-sha1/sha1.c");
nmatches!(matchnot25, "*.c", "mozilla-sha1/sha1.c", SLASHLIT);
nmatches!(
matchnot26,
"**/m4/ltoptions.m4",
"csharp/src/packages/repositories.config",
SLASHLIT
);
nmatches!(matchnot26, "**/m4/ltoptions.m4",
"csharp/src/packages/repositories.config", SLASHLIT);
nmatches!(matchnot27, "a[^0-9]b", "a0b");
nmatches!(matchnot28, "a[^0-9]b", "a9b");
nmatches!(matchnot29, "[^-]", "-");
nmatches!(matchnot30, "some/*/needle.txt", "some/needle.txt");
nmatches!(
matchrec31,
"some/*/needle.txt",
"some/one/two/needle.txt",
SLASHLIT
);
"some/*/needle.txt", "some/one/two/needle.txt", SLASHLIT);
nmatches!(
matchrec32,
"some/*/needle.txt",
"some/one/two/three/needle.txt",
SLASHLIT
);
"some/*/needle.txt", "some/one/two/three/needle.txt", SLASHLIT);
macro_rules! extract {
($which:ident, $name:ident, $pat:expr, $expect:expr) => {
@@ -1461,27 +1400,19 @@ mod tests {
literal!(extract_lit7, "foo/bar", Some(s("foo/bar")));
literal!(extract_lit8, "**/foo/bar", None);
basetokens!(
extract_basetoks1,
"**/foo",
Some(&*vec![Literal('f'), Literal('o'), Literal('o'),])
);
basetokens!(extract_basetoks1, "**/foo", Some(&*vec![
Literal('f'), Literal('o'), Literal('o'),
]));
basetokens!(extract_basetoks2, "**/foo", None, CASEI);
basetokens!(
extract_basetoks3,
"**/foo",
Some(&*vec![Literal('f'), Literal('o'), Literal('o'),]),
SLASHLIT
);
basetokens!(extract_basetoks3, "**/foo", Some(&*vec![
Literal('f'), Literal('o'), Literal('o'),
]), SLASHLIT);
basetokens!(extract_basetoks4, "*foo", None, SLASHLIT);
basetokens!(extract_basetoks5, "*foo", None);
basetokens!(extract_basetoks6, "**/fo*o", None);
basetokens!(
extract_basetoks7,
"**/fo*o",
Some(&*vec![Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),]),
SLASHLIT
);
basetokens!(extract_basetoks7, "**/fo*o", Some(&*vec![
Literal('f'), Literal('o'), ZeroOrMore, Literal('o'),
]), SLASHLIT);
ext!(extract_ext1, "**/*.rs", Some(s(".rs")));
ext!(extract_ext2, "**/*.rs.bak", None);
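The CASEI/SLASHLIT option structs used throughout these tests correspond to public GlobBuilder knobs. A sketch of the equivalents (method names from globset's documented API):

    use globset::GlobBuilder;

    // CASEI ~ case_insensitive(true): matching ignores case.
    let m = GlobBuilder::new("a*.rs")
        .case_insensitive(true)
        .build().unwrap()
        .compile_matcher();
    assert!(m.is_match("A1.RS"));

    // SLASHLIT ~ literal_separator(true): `*` no longer crosses `/`.
    let m = GlobBuilder::new("*.rs")
        .literal_separator(true)
        .build().unwrap()
        .compile_matcher();
    assert!(!m.is_match("src/lib.rs"));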

View File

@@ -104,37 +104,33 @@ or to enable case insensitive matching.
#![deny(missing_docs)]
extern crate aho_corasick;
extern crate bstr;
extern crate fnv;
#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
#[cfg(feature = "serde1")]
extern crate serde;
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::error::Error as StdError;
use std::ffi::OsStr;
use std::fmt;
use std::hash;
use std::path::Path;
use std::str;
use aho_corasick::AhoCorasick;
use bstr::{ByteSlice, ByteVec, B};
use aho_corasick::{Automaton, AcAutomaton, FullAcAutomaton};
use regex::bytes::{Regex, RegexBuilder, RegexSet};
use pathutil::{
file_name, file_name_ext, normalize_path, os_str_bytes, path_bytes,
};
use glob::MatchStrategy;
pub use glob::{Glob, GlobBuilder, GlobMatcher};
use pathutil::{file_name, file_name_ext, normalize_path};
mod glob;
mod pathutil;
#[cfg(feature = "serde1")]
mod serde_impl;
/// Represents an error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Error {
@@ -147,13 +143,8 @@ pub struct Error {
/// The kind of error that can occur when parsing a glob pattern.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum ErrorKind {
/// **DEPRECATED**.
///
/// This error used to occur for consistency with git's glob specification,
/// but the specification now accepts all uses of `**`. When `**` does not
/// appear adjacent to a path separator or at the beginning/end of a glob,
/// it is now treated as two consecutive `*` patterns. As such, this error
/// is no longer used.
/// Occurs when a use of `**` is invalid. Namely, `**` can only appear
/// adjacent to a path separator, or the beginning/end of a glob.
InvalidRecursive,
/// Occurs when a character class (e.g., `[abc]`) is not closed.
UnclosedClass,
@@ -208,7 +199,9 @@ impl ErrorKind {
ErrorKind::UnclosedClass => {
"unclosed character class; missing ']'"
}
ErrorKind::InvalidRange(_, _) => "invalid character range",
ErrorKind::InvalidRange(_, _) => {
"invalid character range"
}
ErrorKind::UnopenedAlternates => {
"unopened alternate group; missing '{' \
(maybe escape '}' with '[}]'?)"
@@ -220,7 +213,9 @@ impl ErrorKind {
ErrorKind::NestedAlternates => {
"nested alternate groups are not allowed"
}
ErrorKind::DanglingEscape => "dangling '\\'",
ErrorKind::DanglingEscape => {
"dangling '\\'"
}
ErrorKind::Regex(ref err) => err,
ErrorKind::__Nonexhaustive => unreachable!(),
}
@@ -247,7 +242,9 @@ impl fmt::Display for ErrorKind {
| ErrorKind::UnclosedAlternates
| ErrorKind::NestedAlternates
| ErrorKind::DanglingEscape
| ErrorKind::Regex(_) => write!(f, "{}", self.description()),
| ErrorKind::Regex(_) => {
write!(f, "{}", self.description())
}
ErrorKind::InvalidRange(s, e) => {
write!(f, "invalid range; '{}' > '{}'", s, e)
}
@@ -262,20 +259,21 @@ fn new_regex(pat: &str) -> Result<Regex, Error> {
.size_limit(10 * (1 << 20))
.dfa_size_limit(10 * (1 << 20))
.build()
.map_err(|err| Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
.map_err(|err| {
Error {
glob: Some(pat.to_string()),
kind: ErrorKind::Regex(err.to_string()),
}
})
}
fn new_regex_set<I, S>(pats: I) -> Result<RegexSet, Error>
where
S: AsRef<str>,
I: IntoIterator<Item = S>,
{
RegexSet::new(pats).map_err(|err| Error {
glob: None,
kind: ErrorKind::Regex(err.to_string()),
where S: AsRef<str>, I: IntoIterator<Item=S> {
RegexSet::new(pats).map_err(|err| {
Error {
glob: None,
kind: ErrorKind::Regex(err.to_string()),
}
})
}
@@ -291,19 +289,19 @@ pub struct GlobSet {
impl GlobSet {
/// Create an empty `GlobSet`. An empty set matches nothing.
#[inline]
pub fn empty() -> GlobSet {
GlobSet { len: 0, strats: vec![] }
GlobSet {
len: 0,
strats: vec![],
}
}
/// Returns true if this set is empty, and therefore matches nothing.
#[inline]
pub fn is_empty(&self) -> bool {
self.len == 0
}
/// Returns the number of globs in this set.
#[inline]
pub fn len(&self) -> usize {
self.len
}
@@ -428,17 +426,11 @@ impl GlobSet {
}
}
}
debug!(
"built glob set; {} literals, {} basenames, {} extensions, \
debug!("built glob set; {} literals, {} basenames, {} extensions, \
{} prefixes, {} suffixes, {} required extensions, {} regexes",
lits.0.len(),
base_lits.0.len(),
exts.0.len(),
prefixes.literals.len(),
suffixes.literals.len(),
required_exts.0.len(),
regexes.literals.len()
);
lits.0.len(), base_lits.0.len(), exts.0.len(),
prefixes.literals.len(), suffixes.literals.len(),
required_exts.0.len(), regexes.literals.len());
Ok(GlobSet {
len: pats.len(),
strats: vec![
@@ -448,8 +440,7 @@ impl GlobSet {
GlobSetMatchStrategy::Suffix(suffixes.suffix()),
GlobSetMatchStrategy::Prefix(prefixes.prefix()),
GlobSetMatchStrategy::RequiredExtension(
required_exts.build()?,
),
required_exts.build()?),
GlobSetMatchStrategy::Regex(regexes.regex_set()?),
],
})
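From the caller's side, none of these strategies are visible; a set is built and queried roughly like so (a sketch against globset's public API):

    use globset::{Glob, GlobSetBuilder};

    let mut builder = GlobSetBuilder::new();
    builder.add(Glob::new("*.rs").unwrap());
    builder.add(Glob::new("src/**").unwrap());
    let set = builder.build().unwrap();

    // Both globs match, and `matches` reports each one's index.
    assert_eq!(vec![0, 1], set.matches("src/lib.rs"));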
@@ -501,10 +492,13 @@ pub struct Candidate<'a> {
impl<'a> Candidate<'a> {
/// Create a new candidate for matching from the given path.
pub fn new<P: AsRef<Path> + ?Sized>(path: &'a P) -> Candidate<'a> {
let path = normalize_path(Vec::from_path_lossy(path.as_ref()));
let basename = file_name(&path).unwrap_or(Cow::Borrowed(B("")));
let ext = file_name_ext(&basename).unwrap_or(Cow::Borrowed(B("")));
Candidate { path: path, basename: basename, ext: ext }
let path = path.as_ref();
let basename = file_name(path).unwrap_or(OsStr::new(""));
Candidate {
path: normalize_path(path_bytes(path)),
basename: os_str_bytes(basename),
ext: file_name_ext(basename).unwrap_or(Cow::Borrowed(b"")),
}
}
fn path_prefix(&self, max: usize) -> &[u8] {
@@ -576,12 +570,12 @@ impl LiteralStrategy {
}
fn is_match(&self, candidate: &Candidate) -> bool {
self.0.contains_key(candidate.path.as_bytes())
self.0.contains_key(&*candidate.path)
}
#[inline(never)]
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
if let Some(hits) = self.0.get(candidate.path.as_bytes()) {
if let Some(hits) = self.0.get(&*candidate.path) {
matches.extend(hits);
}
}
@@ -603,7 +597,7 @@ impl BasenameLiteralStrategy {
if candidate.basename.is_empty() {
return false;
}
self.0.contains_key(candidate.basename.as_bytes())
self.0.contains_key(&*candidate.basename)
}
#[inline(never)]
@@ -611,7 +605,7 @@ impl BasenameLiteralStrategy {
if candidate.basename.is_empty() {
return;
}
if let Some(hits) = self.0.get(candidate.basename.as_bytes()) {
if let Some(hits) = self.0.get(&*candidate.basename) {
matches.extend(hits);
}
}
@@ -633,7 +627,7 @@ impl ExtensionStrategy {
if candidate.ext.is_empty() {
return false;
}
self.0.contains_key(candidate.ext.as_bytes())
self.0.contains_key(&*candidate.ext)
}
#[inline(never)]
@@ -641,7 +635,7 @@ impl ExtensionStrategy {
if candidate.ext.is_empty() {
return;
}
if let Some(hits) = self.0.get(candidate.ext.as_bytes()) {
if let Some(hits) = self.0.get(&*candidate.ext) {
matches.extend(hits);
}
}
@@ -649,7 +643,7 @@ impl ExtensionStrategy {
#[derive(Clone, Debug)]
struct PrefixStrategy {
matcher: AhoCorasick,
matcher: FullAcAutomaton<Vec<u8>>,
map: Vec<usize>,
longest: usize,
}
@@ -657,8 +651,8 @@ struct PrefixStrategy {
impl PrefixStrategy {
fn is_match(&self, candidate: &Candidate) -> bool {
let path = candidate.path_prefix(self.longest);
for m in self.matcher.find_overlapping_iter(path) {
if m.start() == 0 {
for m in self.matcher.find_overlapping(path) {
if m.start == 0 {
return true;
}
}
@@ -667,9 +661,9 @@ impl PrefixStrategy {
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
let path = candidate.path_prefix(self.longest);
for m in self.matcher.find_overlapping_iter(path) {
if m.start() == 0 {
matches.push(self.map[m.pattern()]);
for m in self.matcher.find_overlapping(path) {
if m.start == 0 {
matches.push(self.map[m.pati]);
}
}
}
@@ -677,7 +671,7 @@ impl PrefixStrategy {
#[derive(Clone, Debug)]
struct SuffixStrategy {
matcher: AhoCorasick,
matcher: FullAcAutomaton<Vec<u8>>,
map: Vec<usize>,
longest: usize,
}
@@ -685,8 +679,8 @@ struct SuffixStrategy {
impl SuffixStrategy {
fn is_match(&self, candidate: &Candidate) -> bool {
let path = candidate.path_suffix(self.longest);
for m in self.matcher.find_overlapping_iter(path) {
if m.end() == path.len() {
for m in self.matcher.find_overlapping(path) {
if m.end == path.len() {
return true;
}
}
@@ -695,9 +689,9 @@ impl SuffixStrategy {
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
let path = candidate.path_suffix(self.longest);
for m in self.matcher.find_overlapping_iter(path) {
if m.end() == path.len() {
matches.push(self.map[m.pattern()]);
for m in self.matcher.find_overlapping(path) {
if m.end == path.len() {
matches.push(self.map[m.pati]);
}
}
}
@@ -711,11 +705,11 @@ impl RequiredExtensionStrategy {
if candidate.ext.is_empty() {
return false;
}
match self.0.get(candidate.ext.as_bytes()) {
match self.0.get(&*candidate.ext) {
None => false,
Some(regexes) => {
for &(_, ref re) in regexes {
if re.is_match(candidate.path.as_bytes()) {
if re.is_match(&*candidate.path) {
return true;
}
}
@@ -729,9 +723,9 @@ impl RequiredExtensionStrategy {
if candidate.ext.is_empty() {
return;
}
if let Some(regexes) = self.0.get(candidate.ext.as_bytes()) {
if let Some(regexes) = self.0.get(&*candidate.ext) {
for &(global_index, ref re) in regexes {
if re.is_match(candidate.path.as_bytes()) {
if re.is_match(&*candidate.path) {
matches.push(global_index);
}
}
@@ -747,11 +741,11 @@ struct RegexSetStrategy {
impl RegexSetStrategy {
fn is_match(&self, candidate: &Candidate) -> bool {
self.matcher.is_match(candidate.path.as_bytes())
self.matcher.is_match(&*candidate.path)
}
fn matches_into(&self, candidate: &Candidate, matches: &mut Vec<usize>) {
for i in self.matcher.matches(candidate.path.as_bytes()) {
for i in self.matcher.matches(&*candidate.path) {
matches.push(self.map[i]);
}
}
@@ -766,7 +760,11 @@ struct MultiStrategyBuilder {
impl MultiStrategyBuilder {
fn new() -> MultiStrategyBuilder {
MultiStrategyBuilder { literals: vec![], map: vec![], longest: 0 }
MultiStrategyBuilder {
literals: vec![],
map: vec![],
longest: 0,
}
}
fn add(&mut self, global_index: usize, literal: String) {
@@ -778,16 +776,18 @@ impl MultiStrategyBuilder {
}
fn prefix(self) -> PrefixStrategy {
let it = self.literals.into_iter().map(|s| s.into_bytes());
PrefixStrategy {
matcher: AhoCorasick::new_auto_configured(&self.literals),
matcher: AcAutomaton::new(it).into_full(),
map: self.map,
longest: self.longest,
}
}
fn suffix(self) -> SuffixStrategy {
let it = self.literals.into_iter().map(|s| s.into_bytes());
SuffixStrategy {
matcher: AhoCorasick::new_auto_configured(&self.literals),
matcher: AcAutomaton::new(it).into_full(),
map: self.map,
longest: self.longest,
}

View File

@@ -1,26 +1,41 @@
use std::borrow::Cow;
use bstr::{ByteSlice, ByteVec};
use std::ffi::OsStr;
use std::path::Path;
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
#[cfg(unix)]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
use std::os::unix::ffi::OsStrExt;
use memchr::memrchr;
let path = path.as_ref().as_os_str().as_bytes();
if path.is_empty() {
return None;
} else if path.last_byte() == Some(b'.') {
} else if path.len() == 1 && path[0] == b'.' {
return None;
} else if path.last() == Some(&b'.') {
return None;
} else if path.len() >= 2 && &path[path.len() - 2..] == &b".."[..] {
return None;
}
let last_slash = path.rfind_byte(b'/').map(|i| i + 1).unwrap_or(0);
Some(match *path {
Cow::Borrowed(path) => Cow::Borrowed(&path[last_slash..]),
Cow::Owned(ref path) => {
let mut path = path.clone();
path.drain_bytes(..last_slash);
Cow::Owned(path)
}
})
let last_slash = memrchr(b'/', path).map(|i| i + 1).unwrap_or(0);
Some(OsStr::from_bytes(&path[last_slash..]))
}
/// The final component of the path, if it is a normal file.
///
/// If the path terminates in ., .., or consists solely of a root or prefix,
/// file_name will return None.
#[cfg(not(unix))]
pub fn file_name<'a, P: AsRef<Path> + ?Sized>(
path: &'a P,
) -> Option<&'a OsStr> {
path.as_ref().file_name()
}
/// Return a file extension given a path's file name.
@@ -39,24 +54,55 @@ pub fn file_name<'a>(path: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
/// a pattern like `*.rs` is obviously trying to match files with a `rs`
/// extension, but it also matches files like `.rs`, which doesn't have an
/// extension according to std::path::Path::extension.
pub fn file_name_ext<'a>(name: &Cow<'a, [u8]>) -> Option<Cow<'a, [u8]>> {
pub fn file_name_ext(name: &OsStr) -> Option<Cow<[u8]>> {
if name.is_empty() {
return None;
}
let last_dot_at = match name.rfind_byte(b'.') {
None => return None,
Some(i) => i,
let name = os_str_bytes(name);
let last_dot_at = {
let result = name
.iter().enumerate().rev()
.find(|&(_, &b)| b == b'.')
.map(|(i, _)| i);
match result {
None => return None,
Some(i) => i,
}
};
Some(match *name {
Some(match name {
Cow::Borrowed(name) => Cow::Borrowed(&name[last_dot_at..]),
Cow::Owned(ref name) => {
let mut name = name.clone();
name.drain_bytes(..last_dot_at);
Cow::Owned(mut name) => {
name.drain(..last_dot_at);
Cow::Owned(name)
}
})
}
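Concretely, the looser semantics described above, sketched against the &OsStr variant of the signature shown here:

    use std::borrow::Cow;
    use std::ffi::OsStr;

    // Unlike std::path::Path::extension, a bare `.rs` still has an extension.
    assert_eq!(Some(Cow::Borrowed(&b".rs"[..])), file_name_ext(OsStr::new("foo.rs")));
    assert_eq!(Some(Cow::Borrowed(&b".rs"[..])), file_name_ext(OsStr::new(".rs")));
    assert_eq!(None, file_name_ext(OsStr::new("foo")));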
/// Return raw bytes of a path, transcoded to UTF-8 if necessary.
pub fn path_bytes(path: &Path) -> Cow<[u8]> {
os_str_bytes(path.as_os_str())
}
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
#[cfg(unix)]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
use std::os::unix::ffi::OsStrExt;
Cow::Borrowed(s.as_bytes())
}
/// Return the raw bytes of the given OS string, possibly transcoded to UTF-8.
#[cfg(not(unix))]
pub fn os_str_bytes(s: &OsStr) -> Cow<[u8]> {
// TODO(burntsushi): On Windows, OS strings are WTF-8, which is a superset
// of UTF-8, so even if we could get at the raw bytes, they wouldn't
// be useful. We *must* convert to UTF-8 before doing path matching.
// Unfortunate, but necessary.
match s.to_string_lossy() {
Cow::Owned(s) => Cow::Owned(s.into_bytes()),
Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
}
}
/// Normalizes a path to use `/` as a separator everywhere, even on platforms
/// that recognize other characters as separators.
#[cfg(unix)]
@@ -83,8 +129,7 @@ pub fn normalize_path(mut path: Cow<[u8]>) -> Cow<[u8]> {
#[cfg(test)]
mod tests {
use std::borrow::Cow;
use bstr::{ByteVec, B};
use std::ffi::OsStr;
use super::{file_name_ext, normalize_path};
@@ -92,9 +137,8 @@ mod tests {
($name:ident, $file_name:expr, $ext:expr) => {
#[test]
fn $name() {
let bs = Vec::from($file_name);
let got = file_name_ext(&Cow::Owned(bs));
assert_eq!($ext.map(|s| Cow::Borrowed(B(s))), got);
let got = file_name_ext(OsStr::new($file_name));
assert_eq!($ext.map(|s| Cow::Borrowed(s.as_bytes())), got);
}
};
}
@@ -109,8 +153,7 @@ mod tests {
($name:ident, $path:expr, $expected:expr) => {
#[test]
fn $name() {
let bs = Vec::from_slice($path);
let got = normalize_path(Cow::Owned(bs));
let got = normalize_path(Cow::Owned($path.to_vec()));
assert_eq!($expected.to_vec(), got.into_owned());
}
};

View File

@@ -1,26 +1,25 @@
[package]
name = "grep-cli"
version = "0.1.4" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Utilities for search oriented command line applications.
"""
documentation = "https://docs.rs/grep-cli"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/cli"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "cli", "utility", "util"]
license = "Unlicense/MIT"
[dependencies]
atty = "0.2.11"
bstr = "0.2.0"
globset = { version = "0.4.5", path = "../globset" }
globset = { version = "0.4.2", path = "../globset" }
lazy_static = "1.1.0"
log = "0.4.5"
regex = "1.1"
same-file = "1.0.4"
termcolor = "1.0.4"
regex = "1.0.5"
same-file = "1.0.3"
termcolor = "1.0.3"
[target.'cfg(windows)'.dependencies.winapi-util]
version = "0.1.1"

View File

@@ -38,7 +38,10 @@ impl Default for DecompressionMatcherBuilder {
impl DecompressionMatcherBuilder {
/// Create a new builder for configuring a decompression matcher.
pub fn new() -> DecompressionMatcherBuilder {
DecompressionMatcherBuilder { commands: vec![], defaults: true }
DecompressionMatcherBuilder {
commands: vec![],
defaults: true,
}
}
/// Build a matcher for determining how to decompress files.
@@ -46,11 +49,12 @@ impl DecompressionMatcherBuilder {
/// If there was a problem compiling the matcher, then an error is
/// returned.
pub fn build(&self) -> Result<DecompressionMatcher, CommandError> {
let defaults = if !self.defaults {
vec![]
} else {
default_decompression_commands()
};
let defaults =
if !self.defaults {
vec![]
} else {
default_decompression_commands()
};
let mut glob_builder = GlobSetBuilder::new();
let mut commands = vec![];
for decomp_cmd in defaults.iter().chain(&self.commands) {
@@ -89,15 +93,17 @@ impl DecompressionMatcherBuilder {
program: P,
args: I,
) -> &mut DecompressionMatcherBuilder
where
P: AsRef<OsStr>,
I: IntoIterator<Item = A>,
A: AsRef<OsStr>,
where P: AsRef<OsStr>,
I: IntoIterator<Item=A>,
A: AsRef<OsStr>,
{
let glob = glob.to_string();
let bin = program.as_ref().to_os_string();
let args =
args.into_iter().map(|a| a.as_ref().to_os_string()).collect();
let args = args
.into_iter()
.map(|a| a.as_ref().to_os_string())
.collect();
self.commands.push(DecompressionCommand { glob, bin, args });
self
}
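A sketch of wiring a custom decompressor through this builder; the `lzip` command here is purely illustrative, not one of the shipped defaults:

    use grep_cli::DecompressionMatcherBuilder;

    // Route `*.lz` files through `lzip -d -c` before searching them.
    let matcher = DecompressionMatcherBuilder::new()
        .associate("*.lz", "lzip", &["-d", "-c"])
        .build()
        .unwrap();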
@@ -346,9 +352,6 @@ fn default_decompression_commands() -> Vec<DecompressionCommand> {
const ARGS_XZ: &[&str] = &["xz", "-d", "-c"];
const ARGS_LZ4: &[&str] = &["lz4", "-d", "-c"];
const ARGS_LZMA: &[&str] = &["xz", "--format=lzma", "-d", "-c"];
const ARGS_BROTLI: &[&str] = &["brotli", "-d", "-c"];
const ARGS_ZSTD: &[&str] = &["zstd", "-q", "-d", "-c"];
const ARGS_UNCOMPRESS: &[&str] = &["uncompress", "-c"];
fn cmd(glob: &str, args: &[&str]) -> DecompressionCommand {
DecompressionCommand {
@@ -364,15 +367,15 @@ fn default_decompression_commands() -> Vec<DecompressionCommand> {
vec![
cmd("*.gz", ARGS_GZIP),
cmd("*.tgz", ARGS_GZIP),
cmd("*.bz2", ARGS_BZIP),
cmd("*.tbz2", ARGS_BZIP),
cmd("*.xz", ARGS_XZ),
cmd("*.txz", ARGS_XZ),
cmd("*.lz4", ARGS_LZ4),
cmd("*.lzma", ARGS_LZMA),
cmd("*.br", ARGS_BROTLI),
cmd("*.zst", ARGS_ZSTD),
cmd("*.zstd", ARGS_ZSTD),
cmd("*.Z", ARGS_UNCOMPRESS),
]
}

View File

@@ -1,8 +1,6 @@
use std::ffi::OsStr;
use std::str;
use bstr::{ByteSlice, ByteVec};
/// A single state in the state machine used by `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
@@ -37,15 +35,18 @@ enum State {
///
/// assert_eq!(r"foo\nbar\xFFbaz", escape(b"foo\nbar\xFFbaz"));
/// ```
pub fn escape(bytes: &[u8]) -> String {
pub fn escape(mut bytes: &[u8]) -> String {
let mut escaped = String::new();
for (s, e, ch) in bytes.char_indices() {
if ch == '\u{FFFD}' {
for b in bytes[s..e].bytes() {
escape_byte(b, &mut escaped);
while let Some(result) = decode_utf8(bytes) {
match result {
Ok(cp) => {
escape_char(cp, &mut escaped);
bytes = &bytes[cp.len_utf8()..];
}
Err(byte) => {
escape_byte(byte, &mut escaped);
bytes = &bytes[1..];
}
} else {
escape_char(ch, &mut escaped);
}
}
escaped
@@ -55,7 +56,19 @@ pub fn escape(bytes: &[u8]) -> String {
///
/// This is like [`escape`](fn.escape.html), but accepts an OS string.
pub fn escape_os(string: &OsStr) -> String {
escape(Vec::from_os_str_lossy(string).as_bytes())
#[cfg(unix)]
fn imp(string: &OsStr) -> String {
use std::os::unix::ffi::OsStrExt;
escape(string.as_bytes())
}
#[cfg(not(unix))]
fn imp(string: &OsStr) -> String {
escape(string.to_string_lossy().as_bytes())
}
imp(string)
}
/// Unescapes a string.
@@ -95,61 +108,51 @@ pub fn unescape(s: &str) -> Vec<u8> {
let mut state = Literal;
for c in s.chars() {
match state {
Escape => match c {
'\\' => {
bytes.push(b'\\');
state = Literal;
Escape => {
match c {
'\\' => { bytes.push(b'\\'); state = Literal; }
'n' => { bytes.push(b'\n'); state = Literal; }
'r' => { bytes.push(b'\r'); state = Literal; }
't' => { bytes.push(b'\t'); state = Literal; }
'x' => { state = HexFirst; }
c => {
bytes.extend(format!(r"\{}", c).into_bytes());
state = Literal;
}
}
'n' => {
bytes.push(b'\n');
state = Literal;
}
HexFirst => {
match c {
'0'...'9' | 'A'...'F' | 'a'...'f' => {
state = HexSecond(c);
}
c => {
bytes.extend(format!(r"\x{}", c).into_bytes());
state = Literal;
}
}
'r' => {
bytes.push(b'\r');
state = Literal;
}
HexSecond(first) => {
match c {
'0'...'9' | 'A'...'F' | 'a'...'f' => {
let ordinal = format!("{}{}", first, c);
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
bytes.push(byte);
state = Literal;
}
c => {
let original = format!(r"\x{}{}", first, c);
bytes.extend(original.into_bytes());
state = Literal;
}
}
't' => {
bytes.push(b'\t');
state = Literal;
}
Literal => {
match c {
'\\' => { state = Escape; }
c => { bytes.extend(c.to_string().as_bytes()); }
}
'x' => {
state = HexFirst;
}
c => {
bytes.extend(format!(r"\{}", c).into_bytes());
state = Literal;
}
},
HexFirst => match c {
'0'..='9' | 'A'..='F' | 'a'..='f' => {
state = HexSecond(c);
}
c => {
bytes.extend(format!(r"\x{}", c).into_bytes());
state = Literal;
}
},
HexSecond(first) => match c {
'0'..='9' | 'A'..='F' | 'a'..='f' => {
let ordinal = format!("{}{}", first, c);
let byte = u8::from_str_radix(&ordinal, 16).unwrap();
bytes.push(byte);
state = Literal;
}
c => {
let original = format!(r"\x{}{}", first, c);
bytes.extend(original.into_bytes());
state = Literal;
}
},
Literal => match c {
'\\' => {
state = Escape;
}
c => {
bytes.extend(c.to_string().as_bytes());
}
},
}
}
}
match state {
@@ -183,7 +186,7 @@ fn escape_char(cp: char, into: &mut String) {
/// Adds the given byte to the given string, escaping it if necessary.
fn escape_byte(byte: u8, into: &mut String) {
match byte {
0x21..=0x5B | 0x5D..=0x7D => into.push(byte as char),
0x21...0x5B | 0x5D...0x7D => into.push(byte as char),
b'\n' => into.push_str(r"\n"),
b'\r' => into.push_str(r"\r"),
b'\t' => into.push_str(r"\t"),
@@ -192,6 +195,46 @@ fn escape_byte(byte: u8, into: &mut String) {
}
}
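A sketch of the state machine's effect on a few inputs:

    assert_eq!(b"foobar".to_vec(), unescape(r"foo\x62ar"));
    assert_eq!(b"foo\nbar".to_vec(), unescape(r"foo\nbar"));
    // An unrecognized escape is passed through literally.
    assert_eq!(b"foo\\wbar".to_vec(), unescape(r"foo\wbar"));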
/// Decodes the next UTF-8 encoded codepoint from the given byte slice.
///
/// If no valid encoding of a codepoint exists at the beginning of the given
/// byte slice, then the first byte is returned instead.
///
/// This returns `None` if and only if `bytes` is empty.
fn decode_utf8(bytes: &[u8]) -> Option<Result<char, u8>> {
if bytes.is_empty() {
return None;
}
let len = match utf8_len(bytes[0]) {
None => return Some(Err(bytes[0])),
Some(len) if len > bytes.len() => return Some(Err(bytes[0])),
Some(len) => len,
};
match str::from_utf8(&bytes[..len]) {
Ok(s) => Some(Ok(s.chars().next().unwrap())),
Err(_) => Some(Err(bytes[0])),
}
}
/// Given a UTF-8 leading byte, this returns the total number of code units
/// in the following encoded codepoint.
///
/// If the given byte is not a valid UTF-8 leading byte, then this returns
/// `None`.
fn utf8_len(byte: u8) -> Option<usize> {
if byte <= 0x7F {
Some(1)
} else if byte <= 0b110_11111 {
Some(2)
} else if byte <= 0b1110_1111 {
Some(3)
} else if byte <= 0b1111_0111 {
Some(4)
} else {
None
}
}
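For instance, a sketch of the contract described above:

    // A valid multi-byte sequence decodes to its codepoint...
    assert_eq!(Some(Ok('☃')), decode_utf8(b"\xE2\x98\x83 snow"));
    // ...while a byte that can't begin any UTF-8 sequence is handed back.
    assert_eq!(Some(Err(0xFF)), decode_utf8(b"\xFFabc"));
    assert_eq!(None, decode_utf8(b""));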
#[cfg(test)]
mod tests {
use super::{escape, unescape};

View File

@@ -46,9 +46,7 @@ impl ParseSizeError {
}
impl error::Error for ParseSizeError {
fn description(&self) -> &str {
"invalid size"
}
fn description(&self) -> &str { "invalid size" }
}
impl fmt::Display for ParseSizeError {
@@ -56,19 +54,26 @@ impl fmt::Display for ParseSizeError {
use self::ParseSizeErrorKind::*;
match self.kind {
InvalidFormat => write!(
f,
"invalid format for size '{}', which should be a sequence \
InvalidFormat => {
write!(
f,
"invalid format for size '{}', which should be a sequence \
of digits followed by an optional 'K', 'M' or 'G' \
suffix",
self.original
),
InvalidInt(ref err) => write!(
f,
"invalid integer found in size '{}': {}",
self.original, err
),
Overflow => write!(f, "size too big in '{}'", self.original),
self.original
)
}
InvalidInt(ref err) => {
write!(
f,
"invalid integer found in size '{}': {}",
self.original,
err
)
}
Overflow => {
write!(f, "size too big in '{}'", self.original)
}
}
}
}
@@ -99,16 +104,17 @@ pub fn parse_human_readable_size(size: &str) -> Result<u64, ParseSizeError> {
Some(caps) => caps,
None => return Err(ParseSizeError::format(size)),
};
let value: u64 =
caps[1].parse().map_err(|err| ParseSizeError::int(size, err))?;
let value: u64 = caps[1].parse().map_err(|err| {
ParseSizeError::int(size, err)
})?;
let suffix = match caps.get(2) {
None => return Ok(value),
Some(cap) => cap.as_str(),
};
let bytes = match suffix {
"K" => value.checked_mul(1 << 10),
"M" => value.checked_mul(1 << 20),
"G" => value.checked_mul(1 << 30),
"K" => value.checked_mul(1<<10),
"M" => value.checked_mul(1<<20),
"G" => value.checked_mul(1<<30),
// Because if the regex matches this group, it must be [KMG].
_ => unreachable!(),
};
@@ -128,19 +134,19 @@ mod tests {
#[test]
fn suffix_k() {
let x = parse_human_readable_size("123K").unwrap();
assert_eq!(123 * (1 << 10), x);
assert_eq!(123 * (1<<10), x);
}
#[test]
fn suffix_m() {
let x = parse_human_readable_size("123M").unwrap();
assert_eq!(123 * (1 << 20), x);
assert_eq!(123 * (1<<20), x);
}
#[test]
fn suffix_g() {
let x = parse_human_readable_size("123G").unwrap();
assert_eq!(123 * (1 << 30), x);
assert_eq!(123 * (1<<30), x);
}
#[test]

View File

@@ -159,7 +159,6 @@ error message is crafted that typically tells the user how to fix the problem.
#![deny(missing_docs)]
extern crate atty;
extern crate bstr;
extern crate globset;
#[macro_use]
extern crate lazy_static;
@@ -179,18 +178,20 @@ mod process;
mod wtr;
pub use decompress::{
DecompressionMatcher, DecompressionMatcherBuilder, DecompressionReader,
DecompressionReaderBuilder,
DecompressionMatcher, DecompressionMatcherBuilder,
DecompressionReader, DecompressionReaderBuilder,
};
pub use escape::{escape, escape_os, unescape, unescape_os};
pub use human::{parse_human_readable_size, ParseSizeError};
pub use human::{ParseSizeError, parse_human_readable_size};
pub use pattern::{
pattern_from_bytes, pattern_from_os, patterns_from_path,
patterns_from_reader, patterns_from_stdin, InvalidPatternError,
InvalidPatternError,
pattern_from_os, pattern_from_bytes,
patterns_from_path, patterns_from_reader, patterns_from_stdin,
};
pub use process::{CommandError, CommandReader, CommandReaderBuilder};
pub use wtr::{
stdout, stdout_buffered_block, stdout_buffered_line, StandardStream,
StandardStream,
stdout, stdout_buffered_line, stdout_buffered_block,
};
/// Returns true if and only if stdin is believed to be readable.
@@ -203,8 +204,8 @@ pub use wtr::{
pub fn is_readable_stdin() -> bool {
#[cfg(unix)]
fn imp() -> bool {
use same_file::Handle;
use std::os::unix::fs::FileTypeExt;
use same_file::Handle;
let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
Err(_) => return false,

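A hypothetical caller of this heuristic (illustrative, not from the crate): treat stdin as the search target only when something was actually piped or redirected in, and fall back to the current directory otherwise:

    fn default_search_target() -> &'static str {
        // On a tty, stdin exists but is not "readable" in this sense.
        if grep_cli::is_readable_stdin() { "<stdin>" } else { "./" }
    }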

@@ -2,12 +2,10 @@ use std::error;
use std::ffi::OsStr;
use std::fmt;
use std::fs::File;
use std::io;
use std::io::{self, BufRead};
use std::path::Path;
use std::str;
use bstr::io::BufReadExt;
use escape::{escape, escape_os};
/// An error that occurs when a pattern could not be converted to valid UTF-8.
@@ -29,19 +27,18 @@ impl InvalidPatternError {
}
impl error::Error for InvalidPatternError {
fn description(&self) -> &str {
"invalid pattern"
}
fn description(&self) -> &str { "invalid pattern" }
}
impl fmt::Display for InvalidPatternError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"found invalid UTF-8 in pattern at byte offset {}: {} \
(disable Unicode mode and use hex escape sequences to match \
arbitrary bytes in a pattern, e.g., '(?-u)\\xFF')",
self.valid_up_to, self.original,
"found invalid UTF-8 in pattern at byte offset {} \
(use hex escape sequences to match arbitrary bytes \
in a pattern, e.g., \\xFF): '{}'",
self.valid_up_to,
self.original,
)
}
}
@@ -64,7 +61,10 @@ pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> {
.to_string_lossy()
.find('\u{FFFD}')
.expect("a Unicode replacement codepoint for invalid UTF-8");
InvalidPatternError { original: escape_os(pattern), valid_up_to }
InvalidPatternError {
original: escape_os(pattern),
valid_up_to: valid_up_to,
}
})
}
@@ -77,9 +77,11 @@ pub fn pattern_from_os(pattern: &OsStr) -> Result<&str, InvalidPatternError> {
pub fn pattern_from_bytes(
pattern: &[u8],
) -> Result<&str, InvalidPatternError> {
str::from_utf8(pattern).map_err(|err| InvalidPatternError {
original: escape(pattern),
valid_up_to: err.valid_up_to(),
str::from_utf8(pattern).map_err(|err| {
InvalidPatternError {
original: escape(pattern),
valid_up_to: err.valid_up_to(),
}
})
}
@@ -115,7 +117,10 @@ pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
let stdin = io::stdin();
let locked = stdin.lock();
patterns_from_reader(locked).map_err(|err| {
io::Error::new(io::ErrorKind::Other, format!("<stdin>:{}", err))
io::Error::new(
io::ErrorKind::Other,
format!("<stdin>:{}", err),
)
})
}
@@ -151,20 +156,28 @@ pub fn patterns_from_stdin() -> io::Result<Vec<String>> {
/// ```
pub fn patterns_from_reader<R: io::Read>(rdr: R) -> io::Result<Vec<String>> {
let mut patterns = vec![];
let mut bufrdr = io::BufReader::new(rdr);
let mut line = vec![];
let mut line_number = 0;
io::BufReader::new(rdr).for_byte_line(|line| {
while {
line.clear();
line_number += 1;
match pattern_from_bytes(line) {
Ok(pattern) => {
patterns.push(pattern.to_string());
Ok(true)
}
Err(err) => Err(io::Error::new(
io::ErrorKind::Other,
format!("{}: {}", line_number, err),
)),
bufrdr.read_until(b'\n', &mut line)? > 0
} {
line.pop().unwrap(); // remove trailing '\n'
if line.last() == Some(&b'\r') {
line.pop().unwrap();
}
})?;
match pattern_from_bytes(&line) {
Ok(pattern) => patterns.push(pattern.to_string()),
Err(err) => {
return Err(io::Error::new(
io::ErrorKind::Other,
format!("{}: {}", line_number, err),
));
}
}
}
Ok(patterns)
}
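
A minimal usage sketch of the reader entry point above (`&[u8]` implements `io::Read`), showing the CRLF stripping:

    let patterns = patterns_from_reader(&b"foo\r\nbar\n"[..]).unwrap();
    assert_eq!(patterns, vec!["foo".to_string(), "bar".to_string()]);
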
@@ -182,8 +195,8 @@ mod tests {
#[test]
#[cfg(unix)]
fn os() {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
use std::ffi::OsStr;
let pat = OsStr::from_bytes(b"abc\xFFxyz");
let err = pattern_from_os(pat).unwrap_err();


@@ -33,9 +33,7 @@ impl CommandError {
}
impl error::Error for CommandError {
fn description(&self) -> &str {
"command error"
}
fn description(&self) -> &str { "command error" }
}
impl fmt::Display for CommandError {
@@ -48,12 +46,7 @@ impl fmt::Display for CommandError {
write!(f, "<stderr is empty>")
} else {
let div = iter::repeat('-').take(79).collect::<String>();
write!(
f,
"\n{div}\n{msg}\n{div}",
div = div,
msg = msg.trim()
)
write!(f, "\n{div}\n{msg}\n{div}", div=div, msg=msg.trim())
}
}
}
@@ -108,11 +101,12 @@ impl CommandReaderBuilder {
.stderr(process::Stdio::piped())
.spawn()?;
let stdout = child.stdout.take().unwrap();
let stderr = if self.async_stderr {
StderrReader::async(child.stderr.take().unwrap())
} else {
StderrReader::sync(child.stderr.take().unwrap())
};
let stderr =
if self.async_stderr {
StderrReader::async(child.stderr.take().unwrap())
} else {
StderrReader::sync(child.stderr.take().unwrap())
};
Ok(CommandReader {
child: child,
stdout: stdout,
@@ -232,8 +226,9 @@ enum StderrReader {
impl StderrReader {
/// Create a reader for stderr that reads contents asynchronously.
fn async(mut stderr: process::ChildStderr) -> StderrReader {
let handle =
thread::spawn(move || stderr_to_command_error(&mut stderr));
let handle = thread::spawn(move || {
stderr_to_command_error(&mut stderr)
});
StderrReader::Async(Some(handle))
}
@@ -252,7 +247,9 @@ impl StderrReader {
let handle = handle
.take()
.expect("read_to_end cannot be called more than once");
handle.join().expect("stderr reading thread does not panic")
handle
.join()
.expect("stderr reading thread does not panic")
}
StderrReader::Sync(ref mut stderr) => {
stderr_to_command_error(stderr)

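The motivation for the asynchronous variant, as a standalone sketch (the names here are illustrative): if the child fills its stderr pipe while the parent is blocked reading stdout, both sides stall, so stderr is drained on its own thread:

    use std::io::Read;
    use std::{process, thread};

    fn run_capture(
        mut cmd: process::Command,
    ) -> std::io::Result<(Vec<u8>, Vec<u8>)> {
        let mut child = cmd
            .stdout(process::Stdio::piped())
            .stderr(process::Stdio::piped())
            .spawn()?;
        // Drain stderr concurrently so the child never blocks on a full pipe.
        let mut stderr = child.stderr.take().unwrap();
        let handle = thread::spawn(move || {
            let mut buf = Vec::new();
            stderr.read_to_end(&mut buf).map(|_| buf)
        });
        let mut stdout = Vec::new();
        child.stdout.take().unwrap().read_to_end(&mut stdout)?;
        let stderr = handle.join().expect("stderr thread panicked")?;
        child.wait()?;
        Ok((stdout, stderr))
    }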

@@ -1,23 +1,23 @@
[package]
name = "grep-matcher"
version = "0.1.4" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
A trait for regular expressions, with a focus on line oriented search.
"""
documentation = "https://docs.rs/grep-matcher"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/matcher"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/matcher"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "pattern", "trait"]
license = "Unlicense/MIT"
autotests = false
[dependencies]
memchr = "2.1"
memchr = "2.0.2"
[dev-dependencies]
regex = "1.1"
regex = "1.0.5"
[[test]]
name = "integration"


@@ -19,7 +19,7 @@ pub fn interpolate<A, N>(
dst: &mut Vec<u8>,
) where
A: FnMut(usize, &mut Vec<u8>),
N: FnMut(&str) -> Option<usize>,
N: FnMut(&str) -> Option<usize>
{
while !replacement.is_empty() {
match memchr(b'$', replacement) {
@@ -134,14 +134,14 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef> {
/// Returns true if and only if the given byte is allowed in a capture name.
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,
b'0' ... b'9' | b'a' ... b'z' | b'A' ... b'Z' | b'_' => true,
_ => false,
}
}
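
A hedged sketch of the interpolation entry point whose tail appears above (the closures are illustrative): `append` writes the text of capture group `i` into `dst`, and the name resolver maps `$name` references to group indices:

    let mut dst = vec![];
    interpolate(
        b"$2 $1",
        // Pretend group 1 matched "aa" and group 2 matched "bb".
        |i, dst: &mut Vec<u8>| dst.extend(if i == 1 { b"aa" } else { b"bb" }),
        |name| name.parse::<usize>().ok(),
        &mut dst,
    );
    assert_eq!(dst, b"bb aa");
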
#[cfg(test)]
mod tests {
use super::{find_cap_ref, interpolate, CaptureRef};
use super::{CaptureRef, find_cap_ref, interpolate};
macro_rules! find {
($name:ident, $text:expr) => {
@@ -211,7 +211,7 @@ mod tests {
fn $name() {
assert_eq!($expected, interpolate_string($map, $caps, $hay));
}
};
}
}
interp!(


@@ -278,7 +278,7 @@ impl LineTerminator {
}
}
impl Default for LineTerminator {
#[inline]
fn default() -> LineTerminator {
LineTerminator::byte(b'\n')
@@ -439,8 +439,7 @@ pub trait Captures {
haystack: &[u8],
replacement: &[u8],
dst: &mut Vec<u8>,
) where
F: FnMut(&str) -> Option<usize>,
) where F: FnMut(&str) -> Option<usize>
{
interpolate(
replacement,
@@ -464,18 +463,12 @@ pub struct NoCaptures(());
impl NoCaptures {
/// Create an empty set of capturing groups.
pub fn new() -> NoCaptures {
NoCaptures(())
}
pub fn new() -> NoCaptures { NoCaptures(()) }
}
impl Captures for NoCaptures {
fn len(&self) -> usize {
0
}
fn get(&self, _: usize) -> Option<Match> {
None
}
fn len(&self) -> usize { 0 }
fn get(&self, _: usize) -> Option<Match> { None }
}
/// NoError provides an error type for matchers that never produce errors.
@@ -488,9 +481,7 @@ impl Captures for NoCaptures {
pub struct NoError(());
impl ::std::error::Error for NoError {
fn description(&self) -> &str {
"no error"
}
fn description(&self) -> &str { "no error" }
}
impl fmt::Display for NoError {
@@ -608,7 +599,10 @@ pub trait Matcher {
///
/// The text encoding of `haystack` is not strictly specified. Matchers are
/// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
fn find(&self, haystack: &[u8]) -> Result<Option<Match>, Self::Error> {
fn find(
&self,
haystack: &[u8],
) -> Result<Option<Match>, Self::Error> {
self.find_at(haystack, 0)
}
@@ -620,8 +614,7 @@ pub trait Matcher {
haystack: &[u8],
mut matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match) -> bool,
where F: FnMut(Match) -> bool
{
self.try_find_iter(haystack, |m| Ok(matched(m)))
.map(|r: Result<(), ()>| r.unwrap())
@@ -639,8 +632,7 @@ pub trait Matcher {
haystack: &[u8],
mut matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
let mut last_end = 0;
let mut last_match = None;
@@ -698,8 +690,7 @@ pub trait Matcher {
caps: &mut Self::Captures,
mut matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures) -> bool,
where F: FnMut(&Self::Captures) -> bool
{
self.try_captures_iter(haystack, caps, |caps| Ok(matched(caps)))
.map(|r: Result<(), ()>| r.unwrap())
@@ -718,8 +709,7 @@ pub trait Matcher {
caps: &mut Self::Captures,
mut matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(&Self::Captures) -> Result<bool, E>,
where F: FnMut(&Self::Captures) -> Result<bool, E>
{
let mut last_end = 0;
let mut last_match = None;
@@ -797,8 +787,7 @@ pub trait Matcher {
dst: &mut Vec<u8>,
mut append: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match, &mut Vec<u8>) -> bool,
where F: FnMut(Match, &mut Vec<u8>) -> bool
{
let mut last_match = 0;
self.find_iter(haystack, |m| {
@@ -821,8 +810,7 @@ pub trait Matcher {
dst: &mut Vec<u8>,
mut append: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
{
let mut last_match = 0;
self.captures_iter(haystack, caps, |caps| {
@@ -1024,7 +1012,10 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).capture_count()
}
fn find(&self, haystack: &[u8]) -> Result<Option<Match>, Self::Error> {
fn find(
&self,
haystack: &[u8]
) -> Result<Option<Match>, Self::Error> {
(*self).find(haystack)
}
@@ -1033,8 +1024,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
haystack: &[u8],
matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match) -> bool,
where F: FnMut(Match) -> bool
{
(*self).find_iter(haystack, matched)
}
@@ -1044,8 +1034,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
haystack: &[u8],
matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
(*self).try_find_iter(haystack, matched)
}
@@ -1064,8 +1053,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
caps: &mut Self::Captures,
matched: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures) -> bool,
where F: FnMut(&Self::Captures) -> bool
{
(*self).captures_iter(haystack, caps, matched)
}
@@ -1076,8 +1064,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
caps: &mut Self::Captures,
matched: F,
) -> Result<Result<(), E>, Self::Error>
where
F: FnMut(&Self::Captures) -> Result<bool, E>,
where F: FnMut(&Self::Captures) -> Result<bool, E>
{
(*self).try_captures_iter(haystack, caps, matched)
}
@@ -1088,8 +1075,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), Self::Error>
where
F: FnMut(Match, &mut Vec<u8>) -> bool,
where F: FnMut(Match, &mut Vec<u8>) -> bool
{
(*self).replace(haystack, dst, append)
}
@@ -1101,8 +1087,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
dst: &mut Vec<u8>,
append: F,
) -> Result<(), Self::Error>
where
F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool,
where F: FnMut(&Self::Captures, &mut Vec<u8>) -> bool
{
(*self).replace_with_captures(haystack, caps, dst, append)
}
@@ -1114,7 +1099,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
fn is_match_at(
&self,
haystack: &[u8],
at: usize,
at: usize
) -> Result<bool, Self::Error> {
(*self).is_match_at(haystack, at)
}
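
A consumer-side sketch of the short-circuiting contract above: returning `false` from the closure stops iteration, which is how a "first N matches" helper can be written against any `Matcher` (an illustrative helper, not part of the crate):

    use grep_matcher::{Match, Matcher};

    fn first_n<M: Matcher>(matcher: &M, haystack: &[u8], n: usize) -> Vec<Match> {
        let mut out = vec![];
        // Returning false short-circuits find_iter after the nth match.
        let _ = matcher.find_iter(haystack, |m| {
            out.push(m);
            out.len() < n
        });
        out
    }
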


@@ -25,22 +25,18 @@ fn find() {
fn find_iter() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
matcher
.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
true
})
.unwrap();
matcher.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
true
}).unwrap();
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
// Test that find_iter respects short circuiting.
matches.clear();
matcher
.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
false
})
.unwrap();
matcher.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
false
}).unwrap();
assert_eq!(matches, vec![m(0, 5)]);
}
@@ -51,17 +47,14 @@ fn try_find_iter() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
let err = matcher
.try_find_iter(b"aa bb cc dd", |m| {
if matches.is_empty() {
matches.push(m);
Ok(true)
} else {
Err(MyError)
}
})
.unwrap()
.unwrap_err();
let err = matcher.try_find_iter(b"aa bb cc dd", |m| {
if matches.is_empty() {
matches.push(m);
Ok(true)
} else {
Err(MyError)
}
}).unwrap().unwrap_err();
assert_eq!(matches, vec![m(0, 5)]);
assert_eq!(err, MyError);
}
@@ -96,30 +89,28 @@ fn captures_iter() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
matcher
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
true
})
.unwrap();
assert_eq!(
matches,
vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11),]
);
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
true
}).unwrap();
assert_eq!(matches, vec![
m(0, 5), m(0, 2), m(3, 5),
m(6, 11), m(6, 8), m(9, 11),
]);
// Test that captures_iter respects short circuiting.
matches.clear();
matcher
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
false
})
.unwrap();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5),]);
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
false
}).unwrap();
assert_eq!(matches, vec![
m(0, 5), m(0, 2), m(3, 5),
]);
}
#[test]
@@ -130,19 +121,16 @@ fn try_captures_iter() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
let err = matcher
.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
if matches.is_empty() {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
Ok(true)
} else {
Err(MyError)
}
})
.unwrap()
.unwrap_err();
let err = matcher.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
if matches.is_empty() {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
Ok(true)
} else {
Err(MyError)
}
}).unwrap().unwrap_err();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
assert_eq!(err, MyError);
}
@@ -162,12 +150,10 @@ fn no_captures() {
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
let mut called = false;
matcher
.captures_iter(b"homer simpson", &mut caps, |_| {
called = true;
true
})
.unwrap();
matcher.captures_iter(b"homer simpson", &mut caps, |_| {
called = true;
true
}).unwrap();
assert!(!called);
}
@@ -175,22 +161,18 @@ fn no_captures() {
fn replace() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut dst = vec![];
matcher
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
true
})
.unwrap();
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
true
}).unwrap();
assert_eq!(dst, b"z z");
// Test that replacements respect short circuiting.
dst.clear();
matcher
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
false
})
.unwrap();
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
false
}).unwrap();
assert_eq!(dst, b"z cc dd");
}
@@ -200,31 +182,27 @@ fn replace_with_captures() {
let haystack = b"aa bb cc dd";
let mut caps = matcher.new_captures().unwrap();
let mut dst = vec![];
matcher
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
true
})
.unwrap();
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
true
}).unwrap();
assert_eq!(dst, b"bb aa dd cc");
// Test that replacements respect short circuiting.
dst.clear();
matcher
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
false
})
.unwrap();
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
false
}).unwrap();
assert_eq!(dst, b"bb aa cc dd");
}


@@ -18,7 +18,10 @@ impl RegexMatcher {
names.insert(name.to_string(), i);
}
}
RegexMatcher { re: re, names: names }
RegexMatcher {
re: re,
names: names,
}
}
}
@@ -28,9 +31,12 @@ impl Matcher for RegexMatcher {
type Captures = RegexCaptures;
type Error = NoError;
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
Ok(self
.re
fn find_at(
&self,
haystack: &[u8],
at: usize,
) -> Result<Option<Match>> {
Ok(self.re
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}
@@ -69,9 +75,12 @@ impl Matcher for RegexMatcherNoCaps {
type Captures = NoCaptures;
type Error = NoError;
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
Ok(self
.0
fn find_at(
&self,
haystack: &[u8],
at: usize,
) -> Result<Option<Match>> {
Ok(self.0
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}


@@ -1,17 +1,17 @@
[package]
name = "grep-pcre2"
version = "0.1.4" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use PCRE2 with the 'grep' crate.
"""
documentation = "https://docs.rs/grep-pcre2"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/pcre2"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "pcre", "backreference", "look"]
license = "Unlicense/MIT"
[dependencies]
grep-matcher = { version = "0.1.2", path = "../matcher" }
pcre2 = "0.2.0"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
pcre2 = "0.1.0"


@@ -10,7 +10,6 @@ extern crate pcre2;
pub use error::{Error, ErrorKind};
pub use matcher::{RegexCaptures, RegexMatcher, RegexMatcherBuilder};
pub use pcre2::{is_jit_available, version};
mod error;
mod matcher;


@@ -33,12 +33,13 @@ impl RegexMatcherBuilder {
if self.case_smart && !has_uppercase_literal(pattern) {
builder.caseless(true);
}
let res = if self.word {
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
builder.build(&pattern)
} else {
builder.build(pattern)
};
let res =
if self.word {
let pattern = format!(r"(?<!\w)(?:{})(?!\w)", pattern);
builder.build(&pattern)
} else {
builder.build(pattern)
};
res.map_err(Error::regex).map(|regex| {
let mut names = HashMap::new();
for (i, name) in regex.capture_names().iter().enumerate() {
@@ -198,55 +199,16 @@ impl RegexMatcherBuilder {
self
}
/// Enable PCRE2's JIT and return an error if it's not available.
/// Enable PCRE2's JIT.
///
/// This generally speeds up matching quite a bit. The downside is that it
/// can increase the time it takes to compile a pattern.
///
/// If the JIT isn't available or if JIT compilation returns an error, then
/// regex compilation will fail with the corresponding error.
///
/// This is disabled by default, and always overrides `jit_if_available`.
/// This is disabled by default.
pub fn jit(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
self.builder.jit(yes);
self
}
/// Enable PCRE2's JIT if it's available.
///
/// This generally speeds up matching quite a bit. The downside is that it
/// can increase the time it takes to compile a pattern.
///
/// If the JIT isn't available or if JIT compilation returns an error,
/// then a debug message with the error will be emitted and the regex will
/// otherwise silently fall back to non-JIT matching.
///
/// This is disabled by default, and always overrides `jit`.
pub fn jit_if_available(&mut self, yes: bool) -> &mut RegexMatcherBuilder {
self.builder.jit_if_available(yes);
self
}
/// Set the maximum size of PCRE2's JIT stack, in bytes. If the JIT is
/// not enabled, then this has no effect.
///
/// When `None` is given, no custom JIT stack will be created, and instead,
/// the default JIT stack is used. When the default is used, its maximum
/// size is 32 KB.
///
/// When this is set, then a new JIT stack will be created with the given
/// maximum size as its limit.
///
/// Increasing the stack size can be useful for larger regular expressions.
///
/// By default, this is set to `None`.
pub fn max_jit_stack_size(
&mut self,
bytes: Option<usize>,
) -> &mut RegexMatcherBuilder {
self.builder.max_jit_stack_size(bytes);
self
}
}
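
A hedged usage sketch of the knobs documented above (the pattern is illustrative): prefer the JIT when present and give it a 1 MiB stack for deeply nested patterns:

    use grep_matcher::Matcher;

    let matcher = RegexMatcherBuilder::new()
        .jit_if_available(true)
        .max_jit_stack_size(Some(1 << 20))
        .build(r"(?<=fn )\w+")
        .unwrap();
    assert!(matcher.is_match(b"fn main() {}").unwrap());
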
/// An implementation of the `Matcher` trait using PCRE2.
@@ -273,8 +235,7 @@ impl Matcher for RegexMatcher {
haystack: &[u8],
at: usize,
) -> Result<Option<Match>, Error> {
Ok(self
.regex
Ok(self.regex
.find_at(haystack, at)
.map_err(Error::regex)?
.map(|m| Match::new(m.start(), m.end())))
@@ -297,8 +258,7 @@ impl Matcher for RegexMatcher {
haystack: &[u8],
mut matched: F,
) -> Result<Result<(), E>, Error>
where
F: FnMut(Match) -> Result<bool, E>,
where F: FnMut(Match) -> Result<bool, E>
{
for result in self.regex.find_iter(haystack) {
let m = result.map_err(Error::regex)?;
@@ -317,11 +277,10 @@ impl Matcher for RegexMatcher {
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool, Error> {
Ok(self
.regex
.captures_read_at(&mut caps.locs, haystack, at)
.map_err(Error::regex)?
.is_some())
Ok(self.regex
.captures_read_at(&mut caps.locs, haystack, at)
.map_err(Error::regex)?
.is_some())
}
}
@@ -385,19 +344,23 @@ fn has_uppercase_literal(pattern: &str) -> bool {
#[cfg(test)]
mod tests {
use super::*;
use grep_matcher::{LineMatchKind, Matcher};
use super::*;
// Test that enabling word matches does the right thing and demonstrates
// the difference between it and surrounding the regex with `\b`.
#[test]
fn word() {
let matcher =
RegexMatcherBuilder::new().word(true).build(r"-2").unwrap();
let matcher = RegexMatcherBuilder::new()
.word(true)
.build(r"-2")
.unwrap();
assert!(matcher.is_match(b"abc -2 foo").unwrap());
let matcher =
RegexMatcherBuilder::new().word(false).build(r"\b-2\b").unwrap();
let matcher = RegexMatcherBuilder::new()
.word(false)
.build(r"\b-2\b")
.unwrap();
assert!(!matcher.is_match(b"abc -2 foo").unwrap());
}
@@ -430,12 +393,16 @@ mod tests {
// Test that smart case works.
#[test]
fn case_smart() {
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"abc").unwrap();
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"abc")
.unwrap();
assert!(matcher.is_match(b"ABC").unwrap());
let matcher =
RegexMatcherBuilder::new().case_smart(true).build(r"aBc").unwrap();
let matcher = RegexMatcherBuilder::new()
.case_smart(true)
.build(r"aBc")
.unwrap();
assert!(!matcher.is_match(b"ABC").unwrap());
}
@@ -449,7 +416,9 @@ mod tests {
}
}
let matcher = RegexMatcherBuilder::new().build(r"\wfoo\s").unwrap();
let matcher = RegexMatcherBuilder::new()
.build(r"\wfoo\s")
.unwrap();
let m = matcher.find_candidate_line(b"afoo ").unwrap().unwrap();
assert!(is_confirmed(m));
}


@@ -1,14 +1,14 @@
[package]
name = "grep-printer"
version = "0.1.5" #:version
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
An implementation of the grep crate's Sink trait that provides standard
printing of search results, similar to grep itself.
"""
documentation = "https://docs.rs/grep-printer"
homepage = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
repository = "https://github.com/BurntSushi/ripgrep/tree/master/crates/printer"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["grep", "pattern", "print", "printer", "sink"]
license = "Unlicense/MIT"
@@ -18,14 +18,13 @@ default = ["serde1"]
serde1 = ["base64", "serde", "serde_derive", "serde_json"]
[dependencies]
base64 = { version = "0.12.1", optional = true }
bstr = "0.2.0"
grep-matcher = { version = "0.1.2", path = "../matcher" }
grep-searcher = { version = "0.1.4", path = "../searcher" }
termcolor = "1.0.4"
base64 = { version = "0.9.2", optional = true }
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
grep-searcher = { version = "0.1.1", path = "../grep-searcher" }
termcolor = "1.0.3"
serde = { version = "1.0.77", optional = true }
serde_derive = { version = "1.0.77", optional = true }
serde_json = { version = "1.0.27", optional = true }
[dev-dependencies]
grep-regex = { version = "0.1.3", path = "../regex" }
grep-regex = { version = "0.1.1", path = "../grep-regex" }


@@ -62,32 +62,42 @@ impl ColorError {
impl fmt::Display for ColorError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
ColorError::UnrecognizedOutType(ref name) => write!(
f,
"unrecognized output type '{}'. Choose from: \
ColorError::UnrecognizedOutType(ref name) => {
write!(
f,
"unrecognized output type '{}'. Choose from: \
path, line, column, match.",
name,
),
ColorError::UnrecognizedSpecType(ref name) => write!(
f,
"unrecognized spec type '{}'. Choose from: \
name,
)
}
ColorError::UnrecognizedSpecType(ref name) => {
write!(
f,
"unrecognized spec type '{}'. Choose from: \
fg, bg, style, none.",
name,
),
ColorError::UnrecognizedColor(_, ref msg) => write!(f, "{}", msg),
ColorError::UnrecognizedStyle(ref name) => write!(
f,
"unrecognized style attribute '{}'. Choose from: \
name,
)
}
ColorError::UnrecognizedColor(_, ref msg) => {
write!(f, "{}", msg)
}
ColorError::UnrecognizedStyle(ref name) => {
write!(
f,
"unrecognized style attribute '{}'. Choose from: \
nobold, bold, nointense, intense, nounderline, \
underline.",
name,
),
ColorError::InvalidFormat(ref original) => write!(
f,
"invalid color spec format: '{}'. Valid format \
name,
)
}
ColorError::InvalidFormat(ref original) => {
write!(
f,
"invalid color spec format: '{}'. Valid format \
is '(path|line|column|match):(fg|bg|style):(value)'.",
original,
),
original,
)
}
}
}
}
@@ -217,7 +227,7 @@ enum Style {
Intense,
NoIntense,
Underline,
NoUnderline,
NoUnderline
}
impl ColorSpecs {
@@ -278,32 +288,18 @@ impl SpecValue {
fn merge_into(&self, cspec: &mut ColorSpec) {
match *self {
SpecValue::None => cspec.clear(),
SpecValue::Fg(ref color) => {
cspec.set_fg(Some(color.clone()));
SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); }
SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); }
SpecValue::Style(ref style) => {
match *style {
Style::Bold => { cspec.set_bold(true); }
Style::NoBold => { cspec.set_bold(false); }
Style::Intense => { cspec.set_intense(true); }
Style::NoIntense => { cspec.set_intense(false); }
Style::Underline => { cspec.set_underline(true); }
Style::NoUnderline => { cspec.set_underline(false); }
}
}
SpecValue::Bg(ref color) => {
cspec.set_bg(Some(color.clone()));
}
SpecValue::Style(ref style) => match *style {
Style::Bold => {
cspec.set_bold(true);
}
Style::NoBold => {
cspec.set_bold(false);
}
Style::Intense => {
cspec.set_intense(true);
}
Style::NoIntense => {
cspec.set_intense(false);
}
Style::Underline => {
cspec.set_underline(true);
}
Style::NoUnderline => {
cspec.set_underline(false);
}
},
}
}
}
@@ -319,7 +315,10 @@ impl FromStr for UserColorSpec {
let otype: OutType = pieces[0].parse()?;
match pieces[1].parse()? {
SpecType::None => {
Ok(UserColorSpec { ty: otype, value: SpecValue::None })
Ok(UserColorSpec {
ty: otype,
value: SpecValue::None,
})
}
SpecType::Style => {
if pieces.len() < 3 {
@@ -332,16 +331,18 @@ impl FromStr for UserColorSpec {
if pieces.len() < 3 {
return Err(ColorError::InvalidFormat(s.to_string()));
}
let color: Color =
pieces[2].parse().map_err(ColorError::from_parse_error)?;
let color: Color = pieces[2]
.parse()
.map_err(ColorError::from_parse_error)?;
Ok(UserColorSpec { ty: otype, value: SpecValue::Fg(color) })
}
SpecType::Bg => {
if pieces.len() < 3 {
return Err(ColorError::InvalidFormat(s.to_string()));
}
let color: Color =
pieces[2].parse().map_err(ColorError::from_parse_error)?;
let color: Color = pieces[2]
.parse()
.map_err(ColorError::from_parse_error)?;
Ok(UserColorSpec { ty: otype, value: SpecValue::Bg(color) })
}
}
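
End to end, the format parsed above looks like this (a sketch, assuming the usual grep-printer imports):

    let user_specs: Vec<UserColorSpec> = vec![
        "match:fg:magenta".parse().unwrap(),
        "line:style:bold".parse().unwrap(),
        "path:none".parse().unwrap(), // clears any styling for paths
    ];
    let specs = ColorSpecs::new(&user_specs);
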


@@ -4,8 +4,8 @@ use std::time::Instant;
use grep_matcher::{Match, Matcher};
use grep_searcher::{
Searcher, Sink, SinkContext, SinkContextKind, SinkError, SinkFinish,
SinkMatch,
Searcher,
Sink, SinkError, SinkContext, SinkContextKind, SinkFinish, SinkMatch,
};
use serde_json as json;
@@ -27,7 +27,11 @@ struct Config {
impl Default for Config {
fn default() -> Config {
Config { pretty: false, max_matches: None, always_begin_end: false }
Config {
pretty: false,
max_matches: None,
always_begin_end: false,
}
}
}
@@ -488,9 +492,8 @@ impl<W: io::Write> JSON<W> {
matcher: M,
path: &'p P,
) -> JSONSink<'p, 's, M, W>
where
M: Matcher,
P: ?Sized + AsRef<Path>,
where M: Matcher,
P: ?Sized + AsRef<Path>,
{
JSONSink {
matcher: matcher,
@@ -612,12 +615,10 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
// the extent that it's easy to ensure that we never do more than
// one search to find the matches.
let matches = &mut self.json.matches;
self.matcher
.find_iter(bytes, |m| {
matches.push(m);
true
})
.map_err(io::Error::error_message)?;
self.matcher.find_iter(bytes, |m| {
matches.push(m);
true
}).map_err(io::Error::error_message)?;
// Don't report empty matches appearing at the end of the bytes.
if !matches.is_empty()
&& matches.last().unwrap().is_empty()
@@ -649,7 +650,9 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
if self.begin_printed {
return Ok(());
}
let msg = jsont::Message::Begin(jsont::Begin { path: self.path });
let msg = jsont::Message::Begin(jsont::Begin {
path: self.path,
});
self.json.write_message(&msg)?;
self.begin_printed = true;
Ok(())
@@ -696,12 +699,13 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
self.after_context_remaining =
self.after_context_remaining.saturating_sub(1);
}
let submatches = if searcher.invert_match() {
self.record_matches(ctx.bytes())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
} else {
SubMatches::empty()
};
let submatches =
if searcher.invert_match() {
self.record_matches(ctx.bytes())?;
SubMatches::new(ctx.bytes(), &self.json.matches)
} else {
SubMatches::empty()
};
let msg = jsont::Message::Context(jsont::Context {
path: self.path,
lines: ctx.bytes(),
@@ -713,7 +717,10 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
Ok(!self.should_quit())
}
fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
fn begin(
&mut self,
_searcher: &Searcher,
) -> Result<bool, io::Error> {
self.json.wtr.reset_count();
self.start_time = Instant::now();
self.match_count = 0;
@@ -772,7 +779,7 @@ enum SubMatches<'a> {
impl<'a> SubMatches<'a> {
/// Create a new set of match ranges from a set of matches and the
/// corresponding bytes that those matches apply to.
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
fn new(bytes: &'a[u8], matches: &[Match]) -> SubMatches<'a> {
if matches.len() == 1 {
let mat = matches[0];
SubMatches::Small([jsont::SubMatch {
@@ -810,11 +817,10 @@ impl<'a> SubMatches<'a> {
#[cfg(test)]
mod tests {
use grep_matcher::LineTerminator;
use grep_regex::{RegexMatcher, RegexMatcherBuilder};
use grep_regex::RegexMatcher;
use grep_searcher::SearcherBuilder;
use super::{JSONBuilder, JSON};
use super::{JSON, JSONBuilder};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -825,7 +831,9 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
fn printer_contents(printer: &mut JSON<Vec<u8>>) -> String {
fn printer_contents(
printer: &mut JSON<Vec<u8>>,
) -> String {
String::from_utf8(printer.get_mut().to_owned()).unwrap()
}
@@ -842,8 +850,11 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = JSONBuilder::new()
.build(vec![]);
SearcherBuilder::new()
.binary_detection(BinaryDetection::quit(b'\x00'))
.heap_limit(Some(80))
@@ -859,9 +870,12 @@ and exhibited clearly, with a label attached.\
#[test]
fn max_matches() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let mut printer =
JSONBuilder::new().max_matches(Some(1)).build(vec![]);
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = JSONBuilder::new()
.max_matches(Some(1))
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
@@ -873,8 +887,11 @@ and exhibited clearly, with a label attached.\
#[test]
fn no_match() {
let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
let matcher = RegexMatcher::new(
r"DOES NOT MATCH"
).unwrap();
let mut printer = JSONBuilder::new()
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
@@ -886,9 +903,12 @@ and exhibited clearly, with a label attached.\
#[test]
fn always_begin_end_no_match() {
let matcher = RegexMatcher::new(r"DOES NOT MATCH").unwrap();
let mut printer =
JSONBuilder::new().always_begin_end(true).build(vec![]);
let matcher = RegexMatcher::new(
r"DOES NOT MATCH"
).unwrap();
let mut printer = JSONBuilder::new()
.always_begin_end(true)
.build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher))
@@ -898,39 +918,4 @@ and exhibited clearly, with a label attached.\
assert_eq!(got.lines().count(), 2);
assert!(got.contains("begin") && got.contains("end"));
}
#[test]
fn missing_crlf() {
let haystack = "test\r\n".as_bytes();
let matcher = RegexMatcherBuilder::new().build("test").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
SearcherBuilder::new()
.build()
.search_reader(&matcher, haystack, printer.sink(&matcher))
.unwrap();
let got = printer_contents(&mut printer);
assert_eq!(got.lines().count(), 3);
assert!(
got.lines().nth(1).unwrap().contains(r"test\r\n"),
r"missing 'test\r\n' in '{}'",
got.lines().nth(1).unwrap(),
);
let matcher =
RegexMatcherBuilder::new().crlf(true).build("test").unwrap();
let mut printer = JSONBuilder::new().build(vec![]);
SearcherBuilder::new()
.line_terminator(LineTerminator::crlf())
.build()
.search_reader(&matcher, haystack, printer.sink(&matcher))
.unwrap();
let got = printer_contents(&mut printer);
assert_eq!(got.lines().count(), 3);
assert!(
got.lines().nth(1).unwrap().contains(r"test\r\n"),
r"missing 'test\r\n' in '{}'",
got.lines().nth(1).unwrap(),
);
}
}


@@ -80,9 +80,7 @@ pub struct SubMatch<'a> {
#[derive(Clone, Debug, Hash, PartialEq, Eq, Serialize)]
#[serde(untagged)]
enum Data<'a> {
Text {
text: Cow<'a, str>,
},
Text { text: Cow<'a, str> },
Bytes {
#[serde(serialize_with = "to_base64")]
bytes: &'a [u8],
@@ -118,26 +116,32 @@ impl<'a> Data<'a> {
}
}
fn to_base64<T, S>(bytes: T, ser: S) -> Result<S::Ok, S::Error>
where
T: AsRef<[u8]>,
S: Serializer,
fn to_base64<T, S>(
bytes: T,
ser: S,
) -> Result<S::Ok, S::Error>
where T: AsRef<[u8]>,
S: Serializer
{
ser.serialize_str(&base64::encode(&bytes))
}
fn ser_bytes<T, S>(bytes: T, ser: S) -> Result<S::Ok, S::Error>
where
T: AsRef<[u8]>,
S: Serializer,
fn ser_bytes<T, S>(
bytes: T,
ser: S,
) -> Result<S::Ok, S::Error>
where T: AsRef<[u8]>,
S: Serializer
{
Data::from_bytes(bytes.as_ref()).serialize(ser)
}
fn ser_path<P, S>(path: &Option<P>, ser: S) -> Result<S::Ok, S::Error>
where
P: AsRef<Path>,
S: Serializer,
fn ser_path<P, S>(
path: &Option<P>,
ser: S,
) -> Result<S::Ok, S::Error>
where P: AsRef<Path>,
S: Serializer
{
path.as_ref().map(|p| Data::from_path(p.as_ref())).serialize(ser)
}
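
A hypothetical struct wired to these helpers (illustrative, not from the crate): a field holding arbitrary bytes is routed through `to_base64` so invalid UTF-8 survives the trip into JSON:

    #[derive(Serialize)]
    struct Blob<'a> {
        #[serde(serialize_with = "to_base64")]
        raw: &'a [u8],
    }
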


@@ -70,7 +70,6 @@ fn example() -> Result<(), Box<Error>> {
#[cfg(feature = "serde1")]
extern crate base64;
extern crate bstr;
extern crate grep_matcher;
#[cfg(test)]
extern crate grep_regex;
@@ -84,9 +83,9 @@ extern crate serde_derive;
extern crate serde_json;
extern crate termcolor;
pub use color::{default_color_specs, ColorError, ColorSpecs, UserColorSpec};
pub use color::{ColorError, ColorSpecs, UserColorSpec, default_color_specs};
#[cfg(feature = "serde1")]
pub use json::{JSONBuilder, JSONSink, JSON};
pub use json::{JSON, JSONBuilder, JSONSink};
pub use standard::{Standard, StandardBuilder, StandardSink};
pub use stats::Stats;
pub use summary::{Summary, SummaryBuilder, SummaryKind, SummarySink};


@@ -1,4 +1,3 @@
/// Like assert_eq, but nicer output for long strings.
#[cfg(test)]
#[macro_export]
macro_rules! assert_eq_printed {


@@ -34,8 +34,8 @@ impl<'a> Add<&'a Stats> for Stats {
Stats {
elapsed: NiceDuration(self.elapsed.0 + rhs.elapsed.0),
searches: self.searches + rhs.searches,
searches_with_match: self.searches_with_match
+ rhs.searches_with_match,
searches_with_match:
self.searches_with_match + rhs.searches_with_match,
bytes_searched: self.bytes_searched + rhs.bytes_searched,
bytes_printed: self.bytes_printed + rhs.bytes_printed,
matched_lines: self.matched_lines + rhs.matched_lines,

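A usage sketch of the `Add` impl above: per-thread statistics folded into one grand total (the `worker_stats` vector is hypothetical):

    let total: Stats =
        worker_stats.iter().fold(Stats::new(), |acc, s| acc + s);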

@@ -168,7 +168,10 @@ impl SummaryBuilder {
///
/// This is a convenience routine for
/// `SummaryBuilder::build(termcolor::NoColor::new(wtr))`.
pub fn build_no_color<W: io::Write>(&self, wtr: W) -> Summary<NoColor<W>> {
pub fn build_no_color<W: io::Write>(
&self,
wtr: W,
) -> Summary<NoColor<W>> {
self.build(NoColor::new(wtr))
}
@@ -199,9 +202,10 @@ impl SummaryBuilder {
/// This completely overrides any previous color specifications. This does
/// not add to any previously provided color specifications on this
/// builder.
///
/// The default color specifications provide no styling.
pub fn color_specs(&mut self, specs: ColorSpecs) -> &mut SummaryBuilder {
pub fn color_specs(
&mut self,
specs: ColorSpecs,
) -> &mut SummaryBuilder {
self.config.colors = specs;
self
}
@@ -252,8 +256,6 @@ impl SummaryBuilder {
/// If multi line search is enabled and a match spans multiple lines, then
/// that match is counted exactly once for the purposes of enforcing this
/// limit, regardless of how many lines it spans.
///
/// This is disabled by default.
pub fn max_matches(&mut self, limit: Option<u64>) -> &mut SummaryBuilder {
self.config.max_matches = limit;
self
@@ -264,8 +266,6 @@ impl SummaryBuilder {
/// When enabled and the mode is either `Count` or `CountMatches`, then
/// results are not printed if no matches were found. Otherwise, every
/// search prints a result with a possibly `0` number of matches.
///
/// This is enabled by default.
pub fn exclude_zero(&mut self, yes: bool) -> &mut SummaryBuilder {
self.config.exclude_zero = yes;
self
@@ -275,7 +275,10 @@ impl SummaryBuilder {
/// `CountMatches` modes.
///
/// By default, this is set to `:`.
pub fn separator_field(&mut self, sep: Vec<u8>) -> &mut SummaryBuilder {
pub fn separator_field(
&mut self,
sep: Vec<u8>,
) -> &mut SummaryBuilder {
self.config.separator_field = Arc::new(sep);
self
}
@@ -289,9 +292,10 @@ impl SummaryBuilder {
/// A typical use for this option is to permit cygwin users on Windows to
/// set the path separator to `/` instead of using the system default of
/// `\`.
///
/// This is disabled by default.
pub fn separator_path(&mut self, sep: Option<u8>) -> &mut SummaryBuilder {
pub fn separator_path(
&mut self,
sep: Option<u8>,
) -> &mut SummaryBuilder {
self.config.separator_path = sep;
self
}
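
Combining the options documented above into a single builder chain (a sketch; the writer here is an in-memory `Vec<u8>`):

    let mut printer = SummaryBuilder::new()
        .kind(SummaryKind::Count)
        .separator_field(b":".to_vec())
        .separator_path(Some(b'/'))
        .max_matches(Some(50))
        .build_no_color(vec![]);
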
@@ -370,11 +374,12 @@ impl<W: WriteColor> Summary<W> {
&'s mut self,
matcher: M,
) -> SummarySink<'static, 's, M, W> {
let stats = if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
let stats =
if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
SummarySink {
matcher: matcher,
summary: self,
@@ -395,22 +400,20 @@ impl<W: WriteColor> Summary<W> {
matcher: M,
path: &'p P,
) -> SummarySink<'p, 's, M, W>
where
M: Matcher,
P: ?Sized + AsRef<Path>,
where M: Matcher,
P: ?Sized + AsRef<Path>,
{
if !self.config.path && !self.config.kind.requires_path() {
if !self.config.path {
return self.sink(matcher);
}
let stats = if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
let stats =
if self.config.stats || self.config.kind.requires_stats() {
Some(Stats::new())
} else {
None
};
let ppath = PrinterPath::with_separator(
path.as_ref(),
self.config.separator_path,
);
path.as_ref(), self.config.separator_path);
SummarySink {
matcher: matcher,
summary: self,
@@ -474,10 +477,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> SummarySink<'p, 's, M, W> {
/// This is unaffected by the result of searches before the previous
/// search.
pub fn has_match(&self) -> bool {
match self.summary.config.kind {
SummaryKind::PathWithoutMatch => self.match_count == 0,
_ => self.match_count > 0,
}
self.match_count > 0
}
/// If binary data was found in the previous search, this returns the
@@ -585,25 +585,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
self.match_count += 1;
if let Some(ref mut stats) = self.stats {
let mut match_count = 0;
self.matcher
.find_iter(mat.bytes(), |_| {
match_count += 1;
true
})
.map_err(io::Error::error_message)?;
if match_count == 0 {
// It is possible for the match count to be zero when
// look-around is used. Since `SinkMatch` won't necessarily
// contain the look-around in its match span, the search here
// could fail to find anything.
//
// Setting match_count=1 here is likely wrong in some cases, but
// I don't think we can do any better. (Because this printer
// cannot assume that subsequent
// contents have been loaded into memory, so we have no way of
// increasing the search span here.)
match_count = 1;
}
self.matcher.find_iter(mat.bytes(), |_| {
match_count += 1;
true
}).map_err(io::Error::error_message)?;
stats.add_matches(match_count);
stats.add_matched_lines(mat.lines().count() as u64);
} else if self.summary.config.kind.quit_early() {
@@ -612,7 +597,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
Ok(!self.should_quit())
}
fn begin(&mut self, _searcher: &Searcher) -> Result<bool, io::Error> {
fn begin(
&mut self,
_searcher: &Searcher,
) -> Result<bool, io::Error> {
if self.path.is_none() && self.summary.config.kind.requires_path() {
return Err(io::Error::error_message(format!(
"output kind {:?} requires a file path",
@@ -645,37 +633,10 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
stats.add_bytes_searched(finish.byte_count());
stats.add_bytes_printed(self.summary.wtr.borrow().count());
}
// If our binary detection method says to quit after seeing binary
// data, then we shouldn't print any results at all, even if we've
// found a match before detecting binary data. The intent here is to
// keep BinaryDetection::quit as a form of filter. Otherwise, we can
// present a matching file with a smaller number of matches than
// there might be, which can be quite misleading.
//
// If our binary detection method is to convert binary data, then we
// don't quit and therefore search the entire contents of the file.
//
// There is an unfortunate inconsistency here. Namely, when using
// Quiet or PathWithMatch, then the printer can quit after the first
// match seen, which could be long before seeing binary data. This
// means that using PathWithMatch can print a path where as using
// Count might not print it at all because of binary data.
//
// It's not possible to fix this without also potentially significantly
// impacting the performance of Quiet or PathWithMatch, so we accept
// the bug.
if self.binary_byte_offset.is_some()
&& searcher.binary_detection().quit_byte().is_some()
{
// Squash the match count. The statistics reported will still
// contain the match count, but the "official" match count should
// be zero.
self.match_count = 0;
return Ok(());
}
let show_count =
!self.summary.config.exclude_zero || self.match_count > 0;
!self.summary.config.exclude_zero
|| self.match_count > 0;
match self.summary.config.kind {
SummaryKind::Count => {
if show_count {
@@ -686,8 +647,7 @@ impl<'p, 's, M: Matcher, W: WriteColor> Sink for SummarySink<'p, 's, M, W> {
}
SummaryKind::CountMatches => {
if show_count {
let stats = self
.stats
let stats = self.stats
.as_ref()
.expect("CountMatches should enable stats tracking");
self.write_path_field()?;
@@ -717,7 +677,7 @@ mod tests {
use grep_searcher::SearcherBuilder;
use termcolor::NoColor;
use super::{Summary, SummaryBuilder, SummaryKind};
use super::{Summary, SummaryKind, SummaryBuilder};
const SHERLOCK: &'static [u8] = b"\
For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -728,41 +688,45 @@ but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
fn printer_contents(printer: &mut Summary<NoColor<Vec<u8>>>) -> String {
fn printer_contents(
printer: &mut Summary<NoColor<Vec<u8>>>,
) -> String {
String::from_utf8(printer.get_mut().get_ref().to_owned()).unwrap()
}
#[test]
fn path_with_match_error() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithMatch)
.build_no_color(vec![]);
let res = SearcherBuilder::new().build().search_reader(
&matcher,
SHERLOCK,
printer.sink(&matcher),
);
let res = SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher));
assert!(res.is_err());
}
#[test]
fn path_without_match_error() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithoutMatch)
.build_no_color(vec![]);
let res = SearcherBuilder::new().build().search_reader(
&matcher,
SHERLOCK,
printer.sink(&matcher),
);
let res = SearcherBuilder::new()
.build()
.search_reader(&matcher, SHERLOCK, printer.sink(&matcher));
assert!(res.is_err());
}
#[test]
fn count_no_path() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.build_no_color(vec![]);
@@ -777,7 +741,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_no_path_even_with_path() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.path(false)
@@ -797,7 +763,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.build_no_color(vec![]);
@@ -816,7 +784,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_with_zero() {
let matcher = RegexMatcher::new(r"NO MATCH").unwrap();
let matcher = RegexMatcher::new(
r"NO MATCH"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.exclude_zero(false)
@@ -836,7 +806,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_without_zero() {
let matcher = RegexMatcher::new(r"NO MATCH").unwrap();
let matcher = RegexMatcher::new(
r"NO MATCH"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.exclude_zero(true)
@@ -856,7 +828,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_field_separator() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.separator_field(b"ZZ".to_vec())
@@ -876,7 +850,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_terminator() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.path_terminator(Some(b'\x00'))
@@ -896,7 +872,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_path_separator() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.separator_path(Some(b'\\'))
@@ -916,7 +894,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_max_matches() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Count)
.max_matches(Some(1))
@@ -932,7 +912,9 @@ and exhibited clearly, with a label attached.
#[test]
fn count_matches() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let matcher = RegexMatcher::new(
r"Watson|Sherlock"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::CountMatches)
.build_no_color(vec![]);
@@ -951,7 +933,9 @@ and exhibited clearly, with a label attached.
#[test]
fn path_with_match_found() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithMatch)
.build_no_color(vec![]);
@@ -970,7 +954,9 @@ and exhibited clearly, with a label attached.
#[test]
fn path_with_match_not_found() {
let matcher = RegexMatcher::new(r"ZZZZZZZZ").unwrap();
let matcher = RegexMatcher::new(
r"ZZZZZZZZ"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithMatch)
.build_no_color(vec![]);
@@ -987,9 +973,12 @@ and exhibited clearly, with a label attached.
assert_eq_printed!("", got);
}
#[test]
fn path_without_match_found() {
let matcher = RegexMatcher::new(r"ZZZZZZZZZ").unwrap();
let matcher = RegexMatcher::new(
r"ZZZZZZZZZ"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithoutMatch)
.build_no_color(vec![]);
@@ -1008,7 +997,9 @@ and exhibited clearly, with a label attached.
#[test]
fn path_without_match_not_found() {
let matcher = RegexMatcher::new(r"Watson").unwrap();
let matcher = RegexMatcher::new(
r"Watson"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::PathWithoutMatch)
.build_no_color(vec![]);
@@ -1027,7 +1018,9 @@ and exhibited clearly, with a label attached.
#[test]
fn quiet() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let matcher = RegexMatcher::new(
r"Watson|Sherlock"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.build_no_color(vec![]);
@@ -1049,7 +1042,9 @@ and exhibited clearly, with a label attached.
#[test]
fn quiet_with_stats() {
let matcher = RegexMatcher::new(r"Watson|Sherlock").unwrap();
let matcher = RegexMatcher::new(
r"Watson|Sherlock"
).unwrap();
let mut printer = SummaryBuilder::new()
.kind(SummaryKind::Quiet)
.stats(true)


@@ -4,10 +4,10 @@ use std::io;
use std::path::Path;
use std::time;
use bstr::{ByteSlice, ByteVec};
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
use grep_searcher::{
LineIter, SinkContext, SinkContextKind, SinkError, SinkMatch,
LineIter,
SinkError, SinkContext, SinkContextKind, SinkMatch,
};
#[cfg(feature = "serde1")]
use serde::{Serialize, Serializer};
@@ -57,13 +57,19 @@ impl<M: Matcher> Replacer<M> {
replacement: &[u8],
) -> io::Result<()> {
{
let &mut Space { ref mut dst, ref mut caps, ref mut matches } =
self.allocate(matcher)?;
let &mut Space {
ref mut dst,
ref mut caps,
ref mut matches,
} = self.allocate(matcher)?;
dst.clear();
matches.clear();
matcher
.replace_with_captures(subject, caps, dst, |caps, dst| {
matcher.replace_with_captures(
subject,
caps,
dst,
|caps, dst| {
let start = dst.len();
caps.interpolate(
|name| matcher.capture_index(name),
@@ -74,8 +80,8 @@ impl<M: Matcher> Replacer<M> {
let end = dst.len();
matches.push(Match::new(start, end));
true
})
.map_err(io::Error::error_message)?;
},
).map_err(io::Error::error_message)?;
}
Ok(())
}
@@ -115,10 +121,14 @@ impl<M: Matcher> Replacer<M> {
/// matcher fails.
fn allocate(&mut self, matcher: &M) -> io::Result<&mut Space<M>> {
if self.space.is_none() {
let caps =
matcher.new_captures().map_err(io::Error::error_message)?;
self.space =
Some(Space { caps: caps, dst: vec![], matches: vec![] });
let caps = matcher
.new_captures()
.map_err(io::Error::error_message)?;
self.space = Some(Space {
caps: caps,
dst: vec![],
matches: vec![],
});
}
Ok(self.space.as_mut().unwrap())
}
@@ -165,8 +175,9 @@ impl<'a> Sunk<'a> {
original_matches: &'a [Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> Sunk<'a> {
let (bytes, matches) =
replacement.unwrap_or_else(|| (sunk.bytes(), original_matches));
let (bytes, matches) = replacement.unwrap_or_else(|| {
(sunk.bytes(), original_matches)
});
Sunk {
bytes: bytes,
absolute_byte_offset: sunk.absolute_byte_offset(),
@@ -183,8 +194,9 @@ impl<'a> Sunk<'a> {
original_matches: &'a [Match],
replacement: Option<(&'a [u8], &'a [Match])>,
) -> Sunk<'a> {
let (bytes, matches) =
replacement.unwrap_or_else(|| (sunk.bytes(), original_matches));
let (bytes, matches) = replacement.unwrap_or_else(|| {
(sunk.bytes(), original_matches)
});
Sunk {
bytes: bytes,
absolute_byte_offset: sunk.absolute_byte_offset(),
@@ -255,7 +267,21 @@ pub struct PrinterPath<'a>(Cow<'a, [u8]>);
impl<'a> PrinterPath<'a> {
/// Create a new path suitable for printing.
pub fn new(path: &'a Path) -> PrinterPath<'a> {
PrinterPath(Vec::from_path_lossy(path))
PrinterPath::new_impl(path)
}
#[cfg(unix)]
fn new_impl(path: &'a Path) -> PrinterPath<'a> {
use std::os::unix::ffi::OsStrExt;
PrinterPath(Cow::Borrowed(path.as_os_str().as_bytes()))
}
#[cfg(not(unix))]
fn new_impl(path: &'a Path) -> PrinterPath<'a> {
PrinterPath(match path.to_string_lossy() {
Cow::Owned(path) => Cow::Owned(path.into_bytes()),
Cow::Borrowed(path) => Cow::Borrowed(path.as_bytes()),
})
}
/// Create a new printer path from the given path which can be efficiently
@@ -276,23 +302,19 @@ impl<'a> PrinterPath<'a> {
/// path separators that are both replaced by `new_sep`. In all other
/// environments, only `/` is treated as a path separator.
fn replace_separator(&mut self, new_sep: u8) {
let transformed_path: Vec<u8> = self
.0
.bytes()
.map(|b| {
if b == b'/' || (cfg!(windows) && b == b'\\') {
new_sep
} else {
b
}
})
.collect();
let transformed_path: Vec<_> = self.as_bytes().iter().map(|&b| {
if b == b'/' || (cfg!(windows) && b == b'\\') {
new_sep
} else {
b
}
}).collect();
self.0 = Cow::Owned(transformed_path);
}
/// Return the raw bytes for this path.
pub fn as_bytes(&self) -> &[u8] {
&self.0
&*self.0
}
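
A sketch of the separator rewriting above (`with_separator` is the public entry point that applies it):

    use std::path::Path;

    // `/` is rewritten on every platform; on Windows, `\` is too.
    let p = PrinterPath::with_separator(Path::new("src/lib.rs"), Some(b'.'));
    assert_eq!(p.as_bytes(), b"src.lib.rs");
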
}
@@ -337,7 +359,7 @@ impl Serialize for NiceDuration {
///
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
/// terminator.
pub fn trim_ascii_prefix(
pub fn trim_ascii_prefix_range(
line_term: LineTerminator,
slice: &[u8],
range: Match,
@@ -357,3 +379,14 @@ pub fn trim_ascii_prefix(
.count();
range.with_start(range.start() + count)
}
/// Trim prefix ASCII spaces from the given slice and return the corresponding
/// sub-slice.
pub fn trim_ascii_prefix(line_term: LineTerminator, slice: &[u8]) -> &[u8] {
let range = trim_ascii_prefix_range(
line_term,
slice,
Match::new(0, slice.len()),
);
&slice[range]
}
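
The two trimming entry points above in action (a sketch; the default line terminator is `\n`):

    let lt = LineTerminator::default();
    assert_eq!(trim_ascii_prefix(lt, b"   foo "), b"foo ");
    let r = trim_ascii_prefix_range(lt, b"   foo", Match::new(0, 6));
    assert_eq!((r.start(), r.end()), (3, 6));
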

grep-regex/Cargo.toml (new file)

@@ -0,0 +1,21 @@
[package]
name = "grep-regex"
version = "0.1.1" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Use Rust's regex library with the 'grep' crate.
"""
documentation = "https://docs.rs/grep-regex"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "search", "pattern", "line"]
license = "Unlicense/MIT"
[dependencies]
log = "0.4.5"
grep-matcher = { version = "0.1.1", path = "../grep-matcher" }
regex = "1.0.5"
regex-syntax = "0.6.2"
thread_local = "0.3.6"
utf8-ranges = "1.0.1"
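
To make the new crate's role concrete, here is a minimal sketch of using it through the `Matcher` trait from grep-matcher. This is a hypothetical consumer, not code from this diff.

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcher;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let matcher = RegexMatcher::new(r"\w+")?;
        // Find the first word in the haystack.
        let m = matcher.find(b"hello world")?.unwrap();
        assert_eq!((m.start(), m.end()), (0, 5));
        Ok(())
    }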


@@ -1,5 +1,5 @@
use regex_syntax::ast::parse::Parser;
use regex_syntax::ast::{self, Ast};
use regex_syntax::ast::parse::Parser;
/// The results of analyzing the AST of a regular expression (e.g., for supporting
/// smart case).


@@ -1,13 +1,12 @@
use grep_matcher::{ByteSet, LineTerminator};
use regex::bytes::{Regex, RegexBuilder};
use regex_syntax::ast::{self, Ast};
use regex_syntax::hir::{self, Hir};
use regex_syntax::hir::Hir;
use ast::AstAnalysis;
use crlf::crlfify;
use error::Error;
use literal::LiteralSets;
use multi::alternation_literals;
use non_matching::non_matching_bytes;
use strip::strip_from_match;
@@ -51,8 +50,8 @@ impl Default for Config {
octal: false,
// These size limits are much bigger than what's in the regex
// crate.
size_limit: 100 * (1 << 20),
dfa_size_limit: 1000 * (1 << 20),
size_limit: 100 * (1<<20),
dfa_size_limit: 1000 * (1<<20),
nest_limit: 250,
line_terminator: None,
crlf: false,
@@ -68,17 +67,19 @@ impl Config {
/// If there was a problem parsing the given expression then an error
/// is returned.
pub fn hir(&self, pattern: &str) -> Result<ConfiguredHIR, Error> {
let ast = self.ast(pattern)?;
let analysis = self.analysis(&ast)?;
let expr = hir::translate::TranslatorBuilder::new()
let analysis = self.analysis(pattern)?;
let expr = ::regex_syntax::ParserBuilder::new()
.nest_limit(self.nest_limit)
.octal(self.octal)
.allow_invalid_utf8(true)
.case_insensitive(self.is_case_insensitive(&analysis))
.ignore_whitespace(self.ignore_whitespace)
.case_insensitive(self.is_case_insensitive(&analysis)?)
.multi_line(self.multi_line)
.dot_matches_new_line(self.dot_matches_new_line)
.swap_greed(self.swap_greed)
.unicode(self.unicode)
.build()
.translate(pattern, &ast)
.parse(pattern)
.map_err(Error::regex)?;
let expr = match self.line_terminator {
None => expr,
@@ -87,7 +88,7 @@ impl Config {
Ok(ConfiguredHIR {
original: pattern.to_string(),
config: self.clone(),
analysis,
analysis: analysis,
// If CRLF mode is enabled, replace `$` with `(?:\r?$)`.
expr: if self.crlf { crlfify(expr) } else { expr },
})
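
The old code parsed to an AST and translated it to an HIR in two explicit steps; the code above collapses both into `regex_syntax::ParserBuilder`. A minimal sketch of that builder in isolation, with illustrative option values:

    use regex_syntax::ParserBuilder;

    fn main() {
        let hir = ParserBuilder::new()
            .allow_invalid_utf8(true)
            .case_insensitive(true)
            .multi_line(true)
            .build()
            .parse(r"foo$")
            .unwrap();
        // The HIR's Display impl prints its concrete syntax.
        println!("{}", hir);
    }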
@@ -95,32 +96,24 @@ impl Config {
/// Accounting for the `smart_case` config knob, return true if and only if
/// this pattern should be matched case insensitively.
fn is_case_insensitive(&self, analysis: &AstAnalysis) -> bool {
fn is_case_insensitive(
&self,
analysis: &AstAnalysis,
) -> Result<bool, Error> {
if self.case_insensitive {
return true;
return Ok(true);
}
if !self.case_smart {
return false;
return Ok(false);
}
analysis.any_literal() && !analysis.any_uppercase()
}
/// Returns true if and only if this config is simple enough such that
/// if the pattern is a simple alternation of literals, then it can be
/// constructed via a plain Aho-Corasick automaton.
///
/// Note that it is OK to return true even when settings like `multi_line`
/// are enabled, since if multi-line can impact the match semantics of a
/// regex, then it is by definition not a simple alternation of literals.
pub fn can_plain_aho_corasick(&self) -> bool {
!self.word && !self.case_insensitive && !self.case_smart
Ok(analysis.any_literal() && !analysis.any_uppercase())
}
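
The effect of this logic is easiest to see through grep-regex's public builder. A small sketch, assuming the `case_smart` knob on `RegexMatcherBuilder` wires into the analysis above:

    use grep_matcher::Matcher;
    use grep_regex::RegexMatcherBuilder;

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        // An all-lowercase literal is matched case insensitively...
        let m = RegexMatcherBuilder::new().case_smart(true).build("foo")?;
        assert!(m.is_match(b"FOO")?);

        // ...but any uppercase in the pattern turns smart case off.
        let m = RegexMatcherBuilder::new().case_smart(true).build("Foo")?;
        assert!(!m.is_match(b"FOO")?);
        Ok(())
    }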
/// Perform analysis on the AST of this pattern.
///
/// This returns an error if the given pattern failed to parse.
fn analysis(&self, ast: &Ast) -> Result<AstAnalysis, Error> {
Ok(AstAnalysis::from_ast(ast))
fn analysis(&self, pattern: &str) -> Result<AstAnalysis, Error> {
Ok(AstAnalysis::from_ast(&self.ast(pattern)?))
}
/// Parse the given pattern into its abstract syntax.
@@ -167,28 +160,11 @@ impl ConfiguredHIR {
non_matching_bytes(&self.expr)
}
/// Returns true if and only if this regex needs to have its match offsets
/// tweaked because of CRLF support. Specifically, this occurs when the
/// CRLF hack is enabled and the regex is line anchored at the end. In
/// this case, matches that end with a `\r` have the `\r` stripped.
pub fn needs_crlf_stripped(&self) -> bool {
self.config.crlf && self.expr.is_line_anchored_end()
}
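
The tweak this doc comment describes amounts to shrinking a match by one byte when it ends in `\r`. A hypothetical standalone illustration, not the crate's code:

    use grep_matcher::Match;

    // Hypothetical helper mirroring the described tweak.
    fn strip_crlf(haystack: &[u8], m: Match) -> Match {
        if m.end() > m.start() && haystack[m.end() - 1] == b'\r' {
            m.with_end(m.end() - 1)
        } else {
            m
        }
    }

    fn main() {
        // With the CRLF hack, `foo$` can match `foo\r` in `foo\r\n`.
        let hay = b"foo\r\n";
        assert_eq!(strip_crlf(hay, Match::new(0, 4)), Match::new(0, 3));
    }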
/// Builds a regular expression from this HIR expression.
pub fn regex(&self) -> Result<Regex, Error> {
self.pattern_to_regex(&self.expr.to_string())
}
/// If this HIR corresponds to an alternation of literals with no
/// capturing groups, then this returns those literals.
pub fn alternation_literals(&self) -> Option<Vec<Vec<u8>>> {
if !self.config.can_plain_aho_corasick() {
return None;
}
alternation_literals(&self.expr)
}
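
The optimization mentioned here sidesteps the regex engine entirely for patterns like `foo|bar|quux`. A sketch of the underlying idea using the aho-corasick crate directly (API as in its 0.7 releases; newer versions return a `Result` from the constructor):

    use aho_corasick::AhoCorasick;

    fn main() {
        // A plain alternation of literals becomes a multi-pattern automaton.
        let ac = AhoCorasick::new(&["foo", "bar", "quux"]);
        let mat = ac.find("xxx bar yyy").unwrap();
        assert_eq!((mat.start(), mat.end()), (4, 7));
    }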
/// Applies the given function to the concrete syntax of this HIR and then
/// generates a new HIR based on the result of the function in a way that
/// preserves the configuration.
@@ -198,7 +174,8 @@ impl ConfiguredHIR {
pub fn with_pattern<F: FnMut(&str) -> String>(
&self,
mut f: F,
) -> Result<ConfiguredHIR, Error> {
) -> Result<ConfiguredHIR, Error>
{
self.pattern_to_hir(&f(&self.expr.to_string()))
}
@@ -222,7 +199,7 @@ impl ConfiguredHIR {
if self.config.line_terminator.is_none() {
return Ok(None);
}
match LiteralSets::new(&self.expr).one_regex(self.config.word) {
match LiteralSets::new(&self.expr).one_regex() {
None => Ok(None),
Some(pattern) => self.pattern_to_regex(&pattern).map(Some),
}
@@ -282,7 +259,7 @@ impl ConfiguredHIR {
original: self.original.clone(),
config: self.config.clone(),
analysis: self.analysis.clone(),
expr,
expr: expr,
})
}
}

Some files were not shown because too many files have changed in this diff.