mirror of https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 10:11:58 -07:00

Compare commits (31 commits):

9bf7696ec8  cb0f8fd2fa  fa8112ec34  cf21b4a97e  19615245cd  98a48b44bc
e3da726836  5b36c86c15  76331e5fec  1e678d7052  dd986d7fe9  f83cd63b11
9a4527d107  8f0d3d78ca  3f7cd977bc  cc6b6dcf5b  48878bbb8f  0766617e07
afd99c43d7  96e87ab738  a744ec133d  0042dce949  ca058d7584  af3b56a623
5938bed339  feff1849c8  9948e0ca07  fd3e5069b6  0891b4a3c0  af48aaa647
ee7f300ae2
.travis.yml (54 changed lines)

@@ -1,37 +1,49 @@
#language: rust
#rust:
# - stable
# - beta
# - nightly
#script:
# - cargo build --verbose
# - cargo doc
# - cargo test --verbose
# - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
#     cargo bench --verbose;
#   fi

language: rust
cache: cargo

env:
  global:
    - PROJECT_NAME=xrep
    - PROJECT_NAME=ripgrep
matrix:
  include:
    # Nightly channel
    - os: osx
      rust: nightly
      env: TARGET=i686-apple-darwin
    - os: osx
      rust: nightly
      env: TARGET=x86_64-apple-darwin
    # Nightly channel.
    # (All *nix releases are done on the nightly channel to take advantage
    # of the regex library's multiple pattern SIMD search.)
    - os: linux
      rust: nightly
      env: TARGET=i686-unknown-linux-musl
    - os: linux
      rust: nightly
      env: TARGET=x86_64-unknown-linux-musl
    - os: linux
      rust: nightly
      env: TARGET=x86_64-unknown-linux-gnu
    - os: osx
      rust: nightly
      env: TARGET=i686-apple-darwin
    - os: osx
      rust: nightly
      env: TARGET=x86_64-apple-darwin
    # Beta channel.
    - os: linux
      rust: beta
      env: TARGET=x86_64-unknown-linux-musl
    - os: linux
      rust: beta
      env: TARGET=x86_64-unknown-linux-gnu
    - os: osx
      rust: beta
      env: TARGET=x86_64-apple-darwin
    # Minimum Rust supported channel.
    - os: linux
      rust: 1.9.0
      env: TARGET=x86_64-unknown-linux-musl
    - os: linux
      rust: 1.9.0
      env: TARGET=x86_64-unknown-linux-gnu
    - os: osx
      rust: 1.9.0
      env: TARGET=x86_64-apple-darwin

before_install:
  - export PATH="$PATH:$HOME/.cargo/bin"
Cargo.toml (14 changed lines)

@@ -1,14 +1,14 @@
[package]
publish = false
name = "xrep"
name = "ripgrep"
version = "0.1.0"  #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Line oriented search tool using Rust's regex library.
"""
documentation = "https://github.com/BurntSushi/xrep"
homepage = "https://github.com/BurntSushi/xrep"
repository = "https://github.com/BurntSushi/xrep"
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"

@@ -16,7 +16,11 @@ license = "Unlicense/MIT"
[[bin]]
bench = false
path = "src/main.rs"
name = "xrep"
name = "rg"

[[test]]
name = "integration"
path = "tests/tests.rs"

[dependencies]
crossbeam = "0.2"
appveyor.yml (12 changed lines)

@@ -1,6 +1,6 @@
environment:
  global:
    PROJECT_NAME: xrep
    PROJECT_NAME: ripgrep
  matrix:
    # Nightly channel
    - TARGET: i686-pc-windows-gnu

@@ -32,16 +32,14 @@ build: false
# Equivalent to Travis' `script` phase
# TODO modify this phase as you see fit
test_script:
  - cargo build --verbose
  - cargo test
  - cargo test --verbose

before_deploy:
  # Generate artifacts for release
  - SET RUSTFLAGS="-C target-feature=+ssse3"
  - cargo build --release --features simd-accel
  # TODO(burntsushi): How can we enable SSSE3 on Windows?
  - cargo build --release
  - mkdir staging
  # TODO update this part to copy the artifacts that make sense for your project
  - copy target\release\xrep.exe staging
  - copy target\release\rg.exe staging
  - cd staging
  # release zipfile will look like 'rust-everywhere-v1.2.3-x86_64-pc-windows-msvc'
  - 7z a ../%PROJECT_NAME%-%APPVEYOR_REPO_TAG_NAME%-%TARGET%.zip *
@@ -1,3 +1,7 @@
/*!
This module benchmarks the glob implementation. For benchmarks on the ripgrep
tool itself, see the benchsuite directory.
*/
#![feature(test)]

extern crate glob;
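The hunk above adds a module-level doc comment and keeps the file as a nightly-only benchmark module (hence `#![feature(test)]`). For readers unfamiliar with that attribute, below is a minimal, self-contained sketch of a libtest benchmark in the same style; the `glob_match` helper and the pattern are illustrative assumptions, not code from this commit.

#![feature(test)]

extern crate test;

use test::Bencher;

// Hypothetical stand-in for the glob implementation that the real module benchmarks.
fn glob_match(pattern: &str, path: &str) -> bool {
    // Illustrative only: a real matcher compiles `pattern` once and reuses it.
    path.ends_with(pattern.trim_start_matches('*'))
}

#[bench]
fn bench_simple_extension_glob(b: &mut Bencher) {
    // b.iter runs the closure many times and reports nanoseconds per iteration.
    b.iter(|| test::black_box(glob_match("*.rs", "src/main.rs")));
}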
benchsuite (new executable file, 918 lines)

@@ -0,0 +1,918 @@
#!/usr/bin/env python

'''
benchsuite is a benchmark runner for comparing command line search tools.
'''

import argparse
import csv
import os
import os.path as path
from multiprocessing import cpu_count
import re
import statistics
import subprocess
import sys
import time

# Some constants for identifying the corpora we use to run tests.
# We establish two very different kinds of corpora: a small number of large
# files and a large number of small files. These are vastly different use cases
# not only because of their performance characteristics, but also the
# strategies used to increase the relevance of results returned.

SUBTITLES_DIR = 'subtitles'
SUBTITLES_EN_NAME = 'OpenSubtitles2016.raw.en'
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz'
SUBTITLES_RU_NAME = 'OpenSubtitles2016.raw.ru'
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz'

LINUX_DIR = 'linux'
LINUX_CLONE = 'git://github.com/BurntSushi/linux'


def bench_linux_literal_default(suite_dir):
    '''
    Benchmark the speed of a literal using *default* settings.

    This is a purposefully unfair benchmark for use in performance
    analysis, but it is pedagogically useful.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    # N.B. This is a purposefully unfair benchmark for illustrative purposes
    # of how the default modes for each search tool differ.
    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', pat]),
        mkcmd('ag', ['ag', pat]),
        # ucg reports the exact same matches as ag and rg even though it
        # doesn't read gitignore files. Instead, it has a file whitelist
        # that happens to match up exactly with the gitignores for this search.
        mkcmd('ucg', ['ucg', pat]),
        mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
        mkcmd('pt', ['pt', pat]),
        # sift reports an extra line here for a binary file matched.
        mkcmd('sift', ['sift', pat]),
    ])


def bench_linux_literal(suite_dir):
    '''
    Benchmark the speed of a literal, attempting to be fair.

    This tries to use the minimum set of options available in all tools
    to test how fast they are. For example, it makes sure there is no
    case insensitive matching and that line numbers are computed.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd('git grep', [
            'git', 'grep', '-I', '-n', pat,
        ], env={'LC_ALL': 'C'}),
        mkcmd('pt', ['pt', pat]),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_literal_casei(suite_dir):
    '''
    Benchmark the speed of a case insensitive literal search.

    This is like the linux_literal benchmark, except we ask the
    search tools to do case insensitive search.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
        ]),
        mkcmd('ag', ['ag', '-i', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
        mkcmd('ucg', ['ucg', '-i', pat]),
        mkcmd('git grep', [
            'git', 'grep', '-I', '-n', '-i', pat,
        ], env={'LC_ALL': 'C'}),
        # sift yields more matches than it should here. Specifically, it gets
        # matches in Module.symvers and System.map in the repo root. Both of
        # those files show up in the repo root's .gitignore file.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
        ]),
    ])


def bench_linux_re_literal_suffix(suite_dir):
    '''
    Benchmark the speed of a literal inside a regex.

    This, for example, inhibits a prefix byte optimization used
    inside of Go's regex engine (relevant for sift and pt).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = '[A-Z]+_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_word(suite_dir):
    '''
    Benchmark use of the -w ("match word") flag in each tool.

    sift has a lot of trouble with this because it forces it into Go's
    regex engine by surrounding the pattern with \b assertions.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-w', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
        ]),
        mkcmd('ag', ['ag', '-s', '-w', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', '-w', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
        ]),
    ])


def bench_linux_unicode_greek(suite_dir):
    '''
    Benchmark matching of a Unicode category.

    Only three tools (ripgrep, sift and pt) support this.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\p{Greek}'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        # sift tries to search a bunch of PDF files and clutters up the
        # results, even though --binary-skip is provided. They are excluded
        # here explicitly, but don't have a measurable impact on performance.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip',
            '--exclude-files', '.*',
            '--exclude-files', '*.pdf',
            pat,
        ]),
    ])


def bench_linux_unicode_greek_casei(suite_dir):
    '''
    Benchmark matching of a Unicode category, case insensitively.

    Only ripgrep gets this right (and it's still fast).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\p{Greek}'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        # sift tries to search a bunch of PDF files and clutters up the
        # results, even though --binary-skip is provided. They are excluded
        # here explicitly, but don't have a measurable impact on performance.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip',
            '--exclude-files', '.*',
            '--exclude-files', '*.pdf',
            pat,
        ]),
    ])


def bench_linux_unicode_word(suite_dir):
    '''
    Benchmark Unicode aware \w character class.

    Only ripgrep and git-grep (with LC_ALL=en_US.UTF-8) actually get
    this right. Everything else uses the standard ASCII interpretation
    of \w.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\wAh'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', pat,
        ]),
        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
        mkcmd('ag-novcs (no Unicode)', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift (no Unicode)', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_no_literal(suite_dir):
    '''
    Benchmark a regex that defeats all literal optimizations.

    Most search patterns have some kind of literal in them, which
    typically permits searches to take some shortcuts. Therefore, the
    applicability of this benchmark is somewhat suspicious, but the
    suite wouldn't feel complete without it.
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        mkcmd('rg-novcs (no Unicode)', [
            'rg', '--no-ignore', '-n', '(?-u)' + pat,
        ]),
        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
        mkcmd('ag-novcs (no Unicode)', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift (no Unicode)', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_alternates(suite_dir):
    '''
    Benchmark a small alternation of literals.

    sift doesn't make the cut. It's more than 10x slower than the next
    fastest result. The slowdown is likely because the Go regexp engine
    doesn't do any literal optimizations for this case (there is no
    common leading byte).
    '''
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', pat,
        ]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
    ])


def bench_linux_alternates_casei(suite_dir):
    'Benchmark a small alternation of literals case insensitively.'
    require(suite_dir, 'linux')
    cwd = path.join(suite_dir, LINUX_DIR)
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
        ]),
        mkcmd('ag', ['ag', '-i', pat]),
        mkcmd('ag-novcs', [
            'ag', '--skip-vcs-ignores', '-i', pat,
        ]),
        mkcmd('ucg', ['ucg', '-i', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', '-i', pat],
            env={'LC_ALL': 'C'},
        ),
    ])


# BREADCRUMBS(burntsushi): We should benchmark an alternation for `linux` as
# well.

def bench_sherlock(suite_dir):
    'TODO: Fix this and add more single file benchmarks.'
    require(suite_dir, 'subtitles-en')
    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME)
    pat = 'Sherlock'

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', pat, en]),
        Command('grep', ['grep', '-a', pat, en])
    ])


class MissingDependencies(Exception):
    '''
    A missing dependency exception.

    This exception occurs when running a benchmark that requires a
    particular corpus that isn't available.

    :ivar list(str) missing_names:
        A list of missing dependency names. These names correspond to
        names that can be used with the --download flag.
    '''
    def __init__(self, missing_names):
        self.missing_names = missing_names

    def __str__(self):
        return 'MissingDependency(%s)' % repr(self.missing_names)


class Benchmark(object):
    '''
    A single benchmark corresponding to a grouping of commands.

    The main purpose of a benchmark is to compare the performance
    characteristics of a group of commands.
    '''

    def __init__(self, name=None, pattern=None, commands=None,
                 warmup_count=1, count=3, line_count=True):
        '''
        Create a single benchmark.

        A single benchmark is composed of a set of commands that are
        benchmarked and compared against one another. A benchmark may
        have multiple commands that use the same search tool (but
        probably should have something differentiating them).

        The grouping of commands is a purely human driven process.

        By default, the output of every command is sent to /dev/null.
        Other types of behavior are available via the methods defined
        on this benchmark.

        :param str name:
            A human readable string denoting the name of this
            benchmark.
        :param str pattern:
            The pattern that is used in search.
        :param list(Command) commands:
            A list of commands to initialize this benchmark with. More
            commands may be added before running the benchmark.
        :param int warmup_count:
            The number of times to run each command before recording
            samples.
        :param int count:
            The number of samples to collect from each command.
        :param bool line_count:
            When set, the lines of each search are counted and included
            in the samples produced.
        '''
        self.name = name
        self.pattern = pattern
        self.commands = commands or []
        self.warmup_count = warmup_count
        self.count = count
        self.line_count = line_count

    def run(self):
        '''
        Runs this benchmark and returns the results.

        :rtype: Result
        '''
        result = Result(self)
        for cmd in self.commands:
            # Do a warmup first.
            for _ in range(self.warmup_count):
                self.run_one(cmd)
            for _ in range(self.count):
                result.add(cmd, **self.run_one(cmd))
        return result

    def run_one(self, cmd):
        '''
        Runs the given command exactly once.

        Returns an object that includes the time taken by the command.
        If this benchmark was configured to count the number of lines
        returned, then the line count is also returned.

        :param Command cmd: The command to run.
        :returns:
            A dict with two fields, duration and line_count.
            The duration is in seconds, with fractional milliseconds,
            and is guaranteed to be available. The line_count is set
            to None unless line counting is enabled, in which case,
            it is the number of lines in the search output.
        :rtype: dict
        '''
        cmd.kwargs['stderr'] = subprocess.DEVNULL
        if self.line_count:
            cmd.kwargs['stdout'] = subprocess.PIPE
        else:
            cmd.kwargs['stdout'] = subprocess.DEVNULL

        start = time.time()
        completed = cmd.run()
        end = time.time()

        line_count = None
        if self.line_count:
            line_count = completed.stdout.count(b'\n')
        return {
            'duration': end - start,
            'line_count': line_count,
        }


class Result(object):
    '''
    The result of running a benchmark.

    Benchmark results consist of a set of samples, where each sample
    corresponds to a single run of a single command in the benchmark.
    Various statistics can be computed from these samples such as mean
    and standard deviation.
    '''
    def __init__(self, benchmark):
        '''
        Create a new set of results, initially empty.

        :param Benchmark benchmark:
            The benchmark that produced these results.
        '''
        self.benchmark = benchmark
        self.samples = []

    def add(self, cmd, duration, line_count=None):
        '''
        Add a new sample to this result set.

        :param Command cmd:
            The command that produced this sample.
        :param int duration:
            The duration, in milliseconds, that the command took to
            run.
        :param int line_count:
            The number of lines in the search output. This is optional.
        '''
        self.samples.append({
            'cmd': cmd,
            'duration': duration,
            'line_count': line_count,
        })

    def fastest_sample(self):
        '''
        Returns the fastest recorded sample.
        '''
        return min(self.samples, key=lambda s: s['duration'])

    def fastest_cmd(self):
        '''
        Returns the fastest command according to distribution.
        '''
        means = []
        for cmd in self.benchmark.commands:
            mean, _ = self.distribution_for(cmd)
            means.append((cmd, mean))
        return min(means, key=lambda tup: tup[1])[0]

    def samples_for(self, cmd):
        'Returns an iterable of samples for cmd'
        yield from (s for s in self.samples if s['cmd'].name == cmd.name)

    def line_counts_for(self, cmd):
        '''
        Returns the line counts recorded for each command.

        :returns:
            A dictionary from command name to a set of line
            counts recorded.
        '''
        return {s['line_count'] for s in self.samples_for(cmd)
                if s['line_count'] is not None}

    def distribution_for(self, cmd):
        '''
        Returns the distribution (mean +/- std) of the given command.

        :rtype: (float, float)
        :returns:
            A tuple containing the mean and standard deviation, in that
            order.
        '''
        mean = statistics.mean(
            s['duration'] for s in self.samples_for(cmd))
        stdev = statistics.stdev(
            s['duration'] for s in self.samples_for(cmd))
        return mean, stdev


class Command(object):
    def __init__(self, name, cmd, *args, **kwargs):
        '''
        Create a new command that is run as part of a benchmark.

        *args and **kwargs are passed directly to ``subprocess.run``.
        An exception to this is stdin/stdout/stderr. Output
        redirection is completely controlled by the benchmark harness.
        Trying to set them here will trigger an assert.

        :param str name:
            The human readable name of this command. This is
            particularly useful if the same search tool is used
            multiple times in the same benchmark with different
            arguments.
        :param list(str) cmd:
            The command to run as a list of arguments (including the
            command name itself).
        '''
        assert 'stdin' not in kwargs
        assert 'stdout' not in kwargs
        assert 'stderr' not in kwargs
        self.name = name
        self.cmd = cmd
        self.args = args
        self.kwargs = kwargs

    def run(self):
        '''
        Runs this command and returns its status.

        :rtype: subprocess.CompletedProcess
        '''
        return subprocess.run(self.cmd, *self.args, **self.kwargs)


def eprint(*args, **kwargs):
    'Like print, but to stderr.'
    kwargs['file'] = sys.stderr
    print(*args, **kwargs)


def run_cmd(cmd, *args, **kwargs):
    '''
    Print the command to stderr and run it.

    If the command fails, throw a traceback.
    '''
    eprint('# %s' % ' '.join(cmd))
    kwargs['check'] = True
    return subprocess.run(cmd, *args, **kwargs)


def require(suite_dir, *names):
    '''
    Declare a dependency on the given names for a benchmark.

    If any dependency doesn't exist, then fail with an error message.
    '''
    errs = []
    for name in names:
        fun_name = name.replace('-', '_')
        if not globals()['has_%s' % fun_name](suite_dir):
            errs.append(name)
    if len(errs) > 0:
        raise MissingDependencies(errs)


def download_linux(suite_dir):
    'Download and build the Linux kernel.'
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    if not os.path.isdir(checkout_dir):
        # Clone from my fork so that we always get the same corpus *and* still
        # do a shallow clone. Shallow clones are much much cheaper than full
        # clones.
        run_cmd(['git', 'clone', '--depth', '1', LINUX_CLONE, checkout_dir])
    # We want to build the kernel because the process of building it produces
    # a lot of junk in the repository that a search tool probably shouldn't
    # touch.
    if not os.path.exists(path.join(checkout_dir, 'vmlinux')):
        eprint('# Building Linux kernel...')
        run_cmd(['make', 'defconfig'], cwd=checkout_dir)
        run_cmd(['make', '-j', str(cpu_count())], cwd=checkout_dir)


def has_linux(suite_dir):
    'Returns true if we believe the Linux kernel is built.'
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    return path.exists(path.join(checkout_dir, 'vmlinux'))


def download_subtitles_en(suite_dir):
    'Download and decompress English subtitles.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    en_path_gz = path.join(subtitle_dir, SUBTITLES_EN_NAME_GZ)
    en_path = path.join(subtitle_dir, SUBTITLES_EN_NAME)

    if not os.path.isdir(subtitle_dir):
        os.makedirs(subtitle_dir)
    if not os.path.exists(en_path):
        if not os.path.exists(en_path_gz):
            run_cmd(['curl', '-LO', SUBTITLES_EN_URL], cwd=subtitle_dir)
        run_cmd(['gunzip', en_path_gz], cwd=subtitle_dir)


def has_subtitles_en(suite_dir):
    'Returns true if English subtitles have been downloaded.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    return path.exists(path.join(subtitle_dir, SUBTITLES_EN_NAME))


def download_subtitles_ru(suite_dir):
    'Download and decompress Russian subtitles.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    ru_path_gz = path.join(subtitle_dir, SUBTITLES_RU_NAME_GZ)
    ru_path = path.join(subtitle_dir, SUBTITLES_RU_NAME)

    if not os.path.isdir(subtitle_dir):
        os.makedirs(subtitle_dir)
    if not os.path.exists(ru_path):
        if not os.path.exists(ru_path_gz):
            run_cmd(['curl', '-LO', SUBTITLES_RU_URL], cwd=subtitle_dir)
        run_cmd(['gunzip', ru_path_gz], cwd=subtitle_dir)


def has_subtitles_ru(suite_dir):
    'Returns true if Russian subtitles have been downloaded.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    return path.exists(path.join(subtitle_dir, SUBTITLES_RU_NAME))


def download(suite_dir, choices):
    '''
    Download choices into suite_dir.

    Specifically, choices specifies a list of corpora to fetch.

    :param str suite_dir:
        The directory in which to download corpora.
    :param list(str) choices:
        A list of corpora to download. Available choices are:
        all, linux, subtitles-en, subtitles-ru.
    '''
    for choice in choices:
        if choice == 'linux':
            download_linux(suite_dir)
        elif choice == 'subtitles-en':
            download_subtitles_en(suite_dir)
        elif choice == 'subtitles-ru':
            download_subtitles_ru(suite_dir)
        elif choice == 'all':
            download_linux(suite_dir)
            download_subtitles_en(suite_dir)
            download_subtitles_ru(suite_dir)
        else:
            eprint('Unrecognized download choice: %s' % choice)
            sys.exit(1)


def collect_benchmarks(suite_dir, filter_pat=None):
    '''
    Return an iterable of all runnable benchmarks.

    :param str suite_dir:
        The directory containing corpora.
    :param str filter_pat:
        A single regular expression that is used to filter benchmarks
        by their name. When not specified, all benchmarks are run.
    :returns:
        An iterable over all runnable benchmarks. If a benchmark
        requires corpora that are missing, then a log message is
        emitted to stderr and it is not yielded.
    '''
    for fun in sorted(globals()):
        if not fun.startswith('bench_'):
            continue
        name = re.sub('^bench_', '', fun)
        if filter_pat is not None and not re.search(filter_pat, name):
            continue
        try:
            benchmark = globals()[fun](suite_dir)
        except MissingDependencies as e:
            eprint(
                'missing: %s, skipping benchmark %s (try running with: %s)' % (
                    ', '.join(e.missing_names),
                    name,
                    ' '.join(['--download %s' % n for n in e.missing_names]),
                ))
            continue
        benchmark.name = name
        yield benchmark


def main():
    p = argparse.ArgumentParser('Command line search tool benchmark suite.')
    p.add_argument(
        '--dir', metavar='PATH', default=os.getcwd(),
        help='The directory in which to download data and perform searches.')
    p.add_argument(
        '--download', metavar='CORPUS', action='append',
        choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
        help='Download and prepare corpus data, then exit without running '
             'any benchmarks. Note that this command is intended to be '
             'idempotent. WARNING: This downloads over a gigabyte of data, '
             'and also includes building the Linux kernel. If "all" is used '
             'then the total uncompressed size is around 13 GB.')
    p.add_argument(
        '-f', '--force', action='store_true',
        help='Overwrite existing files if there is a conflict.')
    p.add_argument(
        '--list', action='store_true',
        help='List available benchmarks by name.')
    p.add_argument(
        '--raw', metavar='PATH',
        help='Dump raw data (all samples collected) in CSV format to the '
             'file path provided.')
    p.add_argument(
        'bench', metavar='PAT', nargs='?',
        help='A regex pattern that will only run benchmarks that match.')
    args = p.parse_args()

    if args.download is not None and len(args.download) > 0:
        download(args.dir, args.download)
        sys.exit(0)

    if not path.isdir(args.dir):
        os.makedirs(args.dir)
    if args.raw is not None and path.exists(args.raw) and not args.force:
        eprint('File %s already exists (delete it or use --force)' % args.raw)
        sys.exit(1)
    raw_handle, raw_csv_wtr = None, None
    if args.raw is not None:
        fields = [
            'benchmark', 'warmup_iter', 'iter',
            'name', 'command', 'duration', 'lines', 'env',
        ]
        raw_handle = open(args.raw, 'w+')
        raw_csv_wtr = csv.DictWriter(raw_handle, fields)
        raw_csv_wtr.writerow({x: x for x in fields})

    benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
    for i, b in enumerate(benchmarks):
        result = b.run()
        fastest_cmd = result.fastest_cmd()
        fastest_sample = result.fastest_sample()
        max_name_len = max(len(cmd.name) for cmd in b.commands)

        if i > 0:
            print()
        header = '%s (pattern: %s)' % (b.name, b.pattern)
        print('%s\n%s' % (header, '-' * len(header)))
        for cmd in b.commands:
            name = cmd.name
            mean, stdev = result.distribution_for(cmd)
            line_counts = result.line_counts_for(cmd)
            show_fast_cmd, show_line_counts = '', ''
            if fastest_cmd.name == cmd.name:
                show_fast_cmd = '*'
            if fastest_sample['cmd'].name == cmd.name:
                name += '*'
            if len(line_counts) > 0:
                counts = map(str, line_counts)
                show_line_counts = ' (lines: %s)' % ', '.join(counts)
            fmt = '{name:{pad}} {mean:0.3f} +/- {stdev:0.3f}{lines}{fast_cmd}'
            print(fmt.format(
                name=name, pad=max_name_len + 2, fast_cmd=show_fast_cmd,
                mean=mean, stdev=stdev, lines=show_line_counts))
            sys.stdout.flush()

        if raw_csv_wtr is not None:
            for sample in result.samples:
                cmd, duration = sample['cmd'], sample['duration']
                env = ' '.join(['%s=%s' % (k, v)
                                for k, v in cmd.kwargs.get('env', {}).items()])
                raw_csv_wtr.writerow({
                    'benchmark': b.name,
                    'warmup_iter': b.warmup_count,
                    'iter': b.count,
                    'name': sample['cmd'].name,
                    'command': ' '.join(cmd.cmd),
                    'duration': duration,
                    'lines': sample['line_count'] or '',
                    'env': env,
                })
            raw_handle.flush()


if __name__ == '__main__':
    main()
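The suite above times each command with a warmup pass and a fixed number of samples, reports mean +/- standard deviation per command, and counts output lines to sanity-check that the tools agree. A rough Rust sketch of that core measurement loop is shown below (standard library only; the command being timed is an arbitrary example, not part of the suite):

use std::process::{Command, Stdio};
use std::time::Instant;

// Time one run of a command, discarding its output (mirrors Benchmark.run_one).
fn run_one(cmd: &mut Command) -> f64 {
    let start = Instant::now();
    cmd.stdout(Stdio::null())
        .stderr(Stdio::null())
        .status()
        .expect("failed to run command");
    start.elapsed().as_secs_f64()
}

fn main() {
    let warmup = 1;
    let count = 3;
    let mut samples = Vec::with_capacity(count);

    // Example command only; the real suite builds these from each benchmark.
    let mut cmd = Command::new("rg");
    cmd.arg("PM_RESUME");

    for _ in 0..warmup {
        run_one(&mut cmd);
    }
    for _ in 0..count {
        samples.push(run_one(&mut cmd));
    }

    // Sample mean and standard deviation, as the Python statistics module computes them.
    let mean = samples.iter().sum::<f64>() / samples.len() as f64;
    let var = samples.iter().map(|s| (s - mean).powi(2)).sum::<f64>()
        / (samples.len() - 1) as f64;
    println!("{:.3} +/- {:.3} seconds", mean, var.sqrt());
}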
@@ -6,23 +6,22 @@ set -ex

# Generate artifacts for release
mk_artifacts() {
    RUSTFLAGS="-C target-feature=+ssse3" cargo build --target $TARGET --release --features simd-accel
    RUSTFLAGS="-C target-feature=+ssse3" \
      cargo build --target $TARGET --release --features simd-accel
}

mk_tarball() {
    # create a "staging" directory
    local td=$(mktempd)
    local out_dir=$(pwd)
    local name="${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}"
    mkdir "$td/$name"

    # TODO update this part to copy the artifacts that make sense for your project
    # NOTE All Cargo build artifacts will be under the 'target/$TARGET/{debug,release}'
    cp target/$TARGET/release/xrep $td
    cp target/$TARGET/release/rg "$td/$name/"
    cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/"

    pushd $td

    # release tarball will look like 'rust-everywhere-v1.2.3-x86_64-unknown-linux-gnu.tar.gz'
    tar czf $out_dir/${PROJECT_NAME}-${TRAVIS_TAG}-${TARGET}.tar.gz *

    tar czf "$out_dir/$name.tar.gz" *
    popd
    rm -r $td
}
ci/script.sh (28 changed lines)

@@ -11,42 +11,20 @@ disable_cross_doctests() {
        if [ "$TRAVIS_OS_NAME" = "osx" ]; then
            brew install gnu-sed --default-names
        fi

        find src -name '*.rs' -type f | xargs sed -i -e 's:\(//.\s*```\):\1 ignore,:g'
    fi
}

# TODO modify this function as you see fit
# PROTIP Always pass `--target $TARGET` to cargo commands, this makes cargo output build artifacts
# to target/$TARGET/{debug,release} which can reduce the number of needed conditionals in the
# `before_deploy`/packaging phase
run_test_suite() {
    case $TARGET in
        # configure emulation for transparent execution of foreign binaries
        aarch64-unknown-linux-gnu)
            export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
            ;;
        arm*-unknown-linux-gnueabihf)
            export QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf
            ;;
        *)
            ;;
    esac

    if [ ! -z "$QEMU_LD_PREFIX" ]; then
        # Run tests on a single thread when using QEMU user emulation
        export RUST_TEST_THREADS=1
    fi

    cargo build --target $TARGET --verbose
    cargo test --target $TARGET
    cargo test --target $TARGET --verbose

    # sanity check the file type
    file target/$TARGET/debug/xrep
    file target/$TARGET/debug/rg
}

main() {
    disable_cross_doctests
    # disable_cross_doctests
    run_test_suite
}
compile (1 changed line)

@@ -1,4 +1,5 @@
#!/bin/sh

export RUSTFLAGS="-C target-feature=+ssse3"
# export RUSTFLAGS="-C target-cpu=native"
cargo build --release --features simd-accel
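Both the CI packaging step and the compile script above build with --features simd-accel under RUSTFLAGS="-C target-feature=+ssse3". As a hedged illustration only (the function below is hypothetical and not ripgrep's source), a Cargo feature like that is typically consumed with cfg gates so that default builds still compile:

// Compiled only when the crate is built with `--features simd-accel`.
#[cfg(feature = "simd-accel")]
fn count_lines(buf: &[u8]) -> usize {
    // Placeholder for a vectorized implementation enabled by +ssse3.
    buf.iter().filter(|&&b| b == b'\n').count()
}

// Fallback used for default builds without the feature.
#[cfg(not(feature = "simd-accel"))]
fn count_lines(buf: &[u8]) -> usize {
    buf.iter().filter(|&&b| b == b'\n').count()
}

fn main() {
    println!("{}", count_lines(b"a\nb\nc\n"));
}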
@@ -6,14 +6,15 @@ authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "https://github.com/BurntSushi/xrep"
homepage = "https://github.com/BurntSushi/xrep"
repository = "https://github.com/BurntSushi/xrep"
documentation = "https://github.com/BurntSushi/ripgrep"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"

[dependencies]
log = "0.3"
memchr = "0.1"
memmap = "0.2"
regex = "0.1.75"

@@ -4,6 +4,8 @@
A fast line oriented regex searcher.
*/

#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
extern crate regex_syntax as syntax;

@@ -1,13 +1,22 @@
/*!
The literals module is responsible for extracting *inner* literals out of the
AST of a regular expression. Normally this is the job of the regex engine
itself, but the regex engine doesn't look for inner literals. Since we're doing
line based searching, we can use them, so we need to do it ourselves.

Note that this implementation is incredibly suspicious. We need something more
principled.
*/
use std::cmp;
use std::iter;

use regex::bytes::Regex;
use syntax::{
    Expr, Literals, Lit,
    Repeater,
    ByteClass, ByteRange, CharClass, ClassRange, Repeater,
};

#[derive(Debug)]
#[derive(Clone, Debug)]
pub struct LiteralSets {
    prefixes: Literals,
    suffixes: Literals,

@@ -27,6 +36,7 @@ impl LiteralSets {

    pub fn to_regex(&self) -> Option<Regex> {
        if self.prefixes.all_complete() && !self.prefixes.is_empty() {
            debug!("literal prefixes detected: {:?}", self.prefixes);
            // When this is true, the regex engine will do a literal scan.
            return None;
        }

@@ -56,13 +66,27 @@ impl LiteralSets {
        if suf_lcs.len() > lit.len() {
            lit = suf_lcs;
        }
        if req.len() > lit.len() {
        if req_lits.len() == 1 && req.len() > lit.len() {
            lit = req;
        }
        if lit.is_empty() {

        // Special case: if we detected an alternation of inner required
        // literals and its longest literal is bigger than the longest
        // prefix/suffix, then choose the alternation. In practice, this
        // helps with case insensitive matching, which can generate lots of
        // inner required literals.
        let any_empty = req_lits.iter().any(|lit| lit.is_empty());
        if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
            debug!("required literals found: {:?}", req_lits);
            let alts: Vec<String> =
                req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
            // Literals always compile.
            Some(Regex::new(&alts.join("|")).unwrap())
        } else if lit.is_empty() {
            None
        } else {
            // Literals always compile.
            debug!("required literal found: {:?}", show(lit));
            Some(Regex::new(&bytes_to_regex(lit)).unwrap())
        }
    }

@@ -75,14 +99,30 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
            let s: String = chars.iter().cloned().collect();
            lits.cross_add(s.as_bytes());
        }
        Literal { casei: true, .. } => {
            lits.cut();
        Literal { ref chars, casei: true } => {
            for &c in chars {
                let cls = CharClass::new(vec![
                    ClassRange { start: c, end: c },
                ]).case_fold();
                if !lits.add_char_class(&cls) {
                    lits.cut();
                    return;
                }
            }
        }
        LiteralBytes { ref bytes, casei: false } => {
            lits.cross_add(bytes);
        }
        LiteralBytes { casei: true, .. } => {
            lits.cut();
        LiteralBytes { ref bytes, casei: true } => {
            for &b in bytes {
                let cls = ByteClass::new(vec![
                    ByteRange { start: b, end: b },
                ]).case_fold();
                if !lits.add_byte_class(&cls) {
                    lits.cut();
                    return;
                }
            }
        }
        Class(_) => {
            lits.cut();

@@ -205,3 +245,18 @@ fn bytes_to_regex(bs: &[u8]) -> String {
    }
    s
}

/// Converts arbitrary bytes to a nice string.
fn show(bs: &[u8]) -> String {
    // Why aren't we using this to feed to the regex? Doesn't really matter
    // I guess. ---AG
    use std::ascii::escape_default;
    use std::str;

    let mut nice = String::new();
    for &b in bs {
        let part: Vec<u8> = escape_default(b).collect();
        nice.push_str(str::from_utf8(&part).unwrap());
    }
    nice
}

@@ -152,6 +152,7 @@ impl GrepBuilder {
            .unicode(true)
            .case_insensitive(self.opts.case_insensitive)
            .parse(&self.pattern));
        debug!("regex ast:\n{:#?}", expr);
        Ok(try!(nonl::remove(expr, self.opts.line_terminator)))
    }
}

@@ -194,7 +195,7 @@ impl Grep {
        let (prevnl, nextnl) = self.find_line(buf, e, e);
        match self.re.shortest_match(&buf[prevnl..nextnl]) {
            None => {
                start = nextnl + 1;
                start = nextnl;
                continue;
            }
            Some(_) => {

@@ -253,7 +254,7 @@ impl<'b, 's> Iterator for Iter<'b, 's> {
            self.start = self.buf.len();
            return None;
        }
        self.start = mat.end + 1;
        self.start = mat.end;
        Some(mat)
    }
}
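The selection logic in to_regex above is easier to follow in isolation: prefer the longest of the prefix set, the suffix set and the required inner literals, and fall back to an alternation of required literals when there are several non-empty ones (which the new case-insensitive handling tends to produce). The sketch below restates that decision with plain byte slices; it is an assumption-level simplification, not the grep crate's actual API, and it omits the byte escaping done by bytes_to_regex:

/// Pick the literal (or alternation of literals) used to pre-filter lines.
/// Returns None when no useful literal exists and the full regex engine
/// should be used directly.
fn choose_filter<'a>(
    prefix_lcs: &'a [u8],
    suffix_lcs: &'a [u8],
    required: &[&'a [u8]],
) -> Option<String> {
    static EMPTY: &[u8] = &[];
    let req_longest = required.iter().cloned().max_by_key(|l| l.len()).unwrap_or(EMPTY);

    let mut lit = prefix_lcs;
    if suffix_lcs.len() > lit.len() {
        lit = suffix_lcs;
    }
    if required.len() == 1 && req_longest.len() > lit.len() {
        lit = req_longest;
    }

    let any_empty = required.iter().any(|l| l.is_empty());
    if req_longest.len() > lit.len() && required.len() > 1 && !any_empty {
        // Several non-empty required literals: search for any of them.
        let alts: Vec<String> = required
            .iter()
            .map(|l| String::from_utf8_lossy(l).into_owned())
            .collect();
        Some(alts.join("|"))
    } else if lit.is_empty() {
        None
    } else {
        Some(String::from_utf8_lossy(lit).into_owned())
    }
}

fn main() {
    let required: Vec<&[u8]> = vec![&b"ERR_SYS"[..], &b"PME_TURN_OFF"[..]];
    println!("{:?}", choose_filter(b"", b"", &required));
}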
src/args.rs (150 changed lines)

@@ -9,14 +9,16 @@ use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use term::Terminal;
use walkdir::WalkDir;

use atty;
use gitignore::{Gitignore, GitignoreBuilder};
use ignore::Ignore;
use out::Out;
use out::{Out, OutBuffer};
use printer::Printer;
use search::{InputBuffer, Searcher};
use sys;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
use types::{FileTypeDef, Types, TypesBuilder};
use walk;

@@ -27,13 +29,13 @@ use Result;
/// If you've never heard of Docopt before, see: http://docopt.org
/// (TL;DR: The CLI parser is generated from the usage string below.)
const USAGE: &'static str = "
Usage: xrep [options] <pattern> [<path> ...]
       xrep [options] --files [<path> ...]
       xrep [options] --type-list
       xrep --help
       xrep --version
Usage: rg [options] <pattern> [<path> ...]
       rg [options] --files [<path> ...]
       rg [options] --type-list
       rg --help
       rg --version

xrep is like the silver searcher and grep, but faster than both.
rg combines the usability of the silver search with the raw speed of grep.

Common options:
    -a, --text                 Search binary files as if they were text.

@@ -75,6 +77,11 @@ Less common options:
    -C, --context NUM
        Show NUM lines before and after each match.

    --column
        Show column numbers (1 based) in output. This only shows the column
        numbers for the first match on each line. Note that this doesn't try
        to account for Unicode. One byte is equal to one column.

    --context-separator ARG
        The string to use when separating non-continuous context lines. Escape
        sequences may be used. [default: --]

@@ -97,17 +104,22 @@ Less common options:
        Don't show any file name heading.

    --hidden
        Search hidden directories and files.
        Search hidden directories and files. (Hidden directories and files are
        skipped by default.)

    -L, --follow
        Follow symlinks.

    --line-terminator ARG
        The byte to use for a line terminator. Escape sequences may be used.
        [default: \\n]
    --mmap
        Search using memory maps when possible. This is enabled by default
        when ripgrep thinks it will be faster. (Note that mmap searching
        doesn't current support the various context related options.)

    --no-mmap
        Never use memory maps, even when they might be faster.

    --no-ignore
        Don't respect ignore files (.gitignore, .xrepignore, etc.)
        Don't respect ignore files (.gitignore, .rgignore, etc.)

    --no-ignore-parent
        Don't respect ignore files in parent directories.

@@ -123,7 +135,7 @@ Less common options:
        (capped at 6). [default: 0]

    --version
        Show the version number of xrep and exit.
        Show the version number of ripgrep and exit.

File type management options:
    --type-list

@@ -138,7 +150,7 @@ File type management options:
";

/// RawArgs are the args as they are parsed from Docopt. They aren't used
/// directly by the rest of xrep.
/// directly by the rest of ripgrep.
#[derive(Debug, RustcDecodable)]
pub struct RawArgs {
    arg_pattern: String,

@@ -146,6 +158,7 @@ pub struct RawArgs {
    flag_after_context: usize,
    flag_before_context: usize,
    flag_color: String,
    flag_column: bool,
    flag_context: usize,
    flag_context_separator: String,
    flag_count: bool,

@@ -158,12 +171,13 @@ pub struct RawArgs {
    flag_ignore_case: bool,
    flag_invert_match: bool,
    flag_line_number: bool,
    flag_line_terminator: String,
    flag_literal: bool,
    flag_mmap: bool,
    flag_no_heading: bool,
    flag_no_ignore: bool,
    flag_no_ignore_parent: bool,
    flag_no_line_number: bool,
    flag_no_mmap: bool,
    flag_pretty: bool,
    flag_quiet: bool,
    flag_replace: Option<String>,

@@ -186,17 +200,20 @@ pub struct Args {
    after_context: usize,
    before_context: usize,
    color: bool,
    column: bool,
    context_separator: Vec<u8>,
    count: bool,
    eol: u8,
    files: bool,
    follow: bool,
    glob_overrides: Option<Gitignore>,
    grep: Grep,
    heading: bool,
    hidden: bool,
    ignore_case: bool,
    invert_match: bool,
    line_number: bool,
    mmap: bool,
    no_ignore: bool,
    no_ignore_parent: bool,
    quiet: bool,

@@ -210,7 +227,7 @@ pub struct Args {
}

impl RawArgs {
    /// Convert arguments parsed into a configuration used by xrep.
    /// Convert arguments parsed into a configuration used by ripgrep.
    fn to_args(&self) -> Result<Args> {
        let pattern = {
            let pattern =

@@ -227,7 +244,9 @@ impl RawArgs {
        };
        let paths =
            if self.arg_path.is_empty() {
                if sys::stdin_is_atty() {
                if atty::on_stdin()
                    || self.flag_files
                    || self.flag_type_list {
                    vec![Path::new("./").to_path_buf()]
                } else {
                    vec![Path::new("-").to_path_buf()]

@@ -243,15 +262,19 @@ impl RawArgs {
        } else {
            (self.flag_after_context, self.flag_before_context)
        };
        let eol = {
            let eol = unescape(&self.flag_line_terminator);
            if eol.is_empty() {
                errored!("Empty line terminator is not allowed.");
            } else if eol.len() > 1 {
                errored!("Line terminators are limited to exactly 1 byte.");
            }
            eol[0]
        };
        let mmap =
            if before_context > 0 || after_context > 0 || self.flag_no_mmap {
                false
            } else if self.flag_mmap {
                true
            } else {
                // If we're only searching a few paths and all of them are
                // files, then memory maps are probably faster.
                paths.len() <= 10 && paths.iter().all(|p| p.is_file())
            };
        if mmap {
            debug!("will try to use memory maps");
        }
        let glob_overrides =
            if self.flag_glob.is_empty() {
                None

@@ -265,16 +288,17 @@ impl RawArgs {
        };
        let threads =
            if self.flag_threads == 0 {
                cmp::min(6, num_cpus::get())
                cmp::min(8, num_cpus::get())
            } else {
                self.flag_threads
            };
        let color =
            if self.flag_color == "auto" {
                sys::stdout_is_atty() || self.flag_pretty
                atty::on_stdout() || self.flag_pretty
            } else {
                self.flag_color == "always"
            };
        let eol = b'\n';
        let mut with_filename = self.flag_with_filename;
        if !with_filename {
            with_filename = paths.len() > 1 || paths[0].is_dir();

@@ -283,23 +307,32 @@ impl RawArgs {
        btypes.add_defaults();
        try!(self.add_types(&mut btypes));
        let types = try!(btypes.build());
        let grep = try!(
            GrepBuilder::new(&pattern)
                .case_insensitive(self.flag_ignore_case)
                .line_terminator(eol)
                .build()
        );
        let mut args = Args {
            pattern: pattern,
            paths: paths,
            after_context: after_context,
            before_context: before_context,
            color: color,
            column: self.flag_column,
            context_separator: unescape(&self.flag_context_separator),
            count: self.flag_count,
            eol: eol,
            files: self.flag_files,
            follow: self.flag_follow,
            glob_overrides: glob_overrides,
            grep: grep,
            heading: !self.flag_no_heading && self.flag_heading,
            hidden: self.flag_hidden,
            ignore_case: self.flag_ignore_case,
            invert_match: self.flag_invert_match,
            line_number: !self.flag_no_line_number && self.flag_line_number,
            mmap: mmap,
            no_ignore: self.flag_no_ignore,
            no_ignore_parent: self.flag_no_ignore_parent,
            quiet: self.flag_quiet,

@@ -312,7 +345,7 @@ impl RawArgs {
            with_filename: with_filename,
        };
        // If stdout is a tty, then apply some special default options.
        if sys::stdout_is_atty() || self.flag_pretty {
        if atty::on_stdout() || self.flag_pretty {
            if !self.flag_no_line_number && !args.count {
                args.line_number = true;
            }

@@ -345,7 +378,7 @@ impl Args {
    ///
    /// If a CLI usage error occurred, then exit the process and print a usage
    /// or error message. Similarly, if the user requested the version of
    /// xrep, then print the version and exit.
    /// ripgrep, then print the version and exit.
    ///
    /// Also, initialize a global logger.
    pub fn parse() -> Result<Args> {

@@ -367,7 +400,7 @@ impl Args {
        raw.to_args().map_err(From::from)
    }

    /// Returns true if xrep should print the files it will search and exit
    /// Returns true if ripgrep should print the files it will search and exit
    /// (but not do any actual searching).
    pub fn files(&self) -> bool {
        self.files

@@ -378,12 +411,8 @@ impl Args {
    /// basic searching of regular expressions in a single buffer.
    ///
    /// The pattern and other flags are taken from the command line.
    pub fn grep(&self) -> Result<Grep> {
        GrepBuilder::new(&self.pattern)
            .case_insensitive(self.ignore_case)
            .line_terminator(self.eol)
            .build()
            .map_err(From::from)
    pub fn grep(&self) -> Grep {
        self.grep.clone()
    }

    /// Creates a new input buffer that is used in searching.

@@ -393,10 +422,16 @@ impl Args {
        inp
    }

    /// Whether we should prefer memory maps for searching or not.
    pub fn mmap(&self) -> bool {
        self.mmap
    }

    /// Create a new printer of individual search results that writes to the
    /// writer given.
    pub fn printer<W: Send + io::Write>(&self, wtr: W) -> Printer<W> {
        let mut p = Printer::new(wtr, self.color)
    pub fn printer<W: Send + Terminal>(&self, wtr: W) -> Printer<W> {
        let mut p = Printer::new(wtr)
            .column(self.column)
            .context_separator(self.context_separator.clone())
            .eol(self.eol)
            .heading(self.heading)

@@ -410,8 +445,8 @@ impl Args {

    /// Create a new printer of search results for an entire file that writes
    /// to the writer given.
    pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
        let mut out = Out::new(wtr);
    pub fn out(&self) -> Out {
        let mut out = Out::new(self.color);
        if self.heading && !self.count {
            out = out.file_separator(b"".to_vec());
        } else if self.before_context > 0 || self.after_context > 0 {

@@ -420,6 +455,11 @@ impl Args {
        out
    }

    /// Create a new buffer for use with searching.
    pub fn outbuf(&self) -> OutBuffer {
        OutBuffer::new(self.color)
    }

    /// Return the paths that should be searched.
    pub fn paths(&self) -> &[PathBuf] {
        &self.paths

@@ -428,7 +468,7 @@ impl Args {
    /// Create a new line based searcher whose configuration is taken from the
    /// command line. This searcher supports a dizzying array of features:
    /// inverted matching, line counting, context control and more.
    pub fn searcher<'a, R: io::Read, W: Send + io::Write>(
    pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
        &self,
        inp: &'a mut InputBuffer,
        printer: &'a mut Printer<W>,

@@ -446,6 +486,24 @@ impl Args {
            .text(self.text)
    }

    /// Create a new line based searcher whose configuration is taken from the
    /// command line. This search operates on an entire file all once (which
    /// may have been memory mapped).
    pub fn searcher_buffer<'a, W: Send + Terminal>(
        &self,
        printer: &'a mut Printer<W>,
        grep: &'a Grep,
        path: &'a Path,
        buf: &'a [u8],
    ) -> BufferSearcher<'a, W> {
        BufferSearcher::new(printer, grep, path, buf)
            .count(self.count)
            .eol(self.eol)
            .line_number(self.line_number)
            .invert_match(self.invert_match)
            .text(self.text)
    }

    /// Returns the number of worker search threads that should be used.
    pub fn threads(&self) -> usize {
        self.threads

@@ -456,8 +514,8 @@ impl Args {
        &self.type_defs
    }

    /// Returns true if xrep should print the type definitions currently loaded
    /// and then exit.
    /// Returns true if ripgrep should print the type definitions currently
    /// loaded and then exit.
    pub fn type_list(&self) -> bool {
        self.type_list
    }
@@ -1,24 +1,23 @@
|
||||
/*!
|
||||
This io module contains various platform specific functions for detecting
|
||||
how xrep is being used. e.g., Is stdin being piped into it? Is stdout being
|
||||
redirected to a file? etc... We use this information to tweak various default
|
||||
configuration parameters such as colors and match formatting.
|
||||
This atty module contains functions for detecting whether ripgrep is being fed
|
||||
from (or to) a terminal. Windows and Unix do this differently, so implement
|
||||
both here.
|
||||
*/
|
||||
|
||||
use libc;
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdin_is_atty() -> bool {
|
||||
pub fn on_stdin() -> bool {
|
||||
use libc;
|
||||
0 < unsafe { libc::isatty(libc::STDIN_FILENO) }
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdout_is_atty() -> bool {
|
||||
pub fn on_stdout() -> bool {
|
||||
use libc;
|
||||
0 < unsafe { libc::isatty(libc::STDOUT_FILENO) }
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn stdin_is_atty() -> bool {
|
||||
pub fn on_stdin() -> bool {
|
||||
use kernel32;
|
||||
use winapi;
|
||||
|
||||
@@ -30,7 +29,7 @@ pub fn stdin_is_atty() -> bool {
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn stdout_is_atty() -> bool {
|
||||
pub fn on_stdout() -> bool {
|
||||
use kernel32;
|
||||
use winapi;
|
||||
|
@@ -9,7 +9,7 @@ The motivation for this submodule is performance and portability:
|
||||
2. We could shell out to a `git` sub-command like ls-files or status, but it
|
||||
seems better to not rely on the existence of external programs for a search
|
||||
tool. Besides, we need to implement this logic anyway to support things like
|
||||
an .xrepignore file.
|
||||
an .rgignore file.
|
||||
|
||||
The key implementation detail here is that a single gitignore file is compiled
|
||||
into a single RegexSet, which can be used to report which globs match a
|
||||
@@ -379,7 +379,7 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
const ROOT: &'static str = "/home/foobar/rust/xrep";
|
||||
const ROOT: &'static str = "/home/foobar/rust/rg";
|
||||
|
||||
ignored!(ig1, ROOT, "months", "months");
|
||||
ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
|
||||
|
23
src/glob.rs
23
src/glob.rs
@@ -29,7 +29,6 @@ to make its way into `glob` proper.
|
||||
use std::error::Error as StdError;
|
||||
use std::fmt;
|
||||
use std::iter;
|
||||
use std::path;
|
||||
use std::str;
|
||||
|
||||
use regex;
|
||||
@@ -214,7 +213,7 @@ impl Pattern {
|
||||
/// regular expression and will represent the matching semantics of this
|
||||
/// glob pattern and the options given.
|
||||
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
|
||||
let sep = regex::quote(&path::MAIN_SEPARATOR.to_string());
|
||||
let seps = regex::quote(r"/\");
|
||||
let mut re = String::new();
|
||||
re.push_str("(?-u)");
|
||||
if options.case_insensitive {
|
||||
@@ -235,26 +234,27 @@ impl Pattern {
|
||||
}
|
||||
Token::Any => {
|
||||
if options.require_literal_separator {
|
||||
re.push_str(&format!("[^{}]", sep));
|
||||
re.push_str(&format!("[^{}]", seps));
|
||||
} else {
|
||||
re.push_str(".");
|
||||
}
|
||||
}
|
||||
Token::ZeroOrMore => {
|
||||
if options.require_literal_separator {
|
||||
re.push_str(&format!("[^{}]*", sep));
|
||||
re.push_str(&format!("[^{}]*", seps));
|
||||
} else {
|
||||
re.push_str(".*");
|
||||
}
|
||||
}
|
||||
Token::RecursivePrefix => {
|
||||
re.push_str(&format!("(?:{sep}?|.*{sep})", sep=sep));
|
||||
re.push_str(&format!("(?:[{sep}]?|.*[{sep}])", sep=seps));
|
||||
}
|
||||
Token::RecursiveSuffix => {
|
||||
re.push_str(&format!("(?:{sep}?|{sep}.*)", sep=sep));
|
||||
re.push_str(&format!("(?:[{sep}]?|[{sep}].*)", sep=seps));
|
||||
}
|
||||
Token::RecursiveZeroOrMore => {
|
||||
re.push_str(&format!("(?:{sep}|{sep}.*{sep})", sep=sep));
|
||||
re.push_str(&format!("(?:[{sep}]|[{sep}].*[{sep}])",
|
||||
sep=seps));
|
||||
}
|
||||
Token::Class { negated, ref ranges } => {
|
||||
re.push('[');
|
||||
@@ -480,6 +480,9 @@ mod tests {
|
||||
let pat = Pattern::new($pat).unwrap();
|
||||
let path = &Path::new($path).to_str().unwrap();
|
||||
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
|
||||
// println!("PATTERN: {}", $pat);
|
||||
// println!("REGEX: {:?}", re);
|
||||
// println!("PATH: {}", path);
|
||||
assert!(!re.is_match(path.as_bytes()));
|
||||
}
|
||||
};
|
||||
@@ -561,12 +564,11 @@ mod tests {
|
||||
case_insensitive: true,
|
||||
require_literal_separator: false,
|
||||
};
|
||||
const SEP: char = ::std::path::MAIN_SEPARATOR;
|
||||
|
||||
toregex!(re_casei, "a", "(?i)^a$", &CASEI);
|
||||
|
||||
toregex!(re_slash1, "?", format!("^[^{}]$", SEP), SLASHLIT);
|
||||
toregex!(re_slash2, "*", format!("^[^{}]*$", SEP), SLASHLIT);
|
||||
toregex!(re_slash1, "?", r"^[^/\\]$", SLASHLIT);
|
||||
toregex!(re_slash2, "*", r"^[^/\\]*$", SLASHLIT);
|
||||
|
||||
toregex!(re1, "a", "^a$");
|
||||
toregex!(re2, "?", "^.$");
|
||||
@@ -642,6 +644,7 @@ mod tests {
|
||||
|
||||
matches!(matchslash1, "abc/def", "abc/def", SLASHLIT);
|
||||
nmatches!(matchslash2, "abc?def", "abc/def", SLASHLIT);
|
||||
nmatches!(matchslash2_win, "abc?def", "abc\\def", SLASHLIT);
|
||||
nmatches!(matchslash3, "abc*def", "abc/def", SLASHLIT);
|
||||
matches!(matchslash4, "abc[/]def", "abc/def", SLASHLIT); // differs
|
||||
|
||||
|
@@ -5,7 +5,7 @@ whether a *single* file path should be searched or not.
|
||||
In general, there are two ways to ignore a particular file:
|
||||
|
||||
1. Specify an ignore rule in some "global" configuration, such as a
|
||||
$HOME/.xrepignore or on the command line.
|
||||
$HOME/.rgignore or on the command line.
|
||||
2. A specific ignore file (like .gitignore) found during directory traversal.
|
||||
|
||||
The `IgnoreDir` type handles ignore patterns for any one particular directory
|
||||
@@ -24,7 +24,7 @@ use types::Types;
|
||||
const IGNORE_NAMES: &'static [&'static str] = &[
|
||||
".gitignore",
|
||||
".agignore",
|
||||
".xrepignore",
|
||||
".rgignore",
|
||||
];
|
||||
|
||||
/// Represents an error that can occur when parsing a gitignore file.
|
||||
@@ -257,8 +257,8 @@ pub struct IgnoreDir {
|
||||
/// A single accumulation of glob patterns for this directory, matched
|
||||
/// using gitignore semantics.
|
||||
///
|
||||
/// This will include patterns from xrepignore as well. The patterns are
|
||||
/// ordered so that precedence applies automatically (e.g., xrepignore
|
||||
/// This will include patterns from rgignore as well. The patterns are
|
||||
/// ordered so that precedence applies automatically (e.g., rgignore
|
||||
/// patterns procede gitignore patterns).
|
||||
gi: Option<Gitignore>,
|
||||
// TODO(burntsushi): Matching other types of glob patterns that don't
|
||||
@@ -422,7 +422,7 @@ mod tests {
|
||||
};
|
||||
}
|
||||
|
||||
const ROOT: &'static str = "/home/foobar/rust/xrep";
|
||||
const ROOT: &'static str = "/home/foobar/rust/rg";
|
||||
|
||||
ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
|
||||
ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
|
||||
|
59
src/main.rs
59
src/main.rs
@@ -34,12 +34,14 @@ use std::thread;
|
||||
|
||||
use crossbeam::sync::chase_lev::{self, Steal, Stealer};
|
||||
use grep::Grep;
|
||||
use memmap::{Mmap, Protection};
|
||||
use term::Terminal;
|
||||
use walkdir::DirEntry;
|
||||
|
||||
use args::Args;
|
||||
use out::Out;
|
||||
use out::{NoColorTerminal, Out, OutBuffer};
|
||||
use printer::Printer;
|
||||
use search::InputBuffer;
|
||||
use search_stream::InputBuffer;
|
||||
|
||||
macro_rules! errored {
|
||||
($($tt:tt)*) => {
|
||||
@@ -55,13 +57,14 @@ macro_rules! eprintln {
|
||||
}
|
||||
|
||||
mod args;
|
||||
mod atty;
|
||||
mod gitignore;
|
||||
mod glob;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod printer;
|
||||
mod search;
|
||||
mod sys;
|
||||
mod search_buffer;
|
||||
mod search_stream;
|
||||
mod terminal;
|
||||
mod types;
|
||||
mod walk;
|
||||
@@ -87,7 +90,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
return run_types(args);
|
||||
}
|
||||
let args = Arc::new(args);
|
||||
let out = Arc::new(Mutex::new(args.out(io::stdout())));
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let outbuf = args.outbuf();
|
||||
let mut workers = vec![];
|
||||
|
||||
let mut workq = {
|
||||
@@ -98,8 +102,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
out: out.clone(),
|
||||
chan_work: stealer.clone(),
|
||||
inpbuf: args.input_buffer(),
|
||||
outbuf: Some(vec![]),
|
||||
grep: try!(args.grep()),
|
||||
outbuf: Some(outbuf.clone()),
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
};
|
||||
workers.push(thread::spawn(move || worker.run()));
|
||||
@@ -126,7 +130,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
}
|
||||
|
||||
fn run_files(args: Args) -> Result<u64> {
|
||||
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
|
||||
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
|
||||
let mut printer = args.printer(term);
|
||||
let mut file_count = 0;
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
@@ -143,7 +148,8 @@ fn run_files(args: Args) -> Result<u64> {
|
||||
}
|
||||
|
||||
fn run_types(args: Args) -> Result<u64> {
|
||||
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
|
||||
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
|
||||
let mut printer = args.printer(term);
|
||||
let mut ty_count = 0;
|
||||
for def in args.type_defs() {
|
||||
printer.type_def(def);
|
||||
@@ -165,10 +171,10 @@ enum WorkReady {
|
||||
|
||||
struct Worker {
|
||||
args: Arc<Args>,
|
||||
out: Arc<Mutex<Out<io::Stdout>>>,
|
||||
out: Arc<Mutex<Out>>,
|
||||
chan_work: Stealer<Work>,
|
||||
inpbuf: InputBuffer,
|
||||
outbuf: Option<Vec<u8>>,
|
||||
outbuf: Option<OutBuffer>,
|
||||
grep: Grep,
|
||||
match_count: u64,
|
||||
}
|
||||
@@ -196,7 +202,7 @@ impl Worker {
|
||||
let mut printer = self.args.printer(outbuf);
|
||||
self.do_work(&mut printer, work);
|
||||
let outbuf = printer.into_inner();
|
||||
if !outbuf.is_empty() {
|
||||
if !outbuf.get_ref().is_empty() {
|
||||
let mut out = self.out.lock().unwrap();
|
||||
out.write(&outbuf);
|
||||
}
|
||||
@@ -205,7 +211,7 @@ impl Worker {
|
||||
self.match_count
|
||||
}
|
||||
|
||||
fn do_work<W: Send + io::Write>(
|
||||
fn do_work<W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
work: WorkReady,
|
||||
@@ -221,7 +227,11 @@ impl Worker {
|
||||
if let Ok(p) = path.strip_prefix("./") {
|
||||
path = p;
|
||||
}
|
||||
self.search(printer, path, file)
|
||||
if self.args.mmap() {
|
||||
self.search_mmap(printer, path, &file)
|
||||
} else {
|
||||
self.search(printer, path, file)
|
||||
}
|
||||
}
|
||||
};
|
||||
match result {
|
||||
@@ -234,7 +244,7 @@ impl Worker {
|
||||
}
|
||||
}
|
||||
|
||||
fn search<R: io::Read, W: Send + io::Write>(
|
||||
fn search<R: io::Read, W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
@@ -248,4 +258,23 @@ impl Worker {
|
||||
rdr,
|
||||
).run().map_err(From::from)
|
||||
}
|
||||
|
||||
fn search_mmap<W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
file: &File,
|
||||
) -> Result<u64> {
|
||||
if try!(file.metadata()).len() == 0 {
|
||||
// Opening a memory map with an empty file results in an error.
|
||||
return Ok(0);
|
||||
}
|
||||
let mmap = try!(Mmap::open(file, Protection::Read));
|
||||
Ok(self.args.searcher_buffer(
|
||||
printer,
|
||||
&self.grep,
|
||||
path,
|
||||
unsafe { mmap.as_slice() },
|
||||
).run())
|
||||
}
|
||||
}
|
||||
|
458
src/out.rs
458
src/out.rs
@@ -1,4 +1,40 @@
|
||||
use std::io::{self, Write};
|
||||
use std::sync::Arc;
|
||||
|
||||
use term::{self, Terminal};
|
||||
use term::color::Color;
|
||||
use term::terminfo::TermInfo;
|
||||
#[cfg(windows)]
|
||||
use term::WinConsole;
|
||||
|
||||
use terminal::TerminfoTerminal;
|
||||
|
||||
pub type StdoutTerminal = Box<Terminal<Output=io::Stdout> + Send>;
|
||||
|
||||
/// Gets a terminal that supports color if available.
|
||||
#[cfg(windows)]
|
||||
fn term_stdout(color: bool) -> StdoutTerminal {
|
||||
let stdout = io::stdout();
|
||||
WinConsole::new(stdout)
|
||||
.ok()
|
||||
.map(|t| Box::new(t) as StdoutTerminal)
|
||||
.unwrap_or_else(|| {
|
||||
let stdout = io::stdout();
|
||||
Box::new(NoColorTerminal::new(stdout)) as StdoutTerminal
|
||||
})
|
||||
}
|
||||
|
||||
/// Gets a terminal that supports color if available.
|
||||
#[cfg(not(windows))]
|
||||
fn term_stdout(color: bool) -> StdoutTerminal {
|
||||
let stdout = io::stdout();
|
||||
if !color || TERMINFO.is_none() {
|
||||
Box::new(NoColorTerminal::new(stdout))
|
||||
} else {
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
Box::new(TerminfoTerminal::new_with_terminfo(stdout, info))
|
||||
}
|
||||
}
|
||||
|
||||
/// Out controls the actual output of all search results for a particular file
|
||||
/// to the end user.
|
||||
@@ -6,17 +42,17 @@ use std::io::{self, Write};
|
||||
/// (The difference between Out and Printer is that a Printer works with
|
||||
/// individual search results where as Out works with search results for each
|
||||
/// file as a whole. For example, it knows when to print a file separator.)
|
||||
pub struct Out<W: io::Write> {
|
||||
wtr: io::BufWriter<W>,
|
||||
pub struct Out {
|
||||
term: StdoutTerminal,
|
||||
printed: bool,
|
||||
file_separator: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl<W: io::Write> Out<W> {
|
||||
impl Out {
|
||||
/// Create a new Out that writes to the wtr given.
|
||||
pub fn new(wtr: W) -> Out<W> {
|
||||
pub fn new(color: bool) -> Out {
|
||||
Out {
|
||||
wtr: io::BufWriter::new(wtr),
|
||||
term: term_stdout(color),
|
||||
printed: false,
|
||||
file_separator: None,
|
||||
}
|
||||
@@ -26,22 +62,422 @@ impl<W: io::Write> Out<W> {
|
||||
/// By default, no separator is printed.
|
||||
///
|
||||
/// If sep is empty, then no file separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
|
||||
self.file_separator = Some(sep);
|
||||
self
|
||||
}
|
||||
|
||||
/// Write the search results of a single file to the underlying wtr and
|
||||
/// flush wtr.
|
||||
pub fn write(&mut self, buf: &[u8]) {
|
||||
pub fn write(&mut self, buf: &OutBuffer) {
|
||||
if let Some(ref sep) = self.file_separator {
|
||||
if self.printed {
|
||||
let _ = self.wtr.write_all(sep);
|
||||
let _ = self.wtr.write_all(b"\n");
|
||||
let _ = self.term.write_all(sep);
|
||||
let _ = self.term.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
let _ = self.wtr.write_all(buf);
|
||||
let _ = self.wtr.flush();
|
||||
match *buf {
|
||||
OutBuffer::Colored(ref tt) => {
|
||||
let _ = self.term.write_all(tt.get_ref());
|
||||
}
|
||||
OutBuffer::Windows(ref w) => {
|
||||
w.print_stdout(&mut self.term);
|
||||
}
|
||||
OutBuffer::NoColor(ref buf) => {
|
||||
let _ = self.term.write_all(buf);
|
||||
}
|
||||
}
|
||||
let _ = self.term.flush();
|
||||
self.printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// OutBuffer corresponds to the final output buffer for search results. All
|
||||
/// search results are written to a buffer and then a buffer is flushed to
|
||||
/// stdout only after the full search has completed.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum OutBuffer {
|
||||
Colored(TerminfoTerminal<Vec<u8>>),
|
||||
Windows(WindowsBuffer),
|
||||
NoColor(Vec<u8>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WindowsBuffer {
|
||||
buf: Vec<u8>,
|
||||
pos: usize,
|
||||
colors: Vec<WindowsColor>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WindowsColor {
|
||||
pos: usize,
|
||||
opt: WindowsOption,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum WindowsOption {
|
||||
Foreground(Color),
|
||||
Background(Color),
|
||||
Reset,
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TERMINFO: Option<Arc<TermInfo>> = {
|
||||
match TermInfo::from_env() {
|
||||
Ok(info) => Some(Arc::new(info)),
|
||||
Err(err) => {
|
||||
debug!("error loading terminfo for coloring: {}", err);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl OutBuffer {
|
||||
/// Create a new output buffer.
|
||||
///
|
||||
/// When color is true, the buffer will attempt to support coloring.
|
||||
pub fn new(color: bool) -> OutBuffer {
|
||||
// If we want color, build a TerminfoTerminal and see if the current
|
||||
// environment supports coloring. If not, bail with NoColor. To avoid
|
||||
// losing our writer (ownership), do this the long way.
|
||||
if !color {
|
||||
return OutBuffer::NoColor(vec![]);
|
||||
}
|
||||
if cfg!(windows) {
|
||||
return OutBuffer::Windows(WindowsBuffer {
|
||||
buf: vec![],
|
||||
pos: 0,
|
||||
colors: vec![]
|
||||
});
|
||||
}
|
||||
if TERMINFO.is_none() {
|
||||
return OutBuffer::NoColor(vec![]);
|
||||
}
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
let tt = TerminfoTerminal::new_with_terminfo(vec![], info);
|
||||
if !tt.supports_color() {
|
||||
debug!("environment doesn't support coloring");
|
||||
return OutBuffer::NoColor(tt.into_inner());
|
||||
}
|
||||
OutBuffer::Colored(tt)
|
||||
}
|
||||
|
||||
/// Clear the give buffer of all search results such that it is reusable
|
||||
/// in another search.
|
||||
pub fn clear(&mut self) {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut tt) => {
|
||||
tt.get_mut().clear();
|
||||
}
|
||||
OutBuffer::Windows(ref mut win) => {
|
||||
win.buf.clear();
|
||||
win.colors.clear();
|
||||
win.pos = 0;
|
||||
}
|
||||
OutBuffer::NoColor(ref mut buf) => {
|
||||
buf.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn map_result<F, G>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> term::Result<()>
|
||||
where F: FnMut(&mut TerminfoTerminal<Vec<u8>>) -> term::Result<()>,
|
||||
G: FnMut(&mut WindowsBuffer) -> term::Result<()> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => f(w),
|
||||
OutBuffer::Windows(ref mut w) => g(w),
|
||||
OutBuffer::NoColor(_) => Err(term::Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_bool<F, G>(
|
||||
&self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> bool
|
||||
where F: FnMut(&TerminfoTerminal<Vec<u8>>) -> bool,
|
||||
G: FnMut(&WindowsBuffer) -> bool {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref w) => f(w),
|
||||
OutBuffer::Windows(ref w) => g(w),
|
||||
OutBuffer::NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for OutBuffer {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => w.write(buf),
|
||||
OutBuffer::Windows(ref mut w) => w.write(buf),
|
||||
OutBuffer::NoColor(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl term::Terminal for OutBuffer {
|
||||
type Output = Vec<u8>;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.reset(), |w| w.reset())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.delete_line(), |w| w.delete_line())
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &Vec<u8> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref w) => w.get_ref(),
|
||||
OutBuffer::Windows(ref w) => w.get_ref(),
|
||||
OutBuffer::NoColor(ref w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut Vec<u8> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => w.get_mut(),
|
||||
OutBuffer::Windows(ref mut w) => w.get_mut(),
|
||||
OutBuffer::NoColor(ref mut w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn into_inner(self) -> Vec<u8> {
|
||||
match self {
|
||||
OutBuffer::Colored(w) => w.into_inner(),
|
||||
OutBuffer::Windows(w) => w.into_inner(),
|
||||
OutBuffer::NoColor(w) => w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsBuffer {
|
||||
fn push(&mut self, opt: WindowsOption) {
|
||||
let pos = self.pos;
|
||||
self.colors.push(WindowsColor { pos: pos, opt: opt });
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsBuffer {
|
||||
/// Print the contents to the given terminal.
|
||||
pub fn print_stdout(&self, tt: &mut StdoutTerminal) {
|
||||
if !tt.supports_color() {
|
||||
let _ = tt.write_all(&self.buf);
|
||||
let _ = tt.flush();
|
||||
return;
|
||||
}
|
||||
let mut last = 0;
|
||||
for col in &self.colors {
|
||||
let _ = tt.write_all(&self.buf[last..col.pos]);
|
||||
match col.opt {
|
||||
WindowsOption::Foreground(c) => {
|
||||
let _ = tt.fg(c);
|
||||
}
|
||||
WindowsOption::Background(c) => {
|
||||
let _ = tt.bg(c);
|
||||
}
|
||||
WindowsOption::Reset => {
|
||||
let _ = tt.reset();
|
||||
}
|
||||
}
|
||||
last = col.pos;
|
||||
}
|
||||
let _ = tt.write_all(&self.buf[last..]);
|
||||
let _ = tt.flush();
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for WindowsBuffer {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
let n = try!(self.buf.write(buf));
|
||||
self.pos += n;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl term::Terminal for WindowsBuffer {
|
||||
type Output = Vec<u8>;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Foreground(fg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Background(bg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.push(WindowsOption::Reset);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &Vec<u8> {
|
||||
&self.buf
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut Vec<u8> {
|
||||
&mut self.buf
|
||||
}
|
||||
|
||||
fn into_inner(self) -> Vec<u8> {
|
||||
self.buf
|
||||
}
|
||||
}
|
||||
|
||||
/// NoColorTerminal implements Terminal, but supports no coloring.
|
||||
///
|
||||
/// Its useful when an API requires a Terminal, but coloring isn't needed.
|
||||
pub struct NoColorTerminal<W> {
|
||||
wtr: W,
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> NoColorTerminal<W> {
|
||||
/// Wrap the given writer in a Terminal interface.
|
||||
pub fn new(wtr: W) -> NoColorTerminal<W> {
|
||||
NoColorTerminal {
|
||||
wtr: wtr,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for NoColorTerminal<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.wtr.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.wtr.flush()
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for NoColorTerminal<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
&self.wtr
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
&mut self.wtr
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
self.wtr
|
||||
}
|
||||
}
|
||||
|
203
src/printer.rs
203
src/printer.rs
@@ -1,17 +1,11 @@
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use regex::bytes::Regex;
|
||||
use term::{self, Terminal};
|
||||
use term::color::*;
|
||||
use term::terminfo::TermInfo;
|
||||
use term::{Attr, Terminal};
|
||||
use term::color;
|
||||
|
||||
use terminal::TerminfoTerminal;
|
||||
use types::FileTypeDef;
|
||||
|
||||
use self::Writer::*;
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
/// Note that we currently ignore all write errors. It's probably worthwhile
|
||||
@@ -19,9 +13,11 @@ use self::Writer::*;
|
||||
/// writes to memory, neither of which commonly fail.
|
||||
pub struct Printer<W> {
|
||||
/// The underlying writer.
|
||||
wtr: Writer<W>,
|
||||
wtr: W,
|
||||
/// Whether anything has been printed to wtr yet.
|
||||
has_printed: bool,
|
||||
/// Whether to show column numbers for the first match or not.
|
||||
column: bool,
|
||||
/// The string to use to separate non-contiguous runs of context lines.
|
||||
context_separator: Vec<u8>,
|
||||
/// The end-of-line terminator used by the printer. In general, eols are
|
||||
@@ -40,14 +36,13 @@ pub struct Printer<W> {
|
||||
with_filename: bool,
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> Printer<W> {
|
||||
impl<W: Send + Terminal> Printer<W> {
|
||||
/// Create a new printer that writes to wtr.
|
||||
///
|
||||
/// `color` should be true if the printer should try to use coloring.
|
||||
pub fn new(wtr: W, color: bool) -> Printer<W> {
|
||||
pub fn new(wtr: W) -> Printer<W> {
|
||||
Printer {
|
||||
wtr: Writer::new(wtr, color),
|
||||
wtr: wtr,
|
||||
has_printed: false,
|
||||
column: false,
|
||||
context_separator: "--".to_string().into_bytes(),
|
||||
eol: b'\n',
|
||||
heading: false,
|
||||
@@ -57,6 +52,13 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// When set, column numbers will be printed for the first match on each
|
||||
/// line.
|
||||
pub fn column(mut self, yes: bool) -> Printer<W> {
|
||||
self.column = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the context separator. The default is `--`.
|
||||
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
|
||||
self.context_separator = sep;
|
||||
@@ -107,7 +109,7 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
/// Flushes the underlying writer and returns it.
|
||||
pub fn into_inner(mut self) -> W {
|
||||
let _ = self.wtr.flush();
|
||||
self.wtr.into_inner()
|
||||
self.wtr
|
||||
}
|
||||
|
||||
/// Prints a type definition.
|
||||
@@ -173,6 +175,11 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b':');
|
||||
}
|
||||
if self.column {
|
||||
let c = re.find(&buf[start..end]).map(|(s, _)| s + 1).unwrap_or(0);
|
||||
self.write(c.to_string().as_bytes());
|
||||
self.write(b":");
|
||||
}
|
||||
if self.replace.is_some() {
|
||||
let line = re.replace_all(
|
||||
&buf[start..end], &**self.replace.as_ref().unwrap());
|
||||
@@ -186,15 +193,15 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
pub fn write_match(&mut self, re: &Regex, buf: &[u8]) {
|
||||
if !self.wtr.is_color() {
|
||||
if !self.wtr.supports_color() {
|
||||
self.write(buf);
|
||||
return;
|
||||
}
|
||||
let mut last_written = 0;
|
||||
for (s, e) in re.find_iter(buf) {
|
||||
self.write(&buf[last_written..s]);
|
||||
let _ = self.wtr.fg(BRIGHT_RED);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
let _ = self.wtr.fg(color::BRIGHT_RED);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
self.write(&buf[s..e]);
|
||||
let _ = self.wtr.reset();
|
||||
last_written = e;
|
||||
@@ -226,23 +233,24 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
|
||||
if self.wtr.is_color() {
|
||||
let _ = self.wtr.fg(GREEN);
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_GREEN);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write_eol();
|
||||
if self.wtr.is_color() {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
}
|
||||
|
||||
fn line_number(&mut self, n: u64, sep: u8) {
|
||||
if self.wtr.is_color() {
|
||||
let _ = self.wtr.fg(YELLOW);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_BLUE);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(n.to_string().as_bytes());
|
||||
if self.wtr.is_color() {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
self.write(&[sep]);
|
||||
@@ -261,148 +269,3 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
self.write(&[eol]);
|
||||
}
|
||||
}
|
||||
|
||||
enum Writer<W> {
|
||||
Colored(TerminfoTerminal<W>),
|
||||
NoColor(W),
|
||||
}
|
||||
|
||||
lazy_static! {
|
||||
static ref TERMINFO: Option<Arc<TermInfo>> = {
|
||||
match term::terminfo::TermInfo::from_env() {
|
||||
Ok(info) => Some(Arc::new(info)),
|
||||
Err(err) => {
|
||||
debug!("error loading terminfo for coloring: {}", err);
|
||||
None
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> Writer<W> {
|
||||
fn new(wtr: W, color: bool) -> Writer<W> {
|
||||
// If we want color, build a TerminfoTerminal and see if the current
|
||||
// environment supports coloring. If not, bail with NoColor. To avoid
|
||||
// losing our writer (ownership), do this the long way.
|
||||
if !color || TERMINFO.is_none() {
|
||||
return NoColor(wtr);
|
||||
}
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
let tt = TerminfoTerminal::new_with_terminfo(wtr, info);
|
||||
if !tt.supports_color() {
|
||||
debug!("environment doesn't support coloring");
|
||||
return NoColor(tt.into_inner());
|
||||
}
|
||||
Colored(tt)
|
||||
}
|
||||
|
||||
fn is_color(&self) -> bool {
|
||||
match *self {
|
||||
Colored(_) => true,
|
||||
NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn map_result<F>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
) -> term::Result<()>
|
||||
where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()> {
|
||||
match *self {
|
||||
Colored(ref mut w) => f(w),
|
||||
NoColor(_) => Err(term::Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_bool<F>(
|
||||
&self,
|
||||
mut f: F,
|
||||
) -> bool
|
||||
where F: FnMut(&TerminfoTerminal<W>) -> bool {
|
||||
match *self {
|
||||
Colored(ref w) => f(w),
|
||||
NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for Writer<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.write(buf),
|
||||
NoColor(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.flush(),
|
||||
NoColor(ref mut w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for Writer<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.fg(fg))
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.bg(bg))
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
self.map_result(|w| w.attr(attr))
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
self.map_bool(|w| w.supports_attr(attr))
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.reset())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_reset())
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_color())
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.cursor_up())
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.delete_line())
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.carriage_return())
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
match *self {
|
||||
Colored(ref w) => w.get_ref(),
|
||||
NoColor(ref w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.get_mut(),
|
||||
NoColor(ref mut w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
match self {
|
||||
Colored(w) => w.into_inner(),
|
||||
NoColor(w) => w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
294
src/search_buffer.rs
Normal file
294
src/search_buffer.rs
Normal file
@@ -0,0 +1,294 @@
|
||||
/*!
|
||||
The search_buffer module is responsible for searching a single file all in a
|
||||
single buffer. Typically, the source of the buffer is a memory map. This can
|
||||
be useful for when memory maps are faster than streaming search.
|
||||
|
||||
Note that this module doesn't quite support everything that search_stream does.
|
||||
Notably, showing contexts.
|
||||
*/
|
||||
use std::cmp;
|
||||
use std::path::Path;
|
||||
|
||||
use grep::Grep;
|
||||
use term::Terminal;
|
||||
|
||||
use printer::Printer;
|
||||
use search_stream::{IterLines, Options, count_lines, is_binary};
|
||||
|
||||
pub struct BufferSearcher<'a, W: 'a> {
|
||||
opts: Options,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
match_count: u64,
|
||||
line_count: Option<u64>,
|
||||
last_line: usize,
|
||||
}
|
||||
|
||||
impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
|
||||
pub fn new(
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
) -> BufferSearcher<'a, W> {
|
||||
BufferSearcher {
|
||||
opts: Options::default(),
|
||||
printer: printer,
|
||||
grep: grep,
|
||||
path: path,
|
||||
buf: buf,
|
||||
match_count: 0,
|
||||
line_count: None,
|
||||
last_line: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.opts.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line byte used by this searcher.
|
||||
pub fn eol(mut self, eol: u8) -> Self {
|
||||
self.opts.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.opts.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.opts.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub fn run(mut self) -> u64 {
|
||||
let binary_upto = cmp::min(4096, self.buf.len());
|
||||
if !self.opts.text && is_binary(&self.buf[..binary_upto]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
self.match_count = 0;
|
||||
self.line_count = if self.opts.line_number { Some(0) } else { None };
|
||||
let mut last_end = 0;
|
||||
for m in self.grep.iter(self.buf) {
|
||||
if self.opts.invert_match {
|
||||
self.print_inverted_matches(last_end, m.start());
|
||||
} else {
|
||||
self.print_match(m.start(), m.end());
|
||||
}
|
||||
last_end = m.end();
|
||||
}
|
||||
if self.opts.invert_match {
|
||||
let upto = self.buf.len();
|
||||
self.print_inverted_matches(last_end, upto);
|
||||
}
|
||||
if self.opts.count && self.match_count > 0 {
|
||||
self.printer.path_count(self.path, self.match_count);
|
||||
}
|
||||
self.match_count
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn print_match(&mut self, start: usize, end: usize) {
|
||||
self.match_count += 1;
|
||||
if self.opts.count {
|
||||
return;
|
||||
}
|
||||
self.count_lines(start);
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), self.path, self.buf,
|
||||
start, end, self.line_count);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn print_inverted_matches(&mut self, start: usize, end: usize) {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, start);
|
||||
while let Some((s, e)) = it.next(&self.buf[..end]) {
|
||||
self.print_match(s, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn count_lines(&mut self, upto: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += count_lines(
|
||||
&self.buf[self.last_line..upto], self.opts.eol);
|
||||
self.last_line = upto;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn add_line(&mut self, line_end: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += 1;
|
||||
self.last_line = line_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use term::Terminal;
|
||||
|
||||
use out::OutBuffer;
|
||||
use printer::Printer;
|
||||
|
||||
use super::BufferSearcher;
|
||||
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
|
||||
const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
|
||||
// Wrap the stdin reader in a Snappy reader.
|
||||
let mut rdr = snap::Reader::new(stdin.lock());
|
||||
let mut wtr = stdout.lock();
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
||||
|
||||
fn matcher(pat: &str) -> Grep {
|
||||
GrepBuilder::new(pat).build().unwrap()
|
||||
}
|
||||
|
||||
fn test_path() -> &'static Path {
|
||||
&Path::new("/baz.rs")
|
||||
}
|
||||
|
||||
type TestSearcher<'a> = BufferSearcher<'a, OutBuffer>;
|
||||
|
||||
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
pat: &str,
|
||||
haystack: &str,
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = BufferSearcher::new(
|
||||
&mut pp, &grep, test_path(), haystack.as_bytes());
|
||||
map(searcher).run()
|
||||
};
|
||||
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn basic_search() {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s|s);
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn binary() {
|
||||
let text = "Sherlock\n\x00Holmes\n";
|
||||
let (count, out) = search("Sherlock|Holmes", text, |s|s);
|
||||
assert_eq!(0, count);
|
||||
assert_eq!(out, "");
|
||||
}
|
||||
|
||||
|
||||
#[test]
|
||||
fn binary_text() {
|
||||
let text = "Sherlock\n\x00Holmes\n";
|
||||
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn line_numbers() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.line_number(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn count() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.count(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:2\n");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", SHERLOCK, |s| s.invert_match(true));
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:Holmeses, success in the province of detective work must always
|
||||
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/baz.rs:and exhibited clearly, with a label attached.
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match_line_numbers() {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).line_number(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:2:Holmeses, success in the province of detective work must always
|
||||
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
|
||||
/baz.rs:6:and exhibited clearly, with a label attached.
|
||||
");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn invert_match_count() {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).count(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "/baz.rs:4\n");
|
||||
}
|
||||
}
|
@@ -1,6 +1,7 @@
|
||||
/*!
|
||||
The search module is responsible for searching a single file and printing
|
||||
matches.
|
||||
The search_stream module is responsible for searching a single file and
|
||||
printing matches. In particular, it searches the file in a streaming fashion
|
||||
using `read` calls and a (roughly) fixed size buffer.
|
||||
*/
|
||||
|
||||
use std::cmp;
|
||||
@@ -11,6 +12,7 @@ use std::path::{Path, PathBuf};
|
||||
|
||||
use grep::{Grep, Match};
|
||||
use memchr::{memchr, memrchr};
|
||||
use term::Terminal;
|
||||
|
||||
use printer::Printer;
|
||||
|
||||
@@ -74,14 +76,14 @@ pub struct Searcher<'a, R, W: 'a> {
|
||||
|
||||
/// Options for configuring search.
|
||||
#[derive(Clone)]
|
||||
struct Options {
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
count: bool,
|
||||
eol: u8,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
text: bool,
|
||||
pub struct Options {
|
||||
pub after_context: usize,
|
||||
pub before_context: usize,
|
||||
pub count: bool,
|
||||
pub eol: u8,
|
||||
pub invert_match: bool,
|
||||
pub line_number: bool,
|
||||
pub text: bool,
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
@@ -98,7 +100,7 @@ impl Default for Options {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
impl<'a, R: io::Read, W: Send + Terminal> Searcher<'a, R, W> {
|
||||
/// Create a new searcher.
|
||||
///
|
||||
/// `inp` is a reusable input buffer that is used as scratch space by this
|
||||
@@ -219,14 +221,11 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
self.print_inverted_matches(upto);
|
||||
}
|
||||
} else if matched {
|
||||
self.match_count += 1;
|
||||
if !self.opts.count {
|
||||
let start = self.last_match.start();
|
||||
let end = self.last_match.end();
|
||||
self.print_after_context(start);
|
||||
self.print_before_context(start);
|
||||
self.print_match(start, end);
|
||||
}
|
||||
let start = self.last_match.start();
|
||||
let end = self.last_match.end();
|
||||
self.print_after_context(start);
|
||||
self.print_before_context(start);
|
||||
self.print_match(start, end);
|
||||
}
|
||||
if matched {
|
||||
self.inp.pos = self.last_match.end();
|
||||
@@ -275,11 +274,8 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, self.inp.pos);
|
||||
while let Some((start, end)) = it.next(&self.inp.buf[..upto]) {
|
||||
if !self.opts.count {
|
||||
self.print_match(start, end);
|
||||
}
|
||||
self.print_match(start, end);
|
||||
self.inp.pos = end;
|
||||
self.match_count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -325,11 +321,15 @@ impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
|
||||
#[inline(always)]
|
||||
fn print_match(&mut self, start: usize, end: usize) {
|
||||
self.match_count += 1;
|
||||
if self.opts.count {
|
||||
return;
|
||||
}
|
||||
self.print_separator(start);
|
||||
self.count_lines(start);
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), &self.path,
|
||||
self.grep.regex(), self.path,
|
||||
&self.inp.buf, start, end, self.line_count);
|
||||
self.last_printed = end;
|
||||
self.after_context_remaining = self.opts.after_context;
|
||||
@@ -535,7 +535,7 @@ impl InputBuffer {
|
||||
///
|
||||
/// Note that this may return both false positives and false negatives.
|
||||
#[inline(always)]
|
||||
fn is_binary(buf: &[u8]) -> bool {
|
||||
pub fn is_binary(buf: &[u8]) -> bool {
|
||||
if buf.len() >= 4 && &buf[0..4] == b"%PDF" {
|
||||
return true;
|
||||
}
|
||||
@@ -543,13 +543,88 @@ fn is_binary(buf: &[u8]) -> bool {
|
||||
}
|
||||
|
||||
/// Count the number of lines in the given buffer.
|
||||
#[inline(always)]
|
||||
fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
|
||||
let mut count = 0;
|
||||
while let Some(pos) = memchr(eol, buf) {
|
||||
count += 1;
|
||||
buf = &buf[pos + 1..];
|
||||
#[inline(never)]
|
||||
|
||||
#[inline(never)]
|
||||
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
|
||||
// This was adapted from code in the memchr crate. The specific benefit
|
||||
// here is that we can avoid a branch in the inner loop because all we're
|
||||
// doing is counting.
|
||||
|
||||
// The technique to count EOL bytes was adapted from:
|
||||
// http://bits.stephan-brumme.com/null.html
|
||||
const LO_U64: u64 = 0x0101010101010101;
|
||||
const HI_U64: u64 = 0x8080808080808080;
|
||||
|
||||
// use truncation
|
||||
const LO_USIZE: usize = LO_U64 as usize;
|
||||
const HI_USIZE: usize = HI_U64 as usize;
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
const USIZE_BYTES: usize = 4;
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
const USIZE_BYTES: usize = 8;
|
||||
|
||||
fn count_eol(eol: usize) -> u64 {
|
||||
// Ideally, this would compile down to a POPCNT instruction, but
|
||||
// it looks like you need to set RUSTFLAGS="-C target-cpu=native"
|
||||
// (or target-feature=+popcnt) to get that to work. Bummer.
|
||||
(eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "32")]
|
||||
fn repeat_byte(b: u8) -> usize {
|
||||
let mut rep = (b as usize) << 8 | b as usize;
|
||||
rep = rep << 16 | rep;
|
||||
rep
|
||||
}
|
||||
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn repeat_byte(b: u8) -> usize {
|
||||
let mut rep = (b as usize) << 8 | b as usize;
|
||||
rep = rep << 16 | rep;
|
||||
rep = rep << 32 | rep;
|
||||
rep
|
||||
}
|
||||
|
||||
fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 {
|
||||
let mut count = 0;
|
||||
while let Some(pos) = memchr(eol, buf) {
|
||||
count += 1;
|
||||
buf = &buf[pos + 1..];
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
let len = buf.len();
|
||||
let ptr = buf.as_ptr();
|
||||
let mut count = 0;
|
||||
|
||||
// Search up to an aligned boundary...
|
||||
let align = (ptr as usize) & (USIZE_BYTES - 1);
|
||||
let mut i = 0;
|
||||
if align > 0 {
|
||||
i = cmp::min(USIZE_BYTES - align, len);
|
||||
count += count_lines_slow(&buf[..i], eol);
|
||||
}
|
||||
|
||||
// ... and search the rest.
|
||||
let repeated_eol = repeat_byte(eol);
|
||||
|
||||
if len >= 2 * USIZE_BYTES {
|
||||
while i <= len - (2 * USIZE_BYTES) {
|
||||
unsafe {
|
||||
let u = *(ptr.offset(i as isize) as *const usize);
|
||||
let v = *(ptr.offset((i + USIZE_BYTES) as isize)
|
||||
as *const usize);
|
||||
|
||||
count += count_eol(u ^ repeated_eol);
|
||||
count += count_eol(v ^ repeated_eol);
|
||||
}
|
||||
i += USIZE_BYTES * 2;
|
||||
}
|
||||
}
|
||||
count += count_lines_slow(&buf[i..], eol);
|
||||
count
|
||||
}
|
||||
|
||||
@@ -575,7 +650,7 @@ fn replace_buf(buf: &mut [u8], a: u8, b: u8) {
|
||||
/// advance over the positions of each line. We neglect that approach to avoid
|
||||
/// the borrow in the search code. (Because the borrow prevents composition
|
||||
/// through other mutable methods.)
|
||||
struct IterLines {
|
||||
pub struct IterLines {
|
||||
eol: u8,
|
||||
pos: usize,
|
||||
}
|
||||
@@ -585,7 +660,7 @@ impl IterLines {
|
||||
///
|
||||
/// The buffer is passed to the `next` method.
|
||||
#[inline(always)]
|
||||
fn new(eol: u8, start: usize) -> IterLines {
|
||||
pub fn new(eol: u8, start: usize) -> IterLines {
|
||||
IterLines {
|
||||
eol: eol,
|
||||
pos: start,
|
||||
@@ -597,7 +672,7 @@ impl IterLines {
|
||||
///
|
||||
/// The range returned includes the new line.
|
||||
#[inline(always)]
|
||||
fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
|
||||
pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> {
|
||||
match memchr(self.eol, &buf[self.pos..]) {
|
||||
None => {
|
||||
if self.pos < buf.len() {
|
||||
@@ -689,13 +764,14 @@ mod tests {
|
||||
use std::path::Path;
|
||||
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use term::Terminal;
|
||||
|
||||
use out::OutBuffer;
|
||||
use printer::Printer;
|
||||
|
||||
use super::{InputBuffer, Searcher, start_of_previous_lines};
|
||||
|
||||
lazy_static! {
|
||||
static ref SHERLOCK: &'static str = "\
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
@@ -703,7 +779,8 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
static ref CODE: &'static str = "\
|
||||
|
||||
const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
@@ -718,7 +795,6 @@ fn main() {
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
||||
}
|
||||
|
||||
fn hay(s: &str) -> io::Cursor<Vec<u8>> {
|
||||
io::Cursor::new(s.to_string().into_bytes())
|
||||
@@ -732,7 +808,7 @@ fn main() {
|
||||
&Path::new("/baz.rs")
|
||||
}
|
||||
|
||||
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, Vec<u8>>;
|
||||
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, OutBuffer>;
|
||||
|
||||
fn search_smallcap<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
pat: &str,
|
||||
@@ -740,14 +816,15 @@ fn main() {
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut inp = InputBuffer::with_capacity(1);
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = Searcher::new(
|
||||
&mut inp, &mut pp, &grep, test_path(), hay(haystack));
|
||||
map(searcher).run().unwrap()
|
||||
};
|
||||
(count, String::from_utf8(pp.into_inner()).unwrap())
|
||||
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
|
||||
}
|
||||
|
||||
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
@@ -756,14 +833,15 @@ fn main() {
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut inp = InputBuffer::with_capacity(4096);
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = Searcher::new(
|
||||
&mut inp, &mut pp, &grep, test_path(), hay(haystack));
|
||||
map(searcher).run().unwrap()
|
||||
};
|
||||
(count, String::from_utf8(pp.into_inner()).unwrap())
|
||||
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -870,8 +948,8 @@ fn main() {
    }

    #[test]
    fn basic_search() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s);
    fn basic_search1() {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s|s);
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -887,7 +965,6 @@ fn main() {
        assert_eq!(out, "");
    }


    #[test]
    fn binary_text() {
        let text = "Sherlock\n\x00Holmes\n";
@@ -899,7 +976,7 @@ fn main() {
    #[test]
    fn line_numbers() {
        let (count, out) = search_smallcap(
            "Sherlock", &*SHERLOCK, |s| s.line_number(true));
            "Sherlock", SHERLOCK, |s| s.line_number(true));
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -910,7 +987,7 @@ fn main() {
    #[test]
    fn count() {
        let (count, out) = search_smallcap(
            "Sherlock", &*SHERLOCK, |s| s.count(true));
            "Sherlock", SHERLOCK, |s| s.count(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:2\n");
    }
@@ -918,7 +995,7 @@ fn main() {
    #[test]
    fn invert_match() {
        let (count, out) = search_smallcap(
            "Sherlock", &*SHERLOCK, |s| s.invert_match(true));
            "Sherlock", SHERLOCK, |s| s.invert_match(true));
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
@@ -930,7 +1007,7 @@ fn main() {

    #[test]
    fn invert_match_line_numbers() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).line_number(true)
        });
        assert_eq!(4, count);
@@ -944,7 +1021,7 @@ fn main() {

    #[test]
    fn invert_match_count() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).count(true)
        });
        assert_eq!(4, count);
@@ -953,7 +1030,7 @@ fn main() {

    #[test]
    fn before_context_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(1)
        });
        assert_eq!(2, count);
@@ -966,7 +1043,7 @@ fn main() {

    #[test]
    fn before_context_invert_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(1).invert_match(true)
        });
        assert_eq!(4, count);
@@ -982,7 +1059,7 @@ fn main() {

    #[test]
    fn before_context_invert_one2() {
        let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
            s.line_number(true).before_context(1).invert_match(true)
        });
        assert_eq!(3, count);
@@ -997,7 +1074,7 @@ fn main() {

    #[test]
    fn before_context_two1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(2, count);
@@ -1010,7 +1087,7 @@ fn main() {

    #[test]
    fn before_context_two2() {
        let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(1, count);
@@ -1024,7 +1101,7 @@ fn main() {
    #[test]
    fn before_context_two3() {
        let (count, out) = search_smallcap(
            "success|attached", &*SHERLOCK, |s| {
            "success|attached", SHERLOCK, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(2, count);
@@ -1040,7 +1117,7 @@ fn main() {

    #[test]
    fn before_context_two4() {
        let (count, out) = search("stdin", &*CODE, |s| {
        let (count, out) = search("stdin", CODE, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(3, count);
@@ -1057,7 +1134,7 @@ fn main() {

    #[test]
    fn before_context_two5() {
        let (count, out) = search("stdout", &*CODE, |s| {
        let (count, out) = search("stdout", CODE, |s| {
            s.line_number(true).before_context(2)
        });
        assert_eq!(2, count);
@@ -1074,7 +1151,7 @@ fn main() {

    #[test]
    fn before_context_three1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).before_context(3)
        });
        assert_eq!(2, count);
@@ -1087,7 +1164,7 @@ fn main() {

    #[test]
    fn after_context_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(1)
        });
        assert_eq!(2, count);
@@ -1101,7 +1178,7 @@ fn main() {

    #[test]
    fn after_context_invert_one1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(1).invert_match(true)
        });
        assert_eq!(4, count);
@@ -1116,7 +1193,7 @@ fn main() {

    #[test]
    fn after_context_invert_one2() {
        let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
            s.line_number(true).after_context(1).invert_match(true)
        });
        assert_eq!(3, count);
@@ -1132,7 +1209,7 @@ fn main() {

    #[test]
    fn after_context_two1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(2)
        });
        assert_eq!(2, count);
@@ -1147,7 +1224,7 @@ fn main() {

    #[test]
    fn after_context_two2() {
        let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
            s.line_number(true).after_context(2)
        });
        assert_eq!(1, count);
@@ -1160,7 +1237,7 @@ fn main() {
    #[test]
    fn after_context_two3() {
        let (count, out) = search_smallcap(
            "success|attached", &*SHERLOCK, |s| {
            "success|attached", SHERLOCK, |s| {
            s.line_number(true).after_context(2)
        });
        assert_eq!(2, count);
@@ -1175,7 +1252,7 @@ fn main() {

    #[test]
    fn after_context_three1() {
        let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
        let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
            s.line_number(true).after_context(3)
        });
        assert_eq!(2, count);
@@ -1192,7 +1269,7 @@ fn main() {
    #[test]
    fn before_after_context_two1() {
        let (count, out) = search(
            r"fn main|let mut rdr", &*CODE, |s| {
            r"fn main|let mut rdr", CODE, |s| {
            s.line_number(true).after_context(2).before_context(2)
        });
        assert_eq!(2, count);
24
tests/hay.rs
Normal file
@@ -0,0 +1,24 @@
pub const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";

pub const CODE: &'static str = "\
extern crate snap;

use std::io;

fn main() {
    let stdin = io::stdin();
    let stdout = io::stdout();

    // Wrap the stdin reader in a Snappy reader.
    let mut rdr = snap::Reader::new(stdin.lock());
    let mut wtr = stdout.lock();
    io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";
577
tests/tests.rs
Normal file
@@ -0,0 +1,577 @@
/*!
This module contains *integration* tests. Their purpose is to test the CLI
interface. Namely, that passing a flag does what it says on the tin.

Tests for more fine grained behavior (like the search or the globber) should be
unit tests in their respective modules.
*/

#![allow(dead_code, unused_imports)]

use std::process::Command;

use workdir::WorkDir;

mod hay;
mod workdir;

macro_rules! sherlock {
    ($name:ident, $fun:expr) => {
        sherlock!($name, "Sherlock", $fun);
    };
    ($name:ident, $query:expr, $fun:expr) => {
        sherlock!($name, $query, "sherlock", $fun);
    };
    ($name:ident, $query:expr, $path:expr, $fun:expr) => {
        #[test]
        fn $name() {
            let wd = WorkDir::new(stringify!($name));
            wd.create("sherlock", hay::SHERLOCK);
            let mut cmd = wd.command();
            cmd.arg($query).arg($path);
            $fun(wd, cmd);
        }
    };
}

sherlock!(single_file, |wd: WorkDir, mut cmd| {
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| {
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
3:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(columns, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("--column");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
57:For the Doctor Watsons of this world, as opposed to the Sherlock
49:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(with_filename, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-H");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(with_heading, |wd: WorkDir, mut cmd: Command| {
    // This forces the issue since --with-filename is disabled by default
    // when searching one file.
    cmd.arg("--with-filename").arg("--heading");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(with_heading_default, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // Search two or more and get --with-filename enabled by default.
    // Use -j1 to get deterministic results.
    wd.create("foo", "Sherlock Holmes lives on Baker Street.");
    cmd.arg("-j1").arg("--heading");
    let lines: String = wd.stdout(&mut cmd);
    let expected1 = "\
foo
Sherlock Holmes lives on Baker Street.

sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    let expected2 = "\
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes

foo
Sherlock Holmes lives on Baker Street.
";
    assert!(lines == expected1 || lines == expected2);
});

sherlock!(inverted, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-v");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
Holmeses, success in the province of detective work must always
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(inverted_line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-n").arg("-v");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
2:Holmeses, success in the province of detective work must always
4:can extract a clew from a wisp of straw or a flake of cigar ash;
5:but Doctor Watson has to have it taken out for him and dusted,
6:and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(case_insensitive, "sherlock", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-i");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(word, "as", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-w");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
";
    assert_eq!(lines, expected);
});

sherlock!(literal, "()", "file", |wd: WorkDir, mut cmd: Command| {
    wd.create("file", "blib\n()\nblab\n");
    cmd.arg("-Q");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "()\n");
});

sherlock!(quiet, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-q");
    let lines: String = wd.stdout(&mut cmd);
    assert!(lines.is_empty());
});

sherlock!(replace, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-r").arg("FooBar");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the FooBar
be, to a very large extent, the result of luck. FooBar Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(replace_groups, "([A-Z][a-z]+) ([A-Z][a-z]+)",
          |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-r").arg("$2, $1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Watsons, Doctor of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Holmes, Sherlock
but Watson, Doctor has to have it taken out for him and dusted,
";
    assert_eq!(lines, expected);
});

sherlock!(replace_named_groups, "(?P<first>[A-Z][a-z]+) (?P<last>[A-Z][a-z]+)",
          |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-r").arg("$last, $first");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Watsons, Doctor of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Holmes, Sherlock
but Watson, Doctor has to have it taken out for him and dusted,
";
    assert_eq!(lines, expected);
});

sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-t").arg("rust");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.rs:Sherlock\n");
});

sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-T").arg("rust");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.py:Sherlock\n");
});

sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("--type-clear").arg("rust").arg("-t").arg("rust");
    wd.assert_err(&mut cmd);
});

sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    wd.create("file.wat", "Sherlock");
    cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.wat:Sherlock\n");
});

sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-g").arg("*.rs");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.rs:Sherlock\n");
});

sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create("file.py", "Sherlock");
    wd.create("file.rs", "Sherlock");
    cmd.arg("-g").arg("!*.rs");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "file.py:Sherlock\n");
});

sherlock!(after_context, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-A").arg("1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
";
    assert_eq!(lines, expected);
});

sherlock!(after_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-A").arg("1").arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
3:be, to a very large extent, the result of luck. Sherlock Holmes
4-can extract a clew from a wisp of straw or a flake of cigar ash;
";
    assert_eq!(lines, expected);
});

sherlock!(before_context, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-B").arg("1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(before_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-B").arg("1").arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
3:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(context, "world|attached", |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-C").arg("1");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
--
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(context_line_numbers, "world|attached",
          |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-C").arg("1").arg("-n");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
--
5-but Doctor Watson has to have it taken out for him and dusted,
6:and exhibited clearly, with a label attached.
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);
    wd.assert_err(&mut cmd);
});

sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);

    cmd.arg("--hidden");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_git, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".gitignore", "sherlock\n");
    wd.assert_err(&mut cmd);
});

sherlock!(ignore_ripgrep, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".rgignore", "sherlock\n");
    wd.assert_err(&mut cmd);
});

sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.create(".gitignore", "sherlock\n");
    cmd.arg("--no-ignore");
    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_git_parent, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir(".git");
    wd.create_dir("foo");
    wd.create("foo/sherlock", hay::SHERLOCK);
    // Even though we search in foo/, which has no .gitignore, ripgrep will
    // search parent directories and respect the gitignore files found.
    cmd.current_dir(wd.path().join("foo"));
    wd.assert_err(&mut cmd);
});

sherlock!(ignore_git_parent_stop, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // This tests that searching parent directories for .gitignore files stops
    // after it sees a .git directory. To test this, we create this directory
    // hierarchy:
    //
    // .gitignore (contains `sherlock`)
    // foo/
    //   .git
    //   bar/
    //     sherlock
    //
    // And we perform the search inside `foo/bar/`. ripgrep will stop looking
    // for .gitignore files after it sees `foo/.git/`, and therefore not
    // respect the top-level `.gitignore` containing `sherlock`.
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create_dir("foo/.git");
    wd.create_dir("foo/bar");
    wd.create("foo/bar/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo").join("bar"));

    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(ignore_ripgrep_parent_no_stop, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // This is like the `ignore_git_parent_stop` test, except it checks that
    // ripgrep *doesn't* stop checking for .rgignore files.
    wd.remove("sherlock");
    wd.create(".rgignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create_dir("foo/.git");
    wd.create_dir("foo/bar");
    wd.create("foo/bar/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo").join("bar"));
    // The top-level .rgignore applies.
    wd.assert_err(&mut cmd);
});

sherlock!(no_parent_ignore_git, "Sherlock", ".",
          |wd: WorkDir, mut cmd: Command| {
    // Set up a directory hierarchy like this:
    //
    // .gitignore
    // foo/
    //   .gitignore
    //   sherlock
    //   watson
    //
    // Where `.gitignore` contains `sherlock` and `foo/.gitignore` contains
    // `watson`.
    //
    // Now *do the search* from the foo directory. By default, ripgrep will
    // search parent directories for .gitignore files. The --no-ignore-parent
    // flag should prevent that. At the same time, the `foo/.gitignore` file
    // will still be respected (since the search is happening in `foo/`).
    //
    // In other words, we should only see results from `sherlock`, not from
    // `watson`.
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create("foo/.gitignore", "watson\n");
    wd.create("foo/sherlock", hay::SHERLOCK);
    wd.create("foo/watson", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo"));
    cmd.arg("--no-ignore-parent");

    let lines: String = wd.stdout(&mut cmd);
    let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    assert_eq!(lines, expected);
});

sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create_dir("foo");
    wd.create_dir("foo/bar");
    wd.link("foo/baz", "foo/bar/baz");
    wd.create_dir("foo/baz");
    wd.create("foo/baz/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo/bar"));
    wd.assert_err(&mut cmd);
});

sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create_dir("foo");
    wd.create_dir("foo/bar");
    wd.create_dir("foo/baz");
    wd.create("foo/baz/sherlock", hay::SHERLOCK);
    wd.link("foo/baz", "foo/bar/baz");
    cmd.arg("-L");
    cmd.current_dir(wd.path().join("foo/bar"));

    let lines: String = wd.stdout(&mut cmd);
    if cfg!(windows) {
        let expected = "\
baz\\sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz\\sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
        assert_eq!(lines, expected);
    } else {
        let expected = "\
baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
        assert_eq!(lines, expected);
    }
});

#[test]
fn binary_nosearch() {
    let wd = WorkDir::new("binary_nosearch");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.arg("foo").arg("file");
    wd.assert_err(&mut cmd);
}

// The following two tests show a discrepancy in search results between
// searching with memory mapped files and stream searching. Stream searching
// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with
// the EOL terminator, which tends to avoid allocating large amounts of memory
// for really long "lines." The memory map searcher has no need to worry about
// such things, and more than that, it would be pretty hard for it to match
// the semantics of streaming search in this case.
//
// Binary files with lots of NULs aren't really part of the use case of ripgrep
// (or any other grep-like tool for that matter), so we shouldn't feel too bad
// about it.
#[test]
fn binary_search_mmap() {
    let wd = WorkDir::new("binary_search_mmap");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.arg("-a").arg("--mmap").arg("foo").arg("file");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n");
}

#[test]
fn binary_search_no_mmap() {
    let wd = WorkDir::new("binary_search_no_mmap");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file");
    let lines: String = wd.stdout(&mut cmd);
    assert_eq!(lines, "foo\nfoo\n");
}

#[test]
fn files() {
    let wd = WorkDir::new("files");
    wd.create("file", "");
    wd.create_dir("dir");
    wd.create("dir/file", "");

    let mut cmd = wd.command();
    cmd.arg("--files");
    let lines: String = wd.stdout(&mut cmd);
    if cfg!(windows) {
        assert!(lines == "./dir\\file\n./file\n"
                || lines == "./file\n./dir\\file\n");
    } else {
        assert!(lines == "./file\n./dir/file\n"
                || lines == "./dir/file\n./file\n");
    }
}

#[test]
fn type_list() {
    let wd = WorkDir::new("type_list");

    let mut cmd = wd.command();
    cmd.arg("--type-list");
    let lines: String = wd.stdout(&mut cmd);
    // This can change over time, so just make sure we print something.
    assert!(!lines.is_empty());
}
189
tests/workdir.rs
Normal file
@@ -0,0 +1,189 @@
use std::env;
use std::error;
use std::fmt;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process;
use std::str::FromStr;
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;

static TEST_DIR: &'static str = "ripgrep-tests";
static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;

/// WorkDir represents a directory in which tests are run.
///
/// Directories are created from a global atomic counter to avoid duplicates.
#[derive(Debug)]
pub struct WorkDir {
    /// The directory in which this test executable is running.
    root: PathBuf,
    /// The directory in which the test should run. If a test needs to create
    /// files, they should go in here.
    dir: PathBuf,
}

impl WorkDir {
    /// Create a new test working directory with the given name. The name
    /// does not need to be distinct for each invocation, but should correspond
    /// to a logical grouping of tests.
    pub fn new(name: &str) -> WorkDir {
        let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
        let root = env::current_exe().unwrap()
            .parent().expect("executable's directory").to_path_buf();
        let dir = root.join(TEST_DIR).join(name).join(&format!("{}", id));
        nice_err(&dir, repeat(|| fs::create_dir_all(&dir)));
        WorkDir {
            root: root,
            dir: dir,
        }
    }

    /// Create a new file with the given name and contents in this directory.
    pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
        let path = self.dir.join(name);
        let mut file = nice_err(&path, File::create(&path));
        nice_err(&path, file.write_all(contents.as_bytes()));
        nice_err(&path, file.flush());
    }

    /// Remove a file with the given name from this directory.
    pub fn remove<P: AsRef<Path>>(&self, name: P) {
        let path = self.dir.join(name);
        nice_err(&path, fs::remove_file(&path));
    }

    /// Create a new directory with the given path (and any directories above
    /// it) inside this directory.
    pub fn create_dir<P: AsRef<Path>>(&self, path: P) {
        let path = self.dir.join(path);
        nice_err(&path, repeat(|| fs::create_dir_all(&path)));
    }

    /// Creates a new command that is set to use the ripgrep executable in
    /// this working directory.
    pub fn command(&self) -> process::Command {
        let mut cmd = process::Command::new(&self.bin());
        cmd.current_dir(&self.dir);
        cmd
    }

    /// Returns the path to the ripgrep executable.
    pub fn bin(&self) -> PathBuf {
        self.root.join("rg")
    }

    /// Returns the path to this directory.
    pub fn path(&self) -> &Path {
        &self.dir
    }

    /// Creates a directory symlink to the src with the given target name
    /// in this directory.
    #[cfg(not(windows))]
    pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
        use std::os::unix::fs::symlink;
        let src = self.dir.join(src);
        let target = self.dir.join(target);
        let _ = fs::remove_file(&target);
        nice_err(&target, symlink(&src, &target));
    }

    #[cfg(windows)]
    pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
        use std::os::windows::fs::symlink_dir;
        let src = self.dir.join(src);
        let target = self.dir.join(target);
        let _ = fs::remove_dir(&target);
        nice_err(&target, symlink_dir(&src, &target));
    }

    /// Runs and captures the stdout of the given command.
    ///
    /// If the return type could not be created from a string, then this
    /// panics.
    pub fn stdout<E: fmt::Debug, T: FromStr<Err=E>>(
        &self,
        cmd: &mut process::Command,
    ) -> T {
        let o = self.output(cmd);
        let stdout = String::from_utf8_lossy(&o.stdout);
        match stdout.parse() {
            Ok(t) => t,
            Err(err) => {
                panic!("could not convert from string: {:?}\n\n{}", err, stdout);
            }
        }
    }

    /// Gets the output of a command. If the command failed, then this panics.
    pub fn output(&self, cmd: &mut process::Command) -> process::Output {
        let o = cmd.output().unwrap();
        if !o.status.success() {
            let suggest =
                if o.stderr.is_empty() {
                    "\n\nDid your search end up with no results?".to_string()
                } else {
                    "".to_string()
                };

            panic!("\n\n==========\n\
                    command failed but expected success!\
                    {}\
                    \n\ncommand: {:?}\
                    \ncwd: {}\
                    \n\nstatus: {}\
                    \n\nstdout: {}\
                    \n\nstderr: {}\
                    \n\n==========\n",
                   suggest, cmd, self.dir.display(), o.status,
                   String::from_utf8_lossy(&o.stdout),
                   String::from_utf8_lossy(&o.stderr));
        }
        o
    }

    /// Runs the given command and asserts that it resulted in an error exit
    /// code.
    pub fn assert_err(&self, cmd: &mut process::Command) {
        let o = cmd.output().unwrap();
        if o.status.success() {
            panic!("\n\n===== {:?} =====\n\
                    command succeeded but expected failure!\
                    \n\ncwd: {}\
                    \n\nstatus: {}\
                    \n\nstdout: {}\n\nstderr: {}\
                    \n\n=====\n",
                   cmd, self.dir.display(), o.status,
                   String::from_utf8_lossy(&o.stdout),
                   String::from_utf8_lossy(&o.stderr));
        }
    }
}

fn nice_err<P: AsRef<Path>, T, E: error::Error>(
    path: P,
    res: Result<T, E>,
) -> T {
    match res {
        Ok(t) => t,
        Err(err) => {
            panic!("{}: {:?}", path.as_ref().display(), err);
        }
    }
}

fn repeat<F: FnMut() -> io::Result<()>>(mut f: F) -> io::Result<()> {
    let mut last_err = None;
    for _ in 0..10 {
        if let Err(err) = f() {
            last_err = Some(err);
            thread::sleep(Duration::from_millis(500));
        } else {
            return Ok(());
        }
    }
    Err(last_err.unwrap())
}