Compare commits

...

19 Commits

Author SHA1 Message Date
Andrew Gallant
9bf7696ec8 Initial cut at a benchmark suite for CLI search tools. 2016-09-11 01:05:36 -04:00
Andrew Gallant
cb0f8fd2fa Bump default thread count to 8. 2016-09-11 00:42:39 -04:00
Andrew Gallant
fa8112ec34 Add alternative compile strategy (in a comment). 2016-09-11 00:42:30 -04:00
Andrew Gallant
cf21b4a97e Add doc. 2016-09-11 00:42:19 -04:00
Andrew Gallant
19615245cd Make line counting much faster. 2016-09-10 01:35:44 -04:00
Andrew Gallant
98a48b44bc Fix off-by-one bug in searcher. 2016-09-10 01:35:30 -04:00
Andrew Gallant
e3da726836 Rename search module to search_stream.
The name better reflects the difference between it and the search_buffer
module.
2016-09-10 00:08:42 -04:00
Andrew Gallant
5b36c86c15 Rejigger the atty detection stuff. 2016-09-10 00:05:20 -04:00
Andrew Gallant
76331e5fec Fix test that relied on non-deterministic order of results. 2016-09-09 23:24:01 -04:00
Andrew Gallant
1e678d7052 Fix files test. What a pain. 2016-09-09 23:19:46 -04:00
Andrew Gallant
dd986d7fe9 Add standard Linux CI (GNU libc). 2016-09-09 23:19:37 -04:00
Andrew Gallant
f83cd63b11 Add integration tests. 2016-09-09 22:58:30 -04:00
Andrew Gallant
9a4527d107 fix Rust version number in CI 2016-09-09 18:47:05 -04:00
Andrew Gallant
8f0d3d78ca clean up CI script 2016-09-09 18:10:20 -04:00
Andrew Gallant
3f7cd977bc expand Rust versions we test on. 2016-09-09 18:07:30 -04:00
Andrew Gallant
cc6b6dcf5b fix windows build 2016-09-09 08:53:10 -04:00
Andrew Gallant
48878bbb8f update project name 2016-09-08 21:47:49 -04:00
Andrew Gallant
0766617e07 Refactor how coloring is done.
All in the name of appeasing Windows.
2016-09-08 21:46:14 -04:00
Andrew Gallant
afd99c43d7 fix deploy 2016-09-08 16:35:48 -04:00
19 changed files with 2401 additions and 506 deletions

View File

@@ -1,37 +1,49 @@
#language: rust
#rust:
# - stable
# - beta
# - nightly
#script:
# - cargo build --verbose
# - cargo doc
# - cargo test --verbose
# - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
# cargo bench --verbose;
# fi
language: rust
cache: cargo
env:
global:
- PROJECT_NAME=xrep
- PROJECT_NAME=ripgrep
matrix:
include:
# Nightly channel
- os: osx
rust: nightly
env: TARGET=i686-apple-darwin
- os: osx
rust: nightly
env: TARGET=x86_64-apple-darwin
# Nightly channel.
# (All *nix releases are done on the nightly channel to take advantage
# of the regex library's multiple pattern SIMD search.)
- os: linux
rust: nightly
env: TARGET=i686-unknown-linux-musl
- os: linux
rust: nightly
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: nightly
env: TARGET=x86_64-unknown-linux-gnu
- os: osx
rust: nightly
env: TARGET=i686-apple-darwin
- os: osx
rust: nightly
env: TARGET=x86_64-apple-darwin
# Beta channel.
- os: linux
rust: beta
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: beta
env: TARGET=x86_64-unknown-linux-gnu
- os: osx
rust: beta
env: TARGET=x86_64-apple-darwin
# Minimum Rust supported channel.
- os: linux
rust: 1.9.0
env: TARGET=x86_64-unknown-linux-musl
- os: linux
rust: 1.9.0
env: TARGET=x86_64-unknown-linux-gnu
- os: osx
rust: 1.9.0
env: TARGET=x86_64-apple-darwin
before_install:
- export PATH="$PATH:$HOME/.cargo/bin"

View File

@@ -18,6 +18,10 @@ bench = false
path = "src/main.rs"
name = "rg"
[[test]]
name = "integration"
path = "tests/tests.rs"
[dependencies]
crossbeam = "0.2"
docopt = "0.6"

View File

@@ -1,6 +1,6 @@
environment:
global:
PROJECT_NAME: rg
PROJECT_NAME: ripgrep
matrix:
# Nightly channel
- TARGET: i686-pc-windows-gnu

View File

@@ -1,3 +1,7 @@
/*!
This module benchmarks the glob implementation. For benchmarks on the ripgrep
tool itself, see the benchsuite directory.
*/
#![feature(test)]
extern crate glob;

918
benchsuite Executable file
View File

@@ -0,0 +1,918 @@
#!/usr/bin/env python
'''
benchsuite is a benchmark runner for comparing command line search tools.
'''
import argparse
import csv
import os
import os.path as path
from multiprocessing import cpu_count
import re
import statistics
import subprocess
import sys
import time
# Some constants for identifying the corpora we use to run tests.
# We establish two very different kinds of corpora: a small number of large
# files and a large number of small files. These are vastly different use cases
# not only because of their performance characteristics, but also the
# strategies used to increase the relevance of results returned.
SUBTITLES_DIR = 'subtitles'
SUBTITLES_EN_NAME = 'OpenSubtitles2016.raw.en'
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz'
SUBTITLES_RU_NAME = 'OpenSubtitles2016.raw.ru'
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz'
LINUX_DIR = 'linux'
LINUX_CLONE = 'git://github.com/BurntSushi/linux'
def bench_linux_literal_default(suite_dir):
'''
Benchmark the speed of a literal using *default* settings.
This is a purposefully unfair benchmark for use in performance
analysis, but it is pedagogically useful.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = 'PM_RESUME'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
# N.B. This is a purposefully unfair benchmark for illustrative purposes
# of how the default modes for each search tool differ.
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', pat]),
mkcmd('ag', ['ag', pat]),
# ucg reports the exact same matches as ag and rg even though it
# doesn't read gitignore files. Instead, it has a file whitelist
# that happens to match up exactly with the gitignores for this search.
mkcmd('ucg', ['ucg', pat]),
mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
mkcmd('pt', ['pt', pat]),
# sift reports an extra line here for a binary file matched.
mkcmd('sift', ['sift', pat]),
])
def bench_linux_literal(suite_dir):
'''
Benchmark the speed of a literal, attempting to be fair.
This tries to use the minimum set of options available in all tools
to test how fast they are. For example, it makes sure there is no
case insensitive matching and that line numbers are computed.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = 'PM_RESUME'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd('git grep', [
'git', 'grep', '-I', '-n', pat,
], env={'LC_ALL': 'C'}),
mkcmd('pt', ['pt', pat]),
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
])
def bench_linux_literal_casei(suite_dir):
'''
Benchmark the speed of a case insensitive literal search.
This is like the linux_literal benchmark, except we ask the
search tools to do case insensitive search.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = 'PM_RESUME'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
]),
mkcmd('ag', ['ag', '-i', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
mkcmd('ucg', ['ucg', '-i', pat]),
mkcmd('git grep', [
'git', 'grep', '-I', '-n', '-i', pat,
], env={'LC_ALL': 'C'}),
# sift yields more matches than it should here. Specifically, it gets
# matches in Module.symvers and System.map in the repo root. Both of
# those files show up in the repo root's .gitignore file.
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
]),
])
def bench_linux_re_literal_suffix(suite_dir):
'''
Benchmark the speed of a literal inside a regex.
This, for example, inhibits a prefix byte optimization used
inside of Go's regex engine (relevant for sift and pt).
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = '[A-Z]+_RESUME'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd(
'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
])
def bench_linux_word(suite_dir):
'''
Benchmark use of the -w ("match word") flag in each tool.
sift has a lot of trouble with this because it forces it into Go's
regex engine by surrounding the pattern with \b assertions.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = 'PM_RESUME'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-w', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
]),
mkcmd('ag', ['ag', '-s', '-w', pat]),
mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
mkcmd(
'git grep',
['git', 'grep', '-E', '-I', '-n', '-w', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
]),
])
def bench_linux_unicode_greek(suite_dir):
'''
Benchmark matching of a Unicode category.
Only three tools (ripgrep, sift and pt) support this.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = r'\p{Greek}'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
# sift tries to search a bunch of PDF files and clutters up the
# results, even though --binary-skip is provided. They are excluded
# here explicitly, but don't have a measurable impact on performance.
mkcmd('sift', [
'sift', '-n', '--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
pat,
]),
])
def bench_linux_unicode_greek_casei(suite_dir):
'''
Benchmark matching of a Unicode category, case insensitively.
Only ripgrep gets this right (and it's still fast).
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = r'\p{Greek}'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
# sift tries to search a bunch of PDF files and clutters up the
# results, even though --binary-skip is provided. They are excluded
# here explicitly, but don't have a measurable impact on performance.
mkcmd('sift', [
'sift', '-n', '--binary-skip',
'--exclude-files', '.*',
'--exclude-files', '*.pdf',
pat,
]),
])
def bench_linux_unicode_word(suite_dir):
'''
Benchmark Unicode aware \w character class.
Only ripgrep and git-grep (with LC_ALL=en_US.UTF-8) actually get
this right. Everything else uses the standard ASCII interpretation
of \w.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = r'\wAh'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', pat,
]),
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
mkcmd('ag-novcs (no Unicode)', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
mkcmd(
'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'},
),
mkcmd(
'git grep (no Unicode)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift (no Unicode)', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
])
def bench_linux_no_literal(suite_dir):
'''
Benchmark a regex that defeats all literal optimizations.
Most search patterns have some kind of literal in them, which
typically permits searches to take some shortcuts. Therefore, the
applicability of this benchmark is somewhat suspicious, but the
suite wouldn't feel complete without it.
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
mkcmd('rg-novcs (no Unicode)', [
'rg', '--no-ignore', '-n', '(?-u)' + pat,
]),
mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
mkcmd('ag-novcs (no Unicode)', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
mkcmd(
'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'en_US.UTF-8'},
),
mkcmd(
'git grep (no Unicode)',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
mkcmd('sift (no Unicode)', [
'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
]),
])
def bench_linux_alternates(suite_dir):
'''
Benchmark a small alternation of literals.
sift doesn't make the cut. It's more than 10x slower than the next
fastest result. The slowdown is likely because the Go regexp engine
doesn't do any literal optimizations for this case (there is no
common leading byte).
'''
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', pat,
]),
mkcmd('ag', ['ag', '-s', pat]),
mkcmd('ag-novcs', [
'ag', '--skip-vcs-ignores', '-s', pat,
]),
mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
mkcmd(
'git grep',
['git', 'grep', '-E', '-I', '-n', pat],
env={'LC_ALL': 'C'},
),
])
def bench_linux_alternates_casei(suite_dir):
'Benchmark a small alternation of literals case insensitively.'
require(suite_dir, 'linux')
cwd = path.join(suite_dir, LINUX_DIR)
pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'
def mkcmd(*args, **kwargs):
kwargs['cwd'] = cwd
return Command(*args, **kwargs)
return Benchmark(pattern=pat, commands=[
mkcmd('rg', ['rg', '-n', '-i', pat]),
mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
mkcmd('rg-novcs-mmap', [
'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
]),
mkcmd('ag', ['ag', '-i', pat]),
mkcmd('ag-novcs', [
'ag', '--skip-vcs-ignores', '-i', pat,
]),
mkcmd('ucg', ['ucg', '-i', pat]),
mkcmd(
'git grep',
['git', 'grep', '-E', '-I', '-n', '-i', pat],
env={'LC_ALL': 'C'},
),
])
# BREADCRUMBS(burntsushi): We should benchmark an alternation for `linux` as
# well.
def bench_sherlock(suite_dir):
'TODO: Fix this and add more single file benchmarks.'
require(suite_dir, 'subtitles-en')
en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME)
pat = 'Sherlock'
return Benchmark(pattern=pat, commands=[
Command('rg', ['rg', pat, en]),
Command('grep', ['grep', '-a', pat, en])
])
class MissingDependencies(Exception):
'''
A missing dependency exception.
This exception occurs when running a benchmark that requires a
particular corpus that isn't available.
:ivar list(str) missing_names:
A list of missing dependency names. These names correspond to
names that can be used with the --download flag.
'''
def __init__(self, missing_names):
self.missing_names = missing_names
def __str__(self):
return 'MissingDependency(%s)' % repr(self.missing_names)
class Benchmark(object):
'''
A single benchmark corresponding to a grouping of commands.
The main purpose of a benchmark is to compare the performance
characteristics of a group of commands.
'''
def __init__(self, name=None, pattern=None, commands=None,
warmup_count=1, count=3, line_count=True):
'''
Create a single benchmark.
A single benchmark is composed of a set of commands that are
benchmarked and compared against one another. A benchmark may
have multiple commands that use the same search tool (but
probably should have something differentiating them).
The grouping of commands is a purely human driven process.
By default, the output of every command is sent to /dev/null.
Other types of behavior are available via the methods defined
on this benchmark.
:param str name:
A human readable string denoting the name of this
benchmark.
:param str pattern:
The pattern that is used in search.
:param list(Command) commands:
A list of commands to initialize this benchmark with. More
commands may be added before running the benchmark.
:param int warmup_count:
The number of times to run each command before recording
samples.
:param int count:
The number of samples to collect from each command.
:param bool line_count:
When set, the lines of each search are counted and included
in the samples produced.
'''
self.name = name
self.pattern = pattern
self.commands = commands or []
self.warmup_count = warmup_count
self.count = count
self.line_count = line_count
def run(self):
'''
Runs this benchmark and returns the results.
:rtype: Result
'''
result = Result(self)
for cmd in self.commands:
# Do a warmup first.
for _ in range(self.warmup_count):
self.run_one(cmd)
for _ in range(self.count):
result.add(cmd, **self.run_one(cmd))
return result
def run_one(self, cmd):
'''
Runs the given command exactly once.
Returns an object that includes the time taken by the command.
If this benchmark was configured to count the number of lines
returned, then the line count is also returned.
:param Command cmd: The command to run.
:returns:
A dict with two fields, duration and line_count.
The duration is in seconds, with fractional milliseconds,
and is guaranteed to be available. The line_count is set
to None unless line counting is enabled, in which case,
it is the number of lines in the search output.
:rtype: int
'''
cmd.kwargs['stderr'] = subprocess.DEVNULL
if self.line_count:
cmd.kwargs['stdout'] = subprocess.PIPE
else:
cmd.kwargs['stdout'] = subprocess.DEVNULL
start = time.time()
completed = cmd.run()
end = time.time()
line_count = None
if self.line_count:
line_count = completed.stdout.count(b'\n')
return {
'duration': end - start,
'line_count': line_count,
}
class Result(object):
'''
The result of running a benchmark.
Benchmark results consist of a set of samples, where each sample
corresponds to a single run of a single command in the benchmark.
Various statistics can be computed from these samples such as mean
and standard deviation.
'''
def __init__(self, benchmark):
'''
Create a new set of results, initially empty.
:param Benchmarl benchmark:
The benchmark that produced these results.
'''
self.benchmark = benchmark
self.samples = []
def add(self, cmd, duration, line_count=None):
'''
Add a new sample to this result set.
:param Command cmd:
The command that produced this sample.
:param int duration:
The duration, in milliseconds, that the command took to
run.
:param int line_count:
The number of lines in the search output. This is optional.
'''
self.samples.append({
'cmd': cmd,
'duration': duration,
'line_count': line_count,
})
def fastest_sample(self):
'''
Returns the fastest recorded sample.
'''
return min(self.samples, key=lambda s: s['duration'])
def fastest_cmd(self):
'''
Returns the fastest command according to distribution.
'''
means = []
for cmd in self.benchmark.commands:
mean, _ = self.distribution_for(cmd)
means.append((cmd, mean))
return min(means, key=lambda tup: tup[1])[0]
def samples_for(self, cmd):
'Returns an iterable of samples for cmd'
yield from (s for s in self.samples if s['cmd'].name == cmd.name)
def line_counts_for(self, cmd):
'''
Returns the line counts recorded for each command.
:returns:
A dictionary from command name to a set of line
counts recorded.
'''
return {s['line_count'] for s in self.samples_for(cmd)
if s['line_count'] is not None}
def distribution_for(self, cmd):
'''
Returns the distribution (mean +/- std) of the given command.
:rtype: (float, float)
:returns:
A tuple containing the mean and standard deviation, in that
order.
'''
mean = statistics.mean(
s['duration'] for s in self.samples_for(cmd))
stdev = statistics.stdev(
s['duration'] for s in self.samples_for(cmd))
return mean, stdev
class Command(object):
def __init__(self, name, cmd, *args, **kwargs):
'''
Create a new command that is run as part of a benchmark.
*args and **kwargs are passed directly to ``subprocess.run``.
An exception to this is stdin/stdout/stderr. Output
redirection is completely controlled by the benchmark harness.
Trying to set them here will trigger an assert.
:param str name:
The human readable name of this command. This is
particularly useful if the same search tool is used
multiple times in the same benchmark with different
arguments.
:param list(str) cmd:
The command to run as a list of arguments (including the
command name itself).
'''
assert 'stdin' not in kwargs
assert 'stdout' not in kwargs
assert 'stderr' not in kwargs
self.name = name
self.cmd = cmd
self.args = args
self.kwargs = kwargs
def run(self):
'''
Runs this command and returns its status.
:rtype: subprocess.CompletedProcess
'''
return subprocess.run(self.cmd, *self.args, **self.kwargs)
def eprint(*args, **kwargs):
'Like print, but to stderr.'
kwargs['file'] = sys.stderr
print(*args, **kwargs)
def run_cmd(cmd, *args, **kwargs):
'''
Print the command to stderr and run it.
If the command fails, throw a traceback.
'''
eprint('# %s' % ' '.join(cmd))
kwargs['check'] = True
return subprocess.run(cmd, *args, **kwargs)
def require(suite_dir, *names):
'''
Declare a dependency on the given names for a benchmark.
If any dependency doesn't exist, then fail with an error message.
'''
errs = []
for name in names:
fun_name = name.replace('-', '_')
if not globals()['has_%s' % fun_name](suite_dir):
errs.append(name)
if len(errs) > 0:
raise MissingDependencies(errs)
def download_linux(suite_dir):
'Download and build the Linux kernel.'
checkout_dir = path.join(suite_dir, LINUX_DIR)
if not os.path.isdir(checkout_dir):
# Clone from my fork so that we always get the same corpus *and* still
# do a shallow clone. Shallow clones are much much cheaper than full
# clones.
run_cmd(['git', 'clone', '--depth', '1', LINUX_CLONE, checkout_dir])
# We want to build the kernel because the process of building it produces
# a lot of junk in the repository that a search tool probably shouldn't
# touch.
if not os.path.exists(path.join(checkout_dir, 'vmlinux')):
eprint('# Building Linux kernel...')
run_cmd(['make', 'defconfig'], cwd=checkout_dir)
run_cmd(['make', '-j', str(cpu_count())], cwd=checkout_dir)
def has_linux(suite_dir):
'Returns true if we believe the Linux kernel is built.'
checkout_dir = path.join(suite_dir, LINUX_DIR)
return path.exists(path.join(checkout_dir, 'vmlinux'))
def download_subtitles_en(suite_dir):
'Download and decompress English subtitles.'
subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
en_path_gz = path.join(subtitle_dir, SUBTITLES_EN_NAME_GZ)
en_path = path.join(subtitle_dir, SUBTITLES_EN_NAME)
if not os.path.isdir(subtitle_dir):
os.makedirs(subtitle_dir)
if not os.path.exists(en_path):
if not os.path.exists(en_path_gz):
run_cmd(['curl', '-LO', SUBTITLES_EN_URL], cwd=subtitle_dir)
run_cmd(['gunzip', en_path_gz], cwd=subtitle_dir)
def has_subtitles_en(suite_dir):
'Returns true if English subtitles have been downloaded.'
subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
return path.exists(path.join(subtitle_dir, SUBTITLES_EN_NAME))
def download_subtitles_ru(suite_dir):
'Download and decompress Russian subtitles.'
subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
ru_path_gz = path.join(subtitle_dir, SUBTITLES_RU_NAME_GZ)
ru_path = path.join(subtitle_dir, SUBTITLES_RU_NAME)
if not os.path.isdir(subtitle_dir):
os.makedirs(subtitle_dir)
if not os.path.exists(ru_path):
if not os.path.exists(ru_path_gz):
run_cmd(['curl', '-LO', SUBTITLES_RU_URL], cwd=subtitle_dir)
run_cmd(['gunzip', ru_path_gz], cwd=subtitle_dir)
def has_subtitles_ru(suite_dir):
'Returns true if Russian subtitles have been downloaded.'
subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
return path.exists(path.join(subtitle_dir, SUBTITLES_RU_NAME))
def download(suite_dir, choices):
'''
Download choices into suite_dir.
Specifically, choices specifies a list of corpora to fetch.
:param str suite_dir:
The directory in which to download corpora.
:param list(str) choices:
A list of corpora to download. Available choices are:
all, linux, subtitles-en, subtitles-ru.
'''
for choice in args.download:
if choice == 'linux':
download_linux(suite_dir)
elif choice == 'subtitles-en':
download_subtitles_en(suite_dir)
elif choice == 'subtitles-ru':
download_subtitles_ru(suite_dir)
elif choice == 'all':
download_linux(suite_dir)
download_subtitles_en(suite_dir)
download_subtitles_ru(suite_dir)
else:
eprint('Unrecognized download choice: %s' % choice)
sys.exit(1)
def collect_benchmarks(suite_dir, filter_pat=None):
'''
Return an iterable of all runnable benchmarks.
:param str suite_dir:
The directory containing corpora.
:param str filter_pat:
A single regular expression that is used to filter benchmarks
by their name. When not specified, all benchmarks are run.
:returns:
An iterable over all runnable benchmarks. If a benchmark
requires corpora that are missing, then a log message is
emitted to stderr and it is not yielded.
'''
for fun in sorted(globals()):
if not fun.startswith('bench_'):
continue
name = re.sub('^bench_', '', fun)
if filter_pat is not None and not re.search(filter_pat, name):
continue
try:
benchmark = globals()[fun](suite_dir)
except MissingDependencies as e:
eprint(
'missing: %s, skipping benchmark %s (try running with: %s)' % (
', '.join(e.missing_names),
name,
' '.join(['--download %s' % n for n in e.missing_names]),
))
continue
benchmark.name = name
yield benchmark
def main():
p = argparse.ArgumentParser('Command line search tool benchmark suite.')
p.add_argument(
'--dir', metavar='PATH', default=os.getcwd(),
help='The directory in which to download data and perform searches.')
p.add_argument(
'--download', metavar='CORPUS', action='append',
choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
help='Download and prepare corpus data, then exit without running '
'any benchmarks. Note that this command is intended to be '
'idempotent. WARNING: This downloads over a gigabyte of data, '
'and also includes building the Linux kernel. If "all" is used '
'then the total uncompressed size is around 13 GB.')
p.add_argument(
'-f', '--force', action='store_true',
help='Overwrite existing files if there is a conflict.')
p.add_argument(
'--list', action='store_true',
help='List available benchmarks by name.')
p.add_argument(
'--raw', metavar='PATH',
help='Dump raw data (all samples collected) in CSV format to the '
'file path provided.')
p.add_argument(
'bench', metavar='PAT', nargs='?',
help='A regex pattern that will only run benchmarks that match.')
args = p.parse_args()
if args.download is not None and len(args.download) > 0:
download(args.dir, args.choices)
sys.exit(0)
if not path.isdir(args.dir):
os.makedirs(args.dir)
if args.raw is not None and path.exists(args.raw) and not args.force:
eprint('File %s already exists (delete it or use --force)' % args.raw)
sys.exit(1)
raw_handle, raw_csv_wtr = None, None
if args.raw is not None:
fields = [
'benchmark', 'warmup_iter', 'iter',
'name', 'command', 'duration', 'lines', 'env',
]
raw_handle = open(args.raw, 'w+')
raw_csv_wtr = csv.DictWriter(raw_handle, fields)
raw_csv_wtr.writerow({x: x for x in fields})
benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
for i, b in enumerate(benchmarks):
result = b.run()
fastest_cmd = result.fastest_cmd()
fastest_sample = result.fastest_sample()
max_name_len = max(len(cmd.name) for cmd in b.commands)
if i > 0:
print()
header = '%s (pattern: %s)' % (b.name, b.pattern)
print('%s\n%s' % (header, '-' * len(header)))
for cmd in b.commands:
name = cmd.name
mean, stdev = result.distribution_for(cmd)
line_counts = result.line_counts_for(cmd)
show_fast_cmd, show_line_counts = '', ''
if fastest_cmd.name == cmd.name:
show_fast_cmd = '*'
if fastest_sample['cmd'].name == cmd.name:
name += '*'
if len(line_counts) > 0:
counts = map(str, line_counts)
show_line_counts = ' (lines: %s)' % ', '.join(counts)
fmt = '{name:{pad}} {mean:0.3f} +/- {stdev:0.3f}{lines}{fast_cmd}'
print(fmt.format(
name=name, pad=max_name_len + 2, fast_cmd=show_fast_cmd,
mean=mean, stdev=stdev, lines=show_line_counts))
sys.stdout.flush()
if raw_csv_wtr is not None:
for sample in result.samples:
cmd, duration = sample['cmd'], sample['duration']
env = ' '.join(['%s=%s' % (k, v)
for k, v in cmd.kwargs.get('env', {}).items()])
raw_csv_wtr.writerow({
'benchmark': b.name,
'warmup_iter': b.warmup_count,
'iter': b.count,
'name': sample['cmd'].name,
'command': ' '.join(cmd.cmd),
'duration': duration,
'lines': sample['line_count'] or '',
'env': env,
})
raw_handle.flush()
if __name__ == '__main__':
main()

View File

@@ -18,7 +18,7 @@ mk_tarball() {
mkdir "$td/$name"
cp target/$TARGET/release/rg "$td/$name/"
cp {README,UNLICENSE,COPYING,LICENSE_MIT} "$td/$name/"
cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/"
pushd $td
tar czf "$out_dir/$name.tar.gz" *

View File

@@ -11,42 +11,20 @@ disable_cross_doctests() {
if [ "$TRAVIS_OS_NAME" = "osx" ]; then
brew install gnu-sed --default-names
fi
find src -name '*.rs' -type f | xargs sed -i -e 's:\(//.\s*```\):\1 ignore,:g'
fi
}
# TODO modify this function as you see fit
# PROTIP Always pass `--target $TARGET` to cargo commands, this makes cargo output build artifacts
# to target/$TARGET/{debug,release} which can reduce the number of needed conditionals in the
# `before_deploy`/packaging phase
run_test_suite() {
case $TARGET in
# configure emulation for transparent execution of foreign binaries
aarch64-unknown-linux-gnu)
export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
;;
arm*-unknown-linux-gnueabihf)
export QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf
;;
*)
;;
esac
if [ ! -z "$QEMU_LD_PREFIX" ]; then
# Run tests on a single thread when using QEMU user emulation
export RUST_TEST_THREADS=1
fi
cargo build --target $TARGET --verbose
cargo test --target $TARGET
cargo test --target $TARGET --verbose
# sanity check the file type
file target/$TARGET/debug/rg
}
main() {
disable_cross_doctests
# disable_cross_doctests
run_test_suite
}

View File

@@ -1,4 +1,5 @@
#!/bin/sh
export RUSTFLAGS="-C target-feature=+ssse3"
# export RUSTFLAGS="-C target-cpu=native"
cargo build --release --features simd-accel

View File

@@ -195,7 +195,7 @@ impl Grep {
let (prevnl, nextnl) = self.find_line(buf, e, e);
match self.re.shortest_match(&buf[prevnl..nextnl]) {
None => {
start = nextnl + 1;
start = nextnl;
continue;
}
Some(_) => {

View File

@@ -9,15 +9,16 @@ use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use term::Terminal;
use walkdir::WalkDir;
use atty;
use gitignore::{Gitignore, GitignoreBuilder};
use ignore::Ignore;
use out::Out;
use out::{Out, OutBuffer};
use printer::Printer;
use search::{InputBuffer, Searcher};
use search_buffer::BufferSearcher;
use sys;
use search_stream::{InputBuffer, Searcher};
use types::{FileTypeDef, Types, TypesBuilder};
use walk;
@@ -103,15 +104,12 @@ Less common options:
Don't show any file name heading.
--hidden
Search hidden directories and files.
Search hidden directories and files. (Hidden directories and files are
skipped by default.)
-L, --follow
Follow symlinks.
--line-terminator ARG
The byte to use for a line terminator. Escape sequences may be used.
[default: \\n]
--mmap
Search using memory maps when possible. This is enabled by default
when ripgrep thinks it will be faster. (Note that mmap searching
@@ -173,7 +171,6 @@ pub struct RawArgs {
flag_ignore_case: bool,
flag_invert_match: bool,
flag_line_number: bool,
flag_line_terminator: String,
flag_literal: bool,
flag_mmap: bool,
flag_no_heading: bool,
@@ -247,7 +244,9 @@ impl RawArgs {
};
let paths =
if self.arg_path.is_empty() {
if sys::stdin_is_atty() {
if atty::on_stdin()
|| self.flag_files
|| self.flag_type_list {
vec![Path::new("./").to_path_buf()]
} else {
vec![Path::new("-").to_path_buf()]
@@ -276,15 +275,6 @@ impl RawArgs {
if mmap {
debug!("will try to use memory maps");
}
let eol = {
let eol = unescape(&self.flag_line_terminator);
if eol.is_empty() {
errored!("Empty line terminator is not allowed.");
} else if eol.len() > 1 {
errored!("Line terminators are limited to exactly 1 byte.");
}
eol[0]
};
let glob_overrides =
if self.flag_glob.is_empty() {
None
@@ -298,16 +288,17 @@ impl RawArgs {
};
let threads =
if self.flag_threads == 0 {
cmp::min(6, num_cpus::get())
cmp::min(8, num_cpus::get())
} else {
self.flag_threads
};
let color =
if self.flag_color == "auto" {
sys::stdout_is_atty() || self.flag_pretty
atty::on_stdout() || self.flag_pretty
} else {
self.flag_color == "always"
};
let eol = b'\n';
let mut with_filename = self.flag_with_filename;
if !with_filename {
with_filename = paths.len() > 1 || paths[0].is_dir();
@@ -354,7 +345,7 @@ impl RawArgs {
with_filename: with_filename,
};
// If stdout is a tty, then apply some special default options.
if sys::stdout_is_atty() || self.flag_pretty {
if atty::on_stdout() || self.flag_pretty {
if !self.flag_no_line_number && !args.count {
args.line_number = true;
}
@@ -438,8 +429,8 @@ impl Args {
/// Create a new printer of individual search results that writes to the
/// writer given.
pub fn printer<W: Send + io::Write>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr, self.color)
pub fn printer<W: Send + Terminal>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr)
.column(self.column)
.context_separator(self.context_separator.clone())
.eol(self.eol)
@@ -454,8 +445,8 @@ impl Args {
/// Create a new printer of search results for an entire file that writes
/// to the writer given.
pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
let mut out = Out::new(wtr);
pub fn out(&self) -> Out {
let mut out = Out::new(self.color);
if self.heading && !self.count {
out = out.file_separator(b"".to_vec());
} else if self.before_context > 0 || self.after_context > 0 {
@@ -464,6 +455,11 @@ impl Args {
out
}
/// Create a new buffer for use with searching.
pub fn outbuf(&self) -> OutBuffer {
OutBuffer::new(self.color)
}
/// Return the paths that should be searched.
pub fn paths(&self) -> &[PathBuf] {
&self.paths
@@ -472,7 +468,7 @@ impl Args {
/// Create a new line based searcher whose configuration is taken from the
/// command line. This searcher supports a dizzying array of features:
/// inverted matching, line counting, context control and more.
pub fn searcher<'a, R: io::Read, W: Send + io::Write>(
pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
&self,
inp: &'a mut InputBuffer,
printer: &'a mut Printer<W>,
@@ -493,7 +489,7 @@ impl Args {
/// Create a new line based searcher whose configuration is taken from the
/// command line. This search operates on an entire file all once (which
/// may have been memory mapped).
pub fn searcher_buffer<'a, W: Send + io::Write>(
pub fn searcher_buffer<'a, W: Send + Terminal>(
&self,
printer: &'a mut Printer<W>,
grep: &'a Grep,

View File

@@ -1,24 +1,23 @@
/*!
This io module contains various platform specific functions for detecting
how ripgrep is being used. e.g., Is stdin being piped into it? Is stdout being
redirected to a file? etc... We use this information to tweak various default
configuration parameters such as colors and match formatting.
This atty module contains functions for detecting whether ripgrep is being fed
from (or to) a terminal. Windows and Unix do this differently, so implement
both here.
*/
#[cfg(unix)]
pub fn stdin_is_atty() -> bool {
pub fn on_stdin() -> bool {
use libc;
0 < unsafe { libc::isatty(libc::STDIN_FILENO) }
}
#[cfg(unix)]
pub fn stdout_is_atty() -> bool {
pub fn on_stdout() -> bool {
use libc;
0 < unsafe { libc::isatty(libc::STDOUT_FILENO) }
}
#[cfg(windows)]
pub fn stdin_is_atty() -> bool {
pub fn on_stdin() -> bool {
use kernel32;
use winapi;
@@ -30,7 +29,7 @@ pub fn stdin_is_atty() -> bool {
}
#[cfg(windows)]
pub fn stdout_is_atty() -> bool {
pub fn on_stdout() -> bool {
use kernel32;
use winapi;

View File

@@ -39,9 +39,9 @@ use term::Terminal;
use walkdir::DirEntry;
use args::Args;
use out::Out;
use out::{NoColorTerminal, Out, OutBuffer};
use printer::Printer;
use search::InputBuffer;
use search_stream::InputBuffer;
macro_rules! errored {
($($tt:tt)*) => {
@@ -57,14 +57,14 @@ macro_rules! eprintln {
}
mod args;
mod atty;
mod gitignore;
mod glob;
mod ignore;
mod out;
mod printer;
mod search;
mod search_buffer;
mod sys;
mod search_stream;
mod terminal;
mod types;
mod walk;
@@ -90,7 +90,8 @@ fn run(args: Args) -> Result<u64> {
return run_types(args);
}
let args = Arc::new(args);
let out = Arc::new(Mutex::new(args.out(io::stdout())));
let out = Arc::new(Mutex::new(args.out()));
let outbuf = args.outbuf();
let mut workers = vec![];
let mut workq = {
@@ -101,7 +102,7 @@ fn run(args: Args) -> Result<u64> {
out: out.clone(),
chan_work: stealer.clone(),
inpbuf: args.input_buffer(),
outbuf: Some(vec![]),
outbuf: Some(outbuf.clone()),
grep: args.grep(),
match_count: 0,
};
@@ -129,7 +130,8 @@ fn run(args: Args) -> Result<u64> {
}
fn run_files(args: Args) -> Result<u64> {
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
let mut printer = args.printer(term);
let mut file_count = 0;
for p in args.paths() {
if p == Path::new("-") {
@@ -146,7 +148,8 @@ fn run_files(args: Args) -> Result<u64> {
}
fn run_types(args: Args) -> Result<u64> {
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
let mut printer = args.printer(term);
let mut ty_count = 0;
for def in args.type_defs() {
printer.type_def(def);
@@ -168,10 +171,10 @@ enum WorkReady {
struct Worker {
args: Arc<Args>,
out: Arc<Mutex<Out<io::Stdout>>>,
out: Arc<Mutex<Out>>,
chan_work: Stealer<Work>,
inpbuf: InputBuffer,
outbuf: Option<Vec<u8>>,
outbuf: Option<OutBuffer>,
grep: Grep,
match_count: u64,
}
@@ -203,12 +206,12 @@ impl Worker {
let mut out = self.out.lock().unwrap();
out.write(&outbuf);
}
self.outbuf = Some(outbuf.into_inner());
self.outbuf = Some(outbuf);
}
self.match_count
}
fn do_work<W: Send + io::Write>(
fn do_work<W: Send + Terminal>(
&mut self,
printer: &mut Printer<W>,
work: WorkReady,
@@ -241,7 +244,7 @@ impl Worker {
}
}
fn search<R: io::Read, W: Send + io::Write>(
fn search<R: io::Read, W: Send + Terminal>(
&mut self,
printer: &mut Printer<W>,
path: &Path,
@@ -256,7 +259,7 @@ impl Worker {
).run().map_err(From::from)
}
fn search_mmap<W: Send + io::Write>(
fn search_mmap<W: Send + Terminal>(
&mut self,
printer: &mut Printer<W>,
path: &Path,

View File

@@ -1,10 +1,40 @@
use std::io::{self, Write};
use std::sync::Arc;
use term::{StdoutTerminal, Terminal};
use term::{self, Terminal};
use term::color::Color;
use term::terminfo::TermInfo;
#[cfg(windows)]
use term::WinConsole;
use printer::Writer;
use terminal::TerminfoTerminal;
pub type StdoutTerminal = Box<Terminal<Output=io::Stdout> + Send>;
/// Gets a terminal that supports color if available.
#[cfg(windows)]
fn term_stdout(color: bool) -> StdoutTerminal {
let stdout = io::stdout();
WinConsole::new(stdout)
.ok()
.map(|t| Box::new(t) as StdoutTerminal)
.unwrap_or_else(|| {
let stdout = io::stdout();
Box::new(NoColorTerminal::new(stdout)) as StdoutTerminal
})
}
/// Gets a terminal that supports color if available.
#[cfg(not(windows))]
fn term_stdout(color: bool) -> StdoutTerminal {
let stdout = io::stdout();
if !color || TERMINFO.is_none() {
Box::new(NoColorTerminal::new(stdout))
} else {
let info = TERMINFO.clone().unwrap();
Box::new(TerminfoTerminal::new_with_terminfo(stdout, info))
}
}
/// Out controls the actual output of all search results for a particular file
/// to the end user.
@@ -12,34 +42,17 @@ use printer::Writer;
/// (The difference between Out and Printer is that a Printer works with
/// individual search results where as Out works with search results for each
/// file as a whole. For example, it knows when to print a file separator.)
pub struct Out<W: io::Write> {
wtr: io::BufWriter<W>,
term: Option<Box<StdoutTerminal>>,
pub struct Out {
term: StdoutTerminal,
printed: bool,
file_separator: Option<Vec<u8>>,
}
/// This is like term::stdout, but on Windows always uses WinConsole instead
/// of trying for a TerminfoTerminal. This may be a mistake.
#[cfg(windows)]
fn term_stdout() -> Option<Box<StdoutTerminal>> {
WinConsole::new(io::stdout())
.ok()
.map(|t| Box::new(t) as Box<StdoutTerminal>)
}
#[cfg(not(windows))]
fn term_stdout() -> Option<Box<StdoutTerminal>> {
// We never use this crap on *nix.
None
}
impl<W: io::Write> Out<W> {
impl Out {
/// Create a new Out that writes to the wtr given.
pub fn new(wtr: W) -> Out<W> {
pub fn new(color: bool) -> Out {
Out {
wtr: io::BufWriter::new(wtr),
term: term_stdout(),
term: term_stdout(color),
printed: false,
file_separator: None,
}
@@ -49,39 +62,422 @@ impl<W: io::Write> Out<W> {
/// By default, no separator is printed.
///
/// If sep is empty, then no file separator is printed.
pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
self.file_separator = Some(sep);
self
}
/// Write the search results of a single file to the underlying wtr and
/// flush wtr.
pub fn write(&mut self, buf: &Writer<Vec<u8>>) {
pub fn write(&mut self, buf: &OutBuffer) {
if let Some(ref sep) = self.file_separator {
if self.printed {
let _ = self.wtr.write_all(sep);
let _ = self.wtr.write_all(b"\n");
let _ = self.term.write_all(sep);
let _ = self.term.write_all(b"\n");
}
}
match *buf {
Writer::Colored(ref tt) => {
let _ = self.wtr.write_all(tt.get_ref());
OutBuffer::Colored(ref tt) => {
let _ = self.term.write_all(tt.get_ref());
}
Writer::Windows(ref w) => {
match self.term {
None => {
let _ = self.wtr.write_all(w.get_ref());
}
Some(ref mut stdout) => {
w.print_stdout(stdout);
}
}
OutBuffer::Windows(ref w) => {
w.print_stdout(&mut self.term);
}
Writer::NoColor(ref buf) => {
let _ = self.wtr.write_all(buf);
OutBuffer::NoColor(ref buf) => {
let _ = self.term.write_all(buf);
}
}
let _ = self.wtr.flush();
let _ = self.term.flush();
self.printed = true;
}
}
/// OutBuffer corresponds to the final output buffer for search results. All
/// search results are written to a buffer and then a buffer is flushed to
/// stdout only after the full search has completed.
#[derive(Clone, Debug)]
pub enum OutBuffer {
Colored(TerminfoTerminal<Vec<u8>>),
Windows(WindowsBuffer),
NoColor(Vec<u8>),
}
#[derive(Clone, Debug)]
pub struct WindowsBuffer {
buf: Vec<u8>,
pos: usize,
colors: Vec<WindowsColor>,
}
#[derive(Clone, Debug)]
pub struct WindowsColor {
pos: usize,
opt: WindowsOption,
}
#[derive(Clone, Debug)]
pub enum WindowsOption {
Foreground(Color),
Background(Color),
Reset,
}
lazy_static! {
static ref TERMINFO: Option<Arc<TermInfo>> = {
match TermInfo::from_env() {
Ok(info) => Some(Arc::new(info)),
Err(err) => {
debug!("error loading terminfo for coloring: {}", err);
None
}
}
};
}
impl OutBuffer {
/// Create a new output buffer.
///
/// When color is true, the buffer will attempt to support coloring.
pub fn new(color: bool) -> OutBuffer {
// If we want color, build a TerminfoTerminal and see if the current
// environment supports coloring. If not, bail with NoColor. To avoid
// losing our writer (ownership), do this the long way.
if !color {
return OutBuffer::NoColor(vec![]);
}
if cfg!(windows) {
return OutBuffer::Windows(WindowsBuffer {
buf: vec![],
pos: 0,
colors: vec![]
});
}
if TERMINFO.is_none() {
return OutBuffer::NoColor(vec![]);
}
let info = TERMINFO.clone().unwrap();
let tt = TerminfoTerminal::new_with_terminfo(vec![], info);
if !tt.supports_color() {
debug!("environment doesn't support coloring");
return OutBuffer::NoColor(tt.into_inner());
}
OutBuffer::Colored(tt)
}
/// Clear the give buffer of all search results such that it is reusable
/// in another search.
pub fn clear(&mut self) {
match *self {
OutBuffer::Colored(ref mut tt) => {
tt.get_mut().clear();
}
OutBuffer::Windows(ref mut win) => {
win.buf.clear();
win.colors.clear();
win.pos = 0;
}
OutBuffer::NoColor(ref mut buf) => {
buf.clear();
}
}
}
fn map_result<F, G>(
&mut self,
mut f: F,
mut g: G,
) -> term::Result<()>
where F: FnMut(&mut TerminfoTerminal<Vec<u8>>) -> term::Result<()>,
G: FnMut(&mut WindowsBuffer) -> term::Result<()> {
match *self {
OutBuffer::Colored(ref mut w) => f(w),
OutBuffer::Windows(ref mut w) => g(w),
OutBuffer::NoColor(_) => Err(term::Error::NotSupported),
}
}
fn map_bool<F, G>(
&self,
mut f: F,
mut g: G,
) -> bool
where F: FnMut(&TerminfoTerminal<Vec<u8>>) -> bool,
G: FnMut(&WindowsBuffer) -> bool {
match *self {
OutBuffer::Colored(ref w) => f(w),
OutBuffer::Windows(ref w) => g(w),
OutBuffer::NoColor(_) => false,
}
}
}
impl io::Write for OutBuffer {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match *self {
OutBuffer::Colored(ref mut w) => w.write(buf),
OutBuffer::Windows(ref mut w) => w.write(buf),
OutBuffer::NoColor(ref mut w) => w.write(buf),
}
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl term::Terminal for OutBuffer {
type Output = Vec<u8>;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
}
fn supports_attr(&self, attr: term::Attr) -> bool {
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
}
fn reset(&mut self) -> term::Result<()> {
self.map_result(|w| w.reset(), |w| w.reset())
}
fn supports_reset(&self) -> bool {
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
}
fn supports_color(&self) -> bool {
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
}
fn cursor_up(&mut self) -> term::Result<()> {
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
}
fn delete_line(&mut self) -> term::Result<()> {
self.map_result(|w| w.delete_line(), |w| w.delete_line())
}
fn carriage_return(&mut self) -> term::Result<()> {
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
}
fn get_ref(&self) -> &Vec<u8> {
match *self {
OutBuffer::Colored(ref w) => w.get_ref(),
OutBuffer::Windows(ref w) => w.get_ref(),
OutBuffer::NoColor(ref w) => w,
}
}
fn get_mut(&mut self) -> &mut Vec<u8> {
match *self {
OutBuffer::Colored(ref mut w) => w.get_mut(),
OutBuffer::Windows(ref mut w) => w.get_mut(),
OutBuffer::NoColor(ref mut w) => w,
}
}
fn into_inner(self) -> Vec<u8> {
match self {
OutBuffer::Colored(w) => w.into_inner(),
OutBuffer::Windows(w) => w.into_inner(),
OutBuffer::NoColor(w) => w,
}
}
}
impl WindowsBuffer {
fn push(&mut self, opt: WindowsOption) {
let pos = self.pos;
self.colors.push(WindowsColor { pos: pos, opt: opt });
}
}
impl WindowsBuffer {
/// Print the contents to the given terminal.
pub fn print_stdout(&self, tt: &mut StdoutTerminal) {
if !tt.supports_color() {
let _ = tt.write_all(&self.buf);
let _ = tt.flush();
return;
}
let mut last = 0;
for col in &self.colors {
let _ = tt.write_all(&self.buf[last..col.pos]);
match col.opt {
WindowsOption::Foreground(c) => {
let _ = tt.fg(c);
}
WindowsOption::Background(c) => {
let _ = tt.bg(c);
}
WindowsOption::Reset => {
let _ = tt.reset();
}
}
last = col.pos;
}
let _ = tt.write_all(&self.buf[last..]);
let _ = tt.flush();
}
}
impl io::Write for WindowsBuffer {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let n = try!(self.buf.write(buf));
self.pos += n;
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
}
}
impl term::Terminal for WindowsBuffer {
type Output = Vec<u8>;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Foreground(fg));
Ok(())
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Background(bg));
Ok(())
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
self.push(WindowsOption::Reset);
Ok(())
}
fn supports_reset(&self) -> bool {
true
}
fn supports_color(&self) -> bool {
true
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &Vec<u8> {
&self.buf
}
fn get_mut(&mut self) -> &mut Vec<u8> {
&mut self.buf
}
fn into_inner(self) -> Vec<u8> {
self.buf
}
}
/// NoColorTerminal implements Terminal, but supports no coloring.
///
/// Its useful when an API requires a Terminal, but coloring isn't needed.
pub struct NoColorTerminal<W> {
wtr: W,
}
impl<W: Send + io::Write> NoColorTerminal<W> {
/// Wrap the given writer in a Terminal interface.
pub fn new(wtr: W) -> NoColorTerminal<W> {
NoColorTerminal {
wtr: wtr,
}
}
}
impl<W: Send + io::Write> io::Write for NoColorTerminal<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.wtr.write(buf)
}
fn flush(&mut self) -> io::Result<()> {
self.wtr.flush()
}
}
impl<W: Send + io::Write> term::Terminal for NoColorTerminal<W> {
type Output = W;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_reset(&self) -> bool {
false
}
fn supports_color(&self) -> bool {
false
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &W {
&self.wtr
}
fn get_mut(&mut self) -> &mut W {
&mut self.wtr
}
fn into_inner(self) -> W {
self.wtr
}
}

View File

@@ -1,17 +1,11 @@
use std::io::{self, Write};
use std::path::Path;
use std::sync::Arc;
use regex::bytes::Regex;
use term::{self, StdoutTerminal, Terminal};
use term::color::*;
use term::terminfo::TermInfo;
use term::{Attr, Terminal};
use term::color;
use terminal::TerminfoTerminal;
use types::FileTypeDef;
use self::Writer::*;
/// Printer encapsulates all output logic for searching.
///
/// Note that we currently ignore all write errors. It's probably worthwhile
@@ -19,7 +13,7 @@ use self::Writer::*;
/// writes to memory, neither of which commonly fail.
pub struct Printer<W> {
/// The underlying writer.
wtr: Writer<W>,
wtr: W,
/// Whether anything has been printed to wtr yet.
has_printed: bool,
/// Whether to show column numbers for the first match or not.
@@ -42,13 +36,11 @@ pub struct Printer<W> {
with_filename: bool,
}
impl<W: Send + io::Write> Printer<W> {
impl<W: Send + Terminal> Printer<W> {
/// Create a new printer that writes to wtr.
///
/// `color` should be true if the printer should try to use coloring.
pub fn new(wtr: W, color: bool) -> Printer<W> {
pub fn new(wtr: W) -> Printer<W> {
Printer {
wtr: Writer::new(wtr, color),
wtr: wtr,
has_printed: false,
column: false,
context_separator: "--".to_string().into_bytes(),
@@ -115,7 +107,7 @@ impl<W: Send + io::Write> Printer<W> {
}
/// Flushes the underlying writer and returns it.
pub fn into_inner(mut self) -> Writer<W> {
pub fn into_inner(mut self) -> W {
let _ = self.wtr.flush();
self.wtr
}
@@ -201,15 +193,15 @@ impl<W: Send + io::Write> Printer<W> {
}
pub fn write_match(&mut self, re: &Regex, buf: &[u8]) {
if !self.wtr.is_color() {
if !self.wtr.supports_color() {
self.write(buf);
return;
}
let mut last_written = 0;
for (s, e) in re.find_iter(buf) {
self.write(&buf[last_written..s]);
let _ = self.wtr.fg(BRIGHT_RED);
let _ = self.wtr.attr(term::Attr::Bold);
let _ = self.wtr.fg(color::BRIGHT_RED);
let _ = self.wtr.attr(Attr::Bold);
self.write(&buf[s..e]);
let _ = self.wtr.reset();
last_written = e;
@@ -241,24 +233,24 @@ impl<W: Send + io::Write> Printer<W> {
}
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
if self.wtr.is_color() {
let _ = self.wtr.fg(BRIGHT_GREEN);
let _ = self.wtr.attr(term::Attr::Bold);
if self.wtr.supports_color() {
let _ = self.wtr.fg(color::BRIGHT_GREEN);
let _ = self.wtr.attr(Attr::Bold);
}
self.write(path.as_ref().to_string_lossy().as_bytes());
self.write_eol();
if self.wtr.is_color() {
if self.wtr.supports_color() {
let _ = self.wtr.reset();
}
}
fn line_number(&mut self, n: u64, sep: u8) {
if self.wtr.is_color() {
let _ = self.wtr.fg(BRIGHT_BLUE);
let _ = self.wtr.attr(term::Attr::Bold);
if self.wtr.supports_color() {
let _ = self.wtr.fg(color::BRIGHT_BLUE);
let _ = self.wtr.attr(Attr::Bold);
}
self.write(n.to_string().as_bytes());
if self.wtr.is_color() {
if self.wtr.supports_color() {
let _ = self.wtr.reset();
}
self.write(&[sep]);
@@ -277,289 +269,3 @@ impl<W: Send + io::Write> Printer<W> {
self.write(&[eol]);
}
}
/// Writer corresponds to the final output buffer for search results. All
/// search results are written to a Writer and then a Writer is flushed to
/// stdout only after the full search has completed.
pub enum Writer<W> {
Colored(TerminfoTerminal<W>),
Windows(WindowsWriter<W>),
NoColor(W),
}
pub struct WindowsWriter<W> {
wtr: W,
pos: usize,
colors: Vec<WindowsColor>,
}
pub struct WindowsColor {
pos: usize,
opt: WindowsOption,
}
pub enum WindowsOption {
Foreground(Color),
Background(Color),
Reset,
}
lazy_static! {
static ref TERMINFO: Option<Arc<TermInfo>> = {
match term::terminfo::TermInfo::from_env() {
Ok(info) => Some(Arc::new(info)),
Err(err) => {
debug!("error loading terminfo for coloring: {}", err);
None
}
}
};
}
impl<W: Send + io::Write> Writer<W> {
fn new(wtr: W, color: bool) -> Writer<W> {
// If we want color, build a TerminfoTerminal and see if the current
// environment supports coloring. If not, bail with NoColor. To avoid
// losing our writer (ownership), do this the long way.
if !color {
return NoColor(wtr);
}
if cfg!(windows) {
return Windows(WindowsWriter { wtr: wtr, pos: 0, colors: vec![] });
}
if TERMINFO.is_none() {
return NoColor(wtr);
}
let info = TERMINFO.clone().unwrap();
let tt = TerminfoTerminal::new_with_terminfo(wtr, info);
if !tt.supports_color() {
debug!("environment doesn't support coloring");
return NoColor(tt.into_inner());
}
Colored(tt)
}
fn is_color(&self) -> bool {
match *self {
Colored(_) => true,
Windows(_) => true,
NoColor(_) => false,
}
}
fn map_result<F, G>(
&mut self,
mut f: F,
mut g: G,
) -> term::Result<()>
where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()>,
G: FnMut(&mut WindowsWriter<W>) -> term::Result<()> {
match *self {
Colored(ref mut w) => f(w),
Windows(ref mut w) => g(w),
NoColor(_) => Err(term::Error::NotSupported),
}
}
fn map_bool<F, G>(
&self,
mut f: F,
mut g: G,
) -> bool
where F: FnMut(&TerminfoTerminal<W>) -> bool,
G: FnMut(&WindowsWriter<W>) -> bool {
match *self {
Colored(ref w) => f(w),
Windows(ref w) => g(w),
NoColor(_) => false,
}
}
}
impl<W: Send + io::Write> io::Write for Writer<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
match *self {
Colored(ref mut w) => w.write(buf),
Windows(ref mut w) => w.write(buf),
NoColor(ref mut w) => w.write(buf),
}
}
fn flush(&mut self) -> io::Result<()> {
match *self {
Colored(ref mut w) => w.flush(),
Windows(ref mut w) => w.flush(),
NoColor(ref mut w) => w.flush(),
}
}
}
impl<W: Send + io::Write> term::Terminal for Writer<W> {
type Output = W;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
}
fn supports_attr(&self, attr: term::Attr) -> bool {
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
}
fn reset(&mut self) -> term::Result<()> {
self.map_result(|w| w.reset(), |w| w.reset())
}
fn supports_reset(&self) -> bool {
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
}
fn supports_color(&self) -> bool {
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
}
fn cursor_up(&mut self) -> term::Result<()> {
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
}
fn delete_line(&mut self) -> term::Result<()> {
self.map_result(|w| w.delete_line(), |w| w.delete_line())
}
fn carriage_return(&mut self) -> term::Result<()> {
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
}
fn get_ref(&self) -> &W {
match *self {
Colored(ref w) => w.get_ref(),
Windows(ref w) => w.get_ref(),
NoColor(ref w) => w,
}
}
fn get_mut(&mut self) -> &mut W {
match *self {
Colored(ref mut w) => w.get_mut(),
Windows(ref mut w) => w.get_mut(),
NoColor(ref mut w) => w,
}
}
fn into_inner(self) -> W {
match self {
Colored(w) => w.into_inner(),
Windows(w) => w.into_inner(),
NoColor(w) => w,
}
}
}
impl<W: Send + io::Write> WindowsWriter<W> {
fn push(&mut self, opt: WindowsOption) {
let pos = self.pos;
self.colors.push(WindowsColor { pos: pos, opt: opt });
}
}
impl WindowsWriter<Vec<u8>> {
/// Print the contents to the given terminal.
pub fn print_stdout(&self, tt: &mut Box<StdoutTerminal>) {
let mut last = 0;
for col in &self.colors {
let _ = tt.write_all(&self.wtr[last..col.pos]);
match col.opt {
WindowsOption::Foreground(c) => {
let _ = tt.fg(c);
}
WindowsOption::Background(c) => {
let _ = tt.bg(c);
}
WindowsOption::Reset => {
let _ = tt.reset();
}
}
last = col.pos;
}
let _ = tt.write_all(&self.wtr[last..]);
let _ = tt.flush();
}
}
impl<W: Send + io::Write> io::Write for WindowsWriter<W> {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let n = try!(self.wtr.write(buf));
self.pos += n;
Ok(n)
}
fn flush(&mut self) -> io::Result<()> {
self.wtr.flush()
}
}
impl<W: Send + io::Write> term::Terminal for WindowsWriter<W> {
type Output = W;
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Foreground(fg));
Ok(())
}
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
self.push(WindowsOption::Background(bg));
Ok(())
}
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn supports_attr(&self, attr: term::Attr) -> bool {
false
}
fn reset(&mut self) -> term::Result<()> {
self.push(WindowsOption::Reset);
Ok(())
}
fn supports_reset(&self) -> bool {
true
}
fn supports_color(&self) -> bool {
true
}
fn cursor_up(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn delete_line(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn carriage_return(&mut self) -> term::Result<()> {
Err(term::Error::NotSupported)
}
fn get_ref(&self) -> &W {
&self.wtr
}
fn get_mut(&mut self) -> &mut W {
&mut self.wtr
}
fn into_inner(self) -> W {
self.wtr
}
}

View File

@@ -1,11 +1,19 @@
/*!
The search_buffer module is responsible for searching a single file all in a
single buffer. Typically, the source of the buffer is a memory map. This can
be useful for when memory maps are faster than streaming search.
Note that this module doesn't quite support everything that search_stream does.
Notably, showing contexts.
*/
use std::cmp;
use std::io;
use std::path::Path;
use grep::Grep;
use term::Terminal;
use printer::Printer;
use search::{IterLines, Options, count_lines, is_binary};
use search_stream::{IterLines, Options, count_lines, is_binary};
pub struct BufferSearcher<'a, W: 'a> {
opts: Options,
@@ -18,7 +26,7 @@ pub struct BufferSearcher<'a, W: 'a> {
last_line: usize,
}
impl<'a, W: Send + io::Write> BufferSearcher<'a, W> {
impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
pub fn new(
printer: &'a mut Printer<W>,
grep: &'a Grep,
@@ -146,12 +154,12 @@ mod tests {
use grep::{Grep, GrepBuilder};
use term::Terminal;
use out::OutBuffer;
use printer::Printer;
use super::BufferSearcher;
lazy_static! {
static ref SHERLOCK: &'static str = "\
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
@@ -159,7 +167,8 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
static ref CODE: &'static str = "\
const CODE: &'static str = "\
extern crate snap;
use std::io;
@@ -174,7 +183,6 @@ fn main() {
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";
}
fn matcher(pat: &str) -> Grep {
GrepBuilder::new(pat).build().unwrap()
@@ -184,14 +192,15 @@ fn main() {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = BufferSearcher<'a, Vec<u8>>;
type TestSearcher<'a> = BufferSearcher<'a, OutBuffer>;
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
haystack: &str,
mut map: F,
) -> (u64, String) {
let mut pp = Printer::new(vec![], false).with_filename(true);
let outbuf = OutBuffer::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = BufferSearcher::new(
@@ -203,7 +212,7 @@ fn main() {
#[test]
fn basic_search() {
let (count, out) = search("Sherlock", &*SHERLOCK, |s|s);
let (count, out) = search("Sherlock", SHERLOCK, |s|s);
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -231,7 +240,7 @@ fn main() {
#[test]
fn line_numbers() {
let (count, out) = search(
"Sherlock", &*SHERLOCK, |s| s.line_number(true));
"Sherlock", SHERLOCK, |s| s.line_number(true));
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -242,7 +251,7 @@ fn main() {
#[test]
fn count() {
let (count, out) = search(
"Sherlock", &*SHERLOCK, |s| s.count(true));
"Sherlock", SHERLOCK, |s| s.count(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:2\n");
}
@@ -250,7 +259,7 @@ fn main() {
#[test]
fn invert_match() {
let (count, out) = search(
"Sherlock", &*SHERLOCK, |s| s.invert_match(true));
"Sherlock", SHERLOCK, |s| s.invert_match(true));
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
@@ -262,7 +271,7 @@ fn main() {
#[test]
fn invert_match_line_numbers() {
let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).line_number(true)
});
assert_eq!(4, count);
@@ -276,7 +285,7 @@ fn main() {
#[test]
fn invert_match_count() {
let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).count(true)
});
assert_eq!(4, count);

View File

@@ -1,6 +1,7 @@
/*!
The search module is responsible for searching a single file and printing
matches.
The search_stream module is responsible for searching a single file and
printing matches. In particular, it searches the file in a streaming fashion
using `read` calls and a (roughly) fixed size buffer.
*/
use std::cmp;
@@ -11,6 +12,7 @@ use std::path::{Path, PathBuf};
use grep::{Grep, Match};
use memchr::{memchr, memrchr};
use term::Terminal;
use printer::Printer;
@@ -98,7 +100,7 @@ impl Default for Options {
}
}
impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
impl<'a, R: io::Read, W: Send + Terminal> Searcher<'a, R, W> {
/// Create a new searcher.
///
/// `inp` is a reusable input buffer that is used as scratch space by this
@@ -541,13 +543,88 @@ pub fn is_binary(buf: &[u8]) -> bool {
}
/// Count the number of lines in the given buffer.
#[inline(always)]
pub fn count_lines(mut buf: &[u8], eol: u8) -> u64 {
let mut count = 0;
while let Some(pos) = memchr(eol, buf) {
count += 1;
buf = &buf[pos + 1..];
#[inline(never)]
#[inline(never)]
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
// This was adapted from code in the memchr crate. The specific benefit
// here is that we can avoid a branch in the inner loop because all we're
// doing is counting.
// The technique to count EOL bytes was adapted from:
// http://bits.stephan-brumme.com/null.html
const LO_U64: u64 = 0x0101010101010101;
const HI_U64: u64 = 0x8080808080808080;
// use truncation
const LO_USIZE: usize = LO_U64 as usize;
const HI_USIZE: usize = HI_U64 as usize;
#[cfg(target_pointer_width = "32")]
const USIZE_BYTES: usize = 4;
#[cfg(target_pointer_width = "64")]
const USIZE_BYTES: usize = 8;
fn count_eol(eol: usize) -> u64 {
// Ideally, this would compile down to a POPCNT instruction, but
// it looks like you need to set RUSTFLAGS="-C target-cpu=native"
// (or target-feature=+popcnt) to get that to work. Bummer.
(eol.wrapping_sub(LO_USIZE) & !eol & HI_USIZE).count_ones() as u64
}
#[cfg(target_pointer_width = "32")]
fn repeat_byte(b: u8) -> usize {
let mut rep = (b as usize) << 8 | b as usize;
rep = rep << 16 | rep;
rep
}
#[cfg(target_pointer_width = "64")]
fn repeat_byte(b: u8) -> usize {
let mut rep = (b as usize) << 8 | b as usize;
rep = rep << 16 | rep;
rep = rep << 32 | rep;
rep
}
fn count_lines_slow(mut buf: &[u8], eol: u8) -> u64 {
let mut count = 0;
while let Some(pos) = memchr(eol, buf) {
count += 1;
buf = &buf[pos + 1..];
}
count
}
let len = buf.len();
let ptr = buf.as_ptr();
let mut count = 0;
// Search up to an aligned boundary...
let align = (ptr as usize) & (USIZE_BYTES - 1);
let mut i = 0;
if align > 0 {
i = cmp::min(USIZE_BYTES - align, len);
count += count_lines_slow(&buf[..i], eol);
}
// ... and search the rest.
let repeated_eol = repeat_byte(eol);
if len >= 2 * USIZE_BYTES {
while i <= len - (2 * USIZE_BYTES) {
unsafe {
let u = *(ptr.offset(i as isize) as *const usize);
let v = *(ptr.offset((i + USIZE_BYTES) as isize)
as *const usize);
count += count_eol(u ^ repeated_eol);
count += count_eol(v ^ repeated_eol);
}
i += USIZE_BYTES * 2;
}
}
count += count_lines_slow(&buf[i..], eol);
count
}
@@ -689,12 +766,12 @@ mod tests {
use grep::{Grep, GrepBuilder};
use term::Terminal;
use out::OutBuffer;
use printer::Printer;
use super::{InputBuffer, Searcher, start_of_previous_lines};
lazy_static! {
static ref SHERLOCK: &'static str = "\
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
@@ -702,7 +779,8 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
static ref CODE: &'static str = "\
const CODE: &'static str = "\
extern crate snap;
use std::io;
@@ -717,7 +795,6 @@ fn main() {
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";
}
fn hay(s: &str) -> io::Cursor<Vec<u8>> {
io::Cursor::new(s.to_string().into_bytes())
@@ -731,7 +808,7 @@ fn main() {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, Vec<u8>>;
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, OutBuffer>;
fn search_smallcap<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
@@ -739,7 +816,8 @@ fn main() {
mut map: F,
) -> (u64, String) {
let mut inp = InputBuffer::with_capacity(1);
let mut pp = Printer::new(vec![], false).with_filename(true);
let outbuf = OutBuffer::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = Searcher::new(
@@ -755,7 +833,8 @@ fn main() {
mut map: F,
) -> (u64, String) {
let mut inp = InputBuffer::with_capacity(4096);
let mut pp = Printer::new(vec![], false).with_filename(true);
let outbuf = OutBuffer::NoColor(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = Searcher::new(
@@ -870,7 +949,7 @@ fn main() {
#[test]
fn basic_search1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s);
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s|s);
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -897,7 +976,7 @@ fn main() {
#[test]
fn line_numbers() {
let (count, out) = search_smallcap(
"Sherlock", &*SHERLOCK, |s| s.line_number(true));
"Sherlock", SHERLOCK, |s| s.line_number(true));
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
@@ -908,7 +987,7 @@ fn main() {
#[test]
fn count() {
let (count, out) = search_smallcap(
"Sherlock", &*SHERLOCK, |s| s.count(true));
"Sherlock", SHERLOCK, |s| s.count(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:2\n");
}
@@ -916,7 +995,7 @@ fn main() {
#[test]
fn invert_match() {
let (count, out) = search_smallcap(
"Sherlock", &*SHERLOCK, |s| s.invert_match(true));
"Sherlock", SHERLOCK, |s| s.invert_match(true));
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
@@ -928,7 +1007,7 @@ fn main() {
#[test]
fn invert_match_line_numbers() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.invert_match(true).line_number(true)
});
assert_eq!(4, count);
@@ -942,7 +1021,7 @@ fn main() {
#[test]
fn invert_match_count() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.invert_match(true).count(true)
});
assert_eq!(4, count);
@@ -951,7 +1030,7 @@ fn main() {
#[test]
fn before_context_one1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).before_context(1)
});
assert_eq!(2, count);
@@ -964,7 +1043,7 @@ fn main() {
#[test]
fn before_context_invert_one1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).before_context(1).invert_match(true)
});
assert_eq!(4, count);
@@ -980,7 +1059,7 @@ fn main() {
#[test]
fn before_context_invert_one2() {
let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
s.line_number(true).before_context(1).invert_match(true)
});
assert_eq!(3, count);
@@ -995,7 +1074,7 @@ fn main() {
#[test]
fn before_context_two1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).before_context(2)
});
assert_eq!(2, count);
@@ -1008,7 +1087,7 @@ fn main() {
#[test]
fn before_context_two2() {
let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
s.line_number(true).before_context(2)
});
assert_eq!(1, count);
@@ -1022,7 +1101,7 @@ fn main() {
#[test]
fn before_context_two3() {
let (count, out) = search_smallcap(
"success|attached", &*SHERLOCK, |s| {
"success|attached", SHERLOCK, |s| {
s.line_number(true).before_context(2)
});
assert_eq!(2, count);
@@ -1038,7 +1117,7 @@ fn main() {
#[test]
fn before_context_two4() {
let (count, out) = search("stdin", &*CODE, |s| {
let (count, out) = search("stdin", CODE, |s| {
s.line_number(true).before_context(2)
});
assert_eq!(3, count);
@@ -1055,7 +1134,7 @@ fn main() {
#[test]
fn before_context_two5() {
let (count, out) = search("stdout", &*CODE, |s| {
let (count, out) = search("stdout", CODE, |s| {
s.line_number(true).before_context(2)
});
assert_eq!(2, count);
@@ -1072,7 +1151,7 @@ fn main() {
#[test]
fn before_context_three1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).before_context(3)
});
assert_eq!(2, count);
@@ -1085,7 +1164,7 @@ fn main() {
#[test]
fn after_context_one1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).after_context(1)
});
assert_eq!(2, count);
@@ -1099,7 +1178,7 @@ fn main() {
#[test]
fn after_context_invert_one1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).after_context(1).invert_match(true)
});
assert_eq!(4, count);
@@ -1114,7 +1193,7 @@ fn main() {
#[test]
fn after_context_invert_one2() {
let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
s.line_number(true).after_context(1).invert_match(true)
});
assert_eq!(3, count);
@@ -1130,7 +1209,7 @@ fn main() {
#[test]
fn after_context_two1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).after_context(2)
});
assert_eq!(2, count);
@@ -1145,7 +1224,7 @@ fn main() {
#[test]
fn after_context_two2() {
let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
s.line_number(true).after_context(2)
});
assert_eq!(1, count);
@@ -1158,7 +1237,7 @@ fn main() {
#[test]
fn after_context_two3() {
let (count, out) = search_smallcap(
"success|attached", &*SHERLOCK, |s| {
"success|attached", SHERLOCK, |s| {
s.line_number(true).after_context(2)
});
assert_eq!(2, count);
@@ -1173,7 +1252,7 @@ fn main() {
#[test]
fn after_context_three1() {
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
s.line_number(true).after_context(3)
});
assert_eq!(2, count);
@@ -1190,7 +1269,7 @@ fn main() {
#[test]
fn before_after_context_two1() {
let (count, out) = search(
r"fn main|let mut rdr", &*CODE, |s| {
r"fn main|let mut rdr", CODE, |s| {
s.line_number(true).after_context(2).before_context(2)
});
assert_eq!(2, count);

24
tests/hay.rs Normal file
View File

@@ -0,0 +1,24 @@
pub const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
pub const CODE: &'static str = "\
extern crate snap;
use std::io;
fn main() {
let stdin = io::stdin();
let stdout = io::stdout();
// Wrap the stdin reader in a Snappy reader.
let mut rdr = snap::Reader::new(stdin.lock());
let mut wtr = stdout.lock();
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";

577
tests/tests.rs Normal file
View File

@@ -0,0 +1,577 @@
/*!
This module contains *integration* tests. Their purpose is to test the CLI
interface. Namely, that passing a flag does what it says on the tin.
Tests for more fine grained behavior (like the search or the globber) should be
unit tests in their respective modules.
*/
#![allow(dead_code, unused_imports)]
use std::process::Command;
use workdir::WorkDir;
mod hay;
mod workdir;
macro_rules! sherlock {
($name:ident, $fun:expr) => {
sherlock!($name, "Sherlock", $fun);
};
($name:ident, $query:expr, $fun:expr) => {
sherlock!($name, $query, "sherlock", $fun);
};
($name:ident, $query:expr, $path:expr, $fun:expr) => {
#[test]
fn $name() {
let wd = WorkDir::new(stringify!($name));
wd.create("sherlock", hay::SHERLOCK);
let mut cmd = wd.command();
cmd.arg($query).arg($path);
$fun(wd, cmd);
}
};
}
sherlock!(single_file, |wd: WorkDir, mut cmd| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(line_numbers, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-n");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
3:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(columns, |wd: WorkDir, mut cmd: Command| {
cmd.arg("--column");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
57:For the Doctor Watsons of this world, as opposed to the Sherlock
49:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(with_filename, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-H");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(with_heading, |wd: WorkDir, mut cmd: Command| {
// This forces the issue since --with-filename is disabled by default
// when searching one fil.e
cmd.arg("--with-filename").arg("--heading");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(with_heading_default, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
// Search two or more and get --with-filename enabled by default.
// Use -j1 to get deterministic results.
wd.create("foo", "Sherlock Holmes lives on Baker Street.");
cmd.arg("-j1").arg("--heading");
let lines: String = wd.stdout(&mut cmd);
let expected1 = "\
foo
Sherlock Holmes lives on Baker Street.
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
let expected2 = "\
sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
foo
Sherlock Holmes lives on Baker Street.
";
assert!(lines == expected1 || lines == expected2);
});
sherlock!(inverted, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-v");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
Holmeses, success in the province of detective work must always
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
assert_eq!(lines, expected);
});
sherlock!(inverted_line_numbers, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-n").arg("-v");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
2:Holmeses, success in the province of detective work must always
4:can extract a clew from a wisp of straw or a flake of cigar ash;
5:but Doctor Watson has to have it taken out for him and dusted,
6:and exhibited clearly, with a label attached.
";
assert_eq!(lines, expected);
});
sherlock!(case_insensitive, "sherlock", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-i");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(word, "as", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-w");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
";
assert_eq!(lines, expected);
});
sherlock!(literal, "()", "file", |wd: WorkDir, mut cmd: Command| {
wd.create("file", "blib\n()\nblab\n");
cmd.arg("-Q");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "()\n");
});
sherlock!(quiet, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-q");
let lines: String = wd.stdout(&mut cmd);
assert!(lines.is_empty());
});
sherlock!(replace, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-r").arg("FooBar");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the FooBar
be, to a very large extent, the result of luck. FooBar Holmes
";
assert_eq!(lines, expected);
});
sherlock!(replace_groups, "([A-Z][a-z]+) ([A-Z][a-z]+)",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("-r").arg("$2, $1");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Watsons, Doctor of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Holmes, Sherlock
but Watson, Doctor has to have it taken out for him and dusted,
";
assert_eq!(lines, expected);
});
sherlock!(replace_named_groups, "(?P<first>[A-Z][a-z]+) (?P<last>[A-Z][a-z]+)",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("-r").arg("$last, $first");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Watsons, Doctor of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Holmes, Sherlock
but Watson, Doctor has to have it taken out for him and dusted,
";
assert_eq!(lines, expected);
});
sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
cmd.arg("-t").arg("rust");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.rs:Sherlock\n");
});
sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
cmd.arg("-T").arg("rust");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py:Sherlock\n");
});
sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
cmd.arg("--type-clear").arg("rust").arg("-t").arg("rust");
wd.assert_err(&mut cmd);
});
sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
wd.create("file.wat", "Sherlock");
cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.wat:Sherlock\n");
});
sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
cmd.arg("-g").arg("*.rs");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.rs:Sherlock\n");
});
sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
cmd.arg("-g").arg("!*.rs");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py:Sherlock\n");
});
sherlock!(after_context, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-A").arg("1");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
";
assert_eq!(lines, expected);
});
sherlock!(after_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-A").arg("1").arg("-n");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
3:be, to a very large extent, the result of luck. Sherlock Holmes
4-can extract a clew from a wisp of straw or a flake of cigar ash;
";
assert_eq!(lines, expected);
});
sherlock!(before_context, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-B").arg("1");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(before_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
cmd.arg("-B").arg("1").arg("-n");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
3:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(context, "world|attached", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-C").arg("1");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
--
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
assert_eq!(lines, expected);
});
sherlock!(context_line_numbers, "world|attached",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("-C").arg("1").arg("-n");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
1:For the Doctor Watsons of this world, as opposed to the Sherlock
2-Holmeses, success in the province of detective work must always
--
5-but Doctor Watson has to have it taken out for him and dusted,
6:and exhibited clearly, with a label attached.
";
assert_eq!(lines, expected);
});
sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".sherlock", hay::SHERLOCK);
wd.assert_err(&mut cmd);
});
sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".sherlock", hay::SHERLOCK);
cmd.arg("--hidden");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(ignore_git, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".gitignore", "sherlock\n");
wd.assert_err(&mut cmd);
});
sherlock!(ignore_ripgrep, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".rgignore", "sherlock\n");
wd.assert_err(&mut cmd);
});
sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".gitignore", "sherlock\n");
cmd.arg("--no-ignore");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(ignore_git_parent, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create(".gitignore", "sherlock\n");
wd.create_dir(".git");
wd.create_dir("foo");
wd.create("foo/sherlock", hay::SHERLOCK);
// Even though we search in foo/, which has no .gitignore, ripgrep will
// search parent directories and respect the gitignore files found.
cmd.current_dir(wd.path().join("foo"));
wd.assert_err(&mut cmd);
});
sherlock!(ignore_git_parent_stop, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
// This tests that searching parent directories for .gitignore files stops
// after it sees a .git directory. To test this, we create this directory
// hierarchy:
//
// .gitignore (contains `sherlock`)
// foo/
// .git
// bar/
// sherlock
//
// And we perform the search inside `foo/bar/`. ripgrep will stop looking
// for .gitignore files after it sees `foo/.git/`, and therefore not
// respect the top-level `.gitignore` containing `sherlock`.
wd.remove("sherlock");
wd.create(".gitignore", "sherlock\n");
wd.create_dir("foo");
wd.create_dir("foo/.git");
wd.create_dir("foo/bar");
wd.create("foo/bar/sherlock", hay::SHERLOCK);
cmd.current_dir(wd.path().join("foo").join("bar"));
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(ignore_ripgrep_parent_no_stop, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
// This is like the `ignore_git_parent_stop` test, except it checks that
// ripgrep *doesn't* stop checking for .rgignore files.
wd.remove("sherlock");
wd.create(".rgignore", "sherlock\n");
wd.create_dir("foo");
wd.create_dir("foo/.git");
wd.create_dir("foo/bar");
wd.create("foo/bar/sherlock", hay::SHERLOCK);
cmd.current_dir(wd.path().join("foo").join("bar"));
// The top-level .rgignore applies.
wd.assert_err(&mut cmd);
});
sherlock!(no_parent_ignore_git, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
// Set up a directory hierarchy like this:
//
// .gitignore
// foo/
// .gitignore
// sherlock
// watson
//
// Where `.gitignore` contains `sherlock` and `foo/.gitignore` contains
// `watson`.
//
// Now *do the search* from the foo directory. By default, ripgrep will
// search parent directories for .gitignore files. The --no-ignore-parent
// flag should prevent that. At the same time, the `foo/.gitignore` file
// will still be respected (since the search is happening in `foo/`).
//
// In other words, we should only see results from `sherlock`, not from
// `watson`.
wd.remove("sherlock");
wd.create(".gitignore", "sherlock\n");
wd.create_dir("foo");
wd.create("foo/.gitignore", "watson\n");
wd.create("foo/sherlock", hay::SHERLOCK);
wd.create("foo/watson", hay::SHERLOCK);
cmd.current_dir(wd.path().join("foo"));
cmd.arg("--no-ignore-parent");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create_dir("foo");
wd.create_dir("foo/bar");
wd.link("foo/baz", "foo/bar/baz");
wd.create_dir("foo/baz");
wd.create("foo/baz/sherlock", hay::SHERLOCK);
cmd.current_dir(wd.path().join("foo/bar"));
wd.assert_err(&mut cmd);
});
sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.remove("sherlock");
wd.create_dir("foo");
wd.create_dir("foo/bar");
wd.create_dir("foo/baz");
wd.create("foo/baz/sherlock", hay::SHERLOCK);
wd.link("foo/baz", "foo/bar/baz");
cmd.arg("-L");
cmd.current_dir(wd.path().join("foo/bar"));
let lines: String = wd.stdout(&mut cmd);
if cfg!(windows) {
let expected = "\
baz\\sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz\\sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
} else {
let expected = "\
baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
}
});
#[test]
fn binary_nosearch() {
let wd = WorkDir::new("binary_nosearch");
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
let mut cmd = wd.command();
cmd.arg("foo").arg("file");
wd.assert_err(&mut cmd);
}
// The following two tests show a discrepancy in search results between
// searching with memory mapped files and stream searching. Stream searching
// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with
// the EOL terminator, which tends to avoid allocating large amounts of memory
// for really long "lines." The memory map searcher has no need to worry about
// such things, and more than that, it would be pretty hard for it to match
// the semantics of streaming search in this case.
//
// Binary files with lots of NULs aren't really part of the use case of ripgrep
// (or any other grep-like tool for that matter), so we shouldn't feel too bad
// about it.
#[test]
fn binary_search_mmap() {
let wd = WorkDir::new("binary_search_mmap");
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
let mut cmd = wd.command();
cmd.arg("-a").arg("--mmap").arg("foo").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo\x00bar\nfoo\x00baz\n");
}
#[test]
fn binary_search_no_mmap() {
let wd = WorkDir::new("binary_search_no_mmap");
wd.create("file", "foo\x00bar\nfoo\x00baz\n");
let mut cmd = wd.command();
cmd.arg("-a").arg("--no-mmap").arg("foo").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo\nfoo\n");
}
#[test]
fn files() {
let wd = WorkDir::new("files");
wd.create("file", "");
wd.create_dir("dir");
wd.create("dir/file", "");
let mut cmd = wd.command();
cmd.arg("--files");
let lines: String = wd.stdout(&mut cmd);
if cfg!(windows) {
assert!(lines == "./dir\\file\n./file\n"
|| lines == "./file\n./dir\\file\n");
} else {
assert!(lines == "./file\n./dir/file\n"
|| lines == "./dir/file\n./file\n");
}
}
#[test]
fn type_list() {
let wd = WorkDir::new("type_list");
let mut cmd = wd.command();
cmd.arg("--type-list");
let lines: String = wd.stdout(&mut cmd);
// This can change over time, so just make sure we print something.
assert!(!lines.is_empty());
}

189
tests/workdir.rs Normal file
View File

@@ -0,0 +1,189 @@
use std::env;
use std::error;
use std::fmt;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process;
use std::str::FromStr;
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;
static TEST_DIR: &'static str = "ripgrep-tests";
static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;
/// WorkDir represents a directory in which tests are run.
///
/// Directories are created from a global atomic counter to avoid duplicates.
#[derive(Debug)]
pub struct WorkDir {
/// The directory in which this test executable is running.
root: PathBuf,
/// The directory in which the test should run. If a test needs to create
/// files, they should go in here.
dir: PathBuf,
}
impl WorkDir {
/// Create a new test working directory with the given name. The name
/// does not need to be distinct for each invocation, but should correspond
/// to a logical grouping of tests.
pub fn new(name: &str) -> WorkDir {
let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
let root = env::current_exe().unwrap()
.parent().expect("executable's directory").to_path_buf();
let dir = root.join(TEST_DIR).join(name).join(&format!("{}", id));
nice_err(&dir, repeat(|| fs::create_dir_all(&dir)));
WorkDir {
root: root,
dir: dir,
}
}
/// Create a new file with the given name and contents in this directory.
pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
let path = self.dir.join(name);
let mut file = nice_err(&path, File::create(&path));
nice_err(&path, file.write_all(contents.as_bytes()));
nice_err(&path, file.flush());
}
/// Remove a file with the given name from this directory.
pub fn remove<P: AsRef<Path>>(&self, name: P) {
let path = self.dir.join(name);
nice_err(&path, fs::remove_file(&path));
}
/// Create a new directory with the given path (and any directories above
/// it) inside this directory.
pub fn create_dir<P: AsRef<Path>>(&self, path: P) {
let path = self.dir.join(path);
nice_err(&path, repeat(|| fs::create_dir_all(&path)));
}
/// Creates a new command that is set to use the ripgrep executable in
/// this working directory.
pub fn command(&self) -> process::Command {
let mut cmd = process::Command::new(&self.bin());
cmd.current_dir(&self.dir);
cmd
}
/// Returns the path to the ripgrep executable.
pub fn bin(&self) -> PathBuf {
self.root.join("rg")
}
/// Returns the path to this directory.
pub fn path(&self) -> &Path {
&self.dir
}
/// Creates a directory symlink to the src with the given target name
/// in this directory.
#[cfg(not(windows))]
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
use std::os::unix::fs::symlink;
let src = self.dir.join(src);
let target = self.dir.join(target);
let _ = fs::remove_file(&target);
nice_err(&target, symlink(&src, &target));
}
#[cfg(windows)]
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
use std::os::windows::fs::symlink_dir;
let src = self.dir.join(src);
let target = self.dir.join(target);
let _ = fs::remove_dir(&target);
nice_err(&target, symlink_dir(&src, &target));
}
/// Runs and captures the stdout of the given command.
///
/// If the return type could not be created from a string, then this
/// panics.
pub fn stdout<E: fmt::Debug, T: FromStr<Err=E>>(
&self,
cmd: &mut process::Command,
) -> T {
let o = self.output(cmd);
let stdout = String::from_utf8_lossy(&o.stdout);
match stdout.parse() {
Ok(t) => t,
Err(err) => {
panic!("could not convert from string: {:?}\n\n{}", err, stdout);
}
}
}
/// Gets the output of a command. If the command failed, then this panics.
pub fn output(&self, cmd: &mut process::Command) -> process::Output {
let o = cmd.output().unwrap();
if !o.status.success() {
let suggest =
if o.stderr.is_empty() {
"\n\nDid your search end up with no results?".to_string()
} else {
"".to_string()
};
panic!("\n\n==========\n\
command failed but expected success!\
{}\
\n\ncommand: {:?}\
\ncwd: {}\
\n\nstatus: {}\
\n\nstdout: {}\
\n\nstderr: {}\
\n\n==========\n",
suggest, cmd, self.dir.display(), o.status,
String::from_utf8_lossy(&o.stdout),
String::from_utf8_lossy(&o.stderr));
}
o
}
/// Runs the given command and asserts that it resulted in an error exit
/// code.
pub fn assert_err(&self, cmd: &mut process::Command) {
let o = cmd.output().unwrap();
if o.status.success() {
panic!("\n\n===== {:?} =====\n\
command succeeded but expected failure!\
\n\ncwd: {}\
\n\nstatus: {}\
\n\nstdout: {}\n\nstderr: {}\
\n\n=====\n",
cmd, self.dir.display(), o.status,
String::from_utf8_lossy(&o.stdout),
String::from_utf8_lossy(&o.stderr));
}
}
}
fn nice_err<P: AsRef<Path>, T, E: error::Error>(
path: P,
res: Result<T, E>,
) -> T {
match res {
Ok(t) => t,
Err(err) => {
panic!("{}: {:?}", path.as_ref().display(), err);
}
}
}
fn repeat<F: FnMut() -> io::Result<()>>(mut f: F) -> io::Result<()> {
let mut last_err = None;
for _ in 0..10 {
if let Err(err) = f() {
last_err = Some(err);
thread::sleep(Duration::from_millis(500));
} else {
return Ok(());
}
}
Err(last_err.unwrap())
}