mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-27 10:11:58 -07:00
Compare commits
19 Commits
Author | SHA1 | Date | |
---|---|---|---|
|
9bf7696ec8 | ||
|
cb0f8fd2fa | ||
|
fa8112ec34 | ||
|
cf21b4a97e | ||
|
19615245cd | ||
|
98a48b44bc | ||
|
e3da726836 | ||
|
5b36c86c15 | ||
|
76331e5fec | ||
|
1e678d7052 | ||
|
dd986d7fe9 | ||
|
f83cd63b11 | ||
|
9a4527d107 | ||
|
8f0d3d78ca | ||
|
3f7cd977bc | ||
|
cc6b6dcf5b | ||
|
48878bbb8f | ||
|
0766617e07 | ||
|
afd99c43d7 |
54
.travis.yml
54
.travis.yml
@@ -1,37 +1,49 @@
|
||||
#language: rust
|
||||
#rust:
|
||||
# - stable
|
||||
# - beta
|
||||
# - nightly
|
||||
#script:
|
||||
# - cargo build --verbose
|
||||
# - cargo doc
|
||||
# - cargo test --verbose
|
||||
# - if [ "$TRAVIS_RUST_VERSION" = "nightly" ]; then
|
||||
# cargo bench --verbose;
|
||||
# fi
|
||||
|
||||
language: rust
|
||||
cache: cargo
|
||||
|
||||
env:
|
||||
global:
|
||||
- PROJECT_NAME=xrep
|
||||
- PROJECT_NAME=ripgrep
|
||||
matrix:
|
||||
include:
|
||||
# Nightly channel
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=i686-apple-darwin
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-apple-darwin
|
||||
# Nightly channel.
|
||||
# (All *nix releases are done on the nightly channel to take advantage
|
||||
# of the regex library's multiple pattern SIMD search.)
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=i686-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=i686-apple-darwin
|
||||
- os: osx
|
||||
rust: nightly
|
||||
env: TARGET=x86_64-apple-darwin
|
||||
# Beta channel.
|
||||
- os: linux
|
||||
rust: beta
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: beta
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: osx
|
||||
rust: beta
|
||||
env: TARGET=x86_64-apple-darwin
|
||||
# Minimum Rust supported channel.
|
||||
- os: linux
|
||||
rust: 1.9.0
|
||||
env: TARGET=x86_64-unknown-linux-musl
|
||||
- os: linux
|
||||
rust: 1.9.0
|
||||
env: TARGET=x86_64-unknown-linux-gnu
|
||||
- os: osx
|
||||
rust: 1.9.0
|
||||
env: TARGET=x86_64-apple-darwin
|
||||
|
||||
before_install:
|
||||
- export PATH="$PATH:$HOME/.cargo/bin"
|
||||
|
@@ -18,6 +18,10 @@ bench = false
|
||||
path = "src/main.rs"
|
||||
name = "rg"
|
||||
|
||||
[[test]]
|
||||
name = "integration"
|
||||
path = "tests/tests.rs"
|
||||
|
||||
[dependencies]
|
||||
crossbeam = "0.2"
|
||||
docopt = "0.6"
|
||||
|
@@ -1,6 +1,6 @@
|
||||
environment:
|
||||
global:
|
||||
PROJECT_NAME: rg
|
||||
PROJECT_NAME: ripgrep
|
||||
matrix:
|
||||
# Nightly channel
|
||||
- TARGET: i686-pc-windows-gnu
|
||||
|
@@ -1,3 +1,7 @@
|
||||
/*!
|
||||
This module benchmarks the glob implementation. For benchmarks on the ripgrep
|
||||
tool itself, see the benchsuite directory.
|
||||
*/
|
||||
#![feature(test)]
|
||||
|
||||
extern crate glob;
|
||||
|
918
benchsuite
Executable file
918
benchsuite
Executable file
@@ -0,0 +1,918 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
'''
|
||||
benchsuite is a benchmark runner for comparing command line search tools.
|
||||
'''
|
||||
|
||||
import argparse
|
||||
import csv
|
||||
import os
|
||||
import os.path as path
|
||||
from multiprocessing import cpu_count
|
||||
import re
|
||||
import statistics
|
||||
import subprocess
|
||||
import sys
|
||||
import time
|
||||
|
||||
# Some constants for identifying the corpora we use to run tests.
|
||||
# We establish two very different kinds of corpora: a small number of large
|
||||
# files and a large number of small files. These are vastly different use cases
|
||||
# not only because of their performance characteristics, but also the
|
||||
# strategies used to increase the relevance of results returned.
|
||||
|
||||
# Directory (relative to the suite directory) holding the subtitle corpora,
# plus the file name, compressed file name and download URL of each corpus.
SUBTITLES_DIR = 'subtitles'
SUBTITLES_EN_NAME = 'OpenSubtitles2016.raw.en'
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz'
SUBTITLES_RU_NAME = 'OpenSubtitles2016.raw.ru'
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz'

# Directory (relative to the suite directory) of the Linux kernel checkout.
LINUX_DIR = 'linux'
# Clone over HTTPS: GitHub no longer serves the unauthenticated git://
# protocol, so a git:// URL makes the clone fail.
LINUX_CLONE = 'https://github.com/BurntSushi/linux'
|
||||
|
||||
|
||||
def bench_linux_literal_default(suite_dir):
    '''
    Benchmark a literal search using every tool's *default* settings.

    No flags are passed to even out the tools' behavior, so this
    comparison is purposefully unfair. It is useful for illustrating
    how the default modes of each search tool differ.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    cmds = [
        in_checkout('rg', ['rg', pat]),
        in_checkout('ag', ['ag', pat]),
        # ucg does not read gitignore files; its file whitelist happens to
        # produce the same matches as ag and rg for this search.
        in_checkout('ucg', ['ucg', pat]),
        in_checkout('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
        in_checkout('pt', ['pt', pat]),
        # sift reports one extra line here, for a matched binary file.
        in_checkout('sift', ['sift', pat]),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_literal(suite_dir):
    '''
    Benchmark a literal search while attempting to be fair to each tool.

    Each tool is given a minimal set of flags so that matching is case
    sensitive and line numbers are computed.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    git_argv = ['git', 'grep', '-I', '-n', pat]
    sift_argv = ['sift', '-n', '--binary-skip', '--exclude-files', '.*', pat]
    cmds = [
        in_checkout('rg', ['rg', '-n', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        in_checkout('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        in_checkout('ag', ['ag', '-s', pat]),
        in_checkout('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        in_checkout('ucg', ['ucg', '--nosmart-case', pat]),
        in_checkout('git grep', git_argv, env={'LC_ALL': 'C'}),
        in_checkout('pt', ['pt', pat]),
        in_checkout('sift', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_literal_casei(suite_dir):
    '''
    Benchmark a case insensitive literal search.

    Same pattern as the linux_literal benchmark, but every tool is asked
    to match case insensitively.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    rg_mmap_argv = ['rg', '--mmap', '--no-ignore', '-n', '-i', pat]
    git_argv = ['git', 'grep', '-I', '-n', '-i', pat]
    sift_argv = [
        'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
    ]
    cmds = [
        in_checkout('rg', ['rg', '-n', '-i', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        in_checkout('rg-novcs-mmap', rg_mmap_argv),
        in_checkout('ag', ['ag', '-i', pat]),
        in_checkout('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
        in_checkout('ucg', ['ucg', '-i', pat]),
        in_checkout('git grep', git_argv, env={'LC_ALL': 'C'}),
        # sift yields more matches than it should: it matches inside
        # Module.symvers and System.map in the repo root, both of which are
        # listed in the repo root's .gitignore file.
        in_checkout('sift', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_re_literal_suffix(suite_dir):
    '''
    Benchmark a regex whose literal appears only as a suffix.

    Because the pattern does not begin with a literal, it inhibits, for
    example, the prefix byte optimization in Go's regex engine (relevant
    for sift and pt).

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = '[A-Z]+_RESUME'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    git_argv = ['git', 'grep', '-E', '-I', '-n', pat]
    sift_argv = ['sift', '-n', '--binary-skip', '--exclude-files', '.*', pat]
    cmds = [
        in_checkout('rg', ['rg', '-n', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        in_checkout('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        in_checkout('ag', ['ag', '-s', pat]),
        in_checkout('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        in_checkout('ucg', ['ucg', '--nosmart-case', pat]),
        in_checkout('git grep', git_argv, env={'LC_ALL': 'C'}),
        in_checkout('sift', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_word(suite_dir):
    r'''
    Benchmark the -w ("match word") flag of each tool.

    sift has a lot of trouble here: the flag forces it into Go's regex
    engine by surrounding the pattern with \b assertions.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = 'PM_RESUME'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    rg_mmap_argv = ['rg', '--mmap', '--no-ignore', '-n', '-w', pat]
    git_argv = ['git', 'grep', '-E', '-I', '-n', '-w', pat]
    sift_argv = [
        'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
    ]
    cmds = [
        in_checkout('rg', ['rg', '-n', '-w', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
        in_checkout('rg-novcs-mmap', rg_mmap_argv),
        in_checkout('ag', ['ag', '-s', '-w', pat]),
        in_checkout('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
        in_checkout('ucg', ['ucg', '--nosmart-case', '-w', pat]),
        in_checkout('git grep', git_argv, env={'LC_ALL': 'C'}),
        in_checkout('sift', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_unicode_greek(suite_dir):
    '''
    Benchmark matching of a Unicode category.

    Only three tools (ripgrep, sift and pt) are noted as supporting this;
    of those, this benchmark runs rg and sift.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = r'\p{Greek}'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    # sift tries to search a bunch of PDF files and clutters up the results
    # even with --binary-skip, so PDFs are excluded explicitly. Excluding
    # them has no measurable impact on performance.
    sift_argv = [
        'sift', '-n', '--binary-skip',
        '--exclude-files', '.*',
        '--exclude-files', '*.pdf',
        pat,
    ]
    cmds = [
        in_checkout('rg', ['rg', '-n', pat]),
        in_checkout('sift', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_unicode_greek_casei(suite_dir):
    '''
    Benchmark matching of a Unicode category, case insensitively.

    Only ripgrep is noted as getting this right (while remaining fast).

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = r'\p{Greek}'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    # sift tries to search a bunch of PDF files and clutters up the results
    # even with --binary-skip, so PDFs are excluded explicitly. Excluding
    # them has no measurable impact on performance.
    sift_argv = [
        'sift', '-n', '--binary-skip',
        '--exclude-files', '.*',
        '--exclude-files', '*.pdf',
        pat,
    ]
    cmds = [
        in_checkout('rg', ['rg', '-n', '-i', pat]),
        in_checkout('sift', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_unicode_word(suite_dir):
    r'''
    Benchmark the Unicode aware \w character class.

    Only ripgrep and git-grep (with LC_ALL=en_US.UTF-8) are noted as
    getting this right; everything else uses the standard ASCII
    interpretation of \w and is labelled "(no Unicode)".

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = r'\wAh'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    sift_argv = ['sift', '-n', '--binary-skip', '--exclude-files', '.*', pat]
    cmds = [
        in_checkout('rg', ['rg', '-n', pat]),
        # The (?-u) prefix turns off Unicode mode for ripgrep's regex.
        in_checkout('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        in_checkout('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        in_checkout('ag (no Unicode)', ['ag', '-s', pat]),
        in_checkout(
            'ag-novcs (no Unicode)', ['ag', '--skip-vcs-ignores', '-s', pat]),
        in_checkout('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        # git grep is run twice; only the locale differs between the runs.
        in_checkout(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'}),
        in_checkout(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'}),
        in_checkout('sift (no Unicode)', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_no_literal(suite_dir):
    '''
    Benchmark a regex that defeats all literal optimizations.

    Most search patterns contain some kind of literal, which typically
    lets a search take shortcuts. The applicability of this benchmark is
    therefore somewhat suspicious, but the suite wouldn't feel complete
    without it.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    sift_argv = ['sift', '-n', '--binary-skip', '--exclude-files', '.*', pat]
    cmds = [
        in_checkout('rg', ['rg', '-n', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        # The (?-u) prefix turns off Unicode mode for ripgrep's regex.
        in_checkout('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        in_checkout(
            'rg-novcs (no Unicode)',
            ['rg', '--no-ignore', '-n', '(?-u)' + pat]),
        in_checkout('ag (no Unicode)', ['ag', '-s', pat]),
        in_checkout(
            'ag-novcs (no Unicode)', ['ag', '--skip-vcs-ignores', '-s', pat]),
        in_checkout('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        # git grep is run twice; only the locale differs between the runs.
        in_checkout(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'}),
        in_checkout(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'}),
        in_checkout('sift (no Unicode)', sift_argv),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_alternates(suite_dir):
    '''
    Benchmark a small alternation of literals.

    sift is left out: it was more than 10x slower than the next fastest
    result. The slowdown is likely because the Go regexp engine doesn't
    do any literal optimizations for this case (the alternates share no
    common leading byte).

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    git_argv = ['git', 'grep', '-E', '-I', '-n', pat]
    cmds = [
        in_checkout('rg', ['rg', '-n', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        in_checkout('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        in_checkout('ag', ['ag', '-s', pat]),
        in_checkout('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        in_checkout('ucg', ['ucg', '--nosmart-case', pat]),
        in_checkout('git grep', git_argv, env={'LC_ALL': 'C'}),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
def bench_linux_alternates_casei(suite_dir):
    '''
    Benchmark a small alternation of literals, case insensitively.

    :param str suite_dir:
        The directory containing corpora. The 'linux' corpus is required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'linux')
    checkout = path.join(suite_dir, LINUX_DIR)
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    def in_checkout(name, argv, **opts):
        # Every command is run from inside the Linux checkout.
        return Command(name, argv, **dict(opts, cwd=checkout))

    rg_mmap_argv = ['rg', '--mmap', '--no-ignore', '-n', '-i', pat]
    git_argv = ['git', 'grep', '-E', '-I', '-n', '-i', pat]
    cmds = [
        in_checkout('rg', ['rg', '-n', '-i', pat]),
        in_checkout('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        in_checkout('rg-novcs-mmap', rg_mmap_argv),
        in_checkout('ag', ['ag', '-i', pat]),
        in_checkout('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
        in_checkout('ucg', ['ucg', '-i', pat]),
        in_checkout('git grep', git_argv, env={'LC_ALL': 'C'}),
    ]
    return Benchmark(pattern=pat, commands=cmds)
|
||||
|
||||
|
||||
# BREADCRUMBS(burntsushi): We should benchmark an alternation for `linux` as
|
||||
# well.
|
||||
|
||||
def bench_sherlock(suite_dir):
    '''
    Benchmark a literal search over the single English subtitles file.

    TODO: Fix this and add more single file benchmarks.

    :param str suite_dir:
        The directory containing corpora. The 'subtitles-en' corpus is
        required.
    :rtype: Benchmark
    '''
    require(suite_dir, 'subtitles-en')
    corpus = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME)
    pat = 'Sherlock'

    # Unlike the linux benchmarks, the corpus path is passed explicitly
    # and no working directory is set.
    rg_cmd = Command('rg', ['rg', pat, corpus])
    grep_cmd = Command('grep', ['grep', '-a', pat, corpus])
    return Benchmark(pattern=pat, commands=[rg_cmd, grep_cmd])
|
||||
|
||||
|
||||
class MissingDependencies(Exception):
    '''
    Raised when a benchmark needs a corpus that isn't available.

    :ivar list(str) missing_names:
        The names of the missing dependencies. These are the same names
        accepted by the --download flag.
    '''

    def __init__(self, missing_names):
        self.missing_names = missing_names

    def __str__(self):
        return 'MissingDependency(%s)' % (repr(self.missing_names),)
|
||||
|
||||
|
||||
class Benchmark(object):
    '''
    A single benchmark corresponding to a grouping of commands.

    The main purpose of a benchmark is to compare the performance
    characteristics of a group of commands.
    '''

    def __init__(self, name=None, pattern=None, commands=None,
                 warmup_count=1, count=3, line_count=True):
        '''
        Create a single benchmark.

        A single benchmark is composed of a set of commands that are
        benchmarked and compared against one another. A benchmark may
        have multiple commands that use the same search tool (but
        probably should have something differentiating them).

        The grouping of commands is a purely human driven process.

        The stderr of every command is always discarded. The stdout is
        captured when line counting is enabled and discarded otherwise.

        :param str name:
            A human readable string denoting the name of this
            benchmark.
        :param str pattern:
            The pattern that is used in search.
        :param list(Command) commands:
            A list of commands to initialize this benchmark with. More
            commands may be added before running the benchmark.
        :param int warmup_count:
            The number of times to run each command before recording
            samples.
        :param int count:
            The number of samples to collect from each command.
        :param bool line_count:
            When set, the lines of each search are counted and included
            in the samples produced.
        '''
        self.name = name
        self.pattern = pattern
        self.commands = commands or []
        self.warmup_count = warmup_count
        self.count = count
        self.line_count = line_count

    def run(self):
        '''
        Runs this benchmark and returns the results.

        Each command is first run ``warmup_count`` times without
        recording anything, then ``count`` times with each run recorded
        as a sample.

        :rtype: Result
        '''
        result = Result(self)
        for cmd in self.commands:
            # Do a warmup first; these runs are not recorded.
            for _ in range(self.warmup_count):
                self.run_one(cmd)
            for _ in range(self.count):
                result.add(cmd, **self.run_one(cmd))
        return result

    def run_one(self, cmd):
        '''
        Runs the given command exactly once.

        Note that this overwrites the 'stdout' and 'stderr' entries of
        ``cmd.kwargs``.

        :param Command cmd: The command to run.
        :returns:
            A dict with two fields, duration and line_count.
            The duration is in (fractional) seconds and is always
            available. The line_count is None unless line counting is
            enabled, in which case it is the number of newline bytes in
            the search output.
        :rtype: dict
        '''
        cmd.kwargs['stderr'] = subprocess.DEVNULL
        if self.line_count:
            # The output has to be captured in order to count its lines.
            cmd.kwargs['stdout'] = subprocess.PIPE
        else:
            cmd.kwargs['stdout'] = subprocess.DEVNULL

        # perf_counter is intended for measuring intervals; unlike
        # time.time() it is unaffected by system clock adjustments.
        start = time.perf_counter()
        completed = cmd.run()
        end = time.perf_counter()

        line_count = None
        if self.line_count:
            line_count = completed.stdout.count(b'\n')
        return {
            'duration': end - start,
            'line_count': line_count,
        }
|
||||
|
||||
|
||||
class Result(object):
    '''
    The result of running a benchmark.

    Benchmark results consist of a set of samples, where each sample
    corresponds to a single run of a single command in the benchmark.
    Various statistics can be computed from these samples such as mean
    and standard deviation.
    '''

    def __init__(self, benchmark):
        '''
        Create a new set of results, initially empty.

        :param Benchmark benchmark:
            The benchmark that produced these results.
        '''
        self.benchmark = benchmark
        self.samples = []

    def add(self, cmd, duration, line_count=None):
        '''
        Add a new sample to this result set.

        :param Command cmd:
            The command that produced this sample.
        :param float duration:
            The duration, in seconds, that the command took to run.
        :param int line_count:
            The number of lines in the search output. This is optional.
        '''
        self.samples.append({
            'cmd': cmd,
            'duration': duration,
            'line_count': line_count,
        })

    def fastest_sample(self):
        '''
        Returns the fastest recorded sample across all commands.
        '''
        return min(self.samples, key=lambda s: s['duration'])

    def fastest_cmd(self):
        '''
        Returns the command with the smallest mean duration.
        '''
        means = []
        for cmd in self.benchmark.commands:
            mean, _ = self.distribution_for(cmd)
            means.append((cmd, mean))
        return min(means, key=lambda tup: tup[1])[0]

    def samples_for(self, cmd):
        '''
        Returns an iterable of the samples recorded for cmd.

        Samples are matched to the command by its name.
        '''
        yield from (s for s in self.samples if s['cmd'].name == cmd.name)

    def line_counts_for(self, cmd):
        '''
        Returns the distinct line counts recorded for the given command.

        Samples recorded without a line count are ignored.

        :rtype: set(int)
        '''
        return {s['line_count'] for s in self.samples_for(cmd)
                if s['line_count'] is not None}

    def distribution_for(self, cmd):
        '''
        Returns the distribution (mean +/- std) of the given command.

        If the command has exactly one sample, the standard deviation is
        reported as 0.0 (the sample standard deviation is undefined for
        a single data point).

        :rtype: (float, float)
        :returns:
            A tuple containing the mean and standard deviation, in that
            order.
        :raises statistics.StatisticsError:
            If no samples were recorded for the command.
        '''
        durations = [s['duration'] for s in self.samples_for(cmd)]
        mean = statistics.mean(durations)
        # statistics.stdev requires at least two data points.
        stdev = statistics.stdev(durations) if len(durations) > 1 else 0.0
        return mean, stdev
|
||||
|
||||
|
||||
class Command(object):
    '''
    A named command line invocation that is run as part of a benchmark.
    '''

    def __init__(self, name, cmd, *args, **kwargs):
        '''
        Create a new command that is run as part of a benchmark.

        *args and **kwargs are forwarded as-is to ``subprocess.run``,
        with the exception of stdin/stdout/stderr: redirection is owned
        by the benchmark harness, so passing any of them here trips an
        assert.

        :param str name:
            The human readable name of this command. It distinguishes
            multiple uses of the same search tool with different
            arguments within one benchmark.
        :param list(str) cmd:
            The command to run as a list of arguments (including the
            command name itself).
        '''
        for stream in ('stdin', 'stdout', 'stderr'):
            assert stream not in kwargs
        self.name, self.cmd = name, cmd
        self.args, self.kwargs = args, kwargs

    def run(self):
        '''
        Runs this command and returns its status.

        :rtype: subprocess.CompletedProcess
        '''
        completed = subprocess.run(self.cmd, *self.args, **self.kwargs)
        return completed
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
    '''
    Like print, but always writes to stderr.

    Any ``file`` keyword argument supplied by the caller is overridden.
    '''
    print(*args, **dict(kwargs, file=sys.stderr))
|
||||
|
||||
|
||||
def run_cmd(cmd, *args, **kwargs):
    '''
    Echo the command to stderr (prefixed with '# ') and then run it.

    The command is always run with ``check=True``, so a non-zero exit
    status raises an exception instead of being returned.

    :param list(str) cmd: The command and its arguments.
    :rtype: subprocess.CompletedProcess
    '''
    eprint('# %s' % ' '.join(cmd))
    return subprocess.run(cmd, *args, **dict(kwargs, check=True))
|
||||
|
||||
|
||||
def require(suite_dir, *names):
    '''
    Declare that a benchmark depends on the given corpus names.

    Each name is checked by calling the module-level function
    ``has_<name>`` (with dashes replaced by underscores) on suite_dir.

    :raises MissingDependencies:
        If one or more of the named corpora are not available. The
        exception lists every missing name.
    '''
    missing = [
        name for name in names
        if not globals()['has_%s' % name.replace('-', '_')](suite_dir)
    ]
    if missing:
        raise MissingDependencies(missing)
|
||||
|
||||
|
||||
def download_linux(suite_dir):
    '''
    Download and build the Linux kernel inside suite_dir.

    The clone is skipped if the checkout directory already exists, and
    the build is skipped if a 'vmlinux' file is already present in it.
    '''
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    vmlinux = path.join(checkout_dir, 'vmlinux')

    if not os.path.isdir(checkout_dir):
        # Cloning from a fork keeps the corpus identical between runs while
        # still permitting a shallow clone, which is far cheaper than a
        # full clone.
        clone = ['git', 'clone', '--depth', '1', LINUX_CLONE, checkout_dir]
        run_cmd(clone)

    # The kernel is built because building litters the repository with a lot
    # of junk that a search tool probably shouldn't touch.
    if os.path.exists(vmlinux):
        return
    eprint('# Building Linux kernel...')
    run_cmd(['make', 'defconfig'], cwd=checkout_dir)
    run_cmd(['make', '-j', str(cpu_count())], cwd=checkout_dir)
|
||||
|
||||
|
||||
def has_linux(suite_dir):
    '''
    Returns true if we believe the Linux kernel is built.

    The check is whether a 'vmlinux' file exists in the checkout.
    '''
    vmlinux = path.join(path.join(suite_dir, LINUX_DIR), 'vmlinux')
    return path.exists(vmlinux)
|
||||
|
||||
|
||||
def download_subtitles_en(suite_dir):
    '''
    Download and decompress the English subtitles corpus.

    Nothing is fetched if the decompressed file already exists; the
    download itself is skipped if the compressed file is present.
    '''
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    compressed = path.join(subtitle_dir, SUBTITLES_EN_NAME_GZ)
    decompressed = path.join(subtitle_dir, SUBTITLES_EN_NAME)

    if not os.path.isdir(subtitle_dir):
        os.makedirs(subtitle_dir)
    if os.path.exists(decompressed):
        return
    if not os.path.exists(compressed):
        run_cmd(['curl', '-LO', SUBTITLES_EN_URL], cwd=subtitle_dir)
    run_cmd(['gunzip', compressed], cwd=subtitle_dir)
|
||||
|
||||
|
||||
def has_subtitles_en(suite_dir):
    '''
    Returns true if the decompressed English subtitles file exists.
    '''
    corpus = path.join(path.join(suite_dir, SUBTITLES_DIR), SUBTITLES_EN_NAME)
    return path.exists(corpus)
|
||||
|
||||
|
||||
def download_subtitles_ru(suite_dir):
    '''
    Download and decompress the Russian subtitles corpus.

    Nothing is fetched if the decompressed file already exists; the
    download itself is skipped if the compressed file is present.
    '''
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    compressed = path.join(subtitle_dir, SUBTITLES_RU_NAME_GZ)
    decompressed = path.join(subtitle_dir, SUBTITLES_RU_NAME)

    if not os.path.isdir(subtitle_dir):
        os.makedirs(subtitle_dir)
    if os.path.exists(decompressed):
        return
    if not os.path.exists(compressed):
        run_cmd(['curl', '-LO', SUBTITLES_RU_URL], cwd=subtitle_dir)
    run_cmd(['gunzip', compressed], cwd=subtitle_dir)
|
||||
|
||||
|
||||
def has_subtitles_ru(suite_dir):
    '''
    Returns true if the decompressed Russian subtitles file exists.
    '''
    corpus = path.join(path.join(suite_dir, SUBTITLES_DIR), SUBTITLES_RU_NAME)
    return path.exists(corpus)
|
||||
|
||||
|
||||
def download(suite_dir, choices):
    '''
    Download choices into suite_dir.

    Specifically, choices specifies a list of corpora to fetch. They are
    processed in order; an unrecognized choice prints an error to stderr
    and exits the process with status 1.

    :param str suite_dir:
        The directory in which to download corpora.
    :param list(str) choices:
        A list of corpora to download. Available choices are:
        all, linux, subtitles-en, subtitles-ru.
    '''
    # Iterate the ``choices`` parameter: there is no ``args`` in this scope
    # (it is a local of ``main``).
    for choice in choices:
        if choice == 'linux':
            download_linux(suite_dir)
        elif choice == 'subtitles-en':
            download_subtitles_en(suite_dir)
        elif choice == 'subtitles-ru':
            download_subtitles_ru(suite_dir)
        elif choice == 'all':
            download_linux(suite_dir)
            download_subtitles_en(suite_dir)
            download_subtitles_ru(suite_dir)
        else:
            eprint('Unrecognized download choice: %s' % choice)
            sys.exit(1)
|
||||
|
||||
|
||||
def collect_benchmarks(suite_dir, filter_pat=None):
    '''
    Yield every runnable benchmark, in sorted order of function name.

    Benchmarks are discovered as module-level names that start with
    'bench_'; the benchmark's name is the remainder of the function name.

    :param str suite_dir:
        The directory containing corpora.
    :param str filter_pat:
        A single regular expression that is searched for in each
        benchmark name. When not specified, all benchmarks are yielded.
    :returns:
        An iterable over all runnable benchmarks. A benchmark whose
        required corpora are missing is reported on stderr and is not
        yielded.
    '''
    prefix = 'bench_'
    for fun in sorted(globals()):
        if not fun.startswith(prefix):
            continue
        name = fun[len(prefix):]
        if not (filter_pat is None or re.search(filter_pat, name)):
            continue
        try:
            benchmark = globals()[fun](suite_dir)
        except MissingDependencies as e:
            missing = ', '.join(e.missing_names)
            hint = ' '.join(['--download %s' % n for n in e.missing_names])
            eprint(
                'missing: %s, skipping benchmark %s (try running with: %s)'
                % (missing, name, hint))
            continue
        benchmark.name = name
        yield benchmark
|
||||
|
||||
|
||||
def main():
    '''
    Command line entry point for the benchmark suite.

    Parses the command line and then does one of two things:

    * If one or more ``--download`` options were given, the requested
      corpora are downloaded into ``--dir`` and the process exits with
      status 0 without running any benchmark.
    * Otherwise every benchmark selected by the optional ``bench`` pattern
      is run. For each one a header and one line per command (mean +/-
      standard deviation, plus line counts when available) is printed to
      stdout. When ``--raw`` is given, every collected sample is also
      written to that path as a CSV row.

    The process exits with status 1 if the ``--raw`` file already exists
    and ``--force`` was not given.
    '''
    p = argparse.ArgumentParser('Command line search tool benchmark suite.')
    p.add_argument(
        '--dir', metavar='PATH', default=os.getcwd(),
        help='The directory in which to download data and perform searches.')
    p.add_argument(
        '--download', metavar='CORPUS', action='append',
        choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
        help='Download and prepare corpus data, then exit without running '
             'any benchmarks. Note that this command is intended to be '
             'idempotent. WARNING: This downloads over a gigabyte of data, '
             'and also includes building the Linux kernel. If "all" is used '
             'then the total uncompressed size is around 13 GB.')
    p.add_argument(
        '-f', '--force', action='store_true',
        help='Overwrite existing files if there is a conflict.')
    p.add_argument(
        '--list', action='store_true',
        help='List available benchmarks by name.')
    p.add_argument(
        '--raw', metavar='PATH',
        help='Dump raw data (all samples collected) in CSV format to the '
             'file path provided.')
    p.add_argument(
        'bench', metavar='PAT', nargs='?',
        help='A regex pattern that will only run benchmarks that match.')
    args = p.parse_args()

    # The values collected by --download live in args.download (argparse
    # derives the attribute name from the option string). There is no
    # args.choices attribute, so reading it raised AttributeError.
    if args.download:
        download(args.dir, args.download)
        sys.exit(0)

    if not path.isdir(args.dir):
        os.makedirs(args.dir)
    if args.raw is not None and path.exists(args.raw) and not args.force:
        eprint('File %s already exists (delete it or use --force)' % args.raw)
        sys.exit(1)
    raw_handle, raw_csv_wtr = None, None
    if args.raw is not None:
        fields = [
            'benchmark', 'warmup_iter', 'iter',
            'name', 'command', 'duration', 'lines', 'env',
        ]
        raw_handle = open(args.raw, 'w+')
        raw_csv_wtr = csv.DictWriter(raw_handle, fields)
        # Header row: every column maps to its own name.
        raw_csv_wtr.writerow({x: x for x in fields})

    try:
        benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
        for i, b in enumerate(benchmarks):
            result = b.run()
            fastest_cmd = result.fastest_cmd()
            fastest_sample = result.fastest_sample()
            max_name_len = max(len(cmd.name) for cmd in b.commands)

            # Separate consecutive benchmark reports with a blank line.
            if i > 0:
                print()
            header = '%s (pattern: %s)' % (b.name, b.pattern)
            print('%s\n%s' % (header, '-' * len(header)))
            for cmd in b.commands:
                name = cmd.name
                mean, stdev = result.distribution_for(cmd)
                line_counts = result.line_counts_for(cmd)
                show_fast_cmd, show_line_counts = '', ''
                # A trailing '*' marks the command reported by fastest_cmd();
                # a '*' appended to the name marks the command that produced
                # the sample reported by fastest_sample().
                if fastest_cmd.name == cmd.name:
                    show_fast_cmd = '*'
                if fastest_sample['cmd'].name == cmd.name:
                    name += '*'
                if len(line_counts) > 0:
                    counts = map(str, line_counts)
                    show_line_counts = ' (lines: %s)' % ', '.join(counts)
                fmt = '{name:{pad}} {mean:0.3f} +/- {stdev:0.3f}{lines}{fast_cmd}'
                print(fmt.format(
                    name=name, pad=max_name_len + 2, fast_cmd=show_fast_cmd,
                    mean=mean, stdev=stdev, lines=show_line_counts))
            sys.stdout.flush()

            if raw_csv_wtr is not None:
                for sample in result.samples:
                    cmd, duration = sample['cmd'], sample['duration']
                    env = ' '.join(['%s=%s' % (k, v)
                                    for k, v in cmd.kwargs.get('env', {}).items()])
                    raw_csv_wtr.writerow({
                        'benchmark': b.name,
                        'warmup_iter': b.warmup_count,
                        'iter': b.count,
                        'name': sample['cmd'].name,
                        'command': ' '.join(cmd.cmd),
                        'duration': duration,
                        'lines': sample['line_count'] or '',
                        'env': env,
                    })
                # Flush after each benchmark so partial results survive an
                # interrupted run.
                raw_handle.flush()
    finally:
        # Close the raw CSV file even if a benchmark raises.
        if raw_handle is not None:
            raw_handle.close()
|
||||
|
||||
|
||||
# Run the benchmark suite only when this file is executed as a script.
if __name__ == '__main__':
    main()
|
@@ -18,7 +18,7 @@ mk_tarball() {
|
||||
mkdir "$td/$name"
|
||||
|
||||
cp target/$TARGET/release/rg "$td/$name/"
|
||||
cp {README,UNLICENSE,COPYING,LICENSE_MIT} "$td/$name/"
|
||||
cp {README.md,UNLICENSE,COPYING,LICENSE-MIT} "$td/$name/"
|
||||
|
||||
pushd $td
|
||||
tar czf "$out_dir/$name.tar.gz" *
|
||||
|
26
ci/script.sh
26
ci/script.sh
@@ -11,42 +11,20 @@ disable_cross_doctests() {
|
||||
if [ "$TRAVIS_OS_NAME" = "osx" ]; then
|
||||
brew install gnu-sed --default-names
|
||||
fi
|
||||
|
||||
find src -name '*.rs' -type f | xargs sed -i -e 's:\(//.\s*```\):\1 ignore,:g'
|
||||
fi
|
||||
}
|
||||
|
||||
# TODO modify this function as you see fit
|
||||
# PROTIP Always pass `--target $TARGET` to cargo commands, this makes cargo output build artifacts
|
||||
# to target/$TARGET/{debug,release} which can reduce the number of needed conditionals in the
|
||||
# `before_deploy`/packaging phase
|
||||
run_test_suite() {
|
||||
case $TARGET in
|
||||
# configure emulation for transparent execution of foreign binaries
|
||||
aarch64-unknown-linux-gnu)
|
||||
export QEMU_LD_PREFIX=/usr/aarch64-linux-gnu
|
||||
;;
|
||||
arm*-unknown-linux-gnueabihf)
|
||||
export QEMU_LD_PREFIX=/usr/arm-linux-gnueabihf
|
||||
;;
|
||||
*)
|
||||
;;
|
||||
esac
|
||||
|
||||
if [ ! -z "$QEMU_LD_PREFIX" ]; then
|
||||
# Run tests on a single thread when using QEMU user emulation
|
||||
export RUST_TEST_THREADS=1
|
||||
fi
|
||||
|
||||
cargo build --target $TARGET --verbose
|
||||
cargo test --target $TARGET
|
||||
cargo test --target $TARGET --verbose
|
||||
|
||||
# sanity check the file type
|
||||
file target/$TARGET/debug/rg
|
||||
}
|
||||
|
||||
main() {
|
||||
disable_cross_doctests
|
||||
# disable_cross_doctests
|
||||
run_test_suite
|
||||
}
|
||||
|
||||
|
1
compile
1
compile
@@ -1,4 +1,5 @@
|
||||
#!/bin/sh
|
||||
|
||||
export RUSTFLAGS="-C target-feature=+ssse3"
|
||||
# export RUSTFLAGS="-C target-cpu=native"
|
||||
cargo build --release --features simd-accel
|
||||
|
@@ -195,7 +195,7 @@ impl Grep {
|
||||
let (prevnl, nextnl) = self.find_line(buf, e, e);
|
||||
match self.re.shortest_match(&buf[prevnl..nextnl]) {
|
||||
None => {
|
||||
start = nextnl + 1;
|
||||
start = nextnl;
|
||||
continue;
|
||||
}
|
||||
Some(_) => {
|
||||
|
52
src/args.rs
52
src/args.rs
@@ -9,15 +9,16 @@ use grep::{Grep, GrepBuilder};
|
||||
use log;
|
||||
use num_cpus;
|
||||
use regex;
|
||||
use term::Terminal;
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use atty;
|
||||
use gitignore::{Gitignore, GitignoreBuilder};
|
||||
use ignore::Ignore;
|
||||
use out::Out;
|
||||
use out::{Out, OutBuffer};
|
||||
use printer::Printer;
|
||||
use search::{InputBuffer, Searcher};
|
||||
use search_buffer::BufferSearcher;
|
||||
use sys;
|
||||
use search_stream::{InputBuffer, Searcher};
|
||||
use types::{FileTypeDef, Types, TypesBuilder};
|
||||
use walk;
|
||||
|
||||
@@ -103,15 +104,12 @@ Less common options:
|
||||
Don't show any file name heading.
|
||||
|
||||
--hidden
|
||||
Search hidden directories and files.
|
||||
Search hidden directories and files. (Hidden directories and files are
|
||||
skipped by default.)
|
||||
|
||||
-L, --follow
|
||||
Follow symlinks.
|
||||
|
||||
--line-terminator ARG
|
||||
The byte to use for a line terminator. Escape sequences may be used.
|
||||
[default: \\n]
|
||||
|
||||
--mmap
|
||||
Search using memory maps when possible. This is enabled by default
|
||||
when ripgrep thinks it will be faster. (Note that mmap searching
|
||||
@@ -173,7 +171,6 @@ pub struct RawArgs {
|
||||
flag_ignore_case: bool,
|
||||
flag_invert_match: bool,
|
||||
flag_line_number: bool,
|
||||
flag_line_terminator: String,
|
||||
flag_literal: bool,
|
||||
flag_mmap: bool,
|
||||
flag_no_heading: bool,
|
||||
@@ -247,7 +244,9 @@ impl RawArgs {
|
||||
};
|
||||
let paths =
|
||||
if self.arg_path.is_empty() {
|
||||
if sys::stdin_is_atty() {
|
||||
if atty::on_stdin()
|
||||
|| self.flag_files
|
||||
|| self.flag_type_list {
|
||||
vec![Path::new("./").to_path_buf()]
|
||||
} else {
|
||||
vec![Path::new("-").to_path_buf()]
|
||||
@@ -276,15 +275,6 @@ impl RawArgs {
|
||||
if mmap {
|
||||
debug!("will try to use memory maps");
|
||||
}
|
||||
let eol = {
|
||||
let eol = unescape(&self.flag_line_terminator);
|
||||
if eol.is_empty() {
|
||||
errored!("Empty line terminator is not allowed.");
|
||||
} else if eol.len() > 1 {
|
||||
errored!("Line terminators are limited to exactly 1 byte.");
|
||||
}
|
||||
eol[0]
|
||||
};
|
||||
let glob_overrides =
|
||||
if self.flag_glob.is_empty() {
|
||||
None
|
||||
@@ -298,16 +288,17 @@ impl RawArgs {
|
||||
};
|
||||
let threads =
|
||||
if self.flag_threads == 0 {
|
||||
cmp::min(6, num_cpus::get())
|
||||
cmp::min(8, num_cpus::get())
|
||||
} else {
|
||||
self.flag_threads
|
||||
};
|
||||
let color =
|
||||
if self.flag_color == "auto" {
|
||||
sys::stdout_is_atty() || self.flag_pretty
|
||||
atty::on_stdout() || self.flag_pretty
|
||||
} else {
|
||||
self.flag_color == "always"
|
||||
};
|
||||
let eol = b'\n';
|
||||
let mut with_filename = self.flag_with_filename;
|
||||
if !with_filename {
|
||||
with_filename = paths.len() > 1 || paths[0].is_dir();
|
||||
@@ -354,7 +345,7 @@ impl RawArgs {
|
||||
with_filename: with_filename,
|
||||
};
|
||||
// If stdout is a tty, then apply some special default options.
|
||||
if sys::stdout_is_atty() || self.flag_pretty {
|
||||
if atty::on_stdout() || self.flag_pretty {
|
||||
if !self.flag_no_line_number && !args.count {
|
||||
args.line_number = true;
|
||||
}
|
||||
@@ -438,8 +429,8 @@ impl Args {
|
||||
|
||||
/// Create a new printer of individual search results that writes to the
|
||||
/// writer given.
|
||||
pub fn printer<W: Send + io::Write>(&self, wtr: W) -> Printer<W> {
|
||||
let mut p = Printer::new(wtr, self.color)
|
||||
pub fn printer<W: Send + Terminal>(&self, wtr: W) -> Printer<W> {
|
||||
let mut p = Printer::new(wtr)
|
||||
.column(self.column)
|
||||
.context_separator(self.context_separator.clone())
|
||||
.eol(self.eol)
|
||||
@@ -454,8 +445,8 @@ impl Args {
|
||||
|
||||
/// Create a new printer of search results for an entire file that writes
|
||||
/// to the writer given.
|
||||
pub fn out<W: io::Write>(&self, wtr: W) -> Out<W> {
|
||||
let mut out = Out::new(wtr);
|
||||
pub fn out(&self) -> Out {
|
||||
let mut out = Out::new(self.color);
|
||||
if self.heading && !self.count {
|
||||
out = out.file_separator(b"".to_vec());
|
||||
} else if self.before_context > 0 || self.after_context > 0 {
|
||||
@@ -464,6 +455,11 @@ impl Args {
|
||||
out
|
||||
}
|
||||
|
||||
/// Create a new buffer for use with searching.
|
||||
pub fn outbuf(&self) -> OutBuffer {
|
||||
OutBuffer::new(self.color)
|
||||
}
|
||||
|
||||
/// Return the paths that should be searched.
|
||||
pub fn paths(&self) -> &[PathBuf] {
|
||||
&self.paths
|
||||
@@ -472,7 +468,7 @@ impl Args {
|
||||
/// Create a new line based searcher whose configuration is taken from the
|
||||
/// command line. This searcher supports a dizzying array of features:
|
||||
/// inverted matching, line counting, context control and more.
|
||||
pub fn searcher<'a, R: io::Read, W: Send + io::Write>(
|
||||
pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
|
||||
&self,
|
||||
inp: &'a mut InputBuffer,
|
||||
printer: &'a mut Printer<W>,
|
||||
@@ -493,7 +489,7 @@ impl Args {
|
||||
/// Create a new line based searcher whose configuration is taken from the
|
||||
/// command line. This search operates on an entire file all at once (which
|
||||
/// may have been memory mapped).
|
||||
pub fn searcher_buffer<'a, W: Send + io::Write>(
|
||||
pub fn searcher_buffer<'a, W: Send + Terminal>(
|
||||
&self,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
|
@@ -1,24 +1,23 @@
|
||||
/*!
|
||||
This io module contains various platform specific functions for detecting
|
||||
how ripgrep is being used. e.g., Is stdin being piped into it? Is stdout being
|
||||
redirected to a file? etc... We use this information to tweak various default
|
||||
configuration parameters such as colors and match formatting.
|
||||
This atty module contains functions for detecting whether ripgrep is being fed
|
||||
from (or to) a terminal. Windows and Unix do this differently, so implement
|
||||
both here.
|
||||
*/
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdin_is_atty() -> bool {
|
||||
pub fn on_stdin() -> bool {
|
||||
use libc;
|
||||
0 < unsafe { libc::isatty(libc::STDIN_FILENO) }
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
pub fn stdout_is_atty() -> bool {
|
||||
pub fn on_stdout() -> bool {
|
||||
use libc;
|
||||
0 < unsafe { libc::isatty(libc::STDOUT_FILENO) }
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn stdin_is_atty() -> bool {
|
||||
pub fn on_stdin() -> bool {
|
||||
use kernel32;
|
||||
use winapi;
|
||||
|
||||
@@ -30,7 +29,7 @@ pub fn stdin_is_atty() -> bool {
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn stdout_is_atty() -> bool {
|
||||
pub fn on_stdout() -> bool {
|
||||
use kernel32;
|
||||
use winapi;
|
||||
|
31
src/main.rs
31
src/main.rs
@@ -39,9 +39,9 @@ use term::Terminal;
|
||||
use walkdir::DirEntry;
|
||||
|
||||
use args::Args;
|
||||
use out::Out;
|
||||
use out::{NoColorTerminal, Out, OutBuffer};
|
||||
use printer::Printer;
|
||||
use search::InputBuffer;
|
||||
use search_stream::InputBuffer;
|
||||
|
||||
macro_rules! errored {
|
||||
($($tt:tt)*) => {
|
||||
@@ -57,14 +57,14 @@ macro_rules! eprintln {
|
||||
}
|
||||
|
||||
mod args;
|
||||
mod atty;
|
||||
mod gitignore;
|
||||
mod glob;
|
||||
mod ignore;
|
||||
mod out;
|
||||
mod printer;
|
||||
mod search;
|
||||
mod search_buffer;
|
||||
mod sys;
|
||||
mod search_stream;
|
||||
mod terminal;
|
||||
mod types;
|
||||
mod walk;
|
||||
@@ -90,7 +90,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
return run_types(args);
|
||||
}
|
||||
let args = Arc::new(args);
|
||||
let out = Arc::new(Mutex::new(args.out(io::stdout())));
|
||||
let out = Arc::new(Mutex::new(args.out()));
|
||||
let outbuf = args.outbuf();
|
||||
let mut workers = vec![];
|
||||
|
||||
let mut workq = {
|
||||
@@ -101,7 +102,7 @@ fn run(args: Args) -> Result<u64> {
|
||||
out: out.clone(),
|
||||
chan_work: stealer.clone(),
|
||||
inpbuf: args.input_buffer(),
|
||||
outbuf: Some(vec![]),
|
||||
outbuf: Some(outbuf.clone()),
|
||||
grep: args.grep(),
|
||||
match_count: 0,
|
||||
};
|
||||
@@ -129,7 +130,8 @@ fn run(args: Args) -> Result<u64> {
|
||||
}
|
||||
|
||||
fn run_files(args: Args) -> Result<u64> {
|
||||
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
|
||||
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
|
||||
let mut printer = args.printer(term);
|
||||
let mut file_count = 0;
|
||||
for p in args.paths() {
|
||||
if p == Path::new("-") {
|
||||
@@ -146,7 +148,8 @@ fn run_files(args: Args) -> Result<u64> {
|
||||
}
|
||||
|
||||
fn run_types(args: Args) -> Result<u64> {
|
||||
let mut printer = args.printer(io::BufWriter::new(io::stdout()));
|
||||
let term = NoColorTerminal::new(io::BufWriter::new(io::stdout()));
|
||||
let mut printer = args.printer(term);
|
||||
let mut ty_count = 0;
|
||||
for def in args.type_defs() {
|
||||
printer.type_def(def);
|
||||
@@ -168,10 +171,10 @@ enum WorkReady {
|
||||
|
||||
struct Worker {
|
||||
args: Arc<Args>,
|
||||
out: Arc<Mutex<Out<io::Stdout>>>,
|
||||
out: Arc<Mutex<Out>>,
|
||||
chan_work: Stealer<Work>,
|
||||
inpbuf: InputBuffer,
|
||||
outbuf: Option<Vec<u8>>,
|
||||
outbuf: Option<OutBuffer>,
|
||||
grep: Grep,
|
||||
match_count: u64,
|
||||
}
|
||||
@@ -203,12 +206,12 @@ impl Worker {
|
||||
let mut out = self.out.lock().unwrap();
|
||||
out.write(&outbuf);
|
||||
}
|
||||
self.outbuf = Some(outbuf.into_inner());
|
||||
self.outbuf = Some(outbuf);
|
||||
}
|
||||
self.match_count
|
||||
}
|
||||
|
||||
fn do_work<W: Send + io::Write>(
|
||||
fn do_work<W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
work: WorkReady,
|
||||
@@ -241,7 +244,7 @@ impl Worker {
|
||||
}
|
||||
}
|
||||
|
||||
fn search<R: io::Read, W: Send + io::Write>(
|
||||
fn search<R: io::Read, W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
@@ -256,7 +259,7 @@ impl Worker {
|
||||
).run().map_err(From::from)
|
||||
}
|
||||
|
||||
fn search_mmap<W: Send + io::Write>(
|
||||
fn search_mmap<W: Send + Terminal>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
|
480
src/out.rs
480
src/out.rs
@@ -1,10 +1,40 @@
|
||||
use std::io::{self, Write};
|
||||
use std::sync::Arc;
|
||||
|
||||
use term::{StdoutTerminal, Terminal};
|
||||
use term::{self, Terminal};
|
||||
use term::color::Color;
|
||||
use term::terminfo::TermInfo;
|
||||
#[cfg(windows)]
|
||||
use term::WinConsole;
|
||||
|
||||
use printer::Writer;
|
||||
use terminal::TerminfoTerminal;
|
||||
|
||||
pub type StdoutTerminal = Box<Terminal<Output=io::Stdout> + Send>;
|
||||
|
||||
/// Builds the terminal that wraps stdout on Windows.
///
/// A `WinConsole` is tried first. If it cannot be created, stdout is wrapped
/// in a `NoColorTerminal` instead. The `color` argument is not consulted by
/// this implementation.
#[cfg(windows)]
fn term_stdout(color: bool) -> StdoutTerminal {
    match WinConsole::new(io::stdout()) {
        Ok(console) => Box::new(console) as StdoutTerminal,
        Err(_) => Box::new(NoColorTerminal::new(io::stdout())) as StdoutTerminal,
    }
}
|
||||
|
||||
/// Gets a terminal that supports color if available.
|
||||
#[cfg(not(windows))]
|
||||
fn term_stdout(color: bool) -> StdoutTerminal {
|
||||
let stdout = io::stdout();
|
||||
if !color || TERMINFO.is_none() {
|
||||
Box::new(NoColorTerminal::new(stdout))
|
||||
} else {
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
Box::new(TerminfoTerminal::new_with_terminfo(stdout, info))
|
||||
}
|
||||
}
|
||||
|
||||
/// Out controls the actual output of all search results for a particular file
|
||||
/// to the end user.
|
||||
@@ -12,34 +42,17 @@ use printer::Writer;
|
||||
/// (The difference between Out and Printer is that a Printer works with
|
||||
/// individual search results where as Out works with search results for each
|
||||
/// file as a whole. For example, it knows when to print a file separator.)
|
||||
pub struct Out<W: io::Write> {
|
||||
wtr: io::BufWriter<W>,
|
||||
term: Option<Box<StdoutTerminal>>,
|
||||
pub struct Out {
|
||||
term: StdoutTerminal,
|
||||
printed: bool,
|
||||
file_separator: Option<Vec<u8>>,
|
||||
}
|
||||
|
||||
/// This is like term::stdout, but on Windows always uses WinConsole instead
|
||||
/// of trying for a TerminfoTerminal. This may be a mistake.
|
||||
#[cfg(windows)]
|
||||
fn term_stdout() -> Option<Box<StdoutTerminal>> {
|
||||
WinConsole::new(io::stdout())
|
||||
.ok()
|
||||
.map(|t| Box::new(t) as Box<StdoutTerminal>)
|
||||
}
|
||||
|
||||
#[cfg(not(windows))]
|
||||
fn term_stdout() -> Option<Box<StdoutTerminal>> {
|
||||
// We never use this crap on *nix.
|
||||
None
|
||||
}
|
||||
|
||||
impl<W: io::Write> Out<W> {
|
||||
impl Out {
|
||||
/// Create a new Out that writes to the wtr given.
|
||||
pub fn new(wtr: W) -> Out<W> {
|
||||
pub fn new(color: bool) -> Out {
|
||||
Out {
|
||||
wtr: io::BufWriter::new(wtr),
|
||||
term: term_stdout(),
|
||||
term: term_stdout(color),
|
||||
printed: false,
|
||||
file_separator: None,
|
||||
}
|
||||
@@ -49,39 +62,422 @@ impl<W: io::Write> Out<W> {
|
||||
/// By default, no separator is printed.
|
||||
///
|
||||
/// If sep is empty, then no file separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out<W> {
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Out {
|
||||
self.file_separator = Some(sep);
|
||||
self
|
||||
}
|
||||
|
||||
/// Write the search results of a single file to the underlying wtr and
|
||||
/// flush wtr.
|
||||
pub fn write(&mut self, buf: &Writer<Vec<u8>>) {
|
||||
pub fn write(&mut self, buf: &OutBuffer) {
|
||||
if let Some(ref sep) = self.file_separator {
|
||||
if self.printed {
|
||||
let _ = self.wtr.write_all(sep);
|
||||
let _ = self.wtr.write_all(b"\n");
|
||||
let _ = self.term.write_all(sep);
|
||||
let _ = self.term.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
match *buf {
|
||||
Writer::Colored(ref tt) => {
|
||||
let _ = self.wtr.write_all(tt.get_ref());
|
||||
OutBuffer::Colored(ref tt) => {
|
||||
let _ = self.term.write_all(tt.get_ref());
|
||||
}
|
||||
Writer::Windows(ref w) => {
|
||||
match self.term {
|
||||
None => {
|
||||
let _ = self.wtr.write_all(w.get_ref());
|
||||
}
|
||||
Some(ref mut stdout) => {
|
||||
w.print_stdout(stdout);
|
||||
}
|
||||
}
|
||||
OutBuffer::Windows(ref w) => {
|
||||
w.print_stdout(&mut self.term);
|
||||
}
|
||||
Writer::NoColor(ref buf) => {
|
||||
let _ = self.wtr.write_all(buf);
|
||||
OutBuffer::NoColor(ref buf) => {
|
||||
let _ = self.term.write_all(buf);
|
||||
}
|
||||
}
|
||||
let _ = self.wtr.flush();
|
||||
let _ = self.term.flush();
|
||||
self.printed = true;
|
||||
}
|
||||
}
|
||||
|
||||
/// OutBuffer corresponds to the final output buffer for search results. All
|
||||
/// search results are written to a buffer and then a buffer is flushed to
|
||||
/// stdout only after the full search has completed.
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum OutBuffer {
|
||||
Colored(TerminfoTerminal<Vec<u8>>),
|
||||
Windows(WindowsBuffer),
|
||||
NoColor(Vec<u8>),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WindowsBuffer {
|
||||
buf: Vec<u8>,
|
||||
pos: usize,
|
||||
colors: Vec<WindowsColor>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct WindowsColor {
|
||||
pos: usize,
|
||||
opt: WindowsOption,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub enum WindowsOption {
|
||||
Foreground(Color),
|
||||
Background(Color),
|
||||
Reset,
|
||||
}
|
||||
|
||||
// The terminfo entry loaded from the environment on first use. It is `None`
// when loading fails, in which case the error is logged at debug level.
lazy_static! {
    static ref TERMINFO: Option<Arc<TermInfo>> = {
        TermInfo::from_env()
            .map(Arc::new)
            .map_err(|err| {
                debug!("error loading terminfo for coloring: {}", err);
            })
            .ok()
    };
}
|
||||
|
||||
impl OutBuffer {
|
||||
/// Create a new output buffer.
|
||||
///
|
||||
/// When color is true, the buffer will attempt to support coloring.
|
||||
pub fn new(color: bool) -> OutBuffer {
|
||||
// If we want color, build a TerminfoTerminal and see if the current
|
||||
// environment supports coloring. If not, bail with NoColor. To avoid
|
||||
// losing our writer (ownership), do this the long way.
|
||||
if !color {
|
||||
return OutBuffer::NoColor(vec![]);
|
||||
}
|
||||
if cfg!(windows) {
|
||||
return OutBuffer::Windows(WindowsBuffer {
|
||||
buf: vec![],
|
||||
pos: 0,
|
||||
colors: vec![]
|
||||
});
|
||||
}
|
||||
if TERMINFO.is_none() {
|
||||
return OutBuffer::NoColor(vec![]);
|
||||
}
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
let tt = TerminfoTerminal::new_with_terminfo(vec![], info);
|
||||
if !tt.supports_color() {
|
||||
debug!("environment doesn't support coloring");
|
||||
return OutBuffer::NoColor(tt.into_inner());
|
||||
}
|
||||
OutBuffer::Colored(tt)
|
||||
}
|
||||
|
||||
/// Clear the give buffer of all search results such that it is reusable
|
||||
/// in another search.
|
||||
pub fn clear(&mut self) {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut tt) => {
|
||||
tt.get_mut().clear();
|
||||
}
|
||||
OutBuffer::Windows(ref mut win) => {
|
||||
win.buf.clear();
|
||||
win.colors.clear();
|
||||
win.pos = 0;
|
||||
}
|
||||
OutBuffer::NoColor(ref mut buf) => {
|
||||
buf.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn map_result<F, G>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> term::Result<()>
|
||||
where F: FnMut(&mut TerminfoTerminal<Vec<u8>>) -> term::Result<()>,
|
||||
G: FnMut(&mut WindowsBuffer) -> term::Result<()> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => f(w),
|
||||
OutBuffer::Windows(ref mut w) => g(w),
|
||||
OutBuffer::NoColor(_) => Err(term::Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_bool<F, G>(
|
||||
&self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> bool
|
||||
where F: FnMut(&TerminfoTerminal<Vec<u8>>) -> bool,
|
||||
G: FnMut(&WindowsBuffer) -> bool {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref w) => f(w),
|
||||
OutBuffer::Windows(ref w) => g(w),
|
||||
OutBuffer::NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for OutBuffer {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => w.write(buf),
|
||||
OutBuffer::Windows(ref mut w) => w.write(buf),
|
||||
OutBuffer::NoColor(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl term::Terminal for OutBuffer {
|
||||
type Output = Vec<u8>;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.reset(), |w| w.reset())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.delete_line(), |w| w.delete_line())
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &Vec<u8> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref w) => w.get_ref(),
|
||||
OutBuffer::Windows(ref w) => w.get_ref(),
|
||||
OutBuffer::NoColor(ref w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut Vec<u8> {
|
||||
match *self {
|
||||
OutBuffer::Colored(ref mut w) => w.get_mut(),
|
||||
OutBuffer::Windows(ref mut w) => w.get_mut(),
|
||||
OutBuffer::NoColor(ref mut w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn into_inner(self) -> Vec<u8> {
|
||||
match self {
|
||||
OutBuffer::Colored(w) => w.into_inner(),
|
||||
OutBuffer::Windows(w) => w.into_inner(),
|
||||
OutBuffer::NoColor(w) => w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsBuffer {
|
||||
fn push(&mut self, opt: WindowsOption) {
|
||||
let pos = self.pos;
|
||||
self.colors.push(WindowsColor { pos: pos, opt: opt });
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsBuffer {
|
||||
/// Print the contents to the given terminal.
|
||||
pub fn print_stdout(&self, tt: &mut StdoutTerminal) {
|
||||
if !tt.supports_color() {
|
||||
let _ = tt.write_all(&self.buf);
|
||||
let _ = tt.flush();
|
||||
return;
|
||||
}
|
||||
let mut last = 0;
|
||||
for col in &self.colors {
|
||||
let _ = tt.write_all(&self.buf[last..col.pos]);
|
||||
match col.opt {
|
||||
WindowsOption::Foreground(c) => {
|
||||
let _ = tt.fg(c);
|
||||
}
|
||||
WindowsOption::Background(c) => {
|
||||
let _ = tt.bg(c);
|
||||
}
|
||||
WindowsOption::Reset => {
|
||||
let _ = tt.reset();
|
||||
}
|
||||
}
|
||||
last = col.pos;
|
||||
}
|
||||
let _ = tt.write_all(&self.buf[last..]);
|
||||
let _ = tt.flush();
|
||||
}
|
||||
}
|
||||
|
||||
impl io::Write for WindowsBuffer {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
let n = try!(self.buf.write(buf));
|
||||
self.pos += n;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl term::Terminal for WindowsBuffer {
|
||||
type Output = Vec<u8>;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Foreground(fg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Background(bg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.push(WindowsOption::Reset);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &Vec<u8> {
|
||||
&self.buf
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut Vec<u8> {
|
||||
&mut self.buf
|
||||
}
|
||||
|
||||
fn into_inner(self) -> Vec<u8> {
|
||||
self.buf
|
||||
}
|
||||
}
|
||||
|
||||
/// NoColorTerminal implements Terminal, but supports no coloring.
///
/// It's useful when an API requires a Terminal, but coloring isn't needed.
pub struct NoColorTerminal<W> {
    /// The writer that receives all output.
    wtr: W,
}

impl<W> NoColorTerminal<W>
where W: Send + io::Write {
    /// Wrap the given writer in a Terminal interface.
    pub fn new(wtr: W) -> NoColorTerminal<W> {
        NoColorTerminal { wtr: wtr }
    }
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for NoColorTerminal<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.wtr.write(buf)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.wtr.flush()
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for NoColorTerminal<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
&self.wtr
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
&mut self.wtr
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
self.wtr
|
||||
}
|
||||
}
|
||||
|
330
src/printer.rs
330
src/printer.rs
@@ -1,17 +1,11 @@
|
||||
use std::io::{self, Write};
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use regex::bytes::Regex;
|
||||
use term::{self, StdoutTerminal, Terminal};
|
||||
use term::color::*;
|
||||
use term::terminfo::TermInfo;
|
||||
use term::{Attr, Terminal};
|
||||
use term::color;
|
||||
|
||||
use terminal::TerminfoTerminal;
|
||||
use types::FileTypeDef;
|
||||
|
||||
use self::Writer::*;
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
/// Note that we currently ignore all write errors. It's probably worthwhile
|
||||
@@ -19,7 +13,7 @@ use self::Writer::*;
|
||||
/// writes to memory, neither of which commonly fail.
|
||||
pub struct Printer<W> {
|
||||
/// The underlying writer.
|
||||
wtr: Writer<W>,
|
||||
wtr: W,
|
||||
/// Whether anything has been printed to wtr yet.
|
||||
has_printed: bool,
|
||||
/// Whether to show column numbers for the first match or not.
|
||||
@@ -42,13 +36,11 @@ pub struct Printer<W> {
|
||||
with_filename: bool,
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> Printer<W> {
|
||||
impl<W: Send + Terminal> Printer<W> {
|
||||
/// Create a new printer that writes to wtr.
|
||||
///
|
||||
/// `color` should be true if the printer should try to use coloring.
|
||||
pub fn new(wtr: W, color: bool) -> Printer<W> {
|
||||
pub fn new(wtr: W) -> Printer<W> {
|
||||
Printer {
|
||||
wtr: Writer::new(wtr, color),
|
||||
wtr: wtr,
|
||||
has_printed: false,
|
||||
column: false,
|
||||
context_separator: "--".to_string().into_bytes(),
|
||||
@@ -115,7 +107,7 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
/// Flushes the underlying writer and returns it.
|
||||
pub fn into_inner(mut self) -> Writer<W> {
|
||||
pub fn into_inner(mut self) -> W {
|
||||
let _ = self.wtr.flush();
|
||||
self.wtr
|
||||
}
|
||||
@@ -201,15 +193,15 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
pub fn write_match(&mut self, re: &Regex, buf: &[u8]) {
|
||||
if !self.wtr.is_color() {
|
||||
if !self.wtr.supports_color() {
|
||||
self.write(buf);
|
||||
return;
|
||||
}
|
||||
let mut last_written = 0;
|
||||
for (s, e) in re.find_iter(buf) {
|
||||
self.write(&buf[last_written..s]);
|
||||
let _ = self.wtr.fg(BRIGHT_RED);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
let _ = self.wtr.fg(color::BRIGHT_RED);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
self.write(&buf[s..e]);
|
||||
let _ = self.wtr.reset();
|
||||
last_written = e;
|
||||
@@ -241,24 +233,24 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
}
|
||||
|
||||
fn write_heading<P: AsRef<Path>>(&mut self, path: P) {
|
||||
if self.wtr.is_color() {
|
||||
let _ = self.wtr.fg(BRIGHT_GREEN);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_GREEN);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(path.as_ref().to_string_lossy().as_bytes());
|
||||
self.write_eol();
|
||||
if self.wtr.is_color() {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
}
|
||||
|
||||
fn line_number(&mut self, n: u64, sep: u8) {
|
||||
if self.wtr.is_color() {
|
||||
let _ = self.wtr.fg(BRIGHT_BLUE);
|
||||
let _ = self.wtr.attr(term::Attr::Bold);
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.fg(color::BRIGHT_BLUE);
|
||||
let _ = self.wtr.attr(Attr::Bold);
|
||||
}
|
||||
self.write(n.to_string().as_bytes());
|
||||
if self.wtr.is_color() {
|
||||
if self.wtr.supports_color() {
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
self.write(&[sep]);
|
||||
@@ -277,289 +269,3 @@ impl<W: Send + io::Write> Printer<W> {
|
||||
self.write(&[eol]);
|
||||
}
|
||||
}
|
||||
|
||||
/// Writer corresponds to the final output buffer for search results. All
|
||||
/// search results are written to a Writer and then a Writer is flushed to
|
||||
/// stdout only after the full search has completed.
|
||||
pub enum Writer<W> {
|
||||
Colored(TerminfoTerminal<W>),
|
||||
Windows(WindowsWriter<W>),
|
||||
NoColor(W),
|
||||
}
|
||||
|
||||
pub struct WindowsWriter<W> {
|
||||
wtr: W,
|
||||
pos: usize,
|
||||
colors: Vec<WindowsColor>,
|
||||
}
|
||||
|
||||
pub struct WindowsColor {
|
||||
pos: usize,
|
||||
opt: WindowsOption,
|
||||
}
|
||||
|
||||
pub enum WindowsOption {
|
||||
Foreground(Color),
|
||||
Background(Color),
|
||||
Reset,
|
||||
}
|
||||
|
||||
// The terminfo entry for the current environment, loaded on first use.
// It is `None` when loading fails; the failure is reported through `debug!`.
lazy_static! {
    static ref TERMINFO: Option<Arc<TermInfo>> = {
        TermInfo::from_env()
            .map(Arc::new)
            .map_err(|err| {
                debug!("error loading terminfo for coloring: {}", err);
            })
            .ok()
    };
}
|
||||
|
||||
impl<W: Send + io::Write> Writer<W> {
|
||||
fn new(wtr: W, color: bool) -> Writer<W> {
|
||||
// If we want color, build a TerminfoTerminal and see if the current
|
||||
// environment supports coloring. If not, bail with NoColor. To avoid
|
||||
// losing our writer (ownership), do this the long way.
|
||||
if !color {
|
||||
return NoColor(wtr);
|
||||
}
|
||||
if cfg!(windows) {
|
||||
return Windows(WindowsWriter { wtr: wtr, pos: 0, colors: vec![] });
|
||||
}
|
||||
if TERMINFO.is_none() {
|
||||
return NoColor(wtr);
|
||||
}
|
||||
let info = TERMINFO.clone().unwrap();
|
||||
let tt = TerminfoTerminal::new_with_terminfo(wtr, info);
|
||||
if !tt.supports_color() {
|
||||
debug!("environment doesn't support coloring");
|
||||
return NoColor(tt.into_inner());
|
||||
}
|
||||
Colored(tt)
|
||||
}
|
||||
|
||||
fn is_color(&self) -> bool {
|
||||
match *self {
|
||||
Colored(_) => true,
|
||||
Windows(_) => true,
|
||||
NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
|
||||
fn map_result<F, G>(
|
||||
&mut self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> term::Result<()>
|
||||
where F: FnMut(&mut TerminfoTerminal<W>) -> term::Result<()>,
|
||||
G: FnMut(&mut WindowsWriter<W>) -> term::Result<()> {
|
||||
match *self {
|
||||
Colored(ref mut w) => f(w),
|
||||
Windows(ref mut w) => g(w),
|
||||
NoColor(_) => Err(term::Error::NotSupported),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_bool<F, G>(
|
||||
&self,
|
||||
mut f: F,
|
||||
mut g: G,
|
||||
) -> bool
|
||||
where F: FnMut(&TerminfoTerminal<W>) -> bool,
|
||||
G: FnMut(&WindowsWriter<W>) -> bool {
|
||||
match *self {
|
||||
Colored(ref w) => f(w),
|
||||
Windows(ref w) => g(w),
|
||||
NoColor(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for Writer<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.write(buf),
|
||||
Windows(ref mut w) => w.write(buf),
|
||||
NoColor(ref mut w) => w.write(buf),
|
||||
}
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.flush(),
|
||||
Windows(ref mut w) => w.flush(),
|
||||
NoColor(ref mut w) => w.flush(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for Writer<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.fg(fg), |w| w.fg(fg))
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.map_result(|w| w.bg(bg), |w| w.bg(bg))
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
self.map_result(|w| w.attr(attr), |w| w.attr(attr))
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
self.map_bool(|w| w.supports_attr(attr), |w| w.supports_attr(attr))
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.reset(), |w| w.reset())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_reset(), |w| w.supports_reset())
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
self.map_bool(|w| w.supports_color(), |w| w.supports_color())
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.cursor_up(), |w| w.cursor_up())
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.delete_line(), |w| w.delete_line())
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
self.map_result(|w| w.carriage_return(), |w| w.carriage_return())
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
match *self {
|
||||
Colored(ref w) => w.get_ref(),
|
||||
Windows(ref w) => w.get_ref(),
|
||||
NoColor(ref w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
match *self {
|
||||
Colored(ref mut w) => w.get_mut(),
|
||||
Windows(ref mut w) => w.get_mut(),
|
||||
NoColor(ref mut w) => w,
|
||||
}
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
match self {
|
||||
Colored(w) => w.into_inner(),
|
||||
Windows(w) => w.into_inner(),
|
||||
NoColor(w) => w,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> WindowsWriter<W> {
|
||||
fn push(&mut self, opt: WindowsOption) {
|
||||
let pos = self.pos;
|
||||
self.colors.push(WindowsColor { pos: pos, opt: opt });
|
||||
}
|
||||
}
|
||||
|
||||
impl WindowsWriter<Vec<u8>> {
|
||||
/// Print the contents to the given terminal.
|
||||
pub fn print_stdout(&self, tt: &mut Box<StdoutTerminal>) {
|
||||
let mut last = 0;
|
||||
for col in &self.colors {
|
||||
let _ = tt.write_all(&self.wtr[last..col.pos]);
|
||||
match col.opt {
|
||||
WindowsOption::Foreground(c) => {
|
||||
let _ = tt.fg(c);
|
||||
}
|
||||
WindowsOption::Background(c) => {
|
||||
let _ = tt.bg(c);
|
||||
}
|
||||
WindowsOption::Reset => {
|
||||
let _ = tt.reset();
|
||||
}
|
||||
}
|
||||
last = col.pos;
|
||||
}
|
||||
let _ = tt.write_all(&self.wtr[last..]);
|
||||
let _ = tt.flush();
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> io::Write for WindowsWriter<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
let n = try!(self.wtr.write(buf));
|
||||
self.pos += n;
|
||||
Ok(n)
|
||||
}
|
||||
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.wtr.flush()
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Send + io::Write> term::Terminal for WindowsWriter<W> {
|
||||
type Output = W;
|
||||
|
||||
fn fg(&mut self, fg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Foreground(fg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn bg(&mut self, bg: term::color::Color) -> term::Result<()> {
|
||||
self.push(WindowsOption::Background(bg));
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn attr(&mut self, attr: term::Attr) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn supports_attr(&self, attr: term::Attr) -> bool {
|
||||
false
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> term::Result<()> {
|
||||
self.push(WindowsOption::Reset);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn supports_reset(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn supports_color(&self) -> bool {
|
||||
true
|
||||
}
|
||||
|
||||
fn cursor_up(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn delete_line(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn carriage_return(&mut self) -> term::Result<()> {
|
||||
Err(term::Error::NotSupported)
|
||||
}
|
||||
|
||||
fn get_ref(&self) -> &W {
|
||||
&self.wtr
|
||||
}
|
||||
|
||||
fn get_mut(&mut self) -> &mut W {
|
||||
&mut self.wtr
|
||||
}
|
||||
|
||||
fn into_inner(self) -> W {
|
||||
self.wtr
|
||||
}
|
||||
}
|
||||
|
@@ -1,11 +1,19 @@
|
||||
/*!
|
||||
The search_buffer module is responsible for searching a single file all in a
|
||||
single buffer. Typically, the source of the buffer is a memory map. This can
|
||||
be useful for when memory maps are faster than streaming search.
|
||||
|
||||
Note that this module doesn't quite support everything that search_stream does.
|
||||
Notably, it does not support showing contexts.
|
||||
*/
|
||||
use std::cmp;
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use grep::Grep;
|
||||
use term::Terminal;
|
||||
|
||||
use printer::Printer;
|
||||
use search::{IterLines, Options, count_lines, is_binary};
|
||||
use search_stream::{IterLines, Options, count_lines, is_binary};
|
||||
|
||||
pub struct BufferSearcher<'a, W: 'a> {
|
||||
opts: Options,
|
||||
@@ -18,7 +26,7 @@ pub struct BufferSearcher<'a, W: 'a> {
|
||||
last_line: usize,
|
||||
}
|
||||
|
||||
impl<'a, W: Send + io::Write> BufferSearcher<'a, W> {
|
||||
impl<'a, W: Send + Terminal> BufferSearcher<'a, W> {
|
||||
pub fn new(
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
@@ -146,12 +154,12 @@ mod tests {
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use term::Terminal;
|
||||
|
||||
use out::OutBuffer;
|
||||
use printer::Printer;
|
||||
|
||||
use super::BufferSearcher;
|
||||
|
||||
lazy_static! {
|
||||
static ref SHERLOCK: &'static str = "\
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
@@ -159,7 +167,8 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
static ref CODE: &'static str = "\
|
||||
|
||||
const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
@@ -174,7 +183,6 @@ fn main() {
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
||||
}
|
||||
|
||||
fn matcher(pat: &str) -> Grep {
|
||||
GrepBuilder::new(pat).build().unwrap()
|
||||
@@ -184,14 +192,15 @@ fn main() {
|
||||
&Path::new("/baz.rs")
|
||||
}
|
||||
|
||||
type TestSearcher<'a> = BufferSearcher<'a, Vec<u8>>;
|
||||
type TestSearcher<'a> = BufferSearcher<'a, OutBuffer>;
|
||||
|
||||
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
pat: &str,
|
||||
haystack: &str,
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = BufferSearcher::new(
|
||||
@@ -203,7 +212,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn basic_search() {
|
||||
let (count, out) = search("Sherlock", &*SHERLOCK, |s|s);
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s|s);
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@@ -231,7 +240,7 @@ fn main() {
|
||||
#[test]
|
||||
fn line_numbers() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", &*SHERLOCK, |s| s.line_number(true));
|
||||
"Sherlock", SHERLOCK, |s| s.line_number(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@@ -242,7 +251,7 @@ fn main() {
|
||||
#[test]
|
||||
fn count() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", &*SHERLOCK, |s| s.count(true));
|
||||
"Sherlock", SHERLOCK, |s| s.count(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:2\n");
|
||||
}
|
||||
@@ -250,7 +259,7 @@ fn main() {
|
||||
#[test]
|
||||
fn invert_match() {
|
||||
let (count, out) = search(
|
||||
"Sherlock", &*SHERLOCK, |s| s.invert_match(true));
|
||||
"Sherlock", SHERLOCK, |s| s.invert_match(true));
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:Holmeses, success in the province of detective work must always
|
||||
@@ -262,7 +271,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn invert_match_line_numbers() {
|
||||
let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).line_number(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
@@ -276,7 +285,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn invert_match_count() {
|
||||
let (count, out) = search("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).count(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
|
@@ -1,6 +1,7 @@
|
||||
/*!
|
||||
The search module is responsible for searching a single file and printing
|
||||
matches.
|
||||
The search_stream module is responsible for searching a single file and
|
||||
printing matches. In particular, it searches the file in a streaming fashion
|
||||
using `read` calls and a (roughly) fixed size buffer.
|
||||
*/
|
||||
|
||||
use std::cmp;
|
||||
@@ -11,6 +12,7 @@ use std::path::{Path, PathBuf};
|
||||
|
||||
use grep::{Grep, Match};
|
||||
use memchr::{memchr, memrchr};
|
||||
use term::Terminal;
|
||||
|
||||
use printer::Printer;
|
||||
|
||||
@@ -98,7 +100,7 @@ impl Default for Options {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, R: io::Read, W: Send + io::Write> Searcher<'a, R, W> {
|
||||
impl<'a, R: io::Read, W: Send + Terminal> Searcher<'a, R, W> {
|
||||
/// Create a new searcher.
|
||||
///
|
||||
/// `inp` is a reusable input buffer that is used as scratch space by this
|
||||
@@ -541,13 +543,88 @@ pub fn is_binary(buf: &[u8]) -> bool {
|
||||
}
|
||||
|
||||
/// Count the number of lines in the given buffer.
///
/// More precisely, this returns how many bytes of `buf` are equal to `eol`.
#[inline(never)]
pub fn count_lines(buf: &[u8], eol: u8) -> u64 {
    // This was adapted from code in the memchr crate. The specific benefit
    // here is that we can avoid a branch in the inner loop because all we're
    // doing is counting.

    // The technique to find EOL bytes was adapted from:
    // http://bits.stephan-brumme.com/null.html
    const LO_U64: u64 = 0x0101010101010101;
    const HI_U64: u64 = 0x8080808080808080;

    // On 32-bit targets the casts truncate to the low four bytes.
    const LO_USIZE: usize = LO_U64 as usize;
    const HI_USIZE: usize = HI_U64 as usize;
    // 0x7F in every byte.
    const LOW7_USIZE: usize = !HI_USIZE;

    #[cfg(target_pointer_width = "32")]
    const USIZE_BYTES: usize = 4;
    #[cfg(target_pointer_width = "64")]
    const USIZE_BYTES: usize = 8;

    /// Returns the number of zero bytes in `word`.
    fn count_zero_bytes(word: usize) -> u64 {
        // The shorter `word.wrapping_sub(LO) & !word & HI` only answers
        // "is there any zero byte?": the borrow out of a zero byte can also
        // flag the byte above it when that byte is 0x01, which over-counts
        // (e.g. `\n` followed by `\x0B` was counted twice). The form below
        // never carries between bytes: the high bit of each byte of
        // `nonzero` is set exactly when that byte of `word` is non-zero.
        let nonzero = ((word & LOW7_USIZE) + LOW7_USIZE) | word;
        // Ideally, this would compile down to a POPCNT instruction, but
        // it looks like you need to set RUSTFLAGS="-C target-cpu=native"
        // (or target-feature=+popcnt) to get that to work. Bummer.
        (!nonzero & HI_USIZE).count_ones() as u64
    }

    /// Returns a word with every byte set to `b`.
    fn repeat_byte(b: u8) -> usize {
        // Cannot overflow: 0xFF * 0x01..01 == 0xFF..FF.
        (b as usize) * LO_USIZE
    }

    /// Byte-at-a-time count, used for the unaligned head and the tail.
    fn count_lines_slow(buf: &[u8], eol: u8) -> u64 {
        buf.iter().filter(|&&b| b == eol).count() as u64
    }

    let len = buf.len();
    let ptr = buf.as_ptr();
    let mut count = 0;

    // Search up to an aligned boundary...
    let align = (ptr as usize) & (USIZE_BYTES - 1);
    let mut i = 0;
    if align > 0 {
        i = cmp::min(USIZE_BYTES - align, len);
        count += count_lines_slow(&buf[..i], eol);
    }

    // ... and search the rest, two words at a time.
    let repeated_eol = repeat_byte(eol);

    if len >= 2 * USIZE_BYTES {
        while i <= len - (2 * USIZE_BYTES) {
            // SAFETY: `ptr + i` is word-aligned (either `ptr` already was and
            // `i` is a multiple of the word size, or `i` started at the
            // distance to the next boundary), and the loop condition
            // guarantees `i + 2 * USIZE_BYTES <= len`, so both loads stay
            // inside `buf`.
            unsafe {
                let u = *(ptr.offset(i as isize) as *const usize);
                let v = *(ptr.offset((i + USIZE_BYTES) as isize)
                          as *const usize);

                // XOR turns every EOL byte into a zero byte.
                count += count_zero_bytes(u ^ repeated_eol);
                count += count_zero_bytes(v ^ repeated_eol);
            }
            i += USIZE_BYTES * 2;
        }
    }
    count += count_lines_slow(&buf[i..], eol);
    count
}
|
||||
|
||||
@@ -689,12 +766,12 @@ mod tests {
|
||||
use grep::{Grep, GrepBuilder};
|
||||
use term::Terminal;
|
||||
|
||||
use out::OutBuffer;
|
||||
use printer::Printer;
|
||||
|
||||
use super::{InputBuffer, Searcher, start_of_previous_lines};
|
||||
|
||||
lazy_static! {
|
||||
static ref SHERLOCK: &'static str = "\
|
||||
const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
@@ -702,7 +779,8 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.\
|
||||
";
|
||||
static ref CODE: &'static str = "\
|
||||
|
||||
const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
@@ -717,7 +795,6 @@ fn main() {
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
||||
}
|
||||
|
||||
fn hay(s: &str) -> io::Cursor<Vec<u8>> {
|
||||
io::Cursor::new(s.to_string().into_bytes())
|
||||
@@ -731,7 +808,7 @@ fn main() {
|
||||
&Path::new("/baz.rs")
|
||||
}
|
||||
|
||||
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, Vec<u8>>;
|
||||
type TestSearcher<'a> = Searcher<'a, io::Cursor<Vec<u8>>, OutBuffer>;
|
||||
|
||||
fn search_smallcap<F: FnMut(TestSearcher) -> TestSearcher>(
|
||||
pat: &str,
|
||||
@@ -739,7 +816,8 @@ fn main() {
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut inp = InputBuffer::with_capacity(1);
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = Searcher::new(
|
||||
@@ -755,7 +833,8 @@ fn main() {
|
||||
mut map: F,
|
||||
) -> (u64, String) {
|
||||
let mut inp = InputBuffer::with_capacity(4096);
|
||||
let mut pp = Printer::new(vec![], false).with_filename(true);
|
||||
let outbuf = OutBuffer::NoColor(vec![]);
|
||||
let mut pp = Printer::new(outbuf).with_filename(true);
|
||||
let grep = GrepBuilder::new(pat).build().unwrap();
|
||||
let count = {
|
||||
let searcher = Searcher::new(
|
||||
@@ -870,7 +949,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn basic_search1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s|s);
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s|s);
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@@ -897,7 +976,7 @@ fn main() {
|
||||
#[test]
|
||||
fn line_numbers() {
|
||||
let (count, out) = search_smallcap(
|
||||
"Sherlock", &*SHERLOCK, |s| s.line_number(true));
|
||||
"Sherlock", SHERLOCK, |s| s.line_number(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
@@ -908,7 +987,7 @@ fn main() {
|
||||
#[test]
|
||||
fn count() {
|
||||
let (count, out) = search_smallcap(
|
||||
"Sherlock", &*SHERLOCK, |s| s.count(true));
|
||||
"Sherlock", SHERLOCK, |s| s.count(true));
|
||||
assert_eq!(2, count);
|
||||
assert_eq!(out, "/baz.rs:2\n");
|
||||
}
|
||||
@@ -916,7 +995,7 @@ fn main() {
|
||||
#[test]
|
||||
fn invert_match() {
|
||||
let (count, out) = search_smallcap(
|
||||
"Sherlock", &*SHERLOCK, |s| s.invert_match(true));
|
||||
"Sherlock", SHERLOCK, |s| s.invert_match(true));
|
||||
assert_eq!(4, count);
|
||||
assert_eq!(out, "\
|
||||
/baz.rs:Holmeses, success in the province of detective work must always
|
||||
@@ -928,7 +1007,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn invert_match_line_numbers() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).line_number(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
@@ -942,7 +1021,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn invert_match_count() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.invert_match(true).count(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
@@ -951,7 +1030,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_one1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(1)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -964,7 +1043,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_invert_one1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(1).invert_match(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
@@ -980,7 +1059,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_invert_one2() {
|
||||
let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(1).invert_match(true)
|
||||
});
|
||||
assert_eq!(3, count);
|
||||
@@ -995,7 +1074,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_two1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(2)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1008,7 +1087,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_two2() {
|
||||
let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(2)
|
||||
});
|
||||
assert_eq!(1, count);
|
||||
@@ -1022,7 +1101,7 @@ fn main() {
|
||||
#[test]
|
||||
fn before_context_two3() {
|
||||
let (count, out) = search_smallcap(
|
||||
"success|attached", &*SHERLOCK, |s| {
|
||||
"success|attached", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(2)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1038,7 +1117,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_two4() {
|
||||
let (count, out) = search("stdin", &*CODE, |s| {
|
||||
let (count, out) = search("stdin", CODE, |s| {
|
||||
s.line_number(true).before_context(2)
|
||||
});
|
||||
assert_eq!(3, count);
|
||||
@@ -1055,7 +1134,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_two5() {
|
||||
let (count, out) = search("stdout", &*CODE, |s| {
|
||||
let (count, out) = search("stdout", CODE, |s| {
|
||||
s.line_number(true).before_context(2)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1072,7 +1151,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn before_context_three1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).before_context(3)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1085,7 +1164,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn after_context_one1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(1)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1099,7 +1178,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn after_context_invert_one1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(1).invert_match(true)
|
||||
});
|
||||
assert_eq!(4, count);
|
||||
@@ -1114,7 +1193,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn after_context_invert_one2() {
|
||||
let (count, out) = search_smallcap(" a ", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap(" a ", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(1).invert_match(true)
|
||||
});
|
||||
assert_eq!(3, count);
|
||||
@@ -1130,7 +1209,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn after_context_two1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(2)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1145,7 +1224,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn after_context_two2() {
|
||||
let (count, out) = search_smallcap("dusted", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("dusted", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(2)
|
||||
});
|
||||
assert_eq!(1, count);
|
||||
@@ -1158,7 +1237,7 @@ fn main() {
|
||||
#[test]
|
||||
fn after_context_two3() {
|
||||
let (count, out) = search_smallcap(
|
||||
"success|attached", &*SHERLOCK, |s| {
|
||||
"success|attached", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(2)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1173,7 +1252,7 @@ fn main() {
|
||||
|
||||
#[test]
|
||||
fn after_context_three1() {
|
||||
let (count, out) = search_smallcap("Sherlock", &*SHERLOCK, |s| {
|
||||
let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| {
|
||||
s.line_number(true).after_context(3)
|
||||
});
|
||||
assert_eq!(2, count);
|
||||
@@ -1190,7 +1269,7 @@ fn main() {
|
||||
#[test]
|
||||
fn before_after_context_two1() {
|
||||
let (count, out) = search(
|
||||
r"fn main|let mut rdr", &*CODE, |s| {
|
||||
r"fn main|let mut rdr", CODE, |s| {
|
||||
s.line_number(true).after_context(2).before_context(2)
|
||||
});
|
||||
assert_eq!(2, count);
|
24
tests/hay.rs
Normal file
24
tests/hay.rs
Normal file
@@ -0,0 +1,24 @@
|
||||
/// Sample prose that the integration tests write to a file and search.
pub const SHERLOCK: &'static str = "\
|
||||
For the Doctor Watsons of this world, as opposed to the Sherlock
|
||||
Holmeses, success in the province of detective work must always
|
||||
be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
can extract a clew from a wisp of straw or a flake of cigar ash;
|
||||
but Doctor Watson has to have it taken out for him and dusted,
|
||||
and exhibited clearly, with a label attached.
|
||||
";
|
||||
|
||||
/// Source text of a small Rust program, kept as a string constant.
pub const CODE: &'static str = "\
|
||||
extern crate snap;
|
||||
|
||||
use std::io;
|
||||
|
||||
fn main() {
|
||||
let stdin = io::stdin();
|
||||
let stdout = io::stdout();
|
||||
|
||||
// Wrap the stdin reader in a Snappy reader.
|
||||
let mut rdr = snap::Reader::new(stdin.lock());
|
||||
let mut wtr = stdout.lock();
|
||||
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
|
||||
}
|
||||
";
|
577
tests/tests.rs
Normal file
577
tests/tests.rs
Normal file
@@ -0,0 +1,577 @@
|
||||
/*!
|
||||
This module contains *integration* tests. Their purpose is to test the CLI
|
||||
interface. Namely, that passing a flag does what it says on the tin.
|
||||
|
||||
Tests for more fine grained behavior (like the search or the globber) should be
|
||||
unit tests in their respective modules.
|
||||
*/
|
||||
|
||||
#![allow(dead_code, unused_imports)]
|
||||
|
||||
use std::process::Command;
|
||||
|
||||
use workdir::WorkDir;
|
||||
|
||||
mod hay;
|
||||
mod workdir;
|
||||
|
||||
// Defines a `#[test]` function named after the first argument. The test
// creates a work directory named after the test, writes `hay::SHERLOCK` to a
// file called "sherlock" in it, builds a command with the query and path as
// its arguments, and passes the work directory and the command to the
// closure.
//
// The query defaults to "Sherlock" and the path to "sherlock" when omitted.
macro_rules! sherlock {
    // Name and closure only: use the default query.
    ($test:ident, $check:expr) => {
        sherlock!($test, "Sherlock", $check);
    };
    // Name, query and closure: use the default path.
    ($test:ident, $pattern:expr, $check:expr) => {
        sherlock!($test, $pattern, "sherlock", $check);
    };
    // Fully explicit form.
    ($test:ident, $pattern:expr, $target:expr, $check:expr) => {
        #[test]
        fn $test() {
            let wd = WorkDir::new(stringify!($test));
            wd.create("sherlock", hay::SHERLOCK);
            let mut cmd = wd.command();
            cmd.arg($pattern).arg($target);
            $check(wd, cmd);
        }
    };
}
|
||||
|
||||
// Searching one explicitly named file prints the matching lines without a
// file name prefix.
sherlock!(single_file, |wd: WorkDir, mut cmd| {
    let expected = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// Searching the directory itself prefixes every match with its file name.
sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n";
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-n` prefixes each matching line with its line number.
sherlock!(line_numbers, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        1:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        3:be, to a very large extent, the result of luck. Sherlock Holmes\n";
    cmd.arg("-n");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `--column` prefixes each matching line with a column number.
sherlock!(columns, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        57:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        49:be, to a very large extent, the result of luck. Sherlock Holmes\n";
    cmd.arg("--column");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-H` prints the file name even though only a single file is searched.
sherlock!(with_filename, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n";
    cmd.arg("-H");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `--heading` prints the file name once, on its own line, above the matches.
sherlock!(with_heading, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        sherlock\n\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        be, to a very large extent, the result of luck. Sherlock Holmes\n";
    // --with-filename is disabled by default when searching one file, so it
    // is passed explicitly to force the issue.
    cmd.args(&["--with-filename", "--heading"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// With two files to search, file names (and therefore headings) are enabled
// without asking for them. Either file may be reported first.
sherlock!(with_heading_default, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    let foo_first = "\
        foo\n\
        Sherlock Holmes lives on Baker Street.\n\
        \n\
        sherlock\n\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        be, to a very large extent, the result of luck. Sherlock Holmes\n";
    let sherlock_first = "\
        sherlock\n\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        be, to a very large extent, the result of luck. Sherlock Holmes\n\
        \n\
        foo\n\
        Sherlock Holmes lives on Baker Street.\n";

    // Search two or more and get --with-filename enabled by default.
    wd.create("foo", "Sherlock Holmes lives on Baker Street.");
    // Use -j1 to get deterministic results.
    cmd.args(&["-j1", "--heading"]);
    let got: String = wd.stdout(&mut cmd);
    assert!(got == foo_first || got == sherlock_first);
});
|
||||
|
||||
// `-v` prints the lines that do NOT match.
sherlock!(inverted, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        Holmeses, success in the province of detective work must always\n\
        can extract a clew from a wisp of straw or a flake of cigar ash;\n\
        but Doctor Watson has to have it taken out for him and dusted,\n\
        and exhibited clearly, with a label attached.\n";
    cmd.arg("-v");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-n -v` prints the non-matching lines together with their line numbers.
sherlock!(inverted_line_numbers, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        2:Holmeses, success in the province of detective work must always\n\
        4:can extract a clew from a wisp of straw or a flake of cigar ash;\n\
        5:but Doctor Watson has to have it taken out for him and dusted,\n\
        6:and exhibited clearly, with a label attached.\n";
    cmd.args(&["-n", "-v"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-i` lets the lowercase pattern match the capitalized name.
sherlock!(case_insensitive, "sherlock", |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        be, to a very large extent, the result of luck. Sherlock Holmes\n";
    cmd.arg("-i");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-w` only reports the line where "as" appears as a whole word.
sherlock!(word, "as", |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n";
    cmd.arg("-w");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-Q` treats the pattern `()` as literal text.
sherlock!(literal, "()", "file", |wd: WorkDir, mut cmd: Command| {
    wd.create("file", "blib\n()\nblab\n");
    cmd.arg("-Q");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "()\n");
});
|
||||
|
||||
// `-q` succeeds on a match but writes nothing to stdout.
sherlock!(quiet, |wd: WorkDir, mut cmd: Command| {
    cmd.arg("-q");
    let got: String = wd.stdout(&mut cmd);
    assert!(got.is_empty());
});
|
||||
|
||||
// `-r` substitutes the replacement text for every match in the output.
sherlock!(replace, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Doctor Watsons of this world, as opposed to the FooBar\n\
        be, to a very large extent, the result of luck. FooBar Holmes\n";
    cmd.args(&["-r", "FooBar"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// The replacement text may refer to capture groups by number.
sherlock!(replace_groups, "([A-Z][a-z]+) ([A-Z][a-z]+)",
|wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Watsons, Doctor of this world, as opposed to the Sherlock\n\
        be, to a very large extent, the result of luck. Holmes, Sherlock\n\
        but Watson, Doctor has to have it taken out for him and dusted,\n";
    cmd.args(&["-r", "$2, $1"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// The replacement text may refer to capture groups by name.
sherlock!(replace_named_groups, "(?P<first>[A-Z][a-z]+) (?P<last>[A-Z][a-z]+)",
|wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Watsons, Doctor of this world, as opposed to the Sherlock\n\
        be, to a very large extent, the result of luck. Holmes, Sherlock\n\
        but Watson, Doctor has to have it taken out for him and dusted,\n";
    cmd.args(&["-r", "$last, $first"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-t rust` restricts the search to the Rust file.
sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    for name in &["file.py", "file.rs"] {
        wd.create(name, "Sherlock");
    }
    cmd.args(&["-t", "rust"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "file.rs:Sherlock\n");
});
|
||||
|
||||
// `-T rust` excludes the Rust file, leaving only the Python file.
sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    for name in &["file.py", "file.rs"] {
        wd.create(name, "Sherlock");
    }
    cmd.args(&["-T", "rust"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "file.py:Sherlock\n");
});
|
||||
|
||||
// After `--type-clear rust`, selecting `-t rust` makes the command fail.
sherlock!(file_type_clear, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    for name in &["file.py", "file.rs"] {
        wd.create(name, "Sherlock");
    }
    cmd.args(&["--type-clear", "rust", "-t", "rust"]);
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// `--type-add` defines a new file type that `-t` can then select.
sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    for name in &["file.py", "file.rs", "file.wat"] {
        wd.create(name, "Sherlock");
    }
    cmd.args(&["--type-add", "wat:*.wat", "-t", "wat"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "file.wat:Sherlock\n");
});
|
||||
|
||||
// `-g '*.rs'` restricts the search to files matching the glob.
sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    for name in &["file.py", "file.rs"] {
        wd.create(name, "Sherlock");
    }
    cmd.args(&["-g", "*.rs"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "file.rs:Sherlock\n");
});
|
||||
|
||||
// `-g '!*.rs'` excludes files matching the glob.
sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    for name in &["file.py", "file.rs"] {
        wd.create(name, "Sherlock");
    }
    cmd.args(&["-g", "!*.rs"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "file.py:Sherlock\n");
});
|
||||
|
||||
// `-A 1` prints one line of trailing context after each match.
sherlock!(after_context, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        Holmeses, success in the province of detective work must always\n\
        be, to a very large extent, the result of luck. Sherlock Holmes\n\
        can extract a clew from a wisp of straw or a flake of cigar ash;\n";
    cmd.args(&["-A", "1"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// With line numbers, matches are marked with `:` and context lines with `-`.
sherlock!(after_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        1:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        2-Holmeses, success in the province of detective work must always\n\
        3:be, to a very large extent, the result of luck. Sherlock Holmes\n\
        4-can extract a clew from a wisp of straw or a flake of cigar ash;\n";
    cmd.args(&["-A", "1", "-n"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-B 1` prints one line of leading context before each match.
sherlock!(before_context, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        Holmeses, success in the province of detective work must always\n\
        be, to a very large extent, the result of luck. Sherlock Holmes\n";
    cmd.args(&["-B", "1"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// Leading context combined with line numbers.
sherlock!(before_context_line_numbers, |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        1:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        2-Holmeses, success in the province of detective work must always\n\
        3:be, to a very large extent, the result of luck. Sherlock Holmes\n";
    cmd.args(&["-B", "1", "-n"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// `-C 1` prints context on both sides; non-adjacent groups are separated
// by a `--` line.
sherlock!(context, "world|attached", |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        Holmeses, success in the province of detective work must always\n\
        --\n\
        but Doctor Watson has to have it taken out for him and dusted,\n\
        and exhibited clearly, with a label attached.\n";
    cmd.args(&["-C", "1"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// Two-sided context combined with line numbers.
sherlock!(context_line_numbers, "world|attached",
|wd: WorkDir, mut cmd: Command| {
    let expected = "\
        1:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        2-Holmeses, success in the province of detective work must always\n\
        --\n\
        5-but Doctor Watson has to have it taken out for him and dusted,\n\
        6:and exhibited clearly, with a label attached.\n";
    cmd.args(&["-C", "1", "-n"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// When the only haystack is a dot-file, the default search finds nothing
// and the command fails.
sherlock!(ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// `--hidden` makes the dot-file searchable.
sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        .sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        .sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n";

    wd.remove("sherlock");
    wd.create(".sherlock", hay::SHERLOCK);

    cmd.arg("--hidden");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// A `.gitignore` entry naming the haystack leaves nothing to search.
sherlock!(ignore_git, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    let ignore_rules = "sherlock\n";
    wd.create(".gitignore", ignore_rules);
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// An `.rgignore` entry naming the haystack leaves nothing to search.
sherlock!(ignore_ripgrep, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    let ignore_rules = "sherlock\n";
    wd.create(".rgignore", ignore_rules);
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// `--no-ignore` searches the file even though `.gitignore` lists it.
sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    let expected = "\
        sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n";
    wd.create(".gitignore", "sherlock\n");
    cmd.arg("--no-ignore");
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// A `.gitignore` in a parent of the search directory is still honored.
sherlock!(ignore_git_parent, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir(".git");
    wd.create_dir("foo");
    wd.create("foo/sherlock", hay::SHERLOCK);

    // Even though we search in foo/, which has no .gitignore, ripgrep will
    // search parent directories and respect the gitignore files found.
    let search_root = wd.path().join("foo");
    cmd.current_dir(search_root);
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// The upward search for .gitignore files stops once a .git directory is
// seen. The test builds this hierarchy:
//
//   .gitignore (contains `sherlock`)
//   foo/
//     .git/
//     bar/
//       sherlock
//
// and searches from inside `foo/bar/`. ripgrep stops looking for .gitignore
// files after it sees `foo/.git/`, so the top-level `.gitignore` containing
// `sherlock` is not respected and the file is searched.
sherlock!(ignore_git_parent_stop, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    let expected = "\
        sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n";

    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    for sub in &["foo", "foo/.git", "foo/bar"] {
        wd.create_dir(sub);
    }
    wd.create("foo/bar/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo").join("bar"));

    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// Counterpart of `ignore_git_parent_stop`: a .git directory does *not* stop
// the upward search for .rgignore files.
sherlock!(ignore_ripgrep_parent_no_stop, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create(".rgignore", "sherlock\n");
    for sub in &["foo", "foo/.git", "foo/bar"] {
        wd.create_dir(sub);
    }
    wd.create("foo/bar/sherlock", hay::SHERLOCK);
    cmd.current_dir(wd.path().join("foo").join("bar"));
    // The top-level .rgignore applies, so nothing is left to search.
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// `--no-ignore-parent` disables ignore files from parent directories only.
//
// The test builds this hierarchy:
//
//   .gitignore        (contains `sherlock`)
//   foo/
//     .gitignore      (contains `watson`)
//     sherlock
//     watson
//
// and runs the search from `foo/`. By default ripgrep would also consult the
// parent's .gitignore; the flag should prevent that, while `foo/.gitignore`
// is still respected because the search happens in `foo/`. So results must
// come from `sherlock` only, never from `watson`.
sherlock!(no_parent_ignore_git, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
    let expected = "\
        sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
        sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n";

    wd.remove("sherlock");
    wd.create(".gitignore", "sherlock\n");
    wd.create_dir("foo");
    wd.create("foo/.gitignore", "watson\n");
    for hay_file in &["foo/sherlock", "foo/watson"] {
        wd.create(hay_file, hay::SHERLOCK);
    }
    cmd.current_dir(wd.path().join("foo"));
    cmd.arg("--no-ignore-parent");

    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, expected);
});
|
||||
|
||||
// Without `-L`, the symlinked directory is not searched, so searching from
// `foo/bar` (which holds only the link) fails.
sherlock!(symlink_nofollow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    wd.create_dir("foo");
    wd.create_dir("foo/bar");
    // foo/bar/baz is a link pointing at foo/baz.
    wd.link("foo/baz", "foo/bar/baz");
    wd.create_dir("foo/baz");
    wd.create("foo/baz/sherlock", hay::SHERLOCK);
    let search_root = wd.path().join("foo/bar");
    cmd.current_dir(search_root);
    wd.assert_err(&mut cmd);
});
|
||||
|
||||
// With `-L`, the symlinked directory is searched and matches are reported
// under the link's name.
sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
    wd.remove("sherlock");
    for sub in &["foo", "foo/bar", "foo/baz"] {
        wd.create_dir(sub);
    }
    wd.create("foo/baz/sherlock", hay::SHERLOCK);
    wd.link("foo/baz", "foo/bar/baz");
    cmd.arg("-L");
    cmd.current_dir(wd.path().join("foo/bar"));

    let got: String = wd.stdout(&mut cmd);
    // Only the path separator in the reported file name differs by platform.
    let sep = if cfg!(windows) { "\\" } else { "/" };
    let expected = format!(
        "baz{0}sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock\n\
         baz{0}sherlock:be, to a very large extent, the result of luck. Sherlock Holmes\n",
        sep);
    assert_eq!(got, expected);
});
|
||||
|
||||
// A file containing NUL bytes yields no results by default, so the command
// fails.
#[test]
fn binary_nosearch() {
    let wd = WorkDir::new("binary_nosearch");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.args(&["foo", "file"]);
    wd.assert_err(&mut cmd);
}
|
||||
|
||||
// The following two tests show a discrepancy in search results between
|
||||
// searching with memory mapped files and stream searching. Stream searching
|
||||
// uses a heuristic (that GNU grep also uses) where NUL bytes are replaced with
|
||||
// the EOL terminator, which tends to avoid allocating large amounts of memory
|
||||
// for really long "lines." The memory map searcher has no need to worry about
|
||||
// such things, and more than that, it would be pretty hard for it to match
|
||||
// the semantics of streaming search in this case.
|
||||
//
|
||||
// Binary files with lots of NULs aren't really part of the use case of ripgrep
|
||||
// (or any other grep-like tool for that matter), so we shouldn't feel too bad
|
||||
// about it.
|
||||
// With `-a --mmap`, matching lines are printed with their NUL bytes intact.
#[test]
fn binary_search_mmap() {
    let wd = WorkDir::new("binary_search_mmap");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.args(&["-a", "--mmap", "foo", "file"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "foo\x00bar\nfoo\x00baz\n");
}
|
||||
|
||||
// With `-a --no-mmap`, each printed line ends where the NUL byte was.
#[test]
fn binary_search_no_mmap() {
    let wd = WorkDir::new("binary_search_no_mmap");
    wd.create("file", "foo\x00bar\nfoo\x00baz\n");
    let mut cmd = wd.command();
    cmd.args(&["-a", "--no-mmap", "foo", "file"]);
    let got: String = wd.stdout(&mut cmd);
    assert_eq!(got, "foo\nfoo\n");
}
|
||||
|
||||
// `--files` lists every file that would be searched. The two entries may be
// printed in either order.
#[test]
fn files() {
    let wd = WorkDir::new("files");
    wd.create("file", "");
    wd.create_dir("dir");
    wd.create("dir/file", "");

    let mut cmd = wd.command();
    cmd.arg("--files");
    let got: String = wd.stdout(&mut cmd);

    // Only the separator inside the nested path differs by platform.
    let top = "./file\n";
    let nested = if cfg!(windows) { "./dir\\file\n" } else { "./dir/file\n" };
    let top_first = format!("{}{}", top, nested);
    let nested_first = format!("{}{}", nested, top);
    assert!(got == top_first || got == nested_first);
}
|
||||
|
||||
// `--type-list` prints the known file types.
#[test]
fn type_list() {
    let wd = WorkDir::new("type_list");
    let mut cmd = wd.command();
    cmd.arg("--type-list");

    // This can change over time, so just make sure we print something.
    let listing: String = wd.stdout(&mut cmd);
    assert!(!listing.is_empty());
}
|
189
tests/workdir.rs
Normal file
189
tests/workdir.rs
Normal file
@@ -0,0 +1,189 @@
|
||||
use std::env;
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::fs::{self, File};
|
||||
use std::io::{self, Write};
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process;
|
||||
use std::str::FromStr;
|
||||
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
|
||||
use std::thread;
|
||||
use std::time::Duration;
|
||||
|
||||
static TEST_DIR: &'static str = "ripgrep-tests";
|
||||
static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;
|
||||
|
||||
/// WorkDir represents a directory in which tests are run.
|
||||
///
|
||||
/// Directories are created from a global atomic counter to avoid duplicates.
|
||||
#[derive(Debug)]
|
||||
pub struct WorkDir {
|
||||
/// The directory in which this test executable is running.
|
||||
root: PathBuf,
|
||||
/// The directory in which the test should run. If a test needs to create
|
||||
/// files, they should go in here.
|
||||
dir: PathBuf,
|
||||
}
|
||||
|
||||
impl WorkDir {
|
||||
/// Create a new test working directory with the given name. The name
|
||||
/// does not need to be distinct for each invocation, but should correspond
|
||||
/// to a logical grouping of tests.
|
||||
pub fn new(name: &str) -> WorkDir {
|
||||
let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
|
||||
let root = env::current_exe().unwrap()
|
||||
.parent().expect("executable's directory").to_path_buf();
|
||||
let dir = root.join(TEST_DIR).join(name).join(&format!("{}", id));
|
||||
nice_err(&dir, repeat(|| fs::create_dir_all(&dir)));
|
||||
WorkDir {
|
||||
root: root,
|
||||
dir: dir,
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new file with the given name and contents in this directory.
|
||||
pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
|
||||
let path = self.dir.join(name);
|
||||
let mut file = nice_err(&path, File::create(&path));
|
||||
nice_err(&path, file.write_all(contents.as_bytes()));
|
||||
nice_err(&path, file.flush());
|
||||
}
|
||||
|
||||
/// Remove a file with the given name from this directory.
|
||||
pub fn remove<P: AsRef<Path>>(&self, name: P) {
|
||||
let path = self.dir.join(name);
|
||||
nice_err(&path, fs::remove_file(&path));
|
||||
}
|
||||
|
||||
/// Create a new directory with the given path (and any directories above
|
||||
/// it) inside this directory.
|
||||
pub fn create_dir<P: AsRef<Path>>(&self, path: P) {
|
||||
let path = self.dir.join(path);
|
||||
nice_err(&path, repeat(|| fs::create_dir_all(&path)));
|
||||
}
|
||||
|
||||
/// Creates a new command that is set to use the ripgrep executable in
|
||||
/// this working directory.
|
||||
pub fn command(&self) -> process::Command {
|
||||
let mut cmd = process::Command::new(&self.bin());
|
||||
cmd.current_dir(&self.dir);
|
||||
cmd
|
||||
}
|
||||
|
||||
/// Returns the path to the ripgrep executable.
|
||||
pub fn bin(&self) -> PathBuf {
|
||||
self.root.join("rg")
|
||||
}
|
||||
|
||||
/// Returns the path to this directory.
|
||||
pub fn path(&self) -> &Path {
|
||||
&self.dir
|
||||
}
|
||||
|
||||
/// Creates a directory symlink to the src with the given target name
|
||||
/// in this directory.
|
||||
#[cfg(not(windows))]
|
||||
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
|
||||
use std::os::unix::fs::symlink;
|
||||
let src = self.dir.join(src);
|
||||
let target = self.dir.join(target);
|
||||
let _ = fs::remove_file(&target);
|
||||
nice_err(&target, symlink(&src, &target));
|
||||
}
|
||||
|
||||
#[cfg(windows)]
|
||||
pub fn link<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
|
||||
use std::os::windows::fs::symlink_dir;
|
||||
let src = self.dir.join(src);
|
||||
let target = self.dir.join(target);
|
||||
let _ = fs::remove_dir(&target);
|
||||
nice_err(&target, symlink_dir(&src, &target));
|
||||
}
|
||||
|
||||
/// Runs and captures the stdout of the given command.
|
||||
///
|
||||
/// If the return type could not be created from a string, then this
|
||||
/// panics.
|
||||
pub fn stdout<E: fmt::Debug, T: FromStr<Err=E>>(
|
||||
&self,
|
||||
cmd: &mut process::Command,
|
||||
) -> T {
|
||||
let o = self.output(cmd);
|
||||
let stdout = String::from_utf8_lossy(&o.stdout);
|
||||
match stdout.parse() {
|
||||
Ok(t) => t,
|
||||
Err(err) => {
|
||||
panic!("could not convert from string: {:?}\n\n{}", err, stdout);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Gets the output of a command. If the command failed, then this panics.
|
||||
pub fn output(&self, cmd: &mut process::Command) -> process::Output {
|
||||
let o = cmd.output().unwrap();
|
||||
if !o.status.success() {
|
||||
let suggest =
|
||||
if o.stderr.is_empty() {
|
||||
"\n\nDid your search end up with no results?".to_string()
|
||||
} else {
|
||||
"".to_string()
|
||||
};
|
||||
|
||||
panic!("\n\n==========\n\
|
||||
command failed but expected success!\
|
||||
{}\
|
||||
\n\ncommand: {:?}\
|
||||
\ncwd: {}\
|
||||
\n\nstatus: {}\
|
||||
\n\nstdout: {}\
|
||||
\n\nstderr: {}\
|
||||
\n\n==========\n",
|
||||
suggest, cmd, self.dir.display(), o.status,
|
||||
String::from_utf8_lossy(&o.stdout),
|
||||
String::from_utf8_lossy(&o.stderr));
|
||||
}
|
||||
o
|
||||
}
|
||||
|
||||
/// Runs the given command and asserts that it resulted in an error exit
|
||||
/// code.
|
||||
pub fn assert_err(&self, cmd: &mut process::Command) {
|
||||
let o = cmd.output().unwrap();
|
||||
if o.status.success() {
|
||||
panic!("\n\n===== {:?} =====\n\
|
||||
command succeeded but expected failure!\
|
||||
\n\ncwd: {}\
|
||||
\n\nstatus: {}\
|
||||
\n\nstdout: {}\n\nstderr: {}\
|
||||
\n\n=====\n",
|
||||
cmd, self.dir.display(), o.status,
|
||||
String::from_utf8_lossy(&o.stdout),
|
||||
String::from_utf8_lossy(&o.stderr));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Unwraps `res`, returning the success value unchanged.
///
/// On `Err`, panics with a message made of the displayed `path` followed by
/// the debug representation of the error, so that a failing file system
/// operation reports which path it was working on.
fn nice_err<P: AsRef<Path>, T, E: error::Error>(
    path: P,
    res: Result<T, E>,
) -> T {
    res.unwrap_or_else(|err| {
        panic!("{}: {:?}", path.as_ref().display(), err)
    })
}
|
||||
|
||||
/// Calls `f` until it succeeds, giving up after ten attempts.
///
/// Returns `Ok(())` as soon as one call succeeds. Every failed call is
/// followed by a 500ms pause; if all ten calls fail, the error from the
/// final call is returned.
fn repeat<F: FnMut() -> io::Result<()>>(mut f: F) -> io::Result<()> {
    let mut failure = None;
    let mut attempts_left = 10;
    while attempts_left > 0 {
        match f() {
            Ok(()) => return Ok(()),
            Err(err) => {
                failure = Some(err);
                thread::sleep(Duration::from_millis(500));
            }
        }
        attempts_left -= 1;
    }
    // At least one attempt was made, so a failure has been recorded.
    Err(failure.unwrap())
}
|
Reference in New Issue
Block a user