Compare commits

..

6 Commits

Author SHA1 Message Date
Andrew Gallant
dc9cb42ee8 PROGRESS: tests: re-tool integration tests 2018-08-07 18:38:24 -04:00
Andrew Gallant
584ef9ef34 ripgrep: migrate to libripgrep 2018-08-07 18:38:24 -04:00
Andrew Gallant
4bce2dff5d libripgrep: initial commit introducing libripgrep
libripgrep is not any one library, but rather, a collection of libraries
that roughly separate the following key distinct phases in a grep
implementation:

  1. Pattern matching (e.g., by a regex engine).
  2. Searching a file using a pattern matcher.
  3. Printing results.

Ultimately, both (1) and (3) are defined by de-coupled interfaces, of
which there may be multiple implementations. Namely, (1) is satisfied by
the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by
the `Sink` trait in the `grep2` crate. The searcher (2) ties everything
together and finds results using a matcher and reports those results
using a `Sink` implementation.
2018-08-07 18:23:13 -04:00
Andrew Gallant
545db65cbc ci: test libripgrep 2018-08-06 19:20:42 -04:00
Andrew Gallant
4869e6972b grep: remove senseless test
It was pulling in a sizable data file and doesn't appear to be testing
anything meaningful that isn't covered by a variety of other tests.
2018-08-06 19:20:04 -04:00
llogiq
ad9befbc1d deps: update bytecount to 0.3.2
PR #1003
2018-08-06 06:44:16 -04:00
15 changed files with 697 additions and 64 deletions

7
Cargo.lock generated
View File

@@ -40,7 +40,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bytecount"
version = "0.3.1"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -181,7 +181,7 @@ dependencies = [
name = "grep-searcher"
version = "0.0.1"
dependencies = [
"bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.0.1",
@@ -338,6 +338,7 @@ dependencies = [
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)",
"termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
@@ -519,7 +520,7 @@ dependencies = [
"checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
"checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9"
"checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789"
"checksum bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "882585cd7ec84e902472df34a5e01891202db3bf62614e1f0afe459c1afcf744"
"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8"
"checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9"
"checksum cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efe5c877e17a9c717a0bf3613b2709f723202c4e4675cc8f12926ded29bcb17e"
"checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"

View File

@@ -47,6 +47,7 @@ log = "0.4"
num_cpus = "1"
regex = "1"
same-file = "1"
serde_json = "1"
termcolor = "1"
[dependencies.clap]
@@ -56,7 +57,7 @@ features = ["suggestions", "color"]
[target.'cfg(windows)'.dependencies.winapi]
version = "0.3"
features = ["std", "winnt"]
features = ["std", "fileapi", "winnt"]
[build-dependencies]
lazy_static = "1"
@@ -71,4 +72,4 @@ avx-accel = ["grep/avx-accel"]
simd-accel = ["grep/simd-accel"]
[profile.release]
debug = true
debug = 1

View File

@@ -1014,7 +1014,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
line = line.with_end(line.end() - 1);
}
if self.config().trim_ascii {
line = trim_ascii_prefix_range(bytes, line);
line = self.trim_ascii_prefix_range(bytes, line);
}
while !line.is_empty() {
@@ -1058,7 +1058,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
line = line.with_end(line.end() - 1);
}
if self.config().trim_ascii {
line = trim_ascii_prefix_range(bytes, line);
line = self.trim_ascii_prefix_range(bytes, line);
}
while !line.is_empty() {
if matches[midx].end() <= line.start() {
@@ -1127,7 +1127,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
line = line.with_end(line.end() - 1);
}
if self.config().trim_ascii {
line = trim_ascii_prefix_range(bytes, line);
line = self.trim_ascii_prefix_range(bytes, line);
}
while !line.is_empty() {
@@ -1208,7 +1208,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
if !self.config().trim_ascii {
0
} else {
trim_ascii_prefix_range(
self.trim_ascii_prefix_range(
line,
Match::new(0, line.len()),
).start()
@@ -1369,7 +1369,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
if !self.config().trim_ascii {
return self.write(buf);
}
self.write(trim_ascii_prefix(buf))
self.write(self.trim_ascii_prefix(buf))
}
fn write(&self, buf: &[u8]) -> io::Result<()> {
@@ -1425,6 +1425,21 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
fn multi_line(&self) -> bool {
self.searcher.multi_line_with_matcher(&self.sink.matcher)
}
/// Trim prefix ASCII spaces from the given slice and return the
/// corresponding range.
///
/// This stops trimming a prefix as soon as it sees non-whitespace or a
/// line terminator.
///
/// This is a convenience wrapper: it forwards to the free function of the
/// same name, supplying the line terminator configured on this printer's
/// searcher so that callers do not have to thread it through themselves.
fn trim_ascii_prefix_range(&self, slice: &[u8], range: Match) -> Match {
trim_ascii_prefix_range(self.searcher.line_terminator(), slice, range)
}
/// Trim prefix ASCII spaces from the given slice and return the
/// corresponding sub-slice.
///
/// This is a convenience wrapper: it forwards to the free function of the
/// same name, supplying the line terminator configured on this printer's
/// searcher. The returned slice borrows from `slice`, not from `self`.
fn trim_ascii_prefix<'s>(&self, slice: &'s [u8]) -> &'s [u8] {
trim_ascii_prefix(self.searcher.line_terminator(), slice)
}
}
#[cfg(test)]
@@ -1987,6 +2002,31 @@ Watson
assert_eq_printed!(expected, got);
}
// Trimming must never consume the line terminator itself: the first line of
// the haystack is just a terminator and has to survive as an (empty) context
// line, while the indentation in front of the match on line 2 is removed.
#[test]
fn trim_ascii_with_line_term() {
    let haystack = "\n Watson".as_bytes();
    let matcher = RegexMatcher::new("Watson").unwrap();
    let mut printer = StandardBuilder::new()
        .trim_ascii(true)
        .build(NoColor::new(vec![]));

    let mut searcher = SearcherBuilder::new()
        .line_number(true)
        .before_context(1)
        .build();
    searcher
        .search_reader(&matcher, haystack, printer.sink(&matcher))
        .unwrap();

    let got = printer_contents(&mut printer);
    let expected = "1-\n2:Watson\n";
    assert_eq_printed!(expected, got);
}
#[test]
fn line_number() {
let matcher = RegexMatcher::new("Watson").unwrap();

View File

@@ -4,7 +4,7 @@ use std::io;
use std::path::Path;
use std::time;
use grep_matcher::{Captures, Match, Matcher};
use grep_matcher::{Captures, LineTerminator, Match, Matcher};
use grep_searcher::{
LineIter,
SinkError, SinkContext, SinkContextKind, SinkMatch,
@@ -317,7 +317,7 @@ pub struct NiceDuration(pub time::Duration);
impl fmt::Display for NiceDuration {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{:0.4}s", self.fractional_seconds())
write!(f, "{:0.6}s", self.fractional_seconds())
}
}
@@ -346,21 +346,37 @@ impl Serialize for NiceDuration {
/// Trim prefix ASCII spaces from the given slice and return the corresponding
/// range.
pub fn trim_ascii_prefix_range(slice: &[u8], range: Match) -> Match {
fn is_space(b: &&u8) -> bool {
match **b {
///
/// This stops trimming a prefix as soon as it sees non-whitespace or a line
/// terminator.
pub fn trim_ascii_prefix_range(
line_term: LineTerminator,
slice: &[u8],
range: Match,
) -> Match {
fn is_space(b: u8) -> bool {
match b {
b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' => true,
_ => false,
}
}
let count = slice[range].iter().take_while(is_space).count();
let count = slice[range]
.iter()
.take_while(|&&b| -> bool {
is_space(b) && !line_term.as_bytes().contains(&b)
})
.count();
range.with_start(range.start() + count)
}
/// Trim prefix ASCII spaces from the given slice and return the corresponding
/// sub-slice.
pub fn trim_ascii_prefix(slice: &[u8]) -> &[u8] {
let range = trim_ascii_prefix_range(slice, Match::new(0, slice.len()));
pub fn trim_ascii_prefix(line_term: LineTerminator, slice: &[u8]) -> &[u8] {
let range = trim_ascii_prefix_range(
line_term,
slice,
Match::new(0, slice.len()),
);
&slice[range]
}

View File

@@ -518,6 +518,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_ignore_case(&mut args);
flag_ignore_file(&mut args);
flag_invert_match(&mut args);
flag_json(&mut args);
flag_line_number(&mut args);
flag_line_regexp(&mut args);
flag_max_columns(&mut args);
@@ -549,6 +550,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_stats(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
flag_trim(&mut args);
flag_type(&mut args);
flag_type_add(&mut args);
flag_type_clear(&mut args);
@@ -1085,6 +1087,63 @@ Invert matching. Show lines that do not match the given patterns.
args.push(arg);
}
/// Register the `--json` switch and its hidden negation `--no-json`.
///
/// `--json` conflicts with the flags that produce a different kind of output
/// (`--count`, `--count-matches`, `--files`, `--files-with-matches` and
/// `--files-without-match`); `--json` and `--no-json` override each other.
fn flag_json(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Show search results in a JSON Lines format.";
    const LONG: &str = long!("\
Enable printing results in a JSON Lines format.
When this flag is provided, ripgrep will emit a sequence of messages, each
encoded as a JSON object, where there are five different message types:
**begin** - A message that indicates a file is being searched and contains at
least one match.
**end** - A message that indicates a file is done being searched. This message
also includes summary statistics about the search for a particular file.
**match** - A message that indicates a match was found. This includes the text
and offsets of the match.
**context** - A message that indicates a contextual line was found. This
includes the text of the line, along with any match information if the search
was inverted.
**summary** - The final message emitted by ripgrep that contains summary
statistics about the search across all files.
Since file paths or the contents of files are not guaranteed to be valid UTF-8
and JSON itself must be valid UTF-8, ripgrep will emit all data elements as
objects with one of two keys: 'text' or 'bytes'. 'text' is a normal JSON string
when the data is valid UTF-8 while 'bytes' is the base64 encoded contents of the
bytes.
The JSON Lines format is only supported for showing search results. It cannot
be used with other flags that emit other types of output, such as --files,
--files-with-matches, --files-without-match, --count or --count-matches.
ripgrep will report an error if any of the aforementioned flags are used in
concert with --json.
Other flags that control aspects of the standard output such as
--only-matching, --heading, --replace, --max-columns, etc., have no effect
when --json is set.
The JSON Lines format can be disabled with --no-json.
");
    let arg = RGArg::switch("json")
        .help(SHORT).long_help(LONG)
        .overrides("no-json")
        .conflicts(&[
            "count", "count-matches",
            "files", "files-with-matches", "files-without-match",
        ]);
    args.push(arg);

    // The negation carries no help text of its own; it is only mentioned in
    // the long help of --json above.
    let arg = RGArg::switch("no-json")
        .hidden()
        .overrides("json");
    args.push(arg);
}
fn flag_line_number(args: &mut Vec<RGArg>) {
const SHORT: &str = "Show line numbers.";
const LONG: &str = long!("\
@@ -1219,7 +1278,7 @@ Enable matching across multiple lines.
This flag can be disabled with --no-multiline.
");
let arg = RGArg::switch("multiline")
let arg = RGArg::switch("multiline").short("U")
.help(SHORT).long_help(LONG)
.overrides("no-multiline");
args.push(arg);
@@ -1621,11 +1680,18 @@ searched, and the time taken for the entire search to complete.
This set of aggregate statistics may expand over time.
Note that this flag has no effect if --files, --files-with-matches or
--files-without-match is passed.");
--files-without-match is passed.
This flag can be disabled with --no-stats.
");
let arg = RGArg::switch("stats")
.help(SHORT).long_help(LONG);
.help(SHORT).long_help(LONG)
.overrides("no-stats");
args.push(arg);
let arg = RGArg::switch("no-stats")
.help(SHORT).long_help(LONG)
.overrides("stats");
args.push(arg);
}
@@ -1668,6 +1734,25 @@ causes ripgrep to choose the thread count using heuristics.
args.push(arg);
}
/// Register the `--trim` switch and its hidden negation `--no-trim`.
///
/// The two switches override each other, so the last one given on the
/// command line wins.
fn flag_trim(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Trim prefixed whitespace from matches.";
    const LONG: &str = long!("\
When set, all ASCII whitespace at the beginning of each line printed will be
trimmed.
This flag can be disabled with --no-trim.
");
    let arg = RGArg::switch("trim")
        .help(SHORT).long_help(LONG)
        .overrides("no-trim");
    args.push(arg);

    // The negation must not reuse the help text of --trim: doing so would
    // list --no-trim in the help output with a description of the opposite
    // behaviour. Hide it instead, matching how --no-json is registered.
    let arg = RGArg::switch("no-trim")
        .hidden()
        .overrides("trim");
    args.push(arg);
}
fn flag_type(args: &mut Vec<RGArg>) {
const SHORT: &str = "Only search files matching TYPE.";
const LONG: &str = long!("\

View File

@@ -275,6 +275,7 @@ impl Args {
let searcher = self.matches().searcher(self.paths())?;
let mut builder = SearchWorkerBuilder::new();
builder
.json_stats(self.matches().is_present("json"))
.preprocessor(self.matches().preprocessor())
.search_zip(self.matches().is_present("search-zip"));
Ok(builder.build(matcher, searcher, printer))
@@ -490,7 +491,7 @@ impl ArgMatches {
.max_matches(self.max_count()?)
.column(self.column())
.byte_offset(self.is_present("byte-offset"))
.trim_ascii(false)
.trim_ascii(self.is_present("trim"))
.separator_search(None)
.separator_context(Some(self.context_separator()))
.separator_field_match(b":".to_vec())
@@ -889,13 +890,21 @@ impl ArgMatches {
/// Determine the type of output we should produce.
fn output_kind(&self) -> OutputKind {
if self.is_present("quiet") {
// While we don't technically print results (or aggregate results)
// in quiet mode, we still support the --stats flag, and those
// stats are computed by the Summary printer for now.
return OutputKind::Summary;
} else if self.is_present("json") {
return OutputKind::JSON;
}
let (count, count_matches) = self.counts();
let summary =
count
|| count_matches
|| self.is_present("files-with-matches")
|| self.is_present("files-without-match")
|| self.is_present("quiet");
|| self.is_present("files-without-match");
if summary {
OutputKind::Summary
} else {
@@ -1378,7 +1387,15 @@ fn stdin_is_readable() -> bool {
/// Returns true if and only if stdin is deemed searchable.
#[cfg(windows)]
fn stdin_is_readable() -> bool {
// On Windows, it's not clear what the possibilities are to me, so just
// always return true.
true
use std::os::windows::io::AsRawHandle;
use winapi::um::fileapi::GetFileType;
use winapi::um::winbase::{FILE_TYPE_DISK, FILE_TYPE_PIPE};
let handle = match Handle::stdin() {
Err(_) => return false,
Ok(handle) => handle,
};
let raw_handle = handle.as_raw_handle();
let ft = unsafe { GetFileType(raw_handle) };
ft == FILE_TYPE_DISK || ft == FILE_TYPE_PIPE
}

View File

@@ -11,6 +11,8 @@ extern crate log;
extern crate num_cpus;
extern crate regex;
extern crate same_file;
#[macro_use]
extern crate serde_json;
extern crate termcolor;
#[cfg(windows)]
extern crate winapi;
@@ -103,7 +105,7 @@ fn search(args: Args) -> Result<bool> {
if let Some(ref stats) = stats {
let elapsed = Instant::now().duration_since(started_at);
// We don't care if we couldn't print this successfully.
let _ = searcher.printer().print_stats(elapsed, stats);
let _ = searcher.print_stats(elapsed, stats);
}
Ok(matched)
}
@@ -181,7 +183,7 @@ fn search_parallel(args: Args) -> Result<bool> {
let stats = locked_stats.lock().unwrap();
let mut searcher = args.search_worker(args.stdout())?;
// We don't care if we couldn't print this successfully.
let _ = searcher.printer().print_stats(elapsed, &stats);
let _ = searcher.print_stats(elapsed, &stats);
}
Ok(matched.load(SeqCst))
}

View File

@@ -6,6 +6,7 @@ use grep::matcher::Matcher;
use grep::printer::{JSON, Standard, Summary, Stats};
use grep::regex::RegexMatcher;
use grep::searcher::Searcher;
use serde_json as json;
use termcolor::WriteColor;
use decompressor::{DecompressionReader, is_compressed};
@@ -17,6 +18,7 @@ use subject::Subject;
/// at a very high level.
#[derive(Clone, Debug)]
struct Config {
json_stats: bool,
preprocessor: Option<PathBuf>,
search_zip: bool,
}
@@ -24,6 +26,7 @@ struct Config {
impl Default for Config {
fn default() -> Config {
Config {
json_stats: false,
preprocessor: None,
search_zip: false,
}
@@ -60,6 +63,18 @@ impl SearchWorkerBuilder {
SearchWorker { config, matcher, searcher, printer }
}
/// Forcefully use JSON to emit statistics, even if the underlying printer
/// is not the JSON printer.
///
/// This is useful for implementing flag combinations like
/// `--json --quiet`, which uses the summary printer for implementing
/// `--quiet` but still wants to emit summary statistics, which should
/// be JSON formatted because of the `--json` flag.
///
/// The setting is stored in the builder's configuration. The builder itself
/// is returned so that calls can be chained.
pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
self.config.json_stats = yes;
self
}
/// Set the path to a preprocessor command.
///
/// When this is set, instead of searching files directly, the given
@@ -134,19 +149,15 @@ pub enum Printer<W> {
}
impl<W: WriteColor> Printer<W> {
/// Print the given statistics to the underlying writer in a way that is
/// consistent with this printer's format.
///
/// While `Stats` contains a duration itself, this only corresponds to the
/// time spent searching, where as `total_duration` should roughly
/// approximate the lifespan of the ripgrep process itself.
pub fn print_stats(
fn print_stats(
&mut self,
total_duration: Duration,
stats: &Stats,
) -> io::Result<()> {
match *self {
Printer::JSON(_) => unimplemented!(),
Printer::JSON(_) => {
self.print_stats_json(total_duration, stats)
}
Printer::Standard(_) | Printer::Summary(_) => {
self.print_stats_human(total_duration, stats)
}
@@ -167,8 +178,8 @@ impl<W: WriteColor> Printer<W> {
{searches} files searched
{bytes_printed} bytes printed
{bytes_searched} bytes searched
{search_time:.6} seconds spent searching
{process_time:.6} seconds
{search_time:0.6} seconds spent searching
{process_time:0.6} seconds
",
matches = stats.matches(),
lines = stats.matched_lines(),
@@ -181,6 +192,29 @@ impl<W: WriteColor> Printer<W> {
)
}
/// Write the aggregate statistics as a single JSON object followed by a
/// newline.
///
/// The object has `"type": "summary"` and a `"data"` payload holding the
/// serialized `stats` plus an `elapsed_total` object with the total duration
/// split into `secs`, `nanos` and a human readable string.
fn print_stats_json(
    &mut self,
    total_duration: Duration,
    stats: &Stats,
) -> io::Result<()> {
    // The layout deliberately mirrors the messages produced by the JSON
    // printer in the grep-printer crate; 'summary' is simply one more
    // message type on top of those.
    let human = format!("{:0.6}s", fractional_seconds(total_duration));
    let message = json!({
        "type": "summary",
        "data": {
            "stats": stats,
            "elapsed_total": {
                "secs": total_duration.as_secs(),
                "nanos": total_duration.subsec_nanos(),
                "human": human,
            },
        }
    });
    json::to_writer(self.get_mut(), &message)?;
    write!(self.get_mut(), "\n")
}
/// Return a mutable reference to the underlying printer's writer.
pub fn get_mut(&mut self) -> &mut W {
match *self {
@@ -215,6 +249,24 @@ impl<W: WriteColor> SearchWorker<W> {
&mut self.printer
}
/// Emit the given statistics through this worker's printer.
///
/// When the worker was built with `json_stats` enabled, the statistics are
/// always written as JSON; otherwise the printer picks the format that
/// matches its own output.
///
/// `Stats` carries a duration of its own, but that only covers the time
/// spent searching, whereas `total_duration` should roughly approximate the
/// lifespan of the ripgrep process itself.
pub fn print_stats(
    &mut self,
    total_duration: Duration,
    stats: &Stats,
) -> io::Result<()> {
    let force_json = self.config.json_stats;
    let printer = self.printer();
    if !force_json {
        return printer.print_stats(total_duration, stats);
    }
    printer.print_stats_json(total_duration, stats)
}
/// Search the given subject using the appropriate strategy.
fn search_impl(&mut self, subject: &Subject) -> io::Result<SearchResult> {
let path = subject.path();

View File

@@ -83,7 +83,6 @@ impl SubjectBuilder {
return None;
}
Err(err) => {
message!("{}: {}", subj.dent.path().display(), err);
debug!(
"ignoring {}: got error: {}",
subj.dent.path().display(), err

View File

@@ -6,19 +6,3 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.
";
pub const CODE: &'static str = "\
extern crate snap;
use std::io;
fn main() {
let stdin = io::stdin();
let stdout = io::stdout();
// Wrap the stdin reader in a Snappy reader.
let mut rdr = snap::Reader::new(stdin.lock());
let mut wtr = stdout.lock();
io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
}
";

22
tests/macros.rs Normal file
View File

@@ -0,0 +1,22 @@
/// Assert that two printed outputs are equal, with a readable failure report.
///
/// Both arguments are dereferenced and re-borrowed (`&*expr`) before being
/// compared with `!=`. On a mismatch the macro panics and prints the expected
/// and the actual value in full, each framed by a line of `~` characters.
/// The values are rendered with `{}`, so the dereferenced operands must
/// implement `Display`.
#[macro_export]
macro_rules! assert_eq_nice {
($expected:expr, $got:expr) => {
let expected = &*$expected;
let got = &*$got;
if expected != got {
panic!("
printed outputs differ!
expected:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
got:
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
{}
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
", expected, got);
}
}
}

36
tests/regression.rs Normal file
View File

@@ -0,0 +1,36 @@
use hay::SHERLOCK;
use workdir::WorkDir;
// See: https://github.com/BurntSushi/ripgrep/issues/16
//
// Both files containing "xyz" live under a directory named `ghi`, which the
// `.gitignore` excludes, so the search is expected to exit with an error
// status.
#[test]
fn r16() {
    let (dir, mut rg) = WorkDir::new_with("r16");
    dir.create_dir(".git");
    dir.create(".gitignore", "ghi/");
    dir.create_dir("ghi");
    dir.create_dir("def/ghi");
    dir.create("ghi/toplevel.txt", "xyz");
    dir.create("def/ghi/subdir.txt", "xyz");

    rg.arg("xyz");
    dir.assert_err(&mut rg);
}
// See: https://github.com/BurntSushi/ripgrep/issues/25
//
// The same search is run twice: first from the root of the work directory and
// then from its `src` sub-directory. The file under `src/llvm` must be
// reported both times, with its path relative to the respective directory.
#[test]
fn r25() {
    let (dir, mut rg) = WorkDir::new_with("r25");
    dir.create_dir(".git");
    dir.create(".gitignore", "/llvm/");
    dir.create_dir("src/llvm");
    dir.create("src/llvm/foo", "test");
    rg.arg("test");

    let from_root: String = dir.stdout(&mut rg);
    assert_eq_nice!("src/llvm/foo:test\n", from_root);

    rg.current_dir(dir.path().join("src"));
    let from_src: String = dir.stdout(&mut rg);
    assert_eq_nice!("llvm/foo:test\n", from_src);
}

View File

@@ -1,18 +1,15 @@
/*!
This module contains *integration* tests. Their purpose is to test the CLI
interface. Namely, that passing a flag does what it says on the tin.
Tests for more fine grained behavior (like the search or the globber) should be
unit tests in their respective modules.
*/
#![allow(dead_code, unused_imports)]
use std::process::Command;
use workdir::WorkDir;
#[macro_use]
mod macros;
mod hay;
mod regression;
mod util;
mod workdir;
macro_rules! sherlock {
@@ -47,11 +44,14 @@ macro_rules! clean {
}
fn path(unix: &str) -> String {
unix.to_string()
/*
if cfg!(windows) {
unix.replace("/", "\\")
} else {
unix.to_string()
}
*/
}
fn paths(unix: &[&str]) -> Vec<String> {

361
tests/util.rs Normal file
View File

@@ -0,0 +1,361 @@
use std::env;
use std::error;
use std::fmt;
use std::fs::{self, File};
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::process::{self, Command};
use std::str::FromStr;
use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
use std::thread;
use std::time::Duration;
static TEST_DIR: &'static str = "ripgrep-tests";
static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;
/// Dir represents a directory in which tests should be run.
///
/// Directories are created from a global atomic counter to avoid duplicates:
/// every call to `Dir::new` takes the next counter value and uses it as the
/// final path component of the directory it creates.
#[derive(Debug)]
pub struct Dir {
/// The directory containing the currently running test executable (the
/// parent of `env::current_exe()`). The path to the ripgrep binary is
/// resolved relative to this directory.
root: PathBuf,
/// The directory in which the test should run. If a test needs to create
/// files, they should go in here. This directory is also used as the CWD
/// for any processes created by the test.
dir: PathBuf,
}
impl Dir {
/// Set up a fresh working directory for a test.
///
/// `name` identifies a logical group of tests and may be reused freely:
/// each call appends the next value of a global counter, so the resulting
/// directory is `<temp dir>/ripgrep-tests/<name>/<id>`. The directory is
/// created on disk (retrying on failure) before this returns, and a panic
/// is raised if it cannot be created.
pub fn new(name: &str) -> Dir {
    let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);

    // The ripgrep binary is located relative to the test executable.
    let exe = env::current_exe().unwrap();
    let root = exe.parent().expect("executable's directory").to_path_buf();

    let mut dir = env::temp_dir();
    dir.push(TEST_DIR);
    dir.push(name);
    dir.push(id.to_string());
    nice_err(&dir, repeat(|| fs::create_dir_all(&dir)));

    Dir { root, dir }
}
/// Create a new file with the given name and contents in this directory,
/// or panic on error.
///
/// This is the string flavoured variant of `create_bytes`: the contents are
/// written as the UTF-8 bytes of `contents`.
pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
self.create_bytes(name, contents.as_bytes());
}
/// Attempt to write a file called `name`, relative to this directory, with
/// the given string contents.
///
/// Unlike `create`, a failure is handed back to the caller as an
/// `io::Error` instead of causing a panic.
pub fn try_create<P: AsRef<Path>>(
    &self,
    name: P,
    contents: &str,
) -> io::Result<()> {
    self.try_create_bytes(self.dir.join(name), contents.as_bytes())
}
/// Create a file called `name`, relative to this directory, whose length is
/// set to `filesize` bytes. No data is written explicitly; the length is
/// set directly on the newly created file.
///
/// Panics if the file cannot be created or resized.
pub fn create_size<P: AsRef<Path>>(&self, name: P, filesize: u64) {
    let target = self.dir.join(name);
    let handle = nice_err(&target, File::create(&target));
    let resized = handle.set_len(filesize);
    nice_err(&target, resized);
}
/// Write a file called `name`, relative to this directory, containing
/// exactly the given bytes.
///
/// Panics, naming the offending path, if the file cannot be written.
pub fn create_bytes<P: AsRef<Path>>(&self, name: P, contents: &[u8]) {
    let target = self.dir.join(name);
    let outcome = self.try_create_bytes(&target, contents);
    nice_err(&target, outcome);
}
/// Create (or truncate) the file at `path`, write `contents` to it and
/// flush it.
///
/// Note that `path` is used as given; callers are responsible for joining
/// it onto this directory first. The first I/O error encountered is
/// returned.
fn try_create_bytes<P: AsRef<Path>>(
    &self,
    path: P,
    contents: &[u8],
) -> io::Result<()> {
    File::create(&path).and_then(|mut out| {
        out.write_all(contents)?;
        out.flush()
    })
}
/// Delete the file called `name`, relative to this directory.
///
/// Panics, naming the offending path, if the file cannot be removed.
pub fn remove<P: AsRef<Path>>(&self, name: P) {
    let victim = self.dir.join(name);
    let outcome = fs::remove_file(&victim);
    nice_err(&victim, outcome);
}
/// Create the directory `path`, relative to this directory, together with
/// any missing parent directories.
///
/// Creation is retried on failure; a panic naming the offending path is
/// raised if every attempt fails.
pub fn create_dir<P: AsRef<Path>>(&self, path: P) {
    let full = self.dir.join(path);
    let outcome = repeat(|| fs::create_dir_all(&full));
    nice_err(&full, outcome);
}
/// Build a command that runs the ripgrep executable with this directory as
/// its current working directory.
///
/// The command is additionally prepared as follows:
///
/// * The `RIPGREP_CONFIG_PATH` environment variable is removed.
/// * `--path-separator /` is passed so that printed paths look the same on
///   every system. A test that wants to exercise `--path-separator` itself
///   can simply pass the flag again to override this default.
pub fn command(&self) -> process::Command {
    let mut rg = process::Command::new(&self.bin());
    rg.env_remove("RIPGREP_CONFIG_PATH")
        .current_dir(&self.dir)
        .args(&["--path-separator", "/"]);
    rg
}
/// Locate the ripgrep executable.
///
/// The binary is expected one level above the directory that contains the
/// test executable, and carries an `.exe` suffix on Windows.
pub fn bin(&self) -> PathBuf {
    let relative = if cfg!(windows) { "../rg.exe" } else { "../rg" };
    self.root.join(relative)
}
/// Returns the path to this directory.
///
/// This is the directory in which the test creates its files, and the one
/// that `command` sets as the current working directory.
pub fn path(&self) -> &Path {
&self.dir
}
/// Create a symlink named `target` that points at `src`; both names are
/// interpreted relative to this directory.
///
/// Anything already present at `target` is removed first on a best-effort
/// basis. Panics if the symlink cannot be created.
#[cfg(not(windows))]
pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
    use std::os::unix::fs::symlink;

    let (from, to) = (self.dir.join(src), self.dir.join(target));
    // A failed removal is fine: usually there is simply nothing to remove.
    let _ = fs::remove_file(&to);
    nice_err(&to, symlink(&from, &to));
}
/// Create a directory symlink named `target` that points at `src`; both
/// names are interpreted relative to this directory.
///
/// A directory already present at `target` is removed first on a
/// best-effort basis. Panics if the symlink cannot be created.
#[cfg(windows)]
pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
    use std::os::windows::fs::symlink_dir;

    let (from, to) = (self.dir.join(src), self.dir.join(target));
    // A failed removal is fine: usually there is simply nothing to remove.
    let _ = fs::remove_dir(&to);
    nice_err(&to, symlink_dir(&from, &to));
}
/// Creates a file symlink to the src with the given target name
/// in this directory.
///
/// On non-Windows platforms this simply delegates to `link_dir`, which
/// creates the link with the same symlink call regardless of what `src`
/// refers to.
#[cfg(not(windows))]
pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
&self,
src: S,
target: T,
) {
self.link_dir(src, target);
}
/// Create a file symlink named `target` that points at `src`; both names
/// are interpreted relative to this directory.
///
/// A file already present at `target` is removed first on a best-effort
/// basis. Panics if the symlink cannot be created.
#[cfg(windows)]
pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
    &self,
    src: S,
    target: T,
) {
    use std::os::windows::fs::symlink_file;

    let (from, to) = (self.dir.join(src), self.dir.join(target));
    // A failed removal is fine: usually there is simply nothing to remove.
    let _ = fs::remove_file(&to);
    nice_err(&to, symlink_file(&from, &to));
}
/// Run the given command and parse everything it wrote to stdout into `T`.
///
/// The command must succeed (see `output`). Its stdout is decoded lossily
/// as UTF-8 before being parsed; if parsing fails, this panics and includes
/// both the parse error and the raw output in the message.
pub fn stdout<E: fmt::Debug, T: FromStr<Err=E>>(
    &self,
    cmd: &mut process::Command,
) -> T {
    let output = self.output(cmd);
    let text = String::from_utf8_lossy(&output.stdout);
    text.parse().unwrap_or_else(|err| {
        panic!(
            "could not convert from string: {:?}\n\n{}",
            err,
            text
        )
    })
}
/// Gets the output of a command. If the command failed, then this panics.
///
/// Two kinds of failure are covered: the command could not be run at all
/// (the `unwrap` below), or it ran but exited with a non-success status
/// (checked by `expect_success`, which reports the captured output).
pub fn output(&self, cmd: &mut process::Command) -> process::Output {
let output = cmd.output().unwrap();
self.expect_success(cmd, output)
}
/// Run the given command with `input` fed to its stdin, and return what it
/// produced.
///
/// All three standard streams of the child are piped. Panics if the child
/// cannot be spawned, if it exits unsuccessfully, or if writing the input
/// fails.
pub fn pipe(
    &self,
    cmd: &mut process::Command,
    input: &str
) -> process::Output {
    cmd.stdin(process::Stdio::piped())
        .stdout(process::Stdio::piped())
        .stderr(process::Stdio::piped());
    let mut child = cmd.spawn().unwrap();

    // The input is written from a dedicated thread so that the parent can
    // drain the child's output at the same time; doing both on one thread
    // risks a deadlock between parent and child.
    let mut child_stdin =
        child.stdin.take().expect("expected standard input");
    let payload = input.to_owned();
    let writer = thread::spawn(move || write!(child_stdin, "{}", payload));

    let finished = child.wait_with_output().unwrap();
    let output = self.expect_success(cmd, finished);
    writer.join().unwrap().unwrap();
    output
}
/// Return `o` unchanged if it is the output of a successful process run;
/// otherwise panic with a report containing the command, the working
/// directory, the exit status and the captured stdout and stderr.
///
/// When the failed process printed nothing to stderr, the report also
/// suggests that the search may simply have found no results.
fn expect_success(
    &self,
    cmd: &process::Command,
    o: process::Output
) -> process::Output {
    if o.status.success() {
        return o;
    }
    let suggest = if o.stderr.is_empty() {
        "\n\nDid your search end up with no results?"
    } else {
        ""
    };
    panic!(
        "\n\n==========\n\
         command failed but expected success!\
         {}\
         \n\ncommand: {:?}\
         \ncwd: {}\
         \n\nstatus: {}\
         \n\nstdout: {}\
         \n\nstderr: {}\
         \n\n==========\n",
        suggest,
        cmd,
        self.dir.display(),
        o.status,
        String::from_utf8_lossy(&o.stdout),
        String::from_utf8_lossy(&o.stderr)
    )
}
/// Run the given command and require that it exits unsuccessfully.
///
/// Panics with a report of the working directory, exit status and captured
/// output if the command succeeds, or if it cannot be run at all.
pub fn assert_err(&self, cmd: &mut process::Command) {
    let o = cmd.output().unwrap();
    if !o.status.success() {
        return;
    }
    panic!(
        "\n\n===== {:?} =====\n\
         command succeeded but expected failure!\
         \n\ncwd: {}\
         \n\nstatus: {}\
         \n\nstdout: {}\n\nstderr: {}\
         \n\n=====\n",
        cmd,
        self.dir.display(),
        o.status,
        String::from_utf8_lossy(&o.stdout),
        String::from_utf8_lossy(&o.stderr)
    );
}
/// Run the given command and require that it exits with exactly
/// `expected_code`.
///
/// Panics if the codes differ, if the command cannot be run, or if the
/// process ended without reporting an exit code.
pub fn assert_exit_code(
    &self,
    expected_code: i32,
    cmd: &mut process::Command,
) {
    let status = cmd.status().unwrap();
    let code = status.code().unwrap();
    assert_eq!(
        expected_code,
        code,
        "\n\n===== {:?} =====\n\
         expected exit code did not match\
         \n\nexpected: {}\
         \n\nfound: {}\
         \n\n=====\n",
        cmd,
        expected_code,
        code
    );
}
/// Run the given command and require that it both exits unsuccessfully and
/// writes something to stderr.
///
/// Panics with a report of the working directory, exit status and captured
/// output if either condition does not hold, or if the command cannot be
/// run at all.
pub fn assert_non_empty_stderr(&self, cmd: &mut process::Command) {
    let o = cmd.output().unwrap();
    let failed_with_message = !o.status.success() && !o.stderr.is_empty();
    if failed_with_message {
        return;
    }
    panic!(
        "\n\n===== {:?} =====\n\
         command succeeded but expected failure!\
         \n\ncwd: {}\
         \n\nstatus: {}\
         \n\nstdout: {}\n\nstderr: {}\
         \n\n=====\n",
        cmd,
        self.dir.display(),
        o.status,
        String::from_utf8_lossy(&o.stdout),
        String::from_utf8_lossy(&o.stderr)
    );
}
}
/// A simple wrapper around a process::Command with some conveniences.
///
/// It pairs a command with the test directory it belongs to.
#[derive(Debug)]
pub struct TestCommand {
/// The dir used to launch this command.
dir: Dir,
/// The actual command we use to control the process.
cmd: Command,
}
/// Unwrap `res`, or panic with a message that starts with the path the
/// failed operation was working on, followed by the debug representation of
/// the error.
fn nice_err<T, E: error::Error>(
    path: &Path,
    res: Result<T, E>,
) -> T {
    res.unwrap_or_else(|err| panic!("{}: {:?}", path.display(), err))
}
/// Call `f` until it succeeds, giving up after ten attempts.
///
/// Every failed attempt is followed by a 500ms pause. If all attempts fail,
/// the error of the last one is returned.
fn repeat<F: FnMut() -> io::Result<()>>(mut f: F) -> io::Result<()> {
    const ATTEMPTS: usize = 10;
    let pause = Duration::from_millis(500);

    let mut failure = None;
    for _ in 0..ATTEMPTS {
        match f() {
            Ok(()) => return Ok(()),
            Err(err) => {
                failure = Some(err);
                thread::sleep(pause);
            }
        }
    }
    // ATTEMPTS is non-zero, so reaching this point implies a recorded error.
    Err(failure.unwrap())
}

View File

@@ -48,6 +48,15 @@ impl WorkDir {
}
}
/// Like `new`, but additionally returns a command whose program is
/// ripgrep's executable and whose current working directory is the newly
/// created work dir.
pub fn new_with(name: &str) -> (WorkDir, process::Command) {
    let workdir = WorkDir::new(name);
    let rg = workdir.command();
    (workdir, rg)
}
/// Create a new file with the given name and contents in this directory,
/// or panic on error.
pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
@@ -106,10 +115,18 @@ impl WorkDir {
/// Build a command that runs the ripgrep executable with this directory as
/// its current working directory.
///
/// The command is additionally prepared as follows:
///
/// * The `RIPGREP_CONFIG_PATH` environment variable is removed.
/// * `--path-separator /` is passed so that printed paths look the same on
///   every system. A test that wants to exercise `--path-separator` itself
///   can simply pass the flag again to override this default.
pub fn command(&self) -> process::Command {
    let mut rg = process::Command::new(&self.bin());
    rg.env_remove("RIPGREP_CONFIG_PATH")
        .current_dir(&self.dir)
        .args(&["--path-separator", "/"]);
    rg
}