PROGRESS: tests: re-tool integration tests

ripgrep: migrate to libripgrep
libripgrep: initial commit introducing libripgrep
2025-07-31 04:02:00 -07:00 · 2018-08-07 18:38:24 -04:00 · 2018-08-07 18:38:24 -04:00 · 2018-08-07 18:23:13 -04:00 · 2018-08-06 19:20:42 -04:00 · 2018-08-06 19:20:04 -04:00
15 changed files with 697 additions and 64 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -40,7 +40,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
 name = "bytecount"
-version = "0.3.1"
+version = "0.3.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 dependencies = [
 "simd 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
@@ -181,7 +181,7 @@ dependencies = [
 name = "grep-searcher"
 version = "0.0.1"
 dependencies = [
- "bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
+ "bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
 "encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "grep-matcher 0.0.1",
@@ -338,6 +338,7 @@ dependencies = [
 "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)",
 "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
@@ -519,7 +520,7 @@ dependencies = [
 "checksum atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)" = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
 "checksum base64 0.9.2 (registry+https://github.com/rust-lang/crates.io-index)" = "85415d2594767338a74a30c1d370b2f3262ec1b4ed2d7bba5b3faf4de40467d9"
 "checksum bitflags 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "d0c54bb8f454c567f21197eefcdbf5679d0bd99f2ddbe52e84c77061952e6789"
-"checksum bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "882585cd7ec84e902472df34a5e01891202db3bf62614e1f0afe459c1afcf744"
+"checksum bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f861d9ce359f56dbcb6e0c2a1cb84e52ad732cadb57b806adeb3c7668caccbd8"
 "checksum byteorder 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "74c0b906e9446b0a2e4f760cdb3fa4b2c48cdc6db8766a845c54b6ff063fd2e9"
 "checksum cfg-if 0.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "efe5c877e17a9c717a0bf3613b2709f723202c4e4675cc8f12926ded29bcb17e"
 "checksum clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b957d88f4b6a63b9d70d5f454ac8011819c6efa7727858f458ab71c756ce2d3e"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -47,6 +47,7 @@ log = "0.4"
 num_cpus = "1"
 regex = "1"
 same-file = "1"
+serde_json = "1"
 termcolor = "1"

 [dependencies.clap]
@@ -56,7 +57,7 @@ features = ["suggestions", "color"]

 [target.'cfg(windows)'.dependencies.winapi]
 version = "0.3"
-features = ["std", "winnt"]
+features = ["std", "fileapi", "winnt"]

 [build-dependencies]
 lazy_static = "1"
@@ -71,4 +72,4 @@ avx-accel = ["grep/avx-accel"]
 simd-accel = ["grep/simd-accel"]

 [profile.release]
-debug = true
+debug = 1
--- a/grep-printer/src/standard.rs
+++ b/grep-printer/src/standard.rs
@@ -1014,7 +1014,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
                line = line.with_end(line.end() - 1);
            }
            if self.config().trim_ascii {
-                line = trim_ascii_prefix_range(bytes, line);
+                line = self.trim_ascii_prefix_range(bytes, line);
            }

            while !line.is_empty() {
@@ -1058,7 +1058,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
                line = line.with_end(line.end() - 1);
            }
            if self.config().trim_ascii {
-                line = trim_ascii_prefix_range(bytes, line);
+                line = self.trim_ascii_prefix_range(bytes, line);
            }
            while !line.is_empty() {
                if matches[midx].end() <= line.start() {
@@ -1127,7 +1127,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
                    line = line.with_end(line.end() - 1);
                }
                if self.config().trim_ascii {
-                    line = trim_ascii_prefix_range(bytes, line);
+                    line = self.trim_ascii_prefix_range(bytes, line);
                }

                while !line.is_empty() {
@@ -1208,7 +1208,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
            if !self.config().trim_ascii {
                0
            } else {
-                trim_ascii_prefix_range(
+                self.trim_ascii_prefix_range(
                    line,
                    Match::new(0, line.len()),
                ).start()
@@ -1369,7 +1369,7 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
        if !self.config().trim_ascii {
            return self.write(buf);
        }
-        self.write(trim_ascii_prefix(buf))
+        self.write(self.trim_ascii_prefix(buf))
    }

    fn write(&self, buf: &[u8]) -> io::Result<()> {
@@ -1425,6 +1425,21 @@ impl<'a, M: Matcher, W: WriteColor> StandardImpl<'a, M, W> {
    fn multi_line(&self) -> bool {
        self.searcher.multi_line_with_matcher(&self.sink.matcher)
    }
+
+    /// Trim leading ASCII whitespace from `slice[range]` and return the
+    /// narrowed range.
+    ///
+    /// Trimming stops at the first byte that is not whitespace or that is
+    /// part of this searcher's line terminator.
+    fn trim_ascii_prefix_range(&self, slice: &[u8], range: Match) -> Match {
+        trim_ascii_prefix_range(self.searcher.line_terminator(), slice, range)
+    }
+
+    /// Trim leading ASCII whitespace from `slice`, stopping at any byte of
+    /// this searcher's line terminator, and return the remaining sub-slice.
+    fn trim_ascii_prefix<'s>(&self, slice: &'s [u8]) -> &'s [u8] {
+        trim_ascii_prefix(self.searcher.line_terminator(), slice)
+    }
 }

 #[cfg(test)]
@@ -1987,6 +2002,31 @@ Watson
        assert_eq_printed!(expected, got);
    }

+    #[test]
+    fn trim_ascii_with_line_term() {
+        let matcher = RegexMatcher::new("Watson").unwrap();
+        let mut printer = StandardBuilder::new()
+            .trim_ascii(true)
+            .build(NoColor::new(vec![]));
+        SearcherBuilder::new()
+            .line_number(true)
+            .before_context(1)
+            .build()
+            .search_reader(
+                &matcher,
+                "\n   Watson".as_bytes(),
+                printer.sink(&matcher),
+            )
+            .unwrap();
+
+        let got = printer_contents(&mut printer);
+        let expected = "\
+1-
+2:Watson
+";
+        assert_eq_printed!(expected, got);
+    }
+
    #[test]
    fn line_number() {
        let matcher = RegexMatcher::new("Watson").unwrap();
--- a/grep-printer/src/util.rs
+++ b/grep-printer/src/util.rs
@@ -4,7 +4,7 @@ use std::io;
 use std::path::Path;
 use std::time;

-use grep_matcher::{Captures, Match, Matcher};
+use grep_matcher::{Captures, LineTerminator, Match, Matcher};
 use grep_searcher::{
    LineIter,
    SinkError, SinkContext, SinkContextKind, SinkMatch,
@@ -317,7 +317,7 @@ pub struct NiceDuration(pub time::Duration);

 impl fmt::Display for NiceDuration {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        write!(f, "{:0.4}s", self.fractional_seconds())
+        write!(f, "{:0.6}s", self.fractional_seconds())
    }
 }

@@ -346,21 +346,37 @@ impl Serialize for NiceDuration {

 /// Trim prefix ASCII spaces from the given slice and return the corresponding
 /// range.
-pub fn trim_ascii_prefix_range(slice: &[u8], range: Match) -> Match {
-    fn is_space(b: &&u8) -> bool {
-        match **b {
+///
+/// This stops trimming a prefix as soon as it sees non-whitespace or a line
+/// terminator.
+pub fn trim_ascii_prefix_range(
+    line_term: LineTerminator,
+    slice: &[u8],
+    range: Match,
+) -> Match {
+    fn is_space(b: u8) -> bool {
+        match b {
            b'\t' | b'\n' | b'\x0B' | b'\x0C' | b'\r' | b' ' => true,
            _ => false,
        }
    }

-    let count = slice[range].iter().take_while(is_space).count();
+    let count = slice[range]
+        .iter()
+        .take_while(|&&b| -> bool {
+            is_space(b) && !line_term.as_bytes().contains(&b)
+        })
+        .count();
    range.with_start(range.start() + count)
 }

 /// Trim prefix ASCII spaces from the given slice and return the corresponding
 /// sub-slice.
-pub fn trim_ascii_prefix(slice: &[u8]) -> &[u8] {
-    let range = trim_ascii_prefix_range(slice, Match::new(0, slice.len()));
+pub fn trim_ascii_prefix(line_term: LineTerminator, slice: &[u8]) -> &[u8] {
+    let range = trim_ascii_prefix_range(
+        line_term,
+        slice,
+        Match::new(0, slice.len()),
+    );
    &slice[range]
 }
--- a/src/app.rs
+++ b/src/app.rs
@@ -518,6 +518,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
    flag_ignore_case(&mut args);
    flag_ignore_file(&mut args);
    flag_invert_match(&mut args);
+    flag_json(&mut args);
    flag_line_number(&mut args);
    flag_line_regexp(&mut args);
    flag_max_columns(&mut args);
@@ -549,6 +550,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
    flag_stats(&mut args);
    flag_text(&mut args);
    flag_threads(&mut args);
+    flag_trim(&mut args);
    flag_type(&mut args);
    flag_type_add(&mut args);
    flag_type_clear(&mut args);
@@ -1085,6 +1087,63 @@ Invert matching. Show lines that do not match the given patterns.
    args.push(arg);
 }

+fn flag_json(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Show search results in a JSON Lines format.";
+    const LONG: &str = long!("\
+Enable printing results in a JSON Lines format.
+
+When this flag is provided, ripgrep will emit a sequence of messages, each
+encoded as a JSON object, where there are five different message types:
+
+**begin** - A message that indicates a file is being searched and contains at
+least one match.
+
+**end** - A message that indicates a file is done being searched. This message
+also includes summary statistics about the search for a particular file.
+
+**match** - A message that indicates a match was found. This includes the text
+and offsets of the match.
+
+**context** - A message that indicates a contextual line was found. This
+includes the text of the line, along with any match information if the search
+was inverted.
+
+**summary** - The final message emitted by ripgrep that contains summary
+statistics about the search across all files.
+
+Since file paths or the contents of files are not guaranteed to be valid UTF-8
+and JSON itself must be valid UTF-8, ripgrep will emit all data elements as
+objects with one of two keys: 'text' or 'bytes'. 'text' is a normal JSON string
+when the data is valid UTF-8 while 'bytes' is the base64 encoded contents of
+the data.
+
+The JSON Lines format is only supported for showing search results. It cannot
+be used with other flags that emit other types of output, such as --files,
+--files-with-matches, --files-without-match, --count or --count-matches.
+ripgrep will report an error if any of the aforementioned flags are used in
+concert with --json.
+
+Other flags that control aspects of the standard output such as
+--only-matching, --heading, --replace, --max-columns, etc., have no effect
+when --json is set.
+
+The JSON Lines format can be disabled with --no-json.
+");
+    let arg = RGArg::switch("json")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-json")
+        .conflicts(&[
+            "count", "count-matches",
+            "files", "files-with-matches", "files-without-match",
+        ]);
+    args.push(arg);
+
+    let arg = RGArg::switch("no-json")
+        .hidden()
+        .overrides("json");
+    args.push(arg);
+}
+
 fn flag_line_number(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Show line numbers.";
    const LONG: &str = long!("\
@@ -1219,7 +1278,7 @@ Enable matching across multiple lines.

 This flag can be disabled with --no-multiline.
 ");
-    let arg = RGArg::switch("multiline")
+    let arg = RGArg::switch("multiline").short("U")
        .help(SHORT).long_help(LONG)
        .overrides("no-multiline");
    args.push(arg);
@@ -1621,11 +1680,18 @@ searched, and the time taken for the entire search to complete.
 This set of aggregate statistics may expand over time.

 Note that this flag has no effect if --files, --files-with-matches or
--files-without-match is passed.");
+--files-without-match is passed.

+This flag can be disabled with --no-stats.
+");
    let arg = RGArg::switch("stats")
-        .help(SHORT).long_help(LONG);
+        .help(SHORT).long_help(LONG)
+        .overrides("no-stats");
+    args.push(arg);

+    let arg = RGArg::switch("no-stats")
+        .help(SHORT).long_help(LONG)
+        .overrides("stats");
    args.push(arg);
 }

@@ -1668,6 +1734,25 @@ causes ripgrep to choose the thread count using heuristics.
    args.push(arg);
 }

+fn flag_trim(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Trim prefixed whitespace from matches.";
+    const LONG: &str = long!("\
+When set, all ASCII whitespace at the beginning of each line printed will be
+trimmed.
+
+This flag can be disabled with --no-trim.
+");
+    let arg = RGArg::switch("trim")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-trim");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-trim")
+        .hidden()
+        .overrides("trim");
+    args.push(arg);
+}
+
 fn flag_type(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Only search files matching TYPE.";
    const LONG: &str = long!("\
--- a/src/args.rs
+++ b/src/args.rs
@@ -275,6 +275,7 @@ impl Args {
        let searcher = self.matches().searcher(self.paths())?;
        let mut builder = SearchWorkerBuilder::new();
        builder
+            .json_stats(self.matches().is_present("json"))
            .preprocessor(self.matches().preprocessor())
            .search_zip(self.matches().is_present("search-zip"));
        Ok(builder.build(matcher, searcher, printer))
@@ -490,7 +491,7 @@ impl ArgMatches {
            .max_matches(self.max_count()?)
            .column(self.column())
            .byte_offset(self.is_present("byte-offset"))
-            .trim_ascii(false)
+            .trim_ascii(self.is_present("trim"))
            .separator_search(None)
            .separator_context(Some(self.context_separator()))
            .separator_field_match(b":".to_vec())
@@ -889,13 +890,21 @@ impl ArgMatches {

    /// Determine the type of output we should produce.
    fn output_kind(&self) -> OutputKind {
+        if self.is_present("quiet") {
+            // While we don't technically print results (or aggregate results)
+            // in quiet mode, we still support the --stats flag, and those
+            // stats are computed by the Summary printer for now.
+            return OutputKind::Summary;
+        } else if self.is_present("json") {
+            return OutputKind::JSON;
+        }
+
        let (count, count_matches) = self.counts();
        let summary =
            count
            || count_matches
            || self.is_present("files-with-matches")
-            || self.is_present("files-without-match")
-            || self.is_present("quiet");
+            || self.is_present("files-without-match");
        if summary {
            OutputKind::Summary
        } else {
@@ -1378,7 +1387,15 @@ fn stdin_is_readable() -> bool {
 /// Returns true if and only if stdin is deemed searchable.
 #[cfg(windows)]
 fn stdin_is_readable() -> bool {
-    // On Windows, it's not clear what the possibilities are to me, so just
-    // always return true.
-    true
+    use std::os::windows::io::AsRawHandle;
+    use winapi::um::fileapi::GetFileType;
+    use winapi::um::winbase::{FILE_TYPE_DISK, FILE_TYPE_PIPE};
+
+    let handle = match Handle::stdin() {
+        Err(_) => return false,
+        Ok(handle) => handle,
+    };
+    let raw_handle = handle.as_raw_handle();
+    let ft = unsafe { GetFileType(raw_handle) };
+    ft == FILE_TYPE_DISK || ft == FILE_TYPE_PIPE
 }
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,6 +11,8 @@ extern crate log;
 extern crate num_cpus;
 extern crate regex;
 extern crate same_file;
+#[macro_use]
+extern crate serde_json;
 extern crate termcolor;
 #[cfg(windows)]
 extern crate winapi;
@@ -103,7 +105,7 @@ fn search(args: Args) -> Result<bool> {
    if let Some(ref stats) = stats {
        let elapsed = Instant::now().duration_since(started_at);
        // We don't care if we couldn't print this successfully.
-        let _ = searcher.printer().print_stats(elapsed, stats);
+        let _ = searcher.print_stats(elapsed, stats);
    }
    Ok(matched)
 }
@@ -181,7 +183,7 @@ fn search_parallel(args: Args) -> Result<bool> {
        let stats = locked_stats.lock().unwrap();
        let mut searcher = args.search_worker(args.stdout())?;
        // We don't care if we couldn't print this successfully.
-        let _ = searcher.printer().print_stats(elapsed, &stats);
+        let _ = searcher.print_stats(elapsed, &stats);
    }
    Ok(matched.load(SeqCst))
 }
--- a/src/search.rs
+++ b/src/search.rs
@@ -6,6 +6,7 @@ use grep::matcher::Matcher;
 use grep::printer::{JSON, Standard, Summary, Stats};
 use grep::regex::RegexMatcher;
 use grep::searcher::Searcher;
+use serde_json as json;
 use termcolor::WriteColor;

 use decompressor::{DecompressionReader, is_compressed};
@@ -17,6 +18,7 @@ use subject::Subject;
 /// at a very high level.
 #[derive(Clone, Debug)]
 struct Config {
+    json_stats: bool,
    preprocessor: Option<PathBuf>,
    search_zip: bool,
 }
@@ -24,6 +26,7 @@ struct Config {
 impl Default for Config {
    fn default() -> Config {
        Config {
+            json_stats: false,
            preprocessor: None,
            search_zip: false,
        }
@@ -60,6 +63,18 @@ impl SearchWorkerBuilder {
        SearchWorker { config, matcher, searcher, printer }
    }

+    /// Forcefully use JSON to emit statistics, even if the underlying printer
+    /// is not the JSON printer.
+    ///
+    /// This is useful for implementing flag combinations like
+    /// `--json --quiet`, which uses the summary printer for implementing
+    /// `--quiet` but still wants to emit summary statistics, which should
+    /// be JSON formatted because of the `--json` flag.
+    pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
+        self.config.json_stats = yes;
+        self
+    }
+
    /// Set the path to a preprocessor command.
    ///
    /// When this is set, instead of searching files directly, the given
@@ -134,19 +149,15 @@ pub enum Printer<W> {
 }

 impl<W: WriteColor> Printer<W> {
-    /// Print the given statistics to the underlying writer in a way that is
-    /// consistent with this printer's format.
-    ///
-    /// While `Stats` contains a duration itself, this only corresponds to the
-    /// time spent searching, where as `total_duration` should roughly
-    /// approximate the lifespan of the ripgrep process itself.
-    pub fn print_stats(
+    fn print_stats(
        &mut self,
        total_duration: Duration,
        stats: &Stats,
    ) -> io::Result<()> {
        match *self {
-            Printer::JSON(_) => unimplemented!(),
+            Printer::JSON(_) => {
+                self.print_stats_json(total_duration, stats)
+            }
            Printer::Standard(_) | Printer::Summary(_) => {
                self.print_stats_human(total_duration, stats)
            }
@@ -167,8 +178,8 @@ impl<W: WriteColor> Printer<W> {
 {searches} files searched
 {bytes_printed} bytes printed
 {bytes_searched} bytes searched
-{search_time:.6} seconds spent searching
-{process_time:.6} seconds
+{search_time:0.6} seconds spent searching
+{process_time:0.6} seconds
 ",
            matches = stats.matches(),
            lines = stats.matched_lines(),
@@ -181,6 +192,29 @@ impl<W: WriteColor> Printer<W> {
        )
    }

+    fn print_stats_json(
+        &mut self,
+        total_duration: Duration,
+        stats: &Stats,
+    ) -> io::Result<()> {
+        // This mirrors the message layout used by the JSON printer in the
+        // grep-printer crate, adding a 'summary' message type on top of it.
+        let human = format!("{:0.6}s", fractional_seconds(total_duration));
+        let message = json!({
+            "type": "summary",
+            "data": {
+                "stats": stats,
+                "elapsed_total": {
+                    "secs": total_duration.as_secs(),
+                    "nanos": total_duration.subsec_nanos(),
+                    "human": human,
+                },
+            }
+        });
+        json::to_writer(self.get_mut(), &message)?;
+        writeln!(self.get_mut())
+    }
+
    /// Return a mutable reference to the underlying printer's writer.
    pub fn get_mut(&mut self) -> &mut W {
        match *self {
@@ -215,6 +249,24 @@ impl<W: WriteColor> SearchWorker<W> {
        &mut self.printer
    }

+    /// Print the given statistics to the underlying writer.
+    ///
+    /// When `json_stats` is enabled in this worker's configuration, the
+    /// statistics are always emitted as JSON; otherwise they are emitted in
+    /// the printer's own format. `Stats` carries only the time spent
+    /// searching, whereas `total_duration` should roughly approximate the
+    /// lifespan of the ripgrep process itself.
+    pub fn print_stats(
+        &mut self,
+        total_duration: Duration,
+        stats: &Stats,
+    ) -> io::Result<()> {
+        if !self.config.json_stats {
+            return self.printer().print_stats(total_duration, stats);
+        }
+        self.printer().print_stats_json(total_duration, stats)
+    }
+
    /// Search the given subject using the appropriate strategy.
    fn search_impl(&mut self, subject: &Subject) -> io::Result<SearchResult> {
        let path = subject.path();
--- a/src/subject.rs
+++ b/src/subject.rs
@@ -83,7 +83,6 @@ impl SubjectBuilder {
                    return None;
                }
                Err(err) => {
-                    message!("{}: {}", subj.dent.path().display(), err);
                    debug!(
                        "ignoring {}: got error: {}",
                        subj.dent.path().display(), err
--- a/tests/hay.rs
+++ b/tests/hay.rs
@@ -6,19 +6,3 @@ can extract a clew from a wisp of straw or a flake of cigar ash;
 but Doctor Watson has to have it taken out for him and dusted,
 and exhibited clearly, with a label attached.
 ";
-
-pub const CODE: &'static str = "\
-extern crate snap;
-
-use std::io;
-
-fn main() {
-    let stdin = io::stdin();
-    let stdout = io::stdout();
-
-    // Wrap the stdin reader in a Snappy reader.
-    let mut rdr = snap::Reader::new(stdin.lock());
-    let mut wtr = stdout.lock();
-    io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\");
-}
-";
--- a/tests/macros.rs
+++ b/tests/macros.rs
@@ -0,0 +1,22 @@
+#[macro_export]
+macro_rules! assert_eq_nice {
+    ($expected:expr, $got:expr) => {
+        let expected = &*$expected;
+        let got = &*$got;
+        if expected != got {
+            panic!("
+printed outputs differ!
+
+expected:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+got:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+{}
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+", expected, got);
+        }
+    }
+}
--- a/tests/regression.rs
+++ b/tests/regression.rs
@@ -0,0 +1,36 @@
+use hay::SHERLOCK;
+use workdir::WorkDir;
+
+// See: https://github.com/BurntSushi/ripgrep/issues/16
+#[test]
+fn r16() {
+    let (wd, mut cmd) = WorkDir::new_with("r16");
+    wd.create_dir(".git");
+    wd.create(".gitignore", "ghi/");
+    wd.create_dir("ghi");
+    wd.create_dir("def/ghi");
+    wd.create("ghi/toplevel.txt", "xyz");
+    wd.create("def/ghi/subdir.txt", "xyz");
+
+    cmd.arg("xyz");
+    wd.assert_err(&mut cmd);
+}
+
+// See: https://github.com/BurntSushi/ripgrep/issues/25
+#[test]
+fn r25() {
+    let (wd, mut cmd) = WorkDir::new_with("r25");
+    wd.create_dir(".git");
+    wd.create(".gitignore", "/llvm/");
+    wd.create_dir("src/llvm");
+    wd.create("src/llvm/foo", "test");
+
+    cmd.arg("test");
+
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq_nice!("src/llvm/foo:test\n", lines);
+
+    cmd.current_dir(wd.path().join("src"));
+    let lines: String = wd.stdout(&mut cmd);
+    assert_eq_nice!("llvm/foo:test\n", lines);
+}
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -1,18 +1,15 @@
-/*!
-This module contains *integration* tests. Their purpose is to test the CLI
-interface. Namely, that passing a flag does what it says on the tin.
-
-Tests for more fine grained behavior (like the search or the globber) should be
-unit tests in their respective modules.
-*/
-
 #![allow(dead_code, unused_imports)]

 use std::process::Command;

 use workdir::WorkDir;

+#[macro_use]
+mod macros;
+
 mod hay;
+mod regression;
+mod util;
 mod workdir;

 macro_rules! sherlock {
@@ -47,11 +44,14 @@ macro_rules! clean {
 }

 fn path(unix: &str) -> String {
+    unix.to_string()
+    /*
    if cfg!(windows) {
        unix.replace("/", "\\")
    } else {
        unix.to_string()
    }
+    */
 }

 fn paths(unix: &[&str]) -> Vec<String> {
--- a/tests/util.rs
+++ b/tests/util.rs
@@ -0,0 +1,361 @@
+use std::env;
+use std::error;
+use std::fmt;
+use std::fs::{self, File};
+use std::io::{self, Write};
+use std::path::{Path, PathBuf};
+use std::process::{self, Command};
+use std::str::FromStr;
+use std::sync::atomic::{ATOMIC_USIZE_INIT, AtomicUsize, Ordering};
+use std::thread;
+use std::time::Duration;
+
+static TEST_DIR: &'static str = "ripgrep-tests";
+static NEXT_ID: AtomicUsize = ATOMIC_USIZE_INIT;
+
+/// Dir represents a directory in which tests should be run.
+///
+/// Directories are created from a global atomic counter to avoid duplicates.
+#[derive(Debug)]
+pub struct Dir {
+    /// The directory in which this test executable is running.
+    root: PathBuf,
+    /// The directory in which the test should run. If a test needs to create
+    /// files, they should go in here. This directory is also used as the CWD
+    /// for any processes created by the test.
+    dir: PathBuf,
+}
+
+impl Dir {
+    /// Set up a fresh working directory for a test. `name` identifies a
+    /// logical group of tests and may be reused across calls; a global
+    /// counter gives every call its own numbered subdirectory.
+    pub fn new(name: &str) -> Dir {
+        let id = NEXT_ID.fetch_add(1, Ordering::SeqCst);
+        let exe = env::current_exe().unwrap();
+        let root = exe
+            .parent()
+            .expect("executable's directory")
+            .to_path_buf();
+        let mut dir = env::temp_dir();
+        dir.push(TEST_DIR);
+        dir.push(name);
+        dir.push(id.to_string());
+        nice_err(&dir, repeat(|| fs::create_dir_all(&dir)));
+        Dir {
+            root,
+            dir,
+        }
+    }
+
+    /// Create a new file with the given name and contents in this directory,
+    /// or panic on error.
+    pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
+        self.create_bytes(name, contents.as_bytes());
+    }
+
+    /// Try to create a new file with the given name and contents in this
+    /// directory.
+    pub fn try_create<P: AsRef<Path>>(
+        &self,
+        name: P,
+        contents: &str,
+    ) -> io::Result<()> {
+        let path = self.dir.join(name);
+        self.try_create_bytes(path, contents.as_bytes())
+    }
+
+    /// Create a new file with the given name and size.
+    pub fn create_size<P: AsRef<Path>>(&self, name: P, filesize: u64) {
+        let path = self.dir.join(name);
+        let file = nice_err(&path, File::create(&path));
+        nice_err(&path, file.set_len(filesize));
+    }
+
+    /// Create a new file with the given name and contents in this directory,
+    /// or panic on error.
+    pub fn create_bytes<P: AsRef<Path>>(&self, name: P, contents: &[u8]) {
+        let path = self.dir.join(name);
+        nice_err(&path, self.try_create_bytes(&path, contents));
+    }
+
+    /// Try to create a new file with the given name and contents in this
+    /// directory.
+    fn try_create_bytes<P: AsRef<Path>>(
+        &self,
+        path: P,
+        contents: &[u8],
+    ) -> io::Result<()> {
+        let mut file = File::create(&path)?;
+        file.write_all(contents)?;
+        file.flush()
+    }
+
+    /// Remove a file with the given name from this directory.
+    pub fn remove<P: AsRef<Path>>(&self, name: P) {
+        let path = self.dir.join(name);
+        nice_err(&path, fs::remove_file(&path));
+    }
+
+    /// Create a new directory with the given path (and any directories above
+    /// it) inside this directory.
+    pub fn create_dir<P: AsRef<Path>>(&self, path: P) {
+        let path = self.dir.join(path);
+        nice_err(&path, repeat(|| fs::create_dir_all(&path)));
+    }
+
+    /// Creates a new command that is set to use the ripgrep executable in
+    /// this working directory.
+    ///
+    /// This also:
+    ///
+    /// * Unsets the `RIPGREP_CONFIG_PATH` environment variable.
+    /// * Sets the `--path-separator` to `/` so that paths have the same output
+    ///   on all systems. Tests that need to check `--path-separator` itself
+    ///   can simply pass it again to override it.
+    pub fn command(&self) -> process::Command {
+        let mut cmd = process::Command::new(&self.bin());
+        cmd.env_remove("RIPGREP_CONFIG_PATH");
+        cmd.current_dir(&self.dir);
+        cmd.arg("--path-separator").arg("/");
+        cmd
+    }
+
+    /// Returns the path to the ripgrep executable.
+    ///
+    /// The executable is looked up one directory above `root`, with an
+    /// `.exe` suffix when compiled for Windows.
+    pub fn bin(&self) -> PathBuf {
+        let exe = if cfg!(windows) { "../rg.exe" } else { "../rg" };
+        self.root.join(exe)
+    }
+
+    /// Returns the path to this directory.
+    pub fn path(&self) -> &Path {
+        &self.dir
+    }
+
+    /// Creates a directory symlink to the src with the given target name
+    /// in this directory.
+    #[cfg(not(windows))]
+    pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
+        use std::os::unix::fs::symlink;
+        let src = self.dir.join(src);
+        let target = self.dir.join(target);
+        let _ = fs::remove_file(&target);
+        nice_err(&target, symlink(&src, &target));
+    }
+
+    /// Creates a directory symlink to the src with the given target name
+    /// in this directory.
+    #[cfg(windows)]
+    pub fn link_dir<S: AsRef<Path>, T: AsRef<Path>>(&self, src: S, target: T) {
+        use std::os::windows::fs::symlink_dir;
+        let src = self.dir.join(src);
+        let target = self.dir.join(target);
+        let _ = fs::remove_dir(&target);
+        nice_err(&target, symlink_dir(&src, &target));
+    }
+
+    /// Creates a file symlink to the src with the given target name
+    /// in this directory.
+    #[cfg(not(windows))]
+    pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
+        &self,
+        src: S,
+        target: T,
+    ) {
+        self.link_dir(src, target);
+    }
+
+    /// Creates a file symlink to the src with the given target name
+    /// in this directory.
+    #[cfg(windows)]
+    pub fn link_file<S: AsRef<Path>, T: AsRef<Path>>(
+        &self,
+        src: S,
+        target: T,
+    ) {
+        use std::os::windows::fs::symlink_file;
+        let src = self.dir.join(src);
+        let target = self.dir.join(target);
+        let _ = fs::remove_file(&target);
+        nice_err(&target, symlink_file(&src, &target));
+    }
+
+    /// Runs the given command and parses its captured stdout into a `T`.
+    ///
+    /// Stdout is decoded lossily as UTF-8 before parsing. This panics if the
+    /// command fails or if the text cannot be parsed into the requested
+    /// type.
+    pub fn stdout<E: fmt::Debug, T: FromStr<Err=E>>(
+        &self,
+        cmd: &mut process::Command,
+    ) -> T {
+        let captured = self.output(cmd);
+        let text = String::from_utf8_lossy(&captured.stdout);
+        text.parse().unwrap_or_else(|err| {
+            panic!("could not convert from string: {:?}\n\n{}", err, text)
+        })
+    }
+
+    /// Runs `cmd` to completion and returns everything it produced.
+    ///
+    /// Panics if the command cannot be run or if it exits unsuccessfully.
+    pub fn output(&self, cmd: &mut process::Command) -> process::Output {
+        // The output is bound first because `cmd` cannot be borrowed mutably
+        // (to run it) and immutably (to report it) in the same expression.
+        let finished = cmd.output().unwrap();
+        self.expect_success(cmd, finished)
+    }
+
+    /// Feeds `input` to the standard input of `cmd` and returns its output.
+    ///
+    /// All three standard streams of the child are piped. Panics if the
+    /// command cannot be spawned, if it exits unsuccessfully, or if writing
+    /// the input fails.
+    pub fn pipe(
+        &self,
+        cmd: &mut process::Command,
+        input: &str
+    ) -> process::Output {
+        cmd.stdin(process::Stdio::piped())
+            .stdout(process::Stdio::piped())
+            .stderr(process::Stdio::piped());
+
+        let mut child = cmd.spawn().unwrap();
+
+        // The write runs on its own thread so the parent can collect the
+        // child's output at the same time; doing both on one thread risks a
+        // deadlock between parent and child process.
+        let mut child_stdin =
+            child.stdin.take().expect("expected standard input");
+        let payload = input.to_owned();
+        let writer = thread::spawn(move || {
+            write!(child_stdin, "{}", payload)
+        });
+
+        let finished = child.wait_with_output().unwrap();
+        let output = self.expect_success(cmd, finished);
+        // First unwrap: the writer thread panicked. Second: the write failed.
+        writer.join().unwrap().unwrap();
+        output
+    }
+
+    /// Returns `o` unchanged if it is the output of a successful process run.
+    ///
+    /// Otherwise this panics with a report containing the command, the
+    /// working directory, the exit status and the captured stdout and stderr.
+    /// When stderr is empty, the report also asks whether the search ended
+    /// up with no results.
+    fn expect_success(
+        &self,
+        cmd: &process::Command,
+        o: process::Output
+    ) -> process::Output {
+        if o.status.success() {
+            return o;
+        }
+
+        let suggest = if o.stderr.is_empty() {
+            "\n\nDid your search end up with no results?"
+        } else {
+            ""
+        };
+        panic!("\n\n==========\n\
+                command failed but expected success!\
+                {}\
+                \n\ncommand: {:?}\
+                \ncwd: {}\
+                \n\nstatus: {}\
+                \n\nstdout: {}\
+                \n\nstderr: {}\
+                \n\n==========\n",
+               suggest, cmd, self.dir.display(), o.status,
+               String::from_utf8_lossy(&o.stdout),
+               String::from_utf8_lossy(&o.stderr));
+    }
+
+    /// Runs the given command and panics unless it exits with a failure
+    /// status.
+    ///
+    /// The panic message includes the command, the working directory, the
+    /// exit status and the captured stdout and stderr. This also panics if
+    /// the command cannot be run at all.
+    pub fn assert_err(&self, cmd: &mut process::Command) {
+        let o = cmd.output().unwrap();
+        if !o.status.success() {
+            // The command failed, which is what the caller expects.
+            return;
+        }
+
+        let captured_out = String::from_utf8_lossy(&o.stdout);
+        let captured_err = String::from_utf8_lossy(&o.stderr);
+        panic!(
+            "\n\n===== {:?} =====\n\
+             command succeeded but expected failure!\
+             \n\ncwd: {}\
+             \n\nstatus: {}\
+             \n\nstdout: {}\n\nstderr: {}\
+             \n\n=====\n",
+            cmd,
+            self.dir.display(),
+            o.status,
+            captured_out,
+            captured_err
+        );
+    }
+
+    /// Runs the given command and asserts that its exit code equals
+    /// `expected_code`.
+    ///
+    /// Panics if the command cannot be run, if its exit status carries no
+    /// exit code, or if the code differs from the expected one.
+    pub fn assert_exit_code(
+        &self,
+        expected_code: i32,
+        cmd: &mut process::Command,
+    ) {
+        let status = cmd.status().unwrap();
+        let code = status.code().unwrap();
+
+        assert_eq!(
+            expected_code, code,
+            "\n\n===== {:?} =====\n\
+             expected exit code did not match\
+             \n\nexpected: {}\
+             \n\nfound: {}\
+             \n\n=====\n",
+            cmd, expected_code, code
+        );
+    }
+
+    /// Runs the given command and asserts that it failed and that something
+    /// was printed to stderr.
+    ///
+    /// Panics if the command cannot be run, if it exits successfully, or if
+    /// it fails without writing anything to stderr. The panic message states
+    /// which of the two expectations was violated, followed by the working
+    /// directory, the exit status and the captured stdout and stderr.
+    pub fn assert_non_empty_stderr(&self, cmd: &mut process::Command) {
+        let o = cmd.output().unwrap();
+        // Report the expectation that was actually violated, so that a
+        // failing command with empty stderr is not described as a success.
+        let problem = if o.status.success() {
+            "command succeeded but expected failure!"
+        } else if o.stderr.is_empty() {
+            "command failed but printed nothing to stderr!"
+        } else {
+            return;
+        };
+        panic!("\n\n===== {:?} =====\n\
+                {}\
+                \n\ncwd: {}\
+                \n\nstatus: {}\
+                \n\nstdout: {}\n\nstderr: {}\
+                \n\n=====\n",
+               cmd, problem, self.dir.display(), o.status,
+               String::from_utf8_lossy(&o.stdout),
+               String::from_utf8_lossy(&o.stderr));
+    }
+}
+
+/// A simple wrapper around a process::Command with some conveniences.
+#[derive(Debug)]
+pub struct TestCommand {
+    /// The dir used to launch this command.
+    dir: Dir,
+    /// The actual command we use to control the process.
+    cmd: Command,
+}
+
+/// Unwraps `res`, returning the success value as is.
+///
+/// On `Err`, this panics with a message made of `path` followed by the
+/// error's debug representation.
+fn nice_err<T, E: error::Error>(
+    path: &Path,
+    res: Result<T, E>,
+) -> T {
+    res.unwrap_or_else(|err| panic!("{}: {:?}", path.display(), err))
+}
+
+/// Calls `f` until it succeeds, giving up after 10 attempts.
+///
+/// Every failed attempt is followed by a 500 millisecond sleep. If all
+/// attempts fail, the error from the final attempt is returned.
+fn repeat<F: FnMut() -> io::Result<()>>(mut f: F) -> io::Result<()> {
+    let mut attempts_left = 10;
+    loop {
+        match f() {
+            Ok(()) => return Ok(()),
+            Err(err) => {
+                thread::sleep(Duration::from_millis(500));
+                attempts_left -= 1;
+                if attempts_left == 0 {
+                    return Err(err);
+                }
+            }
+        }
+    }
+}
--- a/tests/workdir.rs
+++ b/tests/workdir.rs
@@ -48,6 +48,15 @@ impl WorkDir {
        }
    }

+    /// Like `new`, but also returns a command whose program is set to
+    /// ripgrep's executable and whose current working directory is set to
+    /// this work dir.
+    pub fn new_with(name: &str) -> (WorkDir, process::Command) {
+        let wd = WorkDir::new(name);
+        let command = wd.command();
+        (wd, command)
+    }
+
    /// Create a new file with the given name and contents in this directory,
    /// or panic on error.
    pub fn create<P: AsRef<Path>>(&self, name: P, contents: &str) {
@@ -106,10 +115,18 @@ impl WorkDir {

    /// Creates a new command that is set to use the ripgrep executable in
    /// this working directory.
+    ///
+    /// This also:
+    ///
+    /// * Unsets the `RIPGREP_CONFIG_PATH` environment variable for the child.
+    /// * Sets the `--path-separator` to `/` so that paths have the same output
+    ///   on all systems. Tests that need to check `--path-separator` itself
+    ///   can simply pass it again to override it.
    pub fn command(&self) -> process::Command {
        let mut cmd = process::Command::new(&self.bin());
        cmd.env_remove("RIPGREP_CONFIG_PATH");
        cmd.current_dir(&self.dir);
+        cmd.arg("--path-separator").arg("/");
        cmd
    }
Author	SHA1	Message	Date
Andrew Gallant	dc9cb42ee8	PROGRESS: tests: re-tool integration tests	2018-08-07 18:38:24 -04:00
Andrew Gallant	584ef9ef34	ripgrep: migrate to libripgrep	2018-08-07 18:38:24 -04:00
Andrew Gallant	4bce2dff5d	libripgrep: initial commit introducing libripgrep libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation.	2018-08-07 18:23:13 -04:00
Andrew Gallant	545db65cbc	ci: test libripgrep	2018-08-06 19:20:42 -04:00
Andrew Gallant	4869e6972b	grep: remove senseless test It was pulling in a sizable data file and doesn't appear to be testing anything meaningful that isn't covered by a variety of other tests.	2018-08-06 19:20:04 -04:00
llogiq	ad9befbc1d	deps: update bytecount to 0.3.2 PR #1003	2018-08-06 06:44:16 -04:00