[ignore] Add extensive test for gitignore matching (#551)

[ignore] tests and new matched_path_or_any_parents method

The test data (gitignore rules and expected result) is based on the test
repo at <https://github.com/behnam/gitignore-test>.

The new `matched_path_or_any_parents` method fixes a bug
in gitignore matching where rules of the form `<dir>/*` result in ignoring
only first-level files, but not deeper files. This is not correct, as `<dir>/*`
matches the first-level directories under `<dir>`, which causes everything
beneath them to be ignored as well. The new method fixes it by trying to match
all parents in the path against the gitignore rules.

A separate method is necessary because trying to match all parents
necessarily entails a performance hit.
This commit is contained in:
Behnam Esfahbod ✅
2017-07-12 20:06:08 -06:00
committed by Andrew Gallant
parent aeac85389d
commit 84f4b4ef68
5 changed files with 524 additions and 3 deletions

View File

@@ -3,6 +3,7 @@ language: rust
env:
global:
- PROJECT_NAME=ripgrep
- RUST_BACKTRACE: full
matrix:
include:
# Nightly channel.

View File

@@ -1,6 +1,7 @@
environment:
global:
PROJECT_NAME: ripgrep
RUST_BACKTRACE: full
matrix:
- TARGET: i686-pc-windows-gnu
CHANNEL: stable

View File

@@ -169,8 +169,8 @@ impl Gitignore {
self.num_whitelists
}
/// Returns whether the given file path matched a pattern in this gitignore
/// matcher.
/// Returns whether the given path (file or directory) matched a pattern in
/// this gitignore matcher.
///
/// `is_dir` should be true if the path refers to a directory and false
/// otherwise.
@@ -191,6 +191,46 @@ impl Gitignore {
self.matched_stripped(self.strip(path.as_ref()), is_dir)
}
/// Returns whether the given path (file or directory, and expected to be
/// under the root) or any of its parent directories (up to the root)
/// matched a pattern in this gitignore matcher.
///
/// NOTE: This method is more expensive than walking the directory hierarchy
/// top-to-bottom and matching the entries. But it is easier to use in cases
/// where a list of paths is available without a hierarchy.
///
/// `is_dir` should be true if the path refers to a directory and false
/// otherwise. It describes only `path` itself: every parent visited while
/// walking up is necessarily a directory and is matched as one.
///
/// The given path is matched relative to the path given when building
/// the matcher. Specifically, before matching `path`, its prefix (as
/// determined by a common suffix of the directory containing this
/// gitignore) is stripped. If there is no common suffix/prefix overlap,
/// then `path` is assumed to be relative to this matcher.
///
/// # Panics
///
/// When debug assertions are enabled, this panics if the path still has a
/// root component after stripping, i.e. it is not under the matcher's root.
pub fn matched_path_or_any_parents<P: AsRef<Path>>(
    &self,
    path: P,
    is_dir: bool,
) -> Match<&Glob> {
    if self.is_empty() {
        return Match::None;
    }
    let path = self.strip(path.as_ref());
    // The message text is matched verbatim by a `should_panic` test, so it
    // must stay byte-identical.
    debug_assert!(
        !path.has_root(),
        "path is expect to be under the root"
    );
    // Only the path itself is described by the caller's `is_dir` flag.
    match self.matched_stripped(path, is_dir) {
        Match::None => {}
        a_match => return a_match,
    }
    // Every proper ancestor is a directory, so directory-only rules (those
    // written with a trailing `/`) must be allowed to match it even when
    // the original path is a file.
    let mut current = path;
    while let Some(parent) = current.parent() {
        match self.matched_stripped(parent, true) {
            Match::None => current = parent,
            a_match => return a_match,
        }
    }
    Match::None
}
/// Like matched, but takes a path that has already been stripped.
fn matched_stripped<P: AsRef<Path>>(
&self,
@@ -440,7 +480,7 @@ impl GitignoreBuilder {
}
/// Toggle whether the globs should be matched case insensitively or not.
///
///
/// This is disabled by default.
pub fn case_insensitive(
&mut self, yes: bool

View File

@@ -0,0 +1,216 @@
# Based on https://github.com/behnam/gitignore-test/blob/master/.gitignore
### file in root
# MATCH /file_root_00
file_root_00
# NO_MATCH
file_root_01/
# NO_MATCH
file_root_02/*
# NO_MATCH
file_root_03/**
# MATCH /file_root_10
/file_root_10
# NO_MATCH
/file_root_11/
# NO_MATCH
/file_root_12/*
# NO_MATCH
/file_root_13/**
# NO_MATCH
*/file_root_20
# NO_MATCH
*/file_root_21/
# NO_MATCH
*/file_root_22/*
# NO_MATCH
*/file_root_23/**
# MATCH /file_root_30
**/file_root_30
# NO_MATCH
**/file_root_31/
# NO_MATCH
**/file_root_32/*
# NO_MATCH
**/file_root_33/**
### file in sub-dir
# MATCH /parent_dir/file_deep_00
file_deep_00
# NO_MATCH
file_deep_01/
# NO_MATCH
file_deep_02/*
# NO_MATCH
file_deep_03/**
# NO_MATCH
/file_deep_10
# NO_MATCH
/file_deep_11/
# NO_MATCH
/file_deep_12/*
# NO_MATCH
/file_deep_13/**
# MATCH /parent_dir/file_deep_20
*/file_deep_20
# NO_MATCH
*/file_deep_21/
# NO_MATCH
*/file_deep_22/*
# NO_MATCH
*/file_deep_23/**
# MATCH /parent_dir/file_deep_30
**/file_deep_30
# NO_MATCH
**/file_deep_31/
# NO_MATCH
**/file_deep_32/*
# NO_MATCH
**/file_deep_33/**
### dir in root
# MATCH /dir_root_00
dir_root_00
# MATCH /dir_root_01
dir_root_01/
# MATCH /dir_root_02
dir_root_02/*
# MATCH /dir_root_03
dir_root_03/**
# MATCH /dir_root_10
/dir_root_10
# MATCH /dir_root_11
/dir_root_11/
# MATCH /dir_root_12
/dir_root_12/*
# MATCH /dir_root_13
/dir_root_13/**
# NO_MATCH
*/dir_root_20
# NO_MATCH
*/dir_root_21/
# NO_MATCH
*/dir_root_22/*
# NO_MATCH
*/dir_root_23/**
# MATCH /dir_root_30
**/dir_root_30
# MATCH /dir_root_31
**/dir_root_31/
# MATCH /dir_root_32
**/dir_root_32/*
# MATCH /dir_root_33
**/dir_root_33/**
### dir in sub-dir
# MATCH /parent_dir/dir_deep_00
dir_deep_00
# MATCH /parent_dir/dir_deep_01
dir_deep_01/
# NO_MATCH
dir_deep_02/*
# NO_MATCH
dir_deep_03/**
# NO_MATCH
/dir_deep_10
# NO_MATCH
/dir_deep_11/
# NO_MATCH
/dir_deep_12/*
# NO_MATCH
/dir_deep_13/**
# MATCH /parent_dir/dir_deep_20
*/dir_deep_20
# MATCH /parent_dir/dir_deep_21
*/dir_deep_21/
# MATCH /parent_dir/dir_deep_22
*/dir_deep_22/*
# MATCH /parent_dir/dir_deep_23
*/dir_deep_23/**
# MATCH /parent_dir/dir_deep_30
**/dir_deep_30
# MATCH /parent_dir/dir_deep_31
**/dir_deep_31/
# MATCH /parent_dir/dir_deep_32
**/dir_deep_32/*
# MATCH /parent_dir/dir_deep_33
**/dir_deep_33/**

View File

@@ -0,0 +1,263 @@
extern crate ignore;
use std::path::Path;
use ignore::gitignore::{Gitignore, GitignoreBuilder};
/// Gitignore rules fixture that `get_gitignore` feeds to the builder.
/// The path is relative; presumably it resolves against the directory the
/// test binary is run from (the crate root under `cargo test`) -- TODO confirm.
const IGNORE_FILE: &'static str = "tests/gitignore_matched_path_or_any_parents_tests.gitignore";
/// Builds the matcher under test: rooted at the symbolic directory `ROOT`
/// and populated with the rules from `IGNORE_FILE`.
///
/// Panics (failing the calling test) if the fixture cannot be added to the
/// builder or the matcher cannot be built.
fn get_gitignore() -> Gitignore {
    let mut builder = GitignoreBuilder::new("ROOT");
    // `add` hands back the error instead of a `Result`; surface it so a
    // broken fixture is diagnosable from the test output.
    if let Some(err) = builder.add(IGNORE_FILE) {
        panic!("failed to open gitignore file {}: {}", IGNORE_FILE, err);
    }
    builder
        .build()
        .expect("gitignore fixture should build into a matcher")
}
/// A rooted path that does not live under the matcher's root must trip the
/// precondition check in `matched_path_or_any_parents`.
///
/// That check is a `debug_assert!`, so the panic only exists when debug
/// assertions are compiled in; the test is skipped otherwise instead of
/// failing spuriously (e.g. under `cargo test --release`).
#[cfg(debug_assertions)]
#[test]
#[should_panic(expected = "path is expect to be under the root")]
fn test_path_should_be_under_root() {
    let gitignore = get_gitignore();
    // No trailing assertion is needed: `should_panic` already fails the
    // test if this call returns normally.
    gitignore.matched_path_or_any_parents(Path::new("/tmp/some_file"), false);
}
/// Files located directly under the root, queried with `is_dir = false`.
#[test]
fn test_files_in_root() {
    // (path, must be ignored). Entries marked `false` must produce no match
    // at all, not merely "not ignored".
    let cases: [(&str, bool); 16] = [
        // 0x
        ("ROOT/file_root_00", true),
        ("ROOT/file_root_01", false),
        ("ROOT/file_root_02", false),
        ("ROOT/file_root_03", false),
        // 1x
        ("ROOT/file_root_10", true),
        ("ROOT/file_root_11", false),
        ("ROOT/file_root_12", false),
        ("ROOT/file_root_13", false),
        // 2x
        ("ROOT/file_root_20", false),
        ("ROOT/file_root_21", false),
        ("ROOT/file_root_22", false),
        ("ROOT/file_root_23", false),
        // 3x
        ("ROOT/file_root_30", true),
        ("ROOT/file_root_31", false),
        ("ROOT/file_root_32", false),
        ("ROOT/file_root_33", false),
    ];

    let gitignore = get_gitignore();
    for &(path, ignored) in cases.iter() {
        let outcome = gitignore.matched_path_or_any_parents(Path::new(path), false);
        if ignored {
            assert!(outcome.is_ignore(), "expected an ignore match for {}", path);
        } else {
            assert!(outcome.is_none(), "expected no match for {}", path);
        }
    }
}
/// Files located one directory below the root (`ROOT/parent_dir/...`),
/// queried with `is_dir = false`.
#[test]
fn test_files_in_deep() {
    // (path, must be ignored). Entries marked `false` must produce no match
    // at all, not merely "not ignored".
    let cases: [(&str, bool); 16] = [
        // 0x
        ("ROOT/parent_dir/file_deep_00", true),
        ("ROOT/parent_dir/file_deep_01", false),
        ("ROOT/parent_dir/file_deep_02", false),
        ("ROOT/parent_dir/file_deep_03", false),
        // 1x
        ("ROOT/parent_dir/file_deep_10", false),
        ("ROOT/parent_dir/file_deep_11", false),
        ("ROOT/parent_dir/file_deep_12", false),
        ("ROOT/parent_dir/file_deep_13", false),
        // 2x
        ("ROOT/parent_dir/file_deep_20", true),
        ("ROOT/parent_dir/file_deep_21", false),
        ("ROOT/parent_dir/file_deep_22", false),
        ("ROOT/parent_dir/file_deep_23", false),
        // 3x
        ("ROOT/parent_dir/file_deep_30", true),
        ("ROOT/parent_dir/file_deep_31", false),
        ("ROOT/parent_dir/file_deep_32", false),
        ("ROOT/parent_dir/file_deep_33", false),
    ];

    let gitignore = get_gitignore();
    for &(path, ignored) in cases.iter() {
        let outcome = gitignore.matched_path_or_any_parents(Path::new(path), false);
        if ignored {
            assert!(outcome.is_ignore(), "expected an ignore match for {}", path);
        } else {
            assert!(outcome.is_none(), "expected no match for {}", path);
        }
    }
}
/// Directories located directly under the root, plus a file and a nested
/// file beneath each of them. Every query passes `is_dir = true`, including
/// the paths that end in `file`.
#[test]
fn test_dirs_in_root() {
    // (directory name, directory itself ignored, entries beneath it ignored).
    // `false` means the query must produce no match at all. Rules ending in
    // `/*` or `/**` cover the contents but not the directory itself.
    let cases: [(&str, bool, bool); 16] = [
        ("dir_root_00", true, true),
        ("dir_root_01", true, true),
        ("dir_root_02", false, true),
        ("dir_root_03", false, true),
        ("dir_root_10", true, true),
        ("dir_root_11", true, true),
        ("dir_root_12", false, true),
        ("dir_root_13", false, true),
        ("dir_root_20", false, false),
        ("dir_root_21", false, false),
        ("dir_root_22", false, false),
        ("dir_root_23", false, false),
        ("dir_root_30", true, true),
        ("dir_root_31", true, true),
        ("dir_root_32", false, true),
        ("dir_root_33", false, true),
    ];

    let gitignore = get_gitignore();
    let check = |path: &str, ignored: bool| {
        let outcome = gitignore.matched_path_or_any_parents(Path::new(path), true);
        if ignored {
            assert!(outcome.is_ignore(), "expected an ignore match for {}", path);
        } else {
            assert!(outcome.is_none(), "expected no match for {}", path);
        }
    };

    for &(dir, dir_ignored, contents_ignored) in cases.iter() {
        let base = format!("ROOT/{}", dir);
        check(&base, dir_ignored);
        check(&format!("{}/file", base), contents_ignored);
        check(&format!("{}/child_dir/file", base), contents_ignored);
    }
}
/// Directories located one level below the root (`ROOT/parent_dir/...`),
/// plus a file and a nested file beneath each of them. Every query passes
/// `is_dir = true`, including the paths that end in `file`.
#[test]
fn test_dirs_in_deep() {
    // (directory name, directory itself ignored, entries beneath it ignored).
    // `false` means the query must produce no match at all. Rules ending in
    // `/*` or `/**` cover the contents but not the directory itself.
    let cases: [(&str, bool, bool); 16] = [
        ("dir_deep_00", true, true),
        ("dir_deep_01", true, true),
        ("dir_deep_02", false, true),
        ("dir_deep_03", false, true),
        ("dir_deep_10", false, false),
        ("dir_deep_11", false, false),
        ("dir_deep_12", false, false),
        ("dir_deep_13", false, false),
        ("dir_deep_20", true, true),
        ("dir_deep_21", true, true),
        ("dir_deep_22", false, true),
        ("dir_deep_23", false, true),
        ("dir_deep_30", true, true),
        ("dir_deep_31", true, true),
        ("dir_deep_32", false, true),
        ("dir_deep_33", false, true),
    ];

    let gitignore = get_gitignore();
    let check = |path: &str, ignored: bool| {
        let outcome = gitignore.matched_path_or_any_parents(Path::new(path), true);
        if ignored {
            assert!(outcome.is_ignore(), "expected an ignore match for {}", path);
        } else {
            assert!(outcome.is_none(), "expected no match for {}", path);
        }
    };

    for &(dir, dir_ignored, contents_ignored) in cases.iter() {
        let base = format!("ROOT/parent_dir/{}", dir);
        check(&base, dir_ignored);
        check(&format!("{}/file", base), contents_ignored);
        check(&format!("{}/child_dir/file", base), contents_ignored);
    }
}