mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 09:40:22 -07:00
File path filtering works and is pretty fast.
I'm pretty disappointed by the performance of regex sets. They are apparently spending a lot of their time in construction of the DFA, which probably means that the DFA is just too big. It turns out that it's actually faster to build an *additional* normal regex with the alternation of every glob and use it as a first-pass filter over every file path. If there's a match, only then do we try the more expensive RegexSet.
This commit is contained in:
parent
b55ecf34c7
commit
065c449980
@ -29,6 +29,9 @@ regex-syntax = { version = "0.3.1", path = "/home/andrew/rust/regex/regex-syntax
|
|||||||
rustc-serialize = "0.3"
|
rustc-serialize = "0.3"
|
||||||
walkdir = "0.1"
|
walkdir = "0.1"
|
||||||
|
|
||||||
|
[features]
|
||||||
|
simd-accel = ["regex/simd-accel"]
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
glob = "0.2"
|
glob = "0.2"
|
||||||
lazy_static = "0.2"
|
lazy_static = "0.2"
|
||||||
|
368
src/gitignore.rs
Normal file
368
src/gitignore.rs
Normal file
@ -0,0 +1,368 @@
|
|||||||
|
/*!
|
||||||
|
The gitignore module provides a way of reading a gitignore file and applying
|
||||||
|
it to a particular file name to determine whether it should be ignored or not.
|
||||||
|
The motivation for this submodule is performance and portability:
|
||||||
|
|
||||||
|
1. There is a gitignore crate on crates.io, but it uses the standard `glob`
|
||||||
|
crate and checks patterns one-by-one. This is a reasonable implementation,
|
||||||
|
but not suitable for the performance we need here.
|
||||||
|
2. We could shell out to a `git` sub-command like ls-files or status, but it
|
||||||
|
seems better to not rely on the existence of external programs for a search
|
||||||
|
tool. Besides, we need to implement this logic anyway to support things like
|
||||||
|
an .xrepignore file.
|
||||||
|
|
||||||
|
The key implementation detail here is that a single gitignore file is compiled
|
||||||
|
into a single RegexSet, which can be used to report which globs match a
|
||||||
|
particular file name. We can then do a quick post-processing step to implement
|
||||||
|
additional rules such as whitelists (prefix of `!`) or directory-only globs
|
||||||
|
(suffix of `/`).
|
||||||
|
*/
|
||||||
|
|
||||||
|
// TODO(burntsushi): Implement something similar, but for Mercurial. We can't
|
||||||
|
// use this exact implementation because hgignore files are different.
|
||||||
|
|
||||||
|
use std::env;
|
||||||
|
use std::error::Error as StdError;
|
||||||
|
use std::fmt;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::{self, BufRead};
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use regex;
|
||||||
|
|
||||||
|
use glob;
|
||||||
|
|
||||||
|
/// Represents an error that can occur when parsing a gitignore file.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
Glob(glob::Error),
|
||||||
|
Regex(regex::Error),
|
||||||
|
Io(io::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StdError for Error {
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
match *self {
|
||||||
|
Error::Glob(ref err) => err.description(),
|
||||||
|
Error::Regex(ref err) => err.description(),
|
||||||
|
Error::Io(ref err) => err.description(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Error {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match *self {
|
||||||
|
Error::Glob(ref err) => err.fmt(f),
|
||||||
|
Error::Regex(ref err) => err.fmt(f),
|
||||||
|
Error::Io(ref err) => err.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<glob::Error> for Error {
|
||||||
|
fn from(err: glob::Error) -> Error {
|
||||||
|
Error::Glob(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<regex::Error> for Error {
|
||||||
|
fn from(err: regex::Error) -> Error {
|
||||||
|
Error::Regex(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<io::Error> for Error {
|
||||||
|
fn from(err: io::Error) -> Error {
|
||||||
|
Error::Io(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Gitignore is a matcher for the glob patterns in a single gitignore file.
|
||||||
|
pub struct Gitignore {
|
||||||
|
set: glob::Set,
|
||||||
|
root: PathBuf,
|
||||||
|
patterns: Vec<Pattern>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Gitignore {
|
||||||
|
/// Create a new gitignore glob matcher from the gitignore file at the
|
||||||
|
/// given path. The root of the gitignore file is the basename of path.
|
||||||
|
pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Gitignore, Error> {
|
||||||
|
let root = match path.as_ref().parent() {
|
||||||
|
Some(parent) => parent.to_path_buf(),
|
||||||
|
None => env::current_dir().unwrap_or(Path::new("/").to_path_buf()),
|
||||||
|
};
|
||||||
|
let mut builder = GitignoreBuilder::new(root);
|
||||||
|
try!(builder.add_path(path));
|
||||||
|
builder.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Create a new gitignore glob matcher from the given root directory and
|
||||||
|
/// string containing the contents of a gitignore file.
|
||||||
|
pub fn from_str<P: AsRef<Path>>(
|
||||||
|
root: P,
|
||||||
|
gitignore: &str,
|
||||||
|
) -> Result<Gitignore, Error> {
|
||||||
|
let mut builder = GitignoreBuilder::new(root);
|
||||||
|
try!(builder.add_str(gitignore));
|
||||||
|
builder.build()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if the given file path should be ignored
|
||||||
|
/// according to the globs in this gitignore. `is_dir` should be true if
|
||||||
|
/// the path refers to a directory and false otherwise.
|
||||||
|
///
|
||||||
|
/// Before matching path, its prefix (as determined by a common suffix
|
||||||
|
/// of the directory containing this gitignore) is stripped. If there is
|
||||||
|
/// no common suffix/prefix overlap, then path is assumed to reside in the
|
||||||
|
/// same directory as this gitignore file.
|
||||||
|
///
|
||||||
|
/// If the given path has a `./` prefix then it is stripped before
|
||||||
|
/// matching.
|
||||||
|
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||||
|
let mut path = path.as_ref();
|
||||||
|
if let Ok(p) = path.strip_prefix("./") {
|
||||||
|
path = p;
|
||||||
|
}
|
||||||
|
if let Ok(p) = path.strip_prefix(&self.root) {
|
||||||
|
path = p;
|
||||||
|
}
|
||||||
|
self.matched_utf8(&*path.to_string_lossy(), is_dir)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Like matched, but takes a path that has already been stripped and
|
||||||
|
/// converted to UTF-8.
|
||||||
|
pub fn matched_utf8(&self, path: &str, is_dir: bool) -> Match {
|
||||||
|
// A single regex with a bunch of alternations of glob patterns is
|
||||||
|
// unfortunately typically faster than a regex, so we use it as a
|
||||||
|
// first pass filter.
|
||||||
|
if !self.set.is_match(path) {
|
||||||
|
return Match::None;
|
||||||
|
}
|
||||||
|
let pat = match self.set.matches(path).iter().last() {
|
||||||
|
None => return Match::None,
|
||||||
|
Some(i) => &self.patterns[i],
|
||||||
|
};
|
||||||
|
if pat.whitelist {
|
||||||
|
Match::Whitelist
|
||||||
|
} else if !pat.only_dir || is_dir {
|
||||||
|
Match::Ignored
|
||||||
|
} else {
|
||||||
|
Match::None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The outcome of testing a path against a gitignore matcher.
#[derive(Clone, Debug)]
pub enum Match {
    /// No glob in the gitignore file matched the path.
    None,
    /// The last glob that matched says the path should be ignored.
    Ignored,
    /// The last glob that matched says the path should be whitelisted.
    Whitelist,
}

impl Match {
    /// Reports whether this result means the path should be ignored. Only
    /// `Match::Ignored` does; `Match::None` and `Match::Whitelist` do not.
    pub fn is_ignored(&self) -> bool {
        if let Match::Ignored = *self {
            true
        } else {
            false
        }
    }
}
|
||||||
|
|
||||||
|
/// GitignoreBuilder constructs a matcher for a single set of globs from a
|
||||||
|
/// .gitignore file.
|
||||||
|
pub struct GitignoreBuilder {
|
||||||
|
builder: glob::SetBuilder,
|
||||||
|
root: PathBuf,
|
||||||
|
patterns: Vec<Pattern>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// The bookkeeping kept for one line of a gitignore file. The glob matching
/// itself is done elsewhere; this only records the glob text and the extra
/// options attached to it, such as whether it is a whitelist entry.
#[derive(Clone, Debug, Default)]
struct Pattern {
    /// The glob text as handed to the glob set builder.
    pat: String,
    /// True when the line had a `!` prefix.
    whitelist: bool,
    /// True when the line had a `/` suffix.
    only_dir: bool,
}
|
||||||
|
|
||||||
|
impl GitignoreBuilder {
|
||||||
|
/// Create a new builder for a gitignore file.
|
||||||
|
///
|
||||||
|
/// The path given should be the path at which the globs for this gitignore
|
||||||
|
/// file should be matched.
|
||||||
|
pub fn new<P: AsRef<Path>>(root: P) -> GitignoreBuilder {
|
||||||
|
GitignoreBuilder {
|
||||||
|
builder: glob::SetBuilder::new(),
|
||||||
|
root: root.as_ref().to_path_buf(),
|
||||||
|
patterns: vec![],
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Builds a new matcher from the glob patterns added so far.
|
||||||
|
///
|
||||||
|
/// Once a matcher is built, no new glob patterns can be added to it.
|
||||||
|
pub fn build(self) -> Result<Gitignore, Error> {
|
||||||
|
Ok(Gitignore {
|
||||||
|
set: try!(self.builder.build()),
|
||||||
|
root: self.root,
|
||||||
|
patterns: self.patterns,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add each pattern line from the file path given.
|
||||||
|
pub fn add_path<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||||
|
let rdr = io::BufReader::new(try!(File::open(&path)));
|
||||||
|
// println!("adding ignores from: {}", path.as_ref().display());
|
||||||
|
for line in rdr.lines() {
|
||||||
|
try!(self.add(&try!(line)));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add each pattern line from the string given.
|
||||||
|
pub fn add_str(&mut self, gitignore: &str) -> Result<(), Error> {
|
||||||
|
for line in gitignore.lines() {
|
||||||
|
try!(self.add(line));
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a line from a gitignore file to this builder.
|
||||||
|
///
|
||||||
|
/// If the line could not be parsed as a glob, then an error is returned.
|
||||||
|
pub fn add(&mut self, mut line: &str) -> Result<(), Error> {
|
||||||
|
if line.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let mut pat = Pattern::default();
|
||||||
|
let mut opts = glob::MatchOptions::default();
|
||||||
|
let has_slash = line.chars().any(|c| c == '/');
|
||||||
|
// If the line starts with an escaped '!', then remove the escape.
|
||||||
|
// Otherwise, if it starts with an unescaped '!', then this is a
|
||||||
|
// whitelist pattern.
|
||||||
|
match line.chars().nth(0) {
|
||||||
|
Some('#') => return Ok(()),
|
||||||
|
Some('\\') => {
|
||||||
|
match line.chars().nth(1) {
|
||||||
|
Some('!') | Some('#') => {
|
||||||
|
line = &line[1..];
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Some('!') => {
|
||||||
|
pat.whitelist = true;
|
||||||
|
line = &line[1..];
|
||||||
|
}
|
||||||
|
Some('/') => {
|
||||||
|
// `man gitignore` says that if a glob starts with a slash,
|
||||||
|
// then the glob can only match the beginning of a path
|
||||||
|
// (relative to the location of gitignore). We achieve this by
|
||||||
|
// simply banning wildcards from matching /.
|
||||||
|
opts.require_literal_separator = true;
|
||||||
|
line = &line[1..];
|
||||||
|
}
|
||||||
|
_ => {}
|
||||||
|
}
|
||||||
|
// If it ends with a slash, then this should only match directories,
|
||||||
|
// but the slash should otherwise not be used while globbing.
|
||||||
|
if let Some((i, c)) = line.char_indices().rev().nth(0) {
|
||||||
|
if c == '/' {
|
||||||
|
pat.only_dir = true;
|
||||||
|
line = &line[..i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If there is a literal slash, then we note that so that globbing
|
||||||
|
// doesn't let wildcards match slashes. Otherwise, we need to let
|
||||||
|
// the pattern match anywhere, so we add a `**/` prefix to achieve
|
||||||
|
// that behavior.
|
||||||
|
pat.pat = line.to_string();
|
||||||
|
if has_slash {
|
||||||
|
opts.require_literal_separator = true;
|
||||||
|
} else {
|
||||||
|
pat.pat = format!("**/{}", pat.pat);
|
||||||
|
}
|
||||||
|
try!(self.builder.add_with(&pat.pat, &opts));
|
||||||
|
self.patterns.push(pat);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::Gitignore;

    /// Compiles `gitignore` rooted at `root` and reports whether `path` is
    /// ignored by it.
    fn is_ignored(
        root: &str,
        gitignore: &str,
        path: &str,
        is_dir: bool,
    ) -> bool {
        let matcher = Gitignore::from_str(root, gitignore).unwrap();
        matcher.matched(path, is_dir).is_ignored()
    }

    // Declares a test asserting that a path is ignored. The path is treated
    // as a file unless `$is_dir` is given.
    macro_rules! ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    // Declares a test asserting that a path is NOT ignored. The path is
    // treated as a file unless `$is_dir` is given.
    macro_rules! not_ignored {
        ($name:ident, $root:expr, $gi:expr, $path:expr) => {
            not_ignored!($name, $root, $gi, $path, false);
        };
        ($name:ident, $root:expr, $gi:expr, $path:expr, $is_dir:expr) => {
            #[test]
            fn $name() {
                assert!(!is_ignored($root, $gi, $path, $is_dir));
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/xrep";

    ignored!(ig1, ROOT, "months", "months");
    ignored!(ig2, ROOT, "*.lock", "Cargo.lock");
    ignored!(ig3, ROOT, "*.rs", "src/main.rs");
    ignored!(ig4, ROOT, "src/*.rs", "src/main.rs");
    ignored!(ig5, ROOT, "/*.c", "cat-file.c");
    ignored!(ig6, ROOT, "/src/*.rs", "src/main.rs");
    ignored!(ig7, ROOT, "!src/main.rs\n*.rs", "src/main.rs");
    ignored!(ig8, ROOT, "foo/", "foo", true);
    ignored!(ig9, ROOT, "**/foo", "foo");
    ignored!(ig10, ROOT, "**/foo", "src/foo");
    ignored!(ig11, ROOT, "**/foo/**", "src/foo/bar");
    ignored!(ig12, ROOT, "**/foo/**", "wat/src/foo/bar/baz");
    ignored!(ig13, ROOT, "**/foo/bar", "foo/bar");
    ignored!(ig14, ROOT, "**/foo/bar", "src/foo/bar");
    ignored!(ig15, ROOT, "abc/**", "abc/x");
    ignored!(ig16, ROOT, "abc/**", "abc/x/y");
    ignored!(ig17, ROOT, "abc/**", "abc/x/y/z");
    ignored!(ig18, ROOT, "a/**/b", "a/b");
    ignored!(ig19, ROOT, "a/**/b", "a/x/b");
    ignored!(ig20, ROOT, "a/**/b", "a/x/y/b");
    ignored!(ig21, ROOT, r"\!xy", "!xy");
    ignored!(ig22, ROOT, r"\#foo", "#foo");
    ignored!(ig23, ROOT, "foo", "./foo");
    ignored!(ig24, ROOT, "target", "grep/target");

    not_ignored!(ignot1, ROOT, "amonths", "months");
    not_ignored!(ignot2, ROOT, "monthsa", "months");
    not_ignored!(ignot3, ROOT, "src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot4, ROOT, "/*.c", "mozilla-sha1/sha1.c");
    not_ignored!(ignot5, ROOT, "/src/*.rs", "src/grep/src/main.rs");
    not_ignored!(ignot6, ROOT, "*.rs\n!src/main.rs", "src/main.rs");
    not_ignored!(ignot7, ROOT, "foo/", "foo", false);
    not_ignored!(ignot8, ROOT, "**/foo/**", "wat/src/afoo/bar/baz");
    not_ignored!(ignot9, ROOT, "**/foo/**", "wat/src/fooa/bar/baz");
    not_ignored!(ignot10, ROOT, "**/foo/bar", "foo/src/bar");
    not_ignored!(ignot11, ROOT, "#foo", "#foo");
    not_ignored!(ignot12, ROOT, "\n\n\n", "foo");
}
|
38
src/glob.rs
38
src/glob.rs
@ -20,7 +20,7 @@ use std::path;
|
|||||||
use std::str;
|
use std::str;
|
||||||
|
|
||||||
use regex;
|
use regex;
|
||||||
use regex::bytes::{RegexSet, SetMatches};
|
use regex::bytes::{Regex, RegexSet, SetMatches};
|
||||||
|
|
||||||
/// Represents an error that can occur when parsing a glob pattern.
|
/// Represents an error that can occur when parsing a glob pattern.
|
||||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
@ -63,7 +63,8 @@ impl fmt::Display for Error {
|
|||||||
/// pass.
|
/// pass.
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct Set {
|
pub struct Set {
|
||||||
re: RegexSet,
|
re: Regex,
|
||||||
|
set: RegexSet,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Set {
|
impl Set {
|
||||||
@ -76,21 +77,12 @@ impl Set {
|
|||||||
/// Returns every glob pattern (by sequence number) that matches the given
|
/// Returns every glob pattern (by sequence number) that matches the given
|
||||||
/// path.
|
/// path.
|
||||||
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
|
pub fn matches<T: AsRef<[u8]>>(&self, path: T) -> SetMatches {
|
||||||
self.re.matches(path.as_ref())
|
self.set.matches(path.as_ref())
|
||||||
}
|
|
||||||
|
|
||||||
/// Populates the given slice with corresponding patterns that matched.
|
|
||||||
pub fn matches_with<T: AsRef<[u8]>>(
|
|
||||||
&self,
|
|
||||||
path: T,
|
|
||||||
matches: &mut [bool],
|
|
||||||
) -> bool {
|
|
||||||
self.re.matches_with(path.as_ref(), matches)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the number of glob patterns in this set.
|
/// Returns the number of glob patterns in this set.
|
||||||
pub fn len(&self) -> usize {
|
pub fn len(&self) -> usize {
|
||||||
self.re.len()
|
self.set.len()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -113,8 +105,18 @@ impl SetBuilder {
|
|||||||
/// Once a matcher is built, no new patterns can be added to it.
|
/// Once a matcher is built, no new patterns can be added to it.
|
||||||
pub fn build(&self) -> Result<Set, regex::Error> {
|
pub fn build(&self) -> Result<Set, regex::Error> {
|
||||||
let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o));
|
let it = self.pats.iter().map(|&(ref p, ref o)| p.to_regex_with(o));
|
||||||
let re = try!(RegexSet::new(it));
|
let set = try!(RegexSet::new(it));
|
||||||
Ok(Set { re: re })
|
|
||||||
|
let mut joined = String::new();
|
||||||
|
for &(ref p, ref o) in &self.pats {
|
||||||
|
let part = format!("(?:{})", p.to_regex_with(o));
|
||||||
|
if !joined.is_empty() {
|
||||||
|
joined.push('|');
|
||||||
|
}
|
||||||
|
joined.push_str(&part);
|
||||||
|
}
|
||||||
|
let re = try!(Regex::new(&joined));
|
||||||
|
Ok(Set { re: re, set: set })
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add a new pattern to this set.
|
/// Add a new pattern to this set.
|
||||||
@ -151,10 +153,10 @@ pub struct Pattern {
|
|||||||
#[derive(Clone, Debug, Default)]
|
#[derive(Clone, Debug, Default)]
|
||||||
pub struct MatchOptions {
|
pub struct MatchOptions {
|
||||||
/// When true, matching is done case insensitively.
|
/// When true, matching is done case insensitively.
|
||||||
case_insensitive: bool,
|
pub case_insensitive: bool,
|
||||||
/// When true, neither `*` nor `?` match the current system's path
|
/// When true, neither `*` nor `?` match the current system's path
|
||||||
/// separator.
|
/// separator.
|
||||||
require_literal_separator: bool,
|
pub require_literal_separator: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||||
@ -199,6 +201,7 @@ impl Pattern {
|
|||||||
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
|
pub fn to_regex_with(&self, options: &MatchOptions) -> String {
|
||||||
let sep = path::MAIN_SEPARATOR.to_string();
|
let sep = path::MAIN_SEPARATOR.to_string();
|
||||||
let mut re = String::new();
|
let mut re = String::new();
|
||||||
|
re.push_str("(?-u)");
|
||||||
if options.case_insensitive {
|
if options.case_insensitive {
|
||||||
re.push_str("(?i)");
|
re.push_str("(?i)");
|
||||||
}
|
}
|
||||||
@ -457,7 +460,6 @@ mod tests {
|
|||||||
fn $name() {
|
fn $name() {
|
||||||
let pat = Pattern::new($pat).unwrap();
|
let pat = Pattern::new($pat).unwrap();
|
||||||
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
|
let re = Regex::new(&pat.to_regex_with(&$options)).unwrap();
|
||||||
// println!("{:?}", re);
|
|
||||||
assert!(!re.is_match($path));
|
assert!(!re.is_match($path));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
200
src/ignore.rs
Normal file
200
src/ignore.rs
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
/*!
|
||||||
|
The ignore module is responsible for managing the state required to determine
|
||||||
|
whether a *single* file path should be searched or not.
|
||||||
|
|
||||||
|
In general, there are two ways to ignore a particular file:
|
||||||
|
|
||||||
|
1. Specify an ignore rule in some "global" configuration, such as a
|
||||||
|
$HOME/.xrepignore or on the command line.
|
||||||
|
2. A specific ignore file (like .gitignore) found during directory traversal.
|
||||||
|
|
||||||
|
The `IgnoreDir` type handles ignore patterns for any one particular directory
|
||||||
|
(including "global" ignore patterns), while the `Ignore` type handles a stack
|
||||||
|
of `IgnoreDir`s for use during directory traversal.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use std::error::Error as StdError;
|
||||||
|
use std::fmt;
|
||||||
|
use std::path::{Path, PathBuf};
|
||||||
|
|
||||||
|
use gitignore::{self, Gitignore, GitignoreBuilder, Match};
|
||||||
|
|
||||||
|
/// Represents an error that can occur when parsing a gitignore file.
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub enum Error {
|
||||||
|
Gitignore(gitignore::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl StdError for Error {
|
||||||
|
fn description(&self) -> &str {
|
||||||
|
match *self {
|
||||||
|
Error::Gitignore(ref err) => err.description(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Display for Error {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||||
|
match *self {
|
||||||
|
Error::Gitignore(ref err) => err.fmt(f),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<gitignore::Error> for Error {
|
||||||
|
fn from(err: gitignore::Error) -> Error {
|
||||||
|
Error::Gitignore(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Ignore represents a collection of ignore patterns organized by directory.
|
||||||
|
/// In particular, a stack is maintained, where the top of the stack
|
||||||
|
/// corresponds to the current directory being searched and the bottom of the
|
||||||
|
/// stack represents the root of a search. Ignore patterns at the top of the
|
||||||
|
/// stack take precedence over ignore patterns at the bottom of the stack.
|
||||||
|
pub struct Ignore {
|
||||||
|
/// A stack of ignore patterns at each directory level of traversal.
|
||||||
|
/// A directory that contributes no ignore patterns is `None`.
|
||||||
|
stack: Vec<Option<IgnoreDir>>,
|
||||||
|
// TODO(burntsushi): Add other patterns from the command line here.
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ignore {
|
||||||
|
/// Create an empty set of ignore patterns.
|
||||||
|
pub fn new() -> Ignore {
|
||||||
|
Ignore { stack: vec![] }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Add a directory to the stack.
|
||||||
|
pub fn push<P: AsRef<Path>>(&mut self, path: P) -> Result<(), Error> {
|
||||||
|
self.stack.push(try!(IgnoreDir::new(path)));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pop a directory from the stack.
|
||||||
|
///
|
||||||
|
/// This panics if the stack is empty.
|
||||||
|
pub fn pop(&mut self) {
|
||||||
|
self.stack.pop().expect("non-empty stack");
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if the given file path should be ignored.
|
||||||
|
pub fn ignored<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> bool {
|
||||||
|
let path = path.as_ref();
|
||||||
|
for id in self.stack.iter().rev().filter_map(|id| id.as_ref()) {
|
||||||
|
match id.matched(path, is_dir) {
|
||||||
|
Match::Whitelist => return false,
|
||||||
|
Match::Ignored => return true,
|
||||||
|
Match::None => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// IgnoreDir represents a set of ignore patterns retrieved from a single
|
||||||
|
/// directory.
|
||||||
|
pub struct IgnoreDir {
|
||||||
|
/// The path to this directory as given.
|
||||||
|
path: PathBuf,
|
||||||
|
/// A single accumulation of glob patterns for this directory, matched
|
||||||
|
/// using gitignore semantics.
|
||||||
|
///
|
||||||
|
/// This will include patterns from xrepignore as well. The patterns are
|
||||||
|
/// ordered so that precedence applies automatically (e.g., xrepignore
|
||||||
|
/// patterns procede gitignore patterns).
|
||||||
|
gi: Option<Gitignore>,
|
||||||
|
// TODO(burntsushi): Matching other types of glob patterns that don't
|
||||||
|
// conform to gitignore will probably require refactoring this approach.
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IgnoreDir {
|
||||||
|
/// Create a new matcher for the given directory.
|
||||||
|
///
|
||||||
|
/// If no ignore glob patterns could be found in the directory then `None`
|
||||||
|
/// is returned.
|
||||||
|
pub fn new<P: AsRef<Path>>(path: P) -> Result<Option<IgnoreDir>, Error> {
|
||||||
|
let mut id = IgnoreDir {
|
||||||
|
path: path.as_ref().to_path_buf(),
|
||||||
|
gi: None,
|
||||||
|
};
|
||||||
|
let mut ok = false;
|
||||||
|
let mut builder = GitignoreBuilder::new(&id.path);
|
||||||
|
// The ordering here is important. Later globs have higher precedence.
|
||||||
|
ok = builder.add_path(id.path.join(".gitignore")).is_ok() || ok;
|
||||||
|
ok = builder.add_path(id.path.join(".agignore")).is_ok() || ok;
|
||||||
|
ok = builder.add_path(id.path.join(".xrepignore")).is_ok() || ok;
|
||||||
|
if !ok {
|
||||||
|
Ok(None)
|
||||||
|
} else {
|
||||||
|
id.gi = Some(try!(builder.build()));
|
||||||
|
Ok(Some(id))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if the given file path should be ignored
|
||||||
|
/// according to the globs in this directory. `is_dir` should be true if
|
||||||
|
/// the path refers to a directory and false otherwise.
|
||||||
|
///
|
||||||
|
/// Before matching path, its prefix (as determined by a common suffix
|
||||||
|
/// of this directory) is stripped. If there is
|
||||||
|
/// no common suffix/prefix overlap, then path is assumed to reside
|
||||||
|
/// directly in this directory.
|
||||||
|
///
|
||||||
|
/// If the given path has a `./` prefix then it is stripped before
|
||||||
|
/// matching.
|
||||||
|
pub fn matched<P: AsRef<Path>>(&self, path: P, is_dir: bool) -> Match {
|
||||||
|
self.gi.as_ref()
|
||||||
|
.map(|gi| gi.matched(path, is_dir))
|
||||||
|
.unwrap_or(Match::None)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use std::path::Path;
    use gitignore::GitignoreBuilder;
    use super::IgnoreDir;

    /// Builds an `IgnoreDir` for `root` from the contents of a gitignore
    /// (`gi`) followed by the contents of an xrepignore (`xi`).
    fn ignore_dir(root: &str, gi: &str, xi: &str) -> IgnoreDir {
        let mut builder = GitignoreBuilder::new(root);
        builder.add_str(gi).unwrap();
        builder.add_str(xi).unwrap();
        IgnoreDir {
            path: Path::new(root).to_path_buf(),
            gi: Some(builder.build().unwrap()),
        }
    }

    // Declares a test asserting that the (non-directory) path is ignored.
    macro_rules! ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                let id = ignore_dir($root, $gi, $xi);
                assert!(id.matched($path, false).is_ignored());
            }
        };
    }

    // Declares a test asserting that the (non-directory) path is NOT ignored.
    macro_rules! not_ignored_dir {
        ($name:ident, $root:expr, $gi:expr, $xi:expr, $path:expr) => {
            #[test]
            fn $name() {
                let id = ignore_dir($root, $gi, $xi);
                assert!(!id.matched($path, false).is_ignored());
            }
        };
    }

    const ROOT: &'static str = "/home/foobar/rust/xrep";

    ignored_dir!(id1, ROOT, "src/main.rs", "", "src/main.rs");
    ignored_dir!(id2, ROOT, "", "src/main.rs", "src/main.rs");
    ignored_dir!(id3, ROOT, "!src/main.rs", "*.rs", "src/main.rs");

    not_ignored_dir!(idnot1, ROOT, "*.rs", "!src/main.rs", "src/main.rs");
}
|
88
src/main.rs
88
src/main.rs
@ -30,6 +30,8 @@ use docopt::Docopt;
|
|||||||
use grep::Grep;
|
use grep::Grep;
|
||||||
use walkdir::{WalkDir, WalkDirIterator};
|
use walkdir::{WalkDir, WalkDirIterator};
|
||||||
|
|
||||||
|
use ignore::Ignore;
|
||||||
|
|
||||||
macro_rules! errored {
|
macro_rules! errored {
|
||||||
($($tt:tt)*) => {
|
($($tt:tt)*) => {
|
||||||
return Err(From::from(format!($($tt)*)));
|
return Err(From::from(format!($($tt)*)));
|
||||||
@ -43,7 +45,9 @@ macro_rules! eprintln {
|
|||||||
}}
|
}}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
mod gitignore;
|
||||||
mod glob;
|
mod glob;
|
||||||
|
mod ignore;
|
||||||
|
|
||||||
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
||||||
|
|
||||||
@ -72,24 +76,40 @@ impl Args {
|
|||||||
if self.arg_path.is_empty() {
|
if self.arg_path.is_empty() {
|
||||||
return errored!("Searching stdin is not currently supported.");
|
return errored!("Searching stdin is not currently supported.");
|
||||||
}
|
}
|
||||||
|
let mut stdout = io::BufWriter::new(io::stdout());
|
||||||
|
let mut ig = Ignore::new();
|
||||||
for p in &self.arg_path {
|
for p in &self.arg_path {
|
||||||
let mut it = WalkDir::new(p).into_iter();
|
let mut it = WalkEventIter::from(WalkDir::new(p));
|
||||||
loop {
|
loop {
|
||||||
let ent = match it.next() {
|
let ev = match it.next() {
|
||||||
None => break,
|
None => break,
|
||||||
|
Some(Ok(ev)) => ev,
|
||||||
Some(Err(err)) => {
|
Some(Err(err)) => {
|
||||||
eprintln!("{}", err);
|
eprintln!("{}", err);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Some(Ok(ent)) => ent,
|
|
||||||
};
|
};
|
||||||
if is_hidden(&ent) {
|
match ev {
|
||||||
if ent.file_type().is_dir() {
|
WalkEvent::Exit => {
|
||||||
it.skip_current_dir();
|
ig.pop();
|
||||||
}
|
}
|
||||||
|
WalkEvent::Dir(ent) => {
|
||||||
|
try!(ig.push(ent.path()));
|
||||||
|
if is_hidden(&ent) || ig.ignored(ent.path(), true) {
|
||||||
|
// if is_hidden(&ent) {
|
||||||
|
it.it.skip_current_dir();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
println!("{}", ent.path().display());
|
}
|
||||||
|
WalkEvent::File(ent) => {
|
||||||
|
if is_hidden(&ent) || ig.ignored(ent.path(), false) {
|
||||||
|
// if is_hidden(&ent) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let _ = writeln!(
|
||||||
|
&mut stdout, "{}", ent.path().display());
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(0)
|
Ok(0)
|
||||||
@ -108,6 +128,60 @@ impl Args {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// WalkEventIter transforms a WalkDir iterator into an iterator that more
|
||||||
|
/// accurately describes the directory tree. Namely, it emits events that are
|
||||||
|
/// one of three types: directory, file or "exit." An "exit" event means that
|
||||||
|
/// the entire contents of a directory have been enumerated.
|
||||||
|
struct WalkEventIter {
|
||||||
|
depth: usize,
|
||||||
|
it: walkdir::Iter,
|
||||||
|
next: Option<result::Result<walkdir::DirEntry, walkdir::Error>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum WalkEvent {
|
||||||
|
Dir(walkdir::DirEntry),
|
||||||
|
File(walkdir::DirEntry),
|
||||||
|
Exit,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl From<walkdir::WalkDir> for WalkEventIter {
|
||||||
|
fn from(it: walkdir::WalkDir) -> WalkEventIter {
|
||||||
|
WalkEventIter { depth: 0, it: it.into_iter(), next: None }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Iterator for WalkEventIter {
|
||||||
|
type Item = io::Result<WalkEvent>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<io::Result<WalkEvent>> {
|
||||||
|
let dent = self.next.take().or_else(|| self.it.next());
|
||||||
|
let depth = match dent {
|
||||||
|
None => 0,
|
||||||
|
Some(Ok(ref dent)) => dent.depth(),
|
||||||
|
Some(Err(ref err)) => err.depth(),
|
||||||
|
};
|
||||||
|
if depth < self.depth {
|
||||||
|
self.depth -= 1;
|
||||||
|
self.next = dent;
|
||||||
|
return Some(Ok(WalkEvent::Exit));
|
||||||
|
}
|
||||||
|
self.depth = depth;
|
||||||
|
match dent {
|
||||||
|
None => None,
|
||||||
|
Some(Err(err)) => Some(Err(From::from(err))),
|
||||||
|
Some(Ok(dent)) => {
|
||||||
|
if dent.file_type().is_dir() {
|
||||||
|
self.depth += 1;
|
||||||
|
Some(Ok(WalkEvent::Dir(dent)))
|
||||||
|
} else {
|
||||||
|
Some(Ok(WalkEvent::File(dent)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn is_hidden(ent: &walkdir::DirEntry) -> bool {
|
fn is_hidden(ent: &walkdir::DirEntry) -> bool {
|
||||||
ent.depth() > 0 &&
|
ent.depth() > 0 &&
|
||||||
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
ent.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user