mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-05-19 09:40:22 -07:00
Add file size exclusion to walker
A maximum filesize can be specified as an argument to a `WalkBuilder`. If a file exceeds the specified size it will be ignored as part of the resulting file/directory set. The filesize limit never applies to directories.
This commit is contained in:
parent
066f97d855
commit
49fd668712
@ -392,7 +392,9 @@ impl DirEntryRaw {
|
|||||||
/// continues.
|
/// continues.
|
||||||
/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
|
/// * Fifth, if the path hasn't been whitelisted and it is hidden, then the
|
||||||
/// path is skipped.
|
/// path is skipped.
|
||||||
/// * Sixth, if the path has made it this far then it is yielded in the
|
/// * Sixth, unless the path is a directory, the size of the file is compared
|
||||||
|
/// against the max filesize limit. If it exceeds the limit, it is skipped.
|
||||||
|
/// * Seventh, if the path has made it this far then it is yielded in the
|
||||||
/// iterator.
|
/// iterator.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct WalkBuilder {
|
pub struct WalkBuilder {
|
||||||
@ -400,6 +402,7 @@ pub struct WalkBuilder {
|
|||||||
ig_builder: IgnoreBuilder,
|
ig_builder: IgnoreBuilder,
|
||||||
parents: bool,
|
parents: bool,
|
||||||
max_depth: Option<usize>,
|
max_depth: Option<usize>,
|
||||||
|
max_filesize: Option<u64>,
|
||||||
follow_links: bool,
|
follow_links: bool,
|
||||||
sorter: Option<Arc<Fn(&OsString, &OsString) -> cmp::Ordering + 'static>>,
|
sorter: Option<Arc<Fn(&OsString, &OsString) -> cmp::Ordering + 'static>>,
|
||||||
threads: usize,
|
threads: usize,
|
||||||
@ -412,6 +415,7 @@ impl fmt::Debug for WalkBuilder {
|
|||||||
.field("ig_builder", &self.ig_builder)
|
.field("ig_builder", &self.ig_builder)
|
||||||
.field("parents", &self.parents)
|
.field("parents", &self.parents)
|
||||||
.field("max_depth", &self.max_depth)
|
.field("max_depth", &self.max_depth)
|
||||||
|
.field("max_filesize", &self.max_filesize)
|
||||||
.field("follow_links", &self.follow_links)
|
.field("follow_links", &self.follow_links)
|
||||||
.field("threads", &self.threads)
|
.field("threads", &self.threads)
|
||||||
.finish()
|
.finish()
|
||||||
@ -431,6 +435,7 @@ impl WalkBuilder {
|
|||||||
ig_builder: IgnoreBuilder::new(),
|
ig_builder: IgnoreBuilder::new(),
|
||||||
parents: true,
|
parents: true,
|
||||||
max_depth: None,
|
max_depth: None,
|
||||||
|
max_filesize: None,
|
||||||
follow_links: false,
|
follow_links: false,
|
||||||
sorter: None,
|
sorter: None,
|
||||||
threads: 0,
|
threads: 0,
|
||||||
@ -464,6 +469,7 @@ impl WalkBuilder {
|
|||||||
it: None,
|
it: None,
|
||||||
ig_root: ig_root.clone(),
|
ig_root: ig_root.clone(),
|
||||||
ig: ig_root.clone(),
|
ig: ig_root.clone(),
|
||||||
|
max_filesize: self.max_filesize,
|
||||||
parents: self.parents,
|
parents: self.parents,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -478,6 +484,7 @@ impl WalkBuilder {
|
|||||||
paths: self.paths.clone().into_iter(),
|
paths: self.paths.clone().into_iter(),
|
||||||
ig_root: self.ig_builder.build(),
|
ig_root: self.ig_builder.build(),
|
||||||
max_depth: self.max_depth,
|
max_depth: self.max_depth,
|
||||||
|
max_filesize: self.max_filesize,
|
||||||
follow_links: self.follow_links,
|
follow_links: self.follow_links,
|
||||||
parents: self.parents,
|
parents: self.parents,
|
||||||
threads: self.threads,
|
threads: self.threads,
|
||||||
@ -508,6 +515,12 @@ impl WalkBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Sets the maximum size, in bytes, of files yielded by the walker.
///
/// Files whose size is strictly greater than the limit are skipped.
/// `None`, which is the default, disables the limit. The limit never
/// applies to directories.
///
/// The value is stored on the builder and copied into the walkers that are
/// subsequently built from it.
|
||||||
|
pub fn max_filesize(&mut self, filesize: Option<u64>) -> &mut WalkBuilder {
|
||||||
|
self.max_filesize = filesize;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// The number of threads to use for traversal.
|
/// The number of threads to use for traversal.
|
||||||
///
|
///
|
||||||
/// Note that this only has an effect when using `build_parallel`.
|
/// Note that this only has an effect when using `build_parallel`.
|
||||||
@ -650,6 +663,7 @@ pub struct Walk {
|
|||||||
it: Option<WalkEventIter>,
|
it: Option<WalkEventIter>,
|
||||||
ig_root: Ignore,
|
ig_root: Ignore,
|
||||||
ig: Ignore,
|
ig: Ignore,
|
||||||
|
max_filesize: Option<u64>,
|
||||||
parents: bool,
|
parents: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -667,7 +681,10 @@ impl Walk {
|
|||||||
if ent.depth() == 0 {
|
if ent.depth() == 0 {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
skip_path(&self.ig, ent.path(), ent.file_type().is_dir())
|
|
||||||
|
let ft = ent.file_type().is_dir();
|
||||||
|
skip_path(&self.ig, ent.path(), ft) ||
|
||||||
|
skip_filesize(self.max_filesize, ent.path(), &ent.metadata().ok(), ft)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -824,6 +841,7 @@ pub struct WalkParallel {
|
|||||||
paths: vec::IntoIter<PathBuf>,
|
paths: vec::IntoIter<PathBuf>,
|
||||||
ig_root: Ignore,
|
ig_root: Ignore,
|
||||||
parents: bool,
|
parents: bool,
|
||||||
|
max_filesize: Option<u64>,
|
||||||
max_depth: Option<usize>,
|
max_depth: Option<usize>,
|
||||||
follow_links: bool,
|
follow_links: bool,
|
||||||
threads: usize,
|
threads: usize,
|
||||||
@ -886,6 +904,7 @@ impl WalkParallel {
|
|||||||
threads: threads,
|
threads: threads,
|
||||||
parents: self.parents,
|
parents: self.parents,
|
||||||
max_depth: self.max_depth,
|
max_depth: self.max_depth,
|
||||||
|
max_filesize: self.max_filesize,
|
||||||
follow_links: self.follow_links,
|
follow_links: self.follow_links,
|
||||||
};
|
};
|
||||||
handles.push(thread::spawn(|| worker.run()));
|
handles.push(thread::spawn(|| worker.run()));
|
||||||
@ -1000,6 +1019,9 @@ struct Worker {
|
|||||||
/// The maximum depth of directories to descend. A value of `0` means no
|
/// The maximum depth of directories to descend. A value of `0` means no
|
||||||
/// descension at all.
|
/// descension at all.
|
||||||
max_depth: Option<usize>,
|
max_depth: Option<usize>,
|
||||||
|
/// The maximum size a searched file can be (in bytes). If a file exceeds
|
||||||
|
/// this size it will be skipped.
|
||||||
|
max_filesize: Option<u64>,
|
||||||
/// Whether to follow symbolic links or not. When this is enabled, loop
|
/// Whether to follow symbolic links or not. When this is enabled, loop
|
||||||
/// detection is performed.
|
/// detection is performed.
|
||||||
follow_links: bool,
|
follow_links: bool,
|
||||||
@ -1106,7 +1128,10 @@ impl Worker {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
let is_dir = dent.file_type().map_or(false, |ft| ft.is_dir());
|
let is_dir = dent.file_type().map_or(false, |ft| ft.is_dir());
|
||||||
if !skip_path(ig, dent.path(), is_dir) {
|
if !skip_path(ig, dent.path(), is_dir) &&
|
||||||
|
!skip_filesize(self.max_filesize, dent.path(),
|
||||||
|
&dent.metadata().ok(), is_dir)
|
||||||
|
{
|
||||||
self.queue.push(Message::Work(Work {
|
self.queue.push(Message::Work(Work {
|
||||||
dent: dent,
|
dent: dent,
|
||||||
ignore: ig.clone(),
|
ignore: ig.clone(),
|
||||||
@ -1253,6 +1278,34 @@ fn check_symlink_loop(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn skip_filesize(
|
||||||
|
max_filesize: Option<u64>,
|
||||||
|
path: &Path,
|
||||||
|
ent: &Option<Metadata>,
|
||||||
|
is_dir: bool
|
||||||
|
) -> bool {
|
||||||
|
if is_dir {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
let filesize = match *ent {
|
||||||
|
Some(ref md) => Some(md.len()),
|
||||||
|
None => None
|
||||||
|
};
|
||||||
|
|
||||||
|
match (filesize, max_filesize) {
|
||||||
|
(Some(fs), Some(m_fs)) => {
|
||||||
|
if fs > m_fs {
|
||||||
|
debug!("ignoring {}: {} bytes", path.display(), fs);
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool {
|
fn skip_path(ig: &Ignore, path: &Path, is_dir: bool) -> bool {
|
||||||
let m = ig.matched(path, is_dir);
|
let m = ig.matched(path, is_dir);
|
||||||
if m.is_ignore() {
|
if m.is_ignore() {
|
||||||
@ -1282,6 +1335,11 @@ mod tests {
|
|||||||
file.write_all(contents.as_bytes()).unwrap();
|
file.write_all(contents.as_bytes()).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates the file at `path` and sets its length to exactly `size` bytes
/// via `set_len`, panicking if either step fails.
fn wfile_size<P: AsRef<Path>>(path: P, size: u64) {
    File::create(path)
        .and_then(|handle| handle.set_len(size))
        .unwrap();
}
|
||||||
|
|
||||||
#[cfg(unix)]
|
#[cfg(unix)]
|
||||||
fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
|
fn symlink<P: AsRef<Path>, Q: AsRef<Path>>(src: P, dst: Q) {
|
||||||
use std::os::unix::fs::symlink;
|
use std::os::unix::fs::symlink;
|
||||||
@ -1438,6 +1496,32 @@ mod tests {
|
|||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn max_filesize() {
    let td = TempDir::new("walk-test-").unwrap();
    let root = td.path();
    mkdirp(root.join("a/b"));

    // (path relative to the temp dir, size in bytes) for every fixture file.
    let fixtures: [(&str, u64); 6] = [
        ("foo", 0),
        ("bar", 400),
        ("baz", 600),
        ("a/foo", 600),
        ("a/bar", 500),
        ("a/baz", 200),
    ];
    for &(rel, size) in fixtures.iter() {
        wfile_size(root.join(rel), size);
    }

    let mut builder = WalkBuilder::new(root);

    // No limit: every file and directory is yielded.
    assert_paths(root, &builder, &[
        "a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz",
    ]);
    // A limit of zero keeps only the empty file; directories are unaffected.
    assert_paths(root, builder.max_filesize(Some(0)), &[
        "a", "a/b", "foo"
    ]);
    // A file exactly at the limit (a/bar, 500 bytes) is still yielded.
    assert_paths(root, builder.max_filesize(Some(500)), &[
        "a", "a/b", "foo", "bar", "a/bar", "a/baz"
    ]);
    // A limit larger than every file behaves like no limit at all.
    assert_paths(root, builder.max_filesize(Some(50000)), &[
        "a", "a/b", "foo", "bar", "baz", "a/foo", "a/bar", "a/baz",
    ]);
}
|
||||||
|
|
||||||
#[cfg(unix)] // because symlinks on windows are weird
|
#[cfg(unix)] // because symlinks on windows are weird
|
||||||
#[test]
|
#[test]
|
||||||
fn symlinks() {
|
fn symlinks() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user