From a18cf6ec39cd1d8fc7b4061858bb9263b6e5e073 Mon Sep 17 00:00:00 2001 From: sharkdp Date: Tue, 17 Sep 2019 20:07:23 +0200 Subject: [PATCH] ignore: add existence check for ignore files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds a simple `.exists()` check for `.gitignore`, `.ignore`, and other similar files before actually calling `File::open(…)` in `GitIgnoreBuilder::add`. The reason is that a simple existence check via `stat` can be faster than actually trying to `open` the file, see https://stackoverflow.com/a/12774387/704831. As we typically expect(?) the number of directories *without* ignore files to be much larger than the number of directories *with* ignore files, this leads to an overall speedup. The performance gain is not huge for `rg`, but can be quite significant if more `.gitignore`-like files are added via `add_custom_ignore_filename`. The speedup is *larger* for folders with *low* files-per-directory ratios. Note though that we do not do this check on Windows until a specific analysis there suggests this is beneficial. Namely, Windows generally has slower file system operations, so it's not clear whether this speculative check is actually a benefit or not. Benchmark results ----------------- `rg --files` in my home folder (200k results, 6.5 files per directory): | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | |:---|---:|---:|---:|---:| | `./rg-master --files` | 396.4 ± 3.2 | 390.9 | 400.0 | 1.05 | | `./rg-feature --files` | 376.0 ± 3.6 | 369.3 | 383.5 | 1.00 | `rg --files --hidden` in my home folder (800k results, 5.4 files per directory) | Command | Mean [s] | Min [s] | Max [s] | Relative | |:---|---:|---:|---:|---:| | `./rg-master --files --hidden` | 1.575 ± 0.012 | 1.560 | 1.597 | 1.06 | | `./rg-feature --files --hidden` | 1.479 ± 0.011 | 1.464 | 1.496 | 1.00 | `rg --files` in the chromium-79.0.3915.2 source tree (300k results, 12.7 files per directory) | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | |:---|---:|---:|---:|---:| | `~/rg-master --files` | 445.2 ± 5.3 | 435.6 | 453.0 | 1.04 | | `~/rg-feature --files` | 428.9 ± 7.0 | 418.2 | 440.0 | 1.00 | `rg --files` in the linux-5.3 source tree (65k results, 15.1 files per directory) | Command | Mean [ms] | Min [ms] | Max [ms] | Relative | |:---|---:|---:|---:|---:| | `./rg-master --files` | 94.5 ± 1.9 | 89.8 | 98.5 | 1.02 | | `./rg-feature --files` | 92.6 ± 2.7 | 88.4 | 98.7 | 1.00 | Closes #1381 --- CHANGELOG.md | 5 +++++ ignore/src/dir.rs | 16 +++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 68762a14..083000de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,11 @@ TBD === TODO +Performance improvements: + +* [PERF #1381](https://github.com/BurntSushi/ripgrep/pull/1381): + Directory traversal is sped up with speculative ignore-file existence checks. + Bug fixes: * [BUG #1335](https://github.com/BurntSushi/ripgrep/issues/1335): diff --git a/ignore/src/dir.rs b/ignore/src/dir.rs index e4440fc4..54e1f7be 100644 --- a/ignore/src/dir.rs +++ b/ignore/src/dir.rs @@ -689,7 +689,21 @@ pub fn create_gitignore>( builder.case_insensitive(case_insensitive).unwrap(); for name in names { let gipath = dir.join(name.as_ref()); - errs.maybe_push_ignore_io(builder.add(gipath)); + // This check is not necessary, but is added for performance. Namely, + // a simple stat call checking for existence can often be just a bit + // quicker than actually trying to open a file. Since the number of + // directories without ignore files likely greatly exceeds the number + // with ignore files, this check generally makes sense. + // + // However, until demonstrated otherwise, we speculatively do not do + // this on Windows since Windows is notorious for having slow file + // system operations. Namely, it's not clear whether this analysis + // makes sense on Windows. + // + // For more details: https://github.com/BurntSushi/ripgrep/pull/1381 + if cfg!(windows) || gipath.exists() { + errs.maybe_push_ignore_io(builder.add(gipath)); + } } let gi = match builder.build() { Ok(gi) => gi,