From 88524a2b5232c951f9d4c0a169a046a7354251bd Mon Sep 17 00:00:00 2001
From: Andrew Gallant
Date: Sat, 30 Sep 2023 08:17:54 -0400
Subject: [PATCH] core: dedup patterns

ripgrep does not, and likely never will, report which pattern matched.
Because of that, we can dedup the patterns via just their concrete
syntax without any fuss.

This is somewhat of a pathological case because you don't expect the end
user to pass duplicate patterns in general. But if the end user
generated a list of, say, names and did not dedup them, then ripgrep
could end up spending a lot of extra time on those duplicates if there
are many of them. By deduping them explicitly in the application, we
essentially remove their extra cost completely.
---
 crates/core/args.rs | 34 ++++++++++++++++++++++------------
 1 file changed, 22 insertions(+), 12 deletions(-)

diff --git a/crates/core/args.rs b/crates/core/args.rs
index 0da9f00f..05d015aa 100644
--- a/crates/core/args.rs
+++ b/crates/core/args.rs
@@ -1,4 +1,5 @@
 use std::{
+    collections::HashSet,
     env,
     ffi::{OsStr, OsString},
     io::{self, IsTerminal, Write},
@@ -1436,35 +1437,44 @@ impl ArgMatches {
         if self.is_present("files") || self.is_present("type-list") {
             return Ok(vec![]);
         }
+        let mut seen = HashSet::new();
         let mut pats = vec![];
+        let mut add = |pat: String| {
+            if !seen.contains(&pat) {
+                seen.insert(pat.clone());
+                pats.push(pat);
+            }
+        };
         match self.values_of_os("regexp") {
             None => {
                 if self.values_of_os("file").is_none() {
                     if let Some(os_pat) = self.value_of_os("pattern") {
-                        pats.push(self.pattern_from_os_str(os_pat)?);
+                        add(self.pattern_from_os_str(os_pat)?);
                     }
                 }
             }
             Some(os_pats) => {
                 for os_pat in os_pats {
-                    pats.push(self.pattern_from_os_str(os_pat)?);
+                    add(self.pattern_from_os_str(os_pat)?);
                 }
             }
         }
         if let Some(paths) = self.values_of_os("file") {
             for path in paths {
                 if path == "-" {
-                    pats.extend(
-                        cli::patterns_from_stdin()?
-                            .into_iter()
-                            .map(|p| self.pattern_from_string(p)),
-                    );
+                    let it = cli::patterns_from_stdin()?
+                        .into_iter()
+                        .map(|p| self.pattern_from_string(p));
+                    for pat in it {
+                        add(pat);
+                    }
                 } else {
-                    pats.extend(
-                        cli::patterns_from_path(path)?
-                            .into_iter()
-                            .map(|p| self.pattern_from_string(p)),
-                    );
+                    let it = cli::patterns_from_path(path)?
+                        .into_iter()
+                        .map(|p| self.pattern_from_string(p));
+                    for pat in it {
+                        add(pat);
+                    }
                 }
             }
         }