Add an --auto-hybrid-regex flag that falls back to PCRE2.

This patch touches five files:
  * CHANGELOG.md: lists the new flag under the feature enhancements (#1155).
  * complete/_rg: adds zsh completions for --auto-hybrid-regex and its
    negation.
  * src/app.rs: defines the --auto-hybrid-regex/--no-auto-hybrid-regex
    switches and makes them and --pcre2/--no-pcre2 override one another.
  * src/args.rs: when the flag is present, tries the Rust regex matcher
    first and the PCRE2 matcher second, and reports both errors if neither
    one builds.
  * tests/feature.rs: adds a test that searches with a look-behind pattern;
    the test returns early when dir.is_pcre2() is false.

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f36245f3..f80f460a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -49,6 +49,8 @@ Feature enhancements:
   Add support for Brotli and Zstd to the `-z/--search-zip` flag.
 * [FEATURE #1138](https://github.com/BurntSushi/ripgrep/pull/1138):
   Add `--no-ignore-dot` flag for ignoring `.ignore` files.
+* [FEATURE #1155](https://github.com/BurntSushi/ripgrep/pull/1155):
+  Add `--auto-hybrid-regex` flag for automatically falling back to PCRE2.
 * [FEATURE #1159](https://github.com/BurntSushi/ripgrep/pull/1159):
   ripgrep's exit status logic should now match GNU grep. See updated man page.
 * [FEATURE #1170](https://github.com/BurntSushi/ripgrep/pull/1170):
diff --git a/complete/_rg b/complete/_rg
index f26a688d..ac3a52a1 100644
--- a/complete/_rg
+++ b/complete/_rg
@@ -112,6 +112,10 @@ _rg() {
     '--hidden[search hidden files and directories]'
     $no"--no-hidden[don't search hidden files and directories]"
 
+    + '(hybrid)' # hybrid regex options
+    '--auto-hybrid-regex[dynamically use PCRE2 if necessary]'
+    $no"--no-auto-hybrid-regex[don't dynamically use PCRE2 if necessary]"
+
     + '(ignore)' # Ignore-file options
     "(--no-ignore-global --no-ignore-parent --no-ignore-vcs --no-ignore-dot)--no-ignore[don't respect ignore files]"
     $no'(--ignore-global --ignore-parent --ignore-vcs --ignore-dot)--ignore[respect ignore files]'
diff --git a/src/app.rs b/src/app.rs
index b102d7cd..7ec54118 100644
--- a/src/app.rs
+++ b/src/app.rs
@@ -547,6 +547,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
     // flags are hidden and merely mentioned in the docs of the corresponding
     // "positive" flag.
     flag_after_context(&mut args);
+    flag_auto_hybrid_regex(&mut args);
     flag_before_context(&mut args);
     flag_binary(&mut args);
     flag_block_buffered(&mut args);
@@ -683,6 +684,50 @@ This overrides the --context flag.
     args.push(arg);
 }
 
+fn flag_auto_hybrid_regex(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Dynamically use PCRE2 if necessary.";
+    const LONG: &str = long!("\
+When this flag is used, ripgrep will dynamically choose between supported regex
+engines depending on the features used in a pattern. When ripgrep chooses a
+regex engine, it applies that choice for every regex provided to ripgrep (e.g.,
+via multiple -e/--regexp or -f/--file flags).
+
+As an example of how this flag might behave, ripgrep will attempt to use
+its default finite automata based regex engine whenever the pattern can be
+successfully compiled with that regex engine. If PCRE2 is enabled and if the
+pattern given could not be compiled with the default regex engine, then PCRE2
+will be automatically used for searching. If PCRE2 isn't available, then this
+flag has no effect because there is only one regex engine to choose from.
+
+In the future, ripgrep may adjust its heuristics for how it decides which
+regex engine to use. In general, the heuristics will be limited to a static
+analysis of the patterns, and not to any specific runtime behavior observed
+while searching files.
+
+The primary downside of using this flag is that it may not always be obvious
+which regex engine ripgrep uses, and thus, the match semantics or performance
+profile of ripgrep may subtly and unexpectedly change. However, in many cases,
+all regex engines will agree on what constitutes a match and it can be nice
+to transparently support more advanced regex features like look-around and
+backreferences without explicitly needing to enable them.
+
+This flag can be disabled with --no-auto-hybrid-regex.
+");
+    let arg = RGArg::switch("auto-hybrid-regex")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-auto-hybrid-regex")
+        .overrides("pcre2")
+        .overrides("no-pcre2");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-auto-hybrid-regex")
+        .hidden()
+        .overrides("auto-hybrid-regex")
+        .overrides("pcre2")
+        .overrides("no-pcre2");
+    args.push(arg);
+}
+
 fn flag_before_context(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Show NUM lines before each match.";
     const LONG: &str = long!("\
@@ -1938,12 +1983,16 @@ This flag can be disabled with --no-pcre2.
 ");
     let arg = RGArg::switch("pcre2").short("P")
         .help(SHORT).long_help(LONG)
-        .overrides("no-pcre2");
+        .overrides("no-pcre2")
+        .overrides("auto-hybrid-regex")
+        .overrides("no-auto-hybrid-regex");
     args.push(arg);
 
     let arg = RGArg::switch("no-pcre2")
         .hidden()
-        .overrides("pcre2");
+        .overrides("pcre2")
+        .overrides("auto-hybrid-regex")
+        .overrides("no-auto-hybrid-regex");
     args.push(arg);
 }
 
diff --git a/src/args.rs b/src/args.rs
index 389de1dd..80693da8 100644
--- a/src/args.rs
+++ b/src/args.rs
@@ -599,6 +599,25 @@ impl ArgMatches {
         if self.is_present("pcre2") {
             let matcher = self.matcher_pcre2(patterns)?;
             Ok(PatternMatcher::PCRE2(matcher))
+        } else if self.is_present("auto-hybrid-regex") {
+            let rust_err = match self.matcher_rust(patterns) {
+                Ok(matcher) => return Ok(PatternMatcher::RustRegex(matcher)),
+                Err(err) => err,
+            };
+            log::debug!(
+                "error building Rust regex in hybrid mode:\n{}", rust_err,
+            );
+            let pcre_err = match self.matcher_pcre2(patterns) {
+                Ok(matcher) => return Ok(PatternMatcher::PCRE2(matcher)),
+                Err(err) => err,
+            };
+            Err(From::from(format!(
+                "regex could not be compiled with either the default regex \
+                 engine or with PCRE2.\n\n\
+                 default regex engine error:\n{}\n{}\n{}\n\n\
+                 PCRE2 regex engine error:\n{}",
+                "~".repeat(79), rust_err, "~".repeat(79), pcre_err,
+            )))
         } else {
             let matcher = match self.matcher_rust(patterns) {
                 Ok(matcher) => matcher,
diff --git a/tests/feature.rs b/tests/feature.rs
index 6ee2bf87..be9f4bec 100644
--- a/tests/feature.rs
+++ b/tests/feature.rs
@@ -681,6 +681,21 @@ rgtest!(f1138_no_ignore_dot, |dir: Dir, mut cmd: TestCommand| {
     eqnice!("bar\n", cmd.arg("--ignore-file").arg(".fzf-ignore").stdout());
 });
 
+// See: https://github.com/BurntSushi/ripgrep/issues/1155
+rgtest!(f1155_auto_hybrid_regex, |dir: Dir, mut cmd: TestCommand| {
+    // No sense in testing a hybrid regex engine with only one engine!
+    if !dir.is_pcre2() {
+        return;
+    }
+
+    dir.create("sherlock", SHERLOCK);
+    cmd.arg("--no-pcre2").arg("--auto-hybrid-regex").arg(r"(?<=the )Sherlock");
+
+    let expected = "\
+sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
+";
+    eqnice!(expected, cmd.stdout());
+});
 // See: https://github.com/BurntSushi/ripgrep/issues/1207
 //