From 6a8051b258408343c4cf164acdc4cc2cd7928129 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Sun, 25 Sep 2016 20:10:28 -0400 Subject: [PATCH] Don't union inner literals of repetitions. If we do, this results in extracting `foofoofoo` from `(\wfoo){3}`, which is wrong. This does prevent us from extracting `foofoofoo` from `(foo){3}`, which is unfortunate, but we miss plenty of other stuff too. Literal extracting needs a good rethink (all the way down into the regex engine). Fixes #93 --- grep/src/literals.rs | 11 ++++++----- tests/tests.rs | 9 +++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/grep/src/literals.rs b/grep/src/literals.rs index f1685270..4cd34a87 100644 --- a/grep/src/literals.rs +++ b/grep/src/literals.rs @@ -8,7 +8,6 @@ Note that this implementation is incredibly suspicious. We need something more principled. */ use std::cmp; -use std::iter; use regex::bytes::Regex; use syntax::{ @@ -181,8 +180,6 @@ fn repeat_range_literals( lits: &mut Literals, mut f: F, ) { - use syntax::Expr::*; - if min == 0 { // This is a bit conservative. If `max` is set, then we could // treat this as a finite set of alternations. For now, we @@ -190,8 +187,12 @@ fn repeat_range_literals( lits.cut(); } else { let n = cmp::min(lits.limit_size(), min as usize); - let es = iter::repeat(e.clone()).take(n).collect(); - f(&Concat(es), lits); + // We only extract literals from a single repetition, even though + // we could do more. e.g., `a{3}` will have `a` extracted instead of + // `aaa`. The reason is that inner literal extraction can't be unioned + // across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}` + // is wrong. 
+ f(e, lits); if n < min as usize { lits.cut(); } diff --git a/tests/tests.rs b/tests/tests.rs index 85ca1164..a0b01c6a 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -703,6 +703,15 @@ clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| { assert_eq!(lines, ".foo:test\n"); }); +// See: https://github.com/BurntSushi/ripgrep/issues/93 +clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".", +|wd: WorkDir, mut cmd: Command| { + wd.create("foo", "192.168.1.1"); + + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, "foo:192.168.1.1\n"); +}); + // See: https://github.com/BurntSushi/ripgrep/issues/20 sherlock!(feature_20, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-filename");