grep: small literal detection fix

This commit tweaks the inner literal detection heuristic such that if it
comes up with any literal that is all whitespace, then it's likely a bad
literal to look for since it's so common. Therefore, we simply reject the
inner literal optimization in this case and let the regex engine do its
thang.
This commit is contained in:
Andrew Gallant 2018-07-17 20:23:31 -04:00
parent d17ca45063
commit 7e5a590276
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44

View File

@ -67,6 +67,16 @@ impl LiteralSets {
lit = req;
}
// Special case: if we have any literals that are all whitespace,
// then this is probably a failing of the literal detection since
// whitespace is typically pretty common. In this case, don't bother
// with inner literal scanning at all and just defer to the regex.
let any_all_white = req_lits.iter()
.any(|lit| lit.iter().all(|&b| (b as char).is_whitespace()));
if any_all_white {
return None;
}
// Special case: if we detected an alternation of inner required
// literals and its longest literal is bigger than the longest
// prefix/suffix, then choose the alternation. In practice, this