ripgrep/grep/src/nonl.rs
Andrew Gallant cd08707c7c grep: upgrade to regex-syntax 0.5
This update brings with it many bug fixes:

  * Better error messages are printed overall. We also include
    explicit call out for unsupported features like backreferences
    and look-around.
  * Regexes like `\s*{` no longer emit incomprehensible errors.
  * Unicode escape sequences, such as `\u{..}` are now supported.

For the most part, this upgrade was done in a straight-forward way. We
resist the urge to refactor the `grep` crate, in anticipation of it
being rewritten anyway.

Note that we removed the `--fixed-strings` suggestion whenever a regex
syntax error occurs. In practice, I've found that it results in a lot of
false positives, and I believe that its use is not as paramount now that
regex parse errors are much more readable.

Closes #268, Closes #395, Closes #702, Closes #853
2018-03-13 22:55:39 -04:00

75 lines
2.5 KiB
Rust

use syntax::hir::{self, Hir, HirKind};
use {Error, Result};
/// Returns a new expression that is guaranteed to never match the given
/// ASCII character.
///
/// If the expression contains the literal byte, then an error is returned.
///
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
/// function panics.
pub fn remove(expr: Hir, byte: u8) -> Result<Hir> {
assert!(byte <= 0x7F);
let chr = byte as char;
assert!(chr.len_utf8() == 1);
Ok(match expr.into_kind() {
HirKind::Empty => Hir::empty(),
HirKind::Literal(hir::Literal::Unicode(c)) => {
if c == chr {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::literal(hir::Literal::Unicode(c))
}
HirKind::Literal(hir::Literal::Byte(b)) => {
if b as char == chr {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::literal(hir::Literal::Byte(b))
}
HirKind::Class(hir::Class::Unicode(mut cls)) => {
let remove = hir::ClassUnicode::new(Some(
hir::ClassUnicodeRange::new(chr, chr),
));
cls.difference(&remove);
if cls.iter().next().is_none() {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::class(hir::Class::Unicode(cls))
}
HirKind::Class(hir::Class::Bytes(mut cls)) => {
let remove = hir::ClassBytes::new(Some(
hir::ClassBytesRange::new(byte, byte),
));
cls.difference(&remove);
if cls.iter().next().is_none() {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::class(hir::Class::Bytes(cls))
}
HirKind::Anchor(x) => Hir::anchor(x),
HirKind::WordBoundary(x) => Hir::word_boundary(x),
HirKind::Repetition(mut x) => {
x.hir = Box::new(remove(*x.hir, byte)?);
Hir::repetition(x)
}
HirKind::Group(mut x) => {
x.hir = Box::new(remove(*x.hir, byte)?);
Hir::group(x)
}
HirKind::Concat(xs) => {
let xs = xs.into_iter()
.map(|e| remove(e, byte))
.collect::<Result<Vec<Hir>>>()?;
Hir::concat(xs)
}
HirKind::Alternation(xs) => {
let xs = xs.into_iter()
.map(|e| remove(e, byte))
.collect::<Result<Vec<Hir>>>()?;
Hir::alternation(xs)
}
})
}