cli: error when searching for NUL

Unless the -a/--text flag is given, searching for an explicit NUL byte
is almost always an error, because binary detection will prevent it
from ever matching.

Fixes #1838
This commit is contained in:
Andrew Gallant
2023-11-25 10:39:08 -05:00
parent 7bb9f35d2d
commit 9ed7565fcb
10 changed files with 162 additions and 6 deletions

83
crates/regex/src/ban.rs Normal file
View File

@@ -0,0 +1,83 @@
use regex_syntax::hir::{
self, ClassBytesRange, ClassUnicodeRange, Hir, HirKind,
};
use crate::error::{Error, ErrorKind};
/// Returns an error when a sub-expression in `expr` must match `byte`.
pub(crate) fn check(expr: &Hir, byte: u8) -> Result<(), Error> {
assert!(byte.is_ascii(), "ban byte must be ASCII");
let ch = char::from(byte);
let invalid = || Err(Error::new(ErrorKind::Banned(byte)));
match expr.kind() {
HirKind::Empty => {}
HirKind::Literal(hir::Literal(ref lit)) => {
if lit.iter().find(|&&b| b == byte).is_some() {
return invalid();
}
}
HirKind::Class(hir::Class::Unicode(ref cls)) => {
if cls.ranges().iter().map(|r| r.len()).sum::<usize>() == 1 {
let contains =
|r: &&ClassUnicodeRange| r.start() <= ch && ch <= r.end();
if cls.ranges().iter().find(contains).is_some() {
return invalid();
}
}
}
HirKind::Class(hir::Class::Bytes(ref cls)) => {
if cls.ranges().iter().map(|r| r.len()).sum::<usize>() == 1 {
let contains = |r: &&ClassBytesRange| {
r.start() <= byte && byte <= r.end()
};
if cls.ranges().iter().find(contains).is_some() {
return invalid();
}
}
}
HirKind::Look(_) => {}
HirKind::Repetition(ref x) => check(&x.sub, byte)?,
HirKind::Capture(ref x) => check(&x.sub, byte)?,
HirKind::Concat(ref xs) => {
for x in xs.iter() {
check(x, byte)?;
}
}
HirKind::Alternation(ref xs) => {
for x in xs.iter() {
check(x, byte)?;
}
}
};
Ok(())
}
#[cfg(test)]
mod tests {
    use regex_syntax::Parser;

    /// Parses `pattern` and reports whether `super::check` rejects it
    /// because of the banned `byte`.
    ///
    /// Panics when `pattern` is not a valid regex.
    fn is_banned(pattern: &str, byte: u8) -> bool {
        let parsed = Parser::new().parse(pattern).unwrap();
        super::check(&parsed, byte).is_err()
    }

    #[test]
    fn various() {
        // Patterns in which some sub-expression can only ever match NUL.
        let rejected: &[&str] = &[
            r"\x00",
            r"a\x00",
            r"\x00b",
            r"a\x00b",
            r"\x00|ab",
            r"ab|\x00",
            r"\x00?",
            r"(\x00)",
            r"[\x00]",
            r"[^[^\x00]]",
        ];
        // Patterns that mention NUL but are not forced to match it.
        let accepted: &[&str] = &[r"[^\x00]", r"[\x00a]"];

        for &pattern in rejected {
            assert!(is_banned(pattern, 0), "expected ban: {:?}", pattern);
        }
        for &pattern in accepted {
            assert!(!is_banned(pattern, 0), "unexpected ban: {:?}", pattern);
        }
    }
}