mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-03 21:51:58 -07:00
cli: error when searching for NUL
Unless the -a/--text flag is given, it is almost always an error to search for an explicit NUL byte, because binary detection will prevent it from matching. Fixes #1838
This commit is contained in:
83
crates/regex/src/ban.rs
Normal file
83
crates/regex/src/ban.rs
Normal file
@@ -0,0 +1,83 @@
|
||||
use regex_syntax::hir::{
|
||||
self, ClassBytesRange, ClassUnicodeRange, Hir, HirKind,
|
||||
};
|
||||
|
||||
use crate::error::{Error, ErrorKind};
|
||||
|
||||
/// Returns an error when a sub-expression in `expr` must match `byte`.
|
||||
pub(crate) fn check(expr: &Hir, byte: u8) -> Result<(), Error> {
|
||||
assert!(byte.is_ascii(), "ban byte must be ASCII");
|
||||
let ch = char::from(byte);
|
||||
let invalid = || Err(Error::new(ErrorKind::Banned(byte)));
|
||||
match expr.kind() {
|
||||
HirKind::Empty => {}
|
||||
HirKind::Literal(hir::Literal(ref lit)) => {
|
||||
if lit.iter().find(|&&b| b == byte).is_some() {
|
||||
return invalid();
|
||||
}
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
||||
if cls.ranges().iter().map(|r| r.len()).sum::<usize>() == 1 {
|
||||
let contains =
|
||||
|r: &&ClassUnicodeRange| r.start() <= ch && ch <= r.end();
|
||||
if cls.ranges().iter().find(contains).is_some() {
|
||||
return invalid();
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(ref cls)) => {
|
||||
if cls.ranges().iter().map(|r| r.len()).sum::<usize>() == 1 {
|
||||
let contains = |r: &&ClassBytesRange| {
|
||||
r.start() <= byte && byte <= r.end()
|
||||
};
|
||||
if cls.ranges().iter().find(contains).is_some() {
|
||||
return invalid();
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Look(_) => {}
|
||||
HirKind::Repetition(ref x) => check(&x.sub, byte)?,
|
||||
HirKind::Capture(ref x) => check(&x.sub, byte)?,
|
||||
HirKind::Concat(ref xs) => {
|
||||
for x in xs.iter() {
|
||||
check(x, byte)?;
|
||||
}
|
||||
}
|
||||
HirKind::Alternation(ref xs) => {
|
||||
for x in xs.iter() {
|
||||
check(x, byte)?;
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use regex_syntax::Parser;

    /// Parses `pattern` and reports whether `super::check` rejects it for
    /// the given banned byte.
    fn is_banned(pattern: &str, byte: u8) -> bool {
        let parsed = Parser::new().parse(pattern).unwrap();
        let verdict = super::check(&parsed, byte);
        verdict.is_err()
    }

    #[test]
    fn various() {
        // Patterns that contain NUL as a literal or as the sole member of a
        // character class; each one is expected to be rejected.
        let rejected = [
            r"\x00",
            r"a\x00",
            r"\x00b",
            r"a\x00b",
            r"\x00|ab",
            r"ab|\x00",
            r"\x00?",
            r"(\x00)",
            r"[\x00]",
            r"[^[^\x00]]",
        ];
        for pattern in rejected.iter() {
            assert!(
                is_banned(pattern, 0),
                "expected {:?} to be rejected",
                pattern
            );
        }

        // Classes that either exclude NUL or match something besides NUL;
        // each one is expected to be accepted.
        let accepted = [r"[^\x00]", r"[\x00a]"];
        for pattern in accepted.iter() {
            assert!(
                !is_banned(pattern, 0),
                "expected {:?} to be accepted",
                pattern
            );
        }
    }
}
|
Reference in New Issue
Block a user