mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-17 20:28:03 +02:00
Basically, unless the -a/--text flag is given, it is generally always an error to search for an explicit NUL byte because the binary detection will prevent it from matching. Fixes #1838
84 lines
2.6 KiB
Rust
84 lines
2.6 KiB
Rust
use regex_syntax::hir::{
|
|
self, ClassBytesRange, ClassUnicodeRange, Hir, HirKind,
|
|
};
|
|
|
|
use crate::error::{Error, ErrorKind};
|
|
|
|
/// Returns an error when a sub-expression in `expr` must match `byte`.
|
|
pub(crate) fn check(expr: &Hir, byte: u8) -> Result<(), Error> {
|
|
assert!(byte.is_ascii(), "ban byte must be ASCII");
|
|
let ch = char::from(byte);
|
|
let invalid = || Err(Error::new(ErrorKind::Banned(byte)));
|
|
match expr.kind() {
|
|
HirKind::Empty => {}
|
|
HirKind::Literal(hir::Literal(ref lit)) => {
|
|
if lit.iter().find(|&&b| b == byte).is_some() {
|
|
return invalid();
|
|
}
|
|
}
|
|
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
|
if cls.ranges().iter().map(|r| r.len()).sum::<usize>() == 1 {
|
|
let contains =
|
|
|r: &&ClassUnicodeRange| r.start() <= ch && ch <= r.end();
|
|
if cls.ranges().iter().find(contains).is_some() {
|
|
return invalid();
|
|
}
|
|
}
|
|
}
|
|
HirKind::Class(hir::Class::Bytes(ref cls)) => {
|
|
if cls.ranges().iter().map(|r| r.len()).sum::<usize>() == 1 {
|
|
let contains = |r: &&ClassBytesRange| {
|
|
r.start() <= byte && byte <= r.end()
|
|
};
|
|
if cls.ranges().iter().find(contains).is_some() {
|
|
return invalid();
|
|
}
|
|
}
|
|
}
|
|
HirKind::Look(_) => {}
|
|
HirKind::Repetition(ref x) => check(&x.sub, byte)?,
|
|
HirKind::Capture(ref x) => check(&x.sub, byte)?,
|
|
HirKind::Concat(ref xs) => {
|
|
for x in xs.iter() {
|
|
check(x, byte)?;
|
|
}
|
|
}
|
|
HirKind::Alternation(ref xs) => {
|
|
for x in xs.iter() {
|
|
check(x, byte)?;
|
|
}
|
|
}
|
|
};
|
|
Ok(())
|
|
}
|
|
|
|
#[cfg(test)]
|
|
mod tests {
|
|
use regex_syntax::Parser;
|
|
|
|
/// Returns true when the given pattern is detected to contain the given
|
|
/// banned byte.
|
|
fn check(pattern: &str, byte: u8) -> bool {
|
|
let hir = Parser::new().parse(pattern).unwrap();
|
|
super::check(&hir, byte).is_err()
|
|
}
|
|
|
|
#[test]
|
|
fn various() {
|
|
assert!(check(r"\x00", 0));
|
|
assert!(check(r"a\x00", 0));
|
|
assert!(check(r"\x00b", 0));
|
|
assert!(check(r"a\x00b", 0));
|
|
assert!(check(r"\x00|ab", 0));
|
|
assert!(check(r"ab|\x00", 0));
|
|
assert!(check(r"\x00?", 0));
|
|
assert!(check(r"(\x00)", 0));
|
|
|
|
assert!(check(r"[\x00]", 0));
|
|
assert!(check(r"[^[^\x00]]", 0));
|
|
|
|
assert!(!check(r"[^\x00]", 0));
|
|
assert!(!check(r"[\x00a]", 0));
|
|
}
|
|
}
|