From 0c1cbd99f36e4b97cbb915e9c5a7c0c091522105 Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Sat, 29 Apr 2023 17:04:00 +0200 Subject: [PATCH] ignore: tweak regex crate features This removes most of the Unicode features as they aren't currently used. We can always add them back later if necessary. We can avoid the unicode-perl feature by changing `\s` to `[[:space:]]`, which uses the ASCII-only definition of `\s`. Since we don't expect non-ASCII whitespace in git config files, this seems okay. Closes #2502 --- crates/ignore/Cargo.toml | 2 +- crates/ignore/src/gitignore.rs | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/ignore/Cargo.toml b/crates/ignore/Cargo.toml index b946e2db..a9495aa3 100644 --- a/crates/ignore/Cargo.toml +++ b/crates/ignore/Cargo.toml @@ -23,7 +23,7 @@ globset = { version = "0.4.10", path = "../globset" } lazy_static = "1.1" log = "0.4.5" memchr = "2.5" -regex = "1.8.3" +regex = { version = "1.9.0", default-features = false, features = ["perf", "std", "unicode-gencat"] } same-file = "1.0.4" thread_local = "1" walkdir = "2.2.7" diff --git a/crates/ignore/src/gitignore.rs b/crates/ignore/src/gitignore.rs index 3c7ba5e6..b4203933 100644 --- a/crates/ignore/src/gitignore.rs +++ b/crates/ignore/src/gitignore.rs @@ -596,8 +596,13 @@ fn parse_excludes_file(data: &[u8]) -> Option { // probably works in more circumstances. I guess we would ideally have // a full INI parser. Yuck. lazy_static::lazy_static! { - static ref RE: Regex = - Regex::new(r"(?im)^\s*excludesfile\s*=\s*(.+)\s*$").unwrap(); + static ref RE: Regex = Regex::new( + r"(?xim-u) + ^[[:space:]]*excludesfile[[:space:]]* + = + [[:space:]]*(.+)[[:space:]]*$ + " + ).unwrap(); }; let caps = match RE.captures(data) { None => return None,