mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-07-26 09:42:00 -07:00
deps: update to pcre2 0.2.4
0.2.4 updates to PCRE2 10.42 and has a few other nice changes. For example, when `utf` is enabled, the crate will always set the PCRE2_MATCH_INVALID_UTF option. That means we no longer need to do transcoding or UTF-8 validity checks. Because of this, we actually get to remove one of the two uses of `unsafe` in ripgrep's `main` program. (This also updates a couple other dependencies for convenience.)
This commit is contained in:
@@ -472,24 +472,6 @@ enum EncodingMode {
|
||||
Disabled,
|
||||
}
|
||||
|
||||
impl EncodingMode {
|
||||
/// Checks if an explicit encoding has been set. Returns false for
|
||||
/// automatic BOM sniffing and no sniffing.
|
||||
///
|
||||
/// This is only used to determine whether PCRE2 needs to have its own
|
||||
/// UTF-8 checking enabled. If we have an explicit encoding set, then
|
||||
/// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
|
||||
/// Otherwise, we have no such guarantee, and must enable PCRE2's UTF-8
|
||||
/// check.
|
||||
#[cfg(feature = "pcre2")]
|
||||
fn has_explicit_encoding(&self) -> bool {
|
||||
match self {
|
||||
EncodingMode::Some(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgMatches {
|
||||
/// Create an ArgMatches from clap's parse result.
|
||||
fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
|
||||
@@ -732,14 +714,6 @@ impl ArgMatches {
|
||||
}
|
||||
if self.unicode() {
|
||||
builder.utf(true).ucp(true);
|
||||
if self.encoding()?.has_explicit_encoding() {
|
||||
// SAFETY: If an encoding was specified, then we're guaranteed
|
||||
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
|
||||
// (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
|
||||
unsafe {
|
||||
builder.disable_utf_check();
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.is_present("multiline") {
|
||||
builder.dotall(self.is_present("multiline-dotall"));
|
||||
@@ -1080,7 +1054,6 @@ impl ArgMatches {
|
||||
}
|
||||
|
||||
let label = match self.value_of_lossy("encoding") {
|
||||
None if self.pcre2_unicode() => "utf-8".to_string(),
|
||||
None => return Ok(EncodingMode::Auto),
|
||||
Some(label) => label,
|
||||
};
|
||||
@@ -1641,12 +1614,6 @@ impl ArgMatches {
|
||||
!(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode"))
|
||||
}
|
||||
|
||||
/// Returns true if and only if PCRE2 is enabled and its Unicode mode is
|
||||
/// enabled.
|
||||
fn pcre2_unicode(&self) -> bool {
|
||||
self.is_present("pcre2") && self.unicode()
|
||||
}
|
||||
|
||||
/// Returns true if and only if file names containing each match should
|
||||
/// be emitted.
|
||||
fn with_filename(&self, paths: &[PathBuf]) -> bool {
|
||||
|
@@ -15,4 +15,4 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
pcre2 = "0.2.3"
|
||||
pcre2 = "0.2.4"
|
||||
|
@@ -178,23 +178,22 @@ impl RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// When UTF matching mode is enabled, this will disable the UTF checking
|
||||
/// that PCRE2 will normally perform automatically. If UTF matching mode
|
||||
/// is not enabled, then this has no effect.
|
||||
/// This is now deprecated and is a no-op.
|
||||
///
|
||||
/// UTF checking is enabled by default when UTF matching mode is enabled.
|
||||
/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
|
||||
/// will return an error if you attempt to search a subject string that is
|
||||
/// not valid UTF-8.
|
||||
/// Previously, this option permitted disabling PCRE2's UTF-8 validity
|
||||
/// check, which could result in undefined behavior if the haystack was
|
||||
/// not valid UTF-8. But PCRE2 introduced a new option, `PCRE2_MATCH_INVALID_UTF`,
|
||||
/// in 10.34 which this crate always sets. When this option is enabled,
|
||||
/// PCRE2 claims to not have undefined behavior when the haystack is
|
||||
/// invalid UTF-8.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It is undefined behavior to disable the UTF check in UTF matching mode
|
||||
/// and search a subject string that is not valid UTF-8. When the UTF check
|
||||
/// is disabled, callers must guarantee that the subject string is valid
|
||||
/// UTF-8.
|
||||
pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self.builder.disable_utf_check();
|
||||
/// Therefore, disabling the UTF-8 check is not something that is exposed
|
||||
/// by this crate.
|
||||
#[deprecated(
|
||||
since = "0.2.4",
|
||||
note = "now a no-op due to new PCRE2 features"
|
||||
)]
|
||||
pub fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
|
Reference in New Issue
Block a user