1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-03-17 20:28:03 +02:00

deps: update to pcre2 0.2.4

0.2.4 updates to PCRE2 10.42 and has a few other nice changes. For
example, when `utf` is enabled, the crate will always set the
PCRE2_MATCH_INVALID_UTF option. That means we no longer need to do
transcoding or UTF-8 validity checks.

Because of this, we actually get to remove one of the two uses of
`unsafe` in ripgrep's `main` program.

(This also updates a couple of other dependencies for convenience.)
This commit is contained in:
Andrew Gallant 2023-06-18 13:07:48 -04:00
parent a775b493fd
commit 4b8aa91ae5
4 changed files with 23 additions and 57 deletions

16
Cargo.lock generated
View File

@ -80,9 +80,9 @@ dependencies = [
[[package]] [[package]]
name = "crossbeam-utils" name = "crossbeam-utils"
version = "0.8.15" version = "0.8.16"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b" checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
dependencies = [ dependencies = [
"cfg-if", "cfg-if",
] ]
@ -327,9 +327,9 @@ dependencies = [
[[package]] [[package]]
name = "pcre2" name = "pcre2"
version = "0.2.3" version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "85b30f2f69903b439dd9dc9e824119b82a55bf113b29af8d70948a03c1b11ab1" checksum = "486aca7e74edb8cab09a48d461177f450a5cca3b55e61d139f7552190e2bbcf5"
dependencies = [ dependencies = [
"libc", "libc",
"log", "log",
@ -339,9 +339,9 @@ dependencies = [
[[package]] [[package]]
name = "pcre2-sys" name = "pcre2-sys"
version = "0.2.5" version = "0.2.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dec30e5e9ec37eb8fbf1dea5989bc957fd3df56fbee5061aa7b7a99dbb37b722" checksum = "ae234f441970dbd52d4e29bee70f3b56ca83040081cb2b55b7df772b16e0b06e"
dependencies = [ dependencies = [
"cc", "cc",
"libc", "libc",
@ -457,9 +457,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_json" name = "serde_json"
version = "1.0.96" version = "1.0.97"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1" checksum = "bdf3bf93142acad5821c99197022e170842cdbc1c30482b98750c688c640842a"
dependencies = [ dependencies = [
"itoa", "itoa",
"ryu", "ryu",

View File

@ -472,24 +472,6 @@ enum EncodingMode {
Disabled, Disabled,
} }
impl EncodingMode {
/// Checks if an explicit encoding has been set. Returns false for
/// automatic BOM sniffing and no sniffing.
///
/// This is only used to determine whether PCRE2 needs to have its own
/// UTF-8 checking enabled. If we have an explicit encoding set, then
/// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
/// Otherwise, we have no such guarantee, and must enable PCRE2's UTF-8
/// check.
#[cfg(feature = "pcre2")]
fn has_explicit_encoding(&self) -> bool {
match self {
EncodingMode::Some(_) => true,
_ => false,
}
}
}
impl ArgMatches { impl ArgMatches {
/// Create an ArgMatches from clap's parse result. /// Create an ArgMatches from clap's parse result.
fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
@ -732,14 +714,6 @@ impl ArgMatches {
} }
if self.unicode() { if self.unicode() {
builder.utf(true).ucp(true); builder.utf(true).ucp(true);
if self.encoding()?.has_explicit_encoding() {
// SAFETY: If an encoding was specified, then we're guaranteed
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
// (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
unsafe {
builder.disable_utf_check();
}
}
} }
if self.is_present("multiline") { if self.is_present("multiline") {
builder.dotall(self.is_present("multiline-dotall")); builder.dotall(self.is_present("multiline-dotall"));
@ -1080,7 +1054,6 @@ impl ArgMatches {
} }
let label = match self.value_of_lossy("encoding") { let label = match self.value_of_lossy("encoding") {
None if self.pcre2_unicode() => "utf-8".to_string(),
None => return Ok(EncodingMode::Auto), None => return Ok(EncodingMode::Auto),
Some(label) => label, Some(label) => label,
}; };
@ -1641,12 +1614,6 @@ impl ArgMatches {
!(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode")) !(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode"))
} }
/// Returns true if and only if PCRE2 is enabled and its Unicode mode is
/// enabled.
fn pcre2_unicode(&self) -> bool {
self.is_present("pcre2") && self.unicode()
}
/// Returns true if and only if file names containing each match should /// Returns true if and only if file names containing each match should
/// be emitted. /// be emitted.
fn with_filename(&self, paths: &[PathBuf]) -> bool { fn with_filename(&self, paths: &[PathBuf]) -> bool {

View File

@ -15,4 +15,4 @@ edition = "2018"
[dependencies] [dependencies]
grep-matcher = { version = "0.1.6", path = "../matcher" } grep-matcher = { version = "0.1.6", path = "../matcher" }
pcre2 = "0.2.3" pcre2 = "0.2.4"

View File

@ -178,23 +178,22 @@ impl RegexMatcherBuilder {
self self
} }
/// When UTF matching mode is enabled, this will disable the UTF checking /// This is now deprecated and is a no-op.
/// that PCRE2 will normally perform automatically. If UTF matching mode
/// is not enabled, then this has no effect.
/// ///
/// UTF checking is enabled by default when UTF matching mode is enabled. /// Previously, this option permitted disabling PCRE2's UTF-8 validity
/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2 /// check, which could result in undefined behavior if the haystack was
/// will return an error if you attempt to search a subject string that is /// not valid UTF-8. But PCRE2 introduced a new option, `PCRE2_MATCH_INVALID_UTF`,
/// not valid UTF-8. /// in 10.34 which this crate always sets. When this option is enabled,
/// PCRE2 claims to not have undefined behavior when the haystack is
/// invalid UTF-8.
/// ///
/// # Safety /// Therefore, disabling the UTF-8 check is not something that is exposed
/// /// by this crate.
/// It is undefined behavior to disable the UTF check in UTF matching mode #[deprecated(
/// and search a subject string that is not valid UTF-8. When the UTF check since = "0.2.4",
/// is disabled, callers must guarantee that the subject string is valid note = "now a no-op due to new PCRE2 features"
/// UTF-8. )]
pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder { pub fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
self.builder.disable_utf_check();
self self
} }