mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-17 20:28:03 +02:00
deps: update to pcre2 0.2.4
0.2.4 updates to PCRE2 10.42 and has a few other nice changes. For example, when `utf` is enabled, the crate will always set the PCRE2_MATCH_INVALID_UTF option. That means we no longer need to do transcoding or UTF-8 validity checks. Because of this, we actually get to remove one of the two uses of `unsafe` in ripgrep's `main` program. (This also updates a couple other dependencies for convenience.)
This commit is contained in:
parent
a775b493fd
commit
4b8aa91ae5
16
Cargo.lock
generated
16
Cargo.lock
generated
@ -80,9 +80,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.15"
|
||||
version = "0.8.16"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3c063cd8cc95f5c377ed0d4b49a4b21f632396ff690e8470c29b3359b346984b"
|
||||
checksum = "5a22b2d63d4d1dc0b7f1b6b2747dd0088008a9be28b6ddf0b1e7d335e3037294"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
@ -327,9 +327,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pcre2"
|
||||
version = "0.2.3"
|
||||
version = "0.2.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "85b30f2f69903b439dd9dc9e824119b82a55bf113b29af8d70948a03c1b11ab1"
|
||||
checksum = "486aca7e74edb8cab09a48d461177f450a5cca3b55e61d139f7552190e2bbcf5"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"log",
|
||||
@ -339,9 +339,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "pcre2-sys"
|
||||
version = "0.2.5"
|
||||
version = "0.2.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "dec30e5e9ec37eb8fbf1dea5989bc957fd3df56fbee5061aa7b7a99dbb37b722"
|
||||
checksum = "ae234f441970dbd52d4e29bee70f3b56ca83040081cb2b55b7df772b16e0b06e"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"libc",
|
||||
@ -457,9 +457,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "serde_json"
|
||||
version = "1.0.96"
|
||||
version = "1.0.97"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "057d394a50403bcac12672b2b18fb387ab6d289d957dab67dd201875391e52f1"
|
||||
checksum = "bdf3bf93142acad5821c99197022e170842cdbc1c30482b98750c688c640842a"
|
||||
dependencies = [
|
||||
"itoa",
|
||||
"ryu",
|
||||
|
@ -472,24 +472,6 @@ enum EncodingMode {
|
||||
Disabled,
|
||||
}
|
||||
|
||||
impl EncodingMode {
|
||||
/// Checks if an explicit encoding has been set. Returns false for
|
||||
/// automatic BOM sniffing and no sniffing.
|
||||
///
|
||||
/// This is only used to determine whether PCRE2 needs to have its own
|
||||
/// UTF-8 checking enabled. If we have an explicit encoding set, then
|
||||
/// we're always guaranteed to get UTF-8, so we can disable PCRE2's check.
|
||||
/// Otherwise, we have no such guarantee, and must enable PCRE2's UTF-8
|
||||
/// check.
|
||||
#[cfg(feature = "pcre2")]
|
||||
fn has_explicit_encoding(&self) -> bool {
|
||||
match self {
|
||||
EncodingMode::Some(_) => true,
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl ArgMatches {
|
||||
/// Create an ArgMatches from clap's parse result.
|
||||
fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
|
||||
@ -732,14 +714,6 @@ impl ArgMatches {
|
||||
}
|
||||
if self.unicode() {
|
||||
builder.utf(true).ucp(true);
|
||||
if self.encoding()?.has_explicit_encoding() {
|
||||
// SAFETY: If an encoding was specified, then we're guaranteed
|
||||
// to get valid UTF-8, so we can disable PCRE2's UTF checking.
|
||||
// (Feeding invalid UTF-8 to PCRE2 is undefined behavior.)
|
||||
unsafe {
|
||||
builder.disable_utf_check();
|
||||
}
|
||||
}
|
||||
}
|
||||
if self.is_present("multiline") {
|
||||
builder.dotall(self.is_present("multiline-dotall"));
|
||||
@ -1080,7 +1054,6 @@ impl ArgMatches {
|
||||
}
|
||||
|
||||
let label = match self.value_of_lossy("encoding") {
|
||||
None if self.pcre2_unicode() => "utf-8".to_string(),
|
||||
None => return Ok(EncodingMode::Auto),
|
||||
Some(label) => label,
|
||||
};
|
||||
@ -1641,12 +1614,6 @@ impl ArgMatches {
|
||||
!(self.is_present("no-unicode") || self.is_present("no-pcre2-unicode"))
|
||||
}
|
||||
|
||||
/// Returns true if and only if PCRE2 is enabled and its Unicode mode is
|
||||
/// enabled.
|
||||
fn pcre2_unicode(&self) -> bool {
|
||||
self.is_present("pcre2") && self.unicode()
|
||||
}
|
||||
|
||||
/// Returns true if and only if file names containing each match should
|
||||
/// be emitted.
|
||||
fn with_filename(&self, paths: &[PathBuf]) -> bool {
|
||||
|
@ -15,4 +15,4 @@ edition = "2018"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.1.6", path = "../matcher" }
|
||||
pcre2 = "0.2.3"
|
||||
pcre2 = "0.2.4"
|
||||
|
@ -178,23 +178,22 @@ impl RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// When UTF matching mode is enabled, this will disable the UTF checking
|
||||
/// that PCRE2 will normally perform automatically. If UTF matching mode
|
||||
/// is not enabled, then this has no effect.
|
||||
/// This is now deprecated and is a no-op.
|
||||
///
|
||||
/// UTF checking is enabled by default when UTF matching mode is enabled.
|
||||
/// If UTF matching mode is enabled and UTF checking is enabled, then PCRE2
|
||||
/// will return an error if you attempt to search a subject string that is
|
||||
/// not valid UTF-8.
|
||||
/// Previously, this option permitted disabling PCRE2's UTF-8 validity
|
||||
/// check, which could result in undefined behavior if the haystack was
|
||||
/// not valid UTF-8. But PCRE2 introduced a new option, `PCRE2_MATCH_INVALID_UTF`,
|
||||
/// in 10.34 which this crate always sets. When this option is enabled,
|
||||
/// PCRE2 claims to not have undefined behavior when the haystack is
|
||||
/// invalid UTF-8.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It is undefined behavior to disable the UTF check in UTF matching mode
|
||||
/// and search a subject string that is not valid UTF-8. When the UTF check
|
||||
/// is disabled, callers must guarantee that the subject string is valid
|
||||
/// UTF-8.
|
||||
pub unsafe fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self.builder.disable_utf_check();
|
||||
/// Therefore, disabling the UTF-8 check is not something that is exposed
|
||||
/// by this crate.
|
||||
#[deprecated(
|
||||
since = "0.2.4",
|
||||
note = "now a no-op due to new PCRE2 features"
|
||||
)]
|
||||
pub fn disable_utf_check(&mut self) -> &mut RegexMatcherBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user