mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-07 11:13:17 +02:00
globset: optimize character escaping
Rewrites the char_to_escaped_literal and bytes_to_escaped_literal functions in a way that minimizes heap allocations. After this, the resulting string is the only allocation remaining. I believe when this code was originally written, the routines available to avoid heap allocations didn't exist. I'm skeptical that this matters in the grand scheme of things, but I think this is still worth doing for "good sense" reasons. PR #2833
This commit is contained in:
parent
dec0dc3196
commit
c9ebcbd8ab
@ -1,3 +1,4 @@
|
|||||||
|
use std::fmt::Write;
|
||||||
use std::path::{is_separator, Path};
|
use std::path::{is_separator, Path};
|
||||||
|
|
||||||
use regex_automata::meta::Regex;
|
use regex_automata::meta::Regex;
|
||||||
@ -732,7 +733,9 @@ impl Tokens {
|
|||||||
/// Convert a Unicode scalar value to an escaped string suitable for use as
|
/// Convert a Unicode scalar value to an escaped string suitable for use as
|
||||||
/// a literal in a non-Unicode regex.
|
/// a literal in a non-Unicode regex.
|
||||||
fn char_to_escaped_literal(c: char) -> String {
|
fn char_to_escaped_literal(c: char) -> String {
|
||||||
bytes_to_escaped_literal(&c.to_string().into_bytes())
|
let mut buf = [0; 4];
|
||||||
|
let bytes = c.encode_utf8(&mut buf).as_bytes();
|
||||||
|
bytes_to_escaped_literal(bytes)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
|
/// Converts an arbitrary sequence of bytes to a UTF-8 string. All non-ASCII
|
||||||
@ -741,11 +744,12 @@ fn bytes_to_escaped_literal(bs: &[u8]) -> String {
|
|||||||
let mut s = String::with_capacity(bs.len());
|
let mut s = String::with_capacity(bs.len());
|
||||||
for &b in bs {
|
for &b in bs {
|
||||||
if b <= 0x7F {
|
if b <= 0x7F {
|
||||||
s.push_str(&regex_syntax::escape(
|
regex_syntax::escape_into(
|
||||||
char::from(b).encode_utf8(&mut [0; 4]),
|
char::from(b).encode_utf8(&mut [0; 4]),
|
||||||
));
|
&mut s,
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
s.push_str(&format!("\\x{:02x}", b));
|
write!(&mut s, "\\x{:02x}", b).unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s
|
s
|
||||||
|
Loading…
Reference in New Issue
Block a user