1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-01-19 05:49:14 +02:00

printer: hand-roll decimal formatting

It seems like a trifle, but if the match frequency is high enough, the
allocation+formatting of line numbers (and columns and byte offsets)
starts to matter. We squash that part of the profile in this commit by
doing our own decimal formatting. I speculate that we get a speed-up
from this by avoiding the formatting machinery and also a possible
allocation.

An alternative would be to use the `itoa` crate, and it is indeed
marginally faster in ad hoc benchmarks, but I'm satisfied enough with
this solution.
This commit is contained in:
Andrew Gallant 2023-09-30 14:16:51 -04:00
parent dd1bc5b898
commit 1659fb9b43
3 changed files with 67 additions and 7 deletions

View File

@ -5,7 +5,7 @@ use {
termcolor::{HyperlinkSpec, WriteColor},
};
use crate::hyperlink_aliases;
use crate::{hyperlink_aliases, util::DecimalFormatter};
/// Hyperlink configuration.
///
@ -484,11 +484,11 @@ impl Part {
),
Part::Path => dest.extend_from_slice(&values.path.0),
Part::Line => {
let line = values.line.unwrap_or(1).to_string();
let line = DecimalFormatter::new(values.line.unwrap_or(1));
dest.extend_from_slice(line.as_bytes());
}
Part::Column => {
let column = values.column.unwrap_or(1).to_string();
let column = DecimalFormatter::new(values.column.unwrap_or(1));
dest.extend_from_slice(column.as_bytes());
}
}

View File

@ -24,7 +24,7 @@ use crate::{
stats::Stats,
util::{
find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator,
PrinterPath, Replacer, Sunk,
DecimalFormatter, PrinterPath, Replacer, Sunk,
},
};
@ -1709,7 +1709,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> {
fn write_line_number(&mut self, line: Option<u64>) -> io::Result<()> {
let Some(line_number) = line else { return Ok(()) };
self.write_separator()?;
let n = line_number.to_string();
let n = DecimalFormatter::new(line_number);
self.std.write_spec(self.config().colors.line(), n.as_bytes())?;
self.next_separator = PreludeSeparator::FieldSeparator;
Ok(())
@ -1723,7 +1723,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> {
}
let Some(column_number) = column else { return Ok(()) };
self.write_separator()?;
let n = column_number.to_string();
let n = DecimalFormatter::new(column_number);
self.std.write_spec(self.config().colors.column(), n.as_bytes())?;
self.next_separator = PreludeSeparator::FieldSeparator;
Ok(())
@ -1736,7 +1736,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> {
return Ok(());
}
self.write_separator()?;
let n = offset.to_string();
let n = DecimalFormatter::new(offset);
self.std.write_spec(self.config().colors.column(), n.as_bytes())?;
self.next_separator = PreludeSeparator::FieldSeparator;
Ok(())

View File

@ -397,6 +397,47 @@ impl Serialize for NiceDuration {
}
}
/// Renders a `u64` as ASCII decimal digits held in a small inline buffer.
///
/// Converting numbers via the general formatting machinery seems to be
/// noticeably slow when many numbers are printed, so this type does the
/// conversion by hand instead.
///
/// The `itoa` crate provides the same functionality and is a bit faster.
/// This hand-rolled version is a bit slower than `itoa`, but it is fast
/// enough for our purposes and is written entirely in safe code.
#[derive(Debug)]
pub(crate) struct DecimalFormatter {
    /// Digit storage. Digits are right-aligned, so only `buf[start..]`
    /// holds formatted output.
    buf: [u8; Self::MAX_U64_LEN],
    /// Index into `buf` of the most significant digit.
    start: usize,
}

impl DecimalFormatter {
    /// The number of decimal digits in `u64::MAX`, as given by
    /// `u64::MAX.to_string().len()`.
    const MAX_U64_LEN: usize = 20;

    /// Build a formatter holding the decimal representation of `n`.
    pub(crate) fn new(mut n: u64) -> DecimalFormatter {
        let mut buf = [0u8; Self::MAX_U64_LEN];
        let mut start = buf.len();
        // Walk the buffer from its last slot towards the front, emitting the
        // least significant digit first. The exit check sits at the bottom of
        // the loop body so that `0` still produces a single `0` digit.
        for slot in buf.iter_mut().rev() {
            let digit = u8::try_from(n % 10).unwrap();
            *slot = b'0' + digit;
            start -= 1;
            n /= 10;
            if n == 0 {
                break;
            }
        }
        DecimalFormatter { buf, start }
    }

    /// Borrow the formatted digits as an ASCII byte string.
    pub(crate) fn as_bytes(&self) -> &[u8] {
        &self.buf[self.start..]
    }
}
/// Trim prefix ASCII spaces from the given slice and return the corresponding
/// range.
///
@ -527,3 +568,22 @@ where
dst.extend(&bytes[last_match..end]);
Ok(())
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The hand-rolled formatter must produce exactly the same text as the
    /// standard library's `u64` formatting for a spread of values, including
    /// zero and `u64::MAX`.
    #[test]
    fn custom_decimal_format() {
        let samples: &[u64] =
            &[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 100, 123, u64::MAX];
        for &value in samples {
            let expected = value.to_string();
            let formatter = DecimalFormatter::new(value);
            let actual =
                String::from_utf8(formatter.as_bytes().to_vec()).unwrap();
            assert_eq!(expected, actual);
        }
    }
}