mirror of
				https://github.com/BurntSushi/ripgrep.git
				synced 2025-10-30 23:17:47 +02:00 
			
		
		
		
	printer: hand-roll decimal formatting
It seems like a trifle, but if the match frequency is high enough, the allocation+formatting of line numbers (and columns and byte offsets) starts to matter. We squash that part of the profile in this commit by doing our own decimal formatting. I speculate that we get a speed-up from this by avoiding the formatting machinery and also a possible allocation. An alternative would be to use the `itoa` crate, and it is indeed marginally faster in ad hoc benchmarks, but I'm satisfied enough with this solution.
This commit is contained in:
		| @@ -5,7 +5,7 @@ use { | ||||
|     termcolor::{HyperlinkSpec, WriteColor}, | ||||
| }; | ||||
|  | ||||
| use crate::hyperlink_aliases; | ||||
| use crate::{hyperlink_aliases, util::DecimalFormatter}; | ||||
|  | ||||
| /// Hyperlink configuration. | ||||
| /// | ||||
| @@ -484,11 +484,11 @@ impl Part { | ||||
|             ), | ||||
|             Part::Path => dest.extend_from_slice(&values.path.0), | ||||
|             Part::Line => { | ||||
|                 let line = values.line.unwrap_or(1).to_string(); | ||||
|                 let line = DecimalFormatter::new(values.line.unwrap_or(1)); | ||||
|                 dest.extend_from_slice(line.as_bytes()); | ||||
|             } | ||||
|             Part::Column => { | ||||
|                 let column = values.column.unwrap_or(1).to_string(); | ||||
|                 let column = DecimalFormatter::new(values.column.unwrap_or(1)); | ||||
|                 dest.extend_from_slice(column.as_bytes()); | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -24,7 +24,7 @@ use crate::{ | ||||
|     stats::Stats, | ||||
|     util::{ | ||||
|         find_iter_at_in_context, trim_ascii_prefix, trim_line_terminator, | ||||
|         PrinterPath, Replacer, Sunk, | ||||
|         DecimalFormatter, PrinterPath, Replacer, Sunk, | ||||
|     }, | ||||
| }; | ||||
|  | ||||
| @@ -1709,7 +1709,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { | ||||
|     fn write_line_number(&mut self, line: Option<u64>) -> io::Result<()> { | ||||
|         let Some(line_number) = line else { return Ok(()) }; | ||||
|         self.write_separator()?; | ||||
|         let n = line_number.to_string(); | ||||
|         let n = DecimalFormatter::new(line_number); | ||||
|         self.std.write_spec(self.config().colors.line(), n.as_bytes())?; | ||||
|         self.next_separator = PreludeSeparator::FieldSeparator; | ||||
|         Ok(()) | ||||
| @@ -1723,7 +1723,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { | ||||
|         } | ||||
|         let Some(column_number) = column else { return Ok(()) }; | ||||
|         self.write_separator()?; | ||||
|         let n = column_number.to_string(); | ||||
|         let n = DecimalFormatter::new(column_number); | ||||
|         self.std.write_spec(self.config().colors.column(), n.as_bytes())?; | ||||
|         self.next_separator = PreludeSeparator::FieldSeparator; | ||||
|         Ok(()) | ||||
| @@ -1736,7 +1736,7 @@ impl<'a, M: Matcher, W: WriteColor> PreludeWriter<'a, M, W> { | ||||
|             return Ok(()); | ||||
|         } | ||||
|         self.write_separator()?; | ||||
|         let n = offset.to_string(); | ||||
|         let n = DecimalFormatter::new(offset); | ||||
|         self.std.write_spec(self.config().colors.column(), n.as_bytes())?; | ||||
|         self.next_separator = PreludeSeparator::FieldSeparator; | ||||
|         Ok(()) | ||||
|   | ||||
| @@ -397,6 +397,47 @@ impl Serialize for NiceDuration { | ||||
|     } | ||||
| } | ||||
|  | ||||
/// Renders a `u64` as its ASCII decimal digits.
///
/// The digits are written into a fixed-size array stored inline in the value,
/// so producing them does not go through the standard formatting machinery,
/// which seems to slow things down substantially when numbers are formatted
/// at a high rate.
///
/// The `itoa` crate solves the same problem and is a bit faster. This
/// hand-rolled version is slightly slower, but it is fast enough for our
/// purposes and is written entirely in safe code.
#[derive(Debug)]
pub(crate) struct DecimalFormatter {
    /// Digit storage. Only `buf[start..]` holds meaningful digits; the bytes
    /// before `start` are left as zero.
    buf: [u8; Self::MAX_U64_LEN],
    /// Index into `buf` of the most significant digit.
    start: usize,
}

impl DecimalFormatter {
    /// The number of decimal digits in `u64::MAX`, i.e.
    /// `u64::MAX.to_string().len()`.
    const MAX_U64_LEN: usize = 20;

    /// Create a new decimal formatter for the given 64-bit unsigned integer.
    ///
    /// Zero is formatted as the single digit `0`.
    pub(crate) fn new(mut n: u64) -> DecimalFormatter {
        let mut buf = [0; Self::MAX_U64_LEN];
        let mut start = Self::MAX_U64_LEN;
        // Fill from the least significant digit backwards. The buffer has
        // room for every digit of `u64::MAX`, so `n` always reaches zero
        // before the slots run out.
        for slot in buf.iter_mut().rev() {
            // `n % 10` is always in `0..=9`, so the conversion cannot fail.
            *slot = b'0' + u8::try_from(n % 10).unwrap();
            start -= 1;
            n /= 10;
            if n == 0 {
                break;
            }
        }
        DecimalFormatter { buf, start }
    }

    /// Return the formatted number as a slice of ASCII digit bytes, most
    /// significant digit first.
    pub(crate) fn as_bytes(&self) -> &[u8] {
        &self.buf[self.start..]
    }
}
|  | ||||
| /// Trim prefix ASCII spaces from the given slice and return the corresponding | ||||
| /// range. | ||||
| /// | ||||
| @@ -527,3 +568,22 @@ where | ||||
|     dst.extend(&bytes[last_match..end]); | ||||
|     Ok(()) | ||||
| } | ||||
|  | ||||
#[cfg(test)]
mod tests {
    use super::*;

    /// The hand-rolled formatter must produce exactly what `u64::to_string`
    /// produces, including for zero, digit-count boundaries and `u64::MAX`.
    #[test]
    fn custom_decimal_format() {
        let samples: [u64; 15] =
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 100, 123, u64::MAX];
        for &value in samples.iter() {
            let expected = value.to_string();
            let formatter = DecimalFormatter::new(value);
            let actual =
                String::from_utf8(formatter.as_bytes().to_vec()).unwrap();
            assert_eq!(expected, actual);
        }
    }
}
|   | ||||
		Reference in New Issue
	
	Block a user