1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-02-04 06:08:39 +02:00

matcher: add a bunch of inline annotations

Many of these functions should be inlineable, but I'm not 100% sure
that they can be inlined without these annotations. We don't want to
force things, but we do try to nudge the compiler in the right
direction.
This commit is contained in:
Andrew Gallant 2023-10-08 14:48:29 -04:00
parent 1659fb9b43
commit b9de003f81
2 changed files with 66 additions and 0 deletions

View File

@ -10,6 +10,7 @@ use memchr::memchr;
/// of a capture group reference and is expected to resolve the index to its
/// corresponding matched text. If no such match exists, then `append` should
/// not write anything to its given buffer.
#[inline]
pub fn interpolate<A, N>(
mut replacement: &[u8],
mut append: A,
@ -75,12 +76,14 @@ enum Ref<'a> {
}
impl<'a> From<&'a str> for Ref<'a> {
#[inline]
fn from(x: &'a str) -> Ref<'a> {
Ref::Named(x)
}
}
impl From<usize> for Ref<'static> {
#[inline]
fn from(x: usize) -> Ref<'static> {
Ref::Number(x)
}
@ -90,6 +93,7 @@ impl From<usize> for Ref<'static> {
/// starting at the beginning of `replacement`.
///
/// If no such valid reference could be found, None is returned.
#[inline]
fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
let mut i = 0;
if replacement.len() <= 1 || replacement[0] != b'$' {
@ -130,6 +134,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
}
/// Returns true if and only if the given byte is allowed in a capture name.
#[inline]
fn is_valid_cap_letter(b: &u8) -> bool {
match *b {
b'0'..=b'9' | b'a'..=b'z' | b'A'..=b'Z' | b'_' => true,

View File

@ -304,12 +304,14 @@ impl std::fmt::Debug for BitSet {
impl ByteSet {
/// Build a byte set that has no members: every bit of the backing
/// bit set starts out cleared.
#[inline]
pub fn empty() -> ByteSet {
    let no_bits = BitSet([0; 4]);
    ByteSet(no_bits)
}
/// Build a byte set in which every possible byte is a member: every bit
/// of the backing bit set starts out set.
#[inline]
pub fn full() -> ByteSet {
    let all_bits = BitSet([u64::MAX; 4]);
    ByteSet(all_bits)
}
@ -317,6 +319,7 @@ impl ByteSet {
/// Add a byte to this set.
///
/// If the given byte already belongs to this set, then this is a no-op.
#[inline]
pub fn add(&mut self, byte: u8) {
let bucket = byte / 64;
let bit = byte % 64;
@ -324,6 +327,7 @@ impl ByteSet {
}
/// Add an inclusive range of bytes.
#[inline]
pub fn add_all(&mut self, start: u8, end: u8) {
for b in start..=end {
self.add(b);
@ -333,6 +337,7 @@ impl ByteSet {
/// Remove a byte from this set.
///
/// If the given byte is not in this set, then this is a no-op.
#[inline]
pub fn remove(&mut self, byte: u8) {
let bucket = byte / 64;
let bit = byte % 64;
@ -340,6 +345,7 @@ impl ByteSet {
}
/// Remove an inclusive range of bytes.
#[inline]
pub fn remove_all(&mut self, start: u8, end: u8) {
for b in start..=end {
self.remove(b);
@ -347,6 +353,7 @@ impl ByteSet {
}
/// Return true if and only if the given byte is in this set.
#[inline]
pub fn contains(&self, byte: u8) -> bool {
let bucket = byte / 64;
let bit = byte % 64;
@ -387,6 +394,7 @@ pub trait Captures {
///
/// Note that capturing groups that have non-zero length but otherwise
/// contain no matching groups are *not* empty.
#[inline]
fn is_empty(&self) -> bool {
    // Empty exactly when `len()` reports zero.
    let count = self.len();
    count == 0
}
@ -420,6 +428,7 @@ pub trait Captures {
/// the given `haystack`. Generally, this means that `haystack` should be
/// the same slice that was searched to get the current capture group
/// matches.
#[inline]
fn interpolate<F>(
&self,
name_to_index: F,
@ -451,15 +460,19 @@ pub struct NoCaptures(());
impl NoCaptures {
/// Create an empty set of capturing groups.
#[inline]
pub fn new() -> NoCaptures {
NoCaptures(())
}
}
impl Captures for NoCaptures {
/// Always returns zero for `NoCaptures`.
#[inline]
fn len(&self) -> usize {
0
}
/// Always returns `None`, regardless of the index requested.
#[inline]
fn get(&self, _: usize) -> Option<Match> {
None
}
@ -571,6 +584,7 @@ pub trait Matcher {
///
/// By default, capturing groups are not supported, so this always
/// returns 0.
#[inline]
fn capture_count(&self) -> usize {
// Default implementation: capturing groups are not supported, so the
// count is zero.
0
}
@ -584,6 +598,7 @@ pub trait Matcher {
///
/// By default, capturing groups are not supported, so this always returns
/// `None`.
#[inline]
fn capture_index(&self, _name: &str) -> Option<usize> {
// Default implementation: capturing groups are not supported, so the
// name is ignored and no index is returned.
None
}
@ -593,6 +608,7 @@ pub trait Matcher {
///
/// The text encoding of `haystack` is not strictly specified. Matchers are
/// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
#[inline]
fn find(&self, haystack: &[u8]) -> Result<Option<Match>, Self::Error> {
    // Delegate to `find_at` with a starting position of 0.
    let start = 0;
    self.find_at(haystack, start)
}
@ -600,6 +616,7 @@ pub trait Matcher {
/// Executes the given function over successive non-overlapping matches
/// in `haystack`. If no match exists, then the given function is never
/// called. If the function returns `false`, then iteration stops.
#[inline]
fn find_iter<F>(
&self,
haystack: &[u8],
@ -618,6 +635,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn find_iter_at<F>(
&self,
haystack: &[u8],
@ -638,6 +656,7 @@ pub trait Matcher {
/// the error is yielded. If an error occurs while executing the search,
/// then it is converted to
/// `E`.
#[inline]
fn try_find_iter<F, E>(
&self,
haystack: &[u8],
@ -660,6 +679,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn try_find_iter_at<F, E>(
&self,
haystack: &[u8],
@ -707,6 +727,7 @@ pub trait Matcher {
///
/// The text encoding of `haystack` is not strictly specified. Matchers are
/// advised to assume UTF-8, or at worst, some ASCII compatible encoding.
#[inline]
fn captures(
&self,
haystack: &[u8],
@ -719,6 +740,7 @@ pub trait Matcher {
/// in `haystack` with capture groups extracted from each match. If no
/// match exists, then the given function is never called. If the function
/// returns `false`, then iteration stops.
#[inline]
fn captures_iter<F>(
&self,
haystack: &[u8],
@ -739,6 +761,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn captures_iter_at<F>(
&self,
haystack: &[u8],
@ -760,6 +783,7 @@ pub trait Matcher {
/// returns an error then iteration stops and the error is yielded. If
/// an error occurs while executing the search, then it is converted to
/// `E`.
#[inline]
fn try_captures_iter<F, E>(
&self,
haystack: &[u8],
@ -783,6 +807,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn try_captures_iter_at<F, E>(
&self,
haystack: &[u8],
@ -849,6 +874,7 @@ pub trait Matcher {
/// Note that if implementors seek to support capturing groups, then they
/// should implement this method. Other methods that match based on
/// captures will then work automatically.
#[inline]
fn captures_at(
&self,
_haystack: &[u8],
@ -863,6 +889,7 @@ pub trait Matcher {
/// a handle to the `dst` buffer provided.
///
/// If the given `append` function returns `false`, then replacement stops.
#[inline]
fn replace<F>(
&self,
haystack: &[u8],
@ -886,6 +913,7 @@ pub trait Matcher {
/// `append` with the matching capture groups.
///
/// If the given `append` function returns `false`, then replacement stops.
#[inline]
fn replace_with_captures<F>(
&self,
haystack: &[u8],
@ -907,6 +935,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn replace_with_captures_at<F>(
&self,
haystack: &[u8],
@ -932,6 +961,7 @@ pub trait Matcher {
/// Returns true if and only if the matcher matches the given haystack.
///
/// By default, this method is implemented by calling `shortest_match`.
#[inline]
fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
    // Delegate to `is_match_at` with a starting position of 0.
    let start = 0;
    self.is_match_at(haystack, start)
}
@ -944,6 +974,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn is_match_at(
&self,
haystack: &[u8],
@ -966,6 +997,7 @@ pub trait Matcher {
/// a faster implementation of this than what `find` does.
///
/// By default, this method is implemented by calling `find`.
#[inline]
fn shortest_match(
&self,
haystack: &[u8],
@ -991,6 +1023,7 @@ pub trait Matcher {
/// The significance of the starting point is that it takes the surrounding
/// context into consideration. For example, the `\A` anchor can only
/// match when `at == 0`.
#[inline]
fn shortest_match_at(
&self,
haystack: &[u8],
@ -1019,6 +1052,7 @@ pub trait Matcher {
/// exists with that byte.
///
/// By default, this returns `None`.
#[inline]
fn non_matching_bytes(&self) -> Option<&ByteSet> {
// Default implementation: no byte set is reported.
None
}
@ -1035,6 +1069,7 @@ pub trait Matcher {
/// `CRLF`.
///
/// By default, this returns `None`.
#[inline]
fn line_terminator(&self) -> Option<LineTerminator> {
// Default implementation: no line terminator is reported.
None
}
@ -1077,6 +1112,7 @@ pub trait Matcher {
/// Note that while this method may report false positives, it must never
/// report false negatives. That is, it can never skip over lines that
/// contain a match.
#[inline]
fn find_candidate_line(
&self,
haystack: &[u8],
@ -1089,6 +1125,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
type Captures = M::Captures;
type Error = M::Error;
#[inline]
fn find_at(
&self,
haystack: &[u8],
@ -1097,10 +1134,12 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).find_at(haystack, at)
}
#[inline]
fn new_captures(&self) -> Result<Self::Captures, Self::Error> {
(*self).new_captures()
}
#[inline]
fn captures_at(
&self,
haystack: &[u8],
@ -1110,18 +1149,22 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).captures_at(haystack, at, caps)
}
#[inline]
fn capture_index(&self, name: &str) -> Option<usize> {
(*self).capture_index(name)
}
#[inline]
fn capture_count(&self) -> usize {
(*self).capture_count()
}
#[inline]
fn find(&self, haystack: &[u8]) -> Result<Option<Match>, Self::Error> {
(*self).find(haystack)
}
#[inline]
fn find_iter<F>(
&self,
haystack: &[u8],
@ -1133,6 +1176,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).find_iter(haystack, matched)
}
#[inline]
fn find_iter_at<F>(
&self,
haystack: &[u8],
@ -1145,6 +1189,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).find_iter_at(haystack, at, matched)
}
#[inline]
fn try_find_iter<F, E>(
&self,
haystack: &[u8],
@ -1156,6 +1201,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).try_find_iter(haystack, matched)
}
#[inline]
fn try_find_iter_at<F, E>(
&self,
haystack: &[u8],
@ -1168,6 +1214,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).try_find_iter_at(haystack, at, matched)
}
#[inline]
fn captures(
&self,
haystack: &[u8],
@ -1176,6 +1223,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).captures(haystack, caps)
}
#[inline]
fn captures_iter<F>(
&self,
haystack: &[u8],
@ -1188,6 +1236,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).captures_iter(haystack, caps, matched)
}
#[inline]
fn captures_iter_at<F>(
&self,
haystack: &[u8],
@ -1201,6 +1250,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).captures_iter_at(haystack, at, caps, matched)
}
#[inline]
fn try_captures_iter<F, E>(
&self,
haystack: &[u8],
@ -1213,6 +1263,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).try_captures_iter(haystack, caps, matched)
}
#[inline]
fn try_captures_iter_at<F, E>(
&self,
haystack: &[u8],
@ -1226,6 +1277,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).try_captures_iter_at(haystack, at, caps, matched)
}
#[inline]
fn replace<F>(
&self,
haystack: &[u8],
@ -1238,6 +1290,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).replace(haystack, dst, append)
}
#[inline]
fn replace_with_captures<F>(
&self,
haystack: &[u8],
@ -1251,6 +1304,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).replace_with_captures(haystack, caps, dst, append)
}
#[inline]
fn replace_with_captures_at<F>(
&self,
haystack: &[u8],
@ -1265,10 +1319,12 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).replace_with_captures_at(haystack, at, caps, dst, append)
}
#[inline]
fn is_match(&self, haystack: &[u8]) -> Result<bool, Self::Error> {
(*self).is_match(haystack)
}
#[inline]
fn is_match_at(
&self,
haystack: &[u8],
@ -1277,6 +1333,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).is_match_at(haystack, at)
}
#[inline]
fn shortest_match(
&self,
haystack: &[u8],
@ -1284,6 +1341,7 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).shortest_match(haystack)
}
#[inline]
fn shortest_match_at(
&self,
haystack: &[u8],
@ -1292,14 +1350,17 @@ impl<'a, M: Matcher> Matcher for &'a M {
(*self).shortest_match_at(haystack, at)
}
#[inline]
fn non_matching_bytes(&self) -> Option<&ByteSet> {
(*self).non_matching_bytes()
}
#[inline]
fn line_terminator(&self) -> Option<LineTerminator> {
(*self).line_terminator()
}
#[inline]
fn find_candidate_line(
&self,
haystack: &[u8],