1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-19 09:02:15 +02:00

matcher: polish the grep-matcher crate

Not much here. Just updating to reflect my current style and bringing
the crate to the 2021 edition.
This commit is contained in:
Andrew Gallant 2023-09-25 16:48:25 -04:00
parent abfa65c2c1
commit 96f01b92a0
5 changed files with 41 additions and 49 deletions

View File

@ -12,13 +12,13 @@ readme = "README.md"
keywords = ["regex", "pattern", "trait"] keywords = ["regex", "pattern", "trait"]
license = "Unlicense OR MIT" license = "Unlicense OR MIT"
autotests = false autotests = false
edition = "2018" edition = "2021"
[dependencies] [dependencies]
memchr = "2.1" memchr = "2.6.3"
[dev-dependencies] [dev-dependencies]
regex = "1.1" regex = "1.9.5"
[[test]] [[test]]
name = "integration" name = "integration"

View File

@ -1,5 +1,3 @@
use std::str;
use memchr::memchr; use memchr::memchr;
/// Interpolate capture references in `replacement` and write the interpolation /// Interpolate capture references in `replacement` and write the interpolation
@ -114,7 +112,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
// check with an unchecked conversion or by parsing the number straight // check with an unchecked conversion or by parsing the number straight
// from &[u8]. // from &[u8].
let cap = str::from_utf8(&replacement[i..cap_end]) let cap = std::str::from_utf8(&replacement[i..cap_end])
.expect("valid UTF-8 capture name"); .expect("valid UTF-8 capture name");
if brace { if brace {
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') { if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {

View File

@ -38,11 +38,6 @@ implementations.
#![deny(missing_docs)] #![deny(missing_docs)]
use std::fmt;
use std::io;
use std::ops;
use std::u64;
use crate::interpolate::interpolate; use crate::interpolate::interpolate;
mod interpolate; mod interpolate;
@ -162,7 +157,7 @@ impl Match {
} }
} }
impl ops::Index<Match> for [u8] { impl std::ops::Index<Match> for [u8] {
type Output = [u8]; type Output = [u8];
#[inline] #[inline]
@ -171,14 +166,14 @@ impl ops::Index<Match> for [u8] {
} }
} }
impl ops::IndexMut<Match> for [u8] { impl std::ops::IndexMut<Match> for [u8] {
#[inline] #[inline]
fn index_mut(&mut self, index: Match) -> &mut [u8] { fn index_mut(&mut self, index: Match) -> &mut [u8] {
&mut self[index.start..index.end] &mut self[index.start..index.end]
} }
} }
impl ops::Index<Match> for str { impl std::ops::Index<Match> for str {
type Output = str; type Output = str;
#[inline] #[inline]
@ -204,11 +199,7 @@ pub struct LineTerminator(LineTerminatorImp);
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)] #[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum LineTerminatorImp { enum LineTerminatorImp {
/// Any single byte representing a line terminator. /// Any single byte representing a line terminator.
/// Byte(u8),
/// We represent this as an array so we can safely convert it to a slice
/// for convenient access. At some point, we can use `std::slice::from_ref`
/// instead.
Byte([u8; 1]),
/// A line terminator represented by `\r\n`. /// A line terminator represented by `\r\n`.
/// ///
/// When this option is used, consumers may generally treat a lone `\n` as /// When this option is used, consumers may generally treat a lone `\n` as
@ -220,7 +211,7 @@ impl LineTerminator {
/// Return a new single-byte line terminator. Any byte is valid. /// Return a new single-byte line terminator. Any byte is valid.
#[inline] #[inline]
pub fn byte(byte: u8) -> LineTerminator { pub fn byte(byte: u8) -> LineTerminator {
LineTerminator(LineTerminatorImp::Byte([byte])) LineTerminator(LineTerminatorImp::Byte(byte))
} }
/// Return a new line terminator represented by `\r\n`. /// Return a new line terminator represented by `\r\n`.
@ -246,7 +237,7 @@ impl LineTerminator {
#[inline] #[inline]
pub fn as_byte(&self) -> u8 { pub fn as_byte(&self) -> u8 {
match self.0 { match self.0 {
LineTerminatorImp::Byte(array) => array[0], LineTerminatorImp::Byte(byte) => byte,
LineTerminatorImp::CRLF => b'\n', LineTerminatorImp::CRLF => b'\n',
} }
} }
@ -260,7 +251,7 @@ impl LineTerminator {
#[inline] #[inline]
pub fn as_bytes(&self) -> &[u8] { pub fn as_bytes(&self) -> &[u8] {
match self.0 { match self.0 {
LineTerminatorImp::Byte(ref array) => array, LineTerminatorImp::Byte(ref byte) => std::slice::from_ref(byte),
LineTerminatorImp::CRLF => &[b'\r', b'\n'], LineTerminatorImp::CRLF => &[b'\r', b'\n'],
} }
} }
@ -301,10 +292,10 @@ pub struct ByteSet(BitSet);
#[derive(Clone, Copy)] #[derive(Clone, Copy)]
struct BitSet([u64; 4]); struct BitSet([u64; 4]);
impl fmt::Debug for BitSet { impl std::fmt::Debug for BitSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut fmtd = f.debug_set(); let mut fmtd = f.debug_set();
for b in (0..256).map(|b| b as u8) { for b in 0..=255 {
if ByteSet(*self).contains(b) { if ByteSet(*self).contains(b) {
fmtd.entry(&b); fmtd.entry(&b);
} }
@ -331,12 +322,12 @@ impl ByteSet {
pub fn add(&mut self, byte: u8) { pub fn add(&mut self, byte: u8) {
let bucket = byte / 64; let bucket = byte / 64;
let bit = byte % 64; let bit = byte % 64;
(self.0).0[bucket as usize] |= 1 << bit; (self.0).0[usize::from(bucket)] |= 1 << bit;
} }
/// Add an inclusive range of bytes. /// Add an inclusive range of bytes.
pub fn add_all(&mut self, start: u8, end: u8) { pub fn add_all(&mut self, start: u8, end: u8) {
for b in (start as u64..end as u64 + 1).map(|b| b as u8) { for b in start..=end {
self.add(b); self.add(b);
} }
} }
@ -347,12 +338,12 @@ impl ByteSet {
pub fn remove(&mut self, byte: u8) { pub fn remove(&mut self, byte: u8) {
let bucket = byte / 64; let bucket = byte / 64;
let bit = byte % 64; let bit = byte % 64;
(self.0).0[bucket as usize] &= !(1 << bit); (self.0).0[usize::from(bucket)] &= !(1 << bit);
} }
/// Remove an inclusive range of bytes. /// Remove an inclusive range of bytes.
pub fn remove_all(&mut self, start: u8, end: u8) { pub fn remove_all(&mut self, start: u8, end: u8) {
for b in (start as u64..end as u64 + 1).map(|b| b as u8) { for b in start..=end {
self.remove(b); self.remove(b);
} }
} }
@ -361,7 +352,7 @@ impl ByteSet {
pub fn contains(&self, byte: u8) -> bool { pub fn contains(&self, byte: u8) -> bool {
let bucket = byte / 64; let bucket = byte / 64;
let bit = byte % 64; let bit = byte % 64;
(self.0).0[bucket as usize] & (1 << bit) > 0 (self.0).0[usize::from(bucket)] & (1 << bit) > 0
} }
} }
@ -478,27 +469,27 @@ impl Captures for NoCaptures {
/// NoError provides an error type for matchers that never produce errors. /// NoError provides an error type for matchers that never produce errors.
/// ///
/// This error type implements the `std::error::Error` and `fmt::Display` /// This error type implements the `std::error::Error` and `std::fmt::Display`
/// traits for use in matcher implementations that can never produce errors. /// traits for use in matcher implementations that can never produce errors.
/// ///
/// The `fmt::Debug` and `fmt::Display` impls for this type panics. /// The `std::fmt::Debug` and `std::fmt::Display` impls for this type panics.
#[derive(Debug, Eq, PartialEq)] #[derive(Debug, Eq, PartialEq)]
pub struct NoError(()); pub struct NoError(());
impl ::std::error::Error for NoError { impl std::error::Error for NoError {
fn description(&self) -> &str { fn description(&self) -> &str {
"no error" "no error"
} }
} }
impl fmt::Display for NoError { impl std::fmt::Display for NoError {
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
panic!("BUG for NoError: an impossible error occurred") panic!("BUG for NoError: an impossible error occurred")
} }
} }
impl From<NoError> for io::Error { impl From<NoError> for std::io::Error {
fn from(_: NoError) -> io::Error { fn from(_: NoError) -> std::io::Error {
panic!("BUG for NoError: an impossible error occurred") panic!("BUG for NoError: an impossible error occurred")
} }
} }
@ -547,7 +538,7 @@ pub trait Matcher {
/// use the `NoError` type in this crate. In the future, when the "never" /// use the `NoError` type in this crate. In the future, when the "never"
/// (spelled `!`) type is stabilized, then it should probably be used /// (spelled `!`) type is stabilized, then it should probably be used
/// instead. /// instead.
type Error: fmt::Display; type Error: std::fmt::Display;
/// Returns the start and end byte range of the first match in `haystack` /// Returns the start and end byte range of the first match in `haystack`
/// after `at`, where the byte offsets are relative to that start of /// after `at`, where the byte offsets are relative to that start of

View File

@ -1,5 +1,7 @@
use grep_matcher::{Captures, Match, Matcher}; use {
use regex::bytes::Regex; grep_matcher::{Captures, Match, Matcher},
regex::bytes::Regex,
};
use crate::util::{RegexMatcher, RegexMatcherNoCaps}; use crate::util::{RegexMatcher, RegexMatcherNoCaps};

View File

@ -1,28 +1,29 @@
use std::collections::HashMap; use std::collections::HashMap;
use std::result;
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError}; use {
use regex::bytes::{CaptureLocations, Regex}; grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError},
regex::bytes::{CaptureLocations, Regex},
};
#[derive(Debug)] #[derive(Debug)]
pub struct RegexMatcher { pub(crate) struct RegexMatcher {
pub re: Regex, pub re: Regex,
pub names: HashMap<String, usize>, pub names: HashMap<String, usize>,
} }
impl RegexMatcher { impl RegexMatcher {
pub fn new(re: Regex) -> RegexMatcher { pub(crate) fn new(re: Regex) -> RegexMatcher {
let mut names = HashMap::new(); let mut names = HashMap::new();
for (i, optional_name) in re.capture_names().enumerate() { for (i, optional_name) in re.capture_names().enumerate() {
if let Some(name) = optional_name { if let Some(name) = optional_name {
names.insert(name.to_string(), i); names.insert(name.to_string(), i);
} }
} }
RegexMatcher { re: re, names: names } RegexMatcher { re, names }
} }
} }
type Result<T> = result::Result<T, NoError>; type Result<T> = std::result::Result<T, NoError>;
impl Matcher for RegexMatcher { impl Matcher for RegexMatcher {
type Captures = RegexCaptures; type Captures = RegexCaptures;
@ -63,7 +64,7 @@ impl Matcher for RegexMatcher {
} }
#[derive(Debug)] #[derive(Debug)]
pub struct RegexMatcherNoCaps(pub Regex); pub(crate) struct RegexMatcherNoCaps(pub(crate) Regex);
impl Matcher for RegexMatcherNoCaps { impl Matcher for RegexMatcherNoCaps {
type Captures = NoCaptures; type Captures = NoCaptures;
@ -82,7 +83,7 @@ impl Matcher for RegexMatcherNoCaps {
} }
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct RegexCaptures(CaptureLocations); pub(crate) struct RegexCaptures(CaptureLocations);
impl Captures for RegexCaptures { impl Captures for RegexCaptures {
fn len(&self) -> usize { fn len(&self) -> usize {