1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-01-03 05:10:12 +02:00

matcher: polish the grep-matcher crate

Not much here. Just updating to reflect my current style and bringing
the crate to the 2021 edition.
This commit is contained in:
Andrew Gallant 2023-09-25 16:48:25 -04:00
parent abfa65c2c1
commit 96f01b92a0
5 changed files with 41 additions and 49 deletions

View File

@ -12,13 +12,13 @@ readme = "README.md"
keywords = ["regex", "pattern", "trait"]
license = "Unlicense OR MIT"
autotests = false
edition = "2018"
edition = "2021"
[dependencies]
memchr = "2.1"
memchr = "2.6.3"
[dev-dependencies]
regex = "1.1"
regex = "1.9.5"
[[test]]
name = "integration"

View File

@ -1,5 +1,3 @@
use std::str;
use memchr::memchr;
/// Interpolate capture references in `replacement` and write the interpolation
@ -114,7 +112,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
// check with an unchecked conversion or by parsing the number straight
// from &[u8].
let cap = str::from_utf8(&replacement[i..cap_end])
let cap = std::str::from_utf8(&replacement[i..cap_end])
.expect("valid UTF-8 capture name");
if brace {
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {

View File

@ -38,11 +38,6 @@ implementations.
#![deny(missing_docs)]
use std::fmt;
use std::io;
use std::ops;
use std::u64;
use crate::interpolate::interpolate;
mod interpolate;
@ -162,7 +157,7 @@ impl Match {
}
}
impl ops::Index<Match> for [u8] {
impl std::ops::Index<Match> for [u8] {
type Output = [u8];
#[inline]
@ -171,14 +166,14 @@ impl ops::Index<Match> for [u8] {
}
}
impl ops::IndexMut<Match> for [u8] {
impl std::ops::IndexMut<Match> for [u8] {
#[inline]
fn index_mut(&mut self, index: Match) -> &mut [u8] {
&mut self[index.start..index.end]
}
}
impl ops::Index<Match> for str {
impl std::ops::Index<Match> for str {
type Output = str;
#[inline]
@ -204,11 +199,7 @@ pub struct LineTerminator(LineTerminatorImp);
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
enum LineTerminatorImp {
/// Any single byte representing a line terminator.
///
/// We represent this as an array so we can safely convert it to a slice
/// for convenient access. At some point, we can use `std::slice::from_ref`
/// instead.
Byte([u8; 1]),
Byte(u8),
/// A line terminator represented by `\r\n`.
///
/// When this option is used, consumers may generally treat a lone `\n` as
@ -220,7 +211,7 @@ impl LineTerminator {
/// Return a new single-byte line terminator. Any byte is valid.
#[inline]
pub fn byte(byte: u8) -> LineTerminator {
LineTerminator(LineTerminatorImp::Byte([byte]))
LineTerminator(LineTerminatorImp::Byte(byte))
}
/// Return a new line terminator represented by `\r\n`.
@ -246,7 +237,7 @@ impl LineTerminator {
#[inline]
pub fn as_byte(&self) -> u8 {
match self.0 {
LineTerminatorImp::Byte(array) => array[0],
LineTerminatorImp::Byte(byte) => byte,
LineTerminatorImp::CRLF => b'\n',
}
}
@ -260,7 +251,7 @@ impl LineTerminator {
#[inline]
pub fn as_bytes(&self) -> &[u8] {
match self.0 {
LineTerminatorImp::Byte(ref array) => array,
LineTerminatorImp::Byte(ref byte) => std::slice::from_ref(byte),
LineTerminatorImp::CRLF => &[b'\r', b'\n'],
}
}
@ -301,10 +292,10 @@ pub struct ByteSet(BitSet);
#[derive(Clone, Copy)]
struct BitSet([u64; 4]);
impl fmt::Debug for BitSet {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
impl std::fmt::Debug for BitSet {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let mut fmtd = f.debug_set();
for b in (0..256).map(|b| b as u8) {
for b in 0..=255 {
if ByteSet(*self).contains(b) {
fmtd.entry(&b);
}
@ -331,12 +322,12 @@ impl ByteSet {
pub fn add(&mut self, byte: u8) {
let bucket = byte / 64;
let bit = byte % 64;
(self.0).0[bucket as usize] |= 1 << bit;
(self.0).0[usize::from(bucket)] |= 1 << bit;
}
/// Add an inclusive range of bytes.
pub fn add_all(&mut self, start: u8, end: u8) {
for b in (start as u64..end as u64 + 1).map(|b| b as u8) {
for b in start..=end {
self.add(b);
}
}
@ -347,12 +338,12 @@ impl ByteSet {
pub fn remove(&mut self, byte: u8) {
let bucket = byte / 64;
let bit = byte % 64;
(self.0).0[bucket as usize] &= !(1 << bit);
(self.0).0[usize::from(bucket)] &= !(1 << bit);
}
/// Remove an inclusive range of bytes.
pub fn remove_all(&mut self, start: u8, end: u8) {
for b in (start as u64..end as u64 + 1).map(|b| b as u8) {
for b in start..=end {
self.remove(b);
}
}
@ -361,7 +352,7 @@ impl ByteSet {
pub fn contains(&self, byte: u8) -> bool {
let bucket = byte / 64;
let bit = byte % 64;
(self.0).0[bucket as usize] & (1 << bit) > 0
(self.0).0[usize::from(bucket)] & (1 << bit) > 0
}
}
@ -478,27 +469,27 @@ impl Captures for NoCaptures {
/// NoError provides an error type for matchers that never produce errors.
///
/// This error type implements the `std::error::Error` and `fmt::Display`
/// This error type implements the `std::error::Error` and `std::fmt::Display`
/// traits for use in matcher implementations that can never produce errors.
///
/// The `fmt::Debug` and `fmt::Display` impls for this type panics.
/// The `std::fmt::Debug` and `std::fmt::Display` impls for this type panics.
#[derive(Debug, Eq, PartialEq)]
pub struct NoError(());
impl ::std::error::Error for NoError {
impl std::error::Error for NoError {
fn description(&self) -> &str {
"no error"
}
}
impl fmt::Display for NoError {
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
impl std::fmt::Display for NoError {
fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
panic!("BUG for NoError: an impossible error occurred")
}
}
impl From<NoError> for io::Error {
fn from(_: NoError) -> io::Error {
impl From<NoError> for std::io::Error {
fn from(_: NoError) -> std::io::Error {
panic!("BUG for NoError: an impossible error occurred")
}
}
@ -547,7 +538,7 @@ pub trait Matcher {
/// use the `NoError` type in this crate. In the future, when the "never"
/// (spelled `!`) type is stabilized, then it should probably be used
/// instead.
type Error: fmt::Display;
type Error: std::fmt::Display;
/// Returns the start and end byte range of the first match in `haystack`
/// after `at`, where the byte offsets are relative to that start of

View File

@ -1,5 +1,7 @@
use grep_matcher::{Captures, Match, Matcher};
use regex::bytes::Regex;
use {
grep_matcher::{Captures, Match, Matcher},
regex::bytes::Regex,
};
use crate::util::{RegexMatcher, RegexMatcherNoCaps};

View File

@ -1,28 +1,29 @@
use std::collections::HashMap;
use std::result;
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
use regex::bytes::{CaptureLocations, Regex};
use {
grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError},
regex::bytes::{CaptureLocations, Regex},
};
#[derive(Debug)]
pub struct RegexMatcher {
pub(crate) struct RegexMatcher {
pub re: Regex,
pub names: HashMap<String, usize>,
}
impl RegexMatcher {
pub fn new(re: Regex) -> RegexMatcher {
pub(crate) fn new(re: Regex) -> RegexMatcher {
let mut names = HashMap::new();
for (i, optional_name) in re.capture_names().enumerate() {
if let Some(name) = optional_name {
names.insert(name.to_string(), i);
}
}
RegexMatcher { re: re, names: names }
RegexMatcher { re, names }
}
}
type Result<T> = result::Result<T, NoError>;
type Result<T> = std::result::Result<T, NoError>;
impl Matcher for RegexMatcher {
type Captures = RegexCaptures;
@ -63,7 +64,7 @@ impl Matcher for RegexMatcher {
}
#[derive(Debug)]
pub struct RegexMatcherNoCaps(pub Regex);
pub(crate) struct RegexMatcherNoCaps(pub(crate) Regex);
impl Matcher for RegexMatcherNoCaps {
type Captures = NoCaptures;
@ -82,7 +83,7 @@ impl Matcher for RegexMatcherNoCaps {
}
#[derive(Clone, Debug)]
pub struct RegexCaptures(CaptureLocations);
pub(crate) struct RegexCaptures(CaptureLocations);
impl Captures for RegexCaptures {
fn len(&self) -> usize {