mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-19 09:02:15 +02:00
matcher: polish the grep-matcher crate
Not much here. Just updating to reflect my current style and bringing the crate to the 2021 edition.
This commit is contained in:
parent
abfa65c2c1
commit
96f01b92a0
@ -12,13 +12,13 @@ readme = "README.md"
|
|||||||
keywords = ["regex", "pattern", "trait"]
|
keywords = ["regex", "pattern", "trait"]
|
||||||
license = "Unlicense OR MIT"
|
license = "Unlicense OR MIT"
|
||||||
autotests = false
|
autotests = false
|
||||||
edition = "2018"
|
edition = "2021"
|
||||||
|
|
||||||
[dependencies]
|
[dependencies]
|
||||||
memchr = "2.1"
|
memchr = "2.6.3"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
regex = "1.1"
|
regex = "1.9.5"
|
||||||
|
|
||||||
[[test]]
|
[[test]]
|
||||||
name = "integration"
|
name = "integration"
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
use std::str;
|
|
||||||
|
|
||||||
use memchr::memchr;
|
use memchr::memchr;
|
||||||
|
|
||||||
/// Interpolate capture references in `replacement` and write the interpolation
|
/// Interpolate capture references in `replacement` and write the interpolation
|
||||||
@ -114,7 +112,7 @@ fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> {
|
|||||||
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
|
// therefore be valid UTF-8. If we really cared, we could avoid this UTF-8
|
||||||
// check with an unchecked conversion or by parsing the number straight
|
// check with an unchecked conversion or by parsing the number straight
|
||||||
// from &[u8].
|
// from &[u8].
|
||||||
let cap = str::from_utf8(&replacement[i..cap_end])
|
let cap = std::str::from_utf8(&replacement[i..cap_end])
|
||||||
.expect("valid UTF-8 capture name");
|
.expect("valid UTF-8 capture name");
|
||||||
if brace {
|
if brace {
|
||||||
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {
|
if !replacement.get(cap_end).map_or(false, |&b| b == b'}') {
|
||||||
|
@ -38,11 +38,6 @@ implementations.
|
|||||||
|
|
||||||
#![deny(missing_docs)]
|
#![deny(missing_docs)]
|
||||||
|
|
||||||
use std::fmt;
|
|
||||||
use std::io;
|
|
||||||
use std::ops;
|
|
||||||
use std::u64;
|
|
||||||
|
|
||||||
use crate::interpolate::interpolate;
|
use crate::interpolate::interpolate;
|
||||||
|
|
||||||
mod interpolate;
|
mod interpolate;
|
||||||
@ -162,7 +157,7 @@ impl Match {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ops::Index<Match> for [u8] {
|
impl std::ops::Index<Match> for [u8] {
|
||||||
type Output = [u8];
|
type Output = [u8];
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
@ -171,14 +166,14 @@ impl ops::Index<Match> for [u8] {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ops::IndexMut<Match> for [u8] {
|
impl std::ops::IndexMut<Match> for [u8] {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn index_mut(&mut self, index: Match) -> &mut [u8] {
|
fn index_mut(&mut self, index: Match) -> &mut [u8] {
|
||||||
&mut self[index.start..index.end]
|
&mut self[index.start..index.end]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl ops::Index<Match> for str {
|
impl std::ops::Index<Match> for str {
|
||||||
type Output = str;
|
type Output = str;
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
@ -204,11 +199,7 @@ pub struct LineTerminator(LineTerminatorImp);
|
|||||||
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
|
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
|
||||||
enum LineTerminatorImp {
|
enum LineTerminatorImp {
|
||||||
/// Any single byte representing a line terminator.
|
/// Any single byte representing a line terminator.
|
||||||
///
|
Byte(u8),
|
||||||
/// We represent this as an array so we can safely convert it to a slice
|
|
||||||
/// for convenient access. At some point, we can use `std::slice::from_ref`
|
|
||||||
/// instead.
|
|
||||||
Byte([u8; 1]),
|
|
||||||
/// A line terminator represented by `\r\n`.
|
/// A line terminator represented by `\r\n`.
|
||||||
///
|
///
|
||||||
/// When this option is used, consumers may generally treat a lone `\n` as
|
/// When this option is used, consumers may generally treat a lone `\n` as
|
||||||
@ -220,7 +211,7 @@ impl LineTerminator {
|
|||||||
/// Return a new single-byte line terminator. Any byte is valid.
|
/// Return a new single-byte line terminator. Any byte is valid.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn byte(byte: u8) -> LineTerminator {
|
pub fn byte(byte: u8) -> LineTerminator {
|
||||||
LineTerminator(LineTerminatorImp::Byte([byte]))
|
LineTerminator(LineTerminatorImp::Byte(byte))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return a new line terminator represented by `\r\n`.
|
/// Return a new line terminator represented by `\r\n`.
|
||||||
@ -246,7 +237,7 @@ impl LineTerminator {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn as_byte(&self) -> u8 {
|
pub fn as_byte(&self) -> u8 {
|
||||||
match self.0 {
|
match self.0 {
|
||||||
LineTerminatorImp::Byte(array) => array[0],
|
LineTerminatorImp::Byte(byte) => byte,
|
||||||
LineTerminatorImp::CRLF => b'\n',
|
LineTerminatorImp::CRLF => b'\n',
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -260,7 +251,7 @@ impl LineTerminator {
|
|||||||
#[inline]
|
#[inline]
|
||||||
pub fn as_bytes(&self) -> &[u8] {
|
pub fn as_bytes(&self) -> &[u8] {
|
||||||
match self.0 {
|
match self.0 {
|
||||||
LineTerminatorImp::Byte(ref array) => array,
|
LineTerminatorImp::Byte(ref byte) => std::slice::from_ref(byte),
|
||||||
LineTerminatorImp::CRLF => &[b'\r', b'\n'],
|
LineTerminatorImp::CRLF => &[b'\r', b'\n'],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -301,10 +292,10 @@ pub struct ByteSet(BitSet);
|
|||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
struct BitSet([u64; 4]);
|
struct BitSet([u64; 4]);
|
||||||
|
|
||||||
impl fmt::Debug for BitSet {
|
impl std::fmt::Debug for BitSet {
|
||||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
let mut fmtd = f.debug_set();
|
let mut fmtd = f.debug_set();
|
||||||
for b in (0..256).map(|b| b as u8) {
|
for b in 0..=255 {
|
||||||
if ByteSet(*self).contains(b) {
|
if ByteSet(*self).contains(b) {
|
||||||
fmtd.entry(&b);
|
fmtd.entry(&b);
|
||||||
}
|
}
|
||||||
@ -331,12 +322,12 @@ impl ByteSet {
|
|||||||
pub fn add(&mut self, byte: u8) {
|
pub fn add(&mut self, byte: u8) {
|
||||||
let bucket = byte / 64;
|
let bucket = byte / 64;
|
||||||
let bit = byte % 64;
|
let bit = byte % 64;
|
||||||
(self.0).0[bucket as usize] |= 1 << bit;
|
(self.0).0[usize::from(bucket)] |= 1 << bit;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Add an inclusive range of bytes.
|
/// Add an inclusive range of bytes.
|
||||||
pub fn add_all(&mut self, start: u8, end: u8) {
|
pub fn add_all(&mut self, start: u8, end: u8) {
|
||||||
for b in (start as u64..end as u64 + 1).map(|b| b as u8) {
|
for b in start..=end {
|
||||||
self.add(b);
|
self.add(b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -347,12 +338,12 @@ impl ByteSet {
|
|||||||
pub fn remove(&mut self, byte: u8) {
|
pub fn remove(&mut self, byte: u8) {
|
||||||
let bucket = byte / 64;
|
let bucket = byte / 64;
|
||||||
let bit = byte % 64;
|
let bit = byte % 64;
|
||||||
(self.0).0[bucket as usize] &= !(1 << bit);
|
(self.0).0[usize::from(bucket)] &= !(1 << bit);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Remove an inclusive range of bytes.
|
/// Remove an inclusive range of bytes.
|
||||||
pub fn remove_all(&mut self, start: u8, end: u8) {
|
pub fn remove_all(&mut self, start: u8, end: u8) {
|
||||||
for b in (start as u64..end as u64 + 1).map(|b| b as u8) {
|
for b in start..=end {
|
||||||
self.remove(b);
|
self.remove(b);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -361,7 +352,7 @@ impl ByteSet {
|
|||||||
pub fn contains(&self, byte: u8) -> bool {
|
pub fn contains(&self, byte: u8) -> bool {
|
||||||
let bucket = byte / 64;
|
let bucket = byte / 64;
|
||||||
let bit = byte % 64;
|
let bit = byte % 64;
|
||||||
(self.0).0[bucket as usize] & (1 << bit) > 0
|
(self.0).0[usize::from(bucket)] & (1 << bit) > 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -478,27 +469,27 @@ impl Captures for NoCaptures {
|
|||||||
|
|
||||||
/// NoError provides an error type for matchers that never produce errors.
|
/// NoError provides an error type for matchers that never produce errors.
|
||||||
///
|
///
|
||||||
/// This error type implements the `std::error::Error` and `fmt::Display`
|
/// This error type implements the `std::error::Error` and `std::fmt::Display`
|
||||||
/// traits for use in matcher implementations that can never produce errors.
|
/// traits for use in matcher implementations that can never produce errors.
|
||||||
///
|
///
|
||||||
/// The `fmt::Debug` and `fmt::Display` impls for this type panics.
|
/// The `std::fmt::Debug` and `std::fmt::Display` impls for this type panics.
|
||||||
#[derive(Debug, Eq, PartialEq)]
|
#[derive(Debug, Eq, PartialEq)]
|
||||||
pub struct NoError(());
|
pub struct NoError(());
|
||||||
|
|
||||||
impl ::std::error::Error for NoError {
|
impl std::error::Error for NoError {
|
||||||
fn description(&self) -> &str {
|
fn description(&self) -> &str {
|
||||||
"no error"
|
"no error"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl fmt::Display for NoError {
|
impl std::fmt::Display for NoError {
|
||||||
fn fmt(&self, _: &mut fmt::Formatter<'_>) -> fmt::Result {
|
fn fmt(&self, _: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
panic!("BUG for NoError: an impossible error occurred")
|
panic!("BUG for NoError: an impossible error occurred")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<NoError> for io::Error {
|
impl From<NoError> for std::io::Error {
|
||||||
fn from(_: NoError) -> io::Error {
|
fn from(_: NoError) -> std::io::Error {
|
||||||
panic!("BUG for NoError: an impossible error occurred")
|
panic!("BUG for NoError: an impossible error occurred")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -547,7 +538,7 @@ pub trait Matcher {
|
|||||||
/// use the `NoError` type in this crate. In the future, when the "never"
|
/// use the `NoError` type in this crate. In the future, when the "never"
|
||||||
/// (spelled `!`) type is stabilized, then it should probably be used
|
/// (spelled `!`) type is stabilized, then it should probably be used
|
||||||
/// instead.
|
/// instead.
|
||||||
type Error: fmt::Display;
|
type Error: std::fmt::Display;
|
||||||
|
|
||||||
/// Returns the start and end byte range of the first match in `haystack`
|
/// Returns the start and end byte range of the first match in `haystack`
|
||||||
/// after `at`, where the byte offsets are relative to that start of
|
/// after `at`, where the byte offsets are relative to that start of
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use grep_matcher::{Captures, Match, Matcher};
|
use {
|
||||||
use regex::bytes::Regex;
|
grep_matcher::{Captures, Match, Matcher},
|
||||||
|
regex::bytes::Regex,
|
||||||
|
};
|
||||||
|
|
||||||
use crate::util::{RegexMatcher, RegexMatcherNoCaps};
|
use crate::util::{RegexMatcher, RegexMatcherNoCaps};
|
||||||
|
|
||||||
|
@ -1,28 +1,29 @@
|
|||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::result;
|
|
||||||
|
|
||||||
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
|
use {
|
||||||
use regex::bytes::{CaptureLocations, Regex};
|
grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError},
|
||||||
|
regex::bytes::{CaptureLocations, Regex},
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct RegexMatcher {
|
pub(crate) struct RegexMatcher {
|
||||||
pub re: Regex,
|
pub re: Regex,
|
||||||
pub names: HashMap<String, usize>,
|
pub names: HashMap<String, usize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl RegexMatcher {
|
impl RegexMatcher {
|
||||||
pub fn new(re: Regex) -> RegexMatcher {
|
pub(crate) fn new(re: Regex) -> RegexMatcher {
|
||||||
let mut names = HashMap::new();
|
let mut names = HashMap::new();
|
||||||
for (i, optional_name) in re.capture_names().enumerate() {
|
for (i, optional_name) in re.capture_names().enumerate() {
|
||||||
if let Some(name) = optional_name {
|
if let Some(name) = optional_name {
|
||||||
names.insert(name.to_string(), i);
|
names.insert(name.to_string(), i);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
RegexMatcher { re: re, names: names }
|
RegexMatcher { re, names }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type Result<T> = result::Result<T, NoError>;
|
type Result<T> = std::result::Result<T, NoError>;
|
||||||
|
|
||||||
impl Matcher for RegexMatcher {
|
impl Matcher for RegexMatcher {
|
||||||
type Captures = RegexCaptures;
|
type Captures = RegexCaptures;
|
||||||
@ -63,7 +64,7 @@ impl Matcher for RegexMatcher {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct RegexMatcherNoCaps(pub Regex);
|
pub(crate) struct RegexMatcherNoCaps(pub(crate) Regex);
|
||||||
|
|
||||||
impl Matcher for RegexMatcherNoCaps {
|
impl Matcher for RegexMatcherNoCaps {
|
||||||
type Captures = NoCaptures;
|
type Captures = NoCaptures;
|
||||||
@ -82,7 +83,7 @@ impl Matcher for RegexMatcherNoCaps {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct RegexCaptures(CaptureLocations);
|
pub(crate) struct RegexCaptures(CaptureLocations);
|
||||||
|
|
||||||
impl Captures for RegexCaptures {
|
impl Captures for RegexCaptures {
|
||||||
fn len(&self) -> usize {
|
fn len(&self) -> usize {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user