mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
d9ca529356
libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation. Closes #162
105 lines
2.5 KiB
Rust
105 lines
2.5 KiB
Rust
use std::collections::HashMap;
|
|
use std::result;
|
|
|
|
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
|
|
use regex::bytes::{CaptureLocations, Regex};
|
|
|
|
#[derive(Debug)]
|
|
pub struct RegexMatcher {
|
|
pub re: Regex,
|
|
pub names: HashMap<String, usize>,
|
|
}
|
|
|
|
impl RegexMatcher {
|
|
pub fn new(re: Regex) -> RegexMatcher {
|
|
let mut names = HashMap::new();
|
|
for (i, optional_name) in re.capture_names().enumerate() {
|
|
if let Some(name) = optional_name {
|
|
names.insert(name.to_string(), i);
|
|
}
|
|
}
|
|
RegexMatcher {
|
|
re: re,
|
|
names: names,
|
|
}
|
|
}
|
|
}
|
|
|
|
type Result<T> = result::Result<T, NoError>;
|
|
|
|
impl Matcher for RegexMatcher {
|
|
type Captures = RegexCaptures;
|
|
type Error = NoError;
|
|
|
|
fn find_at(
|
|
&self,
|
|
haystack: &[u8],
|
|
at: usize,
|
|
) -> Result<Option<Match>> {
|
|
Ok(self.re
|
|
.find_at(haystack, at)
|
|
.map(|m| Match::new(m.start(), m.end())))
|
|
}
|
|
|
|
fn new_captures(&self) -> Result<RegexCaptures> {
|
|
Ok(RegexCaptures(self.re.capture_locations()))
|
|
}
|
|
|
|
fn captures_at(
|
|
&self,
|
|
haystack: &[u8],
|
|
at: usize,
|
|
caps: &mut RegexCaptures,
|
|
) -> Result<bool> {
|
|
Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some())
|
|
}
|
|
|
|
fn capture_count(&self) -> usize {
|
|
self.re.captures_len()
|
|
}
|
|
|
|
fn capture_index(&self, name: &str) -> Option<usize> {
|
|
self.names.get(name).map(|i| *i)
|
|
}
|
|
|
|
// We purposely don't implement any other methods, so that we test the
|
|
// default impls. The "real" Regex impl for Matcher provides a few more
|
|
// impls. e.g., Its `find_iter` impl is faster than what we can do here,
|
|
// since the regex crate avoids synchronization overhead.
|
|
}
|
|
|
|
#[derive(Debug)]
|
|
pub struct RegexMatcherNoCaps(pub Regex);
|
|
|
|
impl Matcher for RegexMatcherNoCaps {
|
|
type Captures = NoCaptures;
|
|
type Error = NoError;
|
|
|
|
fn find_at(
|
|
&self,
|
|
haystack: &[u8],
|
|
at: usize,
|
|
) -> Result<Option<Match>> {
|
|
Ok(self.0
|
|
.find_at(haystack, at)
|
|
.map(|m| Match::new(m.start(), m.end())))
|
|
}
|
|
|
|
fn new_captures(&self) -> Result<NoCaptures> {
|
|
Ok(NoCaptures::new())
|
|
}
|
|
}
|
|
|
|
#[derive(Clone, Debug)]
|
|
pub struct RegexCaptures(CaptureLocations);
|
|
|
|
impl Captures for RegexCaptures {
|
|
fn len(&self) -> usize {
|
|
self.0.len()
|
|
}
|
|
|
|
fn get(&self, i: usize) -> Option<Match> {
|
|
self.0.pos(i).map(|(s, e)| Match::new(s, e))
|
|
}
|
|
}
|