mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-11-29 05:57:07 +02:00
libripgrep: initial commit introducing libripgrep
libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation. Closes #162
This commit is contained in:
208
grep-matcher/tests/test_matcher.rs
Normal file
208
grep-matcher/tests/test_matcher.rs
Normal file
@@ -0,0 +1,208 @@
|
||||
use grep_matcher::{Captures, Match, Matcher};
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||||
|
||||
fn matcher(pattern: &str) -> RegexMatcher {
|
||||
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
||||
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
||||
}
|
||||
|
||||
fn m(start: usize, end: usize) -> Match {
|
||||
Match::new(start, end)
|
||||
}
|
||||
|
||||
#[test]
fn find() {
    let re = matcher(r"(\w+)\s+(\w+)");
    // The two words are found as one match; the surrounding spaces are not
    // part of it.
    let found = re.find(b" homer simpson ").unwrap();
    assert_eq!(found, Some(m(1, 14)));
}
|
||||
|
||||
#[test]
fn find_iter() {
    let haystack = b"aa bb cc dd";
    let re = matcher(r"(\w+)\s+(\w+)");

    // Returning `true` from the callback keeps the iteration going, so every
    // match is reported.
    let mut seen = vec![];
    re.find_iter(haystack, |found| {
        seen.push(found);
        true
    })
    .unwrap();
    assert_eq!(seen, vec![m(0, 5), m(6, 11)]);

    // Test that find_iter respects short circuiting: returning `false`
    // stops after the first match.
    seen.clear();
    re.find_iter(haystack, |found| {
        seen.push(found);
        false
    })
    .unwrap();
    assert_eq!(seen, vec![m(0, 5)]);
}
|
||||
|
||||
#[test]
fn try_find_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(\w+)\s+(\w+)");
    let mut seen = vec![];
    // Accept the first match, then fail on the second one. The callback's
    // error must come back to the caller unchanged.
    let err = re
        .try_find_iter(b"aa bb cc dd", |found| {
            if !seen.is_empty() {
                return Err(MyError);
            }
            seen.push(found);
            Ok(true)
        })
        .unwrap()
        .unwrap_err();
    assert_eq!(seen, vec![m(0, 5)]);
    assert_eq!(err, MyError);
}
|
||||
|
||||
#[test]
fn shortest_match() {
    let haystack = b"aaa";
    let re = matcher(r"a+");

    // This tests that the default impl isn't doing anything smart, and simply
    // defers to `find`.
    let via_default_impl = re.shortest_match(haystack).unwrap();
    assert_eq!(via_default_impl, Some(3));

    // The actual underlying regex is smarter.
    let via_regex = re.re.shortest_match(haystack);
    assert_eq!(via_regex, Some(1));
}
|
||||
|
||||
#[test]
fn captures() {
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // Two named groups plus the implicit group for the overall match.
    assert_eq!(re.capture_count(), 3);

    // Name lookups: known names resolve to their index, unknown ones don't.
    let expected_indices: [(&str, Option<usize>); 3] =
        [("a", Some(1)), ("b", Some(2)), ("nada", None)];
    for &(name, index) in expected_indices.iter() {
        assert_eq!(re.capture_index(name), index);
    }

    let mut locs = re.new_captures().unwrap();
    let matched = re.captures(b" homer simpson ", &mut locs).unwrap();
    assert!(matched);

    // Group 0 is the overall match, followed by each word.
    assert_eq!(locs.get(0), Some(m(1, 14)));
    assert_eq!(locs.get(1), Some(m(1, 6)));
    assert_eq!(locs.get(2), Some(m(7, 14)));
}
|
||||
|
||||
#[test]
fn captures_iter() {
    let haystack = b"aa bb cc dd";
    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut locs = re.new_captures().unwrap();

    // For every match, record the overall span followed by both groups.
    let mut seen = vec![];
    re.captures_iter(haystack, &mut locs, |found| {
        seen.extend((0..3).map(|group| found.get(group).unwrap()));
        true
    })
    .unwrap();
    assert_eq!(
        seen,
        vec![
            m(0, 5), m(0, 2), m(3, 5),
            m(6, 11), m(6, 8), m(9, 11),
        ]
    );

    // Test that captures_iter respects short circuiting: returning `false`
    // stops after the first match.
    seen.clear();
    re.captures_iter(haystack, &mut locs, |found| {
        seen.extend((0..3).map(|group| found.get(group).unwrap()));
        false
    })
    .unwrap();
    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
}
|
||||
|
||||
#[test]
fn try_captures_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let re = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut locs = re.new_captures().unwrap();
    let mut seen = vec![];
    // Record the groups of the first match, then fail on the second one. The
    // callback's error must come back to the caller unchanged.
    let err = re
        .try_captures_iter(b"aa bb cc dd", &mut locs, |found| {
            if !seen.is_empty() {
                return Err(MyError);
            }
            seen.extend((0..3).map(|group| found.get(group).unwrap()));
            Ok(true)
        })
        .unwrap()
        .unwrap_err();
    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
    assert_eq!(err, MyError);
}
|
||||
|
||||
// Checks the default impls of the capturing APIs: when the underlying
// matcher doesn't support capturing, every capture-related method must
// fail fast instead of reporting anything.
#[test]
fn no_captures() {
    let haystack = b"homer simpson";
    let re = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");

    // The pattern has named groups, but none of them are visible.
    assert_eq!(re.capture_count(), 0);
    for &name in ["a", "b", "nada"].iter() {
        assert_eq!(re.capture_index(name), None);
    }

    // The haystack would match, yet `captures` reports no match.
    let mut locs = re.new_captures().unwrap();
    assert!(!re.captures(haystack, &mut locs).unwrap());

    // The iteration callback must never run.
    let mut invoked = false;
    re.captures_iter(haystack, &mut locs, |_| {
        invoked = true;
        true
    })
    .unwrap();
    assert!(!invoked);
}
|
||||
|
||||
#[test]
fn replace() {
    let haystack = b"aa bb cc dd";
    let re = matcher(r"(\w+)\s+(\w+)");

    // Each of the two matches becomes a single `z`; the space between the
    // matches is copied through.
    let mut out = vec![];
    re.replace(haystack, &mut out, |_, buf| {
        buf.push(b'z');
        true
    })
    .unwrap();
    assert_eq!(out, b"z z");

    // Test that replacements respect short circuiting: after the callback
    // returns `false`, the rest of the haystack is copied unchanged.
    out.clear();
    re.replace(haystack, &mut out, |_, buf| {
        buf.push(b'z');
        false
    })
    .unwrap();
    assert_eq!(out, b"z cc dd");
}
|
||||
|
||||
#[test]
fn replace_with_captures() {
    let re = matcher(r"(\w+)\s+(\w+)");
    let haystack = b"aa bb cc dd";
    let mut locs = re.new_captures().unwrap();

    // Swap the two words of every match by interpolating `$2 $1`.
    let mut out = vec![];
    re.replace_with_captures(haystack, &mut locs, &mut out, |found, buf| {
        found.interpolate(
            |name| re.capture_index(name),
            haystack,
            b"$2 $1",
            buf,
        );
        true
    })
    .unwrap();
    assert_eq!(out, b"bb aa dd cc");

    // Test that replacements respect short circuiting: only the first match
    // is rewritten, the remainder is copied unchanged.
    out.clear();
    re.replace_with_captures(haystack, &mut locs, &mut out, |found, buf| {
        found.interpolate(
            |name| re.capture_index(name),
            haystack,
            b"$2 $1",
            buf,
        );
        false
    })
    .unwrap();
    assert_eq!(out, b"bb aa cc dd");
}
|
||||
6
grep-matcher/tests/tests.rs
Normal file
6
grep-matcher/tests/tests.rs
Normal file
@@ -0,0 +1,6 @@
|
||||
extern crate grep_matcher;
|
||||
extern crate regex;
|
||||
|
||||
mod util;
|
||||
|
||||
mod test_matcher;
|
||||
104
grep-matcher/tests/util.rs
Normal file
104
grep-matcher/tests/util.rs
Normal file
@@ -0,0 +1,104 @@
|
||||
use std::collections::HashMap;
|
||||
use std::result;
|
||||
|
||||
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
|
||||
use regex::bytes::{CaptureLocations, Regex};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcher {
|
||||
pub re: Regex,
|
||||
pub names: HashMap<String, usize>,
|
||||
}
|
||||
|
||||
impl RegexMatcher {
|
||||
pub fn new(re: Regex) -> RegexMatcher {
|
||||
let mut names = HashMap::new();
|
||||
for (i, optional_name) in re.capture_names().enumerate() {
|
||||
if let Some(name) = optional_name {
|
||||
names.insert(name.to_string(), i);
|
||||
}
|
||||
}
|
||||
RegexMatcher {
|
||||
re: re,
|
||||
names: names,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shorthand for the results returned by the `Matcher` impls in this file,
// whose error type is always `NoError`.
type Result<T> = result::Result<T, NoError>;
|
||||
|
||||
impl Matcher for RegexMatcher {
|
||||
type Captures = RegexCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>> {
|
||||
Ok(self.re
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<RegexCaptures> {
|
||||
Ok(RegexCaptures(self.re.capture_locations()))
|
||||
}
|
||||
|
||||
fn captures_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
caps: &mut RegexCaptures,
|
||||
) -> Result<bool> {
|
||||
Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some())
|
||||
}
|
||||
|
||||
fn capture_count(&self) -> usize {
|
||||
self.re.captures_len()
|
||||
}
|
||||
|
||||
fn capture_index(&self, name: &str) -> Option<usize> {
|
||||
self.names.get(name).map(|i| *i)
|
||||
}
|
||||
|
||||
// We purposely don't implement any other methods, so that we test the
|
||||
// default impls. The "real" Regex impl for Matcher provides a few more
|
||||
// impls. e.g., Its `find_iter` impl is faster than what we can do here,
|
||||
// since the regex crate avoids synchronization overhead.
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RegexMatcherNoCaps(pub Regex);
|
||||
|
||||
impl Matcher for RegexMatcherNoCaps {
|
||||
type Captures = NoCaptures;
|
||||
type Error = NoError;
|
||||
|
||||
fn find_at(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
at: usize,
|
||||
) -> Result<Option<Match>> {
|
||||
Ok(self.0
|
||||
.find_at(haystack, at)
|
||||
.map(|m| Match::new(m.start(), m.end())))
|
||||
}
|
||||
|
||||
fn new_captures(&self) -> Result<NoCaptures> {
|
||||
Ok(NoCaptures::new())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexCaptures(CaptureLocations);
|
||||
|
||||
impl Captures for RegexCaptures {
|
||||
fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
fn get(&self, i: usize) -> Option<Match> {
|
||||
self.0.pos(i).map(|(s, e)| Match::new(s, e))
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user