mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
d9ca529356
libripgrep is not any one library, but rather, a collection of libraries that roughly separate the following key distinct phases in a grep implementation: 1. Pattern matching (e.g., by a regex engine). 2. Searching a file using a pattern matcher. 3. Printing results. Ultimately, both (1) and (3) are defined by de-coupled interfaces, of which there may be multiple implementations. Namely, (1) is satisfied by the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by the `Sink` trait in the `grep2` crate. The searcher (2) ties everything together and finds results using a matcher and reports those results using a `Sink` implementation. Closes #162
209 lines
5.9 KiB
Rust
209 lines
5.9 KiB
Rust
use grep_matcher::{Captures, Match, Matcher};
|
|
use regex::bytes::Regex;
|
|
|
|
use util::{RegexMatcher, RegexMatcherNoCaps};
|
|
|
|
fn matcher(pattern: &str) -> RegexMatcher {
|
|
RegexMatcher::new(Regex::new(pattern).unwrap())
|
|
}
|
|
|
|
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
|
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
|
}
|
|
|
|
fn m(start: usize, end: usize) -> Match {
|
|
Match::new(start, end)
|
|
}
|
|
|
|
// `find` on a haystack padded with spaces should report the span covering
// both words, excluding the surrounding padding.
#[test]
fn find() {
    let subject = matcher(r"(\w+)\s+(\w+)");
    let found = subject.find(b" homer simpson ").unwrap();
    assert_eq!(found, Some(m(1, 14)));
}
|
|
|
|
// `find_iter` hands every match to the callback, and stops as soon as the
// callback returns `false`.
#[test]
fn find_iter() {
    let haystack: &[u8] = b"aa bb cc dd";
    let subject = matcher(r"(\w+)\s+(\w+)");
    let mut seen = vec![];

    // The callback keeps returning `true`, so both word pairs are reported.
    subject
        .find_iter(haystack, |span| {
            seen.push(span);
            true
        })
        .unwrap();
    assert_eq!(seen, vec![m(0, 5), m(6, 11)]);

    // Test that find_iter respects short circuiting: returning `false` from
    // the callback leaves only the first match recorded.
    seen.clear();
    subject
        .find_iter(haystack, |span| {
            seen.push(span);
            false
        })
        .unwrap();
    assert_eq!(seen, vec![m(0, 5)]);
}
|
|
|
|
// `try_find_iter` should surface an error returned by the callback, after
// having delivered the matches that preceded it.
#[test]
fn try_find_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let subject = matcher(r"(\w+)\s+(\w+)");
    let mut seen = vec![];
    let outcome = subject
        .try_find_iter(b"aa bb cc dd", |span| {
            // Accept the first match only; any later callback fails.
            if !seen.is_empty() {
                return Err(MyError);
            }
            seen.push(span);
            Ok(true)
        })
        .unwrap();
    let err = outcome.unwrap_err();

    assert_eq!(seen, vec![m(0, 5)]);
    assert_eq!(err, MyError);
}
|
|
|
|
// Contrasts the trait-level `shortest_match` with the one offered by the
// wrapped regex.
#[test]
fn shortest_match() {
    let subject = matcher(r"a+");

    // The default impl isn't doing anything smart: it simply defers to
    // `find`, so it reports the end of the full match.
    let via_trait = subject.shortest_match(b"aaa").unwrap();
    assert_eq!(via_trait, Some(3));

    // The actual underlying regex is smarter and stops after the first `a`.
    let via_regex = subject.re.shortest_match(b"aaa");
    assert_eq!(via_regex, Some(1));
}
|
|
|
|
// Checks capture-group metadata (count and name lookup) and the spans
// recorded for each group after a successful `captures` call.
#[test]
fn captures() {
    let subject = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");

    assert_eq!(subject.capture_count(), 3);
    let name_lookups: [(&str, Option<usize>); 3] =
        [("a", Some(1)), ("b", Some(2)), ("nada", None)];
    for &(name, index) in name_lookups.iter() {
        assert_eq!(subject.capture_index(name), index);
    }

    let mut caps = subject.new_captures().unwrap();
    let matched = subject.captures(b" homer simpson ", &mut caps).unwrap();
    assert!(matched);

    // Expected spans, listed in group order starting at group 0.
    let expected_spans = vec![m(1, 14), m(1, 6), m(7, 14)];
    for (group, span) in expected_spans.into_iter().enumerate() {
        assert_eq!(caps.get(group), Some(span));
    }
}
|
|
|
|
// `captures_iter` hands the capture groups of every match to the callback,
// and stops as soon as the callback returns `false`.
#[test]
fn captures_iter() {
    let haystack: &[u8] = b"aa bb cc dd";
    let subject = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut caps = subject.new_captures().unwrap();
    let mut seen = vec![];

    // Record groups 0, 1 and 2 of every match.
    subject
        .captures_iter(haystack, &mut caps, |groups| {
            for index in 0..3 {
                seen.push(groups.get(index).unwrap());
            }
            true
        })
        .unwrap();
    let every_match = vec![
        m(0, 5), m(0, 2), m(3, 5),
        m(6, 11), m(6, 8), m(9, 11),
    ];
    assert_eq!(seen, every_match);

    // Test that captures_iter respects short circuiting: returning `false`
    // leaves only the groups of the first match recorded.
    seen.clear();
    subject
        .captures_iter(haystack, &mut caps, |groups| {
            for index in 0..3 {
                seen.push(groups.get(index).unwrap());
            }
            false
        })
        .unwrap();
    let first_match_only = vec![m(0, 5), m(0, 2), m(3, 5)];
    assert_eq!(seen, first_match_only);
}
|
|
|
|
// `try_captures_iter` should surface an error returned by the callback,
// after having delivered the captures that preceded it.
#[test]
fn try_captures_iter() {
    #[derive(Clone, Debug, Eq, PartialEq)]
    struct MyError;

    let subject = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
    let mut caps = subject.new_captures().unwrap();
    let mut seen = vec![];
    let outcome = subject
        .try_captures_iter(b"aa bb cc dd", &mut caps, |groups| {
            // Accept the first match only; any later callback fails.
            if !seen.is_empty() {
                return Err(MyError);
            }
            for index in 0..3 {
                seen.push(groups.get(index).unwrap());
            }
            Ok(true)
        })
        .unwrap();
    let err = outcome.unwrap_err();

    assert_eq!(seen, vec![m(0, 5), m(0, 2), m(3, 5)]);
    assert_eq!(err, MyError);
}
|
|
|
|
// Test that our default impls for capturing are correct. Namely, when
// capturing isn't supported by the underlying matcher, then all of the
// various capturing related APIs fail fast: no groups are reported, no name
// resolves to an index, and neither `captures` nor `captures_iter` yields
// a match.
#[test]
fn no_captures() {
    let haystack: &[u8] = b"homer simpson";
    let subject = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");

    assert_eq!(subject.capture_count(), 0);
    for &name in ["a", "b", "nada"].iter() {
        assert_eq!(subject.capture_index(name), None);
    }

    let mut caps = subject.new_captures().unwrap();
    let matched = subject.captures(haystack, &mut caps).unwrap();
    assert!(!matched);

    // The callback must never run.
    let mut invoked = false;
    subject
        .captures_iter(haystack, &mut caps, |_| {
            invoked = true;
            true
        })
        .unwrap();
    assert!(!invoked);
}
|
|
|
|
// `replace` substitutes whatever the callback appends for each match while
// copying the unmatched text through, and stops replacing once the callback
// returns `false`.
#[test]
fn replace() {
    let haystack: &[u8] = b"aa bb cc dd";
    let subject = matcher(r"(\w+)\s+(\w+)");
    let mut out = vec![];

    // Each of the two matches collapses to a single `z`.
    subject
        .replace(haystack, &mut out, |_, buf| {
            buf.push(b'z');
            true
        })
        .unwrap();
    assert_eq!(out, b"z z");

    // Test that replacements respect short circuiting: only the first match
    // is rewritten and the remainder of the haystack is kept verbatim.
    out.clear();
    subject
        .replace(haystack, &mut out, |_, buf| {
            buf.push(b'z');
            false
        })
        .unwrap();
    assert_eq!(out, b"z cc dd");
}
|
|
|
|
// `replace_with_captures` exposes the capture groups to the callback, which
// here interpolates a `$2 $1` template to swap the two words of each match.
#[test]
fn replace_with_captures() {
    let haystack: &[u8] = b"aa bb cc dd";
    let template: &[u8] = b"$2 $1";
    let subject = matcher(r"(\w+)\s+(\w+)");
    let mut caps = subject.new_captures().unwrap();
    let mut out = vec![];

    // Both matches are rewritten with their words swapped.
    subject
        .replace_with_captures(haystack, &mut caps, &mut out, |groups, buf| {
            groups.interpolate(
                |group_name| subject.capture_index(group_name),
                haystack,
                template,
                buf,
            );
            true
        })
        .unwrap();
    assert_eq!(out, b"bb aa dd cc");

    // Test that replacements respect short circuiting: only the first match
    // is rewritten and the remainder of the haystack is kept verbatim.
    out.clear();
    subject
        .replace_with_captures(haystack, &mut caps, &mut out, |groups, buf| {
            groups.interpolate(
                |group_name| subject.capture_index(group_name),
                haystack,
                template,
                buf,
            );
            false
        })
        .unwrap();
    assert_eq!(out, b"bb aa cc dd");
}
|