mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
209 lines
5.9 KiB
Rust
209 lines
5.9 KiB
Rust
|
use grep_matcher::{Captures, Match, Matcher};
|
||
|
use regex::bytes::Regex;
|
||
|
|
||
|
use util::{RegexMatcher, RegexMatcherNoCaps};
|
||
|
|
||
|
fn matcher(pattern: &str) -> RegexMatcher {
|
||
|
RegexMatcher::new(Regex::new(pattern).unwrap())
|
||
|
}
|
||
|
|
||
|
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
||
|
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
||
|
}
|
||
|
|
||
|
fn m(start: usize, end: usize) -> Match {
|
||
|
Match::new(start, end)
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn find() {
|
||
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||
|
assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14)));
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn find_iter() {
|
||
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||
|
let mut matches = vec![];
|
||
|
matcher.find_iter(b"aa bb cc dd", |m| {
|
||
|
matches.push(m);
|
||
|
true
|
||
|
}).unwrap();
|
||
|
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
|
||
|
|
||
|
// Test that find_iter respects short circuiting.
|
||
|
matches.clear();
|
||
|
matcher.find_iter(b"aa bb cc dd", |m| {
|
||
|
matches.push(m);
|
||
|
false
|
||
|
}).unwrap();
|
||
|
assert_eq!(matches, vec![m(0, 5)]);
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn try_find_iter() {
|
||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||
|
struct MyError;
|
||
|
|
||
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||
|
let mut matches = vec![];
|
||
|
let err = matcher.try_find_iter(b"aa bb cc dd", |m| {
|
||
|
if matches.is_empty() {
|
||
|
matches.push(m);
|
||
|
Ok(true)
|
||
|
} else {
|
||
|
Err(MyError)
|
||
|
}
|
||
|
}).unwrap().unwrap_err();
|
||
|
assert_eq!(matches, vec![m(0, 5)]);
|
||
|
assert_eq!(err, MyError);
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn shortest_match() {
|
||
|
let matcher = matcher(r"a+");
|
||
|
// This tests that the default impl isn't doing anything smart, and simply
|
||
|
// defers to `find`.
|
||
|
assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3));
|
||
|
// The actual underlying regex is smarter.
|
||
|
assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1));
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn captures() {
|
||
|
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||
|
assert_eq!(matcher.capture_count(), 3);
|
||
|
assert_eq!(matcher.capture_index("a"), Some(1));
|
||
|
assert_eq!(matcher.capture_index("b"), Some(2));
|
||
|
assert_eq!(matcher.capture_index("nada"), None);
|
||
|
|
||
|
let mut caps = matcher.new_captures().unwrap();
|
||
|
assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap());
|
||
|
assert_eq!(caps.get(0), Some(m(1, 14)));
|
||
|
assert_eq!(caps.get(1), Some(m(1, 6)));
|
||
|
assert_eq!(caps.get(2), Some(m(7, 14)));
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn captures_iter() {
|
||
|
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||
|
let mut caps = matcher.new_captures().unwrap();
|
||
|
let mut matches = vec![];
|
||
|
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
||
|
matches.push(caps.get(0).unwrap());
|
||
|
matches.push(caps.get(1).unwrap());
|
||
|
matches.push(caps.get(2).unwrap());
|
||
|
true
|
||
|
}).unwrap();
|
||
|
assert_eq!(matches, vec![
|
||
|
m(0, 5), m(0, 2), m(3, 5),
|
||
|
m(6, 11), m(6, 8), m(9, 11),
|
||
|
]);
|
||
|
|
||
|
// Test that captures_iter respects short circuiting.
|
||
|
matches.clear();
|
||
|
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
||
|
matches.push(caps.get(0).unwrap());
|
||
|
matches.push(caps.get(1).unwrap());
|
||
|
matches.push(caps.get(2).unwrap());
|
||
|
false
|
||
|
}).unwrap();
|
||
|
assert_eq!(matches, vec![
|
||
|
m(0, 5), m(0, 2), m(3, 5),
|
||
|
]);
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn try_captures_iter() {
|
||
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
||
|
struct MyError;
|
||
|
|
||
|
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||
|
let mut caps = matcher.new_captures().unwrap();
|
||
|
let mut matches = vec![];
|
||
|
let err = matcher.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
||
|
if matches.is_empty() {
|
||
|
matches.push(caps.get(0).unwrap());
|
||
|
matches.push(caps.get(1).unwrap());
|
||
|
matches.push(caps.get(2).unwrap());
|
||
|
Ok(true)
|
||
|
} else {
|
||
|
Err(MyError)
|
||
|
}
|
||
|
}).unwrap().unwrap_err();
|
||
|
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
|
||
|
assert_eq!(err, MyError);
|
||
|
}
|
||
|
|
||
|
// Test that our default impls for capturing are correct. Namely, when
|
||
|
// capturing isn't supported by the underlying matcher, then all of the
|
||
|
// various capturing related APIs fail fast.
|
||
|
#[test]
|
||
|
fn no_captures() {
|
||
|
let matcher = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
||
|
assert_eq!(matcher.capture_count(), 0);
|
||
|
assert_eq!(matcher.capture_index("a"), None);
|
||
|
assert_eq!(matcher.capture_index("b"), None);
|
||
|
assert_eq!(matcher.capture_index("nada"), None);
|
||
|
|
||
|
let mut caps = matcher.new_captures().unwrap();
|
||
|
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
|
||
|
|
||
|
let mut called = false;
|
||
|
matcher.captures_iter(b"homer simpson", &mut caps, |_| {
|
||
|
called = true;
|
||
|
true
|
||
|
}).unwrap();
|
||
|
assert!(!called);
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn replace() {
|
||
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||
|
let mut dst = vec![];
|
||
|
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
|
||
|
dst.push(b'z');
|
||
|
true
|
||
|
}).unwrap();
|
||
|
assert_eq!(dst, b"z z");
|
||
|
|
||
|
// Test that replacements respect short circuiting.
|
||
|
dst.clear();
|
||
|
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
|
||
|
dst.push(b'z');
|
||
|
false
|
||
|
}).unwrap();
|
||
|
assert_eq!(dst, b"z cc dd");
|
||
|
}
|
||
|
|
||
|
#[test]
|
||
|
fn replace_with_captures() {
|
||
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
||
|
let haystack = b"aa bb cc dd";
|
||
|
let mut caps = matcher.new_captures().unwrap();
|
||
|
let mut dst = vec![];
|
||
|
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
|
||
|
caps.interpolate(
|
||
|
|name| matcher.capture_index(name),
|
||
|
haystack,
|
||
|
b"$2 $1",
|
||
|
dst,
|
||
|
);
|
||
|
true
|
||
|
}).unwrap();
|
||
|
assert_eq!(dst, b"bb aa dd cc");
|
||
|
|
||
|
// Test that replacements respect short circuiting.
|
||
|
dst.clear();
|
||
|
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
|
||
|
caps.interpolate(
|
||
|
|name| matcher.capture_index(name),
|
||
|
haystack,
|
||
|
b"$2 $1",
|
||
|
dst,
|
||
|
);
|
||
|
false
|
||
|
}).unwrap();
|
||
|
assert_eq!(dst, b"bb aa cc dd");
|
||
|
}
|