mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-03-03 14:32:22 +02:00
The top-level listing was just getting a bit too long for my taste. So put all of the code in one directory and shrink the large top-level mess to a small top-level mess. NOTE: This commit only contains renames. The subsequent commit will actually make ripgrep build again. We do it this way with the naive hope that this will make it easier for git history to track the renames. Sigh.
231 lines
6.3 KiB
Rust
231 lines
6.3 KiB
Rust
use grep_matcher::{Captures, Match, Matcher};
|
|
use regex::bytes::Regex;
|
|
|
|
use util::{RegexMatcher, RegexMatcherNoCaps};
|
|
|
|
fn matcher(pattern: &str) -> RegexMatcher {
|
|
RegexMatcher::new(Regex::new(pattern).unwrap())
|
|
}
|
|
|
|
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
|
|
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
|
|
}
|
|
|
|
fn m(start: usize, end: usize) -> Match {
|
|
Match::new(start, end)
|
|
}
|
|
|
|
#[test]
|
|
fn find() {
|
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
|
assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14)));
|
|
}
|
|
|
|
#[test]
|
|
fn find_iter() {
|
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
|
let mut matches = vec![];
|
|
matcher
|
|
.find_iter(b"aa bb cc dd", |m| {
|
|
matches.push(m);
|
|
true
|
|
})
|
|
.unwrap();
|
|
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
|
|
|
|
// Test that find_iter respects short circuiting.
|
|
matches.clear();
|
|
matcher
|
|
.find_iter(b"aa bb cc dd", |m| {
|
|
matches.push(m);
|
|
false
|
|
})
|
|
.unwrap();
|
|
assert_eq!(matches, vec![m(0, 5)]);
|
|
}
|
|
|
|
#[test]
|
|
fn try_find_iter() {
|
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
|
struct MyError;
|
|
|
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
|
let mut matches = vec![];
|
|
let err = matcher
|
|
.try_find_iter(b"aa bb cc dd", |m| {
|
|
if matches.is_empty() {
|
|
matches.push(m);
|
|
Ok(true)
|
|
} else {
|
|
Err(MyError)
|
|
}
|
|
})
|
|
.unwrap()
|
|
.unwrap_err();
|
|
assert_eq!(matches, vec![m(0, 5)]);
|
|
assert_eq!(err, MyError);
|
|
}
|
|
|
|
#[test]
|
|
fn shortest_match() {
|
|
let matcher = matcher(r"a+");
|
|
// This tests that the default impl isn't doing anything smart, and simply
|
|
// defers to `find`.
|
|
assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3));
|
|
// The actual underlying regex is smarter.
|
|
assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1));
|
|
}
|
|
|
|
#[test]
|
|
fn captures() {
|
|
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
|
assert_eq!(matcher.capture_count(), 3);
|
|
assert_eq!(matcher.capture_index("a"), Some(1));
|
|
assert_eq!(matcher.capture_index("b"), Some(2));
|
|
assert_eq!(matcher.capture_index("nada"), None);
|
|
|
|
let mut caps = matcher.new_captures().unwrap();
|
|
assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap());
|
|
assert_eq!(caps.get(0), Some(m(1, 14)));
|
|
assert_eq!(caps.get(1), Some(m(1, 6)));
|
|
assert_eq!(caps.get(2), Some(m(7, 14)));
|
|
}
|
|
|
|
#[test]
|
|
fn captures_iter() {
|
|
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
|
let mut caps = matcher.new_captures().unwrap();
|
|
let mut matches = vec![];
|
|
matcher
|
|
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
|
matches.push(caps.get(0).unwrap());
|
|
matches.push(caps.get(1).unwrap());
|
|
matches.push(caps.get(2).unwrap());
|
|
true
|
|
})
|
|
.unwrap();
|
|
assert_eq!(
|
|
matches,
|
|
vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11),]
|
|
);
|
|
|
|
// Test that captures_iter respects short circuiting.
|
|
matches.clear();
|
|
matcher
|
|
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
|
matches.push(caps.get(0).unwrap());
|
|
matches.push(caps.get(1).unwrap());
|
|
matches.push(caps.get(2).unwrap());
|
|
false
|
|
})
|
|
.unwrap();
|
|
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5),]);
|
|
}
|
|
|
|
#[test]
|
|
fn try_captures_iter() {
|
|
#[derive(Clone, Debug, Eq, PartialEq)]
|
|
struct MyError;
|
|
|
|
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
|
let mut caps = matcher.new_captures().unwrap();
|
|
let mut matches = vec![];
|
|
let err = matcher
|
|
.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
|
|
if matches.is_empty() {
|
|
matches.push(caps.get(0).unwrap());
|
|
matches.push(caps.get(1).unwrap());
|
|
matches.push(caps.get(2).unwrap());
|
|
Ok(true)
|
|
} else {
|
|
Err(MyError)
|
|
}
|
|
})
|
|
.unwrap()
|
|
.unwrap_err();
|
|
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
|
|
assert_eq!(err, MyError);
|
|
}
|
|
|
|
// Test that our default impls for capturing are correct. Namely, when
|
|
// capturing isn't supported by the underlying matcher, then all of the
|
|
// various capturing related APIs fail fast.
|
|
#[test]
|
|
fn no_captures() {
|
|
let matcher = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");
|
|
assert_eq!(matcher.capture_count(), 0);
|
|
assert_eq!(matcher.capture_index("a"), None);
|
|
assert_eq!(matcher.capture_index("b"), None);
|
|
assert_eq!(matcher.capture_index("nada"), None);
|
|
|
|
let mut caps = matcher.new_captures().unwrap();
|
|
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
|
|
|
|
let mut called = false;
|
|
matcher
|
|
.captures_iter(b"homer simpson", &mut caps, |_| {
|
|
called = true;
|
|
true
|
|
})
|
|
.unwrap();
|
|
assert!(!called);
|
|
}
|
|
|
|
#[test]
|
|
fn replace() {
|
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
|
let mut dst = vec![];
|
|
matcher
|
|
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
|
|
dst.push(b'z');
|
|
true
|
|
})
|
|
.unwrap();
|
|
assert_eq!(dst, b"z z");
|
|
|
|
// Test that replacements respect short circuiting.
|
|
dst.clear();
|
|
matcher
|
|
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
|
|
dst.push(b'z');
|
|
false
|
|
})
|
|
.unwrap();
|
|
assert_eq!(dst, b"z cc dd");
|
|
}
|
|
|
|
#[test]
|
|
fn replace_with_captures() {
|
|
let matcher = matcher(r"(\w+)\s+(\w+)");
|
|
let haystack = b"aa bb cc dd";
|
|
let mut caps = matcher.new_captures().unwrap();
|
|
let mut dst = vec![];
|
|
matcher
|
|
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
|
|
caps.interpolate(
|
|
|name| matcher.capture_index(name),
|
|
haystack,
|
|
b"$2 $1",
|
|
dst,
|
|
);
|
|
true
|
|
})
|
|
.unwrap();
|
|
assert_eq!(dst, b"bb aa dd cc");
|
|
|
|
// Test that replacements respect short circuiting.
|
|
dst.clear();
|
|
matcher
|
|
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
|
|
caps.interpolate(
|
|
|name| matcher.capture_index(name),
|
|
haystack,
|
|
b"$2 $1",
|
|
dst,
|
|
);
|
|
false
|
|
})
|
|
.unwrap();
|
|
assert_eq!(dst, b"bb aa cc dd");
|
|
}
|