1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2024-12-12 19:18:24 +02:00
ripgrep/grep-matcher/tests/test_matcher.rs
Andrew Gallant d9ca529356 libripgrep: initial commit introducing libripgrep
libripgrep is not any one library, but rather, a collection of libraries
that roughly separate the following key distinct phases in a grep
implementation:

  1. Pattern matching (e.g., by a regex engine).
  2. Searching a file using a pattern matcher.
  3. Printing results.

Ultimately, both (1) and (3) are defined by de-coupled interfaces, of
which there may be multiple implementations. Namely, (1) is satisfied by
the `Matcher` trait in the `grep-matcher` crate and (3) is satisfied by
the `Sink` trait in the `grep2` crate. The searcher (2) ties everything
together and finds results using a matcher and reports those results
using a `Sink` implementation.

Closes #162
2018-08-20 07:10:19 -04:00

209 lines
5.9 KiB
Rust

use grep_matcher::{Captures, Match, Matcher};
use regex::bytes::Regex;
use util::{RegexMatcher, RegexMatcherNoCaps};
fn matcher(pattern: &str) -> RegexMatcher {
RegexMatcher::new(Regex::new(pattern).unwrap())
}
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
}
fn m(start: usize, end: usize) -> Match {
Match::new(start, end)
}
#[test]
fn find() {
let matcher = matcher(r"(\w+)\s+(\w+)");
assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14)));
}
#[test]
fn find_iter() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
matcher.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
true
}).unwrap();
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
// Test that find_iter respects short circuiting.
matches.clear();
matcher.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
false
}).unwrap();
assert_eq!(matches, vec![m(0, 5)]);
}
#[test]
fn try_find_iter() {
#[derive(Clone, Debug, Eq, PartialEq)]
struct MyError;
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
let err = matcher.try_find_iter(b"aa bb cc dd", |m| {
if matches.is_empty() {
matches.push(m);
Ok(true)
} else {
Err(MyError)
}
}).unwrap().unwrap_err();
assert_eq!(matches, vec![m(0, 5)]);
assert_eq!(err, MyError);
}
#[test]
fn shortest_match() {
let matcher = matcher(r"a+");
// This tests that the default impl isn't doing anything smart, and simply
// defers to `find`.
assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3));
// The actual underlying regex is smarter.
assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1));
}
#[test]
fn captures() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
assert_eq!(matcher.capture_count(), 3);
assert_eq!(matcher.capture_index("a"), Some(1));
assert_eq!(matcher.capture_index("b"), Some(2));
assert_eq!(matcher.capture_index("nada"), None);
let mut caps = matcher.new_captures().unwrap();
assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap());
assert_eq!(caps.get(0), Some(m(1, 14)));
assert_eq!(caps.get(1), Some(m(1, 6)));
assert_eq!(caps.get(2), Some(m(7, 14)));
}
#[test]
fn captures_iter() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
true
}).unwrap();
assert_eq!(matches, vec![
m(0, 5), m(0, 2), m(3, 5),
m(6, 11), m(6, 8), m(9, 11),
]);
// Test that captures_iter respects short circuiting.
matches.clear();
matcher.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
false
}).unwrap();
assert_eq!(matches, vec![
m(0, 5), m(0, 2), m(3, 5),
]);
}
#[test]
fn try_captures_iter() {
#[derive(Clone, Debug, Eq, PartialEq)]
struct MyError;
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
let err = matcher.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
if matches.is_empty() {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
Ok(true)
} else {
Err(MyError)
}
}).unwrap().unwrap_err();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
assert_eq!(err, MyError);
}
// Test that our default impls for capturing are correct. Namely, when
// capturing isn't supported by the underlying matcher, then all of the
// various capturing related APIs fail fast.
#[test]
fn no_captures() {
let matcher = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");
assert_eq!(matcher.capture_count(), 0);
assert_eq!(matcher.capture_index("a"), None);
assert_eq!(matcher.capture_index("b"), None);
assert_eq!(matcher.capture_index("nada"), None);
let mut caps = matcher.new_captures().unwrap();
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
let mut called = false;
matcher.captures_iter(b"homer simpson", &mut caps, |_| {
called = true;
true
}).unwrap();
assert!(!called);
}
#[test]
fn replace() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut dst = vec![];
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
true
}).unwrap();
assert_eq!(dst, b"z z");
// Test that replacements respect short circuiting.
dst.clear();
matcher.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
false
}).unwrap();
assert_eq!(dst, b"z cc dd");
}
#[test]
fn replace_with_captures() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let haystack = b"aa bb cc dd";
let mut caps = matcher.new_captures().unwrap();
let mut dst = vec![];
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
true
}).unwrap();
assert_eq!(dst, b"bb aa dd cc");
// Test that replacements respect short circuiting.
dst.clear();
matcher.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
false
}).unwrap();
assert_eq!(dst, b"bb aa cc dd");
}