1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-11-23 21:54:45 +02:00

repo: move all source code in crates directory

The top-level listing was just getting a bit too long for my taste. So
put all of the code in one directory and shrink the large top-level mess
to a small top-level mess.

NOTE: This commit only contains renames. The subsequent commit will
actually make ripgrep build again. We do it this way with the naive hope
that this will make it easier for git history to track the renames.
Sigh.
This commit is contained in:
Andrew Gallant
2020-02-17 18:19:19 -05:00
parent 0bc4f0447b
commit fdd8510fdd
113 changed files with 0 additions and 0 deletions

View File

@@ -0,0 +1,230 @@
use grep_matcher::{Captures, Match, Matcher};
use regex::bytes::Regex;
use util::{RegexMatcher, RegexMatcherNoCaps};
fn matcher(pattern: &str) -> RegexMatcher {
RegexMatcher::new(Regex::new(pattern).unwrap())
}
fn matcher_no_caps(pattern: &str) -> RegexMatcherNoCaps {
RegexMatcherNoCaps(Regex::new(pattern).unwrap())
}
fn m(start: usize, end: usize) -> Match {
Match::new(start, end)
}
#[test]
fn find() {
let matcher = matcher(r"(\w+)\s+(\w+)");
assert_eq!(matcher.find(b" homer simpson ").unwrap(), Some(m(1, 14)));
}
#[test]
fn find_iter() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
matcher
.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
true
})
.unwrap();
assert_eq!(matches, vec![m(0, 5), m(6, 11)]);
// Test that find_iter respects short circuiting.
matches.clear();
matcher
.find_iter(b"aa bb cc dd", |m| {
matches.push(m);
false
})
.unwrap();
assert_eq!(matches, vec![m(0, 5)]);
}
#[test]
fn try_find_iter() {
#[derive(Clone, Debug, Eq, PartialEq)]
struct MyError;
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut matches = vec![];
let err = matcher
.try_find_iter(b"aa bb cc dd", |m| {
if matches.is_empty() {
matches.push(m);
Ok(true)
} else {
Err(MyError)
}
})
.unwrap()
.unwrap_err();
assert_eq!(matches, vec![m(0, 5)]);
assert_eq!(err, MyError);
}
#[test]
fn shortest_match() {
let matcher = matcher(r"a+");
// This tests that the default impl isn't doing anything smart, and simply
// defers to `find`.
assert_eq!(matcher.shortest_match(b"aaa").unwrap(), Some(3));
// The actual underlying regex is smarter.
assert_eq!(matcher.re.shortest_match(b"aaa"), Some(1));
}
#[test]
fn captures() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
assert_eq!(matcher.capture_count(), 3);
assert_eq!(matcher.capture_index("a"), Some(1));
assert_eq!(matcher.capture_index("b"), Some(2));
assert_eq!(matcher.capture_index("nada"), None);
let mut caps = matcher.new_captures().unwrap();
assert!(matcher.captures(b" homer simpson ", &mut caps).unwrap());
assert_eq!(caps.get(0), Some(m(1, 14)));
assert_eq!(caps.get(1), Some(m(1, 6)));
assert_eq!(caps.get(2), Some(m(7, 14)));
}
#[test]
fn captures_iter() {
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
matcher
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
true
})
.unwrap();
assert_eq!(
matches,
vec![m(0, 5), m(0, 2), m(3, 5), m(6, 11), m(6, 8), m(9, 11),]
);
// Test that captures_iter respects short circuiting.
matches.clear();
matcher
.captures_iter(b"aa bb cc dd", &mut caps, |caps| {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
false
})
.unwrap();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5),]);
}
#[test]
fn try_captures_iter() {
#[derive(Clone, Debug, Eq, PartialEq)]
struct MyError;
let matcher = matcher(r"(?P<a>\w+)\s+(?P<b>\w+)");
let mut caps = matcher.new_captures().unwrap();
let mut matches = vec![];
let err = matcher
.try_captures_iter(b"aa bb cc dd", &mut caps, |caps| {
if matches.is_empty() {
matches.push(caps.get(0).unwrap());
matches.push(caps.get(1).unwrap());
matches.push(caps.get(2).unwrap());
Ok(true)
} else {
Err(MyError)
}
})
.unwrap()
.unwrap_err();
assert_eq!(matches, vec![m(0, 5), m(0, 2), m(3, 5)]);
assert_eq!(err, MyError);
}
// Test that our default impls for capturing are correct. Namely, when
// capturing isn't supported by the underlying matcher, then all of the
// various capturing related APIs fail fast.
#[test]
fn no_captures() {
let matcher = matcher_no_caps(r"(?P<a>\w+)\s+(?P<b>\w+)");
assert_eq!(matcher.capture_count(), 0);
assert_eq!(matcher.capture_index("a"), None);
assert_eq!(matcher.capture_index("b"), None);
assert_eq!(matcher.capture_index("nada"), None);
let mut caps = matcher.new_captures().unwrap();
assert!(!matcher.captures(b"homer simpson", &mut caps).unwrap());
let mut called = false;
matcher
.captures_iter(b"homer simpson", &mut caps, |_| {
called = true;
true
})
.unwrap();
assert!(!called);
}
#[test]
fn replace() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let mut dst = vec![];
matcher
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
true
})
.unwrap();
assert_eq!(dst, b"z z");
// Test that replacements respect short circuiting.
dst.clear();
matcher
.replace(b"aa bb cc dd", &mut dst, |_, dst| {
dst.push(b'z');
false
})
.unwrap();
assert_eq!(dst, b"z cc dd");
}
#[test]
fn replace_with_captures() {
let matcher = matcher(r"(\w+)\s+(\w+)");
let haystack = b"aa bb cc dd";
let mut caps = matcher.new_captures().unwrap();
let mut dst = vec![];
matcher
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
true
})
.unwrap();
assert_eq!(dst, b"bb aa dd cc");
// Test that replacements respect short circuiting.
dst.clear();
matcher
.replace_with_captures(haystack, &mut caps, &mut dst, |caps, dst| {
caps.interpolate(
|name| matcher.capture_index(name),
haystack,
b"$2 $1",
dst,
);
false
})
.unwrap();
assert_eq!(dst, b"bb aa cc dd");
}

View File

@@ -0,0 +1,6 @@
extern crate grep_matcher;
extern crate regex;
mod util;
mod test_matcher;

View File

@@ -0,0 +1,95 @@
use std::collections::HashMap;
use std::result;
use grep_matcher::{Captures, Match, Matcher, NoCaptures, NoError};
use regex::bytes::{CaptureLocations, Regex};
#[derive(Debug)]
pub struct RegexMatcher {
pub re: Regex,
pub names: HashMap<String, usize>,
}
impl RegexMatcher {
pub fn new(re: Regex) -> RegexMatcher {
let mut names = HashMap::new();
for (i, optional_name) in re.capture_names().enumerate() {
if let Some(name) = optional_name {
names.insert(name.to_string(), i);
}
}
RegexMatcher { re: re, names: names }
}
}
type Result<T> = result::Result<T, NoError>;
impl Matcher for RegexMatcher {
type Captures = RegexCaptures;
type Error = NoError;
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
Ok(self
.re
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}
fn new_captures(&self) -> Result<RegexCaptures> {
Ok(RegexCaptures(self.re.capture_locations()))
}
fn captures_at(
&self,
haystack: &[u8],
at: usize,
caps: &mut RegexCaptures,
) -> Result<bool> {
Ok(self.re.captures_read_at(&mut caps.0, haystack, at).is_some())
}
fn capture_count(&self) -> usize {
self.re.captures_len()
}
fn capture_index(&self, name: &str) -> Option<usize> {
self.names.get(name).map(|i| *i)
}
// We purposely don't implement any other methods, so that we test the
// default impls. The "real" Regex impl for Matcher provides a few more
// impls. e.g., Its `find_iter` impl is faster than what we can do here,
// since the regex crate avoids synchronization overhead.
}
#[derive(Debug)]
pub struct RegexMatcherNoCaps(pub Regex);
impl Matcher for RegexMatcherNoCaps {
type Captures = NoCaptures;
type Error = NoError;
fn find_at(&self, haystack: &[u8], at: usize) -> Result<Option<Match>> {
Ok(self
.0
.find_at(haystack, at)
.map(|m| Match::new(m.start(), m.end())))
}
fn new_captures(&self) -> Result<NoCaptures> {
Ok(NoCaptures::new())
}
}
#[derive(Clone, Debug)]
pub struct RegexCaptures(CaptureLocations);
impl Captures for RegexCaptures {
fn len(&self) -> usize {
self.0.len()
}
fn get(&self, i: usize) -> Option<Match> {
self.0.pos(i).map(|(s, e)| Match::new(s, e))
}
}