mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-01-03 05:10:12 +02:00
progress
This commit is contained in:
parent
4ae67a8587
commit
79a51029c1
@ -1,12 +1,10 @@
|
||||
use std::cmp;
|
||||
use std::iter;
|
||||
use std::str;
|
||||
|
||||
use regex::quote;
|
||||
use regex::bytes::Regex;
|
||||
use syntax::{
|
||||
Expr, Literals, Lit,
|
||||
ByteClass, CharClass, Repeater, ClassRange, ByteRange,
|
||||
Repeater,
|
||||
};
|
||||
|
||||
#[derive(Debug)]
|
||||
@ -16,11 +14,6 @@ pub struct LiteralSets {
|
||||
required: Literals,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct LiteralMatcher {
|
||||
re: Regex,
|
||||
}
|
||||
|
||||
impl LiteralSets {
|
||||
pub fn create(expr: &Expr) -> Self {
|
||||
let mut required = Literals::empty();
|
||||
@ -32,7 +25,11 @@ impl LiteralSets {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_matcher(&self) -> Option<LiteralMatcher> {
|
||||
pub fn to_matcher(&self) -> Option<Regex> {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
// When this is true, the regex engine will do a literal scan.
|
||||
return None;
|
||||
}
|
||||
let pre_lcp = self.prefixes.longest_common_prefix();
|
||||
let pre_lcs = self.prefixes.longest_common_suffix();
|
||||
let suf_lcp = self.suffixes.longest_common_prefix();
|
||||
@ -60,8 +57,8 @@ impl LiteralSets {
|
||||
if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let s = str::from_utf8(lit).unwrap();
|
||||
Some(LiteralMatcher { re: Regex::new("e(s)).unwrap() })
|
||||
// Literals always compile.
|
||||
Some(Regex::new(&bytes_to_regex(lit)).unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -74,39 +71,19 @@ fn union_required(expr: &Expr, lits: &mut Literals) {
|
||||
lits.cross_add(s.as_bytes());
|
||||
}
|
||||
Literal { ref chars, casei: true } => {
|
||||
for &c in chars {
|
||||
let cls = CharClass::new(vec![
|
||||
ClassRange { start: c, end: c },
|
||||
]).case_fold();
|
||||
if !lits.add_char_class(&cls) {
|
||||
lits.cut();
|
||||
return;
|
||||
}
|
||||
}
|
||||
lits.cut();
|
||||
}
|
||||
LiteralBytes { ref bytes, casei: false } => {
|
||||
lits.cross_add(bytes);
|
||||
}
|
||||
LiteralBytes { ref bytes, casei: true } => {
|
||||
for &b in bytes {
|
||||
let cls = ByteClass::new(vec![
|
||||
ByteRange { start: b, end: b },
|
||||
]).case_fold();
|
||||
if !lits.add_byte_class(&cls) {
|
||||
lits.cut();
|
||||
return;
|
||||
}
|
||||
}
|
||||
lits.cut();
|
||||
}
|
||||
Class(ref cls) => {
|
||||
if !lits.add_char_class(cls) {
|
||||
lits.cut();
|
||||
}
|
||||
lits.cut();
|
||||
}
|
||||
ClassBytes(ref cls) => {
|
||||
if !lits.add_byte_class(cls) {
|
||||
lits.cut();
|
||||
}
|
||||
lits.cut();
|
||||
}
|
||||
Group { ref e, .. } => {
|
||||
union_required(&**e, lits);
|
||||
@ -212,3 +189,13 @@ fn alternate_literals<F: FnMut(&Expr, &mut Literals)>(
|
||||
lits.add(Lit::new(lcs.to_vec()));
|
||||
}
|
||||
}
|
||||
|
||||
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
///
/// Every byte is emitted as a two-digit lowercase hex escape (`\xNN`), so no
/// byte of the input can be interpreted as regex syntax. An empty slice
/// yields an empty string.
///
/// NOTE(review): whether `\xNN` for bytes >= 0x80 matches the raw byte or a
/// Unicode scalar value depends on the Unicode mode of the regex this string
/// is compiled into -- confirm against the caller.
fn bytes_to_regex(bs: &[u8]) -> String {
    use std::fmt::Write;

    // Each input byte expands to exactly four characters (`\`, `x` and two
    // hex digits), so reserve the whole output up front.
    let mut s = String::with_capacity(4 * bs.len());
    for &b in bs {
        // Format straight into the output instead of allocating a temporary
        // `String` per byte. Writing to a `String` cannot fail.
        write!(s, "\\x{:02x}", b).unwrap();
    }
    s
}
|
||||
|
55
src/main.rs
55
src/main.rs
@ -20,9 +20,11 @@ use docopt::Docopt;
|
||||
use regex::bytes::Regex;
|
||||
|
||||
use literals::LiteralSets;
|
||||
use search::{LineSearcher, LineSearcherBuilder};
|
||||
|
||||
mod literals;
|
||||
mod nonl;
|
||||
mod search;
|
||||
|
||||
pub type Result<T> = result::Result<T, Box<Error + Send + Sync>>;
|
||||
|
||||
@ -46,20 +48,36 @@ fn main() {
|
||||
}
|
||||
|
||||
fn run(args: &Args) -> Result<u64> {
|
||||
let expr = try!(parse(&args.arg_pattern));
|
||||
let literals = LiteralSets::create(&expr);
|
||||
// println!("{:?}", literals);
|
||||
// println!("{:?}", literals.to_matcher());
|
||||
let re = Regex::new(&expr.to_string()).unwrap();
|
||||
if args.arg_file.is_empty() {
|
||||
let expr = try!(parse(&args.arg_pattern));
|
||||
let literals = LiteralSets::create(&expr);
|
||||
let re = Regex::new(&expr.to_string()).unwrap();
|
||||
let _stdin = io::stdin();
|
||||
let stdin = _stdin.lock();
|
||||
run_by_line(args, &re, stdin)
|
||||
} else {
|
||||
run_mmap(args, &re)
|
||||
let searcher =
|
||||
try!(LineSearcherBuilder::new(&args.arg_pattern).create());
|
||||
run_mmap(args, &searcher)
|
||||
}
|
||||
}
|
||||
|
||||
fn run_mmap(args: &Args, searcher: &LineSearcher) -> Result<u64> {
|
||||
use memmap::{Mmap, Protection};
|
||||
|
||||
assert!(args.arg_file.len() == 1);
|
||||
let mut wtr = io::BufWriter::new(io::stdout());
|
||||
let mut count = 0;
|
||||
let mmap = try!(Mmap::open_path(&args.arg_file[0], Protection::Read));
|
||||
let text = unsafe { mmap.as_slice() };
|
||||
for m in searcher.search(text) {
|
||||
try!(wtr.write(&text[m.start..m.end]));
|
||||
try!(wtr.write(b"\n"));
|
||||
count += 1;
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn run_by_line<B: BufRead>(
|
||||
args: &Args,
|
||||
re: &Regex,
|
||||
@ -84,31 +102,6 @@ fn run_by_line<B: BufRead>(
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn run_mmap(args: &Args, re: &Regex) -> Result<u64> {
|
||||
use memchr::{memchr, memrchr};
|
||||
use memmap::{Mmap, Protection};
|
||||
|
||||
assert!(args.arg_file.len() == 1);
|
||||
let mut wtr = io::BufWriter::new(io::stdout());
|
||||
let mut count = 0;
|
||||
let mmap = try!(Mmap::open_path(&args.arg_file[0], Protection::Read));
|
||||
let text = unsafe { mmap.as_slice() };
|
||||
let mut start = 0;
|
||||
while let Some((s, e)) = re.find(&text[start..]) {
|
||||
let (s, e) = (start + s, start + e);
|
||||
let prevnl = memrchr(b'\n', &text[0..s]).map_or(0, |i| i + 1);
|
||||
let nextnl = memchr(b'\n', &text[e..]).map_or(text.len(), |i| e + i);
|
||||
try!(wtr.write(&text[prevnl..nextnl]));
|
||||
try!(wtr.write(b"\n"));
|
||||
start = nextnl + 1;
|
||||
count += 1;
|
||||
if start >= text.len() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(count)
|
||||
}
|
||||
|
||||
fn parse(re: &str) -> Result<syntax::Expr> {
|
||||
let expr =
|
||||
try!(syntax::ExprBuilder::new()
|
||||
|
164
src/search.rs
Normal file
164
src/search.rs
Normal file
@ -0,0 +1,164 @@
|
||||
use memchr::{memchr, memrchr};
|
||||
use regex::bytes::Regex;
|
||||
use syntax;
|
||||
|
||||
use literals::LiteralSets;
|
||||
use nonl;
|
||||
use Result;
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LineSearcher {
|
||||
re: Regex,
|
||||
required: Option<Regex>,
|
||||
opts: Options,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LineSearcherBuilder {
|
||||
pattern: String,
|
||||
opts: Options,
|
||||
}
|
||||
|
||||
/// Search options shared by the builder and the searcher it creates.
///
/// Every flag is `false` by default.
#[derive(Clone, Debug, Default)]
struct Options {
    /// Compile the pattern with the `(?i)` flag.
    case_insensitive: bool,
    /// Set by `LineSearcherBuilder::line_numbers`.
    lines: bool,
    /// Set by `LineSearcherBuilder::locations`.
    locations: bool,
}
|
||||
|
||||
impl LineSearcherBuilder {
|
||||
pub fn new(pattern: &str) -> LineSearcherBuilder {
|
||||
LineSearcherBuilder {
|
||||
pattern: pattern.to_string(),
|
||||
opts: Options::default(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn case_insensitive(mut self, yes: bool) -> LineSearcherBuilder {
|
||||
self.opts.case_insensitive = yes;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn line_numbers(mut self, yes: bool) -> LineSearcherBuilder {
|
||||
self.opts.lines = yes;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn locations(mut self, yes: bool) -> LineSearcherBuilder {
|
||||
self.opts.locations = yes;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn create(self) -> Result<LineSearcher> {
|
||||
let expr = try!(parse(&self.pattern));
|
||||
let literals = LiteralSets::create(&expr);
|
||||
let pat =
|
||||
if self.opts.case_insensitive {
|
||||
format!("(?i){}", expr)
|
||||
} else {
|
||||
expr.to_string()
|
||||
};
|
||||
// We've already parsed the pattern, so we know it will compiled.
|
||||
let re = Regex::new(&pat).unwrap();
|
||||
Ok(LineSearcher {
|
||||
re: re,
|
||||
required: literals.to_matcher(),
|
||||
opts: self.opts,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl LineSearcher {
|
||||
pub fn search<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {
|
||||
Iter {
|
||||
searcher: self,
|
||||
buf: buf,
|
||||
start: 0,
|
||||
count: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A single matching line reported by `Iter`.
pub struct Match {
    /// Offset in the searched buffer where the matching line begins.
    pub start: usize,
    /// Offset of the end of the matching line (exclusive). The line's
    /// terminating newline, if any, is not included.
    pub end: usize,
    /// Zero-based index of this match among those yielded by the iterator.
    pub count: usize,
    /// Line number of the match; the iterator currently always sets `None`.
    pub line: Option<usize>,
    /// Match locations; the iterator currently always leaves this empty.
    pub locations: Vec<(usize, usize)>,
}
|
||||
|
||||
pub struct Iter<'b, 's> {
|
||||
searcher: &'s LineSearcher,
|
||||
buf: &'b [u8],
|
||||
start: usize,
|
||||
count: usize,
|
||||
}
|
||||
|
||||
impl<'b, 's> Iter<'b, 's> {
|
||||
fn next_line_match(&mut self) -> Option<(usize, usize)> {
|
||||
if self.start >= self.buf.len() {
|
||||
return None;
|
||||
}
|
||||
if let Some(ref req) = self.searcher.required {
|
||||
while self.start < self.buf.len() {
|
||||
let (s, e) = match req.find(&self.buf[self.start..]) {
|
||||
None => return None,
|
||||
Some((s, e)) => (self.start + s, self.start + e),
|
||||
};
|
||||
let (prevnl, nextnl) = self.find_line(s, e);
|
||||
match self.searcher.re.find(&self.buf[prevnl..nextnl]) {
|
||||
None => {
|
||||
self.start = nextnl + 1;
|
||||
continue;
|
||||
}
|
||||
Some(_) => return Some((prevnl, nextnl)),
|
||||
}
|
||||
}
|
||||
None
|
||||
} else {
|
||||
let (s, e) = match self.searcher.re.find(&self.buf[self.start..]) {
|
||||
None => return None,
|
||||
Some((s, e)) => (self.start + s, self.start + e),
|
||||
};
|
||||
Some(self.find_line(s, e))
|
||||
}
|
||||
}
|
||||
|
||||
fn find_line(&self, s: usize, e: usize) -> (usize, usize) {
|
||||
let prevnl =
|
||||
memrchr(b'\n', &self.buf[0..s]).map_or(0, |i| i + 1);
|
||||
let nextnl =
|
||||
memchr(b'\n', &self.buf[e..]).map_or(self.buf.len(), |i| e + i);
|
||||
(prevnl, nextnl)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'b, 's> Iterator for Iter<'b, 's> {
|
||||
type Item = Match;
|
||||
|
||||
fn next(&mut self) -> Option<Match> {
|
||||
let (prevnl, nextnl) = match self.next_line_match() {
|
||||
None => return None,
|
||||
Some((s, e)) => (s, e),
|
||||
};
|
||||
let count = self.count;
|
||||
self.start = nextnl + 1;
|
||||
self.count += 1;
|
||||
Some(Match {
|
||||
start: prevnl,
|
||||
end: nextnl,
|
||||
count: count,
|
||||
line: None,
|
||||
locations: vec![],
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
fn parse(re: &str) -> Result<syntax::Expr> {
|
||||
let expr =
|
||||
try!(syntax::ExprBuilder::new()
|
||||
.allow_bytes(true)
|
||||
.unicode(false)
|
||||
.parse(re));
|
||||
Ok(try!(nonl::remove(expr)))
|
||||
}
|
Loading…
Reference in New Issue
Block a user