1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-01-29 22:01:04 +02:00

ripgrep: remove old code

This commit is contained in:
Andrew Gallant 2018-08-06 10:44:26 -04:00
parent f16f9cedf1
commit acd20a803c
No known key found for this signature in database
GPG Key ID: B2E3A4923F8B0D44
27 changed files with 1374 additions and 20476 deletions

29
Cargo.lock generated
View File

@ -134,12 +134,12 @@ dependencies = [
[[package]]
name = "grep"
version = "0.1.9"
version = "0.2.0"
dependencies = [
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.0.1",
"grep-printer 0.0.1",
"grep-regex 0.0.1",
"grep-searcher 0.0.1",
]
[[package]]
@ -192,16 +192,6 @@ dependencies = [
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "grep2"
version = "0.2.0"
dependencies = [
"grep-matcher 0.0.1",
"grep-printer 0.0.1",
"grep-regex 0.0.1",
"grep-searcher 0.0.1",
]
[[package]]
name = "ignore"
version = "0.4.3"
@ -339,19 +329,12 @@ name = "ripgrep"
version = "0.9.0"
dependencies = [
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.4.1",
"grep 0.1.9",
"grep2 0.2.0",
"grep 0.2.0",
"ignore 0.4.3",
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -35,23 +35,15 @@ path = "tests/tests.rs"
members = [
"grep", "globset", "ignore",
"grep-matcher", "grep-printer", "grep-regex", "grep-searcher",
"grep2",
]
[dependencies]
atty = "0.2.11"
bytecount = "0.3.1"
encoding_rs = "0.8"
encoding_rs_io = "0.1"
globset = { version = "0.4.0", path = "globset" }
grep = { version = "0.1.8", path = "grep" }
grep2 = { version = "0.2.0", path = "grep2" }
grep = { version = "0.2.0", path = "grep" }
ignore = { version = "0.4.0", path = "ignore" }
lazy_static = "1"
libc = "0.2"
log = "0.4"
memchr = "2"
memmap = "0.6"
num_cpus = "1"
regex = "1"
same-file = "1"
@ -75,15 +67,8 @@ default-features = false
features = ["suggestions", "color"]
[features]
avx-accel = [
"bytecount/avx-accel",
"grep2/avx-accel",
]
simd-accel = [
"bytecount/simd-accel",
"encoding_rs/simd-accel",
"grep2/simd-accel",
]
avx-accel = ["grep/avx-accel"]
simd-accel = ["grep/simd-accel"]
[profile.release]
debug = true

View File

@ -1,6 +1,6 @@
[package]
name = "grep"
version = "0.1.9" #:version
version = "0.2.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@ -13,7 +13,11 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
log = "0.4"
memchr = "2"
regex = "1"
regex-syntax = "0.6"
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
grep-printer = { version = "0.0.1", path = "../grep-printer" }
grep-regex = { version = "0.0.1", path = "../grep-regex" }
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
[features]
avx-accel = ["grep-searcher/avx-accel"]
simd-accel = ["grep-searcher/simd-accel"]

File diff suppressed because it is too large Load Diff

View File

@ -1,84 +1,10 @@
#![deny(missing_docs)]
/*!
A fast line oriented regex searcher.
TODO.
*/
#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
extern crate regex_syntax as syntax;
#![deny(missing_docs)]
use std::error;
use std::fmt;
use std::result;
pub use search::{Grep, GrepBuilder, Iter, Match};
mod literals;
mod nonl;
mod search;
mod smart_case;
mod word_boundary;
/// Result is a convenient type alias that fixes the type of the error to
/// the `Error` type defined in this crate.
pub type Result<T> = result::Result<T, Error>;
/// Error enumerates the list of possible error conditions when building or
/// using a `Grep` line searcher.
#[derive(Debug)]
pub enum Error {
/// An error from parsing or compiling a regex.
Regex(regex::Error),
/// This error occurs when an illegal literal was found in the regex
/// pattern. For example, if the line terminator is `\n` and the regex
/// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error.
LiteralNotAllowed(char),
/// An unused enum variant that indicates this enum may be expanded in
/// the future and therefore should not be exhaustively matched.
#[doc(hidden)]
__Nonexhaustive,
}
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::Regex(ref err) => err.description(),
Error::LiteralNotAllowed(_) => "use of forbidden literal",
Error::__Nonexhaustive => unreachable!(),
}
}
fn cause(&self) -> Option<&error::Error> {
match *self {
Error::Regex(ref err) => err.cause(),
_ => None,
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Regex(ref err) => err.fmt(f),
Error::LiteralNotAllowed(chr) => {
write!(f, "Literal {:?} not allowed.", chr)
}
Error::__Nonexhaustive => unreachable!(),
}
}
}
impl From<regex::Error> for Error {
fn from(err: regex::Error) -> Error {
Error::Regex(err)
}
}
impl From<syntax::Error> for Error {
fn from(err: syntax::Error) -> Error {
Error::Regex(regex::Error::Syntax(err.to_string()))
}
}
pub extern crate grep_matcher as matcher;
pub extern crate grep_printer as printer;
pub extern crate grep_regex as regex;
pub extern crate grep_searcher as searcher;

View File

@ -1,274 +0,0 @@
/*!
The literals module is responsible for extracting *inner* literals out of the
AST of a regular expression. Normally this is the job of the regex engine
itself, but the regex engine doesn't look for inner literals. Since we're doing
line based searching, we can use them, so we need to do it ourselves.
Note that this implementation is incredibly suspicious. We need something more
principled.
*/
use std::cmp;
use regex::bytes::RegexBuilder;
use syntax::hir::{self, Hir, HirKind};
use syntax::hir::literal::{Literal, Literals};
#[derive(Clone, Debug)]
pub struct LiteralSets {
prefixes: Literals,
suffixes: Literals,
required: Literals,
}
impl LiteralSets {
pub fn create(expr: &Hir) -> Self {
let mut required = Literals::empty();
union_required(expr, &mut required);
LiteralSets {
prefixes: Literals::prefixes(expr),
suffixes: Literals::suffixes(expr),
required: required,
}
}
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
debug!("literal prefixes detected: {:?}", self.prefixes);
// When this is true, the regex engine will do a literal scan.
return None;
}
// Out of inner required literals, prefixes and suffixes, which one
// is the longest? We pick the longest to do fast literal scan under
// the assumption that a longer literal will have a lower false
// positive rate.
let pre_lcp = self.prefixes.longest_common_prefix();
let pre_lcs = self.prefixes.longest_common_suffix();
let suf_lcp = self.suffixes.longest_common_prefix();
let suf_lcs = self.suffixes.longest_common_suffix();
let req_lits = self.required.literals();
let req = match req_lits.iter().max_by_key(|lit| lit.len()) {
None => &[],
Some(req) => &***req,
};
let mut lit = pre_lcp;
if pre_lcs.len() > lit.len() {
lit = pre_lcs;
}
if suf_lcp.len() > lit.len() {
lit = suf_lcp;
}
if suf_lcs.len() > lit.len() {
lit = suf_lcs;
}
if req_lits.len() == 1 && req.len() > lit.len() {
lit = req;
}
// Special case: if we have any literals that are all whitespace,
// then this is probably a failing of the literal detection since
// whitespace is typically pretty common. In this case, don't bother
// with inner literal scanning at all and just defer to the regex.
let any_all_white = req_lits.iter()
.any(|lit| lit.iter().all(|&b| (b as char).is_whitespace()));
if any_all_white {
return None;
}
// Special case: if we detected an alternation of inner required
// literals and its longest literal is bigger than the longest
// prefix/suffix, then choose the alternation. In practice, this
// helps with case insensitive matching, which can generate lots of
// inner required literals.
let any_empty = req_lits.iter().any(|lit| lit.is_empty());
if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
debug!("required literals found: {:?}", req_lits);
let alts: Vec<String> =
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
let mut builder = RegexBuilder::new(&alts.join("|"));
builder.unicode(false);
Some(builder)
} else if lit.is_empty() {
None
} else {
debug!("required literal found: {:?}", show(lit));
let mut builder = RegexBuilder::new(&bytes_to_regex(&lit));
builder.unicode(false);
Some(builder)
}
}
}
fn union_required(expr: &Hir, lits: &mut Literals) {
match *expr.kind() {
HirKind::Literal(hir::Literal::Unicode(c)) => {
let mut buf = [0u8; 4];
lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
}
HirKind::Literal(hir::Literal::Byte(b)) => {
lits.cross_add(&[b]);
}
HirKind::Class(hir::Class::Unicode(ref cls)) => {
if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) {
lits.cut();
}
}
HirKind::Class(hir::Class::Bytes(ref cls)) => {
if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) {
lits.cut();
}
}
HirKind::Group(hir::Group { ref hir, .. }) => {
union_required(&**hir, lits);
}
HirKind::Repetition(ref x) => {
match x.kind {
hir::RepetitionKind::ZeroOrOne => lits.cut(),
hir::RepetitionKind::ZeroOrMore => lits.cut(),
hir::RepetitionKind::OneOrMore => {
union_required(&x.hir, lits);
lits.cut();
}
hir::RepetitionKind::Range(ref rng) => {
let (min, max) = match *rng {
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
hir::RepetitionRange::AtLeast(m) => (m, None),
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
};
repeat_range_literals(
&x.hir, min, max, x.greedy, lits, union_required);
}
}
}
HirKind::Concat(ref es) if es.is_empty() => {}
HirKind::Concat(ref es) if es.len() == 1 => {
union_required(&es[0], lits)
}
HirKind::Concat(ref es) => {
for e in es {
let mut lits2 = lits.to_empty();
union_required(e, &mut lits2);
if lits2.is_empty() {
lits.cut();
continue;
}
if lits2.contains_empty() {
lits.cut();
}
if !lits.cross_product(&lits2) {
// If this expression couldn't yield any literal that
// could be extended, then we need to quit. Since we're
// short-circuiting, we also need to freeze every member.
lits.cut();
break;
}
}
}
HirKind::Alternation(ref es) => {
alternate_literals(es, lits, union_required);
}
_ => lits.cut(),
}
}
fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
e: &Hir,
min: u32,
max: Option<u32>,
_greedy: bool,
lits: &mut Literals,
mut f: F,
) {
if min == 0 {
// This is a bit conservative. If `max` is set, then we could
// treat this as a finite set of alternations. For now, we
// just treat it as `e*`.
lits.cut();
} else {
let n = cmp::min(lits.limit_size(), min as usize);
// We only extract literals from a single repetition, even though
// we could do more. e.g., `a{3}` will have `a` extracted instead of
// `aaa`. The reason is that inner literal extraction can't be unioned
// across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}`
// is wrong.
f(e, lits);
if n < min as usize {
lits.cut();
}
if max.map_or(true, |max| min < max) {
lits.cut();
}
}
}
fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
es: &[Hir],
lits: &mut Literals,
mut f: F,
) {
let mut lits2 = lits.to_empty();
for e in es {
let mut lits3 = lits.to_empty();
lits3.set_limit_size(lits.limit_size() / 5);
f(e, &mut lits3);
if lits3.is_empty() || !lits2.union(lits3) {
// If we couldn't find suffixes for *any* of the
// alternates, then the entire alternation has to be thrown
// away and any existing members must be frozen. Similarly,
// if the union couldn't complete, stop and freeze.
lits.cut();
return;
}
}
// All we do at the moment is look for prefixes and suffixes. If both
// are empty, then we report nothing. We should be able to do better than
// this, but we'll need something more expressive than just a "set of
// literals."
let lcp = lits2.longest_common_prefix();
let lcs = lits2.longest_common_suffix();
if !lcp.is_empty() {
lits.cross_add(lcp);
}
lits.cut();
if !lcs.is_empty() {
lits.add(Literal::empty());
lits.add(Literal::new(lcs.to_vec()));
}
}
/// Return the number of characters in the given class.
fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
}
/// Return the number of bytes in the given class.
fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
}
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
fn bytes_to_regex(bs: &[u8]) -> String {
let mut s = String::with_capacity(bs.len());
for &b in bs {
s.push_str(&format!("\\x{:02x}", b));
}
s
}
/// Converts arbitrary bytes to a nice string.
fn show(bs: &[u8]) -> String {
// Why aren't we using this to feed to the regex? Doesn't really matter
// I guess. ---AG
use std::ascii::escape_default;
use std::str;
let mut nice = String::new();
for &b in bs {
let part: Vec<u8> = escape_default(b).collect();
nice.push_str(str::from_utf8(&part).unwrap());
}
nice
}

View File

@ -1,74 +0,0 @@
use syntax::hir::{self, Hir, HirKind};
use {Error, Result};
/// Returns a new expression that is guaranteed to never match the given
/// ASCII character.
///
/// If the expression contains the literal byte, then an error is returned.
///
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
/// function panics.
pub fn remove(expr: Hir, byte: u8) -> Result<Hir> {
assert!(byte <= 0x7F);
let chr = byte as char;
assert!(chr.len_utf8() == 1);
Ok(match expr.into_kind() {
HirKind::Empty => Hir::empty(),
HirKind::Literal(hir::Literal::Unicode(c)) => {
if c == chr {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::literal(hir::Literal::Unicode(c))
}
HirKind::Literal(hir::Literal::Byte(b)) => {
if b as char == chr {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::literal(hir::Literal::Byte(b))
}
HirKind::Class(hir::Class::Unicode(mut cls)) => {
let remove = hir::ClassUnicode::new(Some(
hir::ClassUnicodeRange::new(chr, chr),
));
cls.difference(&remove);
if cls.iter().next().is_none() {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::class(hir::Class::Unicode(cls))
}
HirKind::Class(hir::Class::Bytes(mut cls)) => {
let remove = hir::ClassBytes::new(Some(
hir::ClassBytesRange::new(byte, byte),
));
cls.difference(&remove);
if cls.iter().next().is_none() {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::class(hir::Class::Bytes(cls))
}
HirKind::Anchor(x) => Hir::anchor(x),
HirKind::WordBoundary(x) => Hir::word_boundary(x),
HirKind::Repetition(mut x) => {
x.hir = Box::new(remove(*x.hir, byte)?);
Hir::repetition(x)
}
HirKind::Group(mut x) => {
x.hir = Box::new(remove(*x.hir, byte)?);
Hir::group(x)
}
HirKind::Concat(xs) => {
let xs = xs.into_iter()
.map(|e| remove(e, byte))
.collect::<Result<Vec<Hir>>>()?;
Hir::concat(xs)
}
HirKind::Alternation(xs) => {
let xs = xs.into_iter()
.map(|e| remove(e, byte))
.collect::<Result<Vec<Hir>>>()?;
Hir::alternation(xs)
}
})
}

View File

@ -1,356 +0,0 @@
use memchr::{memchr, memrchr};
use syntax::ParserBuilder;
use syntax::hir::Hir;
use regex::bytes::{Regex, RegexBuilder};
use literals::LiteralSets;
use nonl;
use smart_case::Cased;
use word_boundary::strip_unicode_word_boundaries;
use Result;
/// A matched line.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Match {
start: usize,
end: usize,
}
impl Match {
/// Create a new empty match value.
pub fn new() -> Match {
Match::default()
}
/// Return the starting byte offset of the line that matched.
#[inline]
pub fn start(&self) -> usize {
self.start
}
/// Return the ending byte offset of the line that matched.
#[inline]
pub fn end(&self) -> usize {
self.end
}
}
/// A fast line oriented regex searcher.
#[derive(Clone, Debug)]
pub struct Grep {
re: Regex,
required: Option<Regex>,
opts: Options,
}
/// A builder for a grep searcher.
#[derive(Clone, Debug)]
pub struct GrepBuilder {
pattern: String,
opts: Options,
}
#[derive(Clone, Debug)]
struct Options {
case_insensitive: bool,
case_smart: bool,
line_terminator: u8,
size_limit: usize,
dfa_size_limit: usize,
}
impl Default for Options {
fn default() -> Options {
Options {
case_insensitive: false,
case_smart: false,
line_terminator: b'\n',
size_limit: 10 * (1 << 20),
dfa_size_limit: 10 * (1 << 20),
}
}
}
impl GrepBuilder {
/// Create a new builder for line searching.
///
/// The pattern given should be a regular expression. The precise syntax
/// supported is documented on the regex crate.
pub fn new(pattern: &str) -> GrepBuilder {
GrepBuilder {
pattern: pattern.to_string(),
opts: Options::default(),
}
}
/// Set the line terminator.
///
/// The line terminator can be any ASCII character and serves to delineate
/// the match boundaries in the text searched.
///
/// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII).
pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder {
assert!(ascii_byte <= 0x7F);
self.opts.line_terminator = ascii_byte;
self
}
/// Set the case sensitive flag (`i`) on the regex.
pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder {
self.opts.case_insensitive = yes;
self
}
/// Whether to enable smart case search or not (disabled by default).
///
/// Smart case uses case insensitive search if the pattern contains only
/// lowercase characters (ignoring any characters which immediately follow
/// a '\'). Otherwise, a case sensitive search is used instead.
///
/// Enabling the case_insensitive flag overrides this.
pub fn case_smart(mut self, yes: bool) -> GrepBuilder {
self.opts.case_smart = yes;
self
}
/// Set the approximate size limit of the compiled regular expression.
///
/// This roughly corresponds to the number of bytes occupied by a
/// single compiled program. If the program exceeds this number, then a
/// compilation error is returned.
pub fn size_limit(mut self, limit: usize) -> GrepBuilder {
self.opts.size_limit = limit;
self
}
/// Set the approximate size of the cache used by the DFA.
///
/// This roughly corresponds to the number of bytes that the DFA will use
/// while searching.
///
/// Note that this is a per thread limit. There is no way to set a global
/// limit. In particular, if a regex is used from multiple threads
/// simulanteously, then each thread may use up to the number of bytes
/// specified here.
pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder {
self.opts.dfa_size_limit = limit;
self
}
/// Create a line searcher.
///
/// If there was a problem parsing or compiling the regex with the given
/// options, then an error is returned.
pub fn build(self) -> Result<Grep> {
let expr = self.parse()?;
let literals = LiteralSets::create(&expr);
let re = self.regex(&expr)?;
let required = match literals.to_regex_builder() {
Some(builder) => Some(self.regex_build(builder)?),
None => {
match strip_unicode_word_boundaries(&expr) {
None => None,
Some(expr) => {
debug!("Stripped Unicode word boundaries. \
New AST:\n{:?}", expr);
self.regex(&expr).ok()
}
}
}
};
Ok(Grep {
re: re,
required: required,
opts: self.opts,
})
}
/// Creates a new regex from the given expression with the current
/// configuration.
fn regex(&self, expr: &Hir) -> Result<Regex> {
let mut builder = RegexBuilder::new(&expr.to_string());
builder.unicode(true);
self.regex_build(builder)
}
/// Builds a new regex from the given builder using the caller's settings.
fn regex_build(&self, mut builder: RegexBuilder) -> Result<Regex> {
builder
.multi_line(true)
.size_limit(self.opts.size_limit)
.dfa_size_limit(self.opts.dfa_size_limit)
.build()
.map_err(From::from)
}
/// Parses the underlying pattern and ensures the pattern can never match
/// the line terminator.
fn parse(&self) -> Result<Hir> {
let expr = ParserBuilder::new()
.allow_invalid_utf8(true)
.case_insensitive(self.is_case_insensitive()?)
.multi_line(true)
.build()
.parse(&self.pattern)?;
debug!("original regex HIR pattern:\n{}", expr);
let expr = nonl::remove(expr, self.opts.line_terminator)?;
debug!("transformed regex HIR pattern:\n{}", expr);
Ok(expr)
}
/// Determines whether the case insensitive flag should be enabled or not.
fn is_case_insensitive(&self) -> Result<bool> {
if self.opts.case_insensitive {
return Ok(true);
}
if !self.opts.case_smart {
return Ok(false);
}
let cased = match Cased::from_pattern(&self.pattern) {
None => return Ok(false),
Some(cased) => cased,
};
Ok(cased.any_literal && !cased.any_uppercase)
}
}
impl Grep {
/// Returns a reference to the underlying regex used by the searcher.
pub fn regex(&self) -> &Regex {
&self.re
}
/// Returns an iterator over all matches in the given buffer.
pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {
Iter {
searcher: self,
buf: buf,
start: 0,
}
}
/// Fills in the next line that matches in the given buffer starting at
/// the position given.
///
/// If no match could be found, `false` is returned, otherwise, `true` is
/// returned.
pub fn read_match(
&self,
mat: &mut Match,
buf: &[u8],
mut start: usize,
) -> bool {
if start >= buf.len() {
return false;
}
if let Some(ref req) = self.required {
while start < buf.len() {
let e = match req.shortest_match(&buf[start..]) {
None => return false,
Some(e) => start + e,
};
let (prevnl, nextnl) = self.find_line(buf, e, e);
match self.re.shortest_match(&buf[prevnl..nextnl]) {
None => {
start = nextnl;
continue;
}
Some(_) => {
self.fill_match(mat, prevnl, nextnl);
return true;
}
}
}
false
} else {
let e = match self.re.shortest_match(&buf[start..]) {
None => return false,
Some(e) => start + e,
};
let (s, e) = self.find_line(buf, e, e);
self.fill_match(mat, s, e);
true
}
}
fn fill_match(&self, mat: &mut Match, start: usize, end: usize) {
mat.start = start;
mat.end = end;
}
fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) {
(self.find_line_start(buf, s), self.find_line_end(buf, e))
}
fn find_line_start(&self, buf: &[u8], pos: usize) -> usize {
memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1)
}
fn find_line_end(&self, buf: &[u8], pos: usize) -> usize {
memchr(self.opts.line_terminator, &buf[pos..])
.map_or(buf.len(), |i| pos + i + 1)
}
}
/// An iterator over all matches in a particular buffer.
///
/// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime
/// of the searcher.
pub struct Iter<'b, 's> {
searcher: &'s Grep,
buf: &'b [u8],
start: usize,
}
impl<'b, 's> Iterator for Iter<'b, 's> {
type Item = Match;
fn next(&mut self) -> Option<Match> {
let mut mat = Match::default();
if !self.searcher.read_match(&mut mat, self.buf, self.start) {
self.start = self.buf.len();
return None;
}
self.start = mat.end;
Some(mat)
}
}
#[cfg(test)]
mod tests {
use memchr::{memchr, memrchr};
use regex::bytes::Regex;
use super::{GrepBuilder, Match};
static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");
fn find_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
let re = Regex::new(pat).unwrap();
let mut lines = vec![];
for m in re.find_iter(haystack) {
let start = memrchr(b'\n', &haystack[..m.start()])
.map_or(0, |i| i + 1);
let end = memchr(b'\n', &haystack[m.end()..])
.map_or(haystack.len(), |i| m.end() + i + 1);
lines.push(Match {
start: start,
end: end,
});
}
lines
}
fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
let g = GrepBuilder::new(pat).build().unwrap();
g.iter(haystack).collect()
}
#[test]
fn buffered_literal() {
let expected = find_lines("Sherlock Holmes", SHERLOCK);
let got = grep_lines("Sherlock Holmes", SHERLOCK);
assert_eq!(expected.len(), got.len());
assert_eq!(expected, got);
}
}

View File

@ -1,191 +0,0 @@
use syntax::ast::{self, Ast};
use syntax::ast::parse::Parser;
/// The results of analyzing a regex for cased literals.
#[derive(Clone, Debug, Default)]
pub struct Cased {
/// True if and only if a literal uppercase character occurs in the regex.
///
/// A regex like `\pL` contains no uppercase literals, even though `L`
/// is uppercase and the `\pL` class contains uppercase characters.
pub any_uppercase: bool,
/// True if and only if the regex contains any literal at all. A regex like
/// `\pL` has this set to false.
pub any_literal: bool,
}
impl Cased {
/// Returns a `Cased` value by doing analysis on the AST of `pattern`.
///
/// If `pattern` is not a valid regular expression, then `None` is
/// returned.
pub fn from_pattern(pattern: &str) -> Option<Cased> {
Parser::new()
.parse(pattern)
.map(|ast| Cased::from_ast(&ast))
.ok()
}
fn from_ast(ast: &Ast) -> Cased {
let mut cased = Cased::default();
cased.from_ast_impl(ast);
cased
}
fn from_ast_impl(&mut self, ast: &Ast) {
if self.done() {
return;
}
match *ast {
Ast::Empty(_)
| Ast::Flags(_)
| Ast::Dot(_)
| Ast::Assertion(_)
| Ast::Class(ast::Class::Unicode(_))
| Ast::Class(ast::Class::Perl(_)) => {}
Ast::Literal(ref x) => {
self.from_ast_literal(x);
}
Ast::Class(ast::Class::Bracketed(ref x)) => {
self.from_ast_class_set(&x.kind);
}
Ast::Repetition(ref x) => {
self.from_ast_impl(&x.ast);
}
Ast::Group(ref x) => {
self.from_ast_impl(&x.ast);
}
Ast::Alternation(ref alt) => {
for x in &alt.asts {
self.from_ast_impl(x);
}
}
Ast::Concat(ref alt) => {
for x in &alt.asts {
self.from_ast_impl(x);
}
}
}
}
fn from_ast_class_set(&mut self, ast: &ast::ClassSet) {
if self.done() {
return;
}
match *ast {
ast::ClassSet::Item(ref item) => {
self.from_ast_class_set_item(item);
}
ast::ClassSet::BinaryOp(ref x) => {
self.from_ast_class_set(&x.lhs);
self.from_ast_class_set(&x.rhs);
}
}
}
fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) {
if self.done() {
return;
}
match *ast {
ast::ClassSetItem::Empty(_)
| ast::ClassSetItem::Ascii(_)
| ast::ClassSetItem::Unicode(_)
| ast::ClassSetItem::Perl(_) => {}
ast::ClassSetItem::Literal(ref x) => {
self.from_ast_literal(x);
}
ast::ClassSetItem::Range(ref x) => {
self.from_ast_literal(&x.start);
self.from_ast_literal(&x.end);
}
ast::ClassSetItem::Bracketed(ref x) => {
self.from_ast_class_set(&x.kind);
}
ast::ClassSetItem::Union(ref union) => {
for x in &union.items {
self.from_ast_class_set_item(x);
}
}
}
}
fn from_ast_literal(&mut self, ast: &ast::Literal) {
self.any_literal = true;
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
}
/// Returns true if and only if the attributes can never change no matter
/// what other AST it might see.
fn done(&self) -> bool {
self.any_uppercase && self.any_literal
}
}
#[cfg(test)]
mod tests {
use super::*;
fn cased(pattern: &str) -> Cased {
Cased::from_pattern(pattern).unwrap()
}
#[test]
fn various() {
let x = cased("");
assert!(!x.any_uppercase);
assert!(!x.any_literal);
let x = cased("foo");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased("Foo");
assert!(x.any_uppercase);
assert!(x.any_literal);
let x = cased("foO");
assert!(x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\\");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\w");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\S");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\p{Ll}");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo[a-z]");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo[A-Z]");
assert!(x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo[\S\t]");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\\S");
assert!(x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"\p{Ll}");
assert!(!x.any_uppercase);
assert!(!x.any_literal);
let x = cased(r"aBc\w");
assert!(x.any_uppercase);
assert!(x.any_literal);
}
}

View File

@ -1,53 +0,0 @@
use syntax::hir::{self, Hir, HirKind};
/// Strips Unicode word boundaries from the given expression.
///
/// The key invariant this maintains is that the expression returned will match
/// *at least* every where the expression given will match. Namely, a match of
/// the returned expression can report false positives but it will never report
/// false negatives.
///
/// If no word boundaries could be stripped, then None is returned.
pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option<Hir> {
// The real reason we do this is because Unicode word boundaries are the
// one thing that Rust's regex DFA engine can't handle. When it sees a
// Unicode word boundary among non-ASCII text, it falls back to one of the
// slower engines. We work around this limitation by attempting to use
// a regex to find candidate matches without a Unicode word boundary. We'll
// only then use the full (and slower) regex to confirm a candidate as a
// match or not during search.
//
// It looks like we only check the outer edges for `\b`? I guess this is
// an attempt to optimize for the `-w/--word-regexp` flag? ---AG
match *expr.kind() {
HirKind::Concat(ref es) if !es.is_empty() => {
let first = is_unicode_word_boundary(&es[0]);
let last = is_unicode_word_boundary(es.last().unwrap());
// Be careful not to strip word boundaries if there are no other
// expressions to match.
match (first, last) {
(true, false) if es.len() > 1 => {
Some(Hir::concat(es[1..].to_vec()))
}
(false, true) if es.len() > 1 => {
Some(Hir::concat(es[..es.len() - 1].to_vec()))
}
(true, true) if es.len() > 2 => {
Some(Hir::concat(es[1..es.len() - 1].to_vec()))
}
_ => None,
}
}
_ => None,
}
}
/// Returns true if the given expression is a Unicode word boundary.
fn is_unicode_word_boundary(expr: &Hir) -> bool {
match *expr.kind() {
HirKind::WordBoundary(hir::WordBoundary::Unicode) => true,
HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true,
HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir),
_ => false,
}
}

View File

@ -1,3 +0,0 @@
This project is dual-licensed under the Unlicense and MIT licenses.
You may use this code under the terms of either license.

View File

@ -1,23 +0,0 @@
[package]
name = "grep2"
version = "0.2.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "http://burntsushi.net/rustdoc/grep/"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
grep-printer = { version = "0.0.1", path = "../grep-printer" }
grep-regex = { version = "0.0.1", path = "../grep-regex" }
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
[features]
avx-accel = ["grep-searcher/avx-accel"]
simd-accel = ["grep-searcher/simd-accel"]

View File

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Andrew Gallant
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -1,4 +0,0 @@
grep
----
This is a *library* that provides grep-style line-by-line regex searching (with
comparable performance to `grep` itself).

View File

@ -1,24 +0,0 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

View File

@ -1,10 +0,0 @@
/*!
TODO.
*/
#![deny(missing_docs)]
pub extern crate grep_matcher as matcher;
pub extern crate grep_printer as printer;
pub extern crate grep_regex as regex;
pub extern crate grep_searcher as searcher;

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,22 +1,13 @@
#![allow(dead_code, unused_imports, unused_mut, unused_variables)]
extern crate atty;
extern crate bytecount;
#[macro_use]
extern crate clap;
extern crate encoding_rs;
extern crate encoding_rs_io;
extern crate globset;
extern crate grep;
extern crate grep2;
extern crate ignore;
#[macro_use]
extern crate lazy_static;
extern crate libc;
#[macro_use]
extern crate log;
extern crate memchr;
extern crate memmap;
extern crate num_cpus;
extern crate regex;
extern crate same_file;
@ -24,416 +15,279 @@ extern crate termcolor;
#[cfg(windows)]
extern crate winapi;
use std::error::Error;
use std::io;
use std::process;
use std::result;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc;
use std::thread;
use std::time::{Duration, Instant};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use ignore::WalkState;
use args::Args;
use worker::Work;
macro_rules! errored {
($($tt:tt)*) => {
return Err(From::from(format!($($tt)*)));
}
}
use subject::Subject;
#[macro_use]
mod messages;
mod app;
mod args;
mod args2;
mod config;
mod decompressor;
mod preprocessor;
mod logger;
mod main2;
mod path_printer;
mod pathutil;
mod printer;
mod search;
mod search_buffer;
mod search_stream;
mod subject;
mod unescape;
mod worker;
pub type Result<T> = result::Result<T, Box<Error>>;
pub type Result<T> = ::std::result::Result<T, Box<::std::error::Error>>;
fn main() {
main2::main2();
// reset_sigpipe();
// match Args::parse().map(Arc::new).and_then(run) {
// Ok(0) => process::exit(1),
// Ok(_) => process::exit(0),
// Err(err) => {
// eprintln!("{}", err);
// process::exit(2);
// }
// }
}
fn run(args: Arc<Args>) -> Result<u64> {
if args.never_match() {
return Ok(0);
pub fn main() {
match Args::parse().and_then(run) {
Ok(true) => process::exit(0),
Ok(false) => process::exit(1),
Err(err) => {
eprintln!("{}", err);
process::exit(2);
}
let threads = args.threads();
if args.files() {
if threads == 1 || args.is_one_path() {
run_files_one_thread(&args)
} else {
run_files_parallel(args)
}
} else if args.type_list() {
run_types(&args)
} else if threads == 1 || args.is_one_path() {
run_one_thread(&args)
} else {
run_parallel(&args)
}
}
fn run_parallel(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let bufwtr = Arc::new(args.buffer_writer());
let quiet_matched = args.quiet_matched();
let paths_searched = Arc::new(AtomicUsize::new(0));
let match_line_count = Arc::new(AtomicUsize::new(0));
let paths_matched = Arc::new(AtomicUsize::new(0));
fn run(args: Args) -> Result<bool> {
use args::Command::*;
args.walker_parallel().run(|| {
let args = Arc::clone(args);
let quiet_matched = quiet_matched.clone();
let paths_searched = paths_searched.clone();
let match_line_count = match_line_count.clone();
let paths_matched = paths_matched.clone();
match args.command()? {
Search => search(args),
SearchParallel => search_parallel(args),
SearchNever => Ok(false),
Files => files(args),
FilesParallel => files_parallel(args),
Types => types(args),
}
}
/// The top-level entry point for single-threaded search. This recursively
/// steps through the file list (current directory by default) and searches
/// each file sequentially.
fn search(args: Args) -> Result<bool> {
let started_at = Instant::now();
let quit_after_match = args.quit_after_match()?;
let subject_builder = args.subject_builder();
let mut stats = args.stats()?;
let mut searcher = args.search_worker(args.stdout())?;
let mut matched = false;
for result in args.walker()? {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => continue,
};
let search_result = match searcher.search(&subject) {
Ok(search_result) => search_result,
Err(err) => {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
break;
}
message!("{}: {}", subject.path().display(), err);
continue;
}
};
matched = matched || search_result.has_match();
if let Some(ref mut stats) = stats {
*stats += search_result.stats().unwrap();
}
if matched && quit_after_match {
break;
}
}
if let Some(ref stats) = stats {
let elapsed = Instant::now().duration_since(started_at);
// We don't care if we couldn't print this successfully.
let _ = searcher.printer().print_stats(elapsed, stats);
}
Ok(matched)
}
/// The top-level entry point for multi-threaded search. The parallelism is
/// itself achieved by the recursive directory traversal. All we need to do is
/// feed it a worker for performing a search on each file.
fn search_parallel(args: Args) -> Result<bool> {
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
let quit_after_match = args.quit_after_match()?;
let started_at = Instant::now();
let subject_builder = Arc::new(args.subject_builder());
let bufwtr = Arc::new(args.buffer_writer()?);
let stats = Arc::new(args.stats()?.map(Mutex::new));
let matched = Arc::new(AtomicBool::new(false));
let mut searcher_err = None;
args.walker_parallel()?.run(|| {
let args = args.clone();
let bufwtr = Arc::clone(&bufwtr);
let mut buf = bufwtr.buffer();
let mut worker = args.worker();
Box::new(move |result| {
use ignore::WalkState::*;
let stats = Arc::clone(&stats);
let matched = Arc::clone(&matched);
let subject_builder = Arc::clone(&subject_builder);
let mut searcher = match args.search_worker(bufwtr.buffer()) {
Ok(searcher) => searcher,
Err(err) => {
searcher_err = Some(err);
return Box::new(move |_| {
WalkState::Quit
});
}
};
if quiet_matched.has_match() {
return Quit;
}
let dent = match get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
) {
None => return Continue,
Some(dent) => dent,
Box::new(move |result| {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => return WalkState::Continue,
};
paths_searched.fetch_add(1, Ordering::SeqCst);
buf.clear();
{
// This block actually executes the search and prints the
// results into outbuf.
let mut printer = args.printer(&mut buf);
let count =
if dent.is_stdin() {
worker.run(&mut printer, Work::Stdin)
searcher.printer().get_mut().clear();
let search_result = match searcher.search(&subject) {
Ok(search_result) => search_result,
Err(err) => {
message!("{}: {}", subject.path().display(), err);
return WalkState::Continue;
}
};
if search_result.has_match() {
matched.store(true, SeqCst);
}
if let Some(ref locked_stats) = *stats {
let mut stats = locked_stats.lock().unwrap();
*stats += search_result.stats().unwrap();
}
if let Err(err) = bufwtr.print(searcher.printer().get_mut()) {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
return WalkState::Quit;
}
// Otherwise, we continue on our merry way.
message!("{}: {}", subject.path().display(), err);
}
if matched.load(SeqCst) && quit_after_match {
WalkState::Quit
} else {
worker.run(&mut printer, Work::DirEntry(dent))
};
match_line_count.fetch_add(count as usize, Ordering::SeqCst);
if quiet_matched.set_match(count > 0) {
return Quit;
WalkState::Continue
}
if args.stats() && count > 0 {
paths_matched.fetch_add(1, Ordering::SeqCst);
}
}
// BUG(burntsushi): We should handle this error instead of ignoring
// it. See: https://github.com/BurntSushi/ripgrep/issues/200
let _ = bufwtr.print(&buf);
Continue
})
});
if paths_searched.load(Ordering::SeqCst) == 0 {
eprint_nothing_searched();
if let Some(err) = searcher_err.take() {
return Err(err);
}
let match_line_count = match_line_count.load(Ordering::SeqCst) as u64;
let paths_searched = paths_searched.load(Ordering::SeqCst) as u64;
let paths_matched = paths_matched.load(Ordering::SeqCst) as u64;
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
if let Some(ref locked_stats) = *stats {
let elapsed = Instant::now().duration_since(started_at);
let stats = locked_stats.lock().unwrap();
let mut searcher = args.search_worker(args.stdout())?;
// We don't care if we couldn't print this successfully.
let _ = searcher.printer().print_stats(elapsed, &stats);
}
Ok(match_line_count)
Ok(matched.load(SeqCst))
}
fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let mut stdout = args.stdout();
let mut worker = args.worker();
let mut paths_searched: u64 = 0;
let mut match_line_count = 0;
let mut paths_matched: u64 = 0;
for result in args.walker() {
let dent = match get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
) {
/// The top-level entry point for listing files without searching them. This
/// recursively steps through the file list (current directory by default) and
/// prints each path sequentially using a single thread.
fn files(args: Args) -> Result<bool> {
let quit_after_match = args.quit_after_match()?;
let subject_builder = args.subject_builder();
let mut matched = false;
let mut path_printer = args.path_printer(args.stdout())?;
for result in args.walker()? {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => continue,
Some(dent) => dent,
};
let mut printer = args.printer(&mut stdout);
if match_line_count > 0 {
if args.quiet() {
matched = true;
if quit_after_match {
break;
}
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
if let Err(err) = path_printer.write_path(subject.path()) {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
break;
}
// Otherwise, we have some other error that's preventing us from
// writing to stdout, so we should bubble it up.
return Err(err.into());
}
}
paths_searched += 1;
let count =
if dent.is_stdin() {
worker.run(&mut printer, Work::Stdin)
} else {
worker.run(&mut printer, Work::DirEntry(dent))
};
match_line_count += count;
if args.stats() && count > 0 {
paths_matched += 1;
}
}
if paths_searched == 0 {
eprint_nothing_searched();
}
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
}
Ok(match_line_count)
Ok(matched)
}
fn run_files_parallel(args: Arc<Args>) -> Result<u64> {
let print_args = Arc::clone(&args);
let (tx, rx) = mpsc::channel::<ignore::DirEntry>();
let print_thread = thread::spawn(move || {
let mut printer = print_args.printer(print_args.stdout());
let mut file_count = 0;
for dent in rx.iter() {
if !print_args.quiet() {
printer.path(dent.path());
/// The top-level entry point for listing files without searching them. This
/// recursively steps through the file list (current directory by default) and
/// prints each path sequentially using multiple threads.
fn files_parallel(args: Args) -> Result<bool> {
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
use std::sync::mpsc;
use std::thread;
let quit_after_match = args.quit_after_match()?;
let subject_builder = Arc::new(args.subject_builder());
let mut path_printer = args.path_printer(args.stdout())?;
let matched = Arc::new(AtomicBool::new(false));
let (tx, rx) = mpsc::channel::<Subject>();
let print_thread = thread::spawn(move || -> io::Result<()> {
for subject in rx.iter() {
path_printer.write_path(subject.path())?;
}
file_count += 1;
}
file_count
Ok(())
});
args.walker_parallel().run(move || {
let args = Arc::clone(&args);
args.walker_parallel()?.run(|| {
let subject_builder = Arc::clone(&subject_builder);
let matched = Arc::clone(&matched);
let tx = tx.clone();
Box::new(move |result| {
if let Some(dent) = get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
) {
tx.send(dent).unwrap();
if args.quiet() {
return ignore::WalkState::Quit
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => return WalkState::Continue,
};
matched.store(true, SeqCst);
if quit_after_match {
WalkState::Quit
} else {
match tx.send(subject) {
Ok(_) => WalkState::Continue,
Err(_) => WalkState::Quit,
}
}
ignore::WalkState::Continue
})
});
Ok(print_thread.join().unwrap())
}
fn run_files_one_thread(args: &Arc<Args>) -> Result<u64> {
let mut printer = args.printer(args.stdout());
let mut file_count = 0;
for result in args.walker() {
let dent = match get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
) {
None => continue,
Some(dent) => dent,
};
file_count += 1;
if args.quiet() {
break;
} else {
printer.path(dent.path());
drop(tx);
if let Err(err) = print_thread.join().unwrap() {
// A broken pipe means graceful termination, so fall through.
// Otherwise, something bad happened while writing to stdout, so bubble
// it up.
if err.kind() != io::ErrorKind::BrokenPipe {
return Err(err.into());
}
}
Ok(file_count)
Ok(matched.load(SeqCst))
}
fn run_types(args: &Arc<Args>) -> Result<u64> {
let mut printer = args.printer(args.stdout());
let mut ty_count = 0;
for def in args.type_defs() {
printer.type_def(def);
ty_count += 1;
/// The top-level entry point for --type-list.
fn types(args: Args) -> Result<bool> {
let mut count = 0;
let mut stdout = args.stdout();
for def in args.type_defs()? {
count += 1;
stdout.write_all(def.name().as_bytes())?;
stdout.write_all(b": ")?;
let mut first = true;
for glob in def.globs() {
if !first {
stdout.write_all(b", ")?;
}
Ok(ty_count)
}
fn get_or_log_dir_entry(
result: result::Result<ignore::DirEntry, ignore::Error>,
stdout_handle: Option<&same_file::Handle>,
files_only: bool,
) -> Option<ignore::DirEntry> {
match result {
Err(err) => {
message!("{}", err);
None
}
Ok(dent) => {
if let Some(err) = dent.error() {
ignore_message!("{}", err);
}
if dent.file_type().is_none() {
return Some(dent); // entry is stdin
}
// A depth of 0 means the user gave the path explicitly, so we
// should always try to search it.
if dent.depth() == 0 && !ignore_entry_is_dir(&dent) {
return Some(dent);
} else if !ignore_entry_is_file(&dent) {
return None;
}
// If we are redirecting stdout to a file, then don't search that
// file.
if !files_only && is_stdout_file(&dent, stdout_handle) {
return None;
}
Some(dent)
stdout.write_all(glob.as_bytes())?;
first = false;
}
stdout.write_all(b"\n")?;
}
}
/// Returns true if and only if the given `ignore::DirEntry` points to a
/// directory.
///
/// This works around a bug in Rust's standard library:
/// https://github.com/rust-lang/rust/issues/46484
#[cfg(windows)]
fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool {
use std::os::windows::fs::MetadataExt;
use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY;
dent.metadata().map(|md| {
md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0
}).unwrap_or(false)
}
/// Returns true if and only if the given `ignore::DirEntry` points to a
/// directory.
#[cfg(not(windows))]
fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool {
dent.file_type().map_or(false, |ft| ft.is_dir())
}
/// Returns true if and only if the given `ignore::DirEntry` points to a
/// file.
///
/// This works around a bug in Rust's standard library:
/// https://github.com/rust-lang/rust/issues/46484
#[cfg(windows)]
fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool {
!ignore_entry_is_dir(dent)
}
/// Returns true if and only if the given `ignore::DirEntry` points to a
/// file.
#[cfg(not(windows))]
fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool {
dent.file_type().map_or(false, |ft| ft.is_file())
}
fn is_stdout_file(
dent: &ignore::DirEntry,
stdout_handle: Option<&same_file::Handle>,
) -> bool {
let stdout_handle = match stdout_handle {
None => return false,
Some(stdout_handle) => stdout_handle,
};
// If we know for sure that these two things aren't equal, then avoid
// the costly extra stat call to determine equality.
if !maybe_dent_eq_handle(dent, stdout_handle) {
return false;
}
match same_file::Handle::from_path(dent.path()) {
Ok(h) => stdout_handle == &h,
Err(err) => {
message!("{}: {}", dent.path().display(), err);
false
}
}
}
#[cfg(unix)]
fn maybe_dent_eq_handle(
dent: &ignore::DirEntry,
handle: &same_file::Handle,
) -> bool {
dent.ino() == Some(handle.ino())
}
#[cfg(not(unix))]
fn maybe_dent_eq_handle(_: &ignore::DirEntry, _: &same_file::Handle) -> bool {
true
}
fn eprint_nothing_searched() {
message!(
"No files were searched, which means ripgrep probably \
applied a filter you didn't expect. \
Try running again with --debug.");
}
fn print_stats(
match_count: u64,
paths_searched: u64,
paths_matched: u64,
time_elapsed: Duration,
) {
let time_elapsed =
time_elapsed.as_secs() as f64
+ (time_elapsed.subsec_nanos() as f64 * 1e-9);
println!("\n{} matched lines\n\
{} files contained matches\n\
{} files searched\n\
{:.3} seconds", match_count, paths_matched,
paths_searched, time_elapsed);
}
// The Rust standard library suppresses the default SIGPIPE behavior, so that
// writing to a closed pipe doesn't kill the process. The goal is to instead
// handle errors through the normal result mechanism. Ripgrep needs some
// refactoring before it will be able to do that, however, so we re-enable the
// standard SIGPIPE behavior as a workaround. See
// https://github.com/BurntSushi/ripgrep/issues/200.
#[cfg(unix)]
fn reset_sigpipe() {
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
Ok(count > 0)
}

View File

@ -1,263 +0,0 @@
use std::io;
use std::process;
use std::sync::{Arc, Mutex};
use std::time::Instant;
use grep2::printer::Stats;
use ignore::WalkState;
use args2::Args;
use subject::Subject;
use Result;
pub fn main2() {
match Args::parse().and_then(run) {
Ok(false) => process::exit(1),
Ok(true) => process::exit(0),
Err(err) => {
eprintln!("{}", err);
process::exit(2);
}
}
}
fn run(args: Args) -> Result<bool> {
use args2::Command::*;
match args.command()? {
Search => search(args),
SearchParallel => search_parallel(args),
SearchNever => Ok(false),
Files => files(args),
FilesParallel => files_parallel(args),
Types => types(args),
}
}
/// The top-level entry point for single-threaded search. This recursively
/// steps through the file list (current directory by default) and searches
/// each file sequentially.
fn search(args: Args) -> Result<bool> {
let started_at = Instant::now();
let quit_after_match = args.quit_after_match()?;
let subject_builder = args.subject_builder();
let mut stats = args.stats()?;
let mut searcher = args.search_worker(args.stdout())?;
let mut matched = false;
for result in args.walker()? {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => continue,
};
let search_result = match searcher.search(&subject) {
Ok(search_result) => search_result,
Err(err) => {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
break;
}
message!("{}: {}", subject.path().display(), err);
continue;
}
};
matched = matched || search_result.has_match();
if let Some(ref mut stats) = stats {
*stats += search_result.stats().unwrap();
}
if matched && quit_after_match {
break;
}
}
if let Some(ref stats) = stats {
let elapsed = Instant::now().duration_since(started_at);
// We don't care if we couldn't print this successfully.
let _ = searcher.printer().print_stats(elapsed, stats);
}
Ok(matched)
}
/// The top-level entry point for multi-threaded search. The parallelism is
/// itself achieved by the recursive directory traversal. All we need to do is
/// feed it a worker for performing a search on each file.
fn search_parallel(args: Args) -> Result<bool> {
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
let quit_after_match = args.quit_after_match()?;
let started_at = Instant::now();
let subject_builder = Arc::new(args.subject_builder());
let bufwtr = Arc::new(args.buffer_writer()?);
let stats = Arc::new(args.stats()?.map(Mutex::new));
let matched = Arc::new(AtomicBool::new(false));
let mut searcher_err = None;
args.walker_parallel()?.run(|| {
let args = args.clone();
let bufwtr = Arc::clone(&bufwtr);
let stats = Arc::clone(&stats);
let matched = Arc::clone(&matched);
let subject_builder = Arc::clone(&subject_builder);
let mut searcher = match args.search_worker(bufwtr.buffer()) {
Ok(searcher) => searcher,
Err(err) => {
searcher_err = Some(err);
return Box::new(move |_| {
WalkState::Quit
});
}
};
Box::new(move |result| {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => return WalkState::Continue,
};
searcher.printer().get_mut().clear();
let search_result = match searcher.search(&subject) {
Ok(search_result) => search_result,
Err(err) => {
message!("{}: {}", subject.path().display(), err);
return WalkState::Continue;
}
};
if search_result.has_match() {
matched.store(true, SeqCst);
}
if let Some(ref locked_stats) = *stats {
let mut stats = locked_stats.lock().unwrap();
*stats += search_result.stats().unwrap();
}
if let Err(err) = bufwtr.print(searcher.printer().get_mut()) {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
return WalkState::Quit;
}
// Otherwise, we continue on our merry way.
message!("{}: {}", subject.path().display(), err);
}
if matched.load(SeqCst) && quit_after_match {
WalkState::Quit
} else {
WalkState::Continue
}
})
});
if let Some(err) = searcher_err.take() {
return Err(err);
}
if let Some(ref locked_stats) = *stats {
let elapsed = Instant::now().duration_since(started_at);
let stats = locked_stats.lock().unwrap();
let mut searcher = args.search_worker(args.stdout())?;
// We don't care if we couldn't print this successfully.
let _ = searcher.printer().print_stats(elapsed, &stats);
}
Ok(matched.load(SeqCst))
}
/// The top-level entry point for listing files without searching them. This
/// recursively steps through the file list (current directory by default) and
/// prints each path sequentially using a single thread.
fn files(args: Args) -> Result<bool> {
let quit_after_match = args.quit_after_match()?;
let subject_builder = args.subject_builder();
let mut matched = false;
let mut path_printer = args.path_printer(args.stdout())?;
for result in args.walker()? {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => continue,
};
matched = true;
if quit_after_match {
break;
}
if let Err(err) = path_printer.write_path(subject.path()) {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
break;
}
// Otherwise, we have some other error that's preventing us from
// writing to stdout, so we should bubble it up.
return Err(err.into());
}
}
Ok(matched)
}
/// The top-level entry point for listing files without searching them. This
/// recursively steps through the file list (current directory by default) and
/// prints each path sequentially using multiple threads.
fn files_parallel(args: Args) -> Result<bool> {
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
use std::sync::mpsc;
use std::thread;
let quit_after_match = args.quit_after_match()?;
let subject_builder = Arc::new(args.subject_builder());
let mut path_printer = args.path_printer(args.stdout())?;
let matched = Arc::new(AtomicBool::new(false));
let (tx, rx) = mpsc::channel::<Subject>();
let print_thread = thread::spawn(move || -> io::Result<()> {
for subject in rx.iter() {
path_printer.write_path(subject.path())?;
}
Ok(())
});
args.walker_parallel()?.run(|| {
let args = args.clone();
let subject_builder = Arc::clone(&subject_builder);
let matched = Arc::clone(&matched);
let tx = tx.clone();
Box::new(move |result| {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => return WalkState::Continue,
};
matched.store(true, SeqCst);
if quit_after_match {
WalkState::Quit
} else {
match tx.send(subject) {
Ok(_) => WalkState::Continue,
Err(_) => WalkState::Quit,
}
}
})
});
drop(tx);
if let Err(err) = print_thread.join().unwrap() {
// A broken pipe means graceful termination, so fall through.
// Otherwise, something bad happened while writing to stdout, so bubble
// it up.
if err.kind() != io::ErrorKind::BrokenPipe {
return Err(err.into());
}
}
Ok(matched.load(SeqCst))
}
/// The top-level entry point for --type-list.
fn types(args: Args) -> Result<bool> {
let mut count = 0;
let mut stdout = args.stdout();
for def in args.type_defs()? {
count += 1;
stdout.write_all(def.name().as_bytes())?;
stdout.write_all(b": ")?;
let mut first = true;
for glob in def.globs() {
if !first {
stdout.write_all(b", ")?;
}
stdout.write_all(glob.as_bytes())?;
first = false;
}
stdout.write_all(b"\n")?;
}
Ok(count > 0)
}

View File

@ -1,7 +1,7 @@
use std::io;
use std::path::Path;
use grep2::printer::{ColorSpecs, PrinterPath};
use grep::printer::{ColorSpecs, PrinterPath};
use termcolor::WriteColor;
/// A configuration for describing how paths should be written.

View File

@ -1,39 +0,0 @@
/*!
The pathutil module provides platform specific operations on paths that are
typically faster than the same operations as provided in `std::path`. In
particular, we really want to avoid the costly operation of parsing the path
into its constituent components. We give up on Windows, but on Unix, we deal
with the raw bytes directly.
*/
use std::path::Path;
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
prefix: &'a P,
path: &'a Path,
) -> Option<&'a Path> {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
let prefix = prefix.as_ref().as_os_str().as_bytes();
let path = path.as_os_str().as_bytes();
if prefix.len() > path.len() || prefix != &path[0..prefix.len()] {
None
} else {
Some(Path::new(OsStr::from_bytes(&path[prefix.len()..])))
}
}
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
prefix: &'a P,
path: &'a Path,
) -> Option<&'a Path> {
path.strip_prefix(prefix).ok()
}

View File

@ -1,928 +0,0 @@
use std::error;
use std::fmt;
use std::path::Path;
use std::str::FromStr;
use regex::bytes::{Captures, Match, Regex, Replacer};
use termcolor::{Color, ColorSpec, ParseColorError, WriteColor};
use pathutil::strip_prefix;
use ignore::types::FileTypeDef;
/// Track the start and end of replacements to allow coloring them on output.
#[derive(Debug)]
struct Offset {
start: usize,
end: usize,
}
impl Offset {
fn new(start: usize, end: usize) -> Offset {
Offset { start: start, end: end }
}
}
impl<'m, 'r> From<&'m Match<'r>> for Offset {
fn from(m: &'m Match<'r>) -> Self {
Offset{ start: m.start(), end: m.end() }
}
}
/// `CountingReplacer` implements the Replacer interface for Regex,
/// and counts how often replacement is being performed.
struct CountingReplacer<'r> {
replace: &'r [u8],
count: &'r mut usize,
offsets: &'r mut Vec<Offset>,
}
impl<'r> CountingReplacer<'r> {
fn new(
replace: &'r [u8],
count: &'r mut usize,
offsets: &'r mut Vec<Offset>,
) -> CountingReplacer<'r> {
CountingReplacer { replace: replace, count: count, offsets: offsets, }
}
}
impl<'r> Replacer for CountingReplacer<'r> {
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
*self.count += 1;
let start = dst.len();
caps.expand(self.replace, dst);
let end = dst.len();
if start != end {
self.offsets.push(Offset::new(start, end));
}
}
}
/// Printer encapsulates all output logic for searching.
///
/// Note that we currently ignore all write errors. It's probably worthwhile
/// to fix this, but printers are only ever used for writes to stdout or
/// writes to memory, neither of which commonly fail.
pub struct Printer<W> {
/// The underlying writer.
wtr: W,
/// Whether anything has been printed to wtr yet.
has_printed: bool,
/// Whether to show column numbers for the first match or not.
column: bool,
/// The string to use to separate non-contiguous runs of context lines.
context_separator: Vec<u8>,
/// The end-of-line terminator used by the printer. In general, eols are
/// printed via the match directly, but occasionally we need to insert them
/// ourselves (for example, to print a context separator).
eol: u8,
/// A file separator to show before any matches are printed.
file_separator: Option<Vec<u8>>,
/// Whether to show file name as a heading or not.
///
/// N.B. If with_filename is false, then this setting has no effect.
heading: bool,
/// Whether to show every match on its own line.
line_per_match: bool,
/// Whether to print NUL bytes after a file path instead of new lines
/// or `:`.
null: bool,
/// Print only the matched (non-empty) parts of a matching line
only_matching: bool,
/// A string to use as a replacement of each match in a matching line.
replace: Option<Vec<u8>>,
/// Whether to prefix each match with the corresponding file name.
with_filename: bool,
/// The color specifications.
colors: ColorSpecs,
/// The separator to use for file paths. If empty, this is ignored.
path_separator: Option<u8>,
/// Restrict lines to this many columns.
max_columns: Option<usize>,
}
impl<W: WriteColor> Printer<W> {
/// Create a new printer that writes to wtr with the given color settings.
pub fn new(wtr: W) -> Printer<W> {
Printer {
wtr: wtr,
has_printed: false,
column: false,
context_separator: "--".to_string().into_bytes(),
eol: b'\n',
file_separator: None,
heading: false,
line_per_match: false,
null: false,
only_matching: false,
replace: None,
with_filename: false,
colors: ColorSpecs::default(),
path_separator: None,
max_columns: None,
}
}
/// Set the color specifications.
pub fn colors(mut self, colors: ColorSpecs) -> Printer<W> {
self.colors = colors;
self
}
/// When set, column numbers will be printed for the first match on each
/// line.
pub fn column(mut self, yes: bool) -> Printer<W> {
self.column = yes;
self
}
/// Set the context separator. The default is `--`.
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
self.context_separator = sep;
self
}
/// Set the end-of-line terminator. The default is `\n`.
pub fn eol(mut self, eol: u8) -> Printer<W> {
self.eol = eol;
self
}
/// If set, the separator is printed before any matches. By default, no
/// separator is printed.
pub fn file_separator(mut self, sep: Vec<u8>) -> Printer<W> {
self.file_separator = Some(sep);
self
}
/// Whether to show file name as a heading or not.
///
/// N.B. If with_filename is false, then this setting has no effect.
pub fn heading(mut self, yes: bool) -> Printer<W> {
self.heading = yes;
self
}
/// Whether to show every match on its own line.
pub fn line_per_match(mut self, yes: bool) -> Printer<W> {
self.line_per_match = yes;
self
}
/// Whether to cause NUL bytes to follow file paths instead of other
/// visual separators (like `:`, `-` and `\n`).
pub fn null(mut self, yes: bool) -> Printer<W> {
self.null = yes;
self
}
/// Print only the matched (non-empty) parts of a matching line
pub fn only_matching(mut self, yes: bool) -> Printer<W> {
self.only_matching = yes;
self
}
/// A separator to use when printing file paths. When empty, use the
/// default separator for the current platform. (/ on Unix, \ on Windows.)
pub fn path_separator(mut self, sep: Option<u8>) -> Printer<W> {
self.path_separator = sep;
self
}
/// Replace every match in each matching line with the replacement string
/// given.
pub fn replace(mut self, replacement: Vec<u8>) -> Printer<W> {
self.replace = Some(replacement);
self
}
/// When set, each match is prefixed with the file name that it came from.
pub fn with_filename(mut self, yes: bool) -> Printer<W> {
self.with_filename = yes;
self
}
/// Configure the max. number of columns used for printing matching lines.
pub fn max_columns(mut self, max_columns: Option<usize>) -> Printer<W> {
self.max_columns = max_columns;
self
}
/// Returns true if and only if something has been printed.
pub fn has_printed(&self) -> bool {
self.has_printed
}
/// Flushes the underlying writer and returns it.
#[allow(dead_code)]
pub fn into_inner(mut self) -> W {
let _ = self.wtr.flush();
self.wtr
}
/// Prints a type definition.
pub fn type_def(&mut self, def: &FileTypeDef) {
self.write(def.name().as_bytes());
self.write(b": ");
let mut first = true;
for glob in def.globs() {
if !first {
self.write(b", ");
}
self.write(glob.as_bytes());
first = false;
}
self.write_eol();
}
/// Prints the given path.
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref());
self.write_path(path);
self.write_path_eol();
}
/// Prints the given path and a count of the number of matches found.
pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
if self.with_filename {
self.write_path(path);
self.write_path_sep(b':');
}
self.write(count.to_string().as_bytes());
self.write_eol();
}
/// Prints the context separator.
pub fn context_separate(&mut self) {
if self.context_separator.is_empty() {
return;
}
let _ = self.wtr.write_all(&self.context_separator);
self.write_eol();
}
pub fn matched<P: AsRef<Path>>(
&mut self,
re: &Regex,
path: P,
buf: &[u8],
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>
) {
if !self.line_per_match && !self.only_matching {
let mat =
if !self.needs_match() {
(0, 0)
} else {
re.find(&buf[start..end])
.map(|m| (m.start(), m.end()))
.unwrap_or((0, 0))
};
return self.write_match(
re, path, buf, start, end, line_number,
byte_offset, mat.0, mat.1);
}
for m in re.find_iter(&buf[start..end]) {
self.write_match(
re, path.as_ref(), buf, start, end, line_number,
byte_offset, m.start(), m.end());
}
}
fn needs_match(&self) -> bool {
self.column
|| self.replace.is_some()
|| self.only_matching
}
fn write_match<P: AsRef<Path>>(
&mut self,
re: &Regex,
path: P,
buf: &[u8],
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>,
match_start: usize,
match_end: usize,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
self.write_path(path);
self.write_path_eol();
} else if !self.heading && self.with_filename {
self.write_path(path);
self.write_path_sep(b':');
}
if let Some(line_number) = line_number {
self.line_number(line_number, b':');
}
if self.column {
self.column_number(match_start as u64 + 1, b':');
}
if let Some(byte_offset) = byte_offset {
if self.only_matching {
self.write_byte_offset(
byte_offset + ((start + match_start) as u64), b':');
} else {
self.write_byte_offset(byte_offset + (start as u64), b':');
}
}
if self.replace.is_some() {
let mut count = 0;
let mut offsets = Vec::new();
let line = {
let replacer = CountingReplacer::new(
self.replace.as_ref().unwrap(), &mut count, &mut offsets);
if self.only_matching {
re.replace_all(
&buf[start + match_start..start + match_end], replacer)
} else {
re.replace_all(&buf[start..end], replacer)
}
};
if self.max_columns.map_or(false, |m| line.len() > m) {
let msg = format!(
"[Omitted long line with {} replacements]", count);
self.write_colored(msg.as_bytes(), |colors| colors.matched());
self.write_eol();
return;
}
self.write_matched_line(offsets, &*line, false);
} else {
let buf = if self.only_matching {
&buf[start + match_start..start + match_end]
} else {
&buf[start..end]
};
if self.max_columns.map_or(false, |m| buf.len() > m) {
let count = re.find_iter(buf).count();
let msg = format!("[Omitted long line with {} matches]", count);
self.write_colored(msg.as_bytes(), |colors| colors.matched());
self.write_eol();
return;
}
let only_match = self.only_matching;
self.write_matched_line(
re.find_iter(buf).map(|x| Offset::from(&x)), buf, only_match);
}
}
fn write_matched_line<I>(&mut self, offsets: I, buf: &[u8], only_match: bool)
where I: IntoIterator<Item=Offset>,
{
if !self.wtr.supports_color() || self.colors.matched().is_none() {
self.write(buf);
} else if only_match {
self.write_colored(buf, |colors| colors.matched());
} else {
let mut last_written = 0;
for o in offsets {
self.write(&buf[last_written..o.start]);
// This conditional checks if the match is both empty *and*
// past the end of the line. In this case, we never want to
// emit an additional color escape.
if o.start != o.end || o.end != buf.len() {
self.write_colored(
&buf[o.start..o.end], |colors| colors.matched());
}
last_written = o.end;
}
self.write(&buf[last_written..]);
}
if buf.last() != Some(&self.eol) {
self.write_eol();
}
}
pub fn context<P: AsRef<Path>>(
&mut self,
path: P,
buf: &[u8],
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
self.write_path(path);
self.write_path_eol();
} else if !self.heading && self.with_filename {
self.write_path(path);
self.write_path_sep(b'-');
}
if let Some(line_number) = line_number {
self.line_number(line_number, b'-');
}
if let Some(byte_offset) = byte_offset {
self.write_byte_offset(byte_offset + (start as u64), b'-');
}
if self.max_columns.map_or(false, |m| end - start > m) {
self.write(b"[Omitted long context line]");
self.write_eol();
return;
}
self.write(&buf[start..end]);
if buf[start..end].last() != Some(&self.eol) {
self.write_eol();
}
}
fn separator(&mut self, sep: &[u8]) {
self.write(sep);
}
fn write_path_sep(&mut self, sep: u8) {
if self.null {
self.write(b"\x00");
} else {
self.separator(&[sep]);
}
}
fn write_path_eol(&mut self) {
if self.null {
self.write(b"\x00");
} else {
self.write_eol();
}
}
#[cfg(unix)]
fn write_path<P: AsRef<Path>>(&mut self, path: P) {
use std::os::unix::ffi::OsStrExt;
let path = path.as_ref().as_os_str().as_bytes();
self.write_path_replace_separator(path);
}
#[cfg(not(unix))]
fn write_path<P: AsRef<Path>>(&mut self, path: P) {
let path = path.as_ref().to_string_lossy();
self.write_path_replace_separator(path.as_bytes());
}
fn write_path_replace_separator(&mut self, path: &[u8]) {
match self.path_separator {
None => self.write_colored(path, |colors| colors.path()),
Some(sep) => {
let transformed_path: Vec<_> = path.iter().map(|&b| {
if b == b'/' || (cfg!(windows) && b == b'\\') {
sep
} else {
b
}
}).collect();
self.write_colored(&transformed_path, |colors| colors.path());
}
}
}
fn line_number(&mut self, n: u64, sep: u8) {
let line_number = n.to_string();
self.write_colored(line_number.as_bytes(), |colors| colors.line());
self.separator(&[sep]);
}
fn column_number(&mut self, n: u64, sep: u8) {
self.write_colored(n.to_string().as_bytes(), |colors| colors.column());
self.separator(&[sep]);
}
fn write_byte_offset(&mut self, o: u64, sep: u8) {
self.write_colored(o.to_string().as_bytes(), |colors| colors.column());
self.separator(&[sep]);
}
fn write(&mut self, buf: &[u8]) {
self.has_printed = true;
let _ = self.wtr.write_all(buf);
}
fn write_eol(&mut self) {
let eol = self.eol;
self.write(&[eol]);
}
fn write_colored<F>(&mut self, buf: &[u8], get_color: F)
where F: Fn(&ColorSpecs) -> &ColorSpec
{
let _ = self.wtr.set_color(get_color(&self.colors));
self.write(buf);
let _ = self.wtr.reset();
}
fn write_file_sep(&mut self) {
if let Some(ref sep) = self.file_separator {
self.has_printed = true;
let _ = self.wtr.write_all(sep);
let _ = self.wtr.write_all(b"\n");
}
}
}
/// An error that can occur when parsing color specifications.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
/// This occurs when an unrecognized output type is used.
UnrecognizedOutType(String),
/// This occurs when an unrecognized spec type is used.
UnrecognizedSpecType(String),
/// This occurs when an unrecognized color name is used.
UnrecognizedColor(String, String),
/// This occurs when an unrecognized style attribute is used.
UnrecognizedStyle(String),
/// This occurs when the format of a color specification is invalid.
InvalidFormat(String),
}
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::UnrecognizedOutType(_) => "unrecognized output type",
Error::UnrecognizedSpecType(_) => "unrecognized spec type",
Error::UnrecognizedColor(_, _) => "unrecognized color name",
Error::UnrecognizedStyle(_) => "unrecognized style attribute",
Error::InvalidFormat(_) => "invalid color spec",
}
}
fn cause(&self) -> Option<&error::Error> {
None
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::UnrecognizedOutType(ref name) => {
write!(f, "Unrecognized output type '{}'. Choose from: \
path, line, column, match.", name)
}
Error::UnrecognizedSpecType(ref name) => {
write!(f, "Unrecognized spec type '{}'. Choose from: \
fg, bg, style, none.", name)
}
Error::UnrecognizedColor(_, ref msg) => {
write!(f, "{}", msg)
}
Error::UnrecognizedStyle(ref name) => {
write!(f, "Unrecognized style attribute '{}'. Choose from: \
nobold, bold, nointense, intense, nounderline, \
underline.", name)
}
Error::InvalidFormat(ref original) => {
write!(
f,
"Invalid color spec format: '{}'. Valid format \
is '(path|line|column|match):(fg|bg|style):(value)'.",
original)
}
}
}
}
impl From<ParseColorError> for Error {
fn from(err: ParseColorError) -> Error {
Error::UnrecognizedColor(err.invalid().to_string(), err.to_string())
}
}
/// A merged set of color specifications.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ColorSpecs {
path: ColorSpec,
line: ColorSpec,
column: ColorSpec,
matched: ColorSpec,
}
/// A single color specification provided by the user.
///
/// A `ColorSpecs` can be built by merging a sequence of `Spec`s.
///
/// ## Example
///
/// The only way to build a `Spec` is to parse it from a string. Once multiple
/// `Spec`s have been constructed, then can be merged into a single
/// `ColorSpecs` value.
///
/// ```rust
/// use termcolor::{Color, ColorSpecs, Spec};
///
/// let spec1: Spec = "path:fg:blue".parse().unwrap();
/// let spec2: Spec = "match:bg:green".parse().unwrap();
/// let specs = ColorSpecs::new(&[spec1, spec2]);
///
/// assert_eq!(specs.path().fg(), Some(Color::Blue));
/// assert_eq!(specs.matched().bg(), Some(Color::Green));
/// ```
///
/// ## Format
///
/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each
/// component is defined as follows:
///
/// * `{type}` can be one of `path`, `line`, `column` or `match`.
/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also
/// be the special value `none`, in which case, `{value}` can be omitted.
/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction.
///
/// `{type}` controls which part of the output should be styled and is
/// application dependent.
///
/// When `{attribute}` is `none`, then this should cause any existing color
/// settings to be cleared.
///
/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it
/// should be a style instruction when `{attribute}` is `style`. When
/// `{attribute}` is `none`, `{value}` must be omitted.
///
/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`,
/// `yellow`, `white`.
///
/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`,
/// `underline`, `nounderline`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Spec {
ty: OutType,
value: SpecValue,
}
/// The actual value given by the specification.
#[derive(Clone, Debug, Eq, PartialEq)]
enum SpecValue {
None,
Fg(Color),
Bg(Color),
Style(Style),
}
/// The set of configurable portions of ripgrep's output.
#[derive(Clone, Debug, Eq, PartialEq)]
enum OutType {
Path,
Line,
Column,
Match,
}
/// The specification type.
#[derive(Clone, Debug, Eq, PartialEq)]
enum SpecType {
Fg,
Bg,
Style,
None,
}
/// The set of available styles for use in the terminal.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Style {
Bold,
NoBold,
Intense,
NoIntense,
Underline,
NoUnderline
}
impl ColorSpecs {
/// Create color specifications from a list of user supplied
/// specifications.
pub fn new(user_specs: &[Spec]) -> ColorSpecs {
let mut specs = ColorSpecs::default();
for user_spec in user_specs {
match user_spec.ty {
OutType::Path => user_spec.merge_into(&mut specs.path),
OutType::Line => user_spec.merge_into(&mut specs.line),
OutType::Column => user_spec.merge_into(&mut specs.column),
OutType::Match => user_spec.merge_into(&mut specs.matched),
}
}
specs
}
/// Return the color specification for coloring file paths.
fn path(&self) -> &ColorSpec {
&self.path
}
/// Return the color specification for coloring line numbers.
fn line(&self) -> &ColorSpec {
&self.line
}
/// Return the color specification for coloring column numbers.
fn column(&self) -> &ColorSpec {
&self.column
}
/// Return the color specification for coloring matched text.
fn matched(&self) -> &ColorSpec {
&self.matched
}
}
impl Spec {
/// Merge this spec into the given color specification.
fn merge_into(&self, cspec: &mut ColorSpec) {
self.value.merge_into(cspec);
}
}
impl SpecValue {
/// Merge this spec value into the given color specification.
fn merge_into(&self, cspec: &mut ColorSpec) {
match *self {
SpecValue::None => cspec.clear(),
SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); }
SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); }
SpecValue::Style(ref style) => {
match *style {
Style::Bold => { cspec.set_bold(true); }
Style::NoBold => { cspec.set_bold(false); }
Style::Intense => { cspec.set_intense(true); }
Style::NoIntense => { cspec.set_intense(false); }
Style::Underline => { cspec.set_underline(true); }
Style::NoUnderline => { cspec.set_underline(false); }
}
}
}
}
}
impl FromStr for Spec {
type Err = Error;
fn from_str(s: &str) -> Result<Spec, Error> {
let pieces: Vec<&str> = s.split(':').collect();
if pieces.len() <= 1 || pieces.len() > 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let otype: OutType = pieces[0].parse()?;
match pieces[1].parse()? {
SpecType::None => Ok(Spec { ty: otype, value: SpecValue::None }),
SpecType::Style => {
if pieces.len() < 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let style: Style = pieces[2].parse()?;
Ok(Spec { ty: otype, value: SpecValue::Style(style) })
}
SpecType::Fg => {
if pieces.len() < 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let color: Color = pieces[2].parse()?;
Ok(Spec { ty: otype, value: SpecValue::Fg(color) })
}
SpecType::Bg => {
if pieces.len() < 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let color: Color = pieces[2].parse()?;
Ok(Spec { ty: otype, value: SpecValue::Bg(color) })
}
}
}
}
impl FromStr for OutType {
type Err = Error;
fn from_str(s: &str) -> Result<OutType, Error> {
match &*s.to_lowercase() {
"path" => Ok(OutType::Path),
"line" => Ok(OutType::Line),
"column" => Ok(OutType::Column),
"match" => Ok(OutType::Match),
_ => Err(Error::UnrecognizedOutType(s.to_string())),
}
}
}
impl FromStr for SpecType {
type Err = Error;
fn from_str(s: &str) -> Result<SpecType, Error> {
match &*s.to_lowercase() {
"fg" => Ok(SpecType::Fg),
"bg" => Ok(SpecType::Bg),
"style" => Ok(SpecType::Style),
"none" => Ok(SpecType::None),
_ => Err(Error::UnrecognizedSpecType(s.to_string())),
}
}
}
impl FromStr for Style {
type Err = Error;
fn from_str(s: &str) -> Result<Style, Error> {
match &*s.to_lowercase() {
"bold" => Ok(Style::Bold),
"nobold" => Ok(Style::NoBold),
"intense" => Ok(Style::Intense),
"nointense" => Ok(Style::NoIntense),
"underline" => Ok(Style::Underline),
"nounderline" => Ok(Style::NoUnderline),
_ => Err(Error::UnrecognizedStyle(s.to_string())),
}
}
}
#[cfg(test)]
mod tests {
use termcolor::{Color, ColorSpec};
use super::{ColorSpecs, Error, OutType, Spec, SpecValue, Style};
#[test]
fn merge() {
let user_specs: &[Spec] = &[
"match:fg:blue".parse().unwrap(),
"match:none".parse().unwrap(),
"match:style:bold".parse().unwrap(),
];
let mut expect_matched = ColorSpec::new();
expect_matched.set_bold(true);
assert_eq!(ColorSpecs::new(user_specs), ColorSpecs {
path: ColorSpec::default(),
line: ColorSpec::default(),
column: ColorSpec::default(),
matched: expect_matched,
});
}
#[test]
fn specs() {
let spec: Spec = "path:fg:blue".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Path,
value: SpecValue::Fg(Color::Blue),
});
let spec: Spec = "path:bg:red".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Path,
value: SpecValue::Bg(Color::Red),
});
let spec: Spec = "match:style:bold".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Match,
value: SpecValue::Style(Style::Bold),
});
let spec: Spec = "match:style:intense".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Match,
value: SpecValue::Style(Style::Intense),
});
let spec: Spec = "match:style:underline".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Match,
value: SpecValue::Style(Style::Underline),
});
let spec: Spec = "line:none".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Line,
value: SpecValue::None,
});
let spec: Spec = "column:bg:green".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Column,
value: SpecValue::Bg(Color::Green),
});
}
#[test]
fn spec_errors() {
let err = "line:nonee".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::UnrecognizedSpecType("nonee".to_string()));
let err = "".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::InvalidFormat("".to_string()));
let err = "foo".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::InvalidFormat("foo".to_string()));
let err = "line:style:italic".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::UnrecognizedStyle("italic".to_string()));
let err = "line:fg:brown".parse::<Spec>().unwrap_err();
match err {
Error::UnrecognizedColor(name, _) => assert_eq!(name, "brown"),
err => assert!(false, "unexpected error: {:?}", err),
}
let err = "foo:fg:brown".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::UnrecognizedOutType("foo".to_string()));
}
}

View File

@ -2,10 +2,10 @@ use std::io;
use std::path::{Path, PathBuf};
use std::time::Duration;
use grep2::matcher::Matcher;
use grep2::printer::{JSON, Standard, Summary, Stats};
use grep2::regex::RegexMatcher;
use grep2::searcher::Searcher;
use grep::matcher::Matcher;
use grep::printer::{JSON, Standard, Summary, Stats};
use grep::regex::RegexMatcher;
use grep::searcher::Searcher;
use termcolor::WriteColor;
use decompressor::{DecompressionReader, is_compressed};
@ -95,7 +95,6 @@ impl SearchWorkerBuilder {
#[derive(Clone, Debug, Default)]
pub struct SearchResult {
has_match: bool,
binary_byte_offset: Option<u64>,
stats: Option<Stats>,
}
@ -105,15 +104,6 @@ impl SearchResult {
self.has_match
}
/// Whether the search found binary data, and if so, the first absolute
/// byte offset at which it was detected.
///
/// This always returns `None` if binary data detection is disabled, even
/// when binary data is present.
pub fn binary_byte_offset(&self) -> Option<u64> {
self.binary_byte_offset
}
/// Return aggregate search statistics for a single search, if available.
///
/// It can be expensive to compute statistics, so these are only present
@ -168,10 +158,8 @@ impl<W: WriteColor> Printer<W> {
total_duration: Duration,
stats: &Stats,
) -> io::Result<()> {
let mut wtr = self.get_mut();
write!(
wtr,
self.get_mut(),
"
{matches} matches
{lines} matched lines
@ -295,9 +283,7 @@ fn search_path<M: Matcher, W: WriteColor>(
searcher.search_path(&matcher, path, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
binary_byte_offset: sink.binary_byte_offset(),
stats: sink.stats().map(|s| s.clone()),
..SearchResult::default()
})
}
Printer::Summary(ref mut p) => {
@ -305,9 +291,7 @@ fn search_path<M: Matcher, W: WriteColor>(
searcher.search_path(&matcher, path, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
binary_byte_offset: sink.binary_byte_offset(),
stats: sink.stats().map(|s| s.clone()),
..SearchResult::default()
})
}
Printer::JSON(ref mut p) => {
@ -315,9 +299,7 @@ fn search_path<M: Matcher, W: WriteColor>(
searcher.search_path(&matcher, path, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
binary_byte_offset: sink.binary_byte_offset(),
stats: Some(sink.stats().clone()),
..SearchResult::default()
})
}
}
@ -338,9 +320,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
searcher.search_reader(&matcher, rdr, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
binary_byte_offset: sink.binary_byte_offset(),
stats: sink.stats().map(|s| s.clone()),
..SearchResult::default()
})
}
Printer::Summary(ref mut p) => {
@ -348,9 +328,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
searcher.search_reader(&matcher, rdr, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
binary_byte_offset: sink.binary_byte_offset(),
stats: sink.stats().map(|s| s.clone()),
..SearchResult::default()
})
}
Printer::JSON(ref mut p) => {
@ -358,9 +336,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
searcher.search_reader(&matcher, rdr, &mut sink)?;
Ok(SearchResult {
has_match: sink.has_match(),
binary_byte_offset: sink.binary_byte_offset(),
stats: Some(sink.stats().clone()),
..SearchResult::default()
})
}
}

View File

@ -1,424 +0,0 @@
/*!
The `search_buffer` module is responsible for searching a single file all in a
single buffer. Typically, the source of the buffer is a memory map. This can
be useful for when memory maps are faster than streaming search.
Note that this module doesn't quite support everything that `search_stream`
does. Notably, showing contexts.
*/
use std::cmp;
use std::path::Path;
use grep::Grep;
use termcolor::WriteColor;
use printer::Printer;
use search_stream::{IterLines, Options, count_lines, is_binary};
pub struct BufferSearcher<'a, W: 'a> {
opts: Options,
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
buf: &'a [u8],
match_line_count: u64,
match_count: Option<u64>,
line_count: Option<u64>,
byte_offset: Option<u64>,
last_line: usize,
}
impl<'a, W: WriteColor> BufferSearcher<'a, W> {
pub fn new(
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
buf: &'a [u8],
) -> BufferSearcher<'a, W> {
BufferSearcher {
opts: Options::default(),
printer: printer,
grep: grep,
path: path,
buf: buf,
match_line_count: 0,
match_count: None,
line_count: None,
byte_offset: None,
last_line: 0,
}
}
/// If enabled, searching will print a 0-based offset of the
/// matching line (or the actual match if -o is specified) before
/// printing the line itself.
///
/// Disabled by default.
pub fn byte_offset(mut self, yes: bool) -> Self {
self.opts.byte_offset = yes;
self
}
/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
pub fn count(mut self, yes: bool) -> Self {
self.opts.count = yes;
self
}
/// If enabled, searching will print the count of individual matches
/// instead of each match.
///
/// Disabled by default.
pub fn count_matches(mut self, yes: bool) -> Self {
self.opts.count_matches = yes;
self
}
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
pub fn files_with_matches(mut self, yes: bool) -> Self {
self.opts.files_with_matches = yes;
self
}
/// If enabled, searching will print the path of files that *don't* match
/// the given pattern.
///
/// Disabled by default.
pub fn files_without_matches(mut self, yes: bool) -> Self {
self.opts.files_without_matches = yes;
self
}
/// Set the end-of-line byte used by this searcher.
pub fn eol(mut self, eol: u8) -> Self {
self.opts.eol = eol;
self
}
/// If enabled, matching is inverted so that lines that *don't* match the
/// given pattern are treated as matches.
pub fn invert_match(mut self, yes: bool) -> Self {
self.opts.invert_match = yes;
self
}
/// If enabled, compute line numbers and prefix each line of output with
/// them.
pub fn line_number(mut self, yes: bool) -> Self {
self.opts.line_number = yes;
self
}
/// Limit the number of matches to the given count.
///
/// The default is None, which corresponds to no limit.
pub fn max_count(mut self, count: Option<u64>) -> Self {
self.opts.max_count = count;
self
}
/// If enabled, don't show any output and quit searching after the first
/// match is found.
pub fn quiet(mut self, yes: bool) -> Self {
self.opts.quiet = yes;
self
}
/// If enabled, search binary files as if they were text.
pub fn text(mut self, yes: bool) -> Self {
self.opts.text = yes;
self
}
#[inline(never)]
pub fn run(mut self) -> u64 {
let binary_upto = cmp::min(10_240, self.buf.len());
if !self.opts.text && is_binary(&self.buf[..binary_upto], true) {
return 0;
}
self.match_line_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
// The memory map searcher uses one contiguous block of bytes, so the
// offsets given the printer are sufficient to compute the byte offset.
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
self.match_count = if self.opts.count_matches { Some(0) } else { None };
let mut last_end = 0;
for m in self.grep.iter(self.buf) {
if self.opts.invert_match {
self.print_inverted_matches(last_end, m.start());
} else {
self.print_match(m.start(), m.end());
}
last_end = m.end();
if self.opts.terminate(self.match_line_count) {
break;
}
}
if self.opts.invert_match && !self.opts.terminate(self.match_line_count) {
let upto = self.buf.len();
self.print_inverted_matches(last_end, upto);
}
if self.opts.count && self.match_line_count > 0 {
self.printer.path_count(self.path, self.match_line_count);
} else if self.opts.count_matches
&& self.match_count.map_or(false, |c| c > 0)
{
self.printer.path_count(self.path, self.match_count.unwrap());
}
if self.opts.files_with_matches && self.match_line_count > 0 {
self.printer.path(self.path);
}
if self.opts.files_without_matches && self.match_line_count == 0 {
self.printer.path(self.path);
}
self.match_line_count
}
#[inline(always)]
fn count_individual_matches(&mut self, start: usize, end: usize) {
if let Some(ref mut count) = self.match_count {
for _ in self.grep.regex().find_iter(&self.buf[start..end]) {
*count += 1;
}
}
}
#[inline(always)]
pub fn print_match(&mut self, start: usize, end: usize) {
self.match_line_count += 1;
self.count_individual_matches(start, end);
if self.opts.skip_matches() {
return;
}
self.count_lines(start);
self.add_line(end);
self.printer.matched(
self.grep.regex(), self.path, self.buf,
start, end, self.line_count, self.byte_offset);
}
#[inline(always)]
fn print_inverted_matches(&mut self, start: usize, end: usize) {
debug_assert!(self.opts.invert_match);
let mut it = IterLines::new(self.opts.eol, start);
while let Some((s, e)) = it.next(&self.buf[..end]) {
if self.opts.terminate(self.match_line_count) {
return;
}
self.print_match(s, e);
}
}
#[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += count_lines(
&self.buf[self.last_line..upto], self.opts.eol);
self.last_line = upto;
}
}
#[inline(always)]
fn add_line(&mut self, line_end: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += 1;
self.last_line = line_end;
}
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use grep::GrepBuilder;
use printer::Printer;
use termcolor;
use super::BufferSearcher;
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
fn test_path() -> &'static Path {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor<Vec<u8>>>;
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
haystack: &str,
mut map: F,
) -> (u64, String) {
let outbuf = termcolor::NoColor::new(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
let count = {
let searcher = BufferSearcher::new(
&mut pp, &grep, test_path(), haystack.as_bytes());
map(searcher).run()
};
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
}
#[test]
fn basic_search() {
let (count, out) = search("Sherlock", SHERLOCK, |s|s);
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
#[test]
fn binary() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s|s);
assert_eq!(0, count);
assert_eq!(out, "");
}
#[test]
fn binary_text() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
}
#[test]
fn line_numbers() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.line_number(true));
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
#[test]
fn byte_offset() {
let (_, out) = search(
"Sherlock", SHERLOCK, |s| s.byte_offset(true));
assert_eq!(out, "\
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
#[test]
fn byte_offset_inverted() {
let (_, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).byte_offset(true)
});
assert_eq!(out, "\
/baz.rs:65:Holmeses, success in the province of detective work must always
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:321:and exhibited clearly, with a label attached.
");
}
#[test]
fn count() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.count(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:2\n");
}
#[test]
fn count_matches() {
let (_, out) = search(
"the", SHERLOCK, |s| s.count_matches(true));
assert_eq!(out, "/baz.rs:4\n");
}
#[test]
fn files_with_matches() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.files_with_matches(true));
assert_eq!(1, count);
assert_eq!(out, "/baz.rs\n");
}
#[test]
fn files_without_matches() {
let (count, out) = search(
"zzzz", SHERLOCK, |s| s.files_without_matches(true));
assert_eq!(0, count);
assert_eq!(out, "/baz.rs\n");
}
#[test]
fn max_count() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
assert_eq!(1, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
}
#[test]
fn invert_match_max_count() {
let (count, out) = search(
"zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
assert_eq!(1, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
}
#[test]
fn invert_match() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.invert_match(true));
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:and exhibited clearly, with a label attached.
");
}
#[test]
fn invert_match_line_numbers() {
let (count, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).line_number(true)
});
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:2:Holmeses, success in the province of detective work must always
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:6:and exhibited clearly, with a label attached.
");
}
#[test]
fn invert_match_count() {
let (count, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).count(true)
});
assert_eq!(4, count);
assert_eq!(out, "/baz.rs:4\n");
}
}

File diff suppressed because it is too large Load Diff

View File

@ -1,397 +0,0 @@
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
use encoding_rs::Encoding;
use grep::Grep;
use ignore::DirEntry;
use memmap::Mmap;
use termcolor::WriteColor;
// use decoder::DecodeReader;
use encoding_rs_io::DecodeReaderBytesBuilder;
use decompressor::{self, DecompressionReader};
use preprocessor::PreprocessorReader;
use pathutil::strip_prefix;
use printer::Printer;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
use Result;
pub enum Work {
Stdin,
DirEntry(DirEntry),
}
pub struct WorkerBuilder {
grep: Grep,
opts: Options,
}
#[derive(Clone, Debug)]
struct Options {
mmap: bool,
encoding: Option<&'static Encoding>,
after_context: usize,
before_context: usize,
byte_offset: bool,
count: bool,
count_matches: bool,
files_with_matches: bool,
files_without_matches: bool,
eol: u8,
invert_match: bool,
line_number: bool,
max_count: Option<u64>,
quiet: bool,
text: bool,
preprocessor: Option<PathBuf>,
search_zip_files: bool
}
impl Default for Options {
fn default() -> Options {
Options {
mmap: false,
encoding: None,
after_context: 0,
before_context: 0,
byte_offset: false,
count: false,
count_matches: false,
files_with_matches: false,
files_without_matches: false,
eol: b'\n',
invert_match: false,
line_number: false,
max_count: None,
quiet: false,
text: false,
search_zip_files: false,
preprocessor: None,
}
}
}
impl WorkerBuilder {
/// Create a new builder for a worker.
///
/// A reusable input buffer and a grep matcher are required, but there
/// are numerous additional options that can be configured on this builder.
pub fn new(grep: Grep) -> WorkerBuilder {
WorkerBuilder {
grep: grep,
opts: Options::default(),
}
}
/// Create the worker from this builder.
pub fn build(self) -> Worker {
let mut inpbuf = InputBuffer::new();
inpbuf.eol(self.opts.eol);
Worker {
grep: self.grep,
inpbuf: inpbuf,
decodebuf: vec![0; 8 * (1<<10)],
opts: self.opts,
}
}
/// The number of contextual lines to show after each match. The default
/// is zero.
pub fn after_context(mut self, count: usize) -> Self {
self.opts.after_context = count;
self
}
/// The number of contextual lines to show before each match. The default
/// is zero.
pub fn before_context(mut self, count: usize) -> Self {
self.opts.before_context = count;
self
}
/// If enabled, searching will print a 0-based offset of the
/// matching line (or the actual match if -o is specified) before
/// printing the line itself.
///
/// Disabled by default.
pub fn byte_offset(mut self, yes: bool) -> Self {
self.opts.byte_offset = yes;
self
}
/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
pub fn count(mut self, yes: bool) -> Self {
self.opts.count = yes;
self
}
/// If enabled, searching will print the count of individual matches
/// instead of each match.
///
/// Disabled by default.
pub fn count_matches(mut self, yes: bool) -> Self {
self.opts.count_matches = yes;
self
}
/// Set the encoding to use to read each file.
///
/// If the encoding is `None` (the default), then the encoding is
/// automatically detected on a best-effort per-file basis.
pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self {
self.opts.encoding = enc;
self
}
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
pub fn files_with_matches(mut self, yes: bool) -> Self {
self.opts.files_with_matches = yes;
self
}
/// If enabled, searching will print the path of files without any matches.
///
/// Disabled by default.
pub fn files_without_matches(mut self, yes: bool) -> Self {
self.opts.files_without_matches = yes;
self
}
/// Set the end-of-line byte used by this searcher.
pub fn eol(mut self, eol: u8) -> Self {
self.opts.eol = eol;
self
}
/// If enabled, matching is inverted so that lines that *don't* match the
/// given pattern are treated as matches.
pub fn invert_match(mut self, yes: bool) -> Self {
self.opts.invert_match = yes;
self
}
/// If enabled, compute line numbers and prefix each line of output with
/// them.
pub fn line_number(mut self, yes: bool) -> Self {
self.opts.line_number = yes;
self
}
/// Limit the number of matches to the given count.
///
/// The default is None, which corresponds to no limit.
pub fn max_count(mut self, count: Option<u64>) -> Self {
self.opts.max_count = count;
self
}
/// If enabled, try to use memory maps for searching if possible.
pub fn mmap(mut self, yes: bool) -> Self {
self.opts.mmap = yes;
self
}
/// If enabled, don't show any output and quit searching after the first
/// match is found.
pub fn quiet(mut self, yes: bool) -> Self {
self.opts.quiet = yes;
self
}
/// If enabled, search binary files as if they were text.
pub fn text(mut self, yes: bool) -> Self {
self.opts.text = yes;
self
}
/// If enabled, search through compressed files as well
pub fn search_zip_files(mut self, yes: bool) -> Self {
self.opts.search_zip_files = yes;
self
}
/// If non-empty, search output of preprocessor run on each file
pub fn preprocessor(mut self, command: Option<PathBuf>) -> Self {
self.opts.preprocessor = command;
self
}
}
/// Worker is responsible for executing searches on file paths, while choosing
/// streaming search or memory map search as appropriate.
pub struct Worker {
grep: Grep,
inpbuf: InputBuffer,
decodebuf: Vec<u8>,
opts: Options,
}
impl Worker {
/// Execute the worker with the given printer and work item.
///
/// A work item can either be stdin or a file path.
pub fn run<W: WriteColor>(
&mut self,
printer: &mut Printer<W>,
work: Work,
) -> u64 {
let result = match work {
Work::Stdin => {
let stdin = io::stdin();
let stdin = stdin.lock();
self.search(printer, Path::new("<stdin>"), stdin)
}
Work::DirEntry(dent) => {
let mut path = dent.path();
if self.opts.preprocessor.is_some() {
let cmd = self.opts.preprocessor.clone().unwrap();
match PreprocessorReader::from_cmd_path(cmd, path) {
Ok(reader) => self.search(printer, path, reader),
Err(err) => {
message!("{}", err);
return 0;
}
}
} else if self.opts.search_zip_files
&& decompressor::is_compressed(path)
{
match DecompressionReader::from_path(path) {
Some(reader) => self.search(printer, path, reader),
None => {
return 0;
}
}
} else {
let file = match File::open(path) {
Ok(file) => file,
Err(err) => {
message!("{}: {}", path.display(), err);
return 0;
}
};
if let Some(p) = strip_prefix("./", path) {
path = p;
}
if self.opts.mmap {
self.search_mmap(printer, path, &file)
} else {
self.search(printer, path, file)
}
}
}
};
match result {
Ok(count) => {
count
}
Err(err) => {
message!("{}", err);
0
}
}
}
fn search<R: io::Read, W: WriteColor>(
&mut self,
printer: &mut Printer<W>,
path: &Path,
rdr: R,
) -> Result<u64> {
let rdr = DecodeReaderBytesBuilder::new()
.encoding(self.opts.encoding)
.utf8_passthru(true)
.build_with_buffer(rdr, &mut self.decodebuf)?;
let searcher = Searcher::new(
&mut self.inpbuf, printer, &self.grep, path, rdr);
searcher
.after_context(self.opts.after_context)
.before_context(self.opts.before_context)
.byte_offset(self.opts.byte_offset)
.count(self.opts.count)
.count_matches(self.opts.count_matches)
.files_with_matches(self.opts.files_with_matches)
.files_without_matches(self.opts.files_without_matches)
.eol(self.opts.eol)
.line_number(self.opts.line_number)
.invert_match(self.opts.invert_match)
.max_count(self.opts.max_count)
.quiet(self.opts.quiet)
.text(self.opts.text)
.run()
.map_err(From::from)
}
fn search_mmap<W: WriteColor>(
&mut self,
printer: &mut Printer<W>,
path: &Path,
file: &File,
) -> Result<u64> {
if file.metadata()?.len() == 0 {
// Opening a memory map with an empty file results in an error.
// However, this may not actually be an empty file! For example,
// /proc/cpuinfo reports itself as an empty file, but it can
// produce data when it's read from. Therefore, we fall back to
// regular read calls.
return self.search(printer, path, file);
}
let mmap = match self.mmap(file)? {
None => return self.search(printer, path, file),
Some(mmap) => mmap,
};
let buf = &*mmap;
if buf.len() >= 3 && Encoding::for_bom(buf).is_some() {
// If we have a UTF-16 bom in our memory map, then we need to fall
// back to the stream reader, which will do transcoding.
return self.search(printer, path, file);
}
let searcher = BufferSearcher::new(printer, &self.grep, path, buf);
Ok(searcher
.byte_offset(self.opts.byte_offset)
.count(self.opts.count)
.count_matches(self.opts.count_matches)
.files_with_matches(self.opts.files_with_matches)
.files_without_matches(self.opts.files_without_matches)
.eol(self.opts.eol)
.line_number(self.opts.line_number)
.invert_match(self.opts.invert_match)
.max_count(self.opts.max_count)
.quiet(self.opts.quiet)
.text(self.opts.text)
.run())
}
#[cfg(not(unix))]
fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
Ok(Some(mmap_readonly(file)?))
}
#[cfg(unix)]
fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
use libc::{EOVERFLOW, ENODEV, ENOMEM};
let err = match mmap_readonly(file) {
Ok(mmap) => return Ok(Some(mmap)),
Err(err) => err,
};
let code = err.raw_os_error();
if code == Some(EOVERFLOW)
|| code == Some(ENODEV)
|| code == Some(ENOMEM)
{
return Ok(None);
}
Err(From::from(err))
}
}
fn mmap_readonly(file: &File) -> io::Result<Mmap> {
unsafe { Mmap::map(file) }
}