mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-01-29 22:01:04 +02:00
ripgrep: remove old code
This commit is contained in:
parent
f16f9cedf1
commit
acd20a803c
29
Cargo.lock
generated
29
Cargo.lock
generated
@ -134,12 +134,12 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "grep"
|
||||
version = "0.1.9"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"grep-matcher 0.0.1",
|
||||
"grep-printer 0.0.1",
|
||||
"grep-regex 0.0.1",
|
||||
"grep-searcher 0.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@ -192,16 +192,6 @@ dependencies = [
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "grep2"
|
||||
version = "0.2.0"
|
||||
dependencies = [
|
||||
"grep-matcher 0.0.1",
|
||||
"grep-printer 0.0.1",
|
||||
"grep-regex 0.0.1",
|
||||
"grep-searcher 0.0.1",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "ignore"
|
||||
version = "0.4.3"
|
||||
@ -339,19 +329,12 @@ name = "ripgrep"
|
||||
version = "0.9.0"
|
||||
dependencies = [
|
||||
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bytecount 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"globset 0.4.1",
|
||||
"grep 0.1.9",
|
||||
"grep2 0.2.0",
|
||||
"grep 0.2.0",
|
||||
"ignore 0.4.3",
|
||||
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
21
Cargo.toml
21
Cargo.toml
@ -35,23 +35,15 @@ path = "tests/tests.rs"
|
||||
members = [
|
||||
"grep", "globset", "ignore",
|
||||
"grep-matcher", "grep-printer", "grep-regex", "grep-searcher",
|
||||
"grep2",
|
||||
]
|
||||
|
||||
[dependencies]
|
||||
atty = "0.2.11"
|
||||
bytecount = "0.3.1"
|
||||
encoding_rs = "0.8"
|
||||
encoding_rs_io = "0.1"
|
||||
globset = { version = "0.4.0", path = "globset" }
|
||||
grep = { version = "0.1.8", path = "grep" }
|
||||
grep2 = { version = "0.2.0", path = "grep2" }
|
||||
grep = { version = "0.2.0", path = "grep" }
|
||||
ignore = { version = "0.4.0", path = "ignore" }
|
||||
lazy_static = "1"
|
||||
libc = "0.2"
|
||||
log = "0.4"
|
||||
memchr = "2"
|
||||
memmap = "0.6"
|
||||
num_cpus = "1"
|
||||
regex = "1"
|
||||
same-file = "1"
|
||||
@ -75,15 +67,8 @@ default-features = false
|
||||
features = ["suggestions", "color"]
|
||||
|
||||
[features]
|
||||
avx-accel = [
|
||||
"bytecount/avx-accel",
|
||||
"grep2/avx-accel",
|
||||
]
|
||||
simd-accel = [
|
||||
"bytecount/simd-accel",
|
||||
"encoding_rs/simd-accel",
|
||||
"grep2/simd-accel",
|
||||
]
|
||||
avx-accel = ["grep/avx-accel"]
|
||||
simd-accel = ["grep/simd-accel"]
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
|
@ -1,6 +1,6 @@
|
||||
[package]
|
||||
name = "grep"
|
||||
version = "0.1.9" #:version
|
||||
version = "0.2.0" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
@ -13,7 +13,11 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
log = "0.4"
|
||||
memchr = "2"
|
||||
regex = "1"
|
||||
regex-syntax = "0.6"
|
||||
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
|
||||
grep-printer = { version = "0.0.1", path = "../grep-printer" }
|
||||
grep-regex = { version = "0.0.1", path = "../grep-regex" }
|
||||
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
|
||||
|
||||
[features]
|
||||
avx-accel = ["grep-searcher/avx-accel"]
|
||||
simd-accel = ["grep-searcher/simd-accel"]
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,84 +1,10 @@
|
||||
#![deny(missing_docs)]
|
||||
|
||||
/*!
|
||||
A fast line oriented regex searcher.
|
||||
TODO.
|
||||
*/
|
||||
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate regex;
|
||||
extern crate regex_syntax as syntax;
|
||||
#![deny(missing_docs)]
|
||||
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::result;
|
||||
|
||||
pub use search::{Grep, GrepBuilder, Iter, Match};
|
||||
|
||||
mod literals;
|
||||
mod nonl;
|
||||
mod search;
|
||||
mod smart_case;
|
||||
mod word_boundary;
|
||||
|
||||
/// Result is a convenient type alias that fixes the type of the error to
|
||||
/// the `Error` type defined in this crate.
|
||||
pub type Result<T> = result::Result<T, Error>;
|
||||
|
||||
/// Error enumerates the list of possible error conditions when building or
|
||||
/// using a `Grep` line searcher.
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
/// An error from parsing or compiling a regex.
|
||||
Regex(regex::Error),
|
||||
/// This error occurs when an illegal literal was found in the regex
|
||||
/// pattern. For example, if the line terminator is `\n` and the regex
|
||||
/// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error.
|
||||
LiteralNotAllowed(char),
|
||||
/// An unused enum variant that indicates this enum may be expanded in
|
||||
/// the future and therefore should not be exhaustively matched.
|
||||
#[doc(hidden)]
|
||||
__Nonexhaustive,
|
||||
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::Regex(ref err) => err.description(),
|
||||
Error::LiteralNotAllowed(_) => "use of forbidden literal",
|
||||
Error::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
|
||||
fn cause(&self) -> Option<&error::Error> {
|
||||
match *self {
|
||||
Error::Regex(ref err) => err.cause(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::Regex(ref err) => err.fmt(f),
|
||||
Error::LiteralNotAllowed(chr) => {
|
||||
write!(f, "Literal {:?} not allowed.", chr)
|
||||
}
|
||||
Error::__Nonexhaustive => unreachable!(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<regex::Error> for Error {
|
||||
fn from(err: regex::Error) -> Error {
|
||||
Error::Regex(err)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<syntax::Error> for Error {
|
||||
fn from(err: syntax::Error) -> Error {
|
||||
Error::Regex(regex::Error::Syntax(err.to_string()))
|
||||
}
|
||||
}
|
||||
pub extern crate grep_matcher as matcher;
|
||||
pub extern crate grep_printer as printer;
|
||||
pub extern crate grep_regex as regex;
|
||||
pub extern crate grep_searcher as searcher;
|
||||
|
@ -1,274 +0,0 @@
|
||||
/*!
|
||||
The literals module is responsible for extracting *inner* literals out of the
|
||||
AST of a regular expression. Normally this is the job of the regex engine
|
||||
itself, but the regex engine doesn't look for inner literals. Since we're doing
|
||||
line based searching, we can use them, so we need to do it ourselves.
|
||||
|
||||
Note that this implementation is incredibly suspicious. We need something more
|
||||
principled.
|
||||
*/
|
||||
use std::cmp;
|
||||
|
||||
use regex::bytes::RegexBuilder;
|
||||
use syntax::hir::{self, Hir, HirKind};
|
||||
use syntax::hir::literal::{Literal, Literals};
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct LiteralSets {
|
||||
prefixes: Literals,
|
||||
suffixes: Literals,
|
||||
required: Literals,
|
||||
}
|
||||
|
||||
impl LiteralSets {
|
||||
pub fn create(expr: &Hir) -> Self {
|
||||
let mut required = Literals::empty();
|
||||
union_required(expr, &mut required);
|
||||
LiteralSets {
|
||||
prefixes: Literals::prefixes(expr),
|
||||
suffixes: Literals::suffixes(expr),
|
||||
required: required,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
|
||||
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
|
||||
debug!("literal prefixes detected: {:?}", self.prefixes);
|
||||
// When this is true, the regex engine will do a literal scan.
|
||||
return None;
|
||||
}
|
||||
|
||||
// Out of inner required literals, prefixes and suffixes, which one
|
||||
// is the longest? We pick the longest to do fast literal scan under
|
||||
// the assumption that a longer literal will have a lower false
|
||||
// positive rate.
|
||||
let pre_lcp = self.prefixes.longest_common_prefix();
|
||||
let pre_lcs = self.prefixes.longest_common_suffix();
|
||||
let suf_lcp = self.suffixes.longest_common_prefix();
|
||||
let suf_lcs = self.suffixes.longest_common_suffix();
|
||||
|
||||
let req_lits = self.required.literals();
|
||||
let req = match req_lits.iter().max_by_key(|lit| lit.len()) {
|
||||
None => &[],
|
||||
Some(req) => &***req,
|
||||
};
|
||||
|
||||
let mut lit = pre_lcp;
|
||||
if pre_lcs.len() > lit.len() {
|
||||
lit = pre_lcs;
|
||||
}
|
||||
if suf_lcp.len() > lit.len() {
|
||||
lit = suf_lcp;
|
||||
}
|
||||
if suf_lcs.len() > lit.len() {
|
||||
lit = suf_lcs;
|
||||
}
|
||||
if req_lits.len() == 1 && req.len() > lit.len() {
|
||||
lit = req;
|
||||
}
|
||||
|
||||
// Special case: if we have any literals that are all whitespace,
|
||||
// then this is probably a failing of the literal detection since
|
||||
// whitespace is typically pretty common. In this case, don't bother
|
||||
// with inner literal scanning at all and just defer to the regex.
|
||||
let any_all_white = req_lits.iter()
|
||||
.any(|lit| lit.iter().all(|&b| (b as char).is_whitespace()));
|
||||
if any_all_white {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Special case: if we detected an alternation of inner required
|
||||
// literals and its longest literal is bigger than the longest
|
||||
// prefix/suffix, then choose the alternation. In practice, this
|
||||
// helps with case insensitive matching, which can generate lots of
|
||||
// inner required literals.
|
||||
let any_empty = req_lits.iter().any(|lit| lit.is_empty());
|
||||
if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
|
||||
debug!("required literals found: {:?}", req_lits);
|
||||
let alts: Vec<String> =
|
||||
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
|
||||
let mut builder = RegexBuilder::new(&alts.join("|"));
|
||||
builder.unicode(false);
|
||||
Some(builder)
|
||||
} else if lit.is_empty() {
|
||||
None
|
||||
} else {
|
||||
debug!("required literal found: {:?}", show(lit));
|
||||
let mut builder = RegexBuilder::new(&bytes_to_regex(&lit));
|
||||
builder.unicode(false);
|
||||
Some(builder)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn union_required(expr: &Hir, lits: &mut Literals) {
|
||||
match *expr.kind() {
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
let mut buf = [0u8; 4];
|
||||
lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
lits.cross_add(&[b]);
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(ref cls)) => {
|
||||
if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) {
|
||||
lits.cut();
|
||||
}
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(ref cls)) => {
|
||||
if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) {
|
||||
lits.cut();
|
||||
}
|
||||
}
|
||||
HirKind::Group(hir::Group { ref hir, .. }) => {
|
||||
union_required(&**hir, lits);
|
||||
}
|
||||
HirKind::Repetition(ref x) => {
|
||||
match x.kind {
|
||||
hir::RepetitionKind::ZeroOrOne => lits.cut(),
|
||||
hir::RepetitionKind::ZeroOrMore => lits.cut(),
|
||||
hir::RepetitionKind::OneOrMore => {
|
||||
union_required(&x.hir, lits);
|
||||
lits.cut();
|
||||
}
|
||||
hir::RepetitionKind::Range(ref rng) => {
|
||||
let (min, max) = match *rng {
|
||||
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
|
||||
hir::RepetitionRange::AtLeast(m) => (m, None),
|
||||
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
|
||||
};
|
||||
repeat_range_literals(
|
||||
&x.hir, min, max, x.greedy, lits, union_required);
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Concat(ref es) if es.is_empty() => {}
|
||||
HirKind::Concat(ref es) if es.len() == 1 => {
|
||||
union_required(&es[0], lits)
|
||||
}
|
||||
HirKind::Concat(ref es) => {
|
||||
for e in es {
|
||||
let mut lits2 = lits.to_empty();
|
||||
union_required(e, &mut lits2);
|
||||
if lits2.is_empty() {
|
||||
lits.cut();
|
||||
continue;
|
||||
}
|
||||
if lits2.contains_empty() {
|
||||
lits.cut();
|
||||
}
|
||||
if !lits.cross_product(&lits2) {
|
||||
// If this expression couldn't yield any literal that
|
||||
// could be extended, then we need to quit. Since we're
|
||||
// short-circuiting, we also need to freeze every member.
|
||||
lits.cut();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
HirKind::Alternation(ref es) => {
|
||||
alternate_literals(es, lits, union_required);
|
||||
}
|
||||
_ => lits.cut(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Extracts literals for a counted repetition `e{min,max}` by applying `f`
/// to a single copy of `e`.
///
/// `max` of `None` means the repetition has no upper bound. `_greedy` is
/// accepted but not used.
fn repeat_range_literals<F>(
    e: &Hir,
    min: u32,
    max: Option<u32>,
    _greedy: bool,
    lits: &mut Literals,
    mut f: F,
)
where
    F: FnMut(&Hir, &mut Literals),
{
    if min == 0 {
        // This is a bit conservative. If `max` is set, then we could treat
        // this as a finite set of alternations. For now, we just treat it
        // as `e*`.
        lits.cut();
        return;
    }

    // Checked before `f` runs: does the minimum count exceed the size limit?
    let exceeds_limit = lits.limit_size() < min as usize;

    // We only extract literals from a single repetition, even though we
    // could do more. e.g., `a{3}` will have `a` extracted instead of `aaa`.
    // The reason is that inner literal extraction can't be unioned across
    // repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}` is wrong.
    f(e, lits);

    if exceeds_limit {
        lits.cut();
    }
    // An unbounded repetition, or one whose upper bound exceeds `min`, may
    // match more copies than we extracted.
    let open_ended = match max {
        Some(upper) => min < upper,
        None => true,
    };
    if open_ended {
        lits.cut();
    }
}
|
||||
|
||||
fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
|
||||
es: &[Hir],
|
||||
lits: &mut Literals,
|
||||
mut f: F,
|
||||
) {
|
||||
let mut lits2 = lits.to_empty();
|
||||
for e in es {
|
||||
let mut lits3 = lits.to_empty();
|
||||
lits3.set_limit_size(lits.limit_size() / 5);
|
||||
f(e, &mut lits3);
|
||||
if lits3.is_empty() || !lits2.union(lits3) {
|
||||
// If we couldn't find suffixes for *any* of the
|
||||
// alternates, then the entire alternation has to be thrown
|
||||
// away and any existing members must be frozen. Similarly,
|
||||
// if the union couldn't complete, stop and freeze.
|
||||
lits.cut();
|
||||
return;
|
||||
}
|
||||
}
|
||||
// All we do at the moment is look for prefixes and suffixes. If both
|
||||
// are empty, then we report nothing. We should be able to do better than
|
||||
// this, but we'll need something more expressive than just a "set of
|
||||
// literals."
|
||||
let lcp = lits2.longest_common_prefix();
|
||||
let lcs = lits2.longest_common_suffix();
|
||||
if !lcp.is_empty() {
|
||||
lits.cross_add(lcp);
|
||||
}
|
||||
lits.cut();
|
||||
if !lcs.is_empty() {
|
||||
lits.add(Literal::empty());
|
||||
lits.add(Literal::new(lcs.to_vec()));
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the number of characters in the given class.
|
||||
fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
|
||||
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
|
||||
}
|
||||
|
||||
/// Return the number of bytes in the given class.
|
||||
fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
|
||||
cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum()
|
||||
}
|
||||
|
||||
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
///
/// Every byte is rendered as a `\xNN` escape (two lowercase hex digits), so
/// the result never contains regex meta characters. An empty input yields an
/// empty string.
fn bytes_to_regex(bs: &[u8]) -> String {
    use std::fmt::Write;

    // Each input byte expands to exactly four characters, so reserve the
    // whole output up front instead of growing while appending.
    let mut s = String::with_capacity(4 * bs.len());
    for &b in bs {
        // Format straight into the output rather than allocating a
        // temporary `String` per byte.
        write!(s, "\\x{:02x}", b).expect("writing to a String cannot fail");
    }
    s
}
|
||||
|
||||
/// Converts arbitrary bytes to a nice string.
///
/// Each byte is passed through `std::ascii::escape_default`, so printable
/// ASCII is kept as is and everything else is shown as an escape sequence
/// (e.g. `\n`, `\xff`).
fn show(bs: &[u8]) -> String {
    // Why aren't we using this to feed to the regex? Doesn't really matter
    // I guess. ---AG
    use std::ascii::escape_default;

    let mut nice = String::with_capacity(bs.len());
    for &b in bs {
        // `escape_default` yields ASCII bytes, so each can be pushed as a
        // `char` directly; no intermediate `Vec` or UTF-8 validation needed.
        nice.extend(escape_default(b).map(char::from));
    }
    nice
}
|
@ -1,74 +0,0 @@
|
||||
use syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
use {Error, Result};
|
||||
|
||||
/// Returns a new expression that is guaranteed to never match the given
|
||||
/// ASCII character.
|
||||
///
|
||||
/// If the expression contains the literal byte, then an error is returned.
|
||||
///
|
||||
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
|
||||
/// function panics.
|
||||
pub fn remove(expr: Hir, byte: u8) -> Result<Hir> {
|
||||
assert!(byte <= 0x7F);
|
||||
let chr = byte as char;
|
||||
assert!(chr.len_utf8() == 1);
|
||||
|
||||
Ok(match expr.into_kind() {
|
||||
HirKind::Empty => Hir::empty(),
|
||||
HirKind::Literal(hir::Literal::Unicode(c)) => {
|
||||
if c == chr {
|
||||
return Err(Error::LiteralNotAllowed(chr));
|
||||
}
|
||||
Hir::literal(hir::Literal::Unicode(c))
|
||||
}
|
||||
HirKind::Literal(hir::Literal::Byte(b)) => {
|
||||
if b as char == chr {
|
||||
return Err(Error::LiteralNotAllowed(chr));
|
||||
}
|
||||
Hir::literal(hir::Literal::Byte(b))
|
||||
}
|
||||
HirKind::Class(hir::Class::Unicode(mut cls)) => {
|
||||
let remove = hir::ClassUnicode::new(Some(
|
||||
hir::ClassUnicodeRange::new(chr, chr),
|
||||
));
|
||||
cls.difference(&remove);
|
||||
if cls.iter().next().is_none() {
|
||||
return Err(Error::LiteralNotAllowed(chr));
|
||||
}
|
||||
Hir::class(hir::Class::Unicode(cls))
|
||||
}
|
||||
HirKind::Class(hir::Class::Bytes(mut cls)) => {
|
||||
let remove = hir::ClassBytes::new(Some(
|
||||
hir::ClassBytesRange::new(byte, byte),
|
||||
));
|
||||
cls.difference(&remove);
|
||||
if cls.iter().next().is_none() {
|
||||
return Err(Error::LiteralNotAllowed(chr));
|
||||
}
|
||||
Hir::class(hir::Class::Bytes(cls))
|
||||
}
|
||||
HirKind::Anchor(x) => Hir::anchor(x),
|
||||
HirKind::WordBoundary(x) => Hir::word_boundary(x),
|
||||
HirKind::Repetition(mut x) => {
|
||||
x.hir = Box::new(remove(*x.hir, byte)?);
|
||||
Hir::repetition(x)
|
||||
}
|
||||
HirKind::Group(mut x) => {
|
||||
x.hir = Box::new(remove(*x.hir, byte)?);
|
||||
Hir::group(x)
|
||||
}
|
||||
HirKind::Concat(xs) => {
|
||||
let xs = xs.into_iter()
|
||||
.map(|e| remove(e, byte))
|
||||
.collect::<Result<Vec<Hir>>>()?;
|
||||
Hir::concat(xs)
|
||||
}
|
||||
HirKind::Alternation(xs) => {
|
||||
let xs = xs.into_iter()
|
||||
.map(|e| remove(e, byte))
|
||||
.collect::<Result<Vec<Hir>>>()?;
|
||||
Hir::alternation(xs)
|
||||
}
|
||||
})
|
||||
}
|
@ -1,356 +0,0 @@
|
||||
use memchr::{memchr, memrchr};
|
||||
use syntax::ParserBuilder;
|
||||
use syntax::hir::Hir;
|
||||
use regex::bytes::{Regex, RegexBuilder};
|
||||
|
||||
use literals::LiteralSets;
|
||||
use nonl;
|
||||
use smart_case::Cased;
|
||||
use word_boundary::strip_unicode_word_boundaries;
|
||||
use Result;
|
||||
|
||||
/// A matched line, described by the byte offsets where it starts and ends.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Match {
    /// Byte offset at which the matched line starts.
    start: usize,
    /// Byte offset at which the matched line ends.
    end: usize,
}

impl Match {
    /// Creates an empty match whose offsets are both zero.
    pub fn new() -> Match {
        Match { start: 0, end: 0 }
    }

    /// Starting byte offset of the line that matched.
    #[inline]
    pub fn start(&self) -> usize {
        self.start
    }

    /// Ending byte offset of the line that matched.
    #[inline]
    pub fn end(&self) -> usize {
        self.end
    }
}
|
||||
|
||||
/// A fast line oriented regex searcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct Grep {
|
||||
re: Regex,
|
||||
required: Option<Regex>,
|
||||
opts: Options,
|
||||
}
|
||||
|
||||
/// A builder for a grep searcher.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct GrepBuilder {
|
||||
pattern: String,
|
||||
opts: Options,
|
||||
}
|
||||
|
||||
/// The settings shared by `GrepBuilder` and the `Grep` it builds.
#[derive(Clone, Debug)]
struct Options {
    /// Always search case insensitively.
    case_insensitive: bool,
    /// Pick case sensitivity by inspecting the pattern ("smart case").
    case_smart: bool,
    /// The byte that separates lines.
    line_terminator: u8,
    /// Size limit handed to the regex builder.
    size_limit: usize,
    /// DFA size limit handed to the regex builder.
    dfa_size_limit: usize,
}

impl Default for Options {
    /// Case sensitive search, `\n` as the line terminator, and 10 MiB for
    /// both size limits.
    fn default() -> Options {
        const TEN_MIB: usize = 10 * (1 << 20);
        Options {
            case_insensitive: false,
            case_smart: false,
            line_terminator: b'\n',
            size_limit: TEN_MIB,
            dfa_size_limit: TEN_MIB,
        }
    }
}
|
||||
|
||||
impl GrepBuilder {
|
||||
/// Create a new builder for line searching.
|
||||
///
|
||||
/// The pattern given should be a regular expression. The precise syntax
|
||||
/// supported is documented on the regex crate.
|
||||
pub fn new(pattern: &str) -> GrepBuilder {
|
||||
GrepBuilder {
|
||||
pattern: pattern.to_string(),
|
||||
opts: Options::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the line terminator.
|
||||
///
|
||||
/// The line terminator can be any ASCII character and serves to delineate
|
||||
/// the match boundaries in the text searched.
|
||||
///
|
||||
/// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII).
|
||||
pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder {
|
||||
assert!(ascii_byte <= 0x7F);
|
||||
self.opts.line_terminator = ascii_byte;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the case sensitive flag (`i`) on the regex.
|
||||
pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder {
|
||||
self.opts.case_insensitive = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to enable smart case search or not (disabled by default).
|
||||
///
|
||||
/// Smart case uses case insensitive search if the pattern contains only
|
||||
/// lowercase characters (ignoring any characters which immediately follow
|
||||
/// a '\'). Otherwise, a case sensitive search is used instead.
|
||||
///
|
||||
/// Enabling the case_insensitive flag overrides this.
|
||||
pub fn case_smart(mut self, yes: bool) -> GrepBuilder {
|
||||
self.opts.case_smart = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the approximate size limit of the compiled regular expression.
|
||||
///
|
||||
/// This roughly corresponds to the number of bytes occupied by a
|
||||
/// single compiled program. If the program exceeds this number, then a
|
||||
/// compilation error is returned.
|
||||
pub fn size_limit(mut self, limit: usize) -> GrepBuilder {
|
||||
self.opts.size_limit = limit;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the approximate size of the cache used by the DFA.
|
||||
///
|
||||
/// This roughly corresponds to the number of bytes that the DFA will use
|
||||
/// while searching.
|
||||
///
|
||||
/// Note that this is a per thread limit. There is no way to set a global
|
||||
/// limit. In particular, if a regex is used from multiple threads
|
||||
/// simulanteously, then each thread may use up to the number of bytes
|
||||
/// specified here.
|
||||
pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder {
|
||||
self.opts.dfa_size_limit = limit;
|
||||
self
|
||||
}
|
||||
|
||||
/// Create a line searcher.
|
||||
///
|
||||
/// If there was a problem parsing or compiling the regex with the given
|
||||
/// options, then an error is returned.
|
||||
pub fn build(self) -> Result<Grep> {
|
||||
let expr = self.parse()?;
|
||||
let literals = LiteralSets::create(&expr);
|
||||
let re = self.regex(&expr)?;
|
||||
let required = match literals.to_regex_builder() {
|
||||
Some(builder) => Some(self.regex_build(builder)?),
|
||||
None => {
|
||||
match strip_unicode_word_boundaries(&expr) {
|
||||
None => None,
|
||||
Some(expr) => {
|
||||
debug!("Stripped Unicode word boundaries. \
|
||||
New AST:\n{:?}", expr);
|
||||
self.regex(&expr).ok()
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
Ok(Grep {
|
||||
re: re,
|
||||
required: required,
|
||||
opts: self.opts,
|
||||
})
|
||||
}
|
||||
|
||||
/// Creates a new regex from the given expression with the current
|
||||
/// configuration.
|
||||
fn regex(&self, expr: &Hir) -> Result<Regex> {
|
||||
let mut builder = RegexBuilder::new(&expr.to_string());
|
||||
builder.unicode(true);
|
||||
self.regex_build(builder)
|
||||
}
|
||||
|
||||
/// Builds a new regex from the given builder using the caller's settings.
|
||||
fn regex_build(&self, mut builder: RegexBuilder) -> Result<Regex> {
|
||||
builder
|
||||
.multi_line(true)
|
||||
.size_limit(self.opts.size_limit)
|
||||
.dfa_size_limit(self.opts.dfa_size_limit)
|
||||
.build()
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
/// Parses the underlying pattern and ensures the pattern can never match
|
||||
/// the line terminator.
|
||||
fn parse(&self) -> Result<Hir> {
|
||||
let expr = ParserBuilder::new()
|
||||
.allow_invalid_utf8(true)
|
||||
.case_insensitive(self.is_case_insensitive()?)
|
||||
.multi_line(true)
|
||||
.build()
|
||||
.parse(&self.pattern)?;
|
||||
debug!("original regex HIR pattern:\n{}", expr);
|
||||
let expr = nonl::remove(expr, self.opts.line_terminator)?;
|
||||
debug!("transformed regex HIR pattern:\n{}", expr);
|
||||
Ok(expr)
|
||||
}
|
||||
|
||||
/// Determines whether the case insensitive flag should be enabled or not.
|
||||
fn is_case_insensitive(&self) -> Result<bool> {
|
||||
if self.opts.case_insensitive {
|
||||
return Ok(true);
|
||||
}
|
||||
if !self.opts.case_smart {
|
||||
return Ok(false);
|
||||
}
|
||||
let cased = match Cased::from_pattern(&self.pattern) {
|
||||
None => return Ok(false),
|
||||
Some(cased) => cased,
|
||||
};
|
||||
Ok(cased.any_literal && !cased.any_uppercase)
|
||||
}
|
||||
}
|
||||
|
||||
impl Grep {
|
||||
/// Returns a reference to the underlying regex used by the searcher.
|
||||
pub fn regex(&self) -> &Regex {
|
||||
&self.re
|
||||
}
|
||||
|
||||
/// Returns an iterator over all matches in the given buffer.
|
||||
pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {
|
||||
Iter {
|
||||
searcher: self,
|
||||
buf: buf,
|
||||
start: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// Fills in the next line that matches in the given buffer starting at
|
||||
/// the position given.
|
||||
///
|
||||
/// If no match could be found, `false` is returned, otherwise, `true` is
|
||||
/// returned.
|
||||
pub fn read_match(
|
||||
&self,
|
||||
mat: &mut Match,
|
||||
buf: &[u8],
|
||||
mut start: usize,
|
||||
) -> bool {
|
||||
if start >= buf.len() {
|
||||
return false;
|
||||
}
|
||||
if let Some(ref req) = self.required {
|
||||
while start < buf.len() {
|
||||
let e = match req.shortest_match(&buf[start..]) {
|
||||
None => return false,
|
||||
Some(e) => start + e,
|
||||
};
|
||||
let (prevnl, nextnl) = self.find_line(buf, e, e);
|
||||
match self.re.shortest_match(&buf[prevnl..nextnl]) {
|
||||
None => {
|
||||
start = nextnl;
|
||||
continue;
|
||||
}
|
||||
Some(_) => {
|
||||
self.fill_match(mat, prevnl, nextnl);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
false
|
||||
} else {
|
||||
let e = match self.re.shortest_match(&buf[start..]) {
|
||||
None => return false,
|
||||
Some(e) => start + e,
|
||||
};
|
||||
let (s, e) = self.find_line(buf, e, e);
|
||||
self.fill_match(mat, s, e);
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
fn fill_match(&self, mat: &mut Match, start: usize, end: usize) {
|
||||
mat.start = start;
|
||||
mat.end = end;
|
||||
}
|
||||
|
||||
fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) {
|
||||
(self.find_line_start(buf, s), self.find_line_end(buf, e))
|
||||
}
|
||||
|
||||
fn find_line_start(&self, buf: &[u8], pos: usize) -> usize {
|
||||
memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1)
|
||||
}
|
||||
|
||||
fn find_line_end(&self, buf: &[u8], pos: usize) -> usize {
|
||||
memchr(self.opts.line_terminator, &buf[pos..])
|
||||
.map_or(buf.len(), |i| pos + i + 1)
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over all matches in a particular buffer.
|
||||
///
|
||||
/// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime
|
||||
/// of the searcher.
|
||||
pub struct Iter<'b, 's> {
|
||||
searcher: &'s Grep,
|
||||
buf: &'b [u8],
|
||||
start: usize,
|
||||
}
|
||||
|
||||
impl<'b, 's> Iterator for Iter<'b, 's> {
|
||||
type Item = Match;
|
||||
|
||||
fn next(&mut self) -> Option<Match> {
|
||||
let mut mat = Match::default();
|
||||
if !self.searcher.read_match(&mut mat, self.buf, self.start) {
|
||||
self.start = self.buf.len();
|
||||
return None;
|
||||
}
|
||||
self.start = mat.end;
|
||||
Some(mat)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use memchr::{memchr, memrchr};
    use regex::bytes::Regex;

    use super::{GrepBuilder, Match};

    static SHERLOCK: &'static [u8] = include_bytes!("./data/sherlock.txt");

    /// Reference implementation: runs the regex over the whole haystack and
    /// widens every match to the `\n`-delimited line(s) containing it.
    fn find_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
        let re = Regex::new(pat).unwrap();
        re.find_iter(haystack)
            .map(|m| {
                let start = match memrchr(b'\n', &haystack[..m.start()]) {
                    Some(i) => i + 1,
                    None => 0,
                };
                let end = match memchr(b'\n', &haystack[m.end()..]) {
                    Some(i) => m.end() + i + 1,
                    None => haystack.len(),
                };
                Match { start, end }
            })
            .collect()
    }

    /// Collects every line reported by a default-configured `Grep`.
    fn grep_lines(pat: &str, haystack: &[u8]) -> Vec<Match> {
        GrepBuilder::new(pat).build().unwrap().iter(haystack).collect()
    }

    #[test]
    fn buffered_literal() {
        let expected = find_lines("Sherlock Holmes", SHERLOCK);
        let got = grep_lines("Sherlock Holmes", SHERLOCK);
        assert_eq!(expected.len(), got.len());
        assert_eq!(expected, got);
    }
}
|
@ -1,191 +0,0 @@
|
||||
use syntax::ast::{self, Ast};
|
||||
use syntax::ast::parse::Parser;
|
||||
|
||||
/// The outcome of analyzing a regex for cased literals.
#[derive(Clone, Debug, Default)]
pub struct Cased {
    /// Set if and only if the regex contains a literal uppercase character.
    ///
    /// For example, `\pL` contains no uppercase literal: the `L` there is
    /// part of the class name, even though the class itself contains
    /// uppercase characters.
    pub any_uppercase: bool,
    /// Set if and only if the regex contains any literal at all. For
    /// example, this is false for `\pL`.
    pub any_literal: bool,
}
|
||||
|
||||
impl Cased {
|
||||
/// Returns a `Cased` value by doing analysis on the AST of `pattern`.
|
||||
///
|
||||
/// If `pattern` is not a valid regular expression, then `None` is
|
||||
/// returned.
|
||||
pub fn from_pattern(pattern: &str) -> Option<Cased> {
|
||||
Parser::new()
|
||||
.parse(pattern)
|
||||
.map(|ast| Cased::from_ast(&ast))
|
||||
.ok()
|
||||
}
|
||||
|
||||
fn from_ast(ast: &Ast) -> Cased {
|
||||
let mut cased = Cased::default();
|
||||
cased.from_ast_impl(ast);
|
||||
cased
|
||||
}
|
||||
|
||||
fn from_ast_impl(&mut self, ast: &Ast) {
|
||||
if self.done() {
|
||||
return;
|
||||
}
|
||||
match *ast {
|
||||
Ast::Empty(_)
|
||||
| Ast::Flags(_)
|
||||
| Ast::Dot(_)
|
||||
| Ast::Assertion(_)
|
||||
| Ast::Class(ast::Class::Unicode(_))
|
||||
| Ast::Class(ast::Class::Perl(_)) => {}
|
||||
Ast::Literal(ref x) => {
|
||||
self.from_ast_literal(x);
|
||||
}
|
||||
Ast::Class(ast::Class::Bracketed(ref x)) => {
|
||||
self.from_ast_class_set(&x.kind);
|
||||
}
|
||||
Ast::Repetition(ref x) => {
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Group(ref x) => {
|
||||
self.from_ast_impl(&x.ast);
|
||||
}
|
||||
Ast::Alternation(ref alt) => {
|
||||
for x in &alt.asts {
|
||||
self.from_ast_impl(x);
|
||||
}
|
||||
}
|
||||
Ast::Concat(ref alt) => {
|
||||
for x in &alt.asts {
|
||||
self.from_ast_impl(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_class_set(&mut self, ast: &ast::ClassSet) {
|
||||
if self.done() {
|
||||
return;
|
||||
}
|
||||
match *ast {
|
||||
ast::ClassSet::Item(ref item) => {
|
||||
self.from_ast_class_set_item(item);
|
||||
}
|
||||
ast::ClassSet::BinaryOp(ref x) => {
|
||||
self.from_ast_class_set(&x.lhs);
|
||||
self.from_ast_class_set(&x.rhs);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) {
|
||||
if self.done() {
|
||||
return;
|
||||
}
|
||||
match *ast {
|
||||
ast::ClassSetItem::Empty(_)
|
||||
| ast::ClassSetItem::Ascii(_)
|
||||
| ast::ClassSetItem::Unicode(_)
|
||||
| ast::ClassSetItem::Perl(_) => {}
|
||||
ast::ClassSetItem::Literal(ref x) => {
|
||||
self.from_ast_literal(x);
|
||||
}
|
||||
ast::ClassSetItem::Range(ref x) => {
|
||||
self.from_ast_literal(&x.start);
|
||||
self.from_ast_literal(&x.end);
|
||||
}
|
||||
ast::ClassSetItem::Bracketed(ref x) => {
|
||||
self.from_ast_class_set(&x.kind);
|
||||
}
|
||||
ast::ClassSetItem::Union(ref union) => {
|
||||
for x in &union.items {
|
||||
self.from_ast_class_set_item(x);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn from_ast_literal(&mut self, ast: &ast::Literal) {
|
||||
self.any_literal = true;
|
||||
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
|
||||
}
|
||||
|
||||
/// Returns true if and only if the attributes can never change no matter
|
||||
/// what other AST it might see.
|
||||
fn done(&self) -> bool {
|
||||
self.any_uppercase && self.any_literal
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Analyzes `pattern`, panicking if it is not a valid regex.
    fn cased(pattern: &str) -> Cased {
        Cased::from_pattern(pattern).unwrap()
    }

    #[test]
    fn various() {
        // (pattern, expected any_uppercase, expected any_literal)
        let cases: &[(&str, bool, bool)] = &[
            ("", false, false),
            ("foo", false, true),
            ("Foo", true, true),
            ("foO", true, true),
            (r"foo\\", false, true),
            (r"foo\w", false, true),
            (r"foo\S", false, true),
            (r"foo\p{Ll}", false, true),
            (r"foo[a-z]", false, true),
            (r"foo[A-Z]", true, true),
            (r"foo[\S\t]", false, true),
            (r"foo\\S", true, true),
            (r"\p{Ll}", false, false),
            (r"aBc\w", true, true),
        ];
        for &(pattern, any_uppercase, any_literal) in cases {
            let x = cased(pattern);
            // Name the pattern so a failure identifies the offending case.
            assert_eq!(
                any_uppercase, x.any_uppercase,
                "any_uppercase mismatch for pattern {:?}", pattern
            );
            assert_eq!(
                any_literal, x.any_literal,
                "any_literal mismatch for pattern {:?}", pattern
            );
        }
    }

    /// `from_pattern` is documented to return `None` for invalid regexes.
    #[test]
    fn invalid_pattern() {
        assert!(Cased::from_pattern("(").is_none());
        assert!(Cased::from_pattern("[a").is_none());
    }
}
|
@ -1,53 +0,0 @@
|
||||
use syntax::hir::{self, Hir, HirKind};
|
||||
|
||||
/// Strips Unicode word boundaries from the given expression.
|
||||
///
|
||||
/// The key invariant this maintains is that the expression returned will match
|
||||
/// *at least* every where the expression given will match. Namely, a match of
|
||||
/// the returned expression can report false positives but it will never report
|
||||
/// false negatives.
|
||||
///
|
||||
/// If no word boundaries could be stripped, then None is returned.
|
||||
pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option<Hir> {
|
||||
// The real reason we do this is because Unicode word boundaries are the
|
||||
// one thing that Rust's regex DFA engine can't handle. When it sees a
|
||||
// Unicode word boundary among non-ASCII text, it falls back to one of the
|
||||
// slower engines. We work around this limitation by attempting to use
|
||||
// a regex to find candidate matches without a Unicode word boundary. We'll
|
||||
// only then use the full (and slower) regex to confirm a candidate as a
|
||||
// match or not during search.
|
||||
//
|
||||
// It looks like we only check the outer edges for `\b`? I guess this is
|
||||
// an attempt to optimize for the `-w/--word-regexp` flag? ---AG
|
||||
match *expr.kind() {
|
||||
HirKind::Concat(ref es) if !es.is_empty() => {
|
||||
let first = is_unicode_word_boundary(&es[0]);
|
||||
let last = is_unicode_word_boundary(es.last().unwrap());
|
||||
// Be careful not to strip word boundaries if there are no other
|
||||
// expressions to match.
|
||||
match (first, last) {
|
||||
(true, false) if es.len() > 1 => {
|
||||
Some(Hir::concat(es[1..].to_vec()))
|
||||
}
|
||||
(false, true) if es.len() > 1 => {
|
||||
Some(Hir::concat(es[..es.len() - 1].to_vec()))
|
||||
}
|
||||
(true, true) if es.len() > 2 => {
|
||||
Some(Hir::concat(es[1..es.len() - 1].to_vec()))
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true if the given expression is a Unicode word boundary.
|
||||
fn is_unicode_word_boundary(expr: &Hir) -> bool {
|
||||
match *expr.kind() {
|
||||
HirKind::WordBoundary(hir::WordBoundary::Unicode) => true,
|
||||
HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true,
|
||||
HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir),
|
||||
_ => false,
|
||||
}
|
||||
}
|
@ -1,3 +0,0 @@
|
||||
This project is dual-licensed under the Unlicense and MIT licenses.
|
||||
|
||||
You may use this code under the terms of either license.
|
@ -1,23 +0,0 @@
|
||||
[package]
|
||||
name = "grep2"
|
||||
version = "0.2.0" #:version
|
||||
authors = ["Andrew Gallant <jamslam@gmail.com>"]
|
||||
description = """
|
||||
Fast line oriented regex searching as a library.
|
||||
"""
|
||||
documentation = "http://burntsushi.net/rustdoc/grep/"
|
||||
homepage = "https://github.com/BurntSushi/ripgrep"
|
||||
repository = "https://github.com/BurntSushi/ripgrep"
|
||||
readme = "README.md"
|
||||
keywords = ["regex", "grep", "egrep", "search", "pattern"]
|
||||
license = "Unlicense/MIT"
|
||||
|
||||
[dependencies]
|
||||
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
|
||||
grep-printer = { version = "0.0.1", path = "../grep-printer" }
|
||||
grep-regex = { version = "0.0.1", path = "../grep-regex" }
|
||||
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
|
||||
|
||||
[features]
|
||||
avx-accel = ["grep-searcher/avx-accel"]
|
||||
simd-accel = ["grep-searcher/simd-accel"]
|
@ -1,21 +0,0 @@
|
||||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2015 Andrew Gallant
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
@ -1,4 +0,0 @@
|
||||
grep
|
||||
----
|
||||
This is a *library* that provides grep-style line-by-line regex searching (with
|
||||
comparable performance to `grep` itself).
|
@ -1,24 +0,0 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or
|
||||
distribute this software, either in source code form or as a compiled
|
||||
binary, for any purpose, commercial or non-commercial, and by any
|
||||
means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors
|
||||
of this software dedicate any and all copyright interest in the
|
||||
software to the public domain. We make this dedication for the benefit
|
||||
of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of
|
||||
relinquishment in perpetuity of all present and future rights to this
|
||||
software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
|
||||
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
@ -1,10 +0,0 @@
|
||||
/*!
|
||||
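This crate is a facade: it re-exports the `grep-matcher`, `grep-printer`,
`grep-regex` and `grep-searcher` crates as the `matcher`, `printer`, `regex`
and `searcher` modules, respectively.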
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
|
||||
pub extern crate grep_matcher as matcher;
|
||||
pub extern crate grep_printer as printer;
|
||||
pub extern crate grep_regex as regex;
|
||||
pub extern crate grep_searcher as searcher;
|
2098
src/args.rs
2098
src/args.rs
File diff suppressed because it is too large
Load Diff
1384
src/args2.rs
1384
src/args2.rs
File diff suppressed because it is too large
Load Diff
580
src/main.rs
580
src/main.rs
@ -1,22 +1,13 @@
|
||||
#![allow(dead_code, unused_imports, unused_mut, unused_variables)]
|
||||
|
||||
extern crate atty;
|
||||
extern crate bytecount;
|
||||
#[macro_use]
|
||||
extern crate clap;
|
||||
extern crate encoding_rs;
|
||||
extern crate encoding_rs_io;
|
||||
extern crate globset;
|
||||
extern crate grep;
|
||||
extern crate grep2;
|
||||
extern crate ignore;
|
||||
#[macro_use]
|
||||
extern crate lazy_static;
|
||||
extern crate libc;
|
||||
#[macro_use]
|
||||
extern crate log;
|
||||
extern crate memchr;
|
||||
extern crate memmap;
|
||||
extern crate num_cpus;
|
||||
extern crate regex;
|
||||
extern crate same_file;
|
||||
@ -24,416 +15,279 @@ extern crate termcolor;
|
||||
#[cfg(windows)]
|
||||
extern crate winapi;
|
||||
|
||||
use std::error::Error;
|
||||
use std::io;
|
||||
use std::process;
|
||||
use std::result;
|
||||
use std::sync::Arc;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
use std::time::{Duration, Instant};
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use ignore::WalkState;
|
||||
|
||||
use args::Args;
|
||||
use worker::Work;
|
||||
|
||||
/// Returns early from the enclosing function with an `Err` built from a
/// `format!`-style message.
///
/// The formatted `String` is converted with `From::from`, so the enclosing
/// function's error type must implement `From<String>`.
macro_rules! errored {
    ($($tt:tt)*) => {
        // Deliberately no trailing semicolon: the expansion is then a plain
        // `return` expression, so the macro works both as a statement
        // (`errored!("...");`) and in expression position such as a match arm.
        return Err(From::from(format!($($tt)*)))
    }
}
|
||||
use subject::Subject;
|
||||
|
||||
#[macro_use]
|
||||
mod messages;
|
||||
|
||||
mod app;
|
||||
mod args;
|
||||
mod args2;
|
||||
mod config;
|
||||
mod decompressor;
|
||||
mod preprocessor;
|
||||
mod logger;
|
||||
mod main2;
|
||||
mod path_printer;
|
||||
mod pathutil;
|
||||
mod printer;
|
||||
mod search;
|
||||
mod search_buffer;
|
||||
mod search_stream;
|
||||
mod subject;
|
||||
mod unescape;
|
||||
mod worker;
|
||||
|
||||
pub type Result<T> = result::Result<T, Box<Error>>;
|
||||
pub type Result<T> = ::std::result::Result<T, Box<::std::error::Error>>;
|
||||
|
||||
fn main() {
|
||||
main2::main2();
|
||||
// reset_sigpipe();
|
||||
// match Args::parse().map(Arc::new).and_then(run) {
|
||||
// Ok(0) => process::exit(1),
|
||||
// Ok(_) => process::exit(0),
|
||||
// Err(err) => {
|
||||
// eprintln!("{}", err);
|
||||
// process::exit(2);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
fn run(args: Arc<Args>) -> Result<u64> {
|
||||
if args.never_match() {
|
||||
return Ok(0);
|
||||
}
|
||||
let threads = args.threads();
|
||||
if args.files() {
|
||||
if threads == 1 || args.is_one_path() {
|
||||
run_files_one_thread(&args)
|
||||
} else {
|
||||
run_files_parallel(args)
|
||||
pub fn main() {
|
||||
match Args::parse().and_then(run) {
|
||||
Ok(true) => process::exit(0),
|
||||
Ok(false) => process::exit(1),
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
process::exit(2);
|
||||
}
|
||||
} else if args.type_list() {
|
||||
run_types(&args)
|
||||
} else if threads == 1 || args.is_one_path() {
|
||||
run_one_thread(&args)
|
||||
} else {
|
||||
run_parallel(&args)
|
||||
}
|
||||
}
|
||||
|
||||
fn run_parallel(args: &Arc<Args>) -> Result<u64> {
|
||||
let start_time = Instant::now();
|
||||
let bufwtr = Arc::new(args.buffer_writer());
|
||||
let quiet_matched = args.quiet_matched();
|
||||
let paths_searched = Arc::new(AtomicUsize::new(0));
|
||||
let match_line_count = Arc::new(AtomicUsize::new(0));
|
||||
let paths_matched = Arc::new(AtomicUsize::new(0));
|
||||
fn run(args: Args) -> Result<bool> {
|
||||
use args::Command::*;
|
||||
|
||||
args.walker_parallel().run(|| {
|
||||
let args = Arc::clone(args);
|
||||
let quiet_matched = quiet_matched.clone();
|
||||
let paths_searched = paths_searched.clone();
|
||||
let match_line_count = match_line_count.clone();
|
||||
let paths_matched = paths_matched.clone();
|
||||
match args.command()? {
|
||||
Search => search(args),
|
||||
SearchParallel => search_parallel(args),
|
||||
SearchNever => Ok(false),
|
||||
Files => files(args),
|
||||
FilesParallel => files_parallel(args),
|
||||
Types => types(args),
|
||||
}
|
||||
}
|
||||
|
||||
/// The top-level entry point for single-threaded search. This recursively
|
||||
/// steps through the file list (current directory by default) and searches
|
||||
/// each file sequentially.
|
||||
fn search(args: Args) -> Result<bool> {
|
||||
let started_at = Instant::now();
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let mut stats = args.stats()?;
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
let mut matched = false;
|
||||
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
matched = matched || search_result.has_match();
|
||||
if let Some(ref mut stats) = stats {
|
||||
*stats += search_result.stats().unwrap();
|
||||
}
|
||||
if matched && quit_after_match {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(ref stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
// We don't care if we couldn't print this successfully.
|
||||
let _ = searcher.printer().print_stats(elapsed, stats);
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
/// The top-level entry point for multi-threaded search. The parallelism is
|
||||
/// itself achieved by the recursive directory traversal. All we need to do is
|
||||
/// feed it a worker for performing a search on each file.
|
||||
fn search_parallel(args: Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let started_at = Instant::now();
|
||||
let subject_builder = Arc::new(args.subject_builder());
|
||||
let bufwtr = Arc::new(args.buffer_writer()?);
|
||||
let stats = Arc::new(args.stats()?.map(Mutex::new));
|
||||
let matched = Arc::new(AtomicBool::new(false));
|
||||
let mut searcher_err = None;
|
||||
args.walker_parallel()?.run(|| {
|
||||
let args = args.clone();
|
||||
let bufwtr = Arc::clone(&bufwtr);
|
||||
let mut buf = bufwtr.buffer();
|
||||
let mut worker = args.worker();
|
||||
Box::new(move |result| {
|
||||
use ignore::WalkState::*;
|
||||
let stats = Arc::clone(&stats);
|
||||
let matched = Arc::clone(&matched);
|
||||
let subject_builder = Arc::clone(&subject_builder);
|
||||
let mut searcher = match args.search_worker(bufwtr.buffer()) {
|
||||
Ok(searcher) => searcher,
|
||||
Err(err) => {
|
||||
searcher_err = Some(err);
|
||||
return Box::new(move |_| {
|
||||
WalkState::Quit
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
if quiet_matched.has_match() {
|
||||
return Quit;
|
||||
}
|
||||
let dent = match get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.files(),
|
||||
) {
|
||||
None => return Continue,
|
||||
Some(dent) => dent,
|
||||
Box::new(move |result| {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => return WalkState::Continue,
|
||||
};
|
||||
paths_searched.fetch_add(1, Ordering::SeqCst);
|
||||
buf.clear();
|
||||
{
|
||||
// This block actually executes the search and prints the
|
||||
// results into outbuf.
|
||||
let mut printer = args.printer(&mut buf);
|
||||
let count =
|
||||
if dent.is_stdin() {
|
||||
worker.run(&mut printer, Work::Stdin)
|
||||
} else {
|
||||
worker.run(&mut printer, Work::DirEntry(dent))
|
||||
};
|
||||
match_line_count.fetch_add(count as usize, Ordering::SeqCst);
|
||||
if quiet_matched.set_match(count > 0) {
|
||||
return Quit;
|
||||
}
|
||||
if args.stats() && count > 0 {
|
||||
paths_matched.fetch_add(1, Ordering::SeqCst);
|
||||
searcher.printer().get_mut().clear();
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
return WalkState::Continue;
|
||||
}
|
||||
};
|
||||
if search_result.has_match() {
|
||||
matched.store(true, SeqCst);
|
||||
}
|
||||
if let Some(ref locked_stats) = *stats {
|
||||
let mut stats = locked_stats.lock().unwrap();
|
||||
*stats += search_result.stats().unwrap();
|
||||
}
|
||||
if let Err(err) = bufwtr.print(searcher.printer().get_mut()) {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
return WalkState::Quit;
|
||||
}
|
||||
// Otherwise, we continue on our merry way.
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
}
|
||||
if matched.load(SeqCst) && quit_after_match {
|
||||
WalkState::Quit
|
||||
} else {
|
||||
WalkState::Continue
|
||||
}
|
||||
// BUG(burntsushi): We should handle this error instead of ignoring
|
||||
// it. See: https://github.com/BurntSushi/ripgrep/issues/200
|
||||
let _ = bufwtr.print(&buf);
|
||||
Continue
|
||||
})
|
||||
});
|
||||
if paths_searched.load(Ordering::SeqCst) == 0 {
|
||||
eprint_nothing_searched();
|
||||
if let Some(err) = searcher_err.take() {
|
||||
return Err(err);
|
||||
}
|
||||
let match_line_count = match_line_count.load(Ordering::SeqCst) as u64;
|
||||
let paths_searched = paths_searched.load(Ordering::SeqCst) as u64;
|
||||
let paths_matched = paths_matched.load(Ordering::SeqCst) as u64;
|
||||
if args.stats() {
|
||||
print_stats(
|
||||
match_line_count,
|
||||
paths_searched,
|
||||
paths_matched,
|
||||
start_time.elapsed(),
|
||||
);
|
||||
if let Some(ref locked_stats) = *stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
let stats = locked_stats.lock().unwrap();
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
// We don't care if we couldn't print this successfully.
|
||||
let _ = searcher.printer().print_stats(elapsed, &stats);
|
||||
}
|
||||
Ok(match_line_count)
|
||||
Ok(matched.load(SeqCst))
|
||||
}
|
||||
|
||||
fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
|
||||
let start_time = Instant::now();
|
||||
let mut stdout = args.stdout();
|
||||
let mut worker = args.worker();
|
||||
let mut paths_searched: u64 = 0;
|
||||
let mut match_line_count = 0;
|
||||
let mut paths_matched: u64 = 0;
|
||||
for result in args.walker() {
|
||||
let dent = match get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.files(),
|
||||
) {
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using a single thread.
|
||||
fn files(args: Args) -> Result<bool> {
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let mut matched = false;
|
||||
let mut path_printer = args.path_printer(args.stdout())?;
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
let mut printer = args.printer(&mut stdout);
|
||||
if match_line_count > 0 {
|
||||
if args.quiet() {
|
||||
matched = true;
|
||||
if quit_after_match {
|
||||
break;
|
||||
}
|
||||
if let Err(err) = path_printer.write_path(subject.path()) {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
if let Some(sep) = args.file_separator() {
|
||||
printer = printer.file_separator(sep);
|
||||
}
|
||||
}
|
||||
paths_searched += 1;
|
||||
let count =
|
||||
if dent.is_stdin() {
|
||||
worker.run(&mut printer, Work::Stdin)
|
||||
} else {
|
||||
worker.run(&mut printer, Work::DirEntry(dent))
|
||||
};
|
||||
match_line_count += count;
|
||||
if args.stats() && count > 0 {
|
||||
paths_matched += 1;
|
||||
// Otherwise, we have some other error that's preventing us from
|
||||
// writing to stdout, so we should bubble it up.
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
if paths_searched == 0 {
|
||||
eprint_nothing_searched();
|
||||
}
|
||||
if args.stats() {
|
||||
print_stats(
|
||||
match_line_count,
|
||||
paths_searched,
|
||||
paths_matched,
|
||||
start_time.elapsed(),
|
||||
);
|
||||
}
|
||||
Ok(match_line_count)
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
fn run_files_parallel(args: Arc<Args>) -> Result<u64> {
|
||||
let print_args = Arc::clone(&args);
|
||||
let (tx, rx) = mpsc::channel::<ignore::DirEntry>();
|
||||
let print_thread = thread::spawn(move || {
|
||||
let mut printer = print_args.printer(print_args.stdout());
|
||||
let mut file_count = 0;
|
||||
for dent in rx.iter() {
|
||||
if !print_args.quiet() {
|
||||
printer.path(dent.path());
|
||||
}
|
||||
file_count += 1;
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using multiple threads.
|
||||
fn files_parallel(args: Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = Arc::new(args.subject_builder());
|
||||
let mut path_printer = args.path_printer(args.stdout())?;
|
||||
let matched = Arc::new(AtomicBool::new(false));
|
||||
let (tx, rx) = mpsc::channel::<Subject>();
|
||||
|
||||
let print_thread = thread::spawn(move || -> io::Result<()> {
|
||||
for subject in rx.iter() {
|
||||
path_printer.write_path(subject.path())?;
|
||||
}
|
||||
file_count
|
||||
Ok(())
|
||||
});
|
||||
args.walker_parallel().run(move || {
|
||||
let args = Arc::clone(&args);
|
||||
args.walker_parallel()?.run(|| {
|
||||
let subject_builder = Arc::clone(&subject_builder);
|
||||
let matched = Arc::clone(&matched);
|
||||
let tx = tx.clone();
|
||||
|
||||
Box::new(move |result| {
|
||||
if let Some(dent) = get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.files(),
|
||||
) {
|
||||
tx.send(dent).unwrap();
|
||||
if args.quiet() {
|
||||
return ignore::WalkState::Quit
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => return WalkState::Continue,
|
||||
};
|
||||
matched.store(true, SeqCst);
|
||||
if quit_after_match {
|
||||
WalkState::Quit
|
||||
} else {
|
||||
match tx.send(subject) {
|
||||
Ok(_) => WalkState::Continue,
|
||||
Err(_) => WalkState::Quit,
|
||||
}
|
||||
}
|
||||
ignore::WalkState::Continue
|
||||
})
|
||||
});
|
||||
Ok(print_thread.join().unwrap())
|
||||
}
|
||||
|
||||
fn run_files_one_thread(args: &Arc<Args>) -> Result<u64> {
|
||||
let mut printer = args.printer(args.stdout());
|
||||
let mut file_count = 0;
|
||||
for result in args.walker() {
|
||||
let dent = match get_or_log_dir_entry(
|
||||
result,
|
||||
args.stdout_handle(),
|
||||
args.files(),
|
||||
) {
|
||||
None => continue,
|
||||
Some(dent) => dent,
|
||||
};
|
||||
file_count += 1;
|
||||
if args.quiet() {
|
||||
break;
|
||||
} else {
|
||||
printer.path(dent.path());
|
||||
drop(tx);
|
||||
if let Err(err) = print_thread.join().unwrap() {
|
||||
// A broken pipe means graceful termination, so fall through.
|
||||
// Otherwise, something bad happened while writing to stdout, so bubble
|
||||
// it up.
|
||||
if err.kind() != io::ErrorKind::BrokenPipe {
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
Ok(file_count)
|
||||
Ok(matched.load(SeqCst))
|
||||
}
|
||||
|
||||
fn run_types(args: &Arc<Args>) -> Result<u64> {
|
||||
let mut printer = args.printer(args.stdout());
|
||||
let mut ty_count = 0;
|
||||
for def in args.type_defs() {
|
||||
printer.type_def(def);
|
||||
ty_count += 1;
|
||||
}
|
||||
Ok(ty_count)
|
||||
}
|
||||
/// The top-level entry point for --type-list.
|
||||
fn types(args: Args) -> Result<bool> {
|
||||
let mut count = 0;
|
||||
let mut stdout = args.stdout();
|
||||
for def in args.type_defs()? {
|
||||
count += 1;
|
||||
stdout.write_all(def.name().as_bytes())?;
|
||||
stdout.write_all(b": ")?;
|
||||
|
||||
fn get_or_log_dir_entry(
|
||||
result: result::Result<ignore::DirEntry, ignore::Error>,
|
||||
stdout_handle: Option<&same_file::Handle>,
|
||||
files_only: bool,
|
||||
) -> Option<ignore::DirEntry> {
|
||||
match result {
|
||||
Err(err) => {
|
||||
message!("{}", err);
|
||||
None
|
||||
}
|
||||
Ok(dent) => {
|
||||
if let Some(err) = dent.error() {
|
||||
ignore_message!("{}", err);
|
||||
}
|
||||
if dent.file_type().is_none() {
|
||||
return Some(dent); // entry is stdin
|
||||
}
|
||||
// A depth of 0 means the user gave the path explicitly, so we
|
||||
// should always try to search it.
|
||||
if dent.depth() == 0 && !ignore_entry_is_dir(&dent) {
|
||||
return Some(dent);
|
||||
} else if !ignore_entry_is_file(&dent) {
|
||||
return None;
|
||||
}
|
||||
// If we are redirecting stdout to a file, then don't search that
|
||||
// file.
|
||||
if !files_only && is_stdout_file(&dent, stdout_handle) {
|
||||
return None;
|
||||
}
|
||||
Some(dent)
|
||||
let mut first = true;
|
||||
for glob in def.globs() {
|
||||
if !first {
|
||||
stdout.write_all(b", ")?;
|
||||
}
|
||||
stdout.write_all(glob.as_bytes())?;
|
||||
first = false;
|
||||
}
|
||||
stdout.write_all(b"\n")?;
|
||||
}
|
||||
}
|
||||
|
||||
/// Reports whether the given `ignore::DirEntry` refers to a directory.
///
/// On Windows this inspects the raw file attributes to work around a bug in
/// Rust's standard library:
/// https://github.com/rust-lang/rust/issues/46484
///
/// If the metadata cannot be read, the entry is reported as not a directory.
#[cfg(windows)]
fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool {
    use std::os::windows::fs::MetadataExt;
    use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY;

    match dent.metadata() {
        Ok(md) => md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0,
        Err(_) => false,
    }
}
|
||||
|
||||
/// Returns true if and only if the given `ignore::DirEntry` points to a
|
||||
/// directory.
|
||||
#[cfg(not(windows))]
|
||||
fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool {
|
||||
dent.file_type().map_or(false, |ft| ft.is_dir())
|
||||
}
|
||||
|
||||
/// Reports whether the given `ignore::DirEntry` refers to a file.
///
/// On Windows, anything that is not a directory is treated as a file. This
/// works around a bug in Rust's standard library:
/// https://github.com/rust-lang/rust/issues/46484
#[cfg(windows)]
fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool {
    let is_dir = ignore_entry_is_dir(dent);
    !is_dir
}
|
||||
|
||||
/// Returns true if and only if the given `ignore::DirEntry` points to a
|
||||
/// file.
|
||||
#[cfg(not(windows))]
|
||||
fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool {
|
||||
dent.file_type().map_or(false, |ft| ft.is_file())
|
||||
}
|
||||
|
||||
fn is_stdout_file(
|
||||
dent: &ignore::DirEntry,
|
||||
stdout_handle: Option<&same_file::Handle>,
|
||||
) -> bool {
|
||||
let stdout_handle = match stdout_handle {
|
||||
None => return false,
|
||||
Some(stdout_handle) => stdout_handle,
|
||||
};
|
||||
// If we know for sure that these two things aren't equal, then avoid
|
||||
// the costly extra stat call to determine equality.
|
||||
if !maybe_dent_eq_handle(dent, stdout_handle) {
|
||||
return false;
|
||||
}
|
||||
match same_file::Handle::from_path(dent.path()) {
|
||||
Ok(h) => stdout_handle == &h,
|
||||
Err(err) => {
|
||||
message!("{}: {}", dent.path().display(), err);
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn maybe_dent_eq_handle(
|
||||
dent: &ignore::DirEntry,
|
||||
handle: &same_file::Handle,
|
||||
) -> bool {
|
||||
dent.ino() == Some(handle.ino())
|
||||
}
|
||||
|
||||
/// Cheap pre-check for whether a directory entry could be the file behind a
/// handle.
///
/// Outside of Unix no cheap check is performed, so this always answers
/// "maybe" and leaves the decision to the caller's full comparison.
#[cfg(not(unix))]
fn maybe_dent_eq_handle(
    _dent: &ignore::DirEntry,
    _handle: &same_file::Handle,
) -> bool {
    true
}
|
||||
|
||||
fn eprint_nothing_searched() {
|
||||
message!(
|
||||
"No files were searched, which means ripgrep probably \
|
||||
applied a filter you didn't expect. \
|
||||
Try running again with --debug.");
|
||||
}
|
||||
|
||||
/// Prints a summary of the search to stdout: matched lines, files containing
/// matches, files searched, and the elapsed time in seconds with millisecond
/// precision.
fn print_stats(
    match_count: u64,
    paths_searched: u64,
    paths_matched: u64,
    time_elapsed: Duration,
) {
    // Combine whole seconds and the nanosecond remainder into fractional
    // seconds.
    let whole_secs = time_elapsed.as_secs() as f64;
    let frac_secs = time_elapsed.subsec_nanos() as f64 * 1e-9;
    let seconds = whole_secs + frac_secs;
    println!(
        "\n{} matched lines\n{} files contained matches\n{} files searched\n{:.3} seconds",
        match_count, paths_matched, paths_searched, seconds
    );
}
|
||||
|
||||
// The Rust standard library suppresses the default SIGPIPE behavior, so that
|
||||
// writing to a closed pipe doesn't kill the process. The goal is to instead
|
||||
// handle errors through the normal result mechanism. Ripgrep needs some
|
||||
// refactoring before it will be able to do that, however, so we re-enable the
|
||||
// standard SIGPIPE behavior as a workaround. See
|
||||
// https://github.com/BurntSushi/ripgrep/issues/200.
|
||||
#[cfg(unix)]
|
||||
fn reset_sigpipe() {
|
||||
unsafe {
|
||||
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
|
||||
}
|
||||
}
|
||||
|
||||
/// Non-Unix counterpart of `reset_sigpipe`: there is nothing to reset, so
/// this does nothing.
#[cfg(not(unix))]
fn reset_sigpipe() {
    // no-op
}
|
||||
|
263
src/main2.rs
263
src/main2.rs
@ -1,263 +0,0 @@
|
||||
use std::io;
|
||||
use std::process;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::time::Instant;
|
||||
|
||||
use grep2::printer::Stats;
|
||||
use ignore::WalkState;
|
||||
|
||||
use args2::Args;
|
||||
use subject::Subject;
|
||||
use Result;
|
||||
|
||||
pub fn main2() {
|
||||
match Args::parse().and_then(run) {
|
||||
Ok(false) => process::exit(1),
|
||||
Ok(true) => process::exit(0),
|
||||
Err(err) => {
|
||||
eprintln!("{}", err);
|
||||
process::exit(2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn run(args: Args) -> Result<bool> {
|
||||
use args2::Command::*;
|
||||
|
||||
match args.command()? {
|
||||
Search => search(args),
|
||||
SearchParallel => search_parallel(args),
|
||||
SearchNever => Ok(false),
|
||||
Files => files(args),
|
||||
FilesParallel => files_parallel(args),
|
||||
Types => types(args),
|
||||
}
|
||||
}
|
||||
|
||||
/// The top-level entry point for single-threaded search. This recursively
|
||||
/// steps through the file list (current directory by default) and searches
|
||||
/// each file sequentially.
|
||||
fn search(args: Args) -> Result<bool> {
|
||||
let started_at = Instant::now();
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let mut stats = args.stats()?;
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
let mut matched = false;
|
||||
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
continue;
|
||||
}
|
||||
};
|
||||
matched = matched || search_result.has_match();
|
||||
if let Some(ref mut stats) = stats {
|
||||
*stats += search_result.stats().unwrap();
|
||||
}
|
||||
if matched && quit_after_match {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if let Some(ref stats) = stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
// We don't care if we couldn't print this successfully.
|
||||
let _ = searcher.printer().print_stats(elapsed, stats);
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
/// The top-level entry point for multi-threaded search. The parallelism is
|
||||
/// itself achieved by the recursive directory traversal. All we need to do is
|
||||
/// feed it a worker for performing a search on each file.
|
||||
fn search_parallel(args: Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let started_at = Instant::now();
|
||||
let subject_builder = Arc::new(args.subject_builder());
|
||||
let bufwtr = Arc::new(args.buffer_writer()?);
|
||||
let stats = Arc::new(args.stats()?.map(Mutex::new));
|
||||
let matched = Arc::new(AtomicBool::new(false));
|
||||
let mut searcher_err = None;
|
||||
args.walker_parallel()?.run(|| {
|
||||
let args = args.clone();
|
||||
let bufwtr = Arc::clone(&bufwtr);
|
||||
let stats = Arc::clone(&stats);
|
||||
let matched = Arc::clone(&matched);
|
||||
let subject_builder = Arc::clone(&subject_builder);
|
||||
let mut searcher = match args.search_worker(bufwtr.buffer()) {
|
||||
Ok(searcher) => searcher,
|
||||
Err(err) => {
|
||||
searcher_err = Some(err);
|
||||
return Box::new(move |_| {
|
||||
WalkState::Quit
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
Box::new(move |result| {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => return WalkState::Continue,
|
||||
};
|
||||
searcher.printer().get_mut().clear();
|
||||
let search_result = match searcher.search(&subject) {
|
||||
Ok(search_result) => search_result,
|
||||
Err(err) => {
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
return WalkState::Continue;
|
||||
}
|
||||
};
|
||||
if search_result.has_match() {
|
||||
matched.store(true, SeqCst);
|
||||
}
|
||||
if let Some(ref locked_stats) = *stats {
|
||||
let mut stats = locked_stats.lock().unwrap();
|
||||
*stats += search_result.stats().unwrap();
|
||||
}
|
||||
if let Err(err) = bufwtr.print(searcher.printer().get_mut()) {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
return WalkState::Quit;
|
||||
}
|
||||
// Otherwise, we continue on our merry way.
|
||||
message!("{}: {}", subject.path().display(), err);
|
||||
}
|
||||
if matched.load(SeqCst) && quit_after_match {
|
||||
WalkState::Quit
|
||||
} else {
|
||||
WalkState::Continue
|
||||
}
|
||||
})
|
||||
});
|
||||
if let Some(err) = searcher_err.take() {
|
||||
return Err(err);
|
||||
}
|
||||
if let Some(ref locked_stats) = *stats {
|
||||
let elapsed = Instant::now().duration_since(started_at);
|
||||
let stats = locked_stats.lock().unwrap();
|
||||
let mut searcher = args.search_worker(args.stdout())?;
|
||||
// We don't care if we couldn't print this successfully.
|
||||
let _ = searcher.printer().print_stats(elapsed, &stats);
|
||||
}
|
||||
Ok(matched.load(SeqCst))
|
||||
}
|
||||
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using a single thread.
|
||||
fn files(args: Args) -> Result<bool> {
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = args.subject_builder();
|
||||
let mut matched = false;
|
||||
let mut path_printer = args.path_printer(args.stdout())?;
|
||||
for result in args.walker()? {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => continue,
|
||||
};
|
||||
matched = true;
|
||||
if quit_after_match {
|
||||
break;
|
||||
}
|
||||
if let Err(err) = path_printer.write_path(subject.path()) {
|
||||
// A broken pipe means graceful termination.
|
||||
if err.kind() == io::ErrorKind::BrokenPipe {
|
||||
break;
|
||||
}
|
||||
// Otherwise, we have some other error that's preventing us from
|
||||
// writing to stdout, so we should bubble it up.
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
Ok(matched)
|
||||
}
|
||||
|
||||
/// The top-level entry point for listing files without searching them. This
|
||||
/// recursively steps through the file list (current directory by default) and
|
||||
/// prints each path sequentially using multiple threads.
|
||||
fn files_parallel(args: Args) -> Result<bool> {
|
||||
use std::sync::atomic::AtomicBool;
|
||||
use std::sync::atomic::Ordering::SeqCst;
|
||||
use std::sync::mpsc;
|
||||
use std::thread;
|
||||
|
||||
let quit_after_match = args.quit_after_match()?;
|
||||
let subject_builder = Arc::new(args.subject_builder());
|
||||
let mut path_printer = args.path_printer(args.stdout())?;
|
||||
let matched = Arc::new(AtomicBool::new(false));
|
||||
let (tx, rx) = mpsc::channel::<Subject>();
|
||||
|
||||
let print_thread = thread::spawn(move || -> io::Result<()> {
|
||||
for subject in rx.iter() {
|
||||
path_printer.write_path(subject.path())?;
|
||||
}
|
||||
Ok(())
|
||||
});
|
||||
args.walker_parallel()?.run(|| {
|
||||
let args = args.clone();
|
||||
let subject_builder = Arc::clone(&subject_builder);
|
||||
let matched = Arc::clone(&matched);
|
||||
let tx = tx.clone();
|
||||
|
||||
Box::new(move |result| {
|
||||
let subject = match subject_builder.build_from_result(result) {
|
||||
Some(subject) => subject,
|
||||
None => return WalkState::Continue,
|
||||
};
|
||||
matched.store(true, SeqCst);
|
||||
if quit_after_match {
|
||||
WalkState::Quit
|
||||
} else {
|
||||
match tx.send(subject) {
|
||||
Ok(_) => WalkState::Continue,
|
||||
Err(_) => WalkState::Quit,
|
||||
}
|
||||
}
|
||||
})
|
||||
});
|
||||
drop(tx);
|
||||
if let Err(err) = print_thread.join().unwrap() {
|
||||
// A broken pipe means graceful termination, so fall through.
|
||||
// Otherwise, something bad happened while writing to stdout, so bubble
|
||||
// it up.
|
||||
if err.kind() != io::ErrorKind::BrokenPipe {
|
||||
return Err(err.into());
|
||||
}
|
||||
}
|
||||
Ok(matched.load(SeqCst))
|
||||
}
|
||||
|
||||
/// The top-level entry point for --type-list.
|
||||
fn types(args: Args) -> Result<bool> {
|
||||
let mut count = 0;
|
||||
let mut stdout = args.stdout();
|
||||
for def in args.type_defs()? {
|
||||
count += 1;
|
||||
stdout.write_all(def.name().as_bytes())?;
|
||||
stdout.write_all(b": ")?;
|
||||
|
||||
let mut first = true;
|
||||
for glob in def.globs() {
|
||||
if !first {
|
||||
stdout.write_all(b", ")?;
|
||||
}
|
||||
stdout.write_all(glob.as_bytes())?;
|
||||
first = false;
|
||||
}
|
||||
stdout.write_all(b"\n")?;
|
||||
}
|
||||
Ok(count > 0)
|
||||
}
|
@ -1,7 +1,7 @@
|
||||
use std::io;
|
||||
use std::path::Path;
|
||||
|
||||
use grep2::printer::{ColorSpecs, PrinterPath};
|
||||
use grep::printer::{ColorSpecs, PrinterPath};
|
||||
use termcolor::WriteColor;
|
||||
|
||||
/// A configuration for describing how paths should be written.
|
||||
|
@ -1,39 +0,0 @@
|
||||
/*!
|
||||
The pathutil module provides platform specific operations on paths that are
|
||||
typically faster than the same operations as provided in `std::path`. In
|
||||
particular, we really want to avoid the costly operation of parsing the path
|
||||
into its constituent components. We give up on Windows, but on Unix, we deal
|
||||
with the raw bytes directly.
|
||||
*/
|
||||
use std::path::Path;
|
||||
|
||||
/// Remove `prefix` from the front of `path` and return what is left.
///
/// The comparison is done on the raw bytes of both paths, not on path
/// components. Returns `None` when `path` does not begin with `prefix`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    let needle = prefix.as_ref().as_os_str().as_bytes();
    let haystack = path.as_os_str().as_bytes();
    if !haystack.starts_with(needle) {
        return None;
    }
    let rest = &haystack[needle.len()..];
    Some(Path::new(OsStr::from_bytes(rest)))
}
|
||||
|
||||
/// Remove `prefix` from the front of `path` and return what is left.
///
/// This variant defers to `Path::strip_prefix`. Returns `None` when `path`
/// does not begin with `prefix`.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    match path.strip_prefix(prefix) {
        Ok(rest) => Some(rest),
        Err(_) => None,
    }
}
|
928
src/printer.rs
928
src/printer.rs
@ -1,928 +0,0 @@
|
||||
use std::error;
|
||||
use std::fmt;
|
||||
use std::path::Path;
|
||||
use std::str::FromStr;
|
||||
|
||||
use regex::bytes::{Captures, Match, Regex, Replacer};
|
||||
use termcolor::{Color, ColorSpec, ParseColorError, WriteColor};
|
||||
|
||||
use pathutil::strip_prefix;
|
||||
use ignore::types::FileTypeDef;
|
||||
|
||||
/// The start and end positions of a match or replacement within a line,
/// recorded so that the corresponding bytes can be colored on output.
#[derive(Debug)]
struct Offset {
    /// Position of the first byte of the span.
    start: usize,
    /// Position just past the last byte of the span.
    end: usize,
}
|
||||
|
||||
impl Offset {
|
||||
fn new(start: usize, end: usize) -> Offset {
|
||||
Offset { start: start, end: end }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'m, 'r> From<&'m Match<'r>> for Offset {
|
||||
fn from(m: &'m Match<'r>) -> Self {
|
||||
Offset{ start: m.start(), end: m.end() }
|
||||
}
|
||||
}
|
||||
|
||||
/// `CountingReplacer` implements the Replacer interface for Regex,
|
||||
/// and counts how often replacement is being performed.
|
||||
struct CountingReplacer<'r> {
|
||||
replace: &'r [u8],
|
||||
count: &'r mut usize,
|
||||
offsets: &'r mut Vec<Offset>,
|
||||
}
|
||||
|
||||
impl<'r> CountingReplacer<'r> {
|
||||
fn new(
|
||||
replace: &'r [u8],
|
||||
count: &'r mut usize,
|
||||
offsets: &'r mut Vec<Offset>,
|
||||
) -> CountingReplacer<'r> {
|
||||
CountingReplacer { replace: replace, count: count, offsets: offsets, }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'r> Replacer for CountingReplacer<'r> {
|
||||
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
|
||||
*self.count += 1;
|
||||
let start = dst.len();
|
||||
caps.expand(self.replace, dst);
|
||||
let end = dst.len();
|
||||
if start != end {
|
||||
self.offsets.push(Offset::new(start, end));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Printer encapsulates all output logic for searching.
|
||||
///
|
||||
/// Note that we currently ignore all write errors. It's probably worthwhile
|
||||
/// to fix this, but printers are only ever used for writes to stdout or
|
||||
/// writes to memory, neither of which commonly fail.
|
||||
pub struct Printer<W> {
|
||||
/// The underlying writer.
|
||||
wtr: W,
|
||||
/// Whether anything has been printed to wtr yet.
|
||||
has_printed: bool,
|
||||
/// Whether to show column numbers for the first match or not.
|
||||
column: bool,
|
||||
/// The string to use to separate non-contiguous runs of context lines.
|
||||
context_separator: Vec<u8>,
|
||||
/// The end-of-line terminator used by the printer. In general, eols are
|
||||
/// printed via the match directly, but occasionally we need to insert them
|
||||
/// ourselves (for example, to print a context separator).
|
||||
eol: u8,
|
||||
/// A file separator to show before any matches are printed.
|
||||
file_separator: Option<Vec<u8>>,
|
||||
/// Whether to show file name as a heading or not.
|
||||
///
|
||||
/// N.B. If with_filename is false, then this setting has no effect.
|
||||
heading: bool,
|
||||
/// Whether to show every match on its own line.
|
||||
line_per_match: bool,
|
||||
/// Whether to print NUL bytes after a file path instead of new lines
|
||||
/// or `:`.
|
||||
null: bool,
|
||||
/// Print only the matched (non-empty) parts of a matching line
|
||||
only_matching: bool,
|
||||
/// A string to use as a replacement of each match in a matching line.
|
||||
replace: Option<Vec<u8>>,
|
||||
/// Whether to prefix each match with the corresponding file name.
|
||||
with_filename: bool,
|
||||
/// The color specifications.
|
||||
colors: ColorSpecs,
|
||||
/// The separator to use for file paths. If empty, this is ignored.
|
||||
path_separator: Option<u8>,
|
||||
/// Restrict lines to this many columns.
|
||||
max_columns: Option<usize>,
|
||||
}
|
||||
|
||||
impl<W: WriteColor> Printer<W> {
|
||||
/// Create a new printer that writes to wtr with the given color settings.
|
||||
pub fn new(wtr: W) -> Printer<W> {
|
||||
Printer {
|
||||
wtr: wtr,
|
||||
has_printed: false,
|
||||
column: false,
|
||||
context_separator: "--".to_string().into_bytes(),
|
||||
eol: b'\n',
|
||||
file_separator: None,
|
||||
heading: false,
|
||||
line_per_match: false,
|
||||
null: false,
|
||||
only_matching: false,
|
||||
replace: None,
|
||||
with_filename: false,
|
||||
colors: ColorSpecs::default(),
|
||||
path_separator: None,
|
||||
max_columns: None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Set the color specifications.
|
||||
pub fn colors(mut self, colors: ColorSpecs) -> Printer<W> {
|
||||
self.colors = colors;
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, column numbers will be printed for the first match on each
|
||||
/// line.
|
||||
pub fn column(mut self, yes: bool) -> Printer<W> {
|
||||
self.column = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the context separator. The default is `--`.
|
||||
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
|
||||
self.context_separator = sep;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line terminator. The default is `\n`.
|
||||
pub fn eol(mut self, eol: u8) -> Printer<W> {
|
||||
self.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If set, the separator is printed before any matches. By default, no
|
||||
/// separator is printed.
|
||||
pub fn file_separator(mut self, sep: Vec<u8>) -> Printer<W> {
|
||||
self.file_separator = Some(sep);
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to show file name as a heading or not.
|
||||
///
|
||||
/// N.B. If with_filename is false, then this setting has no effect.
|
||||
pub fn heading(mut self, yes: bool) -> Printer<W> {
|
||||
self.heading = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to show every match on its own line.
|
||||
pub fn line_per_match(mut self, yes: bool) -> Printer<W> {
|
||||
self.line_per_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Whether to cause NUL bytes to follow file paths instead of other
|
||||
/// visual separators (like `:`, `-` and `\n`).
|
||||
pub fn null(mut self, yes: bool) -> Printer<W> {
|
||||
self.null = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Print only the matched (non-empty) parts of a matching line
|
||||
pub fn only_matching(mut self, yes: bool) -> Printer<W> {
|
||||
self.only_matching = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// A separator to use when printing file paths. When empty, use the
|
||||
/// default separator for the current platform. (/ on Unix, \ on Windows.)
|
||||
pub fn path_separator(mut self, sep: Option<u8>) -> Printer<W> {
|
||||
self.path_separator = sep;
|
||||
self
|
||||
}
|
||||
|
||||
/// Replace every match in each matching line with the replacement string
|
||||
/// given.
|
||||
pub fn replace(mut self, replacement: Vec<u8>) -> Printer<W> {
|
||||
self.replace = Some(replacement);
|
||||
self
|
||||
}
|
||||
|
||||
/// When set, each match is prefixed with the file name that it came from.
|
||||
pub fn with_filename(mut self, yes: bool) -> Printer<W> {
|
||||
self.with_filename = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Configure the max. number of columns used for printing matching lines.
|
||||
pub fn max_columns(mut self, max_columns: Option<usize>) -> Printer<W> {
|
||||
self.max_columns = max_columns;
|
||||
self
|
||||
}
|
||||
|
||||
/// Returns true if and only if something has been printed.
|
||||
pub fn has_printed(&self) -> bool {
|
||||
self.has_printed
|
||||
}
|
||||
|
||||
/// Flushes the underlying writer and returns it.
|
||||
#[allow(dead_code)]
|
||||
pub fn into_inner(mut self) -> W {
|
||||
let _ = self.wtr.flush();
|
||||
self.wtr
|
||||
}
|
||||
|
||||
/// Prints a type definition.
|
||||
pub fn type_def(&mut self, def: &FileTypeDef) {
|
||||
self.write(def.name().as_bytes());
|
||||
self.write(b": ");
|
||||
let mut first = true;
|
||||
for glob in def.globs() {
|
||||
if !first {
|
||||
self.write(b", ");
|
||||
}
|
||||
self.write(glob.as_bytes());
|
||||
first = false;
|
||||
}
|
||||
self.write_eol();
|
||||
}
|
||||
|
||||
/// Prints the given path.
|
||||
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
|
||||
let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref());
|
||||
self.write_path(path);
|
||||
self.write_path_eol();
|
||||
}
|
||||
|
||||
/// Prints the given path and a count of the number of matches found.
|
||||
pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
|
||||
if self.with_filename {
|
||||
self.write_path(path);
|
||||
self.write_path_sep(b':');
|
||||
}
|
||||
self.write(count.to_string().as_bytes());
|
||||
self.write_eol();
|
||||
}
|
||||
|
||||
/// Prints the context separator.
|
||||
pub fn context_separate(&mut self) {
|
||||
if self.context_separator.is_empty() {
|
||||
return;
|
||||
}
|
||||
let _ = self.wtr.write_all(&self.context_separator);
|
||||
self.write_eol();
|
||||
}
|
||||
|
||||
pub fn matched<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
re: &Regex,
|
||||
path: P,
|
||||
buf: &[u8],
|
||||
start: usize,
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
byte_offset: Option<u64>
|
||||
) {
|
||||
if !self.line_per_match && !self.only_matching {
|
||||
let mat =
|
||||
if !self.needs_match() {
|
||||
(0, 0)
|
||||
} else {
|
||||
re.find(&buf[start..end])
|
||||
.map(|m| (m.start(), m.end()))
|
||||
.unwrap_or((0, 0))
|
||||
};
|
||||
return self.write_match(
|
||||
re, path, buf, start, end, line_number,
|
||||
byte_offset, mat.0, mat.1);
|
||||
}
|
||||
for m in re.find_iter(&buf[start..end]) {
|
||||
self.write_match(
|
||||
re, path.as_ref(), buf, start, end, line_number,
|
||||
byte_offset, m.start(), m.end());
|
||||
}
|
||||
}
|
||||
|
||||
fn needs_match(&self) -> bool {
|
||||
self.column
|
||||
|| self.replace.is_some()
|
||||
|| self.only_matching
|
||||
}
|
||||
|
||||
fn write_match<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
re: &Regex,
|
||||
path: P,
|
||||
buf: &[u8],
|
||||
start: usize,
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
match_start: usize,
|
||||
match_end: usize,
|
||||
) {
|
||||
if self.heading && self.with_filename && !self.has_printed {
|
||||
self.write_file_sep();
|
||||
self.write_path(path);
|
||||
self.write_path_eol();
|
||||
} else if !self.heading && self.with_filename {
|
||||
self.write_path(path);
|
||||
self.write_path_sep(b':');
|
||||
}
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b':');
|
||||
}
|
||||
if self.column {
|
||||
self.column_number(match_start as u64 + 1, b':');
|
||||
}
|
||||
if let Some(byte_offset) = byte_offset {
|
||||
if self.only_matching {
|
||||
self.write_byte_offset(
|
||||
byte_offset + ((start + match_start) as u64), b':');
|
||||
} else {
|
||||
self.write_byte_offset(byte_offset + (start as u64), b':');
|
||||
}
|
||||
}
|
||||
if self.replace.is_some() {
|
||||
let mut count = 0;
|
||||
let mut offsets = Vec::new();
|
||||
let line = {
|
||||
let replacer = CountingReplacer::new(
|
||||
self.replace.as_ref().unwrap(), &mut count, &mut offsets);
|
||||
if self.only_matching {
|
||||
re.replace_all(
|
||||
&buf[start + match_start..start + match_end], replacer)
|
||||
} else {
|
||||
re.replace_all(&buf[start..end], replacer)
|
||||
}
|
||||
};
|
||||
if self.max_columns.map_or(false, |m| line.len() > m) {
|
||||
let msg = format!(
|
||||
"[Omitted long line with {} replacements]", count);
|
||||
self.write_colored(msg.as_bytes(), |colors| colors.matched());
|
||||
self.write_eol();
|
||||
return;
|
||||
}
|
||||
self.write_matched_line(offsets, &*line, false);
|
||||
} else {
|
||||
let buf = if self.only_matching {
|
||||
&buf[start + match_start..start + match_end]
|
||||
} else {
|
||||
&buf[start..end]
|
||||
};
|
||||
if self.max_columns.map_or(false, |m| buf.len() > m) {
|
||||
let count = re.find_iter(buf).count();
|
||||
let msg = format!("[Omitted long line with {} matches]", count);
|
||||
self.write_colored(msg.as_bytes(), |colors| colors.matched());
|
||||
self.write_eol();
|
||||
return;
|
||||
}
|
||||
let only_match = self.only_matching;
|
||||
self.write_matched_line(
|
||||
re.find_iter(buf).map(|x| Offset::from(&x)), buf, only_match);
|
||||
}
|
||||
}
|
||||
|
||||
fn write_matched_line<I>(&mut self, offsets: I, buf: &[u8], only_match: bool)
|
||||
where I: IntoIterator<Item=Offset>,
|
||||
{
|
||||
if !self.wtr.supports_color() || self.colors.matched().is_none() {
|
||||
self.write(buf);
|
||||
} else if only_match {
|
||||
self.write_colored(buf, |colors| colors.matched());
|
||||
} else {
|
||||
let mut last_written = 0;
|
||||
for o in offsets {
|
||||
self.write(&buf[last_written..o.start]);
|
||||
// This conditional checks if the match is both empty *and*
|
||||
// past the end of the line. In this case, we never want to
|
||||
// emit an additional color escape.
|
||||
if o.start != o.end || o.end != buf.len() {
|
||||
self.write_colored(
|
||||
&buf[o.start..o.end], |colors| colors.matched());
|
||||
}
|
||||
last_written = o.end;
|
||||
}
|
||||
self.write(&buf[last_written..]);
|
||||
}
|
||||
if buf.last() != Some(&self.eol) {
|
||||
self.write_eol();
|
||||
}
|
||||
}
|
||||
|
||||
pub fn context<P: AsRef<Path>>(
|
||||
&mut self,
|
||||
path: P,
|
||||
buf: &[u8],
|
||||
start: usize,
|
||||
end: usize,
|
||||
line_number: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
) {
|
||||
if self.heading && self.with_filename && !self.has_printed {
|
||||
self.write_file_sep();
|
||||
self.write_path(path);
|
||||
self.write_path_eol();
|
||||
} else if !self.heading && self.with_filename {
|
||||
self.write_path(path);
|
||||
self.write_path_sep(b'-');
|
||||
}
|
||||
if let Some(line_number) = line_number {
|
||||
self.line_number(line_number, b'-');
|
||||
}
|
||||
if let Some(byte_offset) = byte_offset {
|
||||
self.write_byte_offset(byte_offset + (start as u64), b'-');
|
||||
}
|
||||
if self.max_columns.map_or(false, |m| end - start > m) {
|
||||
self.write(b"[Omitted long context line]");
|
||||
self.write_eol();
|
||||
return;
|
||||
}
|
||||
self.write(&buf[start..end]);
|
||||
if buf[start..end].last() != Some(&self.eol) {
|
||||
self.write_eol();
|
||||
}
|
||||
}
|
||||
|
||||
fn separator(&mut self, sep: &[u8]) {
|
||||
self.write(sep);
|
||||
}
|
||||
|
||||
fn write_path_sep(&mut self, sep: u8) {
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.separator(&[sep]);
|
||||
}
|
||||
}
|
||||
|
||||
fn write_path_eol(&mut self) {
|
||||
if self.null {
|
||||
self.write(b"\x00");
|
||||
} else {
|
||||
self.write_eol();
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn write_path<P: AsRef<Path>>(&mut self, path: P) {
|
||||
use std::os::unix::ffi::OsStrExt;
|
||||
let path = path.as_ref().as_os_str().as_bytes();
|
||||
self.write_path_replace_separator(path);
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn write_path<P: AsRef<Path>>(&mut self, path: P) {
|
||||
let path = path.as_ref().to_string_lossy();
|
||||
self.write_path_replace_separator(path.as_bytes());
|
||||
}
|
||||
|
||||
fn write_path_replace_separator(&mut self, path: &[u8]) {
|
||||
match self.path_separator {
|
||||
None => self.write_colored(path, |colors| colors.path()),
|
||||
Some(sep) => {
|
||||
let transformed_path: Vec<_> = path.iter().map(|&b| {
|
||||
if b == b'/' || (cfg!(windows) && b == b'\\') {
|
||||
sep
|
||||
} else {
|
||||
b
|
||||
}
|
||||
}).collect();
|
||||
self.write_colored(&transformed_path, |colors| colors.path());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn line_number(&mut self, n: u64, sep: u8) {
|
||||
let line_number = n.to_string();
|
||||
self.write_colored(line_number.as_bytes(), |colors| colors.line());
|
||||
self.separator(&[sep]);
|
||||
}
|
||||
|
||||
fn column_number(&mut self, n: u64, sep: u8) {
|
||||
self.write_colored(n.to_string().as_bytes(), |colors| colors.column());
|
||||
self.separator(&[sep]);
|
||||
}
|
||||
|
||||
fn write_byte_offset(&mut self, o: u64, sep: u8) {
|
||||
self.write_colored(o.to_string().as_bytes(), |colors| colors.column());
|
||||
self.separator(&[sep]);
|
||||
}
|
||||
|
||||
fn write(&mut self, buf: &[u8]) {
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(buf);
|
||||
}
|
||||
|
||||
fn write_eol(&mut self) {
|
||||
let eol = self.eol;
|
||||
self.write(&[eol]);
|
||||
}
|
||||
|
||||
fn write_colored<F>(&mut self, buf: &[u8], get_color: F)
|
||||
where F: Fn(&ColorSpecs) -> &ColorSpec
|
||||
{
|
||||
let _ = self.wtr.set_color(get_color(&self.colors));
|
||||
self.write(buf);
|
||||
let _ = self.wtr.reset();
|
||||
}
|
||||
|
||||
fn write_file_sep(&mut self) {
|
||||
if let Some(ref sep) = self.file_separator {
|
||||
self.has_printed = true;
|
||||
let _ = self.wtr.write_all(sep);
|
||||
let _ = self.wtr.write_all(b"\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The ways in which parsing a color specification can fail.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
    /// An unrecognized output type was used; holds the offending name.
    UnrecognizedOutType(String),
    /// An unrecognized spec type was used; holds the offending name.
    UnrecognizedSpecType(String),
    /// An unrecognized color name was used; holds the offending name and the
    /// message to display.
    UnrecognizedColor(String, String),
    /// An unrecognized style attribute was used; holds the offending name.
    UnrecognizedStyle(String),
    /// The format of the color specification is invalid; holds the original
    /// specification.
    InvalidFormat(String),
}
|
||||
|
||||
impl error::Error for Error {
|
||||
fn description(&self) -> &str {
|
||||
match *self {
|
||||
Error::UnrecognizedOutType(_) => "unrecognized output type",
|
||||
Error::UnrecognizedSpecType(_) => "unrecognized spec type",
|
||||
Error::UnrecognizedColor(_, _) => "unrecognized color name",
|
||||
Error::UnrecognizedStyle(_) => "unrecognized style attribute",
|
||||
Error::InvalidFormat(_) => "invalid color spec",
|
||||
}
|
||||
}
|
||||
|
||||
fn cause(&self) -> Option<&error::Error> {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for Error {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match *self {
|
||||
Error::UnrecognizedOutType(ref name) => {
|
||||
write!(f, "Unrecognized output type '{}'. Choose from: \
|
||||
path, line, column, match.", name)
|
||||
}
|
||||
Error::UnrecognizedSpecType(ref name) => {
|
||||
write!(f, "Unrecognized spec type '{}'. Choose from: \
|
||||
fg, bg, style, none.", name)
|
||||
}
|
||||
Error::UnrecognizedColor(_, ref msg) => {
|
||||
write!(f, "{}", msg)
|
||||
}
|
||||
Error::UnrecognizedStyle(ref name) => {
|
||||
write!(f, "Unrecognized style attribute '{}'. Choose from: \
|
||||
nobold, bold, nointense, intense, nounderline, \
|
||||
underline.", name)
|
||||
}
|
||||
Error::InvalidFormat(ref original) => {
|
||||
write!(
|
||||
f,
|
||||
"Invalid color spec format: '{}'. Valid format \
|
||||
is '(path|line|column|match):(fg|bg|style):(value)'.",
|
||||
original)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<ParseColorError> for Error {
|
||||
fn from(err: ParseColorError) -> Error {
|
||||
Error::UnrecognizedColor(err.invalid().to_string(), err.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
/// A merged set of color specifications.
|
||||
#[derive(Clone, Debug, Default, Eq, PartialEq)]
|
||||
pub struct ColorSpecs {
|
||||
path: ColorSpec,
|
||||
line: ColorSpec,
|
||||
column: ColorSpec,
|
||||
matched: ColorSpec,
|
||||
}
|
||||
|
||||
/// A single color specification provided by the user.
|
||||
///
|
||||
/// A `ColorSpecs` can be built by merging a sequence of `Spec`s.
|
||||
///
|
||||
/// ## Example
|
||||
///
|
||||
/// The only way to build a `Spec` is to parse it from a string. Once multiple
|
||||
/// `Spec`s have been constructed, then can be merged into a single
|
||||
/// `ColorSpecs` value.
|
||||
///
|
||||
/// ```rust
|
||||
/// use termcolor::{Color, ColorSpecs, Spec};
|
||||
///
|
||||
/// let spec1: Spec = "path:fg:blue".parse().unwrap();
|
||||
/// let spec2: Spec = "match:bg:green".parse().unwrap();
|
||||
/// let specs = ColorSpecs::new(&[spec1, spec2]);
|
||||
///
|
||||
/// assert_eq!(specs.path().fg(), Some(Color::Blue));
|
||||
/// assert_eq!(specs.matched().bg(), Some(Color::Green));
|
||||
/// ```
|
||||
///
|
||||
/// ## Format
|
||||
///
|
||||
/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each
|
||||
/// component is defined as follows:
|
||||
///
|
||||
/// * `{type}` can be one of `path`, `line`, `column` or `match`.
|
||||
/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also
|
||||
/// be the special value `none`, in which case, `{value}` can be omitted.
|
||||
/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction.
|
||||
///
|
||||
/// `{type}` controls which part of the output should be styled and is
|
||||
/// application dependent.
|
||||
///
|
||||
/// When `{attribute}` is `none`, then this should cause any existing color
|
||||
/// settings to be cleared.
|
||||
///
|
||||
/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it
|
||||
/// should be a style instruction when `{attribute}` is `style`. When
|
||||
/// `{attribute}` is `none`, `{value}` must be omitted.
|
||||
///
|
||||
/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`,
|
||||
/// `yellow`, `white`.
|
||||
///
|
||||
/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`,
|
||||
/// `underline`, `nounderline`.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
pub struct Spec {
|
||||
ty: OutType,
|
||||
value: SpecValue,
|
||||
}
|
||||
|
||||
/// The actual value given by the specification.
|
||||
#[derive(Clone, Debug, Eq, PartialEq)]
|
||||
enum SpecValue {
|
||||
None,
|
||||
Fg(Color),
|
||||
Bg(Color),
|
||||
Style(Style),
|
||||
}
|
||||
|
||||
/// The set of configurable portions of ripgrep's output.
///
/// This is the `{type}` component of a user supplied color specification.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum OutType {
    /// File paths (`path`).
    Path,
    /// Line numbers (`line`).
    Line,
    /// Column numbers (`column`).
    Column,
    /// Matched text (`match`).
    Match,
}
|
||||
|
||||
/// The specification type.
///
/// This is the `{attribute}` component of a user supplied color
/// specification.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SpecType {
    /// A foreground color follows (`fg`).
    Fg,
    /// A background color follows (`bg`).
    Bg,
    /// A style instruction follows (`style`).
    Style,
    /// No value follows; existing settings are cleared (`none`).
    None,
}
|
||||
|
||||
/// The set of available styles for use in the terminal.
///
/// Each attribute comes as an "on" variant and a matching "off" variant so
/// that a later specification can undo an earlier one.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum Style {
    /// Enable bold text (`bold`).
    Bold,
    /// Disable bold text (`nobold`).
    NoBold,
    /// Enable intense colors (`intense`).
    Intense,
    /// Disable intense colors (`nointense`).
    NoIntense,
    /// Enable underlining (`underline`).
    Underline,
    /// Disable underlining (`nounderline`).
    NoUnderline,
}
|
||||
|
||||
impl ColorSpecs {
|
||||
/// Create color specifications from a list of user supplied
|
||||
/// specifications.
|
||||
pub fn new(user_specs: &[Spec]) -> ColorSpecs {
|
||||
let mut specs = ColorSpecs::default();
|
||||
for user_spec in user_specs {
|
||||
match user_spec.ty {
|
||||
OutType::Path => user_spec.merge_into(&mut specs.path),
|
||||
OutType::Line => user_spec.merge_into(&mut specs.line),
|
||||
OutType::Column => user_spec.merge_into(&mut specs.column),
|
||||
OutType::Match => user_spec.merge_into(&mut specs.matched),
|
||||
}
|
||||
}
|
||||
specs
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring file paths.
|
||||
fn path(&self) -> &ColorSpec {
|
||||
&self.path
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring line numbers.
|
||||
fn line(&self) -> &ColorSpec {
|
||||
&self.line
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring column numbers.
|
||||
fn column(&self) -> &ColorSpec {
|
||||
&self.column
|
||||
}
|
||||
|
||||
/// Return the color specification for coloring matched text.
|
||||
fn matched(&self) -> &ColorSpec {
|
||||
&self.matched
|
||||
}
|
||||
}
|
||||
|
||||
impl Spec {
|
||||
/// Merge this spec into the given color specification.
|
||||
fn merge_into(&self, cspec: &mut ColorSpec) {
|
||||
self.value.merge_into(cspec);
|
||||
}
|
||||
}
|
||||
|
||||
impl SpecValue {
|
||||
/// Merge this spec value into the given color specification.
|
||||
fn merge_into(&self, cspec: &mut ColorSpec) {
|
||||
match *self {
|
||||
SpecValue::None => cspec.clear(),
|
||||
SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); }
|
||||
SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); }
|
||||
SpecValue::Style(ref style) => {
|
||||
match *style {
|
||||
Style::Bold => { cspec.set_bold(true); }
|
||||
Style::NoBold => { cspec.set_bold(false); }
|
||||
Style::Intense => { cspec.set_intense(true); }
|
||||
Style::NoIntense => { cspec.set_intense(false); }
|
||||
Style::Underline => { cspec.set_underline(true); }
|
||||
Style::NoUnderline => { cspec.set_underline(false); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Spec {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Spec, Error> {
|
||||
let pieces: Vec<&str> = s.split(':').collect();
|
||||
if pieces.len() <= 1 || pieces.len() > 3 {
|
||||
return Err(Error::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let otype: OutType = pieces[0].parse()?;
|
||||
match pieces[1].parse()? {
|
||||
SpecType::None => Ok(Spec { ty: otype, value: SpecValue::None }),
|
||||
SpecType::Style => {
|
||||
if pieces.len() < 3 {
|
||||
return Err(Error::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let style: Style = pieces[2].parse()?;
|
||||
Ok(Spec { ty: otype, value: SpecValue::Style(style) })
|
||||
}
|
||||
SpecType::Fg => {
|
||||
if pieces.len() < 3 {
|
||||
return Err(Error::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let color: Color = pieces[2].parse()?;
|
||||
Ok(Spec { ty: otype, value: SpecValue::Fg(color) })
|
||||
}
|
||||
SpecType::Bg => {
|
||||
if pieces.len() < 3 {
|
||||
return Err(Error::InvalidFormat(s.to_string()));
|
||||
}
|
||||
let color: Color = pieces[2].parse()?;
|
||||
Ok(Spec { ty: otype, value: SpecValue::Bg(color) })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for OutType {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<OutType, Error> {
|
||||
match &*s.to_lowercase() {
|
||||
"path" => Ok(OutType::Path),
|
||||
"line" => Ok(OutType::Line),
|
||||
"column" => Ok(OutType::Column),
|
||||
"match" => Ok(OutType::Match),
|
||||
_ => Err(Error::UnrecognizedOutType(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for SpecType {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<SpecType, Error> {
|
||||
match &*s.to_lowercase() {
|
||||
"fg" => Ok(SpecType::Fg),
|
||||
"bg" => Ok(SpecType::Bg),
|
||||
"style" => Ok(SpecType::Style),
|
||||
"none" => Ok(SpecType::None),
|
||||
_ => Err(Error::UnrecognizedSpecType(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl FromStr for Style {
|
||||
type Err = Error;
|
||||
|
||||
fn from_str(s: &str) -> Result<Style, Error> {
|
||||
match &*s.to_lowercase() {
|
||||
"bold" => Ok(Style::Bold),
|
||||
"nobold" => Ok(Style::NoBold),
|
||||
"intense" => Ok(Style::Intense),
|
||||
"nointense" => Ok(Style::NoIntense),
|
||||
"underline" => Ok(Style::Underline),
|
||||
"nounderline" => Ok(Style::NoUnderline),
|
||||
_ => Err(Error::UnrecognizedStyle(s.to_string())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use termcolor::{Color, ColorSpec};
    use super::{ColorSpecs, Error, OutType, Spec, SpecValue, Style};

    /// Later specs are applied on top of earlier ones, and `none` clears
    /// whatever was set before it.
    #[test]
    fn merge() {
        let user_specs: &[Spec] = &[
            "match:fg:blue".parse().unwrap(),
            "match:none".parse().unwrap(),
            "match:style:bold".parse().unwrap(),
        ];
        let mut expect_matched = ColorSpec::new();
        expect_matched.set_bold(true);
        assert_eq!(ColorSpecs::new(user_specs), ColorSpecs {
            path: ColorSpec::default(),
            line: ColorSpec::default(),
            column: ColorSpec::default(),
            matched: expect_matched,
        });
    }

    /// Well-formed specs parse into the expected type/value pairs.
    #[test]
    fn specs() {
        let spec: Spec = "path:fg:blue".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Path,
            value: SpecValue::Fg(Color::Blue),
        });

        let spec: Spec = "path:bg:red".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Path,
            value: SpecValue::Bg(Color::Red),
        });

        let spec: Spec = "match:style:bold".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Match,
            value: SpecValue::Style(Style::Bold),
        });

        let spec: Spec = "match:style:intense".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Match,
            value: SpecValue::Style(Style::Intense),
        });

        let spec: Spec = "match:style:underline".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Match,
            value: SpecValue::Style(Style::Underline),
        });

        let spec: Spec = "line:none".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Line,
            value: SpecValue::None,
        });

        let spec: Spec = "column:bg:green".parse().unwrap();
        assert_eq!(spec, Spec {
            ty: OutType::Column,
            value: SpecValue::Bg(Color::Green),
        });
    }

    /// Malformed specs are rejected with the most specific error available.
    #[test]
    fn spec_errors() {
        let err = "line:nonee".parse::<Spec>().unwrap_err();
        assert_eq!(err, Error::UnrecognizedSpecType("nonee".to_string()));

        let err = "".parse::<Spec>().unwrap_err();
        assert_eq!(err, Error::InvalidFormat("".to_string()));

        let err = "foo".parse::<Spec>().unwrap_err();
        assert_eq!(err, Error::InvalidFormat("foo".to_string()));

        let err = "line:style:italic".parse::<Spec>().unwrap_err();
        assert_eq!(err, Error::UnrecognizedStyle("italic".to_string()));

        // The message of a color error comes from termcolor, so only the
        // offending name is checked.
        let err = "line:fg:brown".parse::<Spec>().unwrap_err();
        match err {
            Error::UnrecognizedColor(name, _) => assert_eq!(name, "brown"),
            other => panic!("unexpected error: {:?}", other),
        }

        let err = "foo:fg:brown".parse::<Spec>().unwrap_err();
        assert_eq!(err, Error::UnrecognizedOutType("foo".to_string()));
    }
}
|
@ -2,10 +2,10 @@ use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::time::Duration;
|
||||
|
||||
use grep2::matcher::Matcher;
|
||||
use grep2::printer::{JSON, Standard, Summary, Stats};
|
||||
use grep2::regex::RegexMatcher;
|
||||
use grep2::searcher::Searcher;
|
||||
use grep::matcher::Matcher;
|
||||
use grep::printer::{JSON, Standard, Summary, Stats};
|
||||
use grep::regex::RegexMatcher;
|
||||
use grep::searcher::Searcher;
|
||||
use termcolor::WriteColor;
|
||||
|
||||
use decompressor::{DecompressionReader, is_compressed};
|
||||
@ -95,7 +95,6 @@ impl SearchWorkerBuilder {
|
||||
#[derive(Clone, Debug, Default)]
|
||||
pub struct SearchResult {
|
||||
has_match: bool,
|
||||
binary_byte_offset: Option<u64>,
|
||||
stats: Option<Stats>,
|
||||
}
|
||||
|
||||
@ -105,15 +104,6 @@ impl SearchResult {
|
||||
self.has_match
|
||||
}
|
||||
|
||||
/// Whether the search found binary data, and if so, the first absolute
|
||||
/// byte offset at which it was detected.
|
||||
///
|
||||
/// This always returns `None` if binary data detection is disabled, even
|
||||
/// when binary data is present.
|
||||
pub fn binary_byte_offset(&self) -> Option<u64> {
|
||||
self.binary_byte_offset
|
||||
}
|
||||
|
||||
/// Return aggregate search statistics for a single search, if available.
|
||||
///
|
||||
/// It can be expensive to compute statistics, so these are only present
|
||||
@ -168,10 +158,8 @@ impl<W: WriteColor> Printer<W> {
|
||||
total_duration: Duration,
|
||||
stats: &Stats,
|
||||
) -> io::Result<()> {
|
||||
let mut wtr = self.get_mut();
|
||||
|
||||
write!(
|
||||
wtr,
|
||||
self.get_mut(),
|
||||
"
|
||||
{matches} matches
|
||||
{lines} matched lines
|
||||
@ -295,9 +283,7 @@ fn search_path<M: Matcher, W: WriteColor>(
|
||||
searcher.search_path(&matcher, path, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
binary_byte_offset: sink.binary_byte_offset(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
..SearchResult::default()
|
||||
})
|
||||
}
|
||||
Printer::Summary(ref mut p) => {
|
||||
@ -305,9 +291,7 @@ fn search_path<M: Matcher, W: WriteColor>(
|
||||
searcher.search_path(&matcher, path, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
binary_byte_offset: sink.binary_byte_offset(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
..SearchResult::default()
|
||||
})
|
||||
}
|
||||
Printer::JSON(ref mut p) => {
|
||||
@ -315,9 +299,7 @@ fn search_path<M: Matcher, W: WriteColor>(
|
||||
searcher.search_path(&matcher, path, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
binary_byte_offset: sink.binary_byte_offset(),
|
||||
stats: Some(sink.stats().clone()),
|
||||
..SearchResult::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
@ -338,9 +320,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
binary_byte_offset: sink.binary_byte_offset(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
..SearchResult::default()
|
||||
})
|
||||
}
|
||||
Printer::Summary(ref mut p) => {
|
||||
@ -348,9 +328,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
binary_byte_offset: sink.binary_byte_offset(),
|
||||
stats: sink.stats().map(|s| s.clone()),
|
||||
..SearchResult::default()
|
||||
})
|
||||
}
|
||||
Printer::JSON(ref mut p) => {
|
||||
@ -358,9 +336,7 @@ fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
|
||||
searcher.search_reader(&matcher, rdr, &mut sink)?;
|
||||
Ok(SearchResult {
|
||||
has_match: sink.has_match(),
|
||||
binary_byte_offset: sink.binary_byte_offset(),
|
||||
stats: Some(sink.stats().clone()),
|
||||
..SearchResult::default()
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -1,424 +0,0 @@
|
||||
/*!
The `search_buffer` module is responsible for searching a single file all in a
single buffer. Typically, the source of the buffer is a memory map. This can
be useful when memory maps are faster than streaming search.

Note that this module doesn't quite support everything that `search_stream`
does. Notably, it does not support showing contexts.
*/
|
||||
use std::cmp;
|
||||
use std::path::Path;
|
||||
|
||||
use grep::Grep;
|
||||
use termcolor::WriteColor;
|
||||
|
||||
use printer::Printer;
|
||||
use search_stream::{IterLines, Options, count_lines, is_binary};
|
||||
|
||||
pub struct BufferSearcher<'a, W: 'a> {
|
||||
opts: Options,
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
match_line_count: u64,
|
||||
match_count: Option<u64>,
|
||||
line_count: Option<u64>,
|
||||
byte_offset: Option<u64>,
|
||||
last_line: usize,
|
||||
}
|
||||
|
||||
impl<'a, W: WriteColor> BufferSearcher<'a, W> {
|
||||
pub fn new(
|
||||
printer: &'a mut Printer<W>,
|
||||
grep: &'a Grep,
|
||||
path: &'a Path,
|
||||
buf: &'a [u8],
|
||||
) -> BufferSearcher<'a, W> {
|
||||
BufferSearcher {
|
||||
opts: Options::default(),
|
||||
printer: printer,
|
||||
grep: grep,
|
||||
path: path,
|
||||
buf: buf,
|
||||
match_line_count: 0,
|
||||
match_count: None,
|
||||
line_count: None,
|
||||
byte_offset: None,
|
||||
last_line: 0,
|
||||
}
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a 0-based offset of the
|
||||
/// matching line (or the actual match if -o is specified) before
|
||||
/// printing the line itself.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn byte_offset(mut self, yes: bool) -> Self {
|
||||
self.opts.byte_offset = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.opts.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the count of individual matches
|
||||
/// instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.count_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the path instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn files_with_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.files_with_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the path of files that *don't* match
|
||||
/// the given pattern.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn files_without_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.files_without_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line byte used by this searcher.
|
||||
pub fn eol(mut self, eol: u8) -> Self {
|
||||
self.opts.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.opts.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.opts.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Limit the number of matches to the given count.
|
||||
///
|
||||
/// The default is None, which corresponds to no limit.
|
||||
pub fn max_count(mut self, count: Option<u64>) -> Self {
|
||||
self.opts.max_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, don't show any output and quit searching after the first
|
||||
/// match is found.
|
||||
pub fn quiet(mut self, yes: bool) -> Self {
|
||||
self.opts.quiet = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
self
|
||||
}
|
||||
|
||||
#[inline(never)]
|
||||
pub fn run(mut self) -> u64 {
|
||||
let binary_upto = cmp::min(10_240, self.buf.len());
|
||||
if !self.opts.text && is_binary(&self.buf[..binary_upto], true) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
self.match_line_count = 0;
|
||||
self.line_count = if self.opts.line_number { Some(0) } else { None };
|
||||
// The memory map searcher uses one contiguous block of bytes, so the
|
||||
// offsets given the printer are sufficient to compute the byte offset.
|
||||
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
|
||||
self.match_count = if self.opts.count_matches { Some(0) } else { None };
|
||||
let mut last_end = 0;
|
||||
for m in self.grep.iter(self.buf) {
|
||||
if self.opts.invert_match {
|
||||
self.print_inverted_matches(last_end, m.start());
|
||||
} else {
|
||||
self.print_match(m.start(), m.end());
|
||||
}
|
||||
last_end = m.end();
|
||||
if self.opts.terminate(self.match_line_count) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if self.opts.invert_match && !self.opts.terminate(self.match_line_count) {
|
||||
let upto = self.buf.len();
|
||||
self.print_inverted_matches(last_end, upto);
|
||||
}
|
||||
if self.opts.count && self.match_line_count > 0 {
|
||||
self.printer.path_count(self.path, self.match_line_count);
|
||||
} else if self.opts.count_matches
|
||||
&& self.match_count.map_or(false, |c| c > 0)
|
||||
{
|
||||
self.printer.path_count(self.path, self.match_count.unwrap());
|
||||
}
|
||||
if self.opts.files_with_matches && self.match_line_count > 0 {
|
||||
self.printer.path(self.path);
|
||||
}
|
||||
if self.opts.files_without_matches && self.match_line_count == 0 {
|
||||
self.printer.path(self.path);
|
||||
}
|
||||
self.match_line_count
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn count_individual_matches(&mut self, start: usize, end: usize) {
|
||||
if let Some(ref mut count) = self.match_count {
|
||||
for _ in self.grep.regex().find_iter(&self.buf[start..end]) {
|
||||
*count += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
pub fn print_match(&mut self, start: usize, end: usize) {
|
||||
self.match_line_count += 1;
|
||||
self.count_individual_matches(start, end);
|
||||
if self.opts.skip_matches() {
|
||||
return;
|
||||
}
|
||||
self.count_lines(start);
|
||||
self.add_line(end);
|
||||
self.printer.matched(
|
||||
self.grep.regex(), self.path, self.buf,
|
||||
start, end, self.line_count, self.byte_offset);
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn print_inverted_matches(&mut self, start: usize, end: usize) {
|
||||
debug_assert!(self.opts.invert_match);
|
||||
let mut it = IterLines::new(self.opts.eol, start);
|
||||
while let Some((s, e)) = it.next(&self.buf[..end]) {
|
||||
if self.opts.terminate(self.match_line_count) {
|
||||
return;
|
||||
}
|
||||
self.print_match(s, e);
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn count_lines(&mut self, upto: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += count_lines(
|
||||
&self.buf[self.last_line..upto], self.opts.eol);
|
||||
self.last_line = upto;
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn add_line(&mut self, line_end: usize) {
|
||||
if let Some(ref mut line_count) = self.line_count {
|
||||
*line_count += 1;
|
||||
self.last_line = line_end;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::path::Path;

    use grep::GrepBuilder;

    use printer::Printer;
    use termcolor;

    use super::BufferSearcher;

    // The lines of this literal must stay at column zero: any leading
    // whitespace would become part of the haystack.
    const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";

    /// The path reported for every test search.
    fn test_path() -> &'static Path {
        Path::new("/baz.rs")
    }

    type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor<Vec<u8>>>;

    /// Search `haystack` for `pat`, letting `map` configure the searcher
    /// first. Returns the value of `run` together with the printed output.
    fn search<F: FnMut(TestSearcher) -> TestSearcher>(
        pat: &str,
        haystack: &str,
        mut map: F,
    ) -> (u64, String) {
        let outbuf = termcolor::NoColor::new(vec![]);
        let mut pp = Printer::new(outbuf).with_filename(true);
        let grep = GrepBuilder::new(pat).build().unwrap();
        let count = {
            let searcher = BufferSearcher::new(
                &mut pp, &grep, test_path(), haystack.as_bytes());
            map(searcher).run()
        };
        (count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
    }

    #[test]
    fn basic_search() {
        let (count, out) = search("Sherlock", SHERLOCK, |s| s);
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn binary() {
        let text = "Sherlock\n\x00Holmes\n";
        let (count, out) = search("Sherlock|Holmes", text, |s| s);
        assert_eq!(0, count);
        assert_eq!(out, "");
    }

    #[test]
    fn binary_text() {
        let text = "Sherlock\n\x00Holmes\n";
        let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
    }

    #[test]
    fn line_numbers() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.line_number(true));
        assert_eq!(2, count);
        assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn byte_offset() {
        let (_, out) = search(
            "Sherlock", SHERLOCK, |s| s.byte_offset(true));
        assert_eq!(out, "\
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
");
    }

    #[test]
    fn byte_offset_inverted() {
        let (_, out) = search("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).byte_offset(true)
        });
        assert_eq!(out, "\
/baz.rs:65:Holmeses, success in the province of detective work must always
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:321:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn count() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.count(true));
        assert_eq!(2, count);
        assert_eq!(out, "/baz.rs:2\n");
    }

    #[test]
    fn count_matches() {
        let (_, out) = search(
            "the", SHERLOCK, |s| s.count_matches(true));
        assert_eq!(out, "/baz.rs:4\n");
    }

    #[test]
    fn files_with_matches() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.files_with_matches(true));
        assert_eq!(1, count);
        assert_eq!(out, "/baz.rs\n");
    }

    #[test]
    fn files_without_matches() {
        let (count, out) = search(
            "zzzz", SHERLOCK, |s| s.files_without_matches(true));
        assert_eq!(0, count);
        assert_eq!(out, "/baz.rs\n");
    }

    #[test]
    fn max_count() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
        assert_eq!(1, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
    }

    #[test]
    fn invert_match_max_count() {
        let (count, out) = search(
            "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
        assert_eq!(1, count);
        assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
    }

    #[test]
    fn invert_match() {
        let (count, out) = search(
            "Sherlock", SHERLOCK, |s| s.invert_match(true));
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn invert_match_line_numbers() {
        let (count, out) = search("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).line_number(true)
        });
        assert_eq!(4, count);
        assert_eq!(out, "\
/baz.rs:2:Holmeses, success in the province of detective work must always
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:6:and exhibited clearly, with a label attached.
");
    }

    #[test]
    fn invert_match_count() {
        let (count, out) = search("Sherlock", SHERLOCK, |s| {
            s.invert_match(true).count(true)
        });
        assert_eq!(4, count);
        assert_eq!(out, "/baz.rs:4\n");
    }
}
|
1466
src/search_stream.rs
1466
src/search_stream.rs
File diff suppressed because it is too large
Load Diff
397
src/worker.rs
397
src/worker.rs
@ -1,397 +0,0 @@
|
||||
use std::fs::File;
|
||||
use std::io;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use encoding_rs::Encoding;
|
||||
use grep::Grep;
|
||||
use ignore::DirEntry;
|
||||
use memmap::Mmap;
|
||||
use termcolor::WriteColor;
|
||||
|
||||
// use decoder::DecodeReader;
|
||||
use encoding_rs_io::DecodeReaderBytesBuilder;
|
||||
use decompressor::{self, DecompressionReader};
|
||||
use preprocessor::PreprocessorReader;
|
||||
use pathutil::strip_prefix;
|
||||
use printer::Printer;
|
||||
use search_buffer::BufferSearcher;
|
||||
use search_stream::{InputBuffer, Searcher};
|
||||
|
||||
use Result;
|
||||
|
||||
pub enum Work {
|
||||
Stdin,
|
||||
DirEntry(DirEntry),
|
||||
}
|
||||
|
||||
pub struct WorkerBuilder {
|
||||
grep: Grep,
|
||||
opts: Options,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
struct Options {
|
||||
mmap: bool,
|
||||
encoding: Option<&'static Encoding>,
|
||||
after_context: usize,
|
||||
before_context: usize,
|
||||
byte_offset: bool,
|
||||
count: bool,
|
||||
count_matches: bool,
|
||||
files_with_matches: bool,
|
||||
files_without_matches: bool,
|
||||
eol: u8,
|
||||
invert_match: bool,
|
||||
line_number: bool,
|
||||
max_count: Option<u64>,
|
||||
quiet: bool,
|
||||
text: bool,
|
||||
preprocessor: Option<PathBuf>,
|
||||
search_zip_files: bool
|
||||
}
|
||||
|
||||
impl Default for Options {
|
||||
fn default() -> Options {
|
||||
Options {
|
||||
mmap: false,
|
||||
encoding: None,
|
||||
after_context: 0,
|
||||
before_context: 0,
|
||||
byte_offset: false,
|
||||
count: false,
|
||||
count_matches: false,
|
||||
files_with_matches: false,
|
||||
files_without_matches: false,
|
||||
eol: b'\n',
|
||||
invert_match: false,
|
||||
line_number: false,
|
||||
max_count: None,
|
||||
quiet: false,
|
||||
text: false,
|
||||
search_zip_files: false,
|
||||
preprocessor: None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl WorkerBuilder {
|
||||
/// Create a new builder for a worker.
|
||||
///
|
||||
/// A reusable input buffer and a grep matcher are required, but there
|
||||
/// are numerous additional options that can be configured on this builder.
|
||||
pub fn new(grep: Grep) -> WorkerBuilder {
|
||||
WorkerBuilder {
|
||||
grep: grep,
|
||||
opts: Options::default(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Create the worker from this builder.
|
||||
pub fn build(self) -> Worker {
|
||||
let mut inpbuf = InputBuffer::new();
|
||||
inpbuf.eol(self.opts.eol);
|
||||
Worker {
|
||||
grep: self.grep,
|
||||
inpbuf: inpbuf,
|
||||
decodebuf: vec![0; 8 * (1<<10)],
|
||||
opts: self.opts,
|
||||
}
|
||||
}
|
||||
|
||||
/// The number of contextual lines to show after each match. The default
|
||||
/// is zero.
|
||||
pub fn after_context(mut self, count: usize) -> Self {
|
||||
self.opts.after_context = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// The number of contextual lines to show before each match. The default
|
||||
/// is zero.
|
||||
pub fn before_context(mut self, count: usize) -> Self {
|
||||
self.opts.before_context = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a 0-based offset of the
|
||||
/// matching line (or the actual match if -o is specified) before
|
||||
/// printing the line itself.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn byte_offset(mut self, yes: bool) -> Self {
|
||||
self.opts.byte_offset = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print a count instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count(mut self, yes: bool) -> Self {
|
||||
self.opts.count = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the count of individual matches
|
||||
/// instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn count_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.count_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the encoding to use to read each file.
|
||||
///
|
||||
/// If the encoding is `None` (the default), then the encoding is
|
||||
/// automatically detected on a best-effort per-file basis.
|
||||
pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self {
|
||||
self.opts.encoding = enc;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the path instead of each match.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn files_with_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.files_with_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, searching will print the path of files without any matches.
|
||||
///
|
||||
/// Disabled by default.
|
||||
pub fn files_without_matches(mut self, yes: bool) -> Self {
|
||||
self.opts.files_without_matches = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the end-of-line byte used by this searcher.
|
||||
pub fn eol(mut self, eol: u8) -> Self {
|
||||
self.opts.eol = eol;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, matching is inverted so that lines that *don't* match the
|
||||
/// given pattern are treated as matches.
|
||||
pub fn invert_match(mut self, yes: bool) -> Self {
|
||||
self.opts.invert_match = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, compute line numbers and prefix each line of output with
|
||||
/// them.
|
||||
pub fn line_number(mut self, yes: bool) -> Self {
|
||||
self.opts.line_number = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Limit the number of matches to the given count.
|
||||
///
|
||||
/// The default is None, which corresponds to no limit.
|
||||
pub fn max_count(mut self, count: Option<u64>) -> Self {
|
||||
self.opts.max_count = count;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, try to use memory maps for searching if possible.
|
||||
pub fn mmap(mut self, yes: bool) -> Self {
|
||||
self.opts.mmap = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, don't show any output and quit searching after the first
|
||||
/// match is found.
|
||||
pub fn quiet(mut self, yes: bool) -> Self {
|
||||
self.opts.quiet = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search binary files as if they were text.
|
||||
pub fn text(mut self, yes: bool) -> Self {
|
||||
self.opts.text = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If enabled, search through compressed files as well
|
||||
pub fn search_zip_files(mut self, yes: bool) -> Self {
|
||||
self.opts.search_zip_files = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// If non-empty, search output of preprocessor run on each file
|
||||
pub fn preprocessor(mut self, command: Option<PathBuf>) -> Self {
|
||||
self.opts.preprocessor = command;
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// Worker is responsible for executing searches on file paths, while choosing
|
||||
/// streaming search or memory map search as appropriate.
|
||||
pub struct Worker {
|
||||
grep: Grep,
|
||||
inpbuf: InputBuffer,
|
||||
decodebuf: Vec<u8>,
|
||||
opts: Options,
|
||||
}
|
||||
|
||||
impl Worker {
|
||||
/// Execute the worker with the given printer and work item.
|
||||
///
|
||||
/// A work item can either be stdin or a file path.
|
||||
pub fn run<W: WriteColor>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
work: Work,
|
||||
) -> u64 {
|
||||
let result = match work {
|
||||
Work::Stdin => {
|
||||
let stdin = io::stdin();
|
||||
let stdin = stdin.lock();
|
||||
self.search(printer, Path::new("<stdin>"), stdin)
|
||||
}
|
||||
Work::DirEntry(dent) => {
|
||||
let mut path = dent.path();
|
||||
if self.opts.preprocessor.is_some() {
|
||||
let cmd = self.opts.preprocessor.clone().unwrap();
|
||||
match PreprocessorReader::from_cmd_path(cmd, path) {
|
||||
Ok(reader) => self.search(printer, path, reader),
|
||||
Err(err) => {
|
||||
message!("{}", err);
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else if self.opts.search_zip_files
|
||||
&& decompressor::is_compressed(path)
|
||||
{
|
||||
match DecompressionReader::from_path(path) {
|
||||
Some(reader) => self.search(printer, path, reader),
|
||||
None => {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let file = match File::open(path) {
|
||||
Ok(file) => file,
|
||||
Err(err) => {
|
||||
message!("{}: {}", path.display(), err);
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
if let Some(p) = strip_prefix("./", path) {
|
||||
path = p;
|
||||
}
|
||||
if self.opts.mmap {
|
||||
self.search_mmap(printer, path, &file)
|
||||
} else {
|
||||
self.search(printer, path, file)
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
match result {
|
||||
Ok(count) => {
|
||||
count
|
||||
}
|
||||
Err(err) => {
|
||||
message!("{}", err);
|
||||
0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn search<R: io::Read, W: WriteColor>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
rdr: R,
|
||||
) -> Result<u64> {
|
||||
let rdr = DecodeReaderBytesBuilder::new()
|
||||
.encoding(self.opts.encoding)
|
||||
.utf8_passthru(true)
|
||||
.build_with_buffer(rdr, &mut self.decodebuf)?;
|
||||
let searcher = Searcher::new(
|
||||
&mut self.inpbuf, printer, &self.grep, path, rdr);
|
||||
searcher
|
||||
.after_context(self.opts.after_context)
|
||||
.before_context(self.opts.before_context)
|
||||
.byte_offset(self.opts.byte_offset)
|
||||
.count(self.opts.count)
|
||||
.count_matches(self.opts.count_matches)
|
||||
.files_with_matches(self.opts.files_with_matches)
|
||||
.files_without_matches(self.opts.files_without_matches)
|
||||
.eol(self.opts.eol)
|
||||
.line_number(self.opts.line_number)
|
||||
.invert_match(self.opts.invert_match)
|
||||
.max_count(self.opts.max_count)
|
||||
.quiet(self.opts.quiet)
|
||||
.text(self.opts.text)
|
||||
.run()
|
||||
.map_err(From::from)
|
||||
}
|
||||
|
||||
fn search_mmap<W: WriteColor>(
|
||||
&mut self,
|
||||
printer: &mut Printer<W>,
|
||||
path: &Path,
|
||||
file: &File,
|
||||
) -> Result<u64> {
|
||||
if file.metadata()?.len() == 0 {
|
||||
// Opening a memory map with an empty file results in an error.
|
||||
// However, this may not actually be an empty file! For example,
|
||||
// /proc/cpuinfo reports itself as an empty file, but it can
|
||||
// produce data when it's read from. Therefore, we fall back to
|
||||
// regular read calls.
|
||||
return self.search(printer, path, file);
|
||||
}
|
||||
let mmap = match self.mmap(file)? {
|
||||
None => return self.search(printer, path, file),
|
||||
Some(mmap) => mmap,
|
||||
};
|
||||
let buf = &*mmap;
|
||||
if buf.len() >= 3 && Encoding::for_bom(buf).is_some() {
|
||||
// If we have a UTF-16 bom in our memory map, then we need to fall
|
||||
// back to the stream reader, which will do transcoding.
|
||||
return self.search(printer, path, file);
|
||||
}
|
||||
let searcher = BufferSearcher::new(printer, &self.grep, path, buf);
|
||||
Ok(searcher
|
||||
.byte_offset(self.opts.byte_offset)
|
||||
.count(self.opts.count)
|
||||
.count_matches(self.opts.count_matches)
|
||||
.files_with_matches(self.opts.files_with_matches)
|
||||
.files_without_matches(self.opts.files_without_matches)
|
||||
.eol(self.opts.eol)
|
||||
.line_number(self.opts.line_number)
|
||||
.invert_match(self.opts.invert_match)
|
||||
.max_count(self.opts.max_count)
|
||||
.quiet(self.opts.quiet)
|
||||
.text(self.opts.text)
|
||||
.run())
|
||||
}
|
||||
|
||||
#[cfg(not(unix))]
|
||||
fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
|
||||
Ok(Some(mmap_readonly(file)?))
|
||||
}
|
||||
|
||||
#[cfg(unix)]
|
||||
fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
|
||||
use libc::{EOVERFLOW, ENODEV, ENOMEM};
|
||||
|
||||
let err = match mmap_readonly(file) {
|
||||
Ok(mmap) => return Ok(Some(mmap)),
|
||||
Err(err) => err,
|
||||
};
|
||||
let code = err.raw_os_error();
|
||||
if code == Some(EOVERFLOW)
|
||||
|| code == Some(ENODEV)
|
||||
|| code == Some(ENOMEM)
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
Err(From::from(err))
|
||||
}
|
||||
}
|
||||
|
||||
fn mmap_readonly(file: &File) -> io::Result<Mmap> {
|
||||
unsafe { Mmap::map(file) }
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user