1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-03-17 20:28:03 +02:00

ripgrep: migrate to libripgrep

This commit does the work to delete the old `grep` crate and effectively
rewrite most of ripgrep core to use the new libripgrep crates. The new
`grep` crate is now a facade that collects the various crates that make
up libripgrep.

The most complex part of ripgrep core is now arguably the translation
between command line parameters and the library options, which is
ultimately where we want to be.
This commit is contained in:
Andrew Gallant 2018-08-03 17:26:22 -04:00
parent d9ca529356
commit bb110c1ebe
47 changed files with 3302 additions and 6020 deletions

View File

@ -17,6 +17,8 @@ addons:
# Needed for testing decompression search.
- xz-utils
- liblz4-tool
# For building MUSL static builds on Linux.
- musl-tools
matrix:
fast_finish: true
include:

39
Cargo.lock generated
View File

@ -139,12 +139,16 @@ dependencies = [
[[package]]
name = "grep"
version = "0.1.9"
version = "0.2.0"
dependencies = [
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"grep-matcher 0.0.1",
"grep-pcre2 0.0.1",
"grep-printer 0.0.1",
"grep-regex 0.0.1",
"grep-searcher 0.0.1",
"termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -204,16 +208,6 @@ dependencies = [
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "grep2"
version = "0.1.8"
dependencies = [
"grep-matcher 0.0.1",
"grep-printer 0.0.1",
"grep-regex 0.0.1",
"grep-searcher 0.0.1",
]
[[package]]
name = "ignore"
version = "0.4.3"
@ -227,7 +221,7 @@ dependencies = [
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
"thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)",
"walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -378,21 +372,16 @@ name = "ripgrep"
version = "0.9.0"
dependencies = [
"atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)",
"bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)",
"encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"globset 0.4.1",
"grep 0.1.9",
"grep 0.2.0",
"ignore 0.4.3",
"lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)",
"num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)",
"regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)",
"termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -520,7 +509,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "walkdir"
version = "2.1.4"
version = "2.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
@ -608,7 +597,7 @@ dependencies = [
"checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526"
"checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc"
"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
"checksum walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "63636bd0eb3d00ccb8b9036381b526efac53caf112b7783b730ab3f8e44da369"
"checksum walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f1b768ba943161a9226ccd59b26bcd901e5d60e6061f4fcad3034784e0c7372b"
"checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd"
"checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
"checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View File

@ -35,7 +35,6 @@ path = "tests/tests.rs"
members = [
"globset",
"grep",
"grep2",
"grep-matcher",
"grep-pcre2",
"grep-printer",
@ -46,20 +45,15 @@ members = [
[dependencies]
atty = "0.2.11"
bytecount = "0.3.2"
encoding_rs = "0.8"
encoding_rs_io = "0.1"
globset = { version = "0.4.0", path = "globset" }
grep = { version = "0.1.8", path = "grep" }
grep = { version = "0.2.0", path = "grep" }
ignore = { version = "0.4.0", path = "ignore" }
lazy_static = "1"
libc = "0.2"
log = "0.4"
memchr = "2"
memmap = "0.6"
num_cpus = "1"
regex = "1"
same-file = "1"
serde_json = "1"
termcolor = "1"
[dependencies.clap]
@ -69,7 +63,7 @@ features = ["suggestions", "color"]
[target.'cfg(windows)'.dependencies.winapi]
version = "0.3"
features = ["std", "winnt"]
features = ["std", "fileapi", "winnt"]
[build-dependencies]
lazy_static = "1"
@ -80,15 +74,9 @@ default-features = false
features = ["suggestions", "color"]
[features]
avx-accel = [
"bytecount/avx-accel",
"grep2/avx-accel",
]
simd-accel = [
"bytecount/simd-accel",
"encoding_rs/simd-accel",
"grep2/simd-accel",
]
avx-accel = ["grep/avx-accel"]
simd-accel = ["grep/simd-accel"]
pcre2 = ["grep/pcre2"]
[profile.release]
debug = true
debug = 1

36
FAQ.md
View File

@ -157,13 +157,37 @@ tool. With that said,
How do I use lookaround and/or backreferences?
</h3>
This isn't currently possible. ripgrep uses finite automata to implement
regular expression search, and in turn, guarantees linear time searching on all
inputs. It is difficult to efficiently support lookaround and backreferences in
finite automata engines, so ripgrep does not provide these features.
ripgrep's default regex engine does not support lookaround or backreferences.
This is primarily because the default regex engine is implemented using finite
state machines in order to guarantee a linear worst case time complexity on all
inputs. Backreferences are not possible to implement in this paradigm, and
lookaround appears difficult to do efficiently.
If a production quality regular expression engine with these features is ever
written in Rust, then it is possible ripgrep will provide it as an opt-in
However, ripgrep optionally supports using PCRE2 as the regex engine instead of
the default one based on finite state machines. You can enable PCRE2 with the
`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily
find every run of ten word characters that is immediately repeated:
```
$ rg -P '(\w{10})\1'
tests/misc.rs
483: cmd.arg("--max-filesize").arg("44444444444444444444");
globset/src/glob.rs
1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa");
```
If your version of ripgrep doesn't support PCRE2, then you'll get an error
message when you try to use the `-P/--pcre2` flag:
```
$ rg -P '(\w{10})\1'
PCRE2 is not available in this build of ripgrep
```
Most of the releases distributed by the ripgrep project here on GitHub will
come bundled with PCRE2 enabled. If you installed ripgrep through a different
means (like your system's package manager), then please reach out to the
maintainer of that package to see whether it's possible to enable the PCRE2
feature.

118
README.md
View File

@ -7,7 +7,7 @@ available for [every release](https://github.com/BurntSushi/ripgrep/releases).
ripgrep is similar to other popular search tools like The Silver Searcher,
ack and grep.
[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg?branch=master)](https://travis-ci.org/BurntSushi/ripgrep)
[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep)
@ -85,14 +85,16 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep.
### Why should I use ripgrep?
* It can replace many use cases served by both The Silver Searcher and GNU grep
because it is generally faster than both. (See [the FAQ](FAQ.md#posix4ever)
for more details on whether ripgrep can truly replace grep.)
* Like The Silver Searcher, ripgrep defaults to recursive directory search
and won't search files ignored by your `.gitignore` files. It also ignores
hidden and binary files by default. ripgrep also implements full support
for `.gitignore`, whereas there are many bugs related to that functionality
in The Silver Searcher.
* It can replace many use cases served by other search tools
because it contains most of their features and is generally faster. (See
[the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly
replace grep.)
* Like other tools specialized to code search, ripgrep defaults to recursive
directory search and won't search files ignored by your `.gitignore` files.
It also ignores hidden and binary files by default. ripgrep also implements
full support for `.gitignore`, whereas other code search tools that claim to
support it have many bugs in that area.
* ripgrep can search specific types of files. For example, `rg -tpy foo`
limits your search to Python files and `rg -Tjs foo` excludes Javascript
files from your search. ripgrep can be taught about new file types with
@ -117,22 +119,24 @@ bugs, and Unicode support.
### Why shouldn't I use ripgrep?
I'd like to try to convince you why you *shouldn't* use ripgrep. This should
give you a glimpse at some important downsides or missing features of
ripgrep.
Despite initially not wanting to add every feature under the sun to ripgrep,
over time, ripgrep has grown support for most features found in other file
searching tools. This includes searching for results spanning across multiple
lines, and opt-in support for PCRE2, which provides look-around and
backreference support.
* ripgrep uses a regex engine based on finite automata, so if you want fancy
regex features such as backreferences or lookaround, ripgrep won't provide
them to you. ripgrep does support lots of things though, including, but not
limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`),
begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and
support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or
`\p{Lu}` to match any uppercase letter). (Fancier regexes will never be
supported.)
* ripgrep doesn't have multiline search. (Will happen as an opt-in feature.)
At this point, the primary reasons not to use ripgrep probably consist of one
or more of the following:
In other words, if you like fancy regexes or multiline search, then ripgrep
may not quite meet your needs (yet).
* You need a portable and ubiquitous tool. While ripgrep works on Windows,
macOS and Linux, it is not ubiquitous and it does not conform to any
standard such as POSIX. The best tool for this job is good old grep.
* There still exists some other minor feature (or bug) found in another tool
that isn't in ripgrep.
* There is a performance edge case where ripgrep doesn't do well where another
tool does do well. (Please file a bug report!)
* It isn't possible to install ripgrep on your machine, or ripgrep isn't
available for your platform. (Please file a bug report!)
### Is it really faster than everything else?
@ -145,7 +149,8 @@ Summarizing, ripgrep is fast because:
* It is built on top of
[Rust's regex engine](https://github.com/rust-lang-nursery/regex).
Rust's regex engine uses finite automata, SIMD and aggressive literal
optimizations to make searching very fast.
optimizations to make searching very fast. (PCRE2 support can be opted into
with the `-P/--pcre2` flag.)
* Rust's regex library maintains performance with full Unicode support by
building UTF-8 decoding directly into its deterministic finite automaton
engine.
@ -168,6 +173,11 @@ Andy Lester, author of [ack](https://beyondgrep.com/), has published an
excellent table comparing the features of ack, ag, git-grep, GNU grep and
ripgrep: https://beyondgrep.com/feature-comparison/
Note that ripgrep has grown a few significant new features recently that
are not yet present in Andy's table. This includes, but is not limited to,
configuration files, passthru, support for searching compressed files,
multiline search and opt-in fancy regex support via PCRE2.
### Installation
@ -207,13 +217,15 @@ If you're a **MacPorts** user, then you can install ripgrep from the
$ sudo port install ripgrep
```
If you're a **Windows Chocolatey** user, then you can install ripgrep from the [official repo](https://chocolatey.org/packages/ripgrep):
If you're a **Windows Chocolatey** user, then you can install ripgrep from the
[official repo](https://chocolatey.org/packages/ripgrep):
```
$ choco install ripgrep
```
If you're a **Windows Scoop** user, then you can install ripgrep from the [official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
If you're a **Windows Scoop** user, then you can install ripgrep from the
[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json):
```
$ scoop install ripgrep
@ -225,32 +237,37 @@ If you're an **Arch Linux** user, then you can install ripgrep from the official
$ pacman -S ripgrep
```
If you're a **Gentoo** user, you can install ripgrep from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
If you're a **Gentoo** user, you can install ripgrep from the
[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep):
```
$ emerge sys-apps/ripgrep
```
If you're a **Fedora 27+** user, you can install ripgrep from official repositories.
If you're a **Fedora 27+** user, you can install ripgrep from official
repositories.
```
$ sudo dnf install ripgrep
```
If you're a **Fedora 24+** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
If you're a **Fedora 24+** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
$ sudo dnf copr enable carlwgeorge/ripgrep
$ sudo dnf install ripgrep
```
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the [official repo](http://software.opensuse.org/package/ripgrep):
If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the
[official repo](http://software.opensuse.org/package/ripgrep):
```
$ sudo zypper install ripgrep
```
If you're a **RHEL/CentOS 7** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
If you're a **RHEL/CentOS 7** user, you can install ripgrep from
[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/):
```
$ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo
@ -286,25 +303,29 @@ seem to work right and generate a number of very strange bug reports that I
don't know how to fix and don't have the time to fix. Therefore, it is no
longer a recommended installation option.)
If you're a **FreeBSD** user, then you can install ripgrep from the [official ports](https://www.freshports.org/textproc/ripgrep/):
If you're a **FreeBSD** user, then you can install ripgrep from the
[official ports](https://www.freshports.org/textproc/ripgrep/):
```
# pkg install ripgrep
```
If you're an **OpenBSD** user, then you can install ripgrep from the [official ports](http://openports.se/textproc/ripgrep):
If you're an **OpenBSD** user, then you can install ripgrep from the
[official ports](http://openports.se/textproc/ripgrep):
```
$ doas pkg_add ripgrep
```
If you're a **NetBSD** user, then you can install ripgrep from [pkgsrc](http://pkgsrc.se/textproc/ripgrep):
If you're a **NetBSD** user, then you can install ripgrep from
[pkgsrc](http://pkgsrc.se/textproc/ripgrep):
```
# pkgin install ripgrep
```
If you're a **Rust programmer**, ripgrep can be installed with `cargo`.
* Note that the minimum supported version of Rust for ripgrep is **1.23.0**,
although ripgrep may work with older versions.
* Note that the binary may be bigger than expected because it contains debug
@ -353,6 +374,35 @@ are not necessary to get SIMD optimizations for search; those are enabled
automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features
will similarly become unnecessary.
Finally, optional PCRE2 support can be built with ripgrep by enabling the
`pcre2` feature:
```
$ cargo build --release --features 'pcre2'
```
(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile
time SIMD optimizations.)
Enabling the PCRE2 feature will attempt to automatically find and link with
your system's PCRE2 library via `pkg-config`. If one doesn't exist, then
ripgrep will build PCRE2 from source using your system's C compiler and then
statically link it into the final executable. Static linking can be forced even
when there is an available PCRE2 system library by either building ripgrep with
the MUSL target or by setting `PCRE2_SYS_STATIC=1`.
ripgrep can be built with the MUSL target on Linux by first installing the MUSL
library on your system (consult your friendly neighborhood package manager).
Then you just need to add MUSL support to your Rust toolchain and rebuild
ripgrep, which yields a fully static executable:
```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl
```
Applying the `--features` flag from above works as expected.
### Running tests

View File

@ -1,8 +1,6 @@
# Inspired from https://github.com/habitat-sh/habitat/blob/master/appveyor.yml
cache:
- c:\cargo\registry
- c:\cargo\git
- c:\projects\ripgrep\target
init:
- mkdir c:\cargo
@ -19,14 +17,20 @@ environment:
PROJECT_NAME: ripgrep
RUST_BACKTRACE: full
matrix:
- TARGET: i686-pc-windows-gnu
CHANNEL: stable
- TARGET: i686-pc-windows-msvc
CHANNEL: stable
- TARGET: x86_64-pc-windows-gnu
CHANNEL: stable
BITS: 64
MSYS2: 1
- TARGET: x86_64-pc-windows-msvc
CHANNEL: stable
BITS: 64
- TARGET: i686-pc-windows-gnu
CHANNEL: stable
BITS: 32
MSYS2: 1
- TARGET: i686-pc-windows-msvc
CHANNEL: stable
BITS: 32
matrix:
fast_finish: true
@ -35,8 +39,9 @@ matrix:
# (Based on https://github.com/rust-lang/libc/blob/master/appveyor.yml)
install:
- curl -sSf -o rustup-init.exe https://win.rustup.rs/
- rustup-init.exe -y --default-host %TARGET% --no-modify-path
- if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin
- rustup-init.exe -y --default-host %TARGET%
- set PATH=%PATH%;C:\Users\appveyor\.cargo\bin
- if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH%
- rustc -V
- cargo -V
@ -46,11 +51,11 @@ build: false
# Equivalent to Travis' `script` phase
# TODO modify this phase as you see fit
test_script:
- cargo test --verbose --all
- cargo test --verbose --all --features pcre2
before_deploy:
# Generate artifacts for release
- cargo build --release
- cargo build --release --features pcre2
- mkdir staging
- copy target\release\rg.exe staging
- ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging

View File

@ -4,6 +4,7 @@ extern crate clap;
extern crate lazy_static;
use std::env;
use std::ffi::OsString;
use std::fs::{self, File};
use std::io::{self, Read, Write};
use std::path::Path;
@ -18,6 +19,22 @@ use app::{RGArg, RGArgKind};
mod app;
fn main() {
// If our version of Rust has runtime SIMD detection, then set a cfg so
// we know we can test for it. We use this when generating ripgrep's
// --version output.
let version = rustc_version();
let parsed = match Version::parse(&version) {
Ok(parsed) => parsed,
Err(err) => {
eprintln!("failed to parse `rustc --version`: {}", err);
return;
}
};
let minimum = Version { major: 1, minor: 27, patch: 0 };
if version.contains("nightly") || parsed >= minimum {
println!("cargo:rustc-cfg=ripgrep_runtime_cpu");
}
// OUT_DIR is set by Cargo and it's where any additional build artifacts
// are written.
let outdir = match env::var_os("OUT_DIR") {
@ -182,3 +199,63 @@ fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> {
/// Wraps a plain message in an `io::Error` whose kind is `ErrorKind::Other`.
fn ioerr(msg: String) -> io::Error {
    let kind = io::ErrorKind::Other;
    io::Error::new(kind, msg)
}
/// Returns the raw standard output of `<rustc> --version`.
///
/// The compiler binary is taken from the `RUSTC` environment variable when it
/// is set, and falls back to plain `rustc` otherwise.
///
/// Panics if the process cannot be run or if its output is not valid UTF-8.
fn rustc_version() -> String {
    let compiler = match env::var_os("RUSTC") {
        Some(path) => path,
        None => OsString::from("rustc"),
    };
    let finished = process::Command::new(&compiler)
        .arg("--version")
        .output()
        .unwrap();
    String::from_utf8(finished.stdout).unwrap()
}
/// A `major.minor.patch` version number.
///
/// The derived ordering compares `major` first, then `minor`, then `patch`,
/// which is what makes `parsed >= minimum` style checks meaningful.
#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)]
struct Version {
    major: u32,
    minor: u32,
    patch: u32,
}

impl Version {
    /// Parses a version out of text shaped like `rustc --version` output,
    /// e.g. `rustc 1.27.0 (3eda71b00 2018-06-19)`.
    ///
    /// The text must start with `rustc ` and what follows must contain at
    /// least three `.`-separated parts. Only the leading decimal digits of
    /// each of the first three parts are used, so suffixes such as
    /// `-nightly (...)` on the patch component are ignored.
    ///
    /// Returns `Err` with a human readable message when the prefix is
    /// missing, when there are fewer than three parts, or when a part does
    /// not begin with a number that fits in a `u32`.
    fn parse(s: &str) -> Result<Version, String> {
        const PREFIX: &str = "rustc ";

        if !s.starts_with(PREFIX) {
            return Err(format!("unrecognized version string: {}", s));
        }
        let parts: Vec<&str> = s[PREFIX.len()..].split('.').collect();
        if parts.len() < 3 {
            return Err(format!("not enough version parts: {:?}", parts));
        }
        // Fields are evaluated in order, so the first malformed component
        // (major, then minor, then patch) determines the reported error.
        Ok(Version {
            major: Version::leading_number(parts[0])?,
            minor: Version::leading_number(parts[1])?,
            patch: Version::leading_number(parts[2])?,
        })
    }

    /// Parses the run of ASCII decimal digits at the start of `part`,
    /// ignoring whatever follows it.
    fn leading_number(part: &str) -> Result<u32, String> {
        // Digits are single-byte, so the index of the first non-digit is
        // always a valid place to slice.
        let end = part
            .find(|c: char| !c.is_digit(10))
            .unwrap_or_else(|| part.len());
        part[..end].parse::<u32>().map_err(|e| e.to_string())
    }
}

View File

@ -8,7 +8,11 @@ set -ex
# Generate artifacts for release
mk_artifacts() {
cargo build --target "$TARGET" --release
if is_arm; then
cargo build --target "$TARGET" --release
else
cargo build --target "$TARGET" --release --features 'pcre2'
fi
}
mk_tarball() {

View File

@ -8,7 +8,11 @@ set -ex
main() {
# Test a normal debug build.
cargo build --target "$TARGET" --verbose --all
if is_arm; then
cargo build --target "$TARGET" --verbose
else
cargo build --target "$TARGET" --verbose --all --features 'pcre2'
fi
# Show the output of the most recent build.rs stderr.
set +x
@ -40,7 +44,7 @@ main() {
"$(dirname "${0}")/test_complete.sh"
# Run tests for ripgrep and all sub-crates.
cargo test --target "$TARGET" --verbose --all
cargo test --target "$TARGET" --verbose --all --features 'pcre2'
}
main

View File

@ -55,13 +55,6 @@ gcc_prefix() {
esac
}
is_ssse3_target() {
case "$(architecture)" in
amd64) return 0 ;;
*) return 1 ;;
esac
}
is_x86() {
case "$(architecture)" in
amd64|i386) return 0 ;;

View File

@ -111,10 +111,18 @@ _rg() {
"--no-ignore-vcs[don't respect version control ignore files]"
$no'--ignore-vcs[respect version control ignore files]'
+ '(json)' # json options
'--json[output results in a JSON Lines format]'
$no"--no-json[output results in the standard format]"
+ '(line)' # Line-number options
{-n,--line-number}'[show line numbers for matches]'
{-N,--no-line-number}"[don't show line numbers for matches]"
+ '(line terminator)' # line terminator options
'--crlf[use CRLF as a line terminator]'
$no"--no-crlf[do not use CRLF as a line terminator]"
+ '(max-depth)' # Directory-depth options
'--max-depth=[specify max number of directories to descend]:number of directories'
'!--maxdepth=:number of directories'
@ -131,6 +139,11 @@ _rg() {
'--mmap[search using memory maps when possible]'
"--no-mmap[don't search using memory maps]"
+ '(multiline)' # multiline options
{-U,--multiline}'[permit matching across multiple lines]'
$no"--no-multiline[restrict matches to at most one line each]"
'--multiline-dotall[make "." match newline in multiline mode]'
+ '(only)' # Only-match options
'(passthru replace)'{-o,--only-matching}'[show only matching part of each line]'
@ -138,6 +151,12 @@ _rg() {
'(--vimgrep count only replace)--passthru[show both matching and non-matching lines]'
'!(--vimgrep count only replace)--passthrough'
+ '(pcre2)' # PCRE2 options
{-P,--pcre2}'[Enable matching with PCRE2]'
$no"--no-pcre2[don't use PCRE2]"
"--pcre2-unicode[Enable PCRE2 Unicode mode]"
$no"--pcre2-unicode[Disable PCRE2 Unicode mode]"
+ '(pre)' # Preprocessing options
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
$no'--no-pre[disable preprocessor utility]'
@ -158,6 +177,7 @@ _rg() {
+ stats # Statistics options
'(--files file-match)--stats[show search statistics]'
$no"--no-stats[don't show search statistics]"
+ '(text)' # Binary-search options
{-a,--text}'[search binary files as if they were text]'
@ -166,6 +186,10 @@ _rg() {
+ '(threads)' # Thread-count options
'(--sort-files)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads'
+ '(trim)' # trim options
'--trim[trim any ASCII whitespace prefix from each line]'
$no"--no-trim[don't trim ASCII whitespace prefix from each line]"
+ type # Type options
'*'{-t+,--type=}'[only search files matching specified type]: :_rg_types'
'*--type-add=[add new glob for specified file type]: :->typespec'
@ -203,6 +227,7 @@ _rg() {
'--max-filesize=[specify size above which files should be ignored]:file size (bytes)'
"--no-config[don't load configuration files]"
'(-0 --null)'{-0,--null}'[print NUL byte after file names]'
'--null-data[use NUL as a line terminator]'
'--path-separator=[specify path separator to use when printing file names]:separator'
'(-q --quiet)'{-q,--quiet}'[suppress normal output]'
'--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)'

View File

@ -4,7 +4,7 @@ Cross platform single glob and glob set matching. Glob set matching is the
process of matching one or more glob patterns against a single candidate path
simultaneously, and returning all of the globs that matched.
[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep)
[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset)

View File

@ -470,7 +470,6 @@ impl GlobSetBuilder {
}
/// Add a new pattern to this set.
#[allow(dead_code)]
pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder {
self.pats.push(pat);
self

View File

@ -1,6 +1,6 @@
[package]
name = "grep"
version = "0.1.9" #:version
version = "0.2.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
@ -13,7 +13,18 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
log = "0.4"
memchr = "2"
regex = "1"
regex-syntax = "0.6"
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
grep-pcre2 = { version = "0.0.1", path = "../grep-pcre2", optional = true }
grep-printer = { version = "0.0.1", path = "../grep-printer" }
grep-regex = { version = "0.0.1", path = "../grep-regex" }
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
[dev-dependencies]
atty = "0.2.11"
termcolor = "1"
walkdir = "2.2.0"
[features]
avx-accel = ["grep-searcher/avx-accel"]
simd-accel = ["grep-searcher/simd-accel"]
pcre2 = ["grep-pcre2"]

View File

@ -1,4 +1,41 @@
grep
----
This is a *library* that provides grep-style line-by-line regex searching (with
comparable performance to `grep` itself).
ripgrep, as a library.
[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![](https://img.shields.io/crates/v/grep.svg)](https://crates.io/crates/grep)
Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org).
### Documentation
[https://docs.rs/grep](https://docs.rs/grep)
NOTE: This crate isn't ready for wide use yet. Ambitious individuals can
probably piece together the parts, but there is no high level documentation
describing how all of the pieces fit together.
### Usage
Add this to your `Cargo.toml`:
```toml
[dependencies]
grep = "0.2"
```
and this to your crate root:
```rust
extern crate grep;
```
### Features
This crate provides a `pcre2` feature (disabled by default) which, when
enabled, re-exports the `grep-pcre2` crate as an alternative `Matcher`
implementation to the standard `grep-regex` implementation.

107
grep/examples/simplegrep.rs Normal file
View File

@ -0,0 +1,107 @@
extern crate atty;
extern crate grep;
extern crate termcolor;
extern crate walkdir;
use std::env;
use std::error;
use std::ffi::OsString;
use std::path::Path;
use std::process;
use std::result;
use grep::printer::{ColorSpecs, StandardBuilder};
use grep::regex::RegexMatcher;
use grep::searcher::{BinaryDetection, SearcherBuilder};
use termcolor::{ColorChoice, StandardStream};
use walkdir::WalkDir;
// Returns early from the enclosing function with an `Err`. The arguments are
// passed straight to `format!`, and the resulting `String` is converted into
// the function's error type via `From::from`.
macro_rules! fail {
($($tt:tt)*) => {
return Err(From::from(format!($($tt)*)));
}
}
/// Result alias used by this example: the error side is a boxed
/// `std::error::Error` trait object.
type Result<T> = result::Result<T, Box<error::Error>>;
/// Runs the example; on failure prints the error to stderr and exits with
/// status 1.
fn main() {
    match try_main() {
        Ok(()) => {}
        Err(err) => {
            eprintln!("{}", err);
            process::exit(1);
        }
    }
}
fn try_main() -> Result<()> {
let mut args: Vec<OsString> = env::args_os().collect();
if args.len() < 2 {
fail!("Usage: simplegrep <pattern> [<path> ...]");
}
if args.len() == 2 {
args.push(OsString::from("./"));
}
let pattern = match args[1].clone().into_string() {
Ok(pattern) => pattern,
Err(_) => {
fail!(
"pattern is not valid UTF-8: '{:?}'",
args[1].to_string_lossy()
);
}
};
search(&pattern, &args[2..])
}
/// Searches every regular file found under each of `paths` for `pattern`,
/// printing matches to stdout.
///
/// Returns an error only when the pattern fails to build into a matcher.
/// Errors while walking directories or searching an individual file are
/// reported on stderr and the walk carries on.
fn search(pattern: &str, paths: &[OsString]) -> Result<()> {
    let matcher = RegexMatcher::new_line_matcher(pattern)?;
    let mut printer = StandardBuilder::new()
        .color_specs(colors())
        .build(StandardStream::stdout(color_choice()));
    let mut searcher = SearcherBuilder::new()
        .binary_detection(BinaryDetection::quit(b'\x00'))
        .build();

    for root in paths {
        for walked in WalkDir::new(root) {
            let entry = match walked {
                Err(err) => {
                    eprintln!(
                        "{}: {}",
                        err.path().unwrap_or(Path::new("error")).display(),
                        err,
                    );
                    continue;
                }
                Ok(entry) => entry,
            };
            // Directories, symlinks and other non-file entries are skipped.
            if entry.file_type().is_file() {
                let sink = printer.sink_with_path(&matcher, entry.path());
                let outcome =
                    searcher.search_path(&matcher, entry.path(), sink);
                if let Err(err) = outcome {
                    eprintln!("{}: {}", entry.path().display(), err);
                }
            }
        }
    }
    Ok(())
}
/// Picks `ColorChoice::Auto` when stdout is a tty and `ColorChoice::Never`
/// otherwise.
fn color_choice() -> ColorChoice {
    match atty::is(atty::Stream::Stdout) {
        true => ColorChoice::Auto,
        false => ColorChoice::Never,
    }
}
/// Builds the color specification used by the printer: magenta paths, green
/// line numbers and bold red matches.
///
/// Panics if any of the hard-coded spec strings fails to parse.
fn colors() -> ColorSpecs {
    let path = "path:fg:magenta".parse().unwrap();
    let line = "line:fg:green".parse().unwrap();
    let match_fg = "match:fg:red".parse().unwrap();
    let match_style = "match:style:bold".parse().unwrap();
    ColorSpecs::new(&[path, line, match_fg, match_style])
}

View File

@ -1,84 +1,22 @@
#![deny(missing_docs)]
/*!
A fast line oriented regex searcher.
ripgrep, as a library.
This library is intended to provide a high level facade to the crates that
make up ripgrep's core searching routines. However, there is no high level
documentation available yet guiding users on how to fit all of the pieces
together.
Every public API item in the constituent crates is documented, but examples
are sparse.
A cookbook and a guide are planned.
*/
#[macro_use]
extern crate log;
extern crate memchr;
extern crate regex;
extern crate regex_syntax as syntax;
#![deny(missing_docs)]
use std::error;
use std::fmt;
use std::result;
pub use search::{Grep, GrepBuilder, Iter, Match};
mod literals;
mod nonl;
mod search;
mod smart_case;
mod word_boundary;
/// Result is a convenient type alias that fixes the type of the error to
/// the `Error` type defined in this crate.
pub type Result<T> = result::Result<T, Error>;
/// Error enumerates the list of possible error conditions when building or
/// using a `Grep` line searcher.
#[derive(Debug)]
pub enum Error {
/// An error from parsing or compiling a regex.
Regex(regex::Error),
/// This error occurs when an illegal literal was found in the regex
/// pattern. For example, if the line terminator is `\n` and the regex
/// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error.
///
/// The payload is the forbidden character.
LiteralNotAllowed(char),
/// An unused enum variant that indicates this enum may be expanded in
/// the future and therefore should not be exhaustively matched.
#[doc(hidden)]
__Nonexhaustive,
}
impl error::Error for Error {
fn description(&self) -> &str {
match *self {
Error::Regex(ref err) => err.description(),
Error::LiteralNotAllowed(_) => "use of forbidden literal",
Error::__Nonexhaustive => unreachable!(),
}
}
fn cause(&self) -> Option<&error::Error> {
match *self {
Error::Regex(ref err) => err.cause(),
_ => None,
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::Regex(ref err) => err.fmt(f),
Error::LiteralNotAllowed(chr) => {
write!(f, "Literal {:?} not allowed.", chr)
}
Error::__Nonexhaustive => unreachable!(),
}
}
}
/// Wraps a regex error in `Error::Regex`, which lets `?` be used on results
/// carrying a `regex::Error`.
impl From<regex::Error> for Error {
fn from(err: regex::Error) -> Error {
Error::Regex(err)
}
}
impl From<syntax::Error> for Error {
fn from(err: syntax::Error) -> Error {
Error::Regex(regex::Error::Syntax(err.to_string()))
}
}
pub extern crate grep_matcher as matcher;
#[cfg(feature = "pcre2")]
pub extern crate grep_pcre2 as pcre2;
pub extern crate grep_printer as printer;
pub extern crate grep_regex as regex;
pub extern crate grep_searcher as searcher;

View File

@ -1,274 +0,0 @@
/*!
The literals module is responsible for extracting *inner* literals out of the
AST of a regular expression. Normally this is the job of the regex engine
itself, but the regex engine doesn't look for inner literals. Since we're doing
line based searching, we can use them, so we need to do it ourselves.
Note that this implementation is incredibly suspicious. We need something more
principled.
*/
use std::cmp;
use regex::bytes::RegexBuilder;
use syntax::hir::{self, Hir, HirKind};
use syntax::hir::literal::{Literal, Literals};
/// The sets of literals extracted from a regular expression, as built by
/// `LiteralSets::create`.
#[derive(Clone, Debug)]
pub struct LiteralSets {
/// Prefix literals, as computed by `Literals::prefixes`.
prefixes: Literals,
/// Suffix literals, as computed by `Literals::suffixes`.
suffixes: Literals,
/// Inner literals collected by `union_required`.
required: Literals,
}
impl LiteralSets {
/// Extracts the prefix, suffix and inner required literals from `expr`.
pub fn create(expr: &Hir) -> Self {
let mut required = Literals::empty();
union_required(expr, &mut required);
LiteralSets {
prefixes: Literals::prefixes(expr),
suffixes: Literals::suffixes(expr),
required: required,
}
}
/// Returns a builder for a regex over the extracted literals, suitable for
/// a fast literal scan, or `None` when no such regex should be used.
///
/// `None` is returned when the prefixes are complete and non-empty (the
/// regex engine handles that case itself), when any required literal
/// consists solely of whitespace, or when no non-empty literal was found.
/// Any builder returned has Unicode mode disabled.
pub fn to_regex_builder(&self) -> Option<RegexBuilder> {
if self.prefixes.all_complete() && !self.prefixes.is_empty() {
debug!("literal prefixes detected: {:?}", self.prefixes);
// When this is true, the regex engine will do a literal scan.
return None;
}
// Out of inner required literals, prefixes and suffixes, which one
// is the longest? We pick the longest to do fast literal scan under
// the assumption that a longer literal will have a lower false
// positive rate.
let pre_lcp = self.prefixes.longest_common_prefix();
let pre_lcs = self.prefixes.longest_common_suffix();
let suf_lcp = self.suffixes.longest_common_prefix();
let suf_lcs = self.suffixes.longest_common_suffix();
let req_lits = self.required.literals();
// The longest required literal, reborrowed as a plain byte slice (empty
// when there are no required literals).
let req = match req_lits.iter().max_by_key(|lit| lit.len()) {
None => &[],
Some(req) => &***req,
};
// Start from the prefixes' common prefix and replace it only with a
// strictly longer candidate, so earlier candidates win ties.
let mut lit = pre_lcp;
if pre_lcs.len() > lit.len() {
lit = pre_lcs;
}
if suf_lcp.len() > lit.len() {
lit = suf_lcp;
}
if suf_lcs.len() > lit.len() {
lit = suf_lcs;
}
// A required literal is only considered here when it is the sole one;
// multiple required literals are handled as an alternation below.
if req_lits.len() == 1 && req.len() > lit.len() {
lit = req;
}
// Special case: if we have any literals that are all whitespace,
// then this is probably a failing of the literal detection since
// whitespace is typically pretty common. In this case, don't bother
// with inner literal scanning at all and just defer to the regex.
let any_all_white = req_lits.iter()
.any(|lit| lit.iter().all(|&b| (b as char).is_whitespace()));
if any_all_white {
return None;
}
// Special case: if we detected an alternation of inner required
// literals and its longest literal is bigger than the longest
// prefix/suffix, then choose the alternation. In practice, this
// helps with case insensitive matching, which can generate lots of
// inner required literals.
let any_empty = req_lits.iter().any(|lit| lit.is_empty());
if req.len() > lit.len() && req_lits.len() > 1 && !any_empty {
debug!("required literals found: {:?}", req_lits);
// Each literal is hex-escaped, so joining with `|` yields a plain
// alternation of byte strings.
let alts: Vec<String> =
req_lits.into_iter().map(|x| bytes_to_regex(x)).collect();
let mut builder = RegexBuilder::new(&alts.join("|"));
builder.unicode(false);
Some(builder)
} else if lit.is_empty() {
None
} else {
debug!("required literal found: {:?}", show(lit));
let mut builder = RegexBuilder::new(&bytes_to_regex(&lit));
builder.unicode(false);
Some(builder)
}
}
}
/// Walks `expr` and accumulates its inner required literals into `lits`.
///
/// Wherever a sub-expression cannot contribute a usable literal, the set is
/// "cut" instead (the comments below describe this as freezing its members).
fn union_required(expr: &Hir, lits: &mut Literals) {
match *expr.kind() {
HirKind::Literal(hir::Literal::Unicode(c)) => {
// Append the UTF-8 encoding of the character to the current literals.
let mut buf = [0u8; 4];
lits.cross_add(c.encode_utf8(&mut buf).as_bytes());
}
HirKind::Literal(hir::Literal::Byte(b)) => {
lits.cross_add(&[b]);
}
// Classes are only expanded into literals when they are small (fewer
// than 5 members) and the set accepts them; otherwise we stop extending.
HirKind::Class(hir::Class::Unicode(ref cls)) => {
if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) {
lits.cut();
}
}
HirKind::Class(hir::Class::Bytes(ref cls)) => {
if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) {
lits.cut();
}
}
HirKind::Group(hir::Group { ref hir, .. }) => {
union_required(&**hir, lits);
}
HirKind::Repetition(ref x) => {
match x.kind {
// Optional sub-expressions contribute nothing that is required.
hir::RepetitionKind::ZeroOrOne => lits.cut(),
hir::RepetitionKind::ZeroOrMore => lits.cut(),
// At least one occurrence is required, so take the literals of a
// single occurrence and then stop extending.
hir::RepetitionKind::OneOrMore => {
union_required(&x.hir, lits);
lits.cut();
}
hir::RepetitionKind::Range(ref rng) => {
let (min, max) = match *rng {
hir::RepetitionRange::Exactly(m) => (m, Some(m)),
hir::RepetitionRange::AtLeast(m) => (m, None),
hir::RepetitionRange::Bounded(m, n) => (m, Some(n)),
};
repeat_range_literals(
&x.hir, min, max, x.greedy, lits, union_required);
}
}
}
HirKind::Concat(ref es) if es.is_empty() => {}
HirKind::Concat(ref es) if es.len() == 1 => {
union_required(&es[0], lits)
}
HirKind::Concat(ref es) => {
for e in es {
// Collect each element's literals separately, then combine them
// with what has been accumulated so far.
let mut lits2 = lits.to_empty();
union_required(e, &mut lits2);
if lits2.is_empty() {
lits.cut();
continue;
}
if lits2.contains_empty() {
lits.cut();
}
if !lits.cross_product(&lits2) {
// If this expression couldn't yield any literal that
// could be extended, then we need to quit. Since we're
// short-circuiting, we also need to freeze every member.
lits.cut();
break;
}
}
}
HirKind::Alternation(ref es) => {
alternate_literals(es, lits, union_required);
}
// Every other kind of expression contributes no literal.
_ => lits.cut(),
}
}
/// Extracts literals for a counted repetition `e{min,max}` into `lits`, using
/// `f` to extract the literals of a single occurrence of `e`.
///
/// `max` is `None` for an unbounded repetition. `_greedy` is accepted but
/// not used.
fn repeat_range_literals<F: FnMut(&Hir, &mut Literals)>(
    e: &Hir,
    min: u32,
    max: Option<u32>,
    _greedy: bool,
    lits: &mut Literals,
    mut f: F,
) {
    if min == 0 {
        // This is a bit conservative. If `max` is set, then we could
        // treat this as a finite set of alternations. For now, we
        // just treat it as `e*`.
        lits.cut();
        return;
    }

    let min_reps = min as usize;
    let capped = cmp::min(lits.limit_size(), min_reps);
    // Only a single repetition is extracted, even though more would be
    // possible: `a{3}` yields `a` rather than `aaa`. Inner literals cannot be
    // unioned across repetitions; e.g. extracting `foofoofoo` from
    // `(\w+foo){3}` would be wrong.
    f(e, lits);
    if capped < min_reps {
        lits.cut();
    }
    // Anything other than an exact count (`min == max`) stops extension.
    let exact = match max {
        Some(max) => min >= max,
        None => false,
    };
    if !exact {
        lits.cut();
    }
}
/// Extracts literals for an alternation of the expressions `es` into `lits`,
/// using `f` to extract the literals of each alternate.
///
/// Only the longest common prefix and longest common suffix across all
/// alternates are reported. If any alternate yields no literals, or the union
/// cannot be completed, `lits` is cut and nothing is added.
fn alternate_literals<F: FnMut(&Hir, &mut Literals)>(
es: &[Hir],
lits: &mut Literals,
mut f: F,
) {
// `lits2` accumulates the union of the literals of every alternate.
let mut lits2 = lits.to_empty();
for e in es {
// Each alternate gets a scratch set with a fifth of the size limit.
let mut lits3 = lits.to_empty();
lits3.set_limit_size(lits.limit_size() / 5);
f(e, &mut lits3);
if lits3.is_empty() || !lits2.union(lits3) {
// If we couldn't find suffixes for *any* of the
// alternates, then the entire alternation has to be thrown
// away and any existing members must be frozen. Similarly,
// if the union couldn't complete, stop and freeze.
lits.cut();
return;
}
}
// All we do at the moment is look for prefixes and suffixes. If both
// are empty, then we report nothing. We should be able to do better than
// this, but we'll need something more expressive than just a "set of
// literals."
let lcp = lits2.longest_common_prefix();
let lcs = lits2.longest_common_suffix();
if !lcp.is_empty() {
lits.cross_add(lcp);
}
lits.cut();
// The common suffix is added as a literal of its own, after the cut,
// rather than being appended to the existing literals.
if !lcs.is_empty() {
lits.add(Literal::empty());
lits.add(Literal::new(lcs.to_vec()));
}
}
/// Counts the characters covered by the given Unicode class by adding up the
/// (inclusive) width of each of its ranges.
fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 {
    let mut total = 0;
    for range in cls.iter() {
        total += 1 + (range.end() as u32 - range.start() as u32);
    }
    total
}
/// Counts the bytes covered by the given byte class by adding up the
/// (inclusive) width of each of its ranges.
fn count_byte_class(cls: &hir::ClassBytes) -> u32 {
    let mut total = 0;
    for range in cls.iter() {
        total += 1 + (range.end() as u32 - range.start() as u32);
    }
    total
}
/// Converts an arbitrary sequence of bytes to a literal suitable for building
/// a regular expression.
///
/// Every byte is written as a two-digit lowercase hex escape (`\xNN`), so the
/// result never contains a character with special meaning in regex syntax.
/// An empty input yields an empty string.
fn bytes_to_regex(bs: &[u8]) -> String {
    use std::fmt::Write;

    // Each input byte expands to exactly four characters, so reserve the
    // final size up front instead of growing repeatedly.
    let mut s = String::with_capacity(4 * bs.len());
    for &b in bs {
        // Format straight into the output rather than through a temporary
        // `String` per byte. Writing to a `String` cannot fail.
        write!(s, "\\x{:02x}", b).unwrap();
    }
    s
}
/// Renders arbitrary bytes as a printable string, escaping each byte with
/// `std::ascii::escape_default`.
fn show(bs: &[u8]) -> String {
    use std::ascii::escape_default;

    // `escape_default` only ever yields ASCII bytes, so each one maps directly
    // to a `char`.
    bs.iter()
        .flat_map(|&b| escape_default(b))
        .map(|escaped| escaped as char)
        .collect()
}

View File

@ -1,74 +0,0 @@
use syntax::hir::{self, Hir, HirKind};
use {Error, Result};
/// Returns a new expression that is guaranteed to never match the given
/// ASCII character.
///
/// If the expression contains the literal byte, then an error is returned.
/// An error is also returned if removing the byte from a character class
/// leaves that class empty.
///
/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this
/// function panics.
pub fn remove(expr: Hir, byte: u8) -> Result<Hir> {
assert!(byte <= 0x7F);
let chr = byte as char;
assert!(chr.len_utf8() == 1);
// The expression is taken apart and rebuilt node by node.
Ok(match expr.into_kind() {
HirKind::Empty => Hir::empty(),
HirKind::Literal(hir::Literal::Unicode(c)) => {
if c == chr {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::literal(hir::Literal::Unicode(c))
}
HirKind::Literal(hir::Literal::Byte(b)) => {
if b as char == chr {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::literal(hir::Literal::Byte(b))
}
HirKind::Class(hir::Class::Unicode(mut cls)) => {
// Subtract the forbidden character from the class.
let remove = hir::ClassUnicode::new(Some(
hir::ClassUnicodeRange::new(chr, chr),
));
cls.difference(&remove);
// Nothing left means the class could only match the forbidden
// character.
if cls.iter().next().is_none() {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::class(hir::Class::Unicode(cls))
}
HirKind::Class(hir::Class::Bytes(mut cls)) => {
// Same as above, for byte classes.
let remove = hir::ClassBytes::new(Some(
hir::ClassBytesRange::new(byte, byte),
));
cls.difference(&remove);
if cls.iter().next().is_none() {
return Err(Error::LiteralNotAllowed(chr));
}
Hir::class(hir::Class::Bytes(cls))
}
// Anchors and word boundaries are passed through as-is.
HirKind::Anchor(x) => Hir::anchor(x),
HirKind::WordBoundary(x) => Hir::word_boundary(x),
// Compound expressions are rebuilt from their transformed children; the
// first error from any child aborts the whole transformation.
HirKind::Repetition(mut x) => {
x.hir = Box::new(remove(*x.hir, byte)?);
Hir::repetition(x)
}
HirKind::Group(mut x) => {
x.hir = Box::new(remove(*x.hir, byte)?);
Hir::group(x)
}
HirKind::Concat(xs) => {
let xs = xs.into_iter()
.map(|e| remove(e, byte))
.collect::<Result<Vec<Hir>>>()?;
Hir::concat(xs)
}
HirKind::Alternation(xs) => {
let xs = xs.into_iter()
.map(|e| remove(e, byte))
.collect::<Result<Vec<Hir>>>()?;
Hir::alternation(xs)
}
})
}

View File

@ -1,317 +0,0 @@
use memchr::{memchr, memrchr};
use syntax::ParserBuilder;
use syntax::hir::Hir;
use regex::bytes::{Regex, RegexBuilder};
use literals::LiteralSets;
use nonl;
use smart_case::Cased;
use word_boundary::strip_unicode_word_boundaries;
use Result;
/// The byte span of a line that matched.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Match {
    start: usize,
    end: usize,
}

impl Match {
    /// Creates an empty match, i.e. one whose start and end are both zero.
    pub fn new() -> Match {
        Match { start: 0, end: 0 }
    }

    /// Byte offset at which the matching line starts.
    #[inline]
    pub fn start(&self) -> usize {
        let Match { start, .. } = *self;
        start
    }

    /// Byte offset at which the matching line ends.
    #[inline]
    pub fn end(&self) -> usize {
        let Match { end, .. } = *self;
        end
    }
}
/// A fast line oriented regex searcher.
#[derive(Clone, Debug)]
pub struct Grep {
/// The full regex, used to decide whether a line matches.
re: Regex,
/// An optional secondary regex used to find candidate lines first; each
/// candidate is then confirmed with `re`.
required: Option<Regex>,
/// The options this searcher was built with.
opts: Options,
}
/// A builder for a grep searcher.
#[derive(Clone, Debug)]
pub struct GrepBuilder {
/// The regex pattern, exactly as given to `GrepBuilder::new`.
pattern: String,
/// The options accumulated by the builder's setter methods.
opts: Options,
}
/// The configuration shared by `GrepBuilder` and the `Grep` it builds.
#[derive(Clone, Debug)]
struct Options {
    case_insensitive: bool,
    case_smart: bool,
    line_terminator: u8,
    size_limit: usize,
    dfa_size_limit: usize,
}

impl Default for Options {
    /// Case sensitive matching, `\n` as the line terminator, and a 10 MiB
    /// limit for both the compiled regex and the DFA cache.
    fn default() -> Options {
        let ten_mib = 10 * (1 << 20);
        Options {
            line_terminator: b'\n',
            case_insensitive: false,
            case_smart: false,
            size_limit: ten_mib,
            dfa_size_limit: ten_mib,
        }
    }
}
impl GrepBuilder {
/// Create a new builder for line searching.
///
/// The pattern given should be a regular expression. The precise syntax
/// supported is documented on the regex crate.
pub fn new(pattern: &str) -> GrepBuilder {
GrepBuilder {
pattern: pattern.to_string(),
opts: Options::default(),
}
}
/// Set the line terminator.
///
/// The line terminator can be any ASCII character and serves to delineate
/// the match boundaries in the text searched.
///
/// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII).
pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder {
assert!(ascii_byte <= 0x7F);
self.opts.line_terminator = ascii_byte;
self
}
/// Set the case insensitive flag (`i`) on the regex.
pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder {
self.opts.case_insensitive = yes;
self
}
/// Whether to enable smart case search or not (disabled by default).
///
/// Smart case uses case insensitive search if the pattern contains only
/// lowercase characters (ignoring any characters which immediately follow
/// a '\'). Otherwise, a case sensitive search is used instead.
///
/// Enabling the case_insensitive flag overrides this.
pub fn case_smart(mut self, yes: bool) -> GrepBuilder {
self.opts.case_smart = yes;
self
}
/// Set the approximate size limit of the compiled regular expression.
///
/// This roughly corresponds to the number of bytes occupied by a
/// single compiled program. If the program exceeds this number, then a
/// compilation error is returned.
pub fn size_limit(mut self, limit: usize) -> GrepBuilder {
self.opts.size_limit = limit;
self
}
/// Set the approximate size of the cache used by the DFA.
///
/// This roughly corresponds to the number of bytes that the DFA will use
/// while searching.
///
/// Note that this is a per thread limit. There is no way to set a global
/// limit. In particular, if a regex is used from multiple threads
/// simultaneously, then each thread may use up to the number of bytes
/// specified here.
pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder {
self.opts.dfa_size_limit = limit;
self
}
/// Create a line searcher.
///
/// If there was a problem parsing or compiling the regex with the given
/// options, then an error is returned.
pub fn build(self) -> Result<Grep> {
let expr = self.parse()?;
let literals = LiteralSets::create(&expr);
let re = self.regex(&expr)?;
// Pick the regex used to find candidate lines: a literal-based one when
// available, otherwise the pattern with its outer Unicode word boundaries
// stripped (if it had any).
let required = match literals.to_regex_builder() {
Some(builder) => Some(self.regex_build(builder)?),
None => {
match strip_unicode_word_boundaries(&expr) {
None => None,
Some(expr) => {
debug!("Stripped Unicode word boundaries. \
New AST:\n{:?}", expr);
// A failure to compile the stripped regex is not an error; the
// search simply proceeds without a candidate regex.
self.regex(&expr).ok()
}
}
}
};
Ok(Grep {
re: re,
required: required,
opts: self.opts,
})
}
/// Creates a new regex from the given expression with the current
/// configuration.
fn regex(&self, expr: &Hir) -> Result<Regex> {
let mut builder = RegexBuilder::new(&expr.to_string());
builder.unicode(true);
self.regex_build(builder)
}
/// Builds a new regex from the given builder using the caller's settings.
///
/// Multi-line mode is always enabled, and the configured size limits are
/// applied.
fn regex_build(&self, mut builder: RegexBuilder) -> Result<Regex> {
builder
.multi_line(true)
.size_limit(self.opts.size_limit)
.dfa_size_limit(self.opts.dfa_size_limit)
.build()
.map_err(From::from)
}
/// Parses the underlying pattern and ensures the pattern can never match
/// the line terminator.
fn parse(&self) -> Result<Hir> {
let expr = ParserBuilder::new()
.allow_invalid_utf8(true)
.case_insensitive(self.is_case_insensitive()?)
.multi_line(true)
.build()
.parse(&self.pattern)?;
debug!("original regex HIR pattern:\n{}", expr);
let expr = nonl::remove(expr, self.opts.line_terminator)?;
debug!("transformed regex HIR pattern:\n{}", expr);
Ok(expr)
}
/// Determines whether the case insensitive flag should be enabled or not.
///
/// An explicit `case_insensitive` setting always wins. Otherwise, with
/// smart case enabled, the search is case insensitive only when the pattern
/// contains at least one literal and no uppercase literal. A pattern that
/// cannot be analyzed is treated as case sensitive here.
fn is_case_insensitive(&self) -> Result<bool> {
if self.opts.case_insensitive {
return Ok(true);
}
if !self.opts.case_smart {
return Ok(false);
}
let cased = match Cased::from_pattern(&self.pattern) {
None => return Ok(false),
Some(cased) => cased,
};
Ok(cased.any_literal && !cased.any_uppercase)
}
}
impl Grep {
/// Returns a reference to the underlying regex used by the searcher.
pub fn regex(&self) -> &Regex {
&self.re
}
/// Returns an iterator over all matches in the given buffer.
pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> {
Iter {
searcher: self,
buf: buf,
start: 0,
}
}
/// Fills in the next line that matches in the given buffer starting at
/// the position given.
///
/// If no match could be found, `false` is returned, otherwise, `true` is
/// returned.
pub fn read_match(
&self,
mat: &mut Match,
buf: &[u8],
mut start: usize,
) -> bool {
if start >= buf.len() {
return false;
}
if let Some(ref req) = self.required {
// Two-step search: `req` finds a candidate position, and the full regex
// is then run only on the line containing that candidate.
while start < buf.len() {
let e = match req.shortest_match(&buf[start..]) {
None => return false,
Some(e) => start + e,
};
let (prevnl, nextnl) = self.find_line(buf, e, e);
match self.re.shortest_match(&buf[prevnl..nextnl]) {
None => {
// False positive: resume searching after this line.
start = nextnl;
continue;
}
Some(_) => {
self.fill_match(mat, prevnl, nextnl);
return true;
}
}
}
false
} else {
// No candidate regex: run the full regex and expand the end of the
// match to the boundaries of its line.
let e = match self.re.shortest_match(&buf[start..]) {
None => return false,
Some(e) => start + e,
};
let (s, e) = self.find_line(buf, e, e);
self.fill_match(mat, s, e);
true
}
}
/// Stores the given line boundaries in `mat`.
fn fill_match(&self, mat: &mut Match, start: usize, end: usize) {
mat.start = start;
mat.end = end;
}
/// Returns the start of the line containing `s` and the end of the line
/// containing `e`.
fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) {
(self.find_line_start(buf, s), self.find_line_end(buf, e))
}
/// Returns the offset just past the last line terminator found before
/// `pos`, or 0 if there is none.
fn find_line_start(&self, buf: &[u8], pos: usize) -> usize {
memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1)
}
/// Returns the offset just past the first line terminator found at or after
/// `pos`, or the length of the buffer if there is none.
fn find_line_end(&self, buf: &[u8], pos: usize) -> usize {
memchr(self.opts.line_terminator, &buf[pos..])
.map_or(buf.len(), |i| pos + i + 1)
}
}
/// An iterator over all matches in a particular buffer.
///
/// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime
/// of the searcher.
pub struct Iter<'b, 's> {
/// The searcher used to find each match.
searcher: &'s Grep,
/// The buffer being searched.
buf: &'b [u8],
/// The offset in `buf` at which the next search begins.
start: usize,
}
impl<'b, 's> Iterator for Iter<'b, 's> {
    type Item = Match;

    /// Yields the next matching line, resuming right after the previous one.
    /// Once the search comes up empty the position is moved to the end of the
    /// buffer.
    fn next(&mut self) -> Option<Match> {
        let mut found = Match::default();
        let matched = self.searcher.read_match(&mut found, self.buf, self.start);
        if matched {
            self.start = found.end;
            Some(found)
        } else {
            self.start = self.buf.len();
            None
        }
    }
}

View File

@ -1,191 +0,0 @@
use syntax::ast::{self, Ast};
use syntax::ast::parse::Parser;
/// The results of analyzing a regex for cased literals.
#[derive(Clone, Debug, Default)]
pub struct Cased {
/// True if and only if a literal uppercase character occurs in the regex.
///
/// A regex like `\pL` contains no uppercase literals, even though `L`
/// is uppercase and the `\pL` class contains uppercase characters.
pub any_uppercase: bool,
/// True if and only if the regex contains any literal at all. A regex like
/// `\pL` has this set to false.
pub any_literal: bool,
}
impl Cased {
/// Returns a `Cased` value by doing analysis on the AST of `pattern`.
///
/// If `pattern` is not a valid regular expression, then `None` is
/// returned.
pub fn from_pattern(pattern: &str) -> Option<Cased> {
Parser::new()
.parse(pattern)
.map(|ast| Cased::from_ast(&ast))
.ok()
}
/// Analyzes an already parsed AST, starting from the default (all false)
/// state.
fn from_ast(ast: &Ast) -> Cased {
let mut cased = Cased::default();
cased.from_ast_impl(ast);
cased
}
/// Recursively visits `ast`, recording every literal it finds.
///
/// The walk stops early once `done` reports that the result can no longer
/// change.
fn from_ast_impl(&mut self, ast: &Ast) {
if self.done() {
return;
}
match *ast {
// None of these contain literal characters.
Ast::Empty(_)
| Ast::Flags(_)
| Ast::Dot(_)
| Ast::Assertion(_)
| Ast::Class(ast::Class::Unicode(_))
| Ast::Class(ast::Class::Perl(_)) => {}
Ast::Literal(ref x) => {
self.from_ast_literal(x);
}
Ast::Class(ast::Class::Bracketed(ref x)) => {
self.from_ast_class_set(&x.kind);
}
Ast::Repetition(ref x) => {
self.from_ast_impl(&x.ast);
}
Ast::Group(ref x) => {
self.from_ast_impl(&x.ast);
}
Ast::Alternation(ref alt) => {
for x in &alt.asts {
self.from_ast_impl(x);
}
}
Ast::Concat(ref alt) => {
for x in &alt.asts {
self.from_ast_impl(x);
}
}
}
}
/// Visits the contents of a bracketed character class, descending into
/// both operands of a binary set operation.
fn from_ast_class_set(&mut self, ast: &ast::ClassSet) {
if self.done() {
return;
}
match *ast {
ast::ClassSet::Item(ref item) => {
self.from_ast_class_set_item(item);
}
ast::ClassSet::BinaryOp(ref x) => {
self.from_ast_class_set(&x.lhs);
self.from_ast_class_set(&x.rhs);
}
}
}
/// Visits a single item of a bracketed character class.
///
/// Only literals, the two endpoints of a range, and nested items are
/// inspected; named classes are ignored.
fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) {
if self.done() {
return;
}
match *ast {
ast::ClassSetItem::Empty(_)
| ast::ClassSetItem::Ascii(_)
| ast::ClassSetItem::Unicode(_)
| ast::ClassSetItem::Perl(_) => {}
ast::ClassSetItem::Literal(ref x) => {
self.from_ast_literal(x);
}
ast::ClassSetItem::Range(ref x) => {
self.from_ast_literal(&x.start);
self.from_ast_literal(&x.end);
}
ast::ClassSetItem::Bracketed(ref x) => {
self.from_ast_class_set(&x.kind);
}
ast::ClassSetItem::Union(ref union) => {
for x in &union.items {
self.from_ast_class_set_item(x);
}
}
}
}
/// Records a literal: marks that a literal was seen and, if its character
/// is uppercase, that an uppercase literal was seen.
fn from_ast_literal(&mut self, ast: &ast::Literal) {
self.any_literal = true;
self.any_uppercase = self.any_uppercase || ast.c.is_uppercase();
}
/// Returns true if and only if the attributes can never change no matter
/// what other AST it might see.
fn done(&self) -> bool {
self.any_uppercase && self.any_literal
}
}
#[cfg(test)]
mod tests {
use super::*;
// Analyzes `pattern`, panicking if it is not a valid regex.
fn cased(pattern: &str) -> Cased {
Cased::from_pattern(pattern).unwrap()
}
#[test]
fn various() {
// The empty pattern has no literals at all.
let x = cased("");
assert!(!x.any_uppercase);
assert!(!x.any_literal);
// Plain literals, with and without uppercase characters.
let x = cased("foo");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased("Foo");
assert!(x.any_uppercase);
assert!(x.any_literal);
let x = cased("foO");
assert!(x.any_uppercase);
assert!(x.any_literal);
// Escapes and class names do not count as uppercase literals.
let x = cased(r"foo\\");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\w");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\S");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo\p{Ll}");
assert!(!x.any_uppercase);
assert!(x.any_literal);
// Literals inside bracketed classes are inspected, including range
// endpoints.
let x = cased(r"foo[a-z]");
assert!(!x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo[A-Z]");
assert!(x.any_uppercase);
assert!(x.any_literal);
let x = cased(r"foo[\S\t]");
assert!(!x.any_uppercase);
assert!(x.any_literal);
// Here the backslash itself is escaped, so `S` is a plain literal.
let x = cased(r"foo\\S");
assert!(x.any_uppercase);
assert!(x.any_literal);
// A class on its own contains no literal.
let x = cased(r"\p{Ll}");
assert!(!x.any_uppercase);
assert!(!x.any_literal);
let x = cased(r"aBc\w");
assert!(x.any_uppercase);
assert!(x.any_literal);
}
}

View File

@ -1,53 +0,0 @@
use syntax::hir::{self, Hir, HirKind};
/// Removes Unicode word boundaries from the outer edges of `expr`.
///
/// The returned expression matches *at least* everywhere the given expression
/// matches: it may report false positives, but never false negatives.
///
/// `None` is returned when nothing could be stripped, which includes the case
/// where stripping would leave no other expression to match.
pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option<Hir> {
    // Unicode word boundaries are the one thing Rust's regex DFA engine can't
    // handle: on non-ASCII text it falls back to a slower engine. Stripping
    // them yields a regex that can find candidate matches quickly; the full
    // (slower) regex is then only needed to confirm each candidate.
    //
    // Only the outer edges of a concatenation are examined, presumably to
    // optimize for the `-w/--word-regexp` flag.
    let es = match *expr.kind() {
        HirKind::Concat(ref es) if !es.is_empty() => es,
        _ => return None,
    };
    let count = es.len();
    let leading = is_unicode_word_boundary(&es[0]);
    let trailing = is_unicode_word_boundary(es.last().unwrap());
    // Work out which sub-expressions survive, taking care never to strip
    // boundaries when nothing else would be left to match.
    let (lo, hi) = match (leading, trailing) {
        (true, false) if count > 1 => (1, count),
        (false, true) if count > 1 => (0, count - 1),
        (true, true) if count > 2 => (1, count - 1),
        _ => return None,
    };
    Some(Hir::concat(es[lo..hi].to_vec()))
}
/// Reports whether `expr` is a Unicode word boundary (negated or not),
/// looking through any groups that wrap it.
fn is_unicode_word_boundary(expr: &Hir) -> bool {
    match *expr.kind() {
        HirKind::WordBoundary(hir::WordBoundary::Unicode)
        | HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true,
        HirKind::Group(ref group) => is_unicode_word_boundary(&group.hir),
        _ => false,
    }
}

View File

@ -1,3 +0,0 @@
This project is dual-licensed under the Unlicense and MIT licenses.
You may use this code under the terms of either license.

View File

@ -1,23 +0,0 @@
[package]
name = "grep2"
version = "0.2.0" #:version
authors = ["Andrew Gallant <jamslam@gmail.com>"]
description = """
Fast line oriented regex searching as a library.
"""
documentation = "http://burntsushi.net/rustdoc/grep/"
homepage = "https://github.com/BurntSushi/ripgrep"
repository = "https://github.com/BurntSushi/ripgrep"
readme = "README.md"
keywords = ["regex", "grep", "egrep", "search", "pattern"]
license = "Unlicense/MIT"
[dependencies]
grep-matcher = { version = "0.0.1", path = "../grep-matcher" }
grep-printer = { version = "0.0.1", path = "../grep-printer" }
grep-regex = { version = "0.0.1", path = "../grep-regex" }
grep-searcher = { version = "0.0.1", path = "../grep-searcher" }
[features]
avx-accel = ["grep-searcher/avx-accel"]
simd-accel = ["grep-searcher/simd-accel"]

View File

@ -1,21 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Andrew Gallant
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

View File

@ -1,4 +0,0 @@
grep
----
This is a *library* that provides grep-style line-by-line regex searching (with
comparable performance to `grep` itself).

View File

@ -1,24 +0,0 @@
This is free and unencumbered software released into the public domain.
Anyone is free to copy, modify, publish, use, compile, sell, or
distribute this software, either in source code form or as a compiled
binary, for any purpose, commercial or non-commercial, and by any
means.
In jurisdictions that recognize copyright laws, the author or authors
of this software dedicate any and all copyright interest in the
software to the public domain. We make this dedication for the benefit
of the public at large and to the detriment of our heirs and
successors. We intend this dedication to be an overt act of
relinquishment in perpetuity of all present and future rights to this
software under copyright law.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
For more information, please refer to <http://unlicense.org/>

View File

@ -1,10 +0,0 @@
/*!
TODO.
*/
#![deny(missing_docs)]
pub extern crate grep_matcher as matcher;
pub extern crate grep_printer as printer;
pub extern crate grep_regex as regex;
pub extern crate grep_searcher as searcher;

View File

@ -26,7 +26,7 @@ memchr = "2"
regex = "1"
same-file = "1"
thread_local = "0.3.2"
walkdir = "2"
walkdir = "2.2.0"
[target.'cfg(windows)'.dependencies.winapi]
version = "0.3"

View File

@ -4,7 +4,7 @@ The ignore crate provides a fast recursive directory iterator that respects
various filters such as globs, file types and `.gitignore` files. This crate
also provides lower level direct access to gitignore and file type matchers.
[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep)
[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep)
[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep)
[![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore)

View File

@ -1,5 +1,3 @@
#![allow(dead_code, unused_imports, unused_mut, unused_variables)]
extern crate crossbeam;
extern crate ignore;
extern crate walkdir;
@ -8,7 +6,6 @@ use std::env;
use std::io::{self, Write};
use std::path::Path;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::thread;
use crossbeam::sync::MsQueue;
@ -48,13 +45,11 @@ fn main() {
})
});
} else if simple {
let mut stdout = io::BufWriter::new(io::stdout());
let walker = WalkDir::new(path);
for result in walker {
queue.push(Some(DirEntry::X(result.unwrap())));
}
} else {
let mut stdout = io::BufWriter::new(io::stdout());
let walker = WalkBuilder::new(path).build();
for result in walker {
queue.push(Some(DirEntry::Y(result.unwrap())));

View File

@ -2,8 +2,8 @@
// including some light validation.
//
// This module is purposely written in a bare-bones way, since it is included
// in ripgrep's build.rs file as a way to generate completion files for common
// shells.
// in ripgrep's build.rs file as a way to generate a man page and completion
// files for common shells.
//
// The only other place that ripgrep deals with clap is in src/args.rs, which
// is where we read clap's configuration from the end user's arguments and turn
@ -82,7 +82,34 @@ pub fn app() -> App<'static, 'static> {
/// the RIPGREP_BUILD_GIT_HASH env var is inspected for it. If that isn't set,
/// then a revision hash is not included in the version string returned.
pub fn long_version(revision_hash: Option<&str>) -> String {
    // A revision hash is shown only when one was passed in or was captured in
    // RIPGREP_BUILD_GIT_HASH at compile time (i.e. ripgrep was built on a
    // machine with `git` installed).
    let hash = revision_hash
        .or(option_env!("RIPGREP_BUILD_GIT_HASH"))
        .map(|githash| format!(" (rev {})", githash))
        .unwrap_or_else(String::new);

    let runtime = runtime_cpu_features();
    // Version, revision and the CPU features selected at compile time are
    // always reported.
    let mut version = format!(
        "{}{}\n{} (compiled)",
        crate_version!(),
        hash,
        compile_cpu_features().join(" ")
    );
    // The runtime line is only added when runtime detection is available.
    if !runtime.is_empty() {
        version.push_str(&format!("\n{} (runtime)", runtime.join(" ")));
    }
    version
}
/// Returns the relevant CPU features enabled at compile time.
fn compile_cpu_features() -> Vec<&'static str> {
let mut features = vec![];
if cfg!(feature = "simd-accel") {
features.push("+SIMD");
@ -94,14 +121,33 @@ pub fn long_version(revision_hash: Option<&str>) -> String {
} else {
features.push("-AVX");
}
// Do we have a git hash?
// (Yes, if ripgrep was built on a machine with `git` installed.)
let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) {
None => String::new(),
Some(githash) => format!(" (rev {})", githash),
};
// Put everything together.
format!("{}{}\n{}", crate_version!(), hash, features.join(" "))
features
}
/// Reports which CPU features were detected on the running machine.
///
/// The result always has two entries, SIMD first and AVX second, each
/// prefixed with `+` when the feature was detected and `-` otherwise.
#[cfg(all(ripgrep_runtime_cpu, target_arch = "x86_64"))]
fn runtime_cpu_features() -> Vec<&'static str> {
    // Note that hard-coding "ssse3" and "avx2" here leaks knowledge of which
    // specific SIMD features are actually used elsewhere, which is a bit of
    // an abstraction violation.
    let simd = if is_x86_feature_detected!("ssse3") {
        "+SIMD"
    } else {
        "-SIMD"
    };
    let avx = if is_x86_feature_detected!("avx2") {
        "+AVX"
    } else {
        "-AVX"
    };
    vec![simd, avx]
}
/// Fallback used when runtime CPU feature detection is not compiled in (or
/// the target is not x86_64): nothing is reported.
#[cfg(not(all(ripgrep_runtime_cpu, target_arch = "x86_64")))]
fn runtime_cpu_features() -> Vec<&'static str> {
    Vec::new()
}
/// Arg is a light alias for a clap::Arg that is specialized to compile time
@ -478,7 +524,7 @@ impl RGArg {
}
}
// We add an extra space to long descriptions so that a black line is inserted
// We add an extra space to long descriptions so that a blank line is inserted
// between flag descriptions in --help output.
macro_rules! long {
($lit:expr) => { concat!($lit, " ") }
@ -502,6 +548,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_context_separator(&mut args);
flag_count(&mut args);
flag_count_matches(&mut args);
flag_crlf(&mut args);
flag_debug(&mut args);
flag_dfa_size_limit(&mut args);
flag_encoding(&mut args);
@ -518,6 +565,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_ignore_case(&mut args);
flag_ignore_file(&mut args);
flag_invert_match(&mut args);
flag_json(&mut args);
flag_line_number(&mut args);
flag_line_regexp(&mut args);
flag_max_columns(&mut args);
@ -525,6 +573,8 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_max_depth(&mut args);
flag_max_filesize(&mut args);
flag_mmap(&mut args);
flag_multiline(&mut args);
flag_multiline_dotall(&mut args);
flag_no_config(&mut args);
flag_no_ignore(&mut args);
flag_no_ignore_global(&mut args);
@ -533,9 +583,12 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_no_ignore_vcs(&mut args);
flag_no_messages(&mut args);
flag_null(&mut args);
flag_null_data(&mut args);
flag_only_matching(&mut args);
flag_path_separator(&mut args);
flag_passthru(&mut args);
flag_pcre2(&mut args);
flag_pcre2_unicode(&mut args);
flag_pre(&mut args);
flag_pretty(&mut args);
flag_quiet(&mut args);
@ -548,6 +601,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
flag_stats(&mut args);
flag_text(&mut args);
flag_threads(&mut args);
flag_trim(&mut args);
flag_type(&mut args);
flag_type_add(&mut args);
flag_type_clear(&mut args);
@ -809,14 +863,53 @@ This overrides the --count flag. Note that when --count is combined with
args.push(arg);
}
/// Appends the `crlf` switch, followed by its hidden `no-crlf` counterpart,
/// to `args`.
///
/// `crlf` is declared to override both `no-crlf` and `null-data`, while
/// `no-crlf` overrides `crlf` in turn.
fn flag_crlf(args: &mut Vec<RGArg>) {
    const SUMMARY: &str = "Support CRLF line terminators (useful on Windows).";
    const DETAILS: &str = long!("\
When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead
of just '\\n'.
Principally, this permits '$' in regex patterns to match just before CRLF
instead of just before LF. The underlying regex engine may not support this
natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This
may produce slightly different than desired match offsets. It is intended as a
work-around until the regex engine supports this natively.
CRLF support can be disabled with --no-crlf.
");
    // The user-visible switch.
    args.push(
        RGArg::switch("crlf")
            .help(SUMMARY)
            .long_help(DETAILS)
            .overrides("no-crlf")
            .overrides("null-data"),
    );
    // The hidden negation.
    args.push(RGArg::switch("no-crlf").hidden().overrides("crlf"));
}
/// Appends the `debug` switch and the hidden `trace` switch to `args`.
///
/// `trace` is declared to override `debug`; `debug` itself declares no
/// overrides.
fn flag_debug(args: &mut Vec<RGArg>) {
    const SUMMARY: &str = "Show debug messages.";
    const DETAILS: &str = long!("\
Show debug messages. Please use this when filing a bug report.
The --debug flag is generally useful for figuring out why ripgrep skipped
searching a particular file. The debug messages should mention all files
skipped and why they were skipped.
To get even more debug output, use the --trace flag, which implies --debug
along with additional trace data. With --trace, the output could be quite
large and is generally more useful for development.
");
    // The documented switch.
    args.push(RGArg::switch("debug").help(SUMMARY).long_help(DETAILS));
    // The hidden, more verbose variant.
    args.push(RGArg::switch("trace").hidden().overrides("debug"));
}
fn flag_dfa_size_limit(args: &mut Vec<RGArg>) {
@ -842,10 +935,17 @@ default value is 'auto', which will cause ripgrep to do a best effort automatic
detection of encoding on a per-file basis. Other supported values can be found
in the list of labels here:
https://encoding.spec.whatwg.org/#concept-encoding-get
This flag can be disabled with --no-encoding.
");
let arg = RGArg::flag("encoding", "ENCODING").short("E")
.help(SHORT).long_help(LONG);
args.push(arg);
let arg = RGArg::switch("no-encoding")
.hidden()
.overrides("encoding");
args.push(arg);
}
fn flag_file(args: &mut Vec<RGArg>) {
@ -1071,6 +1171,66 @@ Invert matching. Show lines that do not match the given patterns.
args.push(arg);
}
/// Appends the `json` switch, followed by its hidden `no-json` counterpart,
/// to `args`.
///
/// `json` overrides `no-json` (and vice versa) and is declared to conflict
/// with the flags that select a different kind of output: `count`,
/// `count-matches`, `files`, `files-with-matches` and `files-without-match`.
fn flag_json(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Show search results in a JSON Lines format.";
    // Relative to the previous text, this fixes two grammatical slips in the
    // description of the **end** message ("the indicates" and "also include").
    const LONG: &str = long!("\
Enable printing results in a JSON Lines format.
When this flag is provided, ripgrep will emit a sequence of messages, each
encoded as a JSON object, where there are five different message types:
**begin** - A message that indicates a file is being searched and contains at
least one match.
**end** - A message that indicates a file is done being searched. This message
also includes summary statistics about the search for a particular file.
**match** - A message that indicates a match was found. This includes the text
and offsets of the match.
**context** - A message that indicates a contextual line was found. This
includes the text of the line, along with any match information if the search
was inverted.
**summary** - The final message emitted by ripgrep that contains summary
statistics about the search across all files.
Since file paths or the contents of files are not guaranteed to be valid UTF-8
and JSON itself must be representable by a Unicode encoding, ripgrep will emit
all data elements as objects with one of two keys: 'text' or 'bytes'. 'text' is
a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64
encoded contents of the data.
The JSON Lines format is only supported for showing search results. It cannot
be used with other flags that emit other types of output, such as --files,
--files-with-matches, --files-without-match, --count or --count-matches.
ripgrep will report an error if any of the aforementioned flags are used in
concert with --json.
Other flags that control aspects of the standard output such as
--only-matching, --heading, --replace, --max-columns, etc., have no effect
when --json is set.
A more complete description of the JSON format used can be found here:
https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html
The JSON Lines format can be disabled with --no-json.
");
    let arg = RGArg::switch("json")
        .help(SHORT).long_help(LONG)
        .overrides("no-json")
        // These flags all produce a different kind of output, so they are
        // rejected in combination with --json.
        .conflicts(&[
            "count", "count-matches",
            "files", "files-with-matches", "files-without-match",
        ]);
    args.push(arg);

    // Hidden negation of --json.
    let arg = RGArg::switch("no-json")
        .hidden()
        .overrides("json");
    args.push(arg);
}
fn flag_line_number(args: &mut Vec<RGArg>) {
const SHORT: &str = "Show line numbers.";
const LONG: &str = long!("\
@ -1198,6 +1358,79 @@ This flag overrides --mmap.
args.push(arg);
}
/// Appends the `multiline` switch (short form `U`), followed by its hidden
/// `no-multiline` counterpart, to `args`.
///
/// The two switches are declared to override one another.
fn flag_multiline(args: &mut Vec<RGArg>) {
    const SUMMARY: &str = "Enable matching across multiple lines.";
    const DETAILS: &str = long!("\
Enable matching across multiple lines.
When multiline mode is enabled, ripgrep will lift the restriction that a match
cannot include a line terminator. For example, when multiline mode is not
enabled (the default), then the regex '\\p{any}' will match any Unicode
codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden,
and if you try to use it, ripgrep will return an error. However, when multiline
mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n',
and regexes like '\\n' are permitted.
An important caveat is that multiline mode does not change the match semantics
of '.'. Namely, in most regex matchers, a '.' will by default match any
character other than '\\n', and this is true in ripgrep as well. In order to
make '.' match '\\n', you must enable the \"dot all\" flag inside the regex.
For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will
match any character, including '\\n'. Alternatively, the '--multiline-dotall'
flag may be passed to make the \"dot all\" behavior the default. This flag only
applies when multiline search is enabled.
There is no limit on the number of the lines that a single match can span.
**WARNING**: Because of how the underlying regex engine works, multiline
searches may be slower than normal line-oriented searches, and they may also
use more memory. In particular, when multiline mode is enabled, ripgrep
requires that each file it searches is laid out contiguously in memory
(either by reading it onto the heap or by memory-mapping it). Things that
cannot be memory-mapped (such as stdin) will be consumed until EOF before
searching can begin. In general, ripgrep will only do these things when
necessary. Specifically, if the --multiline flag is provided but the regex
does not contain patterns that would match '\\n' characters, then ripgrep
will automatically avoid reading each file into memory before searching it.
Nevertheless, if you only care about matches spanning at most one line, then it
is always better to disable multiline mode.
This flag can be disabled with --no-multiline.
");
    // The user-visible switch.
    args.push(
        RGArg::switch("multiline")
            .short("U")
            .help(SUMMARY)
            .long_help(DETAILS)
            .overrides("no-multiline"),
    );
    // The hidden negation.
    args.push(
        RGArg::switch("no-multiline")
            .hidden()
            .overrides("multiline"),
    );
}
/// Appends the `multiline-dotall` switch to `args`.
///
/// Unlike most of the other switches defined alongside it, no hidden
/// negation is registered for this one.
fn flag_multiline_dotall(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Make '.' match new lines when multiline is enabled.";
    // Relative to the previous text, "where as" is corrected to "whereas".
    const LONG: &str = long!("\
This flag enables \"dot all\" in your regex pattern, which causes '.' to match
newlines when multiline searching is enabled. This flag has no effect if
multiline searching isn't enabled with the --multiline flag.
Normally, a '.' will match any character except newlines. While this behavior
typically isn't relevant for line-oriented matching (since matches can span at
most one line), this can be useful when searching with the -U/--multiline flag.
By default, the multiline mode runs without this flag.
This flag is generally intended to be used in an alias or your ripgrep config
file if you prefer \"dot all\" semantics by default. Note that regardless of
whether this flag is used, \"dot all\" semantics can still be controlled via
inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot
all\" whereas '(?-s:.)' always disables \"dot all\".
");
    let arg = RGArg::switch("multiline-dotall")
        .help(SHORT).long_help(LONG);
    args.push(arg);
}
fn flag_no_config(args: &mut Vec<RGArg>) {
const SHORT: &str = "Never read configuration files.";
const LONG: &str = long!("\
@ -1340,6 +1573,29 @@ for use with xargs.
args.push(arg);
}
/// Appends the `null-data` switch to `args`.
///
/// `null-data` is declared to override `crlf`. No hidden negation is
/// registered for it.
fn flag_null_data(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Use NUL as a line terminator instead of \\n.";
    // Relative to the previous text this fixes "Use NUL instead can be",
    // "such that that emitted", and the spelling of find's option, which is
    // documented with a single dash (-print0).
    const LONG: &str = long!("\
Enabling this option causes ripgrep to use NUL as a line terminator instead of
the default of '\\n'.
This is useful when searching large binary files that would otherwise have very
long lines if '\\n' were used as the line terminator. In particular, ripgrep
requires that, at a minimum, each line must fit into memory. Using NUL instead
can be a useful stopgap to keep memory requirements low and avoid OOM (out of
memory) conditions.
This is also useful for processing NUL delimited data, such as that emitted
when using ripgrep's -0/--null flag or find's -print0 flag.
Using this flag implies -a/--text.
");
    let arg = RGArg::switch("null-data")
        .help(SHORT).long_help(LONG)
        .overrides("crlf");
    args.push(arg);
}
fn flag_only_matching(args: &mut Vec<RGArg>) {
const SHORT: &str = "Print only matches parts of a line.";
const LONG: &str = long!("\
@ -1374,13 +1630,76 @@ the empty string. For example, if you are searching using 'rg foo' then using
'rg \"^|foo\"' instead will emit every line in every file searched, but only
occurrences of 'foo' will be highlighted. This flag enables the same behavior
without needing to modify the pattern.
This flag conflicts with the --only-matching and --replace flags.
");
let arg = RGArg::switch("passthru")
.help(SHORT).long_help(LONG)
.alias("passthrough")
.conflicts(&["only-matching", "replace"]);
.alias("passthrough");
args.push(arg);
}
/// Appends the `pcre2` switch (short form `P`), followed by its hidden
/// `no-pcre2` counterpart, to `args`.
///
/// The two switches are declared to override one another.
fn flag_pcre2(args: &mut Vec<RGArg>) {
    const SUMMARY: &str = "Enable PCRE2 matching.";
    const DETAILS: &str = long!("\
When this flag is present, ripgrep will use the PCRE2 regex engine instead of
its default regex engine.
This is generally useful when you want to use features such as look-around
or backreferences.
Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in
your build of ripgrep, then using this flag will result in ripgrep printing
an error message and exiting.
This flag can be disabled with --no-pcre2.
");
    // The user-visible switch.
    args.push(
        RGArg::switch("pcre2")
            .short("P")
            .help(SUMMARY)
            .long_help(DETAILS)
            .overrides("no-pcre2"),
    );
    // The hidden negation.
    args.push(RGArg::switch("no-pcre2").hidden().overrides("pcre2"));
}
/// Appends the `pcre2-unicode` switch, followed by its hidden
/// `no-pcre2-unicode` counterpart, to `args`.
///
/// The two switches are declared to override one another, matching the other
/// switch/negation pairs defined in this module (e.g. `pcre2`/`no-pcre2`).
fn flag_pcre2_unicode(args: &mut Vec<RGArg>) {
    const SHORT: &str = "Enable Unicode mode for PCRE2 matching.";
    // Relative to the previous text, "hault" is corrected to "halt".
    const LONG: &str = long!("\
When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2
matching is not enabled, then this flag has no effect.
This flag is enabled by default when PCRE2 matching is enabled.
When PCRE2's Unicode mode is enabled several different types of patterns become
Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s'
and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint
instead of any byte. Caseless matching will also use Unicode simple case
folding instead of ASCII-only case insensitivity.
Unicode mode in PCRE2 represents a critical trade off in the user experience
of ripgrep. In particular, unlike the default regex engine, PCRE2 does not
support the ability to search possibly invalid UTF-8 with Unicode features
enabled. Instead, PCRE2 *requires* that everything it searches when Unicode
mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes
of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode
mode enabled and you attempt to search invalid UTF-8, then the search for that
file will halt and print an error. For this reason, when PCRE2's Unicode mode
is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by
replacing them with the Unicode replacement codepoint.
If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode
is enabled, then pass the --no-encoding flag to disable all transcoding.
This flag can be disabled with --no-pcre2-unicode.
");
    let arg = RGArg::switch("pcre2-unicode")
        .help(SHORT).long_help(LONG)
        // Previously only the negation declared an override, so a later
        // --pcre2-unicode did not cancel an earlier --no-pcre2-unicode.
        .overrides("no-pcre2-unicode");
    args.push(arg);

    // Hidden negation of --pcre2-unicode.
    let arg = RGArg::switch("no-pcre2-unicode")
        .hidden()
        .overrides("pcre2-unicode");
    args.push(arg);
}
@ -1592,11 +1911,18 @@ searched, and the time taken for the entire search to complete.
This set of aggregate statistics may expand over time.
Note that this flag has no effect if --files, --files-with-matches or
--files-without-match is passed.");
--files-without-match is passed.
This flag can be disabled with --no-stats.
");
let arg = RGArg::switch("stats")
.help(SHORT).long_help(LONG);
.help(SHORT).long_help(LONG)
.overrides("no-stats");
args.push(arg);
let arg = RGArg::switch("no-stats")
.hidden()
.overrides("stats");
args.push(arg);
}
@ -1639,6 +1965,25 @@ causes ripgrep to choose the thread count using heuristics.
args.push(arg);
}
/// Appends the `trim` switch, followed by its hidden `no-trim` counterpart,
/// to `args`.
///
/// The two switches are declared to override one another.
fn flag_trim(args: &mut Vec<RGArg>) {
    const SUMMARY: &str = "Trim prefixed whitespace from matches.";
    const DETAILS: &str = long!("\
When set, all ASCII whitespace at the beginning of each line printed will be
trimmed.
This flag can be disabled with --no-trim.
");
    // The user-visible switch.
    args.push(
        RGArg::switch("trim")
            .help(SUMMARY)
            .long_help(DETAILS)
            .overrides("no-trim"),
    );
    // The hidden negation.
    args.push(RGArg::switch("no-trim").hidden().overrides("trim"));
}
fn flag_type(args: &mut Vec<RGArg>) {
const SHORT: &str = "Only search files matching TYPE.";
const LONG: &str = long!("\

File diff suppressed because it is too large Load Diff

View File

@ -12,10 +12,7 @@ use std::path::{Path, PathBuf};
use Result;
/// Return a sequence of arguments derived from ripgrep rc configuration files.
///
/// If no_messages is false and there was a problem reading a config file,
/// then errors are printed to stderr.
pub fn args(no_messages: bool) -> Vec<OsString> {
pub fn args() -> Vec<OsString> {
let config_path = match env::var_os("RIPGREP_CONFIG_PATH") {
None => return vec![],
Some(config_path) => {
@ -28,20 +25,20 @@ pub fn args(no_messages: bool) -> Vec<OsString> {
let (args, errs) = match parse(&config_path) {
Ok((args, errs)) => (args, errs),
Err(err) => {
if !no_messages {
eprintln!("{}", err);
}
message!("{}", err);
return vec![];
}
};
if !no_messages && !errs.is_empty() {
if !errs.is_empty() {
for err in errs {
eprintln!("{}:{}", config_path.display(), err);
message!("{}:{}", config_path.display(), err);
}
}
debug!(
"{}: arguments loaded from config file: {:?}",
config_path.display(), args);
config_path.display(),
args
);
args
}
@ -59,7 +56,7 @@ fn parse<P: AsRef<Path>>(
let path = path.as_ref();
match File::open(&path) {
Ok(file) => parse_reader(file),
Err(err) => errored!("{}: {}", path.display(), err),
Err(err) => Err(From::from(format!("{}: {}", path.display(), err))),
}
}

View File

@ -34,19 +34,30 @@ impl Log for Logger {
match (record.file(), record.line()) {
(Some(file), Some(line)) => {
eprintln!(
"{}/{}/{}:{}: {}",
record.level(), record.target(),
file, line, record.args());
"{}|{}|{}:{}: {}",
record.level(),
record.target(),
file,
line,
record.args()
);
}
(Some(file), None) => {
eprintln!(
"{}/{}/{}: {}",
record.level(), record.target(), file, record.args());
"{}|{}|{}: {}",
record.level(),
record.target(),
file,
record.args()
);
}
_ => {
eprintln!(
"{}/{}: {}",
record.level(), record.target(), record.args());
"{}|{}: {}",
record.level(),
record.target(),
record.args()
);
}
}
}

View File

@ -1,43 +1,34 @@
extern crate atty;
extern crate bytecount;
#[macro_use]
extern crate clap;
extern crate encoding_rs;
extern crate encoding_rs_io;
extern crate globset;
extern crate grep;
extern crate ignore;
#[macro_use]
extern crate lazy_static;
extern crate libc;
#[macro_use]
extern crate log;
extern crate memchr;
extern crate memmap;
extern crate num_cpus;
extern crate regex;
extern crate same_file;
#[macro_use]
extern crate serde_json;
extern crate termcolor;
#[cfg(windows)]
extern crate winapi;
use std::error::Error;
use std::io;
use std::process;
use std::result;
use std::sync::Arc;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::mpsc;
use std::thread;
use std::time::{Duration, Instant};
use std::sync::{Arc, Mutex};
use std::time::Instant;
use ignore::WalkState;
use args::Args;
use worker::Work;
use subject::Subject;
macro_rules! errored {
($($tt:tt)*) => {
return Err(From::from(format!($($tt)*)));
}
}
#[macro_use]
mod messages;
mod app;
mod args;
@ -45,20 +36,17 @@ mod config;
mod decompressor;
mod preprocessor;
mod logger;
mod pathutil;
mod printer;
mod search_buffer;
mod search_stream;
mod path_printer;
mod search;
mod subject;
mod unescape;
mod worker;
pub type Result<T> = result::Result<T, Box<Error>>;
pub type Result<T> = ::std::result::Result<T, Box<::std::error::Error>>;
fn main() {
reset_sigpipe();
match Args::parse().map(Arc::new).and_then(run) {
Ok(0) => process::exit(1),
Ok(_) => process::exit(0),
pub fn main() {
match Args::parse().and_then(run) {
Ok(true) => process::exit(0),
Ok(false) => process::exit(1),
Err(err) => {
eprintln!("{}", err);
process::exit(2);
@ -66,382 +54,242 @@ fn main() {
}
}
fn run(args: Arc<Args>) -> Result<u64> {
if args.never_match() {
return Ok(0);
}
let threads = args.threads();
if args.files() {
if threads == 1 || args.is_one_path() {
run_files_one_thread(&args)
} else {
run_files_parallel(args)
}
} else if args.type_list() {
run_types(&args)
} else if threads == 1 || args.is_one_path() {
run_one_thread(&args)
} else {
run_parallel(&args)
fn run(args: Args) -> Result<bool> {
use args::Command::*;
match args.command()? {
Search => search(args),
SearchParallel => search_parallel(args),
SearchNever => Ok(false),
Files => files(args),
FilesParallel => files_parallel(args),
Types => types(args),
}
}
fn run_parallel(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let bufwtr = Arc::new(args.buffer_writer());
let quiet_matched = args.quiet_matched();
let paths_searched = Arc::new(AtomicUsize::new(0));
let match_line_count = Arc::new(AtomicUsize::new(0));
let paths_matched = Arc::new(AtomicUsize::new(0));
/// The top-level entry point for single-threaded search. This recursively
/// steps through the file list (current directory by default) and searches
/// each file sequentially.
fn search(args: Args) -> Result<bool> {
let started_at = Instant::now();
let quit_after_match = args.quit_after_match()?;
let subject_builder = args.subject_builder();
let mut stats = args.stats()?;
let mut searcher = args.search_worker(args.stdout())?;
let mut matched = false;
args.walker_parallel().run(|| {
let args = Arc::clone(args);
let quiet_matched = quiet_matched.clone();
let paths_searched = paths_searched.clone();
let match_line_count = match_line_count.clone();
let paths_matched = paths_matched.clone();
for result in args.walker()? {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => continue,
};
let search_result = match searcher.search(&subject) {
Ok(search_result) => search_result,
Err(err) => {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
break;
}
message!("{}: {}", subject.path().display(), err);
continue;
}
};
matched = matched || search_result.has_match();
if let Some(ref mut stats) = stats {
*stats += search_result.stats().unwrap();
}
if matched && quit_after_match {
break;
}
}
if let Some(ref stats) = stats {
let elapsed = Instant::now().duration_since(started_at);
// We don't care if we couldn't print this successfully.
let _ = searcher.print_stats(elapsed, stats);
}
Ok(matched)
}
/// The top-level entry point for multi-threaded search. The parallelism is
/// itself achieved by the recursive directory traversal. All we need to do is
/// feed it a worker for performing a search on each file.
fn search_parallel(args: Args) -> Result<bool> {
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
let quit_after_match = args.quit_after_match()?;
let started_at = Instant::now();
let subject_builder = Arc::new(args.subject_builder());
let bufwtr = Arc::new(args.buffer_writer()?);
let stats = Arc::new(args.stats()?.map(Mutex::new));
let matched = Arc::new(AtomicBool::new(false));
let mut searcher_err = None;
args.walker_parallel()?.run(|| {
let args = args.clone();
let bufwtr = Arc::clone(&bufwtr);
let mut buf = bufwtr.buffer();
let mut worker = args.worker();
Box::new(move |result| {
use ignore::WalkState::*;
let stats = Arc::clone(&stats);
let matched = Arc::clone(&matched);
let subject_builder = Arc::clone(&subject_builder);
let mut searcher = match args.search_worker(bufwtr.buffer()) {
Ok(searcher) => searcher,
Err(err) => {
searcher_err = Some(err);
return Box::new(move |_| {
WalkState::Quit
});
}
};
if quiet_matched.has_match() {
return Quit;
}
let dent = match get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
args.no_messages(),
args.no_ignore_messages(),
) {
None => return Continue,
Some(dent) => dent,
Box::new(move |result| {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => return WalkState::Continue,
};
paths_searched.fetch_add(1, Ordering::SeqCst);
buf.clear();
{
// This block actually executes the search and prints the
// results into outbuf.
let mut printer = args.printer(&mut buf);
let count =
if dent.is_stdin() {
worker.run(&mut printer, Work::Stdin)
} else {
worker.run(&mut printer, Work::DirEntry(dent))
};
match_line_count.fetch_add(count as usize, Ordering::SeqCst);
if quiet_matched.set_match(count > 0) {
return Quit;
}
if args.stats() && count > 0 {
paths_matched.fetch_add(1, Ordering::SeqCst);
searcher.printer().get_mut().clear();
let search_result = match searcher.search(&subject) {
Ok(search_result) => search_result,
Err(err) => {
message!("{}: {}", subject.path().display(), err);
return WalkState::Continue;
}
};
if search_result.has_match() {
matched.store(true, SeqCst);
}
if let Some(ref locked_stats) = *stats {
let mut stats = locked_stats.lock().unwrap();
*stats += search_result.stats().unwrap();
}
if let Err(err) = bufwtr.print(searcher.printer().get_mut()) {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
return WalkState::Quit;
}
// Otherwise, we continue on our merry way.
message!("{}: {}", subject.path().display(), err);
}
if matched.load(SeqCst) && quit_after_match {
WalkState::Quit
} else {
WalkState::Continue
}
// BUG(burntsushi): We should handle this error instead of ignoring
// it. See: https://github.com/BurntSushi/ripgrep/issues/200
let _ = bufwtr.print(&buf);
Continue
})
});
if !args.paths().is_empty() && paths_searched.load(Ordering::SeqCst) == 0 {
if !args.no_messages() {
eprint_nothing_searched();
}
if let Some(err) = searcher_err.take() {
return Err(err);
}
let match_line_count = match_line_count.load(Ordering::SeqCst) as u64;
let paths_searched = paths_searched.load(Ordering::SeqCst) as u64;
let paths_matched = paths_matched.load(Ordering::SeqCst) as u64;
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
if let Some(ref locked_stats) = *stats {
let elapsed = Instant::now().duration_since(started_at);
let stats = locked_stats.lock().unwrap();
let mut searcher = args.search_worker(args.stdout())?;
// We don't care if we couldn't print this successfully.
let _ = searcher.print_stats(elapsed, &stats);
}
Ok(match_line_count)
Ok(matched.load(SeqCst))
}
fn run_one_thread(args: &Arc<Args>) -> Result<u64> {
let start_time = Instant::now();
let mut stdout = args.stdout();
let mut worker = args.worker();
let mut paths_searched: u64 = 0;
let mut match_line_count = 0;
let mut paths_matched: u64 = 0;
for result in args.walker() {
let dent = match get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
args.no_messages(),
args.no_ignore_messages(),
) {
/// The top-level entry point for listing files without searching them. This
/// recursively steps through the file list (current directory by default) and
/// prints each path sequentially using a single thread.
fn files(args: Args) -> Result<bool> {
let quit_after_match = args.quit_after_match()?;
let subject_builder = args.subject_builder();
let mut matched = false;
let mut path_printer = args.path_printer(args.stdout())?;
for result in args.walker()? {
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => continue,
Some(dent) => dent,
};
let mut printer = args.printer(&mut stdout);
if match_line_count > 0 {
if args.quiet() {
matched = true;
if quit_after_match {
break;
}
if let Err(err) = path_printer.write_path(subject.path()) {
// A broken pipe means graceful termination.
if err.kind() == io::ErrorKind::BrokenPipe {
break;
}
if let Some(sep) = args.file_separator() {
printer = printer.file_separator(sep);
}
}
paths_searched += 1;
let count =
if dent.is_stdin() {
worker.run(&mut printer, Work::Stdin)
} else {
worker.run(&mut printer, Work::DirEntry(dent))
};
match_line_count += count;
if args.stats() && count > 0 {
paths_matched += 1;
// Otherwise, we have some other error that's preventing us from
// writing to stdout, so we should bubble it up.
return Err(err.into());
}
}
if !args.paths().is_empty() && paths_searched == 0 {
if !args.no_messages() {
eprint_nothing_searched();
}
}
if args.stats() {
print_stats(
match_line_count,
paths_searched,
paths_matched,
start_time.elapsed(),
);
}
Ok(match_line_count)
Ok(matched)
}
fn run_files_parallel(args: Arc<Args>) -> Result<u64> {
let print_args = Arc::clone(&args);
let (tx, rx) = mpsc::channel::<ignore::DirEntry>();
let print_thread = thread::spawn(move || {
let mut printer = print_args.printer(print_args.stdout());
let mut file_count = 0;
for dent in rx.iter() {
if !print_args.quiet() {
printer.path(dent.path());
}
file_count += 1;
/// The top-level entry point for listing files without searching them. This
/// recursively steps through the file list (current directory by default) and
/// prints each path sequentially using multiple threads.
fn files_parallel(args: Args) -> Result<bool> {
use std::sync::atomic::AtomicBool;
use std::sync::atomic::Ordering::SeqCst;
use std::sync::mpsc;
use std::thread;
let quit_after_match = args.quit_after_match()?;
let subject_builder = Arc::new(args.subject_builder());
let mut path_printer = args.path_printer(args.stdout())?;
let matched = Arc::new(AtomicBool::new(false));
let (tx, rx) = mpsc::channel::<Subject>();
let print_thread = thread::spawn(move || -> io::Result<()> {
for subject in rx.iter() {
path_printer.write_path(subject.path())?;
}
file_count
Ok(())
});
args.walker_parallel().run(move || {
let args = Arc::clone(&args);
args.walker_parallel()?.run(|| {
let subject_builder = Arc::clone(&subject_builder);
let matched = Arc::clone(&matched);
let tx = tx.clone();
Box::new(move |result| {
if let Some(dent) = get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
args.no_messages(),
args.no_ignore_messages(),
) {
tx.send(dent).unwrap();
if args.quiet() {
return ignore::WalkState::Quit
let subject = match subject_builder.build_from_result(result) {
Some(subject) => subject,
None => return WalkState::Continue,
};
matched.store(true, SeqCst);
if quit_after_match {
WalkState::Quit
} else {
match tx.send(subject) {
Ok(_) => WalkState::Continue,
Err(_) => WalkState::Quit,
}
}
ignore::WalkState::Continue
})
});
Ok(print_thread.join().unwrap())
}
fn run_files_one_thread(args: &Arc<Args>) -> Result<u64> {
let mut printer = args.printer(args.stdout());
let mut file_count = 0;
for result in args.walker() {
let dent = match get_or_log_dir_entry(
result,
args.stdout_handle(),
args.files(),
args.no_messages(),
args.no_ignore_messages(),
) {
None => continue,
Some(dent) => dent,
};
file_count += 1;
if args.quiet() {
break;
} else {
printer.path(dent.path());
drop(tx);
if let Err(err) = print_thread.join().unwrap() {
// A broken pipe means graceful termination, so fall through.
// Otherwise, something bad happened while writing to stdout, so bubble
// it up.
if err.kind() != io::ErrorKind::BrokenPipe {
return Err(err.into());
}
}
Ok(file_count)
Ok(matched.load(SeqCst))
}
fn run_types(args: &Arc<Args>) -> Result<u64> {
let mut printer = args.printer(args.stdout());
let mut ty_count = 0;
for def in args.type_defs() {
printer.type_def(def);
ty_count += 1;
}
Ok(ty_count)
}
/// The top-level entry point for --type-list.
fn types(args: Args) -> Result<bool> {
let mut count = 0;
let mut stdout = args.stdout();
for def in args.type_defs()? {
count += 1;
stdout.write_all(def.name().as_bytes())?;
stdout.write_all(b": ")?;
fn get_or_log_dir_entry(
result: result::Result<ignore::DirEntry, ignore::Error>,
stdout_handle: Option<&same_file::Handle>,
files_only: bool,
no_messages: bool,
no_ignore_messages: bool,
) -> Option<ignore::DirEntry> {
match result {
Err(err) => {
if !no_messages {
eprintln!("{}", err);
let mut first = true;
for glob in def.globs() {
if !first {
stdout.write_all(b", ")?;
}
None
}
Ok(dent) => {
if let Some(err) = dent.error() {
if !no_messages && !no_ignore_messages {
eprintln!("{}", err);
}
}
if dent.file_type().is_none() {
return Some(dent); // entry is stdin
}
// A depth of 0 means the user gave the path explicitly, so we
// should always try to search it.
if dent.depth() == 0 && !ignore_entry_is_dir(&dent) {
return Some(dent);
} else if !ignore_entry_is_file(&dent) {
return None;
}
// If we are redirecting stdout to a file, then don't search that
// file.
if !files_only && is_stdout_file(&dent, stdout_handle, no_messages) {
return None;
}
Some(dent)
stdout.write_all(glob.as_bytes())?;
first = false;
}
stdout.write_all(b"\n")?;
}
}
/// Returns true if and only if the given `ignore::DirEntry` points to a
/// directory.
///
/// This works around a bug in Rust's standard library:
/// https://github.com/rust-lang/rust/issues/46484
///
/// Rather than consulting `file_type()`, this reads the entry's metadata and
/// tests the `FILE_ATTRIBUTE_DIRECTORY` bit of its Windows file attributes.
/// If the metadata cannot be read, the entry is reported as not a directory.
#[cfg(windows)]
fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool {
use std::os::windows::fs::MetadataExt;
use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY;
dent.metadata().map(|md| {
md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0
}).unwrap_or(false)
}
/// Reports whether the given `ignore::DirEntry` refers to a directory.
///
/// An entry for which `file_type()` yields `None` is not considered a
/// directory.
#[cfg(not(windows))]
fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool {
    match dent.file_type() {
        Some(file_type) => file_type.is_dir(),
        None => false,
    }
}
/// Returns true if and only if the given `ignore::DirEntry` points to a
/// file.
///
/// This works around a bug in Rust's standard library:
/// https://github.com/rust-lang/rust/issues/46484
///
/// On Windows, "file" is defined purely as the negation of
/// `ignore_entry_is_dir`: every entry not reported as a directory is treated
/// as a file.
#[cfg(windows)]
fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool {
!ignore_entry_is_dir(dent)
}
/// Reports whether the given `ignore::DirEntry` refers to a regular file.
///
/// An entry for which `file_type()` yields `None` is not considered a file.
#[cfg(not(windows))]
fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool {
    match dent.file_type() {
        Some(file_type) => file_type.is_file(),
        None => false,
    }
}
/// Reports whether `dent` is the same file that stdout is attached to.
///
/// Returns false when no stdout handle is given, when the cheap
/// `maybe_dent_eq_handle` pre-check rules the entry out, or when a handle for
/// the entry's path cannot be opened. In that last case the error is printed
/// to stderr unless `no_messages` is set.
fn is_stdout_file(
    dent: &ignore::DirEntry,
    stdout_handle: Option<&same_file::Handle>,
    no_messages: bool,
) -> bool {
    let stdout = match stdout_handle {
        Some(handle) => handle,
        None => return false,
    };
    // Opening a handle costs an extra stat call, so skip it whenever the
    // cheap comparison already proves the two cannot be the same file.
    if !maybe_dent_eq_handle(dent, stdout) {
        return false;
    }
    let path = dent.path();
    let opened = match same_file::Handle::from_path(path) {
        Ok(handle) => handle,
        Err(err) => {
            if !no_messages {
                eprintln!("{}: {}", path.display(), err);
            }
            return false;
        }
    };
    *stdout == opened
}
/// Cheap pre-check for whether `dent` could be the file behind `handle`.
///
/// Compares inode numbers only: returns true when the entry reports an inode
/// equal to the handle's, and false when it differs or is unavailable.
#[cfg(unix)]
fn maybe_dent_eq_handle(
    dent: &ignore::DirEntry,
    handle: &same_file::Handle,
) -> bool {
    match dent.ino() {
        Some(ino) => ino == handle.ino(),
        None => false,
    }
}
/// Non-Unix fallback for the cheap "could these be the same file?" check.
///
/// Neither argument is inspected; this always answers true, so a caller that
/// uses it as a pre-filter must always fall through to its precise
/// comparison.
#[cfg(not(unix))]
fn maybe_dent_eq_handle(_: &ignore::DirEntry, _: &same_file::Handle) -> bool {
true
}
/// Prints a hint to stderr explaining that no files were searched and
/// suggesting a re-run with `--debug`.
fn eprint_nothing_searched() {
    const HINT: &str = "No files were searched, which means ripgrep probably \
                        applied a filter you didn't expect. \
                        Try running again with --debug.";
    eprintln!("{}", HINT);
}
/// Prints a four-line search summary to stdout, preceded by a blank line.
///
/// The lines report, in order: matched lines, files containing matches,
/// files searched, and the elapsed time in seconds with three decimals.
fn print_stats(
    match_count: u64,
    paths_searched: u64,
    paths_matched: u64,
    time_elapsed: Duration,
) {
    // Combine whole seconds and the nanosecond remainder into one float.
    let whole_secs = time_elapsed.as_secs() as f64;
    let frac_secs = time_elapsed.subsec_nanos() as f64 * 1e-9;
    println!(
        "\n{matched_lines} matched lines\n\
         {files_matched} files contained matches\n\
         {files_searched} files searched\n\
         {seconds:.3} seconds",
        matched_lines = match_count,
        files_matched = paths_matched,
        files_searched = paths_searched,
        seconds = whole_secs + frac_secs
    );
}
// The Rust standard library suppresses the default SIGPIPE behavior, so that
// writing to a closed pipe doesn't kill the process. The goal is to instead
// handle errors through the normal result mechanism. Ripgrep needs some
// refactoring before it will be able to do that, however, so we re-enable the
// standard SIGPIPE behavior as a workaround. See
// https://github.com/BurntSushi/ripgrep/issues/200.
#[cfg(unix)]
fn reset_sigpipe() {
// SAFETY: this only restores the default disposition (`SIG_DFL`) for
// `SIGPIPE`; no Rust function is registered as a signal handler. The return
// value of `signal` is not checked.
unsafe {
libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
}
#[cfg(not(unix))]
fn reset_sigpipe() {
// no-op
Ok(count > 0)
}

50
src/messages.rs Normal file
View File

@ -0,0 +1,50 @@
use std::sync::atomic::{ATOMIC_BOOL_INIT, AtomicBool, Ordering};
// Process-wide flag read by `messages()` and written by `set_messages()`.
// `ATOMIC_BOOL_INIT` starts it out as `false`.
static MESSAGES: AtomicBool = ATOMIC_BOOL_INIT;
// Process-wide flag read by `ignore_messages()` and written by
// `set_ignore_messages()`. Also starts out as `false`.
static IGNORE_MESSAGES: AtomicBool = ATOMIC_BOOL_INIT;
/// Prints to stderr using `eprintln!` syntax, but only when
/// `::messages::messages()` returns true.
///
/// All tokens are forwarded unchanged to `eprintln!`.
#[macro_export]
macro_rules! message {
($($tt:tt)*) => {
if ::messages::messages() {
eprintln!($($tt)*);
}
}
}
/// Prints an "ignore" related message to stderr using `eprintln!` syntax.
///
/// The message is emitted only when both `::messages::messages()` and
/// `::messages::ignore_messages()` return true, so disabling general messages
/// also silences these.
#[macro_export]
macro_rules! ignore_message {
($($tt:tt)*) => {
if ::messages::messages() && ::messages::ignore_messages() {
eprintln!($($tt)*);
}
}
}
/// Returns true if and only if messages should be shown.
///
/// This reads the process-wide flag last written by `set_messages`.
pub fn messages() -> bool {
MESSAGES.load(Ordering::SeqCst)
}
/// Set whether messages should be shown or not.
///
/// By default, they are not shown.
///
/// The setting is stored in a static, so it applies to the whole process and
/// is what `messages()` subsequently returns.
pub fn set_messages(yes: bool) {
MESSAGES.store(yes, Ordering::SeqCst)
}
/// Returns true if and only if "ignore" related messages should be shown.
///
/// This reports only the flag written by `set_ignore_messages`; it does not
/// itself take the general `messages()` setting into account.
pub fn ignore_messages() -> bool {
IGNORE_MESSAGES.load(Ordering::SeqCst)
}
/// Set whether "ignore" related messages should be shown or not.
///
/// By default, they are not shown.
///
/// Note that this is overridden if `messages` is disabled. Namely, if
/// `messages` is disabled, then "ignore" messages are never shown, regardless
/// of this setting. (The override is applied by the `ignore_message!` macro,
/// which checks both flags; this function only stores the value.)
pub fn set_ignore_messages(yes: bool) {
IGNORE_MESSAGES.store(yes, Ordering::SeqCst)
}

101
src/path_printer.rs Normal file
View File

@ -0,0 +1,101 @@
use std::io;
use std::path::Path;
use grep::printer::{ColorSpecs, PrinterPath};
use termcolor::WriteColor;
/// A configuration for describing how paths should be written.
#[derive(Clone, Debug)]
struct Config {
/// Color specifications; only the `path()` component is used when writing.
colors: ColorSpecs,
/// Optional replacement path separator, handed to
/// `PrinterPath::with_separator`.
separator: Option<u8>,
/// The single byte written after every path.
terminator: u8,
}
impl Default for Config {
/// Default configuration: default color specs, no separator override and
/// `\n` as the path terminator.
fn default() -> Config {
Config {
colors: ColorSpecs::default(),
separator: None,
terminator: b'\n',
}
}
}
/// A builder for constructing path printers.
///
/// The builder holds a `Config` that is cloned into every `PathPrinter` it
/// builds.
#[derive(Clone, Debug)]
pub struct PathPrinterBuilder {
config: Config,
}
impl PathPrinterBuilder {
    /// Return a new path printer builder with a default configuration.
    pub fn new() -> PathPrinterBuilder {
        PathPrinterBuilder { config: Config::default() }
    }

    /// Create a new path printer with the current configuration that writes
    /// paths to the given writer.
    ///
    /// The configuration is cloned, so later changes to this builder do not
    /// affect printers that were already built.
    pub fn build<W: WriteColor>(&self, wtr: W) -> PathPrinter<W> {
        PathPrinter {
            config: self.config.clone(),
            wtr,
        }
    }

    /// Set the color specification for this printer.
    ///
    /// Currently, only the `path` component of the given specification is
    /// used.
    pub fn color_specs(
        &mut self,
        specs: ColorSpecs,
    ) -> &mut PathPrinterBuilder {
        self.config.colors = specs;
        self
    }

    /// A path separator.
    ///
    /// When provided, the path's default separator will be replaced with
    /// the given separator.
    ///
    /// This is not set by default, and the system's default path separator
    /// will be used.
    pub fn separator(&mut self, sep: Option<u8>) -> &mut PathPrinterBuilder {
        self.config.separator = sep;
        self
    }

    /// A path terminator.
    ///
    /// When printing a path, it will be terminated by the given byte.
    ///
    /// This is set to `\n` by default.
    pub fn terminator(&mut self, terminator: u8) -> &mut PathPrinterBuilder {
        self.config.terminator = terminator;
        self
    }
}
/// A printer for emitting paths to a writer, with optional color support.
///
/// Values are created via `PathPrinterBuilder::build`.
#[derive(Debug)]
pub struct PathPrinter<W> {
/// Snapshot of the builder's configuration taken at build time.
config: Config,
/// The destination that paths are written to.
wtr: W,
}
impl<W: WriteColor> PathPrinter<W> {
    /// Write the given path, followed by the configured terminator byte, to
    /// the underlying writer.
    ///
    /// If the writer supports color, the path bytes are wrapped in the
    /// configured path color and a reset; the terminator is never colored.
    /// The first I/O error encountered is returned.
    pub fn write_path(&mut self, path: &Path) -> io::Result<()> {
        let ppath = PrinterPath::with_separator(path, self.config.separator);
        let bytes = ppath.as_bytes();
        if self.wtr.supports_color() {
            self.wtr.set_color(self.config.colors.path())?;
            self.wtr.write_all(bytes)?;
            self.wtr.reset()?;
        } else {
            self.wtr.write_all(bytes)?;
        }
        let terminator = [self.config.terminator];
        self.wtr.write_all(&terminator)
    }
}

View File

@ -1,42 +0,0 @@
/*!
The pathutil module provides platform specific operations on paths that are
typically faster than the same operations as provided in `std::path`. In
particular, we really want to avoid the costly operation of parsing the path
into its constituent components. We give up on Windows, but on Unix, we deal
with the raw bytes directly.
On large repositories (like chromium), this can have a ~25% performance
improvement on just listing the files to search (!).
*/
use std::path::Path;
/// Removes `prefix` from the front of `path` and returns what is left.
///
/// The comparison is done on the raw bytes of both paths, not on parsed path
/// components. Returns `None` when `path` does not begin with `prefix`.
#[cfg(unix)]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
    prefix: &'a P,
    path: &'a Path,
) -> Option<&'a Path> {
    use std::ffi::OsStr;
    use std::os::unix::ffi::OsStrExt;

    let prefix_bytes = prefix.as_ref().as_os_str().as_bytes();
    let path_bytes = path.as_os_str().as_bytes();
    if !path_bytes.starts_with(prefix_bytes) {
        return None;
    }
    let rest = &path_bytes[prefix_bytes.len()..];
    Some(Path::new(OsStr::from_bytes(rest)))
}
/// Strip `prefix` from the `path` and return the remainder.
///
/// If `path` doesn't have a prefix `prefix`, then return `None`.
///
/// On non-Unix platforms this simply defers to `Path::strip_prefix` and
/// discards its error.
#[cfg(not(unix))]
pub fn strip_prefix<'a, P: AsRef<Path> + ?Sized>(
prefix: &'a P,
path: &'a Path,
) -> Option<&'a Path> {
path.strip_prefix(prefix).ok()
}

View File

@ -3,8 +3,6 @@ use std::io::{self, Read};
use std::path::{Path, PathBuf};
use std::process::{self, Stdio};
use Result;
/// PreprocessorReader provides an `io::Read` impl to read kids output.
#[derive(Debug)]
pub struct PreprocessorReader {
@ -26,7 +24,7 @@ impl PreprocessorReader {
pub fn from_cmd_path(
cmd: PathBuf,
path: &Path,
) -> Result<PreprocessorReader> {
) -> io::Result<PreprocessorReader> {
let child = process::Command::new(&cmd)
.arg(path)
.stdin(Stdio::from(File::open(path)?))
@ -34,10 +32,13 @@ impl PreprocessorReader {
.stderr(Stdio::piped())
.spawn()
.map_err(|err| {
format!(
"error running preprocessor command '{}': {}",
cmd.display(),
err,
io::Error::new(
io::ErrorKind::Other,
format!(
"error running preprocessor command '{}': {}",
cmd.display(),
err,
),
)
})?;
Ok(PreprocessorReader {

View File

@ -1,928 +0,0 @@
use std::error;
use std::fmt;
use std::path::Path;
use std::str::FromStr;
use regex::bytes::{Captures, Match, Regex, Replacer};
use termcolor::{Color, ColorSpec, ParseColorError, WriteColor};
use pathutil::strip_prefix;
use ignore::types::FileTypeDef;
/// Track the start and end of replacements to allow coloring them on output.
///
/// The pair is used as a half-open byte range (`buf[start..end]`) when the
/// line is written.
#[derive(Debug)]
struct Offset {
/// Byte index at which the range begins.
start: usize,
/// Byte index one past the end of the range.
end: usize,
}
impl Offset {
    /// Builds an offset covering the byte range `start..end`.
    fn new(start: usize, end: usize) -> Offset {
        Offset { start, end }
    }
}
impl<'m, 'r> From<&'m Match<'r>> for Offset {
    /// Copies the start and end positions of a regex match into an `Offset`.
    fn from(m: &'m Match<'r>) -> Self {
        let (start, end) = (m.start(), m.end());
        Offset { start, end }
    }
}
/// `CountingReplacer` implements the Replacer interface for Regex,
/// and counts how often replacement is being performed.
///
/// It also records where each non-empty replacement landed in the output.
struct CountingReplacer<'r> {
/// The replacement template that is expanded for every match.
replace: &'r [u8],
/// Caller-owned counter, incremented once per replacement.
count: &'r mut usize,
/// Caller-owned list receiving the output range of each non-empty
/// replacement.
offsets: &'r mut Vec<Offset>,
}
impl<'r> CountingReplacer<'r> {
    /// Bundles the replacement template with the caller's counter and offset
    /// list.
    fn new(
        replace: &'r [u8],
        count: &'r mut usize,
        offsets: &'r mut Vec<Offset>,
    ) -> CountingReplacer<'r> {
        CountingReplacer { replace, count, offsets }
    }
}
impl<'r> Replacer for CountingReplacer<'r> {
fn replace_append(&mut self, caps: &Captures, dst: &mut Vec<u8>) {
*self.count += 1;
let start = dst.len();
caps.expand(self.replace, dst);
let end = dst.len();
if start != end {
self.offsets.push(Offset::new(start, end));
}
}
}
/// Printer encapsulates all output logic for searching.
///
/// Note that we currently ignore all write errors. It's probably worthwhile
/// to fix this, but printers are only ever used for writes to stdout or
/// writes to memory, neither of which commonly fail.
pub struct Printer<W> {
/// The underlying writer.
wtr: W,
/// Whether anything has been printed to wtr yet.
has_printed: bool,
/// Whether to show column numbers for the first match or not.
column: bool,
/// The string to use to separate non-contiguous runs of context lines.
context_separator: Vec<u8>,
/// The end-of-line terminator used by the printer. In general, eols are
/// printed via the match directly, but occasionally we need to insert them
/// ourselves (for example, to print a context separator).
eol: u8,
/// A file separator to show before any matches are printed.
file_separator: Option<Vec<u8>>,
/// Whether to show file name as a heading or not.
///
/// N.B. If with_filename is false, then this setting has no effect.
heading: bool,
/// Whether to show every match on its own line.
line_per_match: bool,
/// Whether to print NUL bytes after a file path instead of new lines
/// or `:`.
null: bool,
/// Print only the matched (non-empty) parts of a matching line
only_matching: bool,
/// A string to use as a replacement of each match in a matching line.
replace: Option<Vec<u8>>,
/// Whether to prefix each match with the corresponding file name.
with_filename: bool,
/// The color specifications.
colors: ColorSpecs,
/// The separator to use for file paths. If `None`, paths are written
/// without any separator substitution.
path_separator: Option<u8>,
/// Restrict lines to this many columns. Longer lines are replaced by an
/// "omitted" notice instead of being printed.
max_columns: Option<usize>,
}
impl<W: WriteColor> Printer<W> {
/// Create a new printer that writes to wtr with the given color settings.
pub fn new(wtr: W) -> Printer<W> {
Printer {
wtr: wtr,
has_printed: false,
column: false,
context_separator: "--".to_string().into_bytes(),
eol: b'\n',
file_separator: None,
heading: false,
line_per_match: false,
null: false,
only_matching: false,
replace: None,
with_filename: false,
colors: ColorSpecs::default(),
path_separator: None,
max_columns: None,
}
}
/// Set the color specifications.
pub fn colors(mut self, colors: ColorSpecs) -> Printer<W> {
self.colors = colors;
self
}
/// When set, column numbers will be printed for the first match on each
/// line.
pub fn column(mut self, yes: bool) -> Printer<W> {
self.column = yes;
self
}
/// Set the context separator. The default is `--`.
pub fn context_separator(mut self, sep: Vec<u8>) -> Printer<W> {
self.context_separator = sep;
self
}
/// Set the end-of-line terminator. The default is `\n`.
pub fn eol(mut self, eol: u8) -> Printer<W> {
self.eol = eol;
self
}
/// If set, the separator is printed before any matches. By default, no
/// separator is printed.
pub fn file_separator(mut self, sep: Vec<u8>) -> Printer<W> {
self.file_separator = Some(sep);
self
}
/// Whether to show file name as a heading or not.
///
/// N.B. If with_filename is false, then this setting has no effect.
pub fn heading(mut self, yes: bool) -> Printer<W> {
self.heading = yes;
self
}
/// Whether to show every match on its own line.
pub fn line_per_match(mut self, yes: bool) -> Printer<W> {
self.line_per_match = yes;
self
}
/// Whether to cause NUL bytes to follow file paths instead of other
/// visual separators (like `:`, `-` and `\n`).
pub fn null(mut self, yes: bool) -> Printer<W> {
self.null = yes;
self
}
/// Print only the matched (non-empty) parts of a matching line
pub fn only_matching(mut self, yes: bool) -> Printer<W> {
self.only_matching = yes;
self
}
/// A separator to use when printing file paths. When `None`, paths are
/// written as-is, i.e. with the default separator for the current
/// platform. (/ on Unix, \ on Windows.)
pub fn path_separator(mut self, sep: Option<u8>) -> Printer<W> {
self.path_separator = sep;
self
}
/// Replace every match in each matching line with the replacement string
/// given.
pub fn replace(mut self, replacement: Vec<u8>) -> Printer<W> {
self.replace = Some(replacement);
self
}
/// When set, each match is prefixed with the file name that it came from.
pub fn with_filename(mut self, yes: bool) -> Printer<W> {
self.with_filename = yes;
self
}
/// Configure the max. number of columns used for printing matching lines.
pub fn max_columns(mut self, max_columns: Option<usize>) -> Printer<W> {
self.max_columns = max_columns;
self
}
/// Returns true if and only if something has been printed.
pub fn has_printed(&self) -> bool {
self.has_printed
}
/// Flushes the underlying writer and returns it.
///
/// A flush error is ignored.
#[allow(dead_code)]
pub fn into_inner(mut self) -> W {
let _ = self.wtr.flush();
self.wtr
}
/// Prints a type definition.
///
/// The output has the form `name: glob1, glob2, ...` followed by the
/// end-of-line terminator.
pub fn type_def(&mut self, def: &FileTypeDef) {
self.write(def.name().as_bytes());
self.write(b": ");
let mut first = true;
for glob in def.globs() {
if !first {
self.write(b", ");
}
self.write(glob.as_bytes());
first = false;
}
self.write_eol();
}
/// Prints the given path.
///
/// A leading `./` is stripped before printing. The path is terminated by
/// NUL when `null` is set, and by the end-of-line terminator otherwise.
pub fn path<P: AsRef<Path>>(&mut self, path: P) {
let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref());
self.write_path(path);
self.write_path_eol();
}
/// Prints the given path and a count of the number of matches found.
///
/// The path (and its separator) is only printed when `with_filename` is
/// set.
pub fn path_count<P: AsRef<Path>>(&mut self, path: P, count: u64) {
if self.with_filename {
self.write_path(path);
self.write_path_sep(b':');
}
self.write(count.to_string().as_bytes());
self.write_eol();
}
/// Prints the context separator.
///
/// Nothing is printed when the separator is empty.
pub fn context_separate(&mut self) {
if self.context_separator.is_empty() {
return;
}
let _ = self.wtr.write_all(&self.context_separator);
self.write_eol();
}
/// Prints the matching line found at `buf[start..end]`.
///
/// Unless `line_per_match` or `only_matching` is set, the line is written
/// exactly once; the first match is located only when `needs_match` says
/// its position is required, otherwise `(0, 0)` is used. When either flag
/// is set, the line is written once for every match found in it.
pub fn matched<P: AsRef<Path>>(
&mut self,
re: &Regex,
path: P,
buf: &[u8],
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>
) {
if !self.line_per_match && !self.only_matching {
let mat =
if !self.needs_match() {
(0, 0)
} else {
re.find(&buf[start..end])
.map(|m| (m.start(), m.end()))
.unwrap_or((0, 0))
};
return self.write_match(
re, path, buf, start, end, line_number,
byte_offset, mat.0, mat.1);
}
for m in re.find_iter(&buf[start..end]) {
self.write_match(
re, path.as_ref(), buf, start, end, line_number,
byte_offset, m.start(), m.end());
}
}
/// Returns true when column numbers, replacements or only-matching output
/// are enabled.
fn needs_match(&self) -> bool {
self.column
|| self.replace.is_some()
|| self.only_matching
}
/// Writes one output line for a match in `buf[start..end]`.
///
/// `match_start` and `match_end` are relative to `start`. The line is
/// prefixed by the path (as a heading or inline), line number, column and
/// byte offset as configured. When a replacement is set, the replaced text
/// is written instead of the original. Lines exceeding `max_columns` are
/// replaced by an "omitted" notice.
fn write_match<P: AsRef<Path>>(
&mut self,
re: &Regex,
path: P,
buf: &[u8],
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>,
match_start: usize,
match_end: usize,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
self.write_path(path);
self.write_path_eol();
} else if !self.heading && self.with_filename {
self.write_path(path);
self.write_path_sep(b':');
}
if let Some(line_number) = line_number {
self.line_number(line_number, b':');
}
if self.column {
// Columns are reported 1-based.
self.column_number(match_start as u64 + 1, b':');
}
if let Some(byte_offset) = byte_offset {
if self.only_matching {
self.write_byte_offset(
byte_offset + ((start + match_start) as u64), b':');
} else {
self.write_byte_offset(byte_offset + (start as u64), b':');
}
}
if self.replace.is_some() {
let mut count = 0;
let mut offsets = Vec::new();
let line = {
let replacer = CountingReplacer::new(
self.replace.as_ref().unwrap(), &mut count, &mut offsets);
if self.only_matching {
re.replace_all(
&buf[start + match_start..start + match_end], replacer)
} else {
re.replace_all(&buf[start..end], replacer)
}
};
if self.max_columns.map_or(false, |m| line.len() > m) {
let msg = format!(
"[Omitted long line with {} replacements]", count);
self.write_colored(msg.as_bytes(), |colors| colors.matched());
self.write_eol();
return;
}
self.write_matched_line(offsets, &*line, false);
} else {
let buf = if self.only_matching {
&buf[start + match_start..start + match_end]
} else {
&buf[start..end]
};
if self.max_columns.map_or(false, |m| buf.len() > m) {
let count = re.find_iter(buf).count();
let msg = format!("[Omitted long line with {} matches]", count);
self.write_colored(msg.as_bytes(), |colors| colors.matched());
self.write_eol();
return;
}
let only_match = self.only_matching;
self.write_matched_line(
re.find_iter(buf).map(|x| Offset::from(&x)), buf, only_match);
}
}
/// Writes `buf`, coloring the byte ranges given by `offsets`.
///
/// No coloring happens when the writer lacks color support or the match
/// color spec is empty. With `only_match`, the whole buffer is colored.
/// The end-of-line terminator is appended if `buf` does not end with it.
fn write_matched_line<I>(&mut self, offsets: I, buf: &[u8], only_match: bool)
where I: IntoIterator<Item=Offset>,
{
if !self.wtr.supports_color() || self.colors.matched().is_none() {
self.write(buf);
} else if only_match {
self.write_colored(buf, |colors| colors.matched());
} else {
let mut last_written = 0;
for o in offsets {
self.write(&buf[last_written..o.start]);
// This conditional checks if the match is both empty *and*
// past the end of the line. In this case, we never want to
// emit an additional color escape.
if o.start != o.end || o.end != buf.len() {
self.write_colored(
&buf[o.start..o.end], |colors| colors.matched());
}
last_written = o.end;
}
self.write(&buf[last_written..]);
}
if buf.last() != Some(&self.eol) {
self.write_eol();
}
}
/// Prints the context line found at `buf[start..end]`.
///
/// This mirrors the prefix handling of matching lines, but uses `-`
/// instead of `:` as the field separator. Lines exceeding `max_columns`
/// are replaced by an "omitted" notice.
pub fn context<P: AsRef<Path>>(
&mut self,
path: P,
buf: &[u8],
start: usize,
end: usize,
line_number: Option<u64>,
byte_offset: Option<u64>,
) {
if self.heading && self.with_filename && !self.has_printed {
self.write_file_sep();
self.write_path(path);
self.write_path_eol();
} else if !self.heading && self.with_filename {
self.write_path(path);
self.write_path_sep(b'-');
}
if let Some(line_number) = line_number {
self.line_number(line_number, b'-');
}
if let Some(byte_offset) = byte_offset {
self.write_byte_offset(byte_offset + (start as u64), b'-');
}
if self.max_columns.map_or(false, |m| end - start > m) {
self.write(b"[Omitted long context line]");
self.write_eol();
return;
}
self.write(&buf[start..end]);
if buf[start..end].last() != Some(&self.eol) {
self.write_eol();
}
}
/// Writes a field separator verbatim.
fn separator(&mut self, sep: &[u8]) {
self.write(sep);
}
/// Writes the separator that follows an inline path: NUL when `null` is
/// set, otherwise `sep`.
fn write_path_sep(&mut self, sep: u8) {
if self.null {
self.write(b"\x00");
} else {
self.separator(&[sep]);
}
}
/// Writes the terminator that follows a path on its own line: NUL when
/// `null` is set, otherwise the end-of-line terminator.
fn write_path_eol(&mut self) {
if self.null {
self.write(b"\x00");
} else {
self.write_eol();
}
}
/// Writes the path using its raw bytes.
#[cfg(unix)]
fn write_path<P: AsRef<Path>>(&mut self, path: P) {
use std::os::unix::ffi::OsStrExt;
let path = path.as_ref().as_os_str().as_bytes();
self.write_path_replace_separator(path);
}
/// Writes the path after a lossy conversion to a string.
#[cfg(not(unix))]
fn write_path<P: AsRef<Path>>(&mut self, path: P) {
let path = path.as_ref().to_string_lossy();
self.write_path_replace_separator(path.as_bytes());
}
/// Writes the path bytes in the path color, substituting the configured
/// path separator (if any) for every `/` and, on Windows, every `\`.
fn write_path_replace_separator(&mut self, path: &[u8]) {
match self.path_separator {
None => self.write_colored(path, |colors| colors.path()),
Some(sep) => {
let transformed_path: Vec<_> = path.iter().map(|&b| {
if b == b'/' || (cfg!(windows) && b == b'\\') {
sep
} else {
b
}
}).collect();
self.write_colored(&transformed_path, |colors| colors.path());
}
}
}
/// Writes a line number in the line color, followed by `sep`.
fn line_number(&mut self, n: u64, sep: u8) {
let line_number = n.to_string();
self.write_colored(line_number.as_bytes(), |colors| colors.line());
self.separator(&[sep]);
}
/// Writes a column number in the column color, followed by `sep`.
fn column_number(&mut self, n: u64, sep: u8) {
self.write_colored(n.to_string().as_bytes(), |colors| colors.column());
self.separator(&[sep]);
}
/// Writes a byte offset, followed by `sep`.
///
/// Byte offsets share the column color; `ColorSpecs` has no dedicated
/// entry for them.
fn write_byte_offset(&mut self, o: u64, sep: u8) {
self.write_colored(o.to_string().as_bytes(), |colors| colors.column());
self.separator(&[sep]);
}
/// Writes raw bytes and records that something has been printed.
///
/// Write errors are ignored.
fn write(&mut self, buf: &[u8]) {
self.has_printed = true;
let _ = self.wtr.write_all(buf);
}
/// Writes the configured end-of-line terminator.
fn write_eol(&mut self) {
let eol = self.eol;
self.write(&[eol]);
}
/// Writes `buf` wrapped in the color chosen by `get_color` and a reset.
///
/// Errors from setting or resetting the color are ignored.
fn write_colored<F>(&mut self, buf: &[u8], get_color: F)
where F: Fn(&ColorSpecs) -> &ColorSpec
{
let _ = self.wtr.set_color(get_color(&self.colors));
self.write(buf);
let _ = self.wtr.reset();
}
/// Writes the file separator, if one is configured.
///
/// NOTE(review): the separator is terminated with a literal `\n` rather
/// than `self.eol`; confirm that this is intentional.
fn write_file_sep(&mut self) {
if let Some(ref sep) = self.file_separator {
self.has_printed = true;
let _ = self.wtr.write_all(sep);
let _ = self.wtr.write_all(b"\n");
}
}
}
/// An error that can occur when parsing color specifications.
///
/// Unless noted otherwise, the `String` payload is the offending input text.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Error {
/// This occurs when an unrecognized output type is used.
UnrecognizedOutType(String),
/// This occurs when an unrecognized spec type is used.
UnrecognizedSpecType(String),
/// This occurs when an unrecognized color name is used.
///
/// The first field is the invalid color name and the second is the
/// message of the underlying `ParseColorError`.
UnrecognizedColor(String, String),
/// This occurs when an unrecognized style attribute is used.
UnrecognizedStyle(String),
/// This occurs when the format of a color specification is invalid.
///
/// The payload is the complete specification string as given.
InvalidFormat(String),
}
impl error::Error for Error {
/// Returns a short, static description of the error kind; the offending
/// input is only included in the `Display` output.
fn description(&self) -> &str {
match *self {
Error::UnrecognizedOutType(_) => "unrecognized output type",
Error::UnrecognizedSpecType(_) => "unrecognized spec type",
Error::UnrecognizedColor(_, _) => "unrecognized color name",
Error::UnrecognizedStyle(_) => "unrecognized style attribute",
Error::InvalidFormat(_) => "invalid color spec",
}
}
/// These errors never wrap another error.
fn cause(&self) -> Option<&error::Error> {
None
}
}
impl fmt::Display for Error {
/// Formats a user-facing message that names the offending input and lists
/// the accepted values.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Error::UnrecognizedOutType(ref name) => {
write!(f, "Unrecognized output type '{}'. Choose from: \
path, line, column, match.", name)
}
Error::UnrecognizedSpecType(ref name) => {
write!(f, "Unrecognized spec type '{}'. Choose from: \
fg, bg, style, none.", name)
}
// The stored message (second field) is printed verbatim; the invalid
// name itself is not repeated here.
Error::UnrecognizedColor(_, ref msg) => {
write!(f, "{}", msg)
}
Error::UnrecognizedStyle(ref name) => {
write!(f, "Unrecognized style attribute '{}'. Choose from: \
nobold, bold, nointense, intense, nounderline, \
underline.", name)
}
Error::InvalidFormat(ref original) => {
write!(
f,
"Invalid color spec format: '{}'. Valid format \
is '(path|line|column|match):(fg|bg|style):(value)'.",
original)
}
}
}
}
impl From<ParseColorError> for Error {
fn from(err: ParseColorError) -> Error {
Error::UnrecognizedColor(err.invalid().to_string(), err.to_string())
}
}
/// A merged set of color specifications.
///
/// There is one `ColorSpec` per configurable part of the output.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct ColorSpecs {
/// Colors for file paths.
path: ColorSpec,
/// Colors for line numbers.
line: ColorSpec,
/// Colors for column numbers.
column: ColorSpec,
/// Colors for matched text.
matched: ColorSpec,
}
/// A single color specification provided by the user.
///
/// A `ColorSpecs` can be built by merging a sequence of `Spec`s.
///
/// ## Example
///
/// The only way to build a `Spec` is to parse it from a string. Once multiple
/// `Spec`s have been constructed, they can be merged into a single
/// `ColorSpecs` value.
///
/// ```rust,ignore
/// // `ColorSpecs` and `Spec` are the types defined in this module; only
/// // `Color` comes from `termcolor`.
/// use termcolor::Color;
///
/// let spec1: Spec = "path:fg:blue".parse().unwrap();
/// let spec2: Spec = "match:bg:green".parse().unwrap();
/// let specs = ColorSpecs::new(&[spec1, spec2]);
///
/// assert_eq!(specs.path().fg(), Some(Color::Blue));
/// assert_eq!(specs.matched().bg(), Some(Color::Green));
/// ```
///
/// ## Format
///
/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each
/// component is defined as follows:
///
/// * `{type}` can be one of `path`, `line`, `column` or `match`.
/// * `{attribute}` can be one of `fg`, `bg` or `style`. `{attribute}` may also
/// be the special value `none`, in which case, `{value}` can be omitted.
/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction.
///
/// `{type}` controls which part of the output should be styled and is
/// application dependent.
///
/// When `{attribute}` is `none`, then this should cause any existing color
/// settings to be cleared.
///
/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it
/// should be a style instruction when `{attribute}` is `style`. When
/// `{attribute}` is `none`, `{value}` must be omitted.
///
/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`,
/// `yellow`, `white`.
///
/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`,
/// `underline`, `nounderline`.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Spec {
/// The part of the output that this specification applies to.
ty: OutType,
/// The color or style change to apply to that part.
value: SpecValue,
}
/// The actual value given by the specification.
#[derive(Clone, Debug, Eq, PartialEq)]
enum SpecValue {
/// Clear all existing color settings.
None,
/// Set the foreground color.
Fg(Color),
/// Set the background color.
Bg(Color),
/// Turn a style attribute on or off.
Style(Style),
}
/// The set of configurable portions of ripgrep's output.
#[derive(Clone, Debug, Eq, PartialEq)]
enum OutType {
/// File paths (spelled `path` in a spec).
Path,
/// Line numbers (spelled `line` in a spec).
Line,
/// Column numbers (spelled `column` in a spec).
Column,
/// Matched text (spelled `match` in a spec).
Match,
}
/// The specification type.
///
/// This is the `{attribute}` component of a spec string.
#[derive(Clone, Debug, Eq, PartialEq)]
enum SpecType {
/// `fg`: the value is a foreground color.
Fg,
/// `bg`: the value is a background color.
Bg,
/// `style`: the value is a style instruction.
Style,
/// `none`: no value is needed.
None,
}
/// The set of available styles for use in the terminal.
///
/// Every attribute comes as an on/off pair.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Style {
/// `bold`: enable bold text.
Bold,
/// `nobold`: disable bold text.
NoBold,
/// `intense`: enable intense colors.
Intense,
/// `nointense`: disable intense colors.
NoIntense,
/// `underline`: enable underlining.
Underline,
/// `nounderline`: disable underlining.
NoUnderline
}
impl ColorSpecs {
    /// Builds the merged color specifications from the user's specs.
    ///
    /// Specs are applied in the order given, each one to the `ColorSpec` of
    /// the output part it names, so later specs build on earlier ones.
    pub fn new(user_specs: &[Spec]) -> ColorSpecs {
        let mut merged = ColorSpecs::default();
        for spec in user_specs {
            let target = match spec.ty {
                OutType::Path => &mut merged.path,
                OutType::Line => &mut merged.line,
                OutType::Column => &mut merged.column,
                OutType::Match => &mut merged.matched,
            };
            spec.merge_into(target);
        }
        merged
    }

    /// The colors used for file paths.
    fn path(&self) -> &ColorSpec {
        &self.path
    }

    /// The colors used for line numbers.
    fn line(&self) -> &ColorSpec {
        &self.line
    }

    /// The colors used for column numbers.
    fn column(&self) -> &ColorSpec {
        &self.column
    }

    /// The colors used for matched text.
    fn matched(&self) -> &ColorSpec {
        &self.matched
    }
}
impl Spec {
/// Merge this spec into the given color specification.
///
/// Only the spec's value is applied; choosing the `ColorSpec` that matches
/// the spec's output type is left to the caller.
fn merge_into(&self, cspec: &mut ColorSpec) {
self.value.merge_into(cspec);
}
}
impl SpecValue {
    /// Applies this value to `cspec`: clears it, sets a foreground or
    /// background color, or switches a single style attribute on or off.
    fn merge_into(&self, cspec: &mut ColorSpec) {
        match *self {
            SpecValue::None => cspec.clear(),
            SpecValue::Fg(ref color) => {
                cspec.set_fg(Some(color.clone()));
            }
            SpecValue::Bg(ref color) => {
                cspec.set_bg(Some(color.clone()));
            }
            SpecValue::Style(Style::Bold) => {
                cspec.set_bold(true);
            }
            SpecValue::Style(Style::NoBold) => {
                cspec.set_bold(false);
            }
            SpecValue::Style(Style::Intense) => {
                cspec.set_intense(true);
            }
            SpecValue::Style(Style::NoIntense) => {
                cspec.set_intense(false);
            }
            SpecValue::Style(Style::Underline) => {
                cspec.set_underline(true);
            }
            SpecValue::Style(Style::NoUnderline) => {
                cspec.set_underline(false);
            }
        }
    }
}
impl FromStr for Spec {
type Err = Error;
fn from_str(s: &str) -> Result<Spec, Error> {
let pieces: Vec<&str> = s.split(':').collect();
if pieces.len() <= 1 || pieces.len() > 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let otype: OutType = pieces[0].parse()?;
match pieces[1].parse()? {
SpecType::None => Ok(Spec { ty: otype, value: SpecValue::None }),
SpecType::Style => {
if pieces.len() < 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let style: Style = pieces[2].parse()?;
Ok(Spec { ty: otype, value: SpecValue::Style(style) })
}
SpecType::Fg => {
if pieces.len() < 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let color: Color = pieces[2].parse()?;
Ok(Spec { ty: otype, value: SpecValue::Fg(color) })
}
SpecType::Bg => {
if pieces.len() < 3 {
return Err(Error::InvalidFormat(s.to_string()));
}
let color: Color = pieces[2].parse()?;
Ok(Spec { ty: otype, value: SpecValue::Bg(color) })
}
}
}
}
impl FromStr for OutType {
type Err = Error;
fn from_str(s: &str) -> Result<OutType, Error> {
match &*s.to_lowercase() {
"path" => Ok(OutType::Path),
"line" => Ok(OutType::Line),
"column" => Ok(OutType::Column),
"match" => Ok(OutType::Match),
_ => Err(Error::UnrecognizedOutType(s.to_string())),
}
}
}
impl FromStr for SpecType {
type Err = Error;
fn from_str(s: &str) -> Result<SpecType, Error> {
match &*s.to_lowercase() {
"fg" => Ok(SpecType::Fg),
"bg" => Ok(SpecType::Bg),
"style" => Ok(SpecType::Style),
"none" => Ok(SpecType::None),
_ => Err(Error::UnrecognizedSpecType(s.to_string())),
}
}
}
impl FromStr for Style {
type Err = Error;
fn from_str(s: &str) -> Result<Style, Error> {
match &*s.to_lowercase() {
"bold" => Ok(Style::Bold),
"nobold" => Ok(Style::NoBold),
"intense" => Ok(Style::Intense),
"nointense" => Ok(Style::NoIntense),
"underline" => Ok(Style::Underline),
"nounderline" => Ok(Style::NoUnderline),
_ => Err(Error::UnrecognizedStyle(s.to_string())),
}
}
}
#[cfg(test)]
mod tests {
use termcolor::{Color, ColorSpec};
use super::{ColorSpecs, Error, OutType, Spec, SpecValue, Style};
// Builds a `ColorSpecs` from three specs that all target `match`. The
// expected result has only bold set on `matched`, i.e. the `fg:blue` given
// before `match:none` does not survive, and every other output type is left
// at its default.
#[test]
fn merge() {
let user_specs: &[Spec] = &[
"match:fg:blue".parse().unwrap(),
"match:none".parse().unwrap(),
"match:style:bold".parse().unwrap(),
];
let mut expect_matched = ColorSpec::new();
expect_matched.set_bold(true);
assert_eq!(ColorSpecs::new(user_specs), ColorSpecs {
path: ColorSpec::default(),
line: ColorSpec::default(),
column: ColorSpec::default(),
matched: expect_matched,
});
}
// Checks that well-formed spec strings parse into the expected output type
// and value, covering foreground, background, style and `none` settings.
#[test]
fn specs() {
let spec: Spec = "path:fg:blue".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Path,
value: SpecValue::Fg(Color::Blue),
});
let spec: Spec = "path:bg:red".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Path,
value: SpecValue::Bg(Color::Red),
});
let spec: Spec = "match:style:bold".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Match,
value: SpecValue::Style(Style::Bold),
});
let spec: Spec = "match:style:intense".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Match,
value: SpecValue::Style(Style::Intense),
});
let spec: Spec = "match:style:underline".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Match,
value: SpecValue::Style(Style::Underline),
});
let spec: Spec = "line:none".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Line,
value: SpecValue::None,
});
let spec: Spec = "column:bg:green".parse().unwrap();
assert_eq!(spec, Spec {
ty: OutType::Column,
value: SpecValue::Bg(Color::Green),
});
}
// Checks that each kind of malformed spec produces the matching error
// variant, and that the error carries the offending text as written.
#[test]
fn spec_errors() {
let err = "line:nonee".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::UnrecognizedSpecType("nonee".to_string()));
// Fewer than two `:`-separated pieces is a format error.
let err = "".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::InvalidFormat("".to_string()));
let err = "foo".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::InvalidFormat("foo".to_string()));
let err = "line:style:italic".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::UnrecognizedStyle("italic".to_string()));
// Only the color name is compared here; the second field of the variant
// is ignored.
let err = "line:fg:brown".parse::<Spec>().unwrap_err();
match err {
Error::UnrecognizedColor(name, _) => assert_eq!(name, "brown"),
err => assert!(false, "unexpected error: {:?}", err),
}
// The output type is rejected even though the color is also invalid.
let err = "foo:fg:brown".parse::<Spec>().unwrap_err();
assert_eq!(err, Error::UnrecognizedOutType("foo".to_string()));
}
}

408
src/search.rs Normal file
View File

@ -0,0 +1,408 @@
use std::io;
use std::path::{Path, PathBuf};
use std::time::Duration;
use grep::matcher::Matcher;
#[cfg(feature = "pcre2")]
use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
use grep::printer::{JSON, Standard, Summary, Stats};
use grep::regex::{RegexMatcher as RustRegexMatcher};
use grep::searcher::Searcher;
use serde_json as json;
use termcolor::WriteColor;
use decompressor::{DecompressionReader, is_compressed};
use preprocessor::PreprocessorReader;
use subject::Subject;
/// The configuration for the search worker. Among a few other things, the
/// configuration primarily controls the way we show search results to users
/// at a very high level.
///
/// The derived `Default` leaves every option disabled: both flags are
/// `false` and no preprocessor is set.
#[derive(Clone, Debug, Default)]
struct Config {
    /// Emit statistics as JSON even when the printer is not the JSON printer.
    json_stats: bool,
    /// Path to a preprocessor command, if one has been configured.
    preprocessor: Option<PathBuf>,
    /// Whether files recognized as compressed are decompressed before
    /// being searched.
    search_zip: bool,
}
/// A builder for configuring and constructing a search worker.
///
/// Settings are changed through the builder's setter methods and copied into
/// each worker produced by `build`.
#[derive(Clone, Debug)]
pub struct SearchWorkerBuilder {
/// The settings accumulated so far.
config: Config,
}
impl Default for SearchWorkerBuilder {
fn default() -> SearchWorkerBuilder {
SearchWorkerBuilder::new()
}
}
impl SearchWorkerBuilder {
    /// Return a builder whose settings all start at their defaults.
    pub fn new() -> SearchWorkerBuilder {
        let config = Config::default();
        SearchWorkerBuilder { config }
    }

    /// Assemble a search worker from the given matcher, searcher and
    /// printer.
    ///
    /// The worker receives its own copy of this builder's configuration, so
    /// the builder itself is left untouched and can be used again.
    pub fn build<W: WriteColor>(
        &self,
        matcher: PatternMatcher,
        searcher: Searcher,
        printer: Printer<W>,
    ) -> SearchWorker<W> {
        SearchWorker {
            config: self.config.clone(),
            matcher,
            searcher,
            printer,
        }
    }

    /// Force statistics to be emitted as JSON, regardless of which printer
    /// the worker was built with.
    ///
    /// This exists for flag combinations such as `--json --quiet`: `--quiet`
    /// is implemented with the summary printer, yet the summary statistics
    /// should still be JSON formatted because `--json` was given.
    pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
        self.config.json_stats = yes;
        self
    }

    /// Set the path to a preprocessor command.
    ///
    /// With a preprocessor set, files are not searched directly. Instead the
    /// command is run with the file path as its first argument and the
    /// command's output is what gets searched.
    pub fn preprocessor(
        &mut self,
        cmd: Option<PathBuf>,
    ) -> &mut SearchWorkerBuilder {
        self.config.preprocessor = cmd;
        self
    }

    /// Enable decompressing and searching common compressed file formats.
    ///
    /// When enabled, a file path recognized as a compressed file is
    /// decompressed before it is searched.
    ///
    /// A configured preprocessor command takes precedence over this setting.
    pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder {
        self.config.search_zip = yes;
        self
    }
}
/// The result of executing a search.
///
/// Generally speaking, the "result" of a search is sent to a printer, which
/// writes results to an underlying writer such as stdout or a file. However,
/// every search also has some aggregate statistics or meta data that may be
/// useful to higher level routines.
///
/// The derived default has no match and no statistics.
#[derive(Clone, Debug, Default)]
pub struct SearchResult {
/// Whether the search found a match.
has_match: bool,
/// Aggregate statistics for the search, when the sink provided them.
stats: Option<Stats>,
}
impl SearchResult {
/// Whether the search found a match or not.
pub fn has_match(&self) -> bool {
self.has_match
}
/// Return aggregate search statistics for a single search, if available.
///
/// It can be expensive to compute statistics, so these are only present
/// if explicitly enabled in the printer provided by the caller.
///
/// The statistics are returned by reference; callers that need ownership
/// must clone them.
pub fn stats(&self) -> Option<&Stats> {
self.stats.as_ref()
}
}
/// The pattern matcher used by a search worker.
///
/// Each variant wraps one concrete matcher implementation. The worker
/// dispatches on the variant once per search.
#[derive(Clone, Debug)]
pub enum PatternMatcher {
/// A matcher from the `grep::regex` module.
RustRegex(RustRegexMatcher),
/// A matcher from the `grep::pcre2` module. Only present when the `pcre2`
/// feature is enabled.
#[cfg(feature = "pcre2")]
PCRE2(PCRE2RegexMatcher),
}
/// The printer used by a search worker.
///
/// The `W` type parameter refers to the type of the underlying writer.
///
/// The variant also decides how statistics are rendered: the JSON printer
/// emits them as a JSON message, the other two as human readable text.
#[derive(Debug)]
pub enum Printer<W> {
/// Use the standard printer, which supports the classic grep-like format.
Standard(Standard<W>),
/// Use the summary printer, which supports aggregate displays of search
/// results.
Summary(Summary<W>),
/// A JSON printer, which emits results in the JSON Lines format.
JSON(JSON<W>),
}
impl<W: WriteColor> Printer<W> {
    /// Write the given statistics in the format that matches this printer:
    /// JSON for the JSON printer, human readable text for the others.
    fn print_stats(
        &mut self,
        total_duration: Duration,
        stats: &Stats,
    ) -> io::Result<()> {
        let as_json = match *self {
            Printer::JSON(_) => true,
            Printer::Standard(_) | Printer::Summary(_) => false,
        };
        if as_json {
            self.print_stats_json(total_duration, stats)
        } else {
            self.print_stats_human(total_duration, stats)
        }
    }

    /// Write the statistics as a plain text report, one figure per line.
    ///
    /// Both durations are shown as fractional seconds with six digits after
    /// the decimal point.
    fn print_stats_human(
        &mut self,
        total_duration: Duration,
        stats: &Stats,
    ) -> io::Result<()> {
        let search_time = fractional_seconds(stats.elapsed());
        let process_time = fractional_seconds(total_duration);
        write!(
            self.get_mut(),
            "
{matches} matches
{lines} matched lines
{searches_with_match} files contained matches
{searches} files searched
{bytes_printed} bytes printed
{bytes_searched} bytes searched
{search_time:0.6} seconds spent searching
{process_time:0.6} seconds
",
            matches = stats.matches(),
            lines = stats.matched_lines(),
            searches_with_match = stats.searches_with_match(),
            searches = stats.searches(),
            bytes_printed = stats.bytes_printed(),
            bytes_searched = stats.bytes_searched(),
            search_time = search_time,
            process_time = process_time
        )
    }

    /// Write the statistics as a single JSON message followed by a newline.
    fn print_stats_json(
        &mut self,
        total_duration: Duration,
        stats: &Stats,
    ) -> io::Result<()> {
        // We specifically match the format laid out by the JSON printer in
        // the grep-printer crate. We simply "extend" it with the 'summary'
        // message type.
        let human = format!("{:0.6}s", fractional_seconds(total_duration));
        let message = json!({
            "type": "summary",
            "data": {
                "stats": stats,
                "elapsed_total": {
                    "secs": total_duration.as_secs(),
                    "nanos": total_duration.subsec_nanos(),
                    "human": human,
                },
            }
        });
        json::to_writer(self.get_mut(), &message)?;
        write!(self.get_mut(), "\n")
    }

    /// Return a mutable reference to the underlying printer's writer.
    pub fn get_mut(&mut self) -> &mut W {
        match *self {
            Printer::Standard(ref mut printer) => printer.get_mut(),
            Printer::Summary(ref mut printer) => printer.get_mut(),
            Printer::JSON(ref mut printer) => printer.get_mut(),
        }
    }
}
/// A worker for executing searches.
///
/// It is intended for a single worker to execute many searches, and is
/// generally intended to be used from a single thread. When searching using
/// multiple threads, it is better to create a new worker for each thread.
#[derive(Debug)]
pub struct SearchWorker<W> {
/// Settings copied from the builder that created this worker.
config: Config,
/// The pattern matcher applied to every subject.
matcher: PatternMatcher,
/// The searcher that drives each search.
searcher: Searcher,
/// The printer that receives search results and statistics.
printer: Printer<W>,
}
impl<W: WriteColor> SearchWorker<W> {
    /// Execute a search over the given subject.
    pub fn search(&mut self, subject: &Subject) -> io::Result<SearchResult> {
        self.search_impl(subject)
    }

    /// Return a mutable reference to the underlying printer.
    pub fn printer(&mut self) -> &mut Printer<W> {
        &mut self.printer
    }

    /// Print the given statistics to the underlying writer in a way that is
    /// consistent with this searcher's printer's format.
    ///
    /// While `Stats` contains a duration itself, this only corresponds to the
    /// time spent searching, whereas `total_duration` should roughly
    /// approximate the lifespan of the ripgrep process itself.
    ///
    /// When JSON statistics were forced on the builder, the JSON format is
    /// used no matter which printer this worker holds.
    pub fn print_stats(
        &mut self,
        total_duration: Duration,
        stats: &Stats,
    ) -> io::Result<()> {
        if self.config.json_stats {
            self.printer().print_stats_json(total_duration, stats)
        } else {
            self.printer().print_stats(total_duration, stats)
        }
    }

    /// Search the given subject using the appropriate strategy.
    ///
    /// The strategies are tried in this order: stdin is read directly; a
    /// configured preprocessor takes precedence over decompression; a
    /// compressed file is decompressed when `search_zip` is enabled; anything
    /// else is searched by path.
    fn search_impl(&mut self, subject: &Subject) -> io::Result<SearchResult> {
        let path = subject.path();
        if subject.is_stdin() {
            let stdin = io::stdin();
            // A `return` here appeases the borrow checker. NLL will fix this.
            return self.search_reader(path, stdin.lock());
        } else if let Some(cmd) = self.config.preprocessor.clone() {
            // The command path is cloned so that no borrow of `self.config`
            // is held while `self` is mutably borrowed for the search.
            let rdr = PreprocessorReader::from_cmd_path(cmd, path)?;
            self.search_reader(path, rdr)
        } else if self.config.search_zip && is_compressed(path) {
            match DecompressionReader::from_path(path) {
                // No reader is available, so report an empty result.
                None => Ok(SearchResult::default()),
                Some(rdr) => self.search_reader(path, rdr),
            }
        } else {
            self.search_path(path)
        }
    }

    /// Search the contents of the given file path.
    fn search_path(&mut self, path: &Path) -> io::Result<SearchResult> {
        use self::PatternMatcher::*;

        // Borrow the fields separately so the matcher can be borrowed at the
        // same time as the searcher and printer.
        let (searcher, printer) = (&mut self.searcher, &mut self.printer);
        match self.matcher {
            RustRegex(ref m) => search_path(m, searcher, printer, path),
            #[cfg(feature = "pcre2")]
            PCRE2(ref m) => search_path(m, searcher, printer, path),
        }
    }

    /// Executes a search on the given reader, which may or may not correspond
    /// directly to the contents of the given file path. Instead, the reader
    /// may actually cause something else to be searched (for example, when
    /// a preprocessor is set or when decompression is enabled). In those
    /// cases, the file path is used for visual purposes only.
    ///
    /// Generally speaking, this method should only be used when there is no
    /// other choice. Searching via `search_path` provides more opportunities
    /// for optimizations (such as memory maps).
    fn search_reader<R: io::Read>(
        &mut self,
        path: &Path,
        rdr: R,
    ) -> io::Result<SearchResult> {
        use self::PatternMatcher::*;

        let (searcher, printer) = (&mut self.searcher, &mut self.printer);
        match self.matcher {
            RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr),
            #[cfg(feature = "pcre2")]
            PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr),
        }
    }
}
/// Search the file at `path` with the given matcher and searcher, sending
/// results to whichever printer is in use.
///
/// The returned result records whether anything matched, along with the
/// statistics reported by the printer's sink. The JSON sink always reports
/// statistics; the other two report them only when they have them.
fn search_path<M: Matcher, W: WriteColor>(
    matcher: M,
    searcher: &mut Searcher,
    printer: &mut Printer<W>,
    path: &Path,
) -> io::Result<SearchResult> {
    let (has_match, stats) = match *printer {
        Printer::Standard(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            let found = sink.has_match();
            let stats = sink.stats().cloned();
            (found, stats)
        }
        Printer::Summary(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            let found = sink.has_match();
            let stats = sink.stats().cloned();
            (found, stats)
        }
        Printer::JSON(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_path(&matcher, path, &mut sink)?;
            let found = sink.has_match();
            let stats = Some(sink.stats().clone());
            (found, stats)
        }
    };
    Ok(SearchResult { has_match, stats })
}
/// Search everything produced by `rdr` with the given matcher and searcher,
/// sending results to whichever printer is in use.
///
/// `path` is handed to the printer's sink only; the bytes that get searched
/// come from `rdr`. The returned result records whether anything matched,
/// along with the statistics reported by the sink.
fn search_reader<M: Matcher, R: io::Read, W: WriteColor>(
    matcher: M,
    searcher: &mut Searcher,
    printer: &mut Printer<W>,
    path: &Path,
    rdr: R,
) -> io::Result<SearchResult> {
    let (has_match, stats) = match *printer {
        Printer::Standard(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_reader(&matcher, rdr, &mut sink)?;
            let found = sink.has_match();
            let stats = sink.stats().cloned();
            (found, stats)
        }
        Printer::Summary(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_reader(&matcher, rdr, &mut sink)?;
            let found = sink.has_match();
            let stats = sink.stats().cloned();
            (found, stats)
        }
        Printer::JSON(ref mut p) => {
            let mut sink = p.sink_with_path(&matcher, path);
            searcher.search_reader(&matcher, rdr, &mut sink)?;
            let found = sink.has_match();
            let stats = Some(sink.stats().clone());
            (found, stats)
        }
    };
    Ok(SearchResult { has_match, stats })
}
/// Convert a duration to seconds, keeping the sub-second part as a fraction.
fn fractional_seconds(duration: Duration) -> f64 {
    let whole = duration.as_secs() as f64;
    let fraction = f64::from(duration.subsec_nanos()) * 1e-9;
    whole + fraction
}

View File

@ -1,424 +0,0 @@
/*!
The `search_buffer` module is responsible for searching a single file all in a
single buffer. Typically, the source of the buffer is a memory map. This can
be useful for when memory maps are faster than streaming search.
Note that this module doesn't quite support everything that `search_stream`
does. Notably, it does not support showing contexts.
*/
use std::cmp;
use std::path::Path;
use grep::Grep;
use termcolor::WriteColor;
use printer::Printer;
use search_stream::{IterLines, Options, count_lines, is_binary};
/// Searches a single in-memory buffer and reports what it finds to a printer.
pub struct BufferSearcher<'a, W: 'a> {
/// Search options. They start at `Options::default()` and are changed
/// through the builder-style setters.
opts: Options,
/// The printer that receives matches, counts and paths.
printer: &'a mut Printer<W>,
/// The matcher used to find matches in `buf`.
grep: &'a Grep,
/// The path passed to the printer alongside every result.
path: &'a Path,
/// The bytes being searched.
buf: &'a [u8],
/// The number of matching lines seen so far.
match_line_count: u64,
/// The number of individual matches; only tracked when `count_matches`
/// is enabled.
match_count: Option<u64>,
/// The running line number; only tracked when `line_number` is enabled.
line_count: Option<u64>,
/// Set to `Some(0)` when `byte_offset` is enabled and passed to the
/// printer with each match.
byte_offset: Option<u64>,
/// The offset in `buf` up to which lines have already been counted.
last_line: usize,
}
impl<'a, W: WriteColor> BufferSearcher<'a, W> {
/// Create a searcher over `buf` that finds matches with `grep` and reports
/// them to `printer` under the given `path`.
///
/// All options start at `Options::default()`, the matching line count
/// starts at zero and the optional counters start unset.
pub fn new(
printer: &'a mut Printer<W>,
grep: &'a Grep,
path: &'a Path,
buf: &'a [u8],
) -> BufferSearcher<'a, W> {
BufferSearcher {
opts: Options::default(),
printer: printer,
grep: grep,
path: path,
buf: buf,
match_line_count: 0,
match_count: None,
line_count: None,
byte_offset: None,
last_line: 0,
}
}
/// If enabled, searching will print a 0-based offset of the
/// matching line (or the actual match if -o is specified) before
/// printing the line itself.
///
/// Disabled by default.
pub fn byte_offset(mut self, yes: bool) -> Self {
self.opts.byte_offset = yes;
self
}
/// If enabled, searching will print a count instead of each match.
///
/// Disabled by default.
pub fn count(mut self, yes: bool) -> Self {
self.opts.count = yes;
self
}
/// If enabled, searching will print the count of individual matches
/// instead of each match.
///
/// Disabled by default.
pub fn count_matches(mut self, yes: bool) -> Self {
self.opts.count_matches = yes;
self
}
/// If enabled, searching will print the path instead of each match.
///
/// Disabled by default.
pub fn files_with_matches(mut self, yes: bool) -> Self {
self.opts.files_with_matches = yes;
self
}
/// If enabled, searching will print the path of files that *don't* match
/// the given pattern.
///
/// Disabled by default.
pub fn files_without_matches(mut self, yes: bool) -> Self {
self.opts.files_without_matches = yes;
self
}
/// Set the end-of-line byte used by this searcher.
pub fn eol(mut self, eol: u8) -> Self {
self.opts.eol = eol;
self
}
/// If enabled, matching is inverted so that lines that *don't* match the
/// given pattern are treated as matches.
pub fn invert_match(mut self, yes: bool) -> Self {
self.opts.invert_match = yes;
self
}
/// If enabled, compute line numbers and prefix each line of output with
/// them.
pub fn line_number(mut self, yes: bool) -> Self {
self.opts.line_number = yes;
self
}
/// Limit the number of matches to the given count.
///
/// The default is None, which corresponds to no limit.
pub fn max_count(mut self, count: Option<u64>) -> Self {
self.opts.max_count = count;
self
}
/// If enabled, don't show any output and quit searching after the first
/// match is found.
pub fn quiet(mut self, yes: bool) -> Self {
self.opts.quiet = yes;
self
}
/// If enabled, search binary files as if they were text.
pub fn text(mut self, yes: bool) -> Self {
self.opts.text = yes;
self
}
/// Run the search over the whole buffer and return the number of matching
/// lines.
///
/// Unless `text` is enabled, the first 10,240 bytes (or the whole buffer
/// when it is shorter) are checked with `is_binary`. If that check
/// succeeds, nothing is searched or printed and 0 is returned.
#[inline(never)]
pub fn run(mut self) -> u64 {
let binary_upto = cmp::min(10_240, self.buf.len());
if !self.opts.text && is_binary(&self.buf[..binary_upto], true) {
return 0;
}
// Reset the counters. Each optional counter is only tracked when the
// corresponding option is enabled.
self.match_line_count = 0;
self.line_count = if self.opts.line_number { Some(0) } else { None };
// The memory map searcher uses one contiguous block of bytes, so the
// offsets given the printer are sufficient to compute the byte offset.
self.byte_offset = if self.opts.byte_offset { Some(0) } else { None };
self.match_count = if self.opts.count_matches { Some(0) } else { None };
// The end offset of the most recent match. When inverting, the lines
// between `last_end` and the start of the next match are the ones
// reported.
let mut last_end = 0;
for m in self.grep.iter(self.buf) {
if self.opts.invert_match {
self.print_inverted_matches(last_end, m.start());
} else {
self.print_match(m.start(), m.end());
}
last_end = m.end();
if self.opts.terminate(self.match_line_count) {
break;
}
}
// When inverting, the lines after the final match still need to be
// reported, unless the search has already been told to stop.
if self.opts.invert_match && !self.opts.terminate(self.match_line_count) {
let upto = self.buf.len();
self.print_inverted_matches(last_end, upto);
}
// Counts are only printed when they are non-zero. `count` takes
// priority over `count_matches`.
if self.opts.count && self.match_line_count > 0 {
self.printer.path_count(self.path, self.match_line_count);
} else if self.opts.count_matches
&& self.match_count.map_or(false, |c| c > 0)
{
self.printer.path_count(self.path, self.match_count.unwrap());
}
if self.opts.files_with_matches && self.match_line_count > 0 {
self.printer.path(self.path);
}
if self.opts.files_without_matches && self.match_line_count == 0 {
self.printer.path(self.path);
}
self.match_line_count
}
/// Add the number of regex matches found in `buf[start..end]` to the
/// individual match counter. Does nothing when that counter is not being
/// tracked.
#[inline(always)]
fn count_individual_matches(&mut self, start: usize, end: usize) {
if let Some(ref mut count) = self.match_count {
for _ in self.grep.regex().find_iter(&self.buf[start..end]) {
*count += 1;
}
}
}
/// Record `buf[start..end]` as a matching line and, unless the options
/// say matches should be skipped, send it to the printer.
///
/// The counters are always updated. Line numbers are only brought up to
/// date when the match is actually printed.
#[inline(always)]
pub fn print_match(&mut self, start: usize, end: usize) {
self.match_line_count += 1;
self.count_individual_matches(start, end);
if self.opts.skip_matches() {
return;
}
// Count the lines before this match first, then account for the matched
// line itself.
self.count_lines(start);
self.add_line(end);
self.printer.matched(
self.grep.regex(), self.path, self.buf,
start, end, self.line_count, self.byte_offset);
}
/// Report every line yielded by `IterLines` between `start` and `end` as a
/// match, stopping early once the options say the search should
/// terminate.
///
/// Only meant to be called when `invert_match` is enabled.
#[inline(always)]
fn print_inverted_matches(&mut self, start: usize, end: usize) {
debug_assert!(self.opts.invert_match);
let mut it = IterLines::new(self.opts.eol, start);
while let Some((s, e)) = it.next(&self.buf[..end]) {
if self.opts.terminate(self.match_line_count) {
return;
}
self.print_match(s, e);
}
}
/// Advance the line counter by the result of `count_lines` over
/// `buf[last_line..upto]` and remember `upto` as the new starting point.
/// Does nothing when line numbers are not being tracked.
#[inline(always)]
fn count_lines(&mut self, upto: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += count_lines(
&self.buf[self.last_line..upto], self.opts.eol);
self.last_line = upto;
}
}
/// Advance the line counter by one and remember `line_end` as the new
/// starting point for line counting. Does nothing when line numbers are
/// not being tracked.
#[inline(always)]
fn add_line(&mut self, line_end: usize) {
if let Some(ref mut line_count) = self.line_count {
*line_count += 1;
self.last_line = line_end;
}
}
}
#[cfg(test)]
mod tests {
use std::path::Path;
use grep::GrepBuilder;
use printer::Printer;
use termcolor;
use super::BufferSearcher;
// The haystack shared by most tests: six lines of text with no trailing
// line terminator after the last one.
const SHERLOCK: &'static str = "\
For the Doctor Watsons of this world, as opposed to the Sherlock
Holmeses, success in the province of detective work must always
be, to a very large extent, the result of luck. Sherlock Holmes
can extract a clew from a wisp of straw or a flake of cigar ash;
but Doctor Watson has to have it taken out for him and dusted,
and exhibited clearly, with a label attached.\
";
// The path reported by the printer in every test.
fn test_path() -> &'static Path {
&Path::new("/baz.rs")
}
type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor<Vec<u8>>>;
// Runs a search for `pat` over `haystack` and returns the value of `run`
// together with everything the printer wrote. `map` lets each test adjust
// the searcher's options before it runs.
fn search<F: FnMut(TestSearcher) -> TestSearcher>(
pat: &str,
haystack: &str,
mut map: F,
) -> (u64, String) {
let outbuf = termcolor::NoColor::new(vec![]);
let mut pp = Printer::new(outbuf).with_filename(true);
let grep = GrepBuilder::new(pat).build().unwrap();
// The searcher borrows the printer mutably, so it is confined to its own
// scope before the printer's output is taken.
let count = {
let searcher = BufferSearcher::new(
&mut pp, &grep, test_path(), haystack.as_bytes());
map(searcher).run()
};
(count, String::from_utf8(pp.into_inner().into_inner()).unwrap())
}
// Default options: each matching line is printed with its path.
#[test]
fn basic_search() {
let (count, out) = search("Sherlock", SHERLOCK, |s|s);
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
// A NUL byte in the haystack makes the default search report nothing.
#[test]
fn binary() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s|s);
assert_eq!(0, count);
assert_eq!(out, "");
}
// With `text` enabled the same haystack is searched as ordinary text.
#[test]
fn binary_text() {
let text = "Sherlock\n\x00Holmes\n";
let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n");
}
// Line numbers are 1-based and follow the path.
#[test]
fn line_numbers() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.line_number(true));
assert_eq!(2, count);
assert_eq!(out, "\
/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
// Byte offsets are 0-based and refer to the start of the matching line.
#[test]
fn byte_offset() {
let (_, out) = search(
"Sherlock", SHERLOCK, |s| s.byte_offset(true));
assert_eq!(out, "\
/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock
/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes
");
}
// Byte offsets are also reported for lines selected by an inverted match.
#[test]
fn byte_offset_inverted() {
let (_, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).byte_offset(true)
});
assert_eq!(out, "\
/baz.rs:65:Holmeses, success in the province of detective work must always
/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:321:and exhibited clearly, with a label attached.
");
}
// `count` prints the number of matching lines instead of the lines.
#[test]
fn count() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.count(true));
assert_eq!(2, count);
assert_eq!(out, "/baz.rs:2\n");
}
// `count_matches` prints the number of individual matches.
#[test]
fn count_matches() {
let (_, out) = search(
"the", SHERLOCK, |s| s.count_matches(true));
assert_eq!(out, "/baz.rs:4\n");
}
// Only the path is printed, and the returned count is 1 even though two
// lines of the haystack match.
#[test]
fn files_with_matches() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.files_with_matches(true));
assert_eq!(1, count);
assert_eq!(out, "/baz.rs\n");
}
// The path is printed when nothing in the haystack matches.
#[test]
fn files_without_matches() {
let (count, out) = search(
"zzzz", SHERLOCK, |s| s.files_without_matches(true));
assert_eq!(0, count);
assert_eq!(out, "/baz.rs\n");
}
// The search stops after the configured number of matching lines.
#[test]
fn max_count() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.max_count(Some(1)));
assert_eq!(1, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
}
// The limit also applies to lines selected by an inverted match.
#[test]
fn invert_match_max_count() {
let (count, out) = search(
"zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1)));
assert_eq!(1, count);
assert_eq!(out, "\
/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock
");
}
// Inverting reports exactly the lines that do not contain the pattern. The
// last line gets a terminator in the output even though the haystack does
// not end with one.
#[test]
fn invert_match() {
let (count, out) = search(
"Sherlock", SHERLOCK, |s| s.invert_match(true));
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:Holmeses, success in the province of detective work must always
/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:and exhibited clearly, with a label attached.
");
}
// Line numbers of inverted matches refer to positions in the haystack.
#[test]
fn invert_match_line_numbers() {
let (count, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).line_number(true)
});
assert_eq!(4, count);
assert_eq!(out, "\
/baz.rs:2:Holmeses, success in the province of detective work must always
/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash;
/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted,
/baz.rs:6:and exhibited clearly, with a label attached.
");
}
// Counting combined with inversion counts the non-matching lines.
#[test]
fn invert_match_count() {
let (count, out) = search("Sherlock", SHERLOCK, |s| {
s.invert_match(true).count(true)
});
assert_eq!(4, count);
assert_eq!(out, "/baz.rs:4\n");
}
}

File diff suppressed because it is too large Load Diff

230
src/subject.rs Normal file
View File

@ -0,0 +1,230 @@
use std::io;
use std::path::Path;
use std::sync::Arc;
use ignore::{self, DirEntry};
use same_file::Handle;
/// A configuration for describing how subjects should be built.
///
/// The derived `Default` leaves every option unset: no handle to skip, no
/// prefix stripping, and no separator or terminator.
#[derive(Clone, Debug, Default)]
struct Config {
    /// When set, subjects that refer to the same file as this handle are
    /// skipped.
    skip: Option<Arc<Handle>>,
    /// Whether a leading `./` is stripped from subject paths.
    strip_dot_prefix: bool,
    // NOTE(review): `separator` and `terminator` are never read by any code
    // visible in this file; confirm whether they are still needed.
    separator: Option<u8>,
    terminator: Option<u8>,
}
/// A builder for constructing things to search over.
#[derive(Clone, Debug)]
pub struct SubjectBuilder {
config: Config,
}
impl SubjectBuilder {
    /// Return a new subject builder with a default configuration.
    pub fn new() -> SubjectBuilder {
        let config = Config::default();
        SubjectBuilder { config }
    }

    /// Create a new subject from a possibly missing directory entry.
    ///
    /// When the entry is an error, the error is reported through the
    /// `message!` macro and `None` is returned. Otherwise the entry is
    /// handed to `build`, which decides whether it should be searched.
    pub fn build_from_result(
        &self,
        result: Result<DirEntry, ignore::Error>,
    ) -> Option<Subject> {
        let dent = match result {
            Ok(dent) => dent,
            Err(err) => {
                message!("{}", err);
                return None;
            }
        };
        self.build(dent)
    }

    /// Create a new subject using this builder's configuration.
    ///
    /// If a subject could not be created or should otherwise not be searched,
    /// then this returns `None` after emitting any relevant log messages.
    pub fn build(&self, dent: DirEntry) -> Option<Subject> {
        let strip_dot_prefix = self.config.strip_dot_prefix;
        let subj = Subject { dent, strip_dot_prefix };
        // An error attached to the entry is reported, but it does not stop
        // the entry from being considered.
        if let Some(ignore_err) = subj.dent.error() {
            ignore_message!("{}", ignore_err);
        }
        // If this entry represents stdin, then we always search it.
        if subj.dent.is_stdin() {
            return Some(subj);
        }
        // If we're supposed to skip a particular file, then skip it. A
        // failure to compare is treated the same way as a positive match.
        if let Some(ref handle) = self.config.skip {
            match subj.equals(handle) {
                Ok(true) => {
                    debug!(
                        "ignoring {}: (probably same file as stdout)",
                        subj.dent.path().display()
                    );
                    return None;
                }
                Err(err) => {
                    debug!(
                        "ignoring {}: got error: {}",
                        subj.dent.path().display(), err
                    );
                    return None;
                }
                Ok(false) => {} // fallthrough
            }
        }
        // If this subject has a depth of 0, then it was provided explicitly
        // by an end user (or via a shell glob). In this case, we always want
        // to search it if it even smells like a file (e.g., a symlink).
        let given_explicitly = subj.dent.depth() == 0;
        if given_explicitly && !subj.is_dir() {
            return Some(subj);
        }
        // At this point, we only want to search something if it's explicitly
        // a file. This omits symlinks. (If ripgrep was configured to follow
        // symlinks, then they have already been followed by the directory
        // traversal.)
        if subj.is_file() {
            return Some(subj);
        }
        // We got nothin. Emit a debug message, but only if this isn't a
        // directory. Otherwise, emitting messages for directories is just
        // noisy.
        if !subj.is_dir() {
            debug!(
                "ignoring {}: failed to pass subject filter: \
                 file type: {:?}, metadata: {:?}",
                subj.dent.path().display(),
                subj.dent.file_type(),
                subj.dent.metadata()
            );
        }
        None
    }

    /// When provided, subjects that represent the same file as the handle
    /// given will be skipped.
    ///
    /// Typically, it is useful to pass a handle referring to stdout, such
    /// that the file being written to isn't searched, which can lead to
    /// an unbounded feedback mechanism.
    ///
    /// Only one handle to skip can be provided; setting it again replaces
    /// the previous one.
    pub fn skip(
        &mut self,
        handle: Option<Handle>,
    ) -> &mut SubjectBuilder {
        self.config.skip = handle.map(Arc::new);
        self
    }

    /// When enabled, if the subject's file path starts with `./` then it is
    /// stripped.
    ///
    /// This is useful when implicitly searching the current working directory.
    pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder {
        self.config.strip_dot_prefix = yes;
        self
    }
}
/// A subject is a thing we want to search. Generally, a subject is either a
/// file or stdin.
#[derive(Clone, Debug)]
pub struct Subject {
/// The directory entry this subject was built from.
dent: DirEntry,
/// Whether a leading `./` is removed from the path returned by `path`.
strip_dot_prefix: bool,
}
impl Subject {
    /// Return the file path corresponding to this subject.
    ///
    /// If this subject corresponds to stdin, then a special `<stdin>` path
    /// is returned instead. When prefix stripping is enabled, a leading `./`
    /// is removed from the path.
    pub fn path(&self) -> &Path {
        let full = self.dent.path();
        if !self.strip_dot_prefix {
            return full;
        }
        // Stripping only succeeds when the path really starts with `./`;
        // otherwise the path is returned as it is.
        match full.strip_prefix("./") {
            Ok(stripped) => stripped,
            Err(_) => full,
        }
    }

    /// Returns true if and only if this entry corresponds to stdin.
    pub fn is_stdin(&self) -> bool {
        self.dent.is_stdin()
    }

    /// Returns true if and only if this subject points to a directory.
    ///
    /// This works around a bug in Rust's standard library:
    /// https://github.com/rust-lang/rust/issues/46484
    ///
    /// If the metadata cannot be read, the subject is not considered a
    /// directory.
    #[cfg(windows)]
    fn is_dir(&self) -> bool {
        use std::os::windows::fs::MetadataExt;
        use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY;

        match self.dent.metadata() {
            Ok(md) => md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0,
            Err(_) => false,
        }
    }

    /// Returns true if and only if this subject points to a directory.
    ///
    /// A subject without a file type is not considered a directory.
    #[cfg(not(windows))]
    fn is_dir(&self) -> bool {
        self.dent.file_type().map(|ft| ft.is_dir()).unwrap_or(false)
    }

    /// Returns true if and only if this subject points to a file.
    ///
    /// This works around a bug in Rust's standard library:
    /// https://github.com/rust-lang/rust/issues/46484
    ///
    /// On Windows, anything that is not a directory counts as a file.
    #[cfg(windows)]
    fn is_file(&self) -> bool {
        !self.is_dir()
    }

    /// Returns true if and only if this subject points to a file.
    ///
    /// A subject without a file type is not considered a file.
    #[cfg(not(windows))]
    fn is_file(&self) -> bool {
        self.dent.file_type().map(|ft| ft.is_file()).unwrap_or(false)
    }

    /// Returns true if and only if this subject is believed to be equivalent
    /// to the given handle. If there was a problem querying this subject for
    /// information to determine equality, then that error is returned.
    fn equals(&self, handle: &Handle) -> io::Result<bool> {
        // On Unix, differing inode numbers rule out equality.
        #[cfg(unix)]
        fn never_equal(dent: &DirEntry, handle: &Handle) -> bool {
            dent.ino() != Some(handle.ino())
        }

        // Elsewhere there is no such shortcut, so equality is never ruled
        // out up front.
        #[cfg(not(unix))]
        fn never_equal(_: &DirEntry, _: &Handle) -> bool {
            false
        }

        // If we know for sure that these two things aren't equal, then avoid
        // the costly extra stat call to determine equality.
        let known_different =
            self.dent.is_stdin() || never_equal(&self.dent, handle);
        if known_different {
            return Ok(false);
        }
        let own = Handle::from_path(self.path())?;
        Ok(&own == handle)
    }
}

View File

@ -1,413 +0,0 @@
use std::fs::File;
use std::io;
use std::path::{Path, PathBuf};
use encoding_rs::Encoding;
use grep::Grep;
use ignore::DirEntry;
use memmap::Mmap;
use termcolor::WriteColor;
// use decoder::DecodeReader;
use encoding_rs_io::DecodeReaderBytesBuilder;
use decompressor::{self, DecompressionReader};
use preprocessor::PreprocessorReader;
use pathutil::strip_prefix;
use printer::Printer;
use search_buffer::BufferSearcher;
use search_stream::{InputBuffer, Searcher};
use Result;
/// A single unit of input for a worker.
pub enum Work {
/// Input that comes from stdin rather than from a directory entry.
Stdin,
/// Input identified by a directory entry.
DirEntry(DirEntry),
}
/// A builder that collects a matcher and search options before creating a
/// `Worker`.
pub struct WorkerBuilder {
/// The matcher handed to the worker when it is built.
grep: Grep,
/// The options accumulated so far; they start at `Options::default()`.
opts: Options,
}
/// The options a worker is built with.
///
/// NOTE(review): several fields share their names with the `BufferSearcher`
/// setters; presumably they are forwarded to the searchers — confirm against
/// the worker implementation.
#[derive(Clone, Debug)]
struct Options {
// NOTE(review): presumably selects memory-mapped searching — confirm.
mmap: bool,
// The encoding used to read each file, if one was set.
encoding: Option<&'static Encoding>,
// The number of contextual lines to show after each match.
after_context: usize,
// The number of contextual lines to show before each match.
before_context: usize,
// Print a 0-based offset before each printed line.
byte_offset: bool,
// Print a count instead of each match.
count: bool,
// Print the count of individual matches instead of each match.
count_matches: bool,
files_with_matches: bool,
files_without_matches: bool,
// The end-of-line byte; `b'\n'` by default.
eol: u8,
invert_match: bool,
line_number: bool,
// The maximum number of matches; `None` by default.
max_count: Option<u64>,
no_messages: bool,
quiet: bool,
text: bool,
// NOTE(review): presumably the path of a preprocessor command — confirm.
preprocessor: Option<PathBuf>,
search_zip_files: bool
}
impl Default for Options {
    /// Return options with every flag disabled, no context lines, no
    /// encoding, no match limit, no preprocessor, and `\n` as the
    /// end-of-line byte.
    fn default() -> Options {
        // Fields are listed in the order in which the struct declares them.
        Options {
            mmap: false,
            encoding: None,
            after_context: 0,
            before_context: 0,
            byte_offset: false,
            count: false,
            count_matches: false,
            files_with_matches: false,
            files_without_matches: false,
            eol: b'\n',
            invert_match: false,
            line_number: false,
            max_count: None,
            no_messages: false,
            quiet: false,
            text: false,
            preprocessor: None,
            search_zip_files: false,
        }
    }
}
impl WorkerBuilder {
    /// Start building a worker around the given grep matcher.
    ///
    /// All options begin at their defaults and can be adjusted with the
    /// other methods on this builder before calling `build`.
    pub fn new(grep: Grep) -> WorkerBuilder {
        WorkerBuilder { grep, opts: Options::default() }
    }

    /// Consume this builder and produce the configured worker.
    pub fn build(self) -> Worker {
        let WorkerBuilder { grep, opts } = self;
        // The input buffer must agree with the configured line terminator.
        let mut input_buffer = InputBuffer::new();
        input_buffer.eol(opts.eol);
        Worker {
            grep,
            inpbuf: input_buffer,
            // 8 KiB of scratch space used when decoding file contents.
            decodebuf: vec![0; 8 * 1024],
            opts,
        }
    }

    /// Set how many lines of context are shown after each match.
    ///
    /// Defaults to zero.
    pub fn after_context(mut self, count: usize) -> Self {
        self.opts.after_context = count;
        self
    }

    /// Set how many lines of context are shown before each match.
    ///
    /// Defaults to zero.
    pub fn before_context(mut self, count: usize) -> Self {
        self.opts.before_context = count;
        self
    }

    /// When enabled, a 0-based offset of the matching line (or of the match
    /// itself if -o is specified) is printed before the line.
    ///
    /// Off by default.
    pub fn byte_offset(mut self, yes: bool) -> Self {
        self.opts.byte_offset = yes;
        self
    }

    /// When enabled, a count is printed instead of each match.
    ///
    /// Off by default.
    pub fn count(mut self, yes: bool) -> Self {
        self.opts.count = yes;
        self
    }

    /// When enabled, the number of individual matches is printed instead of
    /// each match.
    ///
    /// Off by default.
    pub fn count_matches(mut self, yes: bool) -> Self {
        self.opts.count_matches = yes;
        self
    }

    /// Choose the encoding used to read each file.
    ///
    /// With `None` (the default), the encoding is detected automatically on
    /// a best-effort, per-file basis.
    pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self {
        self.opts.encoding = enc;
        self
    }

    /// When enabled, the path is printed instead of each match.
    ///
    /// Off by default.
    pub fn files_with_matches(mut self, yes: bool) -> Self {
        self.opts.files_with_matches = yes;
        self
    }

    /// When enabled, the paths of files without any matches are printed.
    ///
    /// Off by default.
    pub fn files_without_matches(mut self, yes: bool) -> Self {
        self.opts.files_without_matches = yes;
        self
    }

    /// Choose the end-of-line byte used by the searcher.
    pub fn eol(mut self, eol: u8) -> Self {
        self.opts.eol = eol;
        self
    }

    /// When enabled, lines that do *not* match the pattern are the ones
    /// treated as matches.
    pub fn invert_match(mut self, yes: bool) -> Self {
        self.opts.invert_match = yes;
        self
    }

    /// When enabled, line numbers are computed and each output line is
    /// prefixed with its number.
    pub fn line_number(mut self, yes: bool) -> Self {
        self.opts.line_number = yes;
        self
    }

    /// Cap the number of matches at the given count.
    ///
    /// The default, `None`, means there is no limit.
    pub fn max_count(mut self, count: Option<u64>) -> Self {
        self.opts.max_count = count;
        self
    }

    /// When enabled, memory maps are tried for searching where possible.
    pub fn mmap(mut self, yes: bool) -> Self {
        self.opts.mmap = yes;
        self
    }

    /// When enabled, error messages are not printed.
    ///
    /// Off by default.
    pub fn no_messages(mut self, yes: bool) -> Self {
        self.opts.no_messages = yes;
        self
    }

    /// When enabled, nothing is printed and searching stops after the first
    /// match is found.
    pub fn quiet(mut self, yes: bool) -> Self {
        self.opts.quiet = yes;
        self
    }

    /// When enabled, binary files are searched as though they were text.
    pub fn text(mut self, yes: bool) -> Self {
        self.opts.text = yes;
        self
    }

    /// When enabled, compressed files are searched as well.
    pub fn search_zip_files(mut self, yes: bool) -> Self {
        self.opts.search_zip_files = yes;
        self
    }

    /// When a command is given, the output of running that preprocessor on
    /// each file is searched.
    pub fn preprocessor(mut self, command: Option<PathBuf>) -> Self {
        self.opts.preprocessor = command;
        self
    }
}
/// Worker is responsible for executing searches on file paths, while choosing
/// streaming search or memory map search as appropriate.
pub struct Worker {
/// The grep matcher used for every search.
grep: Grep,
/// The input buffer handed to the streaming searcher on each search.
inpbuf: InputBuffer,
/// Scratch buffer handed to the decoding reader when reading input.
decodebuf: Vec<u8>,
/// The options this worker was built with.
opts: Options,
}
impl Worker {
    /// Run a search for one work item, sending output to the given printer.
    ///
    /// The work item is either stdin or a directory entry whose path is
    /// searched. The return value is the count reported by the search; any
    /// failure is printed to stderr (unless messages are suppressed) and
    /// yields zero.
    pub fn run<W: WriteColor>(
        &mut self,
        printer: &mut Printer<W>,
        work: Work,
    ) -> u64 {
        let suppress_messages = self.opts.no_messages;
        let outcome = match work {
            Work::Stdin => {
                let handle = io::stdin();
                let locked = handle.lock();
                self.search(printer, Path::new("<stdin>"), locked)
            }
            Work::DirEntry(dent) => {
                let full_path = dent.path();
                let preprocessor = self.opts.preprocessor.clone();
                if let Some(cmd) = preprocessor {
                    // A preprocessor takes precedence over every other way
                    // of reading the file.
                    match PreprocessorReader::from_cmd_path(cmd, full_path) {
                        Ok(reader) => self.search(printer, full_path, reader),
                        Err(err) => {
                            if !suppress_messages {
                                eprintln!("{}", err);
                            }
                            return 0;
                        }
                    }
                } else if self.opts.search_zip_files
                    && decompressor::is_compressed(full_path)
                {
                    match DecompressionReader::from_path(full_path) {
                        Some(reader) => self.search(printer, full_path, reader),
                        None => return 0,
                    }
                } else {
                    let file = match File::open(full_path) {
                        Ok(file) => file,
                        Err(err) => {
                            if !suppress_messages {
                                eprintln!("{}: {}", full_path.display(), err);
                            }
                            return 0;
                        }
                    };
                    // The file is opened with the original path, but a
                    // leading "./" is dropped from the path that is reported.
                    let shown_path =
                        strip_prefix("./", full_path).unwrap_or(full_path);
                    if self.opts.mmap {
                        self.search_mmap(printer, shown_path, &file)
                    } else {
                        self.search(printer, shown_path, file)
                    }
                }
            }
        };
        outcome.unwrap_or_else(|err| {
            if !suppress_messages {
                eprintln!("{}", err);
            }
            0
        })
    }

    /// Search the given reader with the streaming searcher.
    ///
    /// The reader is first wrapped in a decoding reader configured with this
    /// worker's encoding option; all remaining options are forwarded to the
    /// searcher.
    fn search<R: io::Read, W: WriteColor>(
        &mut self,
        printer: &mut Printer<W>,
        path: &Path,
        rdr: R,
    ) -> Result<u64> {
        let decoded = DecodeReaderBytesBuilder::new()
            .encoding(self.opts.encoding)
            .utf8_passthru(true)
            .build_with_buffer(rdr, &mut self.decodebuf)?;
        Searcher::new(&mut self.inpbuf, printer, &self.grep, path, decoded)
            .after_context(self.opts.after_context)
            .before_context(self.opts.before_context)
            .byte_offset(self.opts.byte_offset)
            .count(self.opts.count)
            .count_matches(self.opts.count_matches)
            .files_with_matches(self.opts.files_with_matches)
            .files_without_matches(self.opts.files_without_matches)
            .eol(self.opts.eol)
            .line_number(self.opts.line_number)
            .invert_match(self.opts.invert_match)
            .max_count(self.opts.max_count)
            .quiet(self.opts.quiet)
            .text(self.opts.text)
            .run()
            .map_err(From::from)
    }

    /// Search the given file through a memory map, falling back to the
    /// streaming search whenever a memory map cannot or should not be used.
    fn search_mmap<W: WriteColor>(
        &mut self,
        printer: &mut Printer<W>,
        path: &Path,
        file: &File,
    ) -> Result<u64> {
        let reported_len = file.metadata()?.len();
        if reported_len == 0 {
            // Memory mapping an empty file is an error. A file reporting a
            // zero length is not necessarily empty, though: /proc/cpuinfo,
            // for example, claims to be empty yet yields data when read.
            // Regular read calls handle both situations.
            return self.search(printer, path, file);
        }
        let mmap = match self.mmap(file)? {
            Some(mmap) => mmap,
            None => return self.search(printer, path, file),
        };
        let bytes = &mmap[..];
        if bytes.len() >= 3 && Encoding::for_bom(bytes).is_some() {
            // A BOM (e.g. UTF-16) at the start of the memory map means the
            // contents need transcoding, which only the stream reader does.
            return self.search(printer, path, file);
        }
        let count = BufferSearcher::new(printer, &self.grep, path, bytes)
            .byte_offset(self.opts.byte_offset)
            .count(self.opts.count)
            .count_matches(self.opts.count_matches)
            .files_with_matches(self.opts.files_with_matches)
            .files_without_matches(self.opts.files_without_matches)
            .eol(self.opts.eol)
            .line_number(self.opts.line_number)
            .invert_match(self.opts.invert_match)
            .max_count(self.opts.max_count)
            .quiet(self.opts.quiet)
            .text(self.opts.text)
            .run();
        Ok(count)
    }

    /// Memory map the given file; on non-Unix targets every failure is
    /// reported as an error.
    #[cfg(not(unix))]
    fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
        let map = mmap_readonly(file)?;
        Ok(Some(map))
    }

    /// Memory map the given file.
    ///
    /// A failure whose OS error code is EOVERFLOW, ENODEV or ENOMEM yields
    /// `Ok(None)` instead of an error, which lets the caller fall back to
    /// regular reads. Every other failure is returned as an error.
    #[cfg(unix)]
    fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
        use libc::{EOVERFLOW, ENODEV, ENOMEM};

        match mmap_readonly(file) {
            Ok(map) => Ok(Some(map)),
            Err(err) => {
                let tolerated = err.raw_os_error().map_or(false, |code| {
                    [EOVERFLOW, ENODEV, ENOMEM].contains(&code)
                });
                if tolerated {
                    Ok(None)
                } else {
                    Err(From::from(err))
                }
            }
        }
    }
}
/// Creates a memory map of the given file by calling `Mmap::map`.
///
/// Any error from creating the map is returned unchanged.
fn mmap_readonly(file: &File) -> io::Result<Mmap> {
// NOTE(review): `Mmap::map` is unsafe; presumably soundness relies on the
// file not being modified or truncated while it is mapped. Confirm against
// the memmap crate documentation.
unsafe { Mmap::map(file) }
}

View File

@ -91,8 +91,8 @@ be, to a very large extent, the result of luck. Sherlock Holmes
sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -148,19 +148,19 @@ sherlock!(with_heading_default, "Sherlock", ".",
cmd.arg("-j1").arg("--heading");
let lines: String = wd.stdout(&mut cmd);
let expected1 = "\
foo
./foo
Sherlock Holmes lives on Baker Street.
sherlock
./sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
";
let expected2 = "\
sherlock
./sherlock
For the Doctor Watsons of this world, as opposed to the Sherlock
be, to a very large extent, the result of luck. Sherlock Holmes
foo
./foo
Sherlock Holmes lives on Baker Street.
";
if lines != expected1 {
@ -289,14 +289,14 @@ sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.rs", "Sherlock");
cmd.arg("-t").arg("rust");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.rs:Sherlock\n");
assert_eq!(lines, "./file.rs:Sherlock\n");
});
sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
cmd.arg("-t").arg("all");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py:Sherlock\n");
assert_eq!(lines, "./file.py:Sherlock\n");
});
sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
@ -305,7 +305,7 @@ sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.rs", "Sherlock");
cmd.arg("-T").arg("rust");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py:Sherlock\n");
assert_eq!(lines, "./file.py:Sherlock\n");
});
sherlock!(file_types_negate_all, "Sherlock", ".",
@ -315,8 +315,8 @@ sherlock!(file_types_negate_all, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
");
});
@ -333,18 +333,21 @@ sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.wat", "Sherlock");
cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.wat:Sherlock\n");
assert_eq!(lines, "./file.wat:Sherlock\n");
});
sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
sherlock!(file_type_add_compose, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
wd.create("file.py", "Sherlock");
wd.create("file.rs", "Sherlock");
wd.create("file.wat", "Sherlock");
cmd.arg("--type-add").arg("wat:*.wat");
cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo");
let lines: String = wd.stdout(&mut cmd);
println!("{}", lines);
assert_eq!(sort_lines(&lines), "file.py:Sherlock\nfile.wat:Sherlock\n");
assert_eq!(
sort_lines(&lines),
"./file.py:Sherlock\n./file.wat:Sherlock\n"
);
});
sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
@ -352,7 +355,7 @@ sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.rs", "Sherlock");
cmd.arg("-g").arg("*.rs");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.rs:Sherlock\n");
assert_eq!(lines, "./file.rs:Sherlock\n");
});
sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
@ -361,14 +364,14 @@ sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.rs", "Sherlock");
cmd.arg("-g").arg("!*.rs");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py:Sherlock\n");
assert_eq!(lines, "./file.py:Sherlock\n");
});
sherlock!(iglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file.HTML", "Sherlock");
cmd.arg("--iglob").arg("*.html");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.HTML:Sherlock\n");
assert_eq!(lines, "./file.HTML:Sherlock\n");
});
sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
@ -376,15 +379,16 @@ sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("file2.html", "Sherlock");
cmd.arg("--glob").arg("*.html");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file2.html:Sherlock\n");
assert_eq!(lines, "./file2.html:Sherlock\n");
});
sherlock!(byte_offset_only_matching, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
sherlock!(byte_offset_only_matching, "Sherlock", ".",
|wd: WorkDir, mut cmd: Command| {
cmd.arg("-b").arg("-o");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:56:Sherlock
sherlock:177:Sherlock
./sherlock:56:Sherlock
./sherlock:177:Sherlock
";
assert_eq!(lines, expected);
});
@ -392,35 +396,35 @@ sherlock:177:Sherlock
sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--count");
let lines: String = wd.stdout(&mut cmd);
let expected = "sherlock:2\n";
let expected = "./sherlock:2\n";
assert_eq!(lines, expected);
});
sherlock!(count_matches, "the", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--count-matches");
let lines: String = wd.stdout(&mut cmd);
let expected = "sherlock:4\n";
let expected = "./sherlock:4\n";
assert_eq!(lines, expected);
});
sherlock!(count_matches_inverted, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--count-matches").arg("--invert-match");
let lines: String = wd.stdout(&mut cmd);
let expected = "sherlock:4\n";
let expected = "./sherlock:4\n";
assert_eq!(lines, expected);
});
sherlock!(count_matches_via_only, "the", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--count").arg("--only-matching");
let lines: String = wd.stdout(&mut cmd);
let expected = "sherlock:4\n";
let expected = "./sherlock:4\n";
assert_eq!(lines, expected);
});
sherlock!(files_with_matches, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--files-with-matches");
let lines: String = wd.stdout(&mut cmd);
let expected = "sherlock\n";
let expected = "./sherlock\n";
assert_eq!(lines, expected);
});
@ -429,7 +433,7 @@ sherlock!(files_without_matches, "Sherlock", ".",
wd.create("file.py", "foo");
cmd.arg("--files-without-match");
let lines: String = wd.stdout(&mut cmd);
let expected = "file.py\n";
let expected = "./file.py\n";
assert_eq!(lines, expected);
});
@ -527,7 +531,7 @@ sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".",
cmd.arg("--max-filesize").arg("50").arg("--files");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
foo
./foo
";
assert_eq!(lines, expected);
});
@ -541,7 +545,7 @@ sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".",
cmd.arg("--max-filesize").arg("4K").arg("--files");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
foo
./foo
";
assert_eq!(lines, expected);
});
@ -555,7 +559,7 @@ sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".",
cmd.arg("--max-filesize").arg("1M").arg("--files");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
foo
./foo
";
assert_eq!(lines, expected);
});
@ -583,8 +587,8 @@ sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--hidden");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -610,8 +614,8 @@ sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--no-ignore");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -653,8 +657,8 @@ sherlock!(ignore_git_parent_stop, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -686,8 +690,8 @@ sherlock!(ignore_git_parent_stop_file, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -740,8 +744,8 @@ sherlock!(no_parent_ignore_git, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -771,8 +775,8 @@ sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, path(expected));
});
@ -783,8 +787,8 @@ sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -796,8 +800,8 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -807,7 +811,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-uuu");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n");
assert_eq!(lines, "./file:foo\x00bar\n./file:foo\x00baz\n");
});
sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| {
@ -815,10 +819,10 @@ sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted,
./sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted,
";
assert_eq!(lines, expected);
});
@ -829,10 +833,10 @@ sherlock!(vimgrep_no_line, "Sherlock|Watson", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:12:but Doctor Watson has to have it taken out for him and dusted,
./sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:12:but Doctor Watson has to have it taken out for him and dusted,
";
assert_eq!(lines, expected);
});
@ -843,10 +847,10 @@ sherlock!(vimgrep_no_line_no_column, "Sherlock|Watson", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:but Doctor Watson has to have it taken out for him and dusted,
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:but Doctor Watson has to have it taken out for him and dusted,
";
assert_eq!(lines, expected);
});
@ -869,12 +873,12 @@ clean!(regression_25, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("src/llvm/foo", "test");
let lines: String = wd.stdout(&mut cmd);
let expected = path("src/llvm/foo:test\n");
let expected = path("./src/llvm/foo:test\n");
assert_eq!(lines, expected);
cmd.current_dir(wd.path().join("src"));
let lines: String = wd.stdout(&mut cmd);
let expected = path("llvm/foo:test\n");
let expected = path("./llvm/foo:test\n");
assert_eq!(lines, expected);
});
@ -885,7 +889,7 @@ clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("vendor/manifest", "test");
let lines: String = wd.stdout(&mut cmd);
let expected = path("vendor/manifest:test\n");
let expected = path("./vendor/manifest:test\n");
assert_eq!(lines, expected);
});
@ -927,7 +931,7 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("dir/bar", "test");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, path("dir/bar:test\n"));
assert_eq!(lines, path("./dir/bar:test\n"));
});
// See: https://github.com/BurntSushi/ripgrep/issues/87
@ -945,7 +949,7 @@ clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create(".foo", "test");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, ".foo:test\n");
assert_eq!(lines, "./.foo:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/93
@ -954,7 +958,7 @@ clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".",
wd.create("foo", "192.168.1.1");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:192.168.1.1\n");
assert_eq!(lines, "./foo:192.168.1.1\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/99
@ -966,7 +970,10 @@ clean!(regression_99, "test", ".",
cmd.arg("-j1").arg("--heading");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(sort_lines(&lines), sort_lines("bar\ntest\n\nfoo1\ntest\n"));
assert_eq!(
sort_lines(&lines),
sort_lines("./bar\ntest\n\n./foo1\ntest\n")
);
});
// See: https://github.com/BurntSushi/ripgrep/issues/105
@ -975,7 +982,7 @@ clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--vimgrep");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:1:3:zztest\n");
assert_eq!(lines, "./foo:1:3:zztest\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/105
@ -984,7 +991,7 @@ clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--column");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:1:3:zztest\n");
assert_eq!(lines, "./foo:1:3:zztest\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/127
@ -1009,8 +1016,8 @@ clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
let lines: String = wd.stdout(&mut cmd);
let expected = format!("\
{path}:For the Doctor Watsons of this world, as opposed to the Sherlock
{path}:be, to a very large extent, the result of luck. Sherlock Holmes
./{path}:For the Doctor Watsons of this world, as opposed to the Sherlock
./{path}:be, to a very large extent, the result of luck. Sherlock Holmes
", path=path("foo/watson"));
assert_eq!(lines, expected);
});
@ -1021,7 +1028,7 @@ clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-n");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:5:x\n");
assert_eq!(lines, "./foo:5:x\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/131
@ -1049,8 +1056,8 @@ sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
sym1:For the Doctor Watsons of this world, as opposed to the Sherlock
sym1:be, to a very large extent, the result of luck. Sherlock Holmes
sym2:For the Doctor Watsons of this world, as opposed to the Sherlock
@ -1094,11 +1101,11 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("foo/bar/baz", "test");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, format!("{}:test\n", path("foo/bar/baz")));
assert_eq!(lines, format!("./{}:test\n", path("foo/bar/baz")));
cmd.current_dir(wd.path().join("./foo/bar"));
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "baz:test\n");
assert_eq!(lines, "./baz:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/199
@ -1107,7 +1114,7 @@ clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--smart-case");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:tEsT\n");
assert_eq!(lines, "./foo:tEsT\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/206
@ -1117,7 +1124,7 @@ clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-g").arg("*.txt");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, format!("{}:test\n", path("foo/bar.txt")));
assert_eq!(lines, format!("./{}:test\n", path("foo/bar.txt")));
});
// See: https://github.com/BurntSushi/ripgrep/issues/210
@ -1161,7 +1168,7 @@ clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-i");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n");
assert_eq!(lines, "./foo:привет\n./foo:Привет\n./foo:ПрИвЕт\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/256
@ -1205,7 +1212,7 @@ clean!(regression_405, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-g").arg("!/foo/**");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, format!("{}:test\n", path("bar/foo/file2.txt")));
assert_eq!(lines, format!("./{}:test\n", path("bar/foo/file2.txt")));
});
// See: https://github.com/BurntSushi/ripgrep/issues/428
@ -1220,7 +1227,7 @@ clean!(regression_428_color_context_path, "foo", ".",
let expected = format!(
"{colored_path}:foo\n{colored_path}-bar\n",
colored_path=format!(
"\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d{path}\x1b\x5b\x30\x6d",
"\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d./{path}\x1b\x5b\x30\x6d",
path=path("sherlock")));
assert_eq!(lines, expected);
});
@ -1234,16 +1241,17 @@ clean!(regression_428_unrecognized_style, "Sherlok", ".",
let output = cmd.output().unwrap();
let err = String::from_utf8_lossy(&output.stderr);
let expected = "\
Unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense, \
unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense, \
nounderline, underline.
";
assert_eq!(err, expected);
});
// See: https://github.com/BurntSushi/ripgrep/issues/493
clean!(regression_493, " 're ", "input.txt", |wd: WorkDir, mut cmd: Command| {
clean!(regression_493, r"\b 're \b", "input.txt",
|wd: WorkDir, mut cmd: Command| {
wd.create("input.txt", "peshwaship 're seminomata");
cmd.arg("-o").arg("-w");
cmd.arg("-o");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, " 're \n");
@ -1255,8 +1263,8 @@ sherlock!(regression_553_switch, "sherlock", ".",
cmd.arg("-i");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
@ -1264,8 +1272,8 @@ sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
cmd.arg("-i");
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -1305,12 +1313,9 @@ clean!(regression_599, "^$", "input.txt", |wd: WorkDir, mut cmd: Command| {
]);
let lines: String = wd.stdout(&mut cmd);
// Technically, the expected output should only be two lines, but:
// https://github.com/BurntSushi/ripgrep/issues/441
let expected = "\
1:
2:
4:
";
assert_eq!(expected, lines);
});
@ -1326,7 +1331,7 @@ clean!(regression_807, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--hidden");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, format!("{}:test\n", path(".a/c/file")));
assert_eq!(lines, format!("./{}:test\n", path(".a/c/file")));
});
// See: https://github.com/BurntSushi/ripgrep/issues/900
@ -1343,7 +1348,7 @@ clean!(feature_1_sjis, "Шерлок Холмс", ".", |wd: WorkDir, mut cmd: Co
cmd.arg("-Esjis");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:Шерлок Холмс\n");
assert_eq!(lines, "./foo:Шерлок Холмс\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/1
@ -1354,7 +1359,7 @@ clean!(feature_1_utf16_auto, "Шерлок Холмс", ".",
wd.create_bytes("foo", &sherlock[..]);
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:Шерлок Холмс\n");
assert_eq!(lines, "./foo:Шерлок Холмс\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/1
@ -1366,7 +1371,7 @@ clean!(feature_1_utf16_explicit, "Шерлок Холмс", ".",
cmd.arg("-Eutf-16le");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:Шерлок Холмс\n");
assert_eq!(lines, "./foo:Шерлок Холмс\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/1
@ -1378,7 +1383,7 @@ clean!(feature_1_eucjp, "Шерлок Холмс", ".",
cmd.arg("-Eeuc-jp");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:Шерлок Холмс\n");
assert_eq!(lines, "./foo:Шерлок Холмс\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/1
@ -1413,8 +1418,8 @@ sherlock!(feature_7_dash, "-f-", ".", |wd: WorkDir, mut cmd: Command| {
let output = wd.pipe(&mut cmd, "Sherlock");
let lines = String::from_utf8_lossy(&output.stdout);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -1439,8 +1444,8 @@ sherlock!(feature_34_only_matching, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:Sherlock
sherlock:Sherlock
./sherlock:Sherlock
./sherlock:Sherlock
";
assert_eq!(lines, expected);
});
@ -1452,8 +1457,8 @@ sherlock!(feature_34_only_matching_line_column, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:1:57:Sherlock
sherlock:3:49:Sherlock
./sherlock:1:57:Sherlock
./sherlock:3:49:Sherlock
";
assert_eq!(lines, expected);
});
@ -1476,15 +1481,15 @@ sherlock!(feature_45_relative_cwd, "test", ".",
// First, get a baseline without applying ignore rules.
let lines = paths_from_stdout(wd.stdout(&mut cmd));
assert_eq!(lines, paths(&[
"bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo",
"baz/test", "foo", "test",
"./bar/test", "./baz/bar/test", "./baz/baz/bar/test", "./baz/foo",
"./baz/test", "./foo", "./test",
]));
// Now try again with the ignore file activated.
cmd.arg("--ignore-file").arg(".not-an-ignore");
let lines = paths_from_stdout(wd.stdout(&mut cmd));
assert_eq!(lines, paths(&[
"baz/bar/test", "baz/baz/bar/test", "baz/test", "test",
"./baz/bar/test", "./baz/baz/bar/test", "./baz/test", "./test",
]));
// Now do it again, but inside the baz directory.
@ -1496,7 +1501,7 @@ sherlock!(feature_45_relative_cwd, "test", ".",
cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore");
cmd.current_dir(wd.path().join("baz"));
let lines = paths_from_stdout(wd.stdout(&mut cmd));
assert_eq!(lines, paths(&["baz/bar/test", "test"]));
assert_eq!(lines, paths(&["./baz/bar/test", "./test"]));
});
// See: https://github.com/BurntSushi/ripgrep/issues/45
@ -1509,7 +1514,7 @@ sherlock!(feature_45_precedence_with_others, "test", ".",
cmd.arg("--ignore-file").arg(".not-an-ignore");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "imp.log:test\n");
assert_eq!(lines, "./imp.log:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/45
@ -1523,7 +1528,7 @@ sherlock!(feature_45_precedence_internal, "test", ".",
cmd.arg("--ignore-file").arg(".not-an-ignore1");
cmd.arg("--ignore-file").arg(".not-an-ignore2");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "imp.log:test\n");
assert_eq!(lines, "./imp.log:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/68
@ -1535,7 +1540,7 @@ clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--no-ignore-vcs");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:test\n");
assert_eq!(lines, "./foo:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/70
@ -1545,8 +1550,8 @@ sherlock!(feature_70_smart_case, "sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
assert_eq!(lines, expected);
});
@ -1557,7 +1562,7 @@ sherlock!(feature_89_files_with_matches, "Sherlock", ".",
cmd.arg("--null").arg("--files-with-matches");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x00");
assert_eq!(lines, "./sherlock\x00");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
@ -1567,7 +1572,7 @@ sherlock!(feature_89_files_without_matches, "Sherlock", ".",
cmd.arg("--null").arg("--files-without-match");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "file.py\x00");
assert_eq!(lines, "./file.py\x00");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
@ -1576,7 +1581,7 @@ sherlock!(feature_89_count, "Sherlock", ".",
cmd.arg("--null").arg("--count");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x002\n");
assert_eq!(lines, "./sherlock\x002\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
@ -1585,7 +1590,7 @@ sherlock!(feature_89_files, "NADA", ".",
cmd.arg("--null").arg("--files");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x00");
assert_eq!(lines, "./sherlock\x00");
});
// See: https://github.com/BurntSushi/ripgrep/issues/89
@ -1595,10 +1600,10 @@ sherlock!(feature_89_match, "Sherlock", ".",
let lines: String = wd.stdout(&mut cmd);
let expected = "\
sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock\x00Holmeses, success in the province of detective work must always
sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes
sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash;
./sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock
./sherlock\x00Holmeses, success in the province of detective work must always
./sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes
./sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash;
";
assert_eq!(lines, expected);
});
@ -1613,7 +1618,7 @@ clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--maxdepth").arg("2");
let lines: String = wd.stdout(&mut cmd);
let expected = path("one/pass:far\n");
let expected = path("./one/pass:far\n");
assert_eq!(lines, expected);
});
@ -1639,7 +1644,7 @@ clean!(feature_129_matches, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-M26");
let lines: String = wd.stdout(&mut cmd);
let expected = "foo:test\nfoo:[Omitted long line with 2 matches]\n";
let expected = "./foo:test\n./foo:[Omitted long matching line]\n";
assert_eq!(lines, expected);
});
@ -1649,7 +1654,7 @@ clean!(feature_129_context, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-M20").arg("-C1");
let lines: String = wd.stdout(&mut cmd);
let expected = "foo:test\nfoo-[Omitted long context line]\n";
let expected = "./foo:test\n./foo-[Omitted long context line]\n";
assert_eq!(lines, expected);
});
@ -1659,7 +1664,7 @@ clean!(feature_129_replace, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("-M26").arg("-rfoo");
let lines: String = wd.stdout(&mut cmd);
let expected = "foo:foo\nfoo:[Omitted long line with 2 replacements]\n";
let expected = "./foo:foo\n./foo:[Omitted long line with 2 matches]\n";
assert_eq!(lines, expected);
});
@ -1668,7 +1673,7 @@ clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| {
wd.create("foo", "test\ntest");
cmd.arg("-m1");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:test\n");
assert_eq!(lines, "./foo:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/159
@ -1684,7 +1689,7 @@ clean!(feature_243_column_line, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--column");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "foo:1:1:test\n");
assert_eq!(lines, "./foo:1:1:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/263
@ -1696,7 +1701,7 @@ clean!(feature_263_sort_files, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--sort-files");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "abc:test\nbar:test\nfoo:test\nzoo:test\n");
assert_eq!(lines, "./abc:test\n./bar:test\n./foo:test\n./zoo:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/275
@ -1706,7 +1711,7 @@ clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| {
cmd.arg("--path-separator").arg("Z");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "fooZbar:test\n");
assert_eq!(lines, ".ZfooZbar:test\n");
});
// See: https://github.com/BurntSushi/ripgrep/issues/362
@ -1746,7 +1751,7 @@ sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".",
cmd.arg("-0").arg("--count");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "sherlock\x002\n");
assert_eq!(lines, "./sherlock\x002\n");
});
#[test]
@ -1932,59 +1937,52 @@ fn feature_411_parallel_search_stats() {
assert_eq!(lines.contains("seconds"), true);
}
sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| {
cmd.arg("--files-with-matches");
cmd.arg("--stats");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines.contains("seconds"), false);
});
sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| {
cmd.arg("--files-without-match");
cmd.arg("--stats");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines.contains("seconds"), false);
});
#[test]
fn feature_740_passthru() {
let wd = WorkDir::new("feature_740");
wd.create("file", "\nfoo\nbar\nfoobar\n\nbaz\n");
wd.create("patterns", "foo\n\nbar\n");
wd.create("patterns", "foo\nbar\n");
// We can't assume that the way colour specs are translated to ANSI
// sequences will remain stable, and --replace doesn't currently work with
// pass-through, so for now we don't actually test the match sub-strings
let common_args = &["-n", "--passthru"];
let expected = "\
1:
let foo_expected = "\
1-
2:foo
3:bar
3-bar
4:foobar
5:
6:baz
5-
6-baz
";
// With single pattern
let mut cmd = wd.command();
cmd.args(common_args).arg("foo").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, expected);
assert_eq!(lines, foo_expected);
let foo_bar_expected = "\
1-
2:foo
3:bar
4:foobar
5-
6-baz
";
// With multiple -e patterns
let mut cmd = wd.command();
cmd.args(common_args)
.arg("-e").arg("foo").arg("-e").arg("bar").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, expected);
assert_eq!(lines, foo_bar_expected);
// With multiple -f patterns
let mut cmd = wd.command();
cmd.args(common_args).arg("-f").arg("patterns").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, expected);
assert_eq!(lines, foo_bar_expected);
// -c should override
let mut cmd = wd.command();
@ -1992,15 +1990,35 @@ fn feature_740_passthru() {
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, "2\n");
let only_foo_expected = "\
1-
2:foo
3-bar
4:foo
5-
6-baz
";
// -o used to conflict with --passthru; it is now accepted and prints only the matched text on matching lines
let mut cmd = wd.command();
cmd.args(common_args).arg("-o").arg("foo").arg("file");
wd.assert_err(&mut cmd);
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, only_foo_expected);
let replace_foo_expected = "\
1-
2:wat
3-bar
4:watbar
5-
6-baz
";
// -r used to conflict with --passthru; it is now accepted and rewrites the matched text on matching lines
let mut cmd = wd.command();
cmd.args(common_args).arg("-r").arg("$0").arg("foo").arg("file");
wd.assert_err(&mut cmd);
cmd.args(common_args).arg("-r").arg("wat").arg("foo").arg("file");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, replace_foo_expected);
}
#[test]
@ -2081,7 +2099,7 @@ fn regression_270() {
let mut cmd = wd.command();
cmd.arg("-e").arg("-test").arg("./");
let lines: String = wd.stdout(&mut cmd);
assert_eq!(lines, path("foo:-test\n"));
assert_eq!(lines, path("./foo:-test\n"));
}
// See: https://github.com/BurntSushi/ripgrep/issues/391
@ -2232,8 +2250,8 @@ fn regression_693_context_option_in_contextless_mode() {
let lines: String = wd.stdout(&mut cmd);
let expected = "\
bar:1
foo:1
./bar:1
./foo:1
";
assert_eq!(lines, expected);
}