diff --git a/.travis.yml b/.travis.yml index 5fc57d60..d47249a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,8 @@ addons: # Needed for testing decompression search. - xz-utils - liblz4-tool + # For building MUSL static builds on Linux. + - musl-tools matrix: fast_finish: true include: diff --git a/Cargo.lock b/Cargo.lock index ff324797..57d2975b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,12 +139,16 @@ dependencies = [ [[package]] name = "grep" -version = "0.1.9" +version = "0.2.0" dependencies = [ - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "grep-matcher 0.0.1", + "grep-pcre2 0.0.1", + "grep-printer 0.0.1", + "grep-regex 0.0.1", + "grep-searcher 0.0.1", + "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -204,16 +208,6 @@ dependencies = [ "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "grep2" -version = "0.1.8" -dependencies = [ - "grep-matcher 0.0.1", - "grep-printer 0.0.1", - "grep-regex 0.0.1", - "grep-searcher 0.0.1", -] - [[package]] name = "ignore" version = "0.4.3" @@ -227,7 +221,7 @@ dependencies = [ "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -378,21 +372,16 @@ name = "ripgrep" version = "0.9.0" dependencies = [ "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "globset 0.4.1", - "grep 0.1.9", + "grep 0.2.0", "ignore 0.4.3", "lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)", "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -520,7 +509,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "2.1.4" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ 
"same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -608,7 +597,7 @@ dependencies = [ "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" -"checksum walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "63636bd0eb3d00ccb8b9036381b526efac53caf112b7783b730ab3f8e44da369" +"checksum walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f1b768ba943161a9226ccd59b26bcd901e5d60e6061f4fcad3034784e0c7372b" "checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index ec6ee1b5..74648607 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ path = "tests/tests.rs" members = [ "globset", "grep", - "grep2", "grep-matcher", "grep-pcre2", "grep-printer", @@ -46,20 +45,15 @@ members = [ [dependencies] atty = "0.2.11" -bytecount = "0.3.2" -encoding_rs = "0.8" -encoding_rs_io = "0.1" globset = { version = "0.4.0", path = "globset" } -grep = { version = "0.1.8", path = "grep" } +grep = { version = "0.2.0", path = "grep" } ignore = { version = "0.4.0", path = "ignore" } lazy_static = "1" -libc = "0.2" log = "0.4" -memchr = "2" -memmap = "0.6" num_cpus = "1" regex = "1" same-file = "1" +serde_json = "1" termcolor = "1" [dependencies.clap] @@ -69,7 +63,7 @@ features = ["suggestions", "color"] [target.'cfg(windows)'.dependencies.winapi] version = "0.3" -features = ["std", "winnt"] +features = ["std", "fileapi", "winnt"] [build-dependencies] lazy_static = "1" @@ -80,15 +74,9 @@ default-features = false features = ["suggestions", "color"] [features] -avx-accel = [ - "bytecount/avx-accel", - "grep2/avx-accel", -] -simd-accel = [ - "bytecount/simd-accel", - "encoding_rs/simd-accel", - "grep2/simd-accel", -] +avx-accel = ["grep/avx-accel"] +simd-accel = ["grep/simd-accel"] +pcre2 = ["grep/pcre2"] [profile.release] -debug = true +debug = 1 diff --git a/FAQ.md b/FAQ.md index 868c4723..ff0bc5e5 100644 --- a/FAQ.md +++ b/FAQ.md @@ -157,13 +157,37 @@ tool. With that said, How do I use lookaround and/or backreferences? -This isn't currently possible. ripgrep uses finite automata to implement -regular expression search, and in turn, guarantees linear time searching on all -inputs. It is difficult to efficiently support lookaround and backreferences in -finite automata engines, so ripgrep does not provide these features. +ripgrep's default regex engine does not support lookaround or backreferences. +This is primarily because the default regex engine is implemented using finite +state machines in order to guarantee a linear worst case time complexity on all +inputs. Backreferences are not possible to implement in this paradigm, and +lookaround appears difficult to do efficiently. 
-If a production quality regular expression engine with these features is ever -written in Rust, then it is possible ripgrep will provide it as an opt-in +However, ripgrep optionally supports using PCRE2 as the regex engine instead of +the default one based on finite state machines. You can enable PCRE2 with the +`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily +find all ten-character sequences that are immediately repeated: + +``` +$ rg -P '(\w{10})\1' +tests/misc.rs +483: cmd.arg("--max-filesize").arg("44444444444444444444"); +globset/src/glob.rs +1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +``` + +If your version of ripgrep doesn't support PCRE2, then you'll get an error +message when you try to use the `-P/--pcre2` flag: + +``` +$ rg -P '(\w{10})\1' +PCRE2 is not available in this build of ripgrep +``` + +Most of the releases distributed by the ripgrep project here on GitHub will +come bundled with PCRE2 enabled. If you installed ripgrep through a different +means (like your system's package manager), then please reach out to the +maintainer of that package to see whether it's possible to enable the PCRE2 feature. diff --git a/README.md b/README.md index 2b7ada60..351de389 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ available for [every release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to other popular search tools like The Silver Searcher, ack and grep. -[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg?branch=master)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) @@ -85,14 +85,16 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep. ### Why should I use ripgrep? -* It can replace many use cases served by both The Silver Searcher and GNU grep - because it is generally faster than both. (See [the FAQ](FAQ.md#posix4ever) - for more details on whether ripgrep can truly replace grep.) -* Like The Silver Searcher, ripgrep defaults to recursive directory search - and won't search files ignored by your `.gitignore` files. It also ignores - hidden and binary files by default. ripgrep also implements full support - for `.gitignore`, whereas there are many bugs related to that functionality - in The Silver Searcher. +* It can replace many use cases served by other search tools + because it contains most of their features and is generally faster. (See + [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly + replace grep.) +* Like other tools specialized to code search, ripgrep defaults to recursive + directory search and won't search files ignored by your `.gitignore` files. + It also ignores hidden and binary files by default. ripgrep also implements + full support for `.gitignore`, whereas there are many bugs related to that + functionality in other code search tools claiming to provide the same + functionality. * ripgrep can search specific types of files. For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs foo` excludes Javascript files from your search. ripgrep can be taught about new file types with @@ -117,22 +119,24 @@ bugs, and Unicode support. ### Why shouldn't I use ripgrep?
-I'd like to try to convince you why you *shouldn't* use ripgrep. This should -give you a glimpse at some important downsides or missing features of -ripgrep. +Despite initially not wanting to add every feature under the sun to ripgrep, +over time, ripgrep has grown support for most features found in other file +searching tools. This includes searching for results spanning across multiple +lines, and opt-in support for PCRE2, which provides look-around and +backreference support. -* ripgrep uses a regex engine based on finite automata, so if you want fancy - regex features such as backreferences or lookaround, ripgrep won't provide - them to you. ripgrep does support lots of things though, including, but not - limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`), - begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and - support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or - `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be - supported.) -* ripgrep doesn't have multiline search. (Will happen as an opt-in feature.) +At this point, the primary reasons not to use ripgrep probably consist of one +or more of the following: -In other words, if you like fancy regexes or multiline search, then ripgrep -may not quite meet your needs (yet). +* You need a portable and ubiquitous tool. While ripgrep works on Windows, + macOS and Linux, it is not ubiquitous and it does not conform to any + standard such as POSIX. The best tool for this job is good old grep. +* There still exists some other minor feature (or bug) found in another tool + that isn't in ripgrep. +* There is a performance edge case where ripgrep doesn't do well where another + tool does do well. (Please file a bug report!) +* ripgrep isn't possible to install on your machine or isn't available for your + platform. (Please file a bug report!) ### Is it really faster than everything else? @@ -145,7 +149,8 @@ Summarizing, ripgrep is fast because: * It is built on top of [Rust's regex engine](https://github.com/rust-lang-nursery/regex). Rust's regex engine uses finite automata, SIMD and aggressive literal - optimizations to make searching very fast. + optimizations to make searching very fast. (PCRE2 support can be opted into + with the `-P/--pcre2` flag.) * Rust's regex library maintains performance with full Unicode support by building UTF-8 decoding directly into its deterministic finite automaton engine. @@ -168,6 +173,11 @@ Andy Lester, author of [ack](https://beyondgrep.com/), has published an excellent table comparing the features of ack, ag, git-grep, GNU grep and ripgrep: https://beyondgrep.com/feature-comparison/ +Note that ripgrep has grown a few significant new features recently that +are not yet present in Andy's table. This includes, but is not limited to, +configuration files, passthru, support for searching compressed files, +multiline search and opt-in fancy regex support via PCRE2. 
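To make the newer features concrete, here are a few illustrative invocations (the patterns and the file name are hypothetical, and the `-P` example assumes a build with PCRE2 enabled):

```
$ rg -U 'fn main\(\)\s*\{'   # multiline: the pattern may match across a line break
$ rg -P '\w+(?=\()'          # PCRE2: look-ahead for names followed by '('
$ rg -z 'TODO' logs.gz       # search inside a compressed file
```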
+ ### Installation @@ -207,13 +217,15 @@ If you're a **MacPorts** user, then you can install ripgrep from the $ sudo port install ripgrep ``` -If you're a **Windows Chocolatey** user, then you can install ripgrep from the [official repo](https://chocolatey.org/packages/ripgrep): +If you're a **Windows Chocolatey** user, then you can install ripgrep from the +[official repo](https://chocolatey.org/packages/ripgrep): ``` $ choco install ripgrep ``` -If you're a **Windows Scoop** user, then you can install ripgrep from the [official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): +If you're a **Windows Scoop** user, then you can install ripgrep from the +[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): ``` $ scoop install ripgrep @@ -225,32 +237,37 @@ If you're an **Arch Linux** user, then you can install ripgrep from the official $ pacman -S ripgrep ``` -If you're a **Gentoo** user, you can install ripgrep from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): +If you're a **Gentoo** user, you can install ripgrep from the +[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): ``` $ emerge sys-apps/ripgrep ``` -If you're a **Fedora 27+** user, you can install ripgrep from official repositories. +If you're a **Fedora 27+** user, you can install ripgrep from official +repositories. ``` $ sudo dnf install ripgrep ``` -If you're a **Fedora 24+** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **Fedora 24+** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` $ sudo dnf copr enable carlwgeorge/ripgrep $ sudo dnf install ripgrep ``` -If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the [official repo](http://software.opensuse.org/package/ripgrep): +If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the +[official repo](http://software.opensuse.org/package/ripgrep): ``` $ sudo zypper install ripgrep ``` -If you're a **RHEL/CentOS 7** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **RHEL/CentOS 7** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` $ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo @@ -286,25 +303,29 @@ seem to work right and generate a number of very strange bug reports that I don't know how to fix and don't have the time to fix. Therefore, it is no longer a recommended installation option.) 
-If you're a **FreeBSD** user, then you can install ripgrep from the [official ports](https://www.freshports.org/textproc/ripgrep/): +If you're a **FreeBSD** user, then you can install ripgrep from the +[official ports](https://www.freshports.org/textproc/ripgrep/): ``` # pkg install ripgrep ``` -If you're an **OpenBSD** user, then you can install ripgrep from the [official ports](http://openports.se/textproc/ripgrep): +If you're an **OpenBSD** user, then you can install ripgrep from the +[official ports](http://openports.se/textproc/ripgrep): ``` $ doas pkg_add ripgrep ``` -If you're a **NetBSD** user, then you can install ripgrep from [pkgsrc](http://pkgsrc.se/textproc/ripgrep): +If you're a **NetBSD** user, then you can install ripgrep from +[pkgsrc](http://pkgsrc.se/textproc/ripgrep): ``` # pkgin install ripgrep ``` If you're a **Rust programmer**, ripgrep can be installed with `cargo`. + * Note that the minimum supported version of Rust for ripgrep is **1.23.0**, although ripgrep may work with older versions. * Note that the binary may be bigger than expected because it contains debug @@ -353,6 +374,35 @@ are not necessary to get SIMD optimizations for search; those are enabled automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features will similarly become unnecessary. +Finally, optional PCRE2 support can be built with ripgrep by enabling the +`pcre2` feature: + +``` +$ cargo build --release --features 'pcre2' +``` + +(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile +time SIMD optimizations.) + +Enabling the PCRE2 feature will attempt to automatically find and link with +your system's PCRE2 library via `pkg-config`. If one doesn't exist, then +ripgrep will build PCRE2 from source using your system's C compiler and then +statically link it into the final executable. Static linking can be forced even +when there is an available PCRE2 system library by either building ripgrep with +the MUSL target or by setting `PCRE2_SYS_STATIC=1`. + +ripgrep can be built with the MUSL target on Linux by first installing the MUSL +library on your system (consult your friendly neighborhood package manager). +Then you just need to add MUSL support to your Rust toolchain and rebuild +ripgrep, which yields a fully static executable: + +``` +$ rustup target add x86_64-unknown-linux-musl +$ cargo build --release --target x86_64-unknown-linux-musl +``` + +Applying the `--features` flag from above works as expected. 
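Putting the two together, a fully static binary that also bundles PCRE2 can be built like this (a sketch; it assumes the MUSL target and a MUSL C toolchain such as `musl-tools` are installed):

```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl --features 'pcre2'
```

On a non-MUSL build, setting `PCRE2_SYS_STATIC=1` instead forces just PCRE2 to be statically linked.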
+ ### Running tests diff --git a/appveyor.yml b/appveyor.yml index 26daf224..bea157cf 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,8 +1,6 @@ -# Inspired from https://github.com/habitat-sh/habitat/blob/master/appveyor.yml cache: - c:\cargo\registry - c:\cargo\git - - c:\projects\ripgrep\target init: - mkdir c:\cargo @@ -19,14 +17,20 @@ environment: PROJECT_NAME: ripgrep RUST_BACKTRACE: full matrix: - - TARGET: i686-pc-windows-gnu - CHANNEL: stable - - TARGET: i686-pc-windows-msvc - CHANNEL: stable - TARGET: x86_64-pc-windows-gnu CHANNEL: stable + BITS: 64 + MSYS2: 1 - TARGET: x86_64-pc-windows-msvc CHANNEL: stable + BITS: 64 + - TARGET: i686-pc-windows-gnu + CHANNEL: stable + BITS: 32 + MSYS2: 1 + - TARGET: i686-pc-windows-msvc + CHANNEL: stable + BITS: 32 matrix: fast_finish: true @@ -35,8 +39,9 @@ matrix: # (Based on from https://github.com/rust-lang/libc/blob/master/appveyor.yml) install: - curl -sSf -o rustup-init.exe https://win.rustup.rs/ - - rustup-init.exe -y --default-host %TARGET% --no-modify-path - - if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin + - rustup-init.exe -y --default-host %TARGET% + - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin + - if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH% - rustc -V - cargo -V @@ -46,11 +51,11 @@ build: false # Equivalent to Travis' `script` phase # TODO modify this phase as you see fit test_script: - - cargo test --verbose --all + - cargo test --verbose --all --features pcre2 before_deploy: # Generate artifacts for release - - cargo build --release + - cargo build --release --features pcre2 - mkdir staging - copy target\release\rg.exe staging - ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging diff --git a/build.rs b/build.rs index b7f26f17..638f7646 100644 --- a/build.rs +++ b/build.rs @@ -4,6 +4,7 @@ extern crate clap; extern crate lazy_static; use std::env; +use std::ffi::OsString; use std::fs::{self, File}; use std::io::{self, Read, Write}; use std::path::Path; @@ -18,6 +19,22 @@ use app::{RGArg, RGArgKind}; mod app; fn main() { + // If our version of Rust has runtime SIMD detection, then set a cfg so + // we know we can test for it. We use this when generating ripgrep's + // --version output. + let version = rustc_version(); + let parsed = match Version::parse(&version) { + Ok(parsed) => parsed, + Err(err) => { + eprintln!("failed to parse `rustc --version`: {}", err); + return; + } + }; + let minimum = Version { major: 1, minor: 27, patch: 0 }; + if version.contains("nightly") || parsed >= minimum { + println!("cargo:rustc-cfg=ripgrep_runtime_cpu"); + } + // OUT_DIR is set by Cargo and it's where any additional build artifacts // are written. 
let outdir = match env::var_os("OUT_DIR") { @@ -182,3 +199,63 @@ fn formatted_doc_txt(arg: &RGArg) -> io::Result<String> { fn ioerr(msg: String) -> io::Error { io::Error::new(io::ErrorKind::Other, msg) } + +fn rustc_version() -> String { + let rustc = env::var_os("RUSTC").unwrap_or(OsString::from("rustc")); + let output = process::Command::new(&rustc) + .arg("--version") + .output() + .unwrap() + .stdout; + String::from_utf8(output).unwrap() +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)] +struct Version { + major: u32, + minor: u32, + patch: u32, +} + +impl Version { + fn parse(mut s: &str) -> Result<Version, String> { + if !s.starts_with("rustc ") { + return Err(format!("unrecognized version string: {}", s)); + } + s = &s["rustc ".len()..]; + + let parts: Vec<&str> = s.split(".").collect(); + if parts.len() < 3 { + return Err(format!("not enough version parts: {:?}", parts)); + } + + let mut num = String::new(); + for c in parts[0].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let major = num.parse::<u32>().map_err(|e| e.to_string())?; + + num.clear(); + for c in parts[1].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let minor = num.parse::<u32>().map_err(|e| e.to_string())?; + + num.clear(); + for c in parts[2].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let patch = num.parse::<u32>().map_err(|e| e.to_string())?; + + Ok(Version { major, minor, patch }) + } +} diff --git a/ci/before_deploy.sh b/ci/before_deploy.sh index 7ee824ec..68f80bdf 100755 --- a/ci/before_deploy.sh +++ b/ci/before_deploy.sh @@ -8,7 +8,11 @@ set -ex # Generate artifacts for release mk_artifacts() { - cargo build --target "$TARGET" --release + if is_arm; then + cargo build --target "$TARGET" --release + else + cargo build --target "$TARGET" --release --features 'pcre2' + fi } mk_tarball() { diff --git a/ci/script.sh b/ci/script.sh index f513bb12..d1799e29 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -8,7 +8,11 @@ set -ex main() { # Test a normal debug build. - cargo build --target "$TARGET" --verbose --all + if is_arm; then + cargo build --target "$TARGET" --verbose + else + cargo build --target "$TARGET" --verbose --all --features 'pcre2' + fi # Show the output of the most recent build.rs stderr. set +x @@ -40,7 +44,7 @@ main() { "$(dirname "${0}")/test_complete.sh" # Run tests for ripgrep and all sub-crates.
- cargo test --target "$TARGET" --verbose --all + cargo test --target "$TARGET" --verbose --all --features 'pcre2' } main diff --git a/ci/utils.sh b/ci/utils.sh index 2fb7fadb..1cf2b6dc 100644 --- a/ci/utils.sh +++ b/ci/utils.sh @@ -55,13 +55,6 @@ gcc_prefix() { esac } -is_ssse3_target() { - case "$(architecture)" in - amd64) return 0 ;; - *) return 1 ;; - esac -} - is_x86() { case "$(architecture)" in amd64|i386) return 0 ;; diff --git a/complete/_rg b/complete/_rg index 4342e8d2..7e17b93d 100644 --- a/complete/_rg +++ b/complete/_rg @@ -111,10 +111,18 @@ _rg() { "--no-ignore-vcs[don't respect version control ignore files]" $no'--ignore-vcs[respect version control ignore files]' + + '(json)' # json options + '--json[output results in a JSON Lines format]' + $no"--no-json[output results in the standard format]" + + '(line)' # Line-number options {-n,--line-number}'[show line numbers for matches]' {-N,--no-line-number}"[don't show line numbers for matches]" + + '(line terminator)' # line terminator options + '--crlf[use CRLF as a line terminator]' + $no"--no-crlf[do not use CRLF as a line terminator]" + + '(max-depth)' # Directory-depth options '--max-depth=[specify max number of directories to descend]:number of directories' '!--maxdepth=:number of directories' @@ -131,6 +139,11 @@ _rg() { '--mmap[search using memory maps when possible]' "--no-mmap[don't search using memory maps]" + + '(multiline)' # multiline options + {-U,--multiline}'[permit matching across multiple lines]' + $no"--no-multiline[restrict matches to at most one line each]" + '--multiline-dotall[make "." match newline in multiline mode]' + + '(only)' # Only-match options '(passthru replace)'{-o,--only-matching}'[show only matching part of each line]' @@ -138,6 +151,12 @@ _rg() { '(--vimgrep count only replace)--passthru[show both matching and non-matching lines]' '!(--vimgrep count only replace)--passthrough' + + '(pcre2)' # PCRE2 options + {-P,--pcre2}'[Enable matching with PCRE2]' + $no"--no-pcre2[don't use PCRE2]" + "--pcre2-unicode[Enable PCRE2 Unicode mode]" + $no"--pcre2-unicode[Disable PCRE2 Unicode mode]" + + '(pre)' # Preprocessing options '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e' $no'--no-pre[disable preprocessor utility]' @@ -158,6 +177,7 @@ _rg() { + stats # Statistics options '(--files file-match)--stats[show search statistics]' + $no"--no-stats[don't show search statistics]" + '(text)' # Binary-search options {-a,--text}'[search binary files as if they were text]' @@ -166,6 +186,10 @@ _rg() { + '(threads)' # Thread-count options '(--sort-files)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' + + '(trim)' # trim options + '--trim[trim any ASCII whitespace prefix from each line]' + $no"--no-trim[don't trim ASCII whitespace prefix from each line]" + + type # Type options '*'{-t+,--type=}'[only search files matching specified type]: :_rg_types' '*--type-add=[add new glob for specified file type]: :->typespec' @@ -203,6 +227,7 @@ _rg() { '--max-filesize=[specify size above which files should be ignored]:file size (bytes)' "--no-config[don't load configuration files]" '(-0 --null)'{-0,--null}'[print NUL byte after file names]' + '--null-data[use NUL as a line terminator]' '--path-separator=[specify path separator to use when printing file names]:separator' '(-q --quiet)'{-q,--quiet}'[suppress normal output]' '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)' diff --git a/globset/README.md 
b/globset/README.md index f5caf22a..5d54172a 100644 --- a/globset/README.md +++ b/globset/README.md @@ -4,7 +4,7 @@ Cross platform single glob and glob set matching. Glob set matching is the process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched. -[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset) diff --git a/globset/src/lib.rs b/globset/src/lib.rs index 50c92e42..8d26e187 100644 --- a/globset/src/lib.rs +++ b/globset/src/lib.rs @@ -470,7 +470,6 @@ impl GlobSetBuilder { } /// Add a new pattern to this set. - #[allow(dead_code)] pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { self.pats.push(pat); self diff --git a/grep/Cargo.toml b/grep/Cargo.toml index 562bde1e..e6e2fc07 100644 --- a/grep/Cargo.toml +++ b/grep/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "grep" -version = "0.1.9" #:version +version = "0.2.0" #:version authors = ["Andrew Gallant "] description = """ Fast line oriented regex searching as a library. @@ -13,7 +13,18 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"] license = "Unlicense/MIT" [dependencies] -log = "0.4" -memchr = "2" -regex = "1" -regex-syntax = "0.6" +grep-matcher = { version = "0.0.1", path = "../grep-matcher" } +grep-pcre2 = { version = "0.0.1", path = "../grep-pcre2", optional = true } +grep-printer = { version = "0.0.1", path = "../grep-printer" } +grep-regex = { version = "0.0.1", path = "../grep-regex" } +grep-searcher = { version = "0.0.1", path = "../grep-searcher" } + +[dev-dependencies] +atty = "0.2.11" +termcolor = "1" +walkdir = "2.2.0" + +[features] +avx-accel = ["grep-searcher/avx-accel"] +simd-accel = ["grep-searcher/simd-accel"] +pcre2 = ["grep-pcre2"] diff --git a/grep/README.md b/grep/README.md index 86cc8c2c..c376d8af 100644 --- a/grep/README.md +++ b/grep/README.md @@ -1,4 +1,41 @@ grep ---- -This is a *library* that provides grep-style line-by-line regex searching (with -comparable performance to `grep` itself). +ripgrep, as a library. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep.svg)](https://crates.io/crates/grep) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + + +### Documentation + +[https://docs.rs/grep](https://docs.rs/grep) + +NOTE: This crate isn't ready for wide use yet. Ambitious individuals can +probably piece together the parts, but there is no high level documentation +describing how all of the pieces fit together. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep = "0.2" +``` + +and this to your crate root: + +```rust +extern crate grep; +``` + + +### Features + +This crate provides a `pcre2` feature (disabled by default) which, when +enabled, re-exports the `grep-pcre2` crate as an alternative `Matcher` +implementation to the standard `grep-regex` implementation. 
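For example, opting into the PCRE2 backend from a downstream crate might look like this (a sketch; adjust the version to the latest release):

```toml
[dependencies]
grep = { version = "0.2", features = ["pcre2"] }
```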
diff --git a/grep/examples/simplegrep.rs b/grep/examples/simplegrep.rs new file mode 100644 index 00000000..fb2d4001 --- /dev/null +++ b/grep/examples/simplegrep.rs @@ -0,0 +1,107 @@ +extern crate atty; +extern crate grep; +extern crate termcolor; +extern crate walkdir; + +use std::env; +use std::error; +use std::ffi::OsString; +use std::path::Path; +use std::process; +use std::result; + +use grep::printer::{ColorSpecs, StandardBuilder}; +use grep::regex::RegexMatcher; +use grep::searcher::{BinaryDetection, SearcherBuilder}; +use termcolor::{ColorChoice, StandardStream}; +use walkdir::WalkDir; + +macro_rules! fail { + ($($tt:tt)*) => { + return Err(From::from(format!($($tt)*))); + } +} + +type Result<T> = result::Result<T, Box<error::Error>>; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<()> { + let mut args: Vec<OsString> = env::args_os().collect(); + if args.len() < 2 { + fail!("Usage: simplegrep <pattern> [<path> ...]"); + } + if args.len() == 2 { + args.push(OsString::from("./")); + } + let pattern = match args[1].clone().into_string() { + Ok(pattern) => pattern, + Err(_) => { + fail!( + "pattern is not valid UTF-8: '{:?}'", + args[1].to_string_lossy() + ); + } + }; + search(&pattern, &args[2..]) +} + +fn search(pattern: &str, paths: &[OsString]) -> Result<()> { + let matcher = RegexMatcher::new_line_matcher(&pattern)?; + let mut searcher = SearcherBuilder::new() + .binary_detection(BinaryDetection::quit(b'\x00')) + .build(); + let mut printer = StandardBuilder::new() + .color_specs(colors()) + .build(StandardStream::stdout(color_choice())); + + for path in paths { + for result in WalkDir::new(path) { + let dent = match result { + Ok(dent) => dent, + Err(err) => { + eprintln!( + "{}: {}", + err.path().unwrap_or(Path::new("error")).display(), + err, + ); + continue; + } + }; + if !dent.file_type().is_file() { + continue; + } + let result = searcher.search_path( + &matcher, + dent.path(), + printer.sink_with_path(&matcher, dent.path()), + ); + if let Err(err) = result { + eprintln!("{}: {}", dent.path().display(), err); + } + } + } + Ok(()) +} + +fn color_choice() -> ColorChoice { + if atty::is(atty::Stream::Stdout) { + ColorChoice::Auto + } else { + ColorChoice::Never + } +} + +fn colors() -> ColorSpecs { + ColorSpecs::new(&[ + "path:fg:magenta".parse().unwrap(), + "line:fg:green".parse().unwrap(), + "match:fg:red".parse().unwrap(), + "match:style:bold".parse().unwrap(), + ]) +} diff --git a/grep/src/lib.rs b/grep/src/lib.rs index 023cd64a..ab0d78eb 100644 --- a/grep/src/lib.rs +++ b/grep/src/lib.rs @@ -1,84 +1,22 @@ -#![deny(missing_docs)] - /*! -A fast line oriented regex searcher. +ripgrep, as a library. + +This library is intended to provide a high level facade to the crates that +make up ripgrep's core searching routines. However, there is no high level +documentation available yet guiding users on how to fit all of the pieces +together. + +Every public API item in the constituent crates is documented, but examples +are sparse. + +A cookbook and a guide are planned. */ -#[macro_use] -extern crate log; -extern crate memchr; -extern crate regex; -extern crate regex_syntax as syntax; +#![deny(missing_docs)] -use std::error; -use std::fmt; -use std::result; - -pub use search::{Grep, GrepBuilder, Iter, Match}; - -mod literals; -mod nonl; -mod search; -mod smart_case; -mod word_boundary; - -/// Result is a convenient type alias that fixes the type of the error to -/// the `Error` type defined in this crate.
-pub type Result = result::Result; - -/// Error enumerates the list of possible error conditions when building or -/// using a `Grep` line searcher. -#[derive(Debug)] -pub enum Error { - /// An error from parsing or compiling a regex. - Regex(regex::Error), - /// This error occurs when an illegal literal was found in the regex - /// pattern. For example, if the line terminator is `\n` and the regex - /// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error. - LiteralNotAllowed(char), - /// An unused enum variant that indicates this enum may be expanded in - /// the future and therefore should not be exhaustively matched. - #[doc(hidden)] - __Nonexhaustive, -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::Regex(ref err) => err.description(), - Error::LiteralNotAllowed(_) => "use of forbidden literal", - Error::__Nonexhaustive => unreachable!(), - } - } - - fn cause(&self) -> Option<&error::Error> { - match *self { - Error::Regex(ref err) => err.cause(), - _ => None, - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Regex(ref err) => err.fmt(f), - Error::LiteralNotAllowed(chr) => { - write!(f, "Literal {:?} not allowed.", chr) - } - Error::__Nonexhaustive => unreachable!(), - } - } -} - -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} - -impl From for Error { - fn from(err: syntax::Error) -> Error { - Error::Regex(regex::Error::Syntax(err.to_string())) - } -} +pub extern crate grep_matcher as matcher; +#[cfg(feature = "pcre2")] +pub extern crate grep_pcre2 as pcre2; +pub extern crate grep_printer as printer; +pub extern crate grep_regex as regex; +pub extern crate grep_searcher as searcher; diff --git a/grep/src/literals.rs b/grep/src/literals.rs deleted file mode 100644 index 5e3dc8ea..00000000 --- a/grep/src/literals.rs +++ /dev/null @@ -1,274 +0,0 @@ -/*! -The literals module is responsible for extracting *inner* literals out of the -AST of a regular expression. Normally this is the job of the regex engine -itself, but the regex engine doesn't look for inner literals. Since we're doing -line based searching, we can use them, so we need to do it ourselves. - -Note that this implementation is incredibly suspicious. We need something more -principled. -*/ -use std::cmp; - -use regex::bytes::RegexBuilder; -use syntax::hir::{self, Hir, HirKind}; -use syntax::hir::literal::{Literal, Literals}; - -#[derive(Clone, Debug)] -pub struct LiteralSets { - prefixes: Literals, - suffixes: Literals, - required: Literals, -} - -impl LiteralSets { - pub fn create(expr: &Hir) -> Self { - let mut required = Literals::empty(); - union_required(expr, &mut required); - LiteralSets { - prefixes: Literals::prefixes(expr), - suffixes: Literals::suffixes(expr), - required: required, - } - } - - pub fn to_regex_builder(&self) -> Option { - if self.prefixes.all_complete() && !self.prefixes.is_empty() { - debug!("literal prefixes detected: {:?}", self.prefixes); - // When this is true, the regex engine will do a literal scan. - return None; - } - - // Out of inner required literals, prefixes and suffixes, which one - // is the longest? We pick the longest to do fast literal scan under - // the assumption that a longer literal will have a lower false - // positive rate. 
- let pre_lcp = self.prefixes.longest_common_prefix(); - let pre_lcs = self.prefixes.longest_common_suffix(); - let suf_lcp = self.suffixes.longest_common_prefix(); - let suf_lcs = self.suffixes.longest_common_suffix(); - - let req_lits = self.required.literals(); - let req = match req_lits.iter().max_by_key(|lit| lit.len()) { - None => &[], - Some(req) => &***req, - }; - - let mut lit = pre_lcp; - if pre_lcs.len() > lit.len() { - lit = pre_lcs; - } - if suf_lcp.len() > lit.len() { - lit = suf_lcp; - } - if suf_lcs.len() > lit.len() { - lit = suf_lcs; - } - if req_lits.len() == 1 && req.len() > lit.len() { - lit = req; - } - - // Special case: if we have any literals that are all whitespace, - // then this is probably a failing of the literal detection since - // whitespace is typically pretty common. In this case, don't bother - // with inner literal scanning at all and just defer to the regex. - let any_all_white = req_lits.iter() - .any(|lit| lit.iter().all(|&b| (b as char).is_whitespace())); - if any_all_white { - return None; - } - - // Special case: if we detected an alternation of inner required - // literals and its longest literal is bigger than the longest - // prefix/suffix, then choose the alternation. In practice, this - // helps with case insensitive matching, which can generate lots of - // inner required literals. - let any_empty = req_lits.iter().any(|lit| lit.is_empty()); - if req.len() > lit.len() && req_lits.len() > 1 && !any_empty { - debug!("required literals found: {:?}", req_lits); - let alts: Vec = - req_lits.into_iter().map(|x| bytes_to_regex(x)).collect(); - let mut builder = RegexBuilder::new(&alts.join("|")); - builder.unicode(false); - Some(builder) - } else if lit.is_empty() { - None - } else { - debug!("required literal found: {:?}", show(lit)); - let mut builder = RegexBuilder::new(&bytes_to_regex(&lit)); - builder.unicode(false); - Some(builder) - } - } -} - -fn union_required(expr: &Hir, lits: &mut Literals) { - match *expr.kind() { - HirKind::Literal(hir::Literal::Unicode(c)) => { - let mut buf = [0u8; 4]; - lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); - } - HirKind::Literal(hir::Literal::Byte(b)) => { - lits.cross_add(&[b]); - } - HirKind::Class(hir::Class::Unicode(ref cls)) => { - if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) { - lits.cut(); - } - } - HirKind::Class(hir::Class::Bytes(ref cls)) => { - if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) { - lits.cut(); - } - } - HirKind::Group(hir::Group { ref hir, .. 
}) => { - union_required(&**hir, lits); - } - HirKind::Repetition(ref x) => { - match x.kind { - hir::RepetitionKind::ZeroOrOne => lits.cut(), - hir::RepetitionKind::ZeroOrMore => lits.cut(), - hir::RepetitionKind::OneOrMore => { - union_required(&x.hir, lits); - lits.cut(); - } - hir::RepetitionKind::Range(ref rng) => { - let (min, max) = match *rng { - hir::RepetitionRange::Exactly(m) => (m, Some(m)), - hir::RepetitionRange::AtLeast(m) => (m, None), - hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), - }; - repeat_range_literals( - &x.hir, min, max, x.greedy, lits, union_required); - } - } - } - HirKind::Concat(ref es) if es.is_empty() => {} - HirKind::Concat(ref es) if es.len() == 1 => { - union_required(&es[0], lits) - } - HirKind::Concat(ref es) => { - for e in es { - let mut lits2 = lits.to_empty(); - union_required(e, &mut lits2); - if lits2.is_empty() { - lits.cut(); - continue; - } - if lits2.contains_empty() { - lits.cut(); - } - if !lits.cross_product(&lits2) { - // If this expression couldn't yield any literal that - // could be extended, then we need to quit. Since we're - // short-circuiting, we also need to freeze every member. - lits.cut(); - break; - } - } - } - HirKind::Alternation(ref es) => { - alternate_literals(es, lits, union_required); - } - _ => lits.cut(), - } -} - -fn repeat_range_literals( - e: &Hir, - min: u32, - max: Option, - _greedy: bool, - lits: &mut Literals, - mut f: F, -) { - if min == 0 { - // This is a bit conservative. If `max` is set, then we could - // treat this as a finite set of alternations. For now, we - // just treat it as `e*`. - lits.cut(); - } else { - let n = cmp::min(lits.limit_size(), min as usize); - // We only extract literals from a single repetition, even though - // we could do more. e.g., `a{3}` will have `a` extracted instead of - // `aaa`. The reason is that inner literal extraction can't be unioned - // across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}` - // is wrong. - f(e, lits); - if n < min as usize { - lits.cut(); - } - if max.map_or(true, |max| min < max) { - lits.cut(); - } - } -} - -fn alternate_literals( - es: &[Hir], - lits: &mut Literals, - mut f: F, -) { - let mut lits2 = lits.to_empty(); - for e in es { - let mut lits3 = lits.to_empty(); - lits3.set_limit_size(lits.limit_size() / 5); - f(e, &mut lits3); - if lits3.is_empty() || !lits2.union(lits3) { - // If we couldn't find suffixes for *any* of the - // alternates, then the entire alternation has to be thrown - // away and any existing members must be frozen. Similarly, - // if the union couldn't complete, stop and freeze. - lits.cut(); - return; - } - } - // All we do at the moment is look for prefixes and suffixes. If both - // are empty, then we report nothing. We should be able to do better than - // this, but we'll need something more expressive than just a "set of - // literals." - let lcp = lits2.longest_common_prefix(); - let lcs = lits2.longest_common_suffix(); - if !lcp.is_empty() { - lits.cross_add(lcp); - } - lits.cut(); - if !lcs.is_empty() { - lits.add(Literal::empty()); - lits.add(Literal::new(lcs.to_vec())); - } -} - -/// Return the number of characters in the given class. -fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 { - cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() -} - -/// Return the number of bytes in the given class. 
-fn count_byte_class(cls: &hir::ClassBytes) -> u32 { - cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() -} - -/// Converts an arbitrary sequence of bytes to a literal suitable for building -/// a regular expression. -fn bytes_to_regex(bs: &[u8]) -> String { - let mut s = String::with_capacity(bs.len()); - for &b in bs { - s.push_str(&format!("\\x{:02x}", b)); - } - s -} - -/// Converts arbitrary bytes to a nice string. -fn show(bs: &[u8]) -> String { - // Why aren't we using this to feed to the regex? Doesn't really matter - // I guess. ---AG - use std::ascii::escape_default; - use std::str; - - let mut nice = String::new(); - for &b in bs { - let part: Vec = escape_default(b).collect(); - nice.push_str(str::from_utf8(&part).unwrap()); - } - nice -} diff --git a/grep/src/nonl.rs b/grep/src/nonl.rs deleted file mode 100644 index 3beb5f61..00000000 --- a/grep/src/nonl.rs +++ /dev/null @@ -1,74 +0,0 @@ -use syntax::hir::{self, Hir, HirKind}; - -use {Error, Result}; - -/// Returns a new expression that is guaranteed to never match the given -/// ASCII character. -/// -/// If the expression contains the literal byte, then an error is returned. -/// -/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this -/// function panics. -pub fn remove(expr: Hir, byte: u8) -> Result { - assert!(byte <= 0x7F); - let chr = byte as char; - assert!(chr.len_utf8() == 1); - - Ok(match expr.into_kind() { - HirKind::Empty => Hir::empty(), - HirKind::Literal(hir::Literal::Unicode(c)) => { - if c == chr { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::literal(hir::Literal::Unicode(c)) - } - HirKind::Literal(hir::Literal::Byte(b)) => { - if b as char == chr { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::literal(hir::Literal::Byte(b)) - } - HirKind::Class(hir::Class::Unicode(mut cls)) => { - let remove = hir::ClassUnicode::new(Some( - hir::ClassUnicodeRange::new(chr, chr), - )); - cls.difference(&remove); - if cls.iter().next().is_none() { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::class(hir::Class::Unicode(cls)) - } - HirKind::Class(hir::Class::Bytes(mut cls)) => { - let remove = hir::ClassBytes::new(Some( - hir::ClassBytesRange::new(byte, byte), - )); - cls.difference(&remove); - if cls.iter().next().is_none() { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::class(hir::Class::Bytes(cls)) - } - HirKind::Anchor(x) => Hir::anchor(x), - HirKind::WordBoundary(x) => Hir::word_boundary(x), - HirKind::Repetition(mut x) => { - x.hir = Box::new(remove(*x.hir, byte)?); - Hir::repetition(x) - } - HirKind::Group(mut x) => { - x.hir = Box::new(remove(*x.hir, byte)?); - Hir::group(x) - } - HirKind::Concat(xs) => { - let xs = xs.into_iter() - .map(|e| remove(e, byte)) - .collect::>>()?; - Hir::concat(xs) - } - HirKind::Alternation(xs) => { - let xs = xs.into_iter() - .map(|e| remove(e, byte)) - .collect::>>()?; - Hir::alternation(xs) - } - }) -} diff --git a/grep/src/search.rs b/grep/src/search.rs deleted file mode 100644 index af7d680d..00000000 --- a/grep/src/search.rs +++ /dev/null @@ -1,317 +0,0 @@ -use memchr::{memchr, memrchr}; -use syntax::ParserBuilder; -use syntax::hir::Hir; -use regex::bytes::{Regex, RegexBuilder}; - -use literals::LiteralSets; -use nonl; -use smart_case::Cased; -use word_boundary::strip_unicode_word_boundaries; -use Result; - -/// A matched line. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Match { - start: usize, - end: usize, -} - -impl Match { - /// Create a new empty match value. 
- pub fn new() -> Match { - Match::default() - } - - /// Return the starting byte offset of the line that matched. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Return the ending byte offset of the line that matched. - #[inline] - pub fn end(&self) -> usize { - self.end - } -} - -/// A fast line oriented regex searcher. -#[derive(Clone, Debug)] -pub struct Grep { - re: Regex, - required: Option, - opts: Options, -} - -/// A builder for a grep searcher. -#[derive(Clone, Debug)] -pub struct GrepBuilder { - pattern: String, - opts: Options, -} - -#[derive(Clone, Debug)] -struct Options { - case_insensitive: bool, - case_smart: bool, - line_terminator: u8, - size_limit: usize, - dfa_size_limit: usize, -} - -impl Default for Options { - fn default() -> Options { - Options { - case_insensitive: false, - case_smart: false, - line_terminator: b'\n', - size_limit: 10 * (1 << 20), - dfa_size_limit: 10 * (1 << 20), - } - } -} - -impl GrepBuilder { - /// Create a new builder for line searching. - /// - /// The pattern given should be a regular expression. The precise syntax - /// supported is documented on the regex crate. - pub fn new(pattern: &str) -> GrepBuilder { - GrepBuilder { - pattern: pattern.to_string(), - opts: Options::default(), - } - } - - /// Set the line terminator. - /// - /// The line terminator can be any ASCII character and serves to delineate - /// the match boundaries in the text searched. - /// - /// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII). - pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder { - assert!(ascii_byte <= 0x7F); - self.opts.line_terminator = ascii_byte; - self - } - - /// Set the case sensitive flag (`i`) on the regex. - pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder { - self.opts.case_insensitive = yes; - self - } - - /// Whether to enable smart case search or not (disabled by default). - /// - /// Smart case uses case insensitive search if the pattern contains only - /// lowercase characters (ignoring any characters which immediately follow - /// a '\'). Otherwise, a case sensitive search is used instead. - /// - /// Enabling the case_insensitive flag overrides this. - pub fn case_smart(mut self, yes: bool) -> GrepBuilder { - self.opts.case_smart = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a - /// single compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit(mut self, limit: usize) -> GrepBuilder { - self.opts.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will use - /// while searching. - /// - /// Note that this is a per thread limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simulanteously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder { - self.opts.dfa_size_limit = limit; - self - } - - /// Create a line searcher. - /// - /// If there was a problem parsing or compiling the regex with the given - /// options, then an error is returned. 
- pub fn build(self) -> Result { - let expr = self.parse()?; - let literals = LiteralSets::create(&expr); - let re = self.regex(&expr)?; - let required = match literals.to_regex_builder() { - Some(builder) => Some(self.regex_build(builder)?), - None => { - match strip_unicode_word_boundaries(&expr) { - None => None, - Some(expr) => { - debug!("Stripped Unicode word boundaries. \ - New AST:\n{:?}", expr); - self.regex(&expr).ok() - } - } - } - }; - Ok(Grep { - re: re, - required: required, - opts: self.opts, - }) - } - - /// Creates a new regex from the given expression with the current - /// configuration. - fn regex(&self, expr: &Hir) -> Result { - let mut builder = RegexBuilder::new(&expr.to_string()); - builder.unicode(true); - self.regex_build(builder) - } - - /// Builds a new regex from the given builder using the caller's settings. - fn regex_build(&self, mut builder: RegexBuilder) -> Result { - builder - .multi_line(true) - .size_limit(self.opts.size_limit) - .dfa_size_limit(self.opts.dfa_size_limit) - .build() - .map_err(From::from) - } - - /// Parses the underlying pattern and ensures the pattern can never match - /// the line terminator. - fn parse(&self) -> Result { - let expr = ParserBuilder::new() - .allow_invalid_utf8(true) - .case_insensitive(self.is_case_insensitive()?) - .multi_line(true) - .build() - .parse(&self.pattern)?; - debug!("original regex HIR pattern:\n{}", expr); - let expr = nonl::remove(expr, self.opts.line_terminator)?; - debug!("transformed regex HIR pattern:\n{}", expr); - Ok(expr) - } - - /// Determines whether the case insensitive flag should be enabled or not. - fn is_case_insensitive(&self) -> Result { - if self.opts.case_insensitive { - return Ok(true); - } - if !self.opts.case_smart { - return Ok(false); - } - let cased = match Cased::from_pattern(&self.pattern) { - None => return Ok(false), - Some(cased) => cased, - }; - Ok(cased.any_literal && !cased.any_uppercase) - } -} - -impl Grep { - /// Returns a reference to the underlying regex used by the searcher. - pub fn regex(&self) -> &Regex { - &self.re - } - - /// Returns an iterator over all matches in the given buffer. - pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> { - Iter { - searcher: self, - buf: buf, - start: 0, - } - } - - /// Fills in the next line that matches in the given buffer starting at - /// the position given. - /// - /// If no match could be found, `false` is returned, otherwise, `true` is - /// returned. 
- pub fn read_match( - &self, - mat: &mut Match, - buf: &[u8], - mut start: usize, - ) -> bool { - if start >= buf.len() { - return false; - } - if let Some(ref req) = self.required { - while start < buf.len() { - let e = match req.shortest_match(&buf[start..]) { - None => return false, - Some(e) => start + e, - }; - let (prevnl, nextnl) = self.find_line(buf, e, e); - match self.re.shortest_match(&buf[prevnl..nextnl]) { - None => { - start = nextnl; - continue; - } - Some(_) => { - self.fill_match(mat, prevnl, nextnl); - return true; - } - } - } - false - } else { - let e = match self.re.shortest_match(&buf[start..]) { - None => return false, - Some(e) => start + e, - }; - let (s, e) = self.find_line(buf, e, e); - self.fill_match(mat, s, e); - true - } - } - - fn fill_match(&self, mat: &mut Match, start: usize, end: usize) { - mat.start = start; - mat.end = end; - } - - fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) { - (self.find_line_start(buf, s), self.find_line_end(buf, e)) - } - - fn find_line_start(&self, buf: &[u8], pos: usize) -> usize { - memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1) - } - - fn find_line_end(&self, buf: &[u8], pos: usize) -> usize { - memchr(self.opts.line_terminator, &buf[pos..]) - .map_or(buf.len(), |i| pos + i + 1) - } -} - -/// An iterator over all matches in a particular buffer. -/// -/// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime -/// of the searcher. -pub struct Iter<'b, 's> { - searcher: &'s Grep, - buf: &'b [u8], - start: usize, -} - -impl<'b, 's> Iterator for Iter<'b, 's> { - type Item = Match; - - fn next(&mut self) -> Option { - let mut mat = Match::default(); - if !self.searcher.read_match(&mut mat, self.buf, self.start) { - self.start = self.buf.len(); - return None; - } - self.start = mat.end; - Some(mat) - } -} diff --git a/grep/src/smart_case.rs b/grep/src/smart_case.rs deleted file mode 100644 index 1379b326..00000000 --- a/grep/src/smart_case.rs +++ /dev/null @@ -1,191 +0,0 @@ -use syntax::ast::{self, Ast}; -use syntax::ast::parse::Parser; - -/// The results of analyzing a regex for cased literals. -#[derive(Clone, Debug, Default)] -pub struct Cased { - /// True if and only if a literal uppercase character occurs in the regex. - /// - /// A regex like `\pL` contains no uppercase literals, even though `L` - /// is uppercase and the `\pL` class contains uppercase characters. - pub any_uppercase: bool, - /// True if and only if the regex contains any literal at all. A regex like - /// `\pL` has this set to false. - pub any_literal: bool, -} - -impl Cased { - /// Returns a `Cased` value by doing analysis on the AST of `pattern`. - /// - /// If `pattern` is not a valid regular expression, then `None` is - /// returned. 
- pub fn from_pattern(pattern: &str) -> Option { - Parser::new() - .parse(pattern) - .map(|ast| Cased::from_ast(&ast)) - .ok() - } - - fn from_ast(ast: &Ast) -> Cased { - let mut cased = Cased::default(); - cased.from_ast_impl(ast); - cased - } - - fn from_ast_impl(&mut self, ast: &Ast) { - if self.done() { - return; - } - match *ast { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => {} - Ast::Literal(ref x) => { - self.from_ast_literal(x); - } - Ast::Class(ast::Class::Bracketed(ref x)) => { - self.from_ast_class_set(&x.kind); - } - Ast::Repetition(ref x) => { - self.from_ast_impl(&x.ast); - } - Ast::Group(ref x) => { - self.from_ast_impl(&x.ast); - } - Ast::Alternation(ref alt) => { - for x in &alt.asts { - self.from_ast_impl(x); - } - } - Ast::Concat(ref alt) => { - for x in &alt.asts { - self.from_ast_impl(x); - } - } - } - } - - fn from_ast_class_set(&mut self, ast: &ast::ClassSet) { - if self.done() { - return; - } - match *ast { - ast::ClassSet::Item(ref item) => { - self.from_ast_class_set_item(item); - } - ast::ClassSet::BinaryOp(ref x) => { - self.from_ast_class_set(&x.lhs); - self.from_ast_class_set(&x.rhs); - } - } - } - - fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) { - if self.done() { - return; - } - match *ast { - ast::ClassSetItem::Empty(_) - | ast::ClassSetItem::Ascii(_) - | ast::ClassSetItem::Unicode(_) - | ast::ClassSetItem::Perl(_) => {} - ast::ClassSetItem::Literal(ref x) => { - self.from_ast_literal(x); - } - ast::ClassSetItem::Range(ref x) => { - self.from_ast_literal(&x.start); - self.from_ast_literal(&x.end); - } - ast::ClassSetItem::Bracketed(ref x) => { - self.from_ast_class_set(&x.kind); - } - ast::ClassSetItem::Union(ref union) => { - for x in &union.items { - self.from_ast_class_set_item(x); - } - } - } - } - - fn from_ast_literal(&mut self, ast: &ast::Literal) { - self.any_literal = true; - self.any_uppercase = self.any_uppercase || ast.c.is_uppercase(); - } - - /// Returns true if and only if the attributes can never change no matter - /// what other AST it might see. 
- fn done(&self) -> bool { - self.any_uppercase && self.any_literal - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn cased(pattern: &str) -> Cased { - Cased::from_pattern(pattern).unwrap() - } - - #[test] - fn various() { - let x = cased(""); - assert!(!x.any_uppercase); - assert!(!x.any_literal); - - let x = cased("foo"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased("Foo"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased("foO"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\\"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\w"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\S"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\p{Ll}"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo[a-z]"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo[A-Z]"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo[\S\t]"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\\S"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"\p{Ll}"); - assert!(!x.any_uppercase); - assert!(!x.any_literal); - - let x = cased(r"aBc\w"); - assert!(x.any_uppercase); - assert!(x.any_literal); - } -} diff --git a/grep/src/word_boundary.rs b/grep/src/word_boundary.rs deleted file mode 100644 index 8e6b86d1..00000000 --- a/grep/src/word_boundary.rs +++ /dev/null @@ -1,53 +0,0 @@ -use syntax::hir::{self, Hir, HirKind}; - -/// Strips Unicode word boundaries from the given expression. -/// -/// The key invariant this maintains is that the expression returned will match -/// *at least* every where the expression given will match. Namely, a match of -/// the returned expression can report false positives but it will never report -/// false negatives. -/// -/// If no word boundaries could be stripped, then None is returned. -pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option { - // The real reason we do this is because Unicode word boundaries are the - // one thing that Rust's regex DFA engine can't handle. When it sees a - // Unicode word boundary among non-ASCII text, it falls back to one of the - // slower engines. We work around this limitation by attempting to use - // a regex to find candidate matches without a Unicode word boundary. We'll - // only then use the full (and slower) regex to confirm a candidate as a - // match or not during search. - // - // It looks like we only check the outer edges for `\b`? I guess this is - // an attempt to optimize for the `-w/--word-regexp` flag? ---AG - match *expr.kind() { - HirKind::Concat(ref es) if !es.is_empty() => { - let first = is_unicode_word_boundary(&es[0]); - let last = is_unicode_word_boundary(es.last().unwrap()); - // Be careful not to strip word boundaries if there are no other - // expressions to match. - match (first, last) { - (true, false) if es.len() > 1 => { - Some(Hir::concat(es[1..].to_vec())) - } - (false, true) if es.len() > 1 => { - Some(Hir::concat(es[..es.len() - 1].to_vec())) - } - (true, true) if es.len() > 2 => { - Some(Hir::concat(es[1..es.len() - 1].to_vec())) - } - _ => None, - } - } - _ => None, - } -} - -/// Returns true if the given expression is a Unicode word boundary. 
-fn is_unicode_word_boundary(expr: &Hir) -> bool { - match *expr.kind() { - HirKind::WordBoundary(hir::WordBoundary::Unicode) => true, - HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true, - HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir), - _ => false, - } -} diff --git a/grep2/COPYING b/grep2/COPYING deleted file mode 100644 index bb9c20a0..00000000 --- a/grep2/COPYING +++ /dev/null @@ -1,3 +0,0 @@ -This project is dual-licensed under the Unlicense and MIT licenses. - -You may use this code under the terms of either license. diff --git a/grep2/Cargo.toml b/grep2/Cargo.toml deleted file mode 100644 index caaf7a9c..00000000 --- a/grep2/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "grep2" -version = "0.2.0" #:version -authors = ["Andrew Gallant "] -description = """ -Fast line oriented regex searching as a library. -""" -documentation = "http://burntsushi.net/rustdoc/grep/" -homepage = "https://github.com/BurntSushi/ripgrep" -repository = "https://github.com/BurntSushi/ripgrep" -readme = "README.md" -keywords = ["regex", "grep", "egrep", "search", "pattern"] -license = "Unlicense/MIT" - -[dependencies] -grep-matcher = { version = "0.0.1", path = "../grep-matcher" } -grep-printer = { version = "0.0.1", path = "../grep-printer" } -grep-regex = { version = "0.0.1", path = "../grep-regex" } -grep-searcher = { version = "0.0.1", path = "../grep-searcher" } - -[features] -avx-accel = ["grep-searcher/avx-accel"] -simd-accel = ["grep-searcher/simd-accel"] diff --git a/grep2/LICENSE-MIT b/grep2/LICENSE-MIT deleted file mode 100644 index 3b0a5dc0..00000000 --- a/grep2/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Andrew Gallant - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/grep2/README.md b/grep2/README.md deleted file mode 100644 index 86cc8c2c..00000000 --- a/grep2/README.md +++ /dev/null @@ -1,4 +0,0 @@ -grep ----- -This is a *library* that provides grep-style line-by-line regex searching (with -comparable performance to `grep` itself). diff --git a/grep2/UNLICENSE b/grep2/UNLICENSE deleted file mode 100644 index 68a49daa..00000000 --- a/grep2/UNLICENSE +++ /dev/null @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. 
- -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to diff --git a/grep2/src/lib.rs b/grep2/src/lib.rs deleted file mode 100644 index b6e02684..00000000 --- a/grep2/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -/*! -TODO. -*/ - -#![deny(missing_docs)] - -pub extern crate grep_matcher as matcher; -pub extern crate grep_printer as printer; -pub extern crate grep_regex as regex; -pub extern crate grep_searcher as searcher; diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml index 13217135..42b043bf 100644 --- a/ignore/Cargo.toml +++ b/ignore/Cargo.toml @@ -26,7 +26,7 @@ memchr = "2" regex = "1" same-file = "1" thread_local = "0.3.2" -walkdir = "2" +walkdir = "2.2.0" [target.'cfg(windows)'.dependencies.winapi] version = "0.3" diff --git a/ignore/README.md b/ignore/README.md index f527da46..b0e659a9 100644 --- a/ignore/README.md +++ b/ignore/README.md @@ -4,7 +4,7 @@ The ignore crate provides a fast recursive directory iterator that respects various filters such as globs, file types and `.gitignore` files. This crate also provides lower level direct access to gitignore and file type matchers. -[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) diff --git a/ignore/examples/walk.rs b/ignore/examples/walk.rs index 0ff4ea94..ad64e015 100644 --- a/ignore/examples/walk.rs +++ b/ignore/examples/walk.rs @@ -1,5 +1,3 @@ -#![allow(dead_code, unused_imports, unused_mut, unused_variables)] - extern crate crossbeam; extern crate ignore; extern crate walkdir; @@ -8,7 +6,6 @@ use std::env; use std::io::{self, Write}; use std::path::Path; use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::thread; use crossbeam::sync::MsQueue; @@ -48,13 +45,11 @@ fn main() { }) }); } else if simple { - let mut stdout = io::BufWriter::new(io::stdout()); let walker = WalkDir::new(path); for result in walker { queue.push(Some(DirEntry::X(result.unwrap()))); } } else { - let mut stdout = io::BufWriter::new(io::stdout()); let walker = WalkBuilder::new(path).build(); for result in walker { queue.push(Some(DirEntry::Y(result.unwrap()))); diff --git a/src/app.rs b/src/app.rs index 24851c3b..a0b036d5 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2,8 +2,8 @@ // including some light validation. 
// // This module is purposely written in a bare-bones way, since it is included -// in ripgrep's build.rs file as a way to generate completion files for common -// shells. +// in ripgrep's build.rs file as a way to generate a man page and completion +// files for common shells. // // The only other place that ripgrep deals with clap is in src/args.rs, which // is where we read clap's configuration from the end user's arguments and turn @@ -82,7 +82,34 @@ pub fn app() -> App<'static, 'static> { /// the RIPGREP_BUILD_GIT_HASH env var is inspect for it. If that isn't set, /// then a revision hash is not included in the version string returned. pub fn long_version(revision_hash: Option<&str>) -> String { - // Let's say whether faster CPU instructions are enabled or not. + // Do we have a git hash? + // (Yes, if ripgrep was built on a machine with `git` installed.) + let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) { + None => String::new(), + Some(githash) => format!(" (rev {})", githash), + }; + // Put everything together. + let runtime = runtime_cpu_features(); + if runtime.is_empty() { + format!( + "{}{}\n{} (compiled)", + crate_version!(), + hash, + compile_cpu_features().join(" ") + ) + } else { + format!( + "{}{}\n{} (compiled)\n{} (runtime)", + crate_version!(), + hash, + compile_cpu_features().join(" "), + runtime.join(" ") + ) + } +} + +/// Returns the relevant CPU features enabled at compile time. +fn compile_cpu_features() -> Vec<&'static str> { let mut features = vec![]; if cfg!(feature = "simd-accel") { features.push("+SIMD"); @@ -94,14 +121,33 @@ pub fn long_version(revision_hash: Option<&str>) -> String { } else { features.push("-AVX"); } - // Do we have a git hash? - // (Yes, if ripgrep was built on a machine with `git` installed.) - let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) { - None => String::new(), - Some(githash) => format!(" (rev {})", githash), - }; - // Put everything together. - format!("{}{}\n{}", crate_version!(), hash, features.join(" ")) + features +} + +/// Returns the relevant CPU features enabled at runtime. +#[cfg(all(ripgrep_runtime_cpu, target_arch = "x86_64"))] +fn runtime_cpu_features() -> Vec<&'static str> { + // This is kind of a dirty violation of abstraction, since it assumes + // knowledge about what specific SIMD features are being used. + + let mut features = vec![]; + if is_x86_feature_detected!("ssse3") { + features.push("+SIMD"); + } else { + features.push("-SIMD"); + } + if is_x86_feature_detected!("avx2") { + features.push("+AVX"); + } else { + features.push("-AVX"); + } + features +} + +/// Returns the relevant CPU features enabled at runtime. +#[cfg(not(all(ripgrep_runtime_cpu, target_arch = "x86_64")))] +fn runtime_cpu_features() -> Vec<&'static str> { + vec![] } /// Arg is a light alias for a clap::Arg that is specialized to compile time @@ -478,7 +524,7 @@ impl RGArg { } } -// We add an extra space to long descriptions so that a black line is inserted +// We add an extra space to long descriptions so that a blank line is inserted // between flag descriptions in --help output. macro_rules! 
long { ($lit:expr) => { concat!($lit, " ") } @@ -502,6 +548,7 @@ pub fn all_args_and_flags() -> Vec { flag_context_separator(&mut args); flag_count(&mut args); flag_count_matches(&mut args); + flag_crlf(&mut args); flag_debug(&mut args); flag_dfa_size_limit(&mut args); flag_encoding(&mut args); @@ -518,6 +565,7 @@ pub fn all_args_and_flags() -> Vec { flag_ignore_case(&mut args); flag_ignore_file(&mut args); flag_invert_match(&mut args); + flag_json(&mut args); flag_line_number(&mut args); flag_line_regexp(&mut args); flag_max_columns(&mut args); @@ -525,6 +573,8 @@ pub fn all_args_and_flags() -> Vec { flag_max_depth(&mut args); flag_max_filesize(&mut args); flag_mmap(&mut args); + flag_multiline(&mut args); + flag_multiline_dotall(&mut args); flag_no_config(&mut args); flag_no_ignore(&mut args); flag_no_ignore_global(&mut args); @@ -533,9 +583,12 @@ pub fn all_args_and_flags() -> Vec { flag_no_ignore_vcs(&mut args); flag_no_messages(&mut args); flag_null(&mut args); + flag_null_data(&mut args); flag_only_matching(&mut args); flag_path_separator(&mut args); flag_passthru(&mut args); + flag_pcre2(&mut args); + flag_pcre2_unicode(&mut args); flag_pre(&mut args); flag_pretty(&mut args); flag_quiet(&mut args); @@ -548,6 +601,7 @@ pub fn all_args_and_flags() -> Vec { flag_stats(&mut args); flag_text(&mut args); flag_threads(&mut args); + flag_trim(&mut args); flag_type(&mut args); flag_type_add(&mut args); flag_type_clear(&mut args); @@ -809,14 +863,53 @@ This overrides the --count flag. Note that when --count is combined with args.push(arg); } +fn flag_crlf(args: &mut Vec) { + const SHORT: &str = "Support CRLF line terminators (useful on Windows)."; + const LONG: &str = long!("\ +When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead +of just '\\n'. + +Principally, this permits '$' in regex patterns to match just before CRLF +instead of just before LF. The underlying regex engine may not support this +natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This +may produce slightly different than desired match offsets. It is intended as a +work-around until the regex engine supports this natively. + +CRLF support can be disabled with --no-crlf. +"); + let arg = RGArg::switch("crlf") + .help(SHORT).long_help(LONG) + .overrides("no-crlf") + .overrides("null-data"); + args.push(arg); + + let arg = RGArg::switch("no-crlf") + .hidden() + .overrides("crlf"); + args.push(arg); +} + fn flag_debug(args: &mut Vec) { const SHORT: &str = "Show debug messages."; const LONG: &str = long!("\ Show debug messages. Please use this when filing a bug report. + +The --debug flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. + +To get even more debug output, use the --trace flag, which implies --debug +along with additional trace data. With --trace, the output could be quite +large and is generally more useful for development. "); let arg = RGArg::switch("debug") .help(SHORT).long_help(LONG); args.push(arg); + + let arg = RGArg::switch("trace") + .hidden() + .overrides("debug"); + args.push(arg); } fn flag_dfa_size_limit(args: &mut Vec) { @@ -842,10 +935,17 @@ default value is 'auto', which will cause ripgrep to do a best effort automatic detection of encoding on a per-file basis. Other supported values can be found in the list of labels here: https://encoding.spec.whatwg.org/#concept-encoding-get + +This flag can be disabled with --no-encoding. 
"); let arg = RGArg::flag("encoding", "ENCODING").short("E") .help(SHORT).long_help(LONG); args.push(arg); + + let arg = RGArg::switch("no-encoding") + .hidden() + .overrides("encoding"); + args.push(arg); } fn flag_file(args: &mut Vec) { @@ -1071,6 +1171,66 @@ Invert matching. Show lines that do not match the given patterns. args.push(arg); } +fn flag_json(args: &mut Vec) { + const SHORT: &str = "Show search results in a JSON Lines format."; + const LONG: &str = long!("\ +Enable printing results in a JSON Lines format. + +When this flag is provided, ripgrep will emit a sequence of messages, each +encoded as a JSON object, where there are five different message types: + +**begin** - A message that indicates a file is being searched and contains at +least one match. + +**end** - A message the indicates a file is done being searched. This message +also include summary statistics about the search for a particular file. + +**match** - A message that indicates a match was found. This includes the text +and offsets of the match. + +**context** - A message that indicates a contextual line was found. This +includes the text of the line, along with any match information if the search +was inverted. + +**summary** - The final message emitted by ripgrep that contains summary +statistics about the search across all files. + +Since file paths or the contents of files are not guaranteed to be valid UTF-8 +and JSON itself must be representable by a Unicode encoding, ripgrep will emit +all data elements as objects with one of two keys: 'text' or 'bytes'. 'text' is +a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64 +encoded contents of the data. + +The JSON Lines format is only supported for showing search results. It cannot +be used with other flags that emit other types of output, such as --files, +--files-with-matches, --files-without-match, --count or --count-matches. +ripgrep will report an error if any of the aforementioned flags are used in +concert with --json. + +Other flags that control aspects of the standard output such as +--only-matching, --heading, --replace, --max-columns, etc., have no effect +when --json is set. + +A more complete description of the JSON format used can be found here: +https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html + +The JSON Lines format can be disabled with --no-json. +"); + let arg = RGArg::switch("json") + .help(SHORT).long_help(LONG) + .overrides("no-json") + .conflicts(&[ + "count", "count-matches", + "files", "files-with-matches", "files-without-match", + ]); + args.push(arg); + + let arg = RGArg::switch("no-json") + .hidden() + .overrides("json"); + args.push(arg); +} + fn flag_line_number(args: &mut Vec) { const SHORT: &str = "Show line numbers."; const LONG: &str = long!("\ @@ -1198,6 +1358,79 @@ This flag overrides --mmap. args.push(arg); } +fn flag_multiline(args: &mut Vec) { + const SHORT: &str = "Enable matching across multiple lines."; + const LONG: &str = long!("\ +Enable matching across multiple lines. + +When multiline mode is enabled, ripgrep will lift the restriction that a match +cannot include a line terminator. For example, when multiline mode is not +enabled (the default), then the regex '\\p{any}' will match any Unicode +codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden, +and if you try to use it, ripgrep will return an error. However, when multiline +mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n', +and regexes like '\\n' are permitted. 
+
+An important caveat is that multiline mode does not change the match semantics
+of '.'. Namely, in most regex matchers, a '.' will by default match any
+character other than '\\n', and this is true in ripgrep as well. In order to
+make '.' match '\\n', you must enable the \"dot all\" flag inside the regex.
+For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will
+match any character, including '\\n'. Alternatively, the '--multiline-dotall'
+flag may be passed to make the \"dot all\" behavior the default. This flag only
+applies when multiline search is enabled.
+
+There is no limit on the number of lines that a single match can span.
+
+**WARNING**: Because of how the underlying regex engine works, multiline
+searches may be slower than normal line-oriented searches, and they may also
+use more memory. In particular, when multiline mode is enabled, ripgrep
+requires that each file it searches is laid out contiguously in memory
+(either by reading it onto the heap or by memory-mapping it). Things that
+cannot be memory-mapped (such as stdin) will be consumed until EOF before
+searching can begin. In general, ripgrep will only do these things when
+necessary. Specifically, if the --multiline flag is provided but the regex
+does not contain patterns that would match '\\n' characters, then ripgrep
+will automatically avoid reading each file into memory before searching it.
+Nevertheless, if you only care about matches spanning at most one line, then it
+is always better to disable multiline mode.
+
+This flag can be disabled with --no-multiline.
+");
+    let arg = RGArg::switch("multiline").short("U")
+        .help(SHORT).long_help(LONG)
+        .overrides("no-multiline");
+    args.push(arg);
+
+    let arg = RGArg::switch("no-multiline")
+        .hidden()
+        .overrides("multiline");
+    args.push(arg);
+}
+
+fn flag_multiline_dotall(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Make '.' match new lines when multiline is enabled.";
+    const LONG: &str = long!("\
+This flag enables \"dot all\" in your regex pattern, which causes '.' to match
+newlines when multiline searching is enabled. This flag has no effect if
+multiline searching isn't enabled with the --multiline flag.
+
+Normally, a '.' will match any character except newlines. While this behavior
+typically isn't relevant for line-oriented matching (since matches can span at
+most one line), this can be useful when searching with the -U/--multiline flag.
+By default, the multiline mode runs without this flag.
+
+This flag is generally intended to be used in an alias or your ripgrep config
+file if you prefer \"dot all\" semantics by default. Note that regardless of
+whether this flag is used, \"dot all\" semantics can still be controlled via
+inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot
+all\" whereas '(?-s:.)' always disables \"dot all\".
+");
+    let arg = RGArg::switch("multiline-dotall")
+        .help(SHORT).long_help(LONG);
+    args.push(arg);
+}
+
 fn flag_no_config(args: &mut Vec<RGArg>) {
     const SHORT: &str = "Never read configuration files.";
     const LONG: &str = long!("\
@@ -1340,6 +1573,29 @@ for use with xargs.
     args.push(arg);
 }
 
+fn flag_null_data(args: &mut Vec<RGArg>) {
+    const SHORT: &str = "Use NUL as a line terminator instead of \\n.";
+    const LONG: &str = long!("\
+Enabling this option causes ripgrep to use NUL as a line terminator instead of
+the default of '\\n'.
+
+This is useful when searching large binary files that would otherwise have very
+long lines if '\\n' were used as the line terminator. In particular, ripgrep
In particular, ripgrep +requires that, at a minimum, each line must fit into memory. Use NUL instead +can be a useful stopgap to keep memory requirements low and avoid OOM (out of +memory) conditions. + +This is also useful for processing NUL delimited data, such that that emitted +when using ripgrep's -0/--null flag or find's --print0 flag. + +Using this flag implies -a/--text. +"); + let arg = RGArg::switch("null-data") + .help(SHORT).long_help(LONG) + .overrides("crlf"); + args.push(arg); +} + fn flag_only_matching(args: &mut Vec) { const SHORT: &str = "Print only matches parts of a line."; const LONG: &str = long!("\ @@ -1374,13 +1630,76 @@ the empty string. For example, if you are searching using 'rg foo' then using 'rg \"^|foo\"' instead will emit every line in every file searched, but only occurrences of 'foo' will be highlighted. This flag enables the same behavior without needing to modify the pattern. - -This flag conflicts with the --only-matching and --replace flags. "); let arg = RGArg::switch("passthru") .help(SHORT).long_help(LONG) - .alias("passthrough") - .conflicts(&["only-matching", "replace"]); + .alias("passthrough"); + args.push(arg); +} + +fn flag_pcre2(args: &mut Vec) { + const SHORT: &str = "Enable PCRE2 matching."; + const LONG: &str = long!("\ +When this flag is present, ripgrep will use the PCRE2 regex engine instead of +its default regex engine. + +This is generally useful when you want to use features such as look-around +or backreferences. + +Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in +your build of ripgrep, then using this flag will result in ripgrep printing +an error message and exiting. + +This flag can be disabled with --no-pcre2. +"); + let arg = RGArg::switch("pcre2").short("P") + .help(SHORT).long_help(LONG) + .overrides("no-pcre2"); + args.push(arg); + + let arg = RGArg::switch("no-pcre2") + .hidden() + .overrides("pcre2"); + args.push(arg); +} + +fn flag_pcre2_unicode(args: &mut Vec) { + const SHORT: &str = "Enable Unicode mode for PCRE2 matching."; + const LONG: &str = long!("\ +When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2 +matching is not enabled, then this flag has no effect. + +This flag is enabled by default when PCRE2 matching is enabled. + +When PCRE2's Unicode mode is enabled several different types of patterns become +Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s' +and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint +instead of any byte. Caseless matching will also use Unicode simple case +folding instead of ASCII-only case insensitivity. + +Unicode mode in PCRE2 represents a critical trade off in the user experience +of ripgrep. In particular, unlike the default regex engine, PCRE2 does not +support the ability to search possibly invalid UTF-8 with Unicode features +enabled. Instead, PCRE2 *requires* that everything it searches when Unicode +mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes +of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode +mode enabled and you attempt to search invalid UTF-8, then the search for that +file will hault and print an error. For this reason, when PCRE2's Unicode mode +is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by +replacing them with the Unicode replacement codepoint. 
+ +If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode +is enabled, then pass the --no-encoding flag to disable all transcoding. + +This flag can be disabled with --no-pcre2-unicode. +"); + let arg = RGArg::switch("pcre2-unicode") + .help(SHORT).long_help(LONG); + args.push(arg); + + let arg = RGArg::switch("no-pcre2-unicode") + .hidden() + .overrides("pcre2-unicode"); args.push(arg); } @@ -1592,11 +1911,18 @@ searched, and the time taken for the entire search to complete. This set of aggregate statistics may expand over time. Note that this flag has no effect if --files, --files-with-matches or ---files-without-match is passed."); +--files-without-match is passed. +This flag can be disabled with --no-stats. +"); let arg = RGArg::switch("stats") - .help(SHORT).long_help(LONG); + .help(SHORT).long_help(LONG) + .overrides("no-stats"); + args.push(arg); + let arg = RGArg::switch("no-stats") + .hidden() + .overrides("stats"); args.push(arg); } @@ -1639,6 +1965,25 @@ causes ripgrep to choose the thread count using heuristics. args.push(arg); } +fn flag_trim(args: &mut Vec) { + const SHORT: &str = "Trim prefixed whitespace from matches."; + const LONG: &str = long!("\ +When set, all ASCII whitespace at the beginning of each line printed will be +trimmed. + +This flag can be disabled with --no-trim. +"); + let arg = RGArg::switch("trim") + .help(SHORT).long_help(LONG) + .overrides("no-trim"); + args.push(arg); + + let arg = RGArg::switch("no-trim") + .hidden() + .overrides("trim"); + args.push(arg); +} + fn flag_type(args: &mut Vec) { const SHORT: &str = "Only search files matching TYPE."; const LONG: &str = long!("\ diff --git a/src/args.rs b/src/args.rs index 10b9e557..20e67b67 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,89 +1,117 @@ use std::cmp; use std::env; use std::ffi::OsStr; -use std::fs; +use std::fs::File; use std::io::{self, BufRead}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; -use clap; -use encoding_rs::Encoding; -use grep::{Grep, GrepBuilder}; -use log; -use num_cpus; -use regex; -use same_file; -use termcolor; - -use app; use atty; +use clap; +use grep::matcher::LineTerminator; +#[cfg(feature = "pcre2")] +use grep::pcre2::{ + RegexMatcher as PCRE2RegexMatcher, + RegexMatcherBuilder as PCRE2RegexMatcherBuilder, +}; +use grep::printer::{ + ColorSpecs, Stats, + JSON, JSONBuilder, + Standard, StandardBuilder, + Summary, SummaryBuilder, SummaryKind, +}; +use grep::regex::{ + RegexMatcher as RustRegexMatcher, + RegexMatcherBuilder as RustRegexMatcherBuilder, +}; +use grep::searcher::{ + BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, +}; use ignore::overrides::{Override, OverrideBuilder}; use ignore::types::{FileTypeDef, Types, TypesBuilder}; -use ignore; -use printer::{ColorSpecs, Printer}; -use unescape::{escape, unescape}; -use worker::{Worker, WorkerBuilder}; +use ignore::{Walk, WalkBuilder, WalkParallel}; +use log; +use num_cpus; +use path_printer::{PathPrinter, PathPrinterBuilder}; +use regex::{self, Regex}; +use same_file::Handle; +use termcolor::{ + WriteColor, + BufferedStandardStream, BufferWriter, ColorChoice, StandardStream, +}; +use app; use config; use logger::Logger; +use messages::{set_messages, set_ignore_messages}; +use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}; +use subject::SubjectBuilder; +use unescape::{escape, unescape}; use Result; -/// `Args` are transformed/normalized from `ArgMatches`. 
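Since this patch pulls serde_json and a JSON printer into ripgrep, a downstream consumer of `rg --json` output might decode the message stream along these lines. The "type" envelope key is an assumption taken from the grep-printer documentation linked in the --json help, not something spelled out in this diff:

```rust
extern crate serde_json;

use std::io::{self, BufRead};

use serde_json::Value;

// Pipe the output of `rg --json PATTERN` into this program's stdin.
fn main() -> io::Result<()> {
    let stdin = io::stdin();
    for line in stdin.lock().lines() {
        let line = line?;
        // Each line is one JSON message; skip anything that fails to parse.
        let msg: Value = match serde_json::from_str(&line) {
            Ok(msg) => msg,
            Err(_) => continue,
        };
        // "type" is assumed from the grep-printer docs; adjust as needed.
        if msg.get("type").and_then(Value::as_str) == Some("match") {
            println!("match message: {}", msg);
        }
    }
    Ok(())
}
```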
-#[derive(Debug)] -pub struct Args { +/// The command that ripgrep should execute based on the command line +/// configuration. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Command { + /// Search using exactly one thread. + Search, + /// Search using possibly many threads. + SearchParallel, + /// The command line parameters suggest that a search should occur, but + /// ripgrep knows that a match can never be found (e.g., no given patterns + /// or --max-count=0). + SearchNever, + /// Show the files that would be searched, but don't actually search them, + /// and use exactly one thread. + Files, + /// Show the files that would be searched, but don't actually search them, + /// and perform directory traversal using possibly many threads. + FilesParallel, + /// List all file type definitions configured, including the default file + /// types and any additional file types added to the command line. + Types, +} + +impl Command { + /// Returns true if and only if this command requires executing a search. + fn is_search(&self) -> bool { + use self::Command::*; + + match *self { + Search | SearchParallel => true, + SearchNever | Files | FilesParallel | Types => false, + } + } +} + +/// The primary configuration object used throughout ripgrep. It provides a +/// high-level convenient interface to the provided command line arguments. +/// +/// An `Args` object is cheap to clone and can be used from multiple threads +/// simultaneously. +#[derive(Clone, Debug)] +pub struct Args(Arc); + +#[derive(Clone, Debug)] +struct ArgsImp { + /// Mid-to-low level routines for extracting CLI arguments. + matches: ArgMatches, + /// The patterns provided at the command line and/or via the -f/--file + /// flag. This may be empty. + patterns: Vec, + /// A matcher built from the patterns. + /// + /// It's important that this is only built once, since building this goes + /// through regex compilation and various types of analyses. That is, if + /// you need many of theses (one per thread, for example), it is better to + /// build it once and then clone it. + matcher: PatternMatcher, + /// The paths provided at the command line. This is guaranteed to be + /// non-empty. (If no paths are provided, then a default path is created.) paths: Vec, - after_context: usize, - before_context: usize, - byte_offset: bool, - can_match: bool, - color_choice: termcolor::ColorChoice, - colors: ColorSpecs, - column: bool, - context_separator: Vec, - count: bool, - count_matches: bool, - encoding: Option<&'static Encoding>, - files_with_matches: bool, - files_without_matches: bool, - eol: u8, - files: bool, - follow: bool, - glob_overrides: Override, - grep: Grep, - heading: bool, - hidden: bool, - ignore_files: Vec, - invert_match: bool, - line_number: bool, - line_per_match: bool, - max_columns: Option, - max_count: Option, - max_depth: Option, - max_filesize: Option, - mmap: bool, - no_ignore: bool, - no_ignore_global: bool, - no_ignore_messages: bool, - no_ignore_parent: bool, - no_ignore_vcs: bool, - no_messages: bool, - null: bool, - only_matching: bool, - path_separator: Option, - quiet: bool, - quiet_matched: QuietMatched, - replace: Option>, - sort_files: bool, - stdout_handle: Option, - text: bool, - threads: usize, - type_list: bool, - types: Types, - with_filename: bool, - search_zip_files: bool, - preprocessor: Option, - stats: bool + /// Returns true if and only if `paths` had to be populated with a single + /// default path. 
+ using_default_path: bool, } impl Args { @@ -100,46 +128,262 @@ impl Args { // trying to parse config files. If a config file exists and has // arguments, then we re-parse argv, otherwise we just use the matches // we have here. - let early_matches = ArgMatches(app::app().get_matches()); + let early_matches = ArgMatches::new(app::app().get_matches()); + set_messages(!early_matches.is_present("no-messages")); + set_ignore_messages(!early_matches.is_present("no-ignore-messages")); if let Err(err) = Logger::init() { - errored!("failed to initialize logger: {}", err); + return Err(format!("failed to initialize logger: {}", err).into()); } - if early_matches.is_present("debug") { + if early_matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if early_matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } - let matches = Args::matches(early_matches); + let matches = early_matches.reconfigure(); // The logging level may have changed if we brought in additional // arguments from a configuration file, so recheck it and set the log // level as appropriate. - if matches.is_present("debug") { + if matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } + set_messages(!matches.is_present("no-messages")); + set_ignore_messages(!matches.is_present("no-ignore-messages")); matches.to_args() } - /// Run clap and return the matches. If clap determines a problem with the - /// user provided arguments (or if --help or --version are given), then an - /// error/usage/version will be printed and the process will exit. + /// Return direct access to command line arguments. + fn matches(&self) -> &ArgMatches { + &self.0.matches + } + + /// Return the patterns found in the command line arguments. This includes + /// patterns read via the -f/--file flags. + fn patterns(&self) -> &[String] { + &self.0.patterns + } + + /// Return the matcher builder from the patterns. + fn matcher(&self) -> &PatternMatcher { + &self.0.matcher + } + + /// Return the paths found in the command line arguments. This is + /// guaranteed to be non-empty. In the case where no explicit arguments are + /// provided, a single default path is provided automatically. + fn paths(&self) -> &[PathBuf] { + &self.0.paths + } + + /// Returns true if and only if `paths` had to be populated with a default + /// path, which occurs only when no paths were given as command line + /// arguments. + fn using_default_path(&self) -> bool { + self.0.using_default_path + } + + /// Return the printer that should be used for formatting the output of + /// search results. + /// + /// The returned printer will write results to the given writer. + fn printer(&self, wtr: W) -> Result> { + match self.matches().output_kind() { + OutputKind::Standard => { + let separator_search = self.command()? == Command::Search; + self.matches() + .printer_standard(self.paths(), wtr, separator_search) + .map(Printer::Standard) + } + OutputKind::Summary => { + self.matches() + .printer_summary(self.paths(), wtr) + .map(Printer::Summary) + } + OutputKind::JSON => { + self.matches() + .printer_json(wtr) + .map(Printer::JSON) + } + } + } +} + +/// High level public routines for building data structures used by ripgrep +/// from command line arguments. 
+impl Args { + /// Create a new buffer writer for multi-threaded printing with color + /// support. + pub fn buffer_writer(&self) -> Result { + let mut wtr = BufferWriter::stdout(self.matches().color_choice()); + wtr.separator(self.matches().file_separator()?); + Ok(wtr) + } + + /// Return the high-level command that ripgrep should run. + pub fn command(&self) -> Result { + let is_one_search = self.matches().is_one_search(self.paths()); + let threads = self.matches().threads()?; + let one_thread = is_one_search || threads == 1; + + Ok(if self.matches().is_present("type-list") { + Command::Types + } else if self.matches().is_present("files") { + if one_thread { + Command::Files + } else { + Command::FilesParallel + } + } else if self.matches().can_never_match(self.patterns()) { + Command::SearchNever + } else if one_thread { + Command::Search + } else { + Command::SearchParallel + }) + } + + /// Builder a path printer that can be used for printing just file paths, + /// with optional color support. + /// + /// The printer will print paths to the given writer. + pub fn path_printer( + &self, + wtr: W, + ) -> Result> { + let mut builder = PathPrinterBuilder::new(); + builder + .color_specs(self.matches().color_specs()?) + .separator(self.matches().path_separator()?) + .terminator(self.matches().path_terminator().unwrap_or(b'\n')); + Ok(builder.build(wtr)) + } + + /// Returns true if and only if the search should quit after finding the + /// first match. + pub fn quit_after_match(&self) -> Result { + Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) + } + + /// Build a worker for executing searches. + /// + /// Search results are written to the given writer. + pub fn search_worker( + &self, + wtr: W, + ) -> Result> { + let matcher = self.matcher().clone(); + let printer = self.printer(wtr)?; + let searcher = self.matches().searcher(self.paths())?; + let mut builder = SearchWorkerBuilder::new(); + builder + .json_stats(self.matches().is_present("json")) + .preprocessor(self.matches().preprocessor()) + .search_zip(self.matches().is_present("search-zip")); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Returns a zero value for tracking statistics if and only if it has been + /// requested. + /// + /// When this returns a `Stats` value, then it is guaranteed that the + /// search worker will be configured to track statistics as well. + pub fn stats(&self) -> Result> { + Ok(if self.command()?.is_search() && self.matches().stats() { + Some(Stats::new()) + } else { + None + }) + } + + /// Return a builder for constructing subjects. A subject represents a + /// single unit of something to search. Typically, this corresponds to a + /// file or a stream such as stdin. + pub fn subject_builder(&self) -> SubjectBuilder { + let mut builder = SubjectBuilder::new(); + builder + .strip_dot_prefix(self.using_default_path()) + .skip(self.matches().stdout_handle()); + builder + } + + /// Execute the given function with a writer to stdout that enables color + /// support based on the command line configuration. + pub fn stdout(&self) -> Box { + let color_choice = self.matches().color_choice(); + if atty::is(atty::Stream::Stdout) { + Box::new(StandardStream::stdout(color_choice)) + } else { + Box::new(BufferedStandardStream::stdout(color_choice)) + } + } + + /// Return the type definitions compiled into ripgrep. + /// + /// If there was a problem reading and parsing the type definitions, then + /// this returns an error. 
+ pub fn type_defs(&self) -> Result> { + Ok(self.matches().types()?.definitions().to_vec()) + } + + /// Return a walker that never uses additional threads. + pub fn walker(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build()) + } + + /// Return a walker that never uses additional threads. + pub fn walker_parallel(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build_parallel()) + } +} + +/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to +/// the parsed arguments. +#[derive(Clone, Debug)] +struct ArgMatches(clap::ArgMatches<'static>); + +/// The output format. Generally, this corresponds to the printer that ripgrep +/// uses to show search results. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum OutputKind { + /// Classic grep-like or ack-like format. + Standard, + /// Show matching files and possibly the number of matches in each file. + Summary, + /// Emit match information in the JSON Lines format. + JSON, +} + +impl ArgMatches { + /// Create an ArgMatches from clap's parse result. + fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { + ArgMatches(clap_matches) + } + + /// Run clap and return the matches using a config file if present. If clap + /// determines a problem with the user provided arguments (or if --help or + /// --version are given), then an error/usage/version will be printed and + /// the process will exit. /// /// If there are no additional arguments from the environment (e.g., a /// config file), then the given matches are returned as is. - fn matches(early_matches: ArgMatches<'static>) -> ArgMatches<'static> { + fn reconfigure(self) -> ArgMatches { // If the end user says no config, then respect it. - if early_matches.is_present("no-config") { + if self.is_present("no-config") { debug!("not reading config files because --no-config is present"); - return early_matches; + return self; } // If the user wants ripgrep to use a config file, then parse args // from that first. - let mut args = config::args(early_matches.is_present("no-messages")); + let mut args = config::args(); if args.is_empty() { - return early_matches; + return self; } let mut cliargs = env::args_os(); if let Some(bin) = cliargs.next() { @@ -147,672 +391,360 @@ impl Args { } args.extend(cliargs); debug!("final argv: {:?}", args); - ArgMatches(app::app().get_matches_from(args)) + ArgMatches::new(app::app().get_matches_from(args)) } - /// Returns true if ripgrep should print the files it will search and exit - /// (but not do any actual searching). - pub fn files(&self) -> bool { - self.files - } - - /// Create a new line based matcher. The matcher returned can be used - /// across multiple threads simultaneously. This matcher only supports - /// basic searching of regular expressions in a single buffer. - /// - /// The pattern and other flags are taken from the command line. - pub fn grep(&self) -> Grep { - self.grep.clone() - } - - /// Whether ripgrep should be quiet or not. - pub fn quiet(&self) -> bool { - self.quiet - } - - /// Returns a thread safe boolean for determining whether to quit a search - /// early when quiet mode is enabled. - /// - /// If quiet mode is disabled, then QuietMatched.has_match always returns - /// false. - pub fn quiet_matched(&self) -> QuietMatched { - self.quiet_matched.clone() - } - - /// Create a new printer of individual search results that writes to the - /// writer given. 
- pub fn printer(&self, wtr: W) -> Printer { - let mut p = Printer::new(wtr) - .colors(self.colors.clone()) - .column(self.column) - .context_separator(self.context_separator.clone()) - .eol(self.eol) - .heading(self.heading) - .line_per_match(self.line_per_match) - .null(self.null) - .only_matching(self.only_matching) - .path_separator(self.path_separator) - .with_filename(self.with_filename) - .max_columns(self.max_columns); - if let Some(ref rep) = self.replace { - p = p.replace(rep.clone()); - } - p - } - - /// Retrieve the configured file separator. - pub fn file_separator(&self) -> Option> { - let contextless = - self.count - || self.count_matches - || self.files_with_matches - || self.files_without_matches; - let use_heading_sep = self.heading && !contextless; - - if use_heading_sep { - Some(b"".to_vec()) - } else if !contextless - && (self.before_context > 0 || self.after_context > 0) { - Some(self.context_separator.clone()) - } else { - None - } - } - - /// Returns true if the given arguments are known to never produce a match. - pub fn never_match(&self) -> bool { - !self.can_match || self.max_count == Some(0) - } - - /// Returns whether ripgrep should track stats for this run - pub fn stats(&self) -> bool { - self.stats - } - - /// Create a new writer for single-threaded searching with color support. - pub fn stdout(&self) -> Box { - if atty::is(atty::Stream::Stdout) { - Box::new(termcolor::StandardStream::stdout(self.color_choice)) - } else { - Box::new( - termcolor::BufferedStandardStream::stdout(self.color_choice)) - } - } - - /// Returns a handle to stdout for filtering search. - /// - /// A handle is returned if and only if ripgrep's stdout is being - /// redirected to a file. The handle returned corresponds to that file. - /// - /// This can be used to ensure that we do not attempt to search a file - /// that ripgrep is writing to. - pub fn stdout_handle(&self) -> Option<&same_file::Handle> { - self.stdout_handle.as_ref() - } - - /// Create a new buffer writer for multi-threaded searching with color - /// support. - pub fn buffer_writer(&self) -> termcolor::BufferWriter { - let mut wtr = termcolor::BufferWriter::stdout(self.color_choice); - wtr.separator(self.file_separator()); - wtr - } - - /// Return the paths that should be searched. - pub fn paths(&self) -> &[PathBuf] { - &self.paths - } - - /// Returns true if there is exactly one file path given to search. - pub fn is_one_path(&self) -> bool { - self.paths.len() == 1 - && (self.paths[0] == Path::new("-") || path_is_file(&self.paths[0])) - } - - /// Create a worker whose configuration is taken from the - /// command line. - pub fn worker(&self) -> Worker { - WorkerBuilder::new(self.grep()) - .after_context(self.after_context) - .before_context(self.before_context) - .byte_offset(self.byte_offset) - .count(self.count) - .count_matches(self.count_matches) - .encoding(self.encoding) - .files_with_matches(self.files_with_matches) - .files_without_matches(self.files_without_matches) - .eol(self.eol) - .line_number(self.line_number) - .invert_match(self.invert_match) - .max_count(self.max_count) - .mmap(self.mmap) - .no_messages(self.no_messages) - .quiet(self.quiet) - .text(self.text) - .search_zip_files(self.search_zip_files) - .preprocessor(self.preprocessor.clone()) - .build() - } - - /// Returns the number of worker search threads that should be used. - pub fn threads(&self) -> usize { - self.threads - } - - /// Returns a list of type definitions currently loaded. 
- pub fn type_defs(&self) -> &[FileTypeDef] { - self.types.definitions() - } - - /// Returns true if ripgrep should print the type definitions currently - /// loaded and then exit. - pub fn type_list(&self) -> bool { - self.type_list - } - - /// Returns true if error messages should be suppressed. - pub fn no_messages(&self) -> bool { - self.no_messages - } - - /// Returns true if error messages associated with parsing .ignore or - /// .gitignore files should be suppressed. - pub fn no_ignore_messages(&self) -> bool { - self.no_ignore_messages - } - - /// Create a new recursive directory iterator over the paths in argv. - pub fn walker(&self) -> ignore::Walk { - self.walker_builder().build() - } - - /// Create a new parallel recursive directory iterator over the paths - /// in argv. - pub fn walker_parallel(&self) -> ignore::WalkParallel { - self.walker_builder().build_parallel() - } - - fn walker_builder(&self) -> ignore::WalkBuilder { - let paths = self.paths(); - let mut wd = ignore::WalkBuilder::new(&paths[0]); - for path in &paths[1..] { - wd.add(path); - } - for path in &self.ignore_files { - if let Some(err) = wd.add_ignore(path) { - if !self.no_messages && !self.no_ignore_messages { - eprintln!("{}", err); - } - } - } - - wd.follow_links(self.follow); - wd.hidden(!self.hidden); - wd.max_depth(self.max_depth); - wd.max_filesize(self.max_filesize); - wd.overrides(self.glob_overrides.clone()); - wd.types(self.types.clone()); - wd.git_global( - !self.no_ignore && !self.no_ignore_vcs && !self.no_ignore_global - ); - wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs); - wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs); - wd.ignore(!self.no_ignore); - if !self.no_ignore { - wd.add_custom_ignore_filename(".rgignore"); - } - wd.parents(!self.no_ignore_parent); - wd.threads(self.threads()); - if self.sort_files { - wd.sort_by_file_name(|a, b| a.cmp(b)); - } - wd + /// Convert the result of parsing CLI arguments into ripgrep's higher level + /// configuration structure. + fn to_args(self) -> Result { + // We compute these once since they could be large. + let patterns = self.patterns()?; + let matcher = self.matcher(&patterns)?; + let mut paths = self.paths(); + let using_default_path = + if paths.is_empty() { + paths.push(self.path_default()); + true + } else { + false + }; + Ok(Args(Arc::new(ArgsImp { + matches: self, + patterns: patterns, + matcher: matcher, + paths: paths, + using_default_path: using_default_path, + }))) } } -/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to -/// several options/flags. -struct ArgMatches<'a>(clap::ArgMatches<'a>); - -impl<'a> ArgMatches<'a> { - /// Convert the result of parsing CLI arguments into ripgrep's - /// configuration. 
- fn to_args(&self) -> Result { - let paths = self.paths(); - let line_number = self.line_number(&paths); - let mmap = self.mmap(&paths)?; - let with_filename = self.with_filename(&paths); - let (before_context, after_context) = self.contexts()?; - let (count, count_matches) = self.counts(); - let quiet = self.is_present("quiet"); - let (grep, can_match) = self.grep()?; - let args = Args { - paths: paths, - after_context: after_context, - before_context: before_context, - byte_offset: self.is_present("byte-offset"), - can_match: can_match, - color_choice: self.color_choice(), - colors: self.color_specs()?, - column: self.column(), - context_separator: self.context_separator(), - count: count, - count_matches: count_matches, - encoding: self.encoding()?, - files_with_matches: self.is_present("files-with-matches"), - files_without_matches: self.is_present("files-without-match"), - eol: b'\n', - files: self.is_present("files"), - follow: self.is_present("follow"), - glob_overrides: self.overrides()?, - grep: grep, - heading: self.heading(), - hidden: self.hidden(), - ignore_files: self.ignore_files(), - invert_match: self.is_present("invert-match"), - line_number: line_number, - line_per_match: self.is_present("vimgrep"), - max_columns: self.usize_of_nonzero("max-columns")?, - max_count: self.usize_of("max-count")?.map(|n| n as u64), - max_depth: self.usize_of("max-depth")?, - max_filesize: self.max_filesize()?, - mmap: mmap, - no_ignore: self.no_ignore(), - no_ignore_global: self.no_ignore_global(), - no_ignore_messages: self.is_present("no-ignore-messages"), - no_ignore_parent: self.no_ignore_parent(), - no_ignore_vcs: self.no_ignore_vcs(), - no_messages: self.is_present("no-messages"), - null: self.is_present("null"), - only_matching: self.is_present("only-matching"), - path_separator: self.path_separator()?, - quiet: quiet, - quiet_matched: QuietMatched::new(quiet), - replace: self.replace(), - sort_files: self.is_present("sort-files"), - stdout_handle: self.stdout_handle(), - text: self.text(), - threads: self.threads()?, - type_list: self.is_present("type-list"), - types: self.types()?, - with_filename: with_filename, - search_zip_files: self.is_present("search-zip"), - preprocessor: self.preprocessor(), - stats: self.stats() - }; - if args.mmap { - debug!("will try to use memory maps"); - } - Ok(args) - } - - /// Return all file paths that ripgrep should search. - fn paths(&self) -> Vec { - let mut paths: Vec = match self.values_of_os("path") { - None => vec![], - Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), - }; - // If --file, --files or --regexp is given, then the first path is - // always in `pattern`. - if self.is_present("file") - || self.is_present("files") - || self.is_present("regexp") { - if let Some(path) = self.value_of_os("pattern") { - paths.insert(0, Path::new(path).to_path_buf()); - } - } - if paths.is_empty() { - paths.push(self.default_path()); - } - paths - } - - /// Return the default path that ripgrep should search. - fn default_path(&self) -> PathBuf { - let file_is_stdin = - self.values_of_os("file").map_or(false, |mut files| { - files.any(|f| f == "-") - }); - let search_cwd = atty::is(atty::Stream::Stdin) - || !stdin_is_readable() - || (self.is_present("file") && file_is_stdin) - || self.is_present("files") - || self.is_present("type-list"); - if search_cwd { - Path::new("./").to_path_buf() +/// High level routines for converting command line arguments into various +/// data structures used by ripgrep. 
+/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(feature = "pcre2")] + fn matcher(&self, patterns: &[String]) -> Result { + if self.is_present("pcre2") { + let matcher = self.matcher_pcre2(patterns)?; + Ok(PatternMatcher::PCRE2(matcher)) } else { - Path::new("-").to_path_buf() - } - } - - /// Return all of the ignore files given on the command line. - fn ignore_files(&self) -> Vec { - match self.values_of_os("ignore-file") { - None => return vec![], - Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), - } - } - - /// Get a sequence of all available patterns from the command line. - /// This includes reading the -e/--regexp and -f/--file flags. - /// - /// Note that if -F/--fixed-strings is set, then all patterns will be - /// escaped. Similarly, if -w/--word-regexp is set, then all patterns - /// are surrounded by `\b`, and if -x/--line-regexp is set, then all - /// patterns are surrounded by `^...$`. Finally, if --passthru is set, - /// the pattern `^` is added to the end (to ensure that it works as - /// expected with multiple -e/-f patterns). - /// - /// If any pattern is invalid UTF-8, then an error is returned. - fn patterns(&self) -> Result> { - if self.is_present("files") || self.is_present("type-list") { - return Ok(vec![self.empty_pattern()]); - } - let mut pats = vec![]; - match self.values_of_os("regexp") { - None => { - if self.values_of_os("file").is_none() { - if let Some(os_pat) = self.value_of_os("pattern") { - pats.push(self.os_str_pattern(os_pat)?); - } + let matcher = match self.matcher_rust(patterns) { + Ok(matcher) => matcher, + Err(err) => { + return Err(From::from(suggest_pcre2(err.to_string()))); } + }; + Ok(PatternMatcher::RustRegex(matcher)) + } + } + + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(not(feature = "pcre2"))] + fn matcher(&self, patterns: &[String]) -> Result { + if self.is_present("pcre2") { + return Err(From::from( + "PCRE2 is not available in this build of ripgrep", + )); + } + let matcher = self.matcher_rust(patterns)?; + Ok(PatternMatcher::RustRegex(matcher)) + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + fn matcher_rust(&self, patterns: &[String]) -> Result { + let mut builder = RustRegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .case_insensitive(self.case_insensitive()) + .multi_line(true) + .unicode(true) + .octal(false) + .word(self.is_present("word-regexp")); + if self.is_present("multiline") { + builder.dot_matches_new_line(self.is_present("multiline-dotall")); + if self.is_present("crlf") { + builder + .crlf(true) + .line_terminator(None); } - Some(os_pats) => { - for os_pat in os_pats { - pats.push(self.os_str_pattern(os_pat)?); + } else { + builder + .line_terminator(Some(b'\n')) + .dot_matches_new_line(false); + if self.is_present("crlf") { + builder.crlf(true); + } + // We don't need to set this in multiline mode since mulitline + // matchers don't use optimizations related to line terminators. + // Moreover, a mulitline regex used with --null-data should + // be allowed to match NUL bytes explicitly, which this would + // otherwise forbid. 
+ if self.is_present("null-data") { + builder.line_terminator(Some(b'\x00')); + } + } + if let Some(limit) = self.regex_size_limit()? { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit()? { + builder.dfa_size_limit(limit); + } + Ok(builder.build(&patterns.join("|"))?) + } + + /// Build a matcher using PCRE2. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + #[cfg(feature = "pcre2")] + fn matcher_pcre2(&self, patterns: &[String]) -> Result { + let mut builder = PCRE2RegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .caseless(self.case_insensitive()) + .multi_line(true) + .word(self.is_present("word-regexp")); + // For whatever reason, the JIT craps out during compilation with a + // "no more memory" error on 32 bit systems. So don't use it there. + if !cfg!(target_pointer_width = "32") { + builder.jit(true); + } + if self.pcre2_unicode() { + builder.utf(true).ucp(true); + if self.encoding()?.is_some() { + // SAFETY: If an encoding was specified, then we're guaranteed + // to get valid UTF-8, so we can disable PCRE2's UTF checking. + // (Feeding invalid UTF-8 to PCRE2 is UB.) + unsafe { + builder.disable_utf_check(); } } } - if let Some(files) = self.values_of_os("file") { - for file in files { - if file == "-" { - let stdin = io::stdin(); - for line in stdin.lock().lines() { - pats.push(self.str_pattern(&line?)); - } - } else { - let f = fs::File::open(file)?; - for line in io::BufReader::new(f).lines() { - pats.push(self.str_pattern(&line?)); - } - } - } + if self.is_present("multiline") { + builder.dotall(self.is_present("multiline-dotall")); } - // It's important that this be at the end; otherwise it would always - // match first, and we wouldn't get colours in the output - if self.is_present("passthru") && !self.is_present("count") { - pats.push("^".to_string()) + if self.is_present("crlf") { + builder.crlf(true); } - Ok(pats) + Ok(builder.build(&patterns.join("|"))?) } - /// Converts an OsStr pattern to a String pattern, including line/word - /// boundaries or escapes if applicable. + /// Build a JSON printer that writes results to the given writer. + fn printer_json(&self, wtr: W) -> Result> { + let mut builder = JSONBuilder::new(); + builder + .pretty(false) + .max_matches(self.max_count()?) + .always_begin_end(false); + Ok(builder.build(wtr)) + } + + /// Build a Standard printer that writes results to the given writer. /// - /// If the pattern is not valid UTF-8, then an error is returned. - fn os_str_pattern(&self, pat: &OsStr) -> Result { - let s = pattern_to_str(pat)?; - Ok(self.str_pattern(s)) - } - - /// Converts a &str pattern to a String pattern, including line/word - /// boundaries or escapes if applicable. - fn str_pattern(&self, pat: &str) -> String { - let litpat = self.literal_pattern(pat.to_string()); - let s = self.line_pattern(self.word_pattern(litpat)); - - if s.is_empty() { - self.empty_pattern() - } else { - s - } - } - - /// Returns the given pattern as a literal pattern if the - /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned - /// unchanged. - fn literal_pattern(&self, pat: String) -> String { - if self.is_present("fixed-strings") { - regex::escape(&pat) - } else { - pat - } - } - - /// Returns the given pattern as a word pattern if the -w/--word-regexp - /// flag is set. Otherwise, the pattern is returned unchanged. 
- fn word_pattern(&self, pat: String) -> String { - if self.is_present("word-regexp") { - format!(r"\b(?:{})\b", pat) - } else { - pat - } - } - - /// Returns the given pattern as a line pattern if the -x/--line-regexp - /// flag is set. Otherwise, the pattern is returned unchanged. - fn line_pattern(&self, pat: String) -> String { - if self.is_present("line-regexp") { - format!(r"^(?:{})$", pat) - } else { - pat - } - } - - /// Empty pattern returns a pattern that is guaranteed to produce an empty - /// regular expression that is valid in any position. - fn empty_pattern(&self) -> String { - // This would normally just be an empty string, which works on its - // own, but if the patterns are joined in a set of alternations, then - // you wind up with `foo|`, which is invalid. - self.word_pattern("(?:z{0})*".to_string()) - } - - /// Returns true if and only if file names containing each match should - /// be emitted. + /// The given paths are used to configure aspects of the printer. /// - /// `paths` should be a slice of all top-level file paths that ripgrep - /// will need to search. - fn with_filename(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-filename") { - false - } else { - self.is_present("with-filename") - || self.is_present("vimgrep") - || paths.len() > 1 - || paths.get(0).map_or(false, |p| path_is_dir(p)) - } - } - - /// Returns a handle to stdout for filtering search. + /// If `separator_search` is true, then the returned printer will assume + /// the responsibility of printing a separator between each set of + /// search results, when appropriate (e.g., when contexts are enabled). + /// When it's set to false, the caller is responsible for handling + /// separators. /// - /// A handle is returned if and only if ripgrep's stdout is being - /// redirected to a file. The handle returned corresponds to that file. + /// In practice, we want the printer to handle it in the single threaded + /// case but not in the multi-threaded case. + fn printer_standard( + &self, + paths: &[PathBuf], + wtr: W, + separator_search: bool, + ) -> Result> { + let mut builder = StandardBuilder::new(); + builder + .color_specs(self.color_specs()?) + .stats(self.stats()) + .heading(self.heading()) + .path(self.with_filename(paths)) + .only_matching(self.is_present("only-matching")) + .per_match(self.is_present("vimgrep")) + .replacement(self.replacement()) + .max_columns(self.max_columns()?) + .max_matches(self.max_count()?) + .column(self.column()) + .byte_offset(self.is_present("byte-offset")) + .trim_ascii(self.is_present("trim")) + .separator_search(None) + .separator_context(Some(self.context_separator())) + .separator_field_match(b":".to_vec()) + .separator_field_context(b"-".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + if separator_search { + builder.separator_search(self.file_separator()?); + } + Ok(builder.build(wtr)) + } + + /// Build a Summary printer that writes results to the given writer. /// - /// This can be used to ensure that we do not attempt to search a file - /// that ripgrep is writing to. - fn stdout_handle(&self) -> Option { - let h = match same_file::Handle::stdout() { - Err(_) => return None, - Ok(h) => h, - }; - let md = match h.as_file().metadata() { - Err(_) => return None, - Ok(md) => md, - }; - if !md.is_file() { - return None; - } - Some(h) - } - - /// Returns true if and only if memory map searching should be tried. + /// The given paths are used to configure aspects of the printer. 
/// - /// `paths` should be a slice of all top-level file paths that ripgrep - /// will need to search. - fn mmap(&self, paths: &[PathBuf]) -> Result { - let (before, after) = self.contexts()?; - let enc = self.encoding()?; - Ok(if before > 0 || after > 0 || self.is_present("no-mmap") { - false - } else if self.is_present("mmap") { - true - } else if cfg!(target_os = "macos") { - // On Mac, memory maps appear to suck. Neat. - false - } else if enc.is_some() { - // There's no practical way to transcode a memory map that isn't - // isomorphic to searching over io::Read. - false - } else { - // If we're only searching a few paths and all of them are - // files, then memory maps are probably faster. - paths.len() <= 10 && paths.iter().all(|p| path_is_file(p)) - }) + /// This panics if the output format is not `OutputKind::Summary`. + fn printer_summary( + &self, + paths: &[PathBuf], + wtr: W, + ) -> Result> { + let mut builder = SummaryBuilder::new(); + builder + .kind(self.summary_kind().expect("summary format")) + .color_specs(self.color_specs()?) + .stats(self.stats()) + .path(self.with_filename(paths)) + .max_matches(self.max_count()?) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + Ok(builder.build(wtr)) } - /// Returns true if and only if line numbers should be shown. - fn line_number(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-line-number") || self.is_present("count") { - false - } else { - let only_stdin = paths == [Path::new("-")]; - (atty::is(atty::Stream::Stdout) && !only_stdin) - || self.is_present("line-number") - || self.is_present("column") - || self.is_present("pretty") - || self.is_present("vimgrep") - } - } - - /// Returns true if and only if column numbers should be shown. - fn column(&self) -> bool { - if self.is_present("no-column") { - return false; - } - self.is_present("column") || self.is_present("vimgrep") - } - - /// Returns true if and only if matches should be grouped with file name - /// headings. - fn heading(&self) -> bool { - if self.is_present("no-heading") || self.is_present("vimgrep") { - false - } else { - atty::is(atty::Stream::Stdout) - || self.is_present("heading") - || self.is_present("pretty") - } - } - - /// Returns the replacement string as UTF-8 bytes if it exists. - fn replace(&self) -> Option> { - self.value_of_lossy("replace").map(|s| s.into_bytes()) - } - - /// Returns the unescaped context separator in UTF-8 bytes. - fn context_separator(&self) -> Vec { - match self.value_of_lossy("context-separator") { - None => b"--".to_vec(), - Some(sep) => unescape(&sep), - } - } - - /// Returns the preprocessor command - fn preprocessor(&self) -> Option { - if let Some(path) = self.value_of_os("pre") { - if path.is_empty() { - None + /// Build a searcher from the command line parameters. 
+ fn searcher(&self, paths: &[PathBuf]) -> Result { + let (ctx_before, ctx_after) = self.contexts()?; + let line_term = + if self.is_present("crlf") { + LineTerminator::crlf() + } else if self.is_present("null-data") { + LineTerminator::byte(b'\x00') } else { - Some(Path::new(path).to_path_buf()) - } - } else { - None - } + LineTerminator::byte(b'\n') + }; + let mut builder = SearcherBuilder::new(); + builder + .line_terminator(line_term) + .invert_match(self.is_present("invert-match")) + .line_number(self.line_number(paths)) + .multi_line(self.is_present("multiline")) + .before_context(ctx_before) + .after_context(ctx_after) + .passthru(self.is_present("passthru")) + .memory_map(self.mmap_choice(paths)) + .binary_detection(self.binary_detection()) + .encoding(self.encoding()?); + Ok(builder.build()) } - /// Returns the unescaped path separator in UTF-8 bytes. - fn path_separator(&self) -> Result> { - match self.value_of_lossy("path-separator") { - None => Ok(None), - Some(sep) => { - let sep = unescape(&sep); - if sep.is_empty() { - Ok(None) - } else if sep.len() > 1 { - Err(From::from(format!( - "A path separator must be exactly one byte, but \ - the given separator is {} bytes: {}\n\ - In some shells on Windows '/' is automatically \ - expanded. Use '//' instead.", - sep.len(), - escape(&sep), - ))) - } else { - Ok(Some(sep[0])) - } + /// Return a builder for recursively traversing a directory while + /// respecting ignore rules. + /// + /// If there was a problem parsing the CLI arguments necessary for + /// constructing the builder, then this returns an error. + fn walker_builder(&self, paths: &[PathBuf]) -> Result { + let mut builder = WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + builder.add(path); + } + for path in self.ignore_paths() { + if let Some(err) = builder.add_ignore(path) { + ignore_message!("{}", err); } } - } - - /// Returns the before and after contexts from the command line. - /// - /// If a context setting was absent, then `0` is returned. - /// - /// If there was a problem parsing the values from the user as an integer, - /// then an error is returned. - fn contexts(&self) -> Result<(usize, usize)> { - let after = self.usize_of("after-context")?.unwrap_or(0); - let before = self.usize_of("before-context")?.unwrap_or(0); - let both = self.usize_of("context")?.unwrap_or(0); - Ok(if both > 0 { - (both, both) - } else { - (before, after) - }) - } - - /// Returns whether the -c/--count or the --count-matches flags were - /// passed from the command line. - /// - /// If --count-matches and --invert-match were passed in, behave - /// as if --count and --invert-match were passed in (i.e. rg will - /// count inverted matches as per existing behavior). - fn counts(&self) -> (bool, bool) { - let count = self.is_present("count"); - let count_matches = self.is_present("count-matches"); - let invert_matches = self.is_present("invert-match"); - let only_matching = self.is_present("only-matching"); - if count_matches && invert_matches { - // Treat `-v --count-matches` as `-v -c`. - (true, false) - } else if count && only_matching { - // Treat `-c --only-matching` as `--count-matches`. - (false, true) - } else { - (count, count_matches) + builder + .max_depth(self.usize_of("max-depth")?) + .follow_links(self.is_present("follow")) + .max_filesize(self.max_file_size()?) + .threads(self.threads()?) + .overrides(self.overrides()?) + .types(self.types()?) 
+            .hidden(!self.hidden())
+            .parents(!self.no_ignore_parent())
+            .ignore(!self.no_ignore())
+            .git_global(
+                !self.no_ignore()
+                && !self.no_ignore_vcs()
+                && !self.no_ignore_global())
+            .git_ignore(!self.no_ignore() && !self.no_ignore_vcs())
+            .git_exclude(!self.no_ignore() && !self.no_ignore_vcs());
+        if !self.no_ignore() {
+            builder.add_custom_ignore_filename(".rgignore");
        }
+        if self.is_present("sort-files") {
+            builder.sort_by_file_name(|a, b| a.cmp(b));
+        }
+        Ok(builder)
+    }
+}
+
+/// Mid level routines for converting command line arguments into various types
+/// of data structures.
+///
+/// Methods are sorted alphabetically.
+impl ArgMatches {
+    /// Returns the form of binary detection to perform.
+    fn binary_detection(&self) -> BinaryDetection {
+        let none =
+            self.is_present("text")
+            || self.unrestricted_count() >= 3
+            || self.is_present("null-data");
+        if none {
+            BinaryDetection::none()
+        } else {
+            BinaryDetection::quit(b'\x00')
+        }
+    }
+
+    /// Returns true if the command line configuration implies that a match
+    /// can never be shown.
+    fn can_never_match(&self, patterns: &[String]) -> bool {
+        patterns.is_empty() || self.max_count().ok() == Some(Some(0))
+    }
+
+    /// Returns true if and only if case should be ignored.
+    ///
+    /// If --case-sensitive is present, then case is never ignored, even if
+    /// --ignore-case is present.
+    fn case_insensitive(&self) -> bool {
+        self.is_present("ignore-case") && !self.is_present("case-sensitive")
+    }
+
+    /// Returns true if and only if smart case has been enabled.
+    ///
+    /// If either --ignore-case or --case-sensitive is present, then smart
+    /// case is disabled.
+    fn case_smart(&self) -> bool {
+        self.is_present("smart-case")
+            && !self.is_present("ignore-case")
+            && !self.is_present("case-sensitive")
    }

    /// Returns the user's color choice based on command line parameters and
    /// environment.
-    fn color_choice(&self) -> termcolor::ColorChoice {
+    fn color_choice(&self) -> ColorChoice {
        let preference = match self.value_of_lossy("color") {
            None => "auto".to_string(),
            Some(v) => v,
        };
        if preference == "always" {
-            termcolor::ColorChoice::Always
+            ColorChoice::Always
        } else if preference == "ansi" {
-            termcolor::ColorChoice::AlwaysAnsi
+            ColorChoice::AlwaysAnsi
        } else if preference == "auto" {
            if atty::is(atty::Stream::Stdout) || self.is_present("pretty") {
-                termcolor::ColorChoice::Auto
+                ColorChoice::Auto
            } else {
-                termcolor::ColorChoice::Never
+                ColorChoice::Never
            }
        } else {
-            termcolor::ColorChoice::Never
+            ColorChoice::Never
        }
    }
@@ -837,184 +769,216 @@ impl<'a> ArgMatches<'a> {
        Ok(ColorSpecs::new(&specs))
    }

-    /// Return the text encoding specified.
-    ///
-    /// If the label given by the caller doesn't correspond to a valid
-    /// supported encoding (and isn't `auto`), then return an error.
-    ///
-    /// A `None` encoding implies that the encoding should be automatically
-    /// detected on a per-file basis.
-    fn encoding(&self) -> Result<Option<Encoding>> {
-        match self.value_of_lossy("encoding") {
-            None => Ok(None),
-            Some(label) => {
-                if label == "auto" {
-                    return Ok(None);
-                }
-                match Encoding::for_label_no_replacement(label.as_bytes()) {
-                    Some(enc) => Ok(Some(enc)),
-                    None => Err(From::from(
-                        format!("unsupported encoding: {}", label))),
-                }
-            }
+    /// Returns true if and only if column numbers should be shown.
+ fn column(&self) -> bool { + if self.is_present("no-column") { + return false; } + self.is_present("column") || self.is_present("vimgrep") } - /// Returns whether status should be tracked for this run of ripgrep - - /// This is automatically disabled if we're asked to only list the - /// files that wil be searched, files with matches or files - /// without matches. - fn stats(&self) -> bool { - if self.is_present("files-with-matches") || - self.is_present("files-without-match") { - return false; - } - self.is_present("stats") - } - - /// Returns the approximate number of threads that ripgrep should use. - fn threads(&self) -> Result { - if self.is_present("sort-files") { - return Ok(1); - } - let threads = self.usize_of("threads")?.unwrap_or(0); - Ok(if threads == 0 { - cmp::min(12, num_cpus::get()) + /// Returns the before and after contexts from the command line. + /// + /// If a context setting was absent, then `0` is returned. + /// + /// If there was a problem parsing the values from the user as an integer, + /// then an error is returned. + fn contexts(&self) -> Result<(usize, usize)> { + let after = self.usize_of("after-context")?.unwrap_or(0); + let before = self.usize_of("before-context")?.unwrap_or(0); + let both = self.usize_of("context")?.unwrap_or(0); + Ok(if both > 0 { + (both, both) } else { - threads + (before, after) }) } - /// Builds a grep matcher from the command line flags. + /// Returns the unescaped context separator in UTF-8 bytes. /// - /// If there was a problem extracting the pattern from the command line - /// flags, then an error is returned. - /// - /// If no match can ever occur, then `false` is returned. Otherwise, - /// `true` is returned. - fn grep(&self) -> Result<(Grep, bool)> { - let smart = - self.is_present("smart-case") - && !self.is_present("ignore-case") - && !self.is_present("case-sensitive"); - let casei = - self.is_present("ignore-case") - && !self.is_present("case-sensitive"); - let pats = self.patterns()?; - let ok = !pats.is_empty(); - let mut gb = GrepBuilder::new(&pats.join("|")) - .case_smart(smart) - .case_insensitive(casei) - .line_terminator(b'\n'); - - if let Some(limit) = self.dfa_size_limit()? { - gb = gb.dfa_size_limit(limit); + /// If one was not provided, the default `--` is returned. + fn context_separator(&self) -> Vec { + match self.value_of_lossy("context-separator") { + None => b"--".to_vec(), + Some(sep) => unescape(&sep), } - if let Some(limit) = self.regex_size_limit()? { - gb = gb.size_limit(limit); - } - Ok((gb.build()?, ok)) } - /// Builds the set of glob overrides from the command line flags. - fn overrides(&self) -> Result { - let mut ovr = OverrideBuilder::new(env::current_dir()?); - for glob in self.values_of_lossy_vec("glob") { - ovr.add(&glob)?; - } - // this is smelly. In the long run it might make sense - // to change overridebuilder to be like globsetbuilder - // but this would be a breaking change to the ignore crate - // so it is being shelved for now... - ovr.case_insensitive(true)?; - for glob in self.values_of_lossy_vec("iglob") { - ovr.add(&glob)?; - } - ovr.build().map_err(From::from) - } - - /// Builds a file type matcher from the command line flags. 
- fn types(&self) -> Result { - let mut btypes = TypesBuilder::new(); - btypes.add_defaults(); - for ty in self.values_of_lossy_vec("type-clear") { - btypes.clear(&ty); - } - for def in self.values_of_lossy_vec("type-add") { - btypes.add_def(&def)?; - } - for ty in self.values_of_lossy_vec("type") { - btypes.select(&ty); - } - for ty in self.values_of_lossy_vec("type-not") { - btypes.negate(&ty); - } - btypes.build().map_err(From::from) - } - - /// Parses an argument of the form `[0-9]+(KMG)?`. + /// Returns whether the -c/--count or the --count-matches flags were + /// passed from the command line. /// - /// This always returns the result as a type `u64`. This must be converted - /// to the appropriate type by the caller. - fn parse_human_readable_size_arg( - &self, - arg_name: &str, - ) -> Result> { - let arg_value = match self.value_of_lossy(arg_name) { - Some(x) => x, - None => return Ok(None) - }; - let re = regex::Regex::new("^([0-9]+)([KMG])?$").unwrap(); - let caps = - re.captures(&arg_value).ok_or_else(|| { - format!("invalid format for {}", arg_name) - })?; - - let value = caps[1].parse::()?; - let suffix = caps.get(2).map(|x| x.as_str()); - - let v_10 = value.checked_mul(1024); - let v_20 = v_10.and_then(|x| x.checked_mul(1024)); - let v_30 = v_20.and_then(|x| x.checked_mul(1024)); - - let try_suffix = |x: Option| { - if x.is_some() { - Ok(x) - } else { - Err(From::from(format!("number too large for {}", arg_name))) - } - }; - match suffix { - None => Ok(Some(value)), - Some("K") => try_suffix(v_10), - Some("M") => try_suffix(v_20), - Some("G") => try_suffix(v_30), - _ => Err(From::from(format!("invalid suffix for {}", arg_name))) + /// If --count-matches and --invert-match were passed in, behave + /// as if --count and --invert-match were passed in (i.e. rg will + /// count inverted matches as per existing behavior). + fn counts(&self) -> (bool, bool) { + let count = self.is_present("count"); + let count_matches = self.is_present("count-matches"); + let invert_matches = self.is_present("invert-match"); + let only_matching = self.is_present("only-matching"); + if count_matches && invert_matches { + // Treat `-v --count-matches` as `-v -c`. + (true, false) + } else if count && only_matching { + // Treat `-c --only-matching` as `--count-matches`. + (false, true) + } else { + (count, count_matches) } } /// Parse the dfa-size-limit argument option into a byte count. fn dfa_size_limit(&self) -> Result> { - let r = self.parse_human_readable_size_arg("dfa-size-limit")?; - human_readable_to_usize("dfa-size-limit", r) + let r = self.parse_human_readable_size("dfa-size-limit")?; + u64_to_usize("dfa-size-limit", r) } - /// Parse the regex-size-limit argument option into a byte count. - fn regex_size_limit(&self) -> Result> { - let r = self.parse_human_readable_size_arg("regex-size-limit")?; - human_readable_to_usize("regex-size-limit", r) + /// Returns the type of encoding to use. + /// + /// This only returns an encoding if one is explicitly specified. When no + /// encoding is present, the Searcher will still do BOM sniffing for UTF-16 + /// and transcode seamlessly. + fn encoding(&self) -> Result> { + if self.is_present("no-encoding") { + return Ok(None); + } + let label = match self.value_of_lossy("encoding") { + None if self.pcre2_unicode() => "utf-8".to_string(), + None => return Ok(None), + Some(label) => label, + }; + if label == "auto" { + return Ok(None); + } + Ok(Some(Encoding::new(&label)?)) + } + + /// Return the file separator to use based on the CLI configuration. 
+    fn file_separator(&self) -> Result<Option<Vec<u8>>> {
+        // File separators are only used for the standard grep-line format.
+        if self.output_kind() != OutputKind::Standard {
+            return Ok(None);
+        }
+
+        let (ctx_before, ctx_after) = self.contexts()?;
+        Ok(if self.heading() {
+            Some(b"".to_vec())
+        } else if ctx_before > 0 || ctx_after > 0 {
+            Some(self.context_separator().clone())
+        } else {
+            None
+        })
+    }
+
+    /// Returns true if and only if matches should be grouped with file name
+    /// headings.
+    fn heading(&self) -> bool {
+        if self.is_present("no-heading") || self.is_present("vimgrep") {
+            false
+        } else {
+            atty::is(atty::Stream::Stdout)
+                || self.is_present("heading")
+                || self.is_present("pretty")
+        }
+    }
+
+    /// Returns true if and only if hidden files/directories should be
+    /// searched.
+    fn hidden(&self) -> bool {
+        self.is_present("hidden") || self.unrestricted_count() >= 2
+    }
+
+    /// Return all of the ignore file paths given on the command line.
+    fn ignore_paths(&self) -> Vec<PathBuf> {
+        let paths = match self.values_of_os("ignore-file") {
+            None => return vec![],
+            Some(paths) => paths,
+        };
+        paths.map(|p| Path::new(p).to_path_buf()).collect()
+    }
+
+    /// Returns true if and only if ripgrep is invoked in a way where it knows
+    /// it is searching exactly one thing.
+    fn is_one_search(&self, paths: &[PathBuf]) -> bool {
+        if paths.len() != 1 {
+            return false;
+        }
+        self.is_only_stdin(paths) || paths[0].is_file()
+    }
+
+    /// Returns true if and only if we're only searching a single thing and
+    /// that thing is stdin.
+    fn is_only_stdin(&self, paths: &[PathBuf]) -> bool {
+        paths == [Path::new("-")]
+    }
+
+    /// Returns true if and only if we should show line numbers.
+    fn line_number(&self, paths: &[PathBuf]) -> bool {
+        if self.output_kind() == OutputKind::Summary {
+            return false;
+        }
+        if self.is_present("no-line-number") {
+            return false;
+        }
+        if self.output_kind() == OutputKind::JSON {
+            return true;
+        }
+
+        // A few things can imply counting line numbers. In particular, we
+        // generally want to show line numbers by default when printing to a
+        // tty for human consumption, except for one interesting case: when
+        // we're only searching stdin. This makes pipelines work as expected.
+        (atty::is(atty::Stream::Stdout) && !self.is_only_stdin(paths))
+            || self.is_present("line-number")
+            || self.is_present("column")
+            || self.is_present("pretty")
+            || self.is_present("vimgrep")
+    }
+
+    /// The maximum number of columns allowed on each line.
+    ///
+    /// If `0` is provided, then this returns `None`.
+    fn max_columns(&self) -> Result<Option<u64>> {
+        Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64))
+    }
+
+    /// The maximum number of matches permitted.
+    fn max_count(&self) -> Result<Option<u64>> {
+        Ok(self.usize_of("max-count")?.map(|n| n as u64))
+    }

    /// Parses the max-filesize argument option into a byte count.
-    fn max_filesize(&self) -> Result<Option<u64>> {
-        self.parse_human_readable_size_arg("max-filesize")
+    fn max_file_size(&self) -> Result<Option<u64>> {
+        self.parse_human_readable_size("max-filesize")
+    }
+
+    /// Returns whether we should attempt to use memory maps or not.
+    fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice {
+        // SAFETY: Memory maps are difficult to impossible to encapsulate
+        // safely in a portable way that doesn't simultaneously negate some of
+        // the benefits of using memory maps. For ripgrep's use, we never mutate
+        // a memory map and generally never store the contents of a memory map
+        // in a data structure that depends on immutability. Generally
+        // speaking, the worst thing that can happen is a SIGBUS (if the
+        // underlying file is truncated while reading it), which will cause
+        // ripgrep to abort. This reasoning should be treated as suspect.
+        let maybe = unsafe { MmapChoice::auto() };
+        let never = MmapChoice::never();
+        if self.is_present("no-mmap") {
+            never
+        } else if self.is_present("mmap") {
+            maybe
+        } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) {
+            // If we're only searching a few paths and all of them are
+            // files, then memory maps are probably faster.
+            maybe
+        } else {
+            never
+        }
    }

    /// Returns true if ignore files should be ignored.
    fn no_ignore(&self) -> bool {
-        self.is_present("no-ignore")
-            || self.occurrences_of("unrestricted") >= 1
+        self.is_present("no-ignore") || self.unrestricted_count() >= 1
    }

    /// Returns true if global ignore files should be ignored.
@@ -1032,18 +996,356 @@ impl<'a> ArgMatches<'a> {
        self.is_present("no-ignore-vcs") || self.no_ignore()
    }

-    /// Returns true if and only if hidden files/directories should be
-    /// searched.
-    fn hidden(&self) -> bool {
-        self.is_present("hidden") || self.occurrences_of("unrestricted") >= 2
+    /// Determine the type of output we should produce.
+    fn output_kind(&self) -> OutputKind {
+        if self.is_present("quiet") {
+            // While we don't technically print results (or aggregate results)
+            // in quiet mode, we still support the --stats flag, and those
+            // stats are computed by the Summary printer for now.
+            return OutputKind::Summary;
+        } else if self.is_present("json") {
+            return OutputKind::JSON;
+        }
+
+        let (count, count_matches) = self.counts();
+        let summary =
+            count
+            || count_matches
+            || self.is_present("files-with-matches")
+            || self.is_present("files-without-match");
+        if summary {
+            OutputKind::Summary
+        } else {
+            OutputKind::Standard
+        }
    }

-    /// Returns true if and only if all files should be treated as if they
-    /// were text, even if ripgrep would detect it as a binary file.
-    fn text(&self) -> bool {
-        self.is_present("text") || self.occurrences_of("unrestricted") >= 3
+    /// Builds the set of glob overrides from the command line flags.
+    fn overrides(&self) -> Result<Override> {
+        let mut builder = OverrideBuilder::new(env::current_dir()?);
+        for glob in self.values_of_lossy_vec("glob") {
+            builder.add(&glob)?;
+        }
+        // This only enables case insensitivity for subsequent globs.
+        builder.case_insensitive(true)?;
+        for glob in self.values_of_lossy_vec("iglob") {
+            builder.add(&glob)?;
+        }
+        Ok(builder.build()?)
    }

+    /// Return all file paths that ripgrep should search.
+    ///
+    /// If no paths were given, then this returns an empty list.
+    fn paths(&self) -> Vec<PathBuf> {
+        let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
+            None => vec![],
+            Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(),
+        };
+        // If --file, --files or --regexp is given, then the first path is
+        // always in `pattern`.
+        if self.is_present("file")
+            || self.is_present("files")
+            || self.is_present("regexp")
+        {
+            if let Some(path) = self.value_of_os("pattern") {
+                paths.insert(0, Path::new(path).to_path_buf());
+            }
+        }
+        paths
+    }
+
+    /// Return the default path that ripgrep should search. This should only
+    /// be used when ripgrep is not otherwise given at least one file path
+    /// as a positional argument.
+ fn path_default(&self) -> PathBuf { + let file_is_stdin = self.values_of_os("file") + .map_or(false, |mut files| files.any(|f| f == "-")); + let search_cwd = + atty::is(atty::Stream::Stdin) + || !stdin_is_readable() + || (self.is_present("file") && file_is_stdin) + || self.is_present("files") + || self.is_present("type-list"); + if search_cwd { + Path::new("./").to_path_buf() + } else { + Path::new("-").to_path_buf() + } + } + + /// Returns the unescaped path separator as a single byte, if one exists. + /// + /// If the provided path separator is more than a single byte, then an + /// error is returned. + fn path_separator(&self) -> Result> { + let sep = match self.value_of_lossy("path-separator") { + None => return Ok(None), + Some(sep) => unescape(&sep), + }; + if sep.is_empty() { + Ok(None) + } else if sep.len() > 1 { + Err(From::from(format!( + "A path separator must be exactly one byte, but \ + the given separator is {} bytes: {}\n\ + In some shells on Windows '/' is automatically \ + expanded. Use '//' instead.", + sep.len(), + escape(&sep), + ))) + } else { + Ok(Some(sep[0])) + } + } + + /// Returns the byte that should be used to terminate paths. + /// + /// Typically, this is only set to `\x00` when the --null flag is provided, + /// and `None` otherwise. + fn path_terminator(&self) -> Option { + if self.is_present("null") { + Some(b'\x00') + } else { + None + } + } + + /// Get a sequence of all available patterns from the command line. + /// This includes reading the -e/--regexp and -f/--file flags. + /// + /// Note that if -F/--fixed-strings is set, then all patterns will be + /// escaped. If -x/--line-regexp is set, then all patterns are surrounded + /// by `^...$`. Other things, such as --word-regexp, are handled by the + /// regex matcher itself. + /// + /// If any pattern is invalid UTF-8, then an error is returned. + fn patterns(&self) -> Result> { + if self.is_present("files") || self.is_present("type-list") { + return Ok(vec![]); + } + let mut pats = vec![]; + match self.values_of_os("regexp") { + None => { + if self.values_of_os("file").is_none() { + if let Some(os_pat) = self.value_of_os("pattern") { + pats.push(self.pattern_from_os_str(os_pat)?); + } + } + } + Some(os_pats) => { + for os_pat in os_pats { + pats.push(self.pattern_from_os_str(os_pat)?); + } + } + } + if let Some(files) = self.values_of_os("file") { + for file in files { + if file == "-" { + let stdin = io::stdin(); + for line in stdin.lock().lines() { + pats.push(self.pattern_from_str(&line?)); + } + } else { + let f = File::open(file)?; + for line in io::BufReader::new(f).lines() { + pats.push(self.pattern_from_str(&line?)); + } + } + } + } + Ok(pats) + } + + /// Returns a pattern that is guaranteed to produce an empty regular + /// expression that is valid in any position. + fn pattern_empty(&self) -> String { + // This would normally just be an empty string, which works on its + // own, but if the patterns are joined in a set of alternations, then + // you wind up with `foo|`, which is currently invalid in Rust's regex + // engine. + "(?:z{0})*".to_string() + } + + /// Converts an OsStr pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. + /// + /// If the pattern is not valid UTF-8, then an error is returned. + fn pattern_from_os_str(&self, pat: &OsStr) -> Result { + let s = pattern_to_str(pat)?; + Ok(self.pattern_from_str(s)) + } + + /// Converts a &str pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. 
+    fn pattern_from_str(&self, pat: &str) -> String {
+        let litpat = self.pattern_literal(pat.to_string());
+        let s = self.pattern_line(litpat);
+
+        if s.is_empty() {
+            self.pattern_empty()
+        } else {
+            s
+        }
+    }
+
+    /// Returns the given pattern as a line pattern if the -x/--line-regexp
+    /// flag is set. Otherwise, the pattern is returned unchanged.
+    fn pattern_line(&self, pat: String) -> String {
+        if self.is_present("line-regexp") {
+            format!(r"^(?:{})$", pat)
+        } else {
+            pat
+        }
+    }
+
+    /// Returns the given pattern as a literal pattern if the
+    /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
+    /// unchanged.
+    fn pattern_literal(&self, pat: String) -> String {
+        if self.is_present("fixed-strings") {
+            regex::escape(&pat)
+        } else {
+            pat
+        }
+    }
+
+    /// Returns the preprocessor command if one was specified.
+    fn preprocessor(&self) -> Option<PathBuf> {
+        let path = match self.value_of_os("pre") {
+            None => return None,
+            Some(path) => path,
+        };
+        if path.is_empty() {
+            return None;
+        }
+        Some(Path::new(path).to_path_buf())
+    }
+
+    /// Parse the regex-size-limit argument option into a byte count.
+    fn regex_size_limit(&self) -> Result<Option<usize>> {
+        let r = self.parse_human_readable_size("regex-size-limit")?;
+        u64_to_usize("regex-size-limit", r)
+    }
+
+    /// Returns the replacement string as UTF-8 bytes if it exists.
+    fn replacement(&self) -> Option<Vec<u8>> {
+        self.value_of_lossy("replace").map(|s| s.into_bytes())
+    }
+
+    /// Returns true if and only if aggregate statistics for a search should
+    /// be tracked.
+    ///
+    /// Generally, this is only enabled when explicitly requested in the
+    /// command line arguments via the --stats flag, but this can also be
+    /// enabled implicitly via the output format, e.g., for JSON Lines.
+    fn stats(&self) -> bool {
+        self.output_kind() == OutputKind::JSON || self.is_present("stats")
+    }
+
+    /// Returns a handle to stdout for filtering search.
+    ///
+    /// A handle is returned if and only if ripgrep's stdout is being
+    /// redirected to a file. The handle returned corresponds to that file.
+    ///
+    /// This can be used to ensure that we do not attempt to search a file
+    /// that ripgrep is writing to.
+    fn stdout_handle(&self) -> Option<Handle> {
+        let h = match Handle::stdout() {
+            Err(_) => return None,
+            Ok(h) => h,
+        };
+        let md = match h.as_file().metadata() {
+            Err(_) => return None,
+            Ok(md) => md,
+        };
+        if !md.is_file() {
+            return None;
+        }
+        Some(h)
+    }
+
+    /// When the output format is `Summary`, this returns the type of summary
+    /// output to show.
+    ///
+    /// This returns `None` if the output format is not `Summary`.
+    fn summary_kind(&self) -> Option<SummaryKind> {
+        let (count, count_matches) = self.counts();
+        if self.is_present("quiet") {
+            Some(SummaryKind::Quiet)
+        } else if count_matches {
+            Some(SummaryKind::CountMatches)
+        } else if count {
+            Some(SummaryKind::Count)
+        } else if self.is_present("files-with-matches") {
+            Some(SummaryKind::PathWithMatch)
+        } else if self.is_present("files-without-match") {
+            Some(SummaryKind::PathWithoutMatch)
+        } else {
+            None
+        }
+    }
+
+    /// Return the number of threads that should be used for parallelism.
+    fn threads(&self) -> Result<usize> {
+        if self.is_present("sort-files") {
+            return Ok(1);
+        }
+        let threads = self.usize_of("threads")?.unwrap_or(0);
+        Ok(if threads == 0 {
+            cmp::min(12, num_cpus::get())
+        } else {
+            threads
+        })
+    }
+
+    /// Builds a file type matcher from the command line flags.
+ fn types(&self) -> Result { + let mut builder = TypesBuilder::new(); + builder.add_defaults(); + for ty in self.values_of_lossy_vec("type-clear") { + builder.clear(&ty); + } + for def in self.values_of_lossy_vec("type-add") { + builder.add_def(&def)?; + } + for ty in self.values_of_lossy_vec("type") { + builder.select(&ty); + } + for ty in self.values_of_lossy_vec("type-not") { + builder.negate(&ty); + } + builder.build().map_err(From::from) + } + + /// Returns the number of times the `unrestricted` flag is provided. + fn unrestricted_count(&self) -> u64 { + self.occurrences_of("unrestricted") + } + + /// Returns true if and only if PCRE2's Unicode mode should be enabled. + fn pcre2_unicode(&self) -> bool { + // PCRE2 Unicode is enabled by default, so only disable it when told + // to do so explicitly. + self.is_present("pcre2") && !self.is_present("no-pcre2-unicode") + } + + /// Returns true if and only if file names containing each match should + /// be emitted. + fn with_filename(&self, paths: &[PathBuf]) -> bool { + if self.is_present("no-filename") { + false + } else { + self.is_present("with-filename") + || self.is_present("vimgrep") + || paths.len() > 1 + || paths.get(0).map_or(false, |p| p.is_dir()) + } + } +} + +/// Lower level generic helper methods for teasing values out of clap. +impl ArgMatches { /// Like values_of_lossy, but returns an empty vec if the flag is not /// present. fn values_of_lossy_vec(&self, name: &str) -> Vec { @@ -1056,16 +1358,15 @@ impl<'a> ArgMatches<'a> { /// If the number is zero, then it is considered absent and `None` is /// returned. fn usize_of_nonzero(&self, name: &str) -> Result> { - match self.value_of_lossy(name) { - None => Ok(None), - Some(v) => v.parse().map_err(From::from).map(|n| { - if n == 0 { - None - } else { - Some(n) - } - }), - } + let n = match self.usize_of(name)? { + None => return Ok(None), + Some(n) => n, + }; + Ok(if n == 0 { + None + } else { + Some(n) + }) } /// Safely reads an arg value with the given name, and if it's present, @@ -1077,11 +1378,56 @@ impl<'a> ArgMatches<'a> { } } - // The following methods mostly dispatch to the underlying clap methods - // directly. Methods that would otherwise get a single value will fetch - // all values and return the last one. (Clap returns the first one.) We - // only define the ones we need. + /// Parses an argument of the form `[0-9]+(KMG)?`. + /// + /// If the aforementioned format is not recognized, then this returns an + /// error. + fn parse_human_readable_size( + &self, + arg_name: &str, + ) -> Result> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap(); + } + let arg_value = match self.value_of_lossy(arg_name) { + Some(x) => x, + None => return Ok(None) + }; + let caps = RE + .captures(&arg_value) + .ok_or_else(|| { + format!("invalid format for {}", arg_name) + })?; + + let value = caps[1].parse::()?; + let suffix = caps.get(2).map(|x| x.as_str()); + + let v_10 = value.checked_mul(1024); + let v_20 = v_10.and_then(|x| x.checked_mul(1024)); + let v_30 = v_20.and_then(|x| x.checked_mul(1024)); + let try_suffix = |x: Option| { + if x.is_some() { + Ok(x) + } else { + Err(From::from(format!("number too large for {}", arg_name))) + } + }; + match suffix { + None => Ok(Some(value)), + Some("K") => try_suffix(v_10), + Some("M") => try_suffix(v_20), + Some("G") => try_suffix(v_30), + _ => Err(From::from(format!("invalid suffix for {}", arg_name))) + } + } +} + +/// The following methods mostly dispatch to the underlying clap methods +/// directly. 
Methods that would otherwise get a single value will fetch all +/// values and return the last one. (Clap returns the first one.) We only +/// define the ones we need. +impl ArgMatches { fn is_present(&self, name: &str) -> bool { self.0.is_present(name) } @@ -1098,83 +1444,61 @@ impl<'a> ArgMatches<'a> { self.0.values_of_lossy(name) } - fn value_of_os(&'a self, name: &str) -> Option<&'a OsStr> { + fn value_of_os(&self, name: &str) -> Option<&OsStr> { self.0.value_of_os(name) } - fn values_of_os(&'a self, name: &str) -> Option> { + fn values_of_os(&self, name: &str) -> Option { self.0.values_of_os(name) } } +/// Convert an OsStr to a Unicode string. +/// +/// Patterns _must_ be valid UTF-8, so if the given OsStr isn't valid UTF-8, +/// this returns an error. fn pattern_to_str(s: &OsStr) -> Result<&str> { - match s.to_str() { - Some(s) => Ok(s), - None => Err(From::from(format!( + s.to_str().ok_or_else(|| { + From::from(format!( "Argument '{}' is not valid UTF-8. \ Use hex escape sequences to match arbitrary \ bytes in a pattern (e.g., \\xFF).", - s.to_string_lossy()))), + s.to_string_lossy() + )) + }) +} + +/// Inspect an error resulting from building a Rust regex matcher, and if it's +/// believed to correspond to a syntax error that PCRE2 could handle, then +/// add a message to suggest the use of -P/--pcre2. +#[cfg(feature = "pcre2")] +fn suggest_pcre2(msg: String) -> String { + if !msg.contains("backreferences") && !msg.contains("look-around") { + msg + } else { + format!("{} + +Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences +and look-around.", msg) } } -/// A simple thread safe abstraction for determining whether a search should -/// stop if the user has requested quiet mode. -#[derive(Clone, Debug)] -pub struct QuietMatched(Arc>); - -impl QuietMatched { - /// Create a new QuietMatched value. - /// - /// If quiet is true, then set_match and has_match will reflect whether - /// a search should quit or not because it found a match. - /// - /// If quiet is false, then set_match is always a no-op and has_match - /// always returns false. - fn new(quiet: bool) -> QuietMatched { - let atomic = if quiet { Some(AtomicBool::new(false)) } else { None }; - QuietMatched(Arc::new(atomic)) - } - - /// Returns true if and only if quiet mode is enabled and a match has - /// occurred. - pub fn has_match(&self) -> bool { - match *self.0 { - None => false, - Some(ref matched) => matched.load(Ordering::SeqCst), - } - } - - /// Sets whether a match has occurred or not. - /// - /// If quiet mode is disabled, then this is a no-op. - pub fn set_match(&self, yes: bool) -> bool { - match *self.0 { - None => false, - Some(_) if !yes => false, - Some(ref m) => { m.store(true, Ordering::SeqCst); true } - } - } -} - -/// Convert the result of a `parse_human_readable_size_arg` call into -/// a `usize`, failing if the type does not fit. -fn human_readable_to_usize( +/// Convert the result of parsing a human readable file size to a `usize`, +/// failing if the type does not fit. 
+fn u64_to_usize( arg_name: &str, value: Option, ) -> Result> { use std::usize; - match value { - None => Ok(None), - Some(v) => { - if v <= usize::MAX as u64 { - Ok(Some(v as usize)) - } else { - let msg = format!("number too large for {}", arg_name); - Err(From::from(msg)) - } - } + let value = match value { + None => return Ok(None), + Some(value) => value, + }; + if value <= usize::MAX as u64 { + Ok(Some(value as usize)) + } else { + Err(From::from(format!("number too large for {}", arg_name))) } } @@ -1182,7 +1506,6 @@ fn human_readable_to_usize( #[cfg(unix)] fn stdin_is_readable() -> bool { use std::os::unix::fs::FileTypeExt; - use same_file::Handle; let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) { Err(_) => return false, @@ -1194,48 +1517,17 @@ fn stdin_is_readable() -> bool { /// Returns true if and only if stdin is deemed searchable. #[cfg(windows)] fn stdin_is_readable() -> bool { - // On Windows, it's not clear what the possibilities are to me, so just - // always return true. - true -} + use std::os::windows::io::AsRawHandle; + use winapi::um::fileapi::GetFileType; + use winapi::um::winbase::{FILE_TYPE_DISK, FILE_TYPE_PIPE}; -/// Returns true if and only if this path points to a directory. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn path_is_dir(path: &Path) -> bool { - fs::metadata(path).map(|md| metadata_is_dir(&md)).unwrap_or(false) -} - -/// Returns true if and only if this entry points to a directory. -#[cfg(not(windows))] -fn path_is_dir(path: &Path) -> bool { - path.is_dir() -} - -/// Returns true if and only if this path points to a file. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn path_is_file(path: &Path) -> bool { - !path_is_dir(path) -} - -/// Returns true if and only if this entry points to a directory. -#[cfg(not(windows))] -fn path_is_file(path: &Path) -> bool { - path.is_file() -} - -/// Returns true if and only if the given metadata points to a directory. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn metadata_is_dir(md: &fs::Metadata) -> bool { - use std::os::windows::fs::MetadataExt; - use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; - md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + let handle = match Handle::stdin() { + Err(_) => return false, + Ok(handle) => handle, + }; + let raw_handle = handle.as_raw_handle(); + // SAFETY: As far as I can tell, it's not possible to use GetFileType in + // a way that violates safety. We give it a handle and we get an integer. + let ft = unsafe { GetFileType(raw_handle) }; + ft == FILE_TYPE_DISK || ft == FILE_TYPE_PIPE } diff --git a/src/config.rs b/src/config.rs index c47e6a50..eade0cca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,10 +12,7 @@ use std::path::{Path, PathBuf}; use Result; /// Return a sequence of arguments derived from ripgrep rc configuration files. -/// -/// If no_messages is false and there was a problem reading a config file, -/// then errors are printed to stderr. 
-pub fn args(no_messages: bool) -> Vec { +pub fn args() -> Vec { let config_path = match env::var_os("RIPGREP_CONFIG_PATH") { None => return vec![], Some(config_path) => { @@ -28,20 +25,20 @@ pub fn args(no_messages: bool) -> Vec { let (args, errs) = match parse(&config_path) { Ok((args, errs)) => (args, errs), Err(err) => { - if !no_messages { - eprintln!("{}", err); - } + message!("{}", err); return vec![]; } }; - if !no_messages && !errs.is_empty() { + if !errs.is_empty() { for err in errs { - eprintln!("{}:{}", config_path.display(), err); + message!("{}:{}", config_path.display(), err); } } debug!( "{}: arguments loaded from config file: {:?}", - config_path.display(), args); + config_path.display(), + args + ); args } @@ -59,7 +56,7 @@ fn parse>( let path = path.as_ref(); match File::open(&path) { Ok(file) => parse_reader(file), - Err(err) => errored!("{}: {}", path.display(), err), + Err(err) => Err(From::from(format!("{}: {}", path.display(), err))), } } diff --git a/src/logger.rs b/src/logger.rs index 8bd7e09c..f12f0b19 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -34,19 +34,30 @@ impl Log for Logger { match (record.file(), record.line()) { (Some(file), Some(line)) => { eprintln!( - "{}/{}/{}:{}: {}", - record.level(), record.target(), - file, line, record.args()); + "{}|{}|{}:{}: {}", + record.level(), + record.target(), + file, + line, + record.args() + ); } (Some(file), None) => { eprintln!( - "{}/{}/{}: {}", - record.level(), record.target(), file, record.args()); + "{}|{}|{}: {}", + record.level(), + record.target(), + file, + record.args() + ); } _ => { eprintln!( - "{}/{}: {}", - record.level(), record.target(), record.args()); + "{}|{}: {}", + record.level(), + record.target(), + record.args() + ); } } } diff --git a/src/main.rs b/src/main.rs index af22373e..33bc84cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,43 +1,34 @@ extern crate atty; -extern crate bytecount; #[macro_use] extern crate clap; -extern crate encoding_rs; -extern crate encoding_rs_io; extern crate globset; extern crate grep; extern crate ignore; #[macro_use] extern crate lazy_static; -extern crate libc; #[macro_use] extern crate log; -extern crate memchr; -extern crate memmap; extern crate num_cpus; extern crate regex; extern crate same_file; +#[macro_use] +extern crate serde_json; extern crate termcolor; #[cfg(windows)] extern crate winapi; -use std::error::Error; +use std::io; use std::process; -use std::result; -use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::mpsc; -use std::thread; -use std::time::{Duration, Instant}; +use std::sync::{Arc, Mutex}; +use std::time::Instant; + +use ignore::WalkState; use args::Args; -use worker::Work; +use subject::Subject; -macro_rules! 
errored { - ($($tt:tt)*) => { - return Err(From::from(format!($($tt)*))); - } -} +#[macro_use] +mod messages; mod app; mod args; @@ -45,20 +36,17 @@ mod config; mod decompressor; mod preprocessor; mod logger; -mod pathutil; -mod printer; -mod search_buffer; -mod search_stream; +mod path_printer; +mod search; +mod subject; mod unescape; -mod worker; -pub type Result = result::Result>; +pub type Result = ::std::result::Result>; -fn main() { - reset_sigpipe(); - match Args::parse().map(Arc::new).and_then(run) { - Ok(0) => process::exit(1), - Ok(_) => process::exit(0), +pub fn main() { + match Args::parse().and_then(run) { + Ok(true) => process::exit(0), + Ok(false) => process::exit(1), Err(err) => { eprintln!("{}", err); process::exit(2); @@ -66,382 +54,242 @@ fn main() { } } -fn run(args: Arc) -> Result { - if args.never_match() { - return Ok(0); - } - let threads = args.threads(); - if args.files() { - if threads == 1 || args.is_one_path() { - run_files_one_thread(&args) - } else { - run_files_parallel(args) - } - } else if args.type_list() { - run_types(&args) - } else if threads == 1 || args.is_one_path() { - run_one_thread(&args) - } else { - run_parallel(&args) +fn run(args: Args) -> Result { + use args::Command::*; + + match args.command()? { + Search => search(args), + SearchParallel => search_parallel(args), + SearchNever => Ok(false), + Files => files(args), + FilesParallel => files_parallel(args), + Types => types(args), } } -fn run_parallel(args: &Arc) -> Result { - let start_time = Instant::now(); - let bufwtr = Arc::new(args.buffer_writer()); - let quiet_matched = args.quiet_matched(); - let paths_searched = Arc::new(AtomicUsize::new(0)); - let match_line_count = Arc::new(AtomicUsize::new(0)); - let paths_matched = Arc::new(AtomicUsize::new(0)); +/// The top-level entry point for single-threaded search. This recursively +/// steps through the file list (current directory by default) and searches +/// each file sequentially. +fn search(args: Args) -> Result { + let started_at = Instant::now(); + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut stats = args.stats()?; + let mut searcher = args.search_worker(args.stdout())?; + let mut matched = false; - args.walker_parallel().run(|| { - let args = Arc::clone(args); - let quiet_matched = quiet_matched.clone(); - let paths_searched = paths_searched.clone(); - let match_line_count = match_line_count.clone(); - let paths_matched = paths_matched.clone(); + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => continue, + }; + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + break; + } + message!("{}: {}", subject.path().display(), err); + continue; + } + }; + matched = matched || search_result.has_match(); + if let Some(ref mut stats) = stats { + *stats += search_result.stats().unwrap(); + } + if matched && quit_after_match { + break; + } + } + if let Some(ref stats) = stats { + let elapsed = Instant::now().duration_since(started_at); + // We don't care if we couldn't print this successfully. + let _ = searcher.print_stats(elapsed, stats); + } + Ok(matched) +} + +/// The top-level entry point for multi-threaded search. The parallelism is +/// itself achieved by the recursive directory traversal. 
All we need to do is +/// feed it a worker for performing a search on each file. +fn search_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + + let quit_after_match = args.quit_after_match()?; + let started_at = Instant::now(); + let subject_builder = Arc::new(args.subject_builder()); + let bufwtr = Arc::new(args.buffer_writer()?); + let stats = Arc::new(args.stats()?.map(Mutex::new)); + let matched = Arc::new(AtomicBool::new(false)); + let mut searcher_err = None; + args.walker_parallel()?.run(|| { + let args = args.clone(); let bufwtr = Arc::clone(&bufwtr); - let mut buf = bufwtr.buffer(); - let mut worker = args.worker(); - Box::new(move |result| { - use ignore::WalkState::*; + let stats = Arc::clone(&stats); + let matched = Arc::clone(&matched); + let subject_builder = Arc::clone(&subject_builder); + let mut searcher = match args.search_worker(bufwtr.buffer()) { + Ok(searcher) => searcher, + Err(err) => { + searcher_err = Some(err); + return Box::new(move |_| { + WalkState::Quit + }); + } + }; - if quiet_matched.has_match() { - return Quit; - } - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { - None => return Continue, - Some(dent) => dent, + Box::new(move |result| { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, }; - paths_searched.fetch_add(1, Ordering::SeqCst); - buf.clear(); - { - // This block actually executes the search and prints the - // results into outbuf. - let mut printer = args.printer(&mut buf); - let count = - if dent.is_stdin() { - worker.run(&mut printer, Work::Stdin) - } else { - worker.run(&mut printer, Work::DirEntry(dent)) - }; - match_line_count.fetch_add(count as usize, Ordering::SeqCst); - if quiet_matched.set_match(count > 0) { - return Quit; - } - if args.stats() && count > 0 { - paths_matched.fetch_add(1, Ordering::SeqCst); + searcher.printer().get_mut().clear(); + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + message!("{}: {}", subject.path().display(), err); + return WalkState::Continue; } + }; + if search_result.has_match() { + matched.store(true, SeqCst); + } + if let Some(ref locked_stats) = *stats { + let mut stats = locked_stats.lock().unwrap(); + *stats += search_result.stats().unwrap(); + } + if let Err(err) = bufwtr.print(searcher.printer().get_mut()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + return WalkState::Quit; + } + // Otherwise, we continue on our merry way. + message!("{}: {}", subject.path().display(), err); + } + if matched.load(SeqCst) && quit_after_match { + WalkState::Quit + } else { + WalkState::Continue } - // BUG(burntsushi): We should handle this error instead of ignoring - // it. 
See: https://github.com/BurntSushi/ripgrep/issues/200 - let _ = bufwtr.print(&buf); - Continue }) }); - if !args.paths().is_empty() && paths_searched.load(Ordering::SeqCst) == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } + if let Some(err) = searcher_err.take() { + return Err(err); } - let match_line_count = match_line_count.load(Ordering::SeqCst) as u64; - let paths_searched = paths_searched.load(Ordering::SeqCst) as u64; - let paths_matched = paths_matched.load(Ordering::SeqCst) as u64; - if args.stats() { - print_stats( - match_line_count, - paths_searched, - paths_matched, - start_time.elapsed(), - ); + if let Some(ref locked_stats) = *stats { + let elapsed = Instant::now().duration_since(started_at); + let stats = locked_stats.lock().unwrap(); + let mut searcher = args.search_worker(args.stdout())?; + // We don't care if we couldn't print this successfully. + let _ = searcher.print_stats(elapsed, &stats); } - Ok(match_line_count) + Ok(matched.load(SeqCst)) } -fn run_one_thread(args: &Arc) -> Result { - let start_time = Instant::now(); - let mut stdout = args.stdout(); - let mut worker = args.worker(); - let mut paths_searched: u64 = 0; - let mut match_line_count = 0; - let mut paths_matched: u64 = 0; - for result in args.walker() { - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { +/// The top-level entry point for listing files without searching them. This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using a single thread. +fn files(args: Args) -> Result { + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut matched = false; + let mut path_printer = args.path_printer(args.stdout())?; + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, None => continue, - Some(dent) => dent, }; - let mut printer = args.printer(&mut stdout); - if match_line_count > 0 { - if args.quiet() { + matched = true; + if quit_after_match { + break; + } + if let Err(err) = path_printer.write_path(subject.path()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { break; } - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } - } - paths_searched += 1; - let count = - if dent.is_stdin() { - worker.run(&mut printer, Work::Stdin) - } else { - worker.run(&mut printer, Work::DirEntry(dent)) - }; - match_line_count += count; - if args.stats() && count > 0 { - paths_matched += 1; + // Otherwise, we have some other error that's preventing us from + // writing to stdout, so we should bubble it up. + return Err(err.into()); } } - if !args.paths().is_empty() && paths_searched == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } - } - if args.stats() { - print_stats( - match_line_count, - paths_searched, - paths_matched, - start_time.elapsed(), - ); - } - Ok(match_line_count) + Ok(matched) } -fn run_files_parallel(args: Arc) -> Result { - let print_args = Arc::clone(&args); - let (tx, rx) = mpsc::channel::(); - let print_thread = thread::spawn(move || { - let mut printer = print_args.printer(print_args.stdout()); - let mut file_count = 0; - for dent in rx.iter() { - if !print_args.quiet() { - printer.path(dent.path()); - } - file_count += 1; +/// The top-level entry point for listing files without searching them. 
This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using multiple threads. +fn files_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + use std::sync::mpsc; + use std::thread; + + let quit_after_match = args.quit_after_match()?; + let subject_builder = Arc::new(args.subject_builder()); + let mut path_printer = args.path_printer(args.stdout())?; + let matched = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel::(); + + let print_thread = thread::spawn(move || -> io::Result<()> { + for subject in rx.iter() { + path_printer.write_path(subject.path())?; } - file_count + Ok(()) }); - args.walker_parallel().run(move || { - let args = Arc::clone(&args); + args.walker_parallel()?.run(|| { + let subject_builder = Arc::clone(&subject_builder); + let matched = Arc::clone(&matched); let tx = tx.clone(); + Box::new(move |result| { - if let Some(dent) = get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { - tx.send(dent).unwrap(); - if args.quiet() { - return ignore::WalkState::Quit + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, + }; + matched.store(true, SeqCst); + if quit_after_match { + WalkState::Quit + } else { + match tx.send(subject) { + Ok(_) => WalkState::Continue, + Err(_) => WalkState::Quit, } } - ignore::WalkState::Continue }) }); - Ok(print_thread.join().unwrap()) -} - -fn run_files_one_thread(args: &Arc) -> Result { - let mut printer = args.printer(args.stdout()); - let mut file_count = 0; - for result in args.walker() { - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { - None => continue, - Some(dent) => dent, - }; - file_count += 1; - if args.quiet() { - break; - } else { - printer.path(dent.path()); + drop(tx); + if let Err(err) = print_thread.join().unwrap() { + // A broken pipe means graceful termination, so fall through. + // Otherwise, something bad happened while writing to stdout, so bubble + // it up. + if err.kind() != io::ErrorKind::BrokenPipe { + return Err(err.into()); } } - Ok(file_count) + Ok(matched.load(SeqCst)) } -fn run_types(args: &Arc) -> Result { - let mut printer = args.printer(args.stdout()); - let mut ty_count = 0; - for def in args.type_defs() { - printer.type_def(def); - ty_count += 1; - } - Ok(ty_count) -} +/// The top-level entry point for --type-list. +fn types(args: Args) -> Result { + let mut count = 0; + let mut stdout = args.stdout(); + for def in args.type_defs()? { + count += 1; + stdout.write_all(def.name().as_bytes())?; + stdout.write_all(b": ")?; -fn get_or_log_dir_entry( - result: result::Result, - stdout_handle: Option<&same_file::Handle>, - files_only: bool, - no_messages: bool, - no_ignore_messages: bool, -) -> Option { - match result { - Err(err) => { - if !no_messages { - eprintln!("{}", err); + let mut first = true; + for glob in def.globs() { + if !first { + stdout.write_all(b", ")?; } - None - } - Ok(dent) => { - if let Some(err) = dent.error() { - if !no_messages && !no_ignore_messages { - eprintln!("{}", err); - } - } - if dent.file_type().is_none() { - return Some(dent); // entry is stdin - } - // A depth of 0 means the user gave the path explicitly, so we - // should always try to search it. 
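// A standalone sketch of the single-printer-thread pattern in `files_parallel`
// above: workers send paths over an mpsc channel and one thread owns stdout,
// so output lines are never interleaved. Std-only; worker count and file
// names are made up.
use std::io::{self, Write};
use std::sync::mpsc;
use std::thread;

fn main() -> io::Result<()> {
    let (tx, rx) = mpsc::channel::<String>();

    // The printer thread drains the channel until every sender is dropped.
    let printer = thread::spawn(move || -> io::Result<()> {
        let stdout = io::stdout();
        let mut out = stdout.lock();
        for path in rx.iter() {
            writeln!(out, "{}", path)?;
        }
        Ok(())
    });

    // Simulated workers, each with its own clone of the sender.
    let workers: Vec<_> = (0..3).map(|i| {
        let tx = tx.clone();
        thread::spawn(move || {
            // Ignore send errors: a closed channel just means "stop".
            let _ = tx.send(format!("file-{}.rs", i));
        })
    }).collect();
    for worker in workers {
        worker.join().unwrap();
    }

    // Drop the original sender so the printer's loop can finish.
    drop(tx);
    printer.join().unwrap()
}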
- if dent.depth() == 0 && !ignore_entry_is_dir(&dent) { - return Some(dent); - } else if !ignore_entry_is_file(&dent) { - return None; - } - // If we are redirecting stdout to a file, then don't search that - // file. - if !files_only && is_stdout_file(&dent, stdout_handle, no_messages) { - return None; - } - Some(dent) + stdout.write_all(glob.as_bytes())?; + first = false; } + stdout.write_all(b"\n")?; } -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// directory. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool { - use std::os::windows::fs::MetadataExt; - use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; - - dent.metadata().map(|md| { - md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 - }).unwrap_or(false) -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// directory. -#[cfg(not(windows))] -fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool { - dent.file_type().map_or(false, |ft| ft.is_dir()) -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// file. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { - !ignore_entry_is_dir(dent) -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// file. -#[cfg(not(windows))] -fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { - dent.file_type().map_or(false, |ft| ft.is_file()) -} - -fn is_stdout_file( - dent: &ignore::DirEntry, - stdout_handle: Option<&same_file::Handle>, - no_messages: bool, -) -> bool { - let stdout_handle = match stdout_handle { - None => return false, - Some(stdout_handle) => stdout_handle, - }; - // If we know for sure that these two things aren't equal, then avoid - // the costly extra stat call to determine equality. - if !maybe_dent_eq_handle(dent, stdout_handle) { - return false; - } - match same_file::Handle::from_path(dent.path()) { - Ok(h) => stdout_handle == &h, - Err(err) => { - if !no_messages { - eprintln!("{}: {}", dent.path().display(), err); - } - false - } - } -} - -#[cfg(unix)] -fn maybe_dent_eq_handle( - dent: &ignore::DirEntry, - handle: &same_file::Handle, -) -> bool { - dent.ino() == Some(handle.ino()) -} - -#[cfg(not(unix))] -fn maybe_dent_eq_handle(_: &ignore::DirEntry, _: &same_file::Handle) -> bool { - true -} - -fn eprint_nothing_searched() { - eprintln!("No files were searched, which means ripgrep probably \ - applied a filter you didn't expect. \ - Try running again with --debug."); -} - -fn print_stats( - match_count: u64, - paths_searched: u64, - paths_matched: u64, - time_elapsed: Duration, -) { - let time_elapsed = - time_elapsed.as_secs() as f64 - + (time_elapsed.subsec_nanos() as f64 * 1e-9); - println!("\n{} matched lines\n\ - {} files contained matches\n\ - {} files searched\n\ - {:.3} seconds", match_count, paths_matched, - paths_searched, time_elapsed); -} - -// The Rust standard library suppresses the default SIGPIPE behavior, so that -// writing to a closed pipe doesn't kill the process. The goal is to instead -// handle errors through the normal result mechanism. Ripgrep needs some -// refactoring before it will be able to do that, however, so we re-enable the -// standard SIGPIPE behavior as a workaround. See -// https://github.com/BurntSushi/ripgrep/issues/200. 
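// A tiny sketch of the `--type-list` formatting done in `types` above: each
// definition prints as "name: glob1, glob2, ...", with a `first` flag deciding
// when a ", " separator is needed. The sample type definitions are made up.
use std::io::{self, Write};

fn write_type_def<W: Write>(wtr: &mut W, name: &str, globs: &[&str]) -> io::Result<()> {
    wtr.write_all(name.as_bytes())?;
    wtr.write_all(b": ")?;
    let mut first = true;
    for glob in globs {
        if !first {
            wtr.write_all(b", ")?;
        }
        wtr.write_all(glob.as_bytes())?;
        first = false;
    }
    wtr.write_all(b"\n")
}

fn main() -> io::Result<()> {
    let stdout = io::stdout();
    let mut out = stdout.lock();
    write_type_def(&mut out, "rust", &["*.rs"])?;
    write_type_def(&mut out, "markdown", &["*.md", "*.markdown"])
}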
-#[cfg(unix)] -fn reset_sigpipe() { - unsafe { - libc::signal(libc::SIGPIPE, libc::SIG_DFL); - } -} - -#[cfg(not(unix))] -fn reset_sigpipe() { - // no-op + Ok(count > 0) } diff --git a/src/messages.rs b/src/messages.rs new file mode 100644 index 00000000..2016ff64 --- /dev/null +++ b/src/messages.rs @@ -0,0 +1,50 @@ +use std::sync::atomic::{ATOMIC_BOOL_INIT, AtomicBool, Ordering}; + +static MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; +static IGNORE_MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; + +#[macro_export] +macro_rules! message { + ($($tt:tt)*) => { + if ::messages::messages() { + eprintln!($($tt)*); + } + } +} + +#[macro_export] +macro_rules! ignore_message { + ($($tt:tt)*) => { + if ::messages::messages() && ::messages::ignore_messages() { + eprintln!($($tt)*); + } + } +} + +/// Returns true if and only if messages should be shown. +pub fn messages() -> bool { + MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether messages should be shown or not. +/// +/// By default, they are not shown. +pub fn set_messages(yes: bool) { + MESSAGES.store(yes, Ordering::SeqCst) +} + +/// Returns true if and only if "ignore" related messages should be shown. +pub fn ignore_messages() -> bool { + IGNORE_MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether "ignore" related messages should be shown or not. +/// +/// By default, they are not shown. +/// +/// Note that this is overridden if `messages` is disabled. Namely, if +/// `messages` is disabled, then "ignore" messages are never shown, regardless +/// of this setting. +pub fn set_ignore_messages(yes: bool) { + IGNORE_MESSAGES.store(yes, Ordering::SeqCst) +} diff --git a/src/path_printer.rs b/src/path_printer.rs new file mode 100644 index 00000000..324a27c4 --- /dev/null +++ b/src/path_printer.rs @@ -0,0 +1,101 @@ +use std::io; +use std::path::Path; + +use grep::printer::{ColorSpecs, PrinterPath}; +use termcolor::WriteColor; + +/// A configuration for describing how paths should be written. +#[derive(Clone, Debug)] +struct Config { + colors: ColorSpecs, + separator: Option, + terminator: u8, +} + +impl Default for Config { + fn default() -> Config { + Config { + colors: ColorSpecs::default(), + separator: None, + terminator: b'\n', + } + } +} + +/// A builder for constructing things to search over. +#[derive(Clone, Debug)] +pub struct PathPrinterBuilder { + config: Config, +} + +impl PathPrinterBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> PathPrinterBuilder { + PathPrinterBuilder { config: Config::default() } + } + + /// Create a new path printer with the current configuration that writes + /// paths to the given writer. + pub fn build(&self, wtr: W) -> PathPrinter { + PathPrinter { + config: self.config.clone(), + wtr: wtr, + } + } + + /// Set the color specification for this printer. + /// + /// Currently, only the `path` component of the given specification is + /// used. + pub fn color_specs( + &mut self, + specs: ColorSpecs, + ) -> &mut PathPrinterBuilder { + self.config.colors = specs; + self + } + + /// A path separator. + /// + /// When provided, the path's default separator will be replaced with + /// the given separator. + /// + /// This is not set by default, and the system's default path separator + /// will be used. + pub fn separator(&mut self, sep: Option) -> &mut PathPrinterBuilder { + self.config.separator = sep; + self + } + + /// A path terminator. + /// + /// When printing a path, it will be by terminated by the given byte. + /// + /// This is set to `\n` by default. 
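// A condensed, standalone sketch of the message-gating pattern in
// src/messages.rs above: a process-global atomic flag decides whether
// diagnostics go to stderr, and a macro checks it at each call site. This
// sketch uses `AtomicBool::new(false)` as a modern stand-in for the
// `ATOMIC_BOOL_INIT` constant used in the patch.
use std::sync::atomic::{AtomicBool, Ordering};

static MESSAGES: AtomicBool = AtomicBool::new(false);

macro_rules! message {
    ($($tt:tt)*) => {
        if MESSAGES.load(Ordering::SeqCst) {
            eprintln!($($tt)*);
        }
    }
}

fn main() {
    message!("suppressed: messages are off by default");
    MESSAGES.store(true, Ordering::SeqCst);
    message!("shown: {} enabled the flag", "main");
}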
+ pub fn terminator(&mut self, terminator: u8) -> &mut PathPrinterBuilder { + self.config.terminator = terminator; + self + } +} + +/// A printer for emitting paths to a writer, with optional color support. +#[derive(Debug)] +pub struct PathPrinter { + config: Config, + wtr: W, +} + +impl PathPrinter { + /// Write the given path to the underlying writer. + pub fn write_path(&mut self, path: &Path) -> io::Result<()> { + let ppath = PrinterPath::with_separator(path, self.config.separator); + if !self.wtr.supports_color() { + self.wtr.write_all(ppath.as_bytes())?; + } else { + self.wtr.set_color(self.config.colors.path())?; + self.wtr.write_all(ppath.as_bytes())?; + self.wtr.reset()?; + } + self.wtr.write_all(&[self.config.terminator]) + } +} diff --git a/src/pathutil.rs b/src/pathutil.rs deleted file mode 100644 index 8d1c1510..00000000 --- a/src/pathutil.rs +++ /dev/null @@ -1,42 +0,0 @@ -/*! -The pathutil module provides platform specific operations on paths that are -typically faster than the same operations as provided in `std::path`. In -particular, we really want to avoid the costly operation of parsing the path -into its constituent components. We give up on Windows, but on Unix, we deal -with the raw bytes directly. - -On large repositories (like chromium), this can have a ~25% performance -improvement on just listing the files to search (!). -*/ -use std::path::Path; - -/// Strip `prefix` from the `path` and return the remainder. -/// -/// If `path` doesn't have a prefix `prefix`, then return `None`. -#[cfg(unix)] -pub fn strip_prefix<'a, P: AsRef + ?Sized>( - prefix: &'a P, - path: &'a Path, -) -> Option<&'a Path> { - use std::ffi::OsStr; - use std::os::unix::ffi::OsStrExt; - - let prefix = prefix.as_ref().as_os_str().as_bytes(); - let path = path.as_os_str().as_bytes(); - if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { - None - } else { - Some(Path::new(OsStr::from_bytes(&path[prefix.len()..]))) - } -} - -/// Strip `prefix` from the `path` and return the remainder. -/// -/// If `path` doesn't have a prefix `prefix`, then return `None`. -#[cfg(not(unix))] -pub fn strip_prefix<'a, P: AsRef + ?Sized>( - prefix: &'a P, - path: &'a Path, -) -> Option<&'a Path> { - path.strip_prefix(prefix).ok() -} diff --git a/src/preprocessor.rs b/src/preprocessor.rs index bb464f86..07f66e2d 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -3,8 +3,6 @@ use std::io::{self, Read}; use std::path::{Path, PathBuf}; use std::process::{self, Stdio}; -use Result; - /// PreprocessorReader provides an `io::Read` impl to read kids output. 
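// A brief sketch of the colored-path write in `PathPrinter::write_path`
// above: if the writer reports color support, wrap the path bytes in a
// set_color/reset pair, then emit the configured terminator byte. It uses
// the termcolor crate (already a ripgrep dependency); the magenta color and
// the sample path are arbitrary choices for illustration.
use std::io::{self, Write};
use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};

fn write_colored_path(out: &mut impl WriteColor, path: &str, terminator: u8) -> io::Result<()> {
    if out.supports_color() {
        out.set_color(ColorSpec::new().set_fg(Some(Color::Magenta)))?;
        out.write_all(path.as_bytes())?;
        out.reset()?;
    } else {
        out.write_all(path.as_bytes())?;
    }
    out.write_all(&[terminator])
}

fn main() -> io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Auto);
    write_colored_path(&mut stdout, "src/main.rs", b'\n')
}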
#[derive(Debug)] pub struct PreprocessorReader { @@ -26,7 +24,7 @@ impl PreprocessorReader { pub fn from_cmd_path( cmd: PathBuf, path: &Path, - ) -> Result { + ) -> io::Result { let child = process::Command::new(&cmd) .arg(path) .stdin(Stdio::from(File::open(path)?)) @@ -34,10 +32,13 @@ impl PreprocessorReader { .stderr(Stdio::piped()) .spawn() .map_err(|err| { - format!( - "error running preprocessor command '{}': {}", - cmd.display(), - err, + io::Error::new( + io::ErrorKind::Other, + format!( + "error running preprocessor command '{}': {}", + cmd.display(), + err, + ), ) })?; Ok(PreprocessorReader { diff --git a/src/printer.rs b/src/printer.rs deleted file mode 100644 index 20fd1c4d..00000000 --- a/src/printer.rs +++ /dev/null @@ -1,928 +0,0 @@ -use std::error; -use std::fmt; -use std::path::Path; -use std::str::FromStr; - -use regex::bytes::{Captures, Match, Regex, Replacer}; -use termcolor::{Color, ColorSpec, ParseColorError, WriteColor}; - -use pathutil::strip_prefix; -use ignore::types::FileTypeDef; - -/// Track the start and end of replacements to allow coloring them on output. -#[derive(Debug)] -struct Offset { - start: usize, - end: usize, -} - -impl Offset { - fn new(start: usize, end: usize) -> Offset { - Offset { start: start, end: end } - } -} - -impl<'m, 'r> From<&'m Match<'r>> for Offset { - fn from(m: &'m Match<'r>) -> Self { - Offset{ start: m.start(), end: m.end() } - } -} - -/// `CountingReplacer` implements the Replacer interface for Regex, -/// and counts how often replacement is being performed. -struct CountingReplacer<'r> { - replace: &'r [u8], - count: &'r mut usize, - offsets: &'r mut Vec, -} - -impl<'r> CountingReplacer<'r> { - fn new( - replace: &'r [u8], - count: &'r mut usize, - offsets: &'r mut Vec, - ) -> CountingReplacer<'r> { - CountingReplacer { replace: replace, count: count, offsets: offsets, } - } -} - -impl<'r> Replacer for CountingReplacer<'r> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec) { - *self.count += 1; - let start = dst.len(); - caps.expand(self.replace, dst); - let end = dst.len(); - if start != end { - self.offsets.push(Offset::new(start, end)); - } - } -} - -/// Printer encapsulates all output logic for searching. -/// -/// Note that we currently ignore all write errors. It's probably worthwhile -/// to fix this, but printers are only ever used for writes to stdout or -/// writes to memory, neither of which commonly fail. -pub struct Printer { - /// The underlying writer. - wtr: W, - /// Whether anything has been printed to wtr yet. - has_printed: bool, - /// Whether to show column numbers for the first match or not. - column: bool, - /// The string to use to separate non-contiguous runs of context lines. - context_separator: Vec, - /// The end-of-line terminator used by the printer. In general, eols are - /// printed via the match directly, but occasionally we need to insert them - /// ourselves (for example, to print a context separator). - eol: u8, - /// A file separator to show before any matches are printed. - file_separator: Option>, - /// Whether to show file name as a heading or not. - /// - /// N.B. If with_filename is false, then this setting has no effect. - heading: bool, - /// Whether to show every match on its own line. - line_per_match: bool, - /// Whether to print NUL bytes after a file path instead of new lines - /// or `:`. - null: bool, - /// Print only the matched (non-empty) parts of a matching line - only_matching: bool, - /// A string to use as a replacement of each match in a matching line. 
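// A compact sketch of the error-wrapping pattern used by
// `PreprocessorReader::from_cmd_path` above: spawn a command with the target
// file as stdin, and convert a spawn failure into an `io::Error` whose
// message names the command. The command and path in `main` are placeholders.
use std::fs::File;
use std::io;
use std::path::Path;
use std::process::{Child, Command, Stdio};

fn spawn_preprocessor(cmd: &Path, path: &Path) -> io::Result<Child> {
    Command::new(cmd)
        .arg(path)
        .stdin(Stdio::from(File::open(path)?))
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .spawn()
        .map_err(|err| {
            io::Error::new(
                io::ErrorKind::Other,
                format!("error running preprocessor command '{}': {}", cmd.display(), err),
            )
        })
}

fn main() {
    match spawn_preprocessor(Path::new("/usr/bin/cat"), Path::new("Cargo.toml")) {
        Ok(child) => println!("spawned preprocessor with pid {}", child.id()),
        Err(err) => eprintln!("{}", err),
    }
}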
- replace: Option>, - /// Whether to prefix each match with the corresponding file name. - with_filename: bool, - /// The color specifications. - colors: ColorSpecs, - /// The separator to use for file paths. If empty, this is ignored. - path_separator: Option, - /// Restrict lines to this many columns. - max_columns: Option, -} - -impl Printer { - /// Create a new printer that writes to wtr with the given color settings. - pub fn new(wtr: W) -> Printer { - Printer { - wtr: wtr, - has_printed: false, - column: false, - context_separator: "--".to_string().into_bytes(), - eol: b'\n', - file_separator: None, - heading: false, - line_per_match: false, - null: false, - only_matching: false, - replace: None, - with_filename: false, - colors: ColorSpecs::default(), - path_separator: None, - max_columns: None, - } - } - - /// Set the color specifications. - pub fn colors(mut self, colors: ColorSpecs) -> Printer { - self.colors = colors; - self - } - - /// When set, column numbers will be printed for the first match on each - /// line. - pub fn column(mut self, yes: bool) -> Printer { - self.column = yes; - self - } - - /// Set the context separator. The default is `--`. - pub fn context_separator(mut self, sep: Vec) -> Printer { - self.context_separator = sep; - self - } - - /// Set the end-of-line terminator. The default is `\n`. - pub fn eol(mut self, eol: u8) -> Printer { - self.eol = eol; - self - } - - /// If set, the separator is printed before any matches. By default, no - /// separator is printed. - pub fn file_separator(mut self, sep: Vec) -> Printer { - self.file_separator = Some(sep); - self - } - - /// Whether to show file name as a heading or not. - /// - /// N.B. If with_filename is false, then this setting has no effect. - pub fn heading(mut self, yes: bool) -> Printer { - self.heading = yes; - self - } - - /// Whether to show every match on its own line. - pub fn line_per_match(mut self, yes: bool) -> Printer { - self.line_per_match = yes; - self - } - - /// Whether to cause NUL bytes to follow file paths instead of other - /// visual separators (like `:`, `-` and `\n`). - pub fn null(mut self, yes: bool) -> Printer { - self.null = yes; - self - } - - /// Print only the matched (non-empty) parts of a matching line - pub fn only_matching(mut self, yes: bool) -> Printer { - self.only_matching = yes; - self - } - - /// A separator to use when printing file paths. When empty, use the - /// default separator for the current platform. (/ on Unix, \ on Windows.) - pub fn path_separator(mut self, sep: Option) -> Printer { - self.path_separator = sep; - self - } - - /// Replace every match in each matching line with the replacement string - /// given. - pub fn replace(mut self, replacement: Vec) -> Printer { - self.replace = Some(replacement); - self - } - - /// When set, each match is prefixed with the file name that it came from. - pub fn with_filename(mut self, yes: bool) -> Printer { - self.with_filename = yes; - self - } - - /// Configure the max. number of columns used for printing matching lines. - pub fn max_columns(mut self, max_columns: Option) -> Printer { - self.max_columns = max_columns; - self - } - - /// Returns true if and only if something has been printed. - pub fn has_printed(&self) -> bool { - self.has_printed - } - - /// Flushes the underlying writer and returns it. - #[allow(dead_code)] - pub fn into_inner(mut self) -> W { - let _ = self.wtr.flush(); - self.wtr - } - - /// Prints a type definition. 
- pub fn type_def(&mut self, def: &FileTypeDef) { - self.write(def.name().as_bytes()); - self.write(b": "); - let mut first = true; - for glob in def.globs() { - if !first { - self.write(b", "); - } - self.write(glob.as_bytes()); - first = false; - } - self.write_eol(); - } - - /// Prints the given path. - pub fn path>(&mut self, path: P) { - let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref()); - self.write_path(path); - self.write_path_eol(); - } - - /// Prints the given path and a count of the number of matches found. - pub fn path_count>(&mut self, path: P, count: u64) { - if self.with_filename { - self.write_path(path); - self.write_path_sep(b':'); - } - self.write(count.to_string().as_bytes()); - self.write_eol(); - } - - /// Prints the context separator. - pub fn context_separate(&mut self) { - if self.context_separator.is_empty() { - return; - } - let _ = self.wtr.write_all(&self.context_separator); - self.write_eol(); - } - - pub fn matched>( - &mut self, - re: &Regex, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - byte_offset: Option - ) { - if !self.line_per_match && !self.only_matching { - let mat = - if !self.needs_match() { - (0, 0) - } else { - re.find(&buf[start..end]) - .map(|m| (m.start(), m.end())) - .unwrap_or((0, 0)) - }; - return self.write_match( - re, path, buf, start, end, line_number, - byte_offset, mat.0, mat.1); - } - for m in re.find_iter(&buf[start..end]) { - self.write_match( - re, path.as_ref(), buf, start, end, line_number, - byte_offset, m.start(), m.end()); - } - } - - fn needs_match(&self) -> bool { - self.column - || self.replace.is_some() - || self.only_matching - } - - fn write_match>( - &mut self, - re: &Regex, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - byte_offset: Option, - match_start: usize, - match_end: usize, - ) { - if self.heading && self.with_filename && !self.has_printed { - self.write_file_sep(); - self.write_path(path); - self.write_path_eol(); - } else if !self.heading && self.with_filename { - self.write_path(path); - self.write_path_sep(b':'); - } - if let Some(line_number) = line_number { - self.line_number(line_number, b':'); - } - if self.column { - self.column_number(match_start as u64 + 1, b':'); - } - if let Some(byte_offset) = byte_offset { - if self.only_matching { - self.write_byte_offset( - byte_offset + ((start + match_start) as u64), b':'); - } else { - self.write_byte_offset(byte_offset + (start as u64), b':'); - } - } - if self.replace.is_some() { - let mut count = 0; - let mut offsets = Vec::new(); - let line = { - let replacer = CountingReplacer::new( - self.replace.as_ref().unwrap(), &mut count, &mut offsets); - if self.only_matching { - re.replace_all( - &buf[start + match_start..start + match_end], replacer) - } else { - re.replace_all(&buf[start..end], replacer) - } - }; - if self.max_columns.map_or(false, |m| line.len() > m) { - let msg = format!( - "[Omitted long line with {} replacements]", count); - self.write_colored(msg.as_bytes(), |colors| colors.matched()); - self.write_eol(); - return; - } - self.write_matched_line(offsets, &*line, false); - } else { - let buf = if self.only_matching { - &buf[start + match_start..start + match_end] - } else { - &buf[start..end] - }; - if self.max_columns.map_or(false, |m| buf.len() > m) { - let count = re.find_iter(buf).count(); - let msg = format!("[Omitted long line with {} matches]", count); - self.write_colored(msg.as_bytes(), |colors| colors.matched()); - self.write_eol(); - return; 
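// A simplified sketch of the `--max-columns` behavior implemented in the
// removed `write_match` above: if a matching line exceeds the limit, print a
// placeholder reporting how many matches the omitted line contained instead
// of the line itself. Plain substring matching stands in for the real regex.
fn render_line(line: &str, pattern: &str, max_columns: Option<usize>) -> String {
    if max_columns.map_or(false, |m| line.len() > m) {
        let count = line.matches(pattern).count();
        format!("[Omitted long line with {} matches]", count)
    } else {
        line.to_string()
    }
}

fn main() {
    let line = "Sherlock Holmes and Sherlock again";
    println!("{}", render_line(line, "Sherlock", Some(80)));
    println!("{}", render_line(line, "Sherlock", Some(10)));
}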
- } - let only_match = self.only_matching; - self.write_matched_line( - re.find_iter(buf).map(|x| Offset::from(&x)), buf, only_match); - } - } - - fn write_matched_line(&mut self, offsets: I, buf: &[u8], only_match: bool) - where I: IntoIterator, - { - if !self.wtr.supports_color() || self.colors.matched().is_none() { - self.write(buf); - } else if only_match { - self.write_colored(buf, |colors| colors.matched()); - } else { - let mut last_written = 0; - for o in offsets { - self.write(&buf[last_written..o.start]); - // This conditional checks if the match is both empty *and* - // past the end of the line. In this case, we never want to - // emit an additional color escape. - if o.start != o.end || o.end != buf.len() { - self.write_colored( - &buf[o.start..o.end], |colors| colors.matched()); - } - last_written = o.end; - } - self.write(&buf[last_written..]); - } - if buf.last() != Some(&self.eol) { - self.write_eol(); - } - } - - pub fn context>( - &mut self, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - byte_offset: Option, - ) { - if self.heading && self.with_filename && !self.has_printed { - self.write_file_sep(); - self.write_path(path); - self.write_path_eol(); - } else if !self.heading && self.with_filename { - self.write_path(path); - self.write_path_sep(b'-'); - } - if let Some(line_number) = line_number { - self.line_number(line_number, b'-'); - } - if let Some(byte_offset) = byte_offset { - self.write_byte_offset(byte_offset + (start as u64), b'-'); - } - if self.max_columns.map_or(false, |m| end - start > m) { - self.write(b"[Omitted long context line]"); - self.write_eol(); - return; - } - self.write(&buf[start..end]); - if buf[start..end].last() != Some(&self.eol) { - self.write_eol(); - } - } - - fn separator(&mut self, sep: &[u8]) { - self.write(sep); - } - - fn write_path_sep(&mut self, sep: u8) { - if self.null { - self.write(b"\x00"); - } else { - self.separator(&[sep]); - } - } - - fn write_path_eol(&mut self) { - if self.null { - self.write(b"\x00"); - } else { - self.write_eol(); - } - } - - #[cfg(unix)] - fn write_path>(&mut self, path: P) { - use std::os::unix::ffi::OsStrExt; - let path = path.as_ref().as_os_str().as_bytes(); - self.write_path_replace_separator(path); - } - - #[cfg(not(unix))] - fn write_path>(&mut self, path: P) { - let path = path.as_ref().to_string_lossy(); - self.write_path_replace_separator(path.as_bytes()); - } - - fn write_path_replace_separator(&mut self, path: &[u8]) { - match self.path_separator { - None => self.write_colored(path, |colors| colors.path()), - Some(sep) => { - let transformed_path: Vec<_> = path.iter().map(|&b| { - if b == b'/' || (cfg!(windows) && b == b'\\') { - sep - } else { - b - } - }).collect(); - self.write_colored(&transformed_path, |colors| colors.path()); - } - } - } - - fn line_number(&mut self, n: u64, sep: u8) { - let line_number = n.to_string(); - self.write_colored(line_number.as_bytes(), |colors| colors.line()); - self.separator(&[sep]); - } - - fn column_number(&mut self, n: u64, sep: u8) { - self.write_colored(n.to_string().as_bytes(), |colors| colors.column()); - self.separator(&[sep]); - } - - fn write_byte_offset(&mut self, o: u64, sep: u8) { - self.write_colored(o.to_string().as_bytes(), |colors| colors.column()); - self.separator(&[sep]); - } - - fn write(&mut self, buf: &[u8]) { - self.has_printed = true; - let _ = self.wtr.write_all(buf); - } - - fn write_eol(&mut self) { - let eol = self.eol; - self.write(&[eol]); - } - - fn write_colored(&mut self, buf: &[u8], 
get_color: F) - where F: Fn(&ColorSpecs) -> &ColorSpec - { - let _ = self.wtr.set_color(get_color(&self.colors)); - self.write(buf); - let _ = self.wtr.reset(); - } - - fn write_file_sep(&mut self) { - if let Some(ref sep) = self.file_separator { - self.has_printed = true; - let _ = self.wtr.write_all(sep); - let _ = self.wtr.write_all(b"\n"); - } - } -} - -/// An error that can occur when parsing color specifications. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Error { - /// This occurs when an unrecognized output type is used. - UnrecognizedOutType(String), - /// This occurs when an unrecognized spec type is used. - UnrecognizedSpecType(String), - /// This occurs when an unrecognized color name is used. - UnrecognizedColor(String, String), - /// This occurs when an unrecognized style attribute is used. - UnrecognizedStyle(String), - /// This occurs when the format of a color specification is invalid. - InvalidFormat(String), -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::UnrecognizedOutType(_) => "unrecognized output type", - Error::UnrecognizedSpecType(_) => "unrecognized spec type", - Error::UnrecognizedColor(_, _) => "unrecognized color name", - Error::UnrecognizedStyle(_) => "unrecognized style attribute", - Error::InvalidFormat(_) => "invalid color spec", - } - } - - fn cause(&self) -> Option<&error::Error> { - None - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::UnrecognizedOutType(ref name) => { - write!(f, "Unrecognized output type '{}'. Choose from: \ - path, line, column, match.", name) - } - Error::UnrecognizedSpecType(ref name) => { - write!(f, "Unrecognized spec type '{}'. Choose from: \ - fg, bg, style, none.", name) - } - Error::UnrecognizedColor(_, ref msg) => { - write!(f, "{}", msg) - } - Error::UnrecognizedStyle(ref name) => { - write!(f, "Unrecognized style attribute '{}'. Choose from: \ - nobold, bold, nointense, intense, nounderline, \ - underline.", name) - } - Error::InvalidFormat(ref original) => { - write!( - f, - "Invalid color spec format: '{}'. Valid format \ - is '(path|line|column|match):(fg|bg|style):(value)'.", - original) - } - } - } -} - -impl From for Error { - fn from(err: ParseColorError) -> Error { - Error::UnrecognizedColor(err.invalid().to_string(), err.to_string()) - } -} - -/// A merged set of color specifications. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct ColorSpecs { - path: ColorSpec, - line: ColorSpec, - column: ColorSpec, - matched: ColorSpec, -} - -/// A single color specification provided by the user. -/// -/// A `ColorSpecs` can be built by merging a sequence of `Spec`s. -/// -/// ## Example -/// -/// The only way to build a `Spec` is to parse it from a string. Once multiple -/// `Spec`s have been constructed, then can be merged into a single -/// `ColorSpecs` value. -/// -/// ```rust -/// use termcolor::{Color, ColorSpecs, Spec}; -/// -/// let spec1: Spec = "path:fg:blue".parse().unwrap(); -/// let spec2: Spec = "match:bg:green".parse().unwrap(); -/// let specs = ColorSpecs::new(&[spec1, spec2]); -/// -/// assert_eq!(specs.path().fg(), Some(Color::Blue)); -/// assert_eq!(specs.matched().bg(), Some(Color::Green)); -/// ``` -/// -/// ## Format -/// -/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each -/// component is defined as follows: -/// -/// * `{type}` can be one of `path`, `line`, `column` or `match`. -/// * `{attribute}` can be one of `fg`, `bg` or `style`. 
`{attribute}` may also -/// be the special value `none`, in which case, `{value}` can be omitted. -/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction. -/// -/// `{type}` controls which part of the output should be styled and is -/// application dependent. -/// -/// When `{attribute}` is `none`, then this should cause any existing color -/// settings to be cleared. -/// -/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it -/// should be a style instruction when `{attribute}` is `style`. When -/// `{attribute}` is `none`, `{value}` must be omitted. -/// -/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`, -/// `yellow`, `white`. -/// -/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`, -/// `underline`, `nounderline`. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Spec { - ty: OutType, - value: SpecValue, -} - -/// The actual value given by the specification. -#[derive(Clone, Debug, Eq, PartialEq)] -enum SpecValue { - None, - Fg(Color), - Bg(Color), - Style(Style), -} - -/// The set of configurable portions of ripgrep's output. -#[derive(Clone, Debug, Eq, PartialEq)] -enum OutType { - Path, - Line, - Column, - Match, -} - -/// The specification type. -#[derive(Clone, Debug, Eq, PartialEq)] -enum SpecType { - Fg, - Bg, - Style, - None, -} - -/// The set of available styles for use in the terminal. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Style { - Bold, - NoBold, - Intense, - NoIntense, - Underline, - NoUnderline -} - -impl ColorSpecs { - /// Create color specifications from a list of user supplied - /// specifications. - pub fn new(user_specs: &[Spec]) -> ColorSpecs { - let mut specs = ColorSpecs::default(); - for user_spec in user_specs { - match user_spec.ty { - OutType::Path => user_spec.merge_into(&mut specs.path), - OutType::Line => user_spec.merge_into(&mut specs.line), - OutType::Column => user_spec.merge_into(&mut specs.column), - OutType::Match => user_spec.merge_into(&mut specs.matched), - } - } - specs - } - - /// Return the color specification for coloring file paths. - fn path(&self) -> &ColorSpec { - &self.path - } - - /// Return the color specification for coloring line numbers. - fn line(&self) -> &ColorSpec { - &self.line - } - - /// Return the color specification for coloring column numbers. - fn column(&self) -> &ColorSpec { - &self.column - } - - /// Return the color specification for coloring matched text. - fn matched(&self) -> &ColorSpec { - &self.matched - } -} - -impl Spec { - /// Merge this spec into the given color specification. - fn merge_into(&self, cspec: &mut ColorSpec) { - self.value.merge_into(cspec); - } -} - -impl SpecValue { - /// Merge this spec value into the given color specification. 
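// A minimal sketch of parsing the `{type}:{attribute}:{value}` color spec
// format documented above (e.g. "path:fg:blue" or "line:none"). It only
// validates the shape of the triple; mapping the pieces onto real colors and
// styles is left out, and the error type here is a plain String.
fn parse_spec(s: &str) -> Result<(String, String, Option<String>), String> {
    let pieces: Vec<&str> = s.split(':').collect();
    match pieces.as_slice() {
        // `none` needs no value, e.g. "line:none"
        [ty, attr] if *attr == "none" => Ok((ty.to_string(), attr.to_string(), None)),
        // fg/bg/style carry a value, e.g. "match:style:bold"
        [ty, attr, value] => Ok((ty.to_string(), attr.to_string(), Some(value.to_string()))),
        _ => Err(format!("invalid color spec format: '{}'", s)),
    }
}

fn main() {
    assert_eq!(
        parse_spec("path:fg:blue").unwrap(),
        ("path".to_string(), "fg".to_string(), Some("blue".to_string()))
    );
    assert!(parse_spec("line:none").unwrap().2.is_none());
    assert!(parse_spec("foo").is_err());
}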
- fn merge_into(&self, cspec: &mut ColorSpec) { - match *self { - SpecValue::None => cspec.clear(), - SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); } - SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); } - SpecValue::Style(ref style) => { - match *style { - Style::Bold => { cspec.set_bold(true); } - Style::NoBold => { cspec.set_bold(false); } - Style::Intense => { cspec.set_intense(true); } - Style::NoIntense => { cspec.set_intense(false); } - Style::Underline => { cspec.set_underline(true); } - Style::NoUnderline => { cspec.set_underline(false); } - } - } - } - } -} - -impl FromStr for Spec { - type Err = Error; - - fn from_str(s: &str) -> Result { - let pieces: Vec<&str> = s.split(':').collect(); - if pieces.len() <= 1 || pieces.len() > 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let otype: OutType = pieces[0].parse()?; - match pieces[1].parse()? { - SpecType::None => Ok(Spec { ty: otype, value: SpecValue::None }), - SpecType::Style => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let style: Style = pieces[2].parse()?; - Ok(Spec { ty: otype, value: SpecValue::Style(style) }) - } - SpecType::Fg => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let color: Color = pieces[2].parse()?; - Ok(Spec { ty: otype, value: SpecValue::Fg(color) }) - } - SpecType::Bg => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let color: Color = pieces[2].parse()?; - Ok(Spec { ty: otype, value: SpecValue::Bg(color) }) - } - } - } -} - -impl FromStr for OutType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "path" => Ok(OutType::Path), - "line" => Ok(OutType::Line), - "column" => Ok(OutType::Column), - "match" => Ok(OutType::Match), - _ => Err(Error::UnrecognizedOutType(s.to_string())), - } - } -} - -impl FromStr for SpecType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "fg" => Ok(SpecType::Fg), - "bg" => Ok(SpecType::Bg), - "style" => Ok(SpecType::Style), - "none" => Ok(SpecType::None), - _ => Err(Error::UnrecognizedSpecType(s.to_string())), - } - } -} - -impl FromStr for Style { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "bold" => Ok(Style::Bold), - "nobold" => Ok(Style::NoBold), - "intense" => Ok(Style::Intense), - "nointense" => Ok(Style::NoIntense), - "underline" => Ok(Style::Underline), - "nounderline" => Ok(Style::NoUnderline), - _ => Err(Error::UnrecognizedStyle(s.to_string())), - } - } -} - -#[cfg(test)] -mod tests { - use termcolor::{Color, ColorSpec}; - use super::{ColorSpecs, Error, OutType, Spec, SpecValue, Style}; - - #[test] - fn merge() { - let user_specs: &[Spec] = &[ - "match:fg:blue".parse().unwrap(), - "match:none".parse().unwrap(), - "match:style:bold".parse().unwrap(), - ]; - let mut expect_matched = ColorSpec::new(); - expect_matched.set_bold(true); - assert_eq!(ColorSpecs::new(user_specs), ColorSpecs { - path: ColorSpec::default(), - line: ColorSpec::default(), - column: ColorSpec::default(), - matched: expect_matched, - }); - } - - #[test] - fn specs() { - let spec: Spec = "path:fg:blue".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Path, - value: SpecValue::Fg(Color::Blue), - }); - - let spec: Spec = "path:bg:red".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Path, - value: SpecValue::Bg(Color::Red), - }); - - let spec: Spec = "match:style:bold".parse().unwrap(); - 
assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Bold), - }); - - let spec: Spec = "match:style:intense".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Intense), - }); - - let spec: Spec = "match:style:underline".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Underline), - }); - - let spec: Spec = "line:none".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Line, - value: SpecValue::None, - }); - - let spec: Spec = "column:bg:green".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Column, - value: SpecValue::Bg(Color::Green), - }); - } - - #[test] - fn spec_errors() { - let err = "line:nonee".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedSpecType("nonee".to_string())); - - let err = "".parse::().unwrap_err(); - assert_eq!(err, Error::InvalidFormat("".to_string())); - - let err = "foo".parse::().unwrap_err(); - assert_eq!(err, Error::InvalidFormat("foo".to_string())); - - let err = "line:style:italic".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedStyle("italic".to_string())); - - let err = "line:fg:brown".parse::().unwrap_err(); - match err { - Error::UnrecognizedColor(name, _) => assert_eq!(name, "brown"), - err => assert!(false, "unexpected error: {:?}", err), - } - - let err = "foo:fg:brown".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedOutType("foo".to_string())); - } -} diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 00000000..45f7cf87 --- /dev/null +++ b/src/search.rs @@ -0,0 +1,408 @@ +use std::io; +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use grep::matcher::Matcher; +#[cfg(feature = "pcre2")] +use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher}; +use grep::printer::{JSON, Standard, Summary, Stats}; +use grep::regex::{RegexMatcher as RustRegexMatcher}; +use grep::searcher::Searcher; +use serde_json as json; +use termcolor::WriteColor; + +use decompressor::{DecompressionReader, is_compressed}; +use preprocessor::PreprocessorReader; +use subject::Subject; + +/// The configuration for the search worker. Among a few other things, the +/// configuration primarily controls the way we show search results to users +/// at a very high level. +#[derive(Clone, Debug)] +struct Config { + json_stats: bool, + preprocessor: Option, + search_zip: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + json_stats: false, + preprocessor: None, + search_zip: false, + } + } +} + +/// A builder for configuring and constructing a search worker. +#[derive(Clone, Debug)] +pub struct SearchWorkerBuilder { + config: Config, +} + +impl Default for SearchWorkerBuilder { + fn default() -> SearchWorkerBuilder { + SearchWorkerBuilder::new() + } +} + +impl SearchWorkerBuilder { + /// Create a new builder for configuring and constructing a search worker. + pub fn new() -> SearchWorkerBuilder { + SearchWorkerBuilder { config: Config::default() } + } + + /// Create a new search worker using the given searcher, matcher and + /// printer. + pub fn build( + &self, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, + ) -> SearchWorker { + let config = self.config.clone(); + SearchWorker { config, matcher, searcher, printer } + } + + /// Forcefully use JSON to emit statistics, even if the underlying printer + /// is not the JSON printer. 
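// A pared-down sketch of the builder shape used by `SearchWorkerBuilder`
// above: a private `Config` with a `Default`, setters that return `&mut Self`
// so calls chain, and a `build` that clones the config into the finished
// value. The field names here are simplified stand-ins.
#[derive(Clone, Debug, Default)]
struct Config {
    json_stats: bool,
    search_zip: bool,
}

#[derive(Clone, Debug, Default)]
struct WorkerBuilder {
    config: Config,
}

#[derive(Debug)]
struct Worker {
    config: Config,
}

impl WorkerBuilder {
    fn new() -> WorkerBuilder {
        WorkerBuilder::default()
    }

    fn json_stats(&mut self, yes: bool) -> &mut WorkerBuilder {
        self.config.json_stats = yes;
        self
    }

    fn search_zip(&mut self, yes: bool) -> &mut WorkerBuilder {
        self.config.search_zip = yes;
        self
    }

    fn build(&self) -> Worker {
        Worker { config: self.config.clone() }
    }
}

fn main() {
    let worker = WorkerBuilder::new().json_stats(true).search_zip(false).build();
    println!("{:?}", worker.config);
}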
+ /// + /// This is useful for implementing flag combinations like + /// `--json --quiet`, which uses the summary printer for implementing + /// `--quiet` but still wants to emit summary statistics, which should + /// be JSON formatted because of the `--json` flag. + pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.json_stats = yes; + self + } + + /// Set the path to a preprocessor command. + /// + /// When this is set, instead of searching files directly, the given + /// command will be run with the file path as the first argument, and the + /// output of that command will be searched instead. + pub fn preprocessor( + &mut self, + cmd: Option, + ) -> &mut SearchWorkerBuilder { + self.config.preprocessor = cmd; + self + } + + /// Enable the decompression and searching of common compressed files. + /// + /// When enabled, if a particular file path is recognized as a compressed + /// file, then it is decompressed before searching. + /// + /// Note that if a preprocessor command is set, then it overrides this + /// setting. + pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.search_zip = yes; + self + } +} + +/// The result of executing a search. +/// +/// Generally speaking, the "result" of a search is sent to a printer, which +/// writes results to an underlying writer such as stdout or a file. However, +/// every search also has some aggregate statistics or meta data that may be +/// useful to higher level routines. +#[derive(Clone, Debug, Default)] +pub struct SearchResult { + has_match: bool, + stats: Option, +} + +impl SearchResult { + /// Whether the search found a match or not. + pub fn has_match(&self) -> bool { + self.has_match + } + + /// Return aggregate search statistics for a single search, if available. + /// + /// It can be expensive to compute statistics, so these are only present + /// if explicitly enabled in the printer provided by the caller. + pub fn stats(&self) -> Option<&Stats> { + self.stats.as_ref() + } +} + +/// The pattern matcher used by a search worker. +#[derive(Clone, Debug)] +pub enum PatternMatcher { + RustRegex(RustRegexMatcher), + #[cfg(feature = "pcre2")] + PCRE2(PCRE2RegexMatcher), +} + +/// The printer used by a search worker. +/// +/// The `W` type parameter refers to the type of the underlying writer. +#[derive(Debug)] +pub enum Printer { + /// Use the standard printer, which supports the classic grep-like format. + Standard(Standard), + /// Use the summary printer, which supports aggregate displays of search + /// results. + Summary(Summary), + /// A JSON printer, which emits results in the JSON Lines format. 
+ JSON(JSON), +} + +impl Printer { + fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + match *self { + Printer::JSON(_) => { + self.print_stats_json(total_duration, stats) + } + Printer::Standard(_) | Printer::Summary(_) => { + self.print_stats_human(total_duration, stats) + } + } + } + + fn print_stats_human( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + write!( + self.get_mut(), + " +{matches} matches +{lines} matched lines +{searches_with_match} files contained matches +{searches} files searched +{bytes_printed} bytes printed +{bytes_searched} bytes searched +{search_time:0.6} seconds spent searching +{process_time:0.6} seconds +", + matches = stats.matches(), + lines = stats.matched_lines(), + searches_with_match = stats.searches_with_match(), + searches = stats.searches(), + bytes_printed = stats.bytes_printed(), + bytes_searched = stats.bytes_searched(), + search_time = fractional_seconds(stats.elapsed()), + process_time = fractional_seconds(total_duration) + ) + } + + fn print_stats_json( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + // We specifically match the format laid out by the JSON printer in + // the grep-printer crate. We simply "extend" it with the 'summary' + // message type. + let fractional = fractional_seconds(total_duration); + json::to_writer(self.get_mut(), &json!({ + "type": "summary", + "data": { + "stats": stats, + "elapsed_total": { + "secs": total_duration.as_secs(), + "nanos": total_duration.subsec_nanos(), + "human": format!("{:0.6}s", fractional), + }, + } + }))?; + write!(self.get_mut(), "\n") + } + + /// Return a mutable reference to the underlying printer's writer. + pub fn get_mut(&mut self) -> &mut W { + match *self { + Printer::Standard(ref mut p) => p.get_mut(), + Printer::Summary(ref mut p) => p.get_mut(), + Printer::JSON(ref mut p) => p.get_mut(), + } + } +} + +/// A worker for executing searches. +/// +/// It is intended for a single worker to execute many searches, and is +/// generally intended to be used from a single thread. When searching using +/// multiple threads, it is better to create a new worker for each thread. +#[derive(Debug)] +pub struct SearchWorker { + config: Config, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, +} + +impl SearchWorker { + /// Execute a search over the given subject. + pub fn search(&mut self, subject: &Subject) -> io::Result { + self.search_impl(subject) + } + + /// Return a mutable reference to the underlying printer. + pub fn printer(&mut self) -> &mut Printer { + &mut self.printer + } + + /// Print the given statistics to the underlying writer in a way that is + /// consistent with this searcher's printer's format. + /// + /// While `Stats` contains a duration itself, this only corresponds to the + /// time spent searching, where as `total_duration` should roughly + /// approximate the lifespan of the ripgrep process itself. + pub fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + if self.config.json_stats { + self.printer().print_stats_json(total_duration, stats) + } else { + self.printer().print_stats(total_duration, stats) + } + } + + /// Search the given subject using the appropriate strategy. + fn search_impl(&mut self, subject: &Subject) -> io::Result { + let path = subject.path(); + if subject.is_stdin() { + let stdin = io::stdin(); + // A `return` here appeases the borrow checker. NLL will fix this. 
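// A standalone sketch of the JSON "summary" record emitted by
// `print_stats_json` above: serde_json's `json!` macro builds the payload and
// `to_writer` prints it as a single JSON Lines record. The stats field here
// is a placeholder rather than grep-printer's real `Stats` type.
use std::io::{self, Write};
use std::time::Duration;

use serde_json::json;

fn print_summary<W: Write>(mut wtr: W, total: Duration, matches: u64) -> io::Result<()> {
    let fractional = total.as_secs() as f64 + total.subsec_nanos() as f64 * 1e-9;
    serde_json::to_writer(&mut wtr, &json!({
        "type": "summary",
        "data": {
            "stats": { "matches": matches },
            "elapsed_total": {
                "secs": total.as_secs(),
                "nanos": total.subsec_nanos(),
                "human": format!("{:0.6}s", fractional),
            },
        }
    }))?;
    write!(wtr, "\n")
}

fn main() -> io::Result<()> {
    let stdout = io::stdout();
    print_summary(stdout.lock(), Duration::from_millis(1234), 42)
}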
+ return self.search_reader(path, stdin.lock()); + } else if self.config.preprocessor.is_some() { + let cmd = self.config.preprocessor.clone().unwrap(); + let rdr = PreprocessorReader::from_cmd_path(cmd, path)?; + self.search_reader(path, rdr) + } else if self.config.search_zip && is_compressed(path) { + match DecompressionReader::from_path(path) { + None => Ok(SearchResult::default()), + Some(rdr) => self.search_reader(path, rdr), + } + } else { + self.search_path(path) + } + } + + /// Search the contents of the given file path. + fn search_path(&mut self, path: &Path) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_path(m, searcher, printer, path), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_path(m, searcher, printer, path), + } + } + + /// Executes a search on the given reader, which may or may not correspond + /// directly to the contents of the given file path. Instead, the reader + /// may actually cause something else to be searched (for example, when + /// a preprocessor is set or when decompression is enabled). In those + /// cases, the file path is used for visual purposes only. + /// + /// Generally speaking, this method should only be used when there is no + /// other choice. Searching via `search_path` provides more opportunities + /// for optimizations (such as memory maps). + fn search_reader( + &mut self, + path: &Path, + rdr: R, + ) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr), + } + } +} + +/// Search the contents of the given file path using the given matcher, +/// searcher and printer. +fn search_path( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Search the contents of the given reader using the given matcher, searcher +/// and printer. 
+fn search_reader( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, + rdr: R, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Return the given duration as fractional seconds. +fn fractional_seconds(duration: Duration) -> f64 { + (duration.as_secs() as f64) + (duration.subsec_nanos() as f64 * 1e-9) +} diff --git a/src/search_buffer.rs b/src/search_buffer.rs deleted file mode 100644 index 2777a06c..00000000 --- a/src/search_buffer.rs +++ /dev/null @@ -1,424 +0,0 @@ -/*! -The `search_buffer` module is responsible for searching a single file all in a -single buffer. Typically, the source of the buffer is a memory map. This can -be useful for when memory maps are faster than streaming search. - -Note that this module doesn't quite support everything that `search_stream` -does. Notably, showing contexts. -*/ -use std::cmp; -use std::path::Path; - -use grep::Grep; -use termcolor::WriteColor; - -use printer::Printer; -use search_stream::{IterLines, Options, count_lines, is_binary}; - -pub struct BufferSearcher<'a, W: 'a> { - opts: Options, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - buf: &'a [u8], - match_line_count: u64, - match_count: Option, - line_count: Option, - byte_offset: Option, - last_line: usize, -} - -impl<'a, W: WriteColor> BufferSearcher<'a, W> { - pub fn new( - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - buf: &'a [u8], - ) -> BufferSearcher<'a, W> { - BufferSearcher { - opts: Options::default(), - printer: printer, - grep: grep, - path: path, - buf: buf, - match_line_count: 0, - match_count: None, - line_count: None, - byte_offset: None, - last_line: 0, - } - } - - /// If enabled, searching will print a 0-based offset of the - /// matching line (or the actual match if -o is specified) before - /// printing the line itself. - /// - /// Disabled by default. - pub fn byte_offset(mut self, yes: bool) -> Self { - self.opts.byte_offset = yes; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the count of individual matches - /// instead of each match. - /// - /// Disabled by default. - pub fn count_matches(mut self, yes: bool) -> Self { - self.opts.count_matches = yes; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files that *don't* match - /// the given pattern. - /// - /// Disabled by default. 
- pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self - } - - #[inline(never)] - pub fn run(mut self) -> u64 { - let binary_upto = cmp::min(10_240, self.buf.len()); - if !self.opts.text && is_binary(&self.buf[..binary_upto], true) { - return 0; - } - - self.match_line_count = 0; - self.line_count = if self.opts.line_number { Some(0) } else { None }; - // The memory map searcher uses one contiguous block of bytes, so the - // offsets given the printer are sufficient to compute the byte offset. - self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; - self.match_count = if self.opts.count_matches { Some(0) } else { None }; - let mut last_end = 0; - for m in self.grep.iter(self.buf) { - if self.opts.invert_match { - self.print_inverted_matches(last_end, m.start()); - } else { - self.print_match(m.start(), m.end()); - } - last_end = m.end(); - if self.opts.terminate(self.match_line_count) { - break; - } - } - if self.opts.invert_match && !self.opts.terminate(self.match_line_count) { - let upto = self.buf.len(); - self.print_inverted_matches(last_end, upto); - } - if self.opts.count && self.match_line_count > 0 { - self.printer.path_count(self.path, self.match_line_count); - } else if self.opts.count_matches - && self.match_count.map_or(false, |c| c > 0) - { - self.printer.path_count(self.path, self.match_count.unwrap()); - } - if self.opts.files_with_matches && self.match_line_count > 0 { - self.printer.path(self.path); - } - if self.opts.files_without_matches && self.match_line_count == 0 { - self.printer.path(self.path); - } - self.match_line_count - } - - #[inline(always)] - fn count_individual_matches(&mut self, start: usize, end: usize) { - if let Some(ref mut count) = self.match_count { - for _ in self.grep.regex().find_iter(&self.buf[start..end]) { - *count += 1; - } - } - } - - #[inline(always)] - pub fn print_match(&mut self, start: usize, end: usize) { - self.match_line_count += 1; - self.count_individual_matches(start, end); - if self.opts.skip_matches() { - return; - } - self.count_lines(start); - self.add_line(end); - self.printer.matched( - self.grep.regex(), self.path, self.buf, - start, end, self.line_count, self.byte_offset); - } - - #[inline(always)] - fn print_inverted_matches(&mut self, start: usize, end: usize) { - debug_assert!(self.opts.invert_match); - let mut it = IterLines::new(self.opts.eol, start); - while let Some((s, e)) = 
it.next(&self.buf[..end]) { - if self.opts.terminate(self.match_line_count) { - return; - } - self.print_match(s, e); - } - } - - #[inline(always)] - fn count_lines(&mut self, upto: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += count_lines( - &self.buf[self.last_line..upto], self.opts.eol); - self.last_line = upto; - } - } - - #[inline(always)] - fn add_line(&mut self, line_end: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += 1; - self.last_line = line_end; - } - } -} - -#[cfg(test)] -mod tests { - use std::path::Path; - - use grep::GrepBuilder; - - use printer::Printer; - use termcolor; - - use super::BufferSearcher; - - const SHERLOCK: &'static str = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached.\ -"; - - fn test_path() -> &'static Path { - &Path::new("/baz.rs") - } - - type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor>>; - - fn search TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = BufferSearcher::new( - &mut pp, &grep, test_path(), haystack.as_bytes()); - map(searcher).run() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - #[test] - fn basic_search() { - let (count, out) = search("Sherlock", SHERLOCK, |s|s); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn binary() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s|s); - assert_eq!(0, count); - assert_eq!(out, ""); - } - - - #[test] - fn binary_text() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); - } - - #[test] - fn line_numbers() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.line_number(true)); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn byte_offset() { - let (_, out) = search( - "Sherlock", SHERLOCK, |s| s.byte_offset(true)); - assert_eq!(out, "\ -/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn byte_offset_inverted() { - let (_, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).byte_offset(true) - }); - assert_eq!(out, "\ -/baz.rs:65:Holmeses, success in the province of detective work must always -/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:321:and exhibited clearly, with a label attached. 
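// A condensed sketch of the incremental line counting done by the removed
// `count_lines`/`add_line` pair above: line numbers are computed lazily by
// counting eol bytes only between the previously counted position and the
// current offset, instead of rescanning the buffer from the start each time.
// The struct and method names are illustrative.
struct LineCounter {
    eol: u8,
    line: u64,
    last: usize,
}

impl LineCounter {
    fn new(eol: u8) -> LineCounter {
        LineCounter { eol, line: 0, last: 0 }
    }

    /// Return the 1-based line number containing the byte offset `upto`.
    fn line_at(&mut self, buf: &[u8], upto: usize) -> u64 {
        self.line += buf[self.last..upto].iter().filter(|&&b| b == self.eol).count() as u64;
        self.last = upto;
        self.line + 1
    }
}

fn main() {
    let buf = b"alpha\nbeta\ngamma\n";
    let mut counter = LineCounter::new(b'\n');
    assert_eq!(counter.line_at(buf, 0), 1);  // start of "alpha"
    assert_eq!(counter.line_at(buf, 6), 2);  // start of "beta"
    assert_eq!(counter.line_at(buf, 11), 3); // start of "gamma"
}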
-"); - } - - #[test] - fn count() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.count(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:2\n"); - } - - #[test] - fn count_matches() { - let (_, out) = search( - "the", SHERLOCK, |s| s.count_matches(true)); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn files_with_matches() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); - assert_eq!(1, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn files_without_matches() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.files_without_matches(true)); - assert_eq!(0, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn max_count() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match_max_count() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.invert_match(true)); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:Holmeses, success in the province of detective work must always -/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_line_numbers() { - let (count, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).line_number(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_count() { - let (count, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).count(true) - }); - assert_eq!(4, count); - assert_eq!(out, "/baz.rs:4\n"); - } -} diff --git a/src/search_stream.rs b/src/search_stream.rs deleted file mode 100644 index b218dd19..00000000 --- a/src/search_stream.rs +++ /dev/null @@ -1,1466 +0,0 @@ -/*! -The `search_stream` module is responsible for searching a single file and -printing matches. In particular, it searches the file in a streaming fashion -using `read` calls and a (roughly) fixed size buffer. -*/ - -use std::cmp; -use std::error::Error as StdError; -use std::fmt; -use std::io; -use std::path::{Path, PathBuf}; - -use bytecount; -use grep::{Grep, Match}; -use memchr::{memchr, memrchr}; -use termcolor::WriteColor; - -use printer::Printer; - -/// The default read size (capacity of input buffer). -const READ_SIZE: usize = 8 * (1<<10); - -/// Error describes errors that can occur while searching. -#[derive(Debug)] -pub enum Error { - /// A standard I/O error attached to a particular file path. - Io { - err: io::Error, - path: PathBuf, - } -} - -impl Error { - fn from_io>(err: io::Error, path: P) -> Error { - Error::Io { err: err, path: path.as_ref().to_path_buf() } - } -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Io { ref err, .. 
} => err.description(), - } - } - - fn cause(&self) -> Option<&StdError> { - match *self { - Error::Io { ref err, .. } => Some(err), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Io { ref err, ref path } => { - write!(f, "{}: {}", path.display(), err) - } - } - } -} - -pub struct Searcher<'a, R, W: 'a> { - opts: Options, - inp: &'a mut InputBuffer, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - haystack: R, - match_line_count: u64, - match_count: Option, - line_count: Option, - byte_offset: Option, - last_match: Match, - last_printed: usize, - last_line: usize, - after_context_remaining: usize, -} - -/// Options for configuring search. -#[derive(Clone)] -pub struct Options { - pub after_context: usize, - pub before_context: usize, - pub byte_offset: bool, - pub count: bool, - pub count_matches: bool, - pub files_with_matches: bool, - pub files_without_matches: bool, - pub eol: u8, - pub invert_match: bool, - pub line_number: bool, - pub max_count: Option, - pub quiet: bool, - pub text: bool, -} - -impl Default for Options { - fn default() -> Options { - Options { - after_context: 0, - before_context: 0, - byte_offset: false, - count: false, - count_matches: false, - files_with_matches: false, - files_without_matches: false, - eol: b'\n', - invert_match: false, - line_number: false, - max_count: None, - quiet: false, - text: false, - } - } - -} - -impl Options { - /// Several options (--quiet, --count, --count-matches, --files-with-matches, - /// --files-without-match) imply that we shouldn't ever display matches. - pub fn skip_matches(&self) -> bool { - self.count || self.files_with_matches || self.files_without_matches - || self.quiet || self.count_matches - } - - /// Some options (--quiet, --files-with-matches, --files-without-match) - /// imply that we can stop searching after the first match. - pub fn stop_after_first_match(&self) -> bool { - self.files_with_matches || self.files_without_matches || self.quiet - } - - /// Returns true if the search should terminate based on the match line count. - pub fn terminate(&self, match_line_count: u64) -> bool { - if match_line_count > 0 && self.stop_after_first_match() { - return true; - } - if self.max_count.map_or(false, |max| match_line_count >= max) { - return true; - } - false - } -} - -impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { - /// Create a new searcher. - /// - /// `inp` is a reusable input buffer that is used as scratch space by this - /// searcher. - /// - /// `printer` is used to output all results of searching. - /// - /// `grep` is the actual matcher. - /// - /// `path` is the file path being searched. - /// - /// `haystack` is a reader of text to search. - pub fn new( - inp: &'a mut InputBuffer, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - haystack: R, - ) -> Searcher<'a, R, W> { - Searcher { - opts: Options::default(), - inp: inp, - printer: printer, - grep: grep, - path: path, - haystack: haystack, - match_line_count: 0, - match_count: None, - line_count: None, - byte_offset: None, - last_match: Match::default(), - last_printed: 0, - last_line: 0, - after_context_remaining: 0, - } - } - - /// The number of contextual lines to show after each match. The default - /// is zero. - pub fn after_context(mut self, count: usize) -> Self { - self.opts.after_context = count; - self - } - - /// The number of contextual lines to show before each match. The default - /// is zero. 
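// A minimal, self-contained sketch (not part of this patch; the function name is
// for illustration only) of the stopping rule encoded by
// `Options::stop_after_first_match` and `Options::terminate` above: searching
// stops early when only a yes/no or path-only answer is needed, or once the
// configured --max-count has been reached.
fn should_terminate(
    match_line_count: u64,
    stop_after_first_match: bool,
    max_count: Option<u64>,
) -> bool {
    if match_line_count > 0 && stop_after_first_match {
        return true;
    }
    max_count.map_or(false, |max| match_line_count >= max)
}
// e.g. with --max-count=1: should_terminate(1, false, Some(1)) == true, so
// searching stops after the first matching line.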
- pub fn before_context(mut self, count: usize) -> Self { - self.opts.before_context = count; - self - } - - /// If enabled, searching will print a 0-based offset of the - /// matching line (or the actual match if -o is specified) before - /// printing the line itself. - /// - /// Disabled by default. - pub fn byte_offset(mut self, yes: bool) -> Self { - self.opts.byte_offset = yes; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the count of individual matches - /// instead of each match. - /// - /// Disabled by default. - pub fn count_matches(mut self, yes: bool) -> Self { - self.opts.count_matches = yes; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files without any matches. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self.inp.text(yes); - self - } - - /// Execute the search. Results are written to the printer and the total - /// number of matches is returned. - #[inline(never)] - pub fn run(mut self) -> Result { - self.inp.reset(); - self.match_line_count = 0; - self.line_count = if self.opts.line_number { Some(0) } else { None }; - self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; - self.match_count = if self.opts.count_matches { Some(0) } else { None }; - self.last_match = Match::default(); - self.after_context_remaining = 0; - while !self.terminate() { - let upto = self.inp.lastnl; - self.print_after_context(upto); - if !self.fill()? 
{ - break; - } - while !self.terminate() && self.inp.pos < self.inp.lastnl { - let matched = self.grep.read_match( - &mut self.last_match, - &self.inp.buf[..self.inp.lastnl], - self.inp.pos); - if self.opts.invert_match { - let upto = - if matched { - self.last_match.start() - } else { - self.inp.lastnl - }; - if upto > self.inp.pos { - let upto_context = self.inp.pos; - self.print_after_context(upto_context); - self.print_before_context(upto_context); - self.print_inverted_matches(upto); - } - } else if matched { - let start = self.last_match.start(); - let end = self.last_match.end(); - self.print_after_context(start); - self.print_before_context(start); - self.print_match(start, end); - } - if matched { - self.inp.pos = self.last_match.end(); - } else { - self.inp.pos = self.inp.lastnl; - } - } - } - if self.after_context_remaining > 0 { - if self.last_printed == self.inp.lastnl { - self.fill()?; - } - let upto = self.inp.lastnl; - if upto > 0 { - self.print_after_context(upto); - } - } - if self.match_line_count > 0 { - if self.opts.count { - self.printer.path_count(self.path, self.match_line_count); - } else if self.opts.count_matches { - self.printer.path_count(self.path, self.match_count.unwrap()); - } else if self.opts.files_with_matches { - self.printer.path(self.path); - } - } else if self.opts.files_without_matches { - self.printer.path(self.path); - } - Ok(self.match_line_count) - } - - #[inline(always)] - fn terminate(&self) -> bool { - self.opts.terminate(self.match_line_count) - } - - #[inline(always)] - fn fill(&mut self) -> Result { - let keep = - if self.opts.before_context > 0 || self.opts.after_context > 0 { - let lines = 1 + cmp::max( - self.opts.before_context, self.opts.after_context); - start_of_previous_lines( - self.opts.eol, - &self.inp.buf, - self.inp.lastnl.saturating_sub(1), - lines) - } else { - self.inp.lastnl - }; - if keep < self.last_printed { - self.last_printed -= keep; - } else { - self.last_printed = 0; - } - if keep <= self.last_line { - self.last_line -= keep; - } else { - self.count_lines(keep); - self.last_line = 0; - } - self.count_byte_offset(keep); - let ok = self.inp.fill(&mut self.haystack, keep).map_err(|err| { - Error::from_io(err, &self.path) - })?; - Ok(ok) - } - - #[inline(always)] - fn print_inverted_matches(&mut self, upto: usize) { - debug_assert!(self.opts.invert_match); - let mut it = IterLines::new(self.opts.eol, self.inp.pos); - while let Some((start, end)) = it.next(&self.inp.buf[..upto]) { - if self.terminate() { - return; - } - self.print_match(start, end); - self.inp.pos = end; - } - } - - #[inline(always)] - fn print_before_context(&mut self, upto: usize) { - if self.opts.skip_matches() || self.opts.before_context == 0 { - return; - } - let start = self.last_printed; - let end = upto; - if start >= end { - return; - } - let before_context_start = - start + start_of_previous_lines( - self.opts.eol, - &self.inp.buf[start..], - end - start - 1, - self.opts.before_context); - let mut it = IterLines::new(self.opts.eol, before_context_start); - while let Some((s, e)) = it.next(&self.inp.buf[..end]) { - self.print_separator(s); - self.print_context(s, e); - } - } - - #[inline(always)] - fn print_after_context(&mut self, upto: usize) { - if self.opts.skip_matches() || self.after_context_remaining == 0 { - return; - } - let start = self.last_printed; - let end = upto; - let mut it = IterLines::new(self.opts.eol, start); - while let Some((s, e)) = it.next(&self.inp.buf[..end]) { - self.print_context(s, e); - 
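// Each context line printed here spends one unit of the "after context" budget;
// print_match() re-arms that budget to opts.after_context, and the decrement and
// break just below stop emitting trailing context once the budget is exhausted.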
self.after_context_remaining -= 1; - if self.after_context_remaining == 0 { - break; - } - } - } - - #[inline(always)] - fn print_match(&mut self, start: usize, end: usize) { - self.match_line_count += 1; - self.count_individual_matches(start, end); - if self.opts.skip_matches() { - return; - } - self.print_separator(start); - self.count_lines(start); - self.add_line(end); - self.printer.matched( - self.grep.regex(), self.path, - &self.inp.buf, start, end, self.line_count, self.byte_offset); - self.last_printed = end; - self.after_context_remaining = self.opts.after_context; - } - - #[inline(always)] - fn print_context(&mut self, start: usize, end: usize) { - self.count_lines(start); - self.add_line(end); - self.printer.context( - &self.path, &self.inp.buf, start, end, - self.line_count, self.byte_offset); - self.last_printed = end; - } - - #[inline(always)] - fn print_separator(&mut self, before: usize) { - if self.opts.before_context == 0 && self.opts.after_context == 0 { - return; - } - if !self.printer.has_printed() { - return; - } - if (self.last_printed == 0 && before > 0) - || self.last_printed < before { - self.printer.context_separate(); - } - } - - #[inline(always)] - fn count_byte_offset(&mut self, buf_last_end: usize) { - if let Some(ref mut byte_offset) = self.byte_offset { - *byte_offset += buf_last_end as u64; - } - } - - #[inline(always)] - fn count_individual_matches(&mut self, start: usize, end: usize) { - if let Some(ref mut count) = self.match_count { - for _ in self.grep.regex().find_iter(&self.inp.buf[start..end]) { - *count += 1; - } - } - } - - #[inline(always)] - fn count_lines(&mut self, upto: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += count_lines( - &self.inp.buf[self.last_line..upto], self.opts.eol); - self.last_line = upto; - } - } - - #[inline(always)] - fn add_line(&mut self, line_end: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += 1; - self.last_line = line_end; - } - } -} - -/// `InputBuffer` encapsulates the logic of maintaining a ~fixed sized buffer -/// on which to search. There are three key pieces of complexity: -/// -/// 1. We must be able to handle lines that are longer than the size of the -/// buffer. For this reason, the buffer is allowed to expand (and is -/// therefore not technically fixed). Note that once a buffer expands, it -/// will never contract. -/// 2. The contents of the buffer may end with a partial line, so we must keep -/// track of where the last complete line ends. Namely, the partial line -/// is only completed on subsequent reads *after* searching up through -/// the last complete line is done. -/// 3. When printing the context of a match, the last N lines of the buffer -/// may need to be rolled over into the next buffer. For example, a match -/// may occur at the beginning of a buffer, in which case, lines at the end -/// of the previous contents of the buffer need to be printed. -/// -/// An `InputBuffer` is designed to be reused and isn't tied to any particular -/// reader. -pub struct InputBuffer { - /// The number of bytes to attempt to read at a time. Once set, this is - /// never changed. - read_size: usize, - /// The end-of-line terminator used in this buffer. - eol: u8, - /// A scratch buffer. - tmp: Vec, - /// A buffer to read bytes into. All searches are executed directly against - /// this buffer and pos/lastnl/end point into it. - buf: Vec, - /// The current position in buf. 
The current position represents where the - /// next search should start. - pos: usize, - /// The position immediately following the last line terminator in buf. - /// This may be equal to end. - /// - /// Searching should never cross this boundary. In particular, the contents - /// of the buffer following this position may correspond to *partial* line. - /// All contents before this position are complete lines. - lastnl: usize, - /// The end position of the buffer. Data after this position is not - /// specified. - end: usize, - /// Set to true if and only if no reads have occurred yet. - first: bool, - /// Set to true if all binary data should be treated as if it were text. - text: bool, -} - -impl InputBuffer { - /// Create a new buffer with a default capacity. - pub fn new() -> InputBuffer { - InputBuffer::with_capacity(READ_SIZE) - } - - /// Create a new buffer with the capacity given. - /// - /// The capacity determines the size of each read from the underlying - /// reader. - /// - /// `cap` must be a minimum of `1`. - pub fn with_capacity(mut cap: usize) -> InputBuffer { - if cap == 0 { - cap = 1; - } - InputBuffer { - read_size: cap, - eol: b'\n', - buf: vec![0; cap], - tmp: vec![], - pos: 0, - lastnl: 0, - end: 0, - first: true, - text: false, - } - } - - /// Set the end-of-line terminator used by this input buffer. - pub fn eol(&mut self, eol: u8) -> &mut Self { - self.eol = eol; - self - } - - /// If enabled, search binary files as if they were text. - /// - /// Note that this may cause the buffer to load the entire contents of a - /// file into memory. - pub fn text(&mut self, yes: bool) -> &mut Self { - self.text = yes; - self - } - - /// Resets this buffer so that it may be reused with a new reader. - fn reset(&mut self) { - self.pos = 0; - self.lastnl = 0; - self.end = 0; - self.first = true; - } - - /// Fill the contents of this buffer with the reader given. The reader - /// given should be the same in every call to fill unless reset has been - /// called. - /// - /// The bytes in buf[keep_from..end] are rolled over into the beginning - /// of the buffer. - fn fill( - &mut self, - rdr: &mut R, - keep_from: usize, - ) -> Result { - // Rollover bytes from buf[keep_from..end] and update our various - // pointers. N.B. This could be done with the ptr::copy, but I haven't - // been able to produce a benchmark that notices a difference in - // performance. (Invariably, ptr::copy is seems clearer IMO, but it is - // not safe.) - self.tmp.clear(); - self.tmp.extend_from_slice(&self.buf[keep_from..self.end]); - self.buf[0..self.tmp.len()].copy_from_slice(&self.tmp); - self.pos = self.lastnl - keep_from; - self.lastnl = 0; - self.end = self.tmp.len(); - while self.lastnl == 0 { - // If our buffer isn't big enough to hold the contents of a full - // read, expand it. - if self.buf.len() - self.end < self.read_size { - let min_len = self.read_size + self.buf.len() - self.end; - let new_len = cmp::max(min_len, self.buf.len() * 2); - self.buf.resize(new_len, 0); - } - let n = rdr.read( - &mut self.buf[self.end..self.end + self.read_size])?; - if !self.text { - if is_binary(&self.buf[self.end..self.end + n], self.first) { - return Ok(false); - } - } - self.first = false; - // We assume that reading 0 bytes means we've hit EOF. - if n == 0 { - // If we've searched everything up to the end of the buffer, - // then there's nothing left to do. 
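// (Here `pos` records how far searching has progressed and `end` records how much
// data is buffered, so a difference of zero at EOF means every buffered byte has
// already been searched.)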
- if self.end - self.pos == 0 { - return Ok(false); - } - // Even if we hit EOF, we might still have to search the - // last line if it didn't contain a trailing terminator. - self.lastnl = self.end; - break; - } - self.lastnl = - memrchr(self.eol, &self.buf[self.end..self.end + n]) - .map(|i| self.end + i + 1) - .unwrap_or(0); - self.end += n; - } - Ok(true) - } -} - -/// Returns true if and only if the given buffer is determined to be "binary" -/// or otherwise not contain text data that is usefully searchable. -/// -/// Note that this may return both false positives and false negatives. -#[inline(always)] -pub fn is_binary(buf: &[u8], first: bool) -> bool { - if first && buf.len() >= 4 && &buf[0..4] == b"%PDF" { - return true; - } - memchr(b'\x00', buf).is_some() -} - -/// Count the number of lines in the given buffer. -#[inline(never)] -pub fn count_lines(buf: &[u8], eol: u8) -> u64 { - bytecount::count(buf, eol) as u64 -} - -/// Replaces a with b in buf. -#[allow(dead_code)] -fn replace_buf(buf: &mut [u8], a: u8, b: u8) { - if a == b { - return; - } - let mut pos = 0; - while let Some(i) = memchr(a, &buf[pos..]).map(|i| pos + i) { - buf[i] = b; - pos = i + 1; - while buf.get(pos) == Some(&a) { - buf[pos] = b; - pos += 1; - } - } -} - -/// An "iterator" over lines in a particular buffer. -/// -/// Idiomatic Rust would borrow the buffer and use it as internal state to -/// advance over the positions of each line. We neglect that approach to avoid -/// the borrow in the search code. (Because the borrow prevents composition -/// through other mutable methods.) -pub struct IterLines { - eol: u8, - pos: usize, -} - -impl IterLines { - /// Creates a new iterator over lines starting at the position given. - /// - /// The buffer is passed to the `next` method. - #[inline(always)] - pub fn new(eol: u8, start: usize) -> IterLines { - IterLines { - eol: eol, - pos: start, - } - } - - /// Return the start and end position of the next line in the buffer. The - /// buffer given should be the same on every call. - /// - /// The range returned includes the new line. - #[inline(always)] - pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> { - match memchr(self.eol, &buf[self.pos..]) { - None => { - if self.pos < buf.len() { - let start = self.pos; - self.pos = buf.len(); - Some((start, buf.len())) - } else { - None - } - } - Some(end) => { - let start = self.pos; - let end = self.pos + end + 1; - self.pos = end; - Some((start, end)) - } - } - } -} - -/// Returns the starting index of the Nth line preceding `end`. -/// -/// If `buf` is empty, then `0` is returned. If `count` is `0`, then `end` is -/// returned. -/// -/// If `end` points at a new line in `buf`, then searching starts as if `end` -/// pointed immediately before the new line. -/// -/// The position returned corresponds to the first byte in the given line. -#[inline(always)] -fn start_of_previous_lines( - eol: u8, - buf: &[u8], - mut end: usize, - mut count: usize, -) -> usize { - // TODO(burntsushi): This function needs to be badly simplified. The case - // analysis is impossible to follow. 
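// In broad strokes: step backwards from `end`, first skipping over a line
// terminator that `end` happens to sit on, then repeatedly use memrchr to find
// earlier terminators until `count` line starts have been located or the start of
// the buffer is reached.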
- if buf[..end].is_empty() { - return 0; - } - if count == 0 { - return end; - } - if end == buf.len() { - end -= 1; - } - if buf[end] == eol { - if end == 0 { - return end + 1; - } - end -= 1; - } - while count > 0 { - if buf[end] == eol { - count -= 1; - if count == 0 { - return end + 1; - } - if end == 0 { - return end; - } - end -= 1; - continue; - } - match memrchr(eol, &buf[..end]) { - None => { - return 0; - } - Some(i) => { - count -= 1; - end = i; - if end == 0 { - if buf[end] == eol && count == 0 { - end += 1; - } - return end; - } - end -= 1; - } - } - } - end + 2 -} - -#[cfg(test)] -mod tests { - use std::io; - use std::path::Path; - - use grep::GrepBuilder; - use printer::Printer; - use termcolor; - - use super::{InputBuffer, Searcher, start_of_previous_lines}; - - const SHERLOCK: &'static str = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached.\ -"; - - const CODE: &'static str = "\ -extern crate snap; - -use std::io; - -fn main() { - let stdin = io::stdin(); - let stdout = io::stdout(); - - // Wrap the stdin reader in a Snappy reader. - let mut rdr = snap::Reader::new(stdin.lock()); - let mut wtr = stdout.lock(); - io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -} -"; - - fn hay(s: &str) -> io::Cursor> { - io::Cursor::new(s.to_string().into_bytes()) - } - - fn test_path() -> &'static Path { - &Path::new("/baz.rs") - } - - type TestSearcher<'a> = Searcher< - 'a, - io::Cursor>, - termcolor::NoColor>, - >; - - fn search_smallcap TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let mut inp = InputBuffer::with_capacity(1); - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = Searcher::new( - &mut inp, &mut pp, &grep, test_path(), hay(haystack)); - map(searcher).run().unwrap() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - fn search TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let mut inp = InputBuffer::with_capacity(4096); - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = Searcher::new( - &mut inp, &mut pp, &grep, test_path(), hay(haystack)); - map(searcher).run().unwrap() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - #[test] - fn previous_lines() { - let eol = b'\n'; - let text = SHERLOCK.as_bytes(); - assert_eq!(366, text.len()); - - assert_eq!(0, start_of_previous_lines(eol, text, 366, 100)); - assert_eq!(366, start_of_previous_lines(eol, text, 366, 0)); - - assert_eq!(321, start_of_previous_lines(eol, text, 366, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 365, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 364, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 322, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 321, 1)); - assert_eq!(258, start_of_previous_lines(eol, text, 320, 1)); - - assert_eq!(258, start_of_previous_lines(eol, text, 366, 2)); - assert_eq!(258, 
start_of_previous_lines(eol, text, 365, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 364, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 322, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 321, 2)); - assert_eq!(193, start_of_previous_lines(eol, text, 320, 2)); - - assert_eq!(65, start_of_previous_lines(eol, text, 66, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 66, 2)); - assert_eq!(64, start_of_previous_lines(eol, text, 64, 0)); - assert_eq!(0, start_of_previous_lines(eol, text, 64, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 64, 2)); - - assert_eq!(0, start_of_previous_lines(eol, text, 0, 2)); - assert_eq!(0, start_of_previous_lines(eol, text, 0, 1)); - } - - #[test] - fn previous_lines_short() { - let eol = b'\n'; - let text = &b"a\nb\nc\nd\ne\nf\n"[..]; - assert_eq!(12, text.len()); - - assert_eq!(10, start_of_previous_lines(eol, text, 12, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 12, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 12, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 12, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 12, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 12, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 12, 7)); - assert_eq!(10, start_of_previous_lines(eol, text, 11, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 11, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 11, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 11, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 11, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 11, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 11, 7)); - assert_eq!(10, start_of_previous_lines(eol, text, 10, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 10, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 10, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 10, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 10, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 7)); - - assert_eq!(8, start_of_previous_lines(eol, text, 9, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 8, 1)); - - assert_eq!(6, start_of_previous_lines(eol, text, 7, 1)); - assert_eq!(6, start_of_previous_lines(eol, text, 6, 1)); - - assert_eq!(4, start_of_previous_lines(eol, text, 5, 1)); - assert_eq!(4, start_of_previous_lines(eol, text, 4, 1)); - - assert_eq!(2, start_of_previous_lines(eol, text, 3, 1)); - assert_eq!(2, start_of_previous_lines(eol, text, 2, 1)); - - assert_eq!(0, start_of_previous_lines(eol, text, 1, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 0, 1)); - } - - #[test] - fn previous_lines_empty() { - let eol = b'\n'; - let text = &b"\n\n\nd\ne\nf\n"[..]; - assert_eq!(9, text.len()); - - assert_eq!(7, start_of_previous_lines(eol, text, 9, 1)); - assert_eq!(5, start_of_previous_lines(eol, text, 9, 2)); - assert_eq!(3, start_of_previous_lines(eol, text, 9, 3)); - assert_eq!(2, start_of_previous_lines(eol, text, 9, 4)); - assert_eq!(1, start_of_previous_lines(eol, text, 9, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 9, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 9, 7)); - - let text = &b"a\n\n\nd\ne\nf\n"[..]; - assert_eq!(10, text.len()); - - assert_eq!(8, start_of_previous_lines(eol, text, 10, 1)); - assert_eq!(6, start_of_previous_lines(eol, text, 10, 2)); - assert_eq!(4, start_of_previous_lines(eol, text, 10, 3)); - assert_eq!(3, 
start_of_previous_lines(eol, text, 10, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 10, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 7)); - } - - #[test] - fn basic_search1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s|s); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn binary() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s|s); - assert_eq!(0, count); - assert_eq!(out, ""); - } - - #[test] - fn binary_text() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); - } - - #[test] - fn line_numbers() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.line_number(true)); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn count() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.count(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:2\n"); - } - - #[test] - fn byte_offset() { - let (_, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.byte_offset(true)); - assert_eq!(out, "\ -/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn byte_offset_with_before_context() { - let (_, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).byte_offset(true).before_context(2) - }); - assert_eq!(out, "\ -/baz.rs-3-129-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-193-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:258:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn byte_offset_inverted() { - let (_, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).byte_offset(true) - }); - assert_eq!(out, "\ -/baz.rs:65:Holmeses, success in the province of detective work must always -/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:321:and exhibited clearly, with a label attached. 
-"); - } - - #[test] - fn count_matches() { - let (_, out) = search_smallcap( - "the", SHERLOCK, |s| s.count_matches(true)); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn files_with_matches() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); - assert_eq!(1, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn files_without_matches() { - let (count, out) = search_smallcap( - "zzzz", SHERLOCK, |s| s.files_without_matches(true)); - assert_eq!(0, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn max_count() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match_max_count() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.invert_match(true)); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:Holmeses, success in the province of detective work must always -/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_line_numbers() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).line_number(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_count() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).count(true) - }); - assert_eq!(4, count); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn before_context_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(1) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn before_context_invert_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(1).invert_match(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs-1-For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. 
-"); - } - - #[test] - fn before_context_invert_one2() { - let (count, out) = search_smallcap(" a ", SHERLOCK, |s| { - s.line_number(true).before_context(1).invert_match(true) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always --- -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn before_context_two1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn before_context_two2() { - let (count, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn before_context_two3() { - let (count, out) = search_smallcap( - "success|attached", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-1-For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always --- -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_context_two4() { - let (count, out) = search("stdin", CODE, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs-4- -/baz.rs-5-fn main() { -/baz.rs:6: let stdin = io::stdin(); -/baz.rs-7- let stdout = io::stdout(); -/baz.rs-8- -/baz.rs:9: // Wrap the stdin reader in a Snappy reader. -/baz.rs:10: let mut rdr = snap::Reader::new(stdin.lock()); -"); - } - - #[test] - fn before_context_two5() { - let (count, out) = search("stdout", CODE, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-5-fn main() { -/baz.rs-6- let stdin = io::stdin(); -/baz.rs:7: let stdout = io::stdout(); --- -/baz.rs-9- // Wrap the stdin reader in a Snappy reader. -/baz.rs-10- let mut rdr = snap::Reader::new(stdin.lock()); -/baz.rs:11: let mut wtr = stdout.lock(); -"); - } - - #[test] - fn before_context_three1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(3) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. 
Sherlock Holmes -"); - } - - #[test] - fn after_context_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(1) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -"); - } - - #[test] - fn after_context_invert_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(1).invert_match(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_invert_one2() { - let (count, out) = search_smallcap(" a ", SHERLOCK, |s| { - s.line_number(true).after_context(1).invert_match(true) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes --- -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_invert_one_max_count_two() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true) - .invert_match(true) - .after_context(1) - .max_count(Some(2)) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn after_context_two1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn after_context_two2() { - let (count, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_two3() { - let (count, out) = search_smallcap( - "success|attached", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. 
Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; --- -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_two_max_count_two() { - let (count, out) = search_smallcap( - "Doctor", SHERLOCK, |s| { - s.line_number(true).after_context(2).max_count(Some(2)) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes --- -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_three1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(3) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_after_context_two1() { - let (count, out) = search( - r"fn main|let mut rdr", CODE, |s| { - s.line_number(true).after_context(2).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-3-use std::io; -/baz.rs-4- -/baz.rs:5:fn main() { -/baz.rs-6- let stdin = io::stdin(); -/baz.rs-7- let stdout = io::stdout(); -/baz.rs-8- -/baz.rs-9- // Wrap the stdin reader in a Snappy reader. -/baz.rs:10: let mut rdr = snap::Reader::new(stdin.lock()); -/baz.rs-11- let mut wtr = stdout.lock(); -/baz.rs-12- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -"); - } -} diff --git a/src/subject.rs b/src/subject.rs new file mode 100644 index 00000000..61b34554 --- /dev/null +++ b/src/subject.rs @@ -0,0 +1,230 @@ +use std::io; +use std::path::Path; +use std::sync::Arc; + +use ignore::{self, DirEntry}; +use same_file::Handle; + +/// A configuration for describing how subjects should be built. +#[derive(Clone, Debug)] +struct Config { + skip: Option>, + strip_dot_prefix: bool, + separator: Option, + terminator: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + skip: None, + strip_dot_prefix: false, + separator: None, + terminator: None, + } + } +} + +/// A builder for constructing things to search over. +#[derive(Clone, Debug)] +pub struct SubjectBuilder { + config: Config, +} + +impl SubjectBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> SubjectBuilder { + SubjectBuilder { config: Config::default() } + } + + /// Create a new subject from a possibly missing directory entry. + /// + /// If the directory entry isn't present, then the corresponding error is + /// logged if messages have been configured. Otherwise, if the subject is + /// deemed searchable, then it is returned. + pub fn build_from_result( + &self, + result: Result, + ) -> Option { + match result { + Ok(dent) => self.build(dent), + Err(err) => { + message!("{}", err); + None + } + } + } + + /// Create a new subject using this builder's configuration. 
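// (`build_from_result` above is the error-handling shim for directory traversal:
// a walk error is reported through the `message!` macro and becomes `None`, so
// callers can consume walk results with a plain `if let Some(subject)`.)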
+ /// + /// If a subject could not be created or should otherwise not be searched, + /// then this returns `None` after emitting any relevant log messages. + pub fn build(&self, dent: DirEntry) -> Option { + let subj = Subject { + dent: dent, + strip_dot_prefix: self.config.strip_dot_prefix, + }; + if let Some(ignore_err) = subj.dent.error() { + ignore_message!("{}", ignore_err); + } + // If this entry represents stdin, then we always search it. + if subj.dent.is_stdin() { + return Some(subj); + } + // If we're supposed to skip a particular file, then skip it. + if let Some(ref handle) = self.config.skip { + match subj.equals(handle) { + Ok(false) => {} // fallthrough + Ok(true) => { + debug!( + "ignoring {}: (probably same file as stdout)", + subj.dent.path().display() + ); + return None; + } + Err(err) => { + debug!( + "ignoring {}: got error: {}", + subj.dent.path().display(), err + ); + return None; + } + } + } + // If this subject has a depth of 0, then it was provided explicitly + // by an end user (or via a shell glob). In this case, we always want + // to search it if it even smells like a file (e.g., a symlink). + if subj.dent.depth() == 0 && !subj.is_dir() { + return Some(subj); + } + // At this point, we only want to search something it's explicitly a + // file. This omits symlinks. (If ripgrep was configured to follow + // symlinks, then they have already been followed by the directory + // traversal.) + if subj.is_file() { + return Some(subj); + } + // We got nothin. Emit a debug message, but only if this isn't a + // directory. Otherwise, emitting messages for directories is just + // noisy. + if !subj.is_dir() { + debug!( + "ignoring {}: failed to pass subject filter: \ + file type: {:?}, metadata: {:?}", + subj.dent.path().display(), + subj.dent.file_type(), + subj.dent.metadata() + ); + } + None + } + + /// When provided, subjects that represent the same file as the handle + /// given will be skipped. + /// + /// Typically, it is useful to pass a handle referring to stdout, such + /// that the file being written to isn't searched, which can lead to + /// an unbounded feedback mechanism. + /// + /// Only one handle to skip can be provided. + pub fn skip( + &mut self, + handle: Option, + ) -> &mut SubjectBuilder { + self.config.skip = handle.map(Arc::new); + self + } + + /// When enabled, if the subject's file path starts with `./` then it is + /// stripped. + /// + /// This is useful when implicitly searching the current working directory. + pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder { + self.config.strip_dot_prefix = yes; + self + } +} + +/// A subject is a thing we want to search. Generally, a subject is either a +/// file or stdin. +#[derive(Clone, Debug)] +pub struct Subject { + dent: DirEntry, + strip_dot_prefix: bool, +} + +impl Subject { + /// Return the file path corresponding to this subject. + /// + /// If this subject corresponds to stdin, then a special `` path + /// is returned instead. + pub fn path(&self) -> &Path { + if self.strip_dot_prefix && self.dent.path().starts_with("./") { + self.dent.path().strip_prefix("./").unwrap() + } else { + self.dent.path() + } + } + + /// Returns true if and only if this entry corresponds to stdin. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Returns true if and only if this subject points to a directory. 
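// Illustrative usage sketch (not part of this patch; the function name is for
// illustration only), tying together the builder and accessors defined above. It
// assumes the `ignore` crate's `WalkBuilder` as the source of directory entries;
// `build_from_result` logs errors and filters out anything that should not be
// searched.
fn collect_subjects(root: &Path) -> Vec<Subject> {
    let mut builder = SubjectBuilder::new();
    builder.strip_dot_prefix(true);

    let mut subjects = vec![];
    for result in ignore::WalkBuilder::new(root).build() {
        if let Some(subject) = builder.build_from_result(result) {
            // With strip_dot_prefix(true), subject.path() has any leading "./" removed.
            subjects.push(subject);
        }
    }
    subjects
}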
+ /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_dir(&self) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + + self.dent.metadata().map(|md| { + md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + }).unwrap_or(false) + } + + /// Returns true if and only if this subject points to a directory. + #[cfg(not(windows))] + fn is_dir(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_dir()) + } + + /// Returns true if and only if this subject points to a file. + /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Returns true if and only if this subject points to a file. + #[cfg(not(windows))] + fn is_file(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_file()) + } + + /// Returns true if and only if this subject is believed to be equivalent + /// to the given handle. If there was a problem querying this subject for + /// information to determine equality, then that error is returned. + fn equals(&self, handle: &Handle) -> io::Result { + #[cfg(unix)] + fn never_equal(dent: &DirEntry, handle: &Handle) -> bool { + dent.ino() != Some(handle.ino()) + } + + #[cfg(not(unix))] + fn never_equal(_: &DirEntry, _: &Handle) -> bool { + false + } + + // If we know for sure that these two things aren't equal, then avoid + // the costly extra stat call to determine equality. + if self.dent.is_stdin() || never_equal(&self.dent, handle) { + return Ok(false); + } + Handle::from_path(self.path()).map(|h| &h == handle) + } +} diff --git a/src/worker.rs b/src/worker.rs deleted file mode 100644 index 8e840400..00000000 --- a/src/worker.rs +++ /dev/null @@ -1,413 +0,0 @@ -use std::fs::File; -use std::io; -use std::path::{Path, PathBuf}; - -use encoding_rs::Encoding; -use grep::Grep; -use ignore::DirEntry; -use memmap::Mmap; -use termcolor::WriteColor; - -// use decoder::DecodeReader; -use encoding_rs_io::DecodeReaderBytesBuilder; -use decompressor::{self, DecompressionReader}; -use preprocessor::PreprocessorReader; -use pathutil::strip_prefix; -use printer::Printer; -use search_buffer::BufferSearcher; -use search_stream::{InputBuffer, Searcher}; - -use Result; - -pub enum Work { - Stdin, - DirEntry(DirEntry), -} - -pub struct WorkerBuilder { - grep: Grep, - opts: Options, -} - -#[derive(Clone, Debug)] -struct Options { - mmap: bool, - encoding: Option<&'static Encoding>, - after_context: usize, - before_context: usize, - byte_offset: bool, - count: bool, - count_matches: bool, - files_with_matches: bool, - files_without_matches: bool, - eol: u8, - invert_match: bool, - line_number: bool, - max_count: Option, - no_messages: bool, - quiet: bool, - text: bool, - preprocessor: Option, - search_zip_files: bool -} - -impl Default for Options { - fn default() -> Options { - Options { - mmap: false, - encoding: None, - after_context: 0, - before_context: 0, - byte_offset: false, - count: false, - count_matches: false, - files_with_matches: false, - files_without_matches: false, - eol: b'\n', - invert_match: false, - line_number: false, - max_count: None, - no_messages: false, - quiet: false, - text: false, - search_zip_files: false, - preprocessor: None, - } - } -} - -impl WorkerBuilder { - /// Create a new builder for a worker. 
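// (For orientation: a Worker owns one reusable InputBuffer plus the compiled Grep,
// and Worker::run further below picks, per work item, between reading stdin,
// piping through a preprocessor, decompressing, memory-mapping, or plain buffered
// reads before handing the bytes to a streaming or buffer searcher.)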
- /// - /// A reusable input buffer and a grep matcher are required, but there - /// are numerous additional options that can be configured on this builder. - pub fn new(grep: Grep) -> WorkerBuilder { - WorkerBuilder { - grep: grep, - opts: Options::default(), - } - } - - /// Create the worker from this builder. - pub fn build(self) -> Worker { - let mut inpbuf = InputBuffer::new(); - inpbuf.eol(self.opts.eol); - Worker { - grep: self.grep, - inpbuf: inpbuf, - decodebuf: vec![0; 8 * (1<<10)], - opts: self.opts, - } - } - - /// The number of contextual lines to show after each match. The default - /// is zero. - pub fn after_context(mut self, count: usize) -> Self { - self.opts.after_context = count; - self - } - - /// The number of contextual lines to show before each match. The default - /// is zero. - pub fn before_context(mut self, count: usize) -> Self { - self.opts.before_context = count; - self - } - - /// If enabled, searching will print a 0-based offset of the - /// matching line (or the actual match if -o is specified) before - /// printing the line itself. - /// - /// Disabled by default. - pub fn byte_offset(mut self, yes: bool) -> Self { - self.opts.byte_offset = yes; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the count of individual matches - /// instead of each match. - /// - /// Disabled by default. - pub fn count_matches(mut self, yes: bool) -> Self { - self.opts.count_matches = yes; - self - } - - /// Set the encoding to use to read each file. - /// - /// If the encoding is `None` (the default), then the encoding is - /// automatically detected on a best-effort per-file basis. - pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self { - self.opts.encoding = enc; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files without any matches. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, try to use memory maps for searching if possible. - pub fn mmap(mut self, yes: bool) -> Self { - self.opts.mmap = yes; - self - } - - /// If enabled, error messages are suppressed. - /// - /// This is disabled by default. - pub fn no_messages(mut self, yes: bool) -> Self { - self.opts.no_messages = yes; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. 
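// Note that the search-related settings on this builder (contexts, counts, eol,
// inversion, line numbers, max count, quiet, text) are forwarded verbatim onto the
// per-file Searcher in Worker::search below; the mmap, encoding, preprocessor and
// decompression options are instead applied when the input reader is chosen.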
-    pub fn quiet(mut self, yes: bool) -> Self {
-        self.opts.quiet = yes;
-        self
-    }
-
-    /// If enabled, search binary files as if they were text.
-    pub fn text(mut self, yes: bool) -> Self {
-        self.opts.text = yes;
-        self
-    }
-
-    /// If enabled, search through compressed files as well
-    pub fn search_zip_files(mut self, yes: bool) -> Self {
-        self.opts.search_zip_files = yes;
-        self
-    }
-
-    /// If non-empty, search output of preprocessor run on each file
-    pub fn preprocessor(mut self, command: Option<PathBuf>) -> Self {
-        self.opts.preprocessor = command;
-        self
-    }
-}
-
-/// Worker is responsible for executing searches on file paths, while choosing
-/// streaming search or memory map search as appropriate.
-pub struct Worker {
-    grep: Grep,
-    inpbuf: InputBuffer,
-    decodebuf: Vec<u8>,
-    opts: Options,
-}
-
-impl Worker {
-    /// Execute the worker with the given printer and work item.
-    ///
-    /// A work item can either be stdin or a file path.
-    pub fn run<W: WriteColor>(
-        &mut self,
-        printer: &mut Printer<W>,
-        work: Work,
-    ) -> u64 {
-        let result = match work {
-            Work::Stdin => {
-                let stdin = io::stdin();
-                let stdin = stdin.lock();
-                self.search(printer, Path::new("<stdin>"), stdin)
-            }
-            Work::DirEntry(dent) => {
-                let mut path = dent.path();
-                if self.opts.preprocessor.is_some() {
-                    let cmd = self.opts.preprocessor.clone().unwrap();
-                    match PreprocessorReader::from_cmd_path(cmd, path) {
-                        Ok(reader) => self.search(printer, path, reader),
-                        Err(err) => {
-                            if !self.opts.no_messages {
-                                eprintln!("{}", err);
-                            }
-                            return 0;
-                        }
-                    }
-                } else if self.opts.search_zip_files
-                    && decompressor::is_compressed(path)
-                {
-                    match DecompressionReader::from_path(path) {
-                        Some(reader) => self.search(printer, path, reader),
-                        None => {
-                            return 0;
-                        }
-                    }
-                } else {
-                    let file = match File::open(path) {
-                        Ok(file) => file,
-                        Err(err) => {
-                            if !self.opts.no_messages {
-                                eprintln!("{}: {}", path.display(), err);
-                            }
-                            return 0;
-                        }
-                    };
-                    if let Some(p) = strip_prefix("./", path) {
-                        path = p;
-                    }
-                    if self.opts.mmap {
-                        self.search_mmap(printer, path, &file)
-                    } else {
-                        self.search(printer, path, file)
-                    }
-                }
-            }
-        };
-        match result {
-            Ok(count) => {
-                count
-            }
-            Err(err) => {
-                if !self.opts.no_messages {
-                    eprintln!("{}", err);
-                }
-                0
-            }
-        }
-    }
-
-    fn search<R: io::Read, W: WriteColor>(
-        &mut self,
-        printer: &mut Printer<W>,
-        path: &Path,
-        rdr: R,
-    ) -> Result<u64> {
-        let rdr = DecodeReaderBytesBuilder::new()
-            .encoding(self.opts.encoding)
-            .utf8_passthru(true)
-            .build_with_buffer(rdr, &mut self.decodebuf)?;
-        let searcher = Searcher::new(
-            &mut self.inpbuf, printer, &self.grep, path, rdr);
-        searcher
-            .after_context(self.opts.after_context)
-            .before_context(self.opts.before_context)
-            .byte_offset(self.opts.byte_offset)
-            .count(self.opts.count)
-            .count_matches(self.opts.count_matches)
-            .files_with_matches(self.opts.files_with_matches)
-            .files_without_matches(self.opts.files_without_matches)
-            .eol(self.opts.eol)
-            .line_number(self.opts.line_number)
-            .invert_match(self.opts.invert_match)
-            .max_count(self.opts.max_count)
-            .quiet(self.opts.quiet)
-            .text(self.opts.text)
-            .run()
-            .map_err(From::from)
-    }
-
-    fn search_mmap<W: WriteColor>(
-        &mut self,
-        printer: &mut Printer<W>,
-        path: &Path,
-        file: &File,
-    ) -> Result<u64> {
-        if file.metadata()?.len() == 0 {
-            // Opening a memory map with an empty file results in an error.
-            // However, this may not actually be an empty file! For example,
-            // /proc/cpuinfo reports itself as an empty file, but it can
-            // produce data when it's read from. Therefore, we fall back to
-            // regular read calls.
-            return self.search(printer, path, file);
-        }
-        let mmap = match self.mmap(file)? {
-            None => return self.search(printer, path, file),
-            Some(mmap) => mmap,
-        };
-        let buf = &*mmap;
-        if buf.len() >= 3 && Encoding::for_bom(buf).is_some() {
-            // If we have a UTF-16 bom in our memory map, then we need to fall
-            // back to the stream reader, which will do transcoding.
-            return self.search(printer, path, file);
-        }
-        let searcher = BufferSearcher::new(printer, &self.grep, path, buf);
-        Ok(searcher
-            .byte_offset(self.opts.byte_offset)
-            .count(self.opts.count)
-            .count_matches(self.opts.count_matches)
-            .files_with_matches(self.opts.files_with_matches)
-            .files_without_matches(self.opts.files_without_matches)
-            .eol(self.opts.eol)
-            .line_number(self.opts.line_number)
-            .invert_match(self.opts.invert_match)
-            .max_count(self.opts.max_count)
-            .quiet(self.opts.quiet)
-            .text(self.opts.text)
-            .run())
-    }
-
-    #[cfg(not(unix))]
-    fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
-        Ok(Some(mmap_readonly(file)?))
-    }
-
-    #[cfg(unix)]
-    fn mmap(&self, file: &File) -> Result<Option<Mmap>> {
-        use libc::{EOVERFLOW, ENODEV, ENOMEM};
-
-        let err = match mmap_readonly(file) {
-            Ok(mmap) => return Ok(Some(mmap)),
-            Err(err) => err,
-        };
-        let code = err.raw_os_error();
-        if code == Some(EOVERFLOW)
-            || code == Some(ENODEV)
-            || code == Some(ENOMEM)
-        {
-            return Ok(None);
-        }
-        Err(From::from(err))
-    }
-}
-
-fn mmap_readonly(file: &File) -> io::Result<Mmap> {
-    unsafe { Mmap::map(file) }
-}
diff --git a/tests/tests.rs b/tests/tests.rs
index 2ddab867..1c40f22e 100644
--- a/tests/tests.rs
+++ b/tests/tests.rs
@@ -91,8 +91,8 @@ be, to a very large extent, the result of luck. Sherlock Holmes
 sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| {
     let lines: String = wd.stdout(&mut cmd);
     let expected = "\
-sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
-sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
+./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
+./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
 ";
     assert_eq!(lines, expected);
 });
@@ -148,19 +148,19 @@ sherlock!(with_heading_default, "Sherlock", ".",
     cmd.arg("-j1").arg("--heading");
     let lines: String = wd.stdout(&mut cmd);
     let expected1 = "\
-foo
+./foo
 Sherlock Holmes lives on Baker Street.
 
-sherlock
+./sherlock
 For the Doctor Watsons of this world, as opposed to the Sherlock
 be, to a very large extent, the result of luck. Sherlock Holmes
 ";
     let expected2 = "\
-sherlock
+./sherlock
 For the Doctor Watsons of this world, as opposed to the Sherlock
 be, to a very large extent, the result of luck. Sherlock Holmes
 
-foo
+./foo
 Sherlock Holmes lives on Baker Street.
"; if lines != expected1 { @@ -289,14 +289,14 @@ sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-t").arg("rust"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); + assert_eq!(lines, "./file.rs:Sherlock\n"); }); sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); cmd.arg("-t").arg("all"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -305,7 +305,7 @@ sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-T").arg("rust"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(file_types_negate_all, "Sherlock", ".", @@ -315,8 +315,8 @@ sherlock!(file_types_negate_all, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "); }); @@ -333,18 +333,21 @@ sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.wat", "Sherlock"); cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.wat:Sherlock\n"); + assert_eq!(lines, "./file.wat:Sherlock\n"); }); -sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { +sherlock!(file_type_add_compose, "Sherlock", ".", +|wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); wd.create("file.rs", "Sherlock"); wd.create("file.wat", "Sherlock"); cmd.arg("--type-add").arg("wat:*.wat"); cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo"); let lines: String = wd.stdout(&mut cmd); - println!("{}", lines); - assert_eq!(sort_lines(&lines), "file.py:Sherlock\nfile.wat:Sherlock\n"); + assert_eq!( + sort_lines(&lines), + "./file.py:Sherlock\n./file.wat:Sherlock\n" + ); }); sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -352,7 +355,7 @@ sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-g").arg("*.rs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); + assert_eq!(lines, "./file.rs:Sherlock\n"); }); sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -361,14 +364,14 @@ sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-g").arg("!*.rs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(iglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.HTML", "Sherlock"); cmd.arg("--iglob").arg("*.html"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.HTML:Sherlock\n"); + assert_eq!(lines, "./file.HTML:Sherlock\n"); }); sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -376,15 +379,16 @@ sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { 
wd.create("file2.html", "Sherlock"); cmd.arg("--glob").arg("*.html"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file2.html:Sherlock\n"); + assert_eq!(lines, "./file2.html:Sherlock\n"); }); -sherlock!(byte_offset_only_matching, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { +sherlock!(byte_offset_only_matching, "Sherlock", ".", +|wd: WorkDir, mut cmd: Command| { cmd.arg("-b").arg("-o"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:56:Sherlock -sherlock:177:Sherlock +./sherlock:56:Sherlock +./sherlock:177:Sherlock "; assert_eq!(lines, expected); }); @@ -392,35 +396,35 @@ sherlock:177:Sherlock sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:2\n"; + let expected = "./sherlock:2\n"; assert_eq!(lines, expected); }); sherlock!(count_matches, "the", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count-matches"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(count_matches_inverted, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count-matches").arg("--invert-match"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(count_matches_via_only, "the", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count").arg("--only-matching"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(files_with_matches, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--files-with-matches"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock\n"; + let expected = "./sherlock\n"; assert_eq!(lines, expected); }); @@ -429,7 +433,7 @@ sherlock!(files_without_matches, "Sherlock", ".", wd.create("file.py", "foo"); cmd.arg("--files-without-match"); let lines: String = wd.stdout(&mut cmd); - let expected = "file.py\n"; + let expected = "./file.py\n"; assert_eq!(lines, expected); }); @@ -527,7 +531,7 @@ sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("50").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -541,7 +545,7 @@ sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("4K").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -555,7 +559,7 @@ sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("1M").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -583,8 +587,8 @@ sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--hidden"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./.sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -610,8 +614,8 @@ sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-ignore"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -653,8 +657,8 @@ sherlock!(ignore_git_parent_stop, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -686,8 +690,8 @@ sherlock!(ignore_git_parent_stop_file, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -740,8 +744,8 @@ sherlock!(no_parent_ignore_git, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -771,8 +775,8 @@ sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, path(expected)); }); @@ -783,8 +787,8 @@ sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -796,8 +800,8 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./.sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -807,7 +811,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-uuu"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n"); + assert_eq!(lines, "./file:foo\x00bar\n./file:foo\x00baz\n"); }); sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { @@ -815,10 +819,10 @@ sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -829,10 +833,10 @@ sherlock!(vimgrep_no_line, "Sherlock|Watson", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:12:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -843,10 +847,10 @@ sherlock!(vimgrep_no_line_no_column, "Sherlock|Watson", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -869,12 +873,12 @@ clean!(regression_25, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("src/llvm/foo", "test"); let lines: String = wd.stdout(&mut cmd); - let expected = path("src/llvm/foo:test\n"); + let expected = path("./src/llvm/foo:test\n"); assert_eq!(lines, expected); cmd.current_dir(wd.path().join("src")); let lines: String = wd.stdout(&mut cmd); - let expected = path("llvm/foo:test\n"); + let expected = path("./llvm/foo:test\n"); assert_eq!(lines, expected); }); @@ -885,7 +889,7 @@ clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("vendor/manifest", "test"); let lines: String = wd.stdout(&mut cmd); - let expected = path("vendor/manifest:test\n"); + let expected = path("./vendor/manifest:test\n"); assert_eq!(lines, expected); }); @@ -927,7 +931,7 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("dir/bar", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("dir/bar:test\n")); + assert_eq!(lines, path("./dir/bar:test\n")); }); // See: https://github.com/BurntSushi/ripgrep/issues/87 @@ -945,7 +949,7 @@ clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create(".foo", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, ".foo:test\n"); + assert_eq!(lines, "./.foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/93 @@ -954,7 +958,7 @@ clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".", wd.create("foo", "192.168.1.1"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:192.168.1.1\n"); + assert_eq!(lines, "./foo:192.168.1.1\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/99 @@ -966,7 +970,10 @@ clean!(regression_99, "test", ".", cmd.arg("-j1").arg("--heading"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(sort_lines(&lines), sort_lines("bar\ntest\n\nfoo1\ntest\n")); + assert_eq!( + sort_lines(&lines), + sort_lines("./bar\ntest\n\n./foo1\ntest\n") + ); }); // See: https://github.com/BurntSushi/ripgrep/issues/105 @@ -975,7 +982,7 @@ clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--vimgrep"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); + assert_eq!(lines, "./foo:1:3:zztest\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/105 @@ -984,7 +991,7 @@ clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--column"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); + assert_eq!(lines, "./foo:1:3:zztest\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/127 @@ -1009,8 +1016,8 @@ clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = format!("\ -{path}:For the Doctor Watsons of this world, as opposed to the Sherlock -{path}:be, to a very large extent, the result of luck. Sherlock Holmes +./{path}:For the Doctor Watsons of this world, as opposed to the Sherlock +./{path}:be, to a very large extent, the result of luck. 
Sherlock Holmes ", path=path("foo/watson")); assert_eq!(lines, expected); }); @@ -1021,7 +1028,7 @@ clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-n"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:5:x\n"); + assert_eq!(lines, "./foo:5:x\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/131 @@ -1049,8 +1056,8 @@ sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes sym1:For the Doctor Watsons of this world, as opposed to the Sherlock sym1:be, to a very large extent, the result of luck. Sherlock Holmes sym2:For the Doctor Watsons of this world, as opposed to the Sherlock @@ -1094,11 +1101,11 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("foo/bar/baz", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar/baz"))); + assert_eq!(lines, format!("./{}:test\n", path("foo/bar/baz"))); cmd.current_dir(wd.path().join("./foo/bar")); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "baz:test\n"); + assert_eq!(lines, "./baz:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/199 @@ -1107,7 +1114,7 @@ clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--smart-case"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:tEsT\n"); + assert_eq!(lines, "./foo:tEsT\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/206 @@ -1117,7 +1124,7 @@ clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-g").arg("*.txt"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar.txt"))); + assert_eq!(lines, format!("./{}:test\n", path("foo/bar.txt"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/210 @@ -1161,7 +1168,7 @@ clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n"); + assert_eq!(lines, "./foo:привет\n./foo:Привет\n./foo:ПрИвЕт\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/256 @@ -1205,7 +1212,7 @@ clean!(regression_405, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-g").arg("!/foo/**"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("bar/foo/file2.txt"))); + assert_eq!(lines, format!("./{}:test\n", path("bar/foo/file2.txt"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/428 @@ -1220,7 +1227,7 @@ clean!(regression_428_color_context_path, "foo", ".", let expected = format!( "{colored_path}:foo\n{colored_path}-bar\n", colored_path=format!( - "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d{path}\x1b\x5b\x30\x6d", + "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d./{path}\x1b\x5b\x30\x6d", path=path("sherlock"))); assert_eq!(lines, expected); }); @@ -1234,16 +1241,17 @@ clean!(regression_428_unrecognized_style, "Sherlok", ".", let output = cmd.output().unwrap(); let err = String::from_utf8_lossy(&output.stderr); let expected = "\ -Unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense, \ +unrecognized style attribute ''. 
Choose from: nobold, bold, nointense, intense, \ nounderline, underline. "; assert_eq!(err, expected); }); // See: https://github.com/BurntSushi/ripgrep/issues/493 -clean!(regression_493, " 're ", "input.txt", |wd: WorkDir, mut cmd: Command| { +clean!(regression_493, r"\b 're \b", "input.txt", +|wd: WorkDir, mut cmd: Command| { wd.create("input.txt", "peshwaship 're seminomata"); - cmd.arg("-o").arg("-w"); + cmd.arg("-o"); let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, " 're \n"); @@ -1255,8 +1263,8 @@ sherlock!(regression_553_switch, "sherlock", ".", cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); @@ -1264,8 +1272,8 @@ sherlock:be, to a very large extent, the result of luck. Sherlock Holmes cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1305,12 +1313,9 @@ clean!(regression_599, "^$", "input.txt", |wd: WorkDir, mut cmd: Command| { ]); let lines: String = wd.stdout(&mut cmd); - // Technically, the expected output should only be two lines, but: - // https://github.com/BurntSushi/ripgrep/issues/441 let expected = "\ 1: 2: -4: "; assert_eq!(expected, lines); }); @@ -1326,7 +1331,7 @@ clean!(regression_807, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--hidden"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path(".a/c/file"))); + assert_eq!(lines, format!("./{}:test\n", path(".a/c/file"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/900 @@ -1343,7 +1348,7 @@ clean!(feature_1_sjis, "Шерлок Холмс", ".", |wd: WorkDir, mut cmd: Co cmd.arg("-Esjis"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1354,7 +1359,7 @@ clean!(feature_1_utf16_auto, "Шерлок Холмс", ".", wd.create_bytes("foo", &sherlock[..]); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1366,7 +1371,7 @@ clean!(feature_1_utf16_explicit, "Шерлок Холмс", ".", cmd.arg("-Eutf-16le"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1378,7 +1383,7 @@ clean!(feature_1_eucjp, "Шерлок Холмс", ".", cmd.arg("-Eeuc-jp"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1413,8 +1418,8 @@ sherlock!(feature_7_dash, "-f-", ".", |wd: WorkDir, mut cmd: Command| { let output = wd.pipe(&mut cmd, "Sherlock"); let lines = String::from_utf8_lossy(&output.stdout); let expected = "\ -sherlock:For the Doctor 
Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1439,8 +1444,8 @@ sherlock!(feature_34_only_matching, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:Sherlock -sherlock:Sherlock +./sherlock:Sherlock +./sherlock:Sherlock "; assert_eq!(lines, expected); }); @@ -1452,8 +1457,8 @@ sherlock!(feature_34_only_matching_line_column, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:1:57:Sherlock -sherlock:3:49:Sherlock +./sherlock:1:57:Sherlock +./sherlock:3:49:Sherlock "; assert_eq!(lines, expected); }); @@ -1476,15 +1481,15 @@ sherlock!(feature_45_relative_cwd, "test", ".", // First, get a baseline without applying ignore rules. let lines = paths_from_stdout(wd.stdout(&mut cmd)); assert_eq!(lines, paths(&[ - "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo", - "baz/test", "foo", "test", + "./bar/test", "./baz/bar/test", "./baz/baz/bar/test", "./baz/foo", + "./baz/test", "./foo", "./test", ])); // Now try again with the ignore file activated. cmd.arg("--ignore-file").arg(".not-an-ignore"); let lines = paths_from_stdout(wd.stdout(&mut cmd)); assert_eq!(lines, paths(&[ - "baz/bar/test", "baz/baz/bar/test", "baz/test", "test", + "./baz/bar/test", "./baz/baz/bar/test", "./baz/test", "./test", ])); // Now do it again, but inside the baz directory. @@ -1496,7 +1501,7 @@ sherlock!(feature_45_relative_cwd, "test", ".", cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore"); cmd.current_dir(wd.path().join("baz")); let lines = paths_from_stdout(wd.stdout(&mut cmd)); - assert_eq!(lines, paths(&["baz/bar/test", "test"])); + assert_eq!(lines, paths(&["./baz/bar/test", "./test"])); }); // See: https://github.com/BurntSushi/ripgrep/issues/45 @@ -1509,7 +1514,7 @@ sherlock!(feature_45_precedence_with_others, "test", ".", cmd.arg("--ignore-file").arg(".not-an-ignore"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); + assert_eq!(lines, "./imp.log:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/45 @@ -1523,7 +1528,7 @@ sherlock!(feature_45_precedence_internal, "test", ".", cmd.arg("--ignore-file").arg(".not-an-ignore1"); cmd.arg("--ignore-file").arg(".not-an-ignore2"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); + assert_eq!(lines, "./imp.log:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/68 @@ -1535,7 +1540,7 @@ clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-ignore-vcs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); + assert_eq!(lines, "./foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/70 @@ -1545,8 +1550,8 @@ sherlock!(feature_70_smart_case, "sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1557,7 +1562,7 @@ sherlock!(feature_89_files_with_matches, "Sherlock", ".", cmd.arg("--null").arg("--files-with-matches"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); + assert_eq!(lines, "./sherlock\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1567,7 +1572,7 @@ sherlock!(feature_89_files_without_matches, "Sherlock", ".", cmd.arg("--null").arg("--files-without-match"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py\x00"); + assert_eq!(lines, "./file.py\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1576,7 +1581,7 @@ sherlock!(feature_89_count, "Sherlock", ".", cmd.arg("--null").arg("--count"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); + assert_eq!(lines, "./sherlock\x002\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1585,7 +1590,7 @@ sherlock!(feature_89_files, "NADA", ".", cmd.arg("--null").arg("--files"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); + assert_eq!(lines, "./sherlock\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1595,10 +1600,10 @@ sherlock!(feature_89_match, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock\x00Holmeses, success in the province of detective work must always -sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes -sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; +./sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock\x00Holmeses, success in the province of detective work must always +./sherlock\x00be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; "; assert_eq!(lines, expected); }); @@ -1613,7 +1618,7 @@ clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--maxdepth").arg("2"); let lines: String = wd.stdout(&mut cmd); - let expected = path("one/pass:far\n"); + let expected = path("./one/pass:far\n"); assert_eq!(lines, expected); }); @@ -1639,7 +1644,7 @@ clean!(feature_129_matches, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M26"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo:[Omitted long line with 2 matches]\n"; + let expected = "./foo:test\n./foo:[Omitted long matching line]\n"; assert_eq!(lines, expected); }); @@ -1649,7 +1654,7 @@ clean!(feature_129_context, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M20").arg("-C1"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo-[Omitted long context line]\n"; + let expected = "./foo:test\n./foo-[Omitted long context line]\n"; assert_eq!(lines, expected); }); @@ -1659,7 +1664,7 @@ clean!(feature_129_replace, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M26").arg("-rfoo"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:foo\nfoo:[Omitted long line with 2 replacements]\n"; + let expected = "./foo:foo\n./foo:[Omitted long line with 2 matches]\n"; assert_eq!(lines, expected); }); @@ -1668,7 +1673,7 @@ clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("foo", "test\ntest"); cmd.arg("-m1"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); + assert_eq!(lines, "./foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/159 @@ -1684,7 +1689,7 @@ clean!(feature_243_column_line, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--column"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:1:test\n"); + assert_eq!(lines, "./foo:1:1:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/263 @@ -1696,7 +1701,7 @@ clean!(feature_263_sort_files, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--sort-files"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "abc:test\nbar:test\nfoo:test\nzoo:test\n"); + assert_eq!(lines, "./abc:test\n./bar:test\n./foo:test\n./zoo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/275 @@ -1706,7 +1711,7 @@ clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--path-separator").arg("Z"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "fooZbar:test\n"); + assert_eq!(lines, ".ZfooZbar:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/362 @@ -1746,7 +1751,7 @@ sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".", cmd.arg("-0").arg("--count"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); + assert_eq!(lines, "./sherlock\x002\n"); }); #[test] @@ -1932,59 +1937,52 @@ fn feature_411_parallel_search_stats() { assert_eq!(lines.contains("seconds"), true); } -sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-with-matches"); - cmd.arg("--stats"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines.contains("seconds"), false); -}); - -sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-without-match"); - cmd.arg("--stats"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines.contains("seconds"), false); 
-}); - #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740"); wd.create("file", "\nfoo\nbar\nfoobar\n\nbaz\n"); - wd.create("patterns", "foo\n\nbar\n"); + wd.create("patterns", "foo\nbar\n"); // We can't assume that the way colour specs are translated to ANSI // sequences will remain stable, and --replace doesn't currently work with // pass-through, so for now we don't actually test the match sub-strings let common_args = &["-n", "--passthru"]; - let expected = "\ -1: + let foo_expected = "\ +1- 2:foo -3:bar +3-bar 4:foobar -5: -6:baz +5- +6-baz "; // With single pattern let mut cmd = wd.command(); cmd.args(common_args).arg("foo").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_expected); + + let foo_bar_expected = "\ +1- +2:foo +3:bar +4:foobar +5- +6-baz +"; // With multiple -e patterns let mut cmd = wd.command(); cmd.args(common_args) .arg("-e").arg("foo").arg("-e").arg("bar").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_bar_expected); // With multiple -f patterns let mut cmd = wd.command(); cmd.args(common_args).arg("-f").arg("patterns").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_bar_expected); // -c should override let mut cmd = wd.command(); @@ -1992,15 +1990,35 @@ fn feature_740_passthru() { let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, "2\n"); + let only_foo_expected = "\ +1- +2:foo +3-bar +4:foo +5- +6-baz +"; + // -o should conflict let mut cmd = wd.command(); cmd.args(common_args).arg("-o").arg("foo").arg("file"); - wd.assert_err(&mut cmd); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, only_foo_expected); + + let replace_foo_expected = "\ +1- +2:wat +3-bar +4:watbar +5- +6-baz +"; // -r should conflict let mut cmd = wd.command(); - cmd.args(common_args).arg("-r").arg("$0").arg("foo").arg("file"); - wd.assert_err(&mut cmd); + cmd.args(common_args).arg("-r").arg("wat").arg("foo").arg("file"); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, replace_foo_expected); } #[test] @@ -2081,7 +2099,7 @@ fn regression_270() { let mut cmd = wd.command(); cmd.arg("-e").arg("-test").arg("./"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("foo:-test\n")); + assert_eq!(lines, path("./foo:-test\n")); } // See: https://github.com/BurntSushi/ripgrep/issues/391 @@ -2232,8 +2250,8 @@ fn regression_693_context_option_in_contextless_mode() { let lines: String = wd.stdout(&mut cmd); let expected = "\ -bar:1 -foo:1 +./bar:1 +./foo:1 "; assert_eq!(lines, expected); }
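For readers who want to exercise the Windows directory check from the `Subject::is_dir` hunk above in isolation: the sketch below is a minimal standalone approximation and is not part of the patch. It assumes the `winapi` crate with its `winnt` feature as a dependency; the function name `is_dir_via_attributes` and the use of plain `std::fs::metadata` (instead of ripgrep's `DirEntry`) are illustrative choices, not ripgrep's actual wiring.

```rust
use std::path::Path;

/// Sketch of the attribute-based directory check used by the new
/// `Subject::is_dir` on Windows, which works around
/// https://github.com/rust-lang/rust/issues/46484 by reading the raw
/// file attribute bits instead of trusting the reported file type.
#[cfg(windows)]
fn is_dir_via_attributes(path: &Path) -> bool {
    use std::os::windows::fs::MetadataExt;
    use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY;

    // `&` binds tighter than `!=` in Rust, so this matches the expression
    // in the patch without extra parentheses.
    std::fs::metadata(path)
        .map(|md| md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0)
        .unwrap_or(false)
}

/// A plain `std` approximation of the `#[cfg(not(windows))]` branch in the
/// patch: on other platforms the standard file type query is sufficient.
#[cfg(not(windows))]
fn is_dir_via_attributes(path: &Path) -> bool {
    std::fs::metadata(path).map(|md| md.is_dir()).unwrap_or(false)
}

fn main() {
    println!("{}", is_dir_via_attributes(Path::new(".")));
}
```

As in the patch, a Windows `is_file` check can then be expressed as the negation of this test rather than consulting the file type affected by the upstream bug.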