From bb110c1ebeeda452046830b3991f705f5759da92 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 3 Aug 2018 17:26:22 -0400 Subject: [PATCH] ripgrep: migrate to libripgrep This commit does the work to delete the old `grep` crate and effectively rewrite most of ripgrep core to use the new libripgrep crates. The new `grep` crate is now a facade that collects the various crates that make up libripgrep. The most complex part of ripgrep core is now arguably the translation between command line parameters and the library options, which is ultimately where we want to be. --- .travis.yml | 2 + Cargo.lock | 39 +- Cargo.toml | 26 +- FAQ.md | 36 +- README.md | 118 +- appveyor.yml | 25 +- build.rs | 77 ++ ci/before_deploy.sh | 6 +- ci/script.sh | 8 +- ci/utils.sh | 7 - complete/_rg | 25 + globset/README.md | 2 +- globset/src/lib.rs | 1 - grep/Cargo.toml | 21 +- grep/README.md | 41 +- grep/examples/simplegrep.rs | 107 ++ grep/src/lib.rs | 98 +- grep/src/literals.rs | 274 ----- grep/src/nonl.rs | 74 -- grep/src/search.rs | 317 ----- grep/src/smart_case.rs | 191 --- grep/src/word_boundary.rs | 53 - grep2/COPYING | 3 - grep2/Cargo.toml | 23 - grep2/LICENSE-MIT | 21 - grep2/README.md | 4 - grep2/UNLICENSE | 24 - grep2/src/lib.rs | 10 - ignore/Cargo.toml | 2 +- ignore/README.md | 2 +- ignore/examples/walk.rs | 5 - src/app.rs | 381 +++++- src/args.rs | 2256 ++++++++++++++++++++--------------- src/config.rs | 19 +- src/logger.rs | 25 +- src/main.rs | 586 ++++----- src/messages.rs | 50 + src/path_printer.rs | 101 ++ src/pathutil.rs | 42 - src/preprocessor.rs | 15 +- src/printer.rs | 928 -------------- src/search.rs | 408 +++++++ src/search_buffer.rs | 424 ------- src/search_stream.rs | 1466 ----------------------- src/subject.rs | 230 ++++ src/worker.rs | 413 ------- tests/tests.rs | 336 +++--- 47 files changed, 3302 insertions(+), 6020 deletions(-) create mode 100644 grep/examples/simplegrep.rs delete mode 100644 grep/src/literals.rs delete mode 100644 grep/src/nonl.rs delete mode 100644 grep/src/search.rs delete mode 100644 grep/src/smart_case.rs delete mode 100644 grep/src/word_boundary.rs delete mode 100644 grep2/COPYING delete mode 100644 grep2/Cargo.toml delete mode 100644 grep2/LICENSE-MIT delete mode 100644 grep2/README.md delete mode 100644 grep2/UNLICENSE delete mode 100644 grep2/src/lib.rs create mode 100644 src/messages.rs create mode 100644 src/path_printer.rs delete mode 100644 src/pathutil.rs delete mode 100644 src/printer.rs create mode 100644 src/search.rs delete mode 100644 src/search_buffer.rs delete mode 100644 src/search_stream.rs create mode 100644 src/subject.rs delete mode 100644 src/worker.rs diff --git a/.travis.yml b/.travis.yml index 5fc57d60..d47249a0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,8 @@ addons: # Needed for testing decompression search. - xz-utils - liblz4-tool + # For building MUSL static builds on Linux. 
+ - musl-tools matrix: fast_finish: true include: diff --git a/Cargo.lock b/Cargo.lock index ff324797..57d2975b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -139,12 +139,16 @@ dependencies = [ [[package]] name = "grep" -version = "0.1.9" +version = "0.2.0" dependencies = [ - "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "regex-syntax 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", + "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", + "grep-matcher 0.0.1", + "grep-pcre2 0.0.1", + "grep-printer 0.0.1", + "grep-regex 0.0.1", + "grep-searcher 0.0.1", + "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -204,16 +208,6 @@ dependencies = [ "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "grep2" -version = "0.1.8" -dependencies = [ - "grep-matcher 0.0.1", - "grep-printer 0.0.1", - "grep-regex 0.0.1", - "grep-searcher 0.0.1", -] - [[package]] name = "ignore" version = "0.4.3" @@ -227,7 +221,7 @@ dependencies = [ "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "tempdir 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "thread_local 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", - "walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -378,21 +372,16 @@ name = "ripgrep" version = "0.9.0" dependencies = [ "atty 0.2.11 (registry+https://github.com/rust-lang/crates.io-index)", - "bytecount 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.32.0 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)", - "encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "globset 0.4.1", - "grep 0.1.9", + "grep 0.2.0", "ignore 0.4.3", "lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", - "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", - "memchr 2.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", "num_cpus 1.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_json 1.0.24 (registry+https://github.com/rust-lang/crates.io-index)", "termcolor 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -520,7 +509,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "walkdir" -version = "2.1.4" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "same-file 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -608,7 +597,7 @@ dependencies = [ "checksum unicode-width 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = 
"882386231c45df4700b275c7ff55b6f3698780a650026380e72dabe76fa46526" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" -"checksum walkdir 2.1.4 (registry+https://github.com/rust-lang/crates.io-index)" = "63636bd0eb3d00ccb8b9036381b526efac53caf112b7783b730ab3f8e44da369" +"checksum walkdir 2.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f1b768ba943161a9226ccd59b26bcd901e5d60e6061f4fcad3034784e0c7372b" "checksum winapi 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "773ef9dcc5f24b7d850d0ff101e542ff24c3b090a9768e03ff889fdef41f00fd" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index ec6ee1b5..74648607 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -35,7 +35,6 @@ path = "tests/tests.rs" members = [ "globset", "grep", - "grep2", "grep-matcher", "grep-pcre2", "grep-printer", @@ -46,20 +45,15 @@ members = [ [dependencies] atty = "0.2.11" -bytecount = "0.3.2" -encoding_rs = "0.8" -encoding_rs_io = "0.1" globset = { version = "0.4.0", path = "globset" } -grep = { version = "0.1.8", path = "grep" } +grep = { version = "0.2.0", path = "grep" } ignore = { version = "0.4.0", path = "ignore" } lazy_static = "1" -libc = "0.2" log = "0.4" -memchr = "2" -memmap = "0.6" num_cpus = "1" regex = "1" same-file = "1" +serde_json = "1" termcolor = "1" [dependencies.clap] @@ -69,7 +63,7 @@ features = ["suggestions", "color"] [target.'cfg(windows)'.dependencies.winapi] version = "0.3" -features = ["std", "winnt"] +features = ["std", "fileapi", "winnt"] [build-dependencies] lazy_static = "1" @@ -80,15 +74,9 @@ default-features = false features = ["suggestions", "color"] [features] -avx-accel = [ - "bytecount/avx-accel", - "grep2/avx-accel", -] -simd-accel = [ - "bytecount/simd-accel", - "encoding_rs/simd-accel", - "grep2/simd-accel", -] +avx-accel = ["grep/avx-accel"] +simd-accel = ["grep/simd-accel"] +pcre2 = ["grep/pcre2"] [profile.release] -debug = true +debug = 1 diff --git a/FAQ.md b/FAQ.md index 868c4723..ff0bc5e5 100644 --- a/FAQ.md +++ b/FAQ.md @@ -157,13 +157,37 @@ tool. With that said, How do I use lookaround and/or backreferences? -This isn't currently possible. ripgrep uses finite automata to implement -regular expression search, and in turn, guarantees linear time searching on all -inputs. It is difficult to efficiently support lookaround and backreferences in -finite automata engines, so ripgrep does not provide these features. +ripgrep's default regex engine does not support lookaround or backreferences. +This is primarily because the default regex engine is implemented using finite +state machines in order to guarantee a linear worst case time complexity on all +inputs. Backreferences are not possible to implement in this paradigm, and +lookaround appears difficult to do efficiently. 
-If a production quality regular expression engine with these features is ever -written in Rust, then it is possible ripgrep will provide it as an opt-in +However, ripgrep optionally supports using PCRE2 as the regex engine instead of +the default one based on finite state machines. You can enable PCRE2 with the +`-P/--pcre2` flag. For example, in the root of the ripgrep repo, you can easily +find all palindromes: + +``` +$ rg -P '(\w{10})\1' +tests/misc.rs +483: cmd.arg("--max-filesize").arg("44444444444444444444"); +globset/src/glob.rs +1206: matches!(match7, "a*a*a*a*a*a*a*a*a", "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); +``` + +If your version of ripgrep doesn't support PCRE2, then you'll get an error +message when you try to use the `-P/--pcre2` flag: + +``` +$ rg -P '(\w{10})\1' +PCRE2 is not available in this build of ripgrep +``` + +Most of the releases distributed by the ripgrep project here on GitHub will +come bundled with PCRE2 enabled. If you installed ripgrep through a different +means (like your system's package manager), then please reach out to the +maintainer of that package to see whether it's possible to enable the PCRE2 feature. diff --git a/README.md b/README.md index 2b7ada60..351de389 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ available for [every release](https://github.com/BurntSushi/ripgrep/releases). ripgrep is similar to other popular search tools like The Silver Searcher, ack and grep. -[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg?branch=master)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![Crates.io](https://img.shields.io/crates/v/ripgrep.svg)](https://crates.io/crates/ripgrep) @@ -85,14 +85,16 @@ increases the times to `2.640s` for ripgrep and `10.277s` for GNU grep. ### Why should I use ripgrep? -* It can replace many use cases served by both The Silver Searcher and GNU grep - because it is generally faster than both. (See [the FAQ](FAQ.md#posix4ever) - for more details on whether ripgrep can truly replace grep.) -* Like The Silver Searcher, ripgrep defaults to recursive directory search - and won't search files ignored by your `.gitignore` files. It also ignores - hidden and binary files by default. ripgrep also implements full support - for `.gitignore`, whereas there are many bugs related to that functionality - in The Silver Searcher. +* It can replace many use cases served by other search tools + because it contains most of their features and is generally faster. (See + [the FAQ](FAQ.md#posix4ever) for more details on whether ripgrep can truly + replace grep.) +* Like other tools specialized to code search, ripgrep defaults to recursive + directory search and won't search files ignored by your `.gitignore` files. + It also ignores hidden and binary files by default. ripgrep also implements + full support for `.gitignore`, whereas there are many bugs related to that + functionality in other code search tools claiming to provide the same + functionality. * ripgrep can search specific types of files. For example, `rg -tpy foo` limits your search to Python files and `rg -Tjs foo` excludes Javascript files from your search. ripgrep can be taught about new file types with @@ -117,22 +119,24 @@ bugs, and Unicode support. ### Why shouldn't I use ripgrep? 
-I'd like to try to convince you why you *shouldn't* use ripgrep. This should -give you a glimpse at some important downsides or missing features of -ripgrep. +Despite initially not wanting to add every feature under the sun to ripgrep, +over time, ripgrep has grown support for most features found in other file +searching tools. This includes searching for results spanning across multiple +lines, and opt-in support for PCRE2, which provides look-around and +backreference support. -* ripgrep uses a regex engine based on finite automata, so if you want fancy - regex features such as backreferences or lookaround, ripgrep won't provide - them to you. ripgrep does support lots of things though, including, but not - limited to: lazy quantification (e.g., `a+?`), repetitions (e.g., `a{2,5}`), - begin/end assertions (e.g., `^\w+$`), word boundaries (e.g., `\bfoo\b`), and - support for Unicode categories (e.g., `\p{Sc}` to match currency symbols or - `\p{Lu}` to match any uppercase letter). (Fancier regexes will never be - supported.) -* ripgrep doesn't have multiline search. (Will happen as an opt-in feature.) +At this point, the primary reasons not to use ripgrep probably consist of one +or more of the following: -In other words, if you like fancy regexes or multiline search, then ripgrep -may not quite meet your needs (yet). +* You need a portable and ubiquitous tool. While ripgrep works on Windows, + macOS and Linux, it is not ubiquitous and it does not conform to any + standard such as POSIX. The best tool for this job is good old grep. +* There still exists some other minor feature (or bug) found in another tool + that isn't in ripgrep. +* There is a performance edge case where ripgrep doesn't do well where another + tool does do well. (Please file a bug report!) +* ripgrep isn't possible to install on your machine or isn't available for your + platform. (Please file a bug report!) ### Is it really faster than everything else? @@ -145,7 +149,8 @@ Summarizing, ripgrep is fast because: * It is built on top of [Rust's regex engine](https://github.com/rust-lang-nursery/regex). Rust's regex engine uses finite automata, SIMD and aggressive literal - optimizations to make searching very fast. + optimizations to make searching very fast. (PCRE2 support can be opted into + with the `-P/--pcre2` flag.) * Rust's regex library maintains performance with full Unicode support by building UTF-8 decoding directly into its deterministic finite automaton engine. @@ -168,6 +173,11 @@ Andy Lester, author of [ack](https://beyondgrep.com/), has published an excellent table comparing the features of ack, ag, git-grep, GNU grep and ripgrep: https://beyondgrep.com/feature-comparison/ +Note that ripgrep has grown a few significant new features recently that +are not yet present in Andy's table. This includes, but is not limited to, +configuration files, passthru, support for searching compressed files, +multiline search and opt-in fancy regex support via PCRE2. 
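For instance, a few illustrative invocations of the features mentioned above (the file names here are placeholders, not files from this repository):

```
$ rg -U 'fn main\(\) \{\n'        # multiline search
$ rg -z 'TODO' notes.txt.gz       # search inside compressed files
$ rg -P '(?<=struct )\w+'         # PCRE2 look-behind
$ rg --passthru 'error' app.log   # print matching and non-matching lines
```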
+ ### Installation @@ -207,13 +217,15 @@ If you're a **MacPorts** user, then you can install ripgrep from the $ sudo port install ripgrep ``` -If you're a **Windows Chocolatey** user, then you can install ripgrep from the [official repo](https://chocolatey.org/packages/ripgrep): +If you're a **Windows Chocolatey** user, then you can install ripgrep from the +[official repo](https://chocolatey.org/packages/ripgrep): ``` $ choco install ripgrep ``` -If you're a **Windows Scoop** user, then you can install ripgrep from the [official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): +If you're a **Windows Scoop** user, then you can install ripgrep from the +[official bucket](https://github.com/lukesampson/scoop/blob/master/bucket/ripgrep.json): ``` $ scoop install ripgrep @@ -225,32 +237,37 @@ If you're an **Arch Linux** user, then you can install ripgrep from the official $ pacman -S ripgrep ``` -If you're a **Gentoo** user, you can install ripgrep from the [official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): +If you're a **Gentoo** user, you can install ripgrep from the +[official repo](https://packages.gentoo.org/packages/sys-apps/ripgrep): ``` $ emerge sys-apps/ripgrep ``` -If you're a **Fedora 27+** user, you can install ripgrep from official repositories. +If you're a **Fedora 27+** user, you can install ripgrep from official +repositories. ``` $ sudo dnf install ripgrep ``` -If you're a **Fedora 24+** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **Fedora 24+** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` $ sudo dnf copr enable carlwgeorge/ripgrep $ sudo dnf install ripgrep ``` -If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the [official repo](http://software.opensuse.org/package/ripgrep): +If you're an **openSUSE Tumbleweed** user, you can install ripgrep from the +[official repo](http://software.opensuse.org/package/ripgrep): ``` $ sudo zypper install ripgrep ``` -If you're a **RHEL/CentOS 7** user, you can install ripgrep from [copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): +If you're a **RHEL/CentOS 7** user, you can install ripgrep from +[copr](https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/): ``` $ sudo yum-config-manager --add-repo=https://copr.fedorainfracloud.org/coprs/carlwgeorge/ripgrep/repo/epel-7/carlwgeorge-ripgrep-epel-7.repo @@ -286,25 +303,29 @@ seem to work right and generate a number of very strange bug reports that I don't know how to fix and don't have the time to fix. Therefore, it is no longer a recommended installation option.) 
-If you're a **FreeBSD** user, then you can install ripgrep from the [official ports](https://www.freshports.org/textproc/ripgrep/): +If you're a **FreeBSD** user, then you can install ripgrep from the +[official ports](https://www.freshports.org/textproc/ripgrep/): ``` # pkg install ripgrep ``` -If you're an **OpenBSD** user, then you can install ripgrep from the [official ports](http://openports.se/textproc/ripgrep): +If you're an **OpenBSD** user, then you can install ripgrep from the +[official ports](http://openports.se/textproc/ripgrep): ``` $ doas pkg_add ripgrep ``` -If you're a **NetBSD** user, then you can install ripgrep from [pkgsrc](http://pkgsrc.se/textproc/ripgrep): +If you're a **NetBSD** user, then you can install ripgrep from +[pkgsrc](http://pkgsrc.se/textproc/ripgrep): ``` # pkgin install ripgrep ``` If you're a **Rust programmer**, ripgrep can be installed with `cargo`. + * Note that the minimum supported version of Rust for ripgrep is **1.23.0**, although ripgrep may work with older versions. * Note that the binary may be bigger than expected because it contains debug @@ -353,6 +374,35 @@ are not necessary to get SIMD optimizations for search; those are enabled automatically. Hopefully, some day, the `simd-accel` and `avx-accel` features will similarly become unnecessary. +Finally, optional PCRE2 support can be built with ripgrep by enabling the +`pcre2` feature: + +``` +$ cargo build --release --features 'pcre2' +``` + +(Tip: use `--features 'pcre2 simd-accel avx-accel'` to also include compile +time SIMD optimizations.) + +Enabling the PCRE2 feature will attempt to automatically find and link with +your system's PCRE2 library via `pkg-config`. If one doesn't exist, then +ripgrep will build PCRE2 from source using your system's C compiler and then +statically link it into the final executable. Static linking can be forced even +when there is an available PCRE2 system library by either building ripgrep with +the MUSL target or by setting `PCRE2_SYS_STATIC=1`. + +ripgrep can be built with the MUSL target on Linux by first installing the MUSL +library on your system (consult your friendly neighborhood package manager). +Then you just need to add MUSL support to your Rust toolchain and rebuild +ripgrep, which yields a fully static executable: + +``` +$ rustup target add x86_64-unknown-linux-musl +$ cargo build --release --target x86_64-unknown-linux-musl +``` + +Applying the `--features` flag from above works as expected. 
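For example, combining the PCRE2 and MUSL recipes above into a single build (a sketch of the combined invocation, not a command lifted verbatim from this patch):

```
$ rustup target add x86_64-unknown-linux-musl
$ cargo build --release --target x86_64-unknown-linux-musl --features 'pcre2'
```

This yields a fully static `rg` with PCRE2 built from source and statically linked, since the MUSL target forces static linking of PCRE2 as described above.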
+ ### Running tests diff --git a/appveyor.yml b/appveyor.yml index 26daf224..bea157cf 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -1,8 +1,6 @@ -# Inspired from https://github.com/habitat-sh/habitat/blob/master/appveyor.yml cache: - c:\cargo\registry - c:\cargo\git - - c:\projects\ripgrep\target init: - mkdir c:\cargo @@ -19,14 +17,20 @@ environment: PROJECT_NAME: ripgrep RUST_BACKTRACE: full matrix: - - TARGET: i686-pc-windows-gnu - CHANNEL: stable - - TARGET: i686-pc-windows-msvc - CHANNEL: stable - TARGET: x86_64-pc-windows-gnu CHANNEL: stable + BITS: 64 + MSYS2: 1 - TARGET: x86_64-pc-windows-msvc CHANNEL: stable + BITS: 64 + - TARGET: i686-pc-windows-gnu + CHANNEL: stable + BITS: 32 + MSYS2: 1 + - TARGET: i686-pc-windows-msvc + CHANNEL: stable + BITS: 32 matrix: fast_finish: true @@ -35,8 +39,9 @@ matrix: # (Based on from https://github.com/rust-lang/libc/blob/master/appveyor.yml) install: - curl -sSf -o rustup-init.exe https://win.rustup.rs/ - - rustup-init.exe -y --default-host %TARGET% --no-modify-path - - if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin + - rustup-init.exe -y --default-host %TARGET% + - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin + - if defined MSYS2 set PATH=C:\msys64\mingw%BITS%\bin;%PATH% - rustc -V - cargo -V @@ -46,11 +51,11 @@ build: false # Equivalent to Travis' `script` phase # TODO modify this phase as you see fit test_script: - - cargo test --verbose --all + - cargo test --verbose --all --features pcre2 before_deploy: # Generate artifacts for release - - cargo build --release + - cargo build --release --features pcre2 - mkdir staging - copy target\release\rg.exe staging - ps: copy target\release\build\ripgrep-*\out\_rg.ps1 staging diff --git a/build.rs b/build.rs index b7f26f17..638f7646 100644 --- a/build.rs +++ b/build.rs @@ -4,6 +4,7 @@ extern crate clap; extern crate lazy_static; use std::env; +use std::ffi::OsString; use std::fs::{self, File}; use std::io::{self, Read, Write}; use std::path::Path; @@ -18,6 +19,22 @@ use app::{RGArg, RGArgKind}; mod app; fn main() { + // If our version of Rust has runtime SIMD detection, then set a cfg so + // we know we can test for it. We use this when generating ripgrep's + // --version output. + let version = rustc_version(); + let parsed = match Version::parse(&version) { + Ok(parsed) => parsed, + Err(err) => { + eprintln!("failed to parse `rustc --version`: {}", err); + return; + } + }; + let minimum = Version { major: 1, minor: 27, patch: 0 }; + if version.contains("nightly") || parsed >= minimum { + println!("cargo:rustc-cfg=ripgrep_runtime_cpu"); + } + // OUT_DIR is set by Cargo and it's where any additional build artifacts // are written. 
let outdir = match env::var_os("OUT_DIR") { @@ -182,3 +199,63 @@ fn formatted_doc_txt(arg: &RGArg) -> io::Result { fn ioerr(msg: String) -> io::Error { io::Error::new(io::ErrorKind::Other, msg) } + +fn rustc_version() -> String { + let rustc = env::var_os("RUSTC").unwrap_or(OsString::from("rustc")); + let output = process::Command::new(&rustc) + .arg("--version") + .output() + .unwrap() + .stdout; + String::from_utf8(output).unwrap() +} + +#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord)] +struct Version { + major: u32, + minor: u32, + patch: u32, +} + +impl Version { + fn parse(mut s: &str) -> Result { + if !s.starts_with("rustc ") { + return Err(format!("unrecognized version string: {}", s)); + } + s = &s["rustc ".len()..]; + + let parts: Vec<&str> = s.split(".").collect(); + if parts.len() < 3 { + return Err(format!("not enough version parts: {:?}", parts)); + } + + let mut num = String::new(); + for c in parts[0].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let major = num.parse::().map_err(|e| e.to_string())?; + + num.clear(); + for c in parts[1].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let minor = num.parse::().map_err(|e| e.to_string())?; + + num.clear(); + for c in parts[2].chars() { + if !c.is_digit(10) { + break; + } + num.push(c); + } + let patch = num.parse::().map_err(|e| e.to_string())?; + + Ok(Version { major, minor, patch }) + } +} diff --git a/ci/before_deploy.sh b/ci/before_deploy.sh index 7ee824ec..68f80bdf 100755 --- a/ci/before_deploy.sh +++ b/ci/before_deploy.sh @@ -8,7 +8,11 @@ set -ex # Generate artifacts for release mk_artifacts() { - cargo build --target "$TARGET" --release + if is_arm; then + cargo build --target "$TARGET" --release + else + cargo build --target "$TARGET" --release --features 'pcre2' + fi } mk_tarball() { diff --git a/ci/script.sh b/ci/script.sh index f513bb12..d1799e29 100755 --- a/ci/script.sh +++ b/ci/script.sh @@ -8,7 +8,11 @@ set -ex main() { # Test a normal debug build. - cargo build --target "$TARGET" --verbose --all + if is_arm; then + cargo build --target "$TARGET" --verbose + else + cargo build --target "$TARGET" --verbose --all --features 'pcre2' + fi # Show the output of the most recent build.rs stderr. set +x @@ -40,7 +44,7 @@ main() { "$(dirname "${0}")/test_complete.sh" # Run tests for ripgrep and all sub-crates. 
- cargo test --target "$TARGET" --verbose --all + cargo test --target "$TARGET" --verbose --all --features 'pcre2' } main diff --git a/ci/utils.sh b/ci/utils.sh index 2fb7fadb..1cf2b6dc 100644 --- a/ci/utils.sh +++ b/ci/utils.sh @@ -55,13 +55,6 @@ gcc_prefix() { esac } -is_ssse3_target() { - case "$(architecture)" in - amd64) return 0 ;; - *) return 1 ;; - esac -} - is_x86() { case "$(architecture)" in amd64|i386) return 0 ;; diff --git a/complete/_rg b/complete/_rg index 4342e8d2..7e17b93d 100644 --- a/complete/_rg +++ b/complete/_rg @@ -111,10 +111,18 @@ _rg() { "--no-ignore-vcs[don't respect version control ignore files]" $no'--ignore-vcs[respect version control ignore files]' + + '(json)' # json options + '--json[output results in a JSON Lines format]' + $no"--no-json[output results in the standard format]" + + '(line)' # Line-number options {-n,--line-number}'[show line numbers for matches]' {-N,--no-line-number}"[don't show line numbers for matches]" + + '(line terminator)' # line terminator options + '--crlf[use CRLF as a line terminator]' + $no"--no-crlf[do not use CRLF as a line terminator]" + + '(max-depth)' # Directory-depth options '--max-depth=[specify max number of directories to descend]:number of directories' '!--maxdepth=:number of directories' @@ -131,6 +139,11 @@ _rg() { '--mmap[search using memory maps when possible]' "--no-mmap[don't search using memory maps]" + + '(multiline)' # multiline options + {-U,--multiline}'[permit matching across multiple lines]' + $no"--no-multiline[restrict matches to at most one line each]" + '--multiline-dotall[make "." match newline in multiline mode]' + + '(only)' # Only-match options '(passthru replace)'{-o,--only-matching}'[show only matching part of each line]' @@ -138,6 +151,12 @@ _rg() { '(--vimgrep count only replace)--passthru[show both matching and non-matching lines]' '!(--vimgrep count only replace)--passthrough' + + '(pcre2)' # PCRE2 options + {-P,--pcre2}'[Enable matching with PCRE2]' + $no"--no-pcre2[don't use PCRE2]" + "--pcre2-unicode[Enable PCRE2 Unicode mode]" + $no"--pcre2-unicode[Disable PCRE2 Unicode mode]" + + '(pre)' # Preprocessing options '(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e' $no'--no-pre[disable preprocessor utility]' @@ -158,6 +177,7 @@ _rg() { + stats # Statistics options '(--files file-match)--stats[show search statistics]' + $no"--no-stats[don't show search statistics]" + '(text)' # Binary-search options {-a,--text}'[search binary files as if they were text]' @@ -166,6 +186,10 @@ _rg() { + '(threads)' # Thread-count options '(--sort-files)'{-j+,--threads=}'[specify approximate number of threads to use]:number of threads' + + '(trim)' # trim options + '--trim[trim any ASCII whitespace prefix from each line]' + $no"--no-trim[don't trim ASCII whitespace prefix from each line]" + + type # Type options '*'{-t+,--type=}'[only search files matching specified type]: :_rg_types' '*--type-add=[add new glob for specified file type]: :->typespec' @@ -203,6 +227,7 @@ _rg() { '--max-filesize=[specify size above which files should be ignored]:file size (bytes)' "--no-config[don't load configuration files]" '(-0 --null)'{-0,--null}'[print NUL byte after file names]' + '--null-data[use NUL as a line terminator]' '--path-separator=[specify path separator to use when printing file names]:separator' '(-q --quiet)'{-q,--quiet}'[suppress normal output]' '--regex-size-limit=[specify upper size limit of compiled regex]:regex size (bytes)' diff --git a/globset/README.md 
b/globset/README.md index f5caf22a..5d54172a 100644 --- a/globset/README.md +++ b/globset/README.md @@ -4,7 +4,7 @@ Cross platform single glob and glob set matching. Glob set matching is the process of matching one or more glob patterns against a single candidate path simultaneously, and returning all of the globs that matched. -[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![](https://img.shields.io/crates/v/globset.svg)](https://crates.io/crates/globset) diff --git a/globset/src/lib.rs b/globset/src/lib.rs index 50c92e42..8d26e187 100644 --- a/globset/src/lib.rs +++ b/globset/src/lib.rs @@ -470,7 +470,6 @@ impl GlobSetBuilder { } /// Add a new pattern to this set. - #[allow(dead_code)] pub fn add(&mut self, pat: Glob) -> &mut GlobSetBuilder { self.pats.push(pat); self diff --git a/grep/Cargo.toml b/grep/Cargo.toml index 562bde1e..e6e2fc07 100644 --- a/grep/Cargo.toml +++ b/grep/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "grep" -version = "0.1.9" #:version +version = "0.2.0" #:version authors = ["Andrew Gallant "] description = """ Fast line oriented regex searching as a library. @@ -13,7 +13,18 @@ keywords = ["regex", "grep", "egrep", "search", "pattern"] license = "Unlicense/MIT" [dependencies] -log = "0.4" -memchr = "2" -regex = "1" -regex-syntax = "0.6" +grep-matcher = { version = "0.0.1", path = "../grep-matcher" } +grep-pcre2 = { version = "0.0.1", path = "../grep-pcre2", optional = true } +grep-printer = { version = "0.0.1", path = "../grep-printer" } +grep-regex = { version = "0.0.1", path = "../grep-regex" } +grep-searcher = { version = "0.0.1", path = "../grep-searcher" } + +[dev-dependencies] +atty = "0.2.11" +termcolor = "1" +walkdir = "2.2.0" + +[features] +avx-accel = ["grep-searcher/avx-accel"] +simd-accel = ["grep-searcher/simd-accel"] +pcre2 = ["grep-pcre2"] diff --git a/grep/README.md b/grep/README.md index 86cc8c2c..c376d8af 100644 --- a/grep/README.md +++ b/grep/README.md @@ -1,4 +1,41 @@ grep ---- -This is a *library* that provides grep-style line-by-line regex searching (with -comparable performance to `grep` itself). +ripgrep, as a library. + +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) +[![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) +[![](https://img.shields.io/crates/v/grep.svg)](https://crates.io/crates/grep) + +Dual-licensed under MIT or the [UNLICENSE](http://unlicense.org). + + +### Documentation + +[https://docs.rs/grep](https://docs.rs/grep) + +NOTE: This crate isn't ready for wide use yet. Ambitious individuals can +probably piece together the parts, but there is no high level documentation +describing how all of the pieces fit together. + + +### Usage + +Add this to your `Cargo.toml`: + +```toml +[dependencies] +grep = "0.2" +``` + +and this to your crate root: + +```rust +extern crate grep; +``` + + +### Features + +This crate provides a `pcre2` feature (disabled by default) which, when +enabled, re-exports the `grep-pcre2` crate as an alternative `Matcher` +implementation to the standard `grep-regex` implementation. 
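As a rough sketch of how the re-exported pieces fit together (it mirrors the `simplegrep` example added just below; the search pattern and path are placeholders, and the `grep::pcre2::RegexMatcher::new` constructor mentioned in the comment is an assumption rather than an API exercised by this patch):

```rust
extern crate grep;
extern crate termcolor;

use std::error::Error;
use std::path::Path;

use grep::matcher::Matcher;
use grep::printer::StandardBuilder;
use grep::regex::RegexMatcher;
use grep::searcher::SearcherBuilder;
use termcolor::{ColorChoice, StandardStream};

/// Search a single file with any `Matcher` implementation and print matches
/// to stdout. With the `pcre2` feature enabled, a PCRE2-backed matcher could
/// be passed here instead (e.g. something like
/// `grep::pcre2::RegexMatcher::new(pattern)` -- constructor name assumed).
fn search_file<M: Matcher>(matcher: M, path: &Path) -> Result<(), Box<Error>> {
    let mut searcher = SearcherBuilder::new().build();
    let mut printer = StandardBuilder::new()
        .build(StandardStream::stdout(ColorChoice::Never));
    searcher.search_path(
        &matcher,
        path,
        printer.sink_with_path(&matcher, path),
    )?;
    Ok(())
}

fn main() {
    // Line-oriented matcher from the default (finite automata) engine.
    let matcher = RegexMatcher::new_line_matcher(r"fn\s+main").unwrap();
    if let Err(err) = search_file(matcher, Path::new("src/lib.rs")) {
        eprintln!("{}", err);
    }
}
```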
diff --git a/grep/examples/simplegrep.rs b/grep/examples/simplegrep.rs new file mode 100644 index 00000000..fb2d4001 --- /dev/null +++ b/grep/examples/simplegrep.rs @@ -0,0 +1,107 @@ +extern crate atty; +extern crate grep; +extern crate termcolor; +extern crate walkdir; + +use std::env; +use std::error; +use std::ffi::OsString; +use std::path::Path; +use std::process; +use std::result; + +use grep::printer::{ColorSpecs, StandardBuilder}; +use grep::regex::RegexMatcher; +use grep::searcher::{BinaryDetection, SearcherBuilder}; +use termcolor::{ColorChoice, StandardStream}; +use walkdir::WalkDir; + +macro_rules! fail { + ($($tt:tt)*) => { + return Err(From::from(format!($($tt)*))); + } +} + +type Result = result::Result>; + +fn main() { + if let Err(err) = try_main() { + eprintln!("{}", err); + process::exit(1); + } +} + +fn try_main() -> Result<()> { + let mut args: Vec = env::args_os().collect(); + if args.len() < 2 { + fail!("Usage: simplegrep [ ...]"); + } + if args.len() == 2 { + args.push(OsString::from("./")); + } + let pattern = match args[1].clone().into_string() { + Ok(pattern) => pattern, + Err(_) => { + fail!( + "pattern is not valid UTF-8: '{:?}'", + args[1].to_string_lossy() + ); + } + }; + search(&pattern, &args[2..]) +} + +fn search(pattern: &str, paths: &[OsString]) -> Result<()> { + let matcher = RegexMatcher::new_line_matcher(&pattern)?; + let mut searcher = SearcherBuilder::new() + .binary_detection(BinaryDetection::quit(b'\x00')) + .build(); + let mut printer = StandardBuilder::new() + .color_specs(colors()) + .build(StandardStream::stdout(color_choice())); + + for path in paths { + for result in WalkDir::new(path) { + let dent = match result { + Ok(dent) => dent, + Err(err) => { + eprintln!( + "{}: {}", + err.path().unwrap_or(Path::new("error")).display(), + err, + ); + continue; + } + }; + if !dent.file_type().is_file() { + continue; + } + let result = searcher.search_path( + &matcher, + dent.path(), + printer.sink_with_path(&matcher, dent.path()), + ); + if let Err(err) = result { + eprintln!("{}: {}", dent.path().display(), err); + } + } + } + Ok(()) +} + +fn color_choice() -> ColorChoice { + if atty::is(atty::Stream::Stdout) { + ColorChoice::Auto + } else { + ColorChoice::Never + } +} + +fn colors() -> ColorSpecs { + ColorSpecs::new(&[ + "path:fg:magenta".parse().unwrap(), + "line:fg:green".parse().unwrap(), + "match:fg:red".parse().unwrap(), + "match:style:bold".parse().unwrap(), + ]) +} diff --git a/grep/src/lib.rs b/grep/src/lib.rs index 023cd64a..ab0d78eb 100644 --- a/grep/src/lib.rs +++ b/grep/src/lib.rs @@ -1,84 +1,22 @@ -#![deny(missing_docs)] - /*! -A fast line oriented regex searcher. +ripgrep, as a library. + +This library is intended to provide a high level facade to the crates that +make up ripgrep's core searching routines. However, there is no high level +documentation available yet guiding users on how to fit all of the pieces +together. + +Every public API item in the constituent crates is documented, but examples +are sparse. + +A cookbook and a guide are planned. */ -#[macro_use] -extern crate log; -extern crate memchr; -extern crate regex; -extern crate regex_syntax as syntax; +#![deny(missing_docs)] -use std::error; -use std::fmt; -use std::result; - -pub use search::{Grep, GrepBuilder, Iter, Match}; - -mod literals; -mod nonl; -mod search; -mod smart_case; -mod word_boundary; - -/// Result is a convenient type alias that fixes the type of the error to -/// the `Error` type defined in this crate. 
-pub type Result = result::Result; - -/// Error enumerates the list of possible error conditions when building or -/// using a `Grep` line searcher. -#[derive(Debug)] -pub enum Error { - /// An error from parsing or compiling a regex. - Regex(regex::Error), - /// This error occurs when an illegal literal was found in the regex - /// pattern. For example, if the line terminator is `\n` and the regex - /// pattern is `\w+\n\w+`, then the presence of `\n` will cause this error. - LiteralNotAllowed(char), - /// An unused enum variant that indicates this enum may be expanded in - /// the future and therefore should not be exhaustively matched. - #[doc(hidden)] - __Nonexhaustive, -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::Regex(ref err) => err.description(), - Error::LiteralNotAllowed(_) => "use of forbidden literal", - Error::__Nonexhaustive => unreachable!(), - } - } - - fn cause(&self) -> Option<&error::Error> { - match *self { - Error::Regex(ref err) => err.cause(), - _ => None, - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Regex(ref err) => err.fmt(f), - Error::LiteralNotAllowed(chr) => { - write!(f, "Literal {:?} not allowed.", chr) - } - Error::__Nonexhaustive => unreachable!(), - } - } -} - -impl From for Error { - fn from(err: regex::Error) -> Error { - Error::Regex(err) - } -} - -impl From for Error { - fn from(err: syntax::Error) -> Error { - Error::Regex(regex::Error::Syntax(err.to_string())) - } -} +pub extern crate grep_matcher as matcher; +#[cfg(feature = "pcre2")] +pub extern crate grep_pcre2 as pcre2; +pub extern crate grep_printer as printer; +pub extern crate grep_regex as regex; +pub extern crate grep_searcher as searcher; diff --git a/grep/src/literals.rs b/grep/src/literals.rs deleted file mode 100644 index 5e3dc8ea..00000000 --- a/grep/src/literals.rs +++ /dev/null @@ -1,274 +0,0 @@ -/*! -The literals module is responsible for extracting *inner* literals out of the -AST of a regular expression. Normally this is the job of the regex engine -itself, but the regex engine doesn't look for inner literals. Since we're doing -line based searching, we can use them, so we need to do it ourselves. - -Note that this implementation is incredibly suspicious. We need something more -principled. -*/ -use std::cmp; - -use regex::bytes::RegexBuilder; -use syntax::hir::{self, Hir, HirKind}; -use syntax::hir::literal::{Literal, Literals}; - -#[derive(Clone, Debug)] -pub struct LiteralSets { - prefixes: Literals, - suffixes: Literals, - required: Literals, -} - -impl LiteralSets { - pub fn create(expr: &Hir) -> Self { - let mut required = Literals::empty(); - union_required(expr, &mut required); - LiteralSets { - prefixes: Literals::prefixes(expr), - suffixes: Literals::suffixes(expr), - required: required, - } - } - - pub fn to_regex_builder(&self) -> Option { - if self.prefixes.all_complete() && !self.prefixes.is_empty() { - debug!("literal prefixes detected: {:?}", self.prefixes); - // When this is true, the regex engine will do a literal scan. - return None; - } - - // Out of inner required literals, prefixes and suffixes, which one - // is the longest? We pick the longest to do fast literal scan under - // the assumption that a longer literal will have a lower false - // positive rate. 
- let pre_lcp = self.prefixes.longest_common_prefix(); - let pre_lcs = self.prefixes.longest_common_suffix(); - let suf_lcp = self.suffixes.longest_common_prefix(); - let suf_lcs = self.suffixes.longest_common_suffix(); - - let req_lits = self.required.literals(); - let req = match req_lits.iter().max_by_key(|lit| lit.len()) { - None => &[], - Some(req) => &***req, - }; - - let mut lit = pre_lcp; - if pre_lcs.len() > lit.len() { - lit = pre_lcs; - } - if suf_lcp.len() > lit.len() { - lit = suf_lcp; - } - if suf_lcs.len() > lit.len() { - lit = suf_lcs; - } - if req_lits.len() == 1 && req.len() > lit.len() { - lit = req; - } - - // Special case: if we have any literals that are all whitespace, - // then this is probably a failing of the literal detection since - // whitespace is typically pretty common. In this case, don't bother - // with inner literal scanning at all and just defer to the regex. - let any_all_white = req_lits.iter() - .any(|lit| lit.iter().all(|&b| (b as char).is_whitespace())); - if any_all_white { - return None; - } - - // Special case: if we detected an alternation of inner required - // literals and its longest literal is bigger than the longest - // prefix/suffix, then choose the alternation. In practice, this - // helps with case insensitive matching, which can generate lots of - // inner required literals. - let any_empty = req_lits.iter().any(|lit| lit.is_empty()); - if req.len() > lit.len() && req_lits.len() > 1 && !any_empty { - debug!("required literals found: {:?}", req_lits); - let alts: Vec = - req_lits.into_iter().map(|x| bytes_to_regex(x)).collect(); - let mut builder = RegexBuilder::new(&alts.join("|")); - builder.unicode(false); - Some(builder) - } else if lit.is_empty() { - None - } else { - debug!("required literal found: {:?}", show(lit)); - let mut builder = RegexBuilder::new(&bytes_to_regex(&lit)); - builder.unicode(false); - Some(builder) - } - } -} - -fn union_required(expr: &Hir, lits: &mut Literals) { - match *expr.kind() { - HirKind::Literal(hir::Literal::Unicode(c)) => { - let mut buf = [0u8; 4]; - lits.cross_add(c.encode_utf8(&mut buf).as_bytes()); - } - HirKind::Literal(hir::Literal::Byte(b)) => { - lits.cross_add(&[b]); - } - HirKind::Class(hir::Class::Unicode(ref cls)) => { - if count_unicode_class(cls) >= 5 || !lits.add_char_class(cls) { - lits.cut(); - } - } - HirKind::Class(hir::Class::Bytes(ref cls)) => { - if count_byte_class(cls) >= 5 || !lits.add_byte_class(cls) { - lits.cut(); - } - } - HirKind::Group(hir::Group { ref hir, .. 
}) => { - union_required(&**hir, lits); - } - HirKind::Repetition(ref x) => { - match x.kind { - hir::RepetitionKind::ZeroOrOne => lits.cut(), - hir::RepetitionKind::ZeroOrMore => lits.cut(), - hir::RepetitionKind::OneOrMore => { - union_required(&x.hir, lits); - lits.cut(); - } - hir::RepetitionKind::Range(ref rng) => { - let (min, max) = match *rng { - hir::RepetitionRange::Exactly(m) => (m, Some(m)), - hir::RepetitionRange::AtLeast(m) => (m, None), - hir::RepetitionRange::Bounded(m, n) => (m, Some(n)), - }; - repeat_range_literals( - &x.hir, min, max, x.greedy, lits, union_required); - } - } - } - HirKind::Concat(ref es) if es.is_empty() => {} - HirKind::Concat(ref es) if es.len() == 1 => { - union_required(&es[0], lits) - } - HirKind::Concat(ref es) => { - for e in es { - let mut lits2 = lits.to_empty(); - union_required(e, &mut lits2); - if lits2.is_empty() { - lits.cut(); - continue; - } - if lits2.contains_empty() { - lits.cut(); - } - if !lits.cross_product(&lits2) { - // If this expression couldn't yield any literal that - // could be extended, then we need to quit. Since we're - // short-circuiting, we also need to freeze every member. - lits.cut(); - break; - } - } - } - HirKind::Alternation(ref es) => { - alternate_literals(es, lits, union_required); - } - _ => lits.cut(), - } -} - -fn repeat_range_literals( - e: &Hir, - min: u32, - max: Option, - _greedy: bool, - lits: &mut Literals, - mut f: F, -) { - if min == 0 { - // This is a bit conservative. If `max` is set, then we could - // treat this as a finite set of alternations. For now, we - // just treat it as `e*`. - lits.cut(); - } else { - let n = cmp::min(lits.limit_size(), min as usize); - // We only extract literals from a single repetition, even though - // we could do more. e.g., `a{3}` will have `a` extracted instead of - // `aaa`. The reason is that inner literal extraction can't be unioned - // across repetitions. e.g., extracting `foofoofoo` from `(\w+foo){3}` - // is wrong. - f(e, lits); - if n < min as usize { - lits.cut(); - } - if max.map_or(true, |max| min < max) { - lits.cut(); - } - } -} - -fn alternate_literals( - es: &[Hir], - lits: &mut Literals, - mut f: F, -) { - let mut lits2 = lits.to_empty(); - for e in es { - let mut lits3 = lits.to_empty(); - lits3.set_limit_size(lits.limit_size() / 5); - f(e, &mut lits3); - if lits3.is_empty() || !lits2.union(lits3) { - // If we couldn't find suffixes for *any* of the - // alternates, then the entire alternation has to be thrown - // away and any existing members must be frozen. Similarly, - // if the union couldn't complete, stop and freeze. - lits.cut(); - return; - } - } - // All we do at the moment is look for prefixes and suffixes. If both - // are empty, then we report nothing. We should be able to do better than - // this, but we'll need something more expressive than just a "set of - // literals." - let lcp = lits2.longest_common_prefix(); - let lcs = lits2.longest_common_suffix(); - if !lcp.is_empty() { - lits.cross_add(lcp); - } - lits.cut(); - if !lcs.is_empty() { - lits.add(Literal::empty()); - lits.add(Literal::new(lcs.to_vec())); - } -} - -/// Return the number of characters in the given class. -fn count_unicode_class(cls: &hir::ClassUnicode) -> u32 { - cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() -} - -/// Return the number of bytes in the given class. 
-fn count_byte_class(cls: &hir::ClassBytes) -> u32 { - cls.iter().map(|r| 1 + (r.end() as u32 - r.start() as u32)).sum() -} - -/// Converts an arbitrary sequence of bytes to a literal suitable for building -/// a regular expression. -fn bytes_to_regex(bs: &[u8]) -> String { - let mut s = String::with_capacity(bs.len()); - for &b in bs { - s.push_str(&format!("\\x{:02x}", b)); - } - s -} - -/// Converts arbitrary bytes to a nice string. -fn show(bs: &[u8]) -> String { - // Why aren't we using this to feed to the regex? Doesn't really matter - // I guess. ---AG - use std::ascii::escape_default; - use std::str; - - let mut nice = String::new(); - for &b in bs { - let part: Vec = escape_default(b).collect(); - nice.push_str(str::from_utf8(&part).unwrap()); - } - nice -} diff --git a/grep/src/nonl.rs b/grep/src/nonl.rs deleted file mode 100644 index 3beb5f61..00000000 --- a/grep/src/nonl.rs +++ /dev/null @@ -1,74 +0,0 @@ -use syntax::hir::{self, Hir, HirKind}; - -use {Error, Result}; - -/// Returns a new expression that is guaranteed to never match the given -/// ASCII character. -/// -/// If the expression contains the literal byte, then an error is returned. -/// -/// If `byte` is not an ASCII character (i.e., greater than `0x7F`), then this -/// function panics. -pub fn remove(expr: Hir, byte: u8) -> Result { - assert!(byte <= 0x7F); - let chr = byte as char; - assert!(chr.len_utf8() == 1); - - Ok(match expr.into_kind() { - HirKind::Empty => Hir::empty(), - HirKind::Literal(hir::Literal::Unicode(c)) => { - if c == chr { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::literal(hir::Literal::Unicode(c)) - } - HirKind::Literal(hir::Literal::Byte(b)) => { - if b as char == chr { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::literal(hir::Literal::Byte(b)) - } - HirKind::Class(hir::Class::Unicode(mut cls)) => { - let remove = hir::ClassUnicode::new(Some( - hir::ClassUnicodeRange::new(chr, chr), - )); - cls.difference(&remove); - if cls.iter().next().is_none() { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::class(hir::Class::Unicode(cls)) - } - HirKind::Class(hir::Class::Bytes(mut cls)) => { - let remove = hir::ClassBytes::new(Some( - hir::ClassBytesRange::new(byte, byte), - )); - cls.difference(&remove); - if cls.iter().next().is_none() { - return Err(Error::LiteralNotAllowed(chr)); - } - Hir::class(hir::Class::Bytes(cls)) - } - HirKind::Anchor(x) => Hir::anchor(x), - HirKind::WordBoundary(x) => Hir::word_boundary(x), - HirKind::Repetition(mut x) => { - x.hir = Box::new(remove(*x.hir, byte)?); - Hir::repetition(x) - } - HirKind::Group(mut x) => { - x.hir = Box::new(remove(*x.hir, byte)?); - Hir::group(x) - } - HirKind::Concat(xs) => { - let xs = xs.into_iter() - .map(|e| remove(e, byte)) - .collect::>>()?; - Hir::concat(xs) - } - HirKind::Alternation(xs) => { - let xs = xs.into_iter() - .map(|e| remove(e, byte)) - .collect::>>()?; - Hir::alternation(xs) - } - }) -} diff --git a/grep/src/search.rs b/grep/src/search.rs deleted file mode 100644 index af7d680d..00000000 --- a/grep/src/search.rs +++ /dev/null @@ -1,317 +0,0 @@ -use memchr::{memchr, memrchr}; -use syntax::ParserBuilder; -use syntax::hir::Hir; -use regex::bytes::{Regex, RegexBuilder}; - -use literals::LiteralSets; -use nonl; -use smart_case::Cased; -use word_boundary::strip_unicode_word_boundaries; -use Result; - -/// A matched line. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct Match { - start: usize, - end: usize, -} - -impl Match { - /// Create a new empty match value. 
- pub fn new() -> Match { - Match::default() - } - - /// Return the starting byte offset of the line that matched. - #[inline] - pub fn start(&self) -> usize { - self.start - } - - /// Return the ending byte offset of the line that matched. - #[inline] - pub fn end(&self) -> usize { - self.end - } -} - -/// A fast line oriented regex searcher. -#[derive(Clone, Debug)] -pub struct Grep { - re: Regex, - required: Option, - opts: Options, -} - -/// A builder for a grep searcher. -#[derive(Clone, Debug)] -pub struct GrepBuilder { - pattern: String, - opts: Options, -} - -#[derive(Clone, Debug)] -struct Options { - case_insensitive: bool, - case_smart: bool, - line_terminator: u8, - size_limit: usize, - dfa_size_limit: usize, -} - -impl Default for Options { - fn default() -> Options { - Options { - case_insensitive: false, - case_smart: false, - line_terminator: b'\n', - size_limit: 10 * (1 << 20), - dfa_size_limit: 10 * (1 << 20), - } - } -} - -impl GrepBuilder { - /// Create a new builder for line searching. - /// - /// The pattern given should be a regular expression. The precise syntax - /// supported is documented on the regex crate. - pub fn new(pattern: &str) -> GrepBuilder { - GrepBuilder { - pattern: pattern.to_string(), - opts: Options::default(), - } - } - - /// Set the line terminator. - /// - /// The line terminator can be any ASCII character and serves to delineate - /// the match boundaries in the text searched. - /// - /// This panics if `ascii_byte` is greater than `0x7F` (i.e., not ASCII). - pub fn line_terminator(mut self, ascii_byte: u8) -> GrepBuilder { - assert!(ascii_byte <= 0x7F); - self.opts.line_terminator = ascii_byte; - self - } - - /// Set the case sensitive flag (`i`) on the regex. - pub fn case_insensitive(mut self, yes: bool) -> GrepBuilder { - self.opts.case_insensitive = yes; - self - } - - /// Whether to enable smart case search or not (disabled by default). - /// - /// Smart case uses case insensitive search if the pattern contains only - /// lowercase characters (ignoring any characters which immediately follow - /// a '\'). Otherwise, a case sensitive search is used instead. - /// - /// Enabling the case_insensitive flag overrides this. - pub fn case_smart(mut self, yes: bool) -> GrepBuilder { - self.opts.case_smart = yes; - self - } - - /// Set the approximate size limit of the compiled regular expression. - /// - /// This roughly corresponds to the number of bytes occupied by a - /// single compiled program. If the program exceeds this number, then a - /// compilation error is returned. - pub fn size_limit(mut self, limit: usize) -> GrepBuilder { - self.opts.size_limit = limit; - self - } - - /// Set the approximate size of the cache used by the DFA. - /// - /// This roughly corresponds to the number of bytes that the DFA will use - /// while searching. - /// - /// Note that this is a per thread limit. There is no way to set a global - /// limit. In particular, if a regex is used from multiple threads - /// simulanteously, then each thread may use up to the number of bytes - /// specified here. - pub fn dfa_size_limit(mut self, limit: usize) -> GrepBuilder { - self.opts.dfa_size_limit = limit; - self - } - - /// Create a line searcher. - /// - /// If there was a problem parsing or compiling the regex with the given - /// options, then an error is returned. 
- pub fn build(self) -> Result { - let expr = self.parse()?; - let literals = LiteralSets::create(&expr); - let re = self.regex(&expr)?; - let required = match literals.to_regex_builder() { - Some(builder) => Some(self.regex_build(builder)?), - None => { - match strip_unicode_word_boundaries(&expr) { - None => None, - Some(expr) => { - debug!("Stripped Unicode word boundaries. \ - New AST:\n{:?}", expr); - self.regex(&expr).ok() - } - } - } - }; - Ok(Grep { - re: re, - required: required, - opts: self.opts, - }) - } - - /// Creates a new regex from the given expression with the current - /// configuration. - fn regex(&self, expr: &Hir) -> Result { - let mut builder = RegexBuilder::new(&expr.to_string()); - builder.unicode(true); - self.regex_build(builder) - } - - /// Builds a new regex from the given builder using the caller's settings. - fn regex_build(&self, mut builder: RegexBuilder) -> Result { - builder - .multi_line(true) - .size_limit(self.opts.size_limit) - .dfa_size_limit(self.opts.dfa_size_limit) - .build() - .map_err(From::from) - } - - /// Parses the underlying pattern and ensures the pattern can never match - /// the line terminator. - fn parse(&self) -> Result { - let expr = ParserBuilder::new() - .allow_invalid_utf8(true) - .case_insensitive(self.is_case_insensitive()?) - .multi_line(true) - .build() - .parse(&self.pattern)?; - debug!("original regex HIR pattern:\n{}", expr); - let expr = nonl::remove(expr, self.opts.line_terminator)?; - debug!("transformed regex HIR pattern:\n{}", expr); - Ok(expr) - } - - /// Determines whether the case insensitive flag should be enabled or not. - fn is_case_insensitive(&self) -> Result { - if self.opts.case_insensitive { - return Ok(true); - } - if !self.opts.case_smart { - return Ok(false); - } - let cased = match Cased::from_pattern(&self.pattern) { - None => return Ok(false), - Some(cased) => cased, - }; - Ok(cased.any_literal && !cased.any_uppercase) - } -} - -impl Grep { - /// Returns a reference to the underlying regex used by the searcher. - pub fn regex(&self) -> &Regex { - &self.re - } - - /// Returns an iterator over all matches in the given buffer. - pub fn iter<'b, 's>(&'s self, buf: &'b [u8]) -> Iter<'b, 's> { - Iter { - searcher: self, - buf: buf, - start: 0, - } - } - - /// Fills in the next line that matches in the given buffer starting at - /// the position given. - /// - /// If no match could be found, `false` is returned, otherwise, `true` is - /// returned. 
- pub fn read_match( - &self, - mat: &mut Match, - buf: &[u8], - mut start: usize, - ) -> bool { - if start >= buf.len() { - return false; - } - if let Some(ref req) = self.required { - while start < buf.len() { - let e = match req.shortest_match(&buf[start..]) { - None => return false, - Some(e) => start + e, - }; - let (prevnl, nextnl) = self.find_line(buf, e, e); - match self.re.shortest_match(&buf[prevnl..nextnl]) { - None => { - start = nextnl; - continue; - } - Some(_) => { - self.fill_match(mat, prevnl, nextnl); - return true; - } - } - } - false - } else { - let e = match self.re.shortest_match(&buf[start..]) { - None => return false, - Some(e) => start + e, - }; - let (s, e) = self.find_line(buf, e, e); - self.fill_match(mat, s, e); - true - } - } - - fn fill_match(&self, mat: &mut Match, start: usize, end: usize) { - mat.start = start; - mat.end = end; - } - - fn find_line(&self, buf: &[u8], s: usize, e: usize) -> (usize, usize) { - (self.find_line_start(buf, s), self.find_line_end(buf, e)) - } - - fn find_line_start(&self, buf: &[u8], pos: usize) -> usize { - memrchr(self.opts.line_terminator, &buf[0..pos]).map_or(0, |i| i + 1) - } - - fn find_line_end(&self, buf: &[u8], pos: usize) -> usize { - memchr(self.opts.line_terminator, &buf[pos..]) - .map_or(buf.len(), |i| pos + i + 1) - } -} - -/// An iterator over all matches in a particular buffer. -/// -/// `'b` refers to the lifetime of the buffer, and `'s` refers to the lifetime -/// of the searcher. -pub struct Iter<'b, 's> { - searcher: &'s Grep, - buf: &'b [u8], - start: usize, -} - -impl<'b, 's> Iterator for Iter<'b, 's> { - type Item = Match; - - fn next(&mut self) -> Option { - let mut mat = Match::default(); - if !self.searcher.read_match(&mut mat, self.buf, self.start) { - self.start = self.buf.len(); - return None; - } - self.start = mat.end; - Some(mat) - } -} diff --git a/grep/src/smart_case.rs b/grep/src/smart_case.rs deleted file mode 100644 index 1379b326..00000000 --- a/grep/src/smart_case.rs +++ /dev/null @@ -1,191 +0,0 @@ -use syntax::ast::{self, Ast}; -use syntax::ast::parse::Parser; - -/// The results of analyzing a regex for cased literals. -#[derive(Clone, Debug, Default)] -pub struct Cased { - /// True if and only if a literal uppercase character occurs in the regex. - /// - /// A regex like `\pL` contains no uppercase literals, even though `L` - /// is uppercase and the `\pL` class contains uppercase characters. - pub any_uppercase: bool, - /// True if and only if the regex contains any literal at all. A regex like - /// `\pL` has this set to false. - pub any_literal: bool, -} - -impl Cased { - /// Returns a `Cased` value by doing analysis on the AST of `pattern`. - /// - /// If `pattern` is not a valid regular expression, then `None` is - /// returned. 
- pub fn from_pattern(pattern: &str) -> Option { - Parser::new() - .parse(pattern) - .map(|ast| Cased::from_ast(&ast)) - .ok() - } - - fn from_ast(ast: &Ast) -> Cased { - let mut cased = Cased::default(); - cased.from_ast_impl(ast); - cased - } - - fn from_ast_impl(&mut self, ast: &Ast) { - if self.done() { - return; - } - match *ast { - Ast::Empty(_) - | Ast::Flags(_) - | Ast::Dot(_) - | Ast::Assertion(_) - | Ast::Class(ast::Class::Unicode(_)) - | Ast::Class(ast::Class::Perl(_)) => {} - Ast::Literal(ref x) => { - self.from_ast_literal(x); - } - Ast::Class(ast::Class::Bracketed(ref x)) => { - self.from_ast_class_set(&x.kind); - } - Ast::Repetition(ref x) => { - self.from_ast_impl(&x.ast); - } - Ast::Group(ref x) => { - self.from_ast_impl(&x.ast); - } - Ast::Alternation(ref alt) => { - for x in &alt.asts { - self.from_ast_impl(x); - } - } - Ast::Concat(ref alt) => { - for x in &alt.asts { - self.from_ast_impl(x); - } - } - } - } - - fn from_ast_class_set(&mut self, ast: &ast::ClassSet) { - if self.done() { - return; - } - match *ast { - ast::ClassSet::Item(ref item) => { - self.from_ast_class_set_item(item); - } - ast::ClassSet::BinaryOp(ref x) => { - self.from_ast_class_set(&x.lhs); - self.from_ast_class_set(&x.rhs); - } - } - } - - fn from_ast_class_set_item(&mut self, ast: &ast::ClassSetItem) { - if self.done() { - return; - } - match *ast { - ast::ClassSetItem::Empty(_) - | ast::ClassSetItem::Ascii(_) - | ast::ClassSetItem::Unicode(_) - | ast::ClassSetItem::Perl(_) => {} - ast::ClassSetItem::Literal(ref x) => { - self.from_ast_literal(x); - } - ast::ClassSetItem::Range(ref x) => { - self.from_ast_literal(&x.start); - self.from_ast_literal(&x.end); - } - ast::ClassSetItem::Bracketed(ref x) => { - self.from_ast_class_set(&x.kind); - } - ast::ClassSetItem::Union(ref union) => { - for x in &union.items { - self.from_ast_class_set_item(x); - } - } - } - } - - fn from_ast_literal(&mut self, ast: &ast::Literal) { - self.any_literal = true; - self.any_uppercase = self.any_uppercase || ast.c.is_uppercase(); - } - - /// Returns true if and only if the attributes can never change no matter - /// what other AST it might see. 
- fn done(&self) -> bool { - self.any_uppercase && self.any_literal - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn cased(pattern: &str) -> Cased { - Cased::from_pattern(pattern).unwrap() - } - - #[test] - fn various() { - let x = cased(""); - assert!(!x.any_uppercase); - assert!(!x.any_literal); - - let x = cased("foo"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased("Foo"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased("foO"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\\"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\w"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\S"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\p{Ll}"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo[a-z]"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo[A-Z]"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo[\S\t]"); - assert!(!x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"foo\\S"); - assert!(x.any_uppercase); - assert!(x.any_literal); - - let x = cased(r"\p{Ll}"); - assert!(!x.any_uppercase); - assert!(!x.any_literal); - - let x = cased(r"aBc\w"); - assert!(x.any_uppercase); - assert!(x.any_literal); - } -} diff --git a/grep/src/word_boundary.rs b/grep/src/word_boundary.rs deleted file mode 100644 index 8e6b86d1..00000000 --- a/grep/src/word_boundary.rs +++ /dev/null @@ -1,53 +0,0 @@ -use syntax::hir::{self, Hir, HirKind}; - -/// Strips Unicode word boundaries from the given expression. -/// -/// The key invariant this maintains is that the expression returned will match -/// *at least* every where the expression given will match. Namely, a match of -/// the returned expression can report false positives but it will never report -/// false negatives. -/// -/// If no word boundaries could be stripped, then None is returned. -pub fn strip_unicode_word_boundaries(expr: &Hir) -> Option { - // The real reason we do this is because Unicode word boundaries are the - // one thing that Rust's regex DFA engine can't handle. When it sees a - // Unicode word boundary among non-ASCII text, it falls back to one of the - // slower engines. We work around this limitation by attempting to use - // a regex to find candidate matches without a Unicode word boundary. We'll - // only then use the full (and slower) regex to confirm a candidate as a - // match or not during search. - // - // It looks like we only check the outer edges for `\b`? I guess this is - // an attempt to optimize for the `-w/--word-regexp` flag? ---AG - match *expr.kind() { - HirKind::Concat(ref es) if !es.is_empty() => { - let first = is_unicode_word_boundary(&es[0]); - let last = is_unicode_word_boundary(es.last().unwrap()); - // Be careful not to strip word boundaries if there are no other - // expressions to match. - match (first, last) { - (true, false) if es.len() > 1 => { - Some(Hir::concat(es[1..].to_vec())) - } - (false, true) if es.len() > 1 => { - Some(Hir::concat(es[..es.len() - 1].to_vec())) - } - (true, true) if es.len() > 2 => { - Some(Hir::concat(es[1..es.len() - 1].to_vec())) - } - _ => None, - } - } - _ => None, - } -} - -/// Returns true if the given expression is a Unicode word boundary. 
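The `Cased` analysis deleted above is what backs smart case: the search becomes case-insensitive only when the pattern contains at least one literal and none of those literals are uppercase. A much-simplified stand-in that conveys just the decision rule (it scans raw characters and ignores escapes and character classes, which the real AST walk handles correctly):

    fn smart_case_insensitive(pattern: &str) -> bool {
        let any_literal = pattern.chars().any(|c| c.is_alphabetic());
        let any_uppercase = pattern.chars().any(|c| c.is_uppercase());
        any_literal && !any_uppercase
    }

    fn main() {
        assert!(smart_case_insensitive("foo"));  // all lowercase: insensitive
        assert!(!smart_case_insensitive("Foo")); // uppercase literal: sensitive
        assert!(!smart_case_insensitive("foO")); // uppercase anywhere: sensitive
    }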
-fn is_unicode_word_boundary(expr: &Hir) -> bool { - match *expr.kind() { - HirKind::WordBoundary(hir::WordBoundary::Unicode) => true, - HirKind::WordBoundary(hir::WordBoundary::UnicodeNegate) => true, - HirKind::Group(ref x) => is_unicode_word_boundary(&x.hir), - _ => false, - } -} diff --git a/grep2/COPYING b/grep2/COPYING deleted file mode 100644 index bb9c20a0..00000000 --- a/grep2/COPYING +++ /dev/null @@ -1,3 +0,0 @@ -This project is dual-licensed under the Unlicense and MIT licenses. - -You may use this code under the terms of either license. diff --git a/grep2/Cargo.toml b/grep2/Cargo.toml deleted file mode 100644 index caaf7a9c..00000000 --- a/grep2/Cargo.toml +++ /dev/null @@ -1,23 +0,0 @@ -[package] -name = "grep2" -version = "0.2.0" #:version -authors = ["Andrew Gallant "] -description = """ -Fast line oriented regex searching as a library. -""" -documentation = "http://burntsushi.net/rustdoc/grep/" -homepage = "https://github.com/BurntSushi/ripgrep" -repository = "https://github.com/BurntSushi/ripgrep" -readme = "README.md" -keywords = ["regex", "grep", "egrep", "search", "pattern"] -license = "Unlicense/MIT" - -[dependencies] -grep-matcher = { version = "0.0.1", path = "../grep-matcher" } -grep-printer = { version = "0.0.1", path = "../grep-printer" } -grep-regex = { version = "0.0.1", path = "../grep-regex" } -grep-searcher = { version = "0.0.1", path = "../grep-searcher" } - -[features] -avx-accel = ["grep-searcher/avx-accel"] -simd-accel = ["grep-searcher/simd-accel"] diff --git a/grep2/LICENSE-MIT b/grep2/LICENSE-MIT deleted file mode 100644 index 3b0a5dc0..00000000 --- a/grep2/LICENSE-MIT +++ /dev/null @@ -1,21 +0,0 @@ -The MIT License (MIT) - -Copyright (c) 2015 Andrew Gallant - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/grep2/README.md b/grep2/README.md deleted file mode 100644 index 86cc8c2c..00000000 --- a/grep2/README.md +++ /dev/null @@ -1,4 +0,0 @@ -grep ----- -This is a *library* that provides grep-style line-by-line regex searching (with -comparable performance to `grep` itself). diff --git a/grep2/UNLICENSE b/grep2/UNLICENSE deleted file mode 100644 index 68a49daa..00000000 --- a/grep2/UNLICENSE +++ /dev/null @@ -1,24 +0,0 @@ -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. 
- -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to diff --git a/grep2/src/lib.rs b/grep2/src/lib.rs deleted file mode 100644 index b6e02684..00000000 --- a/grep2/src/lib.rs +++ /dev/null @@ -1,10 +0,0 @@ -/*! -TODO. -*/ - -#![deny(missing_docs)] - -pub extern crate grep_matcher as matcher; -pub extern crate grep_printer as printer; -pub extern crate grep_regex as regex; -pub extern crate grep_searcher as searcher; diff --git a/ignore/Cargo.toml b/ignore/Cargo.toml index 13217135..42b043bf 100644 --- a/ignore/Cargo.toml +++ b/ignore/Cargo.toml @@ -26,7 +26,7 @@ memchr = "2" regex = "1" same-file = "1" thread_local = "0.3.2" -walkdir = "2" +walkdir = "2.2.0" [target.'cfg(windows)'.dependencies.winapi] version = "0.3" diff --git a/ignore/README.md b/ignore/README.md index f527da46..b0e659a9 100644 --- a/ignore/README.md +++ b/ignore/README.md @@ -4,7 +4,7 @@ The ignore crate provides a fast recursive directory iterator that respects various filters such as globs, file types and `.gitignore` files. This crate also provides lower level direct access to gitignore and file type matchers. -[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.png)](https://travis-ci.org/BurntSushi/ripgrep) +[![Linux build status](https://api.travis-ci.org/BurntSushi/ripgrep.svg)](https://travis-ci.org/BurntSushi/ripgrep) [![Windows build status](https://ci.appveyor.com/api/projects/status/github/BurntSushi/ripgrep?svg=true)](https://ci.appveyor.com/project/BurntSushi/ripgrep) [![](https://img.shields.io/crates/v/ignore.svg)](https://crates.io/crates/ignore) diff --git a/ignore/examples/walk.rs b/ignore/examples/walk.rs index 0ff4ea94..ad64e015 100644 --- a/ignore/examples/walk.rs +++ b/ignore/examples/walk.rs @@ -1,5 +1,3 @@ -#![allow(dead_code, unused_imports, unused_mut, unused_variables)] - extern crate crossbeam; extern crate ignore; extern crate walkdir; @@ -8,7 +6,6 @@ use std::env; use std::io::{self, Write}; use std::path::Path; use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; use std::thread; use crossbeam::sync::MsQueue; @@ -48,13 +45,11 @@ fn main() { }) }); } else if simple { - let mut stdout = io::BufWriter::new(io::stdout()); let walker = WalkDir::new(path); for result in walker { queue.push(Some(DirEntry::X(result.unwrap()))); } } else { - let mut stdout = io::BufWriter::new(io::stdout()); let walker = WalkBuilder::new(path).build(); for result in walker { queue.push(Some(DirEntry::Y(result.unwrap()))); diff --git a/src/app.rs b/src/app.rs index 24851c3b..a0b036d5 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2,8 +2,8 @@ // including some light validation. 
// // This module is purposely written in a bare-bones way, since it is included -// in ripgrep's build.rs file as a way to generate completion files for common -// shells. +// in ripgrep's build.rs file as a way to generate a man page and completion +// files for common shells. // // The only other place that ripgrep deals with clap is in src/args.rs, which // is where we read clap's configuration from the end user's arguments and turn @@ -82,7 +82,34 @@ pub fn app() -> App<'static, 'static> { /// the RIPGREP_BUILD_GIT_HASH env var is inspect for it. If that isn't set, /// then a revision hash is not included in the version string returned. pub fn long_version(revision_hash: Option<&str>) -> String { - // Let's say whether faster CPU instructions are enabled or not. + // Do we have a git hash? + // (Yes, if ripgrep was built on a machine with `git` installed.) + let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) { + None => String::new(), + Some(githash) => format!(" (rev {})", githash), + }; + // Put everything together. + let runtime = runtime_cpu_features(); + if runtime.is_empty() { + format!( + "{}{}\n{} (compiled)", + crate_version!(), + hash, + compile_cpu_features().join(" ") + ) + } else { + format!( + "{}{}\n{} (compiled)\n{} (runtime)", + crate_version!(), + hash, + compile_cpu_features().join(" "), + runtime.join(" ") + ) + } +} + +/// Returns the relevant CPU features enabled at compile time. +fn compile_cpu_features() -> Vec<&'static str> { let mut features = vec![]; if cfg!(feature = "simd-accel") { features.push("+SIMD"); @@ -94,14 +121,33 @@ pub fn long_version(revision_hash: Option<&str>) -> String { } else { features.push("-AVX"); } - // Do we have a git hash? - // (Yes, if ripgrep was built on a machine with `git` installed.) - let hash = match revision_hash.or(option_env!("RIPGREP_BUILD_GIT_HASH")) { - None => String::new(), - Some(githash) => format!(" (rev {})", githash), - }; - // Put everything together. - format!("{}{}\n{}", crate_version!(), hash, features.join(" ")) + features +} + +/// Returns the relevant CPU features enabled at runtime. +#[cfg(all(ripgrep_runtime_cpu, target_arch = "x86_64"))] +fn runtime_cpu_features() -> Vec<&'static str> { + // This is kind of a dirty violation of abstraction, since it assumes + // knowledge about what specific SIMD features are being used. + + let mut features = vec![]; + if is_x86_feature_detected!("ssse3") { + features.push("+SIMD"); + } else { + features.push("-SIMD"); + } + if is_x86_feature_detected!("avx2") { + features.push("+AVX"); + } else { + features.push("-AVX"); + } + features +} + +/// Returns the relevant CPU features enabled at runtime. +#[cfg(not(all(ripgrep_runtime_cpu, target_arch = "x86_64")))] +fn runtime_cpu_features() -> Vec<&'static str> { + vec![] } /// Arg is a light alias for a clap::Arg that is specialized to compile time @@ -478,7 +524,7 @@ impl RGArg { } } -// We add an extra space to long descriptions so that a black line is inserted +// We add an extra space to long descriptions so that a blank line is inserted // between flag descriptions in --help output. macro_rules! 
long { ($lit:expr) => { concat!($lit, " ") } @@ -502,6 +548,7 @@ pub fn all_args_and_flags() -> Vec { flag_context_separator(&mut args); flag_count(&mut args); flag_count_matches(&mut args); + flag_crlf(&mut args); flag_debug(&mut args); flag_dfa_size_limit(&mut args); flag_encoding(&mut args); @@ -518,6 +565,7 @@ pub fn all_args_and_flags() -> Vec { flag_ignore_case(&mut args); flag_ignore_file(&mut args); flag_invert_match(&mut args); + flag_json(&mut args); flag_line_number(&mut args); flag_line_regexp(&mut args); flag_max_columns(&mut args); @@ -525,6 +573,8 @@ pub fn all_args_and_flags() -> Vec { flag_max_depth(&mut args); flag_max_filesize(&mut args); flag_mmap(&mut args); + flag_multiline(&mut args); + flag_multiline_dotall(&mut args); flag_no_config(&mut args); flag_no_ignore(&mut args); flag_no_ignore_global(&mut args); @@ -533,9 +583,12 @@ pub fn all_args_and_flags() -> Vec { flag_no_ignore_vcs(&mut args); flag_no_messages(&mut args); flag_null(&mut args); + flag_null_data(&mut args); flag_only_matching(&mut args); flag_path_separator(&mut args); flag_passthru(&mut args); + flag_pcre2(&mut args); + flag_pcre2_unicode(&mut args); flag_pre(&mut args); flag_pretty(&mut args); flag_quiet(&mut args); @@ -548,6 +601,7 @@ pub fn all_args_and_flags() -> Vec { flag_stats(&mut args); flag_text(&mut args); flag_threads(&mut args); + flag_trim(&mut args); flag_type(&mut args); flag_type_add(&mut args); flag_type_clear(&mut args); @@ -809,14 +863,53 @@ This overrides the --count flag. Note that when --count is combined with args.push(arg); } +fn flag_crlf(args: &mut Vec) { + const SHORT: &str = "Support CRLF line terminators (useful on Windows)."; + const LONG: &str = long!("\ +When enabled, ripgrep will treat CRLF ('\\r\\n') as a line terminator instead +of just '\\n'. + +Principally, this permits '$' in regex patterns to match just before CRLF +instead of just before LF. The underlying regex engine may not support this +natively, so ripgrep will translate all instances of '$' to '(?:\\r??$)'. This +may produce slightly different than desired match offsets. It is intended as a +work-around until the regex engine supports this natively. + +CRLF support can be disabled with --no-crlf. +"); + let arg = RGArg::switch("crlf") + .help(SHORT).long_help(LONG) + .overrides("no-crlf") + .overrides("null-data"); + args.push(arg); + + let arg = RGArg::switch("no-crlf") + .hidden() + .overrides("crlf"); + args.push(arg); +} + fn flag_debug(args: &mut Vec) { const SHORT: &str = "Show debug messages."; const LONG: &str = long!("\ Show debug messages. Please use this when filing a bug report. + +The --debug flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. + +To get even more debug output, use the --trace flag, which implies --debug +along with additional trace data. With --trace, the output could be quite +large and is generally more useful for development. "); let arg = RGArg::switch("debug") .help(SHORT).long_help(LONG); args.push(arg); + + let arg = RGArg::switch("trace") + .hidden() + .overrides("debug"); + args.push(arg); } fn flag_dfa_size_limit(args: &mut Vec) { @@ -842,10 +935,17 @@ default value is 'auto', which will cause ripgrep to do a best effort automatic detection of encoding on a per-file basis. Other supported values can be found in the list of labels here: https://encoding.spec.whatwg.org/#concept-encoding-get + +This flag can be disabled with --no-encoding. 
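The --crlf help above describes rewriting '$' into '(?:\r??$)'. A quick standalone check of that rewrite with the `regex` crate shows why it is needed; the pattern strings here are just for illustration:

    extern crate regex;

    use regex::Regex;

    fn main() {
        let haystack = "foo\r\nbar";
        // In multi-line mode, a plain '$' only matches immediately before
        // '\n', so the '\r' of a CRLF line terminator gets in the way...
        assert!(!Regex::new(r"(?m)foo$").unwrap().is_match(haystack));
        // ...while the translated form tolerates an optional '\r' before '$'.
        assert!(Regex::new(r"(?m)foo(?:\r??$)").unwrap().is_match(haystack));
    }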
"); let arg = RGArg::flag("encoding", "ENCODING").short("E") .help(SHORT).long_help(LONG); args.push(arg); + + let arg = RGArg::switch("no-encoding") + .hidden() + .overrides("encoding"); + args.push(arg); } fn flag_file(args: &mut Vec) { @@ -1071,6 +1171,66 @@ Invert matching. Show lines that do not match the given patterns. args.push(arg); } +fn flag_json(args: &mut Vec) { + const SHORT: &str = "Show search results in a JSON Lines format."; + const LONG: &str = long!("\ +Enable printing results in a JSON Lines format. + +When this flag is provided, ripgrep will emit a sequence of messages, each +encoded as a JSON object, where there are five different message types: + +**begin** - A message that indicates a file is being searched and contains at +least one match. + +**end** - A message the indicates a file is done being searched. This message +also include summary statistics about the search for a particular file. + +**match** - A message that indicates a match was found. This includes the text +and offsets of the match. + +**context** - A message that indicates a contextual line was found. This +includes the text of the line, along with any match information if the search +was inverted. + +**summary** - The final message emitted by ripgrep that contains summary +statistics about the search across all files. + +Since file paths or the contents of files are not guaranteed to be valid UTF-8 +and JSON itself must be representable by a Unicode encoding, ripgrep will emit +all data elements as objects with one of two keys: 'text' or 'bytes'. 'text' is +a normal JSON string when the data is valid UTF-8 while 'bytes' is the base64 +encoded contents of the data. + +The JSON Lines format is only supported for showing search results. It cannot +be used with other flags that emit other types of output, such as --files, +--files-with-matches, --files-without-match, --count or --count-matches. +ripgrep will report an error if any of the aforementioned flags are used in +concert with --json. + +Other flags that control aspects of the standard output such as +--only-matching, --heading, --replace, --max-columns, etc., have no effect +when --json is set. + +A more complete description of the JSON format used can be found here: +https://docs.rs/grep-printer/*/grep_printer/struct.JSON.html + +The JSON Lines format can be disabled with --no-json. +"); + let arg = RGArg::switch("json") + .help(SHORT).long_help(LONG) + .overrides("no-json") + .conflicts(&[ + "count", "count-matches", + "files", "files-with-matches", "files-without-match", + ]); + args.push(arg); + + let arg = RGArg::switch("no-json") + .hidden() + .overrides("json"); + args.push(arg); +} + fn flag_line_number(args: &mut Vec) { const SHORT: &str = "Show line numbers."; const LONG: &str = long!("\ @@ -1198,6 +1358,79 @@ This flag overrides --mmap. args.push(arg); } +fn flag_multiline(args: &mut Vec) { + const SHORT: &str = "Enable matching across multiple lines."; + const LONG: &str = long!("\ +Enable matching across multiple lines. + +When multiline mode is enabled, ripgrep will lift the restriction that a match +cannot include a line terminator. For example, when multiline mode is not +enabled (the default), then the regex '\\p{any}' will match any Unicode +codepoint other than '\\n'. Similarly, the regex '\\n' is explicitly forbidden, +and if you try to use it, ripgrep will return an error. However, when multiline +mode is enabled, '\\p{any}' will match any Unicode codepoint, including '\\n', +and regexes like '\\n' are permitted. 
+ +An important caveat is that multiline mode does not change the match semantics +of '.'. Namely, in most regex matchers, a '.' will by default match any +character other than '\\n', and this is true in ripgrep as well. In order to +make '.' match '\\n', you must enable the \"dot all\" flag inside the regex. +For example, both '(?s).' and '(?s:.)' have the same semantics, where '.' will +match any character, including '\\n'. Alternatively, the '--multiline-dotall' +flag may be passed to make the \"dot all\" behavior the default. This flag only +applies when multiline search is enabled. + +There is no limit on the number of the lines that a single match can span. + +**WARNING**: Because of how the underlying regex engine works, multiline +searches may be slower than normal line-oriented searches, and they may also +use more memory. In particular, when multiline mode is enabled, ripgrep +requires that each file it searches is laid out contiguously in memory +(either by reading it onto the heap or by memory-mapping it). Things that +cannot be memory-mapped (such as stdin) will be consumed until EOF before +searching can begin. In general, ripgrep will only do these things when +necessary. Specifically, if the --multiline flag is provided but the regex +does not contain patterns that would match '\\n' characters, then ripgrep +will automatically avoid reading each file into memory before searching it. +Nevertheless, if you only care about matches spanning at most one line, then it +is always better to disable multiline mode. + +This flag can be disabled with --no-multiline. +"); + let arg = RGArg::switch("multiline").short("U") + .help(SHORT).long_help(LONG) + .overrides("no-multiline"); + args.push(arg); + + let arg = RGArg::switch("no-multiline") + .hidden() + .overrides("multiline"); + args.push(arg); +} + +fn flag_multiline_dotall(args: &mut Vec) { + const SHORT: &str = "Make '.' match new lines when multiline is enabled."; + const LONG: &str = long!("\ +This flag enables \"dot all\" in your regex pattern, which causes '.' to match +newlines when multiline searching is enabled. This flag has no effect if +multiline searching isn't enabled with the --multiline flag. + +Normally, a '.' will match any character except newlines. While this behavior +typically isn't relevant for line-oriented matching (since matches can span at +most one line), this can be useful when searching with the -U/--multiline flag. +By default, the multiline mode runs without this flag. + +This flag is generally intended to be used in an alias or your ripgrep config +file if you prefer \"dot all\" semantics by default. Note that regardless of +whether this flag is used, \"dot all\" semantics can still be controlled via +inline flags in the regex pattern itself, e.g., '(?s:.)' always enables \"dot +all\" where as '(?-s:.)' always disables \"dot all\". +"); + let arg = RGArg::switch("multiline-dotall") + .help(SHORT).long_help(LONG); + args.push(arg); +} + fn flag_no_config(args: &mut Vec) { const SHORT: &str = "Never read configuration files."; const LONG: &str = long!("\ @@ -1340,6 +1573,29 @@ for use with xargs. args.push(arg); } +fn flag_null_data(args: &mut Vec) { + const SHORT: &str = "Use NUL as a line terminator instead of \\n."; + const LONG: &str = long!("\ +Enabling this option causes ripgrep to use NUL as a line terminator instead of +the default of '\\n'. + +This is useful when searching large binary files that would otherwise have very +long lines if '\\n' were used as the line terminator. 
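The "dot all" distinction drawn in the --multiline and --multiline-dotall help above is easy to see with the regex crate directly; this is a standalone check, not ripgrep's own code path:

    extern crate regex;

    use regex::Regex;

    fn main() {
        let haystack = "begin\nend";
        // By default '.' refuses to match a line terminator...
        assert!(!Regex::new(r"begin.end").unwrap().is_match(haystack));
        // ...but with the inline "dot all" flag it matches '\n' as well.
        assert!(Regex::new(r"(?s)begin.end").unwrap().is_match(haystack));
    }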
In particular, ripgrep +requires that, at a minimum, each line must fit into memory. Use NUL instead +can be a useful stopgap to keep memory requirements low and avoid OOM (out of +memory) conditions. + +This is also useful for processing NUL delimited data, such that that emitted +when using ripgrep's -0/--null flag or find's --print0 flag. + +Using this flag implies -a/--text. +"); + let arg = RGArg::switch("null-data") + .help(SHORT).long_help(LONG) + .overrides("crlf"); + args.push(arg); +} + fn flag_only_matching(args: &mut Vec) { const SHORT: &str = "Print only matches parts of a line."; const LONG: &str = long!("\ @@ -1374,13 +1630,76 @@ the empty string. For example, if you are searching using 'rg foo' then using 'rg \"^|foo\"' instead will emit every line in every file searched, but only occurrences of 'foo' will be highlighted. This flag enables the same behavior without needing to modify the pattern. - -This flag conflicts with the --only-matching and --replace flags. "); let arg = RGArg::switch("passthru") .help(SHORT).long_help(LONG) - .alias("passthrough") - .conflicts(&["only-matching", "replace"]); + .alias("passthrough"); + args.push(arg); +} + +fn flag_pcre2(args: &mut Vec) { + const SHORT: &str = "Enable PCRE2 matching."; + const LONG: &str = long!("\ +When this flag is present, ripgrep will use the PCRE2 regex engine instead of +its default regex engine. + +This is generally useful when you want to use features such as look-around +or backreferences. + +Note that PCRE2 is an optional ripgrep feature. If PCRE2 wasn't included in +your build of ripgrep, then using this flag will result in ripgrep printing +an error message and exiting. + +This flag can be disabled with --no-pcre2. +"); + let arg = RGArg::switch("pcre2").short("P") + .help(SHORT).long_help(LONG) + .overrides("no-pcre2"); + args.push(arg); + + let arg = RGArg::switch("no-pcre2") + .hidden() + .overrides("pcre2"); + args.push(arg); +} + +fn flag_pcre2_unicode(args: &mut Vec) { + const SHORT: &str = "Enable Unicode mode for PCRE2 matching."; + const LONG: &str = long!("\ +When PCRE2 matching is enabled, this flag will enable Unicode mode. If PCRE2 +matching is not enabled, then this flag has no effect. + +This flag is enabled by default when PCRE2 matching is enabled. + +When PCRE2's Unicode mode is enabled several different types of patterns become +Unicode aware. This includes '\\b', '\\B', '\\w', '\\W', '\\d', '\\D', '\\s' +and '\\S'. Similarly, the '.' meta character will match any Unicode codepoint +instead of any byte. Caseless matching will also use Unicode simple case +folding instead of ASCII-only case insensitivity. + +Unicode mode in PCRE2 represents a critical trade off in the user experience +of ripgrep. In particular, unlike the default regex engine, PCRE2 does not +support the ability to search possibly invalid UTF-8 with Unicode features +enabled. Instead, PCRE2 *requires* that everything it searches when Unicode +mode is enabled is valid UTF-8. (Or valid UTF-16/UTF-32, but for the purposes +of ripgrep, we only discuss UTF-8.) This means that if you have PCRE2's Unicode +mode enabled and you attempt to search invalid UTF-8, then the search for that +file will hault and print an error. For this reason, when PCRE2's Unicode mode +is enabled, ripgrep will automatically \"fix\" invalid UTF-8 sequences by +replacing them with the Unicode replacement codepoint. 
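The --pcre2-unicode help above says that invalid UTF-8 is "fixed" by substituting the Unicode replacement codepoint. That substitution is the same one std performs during lossy conversion; this sketch only illustrates the substitution itself, not ripgrep's actual transcoding path (which lives in grep-searcher):

    fn main() {
        // 0xFF can never appear in well-formed UTF-8.
        let bytes = b"caf\xFF latte";
        let fixed = String::from_utf8_lossy(bytes);
        // The invalid byte becomes U+FFFD, the replacement codepoint.
        assert_eq!(fixed, "caf\u{FFFD} latte");
    }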
+ +If you would rather see the encoding errors surfaced by PCRE2 when Unicode mode +is enabled, then pass the --no-encoding flag to disable all transcoding. + +This flag can be disabled with --no-pcre2-unicode. +"); + let arg = RGArg::switch("pcre2-unicode") + .help(SHORT).long_help(LONG); + args.push(arg); + + let arg = RGArg::switch("no-pcre2-unicode") + .hidden() + .overrides("pcre2-unicode"); args.push(arg); } @@ -1592,11 +1911,18 @@ searched, and the time taken for the entire search to complete. This set of aggregate statistics may expand over time. Note that this flag has no effect if --files, --files-with-matches or ---files-without-match is passed."); +--files-without-match is passed. +This flag can be disabled with --no-stats. +"); let arg = RGArg::switch("stats") - .help(SHORT).long_help(LONG); + .help(SHORT).long_help(LONG) + .overrides("no-stats"); + args.push(arg); + let arg = RGArg::switch("no-stats") + .hidden() + .overrides("stats"); args.push(arg); } @@ -1639,6 +1965,25 @@ causes ripgrep to choose the thread count using heuristics. args.push(arg); } +fn flag_trim(args: &mut Vec) { + const SHORT: &str = "Trim prefixed whitespace from matches."; + const LONG: &str = long!("\ +When set, all ASCII whitespace at the beginning of each line printed will be +trimmed. + +This flag can be disabled with --no-trim. +"); + let arg = RGArg::switch("trim") + .help(SHORT).long_help(LONG) + .overrides("no-trim"); + args.push(arg); + + let arg = RGArg::switch("no-trim") + .hidden() + .overrides("trim"); + args.push(arg); +} + fn flag_type(args: &mut Vec) { const SHORT: &str = "Only search files matching TYPE."; const LONG: &str = long!("\ diff --git a/src/args.rs b/src/args.rs index 10b9e557..20e67b67 100644 --- a/src/args.rs +++ b/src/args.rs @@ -1,89 +1,117 @@ use std::cmp; use std::env; use std::ffi::OsStr; -use std::fs; +use std::fs::File; use std::io::{self, BufRead}; use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::sync::atomic::{AtomicBool, Ordering}; -use clap; -use encoding_rs::Encoding; -use grep::{Grep, GrepBuilder}; -use log; -use num_cpus; -use regex; -use same_file; -use termcolor; - -use app; use atty; +use clap; +use grep::matcher::LineTerminator; +#[cfg(feature = "pcre2")] +use grep::pcre2::{ + RegexMatcher as PCRE2RegexMatcher, + RegexMatcherBuilder as PCRE2RegexMatcherBuilder, +}; +use grep::printer::{ + ColorSpecs, Stats, + JSON, JSONBuilder, + Standard, StandardBuilder, + Summary, SummaryBuilder, SummaryKind, +}; +use grep::regex::{ + RegexMatcher as RustRegexMatcher, + RegexMatcherBuilder as RustRegexMatcherBuilder, +}; +use grep::searcher::{ + BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, +}; use ignore::overrides::{Override, OverrideBuilder}; use ignore::types::{FileTypeDef, Types, TypesBuilder}; -use ignore; -use printer::{ColorSpecs, Printer}; -use unescape::{escape, unescape}; -use worker::{Worker, WorkerBuilder}; +use ignore::{Walk, WalkBuilder, WalkParallel}; +use log; +use num_cpus; +use path_printer::{PathPrinter, PathPrinterBuilder}; +use regex::{self, Regex}; +use same_file::Handle; +use termcolor::{ + WriteColor, + BufferedStandardStream, BufferWriter, ColorChoice, StandardStream, +}; +use app; use config; use logger::Logger; +use messages::{set_messages, set_ignore_messages}; +use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}; +use subject::SubjectBuilder; +use unescape::{escape, unescape}; use Result; -/// `Args` are transformed/normalized from `ArgMatches`. 
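The --threads help shown above only promises that a value of 0 picks the count "using heuristics". A hedged sketch of what such a default can look like; the cap of 12 is an assumption made here for illustration, not something the help text guarantees:

    extern crate num_cpus;

    use std::cmp;

    fn thread_count(requested: usize) -> usize {
        if requested == 0 {
            // Assumed heuristic: one thread per logical CPU, capped.
            cmp::min(12, num_cpus::get())
        } else {
            requested
        }
    }

    fn main() {
        println!("searching with {} threads", thread_count(0));
    }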
-#[derive(Debug)] -pub struct Args { +/// The command that ripgrep should execute based on the command line +/// configuration. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Command { + /// Search using exactly one thread. + Search, + /// Search using possibly many threads. + SearchParallel, + /// The command line parameters suggest that a search should occur, but + /// ripgrep knows that a match can never be found (e.g., no given patterns + /// or --max-count=0). + SearchNever, + /// Show the files that would be searched, but don't actually search them, + /// and use exactly one thread. + Files, + /// Show the files that would be searched, but don't actually search them, + /// and perform directory traversal using possibly many threads. + FilesParallel, + /// List all file type definitions configured, including the default file + /// types and any additional file types added to the command line. + Types, +} + +impl Command { + /// Returns true if and only if this command requires executing a search. + fn is_search(&self) -> bool { + use self::Command::*; + + match *self { + Search | SearchParallel => true, + SearchNever | Files | FilesParallel | Types => false, + } + } +} + +/// The primary configuration object used throughout ripgrep. It provides a +/// high-level convenient interface to the provided command line arguments. +/// +/// An `Args` object is cheap to clone and can be used from multiple threads +/// simultaneously. +#[derive(Clone, Debug)] +pub struct Args(Arc); + +#[derive(Clone, Debug)] +struct ArgsImp { + /// Mid-to-low level routines for extracting CLI arguments. + matches: ArgMatches, + /// The patterns provided at the command line and/or via the -f/--file + /// flag. This may be empty. + patterns: Vec, + /// A matcher built from the patterns. + /// + /// It's important that this is only built once, since building this goes + /// through regex compilation and various types of analyses. That is, if + /// you need many of theses (one per thread, for example), it is better to + /// build it once and then clone it. + matcher: PatternMatcher, + /// The paths provided at the command line. This is guaranteed to be + /// non-empty. (If no paths are provided, then a default path is created.) paths: Vec, - after_context: usize, - before_context: usize, - byte_offset: bool, - can_match: bool, - color_choice: termcolor::ColorChoice, - colors: ColorSpecs, - column: bool, - context_separator: Vec, - count: bool, - count_matches: bool, - encoding: Option<&'static Encoding>, - files_with_matches: bool, - files_without_matches: bool, - eol: u8, - files: bool, - follow: bool, - glob_overrides: Override, - grep: Grep, - heading: bool, - hidden: bool, - ignore_files: Vec, - invert_match: bool, - line_number: bool, - line_per_match: bool, - max_columns: Option, - max_count: Option, - max_depth: Option, - max_filesize: Option, - mmap: bool, - no_ignore: bool, - no_ignore_global: bool, - no_ignore_messages: bool, - no_ignore_parent: bool, - no_ignore_vcs: bool, - no_messages: bool, - null: bool, - only_matching: bool, - path_separator: Option, - quiet: bool, - quiet_matched: QuietMatched, - replace: Option>, - sort_files: bool, - stdout_handle: Option, - text: bool, - threads: usize, - type_list: bool, - types: Types, - with_filename: bool, - search_zip_files: bool, - preprocessor: Option, - stats: bool + /// Returns true if and only if `paths` had to be populated with a single + /// default path. 
+ using_default_path: bool, } impl Args { @@ -100,46 +128,262 @@ impl Args { // trying to parse config files. If a config file exists and has // arguments, then we re-parse argv, otherwise we just use the matches // we have here. - let early_matches = ArgMatches(app::app().get_matches()); + let early_matches = ArgMatches::new(app::app().get_matches()); + set_messages(!early_matches.is_present("no-messages")); + set_ignore_messages(!early_matches.is_present("no-ignore-messages")); if let Err(err) = Logger::init() { - errored!("failed to initialize logger: {}", err); + return Err(format!("failed to initialize logger: {}", err).into()); } - if early_matches.is_present("debug") { + if early_matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if early_matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } - let matches = Args::matches(early_matches); + let matches = early_matches.reconfigure(); // The logging level may have changed if we brought in additional // arguments from a configuration file, so recheck it and set the log // level as appropriate. - if matches.is_present("debug") { + if matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } + set_messages(!matches.is_present("no-messages")); + set_ignore_messages(!matches.is_present("no-ignore-messages")); matches.to_args() } - /// Run clap and return the matches. If clap determines a problem with the - /// user provided arguments (or if --help or --version are given), then an - /// error/usage/version will be printed and the process will exit. + /// Return direct access to command line arguments. + fn matches(&self) -> &ArgMatches { + &self.0.matches + } + + /// Return the patterns found in the command line arguments. This includes + /// patterns read via the -f/--file flags. + fn patterns(&self) -> &[String] { + &self.0.patterns + } + + /// Return the matcher builder from the patterns. + fn matcher(&self) -> &PatternMatcher { + &self.0.matcher + } + + /// Return the paths found in the command line arguments. This is + /// guaranteed to be non-empty. In the case where no explicit arguments are + /// provided, a single default path is provided automatically. + fn paths(&self) -> &[PathBuf] { + &self.0.paths + } + + /// Returns true if and only if `paths` had to be populated with a default + /// path, which occurs only when no paths were given as command line + /// arguments. + fn using_default_path(&self) -> bool { + self.0.using_default_path + } + + /// Return the printer that should be used for formatting the output of + /// search results. + /// + /// The returned printer will write results to the given writer. + fn printer(&self, wtr: W) -> Result> { + match self.matches().output_kind() { + OutputKind::Standard => { + let separator_search = self.command()? == Command::Search; + self.matches() + .printer_standard(self.paths(), wtr, separator_search) + .map(Printer::Standard) + } + OutputKind::Summary => { + self.matches() + .printer_summary(self.paths(), wtr) + .map(Printer::Summary) + } + OutputKind::JSON => { + self.matches() + .printer_json(wtr) + .map(Printer::JSON) + } + } + } +} + +/// High level public routines for building data structures used by ripgrep +/// from command line arguments. 
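The `Args(Arc<ArgsImp>)` shape introduced above is what makes an `Args` value cheap to clone and safe to hand to every worker thread: the expensive state (compiled matcher, patterns, paths) is built once, and clones only bump a reference count. A minimal sketch of the same pattern with hypothetical `Config` names:

    use std::sync::Arc;
    use std::thread;

    #[derive(Clone, Debug)]
    struct Config(Arc<ConfigImp>);

    #[derive(Debug)]
    struct ConfigImp {
        patterns: Vec<String>,
        threads: usize,
    }

    fn main() {
        let config = Config(Arc::new(ConfigImp {
            patterns: vec!["foo".to_string()],
            threads: 4,
        }));
        let handles: Vec<_> = (0..config.0.threads).map(|i| {
            // Cheap: this only increments the Arc's reference count.
            let config = config.clone();
            thread::spawn(move || {
                println!("worker {} sees {:?}", i, config.0.patterns);
            })
        }).collect();
        for handle in handles {
            handle.join().unwrap();
        }
    }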
+impl Args { + /// Create a new buffer writer for multi-threaded printing with color + /// support. + pub fn buffer_writer(&self) -> Result { + let mut wtr = BufferWriter::stdout(self.matches().color_choice()); + wtr.separator(self.matches().file_separator()?); + Ok(wtr) + } + + /// Return the high-level command that ripgrep should run. + pub fn command(&self) -> Result { + let is_one_search = self.matches().is_one_search(self.paths()); + let threads = self.matches().threads()?; + let one_thread = is_one_search || threads == 1; + + Ok(if self.matches().is_present("type-list") { + Command::Types + } else if self.matches().is_present("files") { + if one_thread { + Command::Files + } else { + Command::FilesParallel + } + } else if self.matches().can_never_match(self.patterns()) { + Command::SearchNever + } else if one_thread { + Command::Search + } else { + Command::SearchParallel + }) + } + + /// Builder a path printer that can be used for printing just file paths, + /// with optional color support. + /// + /// The printer will print paths to the given writer. + pub fn path_printer( + &self, + wtr: W, + ) -> Result> { + let mut builder = PathPrinterBuilder::new(); + builder + .color_specs(self.matches().color_specs()?) + .separator(self.matches().path_separator()?) + .terminator(self.matches().path_terminator().unwrap_or(b'\n')); + Ok(builder.build(wtr)) + } + + /// Returns true if and only if the search should quit after finding the + /// first match. + pub fn quit_after_match(&self) -> Result { + Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) + } + + /// Build a worker for executing searches. + /// + /// Search results are written to the given writer. + pub fn search_worker( + &self, + wtr: W, + ) -> Result> { + let matcher = self.matcher().clone(); + let printer = self.printer(wtr)?; + let searcher = self.matches().searcher(self.paths())?; + let mut builder = SearchWorkerBuilder::new(); + builder + .json_stats(self.matches().is_present("json")) + .preprocessor(self.matches().preprocessor()) + .search_zip(self.matches().is_present("search-zip")); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Returns a zero value for tracking statistics if and only if it has been + /// requested. + /// + /// When this returns a `Stats` value, then it is guaranteed that the + /// search worker will be configured to track statistics as well. + pub fn stats(&self) -> Result> { + Ok(if self.command()?.is_search() && self.matches().stats() { + Some(Stats::new()) + } else { + None + }) + } + + /// Return a builder for constructing subjects. A subject represents a + /// single unit of something to search. Typically, this corresponds to a + /// file or a stream such as stdin. + pub fn subject_builder(&self) -> SubjectBuilder { + let mut builder = SubjectBuilder::new(); + builder + .strip_dot_prefix(self.using_default_path()) + .skip(self.matches().stdout_handle()); + builder + } + + /// Execute the given function with a writer to stdout that enables color + /// support based on the command line configuration. + pub fn stdout(&self) -> Box { + let color_choice = self.matches().color_choice(); + if atty::is(atty::Stream::Stdout) { + Box::new(StandardStream::stdout(color_choice)) + } else { + Box::new(BufferedStandardStream::stdout(color_choice)) + } + } + + /// Return the type definitions compiled into ripgrep. + /// + /// If there was a problem reading and parsing the type definitions, then + /// this returns an error. 
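The `buffer_writer` constructor above exists for the parallel code paths: each worker formats its results into a private termcolor `Buffer`, and the shared `BufferWriter` flushes whole buffers so output from different threads never interleaves mid-line. A standalone sketch of that termcolor pattern, independent of ripgrep's own printer types:

    extern crate termcolor;

    use std::io::Write;
    use std::sync::Arc;
    use std::thread;

    use termcolor::{BufferWriter, ColorChoice};

    fn main() {
        let wtr = Arc::new(BufferWriter::stdout(ColorChoice::Auto));
        let handles: Vec<_> = (0..4).map(|i| {
            let wtr = Arc::clone(&wtr);
            thread::spawn(move || {
                // Each worker accumulates output privately...
                let mut buf = wtr.buffer();
                writeln!(buf, "results from worker {}", i).unwrap();
                // ...and the BufferWriter prints it as one atomic chunk.
                wtr.print(&buf).unwrap();
            })
        }).collect();
        for handle in handles {
            handle.join().unwrap();
        }
    }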
+ pub fn type_defs(&self) -> Result> { + Ok(self.matches().types()?.definitions().to_vec()) + } + + /// Return a walker that never uses additional threads. + pub fn walker(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build()) + } + + /// Return a walker that never uses additional threads. + pub fn walker_parallel(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build_parallel()) + } +} + +/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to +/// the parsed arguments. +#[derive(Clone, Debug)] +struct ArgMatches(clap::ArgMatches<'static>); + +/// The output format. Generally, this corresponds to the printer that ripgrep +/// uses to show search results. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum OutputKind { + /// Classic grep-like or ack-like format. + Standard, + /// Show matching files and possibly the number of matches in each file. + Summary, + /// Emit match information in the JSON Lines format. + JSON, +} + +impl ArgMatches { + /// Create an ArgMatches from clap's parse result. + fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { + ArgMatches(clap_matches) + } + + /// Run clap and return the matches using a config file if present. If clap + /// determines a problem with the user provided arguments (or if --help or + /// --version are given), then an error/usage/version will be printed and + /// the process will exit. /// /// If there are no additional arguments from the environment (e.g., a /// config file), then the given matches are returned as is. - fn matches(early_matches: ArgMatches<'static>) -> ArgMatches<'static> { + fn reconfigure(self) -> ArgMatches { // If the end user says no config, then respect it. - if early_matches.is_present("no-config") { + if self.is_present("no-config") { debug!("not reading config files because --no-config is present"); - return early_matches; + return self; } // If the user wants ripgrep to use a config file, then parse args // from that first. - let mut args = config::args(early_matches.is_present("no-messages")); + let mut args = config::args(); if args.is_empty() { - return early_matches; + return self; } let mut cliargs = env::args_os(); if let Some(bin) = cliargs.next() { @@ -147,672 +391,360 @@ impl Args { } args.extend(cliargs); debug!("final argv: {:?}", args); - ArgMatches(app::app().get_matches_from(args)) + ArgMatches::new(app::app().get_matches_from(args)) } - /// Returns true if ripgrep should print the files it will search and exit - /// (but not do any actual searching). - pub fn files(&self) -> bool { - self.files - } - - /// Create a new line based matcher. The matcher returned can be used - /// across multiple threads simultaneously. This matcher only supports - /// basic searching of regular expressions in a single buffer. - /// - /// The pattern and other flags are taken from the command line. - pub fn grep(&self) -> Grep { - self.grep.clone() - } - - /// Whether ripgrep should be quiet or not. - pub fn quiet(&self) -> bool { - self.quiet - } - - /// Returns a thread safe boolean for determining whether to quit a search - /// early when quiet mode is enabled. - /// - /// If quiet mode is disabled, then QuietMatched.has_match always returns - /// false. - pub fn quiet_matched(&self) -> QuietMatched { - self.quiet_matched.clone() - } - - /// Create a new printer of individual search results that writes to the - /// writer given. 
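The `walker` and `walker_parallel` methods above are thin wrappers over the ignore crate's `WalkBuilder`: the builder is seeded with one root and additional roots join the same gitignore-aware traversal. A minimal sketch of that underlying API with made-up paths:

    extern crate ignore;

    use ignore::WalkBuilder;

    fn main() {
        let mut builder = WalkBuilder::new("./src");
        builder.add("./tests");
        for result in builder.build() {
            match result {
                Ok(entry) => println!("{}", entry.path().display()),
                Err(err) => eprintln!("ERROR: {}", err),
            }
        }
    }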
- pub fn printer(&self, wtr: W) -> Printer { - let mut p = Printer::new(wtr) - .colors(self.colors.clone()) - .column(self.column) - .context_separator(self.context_separator.clone()) - .eol(self.eol) - .heading(self.heading) - .line_per_match(self.line_per_match) - .null(self.null) - .only_matching(self.only_matching) - .path_separator(self.path_separator) - .with_filename(self.with_filename) - .max_columns(self.max_columns); - if let Some(ref rep) = self.replace { - p = p.replace(rep.clone()); - } - p - } - - /// Retrieve the configured file separator. - pub fn file_separator(&self) -> Option> { - let contextless = - self.count - || self.count_matches - || self.files_with_matches - || self.files_without_matches; - let use_heading_sep = self.heading && !contextless; - - if use_heading_sep { - Some(b"".to_vec()) - } else if !contextless - && (self.before_context > 0 || self.after_context > 0) { - Some(self.context_separator.clone()) - } else { - None - } - } - - /// Returns true if the given arguments are known to never produce a match. - pub fn never_match(&self) -> bool { - !self.can_match || self.max_count == Some(0) - } - - /// Returns whether ripgrep should track stats for this run - pub fn stats(&self) -> bool { - self.stats - } - - /// Create a new writer for single-threaded searching with color support. - pub fn stdout(&self) -> Box { - if atty::is(atty::Stream::Stdout) { - Box::new(termcolor::StandardStream::stdout(self.color_choice)) - } else { - Box::new( - termcolor::BufferedStandardStream::stdout(self.color_choice)) - } - } - - /// Returns a handle to stdout for filtering search. - /// - /// A handle is returned if and only if ripgrep's stdout is being - /// redirected to a file. The handle returned corresponds to that file. - /// - /// This can be used to ensure that we do not attempt to search a file - /// that ripgrep is writing to. - pub fn stdout_handle(&self) -> Option<&same_file::Handle> { - self.stdout_handle.as_ref() - } - - /// Create a new buffer writer for multi-threaded searching with color - /// support. - pub fn buffer_writer(&self) -> termcolor::BufferWriter { - let mut wtr = termcolor::BufferWriter::stdout(self.color_choice); - wtr.separator(self.file_separator()); - wtr - } - - /// Return the paths that should be searched. - pub fn paths(&self) -> &[PathBuf] { - &self.paths - } - - /// Returns true if there is exactly one file path given to search. - pub fn is_one_path(&self) -> bool { - self.paths.len() == 1 - && (self.paths[0] == Path::new("-") || path_is_file(&self.paths[0])) - } - - /// Create a worker whose configuration is taken from the - /// command line. - pub fn worker(&self) -> Worker { - WorkerBuilder::new(self.grep()) - .after_context(self.after_context) - .before_context(self.before_context) - .byte_offset(self.byte_offset) - .count(self.count) - .count_matches(self.count_matches) - .encoding(self.encoding) - .files_with_matches(self.files_with_matches) - .files_without_matches(self.files_without_matches) - .eol(self.eol) - .line_number(self.line_number) - .invert_match(self.invert_match) - .max_count(self.max_count) - .mmap(self.mmap) - .no_messages(self.no_messages) - .quiet(self.quiet) - .text(self.text) - .search_zip_files(self.search_zip_files) - .preprocessor(self.preprocessor.clone()) - .build() - } - - /// Returns the number of worker search threads that should be used. - pub fn threads(&self) -> usize { - self.threads - } - - /// Returns a list of type definitions currently loaded. 
- pub fn type_defs(&self) -> &[FileTypeDef] { - self.types.definitions() - } - - /// Returns true if ripgrep should print the type definitions currently - /// loaded and then exit. - pub fn type_list(&self) -> bool { - self.type_list - } - - /// Returns true if error messages should be suppressed. - pub fn no_messages(&self) -> bool { - self.no_messages - } - - /// Returns true if error messages associated with parsing .ignore or - /// .gitignore files should be suppressed. - pub fn no_ignore_messages(&self) -> bool { - self.no_ignore_messages - } - - /// Create a new recursive directory iterator over the paths in argv. - pub fn walker(&self) -> ignore::Walk { - self.walker_builder().build() - } - - /// Create a new parallel recursive directory iterator over the paths - /// in argv. - pub fn walker_parallel(&self) -> ignore::WalkParallel { - self.walker_builder().build_parallel() - } - - fn walker_builder(&self) -> ignore::WalkBuilder { - let paths = self.paths(); - let mut wd = ignore::WalkBuilder::new(&paths[0]); - for path in &paths[1..] { - wd.add(path); - } - for path in &self.ignore_files { - if let Some(err) = wd.add_ignore(path) { - if !self.no_messages && !self.no_ignore_messages { - eprintln!("{}", err); - } - } - } - - wd.follow_links(self.follow); - wd.hidden(!self.hidden); - wd.max_depth(self.max_depth); - wd.max_filesize(self.max_filesize); - wd.overrides(self.glob_overrides.clone()); - wd.types(self.types.clone()); - wd.git_global( - !self.no_ignore && !self.no_ignore_vcs && !self.no_ignore_global - ); - wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs); - wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs); - wd.ignore(!self.no_ignore); - if !self.no_ignore { - wd.add_custom_ignore_filename(".rgignore"); - } - wd.parents(!self.no_ignore_parent); - wd.threads(self.threads()); - if self.sort_files { - wd.sort_by_file_name(|a, b| a.cmp(b)); - } - wd + /// Convert the result of parsing CLI arguments into ripgrep's higher level + /// configuration structure. + fn to_args(self) -> Result { + // We compute these once since they could be large. + let patterns = self.patterns()?; + let matcher = self.matcher(&patterns)?; + let mut paths = self.paths(); + let using_default_path = + if paths.is_empty() { + paths.push(self.path_default()); + true + } else { + false + }; + Ok(Args(Arc::new(ArgsImp { + matches: self, + patterns: patterns, + matcher: matcher, + paths: paths, + using_default_path: using_default_path, + }))) } } -/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to -/// several options/flags. -struct ArgMatches<'a>(clap::ArgMatches<'a>); - -impl<'a> ArgMatches<'a> { - /// Convert the result of parsing CLI arguments into ripgrep's - /// configuration. 
- fn to_args(&self) -> Result { - let paths = self.paths(); - let line_number = self.line_number(&paths); - let mmap = self.mmap(&paths)?; - let with_filename = self.with_filename(&paths); - let (before_context, after_context) = self.contexts()?; - let (count, count_matches) = self.counts(); - let quiet = self.is_present("quiet"); - let (grep, can_match) = self.grep()?; - let args = Args { - paths: paths, - after_context: after_context, - before_context: before_context, - byte_offset: self.is_present("byte-offset"), - can_match: can_match, - color_choice: self.color_choice(), - colors: self.color_specs()?, - column: self.column(), - context_separator: self.context_separator(), - count: count, - count_matches: count_matches, - encoding: self.encoding()?, - files_with_matches: self.is_present("files-with-matches"), - files_without_matches: self.is_present("files-without-match"), - eol: b'\n', - files: self.is_present("files"), - follow: self.is_present("follow"), - glob_overrides: self.overrides()?, - grep: grep, - heading: self.heading(), - hidden: self.hidden(), - ignore_files: self.ignore_files(), - invert_match: self.is_present("invert-match"), - line_number: line_number, - line_per_match: self.is_present("vimgrep"), - max_columns: self.usize_of_nonzero("max-columns")?, - max_count: self.usize_of("max-count")?.map(|n| n as u64), - max_depth: self.usize_of("max-depth")?, - max_filesize: self.max_filesize()?, - mmap: mmap, - no_ignore: self.no_ignore(), - no_ignore_global: self.no_ignore_global(), - no_ignore_messages: self.is_present("no-ignore-messages"), - no_ignore_parent: self.no_ignore_parent(), - no_ignore_vcs: self.no_ignore_vcs(), - no_messages: self.is_present("no-messages"), - null: self.is_present("null"), - only_matching: self.is_present("only-matching"), - path_separator: self.path_separator()?, - quiet: quiet, - quiet_matched: QuietMatched::new(quiet), - replace: self.replace(), - sort_files: self.is_present("sort-files"), - stdout_handle: self.stdout_handle(), - text: self.text(), - threads: self.threads()?, - type_list: self.is_present("type-list"), - types: self.types()?, - with_filename: with_filename, - search_zip_files: self.is_present("search-zip"), - preprocessor: self.preprocessor(), - stats: self.stats() - }; - if args.mmap { - debug!("will try to use memory maps"); - } - Ok(args) - } - - /// Return all file paths that ripgrep should search. - fn paths(&self) -> Vec { - let mut paths: Vec = match self.values_of_os("path") { - None => vec![], - Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), - }; - // If --file, --files or --regexp is given, then the first path is - // always in `pattern`. - if self.is_present("file") - || self.is_present("files") - || self.is_present("regexp") { - if let Some(path) = self.value_of_os("pattern") { - paths.insert(0, Path::new(path).to_path_buf()); - } - } - if paths.is_empty() { - paths.push(self.default_path()); - } - paths - } - - /// Return the default path that ripgrep should search. - fn default_path(&self) -> PathBuf { - let file_is_stdin = - self.values_of_os("file").map_or(false, |mut files| { - files.any(|f| f == "-") - }); - let search_cwd = atty::is(atty::Stream::Stdin) - || !stdin_is_readable() - || (self.is_present("file") && file_is_stdin) - || self.is_present("files") - || self.is_present("type-list"); - if search_cwd { - Path::new("./").to_path_buf() +/// High level routines for converting command line arguments into various +/// data structures used by ripgrep. 
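The deleted `patterns` doc comment above spells out the per-pattern rewriting: -F/--fixed-strings escapes the pattern, -w/--word-regexp surrounds it with '\b', and -x/--line-regexp surrounds it with '^...$'. A simplified illustration of that rewriting; the non-capturing group and the precedence of -x over -w are assumptions made here, and the real code also handles invalid UTF-8 and --passthru:

    extern crate regex;

    fn rewrite(pattern: &str, fixed: bool, word: bool, line: bool) -> String {
        let mut pat = if fixed {
            // -F/--fixed-strings: treat the pattern as a literal.
            regex::escape(pattern)
        } else {
            pattern.to_string()
        };
        if line {
            pat = format!("^(?:{})$", pat); // -x/--line-regexp
        } else if word {
            pat = format!(r"\b(?:{})\b", pat); // -w/--word-regexp
        }
        pat
    }

    fn main() {
        assert_eq!(rewrite("a.b", true, false, false), r"a\.b");
        assert_eq!(rewrite("foo", false, true, false), r"\b(?:foo)\b");
    }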
+/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(feature = "pcre2")] + fn matcher(&self, patterns: &[String]) -> Result { + if self.is_present("pcre2") { + let matcher = self.matcher_pcre2(patterns)?; + Ok(PatternMatcher::PCRE2(matcher)) } else { - Path::new("-").to_path_buf() - } - } - - /// Return all of the ignore files given on the command line. - fn ignore_files(&self) -> Vec { - match self.values_of_os("ignore-file") { - None => return vec![], - Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(), - } - } - - /// Get a sequence of all available patterns from the command line. - /// This includes reading the -e/--regexp and -f/--file flags. - /// - /// Note that if -F/--fixed-strings is set, then all patterns will be - /// escaped. Similarly, if -w/--word-regexp is set, then all patterns - /// are surrounded by `\b`, and if -x/--line-regexp is set, then all - /// patterns are surrounded by `^...$`. Finally, if --passthru is set, - /// the pattern `^` is added to the end (to ensure that it works as - /// expected with multiple -e/-f patterns). - /// - /// If any pattern is invalid UTF-8, then an error is returned. - fn patterns(&self) -> Result> { - if self.is_present("files") || self.is_present("type-list") { - return Ok(vec![self.empty_pattern()]); - } - let mut pats = vec![]; - match self.values_of_os("regexp") { - None => { - if self.values_of_os("file").is_none() { - if let Some(os_pat) = self.value_of_os("pattern") { - pats.push(self.os_str_pattern(os_pat)?); - } + let matcher = match self.matcher_rust(patterns) { + Ok(matcher) => matcher, + Err(err) => { + return Err(From::from(suggest_pcre2(err.to_string()))); } + }; + Ok(PatternMatcher::RustRegex(matcher)) + } + } + + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + #[cfg(not(feature = "pcre2"))] + fn matcher(&self, patterns: &[String]) -> Result { + if self.is_present("pcre2") { + return Err(From::from( + "PCRE2 is not available in this build of ripgrep", + )); + } + let matcher = self.matcher_rust(patterns)?; + Ok(PatternMatcher::RustRegex(matcher)) + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + fn matcher_rust(&self, patterns: &[String]) -> Result { + let mut builder = RustRegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .case_insensitive(self.case_insensitive()) + .multi_line(true) + .unicode(true) + .octal(false) + .word(self.is_present("word-regexp")); + if self.is_present("multiline") { + builder.dot_matches_new_line(self.is_present("multiline-dotall")); + if self.is_present("crlf") { + builder + .crlf(true) + .line_terminator(None); } - Some(os_pats) => { - for os_pat in os_pats { - pats.push(self.os_str_pattern(os_pat)?); + } else { + builder + .line_terminator(Some(b'\n')) + .dot_matches_new_line(false); + if self.is_present("crlf") { + builder.crlf(true); + } + // We don't need to set this in multiline mode since mulitline + // matchers don't use optimizations related to line terminators. + // Moreover, a mulitline regex used with --null-data should + // be allowed to match NUL bytes explicitly, which this would + // otherwise forbid. 
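Editorial aside: the branch above is easier to read as a decision table. The sketch below is a rough, standalone restatement; the `line_config` function and its boolean parameters are hypothetical stand-ins for `self.is_present(..)`, and it collapses builder defaults into explicit values rather than transcribing the exact builder calls.

    #[derive(Debug, PartialEq)]
    struct LineConfig {
        line_terminator: Option<u8>, // None: skip line-terminator optimizations
        dot_matches_new_line: bool,
        crlf: bool,
    }

    fn line_config(
        multiline: bool,
        multiline_dotall: bool,
        crlf: bool,
        null_data: bool,
    ) -> LineConfig {
        if multiline {
            LineConfig {
                // Multiline matchers don't use line-terminator optimizations.
                line_terminator: None,
                dot_matches_new_line: multiline_dotall,
                crlf,
            }
        } else {
            LineConfig {
                line_terminator: Some(if null_data { b'\x00' } else { b'\n' }),
                dot_matches_new_line: false,
                crlf,
            }
        }
    }

    fn main() {
        // --null-data without -U keeps line-oriented search, but on NUL bytes.
        assert_eq!(
            line_config(false, false, false, true).line_terminator,
            Some(b'\x00')
        );
        // -U (multiline) drops the line-terminator optimization entirely.
        assert_eq!(line_config(true, false, false, false).line_terminator, None);
        println!("{:?}", line_config(false, false, true, false));
    }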
+ if self.is_present("null-data") { + builder.line_terminator(Some(b'\x00')); + } + } + if let Some(limit) = self.regex_size_limit()? { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit()? { + builder.dfa_size_limit(limit); + } + Ok(builder.build(&patterns.join("|"))?) + } + + /// Build a matcher using PCRE2. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + #[cfg(feature = "pcre2")] + fn matcher_pcre2(&self, patterns: &[String]) -> Result { + let mut builder = PCRE2RegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .caseless(self.case_insensitive()) + .multi_line(true) + .word(self.is_present("word-regexp")); + // For whatever reason, the JIT craps out during compilation with a + // "no more memory" error on 32 bit systems. So don't use it there. + if !cfg!(target_pointer_width = "32") { + builder.jit(true); + } + if self.pcre2_unicode() { + builder.utf(true).ucp(true); + if self.encoding()?.is_some() { + // SAFETY: If an encoding was specified, then we're guaranteed + // to get valid UTF-8, so we can disable PCRE2's UTF checking. + // (Feeding invalid UTF-8 to PCRE2 is UB.) + unsafe { + builder.disable_utf_check(); } } } - if let Some(files) = self.values_of_os("file") { - for file in files { - if file == "-" { - let stdin = io::stdin(); - for line in stdin.lock().lines() { - pats.push(self.str_pattern(&line?)); - } - } else { - let f = fs::File::open(file)?; - for line in io::BufReader::new(f).lines() { - pats.push(self.str_pattern(&line?)); - } - } - } + if self.is_present("multiline") { + builder.dotall(self.is_present("multiline-dotall")); } - // It's important that this be at the end; otherwise it would always - // match first, and we wouldn't get colours in the output - if self.is_present("passthru") && !self.is_present("count") { - pats.push("^".to_string()) + if self.is_present("crlf") { + builder.crlf(true); } - Ok(pats) + Ok(builder.build(&patterns.join("|"))?) } - /// Converts an OsStr pattern to a String pattern, including line/word - /// boundaries or escapes if applicable. + /// Build a JSON printer that writes results to the given writer. + fn printer_json(&self, wtr: W) -> Result> { + let mut builder = JSONBuilder::new(); + builder + .pretty(false) + .max_matches(self.max_count()?) + .always_begin_end(false); + Ok(builder.build(wtr)) + } + + /// Build a Standard printer that writes results to the given writer. /// - /// If the pattern is not valid UTF-8, then an error is returned. - fn os_str_pattern(&self, pat: &OsStr) -> Result { - let s = pattern_to_str(pat)?; - Ok(self.str_pattern(s)) - } - - /// Converts a &str pattern to a String pattern, including line/word - /// boundaries or escapes if applicable. - fn str_pattern(&self, pat: &str) -> String { - let litpat = self.literal_pattern(pat.to_string()); - let s = self.line_pattern(self.word_pattern(litpat)); - - if s.is_empty() { - self.empty_pattern() - } else { - s - } - } - - /// Returns the given pattern as a literal pattern if the - /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned - /// unchanged. - fn literal_pattern(&self, pat: String) -> String { - if self.is_present("fixed-strings") { - regex::escape(&pat) - } else { - pat - } - } - - /// Returns the given pattern as a word pattern if the -w/--word-regexp - /// flag is set. Otherwise, the pattern is returned unchanged. 
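Editorial aside: both matcher constructors above compile the entire pattern list as one alternation via `patterns.join("|")`. A standalone illustration with the plain `regex` crate (the pattern values here are invented for the example):

    use regex::Regex;

    fn main() {
        // Multiple -e/--regexp patterns, as collected by `patterns()`.
        let patterns = vec!["foo".to_string(), r"bar\d+".to_string()];
        let joined = patterns.join("|");
        assert_eq!(joined, r"foo|bar\d+");
        let re = Regex::new(&joined).expect("both alternates are valid");
        assert!(re.is_match("xxbar42"));
        assert!(!re.is_match("quux"));
    }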
- fn word_pattern(&self, pat: String) -> String { - if self.is_present("word-regexp") { - format!(r"\b(?:{})\b", pat) - } else { - pat - } - } - - /// Returns the given pattern as a line pattern if the -x/--line-regexp - /// flag is set. Otherwise, the pattern is returned unchanged. - fn line_pattern(&self, pat: String) -> String { - if self.is_present("line-regexp") { - format!(r"^(?:{})$", pat) - } else { - pat - } - } - - /// Empty pattern returns a pattern that is guaranteed to produce an empty - /// regular expression that is valid in any position. - fn empty_pattern(&self) -> String { - // This would normally just be an empty string, which works on its - // own, but if the patterns are joined in a set of alternations, then - // you wind up with `foo|`, which is invalid. - self.word_pattern("(?:z{0})*".to_string()) - } - - /// Returns true if and only if file names containing each match should - /// be emitted. + /// The given paths are used to configure aspects of the printer. /// - /// `paths` should be a slice of all top-level file paths that ripgrep - /// will need to search. - fn with_filename(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-filename") { - false - } else { - self.is_present("with-filename") - || self.is_present("vimgrep") - || paths.len() > 1 - || paths.get(0).map_or(false, |p| path_is_dir(p)) - } - } - - /// Returns a handle to stdout for filtering search. + /// If `separator_search` is true, then the returned printer will assume + /// the responsibility of printing a separator between each set of + /// search results, when appropriate (e.g., when contexts are enabled). + /// When it's set to false, the caller is responsible for handling + /// separators. /// - /// A handle is returned if and only if ripgrep's stdout is being - /// redirected to a file. The handle returned corresponds to that file. + /// In practice, we want the printer to handle it in the single threaded + /// case but not in the multi-threaded case. + fn printer_standard( + &self, + paths: &[PathBuf], + wtr: W, + separator_search: bool, + ) -> Result> { + let mut builder = StandardBuilder::new(); + builder + .color_specs(self.color_specs()?) + .stats(self.stats()) + .heading(self.heading()) + .path(self.with_filename(paths)) + .only_matching(self.is_present("only-matching")) + .per_match(self.is_present("vimgrep")) + .replacement(self.replacement()) + .max_columns(self.max_columns()?) + .max_matches(self.max_count()?) + .column(self.column()) + .byte_offset(self.is_present("byte-offset")) + .trim_ascii(self.is_present("trim")) + .separator_search(None) + .separator_context(Some(self.context_separator())) + .separator_field_match(b":".to_vec()) + .separator_field_context(b"-".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + if separator_search { + builder.separator_search(self.file_separator()?); + } + Ok(builder.build(wtr)) + } + + /// Build a Summary printer that writes results to the given writer. /// - /// This can be used to ensure that we do not attempt to search a file - /// that ripgrep is writing to. - fn stdout_handle(&self) -> Option { - let h = match same_file::Handle::stdout() { - Err(_) => return None, - Ok(h) => h, - }; - let md = match h.as_file().metadata() { - Err(_) => return None, - Ok(md) => md, - }; - if !md.is_file() { - return None; - } - Some(h) - } - - /// Returns true if and only if memory map searching should be tried. + /// The given paths are used to configure aspects of the printer. 
/// - /// `paths` should be a slice of all top-level file paths that ripgrep - /// will need to search. - fn mmap(&self, paths: &[PathBuf]) -> Result { - let (before, after) = self.contexts()?; - let enc = self.encoding()?; - Ok(if before > 0 || after > 0 || self.is_present("no-mmap") { - false - } else if self.is_present("mmap") { - true - } else if cfg!(target_os = "macos") { - // On Mac, memory maps appear to suck. Neat. - false - } else if enc.is_some() { - // There's no practical way to transcode a memory map that isn't - // isomorphic to searching over io::Read. - false - } else { - // If we're only searching a few paths and all of them are - // files, then memory maps are probably faster. - paths.len() <= 10 && paths.iter().all(|p| path_is_file(p)) - }) + /// This panics if the output format is not `OutputKind::Summary`. + fn printer_summary( + &self, + paths: &[PathBuf], + wtr: W, + ) -> Result> { + let mut builder = SummaryBuilder::new(); + builder + .kind(self.summary_kind().expect("summary format")) + .color_specs(self.color_specs()?) + .stats(self.stats()) + .path(self.with_filename(paths)) + .max_matches(self.max_count()?) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + Ok(builder.build(wtr)) } - /// Returns true if and only if line numbers should be shown. - fn line_number(&self, paths: &[PathBuf]) -> bool { - if self.is_present("no-line-number") || self.is_present("count") { - false - } else { - let only_stdin = paths == [Path::new("-")]; - (atty::is(atty::Stream::Stdout) && !only_stdin) - || self.is_present("line-number") - || self.is_present("column") - || self.is_present("pretty") - || self.is_present("vimgrep") - } - } - - /// Returns true if and only if column numbers should be shown. - fn column(&self) -> bool { - if self.is_present("no-column") { - return false; - } - self.is_present("column") || self.is_present("vimgrep") - } - - /// Returns true if and only if matches should be grouped with file name - /// headings. - fn heading(&self) -> bool { - if self.is_present("no-heading") || self.is_present("vimgrep") { - false - } else { - atty::is(atty::Stream::Stdout) - || self.is_present("heading") - || self.is_present("pretty") - } - } - - /// Returns the replacement string as UTF-8 bytes if it exists. - fn replace(&self) -> Option> { - self.value_of_lossy("replace").map(|s| s.into_bytes()) - } - - /// Returns the unescaped context separator in UTF-8 bytes. - fn context_separator(&self) -> Vec { - match self.value_of_lossy("context-separator") { - None => b"--".to_vec(), - Some(sep) => unescape(&sep), - } - } - - /// Returns the preprocessor command - fn preprocessor(&self) -> Option { - if let Some(path) = self.value_of_os("pre") { - if path.is_empty() { - None + /// Build a searcher from the command line parameters. 
+ fn searcher(&self, paths: &[PathBuf]) -> Result { + let (ctx_before, ctx_after) = self.contexts()?; + let line_term = + if self.is_present("crlf") { + LineTerminator::crlf() + } else if self.is_present("null-data") { + LineTerminator::byte(b'\x00') } else { - Some(Path::new(path).to_path_buf()) - } - } else { - None - } + LineTerminator::byte(b'\n') + }; + let mut builder = SearcherBuilder::new(); + builder + .line_terminator(line_term) + .invert_match(self.is_present("invert-match")) + .line_number(self.line_number(paths)) + .multi_line(self.is_present("multiline")) + .before_context(ctx_before) + .after_context(ctx_after) + .passthru(self.is_present("passthru")) + .memory_map(self.mmap_choice(paths)) + .binary_detection(self.binary_detection()) + .encoding(self.encoding()?); + Ok(builder.build()) } - /// Returns the unescaped path separator in UTF-8 bytes. - fn path_separator(&self) -> Result> { - match self.value_of_lossy("path-separator") { - None => Ok(None), - Some(sep) => { - let sep = unescape(&sep); - if sep.is_empty() { - Ok(None) - } else if sep.len() > 1 { - Err(From::from(format!( - "A path separator must be exactly one byte, but \ - the given separator is {} bytes: {}\n\ - In some shells on Windows '/' is automatically \ - expanded. Use '//' instead.", - sep.len(), - escape(&sep), - ))) - } else { - Ok(Some(sep[0])) - } + /// Return a builder for recursively traversing a directory while + /// respecting ignore rules. + /// + /// If there was a problem parsing the CLI arguments necessary for + /// constructing the builder, then this returns an error. + fn walker_builder(&self, paths: &[PathBuf]) -> Result { + let mut builder = WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + builder.add(path); + } + for path in self.ignore_paths() { + if let Some(err) = builder.add_ignore(path) { + ignore_message!("{}", err); } } - } - - /// Returns the before and after contexts from the command line. - /// - /// If a context setting was absent, then `0` is returned. - /// - /// If there was a problem parsing the values from the user as an integer, - /// then an error is returned. - fn contexts(&self) -> Result<(usize, usize)> { - let after = self.usize_of("after-context")?.unwrap_or(0); - let before = self.usize_of("before-context")?.unwrap_or(0); - let both = self.usize_of("context")?.unwrap_or(0); - Ok(if both > 0 { - (both, both) - } else { - (before, after) - }) - } - - /// Returns whether the -c/--count or the --count-matches flags were - /// passed from the command line. - /// - /// If --count-matches and --invert-match were passed in, behave - /// as if --count and --invert-match were passed in (i.e. rg will - /// count inverted matches as per existing behavior). - fn counts(&self) -> (bool, bool) { - let count = self.is_present("count"); - let count_matches = self.is_present("count-matches"); - let invert_matches = self.is_present("invert-match"); - let only_matching = self.is_present("only-matching"); - if count_matches && invert_matches { - // Treat `-v --count-matches` as `-v -c`. - (true, false) - } else if count && only_matching { - // Treat `-c --only-matching` as `--count-matches`. - (false, true) - } else { - (count, count_matches) + builder + .max_depth(self.usize_of("max-depth")?) + .follow_links(self.is_present("follow")) + .max_filesize(self.max_file_size()?) + .threads(self.threads()?) + .overrides(self.overrides()?) + .types(self.types()?) 
+ .hidden(!self.hidden()) + .parents(!self.no_ignore_parent()) + .ignore(!self.no_ignore()) + .git_global( + !self.no_ignore() + && !self.no_ignore_vcs() + && !self.no_ignore_global()) + .git_ignore(!self.no_ignore() && !self.no_ignore_vcs()) + .git_exclude(!self.no_ignore() && !self.no_ignore_vcs()); + if !self.no_ignore() { + builder.add_custom_ignore_filename(".rgignore"); } + if self.is_present("sort-files") { + builder.sort_by_file_name(|a, b| a.cmp(b)); + } + Ok(builder) + } +} + +/// Mid level routines for converting command line arguments into various types +/// of data structures. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Returns the form of binary detection to perform. + fn binary_detection(&self) -> BinaryDetection { + let none = + self.is_present("text") + || self.unrestricted_count() >= 3 + || self.is_present("null-data"); + if none { + BinaryDetection::none() + } else { + BinaryDetection::quit(b'\x00') + } + } + + /// Returns true if the command line configuration implies that a match + /// can never be shown. + fn can_never_match(&self, patterns: &[String]) -> bool { + patterns.is_empty() || self.max_count().ok() == Some(Some(0)) + } + + /// Returns true if and only if case should be ignore. + /// + /// If --case-sensitive is present, then case is never ignored, even if + /// --ignore-case is present. + fn case_insensitive(&self) -> bool { + self.is_present("ignore-case") && !self.is_present("case-sensitive") + } + + /// Returns true if and only if smart case has been enabled. + /// + /// If either --ignore-case of --case-sensitive are present, then smart + /// case is disabled. + fn case_smart(&self) -> bool { + self.is_present("smart-case") + && !self.is_present("ignore-case") + && !self.is_present("case-sensitive") } /// Returns the user's color choice based on command line parameters and /// environment. - fn color_choice(&self) -> termcolor::ColorChoice { + fn color_choice(&self) -> ColorChoice { let preference = match self.value_of_lossy("color") { None => "auto".to_string(), Some(v) => v, }; if preference == "always" { - termcolor::ColorChoice::Always + ColorChoice::Always } else if preference == "ansi" { - termcolor::ColorChoice::AlwaysAnsi + ColorChoice::AlwaysAnsi } else if preference == "auto" { if atty::is(atty::Stream::Stdout) || self.is_present("pretty") { - termcolor::ColorChoice::Auto + ColorChoice::Auto } else { - termcolor::ColorChoice::Never + ColorChoice::Never } } else { - termcolor::ColorChoice::Never + ColorChoice::Never } } @@ -837,184 +769,216 @@ impl<'a> ArgMatches<'a> { Ok(ColorSpecs::new(&specs)) } - /// Return the text encoding specified. - /// - /// If the label given by the caller doesn't correspond to a valid - /// supported encoding (and isn't `auto`), then return an error. - /// - /// A `None` encoding implies that the encoding should be automatically - /// detected on a per-file basis. - fn encoding(&self) -> Result> { - match self.value_of_lossy("encoding") { - None => Ok(None), - Some(label) => { - if label == "auto" { - return Ok(None); - } - match Encoding::for_label_no_replacement(label.as_bytes()) { - Some(enc) => Ok(Some(enc)), - None => Err(From::from( - format!("unsupported encoding: {}", label))), - } - } + /// Returns true if and only if column numbers should be shown. 
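Editorial aside: the precedence encoded by `case_insensitive()` and `case_smart()` above is worth spelling out. A sketch with plain booleans standing in for `self.is_present(..)`:

    fn case_insensitive(ignore_case: bool, case_sensitive: bool) -> bool {
        ignore_case && !case_sensitive
    }

    fn case_smart(smart_case: bool, ignore_case: bool, case_sensitive: bool) -> bool {
        smart_case && !ignore_case && !case_sensitive
    }

    fn main() {
        // -s/--case-sensitive always wins, even when -i is also given.
        assert!(!case_insensitive(true, true));
        assert!(case_insensitive(true, false));
        // -S/--smart-case is disabled by either explicit case flag.
        assert!(!case_smart(true, true, false));
        assert!(case_smart(true, false, false));
    }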
+ fn column(&self) -> bool { + if self.is_present("no-column") { + return false; } + self.is_present("column") || self.is_present("vimgrep") } - /// Returns whether status should be tracked for this run of ripgrep - - /// This is automatically disabled if we're asked to only list the - /// files that wil be searched, files with matches or files - /// without matches. - fn stats(&self) -> bool { - if self.is_present("files-with-matches") || - self.is_present("files-without-match") { - return false; - } - self.is_present("stats") - } - - /// Returns the approximate number of threads that ripgrep should use. - fn threads(&self) -> Result { - if self.is_present("sort-files") { - return Ok(1); - } - let threads = self.usize_of("threads")?.unwrap_or(0); - Ok(if threads == 0 { - cmp::min(12, num_cpus::get()) + /// Returns the before and after contexts from the command line. + /// + /// If a context setting was absent, then `0` is returned. + /// + /// If there was a problem parsing the values from the user as an integer, + /// then an error is returned. + fn contexts(&self) -> Result<(usize, usize)> { + let after = self.usize_of("after-context")?.unwrap_or(0); + let before = self.usize_of("before-context")?.unwrap_or(0); + let both = self.usize_of("context")?.unwrap_or(0); + Ok(if both > 0 { + (both, both) } else { - threads + (before, after) }) } - /// Builds a grep matcher from the command line flags. + /// Returns the unescaped context separator in UTF-8 bytes. /// - /// If there was a problem extracting the pattern from the command line - /// flags, then an error is returned. - /// - /// If no match can ever occur, then `false` is returned. Otherwise, - /// `true` is returned. - fn grep(&self) -> Result<(Grep, bool)> { - let smart = - self.is_present("smart-case") - && !self.is_present("ignore-case") - && !self.is_present("case-sensitive"); - let casei = - self.is_present("ignore-case") - && !self.is_present("case-sensitive"); - let pats = self.patterns()?; - let ok = !pats.is_empty(); - let mut gb = GrepBuilder::new(&pats.join("|")) - .case_smart(smart) - .case_insensitive(casei) - .line_terminator(b'\n'); - - if let Some(limit) = self.dfa_size_limit()? { - gb = gb.dfa_size_limit(limit); + /// If one was not provided, the default `--` is returned. + fn context_separator(&self) -> Vec { + match self.value_of_lossy("context-separator") { + None => b"--".to_vec(), + Some(sep) => unescape(&sep), } - if let Some(limit) = self.regex_size_limit()? { - gb = gb.size_limit(limit); - } - Ok((gb.build()?, ok)) } - /// Builds the set of glob overrides from the command line flags. - fn overrides(&self) -> Result { - let mut ovr = OverrideBuilder::new(env::current_dir()?); - for glob in self.values_of_lossy_vec("glob") { - ovr.add(&glob)?; - } - // this is smelly. In the long run it might make sense - // to change overridebuilder to be like globsetbuilder - // but this would be a breaking change to the ignore crate - // so it is being shelved for now... - ovr.case_insensitive(true)?; - for glob in self.values_of_lossy_vec("iglob") { - ovr.add(&glob)?; - } - ovr.build().map_err(From::from) - } - - /// Builds a file type matcher from the command line flags. 
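Editorial aside: `contexts()` above merges -A/-B/-C so that a non-zero --context wins over the individual before/after settings. A standalone sketch:

    // Arguments mirror --after-context, --before-context and --context, with 0
    // meaning "not set"; the return value is (before, after).
    fn contexts(after: usize, before: usize, both: usize) -> (usize, usize) {
        if both > 0 {
            (both, both)
        } else {
            (before, after)
        }
    }

    fn main() {
        assert_eq!(contexts(1, 2, 0), (2, 1)); // -A 1 -B 2
        assert_eq!(contexts(1, 2, 3), (3, 3)); // -C 3 overrides both
    }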
- fn types(&self) -> Result { - let mut btypes = TypesBuilder::new(); - btypes.add_defaults(); - for ty in self.values_of_lossy_vec("type-clear") { - btypes.clear(&ty); - } - for def in self.values_of_lossy_vec("type-add") { - btypes.add_def(&def)?; - } - for ty in self.values_of_lossy_vec("type") { - btypes.select(&ty); - } - for ty in self.values_of_lossy_vec("type-not") { - btypes.negate(&ty); - } - btypes.build().map_err(From::from) - } - - /// Parses an argument of the form `[0-9]+(KMG)?`. + /// Returns whether the -c/--count or the --count-matches flags were + /// passed from the command line. /// - /// This always returns the result as a type `u64`. This must be converted - /// to the appropriate type by the caller. - fn parse_human_readable_size_arg( - &self, - arg_name: &str, - ) -> Result> { - let arg_value = match self.value_of_lossy(arg_name) { - Some(x) => x, - None => return Ok(None) - }; - let re = regex::Regex::new("^([0-9]+)([KMG])?$").unwrap(); - let caps = - re.captures(&arg_value).ok_or_else(|| { - format!("invalid format for {}", arg_name) - })?; - - let value = caps[1].parse::()?; - let suffix = caps.get(2).map(|x| x.as_str()); - - let v_10 = value.checked_mul(1024); - let v_20 = v_10.and_then(|x| x.checked_mul(1024)); - let v_30 = v_20.and_then(|x| x.checked_mul(1024)); - - let try_suffix = |x: Option| { - if x.is_some() { - Ok(x) - } else { - Err(From::from(format!("number too large for {}", arg_name))) - } - }; - match suffix { - None => Ok(Some(value)), - Some("K") => try_suffix(v_10), - Some("M") => try_suffix(v_20), - Some("G") => try_suffix(v_30), - _ => Err(From::from(format!("invalid suffix for {}", arg_name))) + /// If --count-matches and --invert-match were passed in, behave + /// as if --count and --invert-match were passed in (i.e. rg will + /// count inverted matches as per existing behavior). + fn counts(&self) -> (bool, bool) { + let count = self.is_present("count"); + let count_matches = self.is_present("count-matches"); + let invert_matches = self.is_present("invert-match"); + let only_matching = self.is_present("only-matching"); + if count_matches && invert_matches { + // Treat `-v --count-matches` as `-v -c`. + (true, false) + } else if count && only_matching { + // Treat `-c --only-matching` as `--count-matches`. + (false, true) + } else { + (count, count_matches) } } /// Parse the dfa-size-limit argument option into a byte count. fn dfa_size_limit(&self) -> Result> { - let r = self.parse_human_readable_size_arg("dfa-size-limit")?; - human_readable_to_usize("dfa-size-limit", r) + let r = self.parse_human_readable_size("dfa-size-limit")?; + u64_to_usize("dfa-size-limit", r) } - /// Parse the regex-size-limit argument option into a byte count. - fn regex_size_limit(&self) -> Result> { - let r = self.parse_human_readable_size_arg("regex-size-limit")?; - human_readable_to_usize("regex-size-limit", r) + /// Returns the type of encoding to use. + /// + /// This only returns an encoding if one is explicitly specified. When no + /// encoding is present, the Searcher will still do BOM sniffing for UTF-16 + /// and transcode seamlessly. + fn encoding(&self) -> Result> { + if self.is_present("no-encoding") { + return Ok(None); + } + let label = match self.value_of_lossy("encoding") { + None if self.pcre2_unicode() => "utf-8".to_string(), + None => return Ok(None), + Some(label) => label, + }; + if label == "auto" { + return Ok(None); + } + Ok(Some(Encoding::new(&label)?)) + } + + /// Return the file separator to use based on the CLI configuration. 
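Editorial aside: the flag normalization performed by `counts()` above, restated as a standalone function over hypothetical booleans mirroring the flags:

    // Booleans mirror -c, --count-matches, -v and -o; returns (count, count_matches).
    fn counts(
        count: bool,
        count_matches: bool,
        invert: bool,
        only_matching: bool,
    ) -> (bool, bool) {
        if count_matches && invert {
            // `-v --count-matches` behaves like `-v -c`.
            (true, false)
        } else if count && only_matching {
            // `-c --only-matching` behaves like `--count-matches`.
            (false, true)
        } else {
            (count, count_matches)
        }
    }

    fn main() {
        assert_eq!(counts(false, true, true, false), (true, false));
        assert_eq!(counts(true, false, false, true), (false, true));
        assert_eq!(counts(true, false, false, false), (true, false));
    }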
+ fn file_separator(&self) -> Result>> { + // File separators are only used for the standard grep-line format. + if self.output_kind() != OutputKind::Standard { + return Ok(None); + } + + let (ctx_before, ctx_after) = self.contexts()?; + Ok(if self.heading() { + Some(b"".to_vec()) + } else if ctx_before > 0 || ctx_after > 0 { + Some(self.context_separator().clone()) + } else { + None + }) + } + + /// Returns true if and only if matches should be grouped with file name + /// headings. + fn heading(&self) -> bool { + if self.is_present("no-heading") || self.is_present("vimgrep") { + false + } else { + atty::is(atty::Stream::Stdout) + || self.is_present("heading") + || self.is_present("pretty") + } + } + + /// Returns true if and only if hidden files/directories should be + /// searched. + fn hidden(&self) -> bool { + self.is_present("hidden") || self.unrestricted_count() >= 2 + } + + /// Return all of the ignore file paths given on the command line. + fn ignore_paths(&self) -> Vec { + let paths = match self.values_of_os("ignore-file") { + None => return vec![], + Some(paths) => paths, + }; + paths.map(|p| Path::new(p).to_path_buf()).collect() + } + + /// Returns true if and only if ripgrep is invoked in a way where it knows + /// it search exactly one thing. + fn is_one_search(&self, paths: &[PathBuf]) -> bool { + if paths.len() != 1 { + return false; + } + self.is_only_stdin(paths) || paths[0].is_file() + } + + /// Returns true if and only if we're only searching a single thing and + /// that thing is stdin. + fn is_only_stdin(&self, paths: &[PathBuf]) -> bool { + paths == [Path::new("-")] + } + + /// Returns true if and only if we should show line numbers. + fn line_number(&self, paths: &[PathBuf]) -> bool { + if self.output_kind() == OutputKind::Summary { + return false; + } + if self.is_present("no-line-number") { + return false; + } + if self.output_kind() == OutputKind::JSON { + return true; + } + + // A few things can imply counting line numbers. In particular, we + // generally want to show line numbers by default when printing to a + // tty for human consumption, except for one interesting case: when + // we're only searching stdin. This makes pipelines work as expected. + (atty::is(atty::Stream::Stdout) && !self.is_only_stdin(paths)) + || self.is_present("line-number") + || self.is_present("column") + || self.is_present("pretty") + || self.is_present("vimgrep") + } + + /// The maximum number of columns allowed on each line. + /// + /// If `0` is provided, then this returns `None`. + fn max_columns(&self) -> Result> { + Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64)) + } + + /// The maximum number of matches permitted. + fn max_count(&self) -> Result> { + Ok(self.usize_of("max-count")?.map(|n| n as u64)) } /// Parses the max-filesize argument option into a byte count. - fn max_filesize(&self) -> Result> { - self.parse_human_readable_size_arg("max-filesize") + fn max_file_size(&self) -> Result> { + self.parse_human_readable_size("max-filesize") + } + + /// Returns whether we should attempt to use memory maps or not. + fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice { + // SAFETY: Memory maps are difficult to impossible to encapsulate + // safely in a portable way that doesn't simultaneously negate some of + // the benfits of using memory maps. For ripgrep's use, we never mutate + // a memory map and generally never store the contents of memory map + // in a data structure that depends on immutability. 
Generally + // speaking, the worst thing that can happen is a SIGBUS (if the + // underlying file is truncated while reading it), which will cause + // ripgrep to abort. This reasoning should be treated as suspect. + let maybe = unsafe { MmapChoice::auto() }; + let never = MmapChoice::never(); + if self.is_present("no-mmap") { + never + } else if self.is_present("mmap") { + maybe + } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) { + // If we're only searching a few paths and all of them are + // files, then memory maps are probably faster. + maybe + } else { + never + } } /// Returns true if ignore files should be ignored. fn no_ignore(&self) -> bool { - self.is_present("no-ignore") - || self.occurrences_of("unrestricted") >= 1 + self.is_present("no-ignore") || self.unrestricted_count() >= 1 } /// Returns true if global ignore files should be ignored. @@ -1032,18 +996,356 @@ impl<'a> ArgMatches<'a> { self.is_present("no-ignore-vcs") || self.no_ignore() } - /// Returns true if and only if hidden files/directories should be - /// searched. - fn hidden(&self) -> bool { - self.is_present("hidden") || self.occurrences_of("unrestricted") >= 2 + /// Determine the type of output we should produce. + fn output_kind(&self) -> OutputKind { + if self.is_present("quiet") { + // While we don't technically print results (or aggregate results) + // in quiet mode, we still support the --stats flag, and those + // stats are computed by the Summary printer for now. + return OutputKind::Summary; + } else if self.is_present("json") { + return OutputKind::JSON; + } + + let (count, count_matches) = self.counts(); + let summary = + count + || count_matches + || self.is_present("files-with-matches") + || self.is_present("files-without-match"); + if summary { + OutputKind::Summary + } else { + OutputKind::Standard + } } - /// Returns true if and only if all files should be treated as if they - /// were text, even if ripgrep would detect it as a binary file. - fn text(&self) -> bool { - self.is_present("text") || self.occurrences_of("unrestricted") >= 3 + /// Builds the set of glob overrides from the command line flags. + fn overrides(&self) -> Result { + let mut builder = OverrideBuilder::new(env::current_dir()?); + for glob in self.values_of_lossy_vec("glob") { + builder.add(&glob)?; + } + // This only enables case insensitivity for subsequent globs. + builder.case_insensitive(true)?; + for glob in self.values_of_lossy_vec("iglob") { + builder.add(&glob)?; + } + Ok(builder.build()?) } + /// Return all file paths that ripgrep should search. + /// + /// If no paths were given, then this returns an empty list. + fn paths(&self) -> Vec { + let mut paths: Vec = match self.values_of_os("path") { + None => vec![], + Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(), + }; + // If --file, --files or --regexp is given, then the first path is + // always in `pattern`. + if self.is_present("file") + || self.is_present("files") + || self.is_present("regexp") + { + if let Some(path) = self.value_of_os("pattern") { + paths.insert(0, Path::new(path).to_path_buf()); + } + } + paths + } + + /// Return the default path that ripgrep should search. This should only + /// be used when ripgrep is not otherwise given at least one file path + /// as a positional argument. 
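Editorial aside: `output_kind()` above decides which printer family drives the run. A simplified sketch of that selection; the booleans stand in for the corresponding flags, and the enum mirrors the patch's `OutputKind`:

    #[derive(Debug, PartialEq)]
    enum OutputKind {
        Standard,
        Summary,
        JSON,
    }

    fn output_kind(
        quiet: bool,
        json: bool,
        count: bool,
        count_matches: bool,
        files_with_matches: bool,
        files_without_match: bool,
    ) -> OutputKind {
        if quiet {
            // --quiet still honors --stats, which the Summary printer computes.
            OutputKind::Summary
        } else if json {
            OutputKind::JSON
        } else if count || count_matches || files_with_matches || files_without_match {
            OutputKind::Summary
        } else {
            OutputKind::Standard
        }
    }

    fn main() {
        assert_eq!(output_kind(false, false, true, false, false, false), OutputKind::Summary);
        assert_eq!(output_kind(false, true, false, false, false, false), OutputKind::JSON);
        assert_eq!(output_kind(false, false, false, false, false, false), OutputKind::Standard);
    }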
+ fn path_default(&self) -> PathBuf { + let file_is_stdin = self.values_of_os("file") + .map_or(false, |mut files| files.any(|f| f == "-")); + let search_cwd = + atty::is(atty::Stream::Stdin) + || !stdin_is_readable() + || (self.is_present("file") && file_is_stdin) + || self.is_present("files") + || self.is_present("type-list"); + if search_cwd { + Path::new("./").to_path_buf() + } else { + Path::new("-").to_path_buf() + } + } + + /// Returns the unescaped path separator as a single byte, if one exists. + /// + /// If the provided path separator is more than a single byte, then an + /// error is returned. + fn path_separator(&self) -> Result> { + let sep = match self.value_of_lossy("path-separator") { + None => return Ok(None), + Some(sep) => unescape(&sep), + }; + if sep.is_empty() { + Ok(None) + } else if sep.len() > 1 { + Err(From::from(format!( + "A path separator must be exactly one byte, but \ + the given separator is {} bytes: {}\n\ + In some shells on Windows '/' is automatically \ + expanded. Use '//' instead.", + sep.len(), + escape(&sep), + ))) + } else { + Ok(Some(sep[0])) + } + } + + /// Returns the byte that should be used to terminate paths. + /// + /// Typically, this is only set to `\x00` when the --null flag is provided, + /// and `None` otherwise. + fn path_terminator(&self) -> Option { + if self.is_present("null") { + Some(b'\x00') + } else { + None + } + } + + /// Get a sequence of all available patterns from the command line. + /// This includes reading the -e/--regexp and -f/--file flags. + /// + /// Note that if -F/--fixed-strings is set, then all patterns will be + /// escaped. If -x/--line-regexp is set, then all patterns are surrounded + /// by `^...$`. Other things, such as --word-regexp, are handled by the + /// regex matcher itself. + /// + /// If any pattern is invalid UTF-8, then an error is returned. + fn patterns(&self) -> Result> { + if self.is_present("files") || self.is_present("type-list") { + return Ok(vec![]); + } + let mut pats = vec![]; + match self.values_of_os("regexp") { + None => { + if self.values_of_os("file").is_none() { + if let Some(os_pat) = self.value_of_os("pattern") { + pats.push(self.pattern_from_os_str(os_pat)?); + } + } + } + Some(os_pats) => { + for os_pat in os_pats { + pats.push(self.pattern_from_os_str(os_pat)?); + } + } + } + if let Some(files) = self.values_of_os("file") { + for file in files { + if file == "-" { + let stdin = io::stdin(); + for line in stdin.lock().lines() { + pats.push(self.pattern_from_str(&line?)); + } + } else { + let f = File::open(file)?; + for line in io::BufReader::new(f).lines() { + pats.push(self.pattern_from_str(&line?)); + } + } + } + } + Ok(pats) + } + + /// Returns a pattern that is guaranteed to produce an empty regular + /// expression that is valid in any position. + fn pattern_empty(&self) -> String { + // This would normally just be an empty string, which works on its + // own, but if the patterns are joined in a set of alternations, then + // you wind up with `foo|`, which is currently invalid in Rust's regex + // engine. + "(?:z{0})*".to_string() + } + + /// Converts an OsStr pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. + /// + /// If the pattern is not valid UTF-8, then an error is returned. + fn pattern_from_os_str(&self, pat: &OsStr) -> Result { + let s = pattern_to_str(pat)?; + Ok(self.pattern_from_str(s)) + } + + /// Converts a &str pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. 
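Editorial aside: the pattern pipeline described above (-F escapes the pattern, -x anchors it, and an empty result falls back to a pattern that stays valid inside an alternation) can be sketched with the `regex` crate's `escape` helper. The function name and flag parameters here are illustrative, not ripgrep's exact signatures:

    use regex::escape;

    fn pattern_from_str(pat: &str, fixed_strings: bool, line_regexp: bool) -> String {
        let lit = if fixed_strings { escape(pat) } else { pat.to_string() };
        let s = if line_regexp { format!(r"^(?:{})$", lit) } else { lit };
        if s.is_empty() {
            // An empty pattern joined into an alternation yields `foo|`, which
            // Rust's regex engine rejects, so substitute an equivalent pattern.
            "(?:z{0})*".to_string()
        } else {
            s
        }
    }

    fn main() {
        assert_eq!(pattern_from_str("a.b", true, false), r"a\.b");
        assert_eq!(pattern_from_str("foo", false, true), "^(?:foo)$");
        assert_eq!(pattern_from_str("", false, false), "(?:z{0})*");
    }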
+ fn pattern_from_str(&self, pat: &str) -> String { + let litpat = self.pattern_literal(pat.to_string()); + let s = self.pattern_line(litpat); + + if s.is_empty() { + self.pattern_empty() + } else { + s + } + } + + /// Returns the given pattern as a line pattern if the -x/--line-regexp + /// flag is set. Otherwise, the pattern is returned unchanged. + fn pattern_line(&self, pat: String) -> String { + if self.is_present("line-regexp") { + format!(r"^(?:{})$", pat) + } else { + pat + } + } + + /// Returns the given pattern as a literal pattern if the + /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned + /// unchanged. + fn pattern_literal(&self, pat: String) -> String { + if self.is_present("fixed-strings") { + regex::escape(&pat) + } else { + pat + } + } + + /// Returns the preprocessor command if one was specified. + fn preprocessor(&self) -> Option { + let path = match self.value_of_os("pre") { + None => return None, + Some(path) => path, + }; + if path.is_empty() { + return None; + } + Some(Path::new(path).to_path_buf()) + } + + /// Parse the regex-size-limit argument option into a byte count. + fn regex_size_limit(&self) -> Result> { + let r = self.parse_human_readable_size("regex-size-limit")?; + u64_to_usize("regex-size-limit", r) + } + + /// Returns the replacement string as UTF-8 bytes if it exists. + fn replacement(&self) -> Option> { + self.value_of_lossy("replace").map(|s| s.into_bytes()) + } + + /// Returns true if and only if aggregate statistics for a search should + /// be tracked. + /// + /// Generally, this is only enabled when explicitly requested by in the + /// command line arguments via the --stats flag, but this can also be + /// enabled implicity via the output format, e.g., for JSON Lines. + fn stats(&self) -> bool { + self.output_kind() == OutputKind::JSON || self.is_present("stats") + } + + /// Returns a handle to stdout for filtering search. + /// + /// A handle is returned if and only if ripgrep's stdout is being + /// redirected to a file. The handle returned corresponds to that file. + /// + /// This can be used to ensure that we do not attempt to search a file + /// that ripgrep is writing to. + fn stdout_handle(&self) -> Option { + let h = match Handle::stdout() { + Err(_) => return None, + Ok(h) => h, + }; + let md = match h.as_file().metadata() { + Err(_) => return None, + Ok(md) => md, + }; + if !md.is_file() { + return None; + } + Some(h) + } + + /// When the output format is `Summary`, this returns the type of summary + /// output to show. + /// + /// This returns `None` if the output format is not `Summary`. + fn summary_kind(&self) -> Option { + let (count, count_matches) = self.counts(); + if self.is_present("quiet") { + Some(SummaryKind::Quiet) + } else if count_matches { + Some(SummaryKind::CountMatches) + } else if count { + Some(SummaryKind::Count) + } else if self.is_present("files-with-matches") { + Some(SummaryKind::PathWithMatch) + } else if self.is_present("files-without-match") { + Some(SummaryKind::PathWithoutMatch) + } else { + None + } + } + + /// Return the number of threads that should be used for parallelism. + fn threads(&self) -> Result { + if self.is_present("sort-files") { + return Ok(1); + } + let threads = self.usize_of("threads")?.unwrap_or(0); + Ok(if threads == 0 { + cmp::min(12, num_cpus::get()) + } else { + threads + }) + } + + /// Builds a file type matcher from the command line flags. 
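Editorial aside: the thread-count heuristic in `threads()` above, as a standalone sketch (`available` stands in for `num_cpus::get()`, `requested` for the --threads value):

    use std::cmp;

    fn threads(sort_files: bool, requested: usize, available: usize) -> usize {
        if sort_files {
            // Sorted output requires a deterministic, single-threaded walk.
            return 1;
        }
        if requested == 0 {
            cmp::min(12, available)
        } else {
            requested
        }
    }

    fn main() {
        assert_eq!(threads(false, 0, 32), 12); // capped at 12 by default
        assert_eq!(threads(false, 4, 32), 4);
        assert_eq!(threads(true, 8, 32), 1);   // --sort-files is single-threaded
    }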
+ fn types(&self) -> Result { + let mut builder = TypesBuilder::new(); + builder.add_defaults(); + for ty in self.values_of_lossy_vec("type-clear") { + builder.clear(&ty); + } + for def in self.values_of_lossy_vec("type-add") { + builder.add_def(&def)?; + } + for ty in self.values_of_lossy_vec("type") { + builder.select(&ty); + } + for ty in self.values_of_lossy_vec("type-not") { + builder.negate(&ty); + } + builder.build().map_err(From::from) + } + + /// Returns the number of times the `unrestricted` flag is provided. + fn unrestricted_count(&self) -> u64 { + self.occurrences_of("unrestricted") + } + + /// Returns true if and only if PCRE2's Unicode mode should be enabled. + fn pcre2_unicode(&self) -> bool { + // PCRE2 Unicode is enabled by default, so only disable it when told + // to do so explicitly. + self.is_present("pcre2") && !self.is_present("no-pcre2-unicode") + } + + /// Returns true if and only if file names containing each match should + /// be emitted. + fn with_filename(&self, paths: &[PathBuf]) -> bool { + if self.is_present("no-filename") { + false + } else { + self.is_present("with-filename") + || self.is_present("vimgrep") + || paths.len() > 1 + || paths.get(0).map_or(false, |p| p.is_dir()) + } + } +} + +/// Lower level generic helper methods for teasing values out of clap. +impl ArgMatches { /// Like values_of_lossy, but returns an empty vec if the flag is not /// present. fn values_of_lossy_vec(&self, name: &str) -> Vec { @@ -1056,16 +1358,15 @@ impl<'a> ArgMatches<'a> { /// If the number is zero, then it is considered absent and `None` is /// returned. fn usize_of_nonzero(&self, name: &str) -> Result> { - match self.value_of_lossy(name) { - None => Ok(None), - Some(v) => v.parse().map_err(From::from).map(|n| { - if n == 0 { - None - } else { - Some(n) - } - }), - } + let n = match self.usize_of(name)? { + None => return Ok(None), + Some(n) => n, + }; + Ok(if n == 0 { + None + } else { + Some(n) + }) } /// Safely reads an arg value with the given name, and if it's present, @@ -1077,11 +1378,56 @@ impl<'a> ArgMatches<'a> { } } - // The following methods mostly dispatch to the underlying clap methods - // directly. Methods that would otherwise get a single value will fetch - // all values and return the last one. (Clap returns the first one.) We - // only define the ones we need. + /// Parses an argument of the form `[0-9]+(KMG)?`. + /// + /// If the aforementioned format is not recognized, then this returns an + /// error. + fn parse_human_readable_size( + &self, + arg_name: &str, + ) -> Result> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap(); + } + let arg_value = match self.value_of_lossy(arg_name) { + Some(x) => x, + None => return Ok(None) + }; + let caps = RE + .captures(&arg_value) + .ok_or_else(|| { + format!("invalid format for {}", arg_name) + })?; + + let value = caps[1].parse::()?; + let suffix = caps.get(2).map(|x| x.as_str()); + + let v_10 = value.checked_mul(1024); + let v_20 = v_10.and_then(|x| x.checked_mul(1024)); + let v_30 = v_20.and_then(|x| x.checked_mul(1024)); + let try_suffix = |x: Option| { + if x.is_some() { + Ok(x) + } else { + Err(From::from(format!("number too large for {}", arg_name))) + } + }; + match suffix { + None => Ok(Some(value)), + Some("K") => try_suffix(v_10), + Some("M") => try_suffix(v_20), + Some("G") => try_suffix(v_30), + _ => Err(From::from(format!("invalid suffix for {}", arg_name))) + } + } +} + +/// The following methods mostly dispatch to the underlying clap methods +/// directly. 
Methods that would otherwise get a single value will fetch all +/// values and return the last one. (Clap returns the first one.) We only +/// define the ones we need. +impl ArgMatches { fn is_present(&self, name: &str) -> bool { self.0.is_present(name) } @@ -1098,83 +1444,61 @@ impl<'a> ArgMatches<'a> { self.0.values_of_lossy(name) } - fn value_of_os(&'a self, name: &str) -> Option<&'a OsStr> { + fn value_of_os(&self, name: &str) -> Option<&OsStr> { self.0.value_of_os(name) } - fn values_of_os(&'a self, name: &str) -> Option> { + fn values_of_os(&self, name: &str) -> Option { self.0.values_of_os(name) } } +/// Convert an OsStr to a Unicode string. +/// +/// Patterns _must_ be valid UTF-8, so if the given OsStr isn't valid UTF-8, +/// this returns an error. fn pattern_to_str(s: &OsStr) -> Result<&str> { - match s.to_str() { - Some(s) => Ok(s), - None => Err(From::from(format!( + s.to_str().ok_or_else(|| { + From::from(format!( "Argument '{}' is not valid UTF-8. \ Use hex escape sequences to match arbitrary \ bytes in a pattern (e.g., \\xFF).", - s.to_string_lossy()))), + s.to_string_lossy() + )) + }) +} + +/// Inspect an error resulting from building a Rust regex matcher, and if it's +/// believed to correspond to a syntax error that PCRE2 could handle, then +/// add a message to suggest the use of -P/--pcre2. +#[cfg(feature = "pcre2")] +fn suggest_pcre2(msg: String) -> String { + if !msg.contains("backreferences") && !msg.contains("look-around") { + msg + } else { + format!("{} + +Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences +and look-around.", msg) } } -/// A simple thread safe abstraction for determining whether a search should -/// stop if the user has requested quiet mode. -#[derive(Clone, Debug)] -pub struct QuietMatched(Arc>); - -impl QuietMatched { - /// Create a new QuietMatched value. - /// - /// If quiet is true, then set_match and has_match will reflect whether - /// a search should quit or not because it found a match. - /// - /// If quiet is false, then set_match is always a no-op and has_match - /// always returns false. - fn new(quiet: bool) -> QuietMatched { - let atomic = if quiet { Some(AtomicBool::new(false)) } else { None }; - QuietMatched(Arc::new(atomic)) - } - - /// Returns true if and only if quiet mode is enabled and a match has - /// occurred. - pub fn has_match(&self) -> bool { - match *self.0 { - None => false, - Some(ref matched) => matched.load(Ordering::SeqCst), - } - } - - /// Sets whether a match has occurred or not. - /// - /// If quiet mode is disabled, then this is a no-op. - pub fn set_match(&self, yes: bool) -> bool { - match *self.0 { - None => false, - Some(_) if !yes => false, - Some(ref m) => { m.store(true, Ordering::SeqCst); true } - } - } -} - -/// Convert the result of a `parse_human_readable_size_arg` call into -/// a `usize`, failing if the type does not fit. -fn human_readable_to_usize( +/// Convert the result of parsing a human readable file size to a `usize`, +/// failing if the type does not fit. 
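Editorial aside: a standalone sketch of the `[0-9]+(KMG)?` parsing above. It uses bit shifts in place of the chained `checked_mul` calls, but the overflow behavior is equivalent, and error handling is simplified to plain `String`s:

    use regex::Regex;

    fn parse_human_readable_size(arg: &str) -> Result<u64, String> {
        let re = Regex::new(r"^([0-9]+)([KMG])?$").unwrap();
        let caps = re
            .captures(arg)
            .ok_or_else(|| format!("invalid format: {}", arg))?;
        let value: u64 = caps[1].parse().map_err(|e| format!("{}", e))?;
        let shift = match caps.get(2).map(|m| m.as_str()) {
            None => 0,
            Some("K") => 10,
            Some("M") => 20,
            Some("G") => 30,
            _ => unreachable!("the regex only admits K, M or G"),
        };
        value
            .checked_mul(1u64 << shift)
            .ok_or_else(|| format!("number too large: {}", arg))
    }

    fn main() {
        assert_eq!(parse_human_readable_size("64K"), Ok(64 * 1024));
        assert_eq!(parse_human_readable_size("2G"), Ok(2 * 1024 * 1024 * 1024));
        assert!(parse_human_readable_size("10X").is_err());
    }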
+fn u64_to_usize( arg_name: &str, value: Option, ) -> Result> { use std::usize; - match value { - None => Ok(None), - Some(v) => { - if v <= usize::MAX as u64 { - Ok(Some(v as usize)) - } else { - let msg = format!("number too large for {}", arg_name); - Err(From::from(msg)) - } - } + let value = match value { + None => return Ok(None), + Some(value) => value, + }; + if value <= usize::MAX as u64 { + Ok(Some(value as usize)) + } else { + Err(From::from(format!("number too large for {}", arg_name))) } } @@ -1182,7 +1506,6 @@ fn human_readable_to_usize( #[cfg(unix)] fn stdin_is_readable() -> bool { use std::os::unix::fs::FileTypeExt; - use same_file::Handle; let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) { Err(_) => return false, @@ -1194,48 +1517,17 @@ fn stdin_is_readable() -> bool { /// Returns true if and only if stdin is deemed searchable. #[cfg(windows)] fn stdin_is_readable() -> bool { - // On Windows, it's not clear what the possibilities are to me, so just - // always return true. - true -} + use std::os::windows::io::AsRawHandle; + use winapi::um::fileapi::GetFileType; + use winapi::um::winbase::{FILE_TYPE_DISK, FILE_TYPE_PIPE}; -/// Returns true if and only if this path points to a directory. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn path_is_dir(path: &Path) -> bool { - fs::metadata(path).map(|md| metadata_is_dir(&md)).unwrap_or(false) -} - -/// Returns true if and only if this entry points to a directory. -#[cfg(not(windows))] -fn path_is_dir(path: &Path) -> bool { - path.is_dir() -} - -/// Returns true if and only if this path points to a file. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn path_is_file(path: &Path) -> bool { - !path_is_dir(path) -} - -/// Returns true if and only if this entry points to a directory. -#[cfg(not(windows))] -fn path_is_file(path: &Path) -> bool { - path.is_file() -} - -/// Returns true if and only if the given metadata points to a directory. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn metadata_is_dir(md: &fs::Metadata) -> bool { - use std::os::windows::fs::MetadataExt; - use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; - md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + let handle = match Handle::stdin() { + Err(_) => return false, + Ok(handle) => handle, + }; + let raw_handle = handle.as_raw_handle(); + // SAFETY: As far as I can tell, it's not possible to use GetFileType in + // a way that violates safety. We give it a handle and we get an integer. + let ft = unsafe { GetFileType(raw_handle) }; + ft == FILE_TYPE_DISK || ft == FILE_TYPE_PIPE } diff --git a/src/config.rs b/src/config.rs index c47e6a50..eade0cca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,10 +12,7 @@ use std::path::{Path, PathBuf}; use Result; /// Return a sequence of arguments derived from ripgrep rc configuration files. -/// -/// If no_messages is false and there was a problem reading a config file, -/// then errors are printed to stderr. 
-pub fn args(no_messages: bool) -> Vec { +pub fn args() -> Vec { let config_path = match env::var_os("RIPGREP_CONFIG_PATH") { None => return vec![], Some(config_path) => { @@ -28,20 +25,20 @@ pub fn args(no_messages: bool) -> Vec { let (args, errs) = match parse(&config_path) { Ok((args, errs)) => (args, errs), Err(err) => { - if !no_messages { - eprintln!("{}", err); - } + message!("{}", err); return vec![]; } }; - if !no_messages && !errs.is_empty() { + if !errs.is_empty() { for err in errs { - eprintln!("{}:{}", config_path.display(), err); + message!("{}:{}", config_path.display(), err); } } debug!( "{}: arguments loaded from config file: {:?}", - config_path.display(), args); + config_path.display(), + args + ); args } @@ -59,7 +56,7 @@ fn parse>( let path = path.as_ref(); match File::open(&path) { Ok(file) => parse_reader(file), - Err(err) => errored!("{}: {}", path.display(), err), + Err(err) => Err(From::from(format!("{}: {}", path.display(), err))), } } diff --git a/src/logger.rs b/src/logger.rs index 8bd7e09c..f12f0b19 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -34,19 +34,30 @@ impl Log for Logger { match (record.file(), record.line()) { (Some(file), Some(line)) => { eprintln!( - "{}/{}/{}:{}: {}", - record.level(), record.target(), - file, line, record.args()); + "{}|{}|{}:{}: {}", + record.level(), + record.target(), + file, + line, + record.args() + ); } (Some(file), None) => { eprintln!( - "{}/{}/{}: {}", - record.level(), record.target(), file, record.args()); + "{}|{}|{}: {}", + record.level(), + record.target(), + file, + record.args() + ); } _ => { eprintln!( - "{}/{}: {}", - record.level(), record.target(), record.args()); + "{}|{}: {}", + record.level(), + record.target(), + record.args() + ); } } } diff --git a/src/main.rs b/src/main.rs index af22373e..33bc84cd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,43 +1,34 @@ extern crate atty; -extern crate bytecount; #[macro_use] extern crate clap; -extern crate encoding_rs; -extern crate encoding_rs_io; extern crate globset; extern crate grep; extern crate ignore; #[macro_use] extern crate lazy_static; -extern crate libc; #[macro_use] extern crate log; -extern crate memchr; -extern crate memmap; extern crate num_cpus; extern crate regex; extern crate same_file; +#[macro_use] +extern crate serde_json; extern crate termcolor; #[cfg(windows)] extern crate winapi; -use std::error::Error; +use std::io; use std::process; -use std::result; -use std::sync::Arc; -use std::sync::atomic::{AtomicUsize, Ordering}; -use std::sync::mpsc; -use std::thread; -use std::time::{Duration, Instant}; +use std::sync::{Arc, Mutex}; +use std::time::Instant; + +use ignore::WalkState; use args::Args; -use worker::Work; +use subject::Subject; -macro_rules! 
errored { - ($($tt:tt)*) => { - return Err(From::from(format!($($tt)*))); - } -} +#[macro_use] +mod messages; mod app; mod args; @@ -45,20 +36,17 @@ mod config; mod decompressor; mod preprocessor; mod logger; -mod pathutil; -mod printer; -mod search_buffer; -mod search_stream; +mod path_printer; +mod search; +mod subject; mod unescape; -mod worker; -pub type Result = result::Result>; +pub type Result = ::std::result::Result>; -fn main() { - reset_sigpipe(); - match Args::parse().map(Arc::new).and_then(run) { - Ok(0) => process::exit(1), - Ok(_) => process::exit(0), +pub fn main() { + match Args::parse().and_then(run) { + Ok(true) => process::exit(0), + Ok(false) => process::exit(1), Err(err) => { eprintln!("{}", err); process::exit(2); @@ -66,382 +54,242 @@ fn main() { } } -fn run(args: Arc) -> Result { - if args.never_match() { - return Ok(0); - } - let threads = args.threads(); - if args.files() { - if threads == 1 || args.is_one_path() { - run_files_one_thread(&args) - } else { - run_files_parallel(args) - } - } else if args.type_list() { - run_types(&args) - } else if threads == 1 || args.is_one_path() { - run_one_thread(&args) - } else { - run_parallel(&args) +fn run(args: Args) -> Result { + use args::Command::*; + + match args.command()? { + Search => search(args), + SearchParallel => search_parallel(args), + SearchNever => Ok(false), + Files => files(args), + FilesParallel => files_parallel(args), + Types => types(args), } } -fn run_parallel(args: &Arc) -> Result { - let start_time = Instant::now(); - let bufwtr = Arc::new(args.buffer_writer()); - let quiet_matched = args.quiet_matched(); - let paths_searched = Arc::new(AtomicUsize::new(0)); - let match_line_count = Arc::new(AtomicUsize::new(0)); - let paths_matched = Arc::new(AtomicUsize::new(0)); +/// The top-level entry point for single-threaded search. This recursively +/// steps through the file list (current directory by default) and searches +/// each file sequentially. +fn search(args: Args) -> Result { + let started_at = Instant::now(); + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut stats = args.stats()?; + let mut searcher = args.search_worker(args.stdout())?; + let mut matched = false; - args.walker_parallel().run(|| { - let args = Arc::clone(args); - let quiet_matched = quiet_matched.clone(); - let paths_searched = paths_searched.clone(); - let match_line_count = match_line_count.clone(); - let paths_matched = paths_matched.clone(); + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => continue, + }; + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + break; + } + message!("{}: {}", subject.path().display(), err); + continue; + } + }; + matched = matched || search_result.has_match(); + if let Some(ref mut stats) = stats { + *stats += search_result.stats().unwrap(); + } + if matched && quit_after_match { + break; + } + } + if let Some(ref stats) = stats { + let elapsed = Instant::now().duration_since(started_at); + // We don't care if we couldn't print this successfully. + let _ = searcher.print_stats(elapsed, stats); + } + Ok(matched) +} + +/// The top-level entry point for multi-threaded search. The parallelism is +/// itself achieved by the recursive directory traversal. 
All we need to do is +/// feed it a worker for performing a search on each file. +fn search_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + + let quit_after_match = args.quit_after_match()?; + let started_at = Instant::now(); + let subject_builder = Arc::new(args.subject_builder()); + let bufwtr = Arc::new(args.buffer_writer()?); + let stats = Arc::new(args.stats()?.map(Mutex::new)); + let matched = Arc::new(AtomicBool::new(false)); + let mut searcher_err = None; + args.walker_parallel()?.run(|| { + let args = args.clone(); let bufwtr = Arc::clone(&bufwtr); - let mut buf = bufwtr.buffer(); - let mut worker = args.worker(); - Box::new(move |result| { - use ignore::WalkState::*; + let stats = Arc::clone(&stats); + let matched = Arc::clone(&matched); + let subject_builder = Arc::clone(&subject_builder); + let mut searcher = match args.search_worker(bufwtr.buffer()) { + Ok(searcher) => searcher, + Err(err) => { + searcher_err = Some(err); + return Box::new(move |_| { + WalkState::Quit + }); + } + }; - if quiet_matched.has_match() { - return Quit; - } - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { - None => return Continue, - Some(dent) => dent, + Box::new(move |result| { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, }; - paths_searched.fetch_add(1, Ordering::SeqCst); - buf.clear(); - { - // This block actually executes the search and prints the - // results into outbuf. - let mut printer = args.printer(&mut buf); - let count = - if dent.is_stdin() { - worker.run(&mut printer, Work::Stdin) - } else { - worker.run(&mut printer, Work::DirEntry(dent)) - }; - match_line_count.fetch_add(count as usize, Ordering::SeqCst); - if quiet_matched.set_match(count > 0) { - return Quit; - } - if args.stats() && count > 0 { - paths_matched.fetch_add(1, Ordering::SeqCst); + searcher.printer().get_mut().clear(); + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + message!("{}: {}", subject.path().display(), err); + return WalkState::Continue; } + }; + if search_result.has_match() { + matched.store(true, SeqCst); + } + if let Some(ref locked_stats) = *stats { + let mut stats = locked_stats.lock().unwrap(); + *stats += search_result.stats().unwrap(); + } + if let Err(err) = bufwtr.print(searcher.printer().get_mut()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + return WalkState::Quit; + } + // Otherwise, we continue on our merry way. + message!("{}: {}", subject.path().display(), err); + } + if matched.load(SeqCst) && quit_after_match { + WalkState::Quit + } else { + WalkState::Continue } - // BUG(burntsushi): We should handle this error instead of ignoring - // it. 
See: https://github.com/BurntSushi/ripgrep/issues/200 - let _ = bufwtr.print(&buf); - Continue }) }); - if !args.paths().is_empty() && paths_searched.load(Ordering::SeqCst) == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } + if let Some(err) = searcher_err.take() { + return Err(err); } - let match_line_count = match_line_count.load(Ordering::SeqCst) as u64; - let paths_searched = paths_searched.load(Ordering::SeqCst) as u64; - let paths_matched = paths_matched.load(Ordering::SeqCst) as u64; - if args.stats() { - print_stats( - match_line_count, - paths_searched, - paths_matched, - start_time.elapsed(), - ); + if let Some(ref locked_stats) = *stats { + let elapsed = Instant::now().duration_since(started_at); + let stats = locked_stats.lock().unwrap(); + let mut searcher = args.search_worker(args.stdout())?; + // We don't care if we couldn't print this successfully. + let _ = searcher.print_stats(elapsed, &stats); } - Ok(match_line_count) + Ok(matched.load(SeqCst)) } -fn run_one_thread(args: &Arc) -> Result { - let start_time = Instant::now(); - let mut stdout = args.stdout(); - let mut worker = args.worker(); - let mut paths_searched: u64 = 0; - let mut match_line_count = 0; - let mut paths_matched: u64 = 0; - for result in args.walker() { - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { +/// The top-level entry point for listing files without searching them. This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using a single thread. +fn files(args: Args) -> Result { + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut matched = false; + let mut path_printer = args.path_printer(args.stdout())?; + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, None => continue, - Some(dent) => dent, }; - let mut printer = args.printer(&mut stdout); - if match_line_count > 0 { - if args.quiet() { + matched = true; + if quit_after_match { + break; + } + if let Err(err) = path_printer.write_path(subject.path()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { break; } - if let Some(sep) = args.file_separator() { - printer = printer.file_separator(sep); - } - } - paths_searched += 1; - let count = - if dent.is_stdin() { - worker.run(&mut printer, Work::Stdin) - } else { - worker.run(&mut printer, Work::DirEntry(dent)) - }; - match_line_count += count; - if args.stats() && count > 0 { - paths_matched += 1; + // Otherwise, we have some other error that's preventing us from + // writing to stdout, so we should bubble it up. + return Err(err.into()); } } - if !args.paths().is_empty() && paths_searched == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } - } - if args.stats() { - print_stats( - match_line_count, - paths_searched, - paths_matched, - start_time.elapsed(), - ); - } - Ok(match_line_count) + Ok(matched) } -fn run_files_parallel(args: Arc) -> Result { - let print_args = Arc::clone(&args); - let (tx, rx) = mpsc::channel::(); - let print_thread = thread::spawn(move || { - let mut printer = print_args.printer(print_args.stdout()); - let mut file_count = 0; - for dent in rx.iter() { - if !print_args.quiet() { - printer.path(dent.path()); - } - file_count += 1; +/// The top-level entry point for listing files without searching them. 
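The parallel search above merges per-file statistics into an `Arc<Mutex<_>>` aggregate with `+=`. Here is a toy, self-contained sketch of that merge pattern; the `Stats` struct below is a stand-in for `grep::printer::Stats` (which supports `+=` as used above), not the real type.

```rust
use std::ops::AddAssign;
use std::sync::{Arc, Mutex};
use std::thread;

// A toy stand-in for `grep::printer::Stats`.
#[derive(Default, Debug)]
struct Stats {
    searches: u64,
    matches: u64,
}

impl AddAssign for Stats {
    fn add_assign(&mut self, rhs: Stats) {
        self.searches += rhs.searches;
        self.matches += rhs.matches;
    }
}

fn main() {
    let total = Arc::new(Mutex::new(Stats::default()));
    let mut handles = Vec::new();
    for i in 0..4u64 {
        let total = Arc::clone(&total);
        handles.push(thread::spawn(move || {
            // Each worker produces its own per-search stats...
            let local = Stats { searches: 1, matches: i };
            // ...and folds them into the shared aggregate under the lock.
            *total.lock().unwrap() += local;
        }));
    }
    for h in handles {
        h.join().unwrap();
    }
    println!("{:?}", total.lock().unwrap());
}
```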
This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using multiple threads. +fn files_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + use std::sync::mpsc; + use std::thread; + + let quit_after_match = args.quit_after_match()?; + let subject_builder = Arc::new(args.subject_builder()); + let mut path_printer = args.path_printer(args.stdout())?; + let matched = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel::(); + + let print_thread = thread::spawn(move || -> io::Result<()> { + for subject in rx.iter() { + path_printer.write_path(subject.path())?; } - file_count + Ok(()) }); - args.walker_parallel().run(move || { - let args = Arc::clone(&args); + args.walker_parallel()?.run(|| { + let subject_builder = Arc::clone(&subject_builder); + let matched = Arc::clone(&matched); let tx = tx.clone(); + Box::new(move |result| { - if let Some(dent) = get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { - tx.send(dent).unwrap(); - if args.quiet() { - return ignore::WalkState::Quit + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, + }; + matched.store(true, SeqCst); + if quit_after_match { + WalkState::Quit + } else { + match tx.send(subject) { + Ok(_) => WalkState::Continue, + Err(_) => WalkState::Quit, } } - ignore::WalkState::Continue }) }); - Ok(print_thread.join().unwrap()) -} - -fn run_files_one_thread(args: &Arc) -> Result { - let mut printer = args.printer(args.stdout()); - let mut file_count = 0; - for result in args.walker() { - let dent = match get_or_log_dir_entry( - result, - args.stdout_handle(), - args.files(), - args.no_messages(), - args.no_ignore_messages(), - ) { - None => continue, - Some(dent) => dent, - }; - file_count += 1; - if args.quiet() { - break; - } else { - printer.path(dent.path()); + drop(tx); + if let Err(err) = print_thread.join().unwrap() { + // A broken pipe means graceful termination, so fall through. + // Otherwise, something bad happened while writing to stdout, so bubble + // it up. + if err.kind() != io::ErrorKind::BrokenPipe { + return Err(err.into()); } } - Ok(file_count) + Ok(matched.load(SeqCst)) } -fn run_types(args: &Arc) -> Result { - let mut printer = args.printer(args.stdout()); - let mut ty_count = 0; - for def in args.type_defs() { - printer.type_def(def); - ty_count += 1; - } - Ok(ty_count) -} +/// The top-level entry point for --type-list. +fn types(args: Args) -> Result { + let mut count = 0; + let mut stdout = args.stdout(); + for def in args.type_defs()? { + count += 1; + stdout.write_all(def.name().as_bytes())?; + stdout.write_all(b": ")?; -fn get_or_log_dir_entry( - result: result::Result, - stdout_handle: Option<&same_file::Handle>, - files_only: bool, - no_messages: bool, - no_ignore_messages: bool, -) -> Option { - match result { - Err(err) => { - if !no_messages { - eprintln!("{}", err); + let mut first = true; + for glob in def.globs() { + if !first { + stdout.write_all(b", ")?; } - None - } - Ok(dent) => { - if let Some(err) = dent.error() { - if !no_messages && !no_ignore_messages { - eprintln!("{}", err); - } - } - if dent.file_type().is_none() { - return Some(dent); // entry is stdin - } - // A depth of 0 means the user gave the path explicitly, so we - // should always try to search it. 
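`files_parallel` above funnels all paths through an `mpsc` channel to a single printing thread, so only one thread ever writes to stdout. The following is a self-contained sketch of that channel-plus-printer-thread shape, using plain `PathBuf`s and a few spawned producer threads as stand-ins for the parallel walker.

```rust
use std::io::{self, Write};
use std::path::PathBuf;
use std::sync::mpsc;
use std::thread;

fn main() -> io::Result<()> {
    let (tx, rx) = mpsc::channel::<PathBuf>();

    // A single dedicated thread owns stdout and prints everything it
    // receives, so producer threads never contend on the writer.
    let print_thread = thread::spawn(move || -> io::Result<()> {
        let stdout = io::stdout();
        let mut stdout = stdout.lock();
        for path in rx.iter() {
            writeln!(stdout, "{}", path.display())?;
        }
        Ok(())
    });

    // Stand-in for the parallel walker: several producer threads send paths.
    let mut producers = Vec::new();
    for i in 0..3 {
        let tx = tx.clone();
        producers.push(thread::spawn(move || {
            tx.send(PathBuf::from(format!("file-{}.txt", i))).ok();
        }));
    }
    for p in producers {
        p.join().unwrap();
    }

    // Dropping the last sender ends the receiver's iterator, which lets the
    // print thread finish.
    drop(tx);
    print_thread.join().unwrap()
}
```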
- if dent.depth() == 0 && !ignore_entry_is_dir(&dent) { - return Some(dent); - } else if !ignore_entry_is_file(&dent) { - return None; - } - // If we are redirecting stdout to a file, then don't search that - // file. - if !files_only && is_stdout_file(&dent, stdout_handle, no_messages) { - return None; - } - Some(dent) + stdout.write_all(glob.as_bytes())?; + first = false; } + stdout.write_all(b"\n")?; } -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// directory. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool { - use std::os::windows::fs::MetadataExt; - use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; - - dent.metadata().map(|md| { - md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 - }).unwrap_or(false) -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// directory. -#[cfg(not(windows))] -fn ignore_entry_is_dir(dent: &ignore::DirEntry) -> bool { - dent.file_type().map_or(false, |ft| ft.is_dir()) -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// file. -/// -/// This works around a bug in Rust's standard library: -/// https://github.com/rust-lang/rust/issues/46484 -#[cfg(windows)] -fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { - !ignore_entry_is_dir(dent) -} - -/// Returns true if and only if the given `ignore::DirEntry` points to a -/// file. -#[cfg(not(windows))] -fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { - dent.file_type().map_or(false, |ft| ft.is_file()) -} - -fn is_stdout_file( - dent: &ignore::DirEntry, - stdout_handle: Option<&same_file::Handle>, - no_messages: bool, -) -> bool { - let stdout_handle = match stdout_handle { - None => return false, - Some(stdout_handle) => stdout_handle, - }; - // If we know for sure that these two things aren't equal, then avoid - // the costly extra stat call to determine equality. - if !maybe_dent_eq_handle(dent, stdout_handle) { - return false; - } - match same_file::Handle::from_path(dent.path()) { - Ok(h) => stdout_handle == &h, - Err(err) => { - if !no_messages { - eprintln!("{}: {}", dent.path().display(), err); - } - false - } - } -} - -#[cfg(unix)] -fn maybe_dent_eq_handle( - dent: &ignore::DirEntry, - handle: &same_file::Handle, -) -> bool { - dent.ino() == Some(handle.ino()) -} - -#[cfg(not(unix))] -fn maybe_dent_eq_handle(_: &ignore::DirEntry, _: &same_file::Handle) -> bool { - true -} - -fn eprint_nothing_searched() { - eprintln!("No files were searched, which means ripgrep probably \ - applied a filter you didn't expect. \ - Try running again with --debug."); -} - -fn print_stats( - match_count: u64, - paths_searched: u64, - paths_matched: u64, - time_elapsed: Duration, -) { - let time_elapsed = - time_elapsed.as_secs() as f64 - + (time_elapsed.subsec_nanos() as f64 * 1e-9); - println!("\n{} matched lines\n\ - {} files contained matches\n\ - {} files searched\n\ - {:.3} seconds", match_count, paths_matched, - paths_searched, time_elapsed); -} - -// The Rust standard library suppresses the default SIGPIPE behavior, so that -// writing to a closed pipe doesn't kill the process. The goal is to instead -// handle errors through the normal result mechanism. Ripgrep needs some -// refactoring before it will be able to do that, however, so we re-enable the -// standard SIGPIPE behavior as a workaround. See -// https://github.com/BurntSushi/ripgrep/issues/200. 
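The deleted comment above describes the old workaround of restoring the default SIGPIPE behavior. The new code drops that workaround and instead treats `io::ErrorKind::BrokenPipe` on write as a normal stop condition, as seen in `search` and `files` above. A minimal sketch of that pattern:

```rust
use std::io::{self, Write};

fn main() {
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    // With Rust's default runtime behavior (SIGPIPE ignored), writing to a
    // closed pipe surfaces as an io::Error instead of killing the process.
    for i in 0..1_000_000 {
        if let Err(err) = writeln!(stdout, "line {}", i) {
            if err.kind() == io::ErrorKind::BrokenPipe {
                // e.g. `program | head -n 1`: the reader went away, so stop
                // quietly rather than reporting an error.
                break;
            }
            eprintln!("write error: {}", err);
            std::process::exit(2);
        }
    }
}
```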
-#[cfg(unix)] -fn reset_sigpipe() { - unsafe { - libc::signal(libc::SIGPIPE, libc::SIG_DFL); - } -} - -#[cfg(not(unix))] -fn reset_sigpipe() { - // no-op + Ok(count > 0) } diff --git a/src/messages.rs b/src/messages.rs new file mode 100644 index 00000000..2016ff64 --- /dev/null +++ b/src/messages.rs @@ -0,0 +1,50 @@ +use std::sync::atomic::{ATOMIC_BOOL_INIT, AtomicBool, Ordering}; + +static MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; +static IGNORE_MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; + +#[macro_export] +macro_rules! message { + ($($tt:tt)*) => { + if ::messages::messages() { + eprintln!($($tt)*); + } + } +} + +#[macro_export] +macro_rules! ignore_message { + ($($tt:tt)*) => { + if ::messages::messages() && ::messages::ignore_messages() { + eprintln!($($tt)*); + } + } +} + +/// Returns true if and only if messages should be shown. +pub fn messages() -> bool { + MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether messages should be shown or not. +/// +/// By default, they are not shown. +pub fn set_messages(yes: bool) { + MESSAGES.store(yes, Ordering::SeqCst) +} + +/// Returns true if and only if "ignore" related messages should be shown. +pub fn ignore_messages() -> bool { + IGNORE_MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether "ignore" related messages should be shown or not. +/// +/// By default, they are not shown. +/// +/// Note that this is overridden if `messages` is disabled. Namely, if +/// `messages` is disabled, then "ignore" messages are never shown, regardless +/// of this setting. +pub fn set_ignore_messages(yes: bool) { + IGNORE_MESSAGES.store(yes, Ordering::SeqCst) +} diff --git a/src/path_printer.rs b/src/path_printer.rs new file mode 100644 index 00000000..324a27c4 --- /dev/null +++ b/src/path_printer.rs @@ -0,0 +1,101 @@ +use std::io; +use std::path::Path; + +use grep::printer::{ColorSpecs, PrinterPath}; +use termcolor::WriteColor; + +/// A configuration for describing how paths should be written. +#[derive(Clone, Debug)] +struct Config { + colors: ColorSpecs, + separator: Option, + terminator: u8, +} + +impl Default for Config { + fn default() -> Config { + Config { + colors: ColorSpecs::default(), + separator: None, + terminator: b'\n', + } + } +} + +/// A builder for constructing things to search over. +#[derive(Clone, Debug)] +pub struct PathPrinterBuilder { + config: Config, +} + +impl PathPrinterBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> PathPrinterBuilder { + PathPrinterBuilder { config: Config::default() } + } + + /// Create a new path printer with the current configuration that writes + /// paths to the given writer. + pub fn build(&self, wtr: W) -> PathPrinter { + PathPrinter { + config: self.config.clone(), + wtr: wtr, + } + } + + /// Set the color specification for this printer. + /// + /// Currently, only the `path` component of the given specification is + /// used. + pub fn color_specs( + &mut self, + specs: ColorSpecs, + ) -> &mut PathPrinterBuilder { + self.config.colors = specs; + self + } + + /// A path separator. + /// + /// When provided, the path's default separator will be replaced with + /// the given separator. + /// + /// This is not set by default, and the system's default path separator + /// will be used. + pub fn separator(&mut self, sep: Option) -> &mut PathPrinterBuilder { + self.config.separator = sep; + self + } + + /// A path terminator. + /// + /// When printing a path, it will be by terminated by the given byte. + /// + /// This is set to `\n` by default. 
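The `messages` module above gates diagnostics behind process-global `AtomicBool` flags that a macro checks at each call site. Below is a condensed sketch of the same pattern in a single file; it uses `AtomicBool::new(false)` as the static initializer, which is what `ATOMIC_BOOL_INIT` expands to.

```rust
use std::sync::atomic::{AtomicBool, Ordering};

// `AtomicBool::new` is a const fn, so it can initialize a static directly.
static MESSAGES: AtomicBool = AtomicBool::new(false);

macro_rules! message {
    ($($tt:tt)*) => {
        // The global flag gates diagnostics without threading a config value
        // through every call site.
        if MESSAGES.load(Ordering::SeqCst) {
            eprintln!($($tt)*);
        }
    }
}

fn main() {
    message!("suppressed: messages are off by default");
    MESSAGES.store(true, Ordering::SeqCst);
    message!("shown: messages were switched on");
}
```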
+ pub fn terminator(&mut self, terminator: u8) -> &mut PathPrinterBuilder { + self.config.terminator = terminator; + self + } +} + +/// A printer for emitting paths to a writer, with optional color support. +#[derive(Debug)] +pub struct PathPrinter { + config: Config, + wtr: W, +} + +impl PathPrinter { + /// Write the given path to the underlying writer. + pub fn write_path(&mut self, path: &Path) -> io::Result<()> { + let ppath = PrinterPath::with_separator(path, self.config.separator); + if !self.wtr.supports_color() { + self.wtr.write_all(ppath.as_bytes())?; + } else { + self.wtr.set_color(self.config.colors.path())?; + self.wtr.write_all(ppath.as_bytes())?; + self.wtr.reset()?; + } + self.wtr.write_all(&[self.config.terminator]) + } +} diff --git a/src/pathutil.rs b/src/pathutil.rs deleted file mode 100644 index 8d1c1510..00000000 --- a/src/pathutil.rs +++ /dev/null @@ -1,42 +0,0 @@ -/*! -The pathutil module provides platform specific operations on paths that are -typically faster than the same operations as provided in `std::path`. In -particular, we really want to avoid the costly operation of parsing the path -into its constituent components. We give up on Windows, but on Unix, we deal -with the raw bytes directly. - -On large repositories (like chromium), this can have a ~25% performance -improvement on just listing the files to search (!). -*/ -use std::path::Path; - -/// Strip `prefix` from the `path` and return the remainder. -/// -/// If `path` doesn't have a prefix `prefix`, then return `None`. -#[cfg(unix)] -pub fn strip_prefix<'a, P: AsRef + ?Sized>( - prefix: &'a P, - path: &'a Path, -) -> Option<&'a Path> { - use std::ffi::OsStr; - use std::os::unix::ffi::OsStrExt; - - let prefix = prefix.as_ref().as_os_str().as_bytes(); - let path = path.as_os_str().as_bytes(); - if prefix.len() > path.len() || prefix != &path[0..prefix.len()] { - None - } else { - Some(Path::new(OsStr::from_bytes(&path[prefix.len()..]))) - } -} - -/// Strip `prefix` from the `path` and return the remainder. -/// -/// If `path` doesn't have a prefix `prefix`, then return `None`. -#[cfg(not(unix))] -pub fn strip_prefix<'a, P: AsRef + ?Sized>( - prefix: &'a P, - path: &'a Path, -) -> Option<&'a Path> { - path.strip_prefix(prefix).ok() -} diff --git a/src/preprocessor.rs b/src/preprocessor.rs index bb464f86..07f66e2d 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -3,8 +3,6 @@ use std::io::{self, Read}; use std::path::{Path, PathBuf}; use std::process::{self, Stdio}; -use Result; - /// PreprocessorReader provides an `io::Read` impl to read kids output. 
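`PathPrinter::write_path` above colors the path only when the writer reports color support, then appends the configured terminator. Here is a self-contained approximation of that logic using `termcolor` directly; the hard-coded magenta spec and the `to_string_lossy` conversion are simplifications (the real printer uses its configured `ColorSpecs` and `PrinterPath`).

```rust
use std::io::Write;
use std::path::Path;

use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};

// A stripped-down version of `PathPrinter::write_path`: color the path if the
// writer supports it, then terminate with a single byte.
fn write_path<W: WriteColor>(
    wtr: &mut W,
    path: &Path,
    terminator: u8,
) -> std::io::Result<()> {
    let display = path.to_string_lossy();
    if wtr.supports_color() {
        wtr.set_color(ColorSpec::new().set_fg(Some(Color::Magenta)))?;
        wtr.write_all(display.as_bytes())?;
        wtr.reset()?;
    } else {
        wtr.write_all(display.as_bytes())?;
    }
    wtr.write_all(&[terminator])
}

fn main() -> std::io::Result<()> {
    let mut stdout = StandardStream::stdout(ColorChoice::Auto);
    write_path(&mut stdout, Path::new("src/main.rs"), b'\n')?;
    write_path(&mut stdout, Path::new("src/args.rs"), b'\n')
}
```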
#[derive(Debug)] pub struct PreprocessorReader { @@ -26,7 +24,7 @@ impl PreprocessorReader { pub fn from_cmd_path( cmd: PathBuf, path: &Path, - ) -> Result { + ) -> io::Result { let child = process::Command::new(&cmd) .arg(path) .stdin(Stdio::from(File::open(path)?)) @@ -34,10 +32,13 @@ impl PreprocessorReader { .stderr(Stdio::piped()) .spawn() .map_err(|err| { - format!( - "error running preprocessor command '{}': {}", - cmd.display(), - err, + io::Error::new( + io::ErrorKind::Other, + format!( + "error running preprocessor command '{}': {}", + cmd.display(), + err, + ), ) })?; Ok(PreprocessorReader { diff --git a/src/printer.rs b/src/printer.rs deleted file mode 100644 index 20fd1c4d..00000000 --- a/src/printer.rs +++ /dev/null @@ -1,928 +0,0 @@ -use std::error; -use std::fmt; -use std::path::Path; -use std::str::FromStr; - -use regex::bytes::{Captures, Match, Regex, Replacer}; -use termcolor::{Color, ColorSpec, ParseColorError, WriteColor}; - -use pathutil::strip_prefix; -use ignore::types::FileTypeDef; - -/// Track the start and end of replacements to allow coloring them on output. -#[derive(Debug)] -struct Offset { - start: usize, - end: usize, -} - -impl Offset { - fn new(start: usize, end: usize) -> Offset { - Offset { start: start, end: end } - } -} - -impl<'m, 'r> From<&'m Match<'r>> for Offset { - fn from(m: &'m Match<'r>) -> Self { - Offset{ start: m.start(), end: m.end() } - } -} - -/// `CountingReplacer` implements the Replacer interface for Regex, -/// and counts how often replacement is being performed. -struct CountingReplacer<'r> { - replace: &'r [u8], - count: &'r mut usize, - offsets: &'r mut Vec, -} - -impl<'r> CountingReplacer<'r> { - fn new( - replace: &'r [u8], - count: &'r mut usize, - offsets: &'r mut Vec, - ) -> CountingReplacer<'r> { - CountingReplacer { replace: replace, count: count, offsets: offsets, } - } -} - -impl<'r> Replacer for CountingReplacer<'r> { - fn replace_append(&mut self, caps: &Captures, dst: &mut Vec) { - *self.count += 1; - let start = dst.len(); - caps.expand(self.replace, dst); - let end = dst.len(); - if start != end { - self.offsets.push(Offset::new(start, end)); - } - } -} - -/// Printer encapsulates all output logic for searching. -/// -/// Note that we currently ignore all write errors. It's probably worthwhile -/// to fix this, but printers are only ever used for writes to stdout or -/// writes to memory, neither of which commonly fail. -pub struct Printer { - /// The underlying writer. - wtr: W, - /// Whether anything has been printed to wtr yet. - has_printed: bool, - /// Whether to show column numbers for the first match or not. - column: bool, - /// The string to use to separate non-contiguous runs of context lines. - context_separator: Vec, - /// The end-of-line terminator used by the printer. In general, eols are - /// printed via the match directly, but occasionally we need to insert them - /// ourselves (for example, to print a context separator). - eol: u8, - /// A file separator to show before any matches are printed. - file_separator: Option>, - /// Whether to show file name as a heading or not. - /// - /// N.B. If with_filename is false, then this setting has no effect. - heading: bool, - /// Whether to show every match on its own line. - line_per_match: bool, - /// Whether to print NUL bytes after a file path instead of new lines - /// or `:`. - null: bool, - /// Print only the matched (non-empty) parts of a matching line - only_matching: bool, - /// A string to use as a replacement of each match in a matching line. 
- replace: Option>, - /// Whether to prefix each match with the corresponding file name. - with_filename: bool, - /// The color specifications. - colors: ColorSpecs, - /// The separator to use for file paths. If empty, this is ignored. - path_separator: Option, - /// Restrict lines to this many columns. - max_columns: Option, -} - -impl Printer { - /// Create a new printer that writes to wtr with the given color settings. - pub fn new(wtr: W) -> Printer { - Printer { - wtr: wtr, - has_printed: false, - column: false, - context_separator: "--".to_string().into_bytes(), - eol: b'\n', - file_separator: None, - heading: false, - line_per_match: false, - null: false, - only_matching: false, - replace: None, - with_filename: false, - colors: ColorSpecs::default(), - path_separator: None, - max_columns: None, - } - } - - /// Set the color specifications. - pub fn colors(mut self, colors: ColorSpecs) -> Printer { - self.colors = colors; - self - } - - /// When set, column numbers will be printed for the first match on each - /// line. - pub fn column(mut self, yes: bool) -> Printer { - self.column = yes; - self - } - - /// Set the context separator. The default is `--`. - pub fn context_separator(mut self, sep: Vec) -> Printer { - self.context_separator = sep; - self - } - - /// Set the end-of-line terminator. The default is `\n`. - pub fn eol(mut self, eol: u8) -> Printer { - self.eol = eol; - self - } - - /// If set, the separator is printed before any matches. By default, no - /// separator is printed. - pub fn file_separator(mut self, sep: Vec) -> Printer { - self.file_separator = Some(sep); - self - } - - /// Whether to show file name as a heading or not. - /// - /// N.B. If with_filename is false, then this setting has no effect. - pub fn heading(mut self, yes: bool) -> Printer { - self.heading = yes; - self - } - - /// Whether to show every match on its own line. - pub fn line_per_match(mut self, yes: bool) -> Printer { - self.line_per_match = yes; - self - } - - /// Whether to cause NUL bytes to follow file paths instead of other - /// visual separators (like `:`, `-` and `\n`). - pub fn null(mut self, yes: bool) -> Printer { - self.null = yes; - self - } - - /// Print only the matched (non-empty) parts of a matching line - pub fn only_matching(mut self, yes: bool) -> Printer { - self.only_matching = yes; - self - } - - /// A separator to use when printing file paths. When empty, use the - /// default separator for the current platform. (/ on Unix, \ on Windows.) - pub fn path_separator(mut self, sep: Option) -> Printer { - self.path_separator = sep; - self - } - - /// Replace every match in each matching line with the replacement string - /// given. - pub fn replace(mut self, replacement: Vec) -> Printer { - self.replace = Some(replacement); - self - } - - /// When set, each match is prefixed with the file name that it came from. - pub fn with_filename(mut self, yes: bool) -> Printer { - self.with_filename = yes; - self - } - - /// Configure the max. number of columns used for printing matching lines. - pub fn max_columns(mut self, max_columns: Option) -> Printer { - self.max_columns = max_columns; - self - } - - /// Returns true if and only if something has been printed. - pub fn has_printed(&self) -> bool { - self.has_printed - } - - /// Flushes the underlying writer and returns it. - #[allow(dead_code)] - pub fn into_inner(mut self) -> W { - let _ = self.wtr.flush(); - self.wtr - } - - /// Prints a type definition. 
- pub fn type_def(&mut self, def: &FileTypeDef) { - self.write(def.name().as_bytes()); - self.write(b": "); - let mut first = true; - for glob in def.globs() { - if !first { - self.write(b", "); - } - self.write(glob.as_bytes()); - first = false; - } - self.write_eol(); - } - - /// Prints the given path. - pub fn path>(&mut self, path: P) { - let path = strip_prefix("./", path.as_ref()).unwrap_or(path.as_ref()); - self.write_path(path); - self.write_path_eol(); - } - - /// Prints the given path and a count of the number of matches found. - pub fn path_count>(&mut self, path: P, count: u64) { - if self.with_filename { - self.write_path(path); - self.write_path_sep(b':'); - } - self.write(count.to_string().as_bytes()); - self.write_eol(); - } - - /// Prints the context separator. - pub fn context_separate(&mut self) { - if self.context_separator.is_empty() { - return; - } - let _ = self.wtr.write_all(&self.context_separator); - self.write_eol(); - } - - pub fn matched>( - &mut self, - re: &Regex, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - byte_offset: Option - ) { - if !self.line_per_match && !self.only_matching { - let mat = - if !self.needs_match() { - (0, 0) - } else { - re.find(&buf[start..end]) - .map(|m| (m.start(), m.end())) - .unwrap_or((0, 0)) - }; - return self.write_match( - re, path, buf, start, end, line_number, - byte_offset, mat.0, mat.1); - } - for m in re.find_iter(&buf[start..end]) { - self.write_match( - re, path.as_ref(), buf, start, end, line_number, - byte_offset, m.start(), m.end()); - } - } - - fn needs_match(&self) -> bool { - self.column - || self.replace.is_some() - || self.only_matching - } - - fn write_match>( - &mut self, - re: &Regex, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - byte_offset: Option, - match_start: usize, - match_end: usize, - ) { - if self.heading && self.with_filename && !self.has_printed { - self.write_file_sep(); - self.write_path(path); - self.write_path_eol(); - } else if !self.heading && self.with_filename { - self.write_path(path); - self.write_path_sep(b':'); - } - if let Some(line_number) = line_number { - self.line_number(line_number, b':'); - } - if self.column { - self.column_number(match_start as u64 + 1, b':'); - } - if let Some(byte_offset) = byte_offset { - if self.only_matching { - self.write_byte_offset( - byte_offset + ((start + match_start) as u64), b':'); - } else { - self.write_byte_offset(byte_offset + (start as u64), b':'); - } - } - if self.replace.is_some() { - let mut count = 0; - let mut offsets = Vec::new(); - let line = { - let replacer = CountingReplacer::new( - self.replace.as_ref().unwrap(), &mut count, &mut offsets); - if self.only_matching { - re.replace_all( - &buf[start + match_start..start + match_end], replacer) - } else { - re.replace_all(&buf[start..end], replacer) - } - }; - if self.max_columns.map_or(false, |m| line.len() > m) { - let msg = format!( - "[Omitted long line with {} replacements]", count); - self.write_colored(msg.as_bytes(), |colors| colors.matched()); - self.write_eol(); - return; - } - self.write_matched_line(offsets, &*line, false); - } else { - let buf = if self.only_matching { - &buf[start + match_start..start + match_end] - } else { - &buf[start..end] - }; - if self.max_columns.map_or(false, |m| buf.len() > m) { - let count = re.find_iter(buf).count(); - let msg = format!("[Omitted long line with {} matches]", count); - self.write_colored(msg.as_bytes(), |colors| colors.matched()); - self.write_eol(); - return; 
- } - let only_match = self.only_matching; - self.write_matched_line( - re.find_iter(buf).map(|x| Offset::from(&x)), buf, only_match); - } - } - - fn write_matched_line(&mut self, offsets: I, buf: &[u8], only_match: bool) - where I: IntoIterator, - { - if !self.wtr.supports_color() || self.colors.matched().is_none() { - self.write(buf); - } else if only_match { - self.write_colored(buf, |colors| colors.matched()); - } else { - let mut last_written = 0; - for o in offsets { - self.write(&buf[last_written..o.start]); - // This conditional checks if the match is both empty *and* - // past the end of the line. In this case, we never want to - // emit an additional color escape. - if o.start != o.end || o.end != buf.len() { - self.write_colored( - &buf[o.start..o.end], |colors| colors.matched()); - } - last_written = o.end; - } - self.write(&buf[last_written..]); - } - if buf.last() != Some(&self.eol) { - self.write_eol(); - } - } - - pub fn context>( - &mut self, - path: P, - buf: &[u8], - start: usize, - end: usize, - line_number: Option, - byte_offset: Option, - ) { - if self.heading && self.with_filename && !self.has_printed { - self.write_file_sep(); - self.write_path(path); - self.write_path_eol(); - } else if !self.heading && self.with_filename { - self.write_path(path); - self.write_path_sep(b'-'); - } - if let Some(line_number) = line_number { - self.line_number(line_number, b'-'); - } - if let Some(byte_offset) = byte_offset { - self.write_byte_offset(byte_offset + (start as u64), b'-'); - } - if self.max_columns.map_or(false, |m| end - start > m) { - self.write(b"[Omitted long context line]"); - self.write_eol(); - return; - } - self.write(&buf[start..end]); - if buf[start..end].last() != Some(&self.eol) { - self.write_eol(); - } - } - - fn separator(&mut self, sep: &[u8]) { - self.write(sep); - } - - fn write_path_sep(&mut self, sep: u8) { - if self.null { - self.write(b"\x00"); - } else { - self.separator(&[sep]); - } - } - - fn write_path_eol(&mut self) { - if self.null { - self.write(b"\x00"); - } else { - self.write_eol(); - } - } - - #[cfg(unix)] - fn write_path>(&mut self, path: P) { - use std::os::unix::ffi::OsStrExt; - let path = path.as_ref().as_os_str().as_bytes(); - self.write_path_replace_separator(path); - } - - #[cfg(not(unix))] - fn write_path>(&mut self, path: P) { - let path = path.as_ref().to_string_lossy(); - self.write_path_replace_separator(path.as_bytes()); - } - - fn write_path_replace_separator(&mut self, path: &[u8]) { - match self.path_separator { - None => self.write_colored(path, |colors| colors.path()), - Some(sep) => { - let transformed_path: Vec<_> = path.iter().map(|&b| { - if b == b'/' || (cfg!(windows) && b == b'\\') { - sep - } else { - b - } - }).collect(); - self.write_colored(&transformed_path, |colors| colors.path()); - } - } - } - - fn line_number(&mut self, n: u64, sep: u8) { - let line_number = n.to_string(); - self.write_colored(line_number.as_bytes(), |colors| colors.line()); - self.separator(&[sep]); - } - - fn column_number(&mut self, n: u64, sep: u8) { - self.write_colored(n.to_string().as_bytes(), |colors| colors.column()); - self.separator(&[sep]); - } - - fn write_byte_offset(&mut self, o: u64, sep: u8) { - self.write_colored(o.to_string().as_bytes(), |colors| colors.column()); - self.separator(&[sep]); - } - - fn write(&mut self, buf: &[u8]) { - self.has_printed = true; - let _ = self.wtr.write_all(buf); - } - - fn write_eol(&mut self) { - let eol = self.eol; - self.write(&[eol]); - } - - fn write_colored(&mut self, buf: &[u8], 
get_color: F) - where F: Fn(&ColorSpecs) -> &ColorSpec - { - let _ = self.wtr.set_color(get_color(&self.colors)); - self.write(buf); - let _ = self.wtr.reset(); - } - - fn write_file_sep(&mut self) { - if let Some(ref sep) = self.file_separator { - self.has_printed = true; - let _ = self.wtr.write_all(sep); - let _ = self.wtr.write_all(b"\n"); - } - } -} - -/// An error that can occur when parsing color specifications. -#[derive(Clone, Debug, Eq, PartialEq)] -pub enum Error { - /// This occurs when an unrecognized output type is used. - UnrecognizedOutType(String), - /// This occurs when an unrecognized spec type is used. - UnrecognizedSpecType(String), - /// This occurs when an unrecognized color name is used. - UnrecognizedColor(String, String), - /// This occurs when an unrecognized style attribute is used. - UnrecognizedStyle(String), - /// This occurs when the format of a color specification is invalid. - InvalidFormat(String), -} - -impl error::Error for Error { - fn description(&self) -> &str { - match *self { - Error::UnrecognizedOutType(_) => "unrecognized output type", - Error::UnrecognizedSpecType(_) => "unrecognized spec type", - Error::UnrecognizedColor(_, _) => "unrecognized color name", - Error::UnrecognizedStyle(_) => "unrecognized style attribute", - Error::InvalidFormat(_) => "invalid color spec", - } - } - - fn cause(&self) -> Option<&error::Error> { - None - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::UnrecognizedOutType(ref name) => { - write!(f, "Unrecognized output type '{}'. Choose from: \ - path, line, column, match.", name) - } - Error::UnrecognizedSpecType(ref name) => { - write!(f, "Unrecognized spec type '{}'. Choose from: \ - fg, bg, style, none.", name) - } - Error::UnrecognizedColor(_, ref msg) => { - write!(f, "{}", msg) - } - Error::UnrecognizedStyle(ref name) => { - write!(f, "Unrecognized style attribute '{}'. Choose from: \ - nobold, bold, nointense, intense, nounderline, \ - underline.", name) - } - Error::InvalidFormat(ref original) => { - write!( - f, - "Invalid color spec format: '{}'. Valid format \ - is '(path|line|column|match):(fg|bg|style):(value)'.", - original) - } - } - } -} - -impl From for Error { - fn from(err: ParseColorError) -> Error { - Error::UnrecognizedColor(err.invalid().to_string(), err.to_string()) - } -} - -/// A merged set of color specifications. -#[derive(Clone, Debug, Default, Eq, PartialEq)] -pub struct ColorSpecs { - path: ColorSpec, - line: ColorSpec, - column: ColorSpec, - matched: ColorSpec, -} - -/// A single color specification provided by the user. -/// -/// A `ColorSpecs` can be built by merging a sequence of `Spec`s. -/// -/// ## Example -/// -/// The only way to build a `Spec` is to parse it from a string. Once multiple -/// `Spec`s have been constructed, then can be merged into a single -/// `ColorSpecs` value. -/// -/// ```rust -/// use termcolor::{Color, ColorSpecs, Spec}; -/// -/// let spec1: Spec = "path:fg:blue".parse().unwrap(); -/// let spec2: Spec = "match:bg:green".parse().unwrap(); -/// let specs = ColorSpecs::new(&[spec1, spec2]); -/// -/// assert_eq!(specs.path().fg(), Some(Color::Blue)); -/// assert_eq!(specs.matched().bg(), Some(Color::Green)); -/// ``` -/// -/// ## Format -/// -/// The format of a `Spec` is a triple: `{type}:{attribute}:{value}`. Each -/// component is defined as follows: -/// -/// * `{type}` can be one of `path`, `line`, `column` or `match`. -/// * `{attribute}` can be one of `fg`, `bg` or `style`. 
`{attribute}` may also -/// be the special value `none`, in which case, `{value}` can be omitted. -/// * `{value}` is either a color name (for `fg`/`bg`) or a style instruction. -/// -/// `{type}` controls which part of the output should be styled and is -/// application dependent. -/// -/// When `{attribute}` is `none`, then this should cause any existing color -/// settings to be cleared. -/// -/// `{value}` should be a color when `{attribute}` is `fg` or `bg`, or it -/// should be a style instruction when `{attribute}` is `style`. When -/// `{attribute}` is `none`, `{value}` must be omitted. -/// -/// Valid colors are `black`, `blue`, `green`, `red`, `cyan`, `magenta`, -/// `yellow`, `white`. -/// -/// Valid style instructions are `nobold`, `bold`, `intense`, `nointense`, -/// `underline`, `nounderline`. -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct Spec { - ty: OutType, - value: SpecValue, -} - -/// The actual value given by the specification. -#[derive(Clone, Debug, Eq, PartialEq)] -enum SpecValue { - None, - Fg(Color), - Bg(Color), - Style(Style), -} - -/// The set of configurable portions of ripgrep's output. -#[derive(Clone, Debug, Eq, PartialEq)] -enum OutType { - Path, - Line, - Column, - Match, -} - -/// The specification type. -#[derive(Clone, Debug, Eq, PartialEq)] -enum SpecType { - Fg, - Bg, - Style, - None, -} - -/// The set of available styles for use in the terminal. -#[derive(Clone, Debug, Eq, PartialEq)] -enum Style { - Bold, - NoBold, - Intense, - NoIntense, - Underline, - NoUnderline -} - -impl ColorSpecs { - /// Create color specifications from a list of user supplied - /// specifications. - pub fn new(user_specs: &[Spec]) -> ColorSpecs { - let mut specs = ColorSpecs::default(); - for user_spec in user_specs { - match user_spec.ty { - OutType::Path => user_spec.merge_into(&mut specs.path), - OutType::Line => user_spec.merge_into(&mut specs.line), - OutType::Column => user_spec.merge_into(&mut specs.column), - OutType::Match => user_spec.merge_into(&mut specs.matched), - } - } - specs - } - - /// Return the color specification for coloring file paths. - fn path(&self) -> &ColorSpec { - &self.path - } - - /// Return the color specification for coloring line numbers. - fn line(&self) -> &ColorSpec { - &self.line - } - - /// Return the color specification for coloring column numbers. - fn column(&self) -> &ColorSpec { - &self.column - } - - /// Return the color specification for coloring matched text. - fn matched(&self) -> &ColorSpec { - &self.matched - } -} - -impl Spec { - /// Merge this spec into the given color specification. - fn merge_into(&self, cspec: &mut ColorSpec) { - self.value.merge_into(cspec); - } -} - -impl SpecValue { - /// Merge this spec value into the given color specification. 
- fn merge_into(&self, cspec: &mut ColorSpec) { - match *self { - SpecValue::None => cspec.clear(), - SpecValue::Fg(ref color) => { cspec.set_fg(Some(color.clone())); } - SpecValue::Bg(ref color) => { cspec.set_bg(Some(color.clone())); } - SpecValue::Style(ref style) => { - match *style { - Style::Bold => { cspec.set_bold(true); } - Style::NoBold => { cspec.set_bold(false); } - Style::Intense => { cspec.set_intense(true); } - Style::NoIntense => { cspec.set_intense(false); } - Style::Underline => { cspec.set_underline(true); } - Style::NoUnderline => { cspec.set_underline(false); } - } - } - } - } -} - -impl FromStr for Spec { - type Err = Error; - - fn from_str(s: &str) -> Result { - let pieces: Vec<&str> = s.split(':').collect(); - if pieces.len() <= 1 || pieces.len() > 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let otype: OutType = pieces[0].parse()?; - match pieces[1].parse()? { - SpecType::None => Ok(Spec { ty: otype, value: SpecValue::None }), - SpecType::Style => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let style: Style = pieces[2].parse()?; - Ok(Spec { ty: otype, value: SpecValue::Style(style) }) - } - SpecType::Fg => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let color: Color = pieces[2].parse()?; - Ok(Spec { ty: otype, value: SpecValue::Fg(color) }) - } - SpecType::Bg => { - if pieces.len() < 3 { - return Err(Error::InvalidFormat(s.to_string())); - } - let color: Color = pieces[2].parse()?; - Ok(Spec { ty: otype, value: SpecValue::Bg(color) }) - } - } - } -} - -impl FromStr for OutType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "path" => Ok(OutType::Path), - "line" => Ok(OutType::Line), - "column" => Ok(OutType::Column), - "match" => Ok(OutType::Match), - _ => Err(Error::UnrecognizedOutType(s.to_string())), - } - } -} - -impl FromStr for SpecType { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "fg" => Ok(SpecType::Fg), - "bg" => Ok(SpecType::Bg), - "style" => Ok(SpecType::Style), - "none" => Ok(SpecType::None), - _ => Err(Error::UnrecognizedSpecType(s.to_string())), - } - } -} - -impl FromStr for Style { - type Err = Error; - - fn from_str(s: &str) -> Result { - match &*s.to_lowercase() { - "bold" => Ok(Style::Bold), - "nobold" => Ok(Style::NoBold), - "intense" => Ok(Style::Intense), - "nointense" => Ok(Style::NoIntense), - "underline" => Ok(Style::Underline), - "nounderline" => Ok(Style::NoUnderline), - _ => Err(Error::UnrecognizedStyle(s.to_string())), - } - } -} - -#[cfg(test)] -mod tests { - use termcolor::{Color, ColorSpec}; - use super::{ColorSpecs, Error, OutType, Spec, SpecValue, Style}; - - #[test] - fn merge() { - let user_specs: &[Spec] = &[ - "match:fg:blue".parse().unwrap(), - "match:none".parse().unwrap(), - "match:style:bold".parse().unwrap(), - ]; - let mut expect_matched = ColorSpec::new(); - expect_matched.set_bold(true); - assert_eq!(ColorSpecs::new(user_specs), ColorSpecs { - path: ColorSpec::default(), - line: ColorSpec::default(), - column: ColorSpec::default(), - matched: expect_matched, - }); - } - - #[test] - fn specs() { - let spec: Spec = "path:fg:blue".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Path, - value: SpecValue::Fg(Color::Blue), - }); - - let spec: Spec = "path:bg:red".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Path, - value: SpecValue::Bg(Color::Red), - }); - - let spec: Spec = "match:style:bold".parse().unwrap(); - 
assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Bold), - }); - - let spec: Spec = "match:style:intense".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Intense), - }); - - let spec: Spec = "match:style:underline".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Match, - value: SpecValue::Style(Style::Underline), - }); - - let spec: Spec = "line:none".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Line, - value: SpecValue::None, - }); - - let spec: Spec = "column:bg:green".parse().unwrap(); - assert_eq!(spec, Spec { - ty: OutType::Column, - value: SpecValue::Bg(Color::Green), - }); - } - - #[test] - fn spec_errors() { - let err = "line:nonee".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedSpecType("nonee".to_string())); - - let err = "".parse::().unwrap_err(); - assert_eq!(err, Error::InvalidFormat("".to_string())); - - let err = "foo".parse::().unwrap_err(); - assert_eq!(err, Error::InvalidFormat("foo".to_string())); - - let err = "line:style:italic".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedStyle("italic".to_string())); - - let err = "line:fg:brown".parse::().unwrap_err(); - match err { - Error::UnrecognizedColor(name, _) => assert_eq!(name, "brown"), - err => assert!(false, "unexpected error: {:?}", err), - } - - let err = "foo:fg:brown".parse::().unwrap_err(); - assert_eq!(err, Error::UnrecognizedOutType("foo".to_string())); - } -} diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 00000000..45f7cf87 --- /dev/null +++ b/src/search.rs @@ -0,0 +1,408 @@ +use std::io; +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use grep::matcher::Matcher; +#[cfg(feature = "pcre2")] +use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher}; +use grep::printer::{JSON, Standard, Summary, Stats}; +use grep::regex::{RegexMatcher as RustRegexMatcher}; +use grep::searcher::Searcher; +use serde_json as json; +use termcolor::WriteColor; + +use decompressor::{DecompressionReader, is_compressed}; +use preprocessor::PreprocessorReader; +use subject::Subject; + +/// The configuration for the search worker. Among a few other things, the +/// configuration primarily controls the way we show search results to users +/// at a very high level. +#[derive(Clone, Debug)] +struct Config { + json_stats: bool, + preprocessor: Option, + search_zip: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + json_stats: false, + preprocessor: None, + search_zip: false, + } + } +} + +/// A builder for configuring and constructing a search worker. +#[derive(Clone, Debug)] +pub struct SearchWorkerBuilder { + config: Config, +} + +impl Default for SearchWorkerBuilder { + fn default() -> SearchWorkerBuilder { + SearchWorkerBuilder::new() + } +} + +impl SearchWorkerBuilder { + /// Create a new builder for configuring and constructing a search worker. + pub fn new() -> SearchWorkerBuilder { + SearchWorkerBuilder { config: Config::default() } + } + + /// Create a new search worker using the given searcher, matcher and + /// printer. + pub fn build( + &self, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, + ) -> SearchWorker { + let config = self.config.clone(); + SearchWorker { config, matcher, searcher, printer } + } + + /// Forcefully use JSON to emit statistics, even if the underlying printer + /// is not the JSON printer. 
+ /// + /// This is useful for implementing flag combinations like + /// `--json --quiet`, which uses the summary printer for implementing + /// `--quiet` but still wants to emit summary statistics, which should + /// be JSON formatted because of the `--json` flag. + pub fn json_stats(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.json_stats = yes; + self + } + + /// Set the path to a preprocessor command. + /// + /// When this is set, instead of searching files directly, the given + /// command will be run with the file path as the first argument, and the + /// output of that command will be searched instead. + pub fn preprocessor( + &mut self, + cmd: Option, + ) -> &mut SearchWorkerBuilder { + self.config.preprocessor = cmd; + self + } + + /// Enable the decompression and searching of common compressed files. + /// + /// When enabled, if a particular file path is recognized as a compressed + /// file, then it is decompressed before searching. + /// + /// Note that if a preprocessor command is set, then it overrides this + /// setting. + pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.search_zip = yes; + self + } +} + +/// The result of executing a search. +/// +/// Generally speaking, the "result" of a search is sent to a printer, which +/// writes results to an underlying writer such as stdout or a file. However, +/// every search also has some aggregate statistics or meta data that may be +/// useful to higher level routines. +#[derive(Clone, Debug, Default)] +pub struct SearchResult { + has_match: bool, + stats: Option, +} + +impl SearchResult { + /// Whether the search found a match or not. + pub fn has_match(&self) -> bool { + self.has_match + } + + /// Return aggregate search statistics for a single search, if available. + /// + /// It can be expensive to compute statistics, so these are only present + /// if explicitly enabled in the printer provided by the caller. + pub fn stats(&self) -> Option<&Stats> { + self.stats.as_ref() + } +} + +/// The pattern matcher used by a search worker. +#[derive(Clone, Debug)] +pub enum PatternMatcher { + RustRegex(RustRegexMatcher), + #[cfg(feature = "pcre2")] + PCRE2(PCRE2RegexMatcher), +} + +/// The printer used by a search worker. +/// +/// The `W` type parameter refers to the type of the underlying writer. +#[derive(Debug)] +pub enum Printer { + /// Use the standard printer, which supports the classic grep-like format. + Standard(Standard), + /// Use the summary printer, which supports aggregate displays of search + /// results. + Summary(Summary), + /// A JSON printer, which emits results in the JSON Lines format. 
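The `Printer` enum that follows routes `print_stats` to either a human-readable block or a JSON `summary` message (see `print_stats_json` below). As a self-contained illustration of the JSON side, here is a sketch that emits a value with the same overall shape using `serde_json`; the `stats` payload is a placeholder and the example assumes a 2018-edition `use serde_json::json;` import.

```rust
use std::io::{self, Write};
use std::time::Duration;

use serde_json::json;

fn write_summary<W: Write>(wtr: &mut W, elapsed: Duration) -> io::Result<()> {
    let secs = elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 * 1e-9;
    // Mirrors the shape of the "summary" message emitted by
    // `print_stats_json` below; the `stats` object here is a placeholder.
    let msg = json!({
        "type": "summary",
        "data": {
            "stats": { "matches": 0 },
            "elapsed_total": {
                "secs": elapsed.as_secs(),
                "nanos": elapsed.subsec_nanos(),
                "human": format!("{:0.6}s", secs)
            }
        }
    });
    serde_json::to_writer(&mut *wtr, &msg)?;
    wtr.write_all(b"\n")
}

fn main() -> io::Result<()> {
    let stdout = io::stdout();
    write_summary(&mut stdout.lock(), Duration::from_millis(1234))
}
```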
+ JSON(JSON), +} + +impl Printer { + fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + match *self { + Printer::JSON(_) => { + self.print_stats_json(total_duration, stats) + } + Printer::Standard(_) | Printer::Summary(_) => { + self.print_stats_human(total_duration, stats) + } + } + } + + fn print_stats_human( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + write!( + self.get_mut(), + " +{matches} matches +{lines} matched lines +{searches_with_match} files contained matches +{searches} files searched +{bytes_printed} bytes printed +{bytes_searched} bytes searched +{search_time:0.6} seconds spent searching +{process_time:0.6} seconds +", + matches = stats.matches(), + lines = stats.matched_lines(), + searches_with_match = stats.searches_with_match(), + searches = stats.searches(), + bytes_printed = stats.bytes_printed(), + bytes_searched = stats.bytes_searched(), + search_time = fractional_seconds(stats.elapsed()), + process_time = fractional_seconds(total_duration) + ) + } + + fn print_stats_json( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + // We specifically match the format laid out by the JSON printer in + // the grep-printer crate. We simply "extend" it with the 'summary' + // message type. + let fractional = fractional_seconds(total_duration); + json::to_writer(self.get_mut(), &json!({ + "type": "summary", + "data": { + "stats": stats, + "elapsed_total": { + "secs": total_duration.as_secs(), + "nanos": total_duration.subsec_nanos(), + "human": format!("{:0.6}s", fractional), + }, + } + }))?; + write!(self.get_mut(), "\n") + } + + /// Return a mutable reference to the underlying printer's writer. + pub fn get_mut(&mut self) -> &mut W { + match *self { + Printer::Standard(ref mut p) => p.get_mut(), + Printer::Summary(ref mut p) => p.get_mut(), + Printer::JSON(ref mut p) => p.get_mut(), + } + } +} + +/// A worker for executing searches. +/// +/// It is intended for a single worker to execute many searches, and is +/// generally intended to be used from a single thread. When searching using +/// multiple threads, it is better to create a new worker for each thread. +#[derive(Debug)] +pub struct SearchWorker { + config: Config, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, +} + +impl SearchWorker { + /// Execute a search over the given subject. + pub fn search(&mut self, subject: &Subject) -> io::Result { + self.search_impl(subject) + } + + /// Return a mutable reference to the underlying printer. + pub fn printer(&mut self) -> &mut Printer { + &mut self.printer + } + + /// Print the given statistics to the underlying writer in a way that is + /// consistent with this searcher's printer's format. + /// + /// While `Stats` contains a duration itself, this only corresponds to the + /// time spent searching, where as `total_duration` should roughly + /// approximate the lifespan of the ripgrep process itself. + pub fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + if self.config.json_stats { + self.printer().print_stats_json(total_duration, stats) + } else { + self.printer().print_stats(total_duration, stats) + } + } + + /// Search the given subject using the appropriate strategy. + fn search_impl(&mut self, subject: &Subject) -> io::Result { + let path = subject.path(); + if subject.is_stdin() { + let stdin = io::stdin(); + // A `return` here appeases the borrow checker. NLL will fix this. 
+ return self.search_reader(path, stdin.lock()); + } else if self.config.preprocessor.is_some() { + let cmd = self.config.preprocessor.clone().unwrap(); + let rdr = PreprocessorReader::from_cmd_path(cmd, path)?; + self.search_reader(path, rdr) + } else if self.config.search_zip && is_compressed(path) { + match DecompressionReader::from_path(path) { + None => Ok(SearchResult::default()), + Some(rdr) => self.search_reader(path, rdr), + } + } else { + self.search_path(path) + } + } + + /// Search the contents of the given file path. + fn search_path(&mut self, path: &Path) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_path(m, searcher, printer, path), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_path(m, searcher, printer, path), + } + } + + /// Executes a search on the given reader, which may or may not correspond + /// directly to the contents of the given file path. Instead, the reader + /// may actually cause something else to be searched (for example, when + /// a preprocessor is set or when decompression is enabled). In those + /// cases, the file path is used for visual purposes only. + /// + /// Generally speaking, this method should only be used when there is no + /// other choice. Searching via `search_path` provides more opportunities + /// for optimizations (such as memory maps). + fn search_reader( + &mut self, + path: &Path, + rdr: R, + ) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr), + #[cfg(feature = "pcre2")] + PCRE2(ref m) => search_reader(m, searcher, printer, path, rdr), + } + } +} + +/// Search the contents of the given file path using the given matcher, +/// searcher and printer. +fn search_path( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Search the contents of the given reader using the given matcher, searcher +/// and printer. 
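The `search_path` and `search_reader` helpers here are thin glue over the libripgrep crates: build a sink from the printer for a given path, then drive the searcher into that sink. The following is a self-contained sketch of that end-to-end flow with the standard printer, assuming the `grep` facade re-exports used elsewhere in this patch (`grep::regex`, `grep::searcher`, `grep::printer`).

```rust
use std::error::Error;
use std::path::Path;

use grep::printer::StandardBuilder;
use grep::regex::RegexMatcher;
use grep::searcher::SearcherBuilder;
use termcolor::{ColorChoice, StandardStream};

// Build a matcher, a searcher and a printer, then search one file and report
// whether anything matched.
fn grep_file(pattern: &str, path: &Path) -> Result<bool, Box<dyn Error>> {
    let matcher = RegexMatcher::new(pattern)?;
    let mut searcher = SearcherBuilder::new().line_number(true).build();
    let mut printer = StandardBuilder::new()
        .build(StandardStream::stdout(ColorChoice::Auto));

    let mut sink = printer.sink_with_path(&matcher, path);
    searcher.search_path(&matcher, path, &mut sink)?;
    Ok(sink.has_match())
}

fn main() -> Result<(), Box<dyn Error>> {
    let matched = grep_file("fn main", Path::new("src/main.rs"))?;
    println!("matched: {}", matched);
    Ok(())
}
```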
+fn search_reader( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, + rdr: R, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: sink.stats().map(|s| s.clone()), + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + stats: Some(sink.stats().clone()), + }) + } + } +} + +/// Return the given duration as fractional seconds. +fn fractional_seconds(duration: Duration) -> f64 { + (duration.as_secs() as f64) + (duration.subsec_nanos() as f64 * 1e-9) +} diff --git a/src/search_buffer.rs b/src/search_buffer.rs deleted file mode 100644 index 2777a06c..00000000 --- a/src/search_buffer.rs +++ /dev/null @@ -1,424 +0,0 @@ -/*! -The `search_buffer` module is responsible for searching a single file all in a -single buffer. Typically, the source of the buffer is a memory map. This can -be useful for when memory maps are faster than streaming search. - -Note that this module doesn't quite support everything that `search_stream` -does. Notably, showing contexts. -*/ -use std::cmp; -use std::path::Path; - -use grep::Grep; -use termcolor::WriteColor; - -use printer::Printer; -use search_stream::{IterLines, Options, count_lines, is_binary}; - -pub struct BufferSearcher<'a, W: 'a> { - opts: Options, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - buf: &'a [u8], - match_line_count: u64, - match_count: Option, - line_count: Option, - byte_offset: Option, - last_line: usize, -} - -impl<'a, W: WriteColor> BufferSearcher<'a, W> { - pub fn new( - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - buf: &'a [u8], - ) -> BufferSearcher<'a, W> { - BufferSearcher { - opts: Options::default(), - printer: printer, - grep: grep, - path: path, - buf: buf, - match_line_count: 0, - match_count: None, - line_count: None, - byte_offset: None, - last_line: 0, - } - } - - /// If enabled, searching will print a 0-based offset of the - /// matching line (or the actual match if -o is specified) before - /// printing the line itself. - /// - /// Disabled by default. - pub fn byte_offset(mut self, yes: bool) -> Self { - self.opts.byte_offset = yes; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the count of individual matches - /// instead of each match. - /// - /// Disabled by default. - pub fn count_matches(mut self, yes: bool) -> Self { - self.opts.count_matches = yes; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files that *don't* match - /// the given pattern. - /// - /// Disabled by default. 
- pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self - } - - #[inline(never)] - pub fn run(mut self) -> u64 { - let binary_upto = cmp::min(10_240, self.buf.len()); - if !self.opts.text && is_binary(&self.buf[..binary_upto], true) { - return 0; - } - - self.match_line_count = 0; - self.line_count = if self.opts.line_number { Some(0) } else { None }; - // The memory map searcher uses one contiguous block of bytes, so the - // offsets given the printer are sufficient to compute the byte offset. - self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; - self.match_count = if self.opts.count_matches { Some(0) } else { None }; - let mut last_end = 0; - for m in self.grep.iter(self.buf) { - if self.opts.invert_match { - self.print_inverted_matches(last_end, m.start()); - } else { - self.print_match(m.start(), m.end()); - } - last_end = m.end(); - if self.opts.terminate(self.match_line_count) { - break; - } - } - if self.opts.invert_match && !self.opts.terminate(self.match_line_count) { - let upto = self.buf.len(); - self.print_inverted_matches(last_end, upto); - } - if self.opts.count && self.match_line_count > 0 { - self.printer.path_count(self.path, self.match_line_count); - } else if self.opts.count_matches - && self.match_count.map_or(false, |c| c > 0) - { - self.printer.path_count(self.path, self.match_count.unwrap()); - } - if self.opts.files_with_matches && self.match_line_count > 0 { - self.printer.path(self.path); - } - if self.opts.files_without_matches && self.match_line_count == 0 { - self.printer.path(self.path); - } - self.match_line_count - } - - #[inline(always)] - fn count_individual_matches(&mut self, start: usize, end: usize) { - if let Some(ref mut count) = self.match_count { - for _ in self.grep.regex().find_iter(&self.buf[start..end]) { - *count += 1; - } - } - } - - #[inline(always)] - pub fn print_match(&mut self, start: usize, end: usize) { - self.match_line_count += 1; - self.count_individual_matches(start, end); - if self.opts.skip_matches() { - return; - } - self.count_lines(start); - self.add_line(end); - self.printer.matched( - self.grep.regex(), self.path, self.buf, - start, end, self.line_count, self.byte_offset); - } - - #[inline(always)] - fn print_inverted_matches(&mut self, start: usize, end: usize) { - debug_assert!(self.opts.invert_match); - let mut it = IterLines::new(self.opts.eol, start); - while let Some((s, e)) = 
it.next(&self.buf[..end]) { - if self.opts.terminate(self.match_line_count) { - return; - } - self.print_match(s, e); - } - } - - #[inline(always)] - fn count_lines(&mut self, upto: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += count_lines( - &self.buf[self.last_line..upto], self.opts.eol); - self.last_line = upto; - } - } - - #[inline(always)] - fn add_line(&mut self, line_end: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += 1; - self.last_line = line_end; - } - } -} - -#[cfg(test)] -mod tests { - use std::path::Path; - - use grep::GrepBuilder; - - use printer::Printer; - use termcolor; - - use super::BufferSearcher; - - const SHERLOCK: &'static str = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached.\ -"; - - fn test_path() -> &'static Path { - &Path::new("/baz.rs") - } - - type TestSearcher<'a> = BufferSearcher<'a, termcolor::NoColor>>; - - fn search TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = BufferSearcher::new( - &mut pp, &grep, test_path(), haystack.as_bytes()); - map(searcher).run() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - #[test] - fn basic_search() { - let (count, out) = search("Sherlock", SHERLOCK, |s|s); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn binary() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s|s); - assert_eq!(0, count); - assert_eq!(out, ""); - } - - - #[test] - fn binary_text() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); - } - - #[test] - fn line_numbers() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.line_number(true)); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn byte_offset() { - let (_, out) = search( - "Sherlock", SHERLOCK, |s| s.byte_offset(true)); - assert_eq!(out, "\ -/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn byte_offset_inverted() { - let (_, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).byte_offset(true) - }); - assert_eq!(out, "\ -/baz.rs:65:Holmeses, success in the province of detective work must always -/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:321:and exhibited clearly, with a label attached. 
-"); - } - - #[test] - fn count() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.count(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:2\n"); - } - - #[test] - fn count_matches() { - let (_, out) = search( - "the", SHERLOCK, |s| s.count_matches(true)); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn files_with_matches() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); - assert_eq!(1, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn files_without_matches() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.files_without_matches(true)); - assert_eq!(0, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn max_count() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match_max_count() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match() { - let (count, out) = search( - "Sherlock", SHERLOCK, |s| s.invert_match(true)); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:Holmeses, success in the province of detective work must always -/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_line_numbers() { - let (count, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).line_number(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_count() { - let (count, out) = search("Sherlock", SHERLOCK, |s| { - s.invert_match(true).count(true) - }); - assert_eq!(4, count); - assert_eq!(out, "/baz.rs:4\n"); - } -} diff --git a/src/search_stream.rs b/src/search_stream.rs deleted file mode 100644 index b218dd19..00000000 --- a/src/search_stream.rs +++ /dev/null @@ -1,1466 +0,0 @@ -/*! -The `search_stream` module is responsible for searching a single file and -printing matches. In particular, it searches the file in a streaming fashion -using `read` calls and a (roughly) fixed size buffer. -*/ - -use std::cmp; -use std::error::Error as StdError; -use std::fmt; -use std::io; -use std::path::{Path, PathBuf}; - -use bytecount; -use grep::{Grep, Match}; -use memchr::{memchr, memrchr}; -use termcolor::WriteColor; - -use printer::Printer; - -/// The default read size (capacity of input buffer). -const READ_SIZE: usize = 8 * (1<<10); - -/// Error describes errors that can occur while searching. -#[derive(Debug)] -pub enum Error { - /// A standard I/O error attached to a particular file path. - Io { - err: io::Error, - path: PathBuf, - } -} - -impl Error { - fn from_io>(err: io::Error, path: P) -> Error { - Error::Io { err: err, path: path.as_ref().to_path_buf() } - } -} - -impl StdError for Error { - fn description(&self) -> &str { - match *self { - Error::Io { ref err, .. 
} => err.description(), - } - } - - fn cause(&self) -> Option<&StdError> { - match *self { - Error::Io { ref err, .. } => Some(err), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Error::Io { ref err, ref path } => { - write!(f, "{}: {}", path.display(), err) - } - } - } -} - -pub struct Searcher<'a, R, W: 'a> { - opts: Options, - inp: &'a mut InputBuffer, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - haystack: R, - match_line_count: u64, - match_count: Option, - line_count: Option, - byte_offset: Option, - last_match: Match, - last_printed: usize, - last_line: usize, - after_context_remaining: usize, -} - -/// Options for configuring search. -#[derive(Clone)] -pub struct Options { - pub after_context: usize, - pub before_context: usize, - pub byte_offset: bool, - pub count: bool, - pub count_matches: bool, - pub files_with_matches: bool, - pub files_without_matches: bool, - pub eol: u8, - pub invert_match: bool, - pub line_number: bool, - pub max_count: Option, - pub quiet: bool, - pub text: bool, -} - -impl Default for Options { - fn default() -> Options { - Options { - after_context: 0, - before_context: 0, - byte_offset: false, - count: false, - count_matches: false, - files_with_matches: false, - files_without_matches: false, - eol: b'\n', - invert_match: false, - line_number: false, - max_count: None, - quiet: false, - text: false, - } - } - -} - -impl Options { - /// Several options (--quiet, --count, --count-matches, --files-with-matches, - /// --files-without-match) imply that we shouldn't ever display matches. - pub fn skip_matches(&self) -> bool { - self.count || self.files_with_matches || self.files_without_matches - || self.quiet || self.count_matches - } - - /// Some options (--quiet, --files-with-matches, --files-without-match) - /// imply that we can stop searching after the first match. - pub fn stop_after_first_match(&self) -> bool { - self.files_with_matches || self.files_without_matches || self.quiet - } - - /// Returns true if the search should terminate based on the match line count. - pub fn terminate(&self, match_line_count: u64) -> bool { - if match_line_count > 0 && self.stop_after_first_match() { - return true; - } - if self.max_count.map_or(false, |max| match_line_count >= max) { - return true; - } - false - } -} - -impl<'a, R: io::Read, W: WriteColor> Searcher<'a, R, W> { - /// Create a new searcher. - /// - /// `inp` is a reusable input buffer that is used as scratch space by this - /// searcher. - /// - /// `printer` is used to output all results of searching. - /// - /// `grep` is the actual matcher. - /// - /// `path` is the file path being searched. - /// - /// `haystack` is a reader of text to search. - pub fn new( - inp: &'a mut InputBuffer, - printer: &'a mut Printer, - grep: &'a Grep, - path: &'a Path, - haystack: R, - ) -> Searcher<'a, R, W> { - Searcher { - opts: Options::default(), - inp: inp, - printer: printer, - grep: grep, - path: path, - haystack: haystack, - match_line_count: 0, - match_count: None, - line_count: None, - byte_offset: None, - last_match: Match::default(), - last_printed: 0, - last_line: 0, - after_context_remaining: 0, - } - } - - /// The number of contextual lines to show after each match. The default - /// is zero. - pub fn after_context(mut self, count: usize) -> Self { - self.opts.after_context = count; - self - } - - /// The number of contextual lines to show before each match. The default - /// is zero. 
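The deleted `Error` type above exists to attach the offending file path to an `io::Error`, so messages read as "path: os error" rather than a bare OS error. Here is a std-only sketch of the same pattern; the type and function names are illustrative, not part of the patch.

    use std::fmt;
    use std::io;
    use std::path::{Path, PathBuf};

    /// Illustrative only: pair an I/O error with the path it occurred on.
    #[derive(Debug)]
    struct PathIoError {
        path: PathBuf,
        err: io::Error,
    }

    impl PathIoError {
        fn new<P: AsRef<Path>>(err: io::Error, path: P) -> PathIoError {
            PathIoError { path: path.as_ref().to_path_buf(), err: err }
        }
    }

    impl fmt::Display for PathIoError {
        fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
            // Same shape as the deleted Display impl: "<path>: <io error>".
            write!(f, "{}: {}", self.path.display(), self.err)
        }
    }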
- pub fn before_context(mut self, count: usize) -> Self { - self.opts.before_context = count; - self - } - - /// If enabled, searching will print a 0-based offset of the - /// matching line (or the actual match if -o is specified) before - /// printing the line itself. - /// - /// Disabled by default. - pub fn byte_offset(mut self, yes: bool) -> Self { - self.opts.byte_offset = yes; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the count of individual matches - /// instead of each match. - /// - /// Disabled by default. - pub fn count_matches(mut self, yes: bool) -> Self { - self.opts.count_matches = yes; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files without any matches. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. - pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self.inp.text(yes); - self - } - - /// Execute the search. Results are written to the printer and the total - /// number of matches is returned. - #[inline(never)] - pub fn run(mut self) -> Result { - self.inp.reset(); - self.match_line_count = 0; - self.line_count = if self.opts.line_number { Some(0) } else { None }; - self.byte_offset = if self.opts.byte_offset { Some(0) } else { None }; - self.match_count = if self.opts.count_matches { Some(0) } else { None }; - self.last_match = Match::default(); - self.after_context_remaining = 0; - while !self.terminate() { - let upto = self.inp.lastnl; - self.print_after_context(upto); - if !self.fill()? 
{ - break; - } - while !self.terminate() && self.inp.pos < self.inp.lastnl { - let matched = self.grep.read_match( - &mut self.last_match, - &self.inp.buf[..self.inp.lastnl], - self.inp.pos); - if self.opts.invert_match { - let upto = - if matched { - self.last_match.start() - } else { - self.inp.lastnl - }; - if upto > self.inp.pos { - let upto_context = self.inp.pos; - self.print_after_context(upto_context); - self.print_before_context(upto_context); - self.print_inverted_matches(upto); - } - } else if matched { - let start = self.last_match.start(); - let end = self.last_match.end(); - self.print_after_context(start); - self.print_before_context(start); - self.print_match(start, end); - } - if matched { - self.inp.pos = self.last_match.end(); - } else { - self.inp.pos = self.inp.lastnl; - } - } - } - if self.after_context_remaining > 0 { - if self.last_printed == self.inp.lastnl { - self.fill()?; - } - let upto = self.inp.lastnl; - if upto > 0 { - self.print_after_context(upto); - } - } - if self.match_line_count > 0 { - if self.opts.count { - self.printer.path_count(self.path, self.match_line_count); - } else if self.opts.count_matches { - self.printer.path_count(self.path, self.match_count.unwrap()); - } else if self.opts.files_with_matches { - self.printer.path(self.path); - } - } else if self.opts.files_without_matches { - self.printer.path(self.path); - } - Ok(self.match_line_count) - } - - #[inline(always)] - fn terminate(&self) -> bool { - self.opts.terminate(self.match_line_count) - } - - #[inline(always)] - fn fill(&mut self) -> Result { - let keep = - if self.opts.before_context > 0 || self.opts.after_context > 0 { - let lines = 1 + cmp::max( - self.opts.before_context, self.opts.after_context); - start_of_previous_lines( - self.opts.eol, - &self.inp.buf, - self.inp.lastnl.saturating_sub(1), - lines) - } else { - self.inp.lastnl - }; - if keep < self.last_printed { - self.last_printed -= keep; - } else { - self.last_printed = 0; - } - if keep <= self.last_line { - self.last_line -= keep; - } else { - self.count_lines(keep); - self.last_line = 0; - } - self.count_byte_offset(keep); - let ok = self.inp.fill(&mut self.haystack, keep).map_err(|err| { - Error::from_io(err, &self.path) - })?; - Ok(ok) - } - - #[inline(always)] - fn print_inverted_matches(&mut self, upto: usize) { - debug_assert!(self.opts.invert_match); - let mut it = IterLines::new(self.opts.eol, self.inp.pos); - while let Some((start, end)) = it.next(&self.inp.buf[..upto]) { - if self.terminate() { - return; - } - self.print_match(start, end); - self.inp.pos = end; - } - } - - #[inline(always)] - fn print_before_context(&mut self, upto: usize) { - if self.opts.skip_matches() || self.opts.before_context == 0 { - return; - } - let start = self.last_printed; - let end = upto; - if start >= end { - return; - } - let before_context_start = - start + start_of_previous_lines( - self.opts.eol, - &self.inp.buf[start..], - end - start - 1, - self.opts.before_context); - let mut it = IterLines::new(self.opts.eol, before_context_start); - while let Some((s, e)) = it.next(&self.inp.buf[..end]) { - self.print_separator(s); - self.print_context(s, e); - } - } - - #[inline(always)] - fn print_after_context(&mut self, upto: usize) { - if self.opts.skip_matches() || self.after_context_remaining == 0 { - return; - } - let start = self.last_printed; - let end = upto; - let mut it = IterLines::new(self.opts.eol, start); - while let Some((s, e)) = it.next(&self.inp.buf[..end]) { - self.print_context(s, e); - 
self.after_context_remaining -= 1; - if self.after_context_remaining == 0 { - break; - } - } - } - - #[inline(always)] - fn print_match(&mut self, start: usize, end: usize) { - self.match_line_count += 1; - self.count_individual_matches(start, end); - if self.opts.skip_matches() { - return; - } - self.print_separator(start); - self.count_lines(start); - self.add_line(end); - self.printer.matched( - self.grep.regex(), self.path, - &self.inp.buf, start, end, self.line_count, self.byte_offset); - self.last_printed = end; - self.after_context_remaining = self.opts.after_context; - } - - #[inline(always)] - fn print_context(&mut self, start: usize, end: usize) { - self.count_lines(start); - self.add_line(end); - self.printer.context( - &self.path, &self.inp.buf, start, end, - self.line_count, self.byte_offset); - self.last_printed = end; - } - - #[inline(always)] - fn print_separator(&mut self, before: usize) { - if self.opts.before_context == 0 && self.opts.after_context == 0 { - return; - } - if !self.printer.has_printed() { - return; - } - if (self.last_printed == 0 && before > 0) - || self.last_printed < before { - self.printer.context_separate(); - } - } - - #[inline(always)] - fn count_byte_offset(&mut self, buf_last_end: usize) { - if let Some(ref mut byte_offset) = self.byte_offset { - *byte_offset += buf_last_end as u64; - } - } - - #[inline(always)] - fn count_individual_matches(&mut self, start: usize, end: usize) { - if let Some(ref mut count) = self.match_count { - for _ in self.grep.regex().find_iter(&self.inp.buf[start..end]) { - *count += 1; - } - } - } - - #[inline(always)] - fn count_lines(&mut self, upto: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += count_lines( - &self.inp.buf[self.last_line..upto], self.opts.eol); - self.last_line = upto; - } - } - - #[inline(always)] - fn add_line(&mut self, line_end: usize) { - if let Some(ref mut line_count) = self.line_count { - *line_count += 1; - self.last_line = line_end; - } - } -} - -/// `InputBuffer` encapsulates the logic of maintaining a ~fixed sized buffer -/// on which to search. There are three key pieces of complexity: -/// -/// 1. We must be able to handle lines that are longer than the size of the -/// buffer. For this reason, the buffer is allowed to expand (and is -/// therefore not technically fixed). Note that once a buffer expands, it -/// will never contract. -/// 2. The contents of the buffer may end with a partial line, so we must keep -/// track of where the last complete line ends. Namely, the partial line -/// is only completed on subsequent reads *after* searching up through -/// the last complete line is done. -/// 3. When printing the context of a match, the last N lines of the buffer -/// may need to be rolled over into the next buffer. For example, a match -/// may occur at the beginning of a buffer, in which case, lines at the end -/// of the previous contents of the buffer need to be printed. -/// -/// An `InputBuffer` is designed to be reused and isn't tied to any particular -/// reader. -pub struct InputBuffer { - /// The number of bytes to attempt to read at a time. Once set, this is - /// never changed. - read_size: usize, - /// The end-of-line terminator used in this buffer. - eol: u8, - /// A scratch buffer. - tmp: Vec, - /// A buffer to read bytes into. All searches are executed directly against - /// this buffer and pos/lastnl/end point into it. - buf: Vec, - /// The current position in buf. 
The current position represents where the - /// next search should start. - pos: usize, - /// The position immediately following the last line terminator in buf. - /// This may be equal to end. - /// - /// Searching should never cross this boundary. In particular, the contents - /// of the buffer following this position may correspond to *partial* line. - /// All contents before this position are complete lines. - lastnl: usize, - /// The end position of the buffer. Data after this position is not - /// specified. - end: usize, - /// Set to true if and only if no reads have occurred yet. - first: bool, - /// Set to true if all binary data should be treated as if it were text. - text: bool, -} - -impl InputBuffer { - /// Create a new buffer with a default capacity. - pub fn new() -> InputBuffer { - InputBuffer::with_capacity(READ_SIZE) - } - - /// Create a new buffer with the capacity given. - /// - /// The capacity determines the size of each read from the underlying - /// reader. - /// - /// `cap` must be a minimum of `1`. - pub fn with_capacity(mut cap: usize) -> InputBuffer { - if cap == 0 { - cap = 1; - } - InputBuffer { - read_size: cap, - eol: b'\n', - buf: vec![0; cap], - tmp: vec![], - pos: 0, - lastnl: 0, - end: 0, - first: true, - text: false, - } - } - - /// Set the end-of-line terminator used by this input buffer. - pub fn eol(&mut self, eol: u8) -> &mut Self { - self.eol = eol; - self - } - - /// If enabled, search binary files as if they were text. - /// - /// Note that this may cause the buffer to load the entire contents of a - /// file into memory. - pub fn text(&mut self, yes: bool) -> &mut Self { - self.text = yes; - self - } - - /// Resets this buffer so that it may be reused with a new reader. - fn reset(&mut self) { - self.pos = 0; - self.lastnl = 0; - self.end = 0; - self.first = true; - } - - /// Fill the contents of this buffer with the reader given. The reader - /// given should be the same in every call to fill unless reset has been - /// called. - /// - /// The bytes in buf[keep_from..end] are rolled over into the beginning - /// of the buffer. - fn fill( - &mut self, - rdr: &mut R, - keep_from: usize, - ) -> Result { - // Rollover bytes from buf[keep_from..end] and update our various - // pointers. N.B. This could be done with the ptr::copy, but I haven't - // been able to produce a benchmark that notices a difference in - // performance. (Invariably, ptr::copy is seems clearer IMO, but it is - // not safe.) - self.tmp.clear(); - self.tmp.extend_from_slice(&self.buf[keep_from..self.end]); - self.buf[0..self.tmp.len()].copy_from_slice(&self.tmp); - self.pos = self.lastnl - keep_from; - self.lastnl = 0; - self.end = self.tmp.len(); - while self.lastnl == 0 { - // If our buffer isn't big enough to hold the contents of a full - // read, expand it. - if self.buf.len() - self.end < self.read_size { - let min_len = self.read_size + self.buf.len() - self.end; - let new_len = cmp::max(min_len, self.buf.len() * 2); - self.buf.resize(new_len, 0); - } - let n = rdr.read( - &mut self.buf[self.end..self.end + self.read_size])?; - if !self.text { - if is_binary(&self.buf[self.end..self.end + n], self.first) { - return Ok(false); - } - } - self.first = false; - // We assume that reading 0 bytes means we've hit EOF. - if n == 0 { - // If we've searched everything up to the end of the buffer, - // then there's nothing left to do. 
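The fill logic here is the heart of the streaming searcher: roll the unsearched tail of the buffer to the front, then keep reading until at least one complete line is available. Below is a much-simplified sketch of that rollover idea, using `memchr::memrchr` as the surrounding code does; the helper name and shape are illustrative only.

    use std::io::{self, Read};

    use memchr::memrchr;

    /// Illustrative only: feed complete lines to `f`, carrying any trailing
    /// partial line over to the next read.
    fn for_each_complete_chunk<R, F>(mut rdr: R, mut f: F) -> io::Result<()>
    where
        R: Read,
        F: FnMut(&[u8]),
    {
        let mut buf = Vec::new(); // always begins at a line boundary
        let mut chunk = [0u8; 8192];
        loop {
            let n = rdr.read(&mut chunk)?;
            if n == 0 {
                if !buf.is_empty() {
                    f(&buf[..]); // whatever remains has no trailing terminator
                }
                return Ok(());
            }
            buf.extend_from_slice(&chunk[..n]);
            if let Some(i) = memrchr(b'\n', &buf[..]) {
                f(&buf[..i + 1]); // everything up through the last complete line
                buf.drain(..i + 1); // roll the partial tail over
            }
        }
    }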
- if self.end - self.pos == 0 { - return Ok(false); - } - // Even if we hit EOF, we might still have to search the - // last line if it didn't contain a trailing terminator. - self.lastnl = self.end; - break; - } - self.lastnl = - memrchr(self.eol, &self.buf[self.end..self.end + n]) - .map(|i| self.end + i + 1) - .unwrap_or(0); - self.end += n; - } - Ok(true) - } -} - -/// Returns true if and only if the given buffer is determined to be "binary" -/// or otherwise not contain text data that is usefully searchable. -/// -/// Note that this may return both false positives and false negatives. -#[inline(always)] -pub fn is_binary(buf: &[u8], first: bool) -> bool { - if first && buf.len() >= 4 && &buf[0..4] == b"%PDF" { - return true; - } - memchr(b'\x00', buf).is_some() -} - -/// Count the number of lines in the given buffer. -#[inline(never)] -pub fn count_lines(buf: &[u8], eol: u8) -> u64 { - bytecount::count(buf, eol) as u64 -} - -/// Replaces a with b in buf. -#[allow(dead_code)] -fn replace_buf(buf: &mut [u8], a: u8, b: u8) { - if a == b { - return; - } - let mut pos = 0; - while let Some(i) = memchr(a, &buf[pos..]).map(|i| pos + i) { - buf[i] = b; - pos = i + 1; - while buf.get(pos) == Some(&a) { - buf[pos] = b; - pos += 1; - } - } -} - -/// An "iterator" over lines in a particular buffer. -/// -/// Idiomatic Rust would borrow the buffer and use it as internal state to -/// advance over the positions of each line. We neglect that approach to avoid -/// the borrow in the search code. (Because the borrow prevents composition -/// through other mutable methods.) -pub struct IterLines { - eol: u8, - pos: usize, -} - -impl IterLines { - /// Creates a new iterator over lines starting at the position given. - /// - /// The buffer is passed to the `next` method. - #[inline(always)] - pub fn new(eol: u8, start: usize) -> IterLines { - IterLines { - eol: eol, - pos: start, - } - } - - /// Return the start and end position of the next line in the buffer. The - /// buffer given should be the same on every call. - /// - /// The range returned includes the new line. - #[inline(always)] - pub fn next(&mut self, buf: &[u8]) -> Option<(usize, usize)> { - match memchr(self.eol, &buf[self.pos..]) { - None => { - if self.pos < buf.len() { - let start = self.pos; - self.pos = buf.len(); - Some((start, buf.len())) - } else { - None - } - } - Some(end) => { - let start = self.pos; - let end = self.pos + end + 1; - self.pos = end; - Some((start, end)) - } - } - } -} - -/// Returns the starting index of the Nth line preceding `end`. -/// -/// If `buf` is empty, then `0` is returned. If `count` is `0`, then `end` is -/// returned. -/// -/// If `end` points at a new line in `buf`, then searching starts as if `end` -/// pointed immediately before the new line. -/// -/// The position returned corresponds to the first byte in the given line. -#[inline(always)] -fn start_of_previous_lines( - eol: u8, - buf: &[u8], - mut end: usize, - mut count: usize, -) -> usize { - // TODO(burntsushi): This function needs to be badly simplified. The case - // analysis is impossible to follow. 
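The `is_binary` and `count_lines` helpers above are small but load-bearing: the former gates whether a file is searched at all (a NUL byte or a `%PDF` header marks it binary), and the latter is what line numbering ultimately costs. A compact demonstration of their semantics follows, written against `memchr`; the original uses the `bytecount` crate for the counting, and these function names are illustrative.

    use memchr::{memchr, memchr_iter};

    fn looks_binary(buf: &[u8], first: bool) -> bool {
        // Same rule as is_binary above: PDFs are binary, and so is anything
        // containing a NUL byte.
        if first && buf.len() >= 4 && &buf[0..4] == b"%PDF" {
            return true;
        }
        memchr(b'\x00', buf).is_some()
    }

    fn count_eols(buf: &[u8], eol: u8) -> u64 {
        memchr_iter(eol, buf).count() as u64
    }

    fn demo() {
        assert!(looks_binary(b"%PDF-1.7 ...", true));
        assert!(looks_binary(b"abc\x00def", false));
        assert_eq!(3, count_eols(b"a\nb\nc\n", b'\n'));
    }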
- if buf[..end].is_empty() { - return 0; - } - if count == 0 { - return end; - } - if end == buf.len() { - end -= 1; - } - if buf[end] == eol { - if end == 0 { - return end + 1; - } - end -= 1; - } - while count > 0 { - if buf[end] == eol { - count -= 1; - if count == 0 { - return end + 1; - } - if end == 0 { - return end; - } - end -= 1; - continue; - } - match memrchr(eol, &buf[..end]) { - None => { - return 0; - } - Some(i) => { - count -= 1; - end = i; - if end == 0 { - if buf[end] == eol && count == 0 { - end += 1; - } - return end; - } - end -= 1; - } - } - } - end + 2 -} - -#[cfg(test)] -mod tests { - use std::io; - use std::path::Path; - - use grep::GrepBuilder; - use printer::Printer; - use termcolor; - - use super::{InputBuffer, Searcher, start_of_previous_lines}; - - const SHERLOCK: &'static str = "\ -For the Doctor Watsons of this world, as opposed to the Sherlock -Holmeses, success in the province of detective work must always -be, to a very large extent, the result of luck. Sherlock Holmes -can extract a clew from a wisp of straw or a flake of cigar ash; -but Doctor Watson has to have it taken out for him and dusted, -and exhibited clearly, with a label attached.\ -"; - - const CODE: &'static str = "\ -extern crate snap; - -use std::io; - -fn main() { - let stdin = io::stdin(); - let stdout = io::stdout(); - - // Wrap the stdin reader in a Snappy reader. - let mut rdr = snap::Reader::new(stdin.lock()); - let mut wtr = stdout.lock(); - io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -} -"; - - fn hay(s: &str) -> io::Cursor> { - io::Cursor::new(s.to_string().into_bytes()) - } - - fn test_path() -> &'static Path { - &Path::new("/baz.rs") - } - - type TestSearcher<'a> = Searcher< - 'a, - io::Cursor>, - termcolor::NoColor>, - >; - - fn search_smallcap TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let mut inp = InputBuffer::with_capacity(1); - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = Searcher::new( - &mut inp, &mut pp, &grep, test_path(), hay(haystack)); - map(searcher).run().unwrap() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - fn search TestSearcher>( - pat: &str, - haystack: &str, - mut map: F, - ) -> (u64, String) { - let mut inp = InputBuffer::with_capacity(4096); - let outbuf = termcolor::NoColor::new(vec![]); - let mut pp = Printer::new(outbuf).with_filename(true); - let grep = GrepBuilder::new(pat).build().unwrap(); - let count = { - let searcher = Searcher::new( - &mut inp, &mut pp, &grep, test_path(), hay(haystack)); - map(searcher).run().unwrap() - }; - (count, String::from_utf8(pp.into_inner().into_inner()).unwrap()) - } - - #[test] - fn previous_lines() { - let eol = b'\n'; - let text = SHERLOCK.as_bytes(); - assert_eq!(366, text.len()); - - assert_eq!(0, start_of_previous_lines(eol, text, 366, 100)); - assert_eq!(366, start_of_previous_lines(eol, text, 366, 0)); - - assert_eq!(321, start_of_previous_lines(eol, text, 366, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 365, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 364, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 322, 1)); - assert_eq!(321, start_of_previous_lines(eol, text, 321, 1)); - assert_eq!(258, start_of_previous_lines(eol, text, 320, 1)); - - assert_eq!(258, start_of_previous_lines(eol, text, 366, 2)); - assert_eq!(258, 
start_of_previous_lines(eol, text, 365, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 364, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 322, 2)); - assert_eq!(258, start_of_previous_lines(eol, text, 321, 2)); - assert_eq!(193, start_of_previous_lines(eol, text, 320, 2)); - - assert_eq!(65, start_of_previous_lines(eol, text, 66, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 66, 2)); - assert_eq!(64, start_of_previous_lines(eol, text, 64, 0)); - assert_eq!(0, start_of_previous_lines(eol, text, 64, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 64, 2)); - - assert_eq!(0, start_of_previous_lines(eol, text, 0, 2)); - assert_eq!(0, start_of_previous_lines(eol, text, 0, 1)); - } - - #[test] - fn previous_lines_short() { - let eol = b'\n'; - let text = &b"a\nb\nc\nd\ne\nf\n"[..]; - assert_eq!(12, text.len()); - - assert_eq!(10, start_of_previous_lines(eol, text, 12, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 12, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 12, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 12, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 12, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 12, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 12, 7)); - assert_eq!(10, start_of_previous_lines(eol, text, 11, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 11, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 11, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 11, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 11, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 11, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 11, 7)); - assert_eq!(10, start_of_previous_lines(eol, text, 10, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 10, 2)); - assert_eq!(6, start_of_previous_lines(eol, text, 10, 3)); - assert_eq!(4, start_of_previous_lines(eol, text, 10, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 10, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 7)); - - assert_eq!(8, start_of_previous_lines(eol, text, 9, 1)); - assert_eq!(8, start_of_previous_lines(eol, text, 8, 1)); - - assert_eq!(6, start_of_previous_lines(eol, text, 7, 1)); - assert_eq!(6, start_of_previous_lines(eol, text, 6, 1)); - - assert_eq!(4, start_of_previous_lines(eol, text, 5, 1)); - assert_eq!(4, start_of_previous_lines(eol, text, 4, 1)); - - assert_eq!(2, start_of_previous_lines(eol, text, 3, 1)); - assert_eq!(2, start_of_previous_lines(eol, text, 2, 1)); - - assert_eq!(0, start_of_previous_lines(eol, text, 1, 1)); - assert_eq!(0, start_of_previous_lines(eol, text, 0, 1)); - } - - #[test] - fn previous_lines_empty() { - let eol = b'\n'; - let text = &b"\n\n\nd\ne\nf\n"[..]; - assert_eq!(9, text.len()); - - assert_eq!(7, start_of_previous_lines(eol, text, 9, 1)); - assert_eq!(5, start_of_previous_lines(eol, text, 9, 2)); - assert_eq!(3, start_of_previous_lines(eol, text, 9, 3)); - assert_eq!(2, start_of_previous_lines(eol, text, 9, 4)); - assert_eq!(1, start_of_previous_lines(eol, text, 9, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 9, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 9, 7)); - - let text = &b"a\n\n\nd\ne\nf\n"[..]; - assert_eq!(10, text.len()); - - assert_eq!(8, start_of_previous_lines(eol, text, 10, 1)); - assert_eq!(6, start_of_previous_lines(eol, text, 10, 2)); - assert_eq!(4, start_of_previous_lines(eol, text, 10, 3)); - assert_eq!(3, 
start_of_previous_lines(eol, text, 10, 4)); - assert_eq!(2, start_of_previous_lines(eol, text, 10, 5)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 6)); - assert_eq!(0, start_of_previous_lines(eol, text, 10, 7)); - } - - #[test] - fn basic_search1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s|s); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn binary() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s|s); - assert_eq!(0, count); - assert_eq!(out, ""); - } - - #[test] - fn binary_text() { - let text = "Sherlock\n\x00Holmes\n"; - let (count, out) = search("Sherlock|Holmes", text, |s| s.text(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:Sherlock\n/baz.rs:\x00Holmes\n"); - } - - #[test] - fn line_numbers() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.line_number(true)); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn count() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.count(true)); - assert_eq!(2, count); - assert_eq!(out, "/baz.rs:2\n"); - } - - #[test] - fn byte_offset() { - let (_, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.byte_offset(true)); - assert_eq!(out, "\ -/baz.rs:0:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:129:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn byte_offset_with_before_context() { - let (_, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).byte_offset(true).before_context(2) - }); - assert_eq!(out, "\ -/baz.rs-3-129-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-193-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:258:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn byte_offset_inverted() { - let (_, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).byte_offset(true) - }); - assert_eq!(out, "\ -/baz.rs:65:Holmeses, success in the province of detective work must always -/baz.rs:193:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:258:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:321:and exhibited clearly, with a label attached. 
-"); - } - - #[test] - fn count_matches() { - let (_, out) = search_smallcap( - "the", SHERLOCK, |s| s.count_matches(true)); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn files_with_matches() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.files_with_matches(true)); - assert_eq!(1, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn files_without_matches() { - let (count, out) = search_smallcap( - "zzzz", SHERLOCK, |s| s.files_without_matches(true)); - assert_eq!(0, count); - assert_eq!(out, "/baz.rs\n"); - } - - #[test] - fn max_count() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match_max_count() { - let (count, out) = search( - "zzzz", SHERLOCK, |s| s.invert_match(true).max_count(Some(1))); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:For the Doctor Watsons of this world, as opposed to the Sherlock -"); - } - - #[test] - fn invert_match() { - let (count, out) = search_smallcap( - "Sherlock", SHERLOCK, |s| s.invert_match(true)); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:Holmeses, success in the province of detective work must always -/baz.rs:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_line_numbers() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).line_number(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn invert_match_count() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.invert_match(true).count(true) - }); - assert_eq!(4, count); - assert_eq!(out, "/baz.rs:4\n"); - } - - #[test] - fn before_context_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(1) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn before_context_invert_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(1).invert_match(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs-1-For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. 
-"); - } - - #[test] - fn before_context_invert_one2() { - let (count, out) = search_smallcap(" a ", SHERLOCK, |s| { - s.line_number(true).before_context(1).invert_match(true) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always --- -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn before_context_two1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -"); - } - - #[test] - fn before_context_two2() { - let (count, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn before_context_two3() { - let (count, out) = search_smallcap( - "success|attached", SHERLOCK, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-1-For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always --- -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_context_two4() { - let (count, out) = search("stdin", CODE, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs-4- -/baz.rs-5-fn main() { -/baz.rs:6: let stdin = io::stdin(); -/baz.rs-7- let stdout = io::stdout(); -/baz.rs-8- -/baz.rs:9: // Wrap the stdin reader in a Snappy reader. -/baz.rs:10: let mut rdr = snap::Reader::new(stdin.lock()); -"); - } - - #[test] - fn before_context_two5() { - let (count, out) = search("stdout", CODE, |s| { - s.line_number(true).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-5-fn main() { -/baz.rs-6- let stdin = io::stdin(); -/baz.rs:7: let stdout = io::stdout(); --- -/baz.rs-9- // Wrap the stdin reader in a Snappy reader. -/baz.rs-10- let mut rdr = snap::Reader::new(stdin.lock()); -/baz.rs:11: let mut wtr = stdout.lock(); -"); - } - - #[test] - fn before_context_three1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).before_context(3) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. 
Sherlock Holmes -"); - } - - #[test] - fn after_context_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(1) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -"); - } - - #[test] - fn after_context_invert_one1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(1).invert_match(true) - }); - assert_eq!(4, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_invert_one2() { - let (count, out) = search_smallcap(" a ", SHERLOCK, |s| { - s.line_number(true).after_context(1).invert_match(true) - }); - assert_eq!(3, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes --- -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_invert_one_max_count_two() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true) - .invert_match(true) - .after_context(1) - .max_count(Some(2)) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs:4:can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn after_context_two1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -"); - } - - #[test] - fn after_context_two2() { - let (count, out) = search_smallcap("dusted", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(1, count); - assert_eq!(out, "\ -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_two3() { - let (count, out) = search_smallcap( - "success|attached", SHERLOCK, |s| { - s.line_number(true).after_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:2:Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. 
Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; --- -/baz.rs:6:and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_two_max_count_two() { - let (count, out) = search_smallcap( - "Doctor", SHERLOCK, |s| { - s.line_number(true).after_context(2).max_count(Some(2)) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs-3-be, to a very large extent, the result of luck. Sherlock Holmes --- -/baz.rs:5:but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn after_context_three1() { - let (count, out) = search_smallcap("Sherlock", SHERLOCK, |s| { - s.line_number(true).after_context(3) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs:1:For the Doctor Watsons of this world, as opposed to the Sherlock -/baz.rs-2-Holmeses, success in the province of detective work must always -/baz.rs:3:be, to a very large extent, the result of luck. Sherlock Holmes -/baz.rs-4-can extract a clew from a wisp of straw or a flake of cigar ash; -/baz.rs-5-but Doctor Watson has to have it taken out for him and dusted, -/baz.rs-6-and exhibited clearly, with a label attached. -"); - } - - #[test] - fn before_after_context_two1() { - let (count, out) = search( - r"fn main|let mut rdr", CODE, |s| { - s.line_number(true).after_context(2).before_context(2) - }); - assert_eq!(2, count); - assert_eq!(out, "\ -/baz.rs-3-use std::io; -/baz.rs-4- -/baz.rs:5:fn main() { -/baz.rs-6- let stdin = io::stdin(); -/baz.rs-7- let stdout = io::stdout(); -/baz.rs-8- -/baz.rs-9- // Wrap the stdin reader in a Snappy reader. -/baz.rs:10: let mut rdr = snap::Reader::new(stdin.lock()); -/baz.rs-11- let mut wtr = stdout.lock(); -/baz.rs-12- io::copy(&mut rdr, &mut wtr).expect(\"I/O operation failed\"); -"); - } -} diff --git a/src/subject.rs b/src/subject.rs new file mode 100644 index 00000000..61b34554 --- /dev/null +++ b/src/subject.rs @@ -0,0 +1,230 @@ +use std::io; +use std::path::Path; +use std::sync::Arc; + +use ignore::{self, DirEntry}; +use same_file::Handle; + +/// A configuration for describing how subjects should be built. +#[derive(Clone, Debug)] +struct Config { + skip: Option>, + strip_dot_prefix: bool, + separator: Option, + terminator: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + skip: None, + strip_dot_prefix: false, + separator: None, + terminator: None, + } + } +} + +/// A builder for constructing things to search over. +#[derive(Clone, Debug)] +pub struct SubjectBuilder { + config: Config, +} + +impl SubjectBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> SubjectBuilder { + SubjectBuilder { config: Config::default() } + } + + /// Create a new subject from a possibly missing directory entry. + /// + /// If the directory entry isn't present, then the corresponding error is + /// logged if messages have been configured. Otherwise, if the subject is + /// deemed searchable, then it is returned. + pub fn build_from_result( + &self, + result: Result, + ) -> Option { + match result { + Ok(dent) => self.build(dent), + Err(err) => { + message!("{}", err); + None + } + } + } + + /// Create a new subject using this builder's configuration. 
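`build_from_result` above is the bridge from the `ignore` crate's directory walker to something searchable: walker errors are reported and skipped rather than aborting the run. A sketch of the consuming side using the `ignore` crate's public API follows; the simple file-type filter here is only a stand-in for the fuller `build` logic that comes next.

    use ignore::WalkBuilder;

    fn walk_sketch() {
        // Each item is a Result<DirEntry, ignore::Error>, mirroring what
        // build_from_result above receives.
        for result in WalkBuilder::new("./").build() {
            match result {
                Ok(dent) => {
                    if dent.file_type().map_or(false, |ft| ft.is_file()) {
                        println!("would search {}", dent.path().display());
                    }
                }
                Err(err) => eprintln!("{}", err),
            }
        }
    }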
+ /// + /// If a subject could not be created or should otherwise not be searched, + /// then this returns `None` after emitting any relevant log messages. + pub fn build(&self, dent: DirEntry) -> Option { + let subj = Subject { + dent: dent, + strip_dot_prefix: self.config.strip_dot_prefix, + }; + if let Some(ignore_err) = subj.dent.error() { + ignore_message!("{}", ignore_err); + } + // If this entry represents stdin, then we always search it. + if subj.dent.is_stdin() { + return Some(subj); + } + // If we're supposed to skip a particular file, then skip it. + if let Some(ref handle) = self.config.skip { + match subj.equals(handle) { + Ok(false) => {} // fallthrough + Ok(true) => { + debug!( + "ignoring {}: (probably same file as stdout)", + subj.dent.path().display() + ); + return None; + } + Err(err) => { + debug!( + "ignoring {}: got error: {}", + subj.dent.path().display(), err + ); + return None; + } + } + } + // If this subject has a depth of 0, then it was provided explicitly + // by an end user (or via a shell glob). In this case, we always want + // to search it if it even smells like a file (e.g., a symlink). + if subj.dent.depth() == 0 && !subj.is_dir() { + return Some(subj); + } + // At this point, we only want to search something it's explicitly a + // file. This omits symlinks. (If ripgrep was configured to follow + // symlinks, then they have already been followed by the directory + // traversal.) + if subj.is_file() { + return Some(subj); + } + // We got nothin. Emit a debug message, but only if this isn't a + // directory. Otherwise, emitting messages for directories is just + // noisy. + if !subj.is_dir() { + debug!( + "ignoring {}: failed to pass subject filter: \ + file type: {:?}, metadata: {:?}", + subj.dent.path().display(), + subj.dent.file_type(), + subj.dent.metadata() + ); + } + None + } + + /// When provided, subjects that represent the same file as the handle + /// given will be skipped. + /// + /// Typically, it is useful to pass a handle referring to stdout, such + /// that the file being written to isn't searched, which can lead to + /// an unbounded feedback mechanism. + /// + /// Only one handle to skip can be provided. + pub fn skip( + &mut self, + handle: Option, + ) -> &mut SubjectBuilder { + self.config.skip = handle.map(Arc::new); + self + } + + /// When enabled, if the subject's file path starts with `./` then it is + /// stripped. + /// + /// This is useful when implicitly searching the current working directory. + pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder { + self.config.strip_dot_prefix = yes; + self + } +} + +/// A subject is a thing we want to search. Generally, a subject is either a +/// file or stdin. +#[derive(Clone, Debug)] +pub struct Subject { + dent: DirEntry, + strip_dot_prefix: bool, +} + +impl Subject { + /// Return the file path corresponding to this subject. + /// + /// If this subject corresponds to stdin, then a special `` path + /// is returned instead. + pub fn path(&self) -> &Path { + if self.strip_dot_prefix && self.dent.path().starts_with("./") { + self.dent.path().strip_prefix("./").unwrap() + } else { + self.dent.path() + } + } + + /// Returns true if and only if this entry corresponds to stdin. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Returns true if and only if this subject points to a directory. 
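The `skip` option above is what lets ripgrep avoid searching its own redirected output: the caller passes a `same_file::Handle` for stdout, and any subject that resolves to the same underlying file is dropped. A minimal sketch of that check with the `same_file` crate; the function name is illustrative.

    use std::io;
    use std::path::Path;

    use same_file::Handle;

    /// Illustrative only: does `path` refer to the same file as stdout?
    fn is_stdout(path: &Path) -> io::Result<bool> {
        // Handles compare equal when they refer to the same underlying file,
        // which is the property Subject::equals relies on below.
        Ok(Handle::stdout()? == Handle::from_path(path)?)
    }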
+ /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_dir(&self) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + + self.dent.metadata().map(|md| { + md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + }).unwrap_or(false) + } + + /// Returns true if and only if this subject points to a directory. + #[cfg(not(windows))] + fn is_dir(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_dir()) + } + + /// Returns true if and only if this subject points to a file. + /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Returns true if and only if this subject points to a file. + #[cfg(not(windows))] + fn is_file(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_file()) + } + + /// Returns true if and only if this subject is believed to be equivalent + /// to the given handle. If there was a problem querying this subject for + /// information to determine equality, then that error is returned. + fn equals(&self, handle: &Handle) -> io::Result { + #[cfg(unix)] + fn never_equal(dent: &DirEntry, handle: &Handle) -> bool { + dent.ino() != Some(handle.ino()) + } + + #[cfg(not(unix))] + fn never_equal(_: &DirEntry, _: &Handle) -> bool { + false + } + + // If we know for sure that these two things aren't equal, then avoid + // the costly extra stat call to determine equality. + if self.dent.is_stdin() || never_equal(&self.dent, handle) { + return Ok(false); + } + Handle::from_path(self.path()).map(|h| &h == handle) + } +} diff --git a/src/worker.rs b/src/worker.rs deleted file mode 100644 index 8e840400..00000000 --- a/src/worker.rs +++ /dev/null @@ -1,413 +0,0 @@ -use std::fs::File; -use std::io; -use std::path::{Path, PathBuf}; - -use encoding_rs::Encoding; -use grep::Grep; -use ignore::DirEntry; -use memmap::Mmap; -use termcolor::WriteColor; - -// use decoder::DecodeReader; -use encoding_rs_io::DecodeReaderBytesBuilder; -use decompressor::{self, DecompressionReader}; -use preprocessor::PreprocessorReader; -use pathutil::strip_prefix; -use printer::Printer; -use search_buffer::BufferSearcher; -use search_stream::{InputBuffer, Searcher}; - -use Result; - -pub enum Work { - Stdin, - DirEntry(DirEntry), -} - -pub struct WorkerBuilder { - grep: Grep, - opts: Options, -} - -#[derive(Clone, Debug)] -struct Options { - mmap: bool, - encoding: Option<&'static Encoding>, - after_context: usize, - before_context: usize, - byte_offset: bool, - count: bool, - count_matches: bool, - files_with_matches: bool, - files_without_matches: bool, - eol: u8, - invert_match: bool, - line_number: bool, - max_count: Option, - no_messages: bool, - quiet: bool, - text: bool, - preprocessor: Option, - search_zip_files: bool -} - -impl Default for Options { - fn default() -> Options { - Options { - mmap: false, - encoding: None, - after_context: 0, - before_context: 0, - byte_offset: false, - count: false, - count_matches: false, - files_with_matches: false, - files_without_matches: false, - eol: b'\n', - invert_match: false, - line_number: false, - max_count: None, - no_messages: false, - quiet: false, - text: false, - search_zip_files: false, - preprocessor: None, - } - } -} - -impl WorkerBuilder { - /// Create a new builder for a worker. 
- /// - /// A reusable input buffer and a grep matcher are required, but there - /// are numerous additional options that can be configured on this builder. - pub fn new(grep: Grep) -> WorkerBuilder { - WorkerBuilder { - grep: grep, - opts: Options::default(), - } - } - - /// Create the worker from this builder. - pub fn build(self) -> Worker { - let mut inpbuf = InputBuffer::new(); - inpbuf.eol(self.opts.eol); - Worker { - grep: self.grep, - inpbuf: inpbuf, - decodebuf: vec![0; 8 * (1<<10)], - opts: self.opts, - } - } - - /// The number of contextual lines to show after each match. The default - /// is zero. - pub fn after_context(mut self, count: usize) -> Self { - self.opts.after_context = count; - self - } - - /// The number of contextual lines to show before each match. The default - /// is zero. - pub fn before_context(mut self, count: usize) -> Self { - self.opts.before_context = count; - self - } - - /// If enabled, searching will print a 0-based offset of the - /// matching line (or the actual match if -o is specified) before - /// printing the line itself. - /// - /// Disabled by default. - pub fn byte_offset(mut self, yes: bool) -> Self { - self.opts.byte_offset = yes; - self - } - - /// If enabled, searching will print a count instead of each match. - /// - /// Disabled by default. - pub fn count(mut self, yes: bool) -> Self { - self.opts.count = yes; - self - } - - /// If enabled, searching will print the count of individual matches - /// instead of each match. - /// - /// Disabled by default. - pub fn count_matches(mut self, yes: bool) -> Self { - self.opts.count_matches = yes; - self - } - - /// Set the encoding to use to read each file. - /// - /// If the encoding is `None` (the default), then the encoding is - /// automatically detected on a best-effort per-file basis. - pub fn encoding(mut self, enc: Option<&'static Encoding>) -> Self { - self.opts.encoding = enc; - self - } - - /// If enabled, searching will print the path instead of each match. - /// - /// Disabled by default. - pub fn files_with_matches(mut self, yes: bool) -> Self { - self.opts.files_with_matches = yes; - self - } - - /// If enabled, searching will print the path of files without any matches. - /// - /// Disabled by default. - pub fn files_without_matches(mut self, yes: bool) -> Self { - self.opts.files_without_matches = yes; - self - } - - /// Set the end-of-line byte used by this searcher. - pub fn eol(mut self, eol: u8) -> Self { - self.opts.eol = eol; - self - } - - /// If enabled, matching is inverted so that lines that *don't* match the - /// given pattern are treated as matches. - pub fn invert_match(mut self, yes: bool) -> Self { - self.opts.invert_match = yes; - self - } - - /// If enabled, compute line numbers and prefix each line of output with - /// them. - pub fn line_number(mut self, yes: bool) -> Self { - self.opts.line_number = yes; - self - } - - /// Limit the number of matches to the given count. - /// - /// The default is None, which corresponds to no limit. - pub fn max_count(mut self, count: Option) -> Self { - self.opts.max_count = count; - self - } - - /// If enabled, try to use memory maps for searching if possible. - pub fn mmap(mut self, yes: bool) -> Self { - self.opts.mmap = yes; - self - } - - /// If enabled, error messages are suppressed. - /// - /// This is disabled by default. - pub fn no_messages(mut self, yes: bool) -> Self { - self.opts.no_messages = yes; - self - } - - /// If enabled, don't show any output and quit searching after the first - /// match is found. 
- pub fn quiet(mut self, yes: bool) -> Self { - self.opts.quiet = yes; - self - } - - /// If enabled, search binary files as if they were text. - pub fn text(mut self, yes: bool) -> Self { - self.opts.text = yes; - self - } - - /// If enabled, search through compressed files as well - pub fn search_zip_files(mut self, yes: bool) -> Self { - self.opts.search_zip_files = yes; - self - } - - /// If non-empty, search output of preprocessor run on each file - pub fn preprocessor(mut self, command: Option) -> Self { - self.opts.preprocessor = command; - self - } -} - -/// Worker is responsible for executing searches on file paths, while choosing -/// streaming search or memory map search as appropriate. -pub struct Worker { - grep: Grep, - inpbuf: InputBuffer, - decodebuf: Vec, - opts: Options, -} - -impl Worker { - /// Execute the worker with the given printer and work item. - /// - /// A work item can either be stdin or a file path. - pub fn run( - &mut self, - printer: &mut Printer, - work: Work, - ) -> u64 { - let result = match work { - Work::Stdin => { - let stdin = io::stdin(); - let stdin = stdin.lock(); - self.search(printer, Path::new(""), stdin) - } - Work::DirEntry(dent) => { - let mut path = dent.path(); - if self.opts.preprocessor.is_some() { - let cmd = self.opts.preprocessor.clone().unwrap(); - match PreprocessorReader::from_cmd_path(cmd, path) { - Ok(reader) => self.search(printer, path, reader), - Err(err) => { - if !self.opts.no_messages { - eprintln!("{}", err); - } - return 0; - } - } - } else if self.opts.search_zip_files - && decompressor::is_compressed(path) - { - match DecompressionReader::from_path(path) { - Some(reader) => self.search(printer, path, reader), - None => { - return 0; - } - } - } else { - let file = match File::open(path) { - Ok(file) => file, - Err(err) => { - if !self.opts.no_messages { - eprintln!("{}: {}", path.display(), err); - } - return 0; - } - }; - if let Some(p) = strip_prefix("./", path) { - path = p; - } - if self.opts.mmap { - self.search_mmap(printer, path, &file) - } else { - self.search(printer, path, file) - } - } - } - }; - match result { - Ok(count) => { - count - } - Err(err) => { - if !self.opts.no_messages { - eprintln!("{}", err); - } - 0 - } - } - } - - fn search( - &mut self, - printer: &mut Printer, - path: &Path, - rdr: R, - ) -> Result { - let rdr = DecodeReaderBytesBuilder::new() - .encoding(self.opts.encoding) - .utf8_passthru(true) - .build_with_buffer(rdr, &mut self.decodebuf)?; - let searcher = Searcher::new( - &mut self.inpbuf, printer, &self.grep, path, rdr); - searcher - .after_context(self.opts.after_context) - .before_context(self.opts.before_context) - .byte_offset(self.opts.byte_offset) - .count(self.opts.count) - .count_matches(self.opts.count_matches) - .files_with_matches(self.opts.files_with_matches) - .files_without_matches(self.opts.files_without_matches) - .eol(self.opts.eol) - .line_number(self.opts.line_number) - .invert_match(self.opts.invert_match) - .max_count(self.opts.max_count) - .quiet(self.opts.quiet) - .text(self.opts.text) - .run() - .map_err(From::from) - } - - fn search_mmap( - &mut self, - printer: &mut Printer, - path: &Path, - file: &File, - ) -> Result { - if file.metadata()?.len() == 0 { - // Opening a memory map with an empty file results in an error. - // However, this may not actually be an empty file! For example, - // /proc/cpuinfo reports itself as an empty file, but it can - // produce data when it's read from. Therefore, we fall back to - // regular read calls. 
- return self.search(printer, path, file); - } - let mmap = match self.mmap(file)? { - None => return self.search(printer, path, file), - Some(mmap) => mmap, - }; - let buf = &*mmap; - if buf.len() >= 3 && Encoding::for_bom(buf).is_some() { - // If we have a UTF-16 bom in our memory map, then we need to fall - // back to the stream reader, which will do transcoding. - return self.search(printer, path, file); - } - let searcher = BufferSearcher::new(printer, &self.grep, path, buf); - Ok(searcher - .byte_offset(self.opts.byte_offset) - .count(self.opts.count) - .count_matches(self.opts.count_matches) - .files_with_matches(self.opts.files_with_matches) - .files_without_matches(self.opts.files_without_matches) - .eol(self.opts.eol) - .line_number(self.opts.line_number) - .invert_match(self.opts.invert_match) - .max_count(self.opts.max_count) - .quiet(self.opts.quiet) - .text(self.opts.text) - .run()) - } - - #[cfg(not(unix))] - fn mmap(&self, file: &File) -> Result> { - Ok(Some(mmap_readonly(file)?)) - } - - #[cfg(unix)] - fn mmap(&self, file: &File) -> Result> { - use libc::{EOVERFLOW, ENODEV, ENOMEM}; - - let err = match mmap_readonly(file) { - Ok(mmap) => return Ok(Some(mmap)), - Err(err) => err, - }; - let code = err.raw_os_error(); - if code == Some(EOVERFLOW) - || code == Some(ENODEV) - || code == Some(ENOMEM) - { - return Ok(None); - } - Err(From::from(err)) - } -} - -fn mmap_readonly(file: &File) -> io::Result { - unsafe { Mmap::map(file) } -} diff --git a/tests/tests.rs b/tests/tests.rs index 2ddab867..1c40f22e 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -91,8 +91,8 @@ be, to a very large extent, the result of luck. Sherlock Holmes sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -148,19 +148,19 @@ sherlock!(with_heading_default, "Sherlock", ".", cmd.arg("-j1").arg("--heading"); let lines: String = wd.stdout(&mut cmd); let expected1 = "\ -foo +./foo Sherlock Holmes lives on Baker Street. -sherlock +./sherlock For the Doctor Watsons of this world, as opposed to the Sherlock be, to a very large extent, the result of luck. Sherlock Holmes "; let expected2 = "\ -sherlock +./sherlock For the Doctor Watsons of this world, as opposed to the Sherlock be, to a very large extent, the result of luck. Sherlock Holmes -foo +./foo Sherlock Holmes lives on Baker Street. 
"; if lines != expected1 { @@ -289,14 +289,14 @@ sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-t").arg("rust"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); + assert_eq!(lines, "./file.rs:Sherlock\n"); }); sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); cmd.arg("-t").arg("all"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -305,7 +305,7 @@ sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-T").arg("rust"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(file_types_negate_all, "Sherlock", ".", @@ -315,8 +315,8 @@ sherlock!(file_types_negate_all, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "); }); @@ -333,18 +333,21 @@ sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.wat", "Sherlock"); cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.wat:Sherlock\n"); + assert_eq!(lines, "./file.wat:Sherlock\n"); }); -sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { +sherlock!(file_type_add_compose, "Sherlock", ".", +|wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); wd.create("file.rs", "Sherlock"); wd.create("file.wat", "Sherlock"); cmd.arg("--type-add").arg("wat:*.wat"); cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo"); let lines: String = wd.stdout(&mut cmd); - println!("{}", lines); - assert_eq!(sort_lines(&lines), "file.py:Sherlock\nfile.wat:Sherlock\n"); + assert_eq!( + sort_lines(&lines), + "./file.py:Sherlock\n./file.wat:Sherlock\n" + ); }); sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -352,7 +355,7 @@ sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-g").arg("*.rs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); + assert_eq!(lines, "./file.rs:Sherlock\n"); }); sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -361,14 +364,14 @@ sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-g").arg("!*.rs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(iglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.HTML", "Sherlock"); cmd.arg("--iglob").arg("*.html"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.HTML:Sherlock\n"); + assert_eq!(lines, "./file.HTML:Sherlock\n"); }); sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -376,15 +379,16 @@ sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { 
wd.create("file2.html", "Sherlock"); cmd.arg("--glob").arg("*.html"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file2.html:Sherlock\n"); + assert_eq!(lines, "./file2.html:Sherlock\n"); }); -sherlock!(byte_offset_only_matching, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { +sherlock!(byte_offset_only_matching, "Sherlock", ".", +|wd: WorkDir, mut cmd: Command| { cmd.arg("-b").arg("-o"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:56:Sherlock -sherlock:177:Sherlock +./sherlock:56:Sherlock +./sherlock:177:Sherlock "; assert_eq!(lines, expected); }); @@ -392,35 +396,35 @@ sherlock:177:Sherlock sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:2\n"; + let expected = "./sherlock:2\n"; assert_eq!(lines, expected); }); sherlock!(count_matches, "the", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count-matches"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(count_matches_inverted, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count-matches").arg("--invert-match"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(count_matches_via_only, "the", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count").arg("--only-matching"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(files_with_matches, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--files-with-matches"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock\n"; + let expected = "./sherlock\n"; assert_eq!(lines, expected); }); @@ -429,7 +433,7 @@ sherlock!(files_without_matches, "Sherlock", ".", wd.create("file.py", "foo"); cmd.arg("--files-without-match"); let lines: String = wd.stdout(&mut cmd); - let expected = "file.py\n"; + let expected = "./file.py\n"; assert_eq!(lines, expected); }); @@ -527,7 +531,7 @@ sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("50").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -541,7 +545,7 @@ sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("4K").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -555,7 +559,7 @@ sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("1M").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -583,8 +587,8 @@ sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--hidden"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./.sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -610,8 +614,8 @@ sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-ignore"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -653,8 +657,8 @@ sherlock!(ignore_git_parent_stop, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -686,8 +690,8 @@ sherlock!(ignore_git_parent_stop_file, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -740,8 +744,8 @@ sherlock!(no_parent_ignore_git, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -771,8 +775,8 @@ sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, path(expected)); }); @@ -783,8 +787,8 @@ sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -796,8 +800,8 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./.sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -807,7 +811,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-uuu"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n"); + assert_eq!(lines, "./file:foo\x00bar\n./file:foo\x00baz\n"); }); sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { @@ -815,10 +819,10 @@ sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -829,10 +833,10 @@ sherlock!(vimgrep_no_line, "Sherlock|Watson", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:12:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -843,10 +847,10 @@ sherlock!(vimgrep_no_line_no_column, "Sherlock|Watson", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -869,12 +873,12 @@ clean!(regression_25, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("src/llvm/foo", "test"); let lines: String = wd.stdout(&mut cmd); - let expected = path("src/llvm/foo:test\n"); + let expected = path("./src/llvm/foo:test\n"); assert_eq!(lines, expected); cmd.current_dir(wd.path().join("src")); let lines: String = wd.stdout(&mut cmd); - let expected = path("llvm/foo:test\n"); + let expected = path("./llvm/foo:test\n"); assert_eq!(lines, expected); }); @@ -885,7 +889,7 @@ clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("vendor/manifest", "test"); let lines: String = wd.stdout(&mut cmd); - let expected = path("vendor/manifest:test\n"); + let expected = path("./vendor/manifest:test\n"); assert_eq!(lines, expected); }); @@ -927,7 +931,7 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("dir/bar", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("dir/bar:test\n")); + assert_eq!(lines, path("./dir/bar:test\n")); }); // See: https://github.com/BurntSushi/ripgrep/issues/87 @@ -945,7 +949,7 @@ clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create(".foo", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, ".foo:test\n"); + assert_eq!(lines, "./.foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/93 @@ -954,7 +958,7 @@ clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".", wd.create("foo", "192.168.1.1"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:192.168.1.1\n"); + assert_eq!(lines, "./foo:192.168.1.1\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/99 @@ -966,7 +970,10 @@ clean!(regression_99, "test", ".", cmd.arg("-j1").arg("--heading"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(sort_lines(&lines), sort_lines("bar\ntest\n\nfoo1\ntest\n")); + assert_eq!( + sort_lines(&lines), + sort_lines("./bar\ntest\n\n./foo1\ntest\n") + ); }); // See: https://github.com/BurntSushi/ripgrep/issues/105 @@ -975,7 +982,7 @@ clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--vimgrep"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); + assert_eq!(lines, "./foo:1:3:zztest\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/105 @@ -984,7 +991,7 @@ clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--column"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); + assert_eq!(lines, "./foo:1:3:zztest\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/127 @@ -1009,8 +1016,8 @@ clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = format!("\ -{path}:For the Doctor Watsons of this world, as opposed to the Sherlock -{path}:be, to a very large extent, the result of luck. Sherlock Holmes +./{path}:For the Doctor Watsons of this world, as opposed to the Sherlock +./{path}:be, to a very large extent, the result of luck. 
Sherlock Holmes ", path=path("foo/watson")); assert_eq!(lines, expected); }); @@ -1021,7 +1028,7 @@ clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-n"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:5:x\n"); + assert_eq!(lines, "./foo:5:x\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/131 @@ -1049,8 +1056,8 @@ sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes sym1:For the Doctor Watsons of this world, as opposed to the Sherlock sym1:be, to a very large extent, the result of luck. Sherlock Holmes sym2:For the Doctor Watsons of this world, as opposed to the Sherlock @@ -1094,11 +1101,11 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("foo/bar/baz", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar/baz"))); + assert_eq!(lines, format!("./{}:test\n", path("foo/bar/baz"))); cmd.current_dir(wd.path().join("./foo/bar")); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "baz:test\n"); + assert_eq!(lines, "./baz:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/199 @@ -1107,7 +1114,7 @@ clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--smart-case"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:tEsT\n"); + assert_eq!(lines, "./foo:tEsT\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/206 @@ -1117,7 +1124,7 @@ clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-g").arg("*.txt"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar.txt"))); + assert_eq!(lines, format!("./{}:test\n", path("foo/bar.txt"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/210 @@ -1161,7 +1168,7 @@ clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n"); + assert_eq!(lines, "./foo:привет\n./foo:Привет\n./foo:ПрИвЕт\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/256 @@ -1205,7 +1212,7 @@ clean!(regression_405, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-g").arg("!/foo/**"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("bar/foo/file2.txt"))); + assert_eq!(lines, format!("./{}:test\n", path("bar/foo/file2.txt"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/428 @@ -1220,7 +1227,7 @@ clean!(regression_428_color_context_path, "foo", ".", let expected = format!( "{colored_path}:foo\n{colored_path}-bar\n", colored_path=format!( - "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d{path}\x1b\x5b\x30\x6d", + "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d./{path}\x1b\x5b\x30\x6d", path=path("sherlock"))); assert_eq!(lines, expected); }); @@ -1234,16 +1241,17 @@ clean!(regression_428_unrecognized_style, "Sherlok", ".", let output = cmd.output().unwrap(); let err = String::from_utf8_lossy(&output.stderr); let expected = "\ -Unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense, \ +unrecognized style attribute ''. 
Choose from: nobold, bold, nointense, intense, \ nounderline, underline. "; assert_eq!(err, expected); }); // See: https://github.com/BurntSushi/ripgrep/issues/493 -clean!(regression_493, " 're ", "input.txt", |wd: WorkDir, mut cmd: Command| { +clean!(regression_493, r"\b 're \b", "input.txt", +|wd: WorkDir, mut cmd: Command| { wd.create("input.txt", "peshwaship 're seminomata"); - cmd.arg("-o").arg("-w"); + cmd.arg("-o"); let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, " 're \n"); @@ -1255,8 +1263,8 @@ sherlock!(regression_553_switch, "sherlock", ".", cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); @@ -1264,8 +1272,8 @@ sherlock:be, to a very large extent, the result of luck. Sherlock Holmes cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1305,12 +1313,9 @@ clean!(regression_599, "^$", "input.txt", |wd: WorkDir, mut cmd: Command| { ]); let lines: String = wd.stdout(&mut cmd); - // Technically, the expected output should only be two lines, but: - // https://github.com/BurntSushi/ripgrep/issues/441 let expected = "\ 1: 2: -4: "; assert_eq!(expected, lines); }); @@ -1326,7 +1331,7 @@ clean!(regression_807, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--hidden"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path(".a/c/file"))); + assert_eq!(lines, format!("./{}:test\n", path(".a/c/file"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/900 @@ -1343,7 +1348,7 @@ clean!(feature_1_sjis, "Шерлок Холмс", ".", |wd: WorkDir, mut cmd: Co cmd.arg("-Esjis"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1354,7 +1359,7 @@ clean!(feature_1_utf16_auto, "Шерлок Холмс", ".", wd.create_bytes("foo", &sherlock[..]); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1366,7 +1371,7 @@ clean!(feature_1_utf16_explicit, "Шерлок Холмс", ".", cmd.arg("-Eutf-16le"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1378,7 +1383,7 @@ clean!(feature_1_eucjp, "Шерлок Холмс", ".", cmd.arg("-Eeuc-jp"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1413,8 +1418,8 @@ sherlock!(feature_7_dash, "-f-", ".", |wd: WorkDir, mut cmd: Command| { let output = wd.pipe(&mut cmd, "Sherlock"); let lines = String::from_utf8_lossy(&output.stdout); let expected = "\ -sherlock:For the Doctor 
Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1439,8 +1444,8 @@ sherlock!(feature_34_only_matching, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:Sherlock -sherlock:Sherlock +./sherlock:Sherlock +./sherlock:Sherlock "; assert_eq!(lines, expected); }); @@ -1452,8 +1457,8 @@ sherlock!(feature_34_only_matching_line_column, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:1:57:Sherlock -sherlock:3:49:Sherlock +./sherlock:1:57:Sherlock +./sherlock:3:49:Sherlock "; assert_eq!(lines, expected); }); @@ -1476,15 +1481,15 @@ sherlock!(feature_45_relative_cwd, "test", ".", // First, get a baseline without applying ignore rules. let lines = paths_from_stdout(wd.stdout(&mut cmd)); assert_eq!(lines, paths(&[ - "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo", - "baz/test", "foo", "test", + "./bar/test", "./baz/bar/test", "./baz/baz/bar/test", "./baz/foo", + "./baz/test", "./foo", "./test", ])); // Now try again with the ignore file activated. cmd.arg("--ignore-file").arg(".not-an-ignore"); let lines = paths_from_stdout(wd.stdout(&mut cmd)); assert_eq!(lines, paths(&[ - "baz/bar/test", "baz/baz/bar/test", "baz/test", "test", + "./baz/bar/test", "./baz/baz/bar/test", "./baz/test", "./test", ])); // Now do it again, but inside the baz directory. @@ -1496,7 +1501,7 @@ sherlock!(feature_45_relative_cwd, "test", ".", cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore"); cmd.current_dir(wd.path().join("baz")); let lines = paths_from_stdout(wd.stdout(&mut cmd)); - assert_eq!(lines, paths(&["baz/bar/test", "test"])); + assert_eq!(lines, paths(&["./baz/bar/test", "./test"])); }); // See: https://github.com/BurntSushi/ripgrep/issues/45 @@ -1509,7 +1514,7 @@ sherlock!(feature_45_precedence_with_others, "test", ".", cmd.arg("--ignore-file").arg(".not-an-ignore"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); + assert_eq!(lines, "./imp.log:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/45 @@ -1523,7 +1528,7 @@ sherlock!(feature_45_precedence_internal, "test", ".", cmd.arg("--ignore-file").arg(".not-an-ignore1"); cmd.arg("--ignore-file").arg(".not-an-ignore2"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); + assert_eq!(lines, "./imp.log:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/68 @@ -1535,7 +1540,7 @@ clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-ignore-vcs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); + assert_eq!(lines, "./foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/70 @@ -1545,8 +1550,8 @@ sherlock!(feature_70_smart_case, "sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1557,7 +1562,7 @@ sherlock!(feature_89_files_with_matches, "Sherlock", ".", cmd.arg("--null").arg("--files-with-matches"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); + assert_eq!(lines, "./sherlock\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1567,7 +1572,7 @@ sherlock!(feature_89_files_without_matches, "Sherlock", ".", cmd.arg("--null").arg("--files-without-match"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py\x00"); + assert_eq!(lines, "./file.py\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1576,7 +1581,7 @@ sherlock!(feature_89_count, "Sherlock", ".", cmd.arg("--null").arg("--count"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); + assert_eq!(lines, "./sherlock\x002\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1585,7 +1590,7 @@ sherlock!(feature_89_files, "NADA", ".", cmd.arg("--null").arg("--files"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); + assert_eq!(lines, "./sherlock\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1595,10 +1600,10 @@ sherlock!(feature_89_match, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock\x00Holmeses, success in the province of detective work must always -sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes -sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; +./sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock\x00Holmeses, success in the province of detective work must always +./sherlock\x00be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; "; assert_eq!(lines, expected); }); @@ -1613,7 +1618,7 @@ clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--maxdepth").arg("2"); let lines: String = wd.stdout(&mut cmd); - let expected = path("one/pass:far\n"); + let expected = path("./one/pass:far\n"); assert_eq!(lines, expected); }); @@ -1639,7 +1644,7 @@ clean!(feature_129_matches, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M26"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo:[Omitted long line with 2 matches]\n"; + let expected = "./foo:test\n./foo:[Omitted long matching line]\n"; assert_eq!(lines, expected); }); @@ -1649,7 +1654,7 @@ clean!(feature_129_context, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M20").arg("-C1"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo-[Omitted long context line]\n"; + let expected = "./foo:test\n./foo-[Omitted long context line]\n"; assert_eq!(lines, expected); }); @@ -1659,7 +1664,7 @@ clean!(feature_129_replace, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M26").arg("-rfoo"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:foo\nfoo:[Omitted long line with 2 replacements]\n"; + let expected = "./foo:foo\n./foo:[Omitted long line with 2 matches]\n"; assert_eq!(lines, expected); }); @@ -1668,7 +1673,7 @@ clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("foo", "test\ntest"); cmd.arg("-m1"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); + assert_eq!(lines, "./foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/159 @@ -1684,7 +1689,7 @@ clean!(feature_243_column_line, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--column"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:1:test\n"); + assert_eq!(lines, "./foo:1:1:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/263 @@ -1696,7 +1701,7 @@ clean!(feature_263_sort_files, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--sort-files"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "abc:test\nbar:test\nfoo:test\nzoo:test\n"); + assert_eq!(lines, "./abc:test\n./bar:test\n./foo:test\n./zoo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/275 @@ -1706,7 +1711,7 @@ clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--path-separator").arg("Z"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "fooZbar:test\n"); + assert_eq!(lines, ".ZfooZbar:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/362 @@ -1746,7 +1751,7 @@ sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".", cmd.arg("-0").arg("--count"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); + assert_eq!(lines, "./sherlock\x002\n"); }); #[test] @@ -1932,59 +1937,52 @@ fn feature_411_parallel_search_stats() { assert_eq!(lines.contains("seconds"), true); } -sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-with-matches"); - cmd.arg("--stats"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines.contains("seconds"), false); -}); - -sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-without-match"); - cmd.arg("--stats"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines.contains("seconds"), false); 
-}); - #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740"); wd.create("file", "\nfoo\nbar\nfoobar\n\nbaz\n"); - wd.create("patterns", "foo\n\nbar\n"); + wd.create("patterns", "foo\nbar\n"); // We can't assume that the way colour specs are translated to ANSI // sequences will remain stable, and --replace doesn't currently work with // pass-through, so for now we don't actually test the match sub-strings let common_args = &["-n", "--passthru"]; - let expected = "\ -1: + let foo_expected = "\ +1- 2:foo -3:bar +3-bar 4:foobar -5: -6:baz +5- +6-baz "; // With single pattern let mut cmd = wd.command(); cmd.args(common_args).arg("foo").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_expected); + + let foo_bar_expected = "\ +1- +2:foo +3:bar +4:foobar +5- +6-baz +"; // With multiple -e patterns let mut cmd = wd.command(); cmd.args(common_args) .arg("-e").arg("foo").arg("-e").arg("bar").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_bar_expected); // With multiple -f patterns let mut cmd = wd.command(); cmd.args(common_args).arg("-f").arg("patterns").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_bar_expected); // -c should override let mut cmd = wd.command(); @@ -1992,15 +1990,35 @@ fn feature_740_passthru() { let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, "2\n"); + let only_foo_expected = "\ +1- +2:foo +3-bar +4:foo +5- +6-baz +"; + // -o should conflict let mut cmd = wd.command(); cmd.args(common_args).arg("-o").arg("foo").arg("file"); - wd.assert_err(&mut cmd); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, only_foo_expected); + + let replace_foo_expected = "\ +1- +2:wat +3-bar +4:watbar +5- +6-baz +"; // -r should conflict let mut cmd = wd.command(); - cmd.args(common_args).arg("-r").arg("$0").arg("foo").arg("file"); - wd.assert_err(&mut cmd); + cmd.args(common_args).arg("-r").arg("wat").arg("foo").arg("file"); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, replace_foo_expected); } #[test] @@ -2081,7 +2099,7 @@ fn regression_270() { let mut cmd = wd.command(); cmd.arg("-e").arg("-test").arg("./"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("foo:-test\n")); + assert_eq!(lines, path("./foo:-test\n")); } // See: https://github.com/BurntSushi/ripgrep/issues/391 @@ -2232,8 +2250,8 @@ fn regression_693_context_option_in_contextless_mode() { let lines: String = wd.stdout(&mut cmd); let expected = "\ -bar:1 -foo:1 +./bar:1 +./foo:1 "; assert_eq!(lines, expected); }
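
As a rough illustration of the libripgrep crates that ripgrep core now builds
on, the following sketch drives a single search with `grep-regex` and
`grep-searcher`. It is only a sketch: the pattern, the `sherlock` file name,
and the printed output format are assumptions for illustration, not code taken
from this patch.

    use std::error::Error;

    use grep_regex::RegexMatcher;
    use grep_searcher::Searcher;
    use grep_searcher::sinks::UTF8;

    fn main() -> Result<(), Box<dyn Error>> {
        // Compile the regex into a matcher (the pattern is an assumption).
        let matcher = RegexMatcher::new(r"Sherlock")?;
        // A default Searcher performs line-oriented search.
        let mut searcher = Searcher::new();
        // The UTF8 sink calls the closure once per matching line with the
        // line number and the line's contents.
        searcher.search_path(&matcher, "sherlock", UTF8(|lnum, line| {
            println!("{}:{}", lnum, line.trim_end());
            Ok(true) // keep searching
        }))?;
        Ok(())
    }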