2016-11-10 00:19:40 +02:00
|
|
|
use std::cmp;
|
2016-09-05 06:52:23 +02:00
|
|
|
use std::env;
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
use std::ffi::OsStr;
|
2016-11-09 13:07:53 +02:00
|
|
|
use std::fs;
|
|
|
|
use std::io::{self, BufRead};
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
use std::ops;
|
2016-09-05 06:52:23 +02:00
|
|
|
use std::path::{Path, PathBuf};
|
2016-11-06 19:21:36 +02:00
|
|
|
use std::process;
|
2016-12-24 19:53:09 +02:00
|
|
|
use std::sync::Arc;
|
|
|
|
use std::sync::atomic::{AtomicBool, Ordering};
|
2016-09-05 06:52:23 +02:00
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
use clap;
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. The happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate UX
might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
use encoding_rs::Encoding;
|
2016-09-05 06:52:23 +02:00
|
|
|
use env_logger;
|
|
|
|
use grep::{Grep, GrepBuilder};
|
|
|
|
use log;
|
|
|
|
use num_cpus;
|
|
|
|
use regex;
|
Don't search stdout redirected file.
When running ripgrep like this:
rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (check the inode of the redirection
and the inode of the file you're about to search), there are a few problems
with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
However, this information wasn't exposed, but now it is (through both the
ignore and walkdir crates).
Fixes #286
2017-01-08 17:27:30 +02:00
|
|
|
use same_file;
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
use termcolor;
|
2016-09-05 06:52:23 +02:00
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
use app;
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
use atty;
|
2016-10-12 01:57:09 +02:00
|
|
|
use ignore::overrides::{Override, OverrideBuilder};
|
|
|
|
use ignore::types::{FileTypeDef, Types, TypesBuilder};
|
|
|
|
use ignore;
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
use printer::{ColorSpecs, Printer};
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
use unescape::unescape;
|
2016-11-06 03:44:15 +02:00
|
|
|
use worker::{Worker, WorkerBuilder};
|
2016-09-05 06:52:23 +02:00
|
|
|
|
2016-12-24 17:06:37 +02:00
|
|
|
use Result;
|
2016-09-05 06:52:23 +02:00
|
|
|
|
2016-12-23 21:53:35 +02:00
|
|
|
/// `Args` are transformed/normalized from `ArgMatches`.
|
2016-09-05 06:52:23 +02:00
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct Args {
|
|
|
|
paths: Vec<PathBuf>,
|
|
|
|
after_context: usize,
|
|
|
|
before_context: usize,
|
2016-09-05 23:36:41 +02:00
|
|
|
color: bool,
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
color_choice: termcolor::ColorChoice,
|
|
|
|
colors: ColorSpecs,
|
2016-09-07 01:50:27 +02:00
|
|
|
column: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
context_separator: Vec<u8>,
|
|
|
|
count: bool,
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. In the happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate UX
experience might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
encoding: Option<&'static Encoding>,
|
2016-09-24 04:06:34 +02:00
|
|
|
files_with_matches: bool,
|
2016-11-20 01:48:59 +02:00
|
|
|
files_without_matches: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
eol: u8,
|
|
|
|
files: bool,
|
|
|
|
follow: bool,
|
2016-10-12 01:57:09 +02:00
|
|
|
glob_overrides: Override,
|
2016-09-07 01:33:19 +02:00
|
|
|
grep: Grep,
|
2016-09-05 23:36:41 +02:00
|
|
|
heading: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
hidden: bool,
|
2016-10-12 01:57:09 +02:00
|
|
|
ignore_files: Vec<PathBuf>,
|
2016-09-05 06:52:23 +02:00
|
|
|
invert_match: bool,
|
|
|
|
line_number: bool,
|
2016-09-23 03:32:38 +02:00
|
|
|
line_per_match: bool,
|
2016-11-06 20:09:53 +02:00
|
|
|
max_count: Option<u64>,
|
2017-02-28 06:53:52 +02:00
|
|
|
max_filesize: Option<u64>,
|
2016-09-27 05:56:15 +02:00
|
|
|
maxdepth: Option<usize>,
|
2016-09-07 03:47:33 +02:00
|
|
|
mmap: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
no_ignore: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
no_ignore_parent: bool,
|
2016-09-25 03:31:24 +02:00
|
|
|
no_ignore_vcs: bool,
|
2016-11-06 21:36:08 +02:00
|
|
|
no_messages: bool,
|
2016-09-27 01:21:17 +02:00
|
|
|
null: bool,
|
2017-01-11 01:16:15 +02:00
|
|
|
path_separator: Option<u8>,
|
2016-09-05 06:52:23 +02:00
|
|
|
quiet: bool,
|
2016-12-24 19:53:09 +02:00
|
|
|
quiet_matched: QuietMatched,
|
2016-09-05 23:36:41 +02:00
|
|
|
replace: Option<Vec<u8>>,
|
2017-01-07 05:43:59 +02:00
|
|
|
sort_files: bool,
|
Don't search stdout redirected file.
When running ripgrep like this:
rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (check the inode of the redirection
and the inode of the file you're about to search), there are a few problems
with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
However, this information wasn't exposed, but now it is (through both the
ignore and walkdir crates).
Fixes #286
2017-01-08 17:27:30 +02:00
|
|
|
stdout_handle: Option<same_file::Handle>,
|
2016-09-05 06:52:23 +02:00
|
|
|
text: bool,
|
|
|
|
threads: usize,
|
|
|
|
type_list: bool,
|
|
|
|
types: Types,
|
|
|
|
with_filename: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Args {
|
|
|
|
/// Parse the command line arguments for this process.
|
|
|
|
///
|
|
|
|
/// If a CLI usage error occurred, then exit the process and print a usage
|
|
|
|
/// or error message. Similarly, if the user requested the version of
|
2016-09-08 22:15:44 +02:00
|
|
|
/// ripgrep, then print the version and exit.
|
2016-09-05 06:52:23 +02:00
|
|
|
///
|
|
|
|
/// Also, initialize a global logger.
|
|
|
|
pub fn parse() -> Result<Args> {
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
let matches = app::app_short().get_matches();
|
|
|
|
if matches.is_present("help-short") {
|
|
|
|
let _ = ::app::app_short().print_help();
|
2016-12-23 21:53:35 +02:00
|
|
|
println!("");
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
process::exit(0);
|
|
|
|
}
|
|
|
|
if matches.is_present("help") {
|
|
|
|
let _ = ::app::app_long().print_help();
|
2016-12-23 21:53:35 +02:00
|
|
|
println!("");
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
process::exit(0);
|
|
|
|
}
|
|
|
|
if matches.is_present("version") {
|
|
|
|
println!("ripgrep {}", crate_version!());
|
|
|
|
process::exit(0);
|
2016-09-11 19:27:08 +02:00
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
|
|
|
|
let mut logb = env_logger::LogBuilder::new();
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
if matches.is_present("debug") {
|
2016-09-05 06:52:23 +02:00
|
|
|
logb.filter(None, log::LogLevelFilter::Debug);
|
|
|
|
} else {
|
|
|
|
logb.filter(None, log::LogLevelFilter::Warn);
|
|
|
|
}
|
|
|
|
if let Err(err) = logb.init() {
|
|
|
|
errored!("failed to initialize logger: {}", err);
|
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
ArgMatches(matches).to_args()
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
2016-09-08 22:15:44 +02:00
|
|
|
/// Returns true if ripgrep should print the files it will search and exit
|
2016-09-05 06:52:23 +02:00
|
|
|
/// (but not do any actual searching).
|
|
|
|
pub fn files(&self) -> bool {
|
|
|
|
self.files
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new line based matcher. The matcher returned can be used
|
|
|
|
/// across multiple threads simultaneously. This matcher only supports
|
|
|
|
/// basic searching of regular expressions in a single buffer.
|
|
|
|
///
|
|
|
|
/// The pattern and other flags are taken from the command line.
|
2016-09-07 01:33:19 +02:00
|
|
|
pub fn grep(&self) -> Grep {
|
|
|
|
self.grep.clone()
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
2016-09-29 02:50:50 +02:00
|
|
|
/// Whether ripgrep should be quiet or not.
|
|
|
|
pub fn quiet(&self) -> bool {
|
|
|
|
self.quiet
|
|
|
|
}
|
|
|
|
|
2016-12-24 19:53:09 +02:00
|
|
|
/// Returns a thread safe boolean for determining whether to quit a search
|
|
|
|
/// early when quiet mode is enabled.
|
|
|
|
///
|
|
|
|
/// If quiet mode is disabled, then QuietMatched.has_match always returns
|
|
|
|
/// false.
|
|
|
|
pub fn quiet_matched(&self) -> QuietMatched {
|
|
|
|
self.quiet_matched.clone()
|
|
|
|
}
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
/// Create a new printer of individual search results that writes to the
|
|
|
|
/// writer given.
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
pub fn printer<W: termcolor::WriteColor>(&self, wtr: W) -> Printer<W> {
|
2016-09-09 03:46:14 +02:00
|
|
|
let mut p = Printer::new(wtr)
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
.colors(self.colors.clone())
|
2016-09-07 01:50:27 +02:00
|
|
|
.column(self.column)
|
2016-09-05 06:52:23 +02:00
|
|
|
.context_separator(self.context_separator.clone())
|
|
|
|
.eol(self.eol)
|
2016-09-05 23:36:41 +02:00
|
|
|
.heading(self.heading)
|
2016-09-23 03:32:38 +02:00
|
|
|
.line_per_match(self.line_per_match)
|
2016-09-27 01:21:17 +02:00
|
|
|
.null(self.null)
|
2017-01-11 01:16:15 +02:00
|
|
|
.path_separator(self.path_separator)
|
2016-09-05 23:36:41 +02:00
|
|
|
.with_filename(self.with_filename);
|
|
|
|
if let Some(ref rep) = self.replace {
|
|
|
|
p = p.replace(rep.clone());
|
|
|
|
}
|
|
|
|
p
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
2016-09-26 03:27:17 +02:00
|
|
|
/// Retrieve the configured file separator.
|
|
|
|
pub fn file_separator(&self) -> Option<Vec<u8>> {
|
2016-12-24 19:53:09 +02:00
|
|
|
let use_heading_sep =
|
|
|
|
self.heading
|
|
|
|
&& !self.count
|
|
|
|
&& !self.files_with_matches
|
|
|
|
&& !self.files_without_matches;
|
|
|
|
if use_heading_sep {
|
2016-09-26 03:27:17 +02:00
|
|
|
Some(b"".to_vec())
|
2016-09-05 23:36:41 +02:00
|
|
|
} else if self.before_context > 0 || self.after_context > 0 {
|
2016-09-26 03:27:17 +02:00
|
|
|
Some(self.context_separator.clone())
|
|
|
|
} else {
|
|
|
|
None
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-11-06 20:09:53 +02:00
|
|
|
/// Returns true if the given arguments are known to never produce a match.
|
|
|
|
pub fn never_match(&self) -> bool {
|
|
|
|
self.max_count == Some(0)
|
|
|
|
}
|
|
|
|
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
/// Create a new writer for single-threaded searching with color support.
|
2017-02-05 18:02:54 +02:00
|
|
|
pub fn stdout(&self) -> termcolor::StandardStream {
|
|
|
|
termcolor::StandardStream::stdout(self.color_choice)
|
2016-09-14 03:11:46 +02:00
|
|
|
}
|
|
|
|
|
Don't search stdout redirected file.
When running ripgrep like this:
rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (check the inode of the redirection
and the inode of the file you're about to search), there are a few problems
with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
However, this information wasn't exposed, but now it is (through both the
ignore and walkdir crates).
Fixes #286
2017-01-08 17:27:30 +02:00
|
|
|
/// Returns a handle to stdout for filtering search.
|
|
|
|
///
|
|
|
|
/// A handle is returned if and only if ripgrep's stdout is being
|
|
|
|
/// redirected to a file. The handle returned corresponds to that file.
|
|
|
|
///
|
|
|
|
/// This can be used to ensure that we do not attempt to search a file
|
|
|
|
/// that ripgrep is writing to.
|
|
|
|
pub fn stdout_handle(&self) -> Option<&same_file::Handle> {
|
|
|
|
self.stdout_handle.as_ref()
|
|
|
|
}
|
|
|
|
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
/// Create a new buffer writer for multi-threaded searching with color
|
|
|
|
/// support.
|
|
|
|
pub fn buffer_writer(&self) -> termcolor::BufferWriter {
|
|
|
|
let mut wtr = termcolor::BufferWriter::stdout(self.color_choice);
|
|
|
|
wtr.separator(self.file_separator());
|
|
|
|
wtr
|
2016-09-09 03:46:14 +02:00
|
|
|
}
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
/// Return the paths that should be searched.
|
|
|
|
pub fn paths(&self) -> &[PathBuf] {
|
|
|
|
&self.paths
|
|
|
|
}
|
|
|
|
|
2016-11-06 03:44:15 +02:00
|
|
|
/// Returns true if there is exactly one file path given to search.
|
|
|
|
pub fn is_one_path(&self) -> bool {
|
|
|
|
self.paths.len() == 1
|
|
|
|
&& (self.paths[0] == Path::new("-") || self.paths[0].is_file())
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
2016-11-06 03:44:15 +02:00
|
|
|
/// Create a worker whose configuration is taken from the
|
|
|
|
/// command line.
|
|
|
|
pub fn worker(&self) -> Worker {
|
|
|
|
WorkerBuilder::new(self.grep())
|
|
|
|
.after_context(self.after_context)
|
|
|
|
.before_context(self.before_context)
|
2016-09-07 03:47:33 +02:00
|
|
|
.count(self.count)
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. In the happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate UX
experience might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
.encoding(self.encoding)
|
2016-09-24 04:06:34 +02:00
|
|
|
.files_with_matches(self.files_with_matches)
|
2016-11-20 01:48:59 +02:00
|
|
|
.files_without_matches(self.files_without_matches)
|
2016-09-07 03:47:33 +02:00
|
|
|
.eol(self.eol)
|
|
|
|
.line_number(self.line_number)
|
|
|
|
.invert_match(self.invert_match)
|
2016-11-06 20:09:53 +02:00
|
|
|
.max_count(self.max_count)
|
2016-11-06 03:44:15 +02:00
|
|
|
.mmap(self.mmap)
|
2016-11-20 21:27:18 +02:00
|
|
|
.no_messages(self.no_messages)
|
2016-09-29 02:50:50 +02:00
|
|
|
.quiet(self.quiet)
|
2016-09-07 03:47:33 +02:00
|
|
|
.text(self.text)
|
2016-11-06 03:44:15 +02:00
|
|
|
.build()
|
2016-09-07 03:47:33 +02:00
|
|
|
}
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
/// Returns the number of worker search threads that should be used.
|
|
|
|
pub fn threads(&self) -> usize {
|
|
|
|
self.threads
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns a list of type definitions currently loaded.
|
|
|
|
pub fn type_defs(&self) -> &[FileTypeDef] {
|
2016-09-28 22:30:57 +02:00
|
|
|
self.types.definitions()
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
2016-09-08 22:15:44 +02:00
|
|
|
/// Returns true if ripgrep should print the type definitions currently
|
|
|
|
/// loaded and then exit.
|
2016-09-05 06:52:23 +02:00
|
|
|
pub fn type_list(&self) -> bool {
|
|
|
|
self.type_list
|
|
|
|
}
|
|
|
|
|
2016-11-06 21:36:08 +02:00
|
|
|
/// Returns true if error messages should be suppressed.
|
|
|
|
pub fn no_messages(&self) -> bool {
|
|
|
|
self.no_messages
|
|
|
|
}
|
|
|
|
|
2016-10-12 01:57:09 +02:00
|
|
|
/// Create a new recursive directory iterator over the paths in argv.
|
2016-11-06 03:44:15 +02:00
|
|
|
pub fn walker(&self) -> ignore::Walk {
|
|
|
|
self.walker_builder().build()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new parallel recursive directory iterator over the paths
|
|
|
|
/// in argv.
|
|
|
|
pub fn walker_parallel(&self) -> ignore::WalkParallel {
|
|
|
|
self.walker_builder().build_parallel()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a directory walker builder configured from the command line.
///
/// The builder is seeded with the first search path and every remaining
/// path is added to it. Each configured ignore file is then registered; an
/// error from registering one is printed unless messages are suppressed,
/// and never aborts construction.
///
/// NOTE(review): indexing `paths[0]` panics if the path list is empty.
/// Presumably argument parsing always supplies at least one path; confirm
/// against the code that populates `paths`.
fn walker_builder(&self) -> ignore::WalkBuilder {
    let paths = self.paths();
    let mut wd = ignore::WalkBuilder::new(&paths[0]);
    for path in &paths[1..] {
        wd.add(path);
    }
    for path in &self.ignore_files {
        if let Some(err) = wd.add_ignore(path) {
            if !self.no_messages {
                eprintln!("{}", err);
            }
        }
    }

    wd.follow_links(self.follow);
    // `self.hidden` and the `no_ignore*` flags are stored in the opposite
    // sense to the builder's options, hence the negations below.
    wd.hidden(!self.hidden);
    wd.max_depth(self.maxdepth);
    wd.max_filesize(self.max_filesize);
    wd.overrides(self.glob_overrides.clone());
    wd.types(self.types.clone());
    // The three git-related settings are enabled only when neither
    // `no_ignore` nor `no_ignore_vcs` is set.
    wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
    wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
    wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
    wd.ignore(!self.no_ignore);
    wd.parents(!self.no_ignore_parent);
    wd.threads(self.threads());
    if self.sort_files {
        wd.sort_by(|a, b| a.cmp(b));
    }
    wd
}
|
|
|
|
}
|
|
|
|
|
2016-12-24 19:53:09 +02:00
|
|
|
/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to
|
|
|
|
/// several options/flags.
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
struct ArgMatches<'a>(clap::ArgMatches<'a>);
|
2016-09-05 06:52:23 +02:00
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools (e.g., use with xargs).
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
impl<'a> ops::Deref for ArgMatches<'a> {
|
|
|
|
type Target = clap::ArgMatches<'a>;
|
|
|
|
fn deref(&self) -> &clap::ArgMatches<'a> { &self.0 }
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools (e.g., use with xargs).
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
impl<'a> ArgMatches<'a> {
|
|
|
|
/// Convert the result of parsing CLI arguments into ripgrep's
|
|
|
|
/// configuration.
|
|
|
|
fn to_args(&self) -> Result<Args> {
|
|
|
|
let paths = self.paths();
|
|
|
|
let mmap = try!(self.mmap(&paths));
|
|
|
|
let with_filename = self.with_filename(&paths);
|
|
|
|
let (before_context, after_context) = try!(self.contexts());
|
2016-12-24 19:53:09 +02:00
|
|
|
let quiet = self.is_present("quiet");
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
let args = Args {
|
|
|
|
paths: paths,
|
|
|
|
after_context: after_context,
|
|
|
|
before_context: before_context,
|
|
|
|
color: self.color(),
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
color_choice: self.color_choice(),
|
|
|
|
colors: try!(self.color_specs()),
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
column: self.column(),
|
|
|
|
context_separator: self.context_separator(),
|
|
|
|
count: self.is_present("count"),
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. The happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate UX
experience might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
encoding: try!(self.encoding()),
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
files_with_matches: self.is_present("files-with-matches"),
|
2016-11-20 03:15:41 +02:00
|
|
|
files_without_matches: self.is_present("files-without-match"),
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
eol: b'\n',
|
|
|
|
files: self.is_present("files"),
|
|
|
|
follow: self.is_present("follow"),
|
|
|
|
glob_overrides: try!(self.overrides()),
|
|
|
|
grep: try!(self.grep()),
|
|
|
|
heading: self.heading(),
|
|
|
|
hidden: self.hidden(),
|
|
|
|
ignore_files: self.ignore_files(),
|
|
|
|
invert_match: self.is_present("invert-match"),
|
|
|
|
line_number: self.line_number(),
|
|
|
|
line_per_match: self.is_present("vimgrep"),
|
|
|
|
max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
|
2017-02-28 06:53:52 +02:00
|
|
|
max_filesize: try!(self.max_filesize()),
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
maxdepth: try!(self.usize_of("maxdepth")),
|
|
|
|
mmap: mmap,
|
|
|
|
no_ignore: self.no_ignore(),
|
|
|
|
no_ignore_parent: self.no_ignore_parent(),
|
|
|
|
no_ignore_vcs: self.no_ignore_vcs(),
|
|
|
|
no_messages: self.is_present("no-messages"),
|
|
|
|
null: self.is_present("null"),
|
2017-01-11 01:16:15 +02:00
|
|
|
path_separator: try!(self.path_separator()),
|
2016-12-24 19:53:09 +02:00
|
|
|
quiet: quiet,
|
|
|
|
quiet_matched: QuietMatched::new(quiet),
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
replace: self.replace(),
|
2017-01-07 05:43:59 +02:00
|
|
|
sort_files: self.is_present("sort-files"),
|
Don't search stdout redirected file.
When running ripgrep like this:
rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (check the inode of the redirection
and the inode of the file you're about to search), there are a few problems
with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
However, this information wasn't exposed, but now it is (through both the
ignore and walkdir crates).
Fixes #286
2017-01-08 17:27:30 +02:00
|
|
|
stdout_handle: self.stdout_handle(),
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
text: self.text(),
|
|
|
|
threads: try!(self.threads()),
|
|
|
|
type_list: self.is_present("type-list"),
|
|
|
|
types: try!(self.types()),
|
|
|
|
with_filename: with_filename,
|
|
|
|
};
|
|
|
|
if args.mmap {
|
|
|
|
debug!("will try to use memory maps");
|
|
|
|
}
|
|
|
|
Ok(args)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return all file paths that ripgrep should search.
|
|
|
|
fn paths(&self) -> Vec<PathBuf> {
|
|
|
|
let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
|
|
|
|
None => vec![],
|
|
|
|
Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
|
|
|
|
};
|
|
|
|
// If --file, --files or --regexp is given, then the first path is
|
|
|
|
// always in `pattern`.
|
|
|
|
if self.is_present("file")
|
|
|
|
|| self.is_present("files")
|
|
|
|
|| self.is_present("regexp") {
|
|
|
|
if let Some(path) = self.value_of_os("pattern") {
|
|
|
|
paths.insert(0, Path::new(path).to_path_buf());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if paths.is_empty() {
|
|
|
|
paths.push(self.default_path());
|
|
|
|
}
|
|
|
|
paths
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the default path that ripgrep should search.
|
|
|
|
fn default_path(&self) -> PathBuf {
|
2016-11-18 03:48:11 +02:00
|
|
|
let file_is_stdin =
|
|
|
|
self.values_of_os("file").map_or(false, |mut files| {
|
|
|
|
files.any(|f| f == "-")
|
|
|
|
});
|
2017-01-15 23:32:30 +02:00
|
|
|
let search_cwd = atty::is(atty::Stream::Stdin)
|
|
|
|
|| !stdin_is_readable()
|
2016-11-18 03:48:11 +02:00
|
|
|
|| (self.is_present("file") && file_is_stdin)
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
|| self.is_present("files")
|
2016-11-18 03:48:11 +02:00
|
|
|
|| self.is_present("type-list");
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
if search_cwd {
|
|
|
|
Path::new("./").to_path_buf()
|
|
|
|
} else {
|
|
|
|
Path::new("-").to_path_buf()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return all of the ignore files given on the command line.
|
|
|
|
fn ignore_files(&self) -> Vec<PathBuf> {
|
|
|
|
match self.values_of_os("ignore-file") {
|
|
|
|
None => return vec![],
|
|
|
|
Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Return the pattern that should be used for searching.
|
|
|
|
///
|
|
|
|
/// If multiple -e/--regexp flags are given, then they are all collapsed
|
|
|
|
/// into one pattern.
|
|
|
|
///
|
|
|
|
/// If any part of the pattern isn't valid UTF-8, then an error is
|
|
|
|
/// returned.
|
|
|
|
fn pattern(&self) -> Result<String> {
|
|
|
|
Ok(try!(self.patterns()).join("|"))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get a sequence of all available patterns from the command line.
|
|
|
|
/// This includes reading the -e/--regexp and -f/--file flags.
|
|
|
|
///
|
|
|
|
/// Note that if -F/--fixed-strings is set, then all patterns will be
|
|
|
|
/// escaped. Similarly, if -w/--word-regexp is set, then all patterns
|
|
|
|
/// are surrounded by `\b`.
|
|
|
|
///
|
|
|
|
/// If any pattern is invalid UTF-8, then an error is returned.
|
|
|
|
fn patterns(&self) -> Result<Vec<String>> {
|
2017-02-18 22:34:54 +02:00
|
|
|
if self.is_present("files") || self.is_present("type-list") {
|
|
|
|
return Ok(vec![self.empty_pattern()]);
|
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
let mut pats = vec![];
|
|
|
|
match self.values_of_os("regexp") {
|
|
|
|
None => {
|
|
|
|
if self.values_of_os("file").is_none() {
|
|
|
|
if let Some(os_pat) = self.value_of_os("pattern") {
|
|
|
|
pats.push(try!(self.os_str_pattern(os_pat)));
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
Some(os_pats) => {
|
|
|
|
for os_pat in os_pats {
|
|
|
|
pats.push(try!(self.os_str_pattern(os_pat)));
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
}
|
|
|
|
if let Some(files) = self.values_of_os("file") {
|
|
|
|
for file in files {
|
|
|
|
if file == "-" {
|
|
|
|
let stdin = io::stdin();
|
|
|
|
for line in stdin.lock().lines() {
|
|
|
|
pats.push(self.str_pattern(&try!(line)));
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
} else {
|
|
|
|
let f = try!(fs::File::open(file));
|
|
|
|
for line in io::BufReader::new(f).lines() {
|
|
|
|
pats.push(self.str_pattern(&try!(line)));
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
if pats.is_empty() {
|
|
|
|
pats.push(self.empty_pattern())
|
|
|
|
}
|
|
|
|
Ok(pats)
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
|
|
|
|
/// Converts an OsStr pattern to a String pattern, including word
|
|
|
|
/// boundaries or escapes if applicable.
|
|
|
|
///
|
|
|
|
/// If the pattern is not valid UTF-8, then an error is returned.
|
|
|
|
fn os_str_pattern(&self, pat: &OsStr) -> Result<String> {
|
|
|
|
let s = try!(pattern_to_str(pat));
|
|
|
|
Ok(self.str_pattern(s))
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Converts a &str pattern to a String pattern, including word
|
|
|
|
/// boundaries or escapes if applicable.
|
|
|
|
fn str_pattern(&self, pat: &str) -> String {
|
|
|
|
let s = self.word_pattern(self.literal_pattern(pat.to_string()));
|
|
|
|
if s.is_empty() {
|
|
|
|
self.empty_pattern()
|
|
|
|
} else {
|
|
|
|
s
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns the given pattern as a literal pattern if the
|
|
|
|
/// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
|
|
|
|
/// unchanged.
|
|
|
|
fn literal_pattern(&self, pat: String) -> String {
|
|
|
|
if self.is_present("fixed-strings") {
|
2016-12-30 23:24:09 +02:00
|
|
|
regex::escape(&pat)
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
} else {
|
|
|
|
pat
|
|
|
|
}
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns the given pattern as a word pattern if the -w/--word-regexp
|
|
|
|
/// flag is set. Otherwise, the pattern is returned unchanged.
|
|
|
|
fn word_pattern(&self, pat: String) -> String {
|
|
|
|
if self.is_present("word-regexp") {
|
|
|
|
format!(r"\b{}\b", pat)
|
|
|
|
} else {
|
|
|
|
pat
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Empty pattern returns a pattern that is guaranteed to produce an empty
|
|
|
|
/// regular expression that is valid in any position.
|
|
|
|
fn empty_pattern(&self) -> String {
|
|
|
|
// This would normally just be an empty string, which works on its
|
|
|
|
// own, but if the patterns are joined in a set of alternations, then
|
|
|
|
// you wind up with `foo|`, which is invalid.
|
|
|
|
self.word_pattern("z{0}".to_string())
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns true if and only if file names containing each match should
|
|
|
|
/// be emitted.
|
|
|
|
///
|
|
|
|
/// `paths` should be a slice of all top-level file paths that ripgrep
|
|
|
|
/// will need to search.
|
|
|
|
fn with_filename(&self, paths: &[PathBuf]) -> bool {
|
|
|
|
if self.is_present("no-filename") {
|
|
|
|
false
|
|
|
|
} else {
|
|
|
|
self.is_present("with-filename")
|
|
|
|
|| paths.len() > 1
|
|
|
|
|| paths.get(0).map_or(false, |p| p.is_dir())
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Don't search stdout redirected file.
When running ripgrep like this:
rg foo > output
we must be careful not to search `output` since ripgrep is actively writing
to it. Searching it can cause massive blowups where the file grows without
bound.
While this is conceptually easy to fix (check the inode of the redirection
and the inode of the file you're about to search), there are a few problems
with it.
First, inodes are a Unix thing, so we need a Windows specific solution to
this as well. To resolve this concern, I created a new crate, `same-file`,
which provides a cross platform abstraction.
Second, stat'ing every file is costly. This is not avoidable on Windows,
but on Unix, we can get the inode number directly from directory traversal.
However, this information wasn't exposed, but now it is (through both the
ignore and walkdir crates).
Fixes #286
2017-01-08 17:27:30 +02:00
|
|
|
/// Returns a handle to stdout for filtering search.
|
|
|
|
///
|
|
|
|
/// A handle is returned if and only if ripgrep's stdout is being
|
|
|
|
/// redirected to a file. The handle returned corresponds to that file.
|
|
|
|
///
|
|
|
|
/// This can be used to ensure that we do not attempt to search a file
|
|
|
|
/// that ripgrep is writing to.
|
|
|
|
fn stdout_handle(&self) -> Option<same_file::Handle> {
|
|
|
|
let h = match same_file::Handle::stdout() {
|
|
|
|
Err(_) => return None,
|
|
|
|
Ok(h) => h,
|
|
|
|
};
|
|
|
|
let md = match h.as_file().metadata() {
|
|
|
|
Err(_) => return None,
|
|
|
|
Ok(md) => md,
|
|
|
|
};
|
|
|
|
if !md.is_file() {
|
|
|
|
return None;
|
|
|
|
}
|
|
|
|
Some(h)
|
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns true if and only if memory map searching should be tried.
|
|
|
|
///
|
|
|
|
/// `paths` should be a slice of all top-level file paths that ripgrep
|
|
|
|
/// will need to search.
|
|
|
|
fn mmap(&self, paths: &[PathBuf]) -> Result<bool> {
|
|
|
|
let (before, after) = try!(self.contexts());
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. In the happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate UX
experience might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
let enc = try!(self.encoding());
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
Ok(if before > 0 || after > 0 || self.is_present("no-mmap") {
|
|
|
|
false
|
|
|
|
} else if self.is_present("mmap") {
|
|
|
|
true
|
|
|
|
} else if cfg!(target_os = "macos") {
|
|
|
|
// On Mac, memory maps appear to suck. Neat.
|
|
|
|
false
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. In the happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate UX
experience might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
} else if enc.is_some() {
|
|
|
|
// There's no practical way to transcode a memory map that isn't
|
|
|
|
// isomorphic to searching over io::Read.
|
|
|
|
false
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
} else {
|
|
|
|
// If we're only searching a few paths and all of them are
|
|
|
|
// files, then memory maps are probably faster.
|
|
|
|
paths.len() <= 10 && paths.iter().all(|p| p.is_file())
|
|
|
|
})
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns true if and only if line numbers should be shown.
|
|
|
|
fn line_number(&self) -> bool {
|
|
|
|
if self.is_present("no-line-number") || self.is_present("count") {
|
|
|
|
false
|
|
|
|
} else {
|
|
|
|
self.is_present("line-number")
|
2017-01-12 01:53:35 +02:00
|
|
|
|| self.is_present("column")
|
2017-01-15 23:32:30 +02:00
|
|
|
|| atty::is(atty::Stream::Stdout)
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
|| self.is_present("pretty")
|
|
|
|
|| self.is_present("vimgrep")
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns true if and only if column numbers should be shown.
|
|
|
|
fn column(&self) -> bool {
|
|
|
|
self.is_present("column") || self.is_present("vimgrep")
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns true if and only if matches should be grouped with file name
|
|
|
|
/// headings.
|
|
|
|
fn heading(&self) -> bool {
|
|
|
|
if self.is_present("no-heading") {
|
|
|
|
false
|
|
|
|
} else {
|
|
|
|
self.is_present("heading")
|
2017-01-15 23:32:30 +02:00
|
|
|
|| atty::is(atty::Stream::Stdout)
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
|| self.is_present("pretty")
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the replacement string as UTF-8 bytes if it exists.
|
|
|
|
fn replace(&self) -> Option<Vec<u8>> {
|
|
|
|
self.value_of_lossy("replace").map(|s| s.into_owned().into_bytes())
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns the unescaped context separator in UTF-8 bytes.
|
|
|
|
fn context_separator(&self) -> Vec<u8> {
|
|
|
|
match self.value_of_lossy("context-separator") {
|
|
|
|
None => b"--".to_vec(),
|
|
|
|
Some(sep) => unescape(&sep),
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
2017-01-11 01:16:15 +02:00
|
|
|
/// Returns the unescaped path separator in UTF-8 bytes.
|
|
|
|
fn path_separator(&self) -> Result<Option<u8>> {
|
|
|
|
match self.value_of_lossy("path-separator") {
|
|
|
|
None => Ok(None),
|
|
|
|
Some(sep) => {
|
|
|
|
let sep = unescape(&sep);
|
|
|
|
if sep.is_empty() {
|
|
|
|
Ok(None)
|
|
|
|
} else if sep.len() > 1 {
|
|
|
|
Err(From::from(format!(
|
|
|
|
"A path separator must be exactly one byte, but \
|
|
|
|
the given separator is {} bytes.", sep.len())))
|
|
|
|
} else {
|
|
|
|
Ok(Some(sep[0]))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns the before and after contexts from the command line.
|
|
|
|
///
|
|
|
|
/// If a context setting was absent, then `0` is returned.
|
|
|
|
///
|
|
|
|
/// If there was a problem parsing the values from the user as an integer,
|
|
|
|
/// then an error is returned.
|
|
|
|
fn contexts(&self) -> Result<(usize, usize)> {
|
|
|
|
let after = try!(self.usize_of("after-context")).unwrap_or(0);
|
|
|
|
let before = try!(self.usize_of("before-context")).unwrap_or(0);
|
|
|
|
let both = try!(self.usize_of("context")).unwrap_or(0);
|
|
|
|
Ok(if both > 0 {
|
|
|
|
(both, both)
|
|
|
|
} else {
|
|
|
|
(before, after)
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if and only if ripgrep should color its output.
|
|
|
|
fn color(&self) -> bool {
|
|
|
|
let preference = match self.0.value_of_lossy("color") {
|
|
|
|
None => "auto".to_string(),
|
|
|
|
Some(v) => v.into_owned(),
|
|
|
|
};
|
|
|
|
if preference == "always" {
|
|
|
|
true
|
|
|
|
} else if self.is_present("vimgrep") {
|
|
|
|
false
|
|
|
|
} else if preference == "auto" {
|
2017-01-15 23:32:30 +02:00
|
|
|
atty::is(atty::Stream::Stdout) || self.is_present("pretty")
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
} else {
|
|
|
|
false
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
/// Returns the user's color choice based on command line parameters and
|
|
|
|
/// environment.
|
|
|
|
fn color_choice(&self) -> termcolor::ColorChoice {
|
|
|
|
let preference = match self.0.value_of_lossy("color") {
|
|
|
|
None => "auto".to_string(),
|
|
|
|
Some(v) => v.into_owned(),
|
|
|
|
};
|
|
|
|
if preference == "always" {
|
|
|
|
termcolor::ColorChoice::Always
|
|
|
|
} else if preference == "ansi" {
|
|
|
|
termcolor::ColorChoice::AlwaysAnsi
|
|
|
|
} else if self.is_present("vimgrep") {
|
|
|
|
termcolor::ColorChoice::Never
|
|
|
|
} else if preference == "auto" {
|
2017-01-15 23:32:30 +02:00
|
|
|
if atty::is(atty::Stream::Stdout) || self.is_present("pretty") {
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
termcolor::ColorChoice::Auto
|
|
|
|
} else {
|
|
|
|
termcolor::ColorChoice::Never
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
termcolor::ColorChoice::Never
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the color specifications given by the user on the CLI.
|
|
|
|
///
|
|
|
|
/// If the was a problem parsing any of the provided specs, then an error
|
|
|
|
/// is returned.
|
|
|
|
fn color_specs(&self) -> Result<ColorSpecs> {
|
|
|
|
// Start with a default set of color specs.
|
|
|
|
let mut specs = vec![
|
2017-01-07 03:07:29 +02:00
|
|
|
"path:fg:magenta".parse().unwrap(),
|
|
|
|
"line:fg:green".parse().unwrap(),
|
Completely re-work colored output and tty handling.
This commit completely guts all of the color handling code and replaces
most of it with two new crates: wincolor and termcolor. wincolor
provides a simple API to coloring using the Windows console and
termcolor provides a platform independent coloring API tuned for
multithreaded command line programs. This required a lot more
flexibility than what the `term` crate provided, so it was dropped.
We instead switch to writing ANSI escape sequences directly and ignore
the TERMINFO database.
In addition to fixing several bugs, this commit also permits end users
to customize colors to a certain extent. For example, this command will
set the match color to magenta and the line number background to yellow:
rg --colors 'match:fg:magenta' --colors 'line:bg:yellow' foo
For tty handling, we've adopted a hack from `git` to do tty detection in
MSYS/mintty terminals. As a result, ripgrep should get both color
detection and piping correct on Windows regardless of which terminal you
use.
Finally, switch to line buffering. Performance doesn't seem to be
impacted and it's an otherwise more user friendly option.
Fixes #37, Fixes #51, Fixes #94, Fixes #117, Fixes #182, Fixes #231
2016-11-20 18:14:52 +02:00
|
|
|
"match:fg:red".parse().unwrap(),
|
|
|
|
"match:style:bold".parse().unwrap(),
|
|
|
|
];
|
|
|
|
for spec_str in self.values_of_lossy_vec("colors") {
|
|
|
|
specs.push(try!(spec_str.parse()));
|
|
|
|
}
|
|
|
|
Ok(ColorSpecs::new(&specs))
|
|
|
|
}
|
|
|
|
|
Add support for additional text encodings.
This includes, but is not limited to, UTF-16, latin-1, GBK, EUC-JP and
Shift_JIS. (Courtesy of the `encoding_rs` crate.)
Specifically, this feature enables ripgrep to search files that are
encoded in an encoding other than UTF-8. The list of available encodings
is tied directly to what the `encoding_rs` crate supports, which is in
turn tied to the Encoding Standard. The full list of available encodings
can be found here: https://encoding.spec.whatwg.org/#concept-encoding-get
This pull request also introduces the notion that text encodings can be
automatically detected on a best effort basis. Currently, the only
support for this is checking for a UTF-16 bom. In all other cases, a
text encoding of `auto` (the default) implies a UTF-8 or ASCII
compatible source encoding. When a text encoding is otherwise specified,
it is unconditionally used for all files searched.
Since ripgrep's regex engine is fundamentally built on top of UTF-8,
this feature works by transcoding the files to be searched from their
source encoding to UTF-8. This transcoding only happens when:
1. `auto` is specified and a non-UTF-8 encoding is detected.
2. A specific encoding is given by end users (including UTF-8).
When transcoding occurs, errors are handled by automatically inserting
the Unicode replacement character. In this case, ripgrep's output is
guaranteed to be valid UTF-8 (excluding non-UTF-8 file paths, if they
are printed).
In all other cases, the source text is searched directly, which implies
an assumption that it is at least ASCII compatible, but where UTF-8 is
most useful. In this scenario, encoding errors are not detected. In this
case, ripgrep's output will match the input exactly, byte-for-byte.
This design may not be optimal in all cases, but it has some advantages:
1. The happy path ("UTF-8 everywhere") remains happy. I have not been
able to witness any performance regressions.
2. In the non-UTF-8 path, implementation complexity is kept relatively
low. The cost here is transcoding itself. A potentially superior
implementation might build decoding of any encoding into the regex
engine itself. In particular, the fundamental problem with
transcoding everything first is that literal optimizations are nearly
negated.
Future work should entail improving the user experience. For example, we
might want to auto-detect more text encodings. A more elaborate user
experience might permit end users to specify multiple text encodings,
although this seems hard to pull off in an ergonomic way.
Fixes #1
2017-03-09 03:22:48 +02:00
|
|
|
/// Return the text encoding specified.
|
|
|
|
///
|
|
|
|
/// If the label given by the caller doesn't correspond to a valid
|
|
|
|
/// supported encoding (and isn't `auto`), then return an error.
|
|
|
|
///
|
|
|
|
/// A `None` encoding implies that the encoding should be automatically
|
|
|
|
/// detected on a per-file basis.
|
|
|
|
fn encoding(&self) -> Result<Option<&'static Encoding>> {
|
|
|
|
match self.0.value_of_lossy("encoding") {
|
|
|
|
None => Ok(None),
|
|
|
|
Some(label) => {
|
|
|
|
if label == "auto" {
|
|
|
|
return Ok(None);
|
|
|
|
}
|
|
|
|
match Encoding::for_label(label.as_bytes()) {
|
|
|
|
Some(enc) => Ok(Some(enc)),
|
|
|
|
None => Err(From::from(
|
|
|
|
format!("unsupported encoding: {}", label))),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns the approximate number of threads that ripgrep should use.
|
|
|
|
fn threads(&self) -> Result<usize> {
|
2017-01-07 05:43:59 +02:00
|
|
|
if self.is_present("sort-files") {
|
|
|
|
return Ok(1);
|
|
|
|
}
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
let threads = try!(self.usize_of("threads")).unwrap_or(0);
|
|
|
|
Ok(if threads == 0 {
|
|
|
|
cmp::min(12, num_cpus::get())
|
|
|
|
} else {
|
|
|
|
threads
|
|
|
|
})
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Builds a grep matcher from the command line flags.
|
|
|
|
///
|
|
|
|
/// If there was a problem extracting the pattern from the command line
|
|
|
|
/// flags, then an error is returned.
|
|
|
|
fn grep(&self) -> Result<Grep> {
|
|
|
|
let smart =
|
|
|
|
self.is_present("smart-case")
|
|
|
|
&& !self.is_present("ignore-case")
|
|
|
|
&& !self.is_present("case-sensitive");
|
|
|
|
let casei =
|
|
|
|
self.is_present("ignore-case")
|
|
|
|
&& !self.is_present("case-sensitive");
|
|
|
|
GrepBuilder::new(&try!(self.pattern()))
|
|
|
|
.case_smart(smart)
|
|
|
|
.case_insensitive(casei)
|
|
|
|
.line_terminator(b'\n')
|
|
|
|
.build()
|
|
|
|
.map_err(From::from)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Builds the set of glob overrides from the command line flags.
|
|
|
|
fn overrides(&self) -> Result<Override> {
|
|
|
|
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
|
|
|
|
for glob in self.values_of_lossy_vec("glob") {
|
|
|
|
try!(ovr.add(&glob));
|
|
|
|
}
|
|
|
|
ovr.build().map_err(From::from)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Builds a file type matcher from the command line flags.
|
|
|
|
fn types(&self) -> Result<Types> {
|
|
|
|
let mut btypes = TypesBuilder::new();
|
|
|
|
btypes.add_defaults();
|
|
|
|
for ty in self.values_of_lossy_vec("type-clear") {
|
|
|
|
btypes.clear(&ty);
|
|
|
|
}
|
|
|
|
for def in self.values_of_lossy_vec("type-add") {
|
|
|
|
try!(btypes.add_def(&def));
|
|
|
|
}
|
|
|
|
for ty in self.values_of_lossy_vec("type") {
|
|
|
|
btypes.select(&ty);
|
|
|
|
}
|
|
|
|
for ty in self.values_of_lossy_vec("type-not") {
|
|
|
|
btypes.negate(&ty);
|
|
|
|
}
|
|
|
|
btypes.build().map_err(From::from)
|
|
|
|
}
|
|
|
|
|
2017-02-28 06:53:52 +02:00
|
|
|
/// Parses the max-filesize argument option into a byte count.
|
|
|
|
fn max_filesize(&self) -> Result<Option<u64>> {
|
|
|
|
use regex::Regex;
|
|
|
|
|
|
|
|
let max_filesize = match self.value_of_lossy("max-filesize") {
|
|
|
|
Some(x) => x,
|
|
|
|
None => return Ok(None)
|
|
|
|
};
|
|
|
|
|
2017-03-01 07:38:06 +02:00
|
|
|
let re = Regex::new("^([0-9]+)([KMG])?$").unwrap();
|
2017-02-28 06:53:52 +02:00
|
|
|
let caps = try!(re.captures(&max_filesize)
|
|
|
|
.ok_or("invalid format for max-filesize argument"));
|
|
|
|
|
2017-03-01 07:38:06 +02:00
|
|
|
let value = try!(caps[1].parse::<u64>().map_err(|err| err.to_string()));
|
2017-02-28 06:53:52 +02:00
|
|
|
let suffix = caps.get(2).map(|x| x.as_str());
|
2017-03-01 07:38:06 +02:00
|
|
|
|
2017-02-28 06:53:52 +02:00
|
|
|
match suffix {
|
2017-03-01 07:38:06 +02:00
|
|
|
None => Ok(Some(value)),
|
|
|
|
Some("K") => Ok(Some(value * 1024)),
|
|
|
|
Some("M") => Ok(Some(value * 1024 * 1024)),
|
|
|
|
Some("G") => Ok(Some(value * 1024 * 1024 * 1024)),
|
2017-02-28 06:53:52 +02:00
|
|
|
_ => Err(From::from("invalid suffix for max-filesize argument"))
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonably common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
/// Returns true if ignore files should be ignored.
|
|
|
|
fn no_ignore(&self) -> bool {
|
|
|
|
self.is_present("no-ignore")
|
|
|
|
|| self.occurrences_of("unrestricted") >= 1
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if parent ignore files should be ignored.
|
|
|
|
fn no_ignore_parent(&self) -> bool {
|
|
|
|
self.is_present("no-ignore-parent") || self.no_ignore()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if VCS ignore files should be ignored.
|
|
|
|
fn no_ignore_vcs(&self) -> bool {
|
|
|
|
self.is_present("no-ignore-vcs") || self.no_ignore()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if and only if hidden files/directories should be
|
|
|
|
/// searched.
|
|
|
|
fn hidden(&self) -> bool {
|
|
|
|
self.is_present("hidden") || self.occurrences_of("unrestricted") >= 2
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if and only if all files should be treated as if they
|
|
|
|
/// were text, even if ripgrep would detect it as a binary file.
|
|
|
|
fn text(&self) -> bool {
|
|
|
|
self.is_present("text") || self.occurrences_of("unrestricted") >= 3
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Like values_of_lossy, but returns an empty vec if the flag is not
|
|
|
|
/// present.
|
|
|
|
fn values_of_lossy_vec(&self, name: &str) -> Vec<String> {
|
2016-12-23 21:53:35 +02:00
|
|
|
self.values_of_lossy(name).unwrap_or_else(Vec::new)
|
Switch from Docopt to Clap.
There were two important reasons for the switch:
1. Performance. Docopt does poorly when the argv becomes large, which is
a reasonable common use case for search tools. (e.g., use with xargs)
2. Better failure modes. Clap knows a lot more about how a particular
argv might be invalid, and can therefore provide much clearer error
messages.
While both were important, (1) made it urgent.
Note that since Clap requires at least Rust 1.11, this will in turn
increase the minimum Rust version supported by ripgrep from Rust 1.9 to
Rust 1.11. It is therefore a breaking change, so the soonest release of
ripgrep with Clap will have to be 0.3.
There is also at least one subtle breaking change in real usage.
Previous to this commit, this used to work:
rg -e -foo
Where this would cause ripgrep to search for the string `-foo`. Clap
currently has problems supporting this use case
(see: https://github.com/kbknapp/clap-rs/issues/742),
but it can be worked around by using this instead:
rg -e [-]foo
or even
rg [-]foo
and this still works:
rg -- -foo
This commit also adds Bash, Fish and PowerShell completion files to the
release, fixes a bug that prevented ripgrep from working on file
paths containing invalid UTF-8 and shows short descriptions in the
output of `-h` but longer descriptions in the output of `--help`.
Fixes #136, Fixes #189, Fixes #210, Fixes #230
2016-11-13 04:48:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Safely reads an arg value with the given name, and if it's present,
|
|
|
|
/// tries to parse it as a usize value.
|
|
|
|
fn usize_of(&self, name: &str) -> Result<Option<usize>> {
|
|
|
|
match self.value_of_lossy(name) {
|
|
|
|
None => Ok(None),
|
|
|
|
Some(v) => v.parse().map(Some).map_err(From::from),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn pattern_to_str(s: &OsStr) -> Result<&str> {
|
|
|
|
match s.to_str() {
|
|
|
|
Some(s) => Ok(s),
|
|
|
|
None => Err(From::from(format!(
|
|
|
|
"Argument '{}' is not valid UTF-8. \
|
|
|
|
Use hex escape sequences to match arbitrary \
|
|
|
|
bytes in a pattern (e.g., \\xFF).",
|
|
|
|
s.to_string_lossy()))),
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
2016-12-24 19:53:09 +02:00
|
|
|
|
|
|
|
/// A simple thread safe abstraction for determining whether a search should
/// stop if the user has requested quiet mode.
///
/// The wrapped flag only exists when quiet mode is enabled; clones share
/// the same flag through the `Arc`.
#[derive(Clone, Debug)]
pub struct QuietMatched(Arc<Option<AtomicBool>>);

impl QuietMatched {
    /// Create a new QuietMatched value.
    ///
    /// With `quiet` set to true, `set_match` and `has_match` track whether
    /// a match has been found, i.e. whether a search should quit.
    ///
    /// With `quiet` set to false, `set_match` is always a no-op and
    /// `has_match` always returns false.
    fn new(quiet: bool) -> QuietMatched {
        if !quiet {
            return QuietMatched(Arc::new(None));
        }
        QuietMatched(Arc::new(Some(AtomicBool::new(false))))
    }

    /// Returns true if and only if quiet mode is enabled and a match has
    /// occurred.
    pub fn has_match(&self) -> bool {
        if let Some(ref flag) = *self.0 {
            flag.load(Ordering::SeqCst)
        } else {
            false
        }
    }

    /// Records that a match has occurred.
    ///
    /// Returns true only when quiet mode is enabled and `yes` is true, in
    /// which case the match flag is set. Passing `yes = false` never
    /// clears a previously recorded match. If quiet mode is disabled,
    /// then this is a no-op.
    pub fn set_match(&self, yes: bool) -> bool {
        if !yes {
            return false;
        }
        if let Some(ref flag) = *self.0 {
            flag.store(true, Ordering::SeqCst);
            true
        } else {
            false
        }
    }
}
|
2017-01-15 23:32:30 +02:00
|
|
|
|
|
|
|
/// Returns true if and only if stdin is deemed searchable.
|
|
|
|
#[cfg(unix)]
|
|
|
|
fn stdin_is_readable() -> bool {
|
|
|
|
use std::os::unix::fs::FileTypeExt;
|
|
|
|
use same_file::Handle;
|
|
|
|
|
|
|
|
let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) {
|
|
|
|
Err(_) => return false,
|
|
|
|
Ok(md) => md.file_type(),
|
|
|
|
};
|
|
|
|
ft.is_file() || ft.is_fifo()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns true if and only if stdin is deemed searchable.
|
|
|
|
#[cfg(windows)]
|
|
|
|
fn stdin_is_readable() -> bool {
|
|
|
|
// On Windows, it's not clear what the possibilities are to me, so just
|
|
|
|
// always return true.
|
|
|
|
true
|
|
|
|
}
|