2016-09-05 06:52:23 +02:00
|
|
|
use std::cmp;
|
|
|
|
use std::env;
|
|
|
|
use std::io;
|
|
|
|
use std::path::{Path, PathBuf};
|
|
|
|
|
|
|
|
use docopt::Docopt;
|
|
|
|
use env_logger;
|
|
|
|
use grep::{Grep, GrepBuilder};
|
|
|
|
use log;
|
|
|
|
use num_cpus;
|
|
|
|
use regex;
|
2016-09-17 18:54:44 +02:00
|
|
|
use term::Terminal;
|
|
|
|
#[cfg(not(windows))]
|
|
|
|
use term;
|
2016-09-14 03:11:46 +02:00
|
|
|
#[cfg(windows)]
|
|
|
|
use term::WinConsole;
|
2016-09-05 06:52:23 +02:00
|
|
|
use walkdir::WalkDir;
|
|
|
|
|
2016-09-10 06:05:20 +02:00
|
|
|
use atty;
|
2016-09-05 06:52:23 +02:00
|
|
|
use gitignore::{Gitignore, GitignoreBuilder};
|
|
|
|
use ignore::Ignore;
|
2016-09-14 03:11:46 +02:00
|
|
|
use out::{Out, ColoredTerminal};
|
2016-09-05 06:52:23 +02:00
|
|
|
use printer::Printer;
|
2016-09-07 03:47:33 +02:00
|
|
|
use search_buffer::BufferSearcher;
|
2016-09-10 06:08:42 +02:00
|
|
|
use search_stream::{InputBuffer, Searcher};
|
2016-09-14 03:11:46 +02:00
|
|
|
#[cfg(windows)]
|
|
|
|
use terminal_win::WindowsBuffer;
|
2016-09-05 06:52:23 +02:00
|
|
|
use types::{FileTypeDef, Types, TypesBuilder};
|
|
|
|
use walk;
|
|
|
|
|
|
|
|
use Result;
|
|
|
|
|
|
|
|
/// The Docopt usage string.
|
|
|
|
///
|
|
|
|
/// If you've never heard of Docopt before, see: http://docopt.org
|
|
|
|
/// (TL;DR: The CLI parser is generated from the usage string below.)
|
|
|
|
const USAGE: &'static str = "
|
2016-09-17 22:55:58 +02:00
|
|
|
Usage: rg [options] -e PATTERN ... [<path> ...]
|
|
|
|
rg [options] <pattern> [<path> ...]
|
2016-09-08 22:15:44 +02:00
|
|
|
rg [options] --files [<path> ...]
|
|
|
|
rg [options] --type-list
|
2016-09-25 03:13:24 +02:00
|
|
|
rg [options] --help
|
|
|
|
rg [options] --version
|
2016-09-05 06:52:23 +02:00
|
|
|
|
2016-09-25 01:26:28 +02:00
|
|
|
rg recursively searches your current directory for a regex pattern.
|
2016-09-05 06:52:23 +02:00
|
|
|
|
|
|
|
Common options:
|
|
|
|
-a, --text Search binary files as if they were text.
|
|
|
|
-c, --count Only show count of line matches for each file.
|
2016-09-05 23:36:41 +02:00
|
|
|
--color WHEN Whether to use coloring in match.
|
|
|
|
Valid values are never, always or auto.
|
|
|
|
[default: auto]
|
2016-09-17 22:55:58 +02:00
|
|
|
-e, --regexp PATTERN ... Use PATTERN to search. This option can be
|
|
|
|
provided multiple times, where all patterns
|
|
|
|
given are searched.
|
|
|
|
-F, --fixed-strings Treat the pattern as a literal string instead of
|
|
|
|
a regular expression.
|
2016-09-05 06:52:23 +02:00
|
|
|
-g, --glob GLOB ... Include or exclude files for searching that
|
|
|
|
match the given glob. This always overrides any
|
|
|
|
other ignore logic. Multiple glob flags may be
|
|
|
|
used. Globbing rules match .gitignore globs.
|
|
|
|
Precede a glob with a '!' to exclude it.
|
|
|
|
-h, --help Show this usage message.
|
|
|
|
-i, --ignore-case Case insensitive search.
|
2016-09-05 23:36:41 +02:00
|
|
|
-n, --line-number Show line numbers (1-based). This is enabled
|
|
|
|
by default at a tty.
|
|
|
|
-N, --no-line-number Suppress line numbers.
|
2016-09-25 21:01:27 +02:00
|
|
|
-q, --quiet Do not print anything to stdout. If a match is
|
|
|
|
found in a file, stop searching that file.
|
2016-09-05 23:36:41 +02:00
|
|
|
-r, --replace ARG Replace every match with the string given.
|
|
|
|
Capture group indices (e.g., $5) and names
|
|
|
|
(e.g., $foo) are supported.
|
2016-09-05 06:52:23 +02:00
|
|
|
-t, --type TYPE ... Only search files matching TYPE. Multiple type
|
|
|
|
flags may be provided. Use the --type-list flag
|
|
|
|
to list all available types.
|
|
|
|
-T, --type-not TYPE ... Do not search files matching TYPE. Multiple
|
|
|
|
not-type flags may be provided.
|
2016-09-21 02:24:03 +02:00
|
|
|
-u, --unrestricted ... Reduce the level of 'smart' searching. A
|
|
|
|
single -u doesn't respect .gitignore (etc.)
|
|
|
|
files. Two -u flags will search hidden files
|
|
|
|
and directories. Three -u flags will search
|
|
|
|
binary files. -uu is equivalent to grep -r,
|
|
|
|
and -uuu is equivalent to grep -a -r.
|
2016-09-05 06:52:23 +02:00
|
|
|
-v, --invert-match Invert matching.
|
|
|
|
-w, --word-regexp Only show matches surrounded by word boundaries.
|
|
|
|
This is equivalent to putting \\b before and
|
|
|
|
after the search pattern.
|
|
|
|
|
|
|
|
Less common options:
|
|
|
|
-A, --after-context NUM
|
|
|
|
Show NUM lines after each match.
|
|
|
|
|
|
|
|
-B, --before-context NUM
|
|
|
|
Show NUM lines before each match.
|
|
|
|
|
|
|
|
-C, --context NUM
|
|
|
|
Show NUM lines before and after each match.
|
|
|
|
|
2016-09-07 01:50:27 +02:00
|
|
|
--column
|
|
|
|
Show column numbers (1 based) in output. This only shows the column
|
|
|
|
numbers for the first match on each line. Note that this doesn't try
|
|
|
|
to account for Unicode. One byte is equal to one column.
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
--context-separator ARG
|
|
|
|
The string to use when separating non-continuous context lines. Escape
|
|
|
|
sequences may be used. [default: --]
|
|
|
|
|
|
|
|
--debug
|
|
|
|
Show debug messages.
|
|
|
|
|
|
|
|
--files
|
|
|
|
Print each file that would be searched (but don't search).
|
|
|
|
|
2016-09-26 00:32:41 +02:00
|
|
|
-l, --files-with-matches
|
|
|
|
Only show path of each file with matches.
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
-H, --with-filename
|
|
|
|
Prefix each match with the file name that contains it. This is the
|
|
|
|
default when more than one file is searched.
|
|
|
|
|
2016-09-25 01:23:19 +02:00
|
|
|
--no-filename
|
|
|
|
Never show the filename for a match. This is the default when
|
|
|
|
one file is searched.
|
|
|
|
|
2016-09-05 23:36:41 +02:00
|
|
|
--heading
|
|
|
|
Show the file name above clusters of matches from each file.
|
|
|
|
This is the default mode at a tty.
|
|
|
|
|
|
|
|
--no-heading
|
|
|
|
Don't show any file name heading.
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
--hidden
|
2016-09-10 06:05:20 +02:00
|
|
|
Search hidden directories and files. (Hidden directories and files are
|
|
|
|
skipped by default.)
|
2016-09-05 06:52:23 +02:00
|
|
|
|
|
|
|
-L, --follow
|
|
|
|
Follow symlinks.
|
|
|
|
|
2016-09-07 03:47:33 +02:00
|
|
|
--mmap
|
|
|
|
Search using memory maps when possible. This is enabled by default
|
2016-09-08 22:15:44 +02:00
|
|
|
when ripgrep thinks it will be faster. (Note that mmap searching
|
2016-09-22 02:47:40 +02:00
|
|
|
doesn't currently support the various context related options.)
|
2016-09-07 03:47:33 +02:00
|
|
|
|
|
|
|
--no-mmap
|
|
|
|
Never use memory maps, even when they might be faster.
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
--no-ignore
|
2016-09-24 04:44:17 +02:00
|
|
|
Don't respect ignore files (.gitignore, .ignore, etc.)
|
2016-09-14 20:33:37 +02:00
|
|
|
This implies --no-ignore-parent.
|
2016-09-05 06:52:23 +02:00
|
|
|
|
2016-09-05 23:36:41 +02:00
|
|
|
--no-ignore-parent
|
|
|
|
Don't respect ignore files in parent directories.
|
|
|
|
|
2016-09-25 03:31:24 +02:00
|
|
|
--no-ignore-vcs
|
|
|
|
Don't respect version control ignore files (e.g., .gitignore).
|
|
|
|
Note that .ignore files will continue to be respected.
|
|
|
|
|
2016-09-05 23:36:41 +02:00
|
|
|
-p, --pretty
|
|
|
|
Alias for --color=always --heading -n.
|
|
|
|
|
2016-09-25 03:51:04 +02:00
|
|
|
-S, --smart-case
|
|
|
|
Search case insensitively if the pattern is all lowercase.
|
|
|
|
Search case sensitively otherwise.
|
|
|
|
|
2016-09-05 23:36:41 +02:00
|
|
|
-j, --threads ARG
|
2016-09-05 06:52:23 +02:00
|
|
|
The number of threads to use. Defaults to the number of logical CPUs
|
|
|
|
(capped at 8). [default: 0]
|
|
|
|
|
|
|
|
--version
|
2016-09-08 22:15:44 +02:00
|
|
|
Show the version number of ripgrep and exit.
|
2016-09-05 06:52:23 +02:00
|
|
|
|
2016-09-23 03:32:38 +02:00
|
|
|
--vimgrep
|
|
|
|
Show results with every match on its own line, including line
|
|
|
|
numbers and column numbers. (With this option, a line with more
|
|
|
|
than one match of the regex will be printed more than once.)
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
File type management options:
|
|
|
|
--type-list
|
|
|
|
Show all supported file types and their associated globs.
|
|
|
|
|
|
|
|
--type-add ARG ...
|
2016-09-25 20:47:35 +02:00
|
|
|
Add a new glob for a particular file type. Only one glob can be added
|
|
|
|
at a time. Multiple type-add flags can be provided. Unless type-clear
|
|
|
|
is used, globs are added to any existing globs inside of ripgrep. Note
|
|
|
|
that this must be passed to every invocation of rg.
|
2016-09-25 00:55:48 +02:00
|
|
|
|
2016-09-25 20:36:53 +02:00
|
|
|
Example: `--type-add html:*.html`
|
2016-09-05 06:52:23 +02:00
|
|
|
|
|
|
|
--type-clear TYPE ...
|
2016-09-25 00:55:48 +02:00
|
|
|
Clear the file type globs previously defined for TYPE. This only clears
|
|
|
|
the default type definitions that are found inside of ripgrep. Note
|
|
|
|
that this must be passed to every invocation of rg.
|
2016-09-05 06:52:23 +02:00
|
|
|
";
|
|
|
|
|
|
|
|
/// RawArgs are the args as they are parsed from Docopt. They aren't used
|
2016-09-08 22:15:44 +02:00
|
|
|
/// directly by the rest of ripgrep.
|
2016-09-05 06:52:23 +02:00
|
|
|
#[derive(Debug, RustcDecodable)]
|
|
|
|
pub struct RawArgs {
|
|
|
|
arg_pattern: String,
|
|
|
|
arg_path: Vec<String>,
|
|
|
|
flag_after_context: usize,
|
|
|
|
flag_before_context: usize,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_color: String,
|
2016-09-07 01:50:27 +02:00
|
|
|
flag_column: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_context: usize,
|
|
|
|
flag_context_separator: String,
|
|
|
|
flag_count: bool,
|
2016-09-24 04:06:34 +02:00
|
|
|
flag_files_with_matches: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_debug: bool,
|
|
|
|
flag_files: bool,
|
|
|
|
flag_follow: bool,
|
|
|
|
flag_glob: Vec<String>,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_heading: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_hidden: bool,
|
|
|
|
flag_ignore_case: bool,
|
|
|
|
flag_invert_match: bool,
|
|
|
|
flag_line_number: bool,
|
2016-09-17 22:55:58 +02:00
|
|
|
flag_fixed_strings: bool,
|
2016-09-07 03:47:33 +02:00
|
|
|
flag_mmap: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_no_heading: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_no_ignore: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_no_ignore_parent: bool,
|
2016-09-25 03:31:24 +02:00
|
|
|
flag_no_ignore_vcs: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_no_line_number: bool,
|
2016-09-07 03:47:33 +02:00
|
|
|
flag_no_mmap: bool,
|
2016-09-25 01:23:19 +02:00
|
|
|
flag_no_filename: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_pretty: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_quiet: bool,
|
2016-09-17 22:55:58 +02:00
|
|
|
flag_regexp: Vec<String>,
|
2016-09-05 23:36:41 +02:00
|
|
|
flag_replace: Option<String>,
|
2016-09-25 03:51:04 +02:00
|
|
|
flag_smart_case: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_text: bool,
|
|
|
|
flag_threads: usize,
|
|
|
|
flag_type: Vec<String>,
|
|
|
|
flag_type_not: Vec<String>,
|
|
|
|
flag_type_list: bool,
|
|
|
|
flag_type_add: Vec<String>,
|
|
|
|
flag_type_clear: Vec<String>,
|
2016-09-21 02:24:03 +02:00
|
|
|
flag_unrestricted: u32,
|
2016-09-23 03:32:38 +02:00
|
|
|
flag_vimgrep: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
flag_with_filename: bool,
|
|
|
|
flag_word_regexp: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Args are transformed/normalized from RawArgs.
|
|
|
|
#[derive(Debug)]
|
|
|
|
pub struct Args {
|
|
|
|
pattern: String,
|
|
|
|
paths: Vec<PathBuf>,
|
|
|
|
after_context: usize,
|
|
|
|
before_context: usize,
|
2016-09-05 23:36:41 +02:00
|
|
|
color: bool,
|
2016-09-07 01:50:27 +02:00
|
|
|
column: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
context_separator: Vec<u8>,
|
|
|
|
count: bool,
|
2016-09-24 04:06:34 +02:00
|
|
|
files_with_matches: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
eol: u8,
|
|
|
|
files: bool,
|
|
|
|
follow: bool,
|
|
|
|
glob_overrides: Option<Gitignore>,
|
2016-09-07 01:33:19 +02:00
|
|
|
grep: Grep,
|
2016-09-05 23:36:41 +02:00
|
|
|
heading: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
hidden: bool,
|
|
|
|
ignore_case: bool,
|
|
|
|
invert_match: bool,
|
|
|
|
line_number: bool,
|
2016-09-23 03:32:38 +02:00
|
|
|
line_per_match: bool,
|
2016-09-07 03:47:33 +02:00
|
|
|
mmap: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
no_ignore: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
no_ignore_parent: bool,
|
2016-09-25 03:31:24 +02:00
|
|
|
no_ignore_vcs: bool,
|
2016-09-05 06:52:23 +02:00
|
|
|
quiet: bool,
|
2016-09-05 23:36:41 +02:00
|
|
|
replace: Option<Vec<u8>>,
|
2016-09-05 06:52:23 +02:00
|
|
|
text: bool,
|
|
|
|
threads: usize,
|
|
|
|
type_defs: Vec<FileTypeDef>,
|
|
|
|
type_list: bool,
|
|
|
|
types: Types,
|
|
|
|
with_filename: bool,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl RawArgs {
|
2016-09-08 22:15:44 +02:00
|
|
|
/// Convert arguments parsed into a configuration used by ripgrep.
|
2016-09-05 06:52:23 +02:00
|
|
|
fn to_args(&self) -> Result<Args> {
|
2016-09-17 22:55:58 +02:00
|
|
|
let pattern = self.pattern();
|
2016-09-05 06:52:23 +02:00
|
|
|
let paths =
|
|
|
|
if self.arg_path.is_empty() {
|
2016-09-10 06:05:20 +02:00
|
|
|
if atty::on_stdin()
|
2016-09-10 04:58:30 +02:00
|
|
|
|| self.flag_files
|
2016-09-25 17:13:16 +02:00
|
|
|
|| self.flag_type_list
|
|
|
|
|| !atty::stdin_is_readable() {
|
2016-09-05 23:36:41 +02:00
|
|
|
vec![Path::new("./").to_path_buf()]
|
|
|
|
} else {
|
|
|
|
vec![Path::new("-").to_path_buf()]
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
} else {
|
|
|
|
self.arg_path.iter().map(|p| {
|
|
|
|
Path::new(p).to_path_buf()
|
|
|
|
}).collect()
|
|
|
|
};
|
|
|
|
let (after_context, before_context) =
|
|
|
|
if self.flag_context > 0 {
|
|
|
|
(self.flag_context, self.flag_context)
|
|
|
|
} else {
|
|
|
|
(self.flag_after_context, self.flag_before_context)
|
|
|
|
};
|
2016-09-07 03:47:33 +02:00
|
|
|
let mmap =
|
|
|
|
if before_context > 0 || after_context > 0 || self.flag_no_mmap {
|
|
|
|
false
|
|
|
|
} else if self.flag_mmap {
|
|
|
|
true
|
2016-09-22 02:47:40 +02:00
|
|
|
} else if cfg!(windows) {
|
|
|
|
// On Windows, memory maps appear faster than read calls. Neat.
|
|
|
|
true
|
2016-09-25 02:46:20 +02:00
|
|
|
} else if cfg!(darwin) {
|
|
|
|
// On Mac, memory maps appear to suck. Neat.
|
|
|
|
false
|
2016-09-07 03:47:33 +02:00
|
|
|
} else {
|
|
|
|
// If we're only searching a few paths and all of them are
|
|
|
|
// files, then memory maps are probably faster.
|
|
|
|
paths.len() <= 10 && paths.iter().all(|p| p.is_file())
|
|
|
|
};
|
|
|
|
if mmap {
|
|
|
|
debug!("will try to use memory maps");
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
let glob_overrides =
|
|
|
|
if self.flag_glob.is_empty() {
|
|
|
|
None
|
|
|
|
} else {
|
|
|
|
let cwd = try!(env::current_dir());
|
|
|
|
let mut bgi = GitignoreBuilder::new(cwd);
|
|
|
|
for pat in &self.flag_glob {
|
|
|
|
try!(bgi.add("<argv>", pat));
|
|
|
|
}
|
|
|
|
Some(try!(bgi.build()))
|
|
|
|
};
|
|
|
|
let threads =
|
|
|
|
if self.flag_threads == 0 {
|
2016-09-11 06:42:39 +02:00
|
|
|
cmp::min(8, num_cpus::get())
|
2016-09-05 06:52:23 +02:00
|
|
|
} else {
|
|
|
|
self.flag_threads
|
|
|
|
};
|
2016-09-05 23:36:41 +02:00
|
|
|
let color =
|
2016-09-23 03:32:38 +02:00
|
|
|
if self.flag_vimgrep {
|
|
|
|
false
|
|
|
|
} else if self.flag_color == "auto" {
|
2016-09-10 06:05:20 +02:00
|
|
|
atty::on_stdout() || self.flag_pretty
|
2016-09-05 23:36:41 +02:00
|
|
|
} else {
|
|
|
|
self.flag_color == "always"
|
|
|
|
};
|
2016-09-10 04:58:30 +02:00
|
|
|
let eol = b'\n';
|
2016-09-25 01:23:19 +02:00
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
let mut with_filename = self.flag_with_filename;
|
|
|
|
if !with_filename {
|
|
|
|
with_filename = paths.len() > 1 || paths[0].is_dir();
|
|
|
|
}
|
2016-09-25 01:23:19 +02:00
|
|
|
with_filename = with_filename && !self.flag_no_filename;
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
let mut btypes = TypesBuilder::new();
|
|
|
|
btypes.add_defaults();
|
|
|
|
try!(self.add_types(&mut btypes));
|
|
|
|
let types = try!(btypes.build());
|
2016-09-07 01:33:19 +02:00
|
|
|
let grep = try!(
|
|
|
|
GrepBuilder::new(&pattern)
|
2016-09-25 03:51:04 +02:00
|
|
|
.case_smart(self.flag_smart_case)
|
2016-09-07 01:33:19 +02:00
|
|
|
.case_insensitive(self.flag_ignore_case)
|
|
|
|
.line_terminator(eol)
|
|
|
|
.build()
|
|
|
|
);
|
2016-09-21 02:24:03 +02:00
|
|
|
let no_ignore = self.flag_no_ignore || self.flag_unrestricted >= 1;
|
|
|
|
let hidden = self.flag_hidden || self.flag_unrestricted >= 2;
|
|
|
|
let text = self.flag_text || self.flag_unrestricted >= 3;
|
2016-09-05 23:36:41 +02:00
|
|
|
let mut args = Args {
|
2016-09-05 06:52:23 +02:00
|
|
|
pattern: pattern,
|
|
|
|
paths: paths,
|
|
|
|
after_context: after_context,
|
|
|
|
before_context: before_context,
|
2016-09-05 23:36:41 +02:00
|
|
|
color: color,
|
2016-09-07 01:50:27 +02:00
|
|
|
column: self.flag_column,
|
2016-09-05 06:52:23 +02:00
|
|
|
context_separator: unescape(&self.flag_context_separator),
|
|
|
|
count: self.flag_count,
|
2016-09-24 04:06:34 +02:00
|
|
|
files_with_matches: self.flag_files_with_matches,
|
2016-09-05 06:52:23 +02:00
|
|
|
eol: eol,
|
|
|
|
files: self.flag_files,
|
|
|
|
follow: self.flag_follow,
|
|
|
|
glob_overrides: glob_overrides,
|
2016-09-07 01:33:19 +02:00
|
|
|
grep: grep,
|
2016-09-05 23:36:41 +02:00
|
|
|
heading: !self.flag_no_heading && self.flag_heading,
|
2016-09-21 02:24:03 +02:00
|
|
|
hidden: hidden,
|
2016-09-05 06:52:23 +02:00
|
|
|
ignore_case: self.flag_ignore_case,
|
|
|
|
invert_match: self.flag_invert_match,
|
2016-09-05 23:36:41 +02:00
|
|
|
line_number: !self.flag_no_line_number && self.flag_line_number,
|
2016-09-23 03:32:38 +02:00
|
|
|
line_per_match: self.flag_vimgrep,
|
2016-09-07 03:47:33 +02:00
|
|
|
mmap: mmap,
|
2016-09-21 02:24:03 +02:00
|
|
|
no_ignore: no_ignore,
|
2016-09-14 20:33:37 +02:00
|
|
|
no_ignore_parent:
|
|
|
|
// --no-ignore implies --no-ignore-parent
|
2016-09-21 02:24:03 +02:00
|
|
|
self.flag_no_ignore_parent || no_ignore,
|
2016-09-25 03:31:24 +02:00
|
|
|
no_ignore_vcs:
|
|
|
|
// --no-ignore implies --no-ignore-vcs
|
|
|
|
self.flag_no_ignore_vcs || no_ignore,
|
2016-09-05 06:52:23 +02:00
|
|
|
quiet: self.flag_quiet,
|
2016-09-05 23:36:41 +02:00
|
|
|
replace: self.flag_replace.clone().map(|s| s.into_bytes()),
|
2016-09-21 02:24:03 +02:00
|
|
|
text: text,
|
2016-09-05 06:52:23 +02:00
|
|
|
threads: threads,
|
|
|
|
type_defs: btypes.definitions(),
|
|
|
|
type_list: self.flag_type_list,
|
|
|
|
types: types,
|
|
|
|
with_filename: with_filename,
|
2016-09-05 23:36:41 +02:00
|
|
|
};
|
|
|
|
// If stdout is a tty, then apply some special default options.
|
2016-09-10 06:05:20 +02:00
|
|
|
if atty::on_stdout() || self.flag_pretty {
|
2016-09-05 23:36:41 +02:00
|
|
|
if !self.flag_no_line_number && !args.count {
|
|
|
|
args.line_number = true;
|
|
|
|
}
|
|
|
|
if !self.flag_no_heading {
|
|
|
|
args.heading = true;
|
|
|
|
}
|
|
|
|
}
|
2016-09-23 03:32:38 +02:00
|
|
|
if self.flag_vimgrep {
|
|
|
|
args.column = true;
|
|
|
|
args.line_number = true;
|
|
|
|
}
|
2016-09-05 23:36:41 +02:00
|
|
|
Ok(args)
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
fn add_types(&self, types: &mut TypesBuilder) -> Result<()> {
|
|
|
|
for ty in &self.flag_type_clear {
|
|
|
|
types.clear(ty);
|
|
|
|
}
|
|
|
|
for def in &self.flag_type_add {
|
|
|
|
try!(types.add_def(def));
|
|
|
|
}
|
|
|
|
for ty in &self.flag_type {
|
|
|
|
types.select(ty);
|
|
|
|
}
|
|
|
|
for ty in &self.flag_type_not {
|
2016-09-11 19:26:53 +02:00
|
|
|
types.negate(ty);
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
Ok(())
|
|
|
|
}
|
2016-09-17 22:55:58 +02:00
|
|
|
|
|
|
|
fn pattern(&self) -> String {
|
|
|
|
if !self.flag_regexp.is_empty() {
|
|
|
|
if self.flag_fixed_strings {
|
|
|
|
self.flag_regexp.iter().cloned().map(|lit| {
|
|
|
|
self.word_pattern(regex::quote(&lit))
|
|
|
|
}).collect::<Vec<String>>().join("|")
|
|
|
|
} else {
|
|
|
|
self.flag_regexp.iter().cloned().map(|pat| {
|
|
|
|
self.word_pattern(pat)
|
|
|
|
}).collect::<Vec<String>>().join("|")
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if self.flag_fixed_strings {
|
|
|
|
self.word_pattern(regex::quote(&self.arg_pattern))
|
|
|
|
} else {
|
|
|
|
self.word_pattern(self.arg_pattern.clone())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn word_pattern(&self, s: String) -> String {
|
|
|
|
if self.flag_word_regexp {
|
|
|
|
format!(r"\b{}\b", s)
|
|
|
|
} else {
|
|
|
|
s
|
|
|
|
}
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Args {
|
|
|
|
/// Parse the command line arguments for this process.
|
|
|
|
///
|
|
|
|
/// If a CLI usage error occurred, then exit the process and print a usage
|
|
|
|
/// or error message. Similarly, if the user requested the version of
|
2016-09-08 22:15:44 +02:00
|
|
|
/// ripgrep, then print the version and exit.
|
2016-09-05 06:52:23 +02:00
|
|
|
///
|
|
|
|
/// Also, initialize a global logger.
|
|
|
|
pub fn parse() -> Result<Args> {
|
2016-09-11 19:27:08 +02:00
|
|
|
// Get all of the arguments, being careful to require valid UTF-8.
|
|
|
|
let mut argv = vec![];
|
2016-09-11 19:33:25 +02:00
|
|
|
for arg in env::args_os() {
|
2016-09-11 19:27:08 +02:00
|
|
|
match arg.into_string() {
|
|
|
|
Ok(s) => argv.push(s),
|
|
|
|
Err(s) => {
|
|
|
|
errored!("Argument '{}' is not valid UTF-8. \
|
|
|
|
Use hex escape sequences to match arbitrary \
|
|
|
|
bytes in a pattern (e.g., \\xFF).",
|
|
|
|
s.to_string_lossy());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2016-09-25 02:22:02 +02:00
|
|
|
let mut raw: RawArgs =
|
2016-09-05 06:52:23 +02:00
|
|
|
Docopt::new(USAGE)
|
2016-09-11 19:27:08 +02:00
|
|
|
.and_then(|d| d.argv(argv).version(Some(version())).decode())
|
2016-09-05 06:52:23 +02:00
|
|
|
.unwrap_or_else(|e| e.exit());
|
|
|
|
|
|
|
|
let mut logb = env_logger::LogBuilder::new();
|
|
|
|
if raw.flag_debug {
|
|
|
|
logb.filter(None, log::LogLevelFilter::Debug);
|
|
|
|
} else {
|
|
|
|
logb.filter(None, log::LogLevelFilter::Warn);
|
|
|
|
}
|
|
|
|
if let Err(err) = logb.init() {
|
|
|
|
errored!("failed to initialize logger: {}", err);
|
|
|
|
}
|
|
|
|
|
2016-09-25 02:22:02 +02:00
|
|
|
// *sigh*... If --files is given, then the first path ends up in
|
|
|
|
// pattern.
|
|
|
|
if raw.flag_files {
|
|
|
|
if !raw.arg_pattern.is_empty() {
|
|
|
|
raw.arg_path.insert(0, raw.arg_pattern.clone());
|
|
|
|
}
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
raw.to_args().map_err(From::from)
|
|
|
|
}
|
|
|
|
|
2016-09-08 22:15:44 +02:00
|
|
|
/// Returns true if ripgrep should print the files it will search and exit
|
2016-09-05 06:52:23 +02:00
|
|
|
/// (but not do any actual searching).
|
|
|
|
pub fn files(&self) -> bool {
|
|
|
|
self.files
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new line based matcher. The matcher returned can be used
|
|
|
|
/// across multiple threads simultaneously. This matcher only supports
|
|
|
|
/// basic searching of regular expressions in a single buffer.
|
|
|
|
///
|
|
|
|
/// The pattern and other flags are taken from the command line.
|
2016-09-07 01:33:19 +02:00
|
|
|
pub fn grep(&self) -> Grep {
|
|
|
|
self.grep.clone()
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Creates a new input buffer that is used in searching.
|
|
|
|
pub fn input_buffer(&self) -> InputBuffer {
|
|
|
|
let mut inp = InputBuffer::new();
|
|
|
|
inp.eol(self.eol);
|
|
|
|
inp
|
|
|
|
}
|
|
|
|
|
2016-09-07 03:47:33 +02:00
|
|
|
/// Whether we should prefer memory maps for searching or not.
|
|
|
|
pub fn mmap(&self) -> bool {
|
|
|
|
self.mmap
|
|
|
|
}
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
/// Create a new printer of individual search results that writes to the
|
|
|
|
/// writer given.
|
2016-09-14 03:11:46 +02:00
|
|
|
pub fn printer<W: Terminal + Send>(&self, wtr: W) -> Printer<W> {
|
2016-09-09 03:46:14 +02:00
|
|
|
let mut p = Printer::new(wtr)
|
2016-09-07 01:50:27 +02:00
|
|
|
.column(self.column)
|
2016-09-05 06:52:23 +02:00
|
|
|
.context_separator(self.context_separator.clone())
|
|
|
|
.eol(self.eol)
|
2016-09-05 23:36:41 +02:00
|
|
|
.heading(self.heading)
|
2016-09-23 03:32:38 +02:00
|
|
|
.line_per_match(self.line_per_match)
|
2016-09-05 06:52:23 +02:00
|
|
|
.quiet(self.quiet)
|
2016-09-05 23:36:41 +02:00
|
|
|
.with_filename(self.with_filename);
|
|
|
|
if let Some(ref rep) = self.replace {
|
|
|
|
p = p.replace(rep.clone());
|
|
|
|
}
|
|
|
|
p
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new printer of search results for an entire file that writes
|
|
|
|
/// to the writer given.
|
2016-09-09 03:46:14 +02:00
|
|
|
pub fn out(&self) -> Out {
|
|
|
|
let mut out = Out::new(self.color);
|
2016-09-26 03:27:17 +02:00
|
|
|
if let Some(filesep) = self.file_separator() {
|
|
|
|
out = out.file_separator(filesep);
|
|
|
|
}
|
|
|
|
out
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Retrieve the configured file separator.
|
|
|
|
pub fn file_separator(&self) -> Option<Vec<u8>> {
|
2016-09-24 04:06:34 +02:00
|
|
|
if self.heading && !self.count && !self.files_with_matches {
|
2016-09-26 03:27:17 +02:00
|
|
|
Some(b"".to_vec())
|
2016-09-05 23:36:41 +02:00
|
|
|
} else if self.before_context > 0 || self.after_context > 0 {
|
2016-09-26 03:27:17 +02:00
|
|
|
Some(self.context_separator.clone())
|
|
|
|
} else {
|
|
|
|
None
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-09-09 03:46:14 +02:00
|
|
|
/// Create a new buffer for use with searching.
|
2016-09-14 03:11:46 +02:00
|
|
|
#[cfg(not(windows))]
|
|
|
|
pub fn outbuf(&self) -> ColoredTerminal<term::TerminfoTerminal<Vec<u8>>> {
|
|
|
|
ColoredTerminal::new(vec![], self.color)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new buffer for use with searching.
|
|
|
|
#[cfg(windows)]
|
|
|
|
pub fn outbuf(&self) -> ColoredTerminal<WindowsBuffer> {
|
|
|
|
ColoredTerminal::new_buffer(self.color)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new buffer for use with searching.
|
|
|
|
#[cfg(not(windows))]
|
|
|
|
pub fn stdout(
|
|
|
|
&self,
|
|
|
|
) -> ColoredTerminal<term::TerminfoTerminal<io::BufWriter<io::Stdout>>> {
|
|
|
|
ColoredTerminal::new(io::BufWriter::new(io::stdout()), self.color)
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new buffer for use with searching.
|
|
|
|
#[cfg(windows)]
|
|
|
|
pub fn stdout(&self) -> ColoredTerminal<WinConsole<io::Stdout>> {
|
|
|
|
ColoredTerminal::new_stdout(self.color)
|
2016-09-09 03:46:14 +02:00
|
|
|
}
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
/// Return the paths that should be searched.
|
|
|
|
pub fn paths(&self) -> &[PathBuf] {
|
|
|
|
&self.paths
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new line based searcher whose configuration is taken from the
|
|
|
|
/// command line. This searcher supports a dizzying array of features:
|
|
|
|
/// inverted matching, line counting, context control and more.
|
2016-09-09 03:46:14 +02:00
|
|
|
pub fn searcher<'a, R: io::Read, W: Send + Terminal>(
|
2016-09-05 06:52:23 +02:00
|
|
|
&self,
|
|
|
|
inp: &'a mut InputBuffer,
|
|
|
|
printer: &'a mut Printer<W>,
|
|
|
|
grep: &'a Grep,
|
|
|
|
path: &'a Path,
|
|
|
|
rdr: R,
|
|
|
|
) -> Searcher<'a, R, W> {
|
|
|
|
Searcher::new(inp, printer, grep, path, rdr)
|
|
|
|
.after_context(self.after_context)
|
|
|
|
.before_context(self.before_context)
|
|
|
|
.count(self.count)
|
2016-09-24 04:06:34 +02:00
|
|
|
.files_with_matches(self.files_with_matches)
|
2016-09-05 06:52:23 +02:00
|
|
|
.eol(self.eol)
|
|
|
|
.line_number(self.line_number)
|
|
|
|
.invert_match(self.invert_match)
|
|
|
|
.text(self.text)
|
|
|
|
}
|
|
|
|
|
2016-09-07 03:47:33 +02:00
|
|
|
/// Create a new line based searcher whose configuration is taken from the
|
|
|
|
/// command line. This search operates on an entire file all once (which
|
|
|
|
/// may have been memory mapped).
|
2016-09-09 03:46:14 +02:00
|
|
|
pub fn searcher_buffer<'a, W: Send + Terminal>(
|
2016-09-07 03:47:33 +02:00
|
|
|
&self,
|
|
|
|
printer: &'a mut Printer<W>,
|
|
|
|
grep: &'a Grep,
|
|
|
|
path: &'a Path,
|
|
|
|
buf: &'a [u8],
|
|
|
|
) -> BufferSearcher<'a, W> {
|
|
|
|
BufferSearcher::new(printer, grep, path, buf)
|
|
|
|
.count(self.count)
|
2016-09-24 04:06:34 +02:00
|
|
|
.files_with_matches(self.files_with_matches)
|
2016-09-07 03:47:33 +02:00
|
|
|
.eol(self.eol)
|
|
|
|
.line_number(self.line_number)
|
|
|
|
.invert_match(self.invert_match)
|
|
|
|
.text(self.text)
|
|
|
|
}
|
|
|
|
|
2016-09-05 06:52:23 +02:00
|
|
|
/// Returns the number of worker search threads that should be used.
|
|
|
|
pub fn threads(&self) -> usize {
|
|
|
|
self.threads
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns a list of type definitions currently loaded.
|
|
|
|
pub fn type_defs(&self) -> &[FileTypeDef] {
|
|
|
|
&self.type_defs
|
|
|
|
}
|
|
|
|
|
2016-09-08 22:15:44 +02:00
|
|
|
/// Returns true if ripgrep should print the type definitions currently
|
|
|
|
/// loaded and then exit.
|
2016-09-05 06:52:23 +02:00
|
|
|
pub fn type_list(&self) -> bool {
|
|
|
|
self.type_list
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Create a new recursive directory iterator at the path given.
|
2016-09-05 23:36:41 +02:00
|
|
|
pub fn walker(&self, path: &Path) -> Result<walk::Iter> {
|
2016-09-05 06:52:23 +02:00
|
|
|
let wd = WalkDir::new(path).follow_links(self.follow);
|
|
|
|
let mut ig = Ignore::new();
|
2016-09-27 00:43:15 +02:00
|
|
|
// Only register ignore rules if this is a directory. If it's a file,
|
|
|
|
// then it was explicitly given by the end user, so we always search
|
|
|
|
// it.
|
|
|
|
if path.is_dir() {
|
|
|
|
ig.ignore_hidden(!self.hidden);
|
|
|
|
ig.no_ignore(self.no_ignore);
|
|
|
|
ig.no_ignore_vcs(self.no_ignore_vcs);
|
|
|
|
ig.add_types(self.types.clone());
|
|
|
|
if !self.no_ignore_parent {
|
|
|
|
try!(ig.push_parents(path));
|
|
|
|
}
|
|
|
|
if let Some(ref overrides) = self.glob_overrides {
|
|
|
|
ig.add_override(overrides.clone());
|
|
|
|
}
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
2016-09-05 23:36:41 +02:00
|
|
|
Ok(walk::Iter::new(ig, wd))
|
2016-09-05 06:52:23 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Returns the version of this crate as `MAJOR.MINOR.PATCH`.
///
/// The components are read from the `CARGO_PKG_VERSION_*` environment
/// variables at compile time. If any of them is not set, an empty string is
/// returned instead.
fn version() -> String {
    let major = option_env!("CARGO_PKG_VERSION_MAJOR");
    let minor = option_env!("CARGO_PKG_VERSION_MINOR");
    let patch = option_env!("CARGO_PKG_VERSION_PATCH");
    if let (Some(major), Some(minor), Some(patch)) = (major, minor, patch) {
        format!("{}.{}.{}", major, minor, patch)
    } else {
        String::new()
    }
}
|
|
|
|
|
|
|
|
/// A single state in the state machine used by `unescape`.
#[derive(Clone, Copy, Eq, PartialEq)]
enum State {
    /// A backslash was just read.
    Escape,
    /// `\x` was just read; the first hex digit is expected next.
    HexFirst,
    /// `\x` and one hex digit (carried in the variant) were read; the
    /// second hex digit is expected next.
    HexSecond(char),
    /// Outside of any escape sequence.
    Literal,
}

/// Unescapes a string given on the command line. It supports a limited set
/// of escape sequences:
///
/// * \t, \r and \n are mapped to their corresponding ASCII bytes.
/// * \xZZ hexadecimal escapes are mapped to their byte.
///
/// Everything else, including unrecognized or incomplete escape sequences,
/// is copied to the output unchanged (as UTF-8).
fn unescape(s: &str) -> Vec<u8> {
    use self::State::*;

    let mut out = Vec::new();
    let mut state = Literal;
    for ch in s.chars() {
        // Every arm emits whatever bytes are now known and evaluates to the
        // state to continue in.
        state = match (state, ch) {
            (Escape, 'n') => { out.push(b'\n'); Literal }
            (Escape, 'r') => { out.push(b'\r'); Literal }
            (Escape, 't') => { out.push(b'\t'); Literal }
            (Escape, 'x') => HexFirst,
            (Escape, other) => {
                // Not an escape we know: keep the backslash as it was.
                out.extend_from_slice(format!(r"\{}", other).as_bytes());
                Literal
            }
            (HexFirst, digit) if digit.is_digit(16) => HexSecond(digit),
            (HexFirst, other) => {
                out.extend_from_slice(format!(r"\x{}", other).as_bytes());
                Literal
            }
            (HexSecond(first), second) => {
                match (first.to_digit(16), second.to_digit(16)) {
                    (Some(hi), Some(lo)) => {
                        // Both digits are below 16, so the value fits in a
                        // byte and the cast cannot truncate.
                        out.push((hi * 16 + lo) as u8);
                    }
                    _ => {
                        // `first` is always a hex digit here, so this is
                        // the case of an invalid second digit: emit the
                        // sequence as it was written.
                        let original = format!(r"\x{}{}", first, second);
                        out.extend_from_slice(original.as_bytes());
                    }
                }
                Literal
            }
            (Literal, '\\') => Escape,
            (Literal, other) => {
                out.extend_from_slice(other.to_string().as_bytes());
                Literal
            }
        };
    }
    // The input ended in the middle of an escape sequence: emit what was
    // read so far without interpreting it.
    match state {
        Escape => out.push(b'\\'),
        HexFirst => out.extend_from_slice(b"\\x"),
        HexSecond(first) => {
            out.extend_from_slice(format!("\\x{}", first).as_bytes());
        }
        Literal => {}
    }
    out
}
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::unescape;

    /// Asserts that unescaping `input` yields exactly the bytes `expected`.
    fn check(expected: &[u8], input: &str) {
        assert_eq!(expected.to_vec(), unescape(input));
    }

    #[test]
    fn unescape_nul() {
        check(b"\x00", r"\x00");
    }

    #[test]
    fn unescape_nl() {
        check(b"\n", r"\n");
    }

    #[test]
    fn unescape_tab() {
        check(b"\t", r"\t");
    }

    #[test]
    fn unescape_carriage() {
        check(b"\r", r"\r");
    }

    // Unknown or incomplete escapes must come back unchanged.

    #[test]
    fn unescape_nothing_simple() {
        check(b"\\a", r"\a");
    }

    #[test]
    fn unescape_nothing_hex0() {
        check(b"\\x", r"\x");
    }

    #[test]
    fn unescape_nothing_hex1() {
        check(b"\\xz", r"\xz");
    }

    #[test]
    fn unescape_nothing_hex2() {
        check(b"\\xzz", r"\xzz");
    }
}
|