mirror of https://github.com/BurntSushi/ripgrep.git synced 2024-12-12 19:18:24 +02:00
2016-11-20 15:01:37 -05:00

752 lines
25 KiB

use std::cmp;
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::io::{self, BufRead};
use std::ops;
use std::path::{Path, PathBuf};
use std::process;
use clap;
use env_logger;
use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use termcolor;
use app;
use atty;
use ignore::overrides::{Override, OverrideBuilder};
use ignore::types::{FileTypeDef, Types, TypesBuilder};
use ignore;
use printer::{ColorSpecs, Printer};
use unescape::unescape;
use worker::{Worker, WorkerBuilder};
use {Result, version};
// NOTE(review): this copy of the file appears to have lost indentation,
// blank lines and closing braces (the struct below is never closed before
// `impl Args`) — restore from upstream before compiling.
/// Args are transformed/normalized from ArgMatches.
///
/// Every field is filled in by `ArgMatches::to_args`; the per-field notes
/// below name the flag or helper each value is taken from there.
pub struct Args {
/// Paths to search, as resolved by `ArgMatches::paths`.
paths: Vec<PathBuf>,
/// Context line counts from `ArgMatches::contexts` (the `after-context`,
/// `before-context` and `context` values).
after_context: usize,
before_context: usize,
/// Whether output should be colored (`ArgMatches::color`).
color: bool,
/// Color choice handed to `termcolor` writers (`ArgMatches::color_choice`).
color_choice: termcolor::ColorChoice,
/// Color specifications built by `ArgMatches::color_specs`.
colors: ColorSpecs,
/// Whether column numbers are shown (`column` or `vimgrep`).
column: bool,
/// Separator bytes from `context-separator`; `--` when the flag is absent.
context_separator: Vec<u8>,
/// Presence of the `count` flag.
count: bool,
/// Presence of the `files-with-matches` flag.
files_with_matches: bool,
/// Presence of the `files-without-match` flag.
files_without_matches: bool,
/// Line terminator byte; `to_args` always sets this to `b'\n'`.
eol: u8,
/// Presence of the `files` flag (list the files to search, then exit).
files: bool,
/// Presence of the `follow` flag.
follow: bool,
/// Glob overrides built from the `glob` values (`ArgMatches::overrides`).
glob_overrides: Override,
/// The matcher built by `ArgMatches::grep`.
grep: Grep,
/// Whether matches are grouped under file name headings.
heading: bool,
/// Whether hidden files/directories are searched.
hidden: bool,
/// Extra ignore files given via `ignore-file`.
ignore_files: Vec<PathBuf>,
/// Presence of the `invert-match` flag.
invert_match: bool,
/// Whether line numbers are shown (`ArgMatches::line_number`).
line_number: bool,
/// Presence of the `vimgrep` flag.
line_per_match: bool,
/// Value of `max-count`, if given; `Some(0)` means nothing can match
/// (see `never_match`).
max_count: Option<u64>,
/// Value of `maxdepth`, if given.
maxdepth: Option<usize>,
/// Whether memory-map searching should be tried (`ArgMatches::mmap`).
mmap: bool,
/// Ignore-file handling switches (`ArgMatches::no_ignore*`).
no_ignore: bool,
no_ignore_parent: bool,
no_ignore_vcs: bool,
/// Presence of the `no-messages` flag (suppress error messages).
no_messages: bool,
/// Presence of the `null` flag.
null: bool,
/// Presence of the `quiet` flag.
quiet: bool,
/// Replacement bytes from `replace`, if given.
replace: Option<Vec<u8>>,
/// Whether every file is treated as text (`ArgMatches::text`).
text: bool,
/// Number of worker search threads (`ArgMatches::threads`).
threads: usize,
/// Presence of the `type-list` flag (print type definitions, then exit).
type_list: bool,
/// File type matcher built by `ArgMatches::types`.
types: Types,
/// Whether file names are emitted with matches
/// (`ArgMatches::with_filename`).
with_filename: bool,
// NOTE(review): in this copy of the file, indentation, blank lines, closing
// braces and several statements appear to be missing (many methods below
// stop right after their signature) — restore from upstream before relying
// on any body shown here.
impl Args {
/// Parse the command line arguments for this process.
///
/// If a CLI usage error occurred, then exit the process and print a usage
/// or error message. Similarly, if the user requested the version of
/// ripgrep, then print the version and exit.
///
/// Also, initialize a global logger.
pub fn parse() -> Result<Args> {
let matches = app::app_short().get_matches();
// `help-short` prints the short help text, `help` the long one. The
// results of printing are deliberately discarded with `let _`.
if matches.is_present("help-short") {
let _ = ::app::app_short().print_help();
let _ = println!("");
if matches.is_present("help") {
let _ = ::app::app_long().print_help();
let _ = println!("");
if matches.is_present("version") {
println!("ripgrep {}", crate_version!());
// Logger verbosity: Debug when `debug` is given, Warn otherwise.
let mut logb = env_logger::LogBuilder::new();
if matches.is_present("debug") {
logb.filter(None, log::LogLevelFilter::Debug);
} else {
logb.filter(None, log::LogLevelFilter::Warn);
if let Err(err) = logb.init() {
errored!("failed to initialize logger: {}", err);
/// Returns true if ripgrep should print the files it will search and exit
/// (but not do any actual searching).
pub fn files(&self) -> bool {
/// Create a new line based matcher. The matcher returned can be used
/// across multiple threads simultaneously. This matcher only supports
/// basic searching of regular expressions in a single buffer.
///
/// The pattern and other flags are taken from the command line.
pub fn grep(&self) -> Grep {
/// Whether ripgrep should be quiet or not.
pub fn quiet(&self) -> bool {
/// Create a new printer of individual search results that writes to the
/// writer given.
pub fn printer<W: termcolor::WriteColor>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr)
// The replacement is cloned because the printer takes it by value while
// `self.replace` is only borrowed here.
if let Some(ref rep) = self.replace {
p = p.replace(rep.clone());
/// Retrieve the configured file separator.
///
/// The choice depends on heading mode (when none of the count /
/// files-with-matches / files-without-matches modes is active) and on
/// whether any context lines were requested.
// NOTE(review): the value returned by each branch is not visible in this
// copy — confirm against upstream.
pub fn file_separator(&self) -> Option<Vec<u8>> {
if self.heading && !self.count && !self.files_with_matches && !self.files_without_matches {
} else if self.before_context > 0 || self.after_context > 0 {
} else {
/// Returns true if the given arguments are known to never produce a match.
///
/// This is the case exactly when `max_count` is `Some(0)`.
pub fn never_match(&self) -> bool {
self.max_count == Some(0)
/// Create a new writer for single-threaded searching with color support.
pub fn stdout(&self) -> termcolor::Stdout {
/// Create a new buffer writer for multi-threaded searching with color
/// support.
pub fn buffer_writer(&self) -> termcolor::BufferWriter {
let mut wtr = termcolor::BufferWriter::stdout(self.color_choice);
/// Return the paths that should be searched.
pub fn paths(&self) -> &[PathBuf] {
/// Returns true if there is exactly one file path given to search.
///
/// The single path counts if it is `-` or if it refers to an existing
/// regular file (`Path::is_file`).
pub fn is_one_path(&self) -> bool {
self.paths.len() == 1
&& (self.paths[0] == Path::new("-") || self.paths[0].is_file())
/// Create a worker whose configuration is taken from the
/// command line.
pub fn worker(&self) -> Worker {
/// Returns the number of worker search threads that should be used.
pub fn threads(&self) -> usize {
/// Returns a list of type definitions currently loaded.
pub fn type_defs(&self) -> &[FileTypeDef] {
/// Returns true if ripgrep should print the type definitions currently
/// loaded and then exit.
pub fn type_list(&self) -> bool {
/// Returns true if error messages should be suppressed.
pub fn no_messages(&self) -> bool {
/// Create a new recursive directory iterator over the paths in argv.
pub fn walker(&self) -> ignore::Walk {
/// Create a new parallel recursive directory iterator over the paths
/// in argv.
pub fn walker_parallel(&self) -> ignore::WalkParallel {
/// Build the `ignore::WalkBuilder` used to construct directory
/// iterators from this configuration.
///
/// The builder is seeded with the first path, then visits the remaining
/// paths and the extra ignore files, and finally applies the git-related
/// ignore settings.
fn walker_builder(&self) -> ignore::WalkBuilder {
let paths = self.paths();
// Indexing `paths[0]` panics on an empty slice.
// NOTE(review): presumably `ArgMatches::paths` guarantees at least one
// entry (it has an `is_empty` check) — confirm.
let mut wd = ignore::WalkBuilder::new(&paths[0]);
for path in &paths[1..] {
for path in &self.ignore_files {
// A failure to add an ignore file is reported on stderr unless
// messages are suppressed; it does not abort building the walker.
if let Some(err) = wd.add_ignore(path) {
if !self.no_messages {
eprintln!("{}", err);
// All three git ignore sources are enabled only when neither `no_ignore`
// nor `no_ignore_vcs` is set.
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
/// ArgMatches wraps clap::ArgMatches and provides semantic meaning to several
/// options/flags.
///
/// It is a tuple struct whose only field (`.0`) is the wrapped
/// `clap::ArgMatches`.
struct ArgMatches<'a>(clap::ArgMatches<'a>);
/// Dereferences to the wrapped `clap::ArgMatches`, which is what allows the
/// methods on `ArgMatches` to call e.g. `self.is_present(..)` directly.
impl<'a> ops::Deref for ArgMatches<'a> {
type Target = clap::ArgMatches<'a>;
/// Returns a reference to the wrapped `clap::ArgMatches` (field `.0`).
fn deref(&self) -> &clap::ArgMatches<'a> { &self.0 }
// NOTE(review): in this copy of the file, indentation, blank lines, closing
// braces and several statements appear to be missing (a number of branches
// below have no visible body) — restore from upstream before relying on any
// body shown here.
impl<'a> ArgMatches<'a> {
/// Convert the result of parsing CLI arguments into ripgrep's
/// configuration.
///
/// Returns an error if any of the fallible steps (those wrapped in `try!`)
/// fails.
fn to_args(&self) -> Result<Args> {
let paths = self.paths();
// `mmap` and `with_filename` both inspect the resolved paths, so they are
// computed before `paths` is moved into `Args` below.
let mmap = try!(self.mmap(&paths));
let with_filename = self.with_filename(&paths);
let (before_context, after_context) = try!(self.contexts());
let args = Args {
paths: paths,
after_context: after_context,
before_context: before_context,
color: self.color(),
color_choice: self.color_choice(),
colors: try!(self.color_specs()),
column: self.column(),
context_separator: self.context_separator(),
count: self.is_present("count"),
files_with_matches: self.is_present("files-with-matches"),
files_without_matches: self.is_present("files-without-match"),
// The line terminator is fixed here; no flag is consulted for it.
eol: b'\n',
files: self.is_present("files"),
follow: self.is_present("follow"),
glob_overrides: try!(self.overrides()),
grep: try!(self.grep()),
heading: self.heading(),
hidden: self.hidden(),
ignore_files: self.ignore_files(),
invert_match: self.is_present("invert-match"),
line_number: self.line_number(),
line_per_match: self.is_present("vimgrep"),
// `usize_of` yields a `usize`; it is converted to the `u64` the field
// stores.
max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
maxdepth: try!(self.usize_of("maxdepth")),
mmap: mmap,
no_ignore: self.no_ignore(),
no_ignore_parent: self.no_ignore_parent(),
no_ignore_vcs: self.no_ignore_vcs(),
no_messages: self.is_present("no-messages"),
null: self.is_present("null"),
quiet: self.is_present("quiet"),
replace: self.replace(),
text: self.text(),
threads: try!(self.threads()),
type_list: self.is_present("type-list"),
types: try!(self.types()),
with_filename: with_filename,
if args.mmap {
debug!("will try to use memory maps");
/// Return all file paths that ripgrep should search.
fn paths(&self) -> Vec<PathBuf> {
// Start from every value given for the `path` argument.
let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
None => vec![],
Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
// If --file, --files or --regexp is given, then the first path is
// always in `pattern`.
if self.is_present("file")
|| self.is_present("files")
|| self.is_present("regexp") {
// In that case the `pattern` value is a path and is placed first.
if let Some(path) = self.value_of_os("pattern") {
paths.insert(0, Path::new(path).to_path_buf());
if paths.is_empty() {
/// Return the default path that ripgrep should search.
fn default_path(&self) -> PathBuf {
// True when any `file` value is `-` (the value `patterns` reads from
// stdin).
let file_is_stdin =
self.values_of_os("file").map_or(false, |mut files| {
files.any(|f| f == "-")
// NOTE(review): the two branches selected by `search_cwd` are not visible
// in this copy; going by the name, true presumably selects the current
// directory — confirm against upstream.
let search_cwd = atty::on_stdin()
|| !atty::stdin_is_readable()
|| (self.is_present("file") && file_is_stdin)
|| self.is_present("files")
|| self.is_present("type-list");
if search_cwd {
} else {
/// Return all of the ignore files given on the command line.
///
/// Returns an empty vec when `ignore-file` was not given.
fn ignore_files(&self) -> Vec<PathBuf> {
match self.values_of_os("ignore-file") {
None => return vec![],
Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
/// Return the pattern that should be used for searching.
///
/// If multiple -e/--regexp flags are given, then they are all collapsed
/// into one pattern.
///
/// If any part of the pattern isn't valid UTF-8, then an error is
/// returned.
fn pattern(&self) -> Result<String> {
/// Get a sequence of all available patterns from the command line.
/// This includes reading the -e/--regexp and -f/--file flags.
///
/// Note that if -F/--fixed-strings is set, then all patterns will be
/// escaped. Similarly, if -w/--word-regexp is set, then all patterns
/// are surrounded by `\b`.
///
/// If any pattern is invalid UTF-8, then an error is returned.
fn patterns(&self) -> Result<Vec<String>> {
let mut pats = vec![];
match self.values_of_os("regexp") {
// Without `regexp`, the `pattern` value is only consulted when `file` is
// absent as well.
None => {
if self.values_of_os("file").is_none() {
if let Some(os_pat) = self.value_of_os("pattern") {
Some(os_pats) => {
for os_pat in os_pats {
// Each `file` value is read line by line; `-` selects stdin, anything
// else is opened as a file (an open failure is returned via `try!`).
if let Some(files) = self.values_of_os("file") {
for file in files {
if file == "-" {
let stdin = io::stdin();
for line in stdin.lock().lines() {
} else {
let f = try!(fs::File::open(file));
for line in io::BufReader::new(f).lines() {
if pats.is_empty() {
/// Converts an OsStr pattern to a String pattern, including word
/// boundaries or escapes if applicable.
///
/// If the pattern is not valid UTF-8, then an error is returned.
fn os_str_pattern(&self, pat: &OsStr) -> Result<String> {
let s = try!(pattern_to_str(pat));
/// Converts a &str pattern to a String pattern, including word
/// boundaries or escapes if applicable.
fn str_pattern(&self, pat: &str) -> String {
// Literal handling is applied first; word handling wraps its result.
let s = self.word_pattern(self.literal_pattern(pat.to_string()));
if s.is_empty() {
} else {
/// Returns the given pattern as a literal pattern if the
/// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
/// unchanged.
fn literal_pattern(&self, pat: String) -> String {
if self.is_present("fixed-strings") {
} else {
/// Returns the given pattern as a word pattern if the -w/--word-regexp
/// flag is set. Otherwise, the pattern is returned unchanged.
fn word_pattern(&self, pat: String) -> String {
if self.is_present("word-regexp") {
format!(r"\b{}\b", pat)
} else {
/// Empty pattern returns a pattern that is guaranteed to produce an empty
/// regular expression that is valid in any position.
fn empty_pattern(&self) -> String {
// This would normally just be an empty string, which works on its
// own, but if the patterns are joined in a set of alternations, then
// you wind up with `foo|`, which is invalid.
/// Returns true if and only if file names containing each match should
/// be emitted.
///
/// `paths` should be a slice of all top-level file paths that ripgrep
/// will need to search.
fn with_filename(&self, paths: &[PathBuf]) -> bool {
// `no-filename` is checked first; otherwise more than one path, or a
// first path that is a directory, each suffice for a true result.
if self.is_present("no-filename") {
} else {
|| paths.len() > 1
|| paths.get(0).map_or(false, |p| p.is_dir())
/// Returns true if and only if memory map searching should be tried.
///
/// `paths` should be a slice of all top-level file paths that ripgrep
/// will need to search.
///
/// Returns an error if the context values cannot be parsed.
fn mmap(&self, paths: &[PathBuf]) -> Result<bool> {
let (before, after) = try!(self.contexts());
// Branch order matters: requested context lines or `no-mmap` are checked
// before an explicit `mmap`, which in turn is checked before the
// platform-specific defaults.
Ok(if before > 0 || after > 0 || self.is_present("no-mmap") {
} else if self.is_present("mmap") {
} else if cfg!(windows) {
// On Windows, memory maps appear faster than read calls. Neat.
} else if cfg!(target_os = "macos") {
// On Mac, memory maps appear to suck. Neat.
} else {
// If we're only searching a few paths and all of them are
// files, then memory maps are probably faster.
paths.len() <= 10 && paths.iter().all(|p| p.is_file())
/// Returns true if and only if line numbers should be shown.
fn line_number(&self) -> bool {
// `no-line-number` and `count` are checked first; otherwise stdout being
// a tty (per `atty::on_stdout`), `pretty` or `vimgrep` each suffice.
if self.is_present("no-line-number") || self.is_present("count") {
} else {
|| atty::on_stdout()
|| self.is_present("pretty")
|| self.is_present("vimgrep")
/// Returns true if and only if column numbers should be shown.
///
/// That is the case when either `column` or `vimgrep` is given.
fn column(&self) -> bool {
self.is_present("column") || self.is_present("vimgrep")
/// Returns true if and only if matches should be grouped with file name
/// headings.
fn heading(&self) -> bool {
if self.is_present("no-heading") {
} else {
|| atty::on_stdout()
|| self.is_present("pretty")
/// Returns the replacement string as UTF-8 bytes if it exists.
///
/// The value is read lossily (`value_of_lossy`) before being turned into
/// bytes.
fn replace(&self) -> Option<Vec<u8>> {
self.value_of_lossy("replace").map(|s| s.into_owned().into_bytes())
/// Returns the unescaped context separator in UTF-8 bytes.
///
/// Defaults to `--` when `context-separator` is not given.
fn context_separator(&self) -> Vec<u8> {
match self.value_of_lossy("context-separator") {
None => b"--".to_vec(),
Some(sep) => unescape(&sep),
/// Returns the before and after contexts from the command line.
///
/// If a context setting was absent, then `0` is returned.
///
/// If there was a problem parsing the values from the user as an integer,
/// then an error is returned.
fn contexts(&self) -> Result<(usize, usize)> {
let after = try!(self.usize_of("after-context")).unwrap_or(0);
let before = try!(self.usize_of("before-context")).unwrap_or(0);
let both = try!(self.usize_of("context")).unwrap_or(0);
// A non-zero `context` value overrides both individual settings; a
// `context` of 0 is indistinguishable from an absent one here.
Ok(if both > 0 {
(both, both)
} else {
(before, after)
/// Returns true if and only if ripgrep should color its output.
fn color(&self) -> bool {
// An absent `color` value is treated as "auto".
let preference = match self.0.value_of_lossy("color") {
None => "auto".to_string(),
Some(v) => v.into_owned(),
// "always" is checked before `vimgrep`, which is checked before "auto".
if preference == "always" {
} else if self.is_present("vimgrep") {
} else if preference == "auto" {
atty::on_stdout() || self.is_present("pretty")
} else {
/// Returns the user's color choice based on command line parameters and
/// environment.
fn color_choice(&self) -> termcolor::ColorChoice {
// An absent `color` value is treated as "auto".
let preference = match self.0.value_of_lossy("color") {
None => "auto".to_string(),
Some(v) => v.into_owned(),
// "always" and "ansi" are checked before `vimgrep`, which is checked
// before "auto".
if preference == "always" {
} else if preference == "ansi" {
} else if self.is_present("vimgrep") {
} else if preference == "auto" {
if atty::on_stdout() || self.is_present("pretty") {
} else {
} else {
/// Returns the color specifications given by the user on the CLI.
///
/// If there was a problem parsing any of the provided specs, then an error
/// is returned.
fn color_specs(&self) -> Result<ColorSpecs> {
// Start with a default set of color specs.
let mut specs = vec![
for spec_str in self.values_of_lossy_vec("colors") {
/// Returns the approximate number of threads that ripgrep should use.
///
/// Returns an error if the `threads` value cannot be parsed as an integer.
fn threads(&self) -> Result<usize> {
// An absent value and an explicit 0 are treated the same way: the count
// is derived from the CPU count, capped at 12.
let threads = try!(self.usize_of("threads")).unwrap_or(0);
Ok(if threads == 0 {
cmp::min(12, num_cpus::get())
} else {
/// Builds a grep matcher from the command line flags.
///
/// If there was a problem extracting the pattern from the command line
/// flags, then an error is returned.
fn grep(&self) -> Result<Grep> {
// NOTE(review): the leading operand of both expressions below is not
// visible in this copy. What is visible: `smart` requires that neither
// `ignore-case` nor `case-sensitive` is given, and `casei` requires that
// `case-sensitive` is not given.
let smart =
&& !self.is_present("ignore-case")
&& !self.is_present("case-sensitive");
let casei =
&& !self.is_present("case-sensitive");
/// Builds the set of glob overrides from the command line flags.
///
/// The builder is rooted at the current directory; failing to determine
/// it is returned as an error.
fn overrides(&self) -> Result<Override> {
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
for glob in self.values_of_lossy_vec("glob") {
/// Builds a file type matcher from the command line flags.
///
/// Flags are visited in this order: `type-clear`, `type-add`, `type`,
/// `type-not`.
fn types(&self) -> Result<Types> {
let mut btypes = TypesBuilder::new();
for ty in self.values_of_lossy_vec("type-clear") {
for def in self.values_of_lossy_vec("type-add") {
for ty in self.values_of_lossy_vec("type") {
for ty in self.values_of_lossy_vec("type-not") {
/// Returns true if ignore files should be ignored.
///
/// One or more occurrences of `unrestricted` are sufficient.
fn no_ignore(&self) -> bool {
|| self.occurrences_of("unrestricted") >= 1
/// Returns true if parent ignore files should be ignored.
///
/// Implied by `no_ignore()`.
fn no_ignore_parent(&self) -> bool {
self.is_present("no-ignore-parent") || self.no_ignore()
/// Returns true if VCS ignore files should be ignored.
///
/// Implied by `no_ignore()`.
fn no_ignore_vcs(&self) -> bool {
self.is_present("no-ignore-vcs") || self.no_ignore()
/// Returns true if and only if hidden files/directories should be
/// searched.
///
/// Two or more occurrences of `unrestricted` are sufficient.
fn hidden(&self) -> bool {
self.is_present("hidden") || self.occurrences_of("unrestricted") >= 2
/// Returns true if and only if all files should be treated as if they
/// were text, even if ripgrep would detect it as a binary file.
///
/// Three or more occurrences of `unrestricted` are sufficient.
fn text(&self) -> bool {
self.is_present("text") || self.occurrences_of("unrestricted") >= 3
/// Like values_of_lossy, but returns an empty vec if the flag is not
/// present.
fn values_of_lossy_vec(&self, name: &str) -> Vec<String> {
/// Safely reads an arg value with the given name, and if it's present,
/// tries to parse it as a usize value.
///
/// Returns `Ok(None)` when the argument is absent, and an error (converted
/// with `From::from`) when the value does not parse.
fn usize_of(&self, name: &str) -> Result<Option<usize>> {
match self.value_of_lossy(name) {
None => Ok(None),
Some(v) => v.parse().map(Some).map_err(From::from),
fn pattern_to_str(s: &OsStr) -> Result<&str> {
match s.to_str() {
Some(s) => Ok(s),
None => Err(From::from(format!(
"Argument '{}' is not valid UTF-8. \
Use hex escape sequences to match arbitrary \
bytes in a pattern (e.g., \\xFF).",