1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2024-12-12 19:18:24 +02:00
ripgrep/src/args.rs
2016-11-20 15:01:37 -05:00

752 lines
25 KiB
Rust

use std::cmp;
use std::env;
use std::ffi::OsStr;
use std::fs;
use std::io::{self, BufRead};
use std::ops;
use std::path::{Path, PathBuf};
use std::process;
use clap;
use env_logger;
use grep::{Grep, GrepBuilder};
use log;
use num_cpus;
use regex;
use termcolor;
use app;
use atty;
use ignore::overrides::{Override, OverrideBuilder};
use ignore::types::{FileTypeDef, Types, TypesBuilder};
use ignore;
use printer::{ColorSpecs, Printer};
use unescape::unescape;
use worker::{Worker, WorkerBuilder};
use {Result, version};
/// Args are transformed/normalized from ArgMatches.
#[derive(Debug)]
pub struct Args {
paths: Vec<PathBuf>,
after_context: usize,
before_context: usize,
color: bool,
color_choice: termcolor::ColorChoice,
colors: ColorSpecs,
column: bool,
context_separator: Vec<u8>,
count: bool,
files_with_matches: bool,
files_without_matches: bool,
eol: u8,
files: bool,
follow: bool,
glob_overrides: Override,
grep: Grep,
heading: bool,
hidden: bool,
ignore_files: Vec<PathBuf>,
invert_match: bool,
line_number: bool,
line_per_match: bool,
max_count: Option<u64>,
maxdepth: Option<usize>,
mmap: bool,
no_ignore: bool,
no_ignore_parent: bool,
no_ignore_vcs: bool,
no_messages: bool,
null: bool,
quiet: bool,
replace: Option<Vec<u8>>,
text: bool,
threads: usize,
type_list: bool,
types: Types,
with_filename: bool,
}
impl Args {
/// Parse the command line arguments for this process.
///
/// If a CLI usage error occurred, then exit the process and print a usage
/// or error message. Similarly, if the user requested the version of
/// ripgrep, then print the version and exit.
///
/// Also, initialize a global logger.
pub fn parse() -> Result<Args> {
let matches = app::app_short().get_matches();
if matches.is_present("help-short") {
let _ = ::app::app_short().print_help();
let _ = println!("");
process::exit(0);
}
if matches.is_present("help") {
let _ = ::app::app_long().print_help();
let _ = println!("");
process::exit(0);
}
if matches.is_present("version") {
println!("ripgrep {}", crate_version!());
process::exit(0);
}
let mut logb = env_logger::LogBuilder::new();
if matches.is_present("debug") {
logb.filter(None, log::LogLevelFilter::Debug);
} else {
logb.filter(None, log::LogLevelFilter::Warn);
}
if let Err(err) = logb.init() {
errored!("failed to initialize logger: {}", err);
}
ArgMatches(matches).to_args()
}
/// Returns true if ripgrep should print the files it will search and exit
/// (but not do any actual searching).
pub fn files(&self) -> bool {
self.files
}
/// Create a new line based matcher. The matcher returned can be used
/// across multiple threads simultaneously. This matcher only supports
/// basic searching of regular expressions in a single buffer.
///
/// The pattern and other flags are taken from the command line.
pub fn grep(&self) -> Grep {
self.grep.clone()
}
/// Whether ripgrep should be quiet or not.
pub fn quiet(&self) -> bool {
self.quiet
}
/// Create a new printer of individual search results that writes to the
/// writer given.
pub fn printer<W: termcolor::WriteColor>(&self, wtr: W) -> Printer<W> {
let mut p = Printer::new(wtr)
.colors(self.colors.clone())
.column(self.column)
.context_separator(self.context_separator.clone())
.eol(self.eol)
.heading(self.heading)
.line_per_match(self.line_per_match)
.null(self.null)
.with_filename(self.with_filename);
if let Some(ref rep) = self.replace {
p = p.replace(rep.clone());
}
p
}
/// Retrieve the configured file separator.
pub fn file_separator(&self) -> Option<Vec<u8>> {
if self.heading && !self.count && !self.files_with_matches && !self.files_without_matches {
Some(b"".to_vec())
} else if self.before_context > 0 || self.after_context > 0 {
Some(self.context_separator.clone())
} else {
None
}
}
/// Returns true if the given arguments are known to never produce a match.
pub fn never_match(&self) -> bool {
self.max_count == Some(0)
}
/// Create a new writer for single-threaded searching with color support.
pub fn stdout(&self) -> termcolor::Stdout {
termcolor::Stdout::new(self.color_choice)
}
/// Create a new buffer writer for multi-threaded searching with color
/// support.
pub fn buffer_writer(&self) -> termcolor::BufferWriter {
let mut wtr = termcolor::BufferWriter::stdout(self.color_choice);
wtr.separator(self.file_separator());
wtr
}
/// Return the paths that should be searched.
pub fn paths(&self) -> &[PathBuf] {
&self.paths
}
/// Returns true if there is exactly one file path given to search.
pub fn is_one_path(&self) -> bool {
self.paths.len() == 1
&& (self.paths[0] == Path::new("-") || self.paths[0].is_file())
}
/// Create a worker whose configuration is taken from the
/// command line.
pub fn worker(&self) -> Worker {
WorkerBuilder::new(self.grep())
.after_context(self.after_context)
.before_context(self.before_context)
.count(self.count)
.files_with_matches(self.files_with_matches)
.files_without_matches(self.files_without_matches)
.eol(self.eol)
.line_number(self.line_number)
.invert_match(self.invert_match)
.max_count(self.max_count)
.mmap(self.mmap)
.no_messages(self.no_messages)
.quiet(self.quiet)
.text(self.text)
.build()
}
/// Returns the number of worker search threads that should be used.
pub fn threads(&self) -> usize {
self.threads
}
/// Returns a list of type definitions currently loaded.
pub fn type_defs(&self) -> &[FileTypeDef] {
self.types.definitions()
}
/// Returns true if ripgrep should print the type definitions currently
/// loaded and then exit.
pub fn type_list(&self) -> bool {
self.type_list
}
/// Returns true if error messages should be suppressed.
pub fn no_messages(&self) -> bool {
self.no_messages
}
/// Create a new recursive directory iterator over the paths in argv.
pub fn walker(&self) -> ignore::Walk {
self.walker_builder().build()
}
/// Create a new parallel recursive directory iterator over the paths
/// in argv.
pub fn walker_parallel(&self) -> ignore::WalkParallel {
self.walker_builder().build_parallel()
}
fn walker_builder(&self) -> ignore::WalkBuilder {
let paths = self.paths();
let mut wd = ignore::WalkBuilder::new(&paths[0]);
for path in &paths[1..] {
wd.add(path);
}
for path in &self.ignore_files {
if let Some(err) = wd.add_ignore(path) {
if !self.no_messages {
eprintln!("{}", err);
}
}
}
wd.follow_links(self.follow);
wd.hidden(!self.hidden);
wd.max_depth(self.maxdepth);
wd.overrides(self.glob_overrides.clone());
wd.types(self.types.clone());
wd.git_global(!self.no_ignore && !self.no_ignore_vcs);
wd.git_ignore(!self.no_ignore && !self.no_ignore_vcs);
wd.git_exclude(!self.no_ignore && !self.no_ignore_vcs);
wd.ignore(!self.no_ignore);
wd.parents(!self.no_ignore_parent);
wd.threads(self.threads());
wd
}
}
/// ArgMatches wraps clap::ArgMatches and provides semantic meaning to several
/// options/flags.
struct ArgMatches<'a>(clap::ArgMatches<'a>);
impl<'a> ops::Deref for ArgMatches<'a> {
type Target = clap::ArgMatches<'a>;
fn deref(&self) -> &clap::ArgMatches<'a> { &self.0 }
}
impl<'a> ArgMatches<'a> {
/// Convert the result of parsing CLI arguments into ripgrep's
/// configuration.
fn to_args(&self) -> Result<Args> {
let paths = self.paths();
let mmap = try!(self.mmap(&paths));
let with_filename = self.with_filename(&paths);
let (before_context, after_context) = try!(self.contexts());
let args = Args {
paths: paths,
after_context: after_context,
before_context: before_context,
color: self.color(),
color_choice: self.color_choice(),
colors: try!(self.color_specs()),
column: self.column(),
context_separator: self.context_separator(),
count: self.is_present("count"),
files_with_matches: self.is_present("files-with-matches"),
files_without_matches: self.is_present("files-without-match"),
eol: b'\n',
files: self.is_present("files"),
follow: self.is_present("follow"),
glob_overrides: try!(self.overrides()),
grep: try!(self.grep()),
heading: self.heading(),
hidden: self.hidden(),
ignore_files: self.ignore_files(),
invert_match: self.is_present("invert-match"),
line_number: self.line_number(),
line_per_match: self.is_present("vimgrep"),
max_count: try!(self.usize_of("max-count")).map(|max| max as u64),
maxdepth: try!(self.usize_of("maxdepth")),
mmap: mmap,
no_ignore: self.no_ignore(),
no_ignore_parent: self.no_ignore_parent(),
no_ignore_vcs: self.no_ignore_vcs(),
no_messages: self.is_present("no-messages"),
null: self.is_present("null"),
quiet: self.is_present("quiet"),
replace: self.replace(),
text: self.text(),
threads: try!(self.threads()),
type_list: self.is_present("type-list"),
types: try!(self.types()),
with_filename: with_filename,
};
if args.mmap {
debug!("will try to use memory maps");
}
Ok(args)
}
/// Return all file paths that ripgrep should search.
fn paths(&self) -> Vec<PathBuf> {
let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
None => vec![],
Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
};
// If --file, --files or --regexp is given, then the first path is
// always in `pattern`.
if self.is_present("file")
|| self.is_present("files")
|| self.is_present("regexp") {
if let Some(path) = self.value_of_os("pattern") {
paths.insert(0, Path::new(path).to_path_buf());
}
}
if paths.is_empty() {
paths.push(self.default_path());
}
paths
}
/// Return the default path that ripgrep should search.
fn default_path(&self) -> PathBuf {
let file_is_stdin =
self.values_of_os("file").map_or(false, |mut files| {
files.any(|f| f == "-")
});
let search_cwd = atty::on_stdin()
|| !atty::stdin_is_readable()
|| (self.is_present("file") && file_is_stdin)
|| self.is_present("files")
|| self.is_present("type-list");
if search_cwd {
Path::new("./").to_path_buf()
} else {
Path::new("-").to_path_buf()
}
}
/// Return all of the ignore files given on the command line.
fn ignore_files(&self) -> Vec<PathBuf> {
match self.values_of_os("ignore-file") {
None => return vec![],
Some(vals) => vals.map(|p| Path::new(p).to_path_buf()).collect(),
}
}
/// Return the pattern that should be used for searching.
///
/// If multiple -e/--regexp flags are given, then they are all collapsed
/// into one pattern.
///
/// If any part of the pattern isn't valid UTF-8, then an error is
/// returned.
fn pattern(&self) -> Result<String> {
Ok(try!(self.patterns()).join("|"))
}
/// Get a sequence of all available patterns from the command line.
/// This includes reading the -e/--regexp and -f/--file flags.
///
/// Note that if -F/--fixed-strings is set, then all patterns will be
/// escaped. Similarly, if -w/--word-regexp is set, then all patterns
/// are surrounded by `\b`.
///
/// If any pattern is invalid UTF-8, then an error is returned.
fn patterns(&self) -> Result<Vec<String>> {
let mut pats = vec![];
match self.values_of_os("regexp") {
None => {
if self.values_of_os("file").is_none() {
if let Some(os_pat) = self.value_of_os("pattern") {
pats.push(try!(self.os_str_pattern(os_pat)));
}
}
}
Some(os_pats) => {
for os_pat in os_pats {
pats.push(try!(self.os_str_pattern(os_pat)));
}
}
}
if let Some(files) = self.values_of_os("file") {
for file in files {
if file == "-" {
let stdin = io::stdin();
for line in stdin.lock().lines() {
pats.push(self.str_pattern(&try!(line)));
}
} else {
let f = try!(fs::File::open(file));
for line in io::BufReader::new(f).lines() {
pats.push(self.str_pattern(&try!(line)));
}
}
}
}
if pats.is_empty() {
pats.push(self.empty_pattern())
}
Ok(pats)
}
/// Converts an OsStr pattern to a String pattern, including word
/// boundaries or escapes if applicable.
///
/// If the pattern is not valid UTF-8, then an error is returned.
fn os_str_pattern(&self, pat: &OsStr) -> Result<String> {
let s = try!(pattern_to_str(pat));
Ok(self.str_pattern(s))
}
/// Converts a &str pattern to a String pattern, including word
/// boundaries or escapes if applicable.
fn str_pattern(&self, pat: &str) -> String {
let s = self.word_pattern(self.literal_pattern(pat.to_string()));
if s.is_empty() {
self.empty_pattern()
} else {
s
}
}
/// Returns the given pattern as a literal pattern if the
/// -F/--fixed-strings flag is set. Otherwise, the pattern is returned
/// unchanged.
fn literal_pattern(&self, pat: String) -> String {
if self.is_present("fixed-strings") {
regex::quote(&pat)
} else {
pat
}
}
/// Returns the given pattern as a word pattern if the -w/--word-regexp
/// flag is set. Otherwise, the pattern is returned unchanged.
fn word_pattern(&self, pat: String) -> String {
if self.is_present("word-regexp") {
format!(r"\b{}\b", pat)
} else {
pat
}
}
/// Empty pattern returns a pattern that is guaranteed to produce an empty
/// regular expression that is valid in any position.
fn empty_pattern(&self) -> String {
// This would normally just be an empty string, which works on its
// own, but if the patterns are joined in a set of alternations, then
// you wind up with `foo|`, which is invalid.
self.word_pattern("z{0}".to_string())
}
/// Returns true if and only if file names containing each match should
/// be emitted.
///
/// `paths` should be a slice of all top-level file paths that ripgrep
/// will need to search.
fn with_filename(&self, paths: &[PathBuf]) -> bool {
if self.is_present("no-filename") {
false
} else {
self.is_present("with-filename")
|| paths.len() > 1
|| paths.get(0).map_or(false, |p| p.is_dir())
}
}
/// Returns true if and only if memory map searching should be tried.
///
/// `paths` should be a slice of all top-level file paths that ripgrep
/// will need to search.
fn mmap(&self, paths: &[PathBuf]) -> Result<bool> {
let (before, after) = try!(self.contexts());
Ok(if before > 0 || after > 0 || self.is_present("no-mmap") {
false
} else if self.is_present("mmap") {
true
} else if cfg!(windows) {
// On Windows, memory maps appear faster than read calls. Neat.
true
} else if cfg!(target_os = "macos") {
// On Mac, memory maps appear to suck. Neat.
false
} else {
// If we're only searching a few paths and all of them are
// files, then memory maps are probably faster.
paths.len() <= 10 && paths.iter().all(|p| p.is_file())
})
}
/// Returns true if and only if line numbers should be shown.
fn line_number(&self) -> bool {
if self.is_present("no-line-number") || self.is_present("count") {
false
} else {
self.is_present("line-number")
|| atty::on_stdout()
|| self.is_present("pretty")
|| self.is_present("vimgrep")
}
}
/// Returns true if and only if column numbers should be shown.
fn column(&self) -> bool {
self.is_present("column") || self.is_present("vimgrep")
}
/// Returns true if and only if matches should be grouped with file name
/// headings.
fn heading(&self) -> bool {
if self.is_present("no-heading") {
false
} else {
self.is_present("heading")
|| atty::on_stdout()
|| self.is_present("pretty")
}
}
/// Returns the replacement string as UTF-8 bytes if it exists.
fn replace(&self) -> Option<Vec<u8>> {
self.value_of_lossy("replace").map(|s| s.into_owned().into_bytes())
}
/// Returns the unescaped context separator in UTF-8 bytes.
fn context_separator(&self) -> Vec<u8> {
match self.value_of_lossy("context-separator") {
None => b"--".to_vec(),
Some(sep) => unescape(&sep),
}
}
/// Returns the before and after contexts from the command line.
///
/// If a context setting was absent, then `0` is returned.
///
/// If there was a problem parsing the values from the user as an integer,
/// then an error is returned.
fn contexts(&self) -> Result<(usize, usize)> {
let after = try!(self.usize_of("after-context")).unwrap_or(0);
let before = try!(self.usize_of("before-context")).unwrap_or(0);
let both = try!(self.usize_of("context")).unwrap_or(0);
Ok(if both > 0 {
(both, both)
} else {
(before, after)
})
}
/// Returns true if and only if ripgrep should color its output.
fn color(&self) -> bool {
let preference = match self.0.value_of_lossy("color") {
None => "auto".to_string(),
Some(v) => v.into_owned(),
};
if preference == "always" {
true
} else if self.is_present("vimgrep") {
false
} else if preference == "auto" {
atty::on_stdout() || self.is_present("pretty")
} else {
false
}
}
/// Returns the user's color choice based on command line parameters and
/// environment.
fn color_choice(&self) -> termcolor::ColorChoice {
let preference = match self.0.value_of_lossy("color") {
None => "auto".to_string(),
Some(v) => v.into_owned(),
};
if preference == "always" {
termcolor::ColorChoice::Always
} else if preference == "ansi" {
termcolor::ColorChoice::AlwaysAnsi
} else if self.is_present("vimgrep") {
termcolor::ColorChoice::Never
} else if preference == "auto" {
if atty::on_stdout() || self.is_present("pretty") {
termcolor::ColorChoice::Auto
} else {
termcolor::ColorChoice::Never
}
} else {
termcolor::ColorChoice::Never
}
}
/// Returns the color specifications given by the user on the CLI.
///
/// If the was a problem parsing any of the provided specs, then an error
/// is returned.
fn color_specs(&self) -> Result<ColorSpecs> {
// Start with a default set of color specs.
let mut specs = vec![
"path:fg:green".parse().unwrap(),
"path:style:bold".parse().unwrap(),
"line:fg:blue".parse().unwrap(),
"line:style:bold".parse().unwrap(),
"match:fg:red".parse().unwrap(),
"match:style:bold".parse().unwrap(),
];
for spec_str in self.values_of_lossy_vec("colors") {
specs.push(try!(spec_str.parse()));
}
Ok(ColorSpecs::new(&specs))
}
/// Returns the approximate number of threads that ripgrep should use.
fn threads(&self) -> Result<usize> {
let threads = try!(self.usize_of("threads")).unwrap_or(0);
Ok(if threads == 0 {
cmp::min(12, num_cpus::get())
} else {
threads
})
}
/// Builds a grep matcher from the command line flags.
///
/// If there was a problem extracting the pattern from the command line
/// flags, then an error is returned.
fn grep(&self) -> Result<Grep> {
let smart =
self.is_present("smart-case")
&& !self.is_present("ignore-case")
&& !self.is_present("case-sensitive");
let casei =
self.is_present("ignore-case")
&& !self.is_present("case-sensitive");
GrepBuilder::new(&try!(self.pattern()))
.case_smart(smart)
.case_insensitive(casei)
.line_terminator(b'\n')
.build()
.map_err(From::from)
}
/// Builds the set of glob overrides from the command line flags.
fn overrides(&self) -> Result<Override> {
let mut ovr = OverrideBuilder::new(try!(env::current_dir()));
for glob in self.values_of_lossy_vec("glob") {
try!(ovr.add(&glob));
}
ovr.build().map_err(From::from)
}
/// Builds a file type matcher from the command line flags.
fn types(&self) -> Result<Types> {
let mut btypes = TypesBuilder::new();
btypes.add_defaults();
for ty in self.values_of_lossy_vec("type-clear") {
btypes.clear(&ty);
}
for def in self.values_of_lossy_vec("type-add") {
try!(btypes.add_def(&def));
}
for ty in self.values_of_lossy_vec("type") {
btypes.select(&ty);
}
for ty in self.values_of_lossy_vec("type-not") {
btypes.negate(&ty);
}
btypes.build().map_err(From::from)
}
/// Returns true if ignore files should be ignored.
fn no_ignore(&self) -> bool {
self.is_present("no-ignore")
|| self.occurrences_of("unrestricted") >= 1
}
/// Returns true if parent ignore files should be ignored.
fn no_ignore_parent(&self) -> bool {
self.is_present("no-ignore-parent") || self.no_ignore()
}
/// Returns true if VCS ignore files should be ignored.
fn no_ignore_vcs(&self) -> bool {
self.is_present("no-ignore-vcs") || self.no_ignore()
}
/// Returns true if and only if hidden files/directories should be
/// searched.
fn hidden(&self) -> bool {
self.is_present("hidden") || self.occurrences_of("unrestricted") >= 2
}
/// Returns true if and only if all files should be treated as if they
/// were text, even if ripgrep would detect it as a binary file.
fn text(&self) -> bool {
self.is_present("text") || self.occurrences_of("unrestricted") >= 3
}
/// Like values_of_lossy, but returns an empty vec if the flag is not
/// present.
fn values_of_lossy_vec(&self, name: &str) -> Vec<String> {
self.values_of_lossy(name).unwrap_or(vec![])
}
/// Safely reads an arg value with the given name, and if it's present,
/// tries to parse it as a usize value.
fn usize_of(&self, name: &str) -> Result<Option<usize>> {
match self.value_of_lossy(name) {
None => Ok(None),
Some(v) => v.parse().map(Some).map_err(From::from),
}
}
}
fn pattern_to_str(s: &OsStr) -> Result<&str> {
match s.to_str() {
Some(s) => Ok(s),
None => Err(From::from(format!(
"Argument '{}' is not valid UTF-8. \
Use hex escape sequences to match arbitrary \
bytes in a pattern (e.g., \\xFF).",
s.to_string_lossy()))),
}
}