use std::cmp; use std::env; use std::ffi::OsStr; use std::fs; use std::io; use std::path::{Path, PathBuf}; use std::sync::Arc; use std::time::SystemTime; use clap; use grep::cli; use grep::matcher::LineTerminator; #[cfg(feature = "pcre2")] use grep::pcre2::{ RegexMatcher as PCRE2RegexMatcher, RegexMatcherBuilder as PCRE2RegexMatcherBuilder, }; use grep::printer::{ ColorSpecs, Stats, JSON, JSONBuilder, Standard, StandardBuilder, Summary, SummaryBuilder, SummaryKind, default_color_specs, }; use grep::regex::{ RegexMatcher as RustRegexMatcher, RegexMatcherBuilder as RustRegexMatcherBuilder, }; use grep::searcher::{ BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, }; use ignore::overrides::{Override, OverrideBuilder}; use ignore::types::{FileTypeDef, Types, TypesBuilder}; use ignore::{Walk, WalkBuilder, WalkParallel}; use log; use num_cpus; use path_printer::{PathPrinter, PathPrinterBuilder}; use regex; use termcolor::{ WriteColor, BufferWriter, ColorChoice, }; use app; use config; use logger::Logger; use messages::{set_messages, set_ignore_messages}; use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}; use subject::SubjectBuilder; use Result; /// The command that ripgrep should execute based on the command line /// configuration. #[derive(Clone, Copy, Debug, Eq, PartialEq)] pub enum Command { /// Search using exactly one thread. Search, /// Search using possibly many threads. SearchParallel, /// The command line parameters suggest that a search should occur, but /// ripgrep knows that a match can never be found (e.g., no given patterns /// or --max-count=0). SearchNever, /// Show the files that would be searched, but don't actually search them, /// and use exactly one thread. Files, /// Show the files that would be searched, but don't actually search them, /// and perform directory traversal using possibly many threads. 
FilesParallel, /// List all file type definitions configured, including the default file /// types and any additional file types added to the command line. Types, } impl Command { /// Returns true if and only if this command requires executing a search. fn is_search(&self) -> bool { use self::Command::*; match *self { Search | SearchParallel => true, SearchNever | Files | FilesParallel | Types => false, } } } /// The primary configuration object used throughout ripgrep. It provides a /// high-level convenient interface to the provided command line arguments. /// /// An `Args` object is cheap to clone and can be used from multiple threads /// simultaneously. #[derive(Clone, Debug)] pub struct Args(Arc); #[derive(Clone, Debug)] struct ArgsImp { /// Mid-to-low level routines for extracting CLI arguments. matches: ArgMatches, /// The patterns provided at the command line and/or via the -f/--file /// flag. This may be empty. patterns: Vec, /// A matcher built from the patterns. /// /// It's important that this is only built once, since building this goes /// through regex compilation and various types of analyses. That is, if /// you need many of theses (one per thread, for example), it is better to /// build it once and then clone it. matcher: PatternMatcher, /// The paths provided at the command line. This is guaranteed to be /// non-empty. (If no paths are provided, then a default path is created.) paths: Vec, /// Returns true if and only if `paths` had to be populated with a single /// default path. using_default_path: bool, } impl Args { /// Parse the command line arguments for this process. /// /// If a CLI usage error occurred, then exit the process and print a usage /// or error message. Similarly, if the user requested the version of /// ripgrep, then print the version and exit. /// /// Also, initialize a global logger. pub fn parse() -> Result { // We parse the args given on CLI. This does not include args from // the config. 
We use the CLI args as an initial configuration while // trying to parse config files. If a config file exists and has // arguments, then we re-parse argv, otherwise we just use the matches // we have here. let early_matches = ArgMatches::new(app::app().get_matches()); set_messages(!early_matches.is_present("no-messages")); set_ignore_messages(!early_matches.is_present("no-ignore-messages")); if let Err(err) = Logger::init() { return Err(format!("failed to initialize logger: {}", err).into()); } if early_matches.is_present("trace") { log::set_max_level(log::LevelFilter::Trace); } else if early_matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } let matches = early_matches.reconfigure(); // The logging level may have changed if we brought in additional // arguments from a configuration file, so recheck it and set the log // level as appropriate. if matches.is_present("trace") { log::set_max_level(log::LevelFilter::Trace); } else if matches.is_present("debug") { log::set_max_level(log::LevelFilter::Debug); } else { log::set_max_level(log::LevelFilter::Warn); } set_messages(!matches.is_present("no-messages")); set_ignore_messages(!matches.is_present("no-ignore-messages")); matches.to_args() } /// Return direct access to command line arguments. fn matches(&self) -> &ArgMatches { &self.0.matches } /// Return the patterns found in the command line arguments. This includes /// patterns read via the -f/--file flags. fn patterns(&self) -> &[String] { &self.0.patterns } /// Return the matcher builder from the patterns. fn matcher(&self) -> &PatternMatcher { &self.0.matcher } /// Return the paths found in the command line arguments. This is /// guaranteed to be non-empty. In the case where no explicit arguments are /// provided, a single default path is provided automatically. 
fn paths(&self) -> &[PathBuf] { &self.0.paths } /// Returns true if and only if `paths` had to be populated with a default /// path, which occurs only when no paths were given as command line /// arguments. fn using_default_path(&self) -> bool { self.0.using_default_path } /// Return the printer that should be used for formatting the output of /// search results. /// /// The returned printer will write results to the given writer. fn printer(&self, wtr: W) -> Result> { match self.matches().output_kind() { OutputKind::Standard => { let separator_search = self.command()? == Command::Search; self.matches() .printer_standard(self.paths(), wtr, separator_search) .map(Printer::Standard) } OutputKind::Summary => { self.matches() .printer_summary(self.paths(), wtr) .map(Printer::Summary) } OutputKind::JSON => { self.matches() .printer_json(wtr) .map(Printer::JSON) } } } } /// High level public routines for building data structures used by ripgrep /// from command line arguments. impl Args { /// Create a new buffer writer for multi-threaded printing with color /// support. pub fn buffer_writer(&self) -> Result { let mut wtr = BufferWriter::stdout(self.matches().color_choice()); wtr.separator(self.matches().file_separator()?); Ok(wtr) } /// Return the high-level command that ripgrep should run. pub fn command(&self) -> Result { let is_one_search = self.matches().is_one_search(self.paths()); let threads = self.matches().threads()?; let one_thread = is_one_search || threads == 1; Ok(if self.matches().is_present("type-list") { Command::Types } else if self.matches().is_present("files") { if one_thread { Command::Files } else { Command::FilesParallel } } else if self.matches().can_never_match(self.patterns()) { Command::SearchNever } else if one_thread { Command::Search } else { Command::SearchParallel }) } /// Builder a path printer that can be used for printing just file paths, /// with optional color support. /// /// The printer will print paths to the given writer. 
pub fn path_printer( &self, wtr: W, ) -> Result> { let mut builder = PathPrinterBuilder::new(); builder .color_specs(self.matches().color_specs()?) .separator(self.matches().path_separator()?) .terminator(self.matches().path_terminator().unwrap_or(b'\n')); Ok(builder.build(wtr)) } /// Returns true if and only if the search should quit after finding the /// first match. pub fn quit_after_match(&self) -> Result { Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) } /// Build a worker for executing searches. /// /// Search results are written to the given writer. pub fn search_worker( &self, wtr: W, ) -> Result> { let matcher = self.matcher().clone(); let printer = self.printer(wtr)?; let searcher = self.matches().searcher(self.paths())?; let mut builder = SearchWorkerBuilder::new(); builder .json_stats(self.matches().is_present("json")) .preprocessor(self.matches().preprocessor()) .preprocessor_globs(self.matches().preprocessor_globs()?) .search_zip(self.matches().is_present("search-zip")); Ok(builder.build(matcher, searcher, printer)) } /// Returns a zero value for tracking statistics if and only if it has been /// requested. /// /// When this returns a `Stats` value, then it is guaranteed that the /// search worker will be configured to track statistics as well. pub fn stats(&self) -> Result> { Ok(if self.command()?.is_search() && self.matches().stats() { Some(Stats::new()) } else { None }) } /// Return a builder for constructing subjects. A subject represents a /// single unit of something to search. Typically, this corresponds to a /// file or a stream such as stdin. pub fn subject_builder(&self) -> SubjectBuilder { let mut builder = SubjectBuilder::new(); builder.strip_dot_prefix(self.using_default_path()); builder } /// Execute the given function with a writer to stdout that enables color /// support based on the command line configuration. 
pub fn stdout(&self) -> cli::StandardStream {
        let color = self.matches().color_choice();
        if self.matches().is_present("line-buffered") {
            cli::stdout_buffered_line(color)
        } else if self.matches().is_present("block-buffered") {
            cli::stdout_buffered_block(color)
        } else {
            cli::stdout(color)
        }
    }

    /// Return the type definitions compiled into ripgrep.
    ///
    /// If there was a problem reading and parsing the type definitions, then
    /// this returns an error.
    pub fn type_defs(&self) -> Result<Vec<FileTypeDef>> {
        Ok(self.matches().types()?.definitions().to_vec())
    }

    /// Return a walker that never uses additional threads.
    pub fn walker(&self) -> Result<Walk> {
        Ok(self.matches().walker_builder(self.paths())?.build())
    }

    /// Return a parallel walker that may use additional threads.
    pub fn walker_parallel(&self) -> Result<WalkParallel> {
        Ok(self.matches().walker_builder(self.paths())?.build_parallel())
    }
}

/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to
/// the parsed arguments.
#[derive(Clone, Debug)]
struct ArgMatches(clap::ArgMatches<'static>);

/// The output format. Generally, this corresponds to the printer that ripgrep
/// uses to show search results.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum OutputKind {
    /// Classic grep-like or ack-like format.
    Standard,
    /// Show matching files and possibly the number of matches in each file.
    Summary,
    /// Emit match information in the JSON Lines format.
    JSON,
}

/// The sort criteria, if present.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
struct SortBy {
    /// Whether to reverse the sort criteria (i.e., descending order).
    reverse: bool,
    /// The actual sorting criteria.
    kind: SortByKind,
}

#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SortByKind {
    /// No sorting at all.
    None,
    /// Sort by path.
    Path,
    /// Sort by last modified time.
    LastModified,
    /// Sort by last accessed time.
    LastAccessed,
    /// Sort by creation time.
    Created,
}

impl SortBy {
    /// Build an ascending sort on the given criteria.
    fn asc(kind: SortByKind) -> SortBy {
        SortBy { reverse: false, kind }
    }

    /// Build a descending sort on the given criteria.
    fn desc(kind: SortByKind) -> SortBy {
        SortBy { reverse: true, kind }
    }

    /// Build a sort specification that performs no sorting.
    fn none() -> SortBy {
        SortBy::asc(SortByKind::None)
    }

    /// Try to check that the sorting criteria selected is actually supported.
    /// If it isn't, then an error is returned.
    fn check(&self) -> Result<()> {
        // The current executable's metadata is used as a probe: if the
        // requested timestamp cannot be read from it, the error is returned.
        match self.kind {
            SortByKind::None | SortByKind::Path => {}
            SortByKind::LastModified => {
                env::current_exe()?.metadata()?.modified()?;
            }
            SortByKind::LastAccessed => {
                env::current_exe()?.metadata()?.accessed()?;
            }
            SortByKind::Created => {
                env::current_exe()?.metadata()?.created()?;
            }
        }
        Ok(())
    }

    /// Install the comparator matching this sort criteria on the given walk
    /// builder. `SortByKind::None` leaves the builder untouched.
    fn configure_walk_builder(self, builder: &mut WalkBuilder) {
        // This isn't entirely optimal. In particular, we will wind up issuing
        // a stat for many files redundantly. Aside from having potentially
        // inconsistent results with respect to sorting, this is also slow.
        // We could fix this here at the expense of memory by caching stat
        // calls. A better fix would be to find a way to push this down into
        // directory traversal itself, but that's a somewhat nasty change.
        match self.kind {
            SortByKind::None => {}
            SortByKind::Path => {
                if self.reverse {
                    builder.sort_by_file_name(|a, b| a.cmp(b).reverse());
                } else {
                    builder.sort_by_file_name(|a, b| a.cmp(b));
                }
            }
            SortByKind::LastModified => {
                builder.sort_by_file_path(move |a, b| {
                    sort_by_metadata_time(
                        a, b,
                        self.reverse,
                        |md| md.modified(),
                    )
                });
            }
            SortByKind::LastAccessed => {
                builder.sort_by_file_path(move |a, b| {
                    sort_by_metadata_time(
                        a, b,
                        self.reverse,
                        |md| md.accessed(),
                    )
                });
            }
            SortByKind::Created => {
                builder.sort_by_file_path(move |a, b| {
                    sort_by_metadata_time(
                        a, b,
                        self.reverse,
                        |md| md.created(),
                    )
                });
            }
        }
    }
}

impl SortByKind {
    /// Map a criteria name to its `SortByKind`. Any unrecognized name maps to
    /// `SortByKind::None`.
    fn new(kind: &str) -> SortByKind {
        match kind {
            "none" => SortByKind::None,
            "path" => SortByKind::Path,
            "modified" => SortByKind::LastModified,
            "accessed" => SortByKind::LastAccessed,
            "created" => SortByKind::Created,
            _ => SortByKind::None,
        }
    }
}

impl ArgMatches {
    /// Create an ArgMatches from clap's parse result.
    fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches {
        ArgMatches(clap_matches)
    }

    /// Run clap and return the matches using a config file if present. If clap
    /// determines a problem with the user provided arguments (or if --help or
    /// --version are given), then an error/usage/version will be printed and
    /// the process will exit.
    ///
    /// If there are no additional arguments from the environment (e.g., a
    /// config file), then the given matches are returned as is.
    fn reconfigure(self) -> ArgMatches {
        // If the end user says no config, then respect it.
        if self.is_present("no-config") {
            debug!("not reading config files because --no-config is present");
            return self;
        }
        // If the user wants ripgrep to use a config file, then parse args
        // from that first.
        let mut args = config::args();
        if args.is_empty() {
            return self;
        }
        // Final argv layout: binary name, then config args, then CLI args.
        let mut cliargs = env::args_os();
        if let Some(bin) = cliargs.next() {
            args.insert(0, bin);
        }
        args.extend(cliargs);
        debug!("final argv: {:?}", args);
        ArgMatches::new(app::app().get_matches_from(args))
    }

    /// Convert the result of parsing CLI arguments into ripgrep's higher level
    /// configuration structure.
    fn to_args(self) -> Result<Args> {
        // We compute these once since they could be large.
        let patterns = self.patterns()?;
        let matcher = self.matcher(&patterns)?;
        let mut paths = self.paths();
        let using_default_path = if paths.is_empty() {
            paths.push(self.path_default());
            true
        } else {
            false
        };
        Ok(Args(Arc::new(ArgsImp {
            matches: self,
            patterns,
            matcher,
            paths,
            using_default_path,
        })))
    }
}

/// High level routines for converting command line arguments into various
/// data structures used by ripgrep.
///
/// Methods are sorted alphabetically.
impl ArgMatches {
    /// Return the matcher that should be used for searching.
    ///
    /// If there was a problem building the matcher (e.g., a syntax error),
    /// then this returns an error.
    #[cfg(feature = "pcre2")]
    fn matcher(&self, patterns: &[String]) -> Result<PatternMatcher> {
        if self.is_present("pcre2") {
            let matcher = self.matcher_pcre2(patterns)?;
            Ok(PatternMatcher::PCRE2(matcher))
        } else {
            let matcher = match self.matcher_rust(patterns) {
                Ok(matcher) => matcher,
                Err(err) => {
                    return Err(From::from(suggest_pcre2(err.to_string())));
                }
            };
            Ok(PatternMatcher::RustRegex(matcher))
        }
    }

    /// Return the matcher that should be used for searching.
    ///
    /// If there was a problem building the matcher (e.g., a syntax error),
    /// then this returns an error.
#[cfg(not(feature = "pcre2"))] fn matcher(&self, patterns: &[String]) -> Result { if self.is_present("pcre2") { return Err(From::from( "PCRE2 is not available in this build of ripgrep", )); } let matcher = self.matcher_rust(patterns)?; Ok(PatternMatcher::RustRegex(matcher)) } /// Build a matcher using Rust's regex engine. /// /// If there was a problem building the matcher (such as a regex syntax /// error), then an error is returned. fn matcher_rust(&self, patterns: &[String]) -> Result { let mut builder = RustRegexMatcherBuilder::new(); builder .case_smart(self.case_smart()) .case_insensitive(self.case_insensitive()) .multi_line(true) .unicode(true) .octal(false) .word(self.is_present("word-regexp")); if self.is_present("multiline") { builder.dot_matches_new_line(self.is_present("multiline-dotall")); if self.is_present("crlf") { builder .crlf(true) .line_terminator(None); } } else { builder .line_terminator(Some(b'\n')) .dot_matches_new_line(false); if self.is_present("crlf") { builder.crlf(true); } // We don't need to set this in multiline mode since mulitline // matchers don't use optimizations related to line terminators. // Moreover, a mulitline regex used with --null-data should // be allowed to match NUL bytes explicitly, which this would // otherwise forbid. if self.is_present("null-data") { builder.line_terminator(Some(b'\x00')); } } if let Some(limit) = self.regex_size_limit()? { builder.size_limit(limit); } if let Some(limit) = self.dfa_size_limit()? { builder.dfa_size_limit(limit); } Ok(builder.build(&patterns.join("|"))?) } /// Build a matcher using PCRE2. /// /// If there was a problem building the matcher (such as a regex syntax /// error), then an error is returned. 
#[cfg(feature = "pcre2")] fn matcher_pcre2(&self, patterns: &[String]) -> Result { let mut builder = PCRE2RegexMatcherBuilder::new(); builder .case_smart(self.case_smart()) .caseless(self.case_insensitive()) .multi_line(true) .word(self.is_present("word-regexp")); // For whatever reason, the JIT craps out during regex compilation with // a "no more memory" error on 32 bit systems. So don't use it there. if !cfg!(target_pointer_width = "32") { builder.jit_if_available(true); } if self.pcre2_unicode() { builder.utf(true).ucp(true); if self.encoding()?.is_some() { // SAFETY: If an encoding was specified, then we're guaranteed // to get valid UTF-8, so we can disable PCRE2's UTF checking. // (Feeding invalid UTF-8 to PCRE2 is undefined behavior.) unsafe { builder.disable_utf_check(); } } } if self.is_present("multiline") { builder.dotall(self.is_present("multiline-dotall")); } if self.is_present("crlf") { builder.crlf(true); } Ok(builder.build(&patterns.join("|"))?) } /// Build a JSON printer that writes results to the given writer. fn printer_json(&self, wtr: W) -> Result> { let mut builder = JSONBuilder::new(); builder .pretty(false) .max_matches(self.max_count()?) .always_begin_end(false); Ok(builder.build(wtr)) } /// Build a Standard printer that writes results to the given writer. /// /// The given paths are used to configure aspects of the printer. /// /// If `separator_search` is true, then the returned printer will assume /// the responsibility of printing a separator between each set of /// search results, when appropriate (e.g., when contexts are enabled). /// When it's set to false, the caller is responsible for handling /// separators. /// /// In practice, we want the printer to handle it in the single threaded /// case but not in the multi-threaded case. fn printer_standard( &self, paths: &[PathBuf], wtr: W, separator_search: bool, ) -> Result> { let mut builder = StandardBuilder::new(); builder .color_specs(self.color_specs()?) 
.stats(self.stats()) .heading(self.heading()) .path(self.with_filename(paths)) .only_matching(self.is_present("only-matching")) .per_match(self.is_present("vimgrep")) .replacement(self.replacement()) .max_columns(self.max_columns()?) .max_matches(self.max_count()?) .column(self.column()) .byte_offset(self.is_present("byte-offset")) .trim_ascii(self.is_present("trim")) .separator_search(None) .separator_context(Some(self.context_separator())) .separator_field_match(b":".to_vec()) .separator_field_context(b"-".to_vec()) .separator_path(self.path_separator()?) .path_terminator(self.path_terminator()); if separator_search { builder.separator_search(self.file_separator()?); } Ok(builder.build(wtr)) } /// Build a Summary printer that writes results to the given writer. /// /// The given paths are used to configure aspects of the printer. /// /// This panics if the output format is not `OutputKind::Summary`. fn printer_summary( &self, paths: &[PathBuf], wtr: W, ) -> Result> { let mut builder = SummaryBuilder::new(); builder .kind(self.summary_kind().expect("summary format")) .color_specs(self.color_specs()?) .stats(self.stats()) .path(self.with_filename(paths)) .max_matches(self.max_count()?) .separator_field(b":".to_vec()) .separator_path(self.path_separator()?) .path_terminator(self.path_terminator()); Ok(builder.build(wtr)) } /// Build a searcher from the command line parameters. 
fn searcher(&self, paths: &[PathBuf]) -> Result<Searcher> {
        let (ctx_before, ctx_after) = self.contexts()?;
        // --crlf takes precedence over --null-data when both are present.
        let line_term = if self.is_present("crlf") {
            LineTerminator::crlf()
        } else if self.is_present("null-data") {
            LineTerminator::byte(b'\x00')
        } else {
            LineTerminator::byte(b'\n')
        };
        let mut builder = SearcherBuilder::new();
        builder
            .line_terminator(line_term)
            .invert_match(self.is_present("invert-match"))
            .line_number(self.line_number(paths))
            .multi_line(self.is_present("multiline"))
            .before_context(ctx_before)
            .after_context(ctx_after)
            .passthru(self.is_present("passthru"))
            .memory_map(self.mmap_choice(paths))
            .binary_detection(self.binary_detection())
            .encoding(self.encoding()?);
        Ok(builder.build())
    }

    /// Return a builder for recursively traversing a directory while
    /// respecting ignore rules.
    ///
    /// If there was a problem parsing the CLI arguments necessary for
    /// constructing the builder, then this returns an error.
    ///
    /// The given paths must be non-empty; the first path is indexed directly.
    fn walker_builder(&self, paths: &[PathBuf]) -> Result<WalkBuilder> {
        let mut builder = WalkBuilder::new(&paths[0]);
        for path in &paths[1..] {
            builder.add(path);
        }
        for path in self.ignore_paths() {
            // A bad ignore file is reported but does not abort the walk.
            if let Some(err) = builder.add_ignore(path) {
                ignore_message!("{}", err);
            }
        }
        builder
            .max_depth(self.usize_of("max-depth")?)
            .follow_links(self.is_present("follow"))
            .max_filesize(self.max_file_size()?)
            .threads(self.threads()?)
            .same_file_system(self.is_present("one-file-system"))
            .skip_stdout(true)
            .overrides(self.overrides()?)
            .types(self.types()?)
            .hidden(!self.hidden())
            .parents(!self.no_ignore_parent())
            .ignore(!self.no_ignore())
            .git_global(
                !self.no_ignore()
                && !self.no_ignore_vcs()
                && !self.no_ignore_global())
            .git_ignore(!self.no_ignore() && !self.no_ignore_vcs())
            .git_exclude(!self.no_ignore() && !self.no_ignore_vcs());
        if !self.no_ignore() {
            builder.add_custom_ignore_filename(".rgignore");
        }
        let sortby = self.sort_by()?;
        sortby.check()?;
        sortby.configure_walk_builder(&mut builder);
        Ok(builder)
    }
}

/// Mid level routines for converting command line arguments into various types
/// of data structures.
///
/// Methods are sorted alphabetically.
impl ArgMatches {
    /// Returns the form of binary detection to perform.
    fn binary_detection(&self) -> BinaryDetection {
        let none =
            self.is_present("text")
            || self.unrestricted_count() >= 3
            || self.is_present("null-data");
        if none {
            BinaryDetection::none()
        } else {
            BinaryDetection::quit(b'\x00')
        }
    }

    /// Returns true if the command line configuration implies that a match
    /// can never be shown.
    fn can_never_match(&self, patterns: &[String]) -> bool {
        patterns.is_empty() || self.max_count().ok() == Some(Some(0))
    }

    /// Returns true if and only if case should be ignored.
    ///
    /// If --case-sensitive is present, then case is never ignored, even if
    /// --ignore-case is present.
    fn case_insensitive(&self) -> bool {
        self.is_present("ignore-case") && !self.is_present("case-sensitive")
    }

    /// Returns true if and only if smart case has been enabled.
    ///
    /// If either --ignore-case or --case-sensitive are present, then smart
    /// case is disabled.
    fn case_smart(&self) -> bool {
        self.is_present("smart-case")
        && !self.is_present("ignore-case")
        && !self.is_present("case-sensitive")
    }

    /// Returns the user's color choice based on command line parameters and
    /// environment.
fn color_choice(&self) -> ColorChoice {
        let preference = match self.value_of_lossy("color") {
            None => "auto".to_string(),
            Some(v) => v,
        };
        if preference == "always" {
            ColorChoice::Always
        } else if preference == "ansi" {
            ColorChoice::AlwaysAnsi
        } else if preference == "auto" {
            if cli::is_tty_stdout() || self.is_present("pretty") {
                ColorChoice::Auto
            } else {
                ColorChoice::Never
            }
        } else {
            ColorChoice::Never
        }
    }

    /// Returns the color specifications given by the user on the CLI.
    ///
    /// If there was a problem parsing any of the provided specs, then an error
    /// is returned.
    fn color_specs(&self) -> Result<ColorSpecs> {
        // Start with a default set of color specs.
        let mut specs = default_color_specs();
        for spec_str in self.values_of_lossy_vec("colors") {
            specs.push(spec_str.parse()?);
        }
        Ok(ColorSpecs::new(&specs))
    }

    /// Returns true if and only if column numbers should be shown.
    fn column(&self) -> bool {
        if self.is_present("no-column") {
            return false;
        }
        self.is_present("column") || self.is_present("vimgrep")
    }

    /// Returns the before and after contexts from the command line.
    ///
    /// If a context setting was absent, then `0` is returned.
    ///
    /// If there was a problem parsing the values from the user as an integer,
    /// then an error is returned.
    fn contexts(&self) -> Result<(usize, usize)> {
        let after = self.usize_of("after-context")?.unwrap_or(0);
        let before = self.usize_of("before-context")?.unwrap_or(0);
        let both = self.usize_of("context")?.unwrap_or(0);
        // A non-zero --context overrides both directional settings.
        Ok(if both > 0 {
            (both, both)
        } else {
            (before, after)
        })
    }

    /// Returns the unescaped context separator in UTF-8 bytes.
    ///
    /// If one was not provided, the default `--` is returned.
    fn context_separator(&self) -> Vec<u8> {
        match self.value_of_os("context-separator") {
            None => b"--".to_vec(),
            Some(sep) => cli::unescape_os(&sep),
        }
    }

    /// Returns whether the -c/--count or the --count-matches flags were
    /// passed from the command line.
    ///
    /// If --count-matches and --invert-match were passed in, behave
    /// as if --count and --invert-match were passed in (i.e. rg will
    /// count inverted matches as per existing behavior).
    fn counts(&self) -> (bool, bool) {
        let count = self.is_present("count");
        let count_matches = self.is_present("count-matches");
        let invert_matches = self.is_present("invert-match");
        let only_matching = self.is_present("only-matching");
        if count_matches && invert_matches {
            // Treat `-v --count-matches` as `-v -c`.
            (true, false)
        } else if count && only_matching {
            // Treat `-c --only-matching` as `--count-matches`.
            (false, true)
        } else {
            (count, count_matches)
        }
    }

    /// Parse the dfa-size-limit argument option into a byte count.
    fn dfa_size_limit(&self) -> Result<Option<usize>> {
        let r = self.parse_human_readable_size("dfa-size-limit")?;
        u64_to_usize("dfa-size-limit", r)
    }

    /// Returns the type of encoding to use.
    ///
    /// This only returns an encoding if one is explicitly specified. When no
    /// encoding is present, the Searcher will still do BOM sniffing for UTF-16
    /// and transcode seamlessly.
    fn encoding(&self) -> Result<Option<Encoding>> {
        if self.is_present("no-encoding") {
            return Ok(None);
        }
        let label = match self.value_of_lossy("encoding") {
            // With no explicit encoding, PCRE2's Unicode mode selects UTF-8.
            None if self.pcre2_unicode() => "utf-8".to_string(),
            None => return Ok(None),
            Some(label) => label,
        };
        if label == "auto" {
            return Ok(None);
        }
        Ok(Some(Encoding::new(&label)?))
    }

    /// Return the file separator to use based on the CLI configuration.
    fn file_separator(&self) -> Result<Option<Vec<u8>>> {
        // File separators are only used for the standard grep-like format.
        if self.output_kind() != OutputKind::Standard {
            return Ok(None);
        }

        let (ctx_before, ctx_after) = self.contexts()?;
        Ok(if self.heading() {
            Some(b"".to_vec())
        } else if ctx_before > 0 || ctx_after > 0 {
            // `context_separator` already returns an owned buffer, so no
            // extra copy is needed.
            Some(self.context_separator())
        } else {
            None
        })
    }

    /// Returns true if and only if matches should be grouped with file name
    /// headings.
fn heading(&self) -> bool {
        if self.is_present("no-heading") || self.is_present("vimgrep") {
            false
        } else {
            cli::is_tty_stdout()
            || self.is_present("heading")
            || self.is_present("pretty")
        }
    }

    /// Returns true if and only if hidden files/directories should be
    /// searched.
    fn hidden(&self) -> bool {
        self.is_present("hidden") || self.unrestricted_count() >= 2
    }

    /// Return all of the ignore file paths given on the command line.
    fn ignore_paths(&self) -> Vec<PathBuf> {
        let paths = match self.values_of_os("ignore-file") {
            None => return vec![],
            Some(paths) => paths,
        };
        paths.map(|p| Path::new(p).to_path_buf()).collect()
    }

    /// Returns true if and only if ripgrep is invoked in a way where it knows
    /// it searches exactly one thing.
    fn is_one_search(&self, paths: &[PathBuf]) -> bool {
        if paths.len() != 1 {
            return false;
        }
        self.is_only_stdin(paths) || paths[0].is_file()
    }

    /// Returns true if and only if we're only searching a single thing and
    /// that thing is stdin.
    fn is_only_stdin(&self, paths: &[PathBuf]) -> bool {
        paths == [Path::new("-")]
    }

    /// Returns true if and only if we should show line numbers.
    fn line_number(&self, paths: &[PathBuf]) -> bool {
        if self.output_kind() == OutputKind::Summary {
            return false;
        }
        if self.is_present("no-line-number") {
            return false;
        }
        if self.output_kind() == OutputKind::JSON {
            return true;
        }

        // A few things can imply counting line numbers. In particular, we
        // generally want to show line numbers by default when printing to a
        // tty for human consumption, except for one interesting case: when
        // we're only searching stdin. This makes pipelines work as expected.
        (cli::is_tty_stdout() && !self.is_only_stdin(paths))
        || self.is_present("line-number")
        || self.is_present("column")
        || self.is_present("pretty")
        || self.is_present("vimgrep")
    }

    /// The maximum number of columns allowed on each line.
    ///
    /// If `0` is provided, then this returns `None`.
fn max_columns(&self) -> Result> { Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64)) } /// The maximum number of matches permitted. fn max_count(&self) -> Result> { Ok(self.usize_of("max-count")?.map(|n| n as u64)) } /// Parses the max-filesize argument option into a byte count. fn max_file_size(&self) -> Result> { self.parse_human_readable_size("max-filesize") } /// Returns whether we should attempt to use memory maps or not. fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice { // SAFETY: Memory maps are difficult to impossible to encapsulate // safely in a portable way that doesn't simultaneously negate some of // the benfits of using memory maps. For ripgrep's use, we never mutate // a memory map and generally never store the contents of memory map // in a data structure that depends on immutability. Generally // speaking, the worst thing that can happen is a SIGBUS (if the // underlying file is truncated while reading it), which will cause // ripgrep to abort. This reasoning should be treated as suspect. let maybe = unsafe { MmapChoice::auto() }; let never = MmapChoice::never(); if self.is_present("no-mmap") { never } else if self.is_present("mmap") { maybe } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) { // If we're only searching a few paths and all of them are // files, then memory maps are probably faster. maybe } else { never } } /// Returns true if ignore files should be ignored. fn no_ignore(&self) -> bool { self.is_present("no-ignore") || self.unrestricted_count() >= 1 } /// Returns true if global ignore files should be ignored. fn no_ignore_global(&self) -> bool { self.is_present("no-ignore-global") || self.no_ignore() } /// Returns true if parent ignore files should be ignored. fn no_ignore_parent(&self) -> bool { self.is_present("no-ignore-parent") || self.no_ignore() } /// Returns true if VCS ignore files should be ignored. 
fn no_ignore_vcs(&self) -> bool {
    // The general `no_ignore` setting implies this more specific one.
    self.is_present("no-ignore-vcs") || self.no_ignore()
}

/// Determine the type of output we should produce.
fn output_kind(&self) -> OutputKind {
    if self.is_present("quiet") {
        // While we don't technically print results (or aggregate results)
        // in quiet mode, we still support the --stats flag, and those
        // stats are computed by the Summary printer for now.
        return OutputKind::Summary;
    } else if self.is_present("json") {
        return OutputKind::JSON;
    }

    let (count, count_matches) = self.counts();
    let summary = count
        || count_matches
        || self.is_present("files-with-matches")
        || self.is_present("files-without-match");
    if summary {
        OutputKind::Summary
    } else {
        OutputKind::Standard
    }
}

/// Builds the set of glob overrides from the command line flags.
///
/// Returns an error if the current directory cannot be determined or if
/// any glob fails to be added or built.
fn overrides(&self) -> Result<Override> {
    let mut builder = OverrideBuilder::new(env::current_dir()?);
    for glob in self.values_of_lossy_vec("glob") {
        builder.add(&glob)?;
    }
    // This only enables case insensitivity for subsequent globs.
    builder.case_insensitive(true)?;
    for glob in self.values_of_lossy_vec("iglob") {
        builder.add(&glob)?;
    }
    Ok(builder.build()?)
}

/// Return all file paths that ripgrep should search.
///
/// If no paths were given, then this returns an empty list.
fn paths(&self) -> Vec<PathBuf> {
    let mut paths: Vec<PathBuf> = match self.values_of_os("path") {
        None => vec![],
        Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(),
    };
    // If --file, --files or --regexp is given, then the first path is
    // always in `pattern`.
    if self.is_present("file")
        || self.is_present("files")
        || self.is_present("regexp")
    {
        if let Some(path) = self.value_of_os("pattern") {
            paths.insert(0, Path::new(path).to_path_buf());
        }
    }
    paths
}

/// Return the default path that ripgrep should search. This should only
/// be used when ripgrep is not otherwise given at least one file path
/// as a positional argument.
fn path_default(&self) -> PathBuf { let file_is_stdin = self.values_of_os("file") .map_or(false, |mut files| files.any(|f| f == "-")); let search_cwd = !cli::is_readable_stdin() || (self.is_present("file") && file_is_stdin) || self.is_present("files") || self.is_present("type-list"); if search_cwd { Path::new("./").to_path_buf() } else { Path::new("-").to_path_buf() } } /// Returns the unescaped path separator as a single byte, if one exists. /// /// If the provided path separator is more than a single byte, then an /// error is returned. fn path_separator(&self) -> Result> { let sep = match self.value_of_os("path-separator") { None => return Ok(None), Some(sep) => cli::unescape_os(&sep), }; if sep.is_empty() { Ok(None) } else if sep.len() > 1 { Err(From::from(format!( "A path separator must be exactly one byte, but \ the given separator is {} bytes: {}\n\ In some shells on Windows '/' is automatically \ expanded. Use '//' instead.", sep.len(), cli::escape(&sep), ))) } else { Ok(Some(sep[0])) } } /// Returns the byte that should be used to terminate paths. /// /// Typically, this is only set to `\x00` when the --null flag is provided, /// and `None` otherwise. fn path_terminator(&self) -> Option { if self.is_present("null") { Some(b'\x00') } else { None } } /// Get a sequence of all available patterns from the command line. /// This includes reading the -e/--regexp and -f/--file flags. /// /// Note that if -F/--fixed-strings is set, then all patterns will be /// escaped. If -x/--line-regexp is set, then all patterns are surrounded /// by `^...$`. Other things, such as --word-regexp, are handled by the /// regex matcher itself. /// /// If any pattern is invalid UTF-8, then an error is returned. 
fn patterns(&self) -> Result> { if self.is_present("files") || self.is_present("type-list") { return Ok(vec![]); } let mut pats = vec![]; match self.values_of_os("regexp") { None => { if self.values_of_os("file").is_none() { if let Some(os_pat) = self.value_of_os("pattern") { pats.push(self.pattern_from_os_str(os_pat)?); } } } Some(os_pats) => { for os_pat in os_pats { pats.push(self.pattern_from_os_str(os_pat)?); } } } if let Some(paths) = self.values_of_os("file") { for path in paths { if path == "-" { pats.extend(cli::patterns_from_stdin()?); } else { pats.extend(cli::patterns_from_path(path)?); } } } Ok(pats) } /// Returns a pattern that is guaranteed to produce an empty regular /// expression that is valid in any position. fn pattern_empty(&self) -> String { // This would normally just be an empty string, which works on its // own, but if the patterns are joined in a set of alternations, then // you wind up with `foo|`, which is currently invalid in Rust's regex // engine. "(?:z{0})*".to_string() } /// Converts an OsStr pattern to a String pattern. The pattern is escaped /// if -F/--fixed-strings is set. /// /// If the pattern is not valid UTF-8, then an error is returned. fn pattern_from_os_str(&self, pat: &OsStr) -> Result { let s = cli::pattern_from_os(pat)?; Ok(self.pattern_from_str(s)) } /// Converts a &str pattern to a String pattern. The pattern is escaped /// if -F/--fixed-strings is set. fn pattern_from_str(&self, pat: &str) -> String { let litpat = self.pattern_literal(pat.to_string()); let s = self.pattern_line(litpat); if s.is_empty() { self.pattern_empty() } else { s } } /// Returns the given pattern as a line pattern if the -x/--line-regexp /// flag is set. Otherwise, the pattern is returned unchanged. fn pattern_line(&self, pat: String) -> String { if self.is_present("line-regexp") { format!(r"^(?:{})$", pat) } else { pat } } /// Returns the given pattern as a literal pattern if the /// -F/--fixed-strings flag is set. 
Otherwise, the pattern is returned /// unchanged. fn pattern_literal(&self, pat: String) -> String { if self.is_present("fixed-strings") { regex::escape(&pat) } else { pat } } /// Returns the preprocessor command if one was specified. fn preprocessor(&self) -> Option { let path = match self.value_of_os("pre") { None => return None, Some(path) => path, }; if path.is_empty() { return None; } Some(Path::new(path).to_path_buf()) } /// Builds the set of globs for filtering files to apply to the --pre /// flag. If no --pre-globs are available, then this always returns an /// empty set of globs. fn preprocessor_globs(&self) -> Result { let mut builder = OverrideBuilder::new(env::current_dir()?); for glob in self.values_of_lossy_vec("pre-glob") { builder.add(&glob)?; } Ok(builder.build()?) } /// Parse the regex-size-limit argument option into a byte count. fn regex_size_limit(&self) -> Result> { let r = self.parse_human_readable_size("regex-size-limit")?; u64_to_usize("regex-size-limit", r) } /// Returns the replacement string as UTF-8 bytes if it exists. fn replacement(&self) -> Option> { self.value_of_lossy("replace").map(|s| s.into_bytes()) } /// Returns the sorting criteria based on command line parameters. fn sort_by(&self) -> Result { // For backcompat, continue supporting deprecated --sort-files flag. if self.is_present("sort-files") { return Ok(SortBy::asc(SortByKind::Path)); } let sortby = match self.value_of_lossy("sort") { None => match self.value_of_lossy("sortr") { None => return Ok(SortBy::none()), Some(choice) => SortBy::desc(SortByKind::new(&choice)), } Some(choice) => SortBy::asc(SortByKind::new(&choice)), }; Ok(sortby) } /// Returns true if and only if aggregate statistics for a search should /// be tracked. /// /// Generally, this is only enabled when explicitly requested by in the /// command line arguments via the --stats flag, but this can also be /// enabled implicity via the output format, e.g., for JSON Lines. 
fn stats(&self) -> bool {
    // JSON output turns statistics on even without the --stats flag.
    self.output_kind() == OutputKind::JSON || self.is_present("stats")
}

/// When the output format is `Summary`, this returns the type of summary
/// output to show.
///
/// This returns `None` if the output format is not `Summary`.
fn summary_kind(&self) -> Option<SummaryKind> {
    let (count, count_matches) = self.counts();
    if self.is_present("quiet") {
        Some(SummaryKind::Quiet)
    } else if count_matches {
        Some(SummaryKind::CountMatches)
    } else if count {
        Some(SummaryKind::Count)
    } else if self.is_present("files-with-matches") {
        Some(SummaryKind::PathWithMatch)
    } else if self.is_present("files-without-match") {
        Some(SummaryKind::PathWithoutMatch)
    } else {
        None
    }
}

/// Return the number of threads that should be used for parallelism.
fn threads(&self) -> Result<usize> {
    // Any requested sort forces a single thread.
    if self.sort_by()?.kind != SortByKind::None {
        return Ok(1);
    }
    // An absent or zero `threads` value selects the default: the number of
    // CPUs, capped at 12.
    let threads = self.usize_of("threads")?.unwrap_or(0);
    Ok(if threads == 0 {
        cmp::min(12, num_cpus::get())
    } else {
        threads
    })
}

/// Builds a file type matcher from the command line flags.
fn types(&self) -> Result<Types> {
    let mut builder = TypesBuilder::new();
    builder.add_defaults();
    for ty in self.values_of_lossy_vec("type-clear") {
        builder.clear(&ty);
    }
    for def in self.values_of_lossy_vec("type-add") {
        builder.add_def(&def)?;
    }
    for ty in self.values_of_lossy_vec("type") {
        builder.select(&ty);
    }
    for ty in self.values_of_lossy_vec("type-not") {
        builder.negate(&ty);
    }
    builder.build().map_err(From::from)
}

/// Returns the number of times the `unrestricted` flag is provided.
fn unrestricted_count(&self) -> u64 {
    self.occurrences_of("unrestricted")
}

/// Returns true if and only if PCRE2's Unicode mode should be enabled.
fn pcre2_unicode(&self) -> bool {
    // PCRE2 Unicode is enabled by default, so only disable it when told
    // to do so explicitly.
    self.is_present("pcre2") && !self.is_present("no-pcre2-unicode")
}

/// Returns true if and only if file names containing each match should
/// be emitted.
fn with_filename(&self, paths: &[PathBuf]) -> bool {
    if self.is_present("no-filename") {
        false
    } else {
        // Without an explicit flag, file names are shown when there is
        // more than one path or the first path is a directory.
        self.is_present("with-filename")
            || self.is_present("vimgrep")
            || paths.len() > 1
            || paths.get(0).map_or(false, |p| p.is_dir())
    }
}
}

/// Lower level generic helper methods for teasing values out of clap.
impl ArgMatches {
    /// Like values_of_lossy, but returns an empty vec if the flag is not
    /// present.
    fn values_of_lossy_vec(&self, name: &str) -> Vec<String> {
        self.values_of_lossy(name).unwrap_or_else(Vec::new)
    }

    /// Safely reads an arg value with the given name, and if it's present,
    /// tries to parse it as a usize value.
    ///
    /// If the number is zero, then it is considered absent and `None` is
    /// returned.
    fn usize_of_nonzero(&self, name: &str) -> Result<Option<usize>> {
        let n = match self.usize_of(name)? {
            None => return Ok(None),
            Some(n) => n,
        };
        Ok(if n == 0 { None } else { Some(n) })
    }

    /// Safely reads an arg value with the given name, and if it's present,
    /// tries to parse it as a usize value.
    fn usize_of(&self, name: &str) -> Result<Option<usize>> {
        match self.value_of_lossy(name) {
            None => Ok(None),
            Some(v) => v.parse().map(Some).map_err(From::from),
        }
    }

    /// Parses an argument of the form `[0-9]+(KMG)?`.
    ///
    /// If the aforementioned format is not recognized, then this returns an
    /// error.
    fn parse_human_readable_size(
        &self,
        arg_name: &str,
    ) -> Result<Option<u64>> {
        let size = match self.value_of_lossy(arg_name) {
            None => return Ok(None),
            Some(size) => size,
        };
        Ok(Some(cli::parse_human_readable_size(&size)?))
    }
}

/// The following methods mostly dispatch to the underlying clap methods
/// directly. Methods that would otherwise get a single value will fetch all
/// values and return the last one. (Clap returns the first one.) We only
/// define the ones we need.
impl ArgMatches { fn is_present(&self, name: &str) -> bool { self.0.is_present(name) } fn occurrences_of(&self, name: &str) -> u64 { self.0.occurrences_of(name) } fn value_of_lossy(&self, name: &str) -> Option { self.0.value_of_lossy(name).map(|s| s.into_owned()) } fn values_of_lossy(&self, name: &str) -> Option> { self.0.values_of_lossy(name) } fn value_of_os(&self, name: &str) -> Option<&OsStr> { self.0.value_of_os(name) } fn values_of_os(&self, name: &str) -> Option { self.0.values_of_os(name) } } /// Inspect an error resulting from building a Rust regex matcher, and if it's /// believed to correspond to a syntax error that PCRE2 could handle, then /// add a message to suggest the use of -P/--pcre2. #[cfg(feature = "pcre2")] fn suggest_pcre2(msg: String) -> String { if !msg.contains("backreferences") && !msg.contains("look-around") { msg } else { format!("{} Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences and look-around.", msg) } } /// Convert the result of parsing a human readable file size to a `usize`, /// failing if the type does not fit. fn u64_to_usize( arg_name: &str, value: Option, ) -> Result> { use std::usize; let value = match value { None => return Ok(None), Some(value) => value, }; if value <= usize::MAX as u64 { Ok(Some(value as usize)) } else { Err(From::from(format!("number too large for {}", arg_name))) } } /// Builds a comparator for sorting two files according to a system time /// extracted from the file's metadata. /// /// If there was a problem extracting the metadata or if the time is not /// available, then both entries compare equal. 
fn sort_by_metadata_time<G>(
    p1: &Path,
    p2: &Path,
    reverse: bool,
    get_time: G,
) -> cmp::Ordering
where
    G: Fn(&fs::Metadata) -> io::Result<SystemTime>,
{
    // A failure to read either path's metadata, or to extract a time from
    // it, makes the two paths compare equal.
    let t1 = match p1.metadata().and_then(|md| get_time(&md)) {
        Ok(t) => t,
        Err(_) => return cmp::Ordering::Equal,
    };
    let t2 = match p2.metadata().and_then(|md| get_time(&md)) {
        Ok(t) => t,
        Err(_) => return cmp::Ordering::Equal,
    };
    // Older times sort first unless `reverse` is set.
    let ordering = t1.cmp(&t2);
    if reverse {
        ordering.reverse()
    } else {
        ordering
    }
}