From 7eb34e8b32cd7a9b0a5e12ca180cb3f333598b8d Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Fri, 3 Aug 2018 17:26:22 -0400 Subject: [PATCH] ripgrep: migrate to libripgrep --- Cargo.lock | 3 +- Cargo.toml | 1 + complete/_rg | 4 + src/app.rs | 43 +- src/args.rs | 27 +- src/args2.rs | 1384 +++++++++++++++++++++++++++++++++++++++++++ src/config.rs | 19 +- src/logger.rs | 25 +- src/main.rs | 74 ++- src/main2.rs | 263 ++++++++ src/messages.rs | 50 ++ src/path_printer.rs | 101 ++++ src/pathutil.rs | 3 - src/preprocessor.rs | 15 +- src/search.rs | 372 ++++++++++++ src/subject.rs | 231 ++++++++ src/worker.rs | 22 +- tests/tests.rs | 336 ++++++----- 18 files changed, 2698 insertions(+), 275 deletions(-) create mode 100644 src/args2.rs create mode 100644 src/main2.rs create mode 100644 src/messages.rs create mode 100644 src/path_printer.rs create mode 100644 src/search.rs create mode 100644 src/subject.rs diff --git a/Cargo.lock b/Cargo.lock index e22e1f54..01002532 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -194,7 +194,7 @@ dependencies = [ [[package]] name = "grep2" -version = "0.1.8" +version = "0.2.0" dependencies = [ "grep-matcher 0.0.1", "grep-printer 0.0.1", @@ -345,6 +345,7 @@ dependencies = [ "encoding_rs_io 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "globset 0.4.1", "grep 0.1.9", + "grep2 0.2.0", "ignore 0.4.3", "lazy_static 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.42 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index d806c3c2..b919e0e2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ encoding_rs = "0.8" encoding_rs_io = "0.1" globset = { version = "0.4.0", path = "globset" } grep = { version = "0.1.8", path = "grep" } +grep2 = { version = "0.2.0", path = "grep2" } ignore = { version = "0.4.0", path = "ignore" } lazy_static = "1" libc = "0.2" diff --git a/complete/_rg b/complete/_rg index 4342e8d2..00026a08 100644 --- a/complete/_rg +++ 
b/complete/_rg @@ -131,6 +131,10 @@ _rg() { '--mmap[search using memory maps when possible]' "--no-mmap[don't search using memory maps]" + + '(multiline)' # multiline options + '--multiline[permit matching across multiple lines]' + $no"--no-multiline[restrict matches to at most one line each]" + + '(only)' # Only-match options '(passthru replace)'{-o,--only-matching}'[show only matching part of each line]' diff --git a/src/app.rs b/src/app.rs index 24851c3b..e0011fb9 100644 --- a/src/app.rs +++ b/src/app.rs @@ -2,8 +2,8 @@ // including some light validation. // // This module is purposely written in a bare-bones way, since it is included -// in ripgrep's build.rs file as a way to generate completion files for common -// shells. +// in ripgrep's build.rs file as a way to generate a man page and completion +// files for common shells. // // The only other place that ripgrep deals with clap is in src/args.rs, which // is where we read clap's configuration from the end user's arguments and turn @@ -478,7 +478,7 @@ impl RGArg { } } -// We add an extra space to long descriptions so that a black line is inserted +// We add an extra space to long descriptions so that a blank line is inserted // between flag descriptions in --help output. macro_rules! long { ($lit:expr) => { concat!($lit, " ") } @@ -525,6 +525,7 @@ pub fn all_args_and_flags() -> Vec { flag_max_depth(&mut args); flag_max_filesize(&mut args); flag_mmap(&mut args); + flag_multiline(&mut args); flag_no_config(&mut args); flag_no_ignore(&mut args); flag_no_ignore_global(&mut args); @@ -813,10 +814,23 @@ fn flag_debug(args: &mut Vec) { const SHORT: &str = "Show debug messages."; const LONG: &str = long!("\ Show debug messages. Please use this when filing a bug report. + +The --debug flag is generally useful for figuring out why ripgrep skipped +searching a particular file. The debug messages should mention all files +skipped and why they were skipped. 
+ +To get even more debug output, use the --trace flag, which implies --debug +along with additional trace data. With --trace, the output could be quite +large and is generally more useful for development. "); let arg = RGArg::switch("debug") .help(SHORT).long_help(LONG); args.push(arg); + + let arg = RGArg::switch("trace") + .hidden() + .overrides("debug"); + args.push(arg); } fn flag_dfa_size_limit(args: &mut Vec) { @@ -1198,6 +1212,24 @@ This flag overrides --mmap. args.push(arg); } +fn flag_multiline(args: &mut Vec) { + const SHORT: &str = "Enable matching across multiple lines."; + const LONG: &str = long!("\ +Enable matching across multiple lines. + +This flag can be disabled with --no-multiline. +"); + let arg = RGArg::switch("multiline") + .help(SHORT).long_help(LONG) + .overrides("no-multiline"); + args.push(arg); + + let arg = RGArg::switch("no-multiline") + .hidden() + .overrides("multiline"); + args.push(arg); +} + fn flag_no_config(args: &mut Vec) { const SHORT: &str = "Never read configuration files."; const LONG: &str = long!("\ @@ -1374,13 +1406,10 @@ the empty string. For example, if you are searching using 'rg foo' then using 'rg \"^|foo\"' instead will emit every line in every file searched, but only occurrences of 'foo' will be highlighted. This flag enables the same behavior without needing to modify the pattern. - -This flag conflicts with the --only-matching and --replace flags. 
"); let arg = RGArg::switch("passthru") .help(SHORT).long_help(LONG) - .alias("passthrough") - .conflicts(&["only-matching", "replace"]); + .alias("passthrough"); args.push(arg); } diff --git a/src/args.rs b/src/args.rs index 10b9e557..7def2706 100644 --- a/src/args.rs +++ b/src/args.rs @@ -21,6 +21,7 @@ use atty; use ignore::overrides::{Override, OverrideBuilder}; use ignore::types::{FileTypeDef, Types, TypesBuilder}; use ignore; +use messages::{set_messages, set_ignore_messages}; use printer::{ColorSpecs, Printer}; use unescape::{escape, unescape}; use worker::{Worker, WorkerBuilder}; @@ -64,10 +65,8 @@ pub struct Args { mmap: bool, no_ignore: bool, no_ignore_global: bool, - no_ignore_messages: bool, no_ignore_parent: bool, no_ignore_vcs: bool, - no_messages: bool, null: bool, only_matching: bool, path_separator: Option, @@ -101,6 +100,8 @@ impl Args { // arguments, then we re-parse argv, otherwise we just use the matches // we have here. let early_matches = ArgMatches(app::app().get_matches()); + set_messages(!early_matches.is_present("no-messages")); + set_ignore_messages(!early_matches.is_present("no-ignore-messages")); if let Err(err) = Logger::init() { errored!("failed to initialize logger: {}", err); @@ -120,6 +121,8 @@ impl Args { } else { log::set_max_level(log::LevelFilter::Warn); } + set_messages(!matches.is_present("no-messages")); + set_ignore_messages(!matches.is_present("no-ignore-messages")); matches.to_args() } @@ -137,7 +140,7 @@ impl Args { } // If the user wants ripgrep to use a config file, then parse args // from that first. 
- let mut args = config::args(early_matches.is_present("no-messages")); + let mut args = config::args(); if args.is_empty() { return early_matches; } @@ -286,7 +289,6 @@ impl Args { .invert_match(self.invert_match) .max_count(self.max_count) .mmap(self.mmap) - .no_messages(self.no_messages) .quiet(self.quiet) .text(self.text) .search_zip_files(self.search_zip_files) @@ -310,17 +312,6 @@ impl Args { self.type_list } - /// Returns true if error messages should be suppressed. - pub fn no_messages(&self) -> bool { - self.no_messages - } - - /// Returns true if error messages associated with parsing .ignore or - /// .gitignore files should be suppressed. - pub fn no_ignore_messages(&self) -> bool { - self.no_ignore_messages - } - /// Create a new recursive directory iterator over the paths in argv. pub fn walker(&self) -> ignore::Walk { self.walker_builder().build() @@ -340,9 +331,7 @@ impl Args { } for path in &self.ignore_files { if let Some(err) = wd.add_ignore(path) { - if !self.no_messages && !self.no_ignore_messages { - eprintln!("{}", err); - } + ignore_message!("{}", err); } } @@ -419,10 +408,8 @@ impl<'a> ArgMatches<'a> { mmap: mmap, no_ignore: self.no_ignore(), no_ignore_global: self.no_ignore_global(), - no_ignore_messages: self.is_present("no-ignore-messages"), no_ignore_parent: self.no_ignore_parent(), no_ignore_vcs: self.no_ignore_vcs(), - no_messages: self.is_present("no-messages"), null: self.is_present("null"), only_matching: self.is_present("only-matching"), path_separator: self.path_separator()?, diff --git a/src/args2.rs b/src/args2.rs new file mode 100644 index 00000000..6a5d1502 --- /dev/null +++ b/src/args2.rs @@ -0,0 +1,1384 @@ +use std::cmp; +use std::env; +use std::ffi::OsStr; +use std::fs::File; +use std::io::{self, BufRead}; +use std::path::{Path, PathBuf}; +use std::sync::Arc; + +use atty; +use clap; +use grep2::matcher::LineTerminator; +use grep2::searcher::{ + BinaryDetection, Encoding, MmapChoice, Searcher, SearcherBuilder, +}; +use 
grep2::printer::{ + ColorSpecs, Stats, + JSON, JSONBuilder, + Standard, StandardBuilder, + Summary, SummaryBuilder, SummaryKind, +}; +use grep2::regex::{RegexMatcher, RegexMatcherBuilder}; +use ignore::overrides::{Override, OverrideBuilder}; +use ignore::types::{FileTypeDef, Types, TypesBuilder}; +use ignore::{Walk, WalkBuilder, WalkParallel}; +use log; +use num_cpus; +use path_printer::{PathPrinter, PathPrinterBuilder}; +use regex::{self, Regex}; +use same_file::Handle; +use termcolor::{ + WriteColor, + BufferedStandardStream, BufferWriter, ColorChoice, StandardStream, +}; + +use app; +use config; +use logger::Logger; +use messages::{set_messages, set_ignore_messages}; +use search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder}; +use subject::SubjectBuilder; +use unescape::{escape, unescape}; +use Result; + +/// The command that ripgrep should execute based on the command line +/// configuration. +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Command { + /// Search using exactly one thread. + Search, + /// Search using possibly many threads. + SearchParallel, + /// The command line parameters suggest that a search should occur, but + /// ripgrep knows that a match can never be found (e.g., no given patterns + /// or --max-count=0). + SearchNever, + /// Show the files that would be searched, but don't actually search them, + /// and use exactly one thread. + Files, + /// Show the files that would be searched, but don't actually search them, + /// and perform directory traversal using possibly many threads. + FilesParallel, + /// List all file type definitions configured, including the default file + /// types and any additional file types added to the command line. + Types, +} + +impl Command { + /// Returns true if and only if this command requires executing a search. 
+ fn is_search(&self) -> bool { + use self::Command::*; + + match *self { + Search | SearchParallel => true, + SearchNever | Files | FilesParallel | Types => false, + } + } +} + +/// The primary configuration object used throughout ripgrep. It provides a +/// high-level convenient interface to the provided command line arguments. +/// +/// An `Args` object is cheap to clone and can be used from multiple threads +/// simultaneously. +#[derive(Clone, Debug)] +pub struct Args(Arc); + +#[derive(Clone, Debug)] +struct ArgsImp { + /// Mid-to-low level routines for extracting CLI arguments. + matches: ArgMatches, + /// The patterns provided at the command line and/or via the -f/--file + /// flag. This may be empty. + patterns: Vec, + /// A matcher built from the patterns. + /// + /// It's important that this is only built once, since building this goes + /// through regex compilation and various types of analyses. That is, if + /// you need many of these (one per thread, for example), it is better to + /// build it once and then clone it. + matcher: PatternMatcher, + /// The paths provided at the command line. This is guaranteed to be + /// non-empty. (If no paths are provided, then a default path is created.) + paths: Vec, + /// Returns true if and only if `paths` had to be populated with a single + /// default path. + using_default_path: bool, +} + +impl Args { + /// Parse the command line arguments for this process. + /// + /// If a CLI usage error occurred, then exit the process and print a usage + /// or error message. Similarly, if the user requested the version of + /// ripgrep, then print the version and exit. + /// + /// Also, initialize a global logger. + pub fn parse() -> Result { + // We parse the args given on CLI. This does not include args from + // the config. We use the CLI args as an initial configuration while + // trying to parse config files. 
If a config file exists and has + // arguments, then we re-parse argv, otherwise we just use the matches + // we have here. + let early_matches = ArgMatches::new(app::app().get_matches()); + set_messages(!early_matches.is_present("no-messages")); + set_ignore_messages(!early_matches.is_present("no-ignore-messages")); + + if let Err(err) = Logger::init() { + errored!("failed to initialize logger: {}", err); + } + if early_matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if early_matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } + + let matches = early_matches.reconfigure(); + // The logging level may have changed if we brought in additional + // arguments from a configuration file, so recheck it and set the log + // level as appropriate. + if matches.is_present("trace") { + log::set_max_level(log::LevelFilter::Trace); + } else if matches.is_present("debug") { + log::set_max_level(log::LevelFilter::Debug); + } else { + log::set_max_level(log::LevelFilter::Warn); + } + set_messages(!matches.is_present("no-messages")); + set_ignore_messages(!matches.is_present("no-ignore-messages")); + matches.to_args() + } + + /// Return direct access to command line arguments. + fn matches(&self) -> &ArgMatches { + &self.0.matches + } + + /// Return the patterns found in the command line arguments. This includes + /// patterns read via the -f/--file flags. + fn patterns(&self) -> &[String] { + &self.0.patterns + } + + /// Return the matcher built from the patterns. + fn matcher(&self) -> &PatternMatcher { + &self.0.matcher + } + + /// Return the paths found in the command line arguments. This is + /// guaranteed to be non-empty. In the case where no explicit arguments are + /// provided, a single default path is provided automatically. 
+ fn paths(&self) -> &[PathBuf] { + &self.0.paths + } + + /// Returns true if and only if `paths` had to be populated with a default + /// path, which occurs only when no paths were given as command line + /// arguments. + fn using_default_path(&self) -> bool { + self.0.using_default_path + } + + /// Return the printer that should be used for formatting the output of + /// search results. + /// + /// The returned printer will write results to the given writer. + fn printer(&self, wtr: W) -> Result> { + match self.matches().output_kind() { + OutputKind::Standard => { + let separator_search = self.command()? == Command::Search; + self.matches() + .printer_standard(self.paths(), wtr, separator_search) + .map(Printer::Standard) + } + OutputKind::Summary => { + self.matches() + .printer_summary(self.paths(), wtr) + .map(Printer::Summary) + } + OutputKind::JSON => { + self.matches() + .printer_json(wtr) + .map(Printer::JSON) + } + } + } +} + +/// High level public routines for building data structures used by ripgrep +/// from command line arguments. +impl Args { + /// Create a new buffer writer for multi-threaded printing with color + /// support. + pub fn buffer_writer(&self) -> Result { + let mut wtr = BufferWriter::stdout(self.matches().color_choice()); + wtr.separator(self.matches().file_separator()?); + Ok(wtr) + } + + /// Return the high-level command that ripgrep should run. 
+ pub fn command(&self) -> Result { + let is_one_search = self.matches().is_one_search(self.paths()); + let threads = self.matches().threads()?; + let one_thread = is_one_search || threads == 1; + + Ok(if self.matches().is_present("type-list") { + Command::Types + } else if self.matches().is_present("files") { + if one_thread { + Command::Files + } else { + Command::FilesParallel + } + } else if self.matches().can_never_match(self.patterns()) { + Command::SearchNever + } else if one_thread { + Command::Search + } else { + Command::SearchParallel + }) + } + + /// Build a path printer that can be used for printing just file paths, + /// with optional color support. + /// + /// The printer will print paths to the given writer. + pub fn path_printer( + &self, + wtr: W, + ) -> Result> { + let mut builder = PathPrinterBuilder::new(); + builder + .color_specs(self.matches().color_specs()?) + .separator(self.matches().path_separator()?) + .terminator(self.matches().path_terminator().unwrap_or(b'\n')); + Ok(builder.build(wtr)) + } + + /// Returns true if and only if the search should quit after finding the + /// first match. + pub fn quit_after_match(&self) -> Result { + Ok(self.matches().is_present("quiet") && self.stats()?.is_none()) + } + + /// Build a worker for executing searches. + /// + /// Search results are written to the given writer. + pub fn search_worker( + &self, + wtr: W, + ) -> Result> { + let matcher = self.matcher().clone(); + let printer = self.printer(wtr)?; + let searcher = self.matches().searcher(self.paths())?; + let mut builder = SearchWorkerBuilder::new(); + builder + .preprocessor(self.matches().preprocessor()) + .search_zip(self.matches().is_present("search-zip")); + Ok(builder.build(matcher, searcher, printer)) + } + + /// Returns a zero value for tracking statistics if and only if it has been + /// requested. 
+ /// + /// When this returns a `Stats` value, then it is guaranteed that the + /// search worker will be configured to track statistics as well. + pub fn stats(&self) -> Result> { + Ok(if self.command()?.is_search() && self.matches().stats() { + Some(Stats::new()) + } else { + None + }) + } + + /// Return a builder for constructing subjects. A subject represents a + /// single unit of something to search. Typically, this corresponds to a + /// file or a stream such as stdin. + pub fn subject_builder(&self) -> SubjectBuilder { + let mut builder = SubjectBuilder::new(); + builder + .strip_dot_prefix(self.using_default_path()) + .skip(self.matches().stdout_handle()); + builder + } + + /// Return a writer to stdout that enables color + /// support based on the command line configuration. + pub fn stdout(&self) -> Box { + let color_choice = self.matches().color_choice(); + if atty::is(atty::Stream::Stdout) { + Box::new(StandardStream::stdout(color_choice)) + } else { + Box::new(BufferedStandardStream::stdout(color_choice)) + } + } + + /// Return the type definitions compiled into ripgrep. + /// + /// If there was a problem reading and parsing the type definitions, then + /// this returns an error. + pub fn type_defs(&self) -> Result> { + Ok(self.matches().types()?.definitions().to_vec()) + } + + /// Return a walker that never uses additional threads. + pub fn walker(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build()) + } + + /// Return a parallel walker that may use additional threads. + pub fn walker_parallel(&self) -> Result { + Ok(self.matches().walker_builder(self.paths())?.build_parallel()) + } +} + +/// `ArgMatches` wraps `clap::ArgMatches` and provides semantic meaning to +/// the parsed arguments. +#[derive(Clone, Debug)] +struct ArgMatches(clap::ArgMatches<'static>); + +/// The output format. Generally, this corresponds to the printer that ripgrep +/// uses to show search results. 
+#[derive(Clone, Copy, Debug, Eq, PartialEq)] +enum OutputKind { + /// Classic grep-like or ack-like format. + Standard, + /// Show matching files and possibly the number of matches in each file. + Summary, + /// Emit match information in the JSON Lines format. + JSON, +} + +impl ArgMatches { + /// Create an ArgMatches from clap's parse result. + fn new(clap_matches: clap::ArgMatches<'static>) -> ArgMatches { + ArgMatches(clap_matches) + } + + /// Run clap and return the matches using a config file if present. If clap + /// determines a problem with the user provided arguments (or if --help or + /// --version are given), then an error/usage/version will be printed and + /// the process will exit. + /// + /// If there are no additional arguments from the environment (e.g., a + /// config file), then the given matches are returned as is. + fn reconfigure(self) -> ArgMatches { + // If the end user says no config, then respect it. + if self.is_present("no-config") { + debug!("not reading config files because --no-config is present"); + return self; + } + // If the user wants ripgrep to use a config file, then parse args + // from that first. + let mut args = config::args(); + if args.is_empty() { + return self; + } + let mut cliargs = env::args_os(); + if let Some(bin) = cliargs.next() { + args.insert(0, bin); + } + args.extend(cliargs); + debug!("final argv: {:?}", args); + ArgMatches::new(app::app().get_matches_from(args)) + } + + /// Convert the result of parsing CLI arguments into ripgrep's higher level + /// configuration structure. + fn to_args(self) -> Result { + // We compute these once since they could be large. 
+ let patterns = self.patterns()?; + let matcher = self.matcher(&patterns)?; + let mut paths = self.paths(); + let using_default_path = + if paths.is_empty() { + paths.push(self.path_default()); + true + } else { + false + }; + Ok(Args(Arc::new(ArgsImp { + matches: self, + patterns: patterns, + matcher: matcher, + paths: paths, + using_default_path: using_default_path, + }))) + } +} + +/// High level routines for converting command line arguments into various +/// data structures used by ripgrep. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Return the matcher that should be used for searching. + /// + /// If there was a problem building the matcher (e.g., a syntax error), + /// then this returns an error. + fn matcher(&self, patterns: &[String]) -> Result { + let matcher = self.matcher_rust(patterns)?; + Ok(PatternMatcher::RustRegex(matcher)) + } + + /// Build a matcher using Rust's regex engine. + /// + /// If there was a problem building the matcher (such as a regex syntax + /// error), then an error is returned. + fn matcher_rust(&self, patterns: &[String]) -> Result { + let mut builder = RegexMatcherBuilder::new(); + builder + .case_smart(self.case_smart()) + .case_insensitive(self.case_insensitive()) + .multi_line(true) + .dot_matches_new_line(false) + .unicode(true) + .octal(false) + .word(self.is_present("word-regexp")); + if !self.is_present("multiline") { + builder.line_terminator(Some(b'\n')); + } + if let Some(limit) = self.regex_size_limit()? { + builder.size_limit(limit); + } + if let Some(limit) = self.dfa_size_limit()? { + builder.dfa_size_limit(limit); + } + Ok(builder.build(&patterns.join("|"))?) + } + + /// Build a JSON printer that writes results to the given writer. + fn printer_json(&self, wtr: W) -> Result> { + let mut builder = JSONBuilder::new(); + builder + .pretty(false) + .max_matches(self.max_count()?) 
+ .always_begin_end(false); + Ok(builder.build(wtr)) + } + + /// Build a Standard printer that writes results to the given writer. + /// + /// The given paths are used to configure aspects of the printer. + /// + /// If `separator_search` is true, then the returned printer will assume + /// the responsibility of printing a separator between each set of + /// search results, when appropriate (e.g., when contexts are enabled). + /// When it's set to false, the caller is responsible for handling + /// separators. + /// + /// In practice, we want the printer to handle it in the single threaded + /// case but not in the multi-threaded case. + fn printer_standard( + &self, + paths: &[PathBuf], + wtr: W, + separator_search: bool, + ) -> Result> { + let mut builder = StandardBuilder::new(); + builder + .color_specs(self.color_specs()?) + .stats(self.stats()) + .heading(self.heading()) + .path(self.with_filename(paths)) + .only_matching(self.is_present("only-matching")) + .per_match(self.is_present("vimgrep")) + .replacement(self.replacement()) + .max_columns(self.max_columns()?) + .max_matches(self.max_count()?) + .column(self.column()) + .byte_offset(self.is_present("byte-offset")) + .trim_ascii(false) + .separator_search(None) + .separator_context(Some(self.context_separator())) + .separator_field_match(b":".to_vec()) + .separator_field_context(b"-".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + if separator_search { + builder.separator_search(self.file_separator()?); + } + Ok(builder.build(wtr)) + } + + /// Build a Summary printer that writes results to the given writer. + /// + /// The given paths are used to configure aspects of the printer. + /// + /// This panics if the output format is not `OutputKind::Summary`. 
+ fn printer_summary( + &self, + paths: &[PathBuf], + wtr: W, + ) -> Result> { + let mut builder = SummaryBuilder::new(); + builder + .kind(self.summary_kind().expect("summary format")) + .color_specs(self.color_specs()?) + .stats(self.stats()) + .path(self.with_filename(paths)) + .max_matches(self.max_count()?) + .separator_field(b":".to_vec()) + .separator_path(self.path_separator()?) + .path_terminator(self.path_terminator()); + Ok(builder.build(wtr)) + } + + /// Build a searcher from the command line parameters. + fn searcher(&self, paths: &[PathBuf]) -> Result { + let (ctx_before, ctx_after) = self.contexts()?; + let mut builder = SearcherBuilder::new(); + builder + .line_terminator(LineTerminator::byte(b'\n')) + .invert_match(self.is_present("invert-match")) + .line_number(self.line_number(paths)) + .multi_line(self.is_present("multiline")) + .before_context(ctx_before) + .after_context(ctx_after) + .passthru(self.is_present("passthru")) + .memory_map(self.mmap_choice(paths)) + .binary_detection(self.binary_detection()) + .encoding(self.encoding()?); + Ok(builder.build()) + } + + /// Return a builder for recursively traversing a directory while + /// respecting ignore rules. + /// + /// If there was a problem parsing the CLI arguments necessary for + /// constructing the builder, then this returns an error. + fn walker_builder(&self, paths: &[PathBuf]) -> Result { + let mut builder = WalkBuilder::new(&paths[0]); + for path in &paths[1..] { + builder.add(path); + } + for path in self.ignore_paths() { + if let Some(err) = builder.add_ignore(path) { + ignore_message!("{}", err); + } + } + builder + .max_depth(self.usize_of("max-depth")?) + .follow_links(self.is_present("follow")) + .max_filesize(self.max_file_size()?) + .threads(self.threads()?) + .overrides(self.overrides()?) + .types(self.types()?) 
+ .hidden(!self.hidden()) + .parents(!self.no_ignore_parent()) + .ignore(!self.no_ignore()) + .git_global( + !self.no_ignore() + && !self.no_ignore_vcs() + && !self.no_ignore_global()) + .git_ignore(!self.no_ignore() && !self.no_ignore_vcs()) + .git_exclude(!self.no_ignore() && !self.no_ignore_vcs()); + if !self.no_ignore() { + builder.add_custom_ignore_filename(".rgignore"); + } + if self.is_present("sort-files") { + builder.sort_by_file_name(|a, b| a.cmp(b)); + } + Ok(builder) + } +} + +/// Mid level routines for converting command line arguments into various types +/// of data structures. +/// +/// Methods are sorted alphabetically. +impl ArgMatches { + /// Returns the form of binary detection to perform. + fn binary_detection(&self) -> BinaryDetection { + if self.is_present("text") || self.unrestricted_count() >= 3 { + BinaryDetection::none() + } else { + BinaryDetection::quit(b'\x00') + } + } + + /// Returns true if the command line configuration implies that a match + /// can never be shown. + fn can_never_match(&self, patterns: &[String]) -> bool { + patterns.is_empty() || self.max_count().ok() == Some(Some(0)) + } + + /// Returns true if and only if case should be ignored. + /// + /// If --case-sensitive is present, then case is never ignored, even if + /// --ignore-case is present. + fn case_insensitive(&self) -> bool { + self.is_present("ignore-case") && !self.is_present("case-sensitive") + } + + /// Returns true if and only if smart case has been enabled. + /// + /// If either --ignore-case or --case-sensitive are present, then smart + /// case is disabled. + fn case_smart(&self) -> bool { + self.is_present("smart-case") + && !self.is_present("ignore-case") + && !self.is_present("case-sensitive") + } + + /// Returns the user's color choice based on command line parameters and + /// environment. 
+ fn color_choice(&self) -> ColorChoice { + let preference = match self.value_of_lossy("color") { + None => "auto".to_string(), + Some(v) => v, + }; + if preference == "always" { + ColorChoice::Always + } else if preference == "ansi" { + ColorChoice::AlwaysAnsi + } else if preference == "auto" { + if atty::is(atty::Stream::Stdout) || self.is_present("pretty") { + ColorChoice::Auto + } else { + ColorChoice::Never + } + } else { + ColorChoice::Never + } + } + + /// Returns the color specifications given by the user on the CLI. + /// + /// If there was a problem parsing any of the provided specs, then an error + /// is returned. + fn color_specs(&self) -> Result { + // Start with a default set of color specs. + let mut specs = vec![ + #[cfg(unix)] + "path:fg:magenta".parse().unwrap(), + #[cfg(windows)] + "path:fg:cyan".parse().unwrap(), + "line:fg:green".parse().unwrap(), + "match:fg:red".parse().unwrap(), + "match:style:bold".parse().unwrap(), + ]; + for spec_str in self.values_of_lossy_vec("colors") { + specs.push(spec_str.parse()?); + } + Ok(ColorSpecs::new(&specs)) + } + + /// Returns true if and only if column numbers should be shown. + fn column(&self) -> bool { + if self.is_present("no-column") { + return false; + } + self.is_present("column") || self.is_present("vimgrep") + } + + /// Returns the before and after contexts from the command line. + /// + /// If a context setting was absent, then `0` is returned. + /// + /// If there was a problem parsing the values from the user as an integer, + /// then an error is returned. + fn contexts(&self) -> Result<(usize, usize)> { + let after = self.usize_of("after-context")?.unwrap_or(0); + let before = self.usize_of("before-context")?.unwrap_or(0); + let both = self.usize_of("context")?.unwrap_or(0); + Ok(if both > 0 { + (both, both) + } else { + (before, after) + }) + } + + /// Returns the unescaped context separator in UTF-8 bytes. + /// + /// If one was not provided, the default `--` is returned. 
+ fn context_separator(&self) -> Vec { + match self.value_of_lossy("context-separator") { + None => b"--".to_vec(), + Some(sep) => unescape(&sep), + } + } + + /// Returns whether the -c/--count or the --count-matches flags were + /// passed from the command line. + /// + /// If --count-matches and --invert-match were passed in, behave + /// as if --count and --invert-match were passed in (i.e. rg will + /// count inverted matches as per existing behavior). + fn counts(&self) -> (bool, bool) { + let count = self.is_present("count"); + let count_matches = self.is_present("count-matches"); + let invert_matches = self.is_present("invert-match"); + let only_matching = self.is_present("only-matching"); + if count_matches && invert_matches { + // Treat `-v --count-matches` as `-v -c`. + (true, false) + } else if count && only_matching { + // Treat `-c --only-matching` as `--count-matches`. + (false, true) + } else { + (count, count_matches) + } + } + + /// Parse the dfa-size-limit argument option into a byte count. + fn dfa_size_limit(&self) -> Result> { + let r = self.parse_human_readable_size("dfa-size-limit")?; + u64_to_usize("dfa-size-limit", r) + } + + /// Returns the type of encoding to use. + /// + /// This only returns an encoding if one is explicitly specified. When no + /// encoding is present, the Searcher will still do BOM sniffing for UTF-16 + /// and transcode seamlessly. + fn encoding(&self) -> Result> { + let label = match self.value_of_lossy("encoding") { + None => return Ok(None), + Some(label) => label, + }; + if label == "auto" { + return Ok(None); + } + Ok(Some(Encoding::new(&label)?)) + } + + /// Return the file separator to use based on the CLI configuration. + fn file_separator(&self) -> Result>> { + // File separators are only used for the standard grep-line format. 
+ if self.output_kind() != OutputKind::Standard { + return Ok(None); + } + + let (ctx_before, ctx_after) = self.contexts()?; + Ok(if self.heading() { + Some(b"".to_vec()) + } else if ctx_before > 0 || ctx_after > 0 { + Some(self.context_separator().clone()) + } else { + None + }) + } + + + /// Returns true if and only if matches should be grouped with file name + /// headings. + fn heading(&self) -> bool { + if self.is_present("no-heading") || self.is_present("vimgrep") { + false + } else { + atty::is(atty::Stream::Stdout) + || self.is_present("heading") + || self.is_present("pretty") + } + } + + /// Returns true if and only if hidden files/directories should be + /// searched. + fn hidden(&self) -> bool { + self.is_present("hidden") || self.unrestricted_count() >= 2 + } + + /// Return all of the ignore file paths given on the command line. + fn ignore_paths(&self) -> Vec { + let paths = match self.values_of_os("ignore-file") { + None => return vec![], + Some(paths) => paths, + }; + paths.map(|p| Path::new(p).to_path_buf()).collect() + } + + /// Returns true if and only if ripgrep is invoked in a way where it knows + /// it will search exactly one thing. + fn is_one_search(&self, paths: &[PathBuf]) -> bool { + if paths.len() != 1 { + return false; + } + self.is_only_stdin(paths) || paths[0].is_file() + } + + /// Returns true if and only if we're only searching a single thing and + /// that thing is stdin. + fn is_only_stdin(&self, paths: &[PathBuf]) -> bool { + paths == [Path::new("-")] + } + + /// Returns true if and only if we should show line numbers. + fn line_number(&self, paths: &[PathBuf]) -> bool { + if self.output_kind() == OutputKind::Summary { + return false; + } + if self.is_present("no-line-number") { + return false; + } + + // A few things can imply counting line numbers.
In particular, we + // generally want to show line numbers by default when printing to a + // tty for human consumption, except for one interesting case: when + // we're only searching stdin. This makes pipelines work as expected. + (atty::is(atty::Stream::Stdout) && !self.is_only_stdin(paths)) + || self.is_present("line-number") + || self.is_present("column") + || self.is_present("pretty") + || self.is_present("vimgrep") + } + + /// The maximum number of columns allowed on each line. + /// + /// If `0` is provided, then this returns `None`. + fn max_columns(&self) -> Result> { + Ok(self.usize_of_nonzero("max-columns")?.map(|n| n as u64)) + } + + /// The maximum number of matches permitted. + fn max_count(&self) -> Result> { + Ok(self.usize_of("max-count")?.map(|n| n as u64)) + } + + /// Parses the max-filesize argument option into a byte count. + fn max_file_size(&self) -> Result> { + self.parse_human_readable_size("max-filesize") + } + + /// Returns whether we should attempt to use memory maps or not. + fn mmap_choice(&self, paths: &[PathBuf]) -> MmapChoice { + // SAFETY: Memory maps are difficult to impossible to encapsulate + // safely in a portable way that doesn't simultaneously negate some of + // the benefits of using memory maps. For ripgrep's use, we never mutate + // a memory map and generally never store the contents of a memory map + // in a data structure that depends on immutability. Generally + // speaking, the worst thing that can happen is a SIGBUS (if the + // underlying file is truncated while reading it), which will cause + // ripgrep to abort. + let maybe = unsafe { MmapChoice::auto() }; + let never = MmapChoice::never(); + if self.is_present("no-mmap") { + never + } else if self.is_present("mmap") { + maybe + } else if paths.len() <= 10 && paths.iter().all(|p| p.is_file()) { + // If we're only searching a few paths and all of them are + // files, then memory maps are probably faster.
+ maybe + } else { + never + } + } + + /// Returns true if ignore files should be ignored. + fn no_ignore(&self) -> bool { + self.is_present("no-ignore") || self.unrestricted_count() >= 1 + } + + /// Returns true if global ignore files should be ignored. + fn no_ignore_global(&self) -> bool { + self.is_present("no-ignore-global") || self.no_ignore() + } + + /// Returns true if parent ignore files should be ignored. + fn no_ignore_parent(&self) -> bool { + self.is_present("no-ignore-parent") || self.no_ignore() + } + + /// Returns true if VCS ignore files should be ignored. + fn no_ignore_vcs(&self) -> bool { + self.is_present("no-ignore-vcs") || self.no_ignore() + } + + /// Determine the type of output we should produce. + fn output_kind(&self) -> OutputKind { + let (count, count_matches) = self.counts(); + let summary = + count + || count_matches + || self.is_present("files-with-matches") + || self.is_present("files-without-match") + || self.is_present("quiet"); + if summary { + OutputKind::Summary + } else { + OutputKind::Standard + } + } + + /// Builds the set of glob overrides from the command line flags. + fn overrides(&self) -> Result { + let mut builder = OverrideBuilder::new(env::current_dir()?); + for glob in self.values_of_lossy_vec("glob") { + builder.add(&glob)?; + } + // This only enables case insensitivity for subsequent globs. + builder.case_insensitive(true)?; + for glob in self.values_of_lossy_vec("iglob") { + builder.add(&glob)?; + } + Ok(builder.build()?) + } + + /// Return all file paths that ripgrep should search. + /// + /// If no paths were given, then this returns an empty list. + fn paths(&self) -> Vec { + let mut paths: Vec = match self.values_of_os("path") { + None => vec![], + Some(paths) => paths.map(|p| Path::new(p).to_path_buf()).collect(), + }; + // If --file, --files or --regexp is given, then the first path is + // always in `pattern`. 
+ if self.is_present("file") + || self.is_present("files") + || self.is_present("regexp") + { + if let Some(path) = self.value_of_os("pattern") { + paths.insert(0, Path::new(path).to_path_buf()); + } + } + paths + } + + /// Return the default path that ripgrep should search. This should only + /// be used when ripgrep is not otherwise given at least one file path + /// as a positional argument. + fn path_default(&self) -> PathBuf { + let file_is_stdin = self.values_of_os("file") + .map_or(false, |mut files| files.any(|f| f == "-")); + let search_cwd = + atty::is(atty::Stream::Stdin) + || !stdin_is_readable() + || (self.is_present("file") && file_is_stdin) + || self.is_present("files") + || self.is_present("type-list"); + if search_cwd { + Path::new("./").to_path_buf() + } else { + Path::new("-").to_path_buf() + } + } + + /// Returns the unescaped path separator as a single byte, if one exists. + /// + /// If the provided path separator is more than a single byte, then an + /// error is returned. + fn path_separator(&self) -> Result> { + let sep = match self.value_of_lossy("path-separator") { + None => return Ok(None), + Some(sep) => unescape(&sep), + }; + if sep.is_empty() { + Ok(None) + } else if sep.len() > 1 { + Err(From::from(format!( + "A path separator must be exactly one byte, but \ + the given separator is {} bytes: {}\n\ + In some shells on Windows '/' is automatically \ + expanded. Use '//' instead.", + sep.len(), + escape(&sep), + ))) + } else { + Ok(Some(sep[0])) + } + } + + /// Returns the byte that should be used to terminate paths. + /// + /// Typically, this is only set to `\x00` when the --null flag is provided, + /// and `None` otherwise. + fn path_terminator(&self) -> Option { + if self.is_present("null") { + Some(b'\x00') + } else { + None + } + } + + /// Get a sequence of all available patterns from the command line. + /// This includes reading the -e/--regexp and -f/--file flags. 
+ /// + /// Note that if -F/--fixed-strings is set, then all patterns will be + /// escaped. If -x/--line-regexp is set, then all patterns are surrounded + /// by `^...$`. Other things, such as --word-regexp, are handled by the + /// regex matcher itself. + /// + /// If any pattern is invalid UTF-8, then an error is returned. + fn patterns(&self) -> Result> { + if self.is_present("files") || self.is_present("type-list") { + return Ok(vec![]); + } + let mut pats = vec![]; + match self.values_of_os("regexp") { + None => { + if self.values_of_os("file").is_none() { + if let Some(os_pat) = self.value_of_os("pattern") { + pats.push(self.pattern_from_os_str(os_pat)?); + } + } + } + Some(os_pats) => { + for os_pat in os_pats { + pats.push(self.pattern_from_os_str(os_pat)?); + } + } + } + if let Some(files) = self.values_of_os("file") { + for file in files { + if file == "-" { + let stdin = io::stdin(); + for line in stdin.lock().lines() { + pats.push(self.pattern_from_str(&line?)); + } + } else { + let f = File::open(file)?; + for line in io::BufReader::new(f).lines() { + pats.push(self.pattern_from_str(&line?)); + } + } + } + } + Ok(pats) + } + + /// Returns a pattern that is guaranteed to produce an empty regular + /// expression that is valid in any position. + fn pattern_empty(&self) -> String { + // This would normally just be an empty string, which works on its + // own, but if the patterns are joined in a set of alternations, then + // you wind up with `foo|`, which is currently invalid in Rust's regex + // engine. + "(?:z{0})*".to_string() + } + + /// Converts an OsStr pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. + /// + /// If the pattern is not valid UTF-8, then an error is returned. + fn pattern_from_os_str(&self, pat: &OsStr) -> Result { + let s = pattern_to_str(pat)?; + Ok(self.pattern_from_str(s)) + } + + /// Converts a &str pattern to a String pattern. The pattern is escaped + /// if -F/--fixed-strings is set. 
+ fn pattern_from_str(&self, pat: &str) -> String { + let litpat = self.pattern_literal(pat.to_string()); + let s = self.pattern_line(litpat); + + if s.is_empty() { + self.pattern_empty() + } else { + s + } + } + + /// Returns the given pattern as a line pattern if the -x/--line-regexp + /// flag is set. Otherwise, the pattern is returned unchanged. + fn pattern_line(&self, pat: String) -> String { + if self.is_present("line-regexp") { + format!(r"^(?:{})$", pat) + } else { + pat + } + } + + /// Returns the given pattern as a literal pattern if the + /// -F/--fixed-strings flag is set. Otherwise, the pattern is returned + /// unchanged. + fn pattern_literal(&self, pat: String) -> String { + if self.is_present("fixed-strings") { + regex::escape(&pat) + } else { + pat + } + } + + /// Returns the preprocessor command if one was specified. + fn preprocessor(&self) -> Option { + let path = match self.value_of_os("pre") { + None => return None, + Some(path) => path, + }; + if path.is_empty() { + return None; + } + Some(Path::new(path).to_path_buf()) + } + + /// Parse the regex-size-limit argument option into a byte count. + fn regex_size_limit(&self) -> Result> { + let r = self.parse_human_readable_size("regex-size-limit")?; + u64_to_usize("regex-size-limit", r) + } + + /// Returns the replacement string as UTF-8 bytes if it exists. + fn replacement(&self) -> Option> { + self.value_of_lossy("replace").map(|s| s.into_bytes()) + } + + /// Returns true if and only if aggregate statistics for a search should + /// be tracked. + /// + /// Generally, this is only enabled when explicitly requested in the + /// command line arguments via the --stats flag, but this can also be + /// enabled implicitly via the output format, e.g., for JSON Lines. + fn stats(&self) -> bool { + self.output_kind() == OutputKind::JSON || self.is_present("stats") + } + + /// Returns a handle to stdout for filtering search.
+ /// + /// A handle is returned if and only if ripgrep's stdout is being + /// redirected to a file. The handle returned corresponds to that file. + /// + /// This can be used to ensure that we do not attempt to search a file + /// that ripgrep is writing to. + fn stdout_handle(&self) -> Option { + let h = match Handle::stdout() { + Err(_) => return None, + Ok(h) => h, + }; + let md = match h.as_file().metadata() { + Err(_) => return None, + Ok(md) => md, + }; + if !md.is_file() { + return None; + } + Some(h) + } + + /// When the output format is `Summary`, this returns the type of summary + /// output to show. + /// + /// This returns `None` if the output format is not `Summary`. + fn summary_kind(&self) -> Option { + let (count, count_matches) = self.counts(); + if self.is_present("quiet") { + Some(SummaryKind::Quiet) + } else if count_matches { + Some(SummaryKind::CountMatches) + } else if count { + Some(SummaryKind::Count) + } else if self.is_present("files-with-matches") { + Some(SummaryKind::PathWithMatch) + } else if self.is_present("files-without-match") { + Some(SummaryKind::PathWithoutMatch) + } else { + None + } + } + + /// Return the number of threads that should be used for parallelism. + fn threads(&self) -> Result { + if self.is_present("sort-files") { + return Ok(1); + } + let threads = self.usize_of("threads")?.unwrap_or(0); + Ok(if threads == 0 { + cmp::min(12, num_cpus::get()) + } else { + threads + }) + } + + /// Builds a file type matcher from the command line flags. 
+ fn types(&self) -> Result { + let mut builder = TypesBuilder::new(); + builder.add_defaults(); + for ty in self.values_of_lossy_vec("type-clear") { + builder.clear(&ty); + } + for def in self.values_of_lossy_vec("type-add") { + builder.add_def(&def)?; + } + for ty in self.values_of_lossy_vec("type") { + builder.select(&ty); + } + for ty in self.values_of_lossy_vec("type-not") { + builder.negate(&ty); + } + builder.build().map_err(From::from) + } + + /// Returns the number of times the `unrestricted` flag is provided. + fn unrestricted_count(&self) -> u64 { + self.occurrences_of("unrestricted") + } + + /// Returns true if and only if file names containing each match should + /// be emitted. + fn with_filename(&self, paths: &[PathBuf]) -> bool { + if self.is_present("no-filename") { + false + } else { + self.is_present("with-filename") + || self.is_present("vimgrep") + || paths.len() > 1 + || paths.get(0).map_or(false, |p| p.is_dir()) + } + } +} + +/// Lower level generic helper methods for teasing values out of clap. +impl ArgMatches { + /// Like values_of_lossy, but returns an empty vec if the flag is not + /// present. + fn values_of_lossy_vec(&self, name: &str) -> Vec { + self.values_of_lossy(name).unwrap_or_else(Vec::new) + } + + /// Safely reads an arg value with the given name, and if it's present, + /// tries to parse it as a usize value. + /// + /// If the number is zero, then it is considered absent and `None` is + /// returned. + fn usize_of_nonzero(&self, name: &str) -> Result> { + let n = match self.usize_of(name)? { + None => return Ok(None), + Some(n) => n, + }; + Ok(if n == 0 { + None + } else { + Some(n) + }) + } + + /// Safely reads an arg value with the given name, and if it's present, + /// tries to parse it as a usize value. + fn usize_of(&self, name: &str) -> Result> { + match self.value_of_lossy(name) { + None => Ok(None), + Some(v) => v.parse().map(Some).map_err(From::from), + } + } + + /// Parses an argument of the form `[0-9]+[KMG]?`.
+ /// + /// If the aforementioned format is not recognized, then this returns an + /// error. + fn parse_human_readable_size( + &self, + arg_name: &str, + ) -> Result> { + lazy_static! { + static ref RE: Regex = Regex::new(r"^([0-9]+)([KMG])?$").unwrap(); + } + + let arg_value = match self.value_of_lossy(arg_name) { + Some(x) => x, + None => return Ok(None) + }; + let caps = RE + .captures(&arg_value) + .ok_or_else(|| { + format!("invalid format for {}", arg_name) + })?; + + let value = caps[1].parse::()?; + let suffix = caps.get(2).map(|x| x.as_str()); + + let v_10 = value.checked_mul(1024); + let v_20 = v_10.and_then(|x| x.checked_mul(1024)); + let v_30 = v_20.and_then(|x| x.checked_mul(1024)); + let try_suffix = |x: Option| { + if x.is_some() { + Ok(x) + } else { + Err(From::from(format!("number too large for {}", arg_name))) + } + }; + match suffix { + None => Ok(Some(value)), + Some("K") => try_suffix(v_10), + Some("M") => try_suffix(v_20), + Some("G") => try_suffix(v_30), + _ => Err(From::from(format!("invalid suffix for {}", arg_name))) + } + } +} + +/// The following methods mostly dispatch to the underlying clap methods +/// directly. Methods that would otherwise get a single value will fetch all +/// values and return the last one. (Clap returns the first one.) We only +/// define the ones we need. +impl ArgMatches { + fn is_present(&self, name: &str) -> bool { + self.0.is_present(name) + } + + fn occurrences_of(&self, name: &str) -> u64 { + self.0.occurrences_of(name) + } + + fn value_of_lossy(&self, name: &str) -> Option { + self.0.value_of_lossy(name).map(|s| s.into_owned()) + } + + fn values_of_lossy(&self, name: &str) -> Option> { + self.0.values_of_lossy(name) + } + + fn value_of_os(&self, name: &str) -> Option<&OsStr> { + self.0.value_of_os(name) + } + + fn values_of_os(&self, name: &str) -> Option { + self.0.values_of_os(name) + } +} + +/// Convert an OsStr to a Unicode string. 
+/// +/// Patterns _must_ be valid UTF-8, so if the given OsStr isn't valid UTF-8, +/// this returns an error. +fn pattern_to_str(s: &OsStr) -> Result<&str> { + s.to_str().ok_or_else(|| { + From::from(format!( + "Argument '{}' is not valid UTF-8. \ + Use hex escape sequences to match arbitrary \ + bytes in a pattern (e.g., \\xFF).", + s.to_string_lossy() + )) + }) +} + +/// Convert the result of parsing a human readable file size to a `usize`, +/// failing if the type does not fit. +fn u64_to_usize( + arg_name: &str, + value: Option, +) -> Result> { + use std::usize; + + let value = match value { + None => return Ok(None), + Some(value) => value, + }; + if value <= usize::MAX as u64 { + Ok(Some(value as usize)) + } else { + Err(From::from(format!("number too large for {}", arg_name))) + } +} + +/// Returns true if and only if stdin is deemed searchable. +#[cfg(unix)] +fn stdin_is_readable() -> bool { + use std::os::unix::fs::FileTypeExt; + + let ft = match Handle::stdin().and_then(|h| h.as_file().metadata()) { + Err(_) => return false, + Ok(md) => md.file_type(), + }; + ft.is_file() || ft.is_fifo() +} + +/// Returns true if and only if stdin is deemed searchable. +#[cfg(windows)] +fn stdin_is_readable() -> bool { + // On Windows, it's not clear what the possibilities are to me, so just + // always return true. + true +} diff --git a/src/config.rs b/src/config.rs index c47e6a50..eade0cca 100644 --- a/src/config.rs +++ b/src/config.rs @@ -12,10 +12,7 @@ use std::path::{Path, PathBuf}; use Result; /// Return a sequence of arguments derived from ripgrep rc configuration files. -/// -/// If no_messages is false and there was a problem reading a config file, -/// then errors are printed to stderr. 
-pub fn args(no_messages: bool) -> Vec { +pub fn args() -> Vec { let config_path = match env::var_os("RIPGREP_CONFIG_PATH") { None => return vec![], Some(config_path) => { @@ -28,20 +25,20 @@ pub fn args(no_messages: bool) -> Vec { let (args, errs) = match parse(&config_path) { Ok((args, errs)) => (args, errs), Err(err) => { - if !no_messages { - eprintln!("{}", err); - } + message!("{}", err); return vec![]; } }; - if !no_messages && !errs.is_empty() { + if !errs.is_empty() { for err in errs { - eprintln!("{}:{}", config_path.display(), err); + message!("{}:{}", config_path.display(), err); } } debug!( "{}: arguments loaded from config file: {:?}", - config_path.display(), args); + config_path.display(), + args + ); args } @@ -59,7 +56,7 @@ fn parse>( let path = path.as_ref(); match File::open(&path) { Ok(file) => parse_reader(file), - Err(err) => errored!("{}: {}", path.display(), err), + Err(err) => Err(From::from(format!("{}: {}", path.display(), err))), } } diff --git a/src/logger.rs b/src/logger.rs index 8bd7e09c..f12f0b19 100644 --- a/src/logger.rs +++ b/src/logger.rs @@ -34,19 +34,30 @@ impl Log for Logger { match (record.file(), record.line()) { (Some(file), Some(line)) => { eprintln!( - "{}/{}/{}:{}: {}", - record.level(), record.target(), - file, line, record.args()); + "{}|{}|{}:{}: {}", + record.level(), + record.target(), + file, + line, + record.args() + ); } (Some(file), None) => { eprintln!( - "{}/{}/{}: {}", - record.level(), record.target(), file, record.args()); + "{}|{}|{}: {}", + record.level(), + record.target(), + file, + record.args() + ); } _ => { eprintln!( - "{}/{}: {}", - record.level(), record.target(), record.args()); + "{}|{}: {}", + record.level(), + record.target(), + record.args() + ); } } } diff --git a/src/main.rs b/src/main.rs index af22373e..41e4dc19 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,3 +1,5 @@ +#![allow(dead_code, unused_imports, unused_mut, unused_variables)] + extern crate atty; extern crate bytecount; 
#[macro_use] @@ -6,6 +8,7 @@ extern crate encoding_rs; extern crate encoding_rs_io; extern crate globset; extern crate grep; +extern crate grep2; extern crate ignore; #[macro_use] extern crate lazy_static; @@ -39,31 +42,40 @@ macro_rules! errored { } } +#[macro_use] +mod messages; + mod app; mod args; +mod args2; mod config; mod decompressor; mod preprocessor; mod logger; +mod main2; +mod path_printer; mod pathutil; mod printer; +mod search; mod search_buffer; mod search_stream; +mod subject; mod unescape; mod worker; pub type Result = result::Result>; fn main() { - reset_sigpipe(); - match Args::parse().map(Arc::new).and_then(run) { - Ok(0) => process::exit(1), - Ok(_) => process::exit(0), - Err(err) => { - eprintln!("{}", err); - process::exit(2); - } - } + main2::main2(); + // reset_sigpipe(); + // match Args::parse().map(Arc::new).and_then(run) { + // Ok(0) => process::exit(1), + // Ok(_) => process::exit(0), + // Err(err) => { + // eprintln!("{}", err); + // process::exit(2); + // } + // } } fn run(args: Arc) -> Result { @@ -113,8 +125,6 @@ fn run_parallel(args: &Arc) -> Result { result, args.stdout_handle(), args.files(), - args.no_messages(), - args.no_ignore_messages(), ) { None => return Continue, Some(dent) => dent, @@ -145,10 +155,8 @@ fn run_parallel(args: &Arc) -> Result { Continue }) }); - if !args.paths().is_empty() && paths_searched.load(Ordering::SeqCst) == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } + if paths_searched.load(Ordering::SeqCst) == 0 { + eprint_nothing_searched(); } let match_line_count = match_line_count.load(Ordering::SeqCst) as u64; let paths_searched = paths_searched.load(Ordering::SeqCst) as u64; @@ -176,8 +184,6 @@ fn run_one_thread(args: &Arc) -> Result { result, args.stdout_handle(), args.files(), - args.no_messages(), - args.no_ignore_messages(), ) { None => continue, Some(dent) => dent, @@ -203,10 +209,8 @@ fn run_one_thread(args: &Arc) -> Result { paths_matched += 1; } } - if !args.paths().is_empty() && 
paths_searched == 0 { - if !args.no_messages() { - eprint_nothing_searched(); - } + if paths_searched == 0 { + eprint_nothing_searched(); } if args.stats() { print_stats( @@ -241,8 +245,6 @@ fn run_files_parallel(args: Arc) -> Result { result, args.stdout_handle(), args.files(), - args.no_messages(), - args.no_ignore_messages(), ) { tx.send(dent).unwrap(); if args.quiet() { @@ -263,8 +265,6 @@ fn run_files_one_thread(args: &Arc) -> Result { result, args.stdout_handle(), args.files(), - args.no_messages(), - args.no_ignore_messages(), ) { None => continue, Some(dent) => dent, @@ -293,21 +293,15 @@ fn get_or_log_dir_entry( result: result::Result, stdout_handle: Option<&same_file::Handle>, files_only: bool, - no_messages: bool, - no_ignore_messages: bool, ) -> Option { match result { Err(err) => { - if !no_messages { - eprintln!("{}", err); - } + message!("{}", err); None } Ok(dent) => { if let Some(err) = dent.error() { - if !no_messages && !no_ignore_messages { - eprintln!("{}", err); - } + ignore_message!("{}", err); } if dent.file_type().is_none() { return Some(dent); // entry is stdin @@ -321,7 +315,7 @@ fn get_or_log_dir_entry( } // If we are redirecting stdout to a file, then don't search that // file. 
- if !files_only && is_stdout_file(&dent, stdout_handle, no_messages) { + if !files_only && is_stdout_file(&dent, stdout_handle) { return None; } Some(dent) @@ -371,7 +365,6 @@ fn ignore_entry_is_file(dent: &ignore::DirEntry) -> bool { fn is_stdout_file( dent: &ignore::DirEntry, stdout_handle: Option<&same_file::Handle>, - no_messages: bool, ) -> bool { let stdout_handle = match stdout_handle { None => return false, @@ -385,9 +378,7 @@ fn is_stdout_file( match same_file::Handle::from_path(dent.path()) { Ok(h) => stdout_handle == &h, Err(err) => { - if !no_messages { - eprintln!("{}: {}", dent.path().display(), err); - } + message!("{}: {}", dent.path().display(), err); false } } @@ -407,9 +398,10 @@ fn maybe_dent_eq_handle(_: &ignore::DirEntry, _: &same_file::Handle) -> bool { } fn eprint_nothing_searched() { - eprintln!("No files were searched, which means ripgrep probably \ - applied a filter you didn't expect. \ - Try running again with --debug."); + message!( + "No files were searched, which means ripgrep probably \ + applied a filter you didn't expect. \ + Try running again with --debug."); } fn print_stats( diff --git a/src/main2.rs b/src/main2.rs new file mode 100644 index 00000000..4ad18f44 --- /dev/null +++ b/src/main2.rs @@ -0,0 +1,263 @@ +use std::io; +use std::process; +use std::sync::{Arc, Mutex}; +use std::time::Instant; + +use grep2::printer::Stats; +use ignore::WalkState; + +use args2::Args; +use subject::Subject; +use Result; + +pub fn main2() { + match Args::parse().and_then(run) { + Ok(false) => process::exit(1), + Ok(true) => process::exit(0), + Err(err) => { + eprintln!("{}", err); + process::exit(2); + } + } +} + +fn run(args: Args) -> Result { + use args2::Command::*; + + match args.command()? 
{ + Search => search(args), + SearchParallel => search_parallel(args), + SearchNever => Ok(false), + Files => files(args), + FilesParallel => files_parallel(args), + Types => types(args), + } +} + +/// The top-level entry point for single-threaded search. This recursively +/// steps through the file list (current directory by default) and searches +/// each file sequentially. +fn search(args: Args) -> Result { + let started_at = Instant::now(); + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut stats = args.stats()?; + let mut searcher = args.search_worker(args.stdout())?; + let mut matched = false; + + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => continue, + }; + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + break; + } + message!("{}: {}", subject.path().display(), err); + continue; + } + }; + matched = matched || search_result.has_match(); + if let Some(ref mut stats) = stats { + *stats += search_result.stats().unwrap(); + } + if matched && quit_after_match { + break; + } + } + if let Some(ref stats) = stats { + let elapsed = Instant::now().duration_since(started_at); + // We don't care if we couldn't print this successfully. + let _ = searcher.printer().print_stats(elapsed, stats); + } + Ok(matched) +} + +/// The top-level entry point for multi-threaded search. The parallelism is +/// itself achieved by the recursive directory traversal. All we need to do is +/// feed it a worker for performing a search on each file. 
+fn search_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + + let quit_after_match = args.quit_after_match()?; + let started_at = Instant::now(); + let subject_builder = Arc::new(args.subject_builder()); + let bufwtr = Arc::new(args.buffer_writer()?); + let stats = Arc::new(args.stats()?.map(Mutex::new)); + let matched = Arc::new(AtomicBool::new(false)); + let mut searcher_err = None; + args.walker_parallel()?.run(|| { + let args = args.clone(); + let bufwtr = Arc::clone(&bufwtr); + let stats = Arc::clone(&stats); + let matched = Arc::clone(&matched); + let subject_builder = Arc::clone(&subject_builder); + let mut searcher = match args.search_worker(bufwtr.buffer()) { + Ok(searcher) => searcher, + Err(err) => { + searcher_err = Some(err); + return Box::new(move |_| { + WalkState::Quit + }); + } + }; + + Box::new(move |result| { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, + }; + searcher.printer().get_mut().clear(); + let search_result = match searcher.search(&subject) { + Ok(search_result) => search_result, + Err(err) => { + message!("{}: {}", subject.path().display(), err); + return WalkState::Continue; + } + }; + if search_result.has_match() { + matched.store(true, SeqCst); + } + if let Some(ref locked_stats) = *stats { + let mut stats = locked_stats.lock().unwrap(); + *stats += search_result.stats().unwrap(); + } + if let Err(err) = bufwtr.print(searcher.printer().get_mut()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + return WalkState::Quit; + } + // Otherwise, we continue on our merry way. 
+ message!("{}: {}", subject.path().display(), err); + } + if matched.load(SeqCst) && quit_after_match { + WalkState::Quit + } else { + WalkState::Continue + } + }) + }); + if let Some(err) = searcher_err.take() { + return Err(err); + } + if let Some(ref locked_stats) = *stats { + let elapsed = Instant::now().duration_since(started_at); + let stats = locked_stats.lock().unwrap(); + let mut searcher = args.search_worker(args.stdout())?; + // We don't care if we couldn't print this successfully. + let _ = searcher.printer().print_stats(elapsed, &stats); + } + Ok(matched.load(SeqCst)) +} + +/// The top-level entry point for listing files without searching them. This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using a single thread. +fn files(args: Args) -> Result { + let quit_after_match = args.quit_after_match()?; + let subject_builder = args.subject_builder(); + let mut matched = false; + let mut path_printer = args.path_printer(args.stdout())?; + for result in args.walker()? { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => continue, + }; + matched = true; + if quit_after_match { + break; + } + if let Err(err) = path_printer.write_path(subject.path()) { + // A broken pipe means graceful termination. + if err.kind() == io::ErrorKind::BrokenPipe { + break; + } + // Otherwise, we have some other error that's preventing us from + // writing to stdout, so we should bubble it up. + return Err(err.into()); + } + } + Ok(matched) +} + +/// The top-level entry point for listing files without searching them. This +/// recursively steps through the file list (current directory by default) and +/// prints each path sequentially using multiple threads. 
+fn files_parallel(args: Args) -> Result { + use std::sync::atomic::AtomicBool; + use std::sync::atomic::Ordering::SeqCst; + use std::sync::mpsc; + use std::thread; + + let quit_after_match = args.quit_after_match()?; + let subject_builder = Arc::new(args.subject_builder()); + let mut path_printer = args.path_printer(args.stdout())?; + let matched = Arc::new(AtomicBool::new(false)); + let (tx, rx) = mpsc::channel::(); + + let print_thread = thread::spawn(move || -> io::Result<()> { + for subject in rx.iter() { + path_printer.write_path(subject.path())?; + } + Ok(()) + }); + args.walker_parallel()?.run(|| { + let args = args.clone(); + let subject_builder = Arc::clone(&subject_builder); + let matched = Arc::clone(&matched); + let tx = tx.clone(); + + Box::new(move |result| { + let subject = match subject_builder.build_from_result(result) { + Some(subject) => subject, + None => return WalkState::Continue, + }; + matched.store(true, SeqCst); + if quit_after_match { + WalkState::Quit + } else { + match tx.send(subject) { + Ok(_) => WalkState::Continue, + Err(_) => WalkState::Quit, + } + } + }) + }); + drop(tx); + if let Err(err) = print_thread.join().unwrap() { + // A broken pipe means graceful termination, so fall through. + // Otherwise, something bad happened while writing to stdout, so bubble + // it up. + if err.kind() != io::ErrorKind::BrokenPipe { + return Err(err.into()); + } + } + Ok(matched.load(SeqCst)) +} + +/// The top-level entry point for --type-list. +fn types(args: Args) -> Result { + let mut count = 0; + let mut stdout = args.stdout(); + for def in args.type_defs()? 
{ + count += 1; + stdout.write_all(def.name().as_bytes())?; + stdout.write_all(b": ")?; + + let mut first = true; + for glob in def.globs() { + if !first { + stdout.write_all(b", ")?; + } + stdout.write_all(glob.as_bytes())?; + first = false; + } + stdout.write_all(b"\n")?; + } + Ok(count > 0) +} diff --git a/src/messages.rs b/src/messages.rs new file mode 100644 index 00000000..2016ff64 --- /dev/null +++ b/src/messages.rs @@ -0,0 +1,50 @@ +use std::sync::atomic::{ATOMIC_BOOL_INIT, AtomicBool, Ordering}; + +static MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; +static IGNORE_MESSAGES: AtomicBool = ATOMIC_BOOL_INIT; + +#[macro_export] +macro_rules! message { + ($($tt:tt)*) => { + if ::messages::messages() { + eprintln!($($tt)*); + } + } +} + +#[macro_export] +macro_rules! ignore_message { + ($($tt:tt)*) => { + if ::messages::messages() && ::messages::ignore_messages() { + eprintln!($($tt)*); + } + } +} + +/// Returns true if and only if messages should be shown. +pub fn messages() -> bool { + MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether messages should be shown or not. +/// +/// By default, they are not shown. +pub fn set_messages(yes: bool) { + MESSAGES.store(yes, Ordering::SeqCst) +} + +/// Returns true if and only if "ignore" related messages should be shown. +pub fn ignore_messages() -> bool { + IGNORE_MESSAGES.load(Ordering::SeqCst) +} + +/// Set whether "ignore" related messages should be shown or not. +/// +/// By default, they are not shown. +/// +/// Note that this is overridden if `messages` is disabled. Namely, if +/// `messages` is disabled, then "ignore" messages are never shown, regardless +/// of this setting. 
+pub fn set_ignore_messages(yes: bool) { + IGNORE_MESSAGES.store(yes, Ordering::SeqCst) +} diff --git a/src/path_printer.rs b/src/path_printer.rs new file mode 100644 index 00000000..96017ff1 --- /dev/null +++ b/src/path_printer.rs @@ -0,0 +1,101 @@ +use std::io; +use std::path::Path; + +use grep2::printer::{ColorSpecs, PrinterPath}; +use termcolor::WriteColor; + +/// A configuration for describing how paths should be written. +#[derive(Clone, Debug)] +struct Config { + colors: ColorSpecs, + separator: Option, + terminator: u8, +} + +impl Default for Config { + fn default() -> Config { + Config { + colors: ColorSpecs::default(), + separator: None, + terminator: b'\n', + } + } +} + +/// A builder for constructing path printers. +#[derive(Clone, Debug)] +pub struct PathPrinterBuilder { + config: Config, +} + +impl PathPrinterBuilder { + /// Return a new path printer builder with a default configuration. + pub fn new() -> PathPrinterBuilder { + PathPrinterBuilder { config: Config::default() } + } + + /// Create a new path printer with the current configuration that writes + /// paths to the given writer. + pub fn build(&self, wtr: W) -> PathPrinter { + PathPrinter { + config: self.config.clone(), + wtr: wtr, + } + } + + /// Set the color specification for this printer. + /// + /// Currently, only the `path` component of the given specification is + /// used. + pub fn color_specs( + &mut self, + specs: ColorSpecs, + ) -> &mut PathPrinterBuilder { + self.config.colors = specs; + self + } + + /// A path separator. + /// + /// When provided, the path's default separator will be replaced with + /// the given separator. + /// + /// This is not set by default, and the system's default path separator + /// will be used. + pub fn separator(&mut self, sep: Option) -> &mut PathPrinterBuilder { + self.config.separator = sep; + self + } + + /// A path terminator. + /// + /// When printing a path, it will be terminated by the given byte. 
+ /// + /// This is set to `\n` by default. + pub fn terminator(&mut self, terminator: u8) -> &mut PathPrinterBuilder { + self.config.terminator = terminator; + self + } +} + +/// A printer for emitting paths to a writer, with optional color support. +#[derive(Debug)] +pub struct PathPrinter { + config: Config, + wtr: W, +} + +impl PathPrinter { + /// Write the given path to the underlying writer. + pub fn write_path(&mut self, path: &Path) -> io::Result<()> { + let ppath = PrinterPath::with_separator(path, self.config.separator); + if !self.wtr.supports_color() { + self.wtr.write_all(ppath.as_bytes())?; + } else { + self.wtr.set_color(self.config.colors.path())?; + self.wtr.write_all(ppath.as_bytes())?; + self.wtr.reset()?; + } + self.wtr.write_all(&[self.config.terminator]) + } +} diff --git a/src/pathutil.rs b/src/pathutil.rs index 8d1c1510..0b51bfbc 100644 --- a/src/pathutil.rs +++ b/src/pathutil.rs @@ -4,9 +4,6 @@ typically faster than the same operations as provided in `std::path`. In particular, we really want to avoid the costly operation of parsing the path into its constituent components. We give up on Windows, but on Unix, we deal with the raw bytes directly. - -On large repositories (like chromium), this can have a ~25% performance -improvement on just listing the files to search (!). */ use std::path::Path; diff --git a/src/preprocessor.rs b/src/preprocessor.rs index bb464f86..07f66e2d 100644 --- a/src/preprocessor.rs +++ b/src/preprocessor.rs @@ -3,8 +3,6 @@ use std::io::{self, Read}; use std::path::{Path, PathBuf}; use std::process::{self, Stdio}; -use Result; - /// PreprocessorReader provides an `io::Read` impl to read kids output. 
#[derive(Debug)] pub struct PreprocessorReader { @@ -26,7 +24,7 @@ impl PreprocessorReader { pub fn from_cmd_path( cmd: PathBuf, path: &Path, - ) -> Result { + ) -> io::Result { let child = process::Command::new(&cmd) .arg(path) .stdin(Stdio::from(File::open(path)?)) @@ -34,10 +32,13 @@ impl PreprocessorReader { .stderr(Stdio::piped()) .spawn() .map_err(|err| { - format!( - "error running preprocessor command '{}': {}", - cmd.display(), - err, + io::Error::new( + io::ErrorKind::Other, + format!( + "error running preprocessor command '{}': {}", + cmd.display(), + err, + ), ) })?; Ok(PreprocessorReader { diff --git a/src/search.rs b/src/search.rs new file mode 100644 index 00000000..c27237db --- /dev/null +++ b/src/search.rs @@ -0,0 +1,372 @@ +use std::io; +use std::path::{Path, PathBuf}; +use std::time::Duration; + +use grep2::matcher::Matcher; +use grep2::printer::{JSON, Standard, Summary, Stats}; +use grep2::regex::RegexMatcher; +use grep2::searcher::Searcher; +use termcolor::WriteColor; + +use decompressor::{DecompressionReader, is_compressed}; +use preprocessor::PreprocessorReader; +use subject::Subject; + +/// The configuration for the search worker. This records whether files are +/// passed through a preprocessor command or decompressed before they are +/// searched. +#[derive(Clone, Debug)] +struct Config { + preprocessor: Option, + search_zip: bool, +} + +impl Default for Config { + fn default() -> Config { + Config { + preprocessor: None, + search_zip: false, + } + } +} + +/// A builder for configuring and constructing a search worker. +#[derive(Clone, Debug)] +pub struct SearchWorkerBuilder { + config: Config, +} + +impl Default for SearchWorkerBuilder { + fn default() -> SearchWorkerBuilder { + SearchWorkerBuilder::new() + } +} + +impl SearchWorkerBuilder { + /// Create a new builder for configuring and constructing a search worker. 
+ pub fn new() -> SearchWorkerBuilder { + SearchWorkerBuilder { config: Config::default() } + } + + /// Create a new search worker using the given searcher, matcher and + /// printer. + pub fn build( + &self, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, + ) -> SearchWorker { + let config = self.config.clone(); + SearchWorker { config, matcher, searcher, printer } + } + + /// Set the path to a preprocessor command. + /// + /// When this is set, instead of searching files directly, the given + /// command will be run with the file path as the first argument, and the + /// output of that command will be searched instead. + pub fn preprocessor( + &mut self, + cmd: Option, + ) -> &mut SearchWorkerBuilder { + self.config.preprocessor = cmd; + self + } + + /// Enable the decompression and searching of common compressed files. + /// + /// When enabled, if a particular file path is recognized as a compressed + /// file, then it is decompressed before searching. + /// + /// Note that if a preprocessor command is set, then it overrides this + /// setting. + pub fn search_zip(&mut self, yes: bool) -> &mut SearchWorkerBuilder { + self.config.search_zip = yes; + self + } +} + +/// The result of executing a search. +/// +/// Generally speaking, the "result" of a search is sent to a printer, which +/// writes results to an underlying writer such as stdout or a file. However, +/// every search also has some aggregate statistics or meta data that may be +/// useful to higher level routines. +#[derive(Clone, Debug, Default)] +pub struct SearchResult { + has_match: bool, + binary_byte_offset: Option, + stats: Option, +} + +impl SearchResult { + /// Whether the search found a match or not. + pub fn has_match(&self) -> bool { + self.has_match + } + + /// Whether the search found binary data, and if so, the first absolute + /// byte offset at which it was detected. 
+ /// + /// This always returns `None` if binary data detection is disabled, even + /// when binary data is present. + pub fn binary_byte_offset(&self) -> Option { + self.binary_byte_offset + } + + /// Return aggregate search statistics for a single search, if available. + /// + /// It can be expensive to compute statistics, so these are only present + /// if explicitly enabled in the printer provided by the caller. + pub fn stats(&self) -> Option<&Stats> { + self.stats.as_ref() + } +} + +/// The pattern matcher used by a search worker. +#[derive(Clone, Debug)] +pub enum PatternMatcher { + RustRegex(RegexMatcher), +} + +/// The printer used by a search worker. +/// +/// The `W` type parameter refers to the type of the underlying writer. +#[derive(Debug)] +pub enum Printer { + /// Use the standard printer, which supports the classic grep-like format. + Standard(Standard), + /// Use the summary printer, which supports aggregate displays of search + /// results. + Summary(Summary), + /// A JSON printer, which emits results in the JSON Lines format. + JSON(JSON), +} + +impl Printer { + /// Print the given statistics to the underlying writer in a way that is + /// consistent with this printer's format. + /// + /// `stats.elapsed()` only covers the time spent searching, whereas + /// `total_duration` should roughly approximate the lifespan of the ripgrep + /// process itself. Panics if this is a JSON printer (not yet implemented). 
+ pub fn print_stats( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + match *self { + Printer::JSON(_) => unimplemented!(), + Printer::Standard(_) | Printer::Summary(_) => { + self.print_stats_human(total_duration, stats) + } + } + } + + fn print_stats_human( + &mut self, + total_duration: Duration, + stats: &Stats, + ) -> io::Result<()> { + let mut wtr = self.get_mut(); + + write!( + wtr, + " +{matches} matches +{lines} matched lines +{searches_with_match} files contained matches +{searches} files searched +{bytes_printed} bytes printed +{bytes_searched} bytes searched +{search_time:.6} seconds spent searching +{process_time:.6} seconds +", + matches = stats.matches(), + lines = stats.matched_lines(), + searches_with_match = stats.searches_with_match(), + searches = stats.searches(), + bytes_printed = stats.bytes_printed(), + bytes_searched = stats.bytes_searched(), + search_time = fractional_seconds(stats.elapsed()), + process_time = fractional_seconds(total_duration) + ) + } + + /// Return a mutable reference to the underlying printer's writer. + pub fn get_mut(&mut self) -> &mut W { + match *self { + Printer::Standard(ref mut p) => p.get_mut(), + Printer::Summary(ref mut p) => p.get_mut(), + Printer::JSON(ref mut p) => p.get_mut(), + } + } +} + +/// A worker for executing searches. +/// +/// It is intended for a single worker to execute many searches, and is +/// generally intended to be used from a single thread. When searching using +/// multiple threads, it is better to create a new worker for each thread. +#[derive(Debug)] +pub struct SearchWorker { + config: Config, + matcher: PatternMatcher, + searcher: Searcher, + printer: Printer, +} + +impl SearchWorker { + /// Execute a search over the given subject. + pub fn search(&mut self, subject: &Subject) -> io::Result { + self.search_impl(subject) + } + + /// Return a mutable reference to the underlying printer. 
+ pub fn printer(&mut self) -> &mut Printer { + &mut self.printer + } + + /// Search the given subject using the appropriate strategy. + fn search_impl(&mut self, subject: &Subject) -> io::Result { + let path = subject.path(); + if subject.is_stdin() { + let stdin = io::stdin(); + // A `return` here appeases the borrow checker. NLL will fix this. + return self.search_reader(path, stdin.lock()); + } else if self.config.preprocessor.is_some() { + let cmd = self.config.preprocessor.clone().unwrap(); + let rdr = PreprocessorReader::from_cmd_path(cmd, path)?; + self.search_reader(path, rdr) + } else if self.config.search_zip && is_compressed(path) { + match DecompressionReader::from_path(path) { + None => Ok(SearchResult::default()), + Some(rdr) => self.search_reader(path, rdr), + } + } else { + self.search_path(path) + } + } + + /// Search the contents of the given file path. + fn search_path(&mut self, path: &Path) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_path(m, searcher, printer, path), + } + } + + /// Executes a search on the given reader, which may or may not correspond + /// directly to the contents of the given file path. Instead, the reader + /// may actually cause something else to be searched (for example, when + /// a preprocessor is set or when decompression is enabled). In those + /// cases, the file path is used for visual purposes only. + /// + /// Generally speaking, this method should only be used when there is no + /// other choice. Searching via `search_path` provides more opportunities + /// for optimizations (such as memory maps). 
+ fn search_reader( + &mut self, + path: &Path, + rdr: R, + ) -> io::Result { + use self::PatternMatcher::*; + + let (searcher, printer) = (&mut self.searcher, &mut self.printer); + match self.matcher { + RustRegex(ref m) => search_reader(m, searcher, printer, path, rdr), + } + } +} + +/// Search the contents of the given file path using the given matcher, +/// searcher and printer. +fn search_path( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + binary_byte_offset: sink.binary_byte_offset(), + stats: sink.stats().map(|s| s.clone()), + ..SearchResult::default() + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + binary_byte_offset: sink.binary_byte_offset(), + stats: sink.stats().map(|s| s.clone()), + ..SearchResult::default() + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_path(&matcher, path, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + binary_byte_offset: sink.binary_byte_offset(), + stats: Some(sink.stats().clone()), + ..SearchResult::default() + }) + } + } +} + +/// Search the contents of the given reader using the given matcher, searcher +/// and printer. 
+fn search_reader( + matcher: M, + searcher: &mut Searcher, + printer: &mut Printer, + path: &Path, + rdr: R, +) -> io::Result { + match *printer { + Printer::Standard(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + binary_byte_offset: sink.binary_byte_offset(), + stats: sink.stats().map(|s| s.clone()), + ..SearchResult::default() + }) + } + Printer::Summary(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + binary_byte_offset: sink.binary_byte_offset(), + stats: sink.stats().map(|s| s.clone()), + ..SearchResult::default() + }) + } + Printer::JSON(ref mut p) => { + let mut sink = p.sink_with_path(&matcher, path); + searcher.search_reader(&matcher, rdr, &mut sink)?; + Ok(SearchResult { + has_match: sink.has_match(), + binary_byte_offset: sink.binary_byte_offset(), + stats: Some(sink.stats().clone()), + ..SearchResult::default() + }) + } + } +} + +/// Return the given duration as fractional seconds. +fn fractional_seconds(duration: Duration) -> f64 { + (duration.as_secs() as f64) + (duration.subsec_nanos() as f64 * 1e-9) +} diff --git a/src/subject.rs b/src/subject.rs new file mode 100644 index 00000000..880ad882 --- /dev/null +++ b/src/subject.rs @@ -0,0 +1,231 @@ +use std::io; +use std::path::Path; +use std::sync::Arc; + +use ignore::{self, DirEntry}; +use same_file::Handle; + +/// A configuration for describing how subjects should be built. +#[derive(Clone, Debug)] +struct Config { + skip: Option>, + strip_dot_prefix: bool, + separator: Option, + terminator: Option, +} + +impl Default for Config { + fn default() -> Config { + Config { + skip: None, + strip_dot_prefix: false, + separator: None, + terminator: None, + } + } +} + +/// A builder for constructing things to search over. 
+#[derive(Clone, Debug)] +pub struct SubjectBuilder { + config: Config, +} + +impl SubjectBuilder { + /// Return a new subject builder with a default configuration. + pub fn new() -> SubjectBuilder { + SubjectBuilder { config: Config::default() } + } + + /// Create a new subject from a possibly missing directory entry. + /// + /// If the directory entry isn't present, then the corresponding error is + /// logged if messages have been configured. Otherwise, if the subject is + /// deemed searchable, then it is returned. + pub fn build_from_result( + &self, + result: Result, + ) -> Option { + match result { + Ok(dent) => self.build(dent), + Err(err) => { + message!("{}", err); + None + } + } + } + + /// Create a new subject using this builder's configuration. + /// + /// If a subject could not be created or should otherwise not be searched, + /// then this returns `None` after emitting any relevant log messages. + pub fn build(&self, dent: DirEntry) -> Option { + let subj = Subject { + dent: dent, + strip_dot_prefix: self.config.strip_dot_prefix, + }; + if let Some(ignore_err) = subj.dent.error() { + ignore_message!("{}", ignore_err); + } + // If this entry represents stdin, then we always search it. + if subj.dent.is_stdin() { + return Some(subj); + } + // If we're supposed to skip a particular file, then skip it. + if let Some(ref handle) = self.config.skip { + match subj.equals(handle) { + Ok(false) => {} // fallthrough + Ok(true) => { + debug!( + "ignoring {}: (probably same file as stdout)", + subj.dent.path().display() + ); + return None; + } + Err(err) => { + message!("{}: {}", subj.dent.path().display(), err); + debug!( + "ignoring {}: got error: {}", + subj.dent.path().display(), err + ); + return None; + } + } + } + // If this subject has a depth of 0, then it was provided explicitly + // by an end user (or via a shell glob). In this case, we always want + // to search it if it even smells like a file (e.g., a symlink). 
+ if subj.dent.depth() == 0 && !subj.is_dir() { + return Some(subj); + } + // At this point, we only want to search something if it's explicitly a + // file. This omits symlinks. (If ripgrep was configured to follow + // symlinks, then they have already been followed by the directory + // traversal.) + if subj.is_file() { + return Some(subj); + } + // We got nothing. Emit a debug message, but only if this isn't a + // directory. Otherwise, emitting messages for directories is just + // noisy. + if !subj.is_dir() { + debug!( + "ignoring {}: failed to pass subject filter: \ + file type: {:?}, metadata: {:?}", + subj.dent.path().display(), + subj.dent.file_type(), + subj.dent.metadata() + ); + } + None + } + + /// When provided, subjects that represent the same file as the handle + /// given will be skipped. + /// + /// Typically, it is useful to pass a handle referring to stdout, such + /// that the file being written to isn't searched, since searching it + /// can lead to an unbounded feedback loop. + /// + /// Only one handle to skip can be provided. + pub fn skip( + &mut self, + handle: Option, + ) -> &mut SubjectBuilder { + self.config.skip = handle.map(Arc::new); + self + } + + /// When enabled, if the subject's file path starts with `./` then it is + /// stripped. + /// + /// This is useful when implicitly searching the current working directory. + pub fn strip_dot_prefix(&mut self, yes: bool) -> &mut SubjectBuilder { + self.config.strip_dot_prefix = yes; + self + } +} + +/// A subject is a thing we want to search. Generally, a subject is either a +/// file or stdin. +#[derive(Clone, Debug)] +pub struct Subject { + dent: DirEntry, + strip_dot_prefix: bool, +} + +impl Subject { + /// Return the file path corresponding to this subject. + /// + /// If this subject corresponds to stdin, then a special `<stdin>` path + /// is returned instead. 
+ pub fn path(&self) -> &Path { + if self.strip_dot_prefix && self.dent.path().starts_with("./") { + self.dent.path().strip_prefix("./").unwrap() + } else { + self.dent.path() + } + } + + /// Returns true if and only if this entry corresponds to stdin. + pub fn is_stdin(&self) -> bool { + self.dent.is_stdin() + } + + /// Returns true if and only if this subject points to a directory. + /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_dir(&self) -> bool { + use std::os::windows::fs::MetadataExt; + use winapi::um::winnt::FILE_ATTRIBUTE_DIRECTORY; + + self.dent.metadata().map(|md| { + md.file_attributes() & FILE_ATTRIBUTE_DIRECTORY != 0 + }).unwrap_or(false) + } + + /// Returns true if and only if this subject points to a directory. + #[cfg(not(windows))] + fn is_dir(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_dir()) + } + + /// Returns true if and only if this subject points to a file. + /// + /// This works around a bug in Rust's standard library: + /// https://github.com/rust-lang/rust/issues/46484 + #[cfg(windows)] + fn is_file(&self) -> bool { + !self.is_dir() + } + + /// Returns true if and only if this subject points to a file. + #[cfg(not(windows))] + fn is_file(&self) -> bool { + self.dent.file_type().map_or(false, |ft| ft.is_file()) + } + + /// Returns true if and only if this subject is believed to be equivalent + /// to the given handle. If there was a problem querying this subject for + /// information to determine equality, then that error is returned. + fn equals(&self, handle: &Handle) -> io::Result { + #[cfg(unix)] + fn never_equal(dent: &DirEntry, handle: &Handle) -> bool { + dent.ino() != Some(handle.ino()) + } + + #[cfg(not(unix))] + fn never_equal(_: &DirEntry, _: &Handle) -> bool { + false + } + + // If we know for sure that these two things aren't equal, then avoid + // the costly extra stat call to determine equality. 
+ if self.dent.is_stdin() || never_equal(&self.dent, handle) { + return Ok(false); + } + Handle::from_path(self.path()).map(|h| &h == handle) + } +} diff --git a/src/worker.rs b/src/worker.rs index 8e840400..cd8b81d2 100644 --- a/src/worker.rs +++ b/src/worker.rs @@ -44,7 +44,6 @@ struct Options { invert_match: bool, line_number: bool, max_count: Option, - no_messages: bool, quiet: bool, text: bool, preprocessor: Option, @@ -67,7 +66,6 @@ impl Default for Options { invert_match: false, line_number: false, max_count: None, - no_messages: false, quiet: false, text: false, search_zip_files: false, @@ -200,14 +198,6 @@ impl WorkerBuilder { self } - /// If enabled, error messages are suppressed. - /// - /// This is disabled by default. - pub fn no_messages(mut self, yes: bool) -> Self { - self.opts.no_messages = yes; - self - } - /// If enabled, don't show any output and quit searching after the first /// match is found. pub fn quiet(mut self, yes: bool) -> Self { @@ -265,9 +255,7 @@ impl Worker { match PreprocessorReader::from_cmd_path(cmd, path) { Ok(reader) => self.search(printer, path, reader), Err(err) => { - if !self.opts.no_messages { - eprintln!("{}", err); - } + message!("{}", err); return 0; } } @@ -284,9 +272,7 @@ impl Worker { let file = match File::open(path) { Ok(file) => file, Err(err) => { - if !self.opts.no_messages { - eprintln!("{}: {}", path.display(), err); - } + message!("{}: {}", path.display(), err); return 0; } }; @@ -306,9 +292,7 @@ impl Worker { count } Err(err) => { - if !self.opts.no_messages { - eprintln!("{}", err); - } + message!("{}", err); 0 } } diff --git a/tests/tests.rs b/tests/tests.rs index 2ddab867..1c40f22e 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -91,8 +91,8 @@ be, to a very large extent, the result of luck. 
Sherlock Holmes sherlock!(dir, "Sherlock", ".", |wd: WorkDir, mut cmd| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -148,19 +148,19 @@ sherlock!(with_heading_default, "Sherlock", ".", cmd.arg("-j1").arg("--heading"); let lines: String = wd.stdout(&mut cmd); let expected1 = "\ -foo +./foo Sherlock Holmes lives on Baker Street. -sherlock +./sherlock For the Doctor Watsons of this world, as opposed to the Sherlock be, to a very large extent, the result of luck. Sherlock Holmes "; let expected2 = "\ -sherlock +./sherlock For the Doctor Watsons of this world, as opposed to the Sherlock be, to a very large extent, the result of luck. Sherlock Holmes -foo +./foo Sherlock Holmes lives on Baker Street. 
"; if lines != expected1 { @@ -289,14 +289,14 @@ sherlock!(file_types, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-t").arg("rust"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); + assert_eq!(lines, "./file.rs:Sherlock\n"); }); sherlock!(file_types_all, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); cmd.arg("-t").arg("all"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -305,7 +305,7 @@ sherlock!(file_types_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-T").arg("rust"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(file_types_negate_all, "Sherlock", ".", @@ -315,8 +315,8 @@ sherlock!(file_types_negate_all, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes "); }); @@ -333,18 +333,21 @@ sherlock!(file_type_add, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.wat", "Sherlock"); cmd.arg("--type-add").arg("wat:*.wat").arg("-t").arg("wat"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.wat:Sherlock\n"); + assert_eq!(lines, "./file.wat:Sherlock\n"); }); -sherlock!(file_type_add_compose, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { +sherlock!(file_type_add_compose, "Sherlock", ".", +|wd: WorkDir, mut cmd: Command| { wd.create("file.py", "Sherlock"); wd.create("file.rs", "Sherlock"); wd.create("file.wat", "Sherlock"); cmd.arg("--type-add").arg("wat:*.wat"); cmd.arg("--type-add").arg("combo:include:wat,py").arg("-t").arg("combo"); let lines: String = wd.stdout(&mut cmd); - println!("{}", lines); - assert_eq!(sort_lines(&lines), "file.py:Sherlock\nfile.wat:Sherlock\n"); + assert_eq!( + sort_lines(&lines), + "./file.py:Sherlock\n./file.wat:Sherlock\n" + ); }); sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -352,7 +355,7 @@ sherlock!(glob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-g").arg("*.rs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.rs:Sherlock\n"); + assert_eq!(lines, "./file.rs:Sherlock\n"); }); sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { @@ -361,14 +364,14 @@ sherlock!(glob_negate, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.rs", "Sherlock"); cmd.arg("-g").arg("!*.rs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py:Sherlock\n"); + assert_eq!(lines, "./file.py:Sherlock\n"); }); sherlock!(iglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file.HTML", "Sherlock"); cmd.arg("--iglob").arg("*.html"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.HTML:Sherlock\n"); + assert_eq!(lines, "./file.HTML:Sherlock\n"); }); sherlock!(csglob, "Sherlock", ".", 
|wd: WorkDir, mut cmd: Command| { @@ -376,15 +379,16 @@ sherlock!(csglob, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { wd.create("file2.html", "Sherlock"); cmd.arg("--glob").arg("*.html"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file2.html:Sherlock\n"); + assert_eq!(lines, "./file2.html:Sherlock\n"); }); -sherlock!(byte_offset_only_matching, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { +sherlock!(byte_offset_only_matching, "Sherlock", ".", +|wd: WorkDir, mut cmd: Command| { cmd.arg("-b").arg("-o"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:56:Sherlock -sherlock:177:Sherlock +./sherlock:56:Sherlock +./sherlock:177:Sherlock "; assert_eq!(lines, expected); }); @@ -392,35 +396,35 @@ sherlock:177:Sherlock sherlock!(count, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:2\n"; + let expected = "./sherlock:2\n"; assert_eq!(lines, expected); }); sherlock!(count_matches, "the", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count-matches"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(count_matches_inverted, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count-matches").arg("--invert-match"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(count_matches_via_only, "the", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--count").arg("--only-matching"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock:4\n"; + let expected = "./sherlock:4\n"; assert_eq!(lines, expected); }); sherlock!(files_with_matches, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--files-with-matches"); let lines: String = wd.stdout(&mut cmd); - let expected = "sherlock\n"; + let expected = 
"./sherlock\n"; assert_eq!(lines, expected); }); @@ -429,7 +433,7 @@ sherlock!(files_without_matches, "Sherlock", ".", wd.create("file.py", "foo"); cmd.arg("--files-without-match"); let lines: String = wd.stdout(&mut cmd); - let expected = "file.py\n"; + let expected = "./file.py\n"; assert_eq!(lines, expected); }); @@ -527,7 +531,7 @@ sherlock!(max_filesize_parse_no_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("50").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -541,7 +545,7 @@ sherlock!(max_filesize_parse_k_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("4K").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -555,7 +559,7 @@ sherlock!(max_filesize_parse_m_suffix, "Sherlock", ".", cmd.arg("--max-filesize").arg("1M").arg("--files"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -foo +./foo "; assert_eq!(lines, expected); }); @@ -583,8 +587,8 @@ sherlock!(no_ignore_hidden, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--hidden"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -610,8 +614,8 @@ sherlock!(no_ignore, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-ignore"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -653,8 +657,8 @@ sherlock!(ignore_git_parent_stop, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -686,8 +690,8 @@ sherlock!(ignore_git_parent_stop_file, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -740,8 +744,8 @@ sherlock!(no_parent_ignore_git, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -771,8 +775,8 @@ sherlock!(symlink_follow, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -baz/sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./baz/sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./baz/sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, path(expected)); }); @@ -783,8 +787,8 @@ sherlock!(unrestricted1, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -796,8 +800,8 @@ sherlock!(unrestricted2, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./.sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./.sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -807,7 +811,7 @@ sherlock!(unrestricted3, "foo", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-uuu"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file:foo\x00bar\nfile:foo\x00baz\n"); + assert_eq!(lines, "./file:foo\x00bar\n./file:foo\x00baz\n"); }); sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { @@ -815,10 +819,10 @@ sherlock!(vimgrep, "Sherlock|Watson", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:3:49:be, to a very large extent, the result of luck. 
Sherlock Holmes -sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:1:16:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:1:57:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:3:49:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:5:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -829,10 +833,10 @@ sherlock!(vimgrep_no_line, "Sherlock|Watson", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:12:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:16:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:57:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:49:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:12:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -843,10 +847,10 @@ sherlock!(vimgrep_no_line_no_column, "Sherlock|Watson", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes -sherlock:but Doctor Watson has to have it taken out for him and dusted, +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock:but Doctor Watson has to have it taken out for him and dusted, "; assert_eq!(lines, expected); }); @@ -869,12 +873,12 @@ clean!(regression_25, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("src/llvm/foo", "test"); let lines: String = wd.stdout(&mut cmd); - let expected = path("src/llvm/foo:test\n"); + let expected = path("./src/llvm/foo:test\n"); assert_eq!(lines, expected); cmd.current_dir(wd.path().join("src")); let lines: String = wd.stdout(&mut cmd); - let expected = path("llvm/foo:test\n"); + let expected = path("./llvm/foo:test\n"); assert_eq!(lines, expected); }); @@ -885,7 +889,7 @@ clean!(regression_30, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("vendor/manifest", "test"); let lines: String = wd.stdout(&mut cmd); - let expected = path("vendor/manifest:test\n"); + let expected = path("./vendor/manifest:test\n"); assert_eq!(lines, expected); }); @@ -927,7 +931,7 @@ clean!(regression_67, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("dir/bar", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("dir/bar:test\n")); + assert_eq!(lines, path("./dir/bar:test\n")); }); // See: https://github.com/BurntSushi/ripgrep/issues/87 @@ -945,7 +949,7 @@ clean!(regression_90, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create(".foo", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, ".foo:test\n"); + assert_eq!(lines, "./.foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/93 @@ -954,7 +958,7 @@ clean!(regression_93, r"(\d{1,3}\.){3}\d{1,3}", ".", wd.create("foo", "192.168.1.1"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:192.168.1.1\n"); + assert_eq!(lines, "./foo:192.168.1.1\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/99 @@ -966,7 +970,10 @@ clean!(regression_99, "test", ".", cmd.arg("-j1").arg("--heading"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(sort_lines(&lines), 
sort_lines("bar\ntest\n\nfoo1\ntest\n")); + assert_eq!( + sort_lines(&lines), + sort_lines("./bar\ntest\n\n./foo1\ntest\n") + ); }); // See: https://github.com/BurntSushi/ripgrep/issues/105 @@ -975,7 +982,7 @@ clean!(regression_105_part1, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--vimgrep"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); + assert_eq!(lines, "./foo:1:3:zztest\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/105 @@ -984,7 +991,7 @@ clean!(regression_105_part2, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--column"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:3:zztest\n"); + assert_eq!(lines, "./foo:1:3:zztest\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/127 @@ -1009,8 +1016,8 @@ clean!(regression_127, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = format!("\ -{path}:For the Doctor Watsons of this world, as opposed to the Sherlock -{path}:be, to a very large extent, the result of luck. Sherlock Holmes +./{path}:For the Doctor Watsons of this world, as opposed to the Sherlock +./{path}:be, to a very large extent, the result of luck. Sherlock Holmes ", path=path("foo/watson")); assert_eq!(lines, expected); }); @@ -1021,7 +1028,7 @@ clean!(regression_128, "x", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-n"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:5:x\n"); + assert_eq!(lines, "./foo:5:x\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/131 @@ -1049,8 +1056,8 @@ sherlock!(regression_137, "Sherlock", ".", |wd: WorkDir, mut cmd: Command| { let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes sym1:For the Doctor Watsons of this world, as opposed to the Sherlock sym1:be, to a very large extent, the result of luck. Sherlock Holmes sym2:For the Doctor Watsons of this world, as opposed to the Sherlock @@ -1094,11 +1101,11 @@ clean!(regression_184, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("foo/bar/baz", "test"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar/baz"))); + assert_eq!(lines, format!("./{}:test\n", path("foo/bar/baz"))); cmd.current_dir(wd.path().join("./foo/bar")); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "baz:test\n"); + assert_eq!(lines, "./baz:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/199 @@ -1107,7 +1114,7 @@ clean!(regression_199, r"\btest\b", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--smart-case"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:tEsT\n"); + assert_eq!(lines, "./foo:tEsT\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/206 @@ -1117,7 +1124,7 @@ clean!(regression_206, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-g").arg("*.txt"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("foo/bar.txt"))); + assert_eq!(lines, format!("./{}:test\n", path("foo/bar.txt"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/210 @@ -1161,7 +1168,7 @@ clean!(regression_251, "привет", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:привет\nfoo:Привет\nfoo:ПрИвЕт\n"); + assert_eq!(lines, "./foo:привет\n./foo:Привет\n./foo:ПрИвЕт\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/256 @@ -1205,7 +1212,7 @@ clean!(regression_405, "test", ".", |wd: WorkDir, mut cmd: Command| { 
cmd.arg("-g").arg("!/foo/**"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path("bar/foo/file2.txt"))); + assert_eq!(lines, format!("./{}:test\n", path("bar/foo/file2.txt"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/428 @@ -1220,7 +1227,7 @@ clean!(regression_428_color_context_path, "foo", ".", let expected = format!( "{colored_path}:foo\n{colored_path}-bar\n", colored_path=format!( - "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d{path}\x1b\x5b\x30\x6d", + "\x1b\x5b\x30\x6d\x1b\x5b\x33\x35\x6d./{path}\x1b\x5b\x30\x6d", path=path("sherlock"))); assert_eq!(lines, expected); }); @@ -1234,16 +1241,17 @@ clean!(regression_428_unrecognized_style, "Sherlok", ".", let output = cmd.output().unwrap(); let err = String::from_utf8_lossy(&output.stderr); let expected = "\ -Unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense, \ +unrecognized style attribute ''. Choose from: nobold, bold, nointense, intense, \ nounderline, underline. "; assert_eq!(err, expected); }); // See: https://github.com/BurntSushi/ripgrep/issues/493 -clean!(regression_493, " 're ", "input.txt", |wd: WorkDir, mut cmd: Command| { +clean!(regression_493, r"\b 're \b", "input.txt", +|wd: WorkDir, mut cmd: Command| { wd.create("input.txt", "peshwaship 're seminomata"); - cmd.arg("-o").arg("-w"); + cmd.arg("-o"); let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, " 're \n"); @@ -1255,8 +1263,8 @@ sherlock!(regression_553_switch, "sherlock", ".", cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); @@ -1264,8 +1272,8 @@ sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes cmd.arg("-i"); let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1305,12 +1313,9 @@ clean!(regression_599, "^$", "input.txt", |wd: WorkDir, mut cmd: Command| { ]); let lines: String = wd.stdout(&mut cmd); - // Technically, the expected output should only be two lines, but: - // https://github.com/BurntSushi/ripgrep/issues/441 let expected = "\ 1: 2: -4: "; assert_eq!(expected, lines); }); @@ -1326,7 +1331,7 @@ clean!(regression_807, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--hidden"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, format!("{}:test\n", path(".a/c/file"))); + assert_eq!(lines, format!("./{}:test\n", path(".a/c/file"))); }); // See: https://github.com/BurntSushi/ripgrep/issues/900 @@ -1343,7 +1348,7 @@ clean!(feature_1_sjis, "Шерлок Холмс", ".", |wd: WorkDir, mut cmd: Co cmd.arg("-Esjis"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1354,7 +1359,7 @@ clean!(feature_1_utf16_auto, "Шерлок Холмс", ".", wd.create_bytes("foo", &sherlock[..]); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1366,7 +1371,7 @@ clean!(feature_1_utf16_explicit, "Шерлок Холмс", ".", cmd.arg("-Eutf-16le"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1378,7 
+1383,7 @@ clean!(feature_1_eucjp, "Шерлок Холмс", ".", cmd.arg("-Eeuc-jp"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:Шерлок Холмс\n"); + assert_eq!(lines, "./foo:Шерлок Холмс\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/1 @@ -1413,8 +1418,8 @@ sherlock!(feature_7_dash, "-f-", ".", |wd: WorkDir, mut cmd: Command| { let output = wd.pipe(&mut cmd, "Sherlock"); let lines = String::from_utf8_lossy(&output.stdout); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1439,8 +1444,8 @@ sherlock!(feature_34_only_matching, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:Sherlock -sherlock:Sherlock +./sherlock:Sherlock +./sherlock:Sherlock "; assert_eq!(lines, expected); }); @@ -1452,8 +1457,8 @@ sherlock!(feature_34_only_matching_line_column, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:1:57:Sherlock -sherlock:3:49:Sherlock +./sherlock:1:57:Sherlock +./sherlock:3:49:Sherlock "; assert_eq!(lines, expected); }); @@ -1476,15 +1481,15 @@ sherlock!(feature_45_relative_cwd, "test", ".", // First, get a baseline without applying ignore rules. let lines = paths_from_stdout(wd.stdout(&mut cmd)); assert_eq!(lines, paths(&[ - "bar/test", "baz/bar/test", "baz/baz/bar/test", "baz/foo", - "baz/test", "foo", "test", + "./bar/test", "./baz/bar/test", "./baz/baz/bar/test", "./baz/foo", + "./baz/test", "./foo", "./test", ])); // Now try again with the ignore file activated. 
cmd.arg("--ignore-file").arg(".not-an-ignore"); let lines = paths_from_stdout(wd.stdout(&mut cmd)); assert_eq!(lines, paths(&[ - "baz/bar/test", "baz/baz/bar/test", "baz/test", "test", + "./baz/bar/test", "./baz/baz/bar/test", "./baz/test", "./test", ])); // Now do it again, but inside the baz directory. @@ -1496,7 +1501,7 @@ sherlock!(feature_45_relative_cwd, "test", ".", cmd.arg("test").arg(".").arg("--ignore-file").arg("../.not-an-ignore"); cmd.current_dir(wd.path().join("baz")); let lines = paths_from_stdout(wd.stdout(&mut cmd)); - assert_eq!(lines, paths(&["baz/bar/test", "test"])); + assert_eq!(lines, paths(&["./baz/bar/test", "./test"])); }); // See: https://github.com/BurntSushi/ripgrep/issues/45 @@ -1509,7 +1514,7 @@ sherlock!(feature_45_precedence_with_others, "test", ".", cmd.arg("--ignore-file").arg(".not-an-ignore"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); + assert_eq!(lines, "./imp.log:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/45 @@ -1523,7 +1528,7 @@ sherlock!(feature_45_precedence_internal, "test", ".", cmd.arg("--ignore-file").arg(".not-an-ignore1"); cmd.arg("--ignore-file").arg(".not-an-ignore2"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "imp.log:test\n"); + assert_eq!(lines, "./imp.log:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/68 @@ -1535,7 +1540,7 @@ clean!(feature_68_no_ignore_vcs, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--no-ignore-vcs"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); + assert_eq!(lines, "./foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/70 @@ -1545,8 +1550,8 @@ sherlock!(feature_70_smart_case, "sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock:be, to a very large extent, the result of luck. 
Sherlock Holmes +./sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock:be, to a very large extent, the result of luck. Sherlock Holmes "; assert_eq!(lines, expected); }); @@ -1557,7 +1562,7 @@ sherlock!(feature_89_files_with_matches, "Sherlock", ".", cmd.arg("--null").arg("--files-with-matches"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); + assert_eq!(lines, "./sherlock\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1567,7 +1572,7 @@ sherlock!(feature_89_files_without_matches, "Sherlock", ".", cmd.arg("--null").arg("--files-without-match"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "file.py\x00"); + assert_eq!(lines, "./file.py\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1576,7 +1581,7 @@ sherlock!(feature_89_count, "Sherlock", ".", cmd.arg("--null").arg("--count"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); + assert_eq!(lines, "./sherlock\x002\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1585,7 +1590,7 @@ sherlock!(feature_89_files, "NADA", ".", cmd.arg("--null").arg("--files"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x00"); + assert_eq!(lines, "./sherlock\x00"); }); // See: https://github.com/BurntSushi/ripgrep/issues/89 @@ -1595,10 +1600,10 @@ sherlock!(feature_89_match, "Sherlock", ".", let lines: String = wd.stdout(&mut cmd); let expected = "\ -sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock -sherlock\x00Holmeses, success in the province of detective work must always -sherlock\x00be, to a very large extent, the result of luck. 
Sherlock Holmes -sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; +./sherlock\x00For the Doctor Watsons of this world, as opposed to the Sherlock +./sherlock\x00Holmeses, success in the province of detective work must always +./sherlock\x00be, to a very large extent, the result of luck. Sherlock Holmes +./sherlock\x00can extract a clew from a wisp of straw or a flake of cigar ash; "; assert_eq!(lines, expected); }); @@ -1613,7 +1618,7 @@ clean!(feature_109_max_depth, "far", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--maxdepth").arg("2"); let lines: String = wd.stdout(&mut cmd); - let expected = path("one/pass:far\n"); + let expected = path("./one/pass:far\n"); assert_eq!(lines, expected); }); @@ -1639,7 +1644,7 @@ clean!(feature_129_matches, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M26"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo:[Omitted long line with 2 matches]\n"; + let expected = "./foo:test\n./foo:[Omitted long matching line]\n"; assert_eq!(lines, expected); }); @@ -1649,7 +1654,7 @@ clean!(feature_129_context, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M20").arg("-C1"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:test\nfoo-[Omitted long context line]\n"; + let expected = "./foo:test\n./foo-[Omitted long context line]\n"; assert_eq!(lines, expected); }); @@ -1659,7 +1664,7 @@ clean!(feature_129_replace, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("-M26").arg("-rfoo"); let lines: String = wd.stdout(&mut cmd); - let expected = "foo:foo\nfoo:[Omitted long line with 2 replacements]\n"; + let expected = "./foo:foo\n./foo:[Omitted long line with 2 matches]\n"; assert_eq!(lines, expected); }); @@ -1668,7 +1673,7 @@ clean!(feature_159_works, "test", ".", |wd: WorkDir, mut cmd: Command| { wd.create("foo", "test\ntest"); cmd.arg("-m1"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:test\n"); + assert_eq!(lines, 
"./foo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/159 @@ -1684,7 +1689,7 @@ clean!(feature_243_column_line, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--column"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "foo:1:1:test\n"); + assert_eq!(lines, "./foo:1:1:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/263 @@ -1696,7 +1701,7 @@ clean!(feature_263_sort_files, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--sort-files"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "abc:test\nbar:test\nfoo:test\nzoo:test\n"); + assert_eq!(lines, "./abc:test\n./bar:test\n./foo:test\n./zoo:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/275 @@ -1706,7 +1711,7 @@ clean!(feature_275_pathsep, "test", ".", |wd: WorkDir, mut cmd: Command| { cmd.arg("--path-separator").arg("Z"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "fooZbar:test\n"); + assert_eq!(lines, ".ZfooZbar:test\n"); }); // See: https://github.com/BurntSushi/ripgrep/issues/362 @@ -1746,7 +1751,7 @@ sherlock!(feature_419_zero_as_shortcut_for_null, "Sherlock", ".", cmd.arg("-0").arg("--count"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, "sherlock\x002\n"); + assert_eq!(lines, "./sherlock\x002\n"); }); #[test] @@ -1932,59 +1937,52 @@ fn feature_411_parallel_search_stats() { assert_eq!(lines.contains("seconds"), true); } -sherlock!(feature_411_ignore_stats_1, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-with-matches"); - cmd.arg("--stats"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines.contains("seconds"), false); -}); - -sherlock!(feature_411_ignore_stats_2, |wd: WorkDir, mut cmd: Command| { - cmd.arg("--files-without-match"); - cmd.arg("--stats"); - - let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines.contains("seconds"), false); -}); - #[test] fn feature_740_passthru() { let wd = WorkDir::new("feature_740"); wd.create("file", 
"\nfoo\nbar\nfoobar\n\nbaz\n"); - wd.create("patterns", "foo\n\nbar\n"); + wd.create("patterns", "foo\nbar\n"); // We can't assume that the way colour specs are translated to ANSI // sequences will remain stable, and --replace doesn't currently work with // pass-through, so for now we don't actually test the match sub-strings let common_args = &["-n", "--passthru"]; - let expected = "\ -1: + let foo_expected = "\ +1- 2:foo -3:bar +3-bar 4:foobar -5: -6:baz +5- +6-baz "; // With single pattern let mut cmd = wd.command(); cmd.args(common_args).arg("foo").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_expected); + + let foo_bar_expected = "\ +1- +2:foo +3:bar +4:foobar +5- +6-baz +"; // With multiple -e patterns let mut cmd = wd.command(); cmd.args(common_args) .arg("-e").arg("foo").arg("-e").arg("bar").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_bar_expected); // With multiple -f patterns let mut cmd = wd.command(); cmd.args(common_args).arg("-f").arg("patterns").arg("file"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, expected); + assert_eq!(lines, foo_bar_expected); // -c should override let mut cmd = wd.command(); @@ -1992,15 +1990,35 @@ fn feature_740_passthru() { let lines: String = wd.stdout(&mut cmd); assert_eq!(lines, "2\n"); + let only_foo_expected = "\ +1- +2:foo +3-bar +4:foo +5- +6-baz +"; + // -o should conflict let mut cmd = wd.command(); cmd.args(common_args).arg("-o").arg("foo").arg("file"); - wd.assert_err(&mut cmd); + let lines: String = wd.stdout(&mut cmd); + assert_eq!(lines, only_foo_expected); + + let replace_foo_expected = "\ +1- +2:wat +3-bar +4:watbar +5- +6-baz +"; // -r should conflict let mut cmd = wd.command(); - cmd.args(common_args).arg("-r").arg("$0").arg("foo").arg("file"); - wd.assert_err(&mut cmd); + cmd.args(common_args).arg("-r").arg("wat").arg("foo").arg("file"); + let lines: String = 
wd.stdout(&mut cmd); + assert_eq!(lines, replace_foo_expected); } #[test] @@ -2081,7 +2099,7 @@ fn regression_270() { let mut cmd = wd.command(); cmd.arg("-e").arg("-test").arg("./"); let lines: String = wd.stdout(&mut cmd); - assert_eq!(lines, path("foo:-test\n")); + assert_eq!(lines, path("./foo:-test\n")); } // See: https://github.com/BurntSushi/ripgrep/issues/391 @@ -2232,8 +2250,8 @@ fn regression_693_context_option_in_contextless_mode() { let lines: String = wd.stdout(&mut cmd); let expected = "\ -bar:1 -foo:1 +./bar:1 +./foo:1 "; assert_eq!(lines, expected); }