mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
9ed7565fcb
Basically, unless the -a/--text flag is given, it is generally always an error to search for an explicit NUL byte because the binary detection will prevent it from matching. Fixes #1838
1457 lines
55 KiB
Rust
1457 lines
55 KiB
Rust
/*!
|
|
Provides the definition of high level arguments from CLI flags.
|
|
*/
|
|
|
|
use std::{
|
|
collections::HashSet,
|
|
path::{Path, PathBuf},
|
|
};
|
|
|
|
use {
|
|
bstr::BString,
|
|
grep::printer::{ColorSpecs, SummaryKind},
|
|
};
|
|
|
|
use crate::{
|
|
flags::lowargs::{
|
|
BinaryMode, BoundaryMode, BufferMode, CaseMode, ColorChoice,
|
|
ContextMode, ContextSeparator, EncodingMode, EngineChoice,
|
|
FieldContextSeparator, FieldMatchSeparator, LowArgs, MmapMode, Mode,
|
|
PatternSource, SearchMode, SortMode, SortModeKind, TypeChange,
|
|
},
|
|
haystack::{Haystack, HaystackBuilder},
|
|
search::{PatternMatcher, Printer, SearchWorker, SearchWorkerBuilder},
|
|
};
|
|
|
|
/// A high level representation of CLI arguments.
|
|
///
|
|
/// The distinction between low and high level arguments is somewhat arbitrary
|
|
/// and wishy washy. The main idea here is that high level arguments generally
|
|
/// require all of CLI parsing to be finished. For example, one cannot
|
|
/// construct a glob matcher until all of the glob patterns are known.
|
|
///
|
|
/// So while low level arguments are collected during parsing itself, high
|
|
/// level arguments aren't created until parsing has completely finished.
|
|
#[derive(Debug)]
|
|
pub(crate) struct HiArgs {
|
|
binary: BinaryDetection,
|
|
boundary: Option<BoundaryMode>,
|
|
buffer: BufferMode,
|
|
byte_offset: bool,
|
|
case: CaseMode,
|
|
color: ColorChoice,
|
|
colors: grep::printer::ColorSpecs,
|
|
column: bool,
|
|
context: ContextMode,
|
|
context_separator: ContextSeparator,
|
|
crlf: bool,
|
|
dfa_size_limit: Option<usize>,
|
|
encoding: EncodingMode,
|
|
engine: EngineChoice,
|
|
field_context_separator: FieldContextSeparator,
|
|
field_match_separator: FieldMatchSeparator,
|
|
file_separator: Option<Vec<u8>>,
|
|
fixed_strings: bool,
|
|
follow: bool,
|
|
globs: ignore::overrides::Override,
|
|
heading: bool,
|
|
hidden: bool,
|
|
hyperlink_config: grep::printer::HyperlinkConfig,
|
|
ignore_file_case_insensitive: bool,
|
|
ignore_file: Vec<PathBuf>,
|
|
include_zero: bool,
|
|
invert_match: bool,
|
|
is_terminal_stdout: bool,
|
|
line_number: bool,
|
|
max_columns: Option<u64>,
|
|
max_columns_preview: bool,
|
|
max_count: Option<u64>,
|
|
max_depth: Option<usize>,
|
|
max_filesize: Option<u64>,
|
|
mmap_choice: grep::searcher::MmapChoice,
|
|
mode: Mode,
|
|
multiline: bool,
|
|
multiline_dotall: bool,
|
|
no_ignore_dot: bool,
|
|
no_ignore_exclude: bool,
|
|
no_ignore_files: bool,
|
|
no_ignore_global: bool,
|
|
no_ignore_parent: bool,
|
|
no_ignore_vcs: bool,
|
|
no_require_git: bool,
|
|
no_unicode: bool,
|
|
null_data: bool,
|
|
one_file_system: bool,
|
|
only_matching: bool,
|
|
path_separator: Option<u8>,
|
|
paths: Paths,
|
|
path_terminator: Option<u8>,
|
|
patterns: Patterns,
|
|
pre: Option<PathBuf>,
|
|
pre_globs: ignore::overrides::Override,
|
|
quiet: bool,
|
|
quit_after_match: bool,
|
|
regex_size_limit: Option<usize>,
|
|
replace: Option<BString>,
|
|
search_zip: bool,
|
|
sort: Option<SortMode>,
|
|
stats: Option<grep::printer::Stats>,
|
|
stop_on_nonmatch: bool,
|
|
threads: usize,
|
|
trim: bool,
|
|
types: ignore::types::Types,
|
|
vimgrep: bool,
|
|
with_filename: bool,
|
|
}
|
|
|
|
impl HiArgs {
|
|
/// Convert low level arguments into high level arguments.
|
|
///
|
|
/// This process can fail for a variety of reasons. For example, invalid
|
|
/// globs or some kind of environment issue.
|
|
pub(crate) fn from_low_args(mut low: LowArgs) -> anyhow::Result<HiArgs> {
|
|
// Callers should not be trying to convert low-level arguments when
|
|
// a short-circuiting special mode is present.
|
|
assert_eq!(None, low.special, "special mode demands short-circuiting");
|
|
// If the sorting mode isn't supported, then we bail loudly. I'm not
|
|
// sure if this is the right thing to do. We could silently "not sort"
|
|
// as well. If we wanted to go that route, then we could just set
|
|
// `low.sort = None` if `supported()` returns an error.
|
|
if let Some(ref sort) = low.sort {
|
|
sort.supported()?;
|
|
}
|
|
|
|
// We modify the mode in-place on `low` so that subsequent conversions
|
|
// see the correct mode.
|
|
match low.mode {
|
|
Mode::Search(ref mut mode) => match *mode {
|
|
// treat `-v --count-matches` as `-v --count`
|
|
SearchMode::CountMatches if low.invert_match => {
|
|
*mode = SearchMode::Count;
|
|
}
|
|
// treat `-o --count` as `--count-matches`
|
|
SearchMode::Count if low.only_matching => {
|
|
*mode = SearchMode::CountMatches;
|
|
}
|
|
_ => {}
|
|
},
|
|
_ => {}
|
|
}
|
|
|
|
let mut state = State::new()?;
|
|
let patterns = Patterns::from_low_args(&mut state, &mut low)?;
|
|
let paths = Paths::from_low_args(&mut state, &patterns, &mut low)?;
|
|
|
|
let binary = BinaryDetection::from_low_args(&state, &low);
|
|
let colors = take_color_specs(&mut state, &mut low);
|
|
let hyperlink_config = take_hyperlink_config(&mut state, &mut low)?;
|
|
let stats = stats(&low);
|
|
let types = types(&low)?;
|
|
let globs = globs(&state, &low)?;
|
|
let pre_globs = preprocessor_globs(&state, &low)?;
|
|
|
|
let color = match low.color {
|
|
ColorChoice::Auto if !state.is_terminal_stdout => {
|
|
ColorChoice::Never
|
|
}
|
|
_ => low.color,
|
|
};
|
|
let column = low.column.unwrap_or(low.vimgrep);
|
|
let heading = match low.heading {
|
|
None => !low.vimgrep && state.is_terminal_stdout,
|
|
Some(false) => false,
|
|
Some(true) => !low.vimgrep,
|
|
};
|
|
let path_terminator = if low.null { Some(b'\x00') } else { None };
|
|
let quit_after_match = stats.is_none() && low.quiet;
|
|
let threads = if low.sort.is_some() || paths.is_one_file {
|
|
1
|
|
} else if let Some(threads) = low.threads {
|
|
threads
|
|
} else {
|
|
std::thread::available_parallelism().map_or(1, |n| n.get()).min(12)
|
|
};
|
|
log::debug!("using {threads} thread(s)");
|
|
let with_filename = low
|
|
.with_filename
|
|
.unwrap_or_else(|| low.vimgrep || !paths.is_one_file);
|
|
|
|
let file_separator = match low.mode {
|
|
Mode::Search(SearchMode::Standard) => {
|
|
if heading {
|
|
Some(b"".to_vec())
|
|
} else if let ContextMode::Limited(ref limited) = low.context {
|
|
let (before, after) = limited.get();
|
|
if before > 0 || after > 0 {
|
|
low.context_separator.clone().into_bytes()
|
|
} else {
|
|
None
|
|
}
|
|
} else {
|
|
None
|
|
}
|
|
}
|
|
_ => None,
|
|
};
|
|
|
|
let line_number = low.line_number.unwrap_or_else(|| {
|
|
if low.quiet {
|
|
return false;
|
|
}
|
|
let Mode::Search(ref search_mode) = low.mode else { return false };
|
|
match *search_mode {
|
|
SearchMode::FilesWithMatches
|
|
| SearchMode::FilesWithoutMatch
|
|
| SearchMode::Count
|
|
| SearchMode::CountMatches => return false,
|
|
SearchMode::JSON => return true,
|
|
SearchMode::Standard => {
|
|
// A few things can imply counting line numbers. In
|
|
// particular, we generally want to show line numbers by
|
|
// default when printing to a tty for human consumption,
|
|
// except for one interesting case: when we're only
|
|
// searching stdin. This makes pipelines work as expected.
|
|
(state.is_terminal_stdout && !paths.is_only_stdin())
|
|
|| column
|
|
|| low.vimgrep
|
|
}
|
|
}
|
|
});
|
|
|
|
let mmap_choice = {
|
|
// SAFETY: Memory maps are difficult to impossible to encapsulate
|
|
// safely in a portable way that doesn't simultaneously negate some
|
|
// of the benfits of using memory maps. For ripgrep's use, we never
|
|
// mutate a memory map and generally never store the contents of
|
|
// memory map in a data structure that depends on immutability.
|
|
// Generally speaking, the worst thing that can happen is a SIGBUS
|
|
// (if the underlying file is truncated while reading it), which
|
|
// will cause ripgrep to abort. This reasoning should be treated as
|
|
// suspect.
|
|
let maybe = unsafe { grep::searcher::MmapChoice::auto() };
|
|
let never = grep::searcher::MmapChoice::never();
|
|
match low.mmap {
|
|
MmapMode::Auto => {
|
|
if paths.paths.len() <= 10
|
|
&& paths.paths.iter().all(|p| p.is_file())
|
|
{
|
|
// If we're only searching a few paths and all of them
|
|
// are files, then memory maps are probably faster.
|
|
maybe
|
|
} else {
|
|
never
|
|
}
|
|
}
|
|
MmapMode::AlwaysTryMmap => maybe,
|
|
MmapMode::Never => never,
|
|
}
|
|
};
|
|
|
|
Ok(HiArgs {
|
|
mode: low.mode,
|
|
patterns,
|
|
paths,
|
|
binary,
|
|
boundary: low.boundary,
|
|
buffer: low.buffer,
|
|
byte_offset: low.byte_offset,
|
|
case: low.case,
|
|
color,
|
|
colors,
|
|
column,
|
|
context: low.context,
|
|
context_separator: low.context_separator,
|
|
crlf: low.crlf,
|
|
dfa_size_limit: low.dfa_size_limit,
|
|
encoding: low.encoding,
|
|
engine: low.engine,
|
|
field_context_separator: low.field_context_separator,
|
|
field_match_separator: low.field_match_separator,
|
|
file_separator,
|
|
fixed_strings: low.fixed_strings,
|
|
follow: low.follow,
|
|
heading,
|
|
hidden: low.hidden,
|
|
hyperlink_config,
|
|
ignore_file: low.ignore_file,
|
|
ignore_file_case_insensitive: low.ignore_file_case_insensitive,
|
|
include_zero: low.include_zero,
|
|
invert_match: low.invert_match,
|
|
is_terminal_stdout: state.is_terminal_stdout,
|
|
line_number,
|
|
max_columns: low.max_columns,
|
|
max_columns_preview: low.max_columns_preview,
|
|
max_count: low.max_count,
|
|
max_depth: low.max_depth,
|
|
max_filesize: low.max_filesize,
|
|
mmap_choice,
|
|
multiline: low.multiline,
|
|
multiline_dotall: low.multiline_dotall,
|
|
no_ignore_dot: low.no_ignore_dot,
|
|
no_ignore_exclude: low.no_ignore_exclude,
|
|
no_ignore_files: low.no_ignore_files,
|
|
no_ignore_global: low.no_ignore_global,
|
|
no_ignore_parent: low.no_ignore_parent,
|
|
no_ignore_vcs: low.no_ignore_vcs,
|
|
no_require_git: low.no_require_git,
|
|
no_unicode: low.no_unicode,
|
|
null_data: low.null_data,
|
|
one_file_system: low.one_file_system,
|
|
only_matching: low.only_matching,
|
|
globs,
|
|
path_separator: low.path_separator,
|
|
path_terminator,
|
|
pre: low.pre,
|
|
pre_globs,
|
|
quiet: low.quiet,
|
|
quit_after_match,
|
|
regex_size_limit: low.regex_size_limit,
|
|
replace: low.replace,
|
|
search_zip: low.search_zip,
|
|
sort: low.sort,
|
|
stats,
|
|
stop_on_nonmatch: low.stop_on_nonmatch,
|
|
threads,
|
|
trim: low.trim,
|
|
types,
|
|
vimgrep: low.vimgrep,
|
|
with_filename,
|
|
})
|
|
}
|
|
|
|
/// Returns a writer for printing buffers to stdout.
|
|
///
|
|
/// This is intended to be used from multiple threads. Namely, a buffer
|
|
/// writer can create new buffers that are sent to threads. Threads can
|
|
/// then independently write to the buffers. Once a unit of work is
|
|
/// complete, a buffer can be given to the buffer writer to write to
|
|
/// stdout.
|
|
pub(crate) fn buffer_writer(&self) -> termcolor::BufferWriter {
|
|
let mut wtr =
|
|
termcolor::BufferWriter::stdout(self.color.to_termcolor());
|
|
wtr.separator(self.file_separator.clone());
|
|
wtr
|
|
}
|
|
|
|
/// Returns true when ripgrep had to guess to search the current working
|
|
/// directory. That is, it's true when ripgrep is called without any file
|
|
/// paths or directories to search.
|
|
///
|
|
/// Other than changing how file paths are printed (i.e., without the
|
|
/// leading `./`), it's also useful to know for diagnostic reasons. For
|
|
/// example, ripgrep will print an error message when nothing is searched
|
|
/// since it's possible the ignore rules in play are too aggressive. But
|
|
/// this warning is only emitted when ripgrep was called without any
|
|
/// explicit file paths since otherwise the warning would likely be too
|
|
/// aggressive.
|
|
pub(crate) fn has_implicit_path(&self) -> bool {
|
|
self.paths.has_implicit_path
|
|
}
|
|
|
|
/// Return a properly configured builder for constructing haystacks.
|
|
///
|
|
/// The builder can be used to turn a directory entry (from the `ignore`
|
|
/// crate) into something that can be searched.
|
|
pub(crate) fn haystack_builder(&self) -> HaystackBuilder {
|
|
let mut builder = HaystackBuilder::new();
|
|
builder.strip_dot_prefix(self.paths.has_implicit_path);
|
|
builder
|
|
}
|
|
|
|
/// Return the matcher that should be used for searching using the engine
|
|
/// choice made by the user.
|
|
///
|
|
/// If there was a problem building the matcher (e.g., a syntax error),
|
|
/// then this returns an error.
|
|
pub(crate) fn matcher(&self) -> anyhow::Result<PatternMatcher> {
|
|
match self.engine {
|
|
EngineChoice::Default => match self.matcher_rust() {
|
|
Ok(m) => Ok(m),
|
|
Err(err) => {
|
|
anyhow::bail!(suggest_other_engine(err.to_string()));
|
|
}
|
|
},
|
|
EngineChoice::PCRE2 => Ok(self.matcher_pcre2()?),
|
|
EngineChoice::Auto => {
|
|
let rust_err = match self.matcher_rust() {
|
|
Ok(m) => return Ok(m),
|
|
Err(err) => err,
|
|
};
|
|
log::debug!(
|
|
"error building Rust regex in hybrid mode:\n{rust_err}",
|
|
);
|
|
|
|
let pcre_err = match self.matcher_pcre2() {
|
|
Ok(m) => return Ok(m),
|
|
Err(err) => err,
|
|
};
|
|
let divider = "~".repeat(79);
|
|
anyhow::bail!(
|
|
"regex could not be compiled with either the default \
|
|
regex engine or with PCRE2.\n\n\
|
|
default regex engine error:\n\
|
|
{divider}\n\
|
|
{rust_err}\n\
|
|
{divider}\n\n\
|
|
PCRE2 regex engine error:\n{pcre_err}",
|
|
);
|
|
}
|
|
}
|
|
}
|
|
|
|
/// Build a matcher using PCRE2.
|
|
///
|
|
/// If there was a problem building the matcher (such as a regex syntax
|
|
/// error), then an error is returned.
|
|
///
|
|
/// If the `pcre2` feature is not enabled then this always returns an
|
|
/// error.
|
|
fn matcher_pcre2(&self) -> anyhow::Result<PatternMatcher> {
|
|
#[cfg(feature = "pcre2")]
|
|
{
|
|
let mut builder = grep::pcre2::RegexMatcherBuilder::new();
|
|
builder.multi_line(true).fixed_strings(self.fixed_strings);
|
|
match self.case {
|
|
CaseMode::Sensitive => builder.caseless(false),
|
|
CaseMode::Insensitive => builder.caseless(true),
|
|
CaseMode::Smart => builder.case_smart(true),
|
|
};
|
|
if let Some(ref boundary) = self.boundary {
|
|
match *boundary {
|
|
BoundaryMode::Line => builder.whole_line(true),
|
|
BoundaryMode::Word => builder.word(true),
|
|
};
|
|
}
|
|
// For whatever reason, the JIT craps out during regex compilation with
|
|
// a "no more memory" error on 32 bit systems. So don't use it there.
|
|
if cfg!(target_pointer_width = "64") {
|
|
builder
|
|
.jit_if_available(true)
|
|
// The PCRE2 docs say that 32KB is the default, and that 1MB
|
|
// should be big enough for anything. But let's crank it to
|
|
// 10MB.
|
|
.max_jit_stack_size(Some(10 * (1 << 20)));
|
|
}
|
|
if !self.no_unicode {
|
|
builder.utf(true).ucp(true);
|
|
}
|
|
if self.multiline {
|
|
builder.dotall(self.multiline_dotall);
|
|
}
|
|
if self.crlf {
|
|
builder.crlf(true);
|
|
}
|
|
let m = builder.build_many(&self.patterns.patterns)?;
|
|
Ok(PatternMatcher::PCRE2(m))
|
|
}
|
|
#[cfg(not(feature = "pcre2"))]
|
|
{
|
|
Err(anyhow::anyhow!(
|
|
"PCRE2 is not available in this build of ripgrep"
|
|
))
|
|
}
|
|
}
|
|
|
|
/// Build a matcher using Rust's regex engine.
|
|
///
|
|
/// If there was a problem building the matcher (such as a regex syntax
|
|
/// error), then an error is returned.
|
|
fn matcher_rust(&self) -> anyhow::Result<PatternMatcher> {
|
|
let mut builder = grep::regex::RegexMatcherBuilder::new();
|
|
builder
|
|
.multi_line(true)
|
|
.unicode(!self.no_unicode)
|
|
.octal(false)
|
|
.fixed_strings(self.fixed_strings);
|
|
match self.case {
|
|
CaseMode::Sensitive => builder.case_insensitive(false),
|
|
CaseMode::Insensitive => builder.case_insensitive(true),
|
|
CaseMode::Smart => builder.case_smart(true),
|
|
};
|
|
if let Some(ref boundary) = self.boundary {
|
|
match *boundary {
|
|
BoundaryMode::Line => builder.whole_line(true),
|
|
BoundaryMode::Word => builder.word(true),
|
|
};
|
|
}
|
|
if self.multiline {
|
|
builder.dot_matches_new_line(self.multiline_dotall);
|
|
if self.crlf {
|
|
builder.crlf(true).line_terminator(None);
|
|
}
|
|
} else {
|
|
builder.line_terminator(Some(b'\n')).dot_matches_new_line(false);
|
|
if self.crlf {
|
|
builder.crlf(true);
|
|
}
|
|
// We don't need to set this in multiline mode since mulitline
|
|
// matchers don't use optimizations related to line terminators.
|
|
// Moreover, a mulitline regex used with --null-data should
|
|
// be allowed to match NUL bytes explicitly, which this would
|
|
// otherwise forbid.
|
|
if self.null_data {
|
|
builder.line_terminator(Some(b'\x00'));
|
|
}
|
|
}
|
|
if let Some(limit) = self.regex_size_limit {
|
|
builder.size_limit(limit);
|
|
}
|
|
if let Some(limit) = self.dfa_size_limit {
|
|
builder.dfa_size_limit(limit);
|
|
}
|
|
if !self.binary.is_none() {
|
|
builder.ban_byte(Some(b'\x00'));
|
|
}
|
|
let m = match builder.build_many(&self.patterns.patterns) {
|
|
Ok(m) => m,
|
|
Err(err) => {
|
|
anyhow::bail!(suggest_text(suggest_multiline(err.to_string())))
|
|
}
|
|
};
|
|
Ok(PatternMatcher::RustRegex(m))
|
|
}
|
|
|
|
/// Returns true if some non-zero number of matches is believed to be
|
|
/// possible.
|
|
///
|
|
/// When this returns false, it is impossible for ripgrep to ever report
|
|
/// a match.
|
|
pub(crate) fn matches_possible(&self) -> bool {
|
|
if self.patterns.patterns.is_empty() {
|
|
return false;
|
|
}
|
|
if self.max_count == Some(0) {
|
|
return false;
|
|
}
|
|
true
|
|
}
|
|
|
|
/// Returns the "mode" that ripgrep should operate in.
|
|
///
|
|
/// This is generally useful for determining what action ripgrep should
|
|
/// take. The main mode is of course to "search," but there are other
|
|
/// non-search modes such as `--type-list` and `--files`.
|
|
pub(crate) fn mode(&self) -> Mode {
|
|
self.mode
|
|
}
|
|
|
|
/// Returns a builder for constructing a "path printer."
|
|
///
|
|
/// This is useful for the `--files` mode in ripgrep, where the printer
|
|
/// just needs to emit paths and not need to worry about the functionality
|
|
/// of searching.
|
|
pub(crate) fn path_printer_builder(
|
|
&self,
|
|
) -> grep::printer::PathPrinterBuilder {
|
|
let mut builder = grep::printer::PathPrinterBuilder::new();
|
|
builder
|
|
.color_specs(self.colors.clone())
|
|
.hyperlink(self.hyperlink_config.clone())
|
|
.separator(self.path_separator.clone())
|
|
.terminator(self.path_terminator.unwrap_or(b'\n'));
|
|
builder
|
|
}
|
|
|
|
/// Returns a printer for the given search mode.
|
|
///
|
|
/// This chooses which printer to build (JSON, summary or standard) based
|
|
/// on the search mode given.
|
|
pub(crate) fn printer<W: termcolor::WriteColor>(
|
|
&self,
|
|
search_mode: SearchMode,
|
|
wtr: W,
|
|
) -> Printer<W> {
|
|
let summary_kind = if self.quiet {
|
|
SummaryKind::Quiet
|
|
} else {
|
|
match search_mode {
|
|
SearchMode::FilesWithMatches => SummaryKind::PathWithMatch,
|
|
SearchMode::FilesWithoutMatch => SummaryKind::PathWithoutMatch,
|
|
SearchMode::Count => SummaryKind::Count,
|
|
SearchMode::CountMatches => SummaryKind::CountMatches,
|
|
SearchMode::JSON => {
|
|
return Printer::JSON(self.printer_json(wtr))
|
|
}
|
|
SearchMode::Standard => {
|
|
return Printer::Standard(self.printer_standard(wtr))
|
|
}
|
|
}
|
|
};
|
|
Printer::Summary(self.printer_summary(wtr, summary_kind))
|
|
}
|
|
|
|
/// Builds a JSON printer.
|
|
fn printer_json<W: std::io::Write>(
|
|
&self,
|
|
wtr: W,
|
|
) -> grep::printer::JSON<W> {
|
|
grep::printer::JSONBuilder::new()
|
|
.pretty(false)
|
|
.max_matches(self.max_count)
|
|
.always_begin_end(false)
|
|
.build(wtr)
|
|
}
|
|
|
|
/// Builds a "standard" grep printer where matches are printed as plain
|
|
/// text lines.
|
|
fn printer_standard<W: termcolor::WriteColor>(
|
|
&self,
|
|
wtr: W,
|
|
) -> grep::printer::Standard<W> {
|
|
let mut builder = grep::printer::StandardBuilder::new();
|
|
builder
|
|
.byte_offset(self.byte_offset)
|
|
.color_specs(self.colors.clone())
|
|
.column(self.column)
|
|
.heading(self.heading)
|
|
.hyperlink(self.hyperlink_config.clone())
|
|
.max_columns_preview(self.max_columns_preview)
|
|
.max_columns(self.max_columns)
|
|
.max_matches(self.max_count)
|
|
.only_matching(self.only_matching)
|
|
.path(self.with_filename)
|
|
.path_terminator(self.path_terminator.clone())
|
|
.per_match_one_line(true)
|
|
.per_match(self.vimgrep)
|
|
.replacement(self.replace.clone().map(|r| r.into()))
|
|
.separator_context(self.context_separator.clone().into_bytes())
|
|
.separator_field_context(
|
|
self.field_context_separator.clone().into_bytes(),
|
|
)
|
|
.separator_field_match(
|
|
self.field_match_separator.clone().into_bytes(),
|
|
)
|
|
.separator_path(self.path_separator.clone())
|
|
.stats(self.stats.is_some())
|
|
.trim_ascii(self.trim);
|
|
// When doing multi-threaded searching, the buffer writer is
|
|
// responsible for writing separators since it is the only thing that
|
|
// knows whether something has been printed or not. But for the single
|
|
// threaded case, we don't use a buffer writer and thus can let the
|
|
// printer own this.
|
|
if self.threads == 1 {
|
|
builder.separator_search(self.file_separator.clone());
|
|
}
|
|
builder.build(wtr)
|
|
}
|
|
|
|
/// Builds a "summary" printer where search results are aggregated on a
|
|
/// file-by-file basis.
|
|
fn printer_summary<W: termcolor::WriteColor>(
|
|
&self,
|
|
wtr: W,
|
|
kind: SummaryKind,
|
|
) -> grep::printer::Summary<W> {
|
|
grep::printer::SummaryBuilder::new()
|
|
.color_specs(self.colors.clone())
|
|
.exclude_zero(!self.include_zero)
|
|
.hyperlink(self.hyperlink_config.clone())
|
|
.kind(kind)
|
|
.max_matches(self.max_count)
|
|
.path(self.with_filename)
|
|
.path_terminator(self.path_terminator.clone())
|
|
.separator_field(b":".to_vec())
|
|
.separator_path(self.path_separator.clone())
|
|
.stats(self.stats.is_some())
|
|
.build(wtr)
|
|
}
|
|
|
|
/// Returns true if ripgrep should operate in "quiet" mode.
|
|
///
|
|
/// Generally speaking, quiet mode means that ripgrep should not print
|
|
/// anything to stdout. There are some exceptions. For example, when the
|
|
/// user has provided `--stats`, then ripgrep will print statistics to
|
|
/// stdout.
|
|
pub(crate) fn quiet(&self) -> bool {
|
|
self.quiet
|
|
}
|
|
|
|
/// Returns true when ripgrep should stop searching after a single match is
|
|
/// found.
|
|
///
|
|
/// This is useful for example when quiet mode is enabled. In that case,
|
|
/// users generally can't tell the difference in behavior between a search
|
|
/// that finds all matches and a search that only finds one of them. (An
|
|
/// exception here is if `--stats` is given, then `quit_after_match` will
|
|
/// always return false since the user expects ripgrep to find everything.)
|
|
pub(crate) fn quit_after_match(&self) -> bool {
|
|
self.quit_after_match
|
|
}
|
|
|
|
/// Build a worker for executing searches.
|
|
///
|
|
/// Search results are found using the given matcher and written to the
|
|
/// given printer.
|
|
pub(crate) fn search_worker<W: termcolor::WriteColor>(
|
|
&self,
|
|
matcher: PatternMatcher,
|
|
searcher: grep::searcher::Searcher,
|
|
printer: Printer<W>,
|
|
) -> anyhow::Result<SearchWorker<W>> {
|
|
let mut builder = SearchWorkerBuilder::new();
|
|
builder
|
|
.preprocessor(self.pre.clone())?
|
|
.preprocessor_globs(self.pre_globs.clone())
|
|
.search_zip(self.search_zip)
|
|
.binary_detection_explicit(self.binary.explicit.clone())
|
|
.binary_detection_implicit(self.binary.implicit.clone());
|
|
Ok(builder.build(matcher, searcher, printer))
|
|
}
|
|
|
|
/// Build a searcher from the command line parameters.
|
|
pub(crate) fn searcher(&self) -> anyhow::Result<grep::searcher::Searcher> {
|
|
let line_term = if self.crlf {
|
|
grep::matcher::LineTerminator::crlf()
|
|
} else if self.null_data {
|
|
grep::matcher::LineTerminator::byte(b'\x00')
|
|
} else {
|
|
grep::matcher::LineTerminator::byte(b'\n')
|
|
};
|
|
let mut builder = grep::searcher::SearcherBuilder::new();
|
|
builder
|
|
.line_terminator(line_term)
|
|
.invert_match(self.invert_match)
|
|
.line_number(self.line_number)
|
|
.multi_line(self.multiline)
|
|
.memory_map(self.mmap_choice.clone())
|
|
.stop_on_nonmatch(self.stop_on_nonmatch);
|
|
match self.context {
|
|
ContextMode::Passthru => {
|
|
builder.passthru(true);
|
|
}
|
|
ContextMode::Limited(ref limited) => {
|
|
let (before, after) = limited.get();
|
|
builder.before_context(before);
|
|
builder.after_context(after);
|
|
}
|
|
}
|
|
match self.encoding {
|
|
EncodingMode::Auto => {} // default for the searcher
|
|
EncodingMode::Some(ref enc) => {
|
|
builder.encoding(Some(enc.clone()));
|
|
}
|
|
EncodingMode::Disabled => {
|
|
builder.bom_sniffing(false);
|
|
}
|
|
}
|
|
Ok(builder.build())
|
|
}
|
|
|
|
/// Given an iterator of haystacks, sort them if necessary.
|
|
///
|
|
/// When sorting is necessary, this will collect the entire iterator into
|
|
/// memory, sort them and then return a new iterator. When sorting is not
|
|
/// necessary, then the iterator given is returned as is without collecting
|
|
/// it into memory.
|
|
///
|
|
/// Once special case is when sorting by path in ascending order has been
|
|
/// requested. In this case, the iterator given is returned as is without
|
|
/// any additional sorting. This is done because `walk_builder()` will sort
|
|
/// the iterator it yields during directory traversal, so no additional
|
|
/// sorting is needed.
|
|
pub(crate) fn sort<'a, I>(
|
|
&self,
|
|
haystacks: I,
|
|
) -> Box<dyn Iterator<Item = Haystack> + 'a>
|
|
where
|
|
I: Iterator<Item = Haystack> + 'a,
|
|
{
|
|
use std::{cmp::Ordering, fs::Metadata, io, time::SystemTime};
|
|
|
|
fn attach_timestamps(
|
|
haystacks: impl Iterator<Item = Haystack>,
|
|
get: impl Fn(&Metadata) -> io::Result<SystemTime>,
|
|
) -> impl Iterator<Item = (Haystack, Option<SystemTime>)> {
|
|
haystacks.map(move |s| {
|
|
let time = s.path().metadata().and_then(|m| get(&m)).ok();
|
|
(s, time)
|
|
})
|
|
}
|
|
|
|
let Some(ref sort) = self.sort else { return Box::new(haystacks) };
|
|
let mut with_timestamps: Vec<_> = match sort.kind {
|
|
SortModeKind::Path if !sort.reverse => return Box::new(haystacks),
|
|
SortModeKind::Path => todo!(),
|
|
SortModeKind::LastModified => {
|
|
attach_timestamps(haystacks, |md| md.modified()).collect()
|
|
}
|
|
SortModeKind::LastAccessed => {
|
|
attach_timestamps(haystacks, |md| md.accessed()).collect()
|
|
}
|
|
SortModeKind::Created => {
|
|
attach_timestamps(haystacks, |md| md.created()).collect()
|
|
}
|
|
};
|
|
with_timestamps.sort_by(|(_, ref t1), (_, ref t2)| {
|
|
let ordering = match (*t1, *t2) {
|
|
// Both have metadata, do the obvious thing.
|
|
(Some(t1), Some(t2)) => t1.cmp(&t2),
|
|
// Things that error should appear later (when ascending).
|
|
(Some(_), None) => Ordering::Less,
|
|
// Things that error should appear later (when ascending).
|
|
(None, Some(_)) => Ordering::Greater,
|
|
// When both error, we can't distinguish, so treat as equal.
|
|
(None, None) => Ordering::Equal,
|
|
};
|
|
if sort.reverse {
|
|
ordering.reverse()
|
|
} else {
|
|
ordering
|
|
}
|
|
});
|
|
Box::new(with_timestamps.into_iter().map(|(s, _)| s))
|
|
}
|
|
|
|
/// Returns a stats object if the user requested that ripgrep keep track
|
|
/// of various metrics during a search.
|
|
///
|
|
/// When this returns `None`, then callers may assume that the user did
|
|
/// not request statistics.
|
|
pub(crate) fn stats(&self) -> Option<grep::printer::Stats> {
|
|
self.stats.clone()
|
|
}
|
|
|
|
/// Returns a color-enabled writer for stdout.
|
|
///
|
|
/// The writer returned is also configured to do either line or block
|
|
/// buffering, based on either explicit configuration from the user via CLI
|
|
/// flags, or automatically based on whether stdout is connected to a tty.
|
|
pub(crate) fn stdout(&self) -> grep::cli::StandardStream {
|
|
let color = self.color.to_termcolor();
|
|
match self.buffer {
|
|
BufferMode::Auto => {
|
|
if self.is_terminal_stdout {
|
|
grep::cli::stdout_buffered_line(color)
|
|
} else {
|
|
grep::cli::stdout_buffered_block(color)
|
|
}
|
|
}
|
|
BufferMode::Line => grep::cli::stdout_buffered_line(color),
|
|
BufferMode::Block => grep::cli::stdout_buffered_block(color),
|
|
}
|
|
}
|
|
|
|
/// Returns the total number of threads ripgrep should use to execute a
|
|
/// search.
|
|
///
|
|
/// This number is the result of reasoning about both heuristics (like
|
|
/// the available number of cores) and whether ripgrep's mode supports
|
|
/// parallelism. It is intended that this number be used to directly
|
|
/// determine how many threads to spawn.
|
|
pub(crate) fn threads(&self) -> usize {
|
|
self.threads
|
|
}
|
|
|
|
/// Returns the file type matcher that was built.
|
|
///
|
|
/// The matcher includes both the default rules and any rules added by the
|
|
/// user for this specific invocation.
|
|
pub(crate) fn types(&self) -> &ignore::types::Types {
|
|
&self.types
|
|
}
|
|
|
|
/// Create a new builder for recursive directory traversal.
|
|
///
|
|
/// The builder returned can be used to start a single threaded or multi
|
|
/// threaded directory traversal. For multi threaded traversal, the number
|
|
/// of threads configured is equivalent to `HiArgs::threads`.
|
|
///
|
|
/// If `HiArgs::threads` is equal to `1`, then callers should generally
|
|
/// choose to explicitly use single threaded traversal since it won't have
|
|
/// the unnecessary overhead of synchronization.
|
|
pub(crate) fn walk_builder(&self) -> anyhow::Result<ignore::WalkBuilder> {
|
|
let mut builder = ignore::WalkBuilder::new(&self.paths.paths[0]);
|
|
for path in self.paths.paths.iter().skip(1) {
|
|
builder.add(path);
|
|
}
|
|
if !self.no_ignore_files {
|
|
for path in self.ignore_file.iter() {
|
|
if let Some(err) = builder.add_ignore(path) {
|
|
ignore_message!("{err}");
|
|
}
|
|
}
|
|
}
|
|
builder
|
|
.max_depth(self.max_depth)
|
|
.follow_links(self.follow)
|
|
.max_filesize(self.max_filesize)
|
|
.threads(self.threads)
|
|
.same_file_system(self.one_file_system)
|
|
.skip_stdout(matches!(self.mode, Mode::Search(_)))
|
|
.overrides(self.globs.clone())
|
|
.types(self.types.clone())
|
|
.hidden(!self.hidden)
|
|
.parents(!self.no_ignore_parent)
|
|
.ignore(!self.no_ignore_dot)
|
|
.git_global(!self.no_ignore_vcs && !self.no_ignore_global)
|
|
.git_ignore(!self.no_ignore_vcs)
|
|
.git_exclude(!self.no_ignore_vcs && !self.no_ignore_exclude)
|
|
.require_git(!self.no_require_git)
|
|
.ignore_case_insensitive(self.ignore_file_case_insensitive);
|
|
if !self.no_ignore_dot {
|
|
builder.add_custom_ignore_filename(".rgignore");
|
|
}
|
|
// When we want to sort paths lexicographically in ascending order,
|
|
// then we can actually do this during directory traversal itself.
|
|
// Otherwise, sorting is done by collecting all paths, sorting them and
|
|
// then searching them.
|
|
if let Some(ref sort) = self.sort {
|
|
assert_eq!(1, self.threads, "sorting implies single threaded");
|
|
if !sort.reverse && matches!(sort.kind, SortModeKind::Path) {
|
|
builder.sort_by_file_name(|a, b| a.cmp(b));
|
|
}
|
|
}
|
|
Ok(builder)
|
|
}
|
|
}
|
|
|
|
/// State that is computed a single time during argument parsing.
///
/// The state is intentionally generic and is shared between several
/// low->high argument conversions. Conversions may also mutate it in order
/// to communicate changes to the conversions that follow. Reading patterns,
/// for example, might consume stdin. Once stdin is known to be consumed and
/// no other file paths were given, it is certain that the CWD should be
/// searched. That is how a state change made while reading patterns can
/// influence how file paths are ultimately generated.
#[derive(Debug)]
struct State {
    /// Whether a tty is believed to be connected to stdout. On unix
    /// systems, this is always correct. On Windows, Rust's standard library
    /// relies on heuristics, particularly for cygwin/MSYS environments.
    is_terminal_stdout: bool,
    /// Whether stdin has been consumed already. Knowing this makes it
    /// possible to give good error messages when the user tries to read
    /// from stdin in two different places, as in `rg -f - -`.
    stdin_consumed: bool,
    /// The current working directory.
    cwd: PathBuf,
}
|
|
|
|
impl State {
|
|
/// Initialize state to some sensible defaults.
|
|
///
|
|
/// Note that the state values may change throughout the lifetime of
|
|
/// argument parsing.
|
|
fn new() -> anyhow::Result<State> {
|
|
use std::io::IsTerminal;
|
|
|
|
Ok(State {
|
|
is_terminal_stdout: std::io::stdout().is_terminal(),
|
|
stdin_consumed: false,
|
|
cwd: current_dir()?,
|
|
})
|
|
}
|
|
}
|
|
|
|
/// The set of patterns to search for, treated as a disjunction.
///
/// It is possible for there to be zero patterns, e.g., via `-f /dev/null`.
#[derive(Debug)]
struct Patterns {
    /// The patterns themselves.
    patterns: Vec<String>,
}
|
|
|
|
impl Patterns {
|
|
/// Pulls the patterns out of the low arguments.
|
|
///
|
|
/// This includes collecting patterns from -e/--regexp and -f/--file.
|
|
///
|
|
/// If the invocation implies that the first positional argument is a
|
|
/// pattern (the common case), then the first positional argument is
|
|
/// extracted as well.
|
|
fn from_low_args(
|
|
state: &mut State,
|
|
low: &mut LowArgs,
|
|
) -> anyhow::Result<Patterns> {
|
|
// The first positional is only a pattern when ripgrep is instructed to
|
|
// search and neither -e/--regexp nor -f/--file is given. Basically,
|
|
// the first positional is a pattern only when a pattern hasn't been
|
|
// given in some other way.
|
|
|
|
// No search means no patterns. Even if -e/--regexp or -f/--file is
|
|
// given, we know we won't use them so don't bother collecting them.
|
|
if !matches!(low.mode, Mode::Search(_)) {
|
|
return Ok(Patterns { patterns: vec![] });
|
|
}
|
|
// If we got nothing from -e/--regexp and -f/--file, then the first
|
|
// positional is a pattern.
|
|
if low.patterns.is_empty() {
|
|
anyhow::ensure!(
|
|
!low.positional.is_empty(),
|
|
"ripgrep requires at least one pattern to execute a search"
|
|
);
|
|
let ospat = low.positional.remove(0);
|
|
let Ok(pat) = ospat.into_string() else {
|
|
anyhow::bail!("pattern given is not valid UTF-8")
|
|
};
|
|
return Ok(Patterns { patterns: vec![pat] });
|
|
}
|
|
// Otherwise, we need to slurp up our patterns from -e/--regexp and
|
|
// -f/--file. We de-duplicate as we go. If we don't de-duplicate,
|
|
// then it can actually lead to major slow downs for sloppy inputs.
|
|
// This might be surprising, and the regex engine will eventually
|
|
// de-duplicate duplicative branches in a single regex (maybe), but
|
|
// not until after it has gone through parsing and some other layers.
|
|
// If there are a lot of duplicates, then that can lead to a sizeable
|
|
// extra cost. It is lamentable that we pay the extra cost here to
|
|
// de-duplicate for a likely uncommon case, but I've seen this have a
|
|
// big impact on real world data.
|
|
let mut seen = HashSet::new();
|
|
let mut patterns = Vec::with_capacity(low.patterns.len());
|
|
let mut add = |pat: String| {
|
|
if !seen.contains(&pat) {
|
|
seen.insert(pat.clone());
|
|
patterns.push(pat);
|
|
}
|
|
};
|
|
for source in low.patterns.drain(..) {
|
|
match source {
|
|
PatternSource::Regexp(pat) => add(pat),
|
|
PatternSource::File(path) => {
|
|
if path == Path::new("-") {
|
|
anyhow::ensure!(
|
|
!state.stdin_consumed,
|
|
"error reading -f/--file from stdin: stdin \
|
|
has already been consumed"
|
|
);
|
|
for pat in grep::cli::patterns_from_stdin()? {
|
|
add(pat);
|
|
}
|
|
state.stdin_consumed = true;
|
|
} else {
|
|
for pat in grep::cli::patterns_from_path(&path)? {
|
|
add(pat);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
Ok(Patterns { patterns })
|
|
}
|
|
}
|
|
|
|
/// The paths that ripgrep has been asked to search.
///
/// There is always at least one path in this collection.
#[derive(Debug)]
struct Paths {
    /// The paths themselves.
    paths: Vec<PathBuf>,
    /// True when ripgrep had to guess what to search because no path was
    /// given, e.g., when the user just runs `rg foo`. Needing this is odd,
    /// but it subtly affects how paths are printed: without an explicit
    /// path, ripgrep doesn't prefix each path with `./`, but with one it
    /// does. This curious behavior matches what GNU grep does.
    has_implicit_path: bool,
    /// True when it is known that only a single file descriptor will be
    /// searched.
    is_one_file: bool,
}
|
|
|
|
impl Paths {
|
|
/// Drain the search paths out of the given low arguments.
|
|
fn from_low_args(
|
|
state: &mut State,
|
|
_: &Patterns,
|
|
low: &mut LowArgs,
|
|
) -> anyhow::Result<Paths> {
|
|
// We require a `&Patterns` even though we don't use it to ensure that
|
|
// patterns have already been read from LowArgs. This let's us safely
|
|
// assume that all remaining positional arguments are intended to be
|
|
// file paths.
|
|
|
|
let mut paths = Vec::with_capacity(low.positional.len());
|
|
for osarg in low.positional.drain(..) {
|
|
let path = PathBuf::from(osarg);
|
|
if state.stdin_consumed && path == Path::new("-") {
|
|
anyhow::bail!(
|
|
"error: attempted to read patterns from stdin \
|
|
while also searching stdin",
|
|
);
|
|
}
|
|
paths.push(path);
|
|
}
|
|
if !paths.is_empty() {
|
|
let is_one_file = paths.len() == 1
|
|
&& (paths[0] == Path::new("-") || paths[0].is_file());
|
|
return Ok(Paths { paths, has_implicit_path: false, is_one_file });
|
|
}
|
|
// N.B. is_readable_stdin is a heuristic! Part of the issue is that a
|
|
// lot of "exec process" APIs will open a stdin pipe even though stdin
|
|
// isn't really being used. ripgrep then thinks it should search stdin
|
|
// and one gets the appearance of it hanging. It's a terrible failure
|
|
// mode, but there really is no good way to mitigate it. It's just a
|
|
// consequence of letting the user type 'rg foo' and "guessing" that
|
|
// they meant to search the CWD.
|
|
let is_readable_stdin = grep::cli::is_readable_stdin();
|
|
let use_cwd = !is_readable_stdin
|
|
|| state.stdin_consumed
|
|
|| !matches!(low.mode, Mode::Search(_));
|
|
log::debug!(
|
|
"using heuristics to determine whether to read from \
|
|
stdin or search ./ (\
|
|
is_readable_stdin={is_readable_stdin}, \
|
|
stdin_consumed={stdin_consumed}, \
|
|
mode={mode:?})",
|
|
stdin_consumed = state.stdin_consumed,
|
|
mode = low.mode,
|
|
);
|
|
let (path, is_one_file) = if use_cwd {
|
|
log::debug!("heuristic chose to search ./");
|
|
(PathBuf::from("./"), false)
|
|
} else {
|
|
log::debug!("heuristic chose to search stdin");
|
|
(PathBuf::from("-"), true)
|
|
};
|
|
Ok(Paths { paths: vec![path], has_implicit_path: true, is_one_file })
|
|
}
|
|
|
|
/// Returns true if ripgrep will only search stdin and nothing else.
|
|
fn is_only_stdin(&self) -> bool {
|
|
self.paths.len() == 1 && self.paths[0] == Path::new("-")
|
|
}
|
|
}
|
|
|
|
/// The "binary detection" configuration that ripgrep should use.
|
|
///
|
|
/// ripgrep actually uses two different binary detection heuristics depending
|
|
/// on whether a file is explicitly being searched (e.g., via a CLI argument)
|
|
/// or implicitly searched (e.g., via directory traversal). In general, the
|
|
/// former can never use a heuristic that lets it "quit" seaching before
|
|
/// either getting EOF or finding a match. (Because doing otherwise would be
|
|
/// considered a filter, and ripgrep follows the rule that an explicitly given
|
|
/// file is always searched.)
|
|
#[derive(Debug)]
|
|
struct BinaryDetection {
|
|
explicit: grep::searcher::BinaryDetection,
|
|
implicit: grep::searcher::BinaryDetection,
|
|
}
|
|
|
|
impl BinaryDetection {
|
|
/// Determines the correct binary detection mode from low-level arguments.
|
|
fn from_low_args(_: &State, low: &LowArgs) -> BinaryDetection {
|
|
let none = matches!(low.binary, BinaryMode::AsText) || low.null_data;
|
|
let convert = matches!(low.binary, BinaryMode::SearchAndSuppress);
|
|
let explicit = if none {
|
|
grep::searcher::BinaryDetection::none()
|
|
} else {
|
|
grep::searcher::BinaryDetection::convert(b'\x00')
|
|
};
|
|
let implicit = if none {
|
|
grep::searcher::BinaryDetection::none()
|
|
} else if convert {
|
|
grep::searcher::BinaryDetection::convert(b'\x00')
|
|
} else {
|
|
grep::searcher::BinaryDetection::quit(b'\x00')
|
|
};
|
|
BinaryDetection { explicit, implicit }
|
|
}
|
|
|
|
/// Returns true when both implicit and explicit binary detection is
|
|
/// disabled.
|
|
pub(crate) fn is_none(&self) -> bool {
|
|
let none = grep::searcher::BinaryDetection::none();
|
|
self.explicit == none && self.implicit == none
|
|
}
|
|
}
|
|
|
|
/// Builds the file type matcher from low level arguments.
|
|
fn types(low: &LowArgs) -> anyhow::Result<ignore::types::Types> {
|
|
let mut builder = ignore::types::TypesBuilder::new();
|
|
builder.add_defaults();
|
|
for tychange in low.type_changes.iter() {
|
|
match tychange {
|
|
TypeChange::Clear { ref name } => {
|
|
builder.clear(name);
|
|
}
|
|
TypeChange::Add { ref def } => {
|
|
builder.add_def(def)?;
|
|
}
|
|
TypeChange::Select { ref name } => {
|
|
builder.select(name);
|
|
}
|
|
TypeChange::Negate { ref name } => {
|
|
builder.negate(name);
|
|
}
|
|
}
|
|
}
|
|
Ok(builder.build()?)
|
|
}
|
|
|
|
/// Builds the glob "override" matcher from the CLI `-g/--glob` and `--iglob`
|
|
/// flags.
|
|
fn globs(
|
|
state: &State,
|
|
low: &LowArgs,
|
|
) -> anyhow::Result<ignore::overrides::Override> {
|
|
if low.globs.is_empty() && low.iglobs.is_empty() {
|
|
return Ok(ignore::overrides::Override::empty());
|
|
}
|
|
let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd);
|
|
// Make all globs case insensitive with --glob-case-insensitive.
|
|
if low.glob_case_insensitive {
|
|
builder.case_insensitive(true).unwrap();
|
|
}
|
|
for glob in low.globs.iter() {
|
|
builder.add(glob)?;
|
|
}
|
|
// This only enables case insensitivity for subsequent globs.
|
|
builder.case_insensitive(true).unwrap();
|
|
for glob in low.iglobs.iter() {
|
|
builder.add(&glob)?;
|
|
}
|
|
Ok(builder.build()?)
|
|
}
|
|
|
|
/// Builds a glob matcher for all of the preprocessor globs (via `--pre-glob`).
|
|
fn preprocessor_globs(
|
|
state: &State,
|
|
low: &LowArgs,
|
|
) -> anyhow::Result<ignore::overrides::Override> {
|
|
if low.pre_glob.is_empty() {
|
|
return Ok(ignore::overrides::Override::empty());
|
|
}
|
|
let mut builder = ignore::overrides::OverrideBuilder::new(&state.cwd);
|
|
for glob in low.pre_glob.iter() {
|
|
builder.add(glob)?;
|
|
}
|
|
Ok(builder.build()?)
|
|
}
|
|
|
|
/// Determines whether stats should be tracked for this search. If so, a stats
|
|
/// object is returned.
|
|
fn stats(low: &LowArgs) -> Option<grep::printer::Stats> {
|
|
if !matches!(low.mode, Mode::Search(_)) {
|
|
return None;
|
|
}
|
|
if low.stats || matches!(low.mode, Mode::Search(SearchMode::JSON)) {
|
|
return Some(grep::printer::Stats::new());
|
|
}
|
|
None
|
|
}
|
|
|
|
/// Pulls out any color specs provided by the user and assembles them into one
|
|
/// single configuration.
|
|
fn take_color_specs(_: &mut State, low: &mut LowArgs) -> ColorSpecs {
|
|
let mut specs = grep::printer::default_color_specs();
|
|
for spec in low.colors.drain(..) {
|
|
specs.push(spec);
|
|
}
|
|
ColorSpecs::new(&specs)
|
|
}
|
|
|
|
/// Pulls out the necessary info from the low arguments to build a full
|
|
/// hyperlink configuration.
|
|
fn take_hyperlink_config(
|
|
_: &mut State,
|
|
low: &mut LowArgs,
|
|
) -> anyhow::Result<grep::printer::HyperlinkConfig> {
|
|
let mut env = grep::printer::HyperlinkEnvironment::new();
|
|
if let Some(hostname) = hostname(low.hostname_bin.as_deref()) {
|
|
log::debug!("found hostname for hyperlink configuration: {hostname}");
|
|
env.host(Some(hostname));
|
|
}
|
|
if let Some(wsl_prefix) = wsl_prefix() {
|
|
log::debug!(
|
|
"found wsl_prefix for hyperlink configuration: {wsl_prefix}"
|
|
);
|
|
env.wsl_prefix(Some(wsl_prefix));
|
|
}
|
|
let fmt = std::mem::take(&mut low.hyperlink_format);
|
|
log::debug!("hyperlink format: {:?}", fmt.to_string());
|
|
Ok(grep::printer::HyperlinkConfig::new(env, fmt))
|
|
}
|
|
|
|
/// Attempts to discover the current working directory.
|
|
///
|
|
/// This mostly just defers to the standard library, however, such things will
|
|
/// fail if ripgrep is in a directory that no longer exists. We attempt some
|
|
/// fallback mechanisms, such as querying the PWD environment variable, but
|
|
/// otherwise return an error.
|
|
fn current_dir() -> anyhow::Result<PathBuf> {
|
|
let err = match std::env::current_dir() {
|
|
Err(err) => err,
|
|
Ok(cwd) => return Ok(cwd),
|
|
};
|
|
if let Some(cwd) = std::env::var_os("PWD") {
|
|
if !cwd.is_empty() {
|
|
return Ok(PathBuf::from(cwd));
|
|
}
|
|
}
|
|
anyhow::bail!(
|
|
"failed to get current working directory: {err}\n\
|
|
did your CWD get deleted?",
|
|
)
|
|
}
|
|
|
|
/// Retrieves the hostname that should be used wherever a hostname is required.
|
|
///
|
|
/// Currently, this is only used in the hyperlink format.
|
|
///
|
|
/// This works by first running the given binary program (if present and with
|
|
/// no arguments) to get the hostname after trimming leading and trailing
|
|
/// whitespace. If that fails for any reason, then it falls back to getting
|
|
/// the hostname via platform specific means (e.g., `gethostname` on Unix).
|
|
///
|
|
/// The purpose of `bin` is to make it possible for end users to override how
|
|
/// ripgrep determines the hostname.
|
|
fn hostname(bin: Option<&Path>) -> Option<String> {
|
|
let Some(bin) = bin else { return platform_hostname() };
|
|
let bin = match grep::cli::resolve_binary(bin) {
|
|
Ok(bin) => bin,
|
|
Err(err) => {
|
|
log::debug!(
|
|
"failed to run command '{bin:?}' to get hostname \
|
|
(falling back to platform hostname): {err}",
|
|
);
|
|
return platform_hostname();
|
|
}
|
|
};
|
|
let mut cmd = std::process::Command::new(&bin);
|
|
cmd.stdin(std::process::Stdio::null());
|
|
let rdr = match grep::cli::CommandReader::new(&mut cmd) {
|
|
Ok(rdr) => rdr,
|
|
Err(err) => {
|
|
log::debug!(
|
|
"failed to spawn command '{bin:?}' to get \
|
|
hostname (falling back to platform hostname): {err}",
|
|
);
|
|
return platform_hostname();
|
|
}
|
|
};
|
|
let out = match std::io::read_to_string(rdr) {
|
|
Ok(out) => out,
|
|
Err(err) => {
|
|
log::debug!(
|
|
"failed to read output from command '{bin:?}' to get \
|
|
hostname (falling back to platform hostname): {err}",
|
|
);
|
|
return platform_hostname();
|
|
}
|
|
};
|
|
let hostname = out.trim();
|
|
if hostname.is_empty() {
|
|
log::debug!(
|
|
"output from command '{bin:?}' is empty after trimming \
|
|
leading and trailing whitespace (falling back to \
|
|
platform hostname)",
|
|
);
|
|
return platform_hostname();
|
|
}
|
|
Some(hostname.to_string())
|
|
}
|
|
|
|
/// Attempts to get the hostname by using platform specific routines.
|
|
///
|
|
/// For example, this will do `gethostname` on Unix and `GetComputerNameExW` on
|
|
/// Windows.
|
|
fn platform_hostname() -> Option<String> {
|
|
let hostname_os = match grep::cli::hostname() {
|
|
Ok(x) => x,
|
|
Err(err) => {
|
|
log::debug!("could not get hostname: {}", err);
|
|
return None;
|
|
}
|
|
};
|
|
let Some(hostname) = hostname_os.to_str() else {
|
|
log::debug!(
|
|
"got hostname {:?}, but it's not valid UTF-8",
|
|
hostname_os
|
|
);
|
|
return None;
|
|
};
|
|
Some(hostname.to_string())
|
|
}
|
|
|
|
/// Returns the value for the `{wslprefix}` variable in a hyperlink format.
|
|
///
|
|
/// A WSL prefix is a share/network like thing that is meant to permit Windows
|
|
/// applications to open files stored within a WSL drive.
|
|
///
|
|
/// If a WSL distro name is unavailable, not valid UTF-8 or this isn't running
|
|
/// in a Unix environment, then this returns None.
|
|
///
|
|
/// See: <https://learn.microsoft.com/en-us/windows/wsl/filesystems>
|
|
fn wsl_prefix() -> Option<String> {
|
|
if !cfg!(unix) {
|
|
return None;
|
|
}
|
|
let distro_os = std::env::var_os("WSL_DISTRO_NAME")?;
|
|
let Some(distro) = distro_os.to_str() else {
|
|
log::debug!(
|
|
"found WSL_DISTRO_NAME={:?}, but value is not UTF-8",
|
|
distro_os
|
|
);
|
|
return None;
|
|
};
|
|
Some(format!("wsl$/{distro}"))
|
|
}
|
|
|
|
/// Possibly suggest another regex engine based on the error message given.
|
|
///
|
|
/// This inspects an error resulting from building a Rust regex matcher, and
|
|
/// if it's believed to correspond to a syntax error that another engine could
|
|
/// handle, then add a message to suggest the use of the engine flag.
|
|
fn suggest_other_engine(msg: String) -> String {
|
|
if let Some(pcre_msg) = suggest_pcre2(&msg) {
|
|
return pcre_msg;
|
|
}
|
|
msg
|
|
}
|
|
|
|
/// Possibly builds a message that suggests PCRE2 for the given error.
///
/// The error message is assumed to come from building a Rust regex matcher.
/// When ripgrep is compiled with the `pcre2` feature and the message
/// mentions backreferences or look-around, the message is returned with a
/// hint to use -P/--pcre2 appended. In every other case, `None` is returned.
fn suggest_pcre2(msg: &str) -> Option<String> {
    let applicable = cfg!(feature = "pcre2")
        && (msg.contains("backreferences") || msg.contains("look-around"));
    applicable.then(|| {
        format!(
            "{msg}

Consider enabling PCRE2 with the --pcre2 flag, which can handle backreferences
and look-around.",
        )
    })
}
|
|
|
|
/// Possibly extends the given error message with a hint to use multiline
/// mode.
///
/// This is a somewhat hacky inspection of the message. If it contains both
/// "the literal" and "not allowed", then it is assumed that the user tried
/// to type a literal line terminator and a hint to use -U/--multiline is
/// appended. Otherwise, the message is returned unchanged.
fn suggest_multiline(msg: String) -> String {
    let mentions_all = ["the literal", "not allowed"]
        .iter()
        .all(|needle| msg.contains(*needle));
    if !mentions_all {
        return msg;
    }
    format!(
        "{msg}

Consider enabling multiline mode with the --multiline flag (or -U for short).
When multiline mode is enabled, new line characters can be matched.",
    )
}
|
|
|
|
/// Possibly extends the given error message with a hint to use the
/// `-a/--text` flag.
///
/// The hint is appended only when the message reports that the pattern
/// contains `"\0"`. Otherwise, the message is returned unchanged.
fn suggest_text(msg: String) -> String {
    if !msg.contains("pattern contains \"\\0\"") {
        return msg;
    }
    format!(
        "{msg}

Consider enabling text mode with the --text flag (or -a for short). Otherwise,
binary detection is enabled and matching a NUL byte is impossible.",
    )
}
|