mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-24 17:12:16 +02:00
ripgrep: add --pre-glob flag
The --pre-glob flag is like the --glob flag, except that it selects which files are passed through the preprocessor rather than which files are searched. This makes it possible to apply the preprocessor to only a small subset of files, which can greatly reduce the process overhead of using a preprocessor when searching large directories.
This commit is contained in:
parent
b6e30124e0
commit
241bc8f8fc
@ -46,6 +46,8 @@ Feature enhancements:
|
|||||||
The `--passthru` flag now works with the `--replace` flag.
|
The `--passthru` flag now works with the `--replace` flag.
|
||||||
* FEATURE:
|
* FEATURE:
|
||||||
Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
|
Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
|
||||||
|
* FEATURE:
|
||||||
|
Add `--pre-glob` for filtering files through the `--pre` flag.
|
||||||
|
|
||||||
Bug fixes:
|
Bug fixes:
|
||||||
|
|
||||||
|
@ -183,6 +183,9 @@ _rg() {
|
|||||||
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
||||||
$no'--no-pre[disable preprocessor utility]'
|
$no'--no-pre[disable preprocessor utility]'
|
||||||
|
|
||||||
|
+ pre-glob # Preprocessing glob options
|
||||||
|
'*--pre-glob[include/exclude files for preprocessing with --pre]'
|
||||||
|
|
||||||
+ '(pretty-vimgrep)' # Pretty/vimgrep display options
|
+ '(pretty-vimgrep)' # Pretty/vimgrep display options
|
||||||
'(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
|
'(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
|
||||||
'(heading passthru)--vimgrep[show results in vim-compatible format]'
|
'(heading passthru)--vimgrep[show results in vim-compatible format]'
|
||||||
|
150
src/app.rs
150
src/app.rs
@ -598,6 +598,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
|||||||
flag_passthru(&mut args);
|
flag_passthru(&mut args);
|
||||||
flag_pcre2(&mut args);
|
flag_pcre2(&mut args);
|
||||||
flag_pre(&mut args);
|
flag_pre(&mut args);
|
||||||
|
flag_pre_glob(&mut args);
|
||||||
flag_pretty(&mut args);
|
flag_pretty(&mut args);
|
||||||
flag_quiet(&mut args);
|
flag_quiet(&mut args);
|
||||||
flag_regex_size_limit(&mut args);
|
flag_regex_size_limit(&mut args);
|
||||||
@ -1819,6 +1820,97 @@ This flag can be disabled with --no-pcre2.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn flag_pre(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
For each input FILE, search the standard output of COMMAND FILE rather than the
|
||||||
|
contents of FILE. This option expects the COMMAND program to either be an
|
||||||
|
absolute path or to be available in your PATH. Either an empty string COMMAND
|
||||||
|
or the `--no-pre` flag will disable this behavior.
|
||||||
|
|
||||||
|
WARNING: When this flag is set, ripgrep will unconditionally spawn a
|
||||||
|
process for every file that is searched. Therefore, this can incur an
|
||||||
|
unnecessarily large performance penalty if you don't otherwise need the
|
||||||
|
flexibility offered by this flag.
|
||||||
|
|
||||||
|
A preprocessor is not run when ripgrep is searching stdin.
|
||||||
|
|
||||||
|
When searching over sets of files that may require one of several decoders
|
||||||
|
as preprocessors, COMMAND should be a wrapper program or script which first
|
||||||
|
classifies FILE based on magic numbers/content or based on the FILE name and
|
||||||
|
then dispatches to an appropriate preprocessor. Each COMMAND also has its
|
||||||
|
standard input connected to FILE for convenience.
|
||||||
|
|
||||||
|
For example, a shell script for COMMAND might look like:
|
||||||
|
|
||||||
|
case \"$1\" in
|
||||||
|
*.pdf)
|
||||||
|
exec pdftotext \"$1\" -
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
case $(file \"$1\") in
|
||||||
|
*Zstandard*)
|
||||||
|
exec pzstd -cdq
|
||||||
|
;;
|
||||||
|
*)
|
||||||
|
exec cat
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
The above script uses `pdftotext` to convert a PDF file to plain text. For
|
||||||
|
all other files, the script uses the `file` utility to sniff the type of the
|
||||||
|
file based on its contents. If it is a compressed file in the Zstandard format,
|
||||||
|
then `pzstd` is used to decompress the contents to stdout.
|
||||||
|
|
||||||
|
This overrides the -z/--search-zip flag.
|
||||||
|
");
|
||||||
|
let arg = RGArg::flag("pre", "COMMAND")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.overrides("no-pre")
|
||||||
|
.overrides("search-zip");
|
||||||
|
args.push(arg);
|
||||||
|
|
||||||
|
let arg = RGArg::switch("no-pre")
|
||||||
|
.hidden()
|
||||||
|
.overrides("pre");
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn flag_pre_glob(args: &mut Vec<RGArg>) {
|
||||||
|
const SHORT: &str =
|
||||||
|
"Include or exclude files from a preprocessing command.";
|
||||||
|
const LONG: &str = long!("\
|
||||||
|
This flag works in conjunction with the --pre flag. Namely, when one or more
|
||||||
|
--pre-glob flags are given, then only files that match the given set of globs
|
||||||
|
will be handed to the command specified by the --pre flag. Any non-matching
|
||||||
|
files will be searched without using the preprocessor command.
|
||||||
|
|
||||||
|
This flag is useful when searching many files with the --pre flag. Namely,
|
||||||
|
it permits the ability to avoid process overhead for files that don't need
|
||||||
|
preprocessing. For example, given the following shell script, 'pre-pdftotext':
|
||||||
|
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
pdftotext \"$1\" -
|
||||||
|
|
||||||
|
then it is possible to use '--pre pre-pdftotext --pre-glob \'*.pdf\'' to make
|
||||||
|
it so ripgrep only executes the 'pre-pdftotext' command on files with a '.pdf'
|
||||||
|
extension.
|
||||||
|
|
||||||
|
Multiple --pre-glob flags may be used. Globbing rules match .gitignore globs.
|
||||||
|
Precede a glob with a ! to exclude it.
|
||||||
|
|
||||||
|
This flag has no effect if the --pre flag is not used.
|
||||||
|
");
|
||||||
|
let arg = RGArg::flag("pre-glob", "GLOB")
|
||||||
|
.help(SHORT).long_help(LONG)
|
||||||
|
.multiple()
|
||||||
|
.allow_leading_hyphen();
|
||||||
|
args.push(arg);
|
||||||
|
}
|
||||||
|
|
||||||
fn flag_pretty(args: &mut Vec<RGArg>) {
|
fn flag_pretty(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Alias for --color always --heading --line-number.";
|
const SHORT: &str = "Alias for --color always --heading --line-number.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
@ -1924,64 +2016,6 @@ This flag can be disabled with --no-search-zip.
|
|||||||
args.push(arg);
|
args.push(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn flag_pre(args: &mut Vec<RGArg>) {
|
|
||||||
const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
|
|
||||||
const LONG: &str = long!("\
|
|
||||||
For each input FILE, search the standard output of COMMAND FILE rather than the
|
|
||||||
contents of FILE. This option expects the COMMAND program to either be an
|
|
||||||
absolute path or to be available in your PATH. Either an empty string COMMAND
|
|
||||||
or the `--no-pre` flag will disable this behavior.
|
|
||||||
|
|
||||||
WARNING: When this flag is set, ripgrep will unconditionally spawn a
|
|
||||||
process for every file that is searched. Therefore, this can incur an
|
|
||||||
unnecessarily large performance penalty if you don't otherwise need the
|
|
||||||
flexibility offered by this flag.
|
|
||||||
|
|
||||||
A preprocessor is not run when ripgrep is searching stdin.
|
|
||||||
|
|
||||||
When searching over sets of files that may require one of several decoders
|
|
||||||
as preprocessors, COMMAND should be a wrapper program or script which first
|
|
||||||
classifies FILE based on magic numbers/content or based on the FILE name and
|
|
||||||
then dispatches to an appropriate preprocessor. Each COMMAND also has its
|
|
||||||
standard input connected to FILE for convenience.
|
|
||||||
|
|
||||||
For example, a shell script for COMMAND might look like:
|
|
||||||
|
|
||||||
case \"$1\" in
|
|
||||||
*.pdf)
|
|
||||||
exec pdftotext \"$1\" -
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
case $(file \"$1\") in
|
|
||||||
*Zstandard*)
|
|
||||||
exec pzstd -cdq
|
|
||||||
;;
|
|
||||||
*)
|
|
||||||
exec cat
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
;;
|
|
||||||
esac
|
|
||||||
|
|
||||||
The above script uses `pdftotext` to convert a PDF file to plain text. For
|
|
||||||
all other files, the script uses the `file` utility to sniff the type of the
|
|
||||||
file based on its contents. If it is a compressed file in the Zstandard format,
|
|
||||||
then `pzstd` is used to decompress the contents to stdout.
|
|
||||||
|
|
||||||
This overrides the -z/--search-zip flag.
|
|
||||||
");
|
|
||||||
let arg = RGArg::flag("pre", "COMMAND")
|
|
||||||
.help(SHORT).long_help(LONG)
|
|
||||||
.overrides("no-pre")
|
|
||||||
.overrides("search-zip");
|
|
||||||
args.push(arg);
|
|
||||||
|
|
||||||
let arg = RGArg::switch("no-pre")
|
|
||||||
.hidden()
|
|
||||||
.overrides("pre");
|
|
||||||
args.push(arg);
|
|
||||||
}
|
|
||||||
|
|
||||||
fn flag_smart_case(args: &mut Vec<RGArg>) {
|
fn flag_smart_case(args: &mut Vec<RGArg>) {
|
||||||
const SHORT: &str = "Smart case search.";
|
const SHORT: &str = "Smart case search.";
|
||||||
const LONG: &str = long!("\
|
const LONG: &str = long!("\
|
||||||
|
12
src/args.rs
12
src/args.rs
@ -285,6 +285,7 @@ impl Args {
|
|||||||
builder
|
builder
|
||||||
.json_stats(self.matches().is_present("json"))
|
.json_stats(self.matches().is_present("json"))
|
||||||
.preprocessor(self.matches().preprocessor())
|
.preprocessor(self.matches().preprocessor())
|
||||||
|
.preprocessor_globs(self.matches().preprocessor_globs()?)
|
||||||
.search_zip(self.matches().is_present("search-zip"));
|
.search_zip(self.matches().is_present("search-zip"));
|
||||||
Ok(builder.build(matcher, searcher, printer))
|
Ok(builder.build(matcher, searcher, printer))
|
||||||
}
|
}
|
||||||
@ -1323,6 +1324,17 @@ impl ArgMatches {
|
|||||||
Some(Path::new(path).to_path_buf())
|
Some(Path::new(path).to_path_buf())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Builds the set of globs for filtering files to apply to the --pre
|
||||||
|
/// flag. If no --pre-globs are available, then this always returns an
|
||||||
|
/// empty set of globs.
|
||||||
|
fn preprocessor_globs(&self) -> Result<Override> {
|
||||||
|
let mut builder = OverrideBuilder::new(env::current_dir()?);
|
||||||
|
for glob in self.values_of_lossy_vec("pre-glob") {
|
||||||
|
builder.add(&glob)?;
|
||||||
|
}
|
||||||
|
Ok(builder.build()?)
|
||||||
|
}
|
||||||
|
|
||||||
/// Parse the regex-size-limit argument option into a byte count.
|
/// Parse the regex-size-limit argument option into a byte count.
|
||||||
fn regex_size_limit(&self) -> Result<Option<usize>> {
|
fn regex_size_limit(&self) -> Result<Option<usize>> {
|
||||||
let r = self.parse_human_readable_size("regex-size-limit")?;
|
let r = self.parse_human_readable_size("regex-size-limit")?;
|
||||||
|
@ -11,6 +11,7 @@ use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
|
|||||||
use grep::printer::{JSON, Standard, Summary, Stats};
|
use grep::printer::{JSON, Standard, Summary, Stats};
|
||||||
use grep::regex::{RegexMatcher as RustRegexMatcher};
|
use grep::regex::{RegexMatcher as RustRegexMatcher};
|
||||||
use grep::searcher::Searcher;
|
use grep::searcher::Searcher;
|
||||||
|
use ignore::overrides::Override;
|
||||||
use serde_json as json;
|
use serde_json as json;
|
||||||
use termcolor::WriteColor;
|
use termcolor::WriteColor;
|
||||||
|
|
||||||
@ -23,6 +24,7 @@ use subject::Subject;
|
|||||||
struct Config {
|
struct Config {
|
||||||
json_stats: bool,
|
json_stats: bool,
|
||||||
preprocessor: Option<PathBuf>,
|
preprocessor: Option<PathBuf>,
|
||||||
|
preprocessor_globs: Override,
|
||||||
search_zip: bool,
|
search_zip: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -31,6 +33,7 @@ impl Default for Config {
|
|||||||
Config {
|
Config {
|
||||||
json_stats: false,
|
json_stats: false,
|
||||||
preprocessor: None,
|
preprocessor: None,
|
||||||
|
preprocessor_globs: Override::empty(),
|
||||||
search_zip: false,
|
search_zip: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -108,6 +111,17 @@ impl SearchWorkerBuilder {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Set the globs for determining which files should be run through the
|
||||||
|
/// preprocessor. By default, with no globs and a preprocessor specified,
|
||||||
|
/// every file is run through the preprocessor.
|
||||||
|
pub fn preprocessor_globs(
|
||||||
|
&mut self,
|
||||||
|
globs: Override,
|
||||||
|
) -> &mut SearchWorkerBuilder {
|
||||||
|
self.config.preprocessor_globs = globs;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
/// Enable the decompression and searching of common compressed files.
|
/// Enable the decompression and searching of common compressed files.
|
||||||
///
|
///
|
||||||
/// When enabled, if a particular file path is recognized as a compressed
|
/// When enabled, if a particular file path is recognized as a compressed
|
||||||
@ -298,7 +312,7 @@ impl<W: WriteColor> SearchWorker<W> {
|
|||||||
let stdin = io::stdin();
|
let stdin = io::stdin();
|
||||||
// A `return` here appeases the borrow checker. NLL will fix this.
|
// A `return` here appeases the borrow checker. NLL will fix this.
|
||||||
return self.search_reader(path, stdin.lock());
|
return self.search_reader(path, stdin.lock());
|
||||||
} else if self.config.preprocessor.is_some() {
|
} else if self.should_preprocess(path) {
|
||||||
self.search_preprocessor(path)
|
self.search_preprocessor(path)
|
||||||
} else if self.should_decompress(path) {
|
} else if self.should_decompress(path) {
|
||||||
self.search_decompress(path)
|
self.search_decompress(path)
|
||||||
@ -316,6 +330,20 @@ impl<W: WriteColor> SearchWorker<W> {
|
|||||||
self.decomp_builder.get_matcher().has_command(path)
|
self.decomp_builder.get_matcher().has_command(path)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if and only if the given file path should be run through
|
||||||
|
/// the preprocessor.
|
||||||
|
fn should_preprocess(&self, path: &Path) -> bool {
|
||||||
|
if !self.config.preprocessor.is_some() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if self.config.preprocessor_globs.is_empty() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
!self.config.preprocessor_globs.matched(path, false).is_ignore()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Search the given file path by first asking the preprocessor for the
|
||||||
|
/// data to search instead of opening the path directly.
|
||||||
fn search_preprocessor(
|
fn search_preprocessor(
|
||||||
&mut self,
|
&mut self,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
@ -333,6 +361,9 @@ impl<W: WriteColor> SearchWorker<W> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Attempt to decompress the data at the given file path and search the
|
||||||
|
/// result. If the given file path isn't recognized as a compressed file,
|
||||||
|
/// then search it without doing any decompression.
|
||||||
fn search_decompress(
|
fn search_decompress(
|
||||||
&mut self,
|
&mut self,
|
||||||
path: &Path,
|
path: &Path,
|
||||||
|
@ -816,6 +816,24 @@ be, to a very large extent, the result of luck. Sherlock Holmes
|
|||||||
eqnice!(expected, cmd.stdout());
|
eqnice!(expected, cmd.stdout());
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Checks that --pre-glob limits the --pre command to matching files: the
// expected output contains the same two matching lines for both the plain
// `sherlock` file and the xz-compressed `sherlock.xz`.
rgtest!(preprocessing_glob, |dir: Dir, mut cmd: TestCommand| {
    // The test depends on an external decompressor; bail out without it.
    if !cmd_exists("xzcat") {
        return;
    }

    dir.create("sherlock", SHERLOCK);
    dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz"));
    cmd.args(&["--pre", "xzcat", "--pre-glob", "*.xz", "Sherlock"]);

    let want = "\
sherlock.xz:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock.xz:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    // Both sides are sorted so the comparison does not depend on the order
    // in which the two files are reported.
    let got = cmd.stdout();
    eqnice!(sort_lines(want), sort_lines(&got));
});
|
||||||
|
|
||||||
rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| {
|
rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| {
|
||||||
if !cmd_exists("gzip") {
|
if !cmd_exists("gzip") {
|
||||||
return;
|
return;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user