mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-07 11:13:17 +02:00
ripgrep: add --pre-glob flag
The --pre-glob flag is like the --glob flag, except that it selects which files are passed through the preprocessor rather than which files are searched. This makes it possible to apply the preprocessor to only a small subset of files, which can greatly reduce the process overhead of using a preprocessor when searching large directories.
This commit is contained in:
parent
b6e30124e0
commit
241bc8f8fc
@ -46,6 +46,8 @@ Feature enhancements:
|
||||
The `--passthru` flag now works with the `--replace` flag.
|
||||
* FEATURE:
|
||||
Add `--line-buffered` and `--block-buffered` for forcing a buffer strategy.
|
||||
* FEATURE:
|
||||
Add `--pre-glob` for filtering files through the `--pre` flag.
|
||||
|
||||
Bug fixes:
|
||||
|
||||
|
@ -183,6 +183,9 @@ _rg() {
|
||||
'(-z --search-zip)--pre=[specify preprocessor utility]:preprocessor utility:_command_names -e'
|
||||
$no'--no-pre[disable preprocessor utility]'
|
||||
|
||||
+ pre-glob # Preprocessing glob options
|
||||
'*--pre-glob=[include/exclude files for preprocessing with --pre]:glob'
|
||||
|
||||
+ '(pretty-vimgrep)' # Pretty/vimgrep display options
|
||||
'(heading)'{-p,--pretty}'[alias for --color=always --heading -n]'
|
||||
'(heading passthru)--vimgrep[show results in vim-compatible format]'
|
||||
|
150
src/app.rs
150
src/app.rs
@ -598,6 +598,7 @@ pub fn all_args_and_flags() -> Vec<RGArg> {
|
||||
flag_passthru(&mut args);
|
||||
flag_pcre2(&mut args);
|
||||
flag_pre(&mut args);
|
||||
flag_pre_glob(&mut args);
|
||||
flag_pretty(&mut args);
|
||||
flag_quiet(&mut args);
|
||||
flag_regex_size_limit(&mut args);
|
||||
@ -1819,6 +1820,97 @@ This flag can be disabled with --no-pcre2.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_pre(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
|
||||
const LONG: &str = long!("\
|
||||
For each input FILE, search the standard output of COMMAND FILE rather than the
|
||||
contents of FILE. This option expects the COMMAND program to either be an
|
||||
absolute path or to be available in your PATH. Either an empty string COMMAND
|
||||
or the `--no-pre` flag will disable this behavior.
|
||||
|
||||
WARNING: When this flag is set, ripgrep will unconditionally spawn a
|
||||
process for every file that is searched. Therefore, this can incur an
|
||||
unnecessarily large performance penalty if you don't otherwise need the
|
||||
flexibility offered by this flag.
|
||||
|
||||
A preprocessor is not run when ripgrep is searching stdin.
|
||||
|
||||
When searching over sets of files that may require one of several decoders
|
||||
as preprocessors, COMMAND should be a wrapper program or script which first
|
||||
classifies FILE based on magic numbers/content or based on the FILE name and
|
||||
then dispatches to an appropriate preprocessor. Each COMMAND also has its
|
||||
standard input connected to FILE for convenience.
|
||||
|
||||
For example, a shell script for COMMAND might look like:
|
||||
|
||||
case \"$1\" in
|
||||
*.pdf)
|
||||
exec pdftotext \"$1\" -
|
||||
;;
|
||||
*)
|
||||
case $(file \"$1\") in
|
||||
*Zstandard*)
|
||||
exec pzstd -cdq
|
||||
;;
|
||||
*)
|
||||
exec cat
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
The above script uses `pdftotext` to convert a PDF file to plain text. For
|
||||
all other files, the script uses the `file` utility to sniff the type of the
|
||||
file based on its contents. If it is a compressed file in the Zstandard format,
|
||||
then `pzstd` is used to decompress the contents to stdout.
|
||||
|
||||
This overrides the -z/--search-zip flag.
|
||||
");
|
||||
let arg = RGArg::flag("pre", "COMMAND")
|
||||
.help(SHORT).long_help(LONG)
|
||||
.overrides("no-pre")
|
||||
.overrides("search-zip");
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("no-pre")
|
||||
.hidden()
|
||||
.overrides("pre");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_pre_glob(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str =
|
||||
"Include or exclude files from a preprocessing command.";
|
||||
const LONG: &str = long!("\
|
||||
This flag works in conjunction with the --pre flag. Namely, when one or more
|
||||
--pre-glob flags are given, then only files that match the given set of globs
|
||||
will be handed to the command specified by the --pre flag. Any non-matching
|
||||
files will be searched without using the preprocessor command.
|
||||
|
||||
This flag is useful when searching many files with the --pre flag. Namely,
|
||||
it permits the ability to avoid process overhead for files that don't need
|
||||
preprocessing. For example, given the following shell script, 'pre-pdftotext':
|
||||
|
||||
#!/bin/sh
|
||||
|
||||
pdftotext \"$1\" -
|
||||
|
||||
then it is possible to use '--pre pre-pdftotext --pre-glob \'*.pdf\'' to make
|
||||
it so ripgrep only executes the 'pre-pdftotext' command on files with a '.pdf'
|
||||
extension.
|
||||
|
||||
Multiple --pre-glob flags may be used. Globbing rules match .gitignore globs.
|
||||
Precede a glob with a ! to exclude it.
|
||||
|
||||
This flag has no effect if the --pre flag is not used.
|
||||
");
|
||||
let arg = RGArg::flag("pre-glob", "GLOB")
|
||||
.help(SHORT).long_help(LONG)
|
||||
.multiple()
|
||||
.allow_leading_hyphen();
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_pretty(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Alias for --color always --heading --line-number.";
|
||||
const LONG: &str = long!("\
|
||||
@ -1924,64 +2016,6 @@ This flag can be disabled with --no-search-zip.
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_pre(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "search outputs of COMMAND FILE for each FILE";
|
||||
const LONG: &str = long!("\
|
||||
For each input FILE, search the standard output of COMMAND FILE rather than the
|
||||
contents of FILE. This option expects the COMMAND program to either be an
|
||||
absolute path or to be available in your PATH. Either an empty string COMMAND
|
||||
or the `--no-pre` flag will disable this behavior.
|
||||
|
||||
WARNING: When this flag is set, ripgrep will unconditionally spawn a
|
||||
process for every file that is searched. Therefore, this can incur an
|
||||
unnecessarily large performance penalty if you don't otherwise need the
|
||||
flexibility offered by this flag.
|
||||
|
||||
A preprocessor is not run when ripgrep is searching stdin.
|
||||
|
||||
When searching over sets of files that may require one of several decoders
|
||||
as preprocessors, COMMAND should be a wrapper program or script which first
|
||||
classifies FILE based on magic numbers/content or based on the FILE name and
|
||||
then dispatches to an appropriate preprocessor. Each COMMAND also has its
|
||||
standard input connected to FILE for convenience.
|
||||
|
||||
For example, a shell script for COMMAND might look like:
|
||||
|
||||
case \"$1\" in
|
||||
*.pdf)
|
||||
exec pdftotext \"$1\" -
|
||||
;;
|
||||
*)
|
||||
case $(file \"$1\") in
|
||||
*Zstandard*)
|
||||
exec pzstd -cdq
|
||||
;;
|
||||
*)
|
||||
exec cat
|
||||
;;
|
||||
esac
|
||||
;;
|
||||
esac
|
||||
|
||||
The above script uses `pdftotext` to convert a PDF file to plain text. For
|
||||
all other files, the script uses the `file` utility to sniff the type of the
|
||||
file based on its contents. If it is a compressed file in the Zstandard format,
|
||||
then `pzstd` is used to decompress the contents to stdout.
|
||||
|
||||
This overrides the -z/--search-zip flag.
|
||||
");
|
||||
let arg = RGArg::flag("pre", "COMMAND")
|
||||
.help(SHORT).long_help(LONG)
|
||||
.overrides("no-pre")
|
||||
.overrides("search-zip");
|
||||
args.push(arg);
|
||||
|
||||
let arg = RGArg::switch("no-pre")
|
||||
.hidden()
|
||||
.overrides("pre");
|
||||
args.push(arg);
|
||||
}
|
||||
|
||||
fn flag_smart_case(args: &mut Vec<RGArg>) {
|
||||
const SHORT: &str = "Smart case search.";
|
||||
const LONG: &str = long!("\
|
||||
|
12
src/args.rs
12
src/args.rs
@ -285,6 +285,7 @@ impl Args {
|
||||
builder
|
||||
.json_stats(self.matches().is_present("json"))
|
||||
.preprocessor(self.matches().preprocessor())
|
||||
.preprocessor_globs(self.matches().preprocessor_globs()?)
|
||||
.search_zip(self.matches().is_present("search-zip"));
|
||||
Ok(builder.build(matcher, searcher, printer))
|
||||
}
|
||||
@ -1323,6 +1324,17 @@ impl ArgMatches {
|
||||
Some(Path::new(path).to_path_buf())
|
||||
}
|
||||
|
||||
/// Builds the set of globs for filtering files to apply to the --pre
|
||||
/// flag. If no --pre-globs are available, then this always returns an
|
||||
/// empty set of globs.
|
||||
fn preprocessor_globs(&self) -> Result<Override> {
|
||||
let mut builder = OverrideBuilder::new(env::current_dir()?);
|
||||
for glob in self.values_of_lossy_vec("pre-glob") {
|
||||
builder.add(&glob)?;
|
||||
}
|
||||
Ok(builder.build()?)
|
||||
}
|
||||
|
||||
/// Parse the regex-size-limit argument option into a byte count.
|
||||
fn regex_size_limit(&self) -> Result<Option<usize>> {
|
||||
let r = self.parse_human_readable_size("regex-size-limit")?;
|
||||
|
@ -11,6 +11,7 @@ use grep::pcre2::{RegexMatcher as PCRE2RegexMatcher};
|
||||
use grep::printer::{JSON, Standard, Summary, Stats};
|
||||
use grep::regex::{RegexMatcher as RustRegexMatcher};
|
||||
use grep::searcher::Searcher;
|
||||
use ignore::overrides::Override;
|
||||
use serde_json as json;
|
||||
use termcolor::WriteColor;
|
||||
|
||||
@ -23,6 +24,7 @@ use subject::Subject;
|
||||
struct Config {
|
||||
json_stats: bool,
|
||||
preprocessor: Option<PathBuf>,
|
||||
preprocessor_globs: Override,
|
||||
search_zip: bool,
|
||||
}
|
||||
|
||||
@ -31,6 +33,7 @@ impl Default for Config {
|
||||
Config {
|
||||
json_stats: false,
|
||||
preprocessor: None,
|
||||
preprocessor_globs: Override::empty(),
|
||||
search_zip: false,
|
||||
}
|
||||
}
|
||||
@ -108,6 +111,17 @@ impl SearchWorkerBuilder {
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the globs for determining which files should be run through the
|
||||
/// preprocessor. By default, with no globs and a preprocessor specified,
|
||||
/// every file is run through the preprocessor.
|
||||
pub fn preprocessor_globs(
|
||||
&mut self,
|
||||
globs: Override,
|
||||
) -> &mut SearchWorkerBuilder {
|
||||
self.config.preprocessor_globs = globs;
|
||||
self
|
||||
}
|
||||
|
||||
/// Enable the decompression and searching of common compressed files.
|
||||
///
|
||||
/// When enabled, if a particular file path is recognized as a compressed
|
||||
@ -298,7 +312,7 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
let stdin = io::stdin();
|
||||
// A `return` here appeases the borrow checker. NLL will fix this.
|
||||
return self.search_reader(path, stdin.lock());
|
||||
} else if self.config.preprocessor.is_some() {
|
||||
} else if self.should_preprocess(path) {
|
||||
self.search_preprocessor(path)
|
||||
} else if self.should_decompress(path) {
|
||||
self.search_decompress(path)
|
||||
@ -316,6 +330,20 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
self.decomp_builder.get_matcher().has_command(path)
|
||||
}
|
||||
|
||||
/// Returns true if and only if the given file path should be run through
|
||||
/// the preprocessor.
|
||||
fn should_preprocess(&self, path: &Path) -> bool {
|
||||
if !self.config.preprocessor.is_some() {
|
||||
return false;
|
||||
}
|
||||
if self.config.preprocessor_globs.is_empty() {
|
||||
return true;
|
||||
}
|
||||
!self.config.preprocessor_globs.matched(path, false).is_ignore()
|
||||
}
|
||||
|
||||
/// Search the given file path by first asking the preprocessor for the
|
||||
/// data to search instead of opening the path directly.
|
||||
fn search_preprocessor(
|
||||
&mut self,
|
||||
path: &Path,
|
||||
@ -333,6 +361,9 @@ impl<W: WriteColor> SearchWorker<W> {
|
||||
})
|
||||
}
|
||||
|
||||
/// Attempt to decompress the data at the given file path and search the
|
||||
/// result. If the given file path isn't recognized as a compressed file,
|
||||
/// then search it without doing any decompression.
|
||||
fn search_decompress(
|
||||
&mut self,
|
||||
path: &Path,
|
||||
|
@ -816,6 +816,24 @@ be, to a very large extent, the result of luck. Sherlock Holmes
|
||||
eqnice!(expected, cmd.stdout());
|
||||
});
|
||||
|
||||
// Exercises --pre-glob: with `--pre xzcat --pre-glob '*.xz'`, the compressed
// fixture is searched through the preprocessor while the plain-text fixture
// is searched directly, so both files report the same two matching lines.
rgtest!(preprocessing_glob, |dir: Dir, mut cmd: TestCommand| {
    // Nothing to verify on machines without the xzcat utility.
    if !cmd_exists("xzcat") {
        return;
    }

    dir.create("sherlock", SHERLOCK);
    dir.create_bytes("sherlock.xz", include_bytes!("./data/sherlock.xz"));
    cmd.args(&["--pre", "xzcat", "--pre-glob", "*.xz", "Sherlock"]);

    let want = "\
sherlock.xz:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock.xz:be, to a very large extent, the result of luck. Sherlock Holmes
sherlock:For the Doctor Watsons of this world, as opposed to the Sherlock
sherlock:be, to a very large extent, the result of luck. Sherlock Holmes
";
    // Both sides are sorted before comparing, so the order in which the two
    // files are reported does not matter.
    eqnice!(sort_lines(want), sort_lines(&cmd.stdout()));
});
|
||||
|
||||
rgtest!(compressed_gzip, |dir: Dir, mut cmd: TestCommand| {
|
||||
if !cmd_exists("gzip") {
|
||||
return;
|
||||
|
Loading…
Reference in New Issue
Block a user