mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-08-04 21:52:54 +02:00
printer: support -r/--replace
with --json
This adds a `replacement` field to each submatch object in the JSON output. In effect, this extends the `-r/--replace` flag so that it works with `--json`. This adds a new field instead of replacing the match text (which is how the standard printer works) for maximum flexibility. This way, consumers of the JSON output can access the original match text (and always rely on it corresponding to the original match text) while also getting the replacement text without needing to do the replacement themselves. Closes #1872, Closes #2883
This commit is contained in:
committed by
Andrew Gallant
parent
0904f55d3e
commit
cf91d6e67a
@ -22,6 +22,8 @@ Bug fixes:
|
||||
Feature enhancements:
|
||||
|
||||
* Many enhancements to the default set of file types available for filtering.
|
||||
* [FEATURE #1872](https://github.com/BurntSushi/ripgrep/issues/1872):
|
||||
Make `-r/--replace` work with `--json`.
|
||||
* [FEATURE #2708](https://github.com/BurntSushi/ripgrep/pull/2708):
|
||||
Completions for the fish shell take ripgrep's config file into account.
|
||||
* [FEATURE #2841](https://github.com/BurntSushi/ripgrep/pull/2841):
|
||||
|
@ -589,6 +589,7 @@ impl HiArgs {
|
||||
.pretty(false)
|
||||
.max_matches(self.max_count)
|
||||
.always_begin_end(false)
|
||||
.replacement(self.replace.clone().map(|r| r.into()))
|
||||
.build(wtr)
|
||||
}
|
||||
|
||||
|
@ -1,6 +1,7 @@
|
||||
use std::{
|
||||
io::{self, Write},
|
||||
path::Path,
|
||||
sync::Arc,
|
||||
time::Instant,
|
||||
};
|
||||
|
||||
@ -13,7 +14,8 @@ use {
|
||||
};
|
||||
|
||||
use crate::{
|
||||
counter::CounterWriter, jsont, stats::Stats, util::find_iter_at_in_context,
|
||||
counter::CounterWriter, jsont, stats::Stats,
|
||||
util::find_iter_at_in_context, util::Replacer,
|
||||
};
|
||||
|
||||
/// The configuration for the JSON printer.
|
||||
@ -26,11 +28,17 @@ struct Config {
|
||||
pretty: bool,
|
||||
max_matches: Option<u64>,
|
||||
always_begin_end: bool,
|
||||
replacement: Arc<Option<Vec<u8>>>,
|
||||
}
|
||||
|
||||
impl Default for Config {
|
||||
fn default() -> Config {
|
||||
Config { pretty: false, max_matches: None, always_begin_end: false }
|
||||
Config {
|
||||
pretty: false,
|
||||
max_matches: None,
|
||||
always_begin_end: false,
|
||||
replacement: Arc::new(None),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -98,6 +106,24 @@ impl JSONBuilder {
|
||||
self.config.always_begin_end = yes;
|
||||
self
|
||||
}
|
||||
|
||||
/// Set the bytes that will be used to replace each occurrence of a match
|
||||
/// found.
|
||||
///
|
||||
/// The replacement bytes given may include references to capturing groups,
|
||||
/// which may either be in index form (e.g., `$2`) or can reference named
|
||||
/// capturing groups if present in the original pattern (e.g., `$foo`).
|
||||
///
|
||||
/// For documentation on the full format, please see the `Capture` trait's
|
||||
/// `interpolate` method in the
|
||||
/// [grep-printer](https://docs.rs/grep-printer) crate.
|
||||
pub fn replacement(
|
||||
&mut self,
|
||||
replacement: Option<Vec<u8>>,
|
||||
) -> &mut JSONBuilder {
|
||||
self.config.replacement = Arc::new(replacement);
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
/// The JSON printer, which emits results in a JSON lines format.
|
||||
@ -256,7 +282,8 @@ impl JSONBuilder {
|
||||
/// encoded, then the byte offsets correspond to the data after base64
|
||||
/// decoding.) The `submatch` objects are guaranteed to be sorted by their
|
||||
/// starting offsets. Note that it is possible for this array to be empty,
|
||||
/// for example, when searching reports inverted matches.
|
||||
/// for example, when searching reports inverted matches. If the configuration
|
||||
/// specifies a replacement, the resulting replacement text is also present.
|
||||
///
|
||||
/// #### Message: **context**
|
||||
///
|
||||
@ -286,7 +313,9 @@ impl JSONBuilder {
|
||||
/// decoding.) The `submatch` objects are guaranteed to be sorted by
|
||||
/// their starting offsets. Note that it is possible for this array to be
|
||||
/// non-empty, for example, when searching reports inverted matches such that
|
||||
/// the original matcher could match things in the contextual lines.
|
||||
/// the original matcher could match things in the contextual lines. If the
|
||||
/// configuration specifies a replacemement, the resulting replacement text
|
||||
/// is also present.
|
||||
///
|
||||
/// #### Object: **submatch**
|
||||
///
|
||||
@ -308,6 +337,10 @@ impl JSONBuilder {
|
||||
/// the `lines` field in the
|
||||
/// [`match`](#message-match) or [`context`](#message-context)
|
||||
/// messages.
|
||||
/// * **replacement** (optional) - An
|
||||
/// [arbitrary data object](#object-arbitrary-data) corresponding to the
|
||||
/// replacement text for this submatch, if the configuration specifies
|
||||
/// a replacement.
|
||||
///
|
||||
/// #### Object: **stats**
|
||||
///
|
||||
@ -447,6 +480,23 @@ impl JSONBuilder {
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
/// and here's what a match type item would looks like if a replacement text
|
||||
/// of 'Moriarity' was given as a parameter:
|
||||
/// ```json
|
||||
/// {
|
||||
/// "type": "match",
|
||||
/// "data": {
|
||||
/// "path": {"text": "/home/andrew/sherlock"},
|
||||
/// "lines": {"text": "For the Doctor Watsons of this world, as opposed to the Sherlock\n"},
|
||||
/// "line_number": 1,
|
||||
/// "absolute_offset": 0,
|
||||
/// "submatches": [
|
||||
/// {"match": {"text": "Watson"}, "replacement": {"text": "Moriarity"}, "start": 15, "end": 21}
|
||||
/// ]
|
||||
/// }
|
||||
/// }
|
||||
/// ```
|
||||
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct JSON<W> {
|
||||
config: Config,
|
||||
@ -471,6 +521,7 @@ impl<W: io::Write> JSON<W> {
|
||||
) -> JSONSink<'static, 's, M, W> {
|
||||
JSONSink {
|
||||
matcher,
|
||||
replacer: Replacer::new(),
|
||||
json: self,
|
||||
path: None,
|
||||
start_time: Instant::now(),
|
||||
@ -497,6 +548,7 @@ impl<W: io::Write> JSON<W> {
|
||||
{
|
||||
JSONSink {
|
||||
matcher,
|
||||
replacer: Replacer::new(),
|
||||
json: self,
|
||||
path: Some(path.as_ref()),
|
||||
start_time: Instant::now(),
|
||||
@ -559,6 +611,7 @@ impl<W> JSON<W> {
|
||||
#[derive(Debug)]
|
||||
pub struct JSONSink<'p, 's, M: Matcher, W> {
|
||||
matcher: M,
|
||||
replacer: Replacer<M>,
|
||||
json: &'s mut JSON<W>,
|
||||
path: Option<&'p Path>,
|
||||
start_time: Instant,
|
||||
@ -643,6 +696,31 @@ impl<'p, 's, M: Matcher, W: io::Write> JSONSink<'p, 's, M, W> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// If the configuration specifies a replacement, then this executes the
|
||||
/// replacement, lazily allocating memory if necessary.
|
||||
///
|
||||
/// To access the result of a replacement, use `replacer.replacement()`.
|
||||
fn replace(
|
||||
&mut self,
|
||||
searcher: &Searcher,
|
||||
bytes: &[u8],
|
||||
range: std::ops::Range<usize>,
|
||||
) -> io::Result<()> {
|
||||
self.replacer.clear();
|
||||
if self.json.config.replacement.is_some() {
|
||||
let replacement =
|
||||
(*self.json.config.replacement).as_ref().map(|r| &*r).unwrap();
|
||||
self.replacer.replace_all(
|
||||
searcher,
|
||||
&self.matcher,
|
||||
bytes,
|
||||
range,
|
||||
replacement,
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Returns true if this printer should quit.
|
||||
///
|
||||
/// This implements the logic for handling quitting after seeing a certain
|
||||
@ -711,10 +789,15 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
mat.buffer(),
|
||||
mat.bytes_range_in_buffer(),
|
||||
)?;
|
||||
self.replace(searcher, mat.buffer(), mat.bytes_range_in_buffer())?;
|
||||
self.stats.add_matches(self.json.matches.len() as u64);
|
||||
self.stats.add_matched_lines(mat.lines().count() as u64);
|
||||
|
||||
let submatches = SubMatches::new(mat.bytes(), &self.json.matches);
|
||||
let submatches = SubMatches::new(
|
||||
mat.bytes(),
|
||||
&self.json.matches,
|
||||
self.replacer.replacement(),
|
||||
);
|
||||
let msg = jsont::Message::Match(jsont::Match {
|
||||
path: self.path,
|
||||
lines: mat.bytes(),
|
||||
@ -740,7 +823,12 @@ impl<'p, 's, M: Matcher, W: io::Write> Sink for JSONSink<'p, 's, M, W> {
|
||||
}
|
||||
let submatches = if searcher.invert_match() {
|
||||
self.record_matches(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
SubMatches::new(ctx.bytes(), &self.json.matches)
|
||||
self.replace(searcher, ctx.bytes(), 0..ctx.bytes().len())?;
|
||||
SubMatches::new(
|
||||
ctx.bytes(),
|
||||
&self.json.matches,
|
||||
self.replacer.replacement(),
|
||||
)
|
||||
} else {
|
||||
SubMatches::empty()
|
||||
};
|
||||
@ -831,19 +919,27 @@ enum SubMatches<'a> {
|
||||
impl<'a> SubMatches<'a> {
|
||||
/// Create a new set of match ranges from a set of matches and the
|
||||
/// corresponding bytes that those matches apply to.
|
||||
fn new(bytes: &'a [u8], matches: &[Match]) -> SubMatches<'a> {
|
||||
fn new(
|
||||
bytes: &'a [u8],
|
||||
matches: &[Match],
|
||||
replacement: Option<(&'a [u8], &'a [Match])>,
|
||||
) -> SubMatches<'a> {
|
||||
if matches.len() == 1 {
|
||||
let mat = matches[0];
|
||||
SubMatches::Small([jsont::SubMatch {
|
||||
m: &bytes[mat],
|
||||
replacement: replacement
|
||||
.map(|(rbuf, rmatches)| &rbuf[rmatches[0]]),
|
||||
start: mat.start(),
|
||||
end: mat.end(),
|
||||
}])
|
||||
} else {
|
||||
let mut match_ranges = vec![];
|
||||
for &mat in matches {
|
||||
for (i, &mat) in matches.iter().enumerate() {
|
||||
match_ranges.push(jsont::SubMatch {
|
||||
m: &bytes[mat],
|
||||
replacement: replacement
|
||||
.map(|(rbuf, rmatches)| &rbuf[rmatches[i]]),
|
||||
start: mat.start(),
|
||||
end: mat.end(),
|
||||
});
|
||||
|
@ -135,6 +135,7 @@ impl<'a> serde::Serialize for Context<'a> {
|
||||
|
||||
pub(crate) struct SubMatch<'a> {
|
||||
pub(crate) m: &'a [u8],
|
||||
pub(crate) replacement: Option<&'a [u8]>,
|
||||
pub(crate) start: usize,
|
||||
pub(crate) end: usize,
|
||||
}
|
||||
@ -148,6 +149,9 @@ impl<'a> serde::Serialize for SubMatch<'a> {
|
||||
|
||||
let mut state = s.serialize_struct("SubMatch", 3)?;
|
||||
state.serialize_field("match", &Data::from_bytes(self.m))?;
|
||||
if let Some(r) = self.replacement {
|
||||
state.serialize_field("replacement", &Data::from_bytes(r))?;
|
||||
}
|
||||
state.serialize_field("start", &self.start)?;
|
||||
state.serialize_field("end", &self.end)?;
|
||||
state.end()
|
||||
|
@ -55,11 +55,13 @@ impl Message {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct Begin {
|
||||
path: Option<Data>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct End {
|
||||
path: Option<Data>,
|
||||
binary_offset: Option<u64>,
|
||||
@ -67,12 +69,14 @@ struct End {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct Summary {
|
||||
elapsed_total: Duration,
|
||||
stats: Stats,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct Match {
|
||||
path: Option<Data>,
|
||||
lines: Data,
|
||||
@ -82,6 +86,7 @@ struct Match {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct Context {
|
||||
path: Option<Data>,
|
||||
lines: Data,
|
||||
@ -91,9 +96,11 @@ struct Context {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct SubMatch {
|
||||
#[serde(rename = "match")]
|
||||
m: Data,
|
||||
replacement: Option<Data>,
|
||||
start: usize,
|
||||
end: usize,
|
||||
}
|
||||
@ -117,6 +124,7 @@ impl Data {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct Stats {
|
||||
elapsed: Duration,
|
||||
searches: u64,
|
||||
@ -128,6 +136,7 @@ struct Stats {
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
struct Duration {
|
||||
#[serde(flatten)]
|
||||
duration: time::Duration,
|
||||
@ -178,6 +187,7 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
|
||||
absolute_offset: 129,
|
||||
submatches: vec![SubMatch {
|
||||
m: Data::text("Sherlock Holmes"),
|
||||
replacement: None,
|
||||
start: 48,
|
||||
end: 63,
|
||||
},],
|
||||
@ -189,6 +199,57 @@ rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
|
||||
assert_eq!(msgs[4].unwrap_summary().stats.bytes_printed, 494);
|
||||
});
|
||||
|
||||
rgtest!(replacement, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("sherlock", SHERLOCK);
|
||||
cmd.arg("--json")
|
||||
.arg("-B1")
|
||||
.arg("Sherlock Holmes")
|
||||
.args(["-r", "John Watson"])
|
||||
.arg("sherlock");
|
||||
|
||||
let msgs = json_decode(&cmd.stdout());
|
||||
|
||||
assert_eq!(
|
||||
msgs[0].unwrap_begin(),
|
||||
Begin { path: Some(Data::text("sherlock")) }
|
||||
);
|
||||
assert_eq!(
|
||||
msgs[1].unwrap_context(),
|
||||
Context {
|
||||
path: Some(Data::text("sherlock")),
|
||||
lines: Data::text(
|
||||
"Holmeses, success in the province of \
|
||||
detective work must always\n",
|
||||
),
|
||||
line_number: Some(2),
|
||||
absolute_offset: 65,
|
||||
submatches: vec![],
|
||||
}
|
||||
);
|
||||
assert_eq!(
|
||||
msgs[2].unwrap_match(),
|
||||
Match {
|
||||
path: Some(Data::text("sherlock")),
|
||||
lines: Data::text(
|
||||
"be, to a very large extent, the result of luck. \
|
||||
Sherlock Holmes\n",
|
||||
),
|
||||
line_number: Some(3),
|
||||
absolute_offset: 129,
|
||||
submatches: vec![SubMatch {
|
||||
m: Data::text("Sherlock Holmes"),
|
||||
replacement: Some(Data::text("John Watson")),
|
||||
start: 48,
|
||||
end: 63,
|
||||
},],
|
||||
}
|
||||
);
|
||||
assert_eq!(msgs[3].unwrap_end().path, Some(Data::text("sherlock")));
|
||||
assert_eq!(msgs[3].unwrap_end().binary_offset, None);
|
||||
assert_eq!(msgs[4].unwrap_summary().stats.searches_with_match, 1);
|
||||
assert_eq!(msgs[4].unwrap_summary().stats.bytes_printed, 531);
|
||||
});
|
||||
|
||||
rgtest!(quiet_stats, |dir: Dir, mut cmd: TestCommand| {
|
||||
dir.create("sherlock", SHERLOCK);
|
||||
cmd.arg("--json")
|
||||
@ -244,6 +305,7 @@ rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| {
|
||||
absolute_offset: 0,
|
||||
submatches: vec![SubMatch {
|
||||
m: Data::bytes("/w=="),
|
||||
replacement: None,
|
||||
start: 4,
|
||||
end: 5,
|
||||
},],
|
||||
@ -285,6 +347,7 @@ rgtest!(notutf8_file, |dir: Dir, mut cmd: TestCommand| {
|
||||
absolute_offset: 0,
|
||||
submatches: vec![SubMatch {
|
||||
m: Data::bytes("/w=="),
|
||||
replacement: None,
|
||||
start: 4,
|
||||
end: 5,
|
||||
},],
|
||||
@ -305,7 +368,12 @@ rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| {
|
||||
|
||||
assert_eq!(
|
||||
msgs[1].unwrap_match().submatches[0].clone(),
|
||||
SubMatch { m: Data::text("Sherlock"), start: 56, end: 64 },
|
||||
SubMatch {
|
||||
m: Data::text("Sherlock"),
|
||||
replacement: None,
|
||||
start: 56,
|
||||
end: 64
|
||||
},
|
||||
);
|
||||
});
|
||||
|
||||
|
Reference in New Issue
Block a user