1
0
mirror of https://github.com/BurntSushi/ripgrep.git synced 2025-04-02 20:45:38 +02:00
ripgrep/tests/json.rs
Andrew Gallant eb184d7711 tests: re-tool integration tests
This basically rewrites every integration test. We reduce the amount of
magic involved here in terms of which arguments are being passed to
ripgrep processes. To make up for the boiler plate saved by the magic,
we make the Dir (formerly WorkDir) type a bit nicer to use, along with a
new TestCommand that wraps a std::process::Command. In exchange, we get
tests that are easier to read and write.

We also run every test with the `--pcre2` flag to make sure that works,
when PCRE2 is available.
2018-08-20 07:10:19 -04:00

264 lines
6.7 KiB
Rust

use std::time;
use serde_json as json;
use hay::{SHERLOCK, SHERLOCK_CRLF};
use util::{Dir, TestCommand};
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(tag = "type", content = "data")]
#[serde(rename_all = "snake_case")]
enum Message {
Begin(Begin),
End(End),
Match(Match),
Context(Context),
Summary(Summary),
}
impl Message {
fn unwrap_begin(&self) -> Begin {
match *self {
Message::Begin(ref x) => x.clone(),
ref x => panic!("expected Message::Begin but got {:?}", x),
}
}
fn unwrap_end(&self) -> End {
match *self {
Message::End(ref x) => x.clone(),
ref x => panic!("expected Message::End but got {:?}", x),
}
}
fn unwrap_match(&self) -> Match {
match *self {
Message::Match(ref x) => x.clone(),
ref x => panic!("expected Message::Match but got {:?}", x),
}
}
fn unwrap_context(&self) -> Context {
match *self {
Message::Context(ref x) => x.clone(),
ref x => panic!("expected Message::Context but got {:?}", x),
}
}
fn unwrap_summary(&self) -> Summary {
match *self {
Message::Summary(ref x) => x.clone(),
ref x => panic!("expected Message::Summary but got {:?}", x),
}
}
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Begin {
path: Option<Data>,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct End {
path: Option<Data>,
binary_offset: Option<u64>,
stats: Stats,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Summary {
elapsed_total: Duration,
stats: Stats,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Match {
path: Option<Data>,
lines: Data,
line_number: Option<u64>,
absolute_offset: u64,
submatches: Vec<SubMatch>,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Context {
path: Option<Data>,
lines: Data,
line_number: Option<u64>,
absolute_offset: u64,
submatches: Vec<SubMatch>,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct SubMatch {
#[serde(rename = "match")]
m: Data,
start: usize,
end: usize,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
enum Data {
Text { text: String },
// This variant is used when the data isn't valid UTF-8. The bytes are
// base64 encoded, so using a String here is OK.
Bytes { bytes: String },
}
impl Data {
fn text(s: &str) -> Data { Data::Text { text: s.to_string() } }
fn bytes(s: &str) -> Data { Data::Bytes { bytes: s.to_string() } }
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Stats {
elapsed: Duration,
searches: u64,
searches_with_match: u64,
bytes_searched: u64,
bytes_printed: u64,
matched_lines: u64,
matches: u64,
}
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
struct Duration {
#[serde(flatten)]
duration: time::Duration,
human: String,
}
/// Decode JSON Lines into a Vec<Message>. If there was an error decoding,
/// this function panics.
fn json_decode(jsonlines: &str) -> Vec<Message> {
json::Deserializer::from_str(jsonlines)
.into_iter()
.collect::<Result<Vec<Message>, _>>()
.unwrap()
}
rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
dir.create("sherlock", SHERLOCK);
cmd.arg("--json").arg("-B1").arg("Sherlock Holmes").arg("sherlock");
let msgs = json_decode(&cmd.stdout());
assert_eq!(
msgs[0].unwrap_begin(),
Begin { path: Some(Data::text("sherlock")) }
);
assert_eq!(
msgs[1].unwrap_context(),
Context {
path: Some(Data::text("sherlock")),
lines: Data::text("Holmeses, success in the province of detective work must always\n"),
line_number: Some(2),
absolute_offset: 65,
submatches: vec![],
}
);
assert_eq!(
msgs[2].unwrap_match(),
Match {
path: Some(Data::text("sherlock")),
lines: Data::text("be, to a very large extent, the result of luck. Sherlock Holmes\n"),
line_number: Some(3),
absolute_offset: 129,
submatches: vec![
SubMatch {
m: Data::text("Sherlock Holmes"),
start: 48,
end: 63,
},
],
}
);
assert_eq!(
msgs[3].unwrap_end().path,
Some(Data::text("sherlock"))
);
assert_eq!(
msgs[3].unwrap_end().binary_offset,
None
);
assert_eq!(
msgs[4].unwrap_summary().stats.searches_with_match,
1
);
assert_eq!(
msgs[4].unwrap_summary().stats.bytes_printed,
494
);
});
#[cfg(unix)]
rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| {
use std::ffi::OsStr;
use std::os::unix::ffi::OsStrExt;
// This test does not work with PCRE2 because PCRE2 does not support the
// `u` flag.
if dir.is_pcre2() {
return;
}
// macOS doesn't like this either... sigh.
if cfg!(target_os = "macos") {
return;
}
let name = &b"foo\xFFbar"[..];
let contents = &b"quux\xFFbaz"[..];
// APFS does not support creating files with invalid UTF-8 bytes, so just
// skip the test if we can't create our file.
if !dir.try_create_bytes(OsStr::from_bytes(name), contents).is_ok() {
return;
}
cmd.arg("--json").arg(r"(?-u)\xFF");
let msgs = json_decode(&cmd.stdout());
assert_eq!(
msgs[0].unwrap_begin(),
Begin { path: Some(Data::bytes("Zm9v/2Jhcg==")) }
);
assert_eq!(
msgs[1].unwrap_match(),
Match {
path: Some(Data::bytes("Zm9v/2Jhcg==")),
lines: Data::bytes("cXV1eP9iYXo="),
line_number: Some(1),
absolute_offset: 0,
submatches: vec![
SubMatch {
m: Data::bytes("/w=="),
start: 4,
end: 5,
},
],
}
);
});
// See: https://github.com/BurntSushi/ripgrep/issues/416
//
// This test in particular checks that our match does _not_ include the `\r`
// even though the '$' may be rewritten as '(?:\r??$)' and could thus include
// `\r` in the match.
rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| {
dir.create("sherlock", SHERLOCK_CRLF);
cmd.arg("--json").arg("--crlf").arg(r"Sherlock$").arg("sherlock");
let msgs = json_decode(&cmd.stdout());
assert_eq!(
msgs[1].unwrap_match().submatches[0].clone(),
SubMatch {
m: Data::text("Sherlock"),
start: 56,
end: 64,
},
);
});