2018-08-06 20:11:58 -04:00
|
|
|
use std::time;
|
|
|
|
|
2019-01-19 10:15:56 -05:00
|
|
|
use serde_derive::Deserialize;
|
2018-08-06 20:11:58 -04:00
|
|
|
use serde_json as json;
|
|
|
|
|
2019-01-19 10:15:56 -05:00
|
|
|
use crate::hay::{SHERLOCK, SHERLOCK_CRLF};
|
|
|
|
use crate::util::{Dir, TestCommand};
|
2018-08-06 20:11:58 -04:00
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
#[serde(tag = "type", content = "data")]
|
|
|
|
#[serde(rename_all = "snake_case")]
|
|
|
|
enum Message {
|
|
|
|
Begin(Begin),
|
|
|
|
End(End),
|
|
|
|
Match(Match),
|
|
|
|
Context(Context),
|
|
|
|
Summary(Summary),
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Message {
|
|
|
|
fn unwrap_begin(&self) -> Begin {
|
|
|
|
match *self {
|
|
|
|
Message::Begin(ref x) => x.clone(),
|
|
|
|
ref x => panic!("expected Message::Begin but got {:?}", x),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unwrap_end(&self) -> End {
|
|
|
|
match *self {
|
|
|
|
Message::End(ref x) => x.clone(),
|
|
|
|
ref x => panic!("expected Message::End but got {:?}", x),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unwrap_match(&self) -> Match {
|
|
|
|
match *self {
|
|
|
|
Message::Match(ref x) => x.clone(),
|
|
|
|
ref x => panic!("expected Message::Match but got {:?}", x),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unwrap_context(&self) -> Context {
|
|
|
|
match *self {
|
|
|
|
Message::Context(ref x) => x.clone(),
|
|
|
|
ref x => panic!("expected Message::Context but got {:?}", x),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
fn unwrap_summary(&self) -> Summary {
|
|
|
|
match *self {
|
|
|
|
Message::Summary(ref x) => x.clone(),
|
|
|
|
ref x => panic!("expected Message::Summary but got {:?}", x),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct Begin {
|
|
|
|
path: Option<Data>,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct End {
|
|
|
|
path: Option<Data>,
|
|
|
|
binary_offset: Option<u64>,
|
|
|
|
stats: Stats,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct Summary {
|
|
|
|
elapsed_total: Duration,
|
|
|
|
stats: Stats,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct Match {
|
|
|
|
path: Option<Data>,
|
|
|
|
lines: Data,
|
|
|
|
line_number: Option<u64>,
|
|
|
|
absolute_offset: u64,
|
|
|
|
submatches: Vec<SubMatch>,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct Context {
|
|
|
|
path: Option<Data>,
|
|
|
|
lines: Data,
|
|
|
|
line_number: Option<u64>,
|
|
|
|
absolute_offset: u64,
|
|
|
|
submatches: Vec<SubMatch>,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct SubMatch {
|
|
|
|
#[serde(rename = "match")]
|
|
|
|
m: Data,
|
|
|
|
start: usize,
|
|
|
|
end: usize,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
#[serde(untagged)]
|
|
|
|
enum Data {
|
|
|
|
Text { text: String },
|
|
|
|
// This variant is used when the data isn't valid UTF-8. The bytes are
|
|
|
|
// base64 encoded, so using a String here is OK.
|
|
|
|
Bytes { bytes: String },
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Data {
|
|
|
|
fn text(s: &str) -> Data { Data::Text { text: s.to_string() } }
|
|
|
|
fn bytes(s: &str) -> Data { Data::Bytes { bytes: s.to_string() } }
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct Stats {
|
|
|
|
elapsed: Duration,
|
|
|
|
searches: u64,
|
|
|
|
searches_with_match: u64,
|
|
|
|
bytes_searched: u64,
|
|
|
|
bytes_printed: u64,
|
|
|
|
matched_lines: u64,
|
|
|
|
matches: u64,
|
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Clone, Debug, Deserialize, PartialEq, Eq)]
|
|
|
|
struct Duration {
|
|
|
|
#[serde(flatten)]
|
|
|
|
duration: time::Duration,
|
|
|
|
human: String,
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Decode JSON Lines into a Vec<Message>. If there was an error decoding,
|
|
|
|
/// this function panics.
|
|
|
|
fn json_decode(jsonlines: &str) -> Vec<Message> {
|
|
|
|
json::Deserializer::from_str(jsonlines)
|
|
|
|
.into_iter()
|
|
|
|
.collect::<Result<Vec<Message>, _>>()
|
|
|
|
.unwrap()
|
|
|
|
}
|
|
|
|
|
|
|
|
// Searches the `sherlock` fixture with one line of leading context and
// checks the complete message sequence: begin, context, match, end, summary.
rgtest!(basic, |dir: Dir, mut cmd: TestCommand| {
    dir.create("sherlock", SHERLOCK);
    cmd.arg("--json").arg("-B1").arg("Sherlock Holmes").arg("sherlock");

    let msgs = json_decode(&cmd.stdout());
    // Every per-file message is expected to carry this path.
    let path = Some(Data::text("sherlock"));

    let begin = msgs[0].unwrap_begin();
    assert_eq!(begin, Begin { path: path.clone() });

    let context = msgs[1].unwrap_context();
    let want_context = Context {
        path: path.clone(),
        lines: Data::text(
            "Holmeses, success in the province of detective work must always\n",
        ),
        line_number: Some(2),
        absolute_offset: 65,
        submatches: vec![],
    };
    assert_eq!(context, want_context);

    let mat = msgs[2].unwrap_match();
    let want_match = Match {
        path: path.clone(),
        lines: Data::text(
            "be, to a very large extent, the result of luck. Sherlock Holmes\n",
        ),
        line_number: Some(3),
        absolute_offset: 129,
        submatches: vec![SubMatch {
            m: Data::text("Sherlock Holmes"),
            start: 48,
            end: 63,
        }],
    };
    assert_eq!(mat, want_match);

    let end = msgs[3].unwrap_end();
    assert_eq!(end.path, path);
    assert_eq!(end.binary_offset, None);

    let stats = msgs[4].unwrap_summary().stats;
    assert_eq!(stats.searches_with_match, 1);
    assert_eq!(stats.bytes_printed, 494);
});
|
|
|
|
|
|
|
|
#[cfg(unix)]
|
|
|
|
rgtest!(notutf8, |dir: Dir, mut cmd: TestCommand| {
|
|
|
|
use std::ffi::OsStr;
|
|
|
|
use std::os::unix::ffi::OsStrExt;
|
|
|
|
|
|
|
|
// This test does not work with PCRE2 because PCRE2 does not support the
|
|
|
|
// `u` flag.
|
|
|
|
if dir.is_pcre2() {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
// macOS doesn't like this either... sigh.
|
|
|
|
if cfg!(target_os = "macos") {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
let name = &b"foo\xFFbar"[..];
|
|
|
|
let contents = &b"quux\xFFbaz"[..];
|
|
|
|
|
|
|
|
// APFS does not support creating files with invalid UTF-8 bytes, so just
|
2019-01-26 12:25:21 -05:00
|
|
|
// skip the test if we can't create our file. Presumably we don't need this
|
|
|
|
// check if we're already skipping it on macOS, but maybe other file
|
|
|
|
// systems won't like this test either?
|
2018-08-06 20:11:58 -04:00
|
|
|
if !dir.try_create_bytes(OsStr::from_bytes(name), contents).is_ok() {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
cmd.arg("--json").arg(r"(?-u)\xFF");
|
|
|
|
|
|
|
|
let msgs = json_decode(&cmd.stdout());
|
|
|
|
|
|
|
|
assert_eq!(
|
|
|
|
msgs[0].unwrap_begin(),
|
|
|
|
Begin { path: Some(Data::bytes("Zm9v/2Jhcg==")) }
|
|
|
|
);
|
|
|
|
assert_eq!(
|
|
|
|
msgs[1].unwrap_match(),
|
|
|
|
Match {
|
|
|
|
path: Some(Data::bytes("Zm9v/2Jhcg==")),
|
|
|
|
lines: Data::bytes("cXV1eP9iYXo="),
|
|
|
|
line_number: Some(1),
|
|
|
|
absolute_offset: 0,
|
|
|
|
submatches: vec![
|
|
|
|
SubMatch {
|
|
|
|
m: Data::bytes("/w=="),
|
|
|
|
start: 4,
|
|
|
|
end: 5,
|
|
|
|
},
|
|
|
|
],
|
|
|
|
}
|
|
|
|
);
|
|
|
|
});
|
|
|
|
|
2018-08-21 20:26:33 -04:00
|
|
|
// Like `notutf8`, but only the file contents are invalid UTF-8: the path is
// expected as `text` while the matched line and submatch are `bytes`.
rgtest!(notutf8_file, |dir: Dir, mut cmd: TestCommand| {
    use std::ffi::OsStr;

    // This test does not work with PCRE2 because PCRE2 does not support the
    // `u` flag.
    if dir.is_pcre2() {
        return;
    }

    let name = "foo";
    let contents: &[u8] = b"quux\xFFbaz";

    // APFS does not support creating files with invalid UTF-8 bytes, so just
    // skip the test if we can't create our file.
    if dir.try_create_bytes(OsStr::new(name), contents).is_err() {
        return;
    }
    cmd.arg("--json").arg(r"(?-u)\xFF");

    let msgs = json_decode(&cmd.stdout());
    let path = Some(Data::text("foo"));

    let begin = msgs[0].unwrap_begin();
    assert_eq!(begin, Begin { path: path.clone() });

    let mat = msgs[1].unwrap_match();
    let want = Match {
        path,
        lines: Data::bytes("cXV1eP9iYXo="),
        line_number: Some(1),
        absolute_offset: 0,
        submatches: vec![SubMatch {
            m: Data::bytes("/w=="),
            start: 4,
            end: 5,
        }],
    };
    assert_eq!(mat, want);
});
|
|
|
|
|
2018-08-06 20:11:58 -04:00
|
|
|
// See: https://github.com/BurntSushi/ripgrep/issues/416
|
|
|
|
//
|
|
|
|
// This test in particular checks that our match does _not_ include the `\r`
|
|
|
|
// even though the '$' may be rewritten as '(?:\r??$)' and could thus include
|
|
|
|
// `\r` in the match.
|
|
|
|
rgtest!(crlf, |dir: Dir, mut cmd: TestCommand| {
|
|
|
|
dir.create("sherlock", SHERLOCK_CRLF);
|
|
|
|
cmd.arg("--json").arg("--crlf").arg(r"Sherlock$").arg("sherlock");
|
|
|
|
|
|
|
|
let msgs = json_decode(&cmd.stdout());
|
|
|
|
|
|
|
|
assert_eq!(
|
|
|
|
msgs[1].unwrap_match().submatches[0].clone(),
|
|
|
|
SubMatch {
|
|
|
|
m: Data::text("Sherlock"),
|
|
|
|
start: 56,
|
|
|
|
end: 64,
|
|
|
|
},
|
|
|
|
);
|
|
|
|
});
|
2019-01-26 12:25:21 -05:00
|
|
|
|
|
|
|
// See: https://github.com/BurntSushi/ripgrep/issues/1095
|
|
|
|
//
|
|
|
|
// This test checks that we don't drop the \r\n in a matching line when --crlf
|
|
|
|
// mode is enabled.
|
|
|
|
rgtest!(r1095_missing_crlf, |dir: Dir, mut cmd: TestCommand| {
|
|
|
|
dir.create("foo", "test\r\n");
|
|
|
|
|
|
|
|
// Check without --crlf flag.
|
|
|
|
let msgs = json_decode(&cmd.arg("--json").arg("test").stdout());
|
|
|
|
assert_eq!(msgs.len(), 4);
|
|
|
|
assert_eq!(msgs[1].unwrap_match().lines, Data::text("test\r\n"));
|
|
|
|
|
|
|
|
// Now check with --crlf flag.
|
|
|
|
let msgs = json_decode(&cmd.arg("--crlf").stdout());
|
|
|
|
assert_eq!(msgs.len(), 4);
|
|
|
|
assert_eq!(msgs[1].unwrap_match().lines, Data::text("test\r\n"));
|
|
|
|
});
|
|
|
|
|
|
|
|
// See: https://github.com/BurntSushi/ripgrep/issues/1095
|
|
|
|
//
|
|
|
|
// This test checks that we don't return empty submatches when matching a `\n`
|
|
|
|
// in CRLF mode.
|
|
|
|
rgtest!(r1095_crlf_empty_match, |dir: Dir, mut cmd: TestCommand| {
|
|
|
|
dir.create("foo", "test\r\n\n");
|
|
|
|
|
|
|
|
// Check without --crlf flag.
|
|
|
|
let msgs = json_decode(&cmd.arg("-U").arg("--json").arg("\n").stdout());
|
|
|
|
assert_eq!(msgs.len(), 5);
|
|
|
|
|
|
|
|
let m = msgs[1].unwrap_match();
|
|
|
|
assert_eq!(m.lines, Data::text("test\r\n"));
|
|
|
|
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
|
|
|
|
|
|
|
let m = msgs[2].unwrap_match();
|
|
|
|
assert_eq!(m.lines, Data::text("\n"));
|
|
|
|
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
|
|
|
|
|
|
|
// Now check with --crlf flag.
|
|
|
|
let msgs = json_decode(&cmd.arg("--crlf").stdout());
|
|
|
|
|
|
|
|
let m = msgs[1].unwrap_match();
|
|
|
|
assert_eq!(m.lines, Data::text("test\r\n"));
|
|
|
|
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
|
|
|
|
|
|
|
let m = msgs[2].unwrap_match();
|
|
|
|
assert_eq!(m.lines, Data::text("\n"));
|
|
|
|
assert_eq!(m.submatches[0].m, Data::text("\n"));
|
|
|
|
});
|