mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2024-12-12 19:18:24 +02:00
127 lines
3.1 KiB
Rust
127 lines
3.1 KiB
Rust
/*!
This module benchmarks the glob implementation. For benchmarks on the ripgrep
tool itself, see the benchsuite directory.
*/
|
|
#![feature(test)]
|
|
|
|
extern crate glob;
|
|
#[macro_use]
|
|
extern crate lazy_static;
|
|
extern crate regex;
|
|
extern crate test;
|
|
|
|
/// Short haystack path used by the `short_*` benchmarks.
const SHORT: &'static str = "some/needle.txt";
/// Glob pattern matched against `SHORT`.
const SHORT_PAT: &'static str = "some/**/needle.txt";

/// Longer haystack path (more intermediate directories) used by the `long_*`
/// benchmarks.
const LONG: &'static str = "some/a/bigger/path/to/the/crazy/needle.txt";
/// Glob pattern matched against `LONG`; the same pattern text as `SHORT_PAT`.
const LONG_PAT: &'static str = "some/**/needle.txt";
|
|
|
|
#[allow(dead_code, unused_variables)]
|
|
#[path = "../src/glob.rs"]
|
|
mod reglob;
|
|
|
|
/// Compiles `pat` with the external `glob` crate.
///
/// Panics if the pattern is rejected by `glob::Pattern::new`.
fn new_glob(pat: &str) -> glob::Pattern {
    let compiled = glob::Pattern::new(pat);
    compiled.unwrap()
}
|
|
|
|
/// Builds a `reglob::Set` containing only the glob `pat`.
///
/// Panics if adding the pattern or building the set fails.
fn new_reglob(pat: &str) -> reglob::Set {
    let mut set_builder = reglob::SetBuilder::new();
    set_builder.add(pat).unwrap();
    let set = set_builder.build();
    set.unwrap()
}
|
|
|
|
/// Builds a single `reglob::Set` from every glob in `pats`, added in slice
/// order.
///
/// Panics if any pattern cannot be added or if building the set fails.
fn new_reglob_many(pats: &[&str]) -> reglob::Set {
    let mut set_builder = reglob::SetBuilder::new();
    for pattern in pats.iter() {
        set_builder.add(pattern).unwrap();
    }
    let set = set_builder.build();
    set.unwrap()
}
|
|
|
|
#[bench]
|
|
fn short_glob(b: &mut test::Bencher) {
|
|
let pat = new_glob(SHORT_PAT);
|
|
b.iter(|| assert!(pat.matches(SHORT)));
|
|
}
|
|
|
|
#[bench]
|
|
fn short_regex(b: &mut test::Bencher) {
|
|
let set = new_reglob(SHORT_PAT);
|
|
b.iter(|| assert!(set.is_match(SHORT)));
|
|
}
|
|
|
|
#[bench]
|
|
fn long_glob(b: &mut test::Bencher) {
|
|
let pat = new_glob(LONG_PAT);
|
|
b.iter(|| assert!(pat.matches(LONG)));
|
|
}
|
|
|
|
#[bench]
|
|
fn long_regex(b: &mut test::Bencher) {
|
|
let set = new_reglob(LONG_PAT);
|
|
b.iter(|| assert!(set.is_match(LONG)));
|
|
}
|
|
|
|
/// Glob patterns shared by the `many_short_*` benchmarks.
const MANY_SHORT_GLOBS: &'static [&'static str] = &[
    // Taken from a random .gitignore on my system.
    ".*.swp",
    "tags",
    "target",
    "*.lock",
    "tmp",
    "*.csv",
    "*.fst",
    "*-got",
    "*.csv.idx",
    "words",
    "98m*",
    "dict",
    "test",
    "months",
];
|
|
|
|
/// Haystack for the `many_short_*` benchmarks; each of them asserts that
/// exactly two of the patterns in `MANY_SHORT_GLOBS` match it.
const MANY_SHORT_SEARCH: &'static str = "98m-blah.csv.idx";
|
|
|
|
#[bench]
|
|
fn many_short_glob(b: &mut test::Bencher) {
|
|
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| new_glob(s)).collect();
|
|
b.iter(|| {
|
|
let mut count = 0;
|
|
for pat in &pats {
|
|
if pat.matches(MANY_SHORT_SEARCH) {
|
|
count += 1;
|
|
}
|
|
}
|
|
assert_eq!(2, count);
|
|
})
|
|
}
|
|
|
|
#[bench]
|
|
fn many_short_regex_set(b: &mut test::Bencher) {
|
|
let set = new_reglob_many(MANY_SHORT_GLOBS);
|
|
b.iter(|| assert_eq!(2, set.matches(MANY_SHORT_SEARCH).iter().count()));
|
|
}
|
|
|
|
// This is the fastest on my system (beating many_short_glob by about 2x).
// This suggests that a RegexSet needs quite a few regexes (or a larger
// haystack) in order for it to scale.
//
// TODO(burntsushi): come up with a benchmark that uses more complex patterns
// or a longer haystack.
|
|
#[bench]
|
|
fn many_short_regex_pattern(b: &mut test::Bencher) {
|
|
let pats: Vec<_> = MANY_SHORT_GLOBS.iter().map(|&s| {
|
|
let pat = reglob::Pattern::new(s).unwrap();
|
|
regex::Regex::new(&pat.to_regex()).unwrap()
|
|
}).collect();
|
|
b.iter(|| {
|
|
let mut count = 0;
|
|
for pat in &pats {
|
|
if pat.is_match(MANY_SHORT_SEARCH) {
|
|
count += 1;
|
|
}
|
|
}
|
|
assert_eq!(2, count);
|
|
})
|
|
}
|