mirror of
https://github.com/BurntSushi/ripgrep.git
synced 2025-04-14 00:58:43 +02:00
Improve ergonomics of benchsuite.
The runner now detects if commands exist and permits running incomplete benchmarks. Also, explicitly use Python 3 since that's what default Ubuntu 16.04 seems to want.
This commit is contained in:
parent
5a0c873f61
commit
bc9d12c4c8
@ -1,4 +1,4 @@
|
|||||||
#!/usr/bin/env python
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
'''
|
'''
|
||||||
benchsuite is a benchmark runner for comparing command line search tools.
|
benchsuite is a benchmark runner for comparing command line search tools.
|
||||||
@ -10,6 +10,7 @@ import os
|
|||||||
import os.path as path
|
import os.path as path
|
||||||
from multiprocessing import cpu_count
|
from multiprocessing import cpu_count
|
||||||
import re
|
import re
|
||||||
|
import shutil
|
||||||
import statistics
|
import statistics
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
@ -718,6 +719,23 @@ class MissingDependencies(Exception):
|
|||||||
return 'MissingDependency(%s)' % repr(self.missing_names)
|
return 'MissingDependency(%s)' % repr(self.missing_names)
|
||||||
|
|
||||||
|
|
||||||
|
class MissingCommands(Exception):
|
||||||
|
'''
|
||||||
|
A missing command exception.
|
||||||
|
|
||||||
|
This exception occurs when running a command in a benchmark
|
||||||
|
where the command could not be found on the current system.
|
||||||
|
|
||||||
|
:ivar list(str) missing_names:
|
||||||
|
The names of the command binaries that could not be found.
|
||||||
|
'''
|
||||||
|
def __init__(self, missing_names):
|
||||||
|
self.missing_names = sorted(set(missing_names))
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return 'MissingCommands(%s)' % repr(self.missing_names)
|
||||||
|
|
||||||
|
|
||||||
class Benchmark(object):
|
class Benchmark(object):
|
||||||
'''
|
'''
|
||||||
A single benchmark corresponding to a grouping of commands.
|
A single benchmark corresponding to a grouping of commands.
|
||||||
@ -727,7 +745,8 @@ class Benchmark(object):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
def __init__(self, name=None, pattern=None, commands=None,
|
def __init__(self, name=None, pattern=None, commands=None,
|
||||||
warmup_count=1, count=3, line_count=True):
|
warmup_count=1, count=3, line_count=True,
|
||||||
|
allow_missing_commands=False):
|
||||||
'''
|
'''
|
||||||
Create a single benchmark.
|
Create a single benchmark.
|
||||||
|
|
||||||
@ -765,15 +784,37 @@ class Benchmark(object):
|
|||||||
self.warmup_count = warmup_count
|
self.warmup_count = warmup_count
|
||||||
self.count = count
|
self.count = count
|
||||||
self.line_count = line_count
|
self.line_count = line_count
|
||||||
|
self.allow_missing_commands = allow_missing_commands
|
||||||
|
|
||||||
|
def raise_if_missing(self):
|
||||||
|
'''
|
||||||
|
Raises a MissingCommands exception if applicable.
|
||||||
|
|
||||||
|
A MissingCommands exception is raised when the following
|
||||||
|
criteria are met: 1) allow_missing_commands is False, and 2) at
|
||||||
|
least one command in this benchmark could not be found on this
|
||||||
|
system.
|
||||||
|
'''
|
||||||
|
missing_commands = \
|
||||||
|
[c.binary_name for c in self.commands if not c.exists()]
|
||||||
|
if not self.allow_missing_commands and len(missing_commands) > 0:
|
||||||
|
raise MissingCommands(missing_commands)
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
'''
|
'''
|
||||||
Runs this benchmark and returns the results.
|
Runs this benchmark and returns the results.
|
||||||
|
|
||||||
:rtype: Result
|
:rtype: Result
|
||||||
|
:raises:
|
||||||
|
MissingCommands if any command doesn't exist.
|
||||||
|
(Unless allow_missing_commands is enabled.)
|
||||||
'''
|
'''
|
||||||
|
self.raise_if_missing()
|
||||||
result = Result(self)
|
result = Result(self)
|
||||||
for cmd in self.commands:
|
for cmd in self.commands:
|
||||||
|
if self.allow_missing_commands and not cmd.exists():
|
||||||
|
# Skip this command if we're OK with it.
|
||||||
|
continue
|
||||||
# Do a warmup first.
|
# Do a warmup first.
|
||||||
for _ in range(self.warmup_count):
|
for _ in range(self.warmup_count):
|
||||||
self.run_one(cmd)
|
self.run_one(cmd)
|
||||||
@ -798,6 +839,8 @@ class Benchmark(object):
|
|||||||
it is the number of lines in the search output.
|
it is the number of lines in the search output.
|
||||||
:rtype: int
|
:rtype: int
|
||||||
'''
|
'''
|
||||||
|
if not cmd.exists():
|
||||||
|
# MissingCommands takes a list of binary names (it calls sorted(set(...)) on
# its argument), so wrap the single name in a list instead of passing a str.
raise MissingCommands([cmd.binary_name])
|
||||||
cmd.kwargs['stderr'] = subprocess.DEVNULL
|
cmd.kwargs['stderr'] = subprocess.DEVNULL
|
||||||
if self.line_count:
|
if self.line_count:
|
||||||
cmd.kwargs['stdout'] = subprocess.PIPE
|
cmd.kwargs['stdout'] = subprocess.PIPE
|
||||||
@ -867,6 +910,8 @@ class Result(object):
|
|||||||
means = []
|
means = []
|
||||||
for cmd in self.benchmark.commands:
|
for cmd in self.benchmark.commands:
|
||||||
mean, _ = self.distribution_for(cmd)
|
mean, _ = self.distribution_for(cmd)
|
||||||
|
if mean is None:
|
||||||
|
continue
|
||||||
means.append((cmd, mean))
|
means.append((cmd, mean))
|
||||||
return min(means, key=lambda tup: tup[1])[0]
|
return min(means, key=lambda tup: tup[1])[0]
|
||||||
|
|
||||||
@ -889,16 +934,18 @@ class Result(object):
|
|||||||
'''
|
'''
|
||||||
Returns the distribution (mean +/- std) of the given command.
|
Returns the distribution (mean +/- std) of the given command.
|
||||||
|
|
||||||
|
If there are no samples for this command (i.e., it was skipped),
|
||||||
|
then return ``(None, None)``.
|
||||||
|
|
||||||
:rtype: (float, float)
|
:rtype: (float, float)
|
||||||
:returns:
|
:returns:
|
||||||
A tuple containing the mean and standard deviation, in that
|
A tuple containing the mean and standard deviation, in that
|
||||||
order.
|
order.
|
||||||
'''
|
'''
|
||||||
mean = statistics.mean(
|
samples = list(s['duration'] for s in self.samples_for(cmd))
|
||||||
s['duration'] for s in self.samples_for(cmd))
|
if len(samples) == 0:
|
||||||
stdev = statistics.stdev(
|
return None, None
|
||||||
s['duration'] for s in self.samples_for(cmd))
|
return statistics.mean(samples), statistics.stdev(samples)
|
||||||
return mean, stdev
|
|
||||||
|
|
||||||
|
|
||||||
class Command(object):
|
class Command(object):
|
||||||
@ -928,6 +975,15 @@ class Command(object):
|
|||||||
self.args = args
|
self.args = args
|
||||||
self.kwargs = kwargs
|
self.kwargs = kwargs
|
||||||
|
|
||||||
|
def exists(self):
|
||||||
|
'Returns true if and only if this command exists.'
|
||||||
|
return shutil.which(self.binary_name) is not None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def binary_name(self):
|
||||||
|
'Return the binary name of this command.'
|
||||||
|
return self.cmd[0]
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
'''
|
'''
|
||||||
Runs this command and returns its status.
|
Runs this command and returns its status.
|
||||||
@ -1068,7 +1124,8 @@ def download(suite_dir, choices):
|
|||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
def collect_benchmarks(suite_dir, filter_pat=None):
|
def collect_benchmarks(suite_dir, filter_pat=None,
|
||||||
|
allow_missing_commands=False):
|
||||||
'''
|
'''
|
||||||
Return an iterable of all runnable benchmarks.
|
Return an iterable of all runnable benchmarks.
|
||||||
|
|
||||||
@ -1090,6 +1147,9 @@ def collect_benchmarks(suite_dir, filter_pat=None):
|
|||||||
continue
|
continue
|
||||||
try:
|
try:
|
||||||
benchmark = globals()[fun](suite_dir)
|
benchmark = globals()[fun](suite_dir)
|
||||||
|
benchmark.name = name
|
||||||
|
benchmark.allow_missing_commands = allow_missing_commands
|
||||||
|
benchmark.raise_if_missing()
|
||||||
except MissingDependencies as e:
|
except MissingDependencies as e:
|
||||||
eprint(
|
eprint(
|
||||||
'missing: %s, skipping benchmark %s (try running with: %s)' % (
|
'missing: %s, skipping benchmark %s (try running with: %s)' % (
|
||||||
@ -1097,24 +1157,32 @@ def collect_benchmarks(suite_dir, filter_pat=None):
|
|||||||
name,
|
name,
|
||||||
' '.join(['--download %s' % n for n in e.missing_names]),
|
' '.join(['--download %s' % n for n in e.missing_names]),
|
||||||
))
|
))
|
||||||
|
except MissingCommands as e:
|
||||||
|
fmt = 'missing commands: %s, skipping benchmark %s ' \
|
||||||
|
'(run with --allow-missing to run incomplete benchmarks)'
|
||||||
|
eprint(fmt % (', '.join(e.missing_names), name))
|
||||||
continue
|
continue
|
||||||
benchmark.name = name
|
|
||||||
yield benchmark
|
yield benchmark
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
download_choices = ['all', 'linux', 'subtitles-en', 'subtitles-ru']
|
||||||
p = argparse.ArgumentParser('Command line search tool benchmark suite.')
|
p = argparse.ArgumentParser('Command line search tool benchmark suite.')
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
'--dir', metavar='PATH', default=os.getcwd(),
|
'--dir', metavar='PATH', default=os.getcwd(),
|
||||||
help='The directory in which to download data and perform searches.')
|
help='The directory in which to download data and perform searches.')
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
'--download', metavar='CORPUS', action='append',
|
'--download', metavar='CORPUS', action='append',
|
||||||
choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
|
choices=download_choices,
|
||||||
help='Download and prepare corpus data, then exit without running '
|
help='Download and prepare corpus data, then exit without running '
|
||||||
'any benchmarks. Note that this command is intended to be '
|
'any benchmarks. Note that this command is intended to be '
|
||||||
'idempotent. WARNING: This downloads over a gigabyte of data, '
|
'idempotent. WARNING: This downloads over a gigabyte of data, '
|
||||||
'and also includes building the Linux kernel. If "all" is used '
|
'and also includes building the Linux kernel. If "all" is used '
|
||||||
'then the total uncompressed size is around 13 GB.')
|
'then the total uncompressed size is around 13 GB. '
|
||||||
|
'Choices: %s' % ', '.join(download_choices))
|
||||||
|
p.add_argument(
|
||||||
|
'--allow-missing', action='store_true',
|
||||||
|
help='Permit benchmarks to run even if some commands are missing.')
|
||||||
p.add_argument(
|
p.add_argument(
|
||||||
'-f', '--force', action='store_true',
|
'-f', '--force', action='store_true',
|
||||||
help='Overwrite existing files if there is a conflict.')
|
help='Overwrite existing files if there is a conflict.')
|
||||||
@ -1130,6 +1198,13 @@ def main():
|
|||||||
help='A regex pattern that will only run benchmarks that match.')
|
help='A regex pattern that will only run benchmarks that match.')
|
||||||
args = p.parse_args()
|
args = p.parse_args()
|
||||||
|
|
||||||
|
if args.list:
|
||||||
|
benchmarks = collect_benchmarks(
|
||||||
|
args.dir, filter_pat=args.bench,
|
||||||
|
allow_missing_commands=args.allow_missing)
|
||||||
|
for b in benchmarks:
|
||||||
|
print(b.name)
|
||||||
|
sys.exit(0)
|
||||||
if args.download is not None and len(args.download) > 0:
|
if args.download is not None and len(args.download) > 0:
|
||||||
download(args.dir, args.download)
|
download(args.dir, args.download)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
@ -1149,7 +1224,9 @@ def main():
|
|||||||
raw_csv_wtr = csv.DictWriter(raw_handle, fields)
|
raw_csv_wtr = csv.DictWriter(raw_handle, fields)
|
||||||
raw_csv_wtr.writerow({x: x for x in fields})
|
raw_csv_wtr.writerow({x: x for x in fields})
|
||||||
|
|
||||||
benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
|
benchmarks = collect_benchmarks(
|
||||||
|
args.dir, filter_pat=args.bench,
|
||||||
|
allow_missing_commands=args.allow_missing)
|
||||||
for i, b in enumerate(benchmarks):
|
for i, b in enumerate(benchmarks):
|
||||||
result = b.run()
|
result = b.run()
|
||||||
fastest_cmd = result.fastest_cmd()
|
fastest_cmd = result.fastest_cmd()
|
||||||
@ -1163,6 +1240,12 @@ def main():
|
|||||||
for cmd in b.commands:
|
for cmd in b.commands:
|
||||||
name = cmd.name
|
name = cmd.name
|
||||||
mean, stdev = result.distribution_for(cmd)
|
mean, stdev = result.distribution_for(cmd)
|
||||||
|
if mean is None:
|
||||||
|
# If we couldn't get a distribution for this command then
|
||||||
|
# it was skipped.
|
||||||
|
print('{name:{pad}} SKIPPED'.format(
|
||||||
|
name=name, pad=max_name_len + 2))
|
||||||
|
continue
|
||||||
line_counts = result.line_counts_for(cmd)
|
line_counts = result.line_counts_for(cmd)
|
||||||
show_fast_cmd, show_line_counts = '', ''
|
show_fast_cmd, show_line_counts = '', ''
|
||||||
if fastest_cmd.name == cmd.name:
|
if fastest_cmd.name == cmd.name:
|
||||||
|
Loading…
x
Reference in New Issue
Block a user