#!/usr/bin/env python

'''
benchsuite is a benchmark runner for comparing command line search tools.
'''

import argparse
import csv
import os
import os.path as path
from multiprocessing import cpu_count
import re
import statistics
import subprocess
import sys
import time

# Some constants for identifying the corpora we use to run tests.
# We establish two very different kinds of corpora: a small number of large
# files and a large number of small files. These are vastly different use cases
# not only because of their performance characteristics, but also the
# strategies used to increase the relevance of results returned.

SUBTITLES_DIR = 'subtitles'
SUBTITLES_EN_NAME = 'OpenSubtitles2016.raw.en'
SUBTITLES_EN_NAME_GZ = '%s.gz' % SUBTITLES_EN_NAME
SUBTITLES_EN_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.en.gz'
SUBTITLES_RU_NAME = 'OpenSubtitles2016.raw.ru'
SUBTITLES_RU_NAME_GZ = '%s.gz' % SUBTITLES_RU_NAME
SUBTITLES_RU_URL = 'http://opus.lingfil.uu.se/OpenSubtitles2016/mono/OpenSubtitles2016.raw.ru.gz'

LINUX_DIR = 'linux'
LINUX_CLONE = 'git://github.com/BurntSushi/linux'


def _command_factory(cwd):
    '''
    Return a function that builds a ``Command`` which runs inside ``cwd``.

    The returned function accepts the same arguments as ``Command`` and
    always sets (or overrides) the ``cwd`` keyword argument.

    :param str cwd: The working directory for every command created.
    '''
    def mkcmd(*args, **kwargs):
        kwargs['cwd'] = cwd
        return Command(*args, **kwargs)
    return mkcmd


def bench_linux_literal_default(suite_dir):
    '''
    Benchmark the speed of a literal using *default* settings.

    This is a purposefully unfair benchmark for use in performance analysis,
    but it is pedagogically useful.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = 'PM_RESUME'

    # N.B. This is a purposefully unfair benchmark for illustrative purposes
    # of how the default modes for each search tool differ.
    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', pat]),
        mkcmd('ag', ['ag', pat]),
        # ucg reports the exact same matches as ag and rg even though it
        # doesn't read gitignore files. Instead, it has a file whitelist
        # that happens to match up exactly with the gitignores for this search.
        mkcmd('ucg', ['ucg', pat]),
        mkcmd('git grep', ['git', 'grep', pat], env={'LC_ALL': 'C'}),
        mkcmd('pt', ['pt', pat]),
        # sift reports an extra line here for a binary file matched.
        mkcmd('sift', ['sift', pat]),
    ])


def bench_linux_literal(suite_dir):
    '''
    Benchmark the speed of a literal, attempting to be fair.

    This tries to use the minimum set of options available in all tools to
    test how fast they are. For example, it makes sure there is no case
    insensitive matching and that line numbers are computed.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = 'PM_RESUME'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd('git grep', [
            'git', 'grep', '-I', '-n', pat,
        ], env={'LC_ALL': 'C'}),
        mkcmd('pt', ['pt', pat]),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_literal_casei(suite_dir):
    '''
    Benchmark the speed of a case insensitive literal search.

    This is like the linux_literal benchmark, except we ask the search tools
    to do case insensitive search.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = 'PM_RESUME'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
        ]),
        mkcmd('ag', ['ag', '-i', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-i', pat]),
        mkcmd('ucg', ['ucg', '-i', pat]),
        mkcmd('git grep', [
            'git', 'grep', '-I', '-n', '-i', pat,
        ], env={'LC_ALL': 'C'}),
        # sift yields more matches than it should here. Specifically, it gets
        # matches in Module.symvers and System.map in the repo root. Both of
        # those files show up in the repo root's .gitignore file.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-i', pat,
        ]),
    ])


def bench_linux_re_literal_suffix(suite_dir):
    '''
    Benchmark the speed of a literal inside a regex.

    This, for example, inhibits a prefix byte optimization used inside of
    Go's regex engine (relevant for sift and pt).
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = '[A-Z]+_RESUME'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', ['rg', '--mmap', '--no-ignore', '-n', pat]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_word(suite_dir):
    r'''
    Benchmark use of the -w ("match word") flag in each tool.

    sift has a lot of trouble with this because it forces it into Go's regex
    engine by surrounding the pattern with \b assertions.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = 'PM_RESUME'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-w', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-w', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-w', pat,
        ]),
        mkcmd('ag', ['ag', '-s', '-w', pat]),
        mkcmd('ag-novcs', ['ag', '--skip-vcs-ignores', '-s', '-w', pat]),
        mkcmd('ucg', ['ucg', '--nosmart-case', '-w', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', '-w', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', '-w', pat,
        ]),
    ])


def bench_linux_unicode_greek(suite_dir):
    '''
    Benchmark matching of a Unicode category.

    Only three tools (ripgrep, sift and pt) support this.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = r'\p{Greek}'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        # sift tries to search a bunch of PDF files and clutters up the
        # results, even though --binary-skip is provided. They are excluded
        # here explicitly, but don't have a measurable impact on performance.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip',
            '--exclude-files', '.*',
            '--exclude-files', '*.pdf',
            pat,
        ]),
    ])


def bench_linux_unicode_greek_casei(suite_dir):
    '''
    Benchmark matching of a Unicode category, case insensitively.

    Only ripgrep gets this right (and it's still fast).
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = r'\p{Greek}'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        # sift tries to search a bunch of PDF files and clutters up the
        # results, even though --binary-skip is provided. They are excluded
        # here explicitly, but don't have a measurable impact on performance.
        mkcmd('sift', [
            'sift', '-n', '--binary-skip',
            '--exclude-files', '.*',
            '--exclude-files', '*.pdf',
            pat,
        ]),
    ])


def bench_linux_unicode_word(suite_dir):
    r'''
    Benchmark Unicode aware \w character class.

    Only ripgrep and git-grep (with LC_ALL=en_US.UTF-8) actually get this
    right. Everything else uses the standard ASCII interpretation of \w.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = r'\wAh'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', pat,
        ]),
        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
        mkcmd('ag-novcs (no Unicode)', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift (no Unicode)', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_no_literal(suite_dir):
    '''
    Benchmark a regex that defeats all literal optimizations.

    Most search patterns have some kind of literal in them, which typically
    permits searches to take some shortcuts. Therefore, the applicability of
    this benchmark is somewhat suspicious, but the suite wouldn't feel
    complete without it.
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = r'\w{5}\s+\w{5}\s+\w{5}\s+\w{5}\s+\w{5}'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg (no Unicode)', ['rg', '-n', '(?-u)' + pat]),
        mkcmd('rg-novcs (no Unicode)', [
            'rg', '--no-ignore', '-n', '(?-u)' + pat,
        ]),
        mkcmd('ag (no Unicode)', ['ag', '-s', pat]),
        mkcmd('ag-novcs (no Unicode)', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg (no Unicode)', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'en_US.UTF-8'},
        ),
        mkcmd(
            'git grep (no Unicode)',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
        mkcmd('sift (no Unicode)', [
            'sift', '-n', '--binary-skip', '--exclude-files', '.*', pat,
        ]),
    ])


def bench_linux_alternates(suite_dir):
    '''
    Benchmark a small alternation of literals.

    sift doesn't make the cut. It's more than 10x slower than the next
    fastest result. The slowdown is likely because the Go regexp engine
    doesn't do any literal optimizations for this case (there is no common
    leading byte).
    '''
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', pat,
        ]),
        mkcmd('ag', ['ag', '-s', pat]),
        mkcmd('ag-novcs', [
            'ag', '--skip-vcs-ignores', '-s', pat,
        ]),
        mkcmd('ucg', ['ucg', '--nosmart-case', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', pat],
            env={'LC_ALL': 'C'},
        ),
    ])


def bench_linux_alternates_casei(suite_dir):
    'Benchmark a small alternation of literals case insensitively.'
    require(suite_dir, 'linux')
    mkcmd = _command_factory(path.join(suite_dir, LINUX_DIR))
    pat = 'ERR_SYS|PME_TURN_OFF|LINK_REQ_RST|CFG_BME_EVT'

    return Benchmark(pattern=pat, commands=[
        mkcmd('rg', ['rg', '-n', '-i', pat]),
        mkcmd('rg-novcs', ['rg', '--no-ignore', '-n', '-i', pat]),
        mkcmd('rg-novcs-mmap', [
            'rg', '--mmap', '--no-ignore', '-n', '-i', pat,
        ]),
        mkcmd('ag', ['ag', '-i', pat]),
        mkcmd('ag-novcs', [
            'ag', '--skip-vcs-ignores', '-i', pat,
        ]),
        mkcmd('ucg', ['ucg', '-i', pat]),
        mkcmd(
            'git grep',
            ['git', 'grep', '-E', '-I', '-n', '-i', pat],
            env={'LC_ALL': 'C'},
        ),
    ])


# BREADCRUMBS(burntsushi): We should benchmark an alternation for `linux` as
# well.


def bench_sherlock(suite_dir):
    'TODO: Fix this and add more single file benchmarks.'
    require(suite_dir, 'subtitles-en')
    en = path.join(suite_dir, SUBTITLES_DIR, SUBTITLES_EN_NAME)
    pat = 'Sherlock'

    return Benchmark(pattern=pat, commands=[
        Command('rg', ['rg', pat, en]),
        Command('grep', ['grep', '-a', pat, en]),
    ])


class MissingDependencies(Exception):
    '''
    A missing dependency exception.

    This exception occurs when running a benchmark that requires a particular
    corpus that isn't available.

    :ivar list(str) missing_names:
        A list of missing dependency names. These names correspond to
        names that can be used with the --download flag.
    '''
    def __init__(self, missing_names):
        super().__init__(missing_names)
        self.missing_names = missing_names

    def __str__(self):
        return 'MissingDependency(%s)' % repr(self.missing_names)


class Benchmark(object):
    '''
    A single benchmark corresponding to a grouping of commands.

    The main purpose of a benchmark is to compare the performance
    characteristics of a group of commands.
    '''

    def __init__(self, name=None, pattern=None, commands=None,
                 warmup_count=1, count=3, line_count=True):
        '''
        Create a single benchmark.

        A single benchmark is composed of a set of commands that are
        benchmarked and compared against one another. A benchmark may
        have multiple commands that use the same search tool (but
        probably should have something differentiating them).

        The grouping of commands is a purely human driven process.

        By default, the output of every command is sent to /dev/null.
        Other types of behavior are available via the methods defined on
        this benchmark.

        :param str name:
            A human readable string denoting the name of this benchmark.
        :param str pattern:
            The pattern that is used in search.
        :param list(Command) commands:
            A list of commands to initialize this benchmark with. More
            commands may be added before running the benchmark.
        :param int warmup_count:
            The number of times to run each command before recording
            samples.
        :param int count:
            The number of samples to collect from each command.
        :param bool line_count:
            When set, the lines of each search are counted and included
            in the samples produced.
        '''
        self.name = name
        self.pattern = pattern
        self.commands = commands or []
        self.warmup_count = warmup_count
        self.count = count
        self.line_count = line_count

    def run(self):
        '''
        Runs this benchmark and returns the results.

        Each command is first run ``warmup_count`` times without recording
        anything, and then ``count`` times with a sample recorded per run.

        :rtype: Result
        '''
        result = Result(self)
        for cmd in self.commands:
            # Do a warmup first.
            for _ in range(self.warmup_count):
                self.run_one(cmd)
            for _ in range(self.count):
                result.add(cmd, **self.run_one(cmd))
        return result

    def run_one(self, cmd):
        '''
        Runs the given command exactly once.

        Returns an object that includes the time taken by the command.
        If this benchmark was configured to count the number of lines
        returned, then the line count is also returned.

        Note that this sets the ``stdout`` and ``stderr`` keyword arguments
        on ``cmd`` itself.

        :param Command cmd: The command to run.
        :returns:
            A dict with two fields, duration and line_count.
            The duration is in seconds, with fractional milliseconds,
            and is guaranteed to be available. The line_count is set
            to None unless line counting is enabled, in which case,
            it is the number of lines in the search output.
        :rtype: dict
        '''
        cmd.kwargs['stderr'] = subprocess.DEVNULL
        if self.line_count:
            cmd.kwargs['stdout'] = subprocess.PIPE
        else:
            cmd.kwargs['stdout'] = subprocess.DEVNULL

        # perf_counter is monotonic, so an adjustment of the wall clock in
        # the middle of a run cannot skew (or negate) the measured duration.
        start = time.perf_counter()
        completed = cmd.run()
        end = time.perf_counter()

        line_count = None
        if self.line_count:
            line_count = completed.stdout.count(b'\n')
        return {
            'duration': end - start,
            'line_count': line_count,
        }


class Result(object):
    '''
    The result of running a benchmark.

    Benchmark results consist of a set of samples, where each sample
    corresponds to a single run of a single command in the benchmark.
    Various statistics can be computed from these samples such as mean
    and standard deviation.
    '''
    def __init__(self, benchmark):
        '''
        Create a new set of results, initially empty.

        :param Benchmark benchmark:
            The benchmark that produced these results.
        '''
        self.benchmark = benchmark
        self.samples = []

    def add(self, cmd, duration, line_count=None):
        '''
        Add a new sample to this result set.

        :param Command cmd:
            The command that produced this sample.
        :param float duration:
            The duration, in seconds, that the command took to run.
        :param int line_count:
            The number of lines in the search output. This is optional.
        '''
        self.samples.append({
            'cmd': cmd,
            'duration': duration,
            'line_count': line_count,
        })

    def fastest_sample(self):
        '''
        Returns the fastest recorded sample.
        '''
        return min(self.samples, key=lambda s: s['duration'])

    def fastest_cmd(self):
        '''
        Returns the fastest command according to distribution.

        That is, the command of the benchmark with the smallest mean
        duration.
        '''
        means = []
        for cmd in self.benchmark.commands:
            mean, _ = self.distribution_for(cmd)
            means.append((cmd, mean))
        return min(means, key=lambda tup: tup[1])[0]

    def samples_for(self, cmd):
        'Returns an iterable of samples for cmd (matched by command name).'
        yield from (s for s in self.samples if s['cmd'].name == cmd.name)

    def line_counts_for(self, cmd):
        '''
        Returns the distinct line counts recorded for the given command.

        Samples without a line count are ignored.

        :rtype: set(int)
        :returns: The set of line counts recorded for ``cmd``.
        '''
        return {s['line_count']
                for s in self.samples_for(cmd)
                if s['line_count'] is not None}

    def distribution_for(self, cmd):
        '''
        Returns the distribution (mean +/- std) of the given command.

        When only a single sample was recorded, the standard deviation is
        reported as 0.0 since a sample deviation is undefined for fewer
        than two data points.

        :rtype: (float, float)
        :returns:
            A tuple containing the mean and standard deviation, in that
            order.
        :raises statistics.StatisticsError:
            If no samples were recorded for ``cmd``.
        '''
        durations = [s['duration'] for s in self.samples_for(cmd)]
        mean = statistics.mean(durations)
        stdev = statistics.stdev(durations) if len(durations) > 1 else 0.0
        return mean, stdev


class Command(object):
    '''
    A named command line invocation that is run as part of a benchmark.
    '''
    def __init__(self, name, cmd, *args, **kwargs):
        '''
        Create a new command that is run as part of a benchmark.

        *args and **kwargs are passed directly to ``subprocess.run``.
        An exception to this is stdin/stdout/stderr. Output
        redirection is completely controlled by the benchmark harness.
        Trying to set them here will trigger an assert.

        :param str name:
            The human readable name of this command. This is
            particularly useful if the same search tool is used
            multiple times in the same benchmark with different
            arguments.
        :param list(str) cmd:
            The command to run as a list of arguments (including the
            command name itself).
        '''
        assert 'stdin' not in kwargs
        assert 'stdout' not in kwargs
        assert 'stderr' not in kwargs

        self.name = name
        self.cmd = cmd
        self.args = args
        self.kwargs = kwargs

    def run(self):
        '''
        Runs this command and returns its status.

        :rtype: subprocess.CompletedProcess
        '''
        return subprocess.run(self.cmd, *self.args, **self.kwargs)


def eprint(*args, **kwargs):
    'Like print, but to stderr.'
    kwargs['file'] = sys.stderr
    print(*args, **kwargs)


def run_cmd(cmd, *args, **kwargs):
    '''
    Print the command to stderr and run it. If the command fails, throw a
    traceback.

    :param list(str) cmd: The command to run as a list of arguments.
    :rtype: subprocess.CompletedProcess
    '''
    eprint('# %s' % ' '.join(cmd))
    kwargs['check'] = True
    return subprocess.run(cmd, *args, **kwargs)


def require(suite_dir, *names):
    '''
    Declare a dependency on the given names for a benchmark. If any dependency
    doesn't exist, then fail with an error message.

    Each name is mapped to a module level ``has_<name>`` function (with
    dashes replaced by underscores) which is called with ``suite_dir``.

    :raises MissingDependencies: If at least one dependency is missing.
    '''
    errs = []
    for name in names:
        fun_name = name.replace('-', '_')
        if not globals()['has_%s' % fun_name](suite_dir):
            errs.append(name)
    if errs:
        raise MissingDependencies(errs)


def download_linux(suite_dir):
    'Download and build the Linux kernel.'
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    if not os.path.isdir(checkout_dir):
        # Clone from my fork so that we always get the same corpus *and* still
        # do a shallow clone. Shallow clones are much much cheaper than full
        # clones.
        run_cmd(['git', 'clone', '--depth', '1', LINUX_CLONE, checkout_dir])
    # We want to build the kernel because the process of building it produces
    # a lot of junk in the repository that a search tool probably shouldn't
    # touch.
    if not os.path.exists(path.join(checkout_dir, 'vmlinux')):
        eprint('# Building Linux kernel...')
        run_cmd(['make', 'defconfig'], cwd=checkout_dir)
        run_cmd(['make', '-j', str(cpu_count())], cwd=checkout_dir)


def has_linux(suite_dir):
    'Returns true if we believe the Linux kernel is built.'
    checkout_dir = path.join(suite_dir, LINUX_DIR)
    return path.exists(path.join(checkout_dir, 'vmlinux'))


def _download_subtitles(suite_dir, name, name_gz, url):
    '''
    Download and decompress one subtitle corpus, skipping finished steps.

    :param str suite_dir: The directory containing corpora.
    :param str name: The file name of the decompressed corpus.
    :param str name_gz: The file name of the compressed corpus.
    :param str url: The URL from which the compressed corpus is fetched.
    '''
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    os.makedirs(subtitle_dir, exist_ok=True)
    if os.path.exists(path.join(subtitle_dir, name)):
        return
    if not os.path.exists(path.join(subtitle_dir, name_gz)):
        run_cmd(['curl', '-LO', url], cwd=subtitle_dir)
    # gunzip runs inside subtitle_dir, so it is given the bare file name. A
    # path that includes suite_dir would be resolved against subtitle_dir and
    # therefore break whenever suite_dir is a relative path.
    run_cmd(['gunzip', name_gz], cwd=subtitle_dir)


def download_subtitles_en(suite_dir):
    'Download and decompress English subtitles.'
    _download_subtitles(
        suite_dir, SUBTITLES_EN_NAME, SUBTITLES_EN_NAME_GZ, SUBTITLES_EN_URL)


def has_subtitles_en(suite_dir):
    'Returns true if English subtitles have been downloaded.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    return path.exists(path.join(subtitle_dir, SUBTITLES_EN_NAME))


def download_subtitles_ru(suite_dir):
    'Download and decompress Russian subtitles.'
    _download_subtitles(
        suite_dir, SUBTITLES_RU_NAME, SUBTITLES_RU_NAME_GZ, SUBTITLES_RU_URL)


def has_subtitles_ru(suite_dir):
    'Returns true if Russian subtitles have been downloaded.'
    subtitle_dir = path.join(suite_dir, SUBTITLES_DIR)
    return path.exists(path.join(subtitle_dir, SUBTITLES_RU_NAME))


def download(suite_dir, choices):
    '''
    Download choices into suite_dir.

    Specifically, choices specifies a list of corpora to fetch. An
    unrecognized choice prints an error to stderr and exits the process
    with status 1.

    :param str suite_dir:
        The directory in which to download corpora.
    :param list(str) choices:
        A list of corpora to download. Available choices are:
        all, linux, subtitles-en, subtitles-ru.
    '''
    for choice in choices:
        if choice == 'linux':
            download_linux(suite_dir)
        elif choice == 'subtitles-en':
            download_subtitles_en(suite_dir)
        elif choice == 'subtitles-ru':
            download_subtitles_ru(suite_dir)
        elif choice == 'all':
            download_linux(suite_dir)
            download_subtitles_en(suite_dir)
            download_subtitles_ru(suite_dir)
        else:
            eprint('Unrecognized download choice: %s' % choice)
            sys.exit(1)


def collect_benchmarks(suite_dir, filter_pat=None):
    '''
    Return an iterable of all runnable benchmarks.

    Benchmarks are discovered by looking for module level names that start
    with ``bench_``, in sorted order.

    :param str suite_dir:
        The directory containing corpora.
    :param str filter_pat:
        A single regular expression that is used to filter benchmarks
        by their name. When not specified, all benchmarks are run.
    :returns:
        An iterable over all runnable benchmarks. If a benchmark
        requires corpora that are missing, then a log message is
        emitted to stderr and it is not yielded.
    '''
    for fun in sorted(globals()):
        if not fun.startswith('bench_'):
            continue
        name = re.sub('^bench_', '', fun)
        if filter_pat is not None and not re.search(filter_pat, name):
            continue
        try:
            benchmark = globals()[fun](suite_dir)
        except MissingDependencies as e:
            eprint(
                'missing: %s, skipping benchmark %s (try running with: %s)' % (
                    ', '.join(e.missing_names),
                    name,
                    ' '.join(['--download %s' % n for n in e.missing_names]),
                ))
            continue
        benchmark.name = name
        yield benchmark


def format_result(benchmark, result):
    '''
    Return the human readable report for one benchmark as a single string.

    The report is a header, an underline and then one line per command with
    its mean and standard deviation. A ``*`` after the command name marks
    the command with the fastest single sample and a ``*`` at the end of
    the line marks the command with the fastest mean.

    :param Benchmark benchmark: The benchmark that was run.
    :param Result result: The samples collected by running ``benchmark``.
    :rtype: str
    '''
    fastest_cmd = result.fastest_cmd()
    fastest_sample = result.fastest_sample()
    max_name_len = max(len(cmd.name) for cmd in benchmark.commands)

    header = '%s (pattern: %s)' % (benchmark.name, benchmark.pattern)
    lines = [header, '-' * len(header)]
    fmt = '{name:{pad}} {mean:0.3f} +/- {stdev:0.3f}{lines}{fast_cmd}'
    for cmd in benchmark.commands:
        name = cmd.name
        mean, stdev = result.distribution_for(cmd)
        line_counts = result.line_counts_for(cmd)
        show_fast_cmd, show_line_counts = '', ''
        if fastest_cmd.name == cmd.name:
            show_fast_cmd = '*'
        if fastest_sample['cmd'].name == cmd.name:
            name += '*'
        if line_counts:
            # Sorted so that the report does not depend on set ordering.
            counts = map(str, sorted(line_counts))
            show_line_counts = ' (lines: %s)' % ', '.join(counts)
        lines.append(fmt.format(
            name=name, pad=max_name_len + 2, fast_cmd=show_fast_cmd,
            mean=mean, stdev=stdev, lines=show_line_counts))
    return '\n'.join(lines)


def _write_raw_samples(writer, benchmark, result):
    '''
    Write one CSV row per collected sample of ``result`` to ``writer``.

    :param csv.DictWriter writer: The destination for the raw samples.
    :param Benchmark benchmark: The benchmark that was run.
    :param Result result: The samples collected by running ``benchmark``.
    '''
    for sample in result.samples:
        cmd, line_count = sample['cmd'], sample['line_count']
        env = ' '.join(['%s=%s' % (k, v)
                        for k, v in cmd.kwargs.get('env', {}).items()])
        writer.writerow({
            'benchmark': benchmark.name,
            'warmup_iter': benchmark.warmup_count,
            'iter': benchmark.count,
            'name': cmd.name,
            'command': ' '.join(cmd.cmd),
            'duration': sample['duration'],
            # Only a missing count is left empty; a count of 0 is real data.
            'lines': '' if line_count is None else line_count,
            'env': env,
        })


def main():
    '''
    Parse the command line and then download corpora, list benchmarks or
    run benchmarks, depending on the flags given.
    '''
    p = argparse.ArgumentParser(
        description='Command line search tool benchmark suite.')
    p.add_argument(
        '--dir', metavar='PATH', default=os.getcwd(),
        help='The directory in which to download data and perform searches.')
    p.add_argument(
        '--download', metavar='CORPUS', action='append',
        choices=['all', 'linux', 'subtitles-en', 'subtitles-ru'],
        help='Download and prepare corpus data, then exit without running '
             'any benchmarks. Note that this command is intended to be '
             'idempotent. WARNING: This downloads over a gigabyte of data, '
             'and also includes building the Linux kernel. If "all" is used '
             'then the total uncompressed size is around 13 GB.')
    p.add_argument(
        '-f', '--force', action='store_true',
        help='Overwrite existing files if there is a conflict.')
    p.add_argument(
        '--list', action='store_true',
        help='List available benchmarks by name.')
    p.add_argument(
        '--raw', metavar='PATH',
        help='Dump raw data (all samples collected) in CSV format to the '
             'file path provided.')
    p.add_argument(
        'bench', metavar='PAT', nargs='?',
        help='A regex pattern that will only run benchmarks that match.')
    args = p.parse_args()

    if args.download:
        download(args.dir, args.download)
        sys.exit(0)

    if args.list:
        for b in collect_benchmarks(args.dir, filter_pat=args.bench):
            print(b.name)
        sys.exit(0)

    if not path.isdir(args.dir):
        os.makedirs(args.dir)
    if args.raw is not None and path.exists(args.raw) and not args.force:
        eprint('File %s already exists (delete it or use --force)' % args.raw)
        sys.exit(1)

    raw_handle, raw_csv_wtr = None, None
    if args.raw is not None:
        fields = [
            'benchmark', 'warmup_iter', 'iter',
            'name', 'command', 'duration', 'lines', 'env',
        ]
        # The csv module does its own newline handling, hence newline=''.
        raw_handle = open(args.raw, 'w', newline='')
        raw_csv_wtr = csv.DictWriter(raw_handle, fields)
        raw_csv_wtr.writeheader()

    try:
        benchmarks = collect_benchmarks(args.dir, filter_pat=args.bench)
        for i, b in enumerate(benchmarks):
            result = b.run()
            if i > 0:
                print()
            print(format_result(b, result))
            sys.stdout.flush()

            if raw_csv_wtr is not None:
                _write_raw_samples(raw_csv_wtr, b, result)
                # Flush per benchmark so that an interrupted run still
                # leaves the samples collected so far on disk.
                raw_handle.flush()
    finally:
        if raw_handle is not None:
            raw_handle.close()


if __name__ == '__main__':
    main()