From 4f7f59b990bbdf8edfdd7e357031c097678251c8 Mon Sep 17 00:00:00 2001
From: Batuhan Taskaya
Date: Tue, 14 Dec 2021 18:05:25 +0300
Subject: [PATCH] Add initial benchmarking infrastructure (#1232)

* Add initial benchmarking infrastructure

* Add CI file

* Try to comment on commits

* Implement file download benchmarks!

* drop commit comments (they dont work)

* Allow running local binary

* Better action

* More docs!

* Better look?

* even better look

* add pretty=all, none benchmarks
---
 .github/workflows/benchmark.yml |  52 ++++++
 CONTRIBUTING.md                 |  14 ++
 Makefile                        |   2 +-
 extras/profiling/benchmarks.py  | 203 ++++++++++++++++++++++
 extras/profiling/run.py         | 287 ++++++++++++++++++++++++++++++++
 5 files changed, 557 insertions(+), 1 deletion(-)
 create mode 100644 .github/workflows/benchmark.yml
 create mode 100644 extras/profiling/benchmarks.py
 create mode 100644 extras/profiling/run.py

diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml
new file mode 100644
index 00000000..560835da
--- /dev/null
+++ b/.github/workflows/benchmark.yml
@@ -0,0 +1,52 @@
+name: Benchmark
+
+on:
+  pull_request:
+    types: [ labeled ]
+
+permissions:
+  issues: write
+  pull-requests: write
+
+jobs:
+  test:
+    if: github.event.label.name == 'benchmark'
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.9"
+
+      - id: benchmarks
+        name: Run Benchmarks
+        run: |
+          python -m pip install 'pyperf>=2.3.0'
+          python extras/profiling/run.py --fresh --complex --min-speed=6 --file output.txt
+          body=$(cat output.txt)
+          body="${body//'%'/'%25'}"
+          body="${body//$'\n'/'%0A'}"
+          body="${body//$'\r'/'%0D'}"
+          echo "::set-output name=body::$body"
+
+      - name: Find Comment
+        uses: peter-evans/find-comment@v1
+        id: fc
+        with:
+          issue-number: ${{ github.event.pull_request.number }}
+          comment-author: 'github-actions[bot]'
+          body-includes: '# Benchmarks'
+
+      - name: Create or update comment
+        uses: peter-evans/create-or-update-comment@v1
+        with:
+          comment-id: ${{ steps.fc.outputs.comment-id }}
+          issue-number: ${{ github.event.pull_request.number }}
+          body: |
+            # Benchmarks
+            ${{ steps.benchmarks.outputs.body }}
+          edit-mode: replace
+
+      - uses: actions-ecosystem/action-remove-labels@v1
+        with:
+          labels: benchmark
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 6b015ca6..2eeac05d 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -144,6 +144,20 @@ $ python -m pytest tests/test_uploads.py::TestMultipartFormDataFileUpload::test_
 
 See [Makefile](https://github.com/httpie/httpie/blob/master/Makefile) for additional development utilities.
 
+#### Running benchmarks
+
+If you are working on speeding up HTTPie and want to verify your results, you
+can run the benchmark suite. The suite compares the last commit of your branch
+with the master branch of your repository (or, when `--fresh` is passed, with a
+fresh checkout of HTTPie master) and reports the results back.
+
+```bash
+$ python extras/profiling/run.py
+```
+
+The benchmarks can also be run on CI. Since this is a long process, it requires
+manual approval: ping one of the maintainers to get a `benchmark` label added
+to your pull request.
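+
+For example, to compare your branch against a fresh upstream checkout, including
+the extra `--complex` environments, and to hide differences below 6% (this mirrors
+what the CI benchmark job above runs):
+
+```bash
+$ python extras/profiling/run.py --fresh --complex --min-speed=6
+```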
+
 
 #### Windows
 
 If you are on a Windows machine and not able to run `make`,
diff --git a/Makefile b/Makefile
index 2bcc2697..ed9ea9b9 100644
--- a/Makefile
+++ b/Makefile
@@ -130,7 +130,7 @@ pycodestyle: codestyle
 codestyle:
 	@echo $(H1)Running flake8$(H1END)
 	@[ -f $(VENV_BIN)/flake8 ] || $(VENV_PIP) install --upgrade --editable '.[dev]'
-	$(VENV_BIN)/flake8 httpie/ tests/ docs/packaging/brew/ *.py
+	$(VENV_BIN)/flake8 httpie/ tests/ extras/profiling/ docs/packaging/brew/ *.py
 	@echo
diff --git a/extras/profiling/benchmarks.py b/extras/profiling/benchmarks.py
new file mode 100644
index 00000000..50a53a5a
--- /dev/null
+++ b/extras/profiling/benchmarks.py
@@ -0,0 +1,203 @@
+"""
+This file declares the benchmarks for HTTPie. It is also
+used to run them in the current environment.
+
+Each instance of a BaseRunner subclass is an individual
+benchmark. When run without any arguments, this file
+executes every registered benchmark and reports the
+timings.
+
+The benchmarks are run through 'pyperf', which produces
+very precise results. For micro-benchmarks like startup,
+please run `pyperf system tune` to get even more accurate results.
+
+Examples:
+
+    # Run everything as usual; the default is 3 warmup runs
+    # and 5 recorded runs.
+    $ python extras/profiling/benchmarks.py
+
+    # To retrieve results faster, pass --fast
+    $ python extras/profiling/benchmarks.py --fast
+
+    # To verify that everything works as expected, pass --debug-single-value.
+    # It will only run everything once, so the results are not reliable, but
+    # it is very useful when iterating on a benchmark.
+    $ python extras/profiling/benchmarks.py --debug-single-value
+
+    # If you want to run with a custom HTTPie command (for example with
+    # an HTTPie instance installed in another virtual environment),
+    # pass the HTTPIE_COMMAND variable.
+    $ HTTPIE_COMMAND="/my/python /my/httpie" python extras/profiling/benchmarks.py
+"""
+
+from __future__ import annotations
+
+import os
+import shlex
+import subprocess
+import sys
+import threading
+from contextlib import ExitStack, contextmanager
+from dataclasses import dataclass, field
+from functools import cached_property, partial
+from http.server import HTTPServer, SimpleHTTPRequestHandler
+from tempfile import TemporaryDirectory
+from typing import ClassVar, Final, List
+
+import pyperf
+
+# For the download benchmarks, define a set of files to serve.
+# file: (block_size, count) => total_size = block_size * count
+PREDEFINED_FILES: Final = {'3G': (3 * 1024 ** 2, 1024)}
+
+
+# A request handler that suppresses per-request logging so it does
+# not pollute the benchmark output.
+class QuietSimpleHTTPServer(SimpleHTTPRequestHandler):
+    def log_message(self, *args, **kwargs):
+        pass
+
+
+@contextmanager
+def start_server():
+    """Create a local server to serve the PREDEFINED_FILES,
+    which are generated with dd before serving."""
+    with TemporaryDirectory() as directory:
+        for file_name, (block_size, count) in PREDEFINED_FILES.items():
+            subprocess.check_call(
+                [
+                    'dd',
+                    'if=/dev/zero',
+                    f'of={file_name}',
+                    f'bs={block_size}',
+                    f'count={count}',
+                ],
+                cwd=directory,
+                stdout=subprocess.DEVNULL,
+                stderr=subprocess.DEVNULL,
+            )
+
+        handler = partial(QuietSimpleHTTPServer, directory=directory)
+        server = HTTPServer(('localhost', 0), handler)
+
+        thread = threading.Thread(target=server.serve_forever)
+        thread.start()
+        yield '{}:{}'.format(*server.socket.getsockname())
+        server.shutdown()
+        thread.join(timeout=0.5)
+
+
+@dataclass
+class Context:
+    benchmarks: ClassVar[List[BaseRunner]] = []
+    stack: ExitStack = field(default_factory=ExitStack)
+    runner: pyperf.Runner = field(default_factory=pyperf.Runner)
+
+    def run(self) -> pyperf.BenchmarkSuite:
+        results = [benchmark.run(self) for benchmark in self.benchmarks]
+        return pyperf.BenchmarkSuite(results)
+
+    @property
+    def cmd(self) -> List[str]:
+        if cmd := os.getenv('HTTPIE_COMMAND'):
+            return shlex.split(cmd)
+
+        http = os.path.join(os.path.dirname(sys.executable), 'http')
+        assert os.path.exists(http)
+        return [sys.executable, http]
+
+    @cached_property
+    def server(self) -> str:
+        return self.stack.enter_context(start_server())
+
+    def __enter__(self):
+        return self
+
+    def __exit__(self, *exc_info):
+        self.stack.close()
+
+
+@dataclass
+class BaseRunner:
+    """
+    An individual benchmark case. Each runner has a category
+    (e.g. startup or download) and a title.
+    """
+
+    category: str
+    title: str
+
+    def __post_init__(self):
+        Context.benchmarks.append(self)
+
+    def run(self, context: Context) -> pyperf.Benchmark:
+        raise NotImplementedError
+
+    @property
+    def name(self) -> str:
+        return f'{self.title} ({self.category})'
+
+
+@dataclass
+class CommandRunner(BaseRunner):
+    """
+    Run a single command and benchmark it.
+    """
+
+    args: List[str]
+
+    def run(self, context: Context) -> pyperf.Benchmark:
+        return context.runner.bench_command(self.name, [*context.cmd, *self.args])
+
+
+@dataclass
+class DownloadRunner(BaseRunner):
+    """
+    Benchmark downloading a single file from the
+    local benchmark server.
+    """
+
+    file_name: str
+
+    def run(self, context: Context) -> pyperf.Benchmark:
+        return context.runner.bench_command(
+            self.name,
+            [
+                *context.cmd,
+                '--download',
+                'GET',
+                f'{context.server}/{self.file_name}',
+            ],
+        )
+
+
+CommandRunner('startup', '`http --version`', ['--version'])
+CommandRunner('startup', '`http --offline pie.dev/get`', ['--offline', 'pie.dev/get'])
+for pretty in ['all', 'none']:
+    CommandRunner(
+        'startup',
+        f'`http --pretty={pretty} httpbin.org/stream/100`',
+        [
+            '--print=HBhb',
+            '--stream',
+            f'--pretty={pretty}',
+            'httpbin.org/stream/100'
+        ]
+    )
+DownloadRunner('download', '`http --download :/big_file.txt` (3GB)', '3G')
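+
+# An illustrative (hypothetical) example of how an additional benchmark could
+# be declared: instantiating a runner at module level is enough, because
+# BaseRunner.__post_init__ registers it on Context.benchmarks, e.g.
+#
+#   CommandRunner('startup', '`http --offline --form pie.dev/post`',
+#                 ['--offline', '--form', 'pie.dev/post', 'name=value'])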
+
+
+def main() -> None:
+    # pyperf brings its own argument parser, so configure the script
+    # through sys.argv. The settings below are a reasonable trade-off
+    # between speed and precision: 3 warmup runs (especially important
+    # for the download benchmarks) followed by 5 recorded values.
+    sys.argv.extend(
+        ['--worker', '--loops=1', '--warmup=3', '--values=5', '--processes=2']
+    )
+
+    with Context() as context:
+        context.run()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/extras/profiling/run.py b/extras/profiling/run.py
new file mode 100644
index 00000000..86913673
--- /dev/null
+++ b/extras/profiling/run.py
@@ -0,0 +1,287 @@
+"""
+Run the HTTPie benchmark suite against multiple environments.
+
+This script creates two (or more) isolated environments and
+compares the *last commit* of this repository with its master
+branch.
+
+> If you haven't committed your changes yet, they won't show
+> up in the results.
+
+You can also pass --fresh, which tests the *last commit* of
+this repository against a fresh copy of HTTPie itself. This
+way, even if your local master branch is not up to date, you
+can still compare against upstream's master.
+
+You can also pass --complex to add 2 additional environments,
+which include extra dependencies such as pyOpenSSL.
+
+Examples:
+
+    # Run everything as usual, and compare the last commit with master
+    $ python extras/profiling/run.py
+
+    # Include the complex environments
+    $ python extras/profiling/run.py --complex
+
+    # Compare against a fresh copy
+    $ python extras/profiling/run.py --fresh
+
+    # Compare against a custom branch of a custom repo
+    $ python extras/profiling/run.py --target-repo my_repo --target-branch my_branch
+
+    # Debug changes made to this script (only run the benchmarks once)
+    $ python extras/profiling/run.py --debug
+"""
+
+import dataclasses
+import shlex
+import subprocess
+import sys
+import tempfile
+import venv
+from argparse import ArgumentParser, FileType
+from contextlib import contextmanager
+from dataclasses import dataclass
+from pathlib import Path
+from typing import (IO, Dict, Generator, Iterable, List, Optional,
+                    Tuple)
+
+BENCHMARK_SCRIPT = Path(__file__).parent / 'benchmarks.py'
+CURRENT_REPO = Path(__file__).parent.parent.parent
+
+GITHUB_URL = 'https://github.com/httpie/httpie.git'
+TARGET_BRANCH = 'master'
+
+# Additional dependencies for --complex
+ADDITIONAL_DEPS = ('pyOpenSSL',)
+
+
+def call(*args, **kwargs):
+    kwargs.setdefault('stdout', subprocess.DEVNULL)
+    return subprocess.check_call(*args, **kwargs)
+
+
+class Environment:
+    """
+    Each environment defines how to create an isolated instance
+    in which we can install HTTPie and run the benchmarks without
+    interference from external factors.
+    """
+
+    @contextmanager
+    def on_repo(self) -> Generator[Tuple[Path, Dict[str, str]], None, None]:
+        """
+        Return the path to the Python interpreter and the
+        environment variables (e.g. HTTPIE_COMMAND) to be
+        used in the benchmarks.
+ """ + raise NotImplementedError + + +@dataclass +class HTTPieEnvironment(Environment): + repo_url: str + branch: Optional[str] = None + dependencies: Iterable[str] = () + + @contextmanager + def on_repo(self) -> Generator[Path, None, None]: + with tempfile.TemporaryDirectory() as directory_path: + directory = Path(directory_path) + + # Clone the repo + repo_path = directory / 'httpie' + call( + ['git', 'clone', self.repo_url, repo_path], + stderr=subprocess.DEVNULL, + ) + + if self.branch is not None: + call( + ['git', 'checkout', self.branch], + cwd=repo_path, + stderr=subprocess.DEVNULL, + ) + + # Prepare the environment + venv_path = directory / '.venv' + venv.create(venv_path, with_pip=True) + + # Install basic dependencies + python = venv_path / 'bin' / 'python' + call( + [ + python, + '-m', + 'pip', + 'install', + 'wheel', + 'pyperf==2.3.0', + *self.dependencies, + ] + ) + + # Create a wheel distribution of HTTPie + call([python, 'setup.py', 'bdist_wheel'], cwd=repo_path) + + # Install httpie + distribution_path = next((repo_path / 'dist').iterdir()) + call( + [python, '-m', 'pip', 'install', distribution_path], + cwd=repo_path, + ) + + http = venv_path / 'bin' / 'http' + yield python, {'HTTPIE_COMMAND': shlex.join([str(python), str(http)])} + + +@dataclass +class LocalCommandEnvironment(Environment): + local_command: str + + @contextmanager + def on_repo(self) -> Generator[Path, None, None]: + yield sys.executable, {'HTTPIE_COMMAND': self.local_command} + + +def dump_results( + results: List[str], + file: IO[str], + min_speed: Optional[str] = None +) -> None: + for result in results: + lines = result.strip().splitlines() + if min_speed is not None and "hidden" in lines[-1]: + lines[-1] = ( + 'Some benchmarks were hidden from this list ' + 'because their timings did not change in a ' + 'significant way (change was within the error ' + 'margin ±{margin}%).' 
+ ).format(margin=min_speed) + result = '\n'.join(lines) + + print(result, file=file) + print("\n---\n", file=file) + + +def compare(*args, directory: Path, min_speed: Optional[str] = None): + compare_args = ['pyperf', 'compare_to', '--table', '--table-format=md', *args] + if min_speed: + compare_args.extend(['--min-speed', min_speed]) + return subprocess.check_output( + compare_args, + cwd=directory, + text=True, + ) + + +def run( + configs: List[Dict[str, Environment]], + file: IO[str], + debug: bool = False, + min_speed: Optional[str] = None, +) -> None: + result_directory = Path(tempfile.mkdtemp()) + results = [] + + current = 1 + total = sum(1 for config in configs for _ in config.items()) + + def iterate(env_name, status): + print( + f'Iteration: {env_name} ({current}/{total}) ({status})' + ' ' * 10, + end='\r', + flush=True, + ) + + for config in configs: + for env_name, env in config.items(): + iterate(env_name, 'setting up') + with env.on_repo() as (python, env_vars): + iterate(env_name, 'running benchmarks') + args = [python, BENCHMARK_SCRIPT, '-o', env_name] + if debug: + args.append('--debug-single-value') + call( + args, + cwd=result_directory, + env=env_vars, + ) + current += 1 + + results.append(compare( + *config.keys(), + directory=result_directory, + min_speed=min_speed + )) + + dump_results(results, file=file, min_speed=min_speed) + print('Results are available at:', result_directory) + + +def main() -> None: + parser = ArgumentParser() + parser.add_argument('--local-repo', default=CURRENT_REPO) + parser.add_argument('--local-branch', default=None) + parser.add_argument('--target-repo', default=CURRENT_REPO) + parser.add_argument('--target-branch', default=TARGET_BRANCH) + parser.add_argument( + '--fresh', + action='store_const', + const=GITHUB_URL, + dest='target_repo', + help='Clone the target repo from upstream GitHub URL', + ) + parser.add_argument( + '--complex', + action='store_true', + help='Add a second run, with a complex python environment.', + ) + parser.add_argument( + '--local-bin', + help='Run the suite with the given local binary in addition to' + ' existing runners. (E.g --local-bin $(command -v xh))', + ) + parser.add_argument( + '--file', + type=FileType('w'), + default=sys.stdout, + help='File to print the actual results', + ) + parser.add_argument( + '--min-speed', + help='Minimum of speed in percent to consider that a ' + 'benchmark is significant' + ) + parser.add_argument( + '--debug', + action='store_true', + ) + + options = parser.parse_args() + + configs = [] + + base_config = { + options.target_branch: HTTPieEnvironment(options.target_repo, options.target_branch), + 'this_branch': HTTPieEnvironment(options.local_repo, options.local_branch), + } + configs.append(base_config) + + if options.complex: + complex_config = { + env_name + + '-complex': dataclasses.replace(env, dependencies=ADDITIONAL_DEPS) + for env_name, env in base_config.items() + } + configs.append(complex_config) + + if options.local_bin: + base_config['binary'] = LocalCommandEnvironment(options.local_bin) + + run(configs, file=options.file, debug=options.debug, min_speed=options.min_speed) + + +if __name__ == '__main__': + main()