From aa3c1702c9b98d8ab8cb34eee50dd9f19a495bf3 Mon Sep 17 00:00:00 2001
From: Piotr Balcer
Date: Mon, 4 Nov 2024 11:06:45 +0100
Subject: [PATCH] improve benchmark scripts and add a nightly job

This patch:
- Adds a benchmark suite abstraction and moves all existing benchmarks
  into it. This makes individual benchmark types self-contained and
  allows us to skip over benchmarks that don't have their dependencies
  met.
- Makes sycl and ur optional, and they are no longer positional
  arguments.
- Creates a benchmark history that stores benchmark runs. This then
  enables us to do comparisons not just with the latest result but also
  against, e.g., a historical average.
- Adds a nightly job to store baseline results.
- Adds HTML output.
---
 .github/workflows/benchmarks-nightly.yml      |  38 ++
 ...ks_compute.yml => benchmarks-reusable.yml} |  73 ++--
 .github/workflows/benchmarks.yml              |  68 ++++
 .github/workflows/docs.yml                    |  18 +-
 README.md                                     |   1 +
 scripts/benchmarks/README.md                  |   7 +-
 scripts/benchmarks/benches/SobelFilter.py     |  39 --
 scripts/benchmarks/benches/base.py            |  15 +-
 scripts/benchmarks/benches/bitcracker.py      |  34 --
 scripts/benchmarks/benches/compute.py         |  48 ++-
 scripts/benchmarks/benches/cudaSift.py        |  35 --
 scripts/benchmarks/benches/easywave.py        |  56 ---
 scripts/benchmarks/benches/hashtable.py       |  34 --
 scripts/benchmarks/benches/options.py         |  15 +-
 scripts/benchmarks/benches/quicksilver.py     |  45 ---
 scripts/benchmarks/benches/result.py          |  13 +
 scripts/benchmarks/benches/syclbench.py       |  56 ++-
 scripts/benchmarks/benches/test.py            |  68 ++++
 scripts/benchmarks/benches/velocity.py        | 207 +++++++++-
 scripts/benchmarks/history.py                 | 135 +++++++
 scripts/benchmarks/main.py                    | 177 ++++-----
 scripts/benchmarks/output_html.py             | 358 ++++++++++++++++++
 .../{output.py => output_markdown.py}         |   9 +-
 scripts/benchmarks/utils/utils.py             |  23 +-
 third_party/benchmark_requirements.txt        |  43 +++
 25 files changed, 1182 insertions(+), 433 deletions(-)
 create mode 100644 .github/workflows/benchmarks-nightly.yml
 rename .github/workflows/{benchmarks_compute.yml => benchmarks-reusable.yml} (79%)
 create mode 100644 .github/workflows/benchmarks.yml
 delete mode 100644 scripts/benchmarks/benches/SobelFilter.py
 delete mode 100644 scripts/benchmarks/benches/bitcracker.py
 delete mode 100644 scripts/benchmarks/benches/cudaSift.py
 delete mode 100644 scripts/benchmarks/benches/easywave.py
 delete mode 100644 scripts/benchmarks/benches/hashtable.py
 delete mode 100644 scripts/benchmarks/benches/quicksilver.py
 create mode 100644 scripts/benchmarks/benches/test.py
 create mode 100644 scripts/benchmarks/history.py
 create mode 100644 scripts/benchmarks/output_html.py
 rename scripts/benchmarks/{output.py => output_markdown.py} (96%)
 create mode 100644 third_party/benchmark_requirements.txt

diff --git a/.github/workflows/benchmarks-nightly.yml b/.github/workflows/benchmarks-nightly.yml
new file mode 100644
index 0000000000..3da0d09c7a
--- /dev/null
+++ b/.github/workflows/benchmarks-nightly.yml
@@ -0,0 +1,38 @@
+name: Compute Benchmarks Nightly
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # Runs at midnight UTC every day
+
+permissions:
+  contents: read
+  pull-requests: write
+
+jobs:
+  nightly:
+    name: Compute Benchmarks Nightly level-zero
+    uses: ./.github/workflows/benchmarks-reusable.yml
+    with:
+      str_name: 'level_zero'
+      unit: 'gpu'
+      pr_no: 0
+      bench_script_params: '--save baseline'
+      sycl_config_params: ''
+      sycl_repo: 'intel/llvm'
+      sycl_commit: ''
+
+  nightly2:
+    # we need to wait until previous job is done so that the html report
+    # contains both runs
+    needs: 
nightly + name: Compute Benchmarks Nightly level-zero v2 + uses: ./.github/workflows/benchmarks-reusable.yml + with: + str_name: 'level_zero_v2' + unit: 'gpu' + pr_no: 0 + bench_script_params: '--save baseline-v2' + sycl_config_params: '' + sycl_repo: 'intel/llvm' + sycl_commit: '' + upload_report: true diff --git a/.github/workflows/benchmarks_compute.yml b/.github/workflows/benchmarks-reusable.yml similarity index 79% rename from .github/workflows/benchmarks_compute.yml rename to .github/workflows/benchmarks-reusable.yml index ee74a52ad0..dafa754cbd 100644 --- a/.github/workflows/benchmarks_compute.yml +++ b/.github/workflows/benchmarks-reusable.yml @@ -1,50 +1,39 @@ -name: Compute Benchmarks +name: Benchmarks Reusable on: - # Can be triggered via manual "dispatch" (from workflow view in GitHub Actions tab) - workflow_dispatch: - # acceptable input for adapter-specific runs + workflow_call: inputs: str_name: - description: Formatted adapter name - type: choice required: true - default: 'level_zero' - options: - - level_zero - - level_zero_v2 + type: string unit: - description: Test unit (cpu/gpu) - type: choice required: true - default: 'gpu' - options: - - cpu - - gpu + type: string pr_no: - description: PR number (if 0, it'll run on the main) - type: number required: true - bench_script_params: - description: Parameters passed to script executing benchmark + # even though this is a number, this is a workaround for issues with + # reusable workflow calls that result in "Unexpected value '0'" error. type: string + bench_script_params: required: false + type: string default: '' sycl_config_params: - description: Extra params for SYCL configuration - type: string required: false + type: string default: '' sycl_repo: - description: 'Compiler repo' - type: string required: true + type: string default: 'intel/llvm' sycl_commit: - description: 'Compiler commit' - type: string required: false + type: string default: '' + upload_report: + required: false + type: boolean + default: false permissions: contents: read @@ -56,19 +45,17 @@ jobs: strategy: matrix: adapter: [ - {str_name: "${{inputs.str_name}}", - sycl_config: "${{inputs.sycl_config_params}}", - unit: "${{inputs.unit}}" + {str_name: "${{ inputs.str_name }}", + sycl_config: "${{ inputs.sycl_config_params }}", + unit: "${{ inputs.unit }}" } ] build_type: [Release] compiler: [{c: clang, cxx: clang++}] - runs-on: "${{inputs.str_name}}_PERF" + runs-on: "${{ inputs.str_name }}_PERF" steps: - # Workspace on self-hosted runners is not cleaned automatically. - # We have to delete the files created outside of using actions. - name: Cleanup self-hosted workspace if: always() run: | @@ -99,7 +86,8 @@ jobs: path: ur-repo - name: Install pip packages - run: pip install -r ${{github.workspace}}/ur-repo/third_party/requirements.txt + run: | + pip install --force-reinstall -r ${{github.workspace}}/ur-repo/third_party/benchmark_requirements.txt # We need to fetch special ref for proper PR's merge commit. Note, this ref may be absent if the PR is already merged. 
- name: Fetch PR's merge commit @@ -169,13 +157,15 @@ jobs: run: cmake --install ${{github.workspace}}/ur_build - name: Run benchmarks + working-directory: ${{ github.workspace }}/ur-repo/ id: benchmarks run: > - numactl -N 0 ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py - ~/bench_workdir - ${{github.workspace}}/sycl_build - ${{github.workspace}}/ur_install - ${{ matrix.adapter.str_name }} + ${{ github.workspace }}/ur-repo/scripts/benchmarks/main.py + ~/bench_workdir + --sycl ${{ github.workspace }}/sycl_build + --ur ${{ github.workspace }}/ur_install + --adapter ${{ matrix.adapter.str_name }} + ${{ inputs.upload_report && '--output-html' || '' }} ${{ inputs.bench_script_params }} - name: Add comment to PR @@ -204,3 +194,10 @@ jobs: repo: context.repo.repo, body: body }) + + - name: Upload HTML report + if: ${{ always() && inputs.upload_report }} + uses: actions/cache/save@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + with: + path: ${{ github.workspace }}/ur-repo/benchmark_results.html + key: benchmark-results-${{ matrix.adapter.str_name }}-${{ github.run_id }} diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml new file mode 100644 index 0000000000..af62d40e85 --- /dev/null +++ b/.github/workflows/benchmarks.yml @@ -0,0 +1,68 @@ +name: Compute Benchmarks + +on: + workflow_dispatch: + inputs: + str_name: + description: Formatted adapter name + type: choice + required: true + default: 'level_zero' + options: + - level_zero + - level_zero_v2 + unit: + description: Test unit (cpu/gpu) + type: choice + required: true + default: 'gpu' + options: + - cpu + - gpu + pr_no: + description: PR number (if 0, it'll run on the main) + type: number + required: true + bench_script_params: + description: Parameters passed to script executing benchmark + type: string + required: false + default: '' + sycl_config_params: + description: Extra params for SYCL configuration + type: string + required: false + default: '' + sycl_repo: + description: 'Compiler repo' + type: string + required: true + default: 'intel/llvm' + sycl_commit: + description: 'Compiler commit' + type: string + required: false + default: '' + upload_report: + description: 'Upload HTML report' + type: boolean + required: false + default: false + +permissions: + contents: read + pull-requests: write + +jobs: + manual: + name: Compute Benchmarks + uses: ./.github/workflows/benchmarks-reusable.yml + with: + str_name: ${{ inputs.str_name }} + unit: ${{ inputs.unit }} + pr_no: ${{ inputs.pr_no }} + bench_script_params: ${{ inputs.bench_script_params }} + sycl_config_params: ${{ inputs.sycl_config_params }} + sycl_repo: ${{ inputs.sycl_repo }} + sycl_commit: ${{ inputs.sycl_commit }} + upload_report: ${{ inputs.upload_report }} diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 710aa659c8..bdd4cf1c52 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -45,7 +45,23 @@ jobs: - name: Build Documentation working-directory: ${{github.workspace}}/scripts - run: python3 run.py --core + run: | + python3 run.py --core + mkdir -p ${{ github.workspace }}/ur-repo/ + mkdir -p ${{github.workspace}}/docs/html + + - name: Download benchmark HTML + id: download-bench-html + uses: actions/cache/restore@6849a6489940f00c2f30c0fb92c6274307ccb58a # v4.1.2 + with: + path: ${{ github.workspace }}/ur-repo/benchmark_results.html + key: benchmark-results- + + - name: Move benchmark HTML + # exact or partial cache hit + if: steps.download-bench-html.outputs.cache-hit != '' + run: 
| + mv ${{ github.workspace }}/ur-repo/benchmark_results.html ${{ github.workspace }}/docs/html/ - name: Upload artifact uses: actions/upload-pages-artifact@0252fc4ba7626f0298f0cf00902a25c6afc77fa8 # v3.0.0 diff --git a/README.md b/README.md index 262a861b9d..dc70f43876 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ [![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/oneapi-src/unified-runtime/badge)](https://securityscorecards.dev/viewer/?uri=github.com/oneapi-src/unified-runtime) [![Trivy](https://github.com/oneapi-src/unified-runtime/actions/workflows/trivy.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/trivy.yml) [![Deploy documentation to Pages](https://github.com/oneapi-src/unified-runtime/actions/workflows/docs.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/docs.yml) +[![Compute Benchmarks Nightly](https://github.com/oneapi-src/unified-runtime/actions/workflows/benchmarks-nightly.yml/badge.svg)](https://github.com/oneapi-src/unified-runtime/actions/workflows/benchmarks-nightly.yml) diff --git a/scripts/benchmarks/README.md b/scripts/benchmarks/README.md index 64a7a3eeb9..bd6de60a0a 100644 --- a/scripts/benchmarks/README.md +++ b/scripts/benchmarks/README.md @@ -37,9 +37,10 @@ By default, the benchmark results are not stored. To store them, use the option To compare a benchmark run with a previously stored result, use the option `--compare `. You can compare with more than one result. -If no `--compare` option is specified, the benchmark run is compared against a previously stored `baseline`. This baseline is **not** automatically updated. To update it, use the `--save baseline` option. -The recommended way of updating the baseline is running the benchmarking -job on main after a merge of relevant changes. +If no `--compare` option is specified, the benchmark run is compared against a previously stored `baseline`. + +Baseline, as well as baseline-v2 (for the level-zero adapter v2) is updated automatically during a nightly job. The results +are stored [here](https://oneapi-src.github.io/unified-runtime/benchmark_results.html). ## Requirements diff --git a/scripts/benchmarks/benches/SobelFilter.py b/scripts/benchmarks/benches/SobelFilter.py deleted file mode 100644 index b9e7619e47..0000000000 --- a/scripts/benchmarks/benches/SobelFilter.py +++ /dev/null @@ -1,39 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import re - -class SobelFilter(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("sobel_filter", "sobel_filter", vb) - - def download_deps(self): - self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz") - return - - def name(self): - return "Velocity-Bench Sobel Filter" - - def unit(self): - return "ms" - - def bin_args(self) -> list[str]: - return ["-i", f"{self.data_path}/sobel_filter_data/silverfalls_32Kx32K.png", - "-n", "5"] - - def extra_env_vars(self) -> dict: - return {"OPENCV_IO_MAX_IMAGE_PIXELS" : "1677721600"} - - def parse_output(self, stdout: str) -> float: - match = re.search(r'sobelfilter - total time for whole calculation: (\d+\.\d+) s', stdout) - if match: - return round(float(match.group(1)) * 1000, 3) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") - diff --git a/scripts/benchmarks/benches/base.py b/scripts/benchmarks/benches/base.py index 36f252cb42..3871938bfd 100644 --- a/scripts/benchmarks/benches/base.py +++ b/scripts/benchmarks/benches/base.py @@ -20,7 +20,7 @@ def __init__(self, directory): def get_adapter_full_path(): for libs_dir_name in ['lib', 'lib64']: adapter_path = os.path.join( - options.ur_dir, libs_dir_name, f"libur_adapter_{options.ur_adapter_name}.so") + options.ur, libs_dir_name, f"libur_adapter_{options.ur_adapter}.so") if os.path.isfile(adapter_path): return adapter_path assert False, \ @@ -28,8 +28,10 @@ def get_adapter_full_path(): def run_bench(self, command, env_vars): env_vars_with_forced_adapter = env_vars.copy() - env_vars_with_forced_adapter.update( - {'UR_ADAPTERS_FORCE_LOAD': Benchmark.get_adapter_full_path()}) + if options.ur is not None: + env_vars_with_forced_adapter.update( + {'UR_ADAPTERS_FORCE_LOAD': Benchmark.get_adapter_full_path()}) + return run( command=command, env_vars=env_vars_with_forced_adapter, @@ -76,3 +78,10 @@ def run(self, env_vars) -> list[Result]: def teardown(self): raise NotImplementedError() + +class Suite: + def benchmarks(self) -> list[Benchmark]: + raise NotImplementedError() + + def setup(self): + return diff --git a/scripts/benchmarks/benches/bitcracker.py b/scripts/benchmarks/benches/bitcracker.py deleted file mode 100644 index bb198433fa..0000000000 --- a/scripts/benchmarks/benches/bitcracker.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re - -class Bitcracker(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("bitcracker", "bitcracker", vb) - self.data_path = os.path.join(vb.repo_path, "bitcracker", "hash_pass") - - def name(self): - return "Velocity-Bench Bitcracker" - - def unit(self): - return "s" - - def bin_args(self) -> list[str]: - return ["-f", f"{self.data_path}/img_win8_user_hash.txt", - "-d", f"{self.data_path}/user_passwords_60000.txt", - "-b", "60000"] - - def parse_output(self, stdout: str) -> float: - match = re.search(r'bitcracker - total time for whole calculation: (\d+\.\d+) s', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") diff --git a/scripts/benchmarks/benches/compute.py b/scripts/benchmarks/benches/compute.py index 9c84739f75..e08109f77e 100644 --- a/scripts/benchmarks/benches/compute.py +++ b/scripts/benchmarks/benches/compute.py @@ -7,17 +7,16 @@ import csv import io from utils.utils import run, git_clone, create_build_path -from .base import Benchmark +from .base import Benchmark, Suite from .result import Result from .options import options -class ComputeBench: +class ComputeBench(Suite): def __init__(self, directory): self.directory = directory - self.built = False def setup(self): - if self.built: + if options.sycl is None: return repo_path = git_clone(self.directory, "compute-benchmarks-repo", "https://github.com/intel/compute-benchmarks.git", "aa6a3b2108bb86202b654ad28129156fa746d41d") @@ -31,10 +30,14 @@ def setup(self): f"-DBUILD_SYCL=ON", f"-DSYCL_COMPILER_ROOT={options.sycl}", f"-DALLOW_WARNINGS=ON", - f"-DBUILD_UR=ON", - f"-Dunified-runtime_DIR={options.ur_dir}/lib/cmake/unified-runtime", ] + if options.ur is not None: + configure_command += [ + f"-DBUILD_UR=ON", + f"-Dunified-runtime_DIR={options.ur}/lib/cmake/unified-runtime", + ] + print(f"{self.__class__.__name__}: Run {configure_command}") run(configure_command, add_sycl=True) print(f"{self.__class__.__name__}: Run cmake --build {build_path} -j") @@ -42,6 +45,36 @@ def setup(self): self.built = True + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + benches = [ + SubmitKernelSYCL(self, 0), + SubmitKernelSYCL(self, 1), + QueueInOrderMemcpy(self, 0, 'Device', 'Device', 1024), + QueueInOrderMemcpy(self, 0, 'Host', 'Device', 1024), + QueueMemcpy(self, 'Device', 'Device', 1024), + StreamMemory(self, 'Triad', 10 * 1024, 'Device'), + ExecImmediateCopyQueue(self, 0, 1, 'Device', 'Device', 1024), + ExecImmediateCopyQueue(self, 1, 1, 'Device', 'Host', 1024), + VectorSum(self), + MemcpyExecute(self, 400, 8, 1024, 100), + MemcpyExecute(self, 400, 8, 102400, 10), + MemcpyExecute(self, 500, 8, 102400, 10), + MemcpyExecute(self, 400, 1, 1024, 1000), + MemcpyExecute(self, 10, 16, 1024, 1000), + MemcpyExecute(self, 10, 16, 102400, 100), + ] + + if options.ur is not None: + benches += [ + SubmitKernelUR(self, 0), + SubmitKernelUR(self, 1), + ] + + return benches + class ComputeBenchmark(Benchmark): def __init__(self, bench, name, test): self.bench = bench @@ -60,7 +93,6 @@ def unit(self): def setup(self): self.benchmark_bin = os.path.join(self.bench.directory, 'compute-benchmarks-build', 'bin', self.bench_name) - self.bench.setup() def run(self, env_vars) -> 
list[Result]: command = [ @@ -75,7 +107,7 @@ def run(self, env_vars) -> list[Result]: result = self.run_bench(command, env_vars) (label, mean) = self.parse_output(result) - return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ] + return [ Result(label=self.name(), value=mean, command=command, env=env_vars, stdout=result) ] def parse_output(self, output): csv_file = io.StringIO(output) diff --git a/scripts/benchmarks/benches/cudaSift.py b/scripts/benchmarks/benches/cudaSift.py deleted file mode 100644 index 482d258052..0000000000 --- a/scripts/benchmarks/benches/cudaSift.py +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re -import shutil - -class CudaSift(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("cudaSift", "cudaSift", vb) - - def download_deps(self): - images = os.path.join(self.vb.repo_path, self.bench_name, 'inputData') - dest = os.path.join(self.directory, 'inputData') - if not os.path.exists(dest): - shutil.copytree(images, dest) - - def name(self): - return "Velocity-Bench CudaSift" - - def unit(self): - return "ms" - - def parse_output(self, stdout: str) -> float: - match = re.search(r'Avg workload time = (\d+\.\d+) ms', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("Failed to parse benchmark output.") diff --git a/scripts/benchmarks/benches/easywave.py b/scripts/benchmarks/benches/easywave.py deleted file mode 100644 index 2f89482329..0000000000 --- a/scripts/benchmarks/benches/easywave.py +++ /dev/null @@ -1,56 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -from .options import options -import re -import os - -class Easywave(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("easywave", "easyWave_sycl", vb) - - def download_deps(self): - self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz") - - def name(self): - return "Velocity-Bench Easywave" - - def unit(self): - return "ms" - - def bin_args(self) -> list[str]: - return ["-grid", f"{self.data_path}/examples/e2Asean.grd", - "-source", f"{self.data_path}/examples/BengkuluSept2007.flt", - "-time", "120"] - - # easywave doesn't output a useful single perf value. Instead, we parse the - # output logs looking for the very last line containing the elapsed time of the - # application. 
- def get_last_elapsed_time(self, log_file_path) -> float: - elapsed_time_pattern = re.compile(r'Model time = (\d{2}:\d{2}:\d{2}),\s+elapsed: (\d+) msec') - last_elapsed_time = None - - try: - with open(log_file_path, 'r') as file: - for line in file: - match = elapsed_time_pattern.search(line) - if match: - last_elapsed_time = int(match.group(2)) - - if last_elapsed_time is not None: - return last_elapsed_time - else: - raise ValueError("No elapsed time found in the log file.") - except FileNotFoundError: - raise FileNotFoundError(f"The file {log_file_path} does not exist.") - except Exception as e: - raise e - - def parse_output(self, stdout: str) -> float: - return self.get_last_elapsed_time(os.path.join(options.benchmark_cwd, "easywave.log")) diff --git a/scripts/benchmarks/benches/hashtable.py b/scripts/benchmarks/benches/hashtable.py deleted file mode 100644 index c5ed397dbb..0000000000 --- a/scripts/benchmarks/benches/hashtable.py +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. -# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re - -class Hashtable(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("hashtable", "hashtable_sycl", vb) - - def name(self): - return "Velocity-Bench Hashtable" - - def unit(self): - return "M keys/sec" - - def bin_args(self) -> list[str]: - return ["--no-verify"] - - def lower_is_better(self): - return False - - def parse_output(self, stdout: str) -> float: - match = re.search(r'(\d+\.\d+) million keys/second', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse keys per second from benchmark output.") diff --git a/scripts/benchmarks/benches/options.py b/scripts/benchmarks/benches/options.py index c035ce6800..5997cdedb8 100644 --- a/scripts/benchmarks/benches/options.py +++ b/scripts/benchmarks/benches/options.py @@ -1,13 +1,26 @@ from dataclasses import dataclass +from enum import Enum + +class Compare(Enum): + LATEST = 'latest' + AVERAGE = 'average' + MEDIAN = 'median' @dataclass class Options: - sycl: str = "" + sycl: str = None + ur: str = None + ur_adapter: str = None rebuild: bool = True benchmark_cwd: str = "INVALID" timeout: float = 600 iterations: int = 5 verbose: bool = False + compare: Compare = Compare.LATEST + compare_max: int = 10 # average/median over how many results + output_html: bool = False + output_markdown: bool = True + dry_run: bool = False options = Options() diff --git a/scripts/benchmarks/benches/quicksilver.py b/scripts/benchmarks/benches/quicksilver.py deleted file mode 100644 index b7600d11be..0000000000 --- a/scripts/benchmarks/benches/quicksilver.py +++ /dev/null @@ -1,45 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
-# See LICENSE.TXT -# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -from .base import Benchmark -from .result import Result -from .velocity import VelocityBase, VelocityBench -from utils.utils import run -import os -import re - -class QuickSilver(VelocityBase): - def __init__(self, vb: VelocityBench): - super().__init__("QuickSilver", "qs", vb) - self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering") - - def run(self, env_vars) -> list[Result]: - # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0 - if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0': - return None - - return super().run(env_vars) - - def name(self): - return "Velocity-Bench QuickSilver" - - def unit(self): - return "MMS/CTT" - - def lower_is_better(self): - return False - - def bin_args(self) -> list[str]: - return ["-i", f"{self.data_path}/scatteringOnly.inp"] - - def extra_env_vars(self) -> dict: - return {"QS_DEVICE" : "GPU"} - - def parse_output(self, stdout: str) -> float: - match = re.search(r'Figure Of Merit\s+(\d+\.\d+)', stdout) - if match: - return float(match.group(1)) - else: - raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") diff --git a/scripts/benchmarks/benches/result.py b/scripts/benchmarks/benches/result.py index 6fc7e16095..07ee70148a 100644 --- a/scripts/benchmarks/benches/result.py +++ b/scripts/benchmarks/benches/result.py @@ -4,7 +4,9 @@ # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception from dataclasses import dataclass +from typing import Optional from dataclasses_json import dataclass_json +from datetime import datetime @dataclass_json @dataclass @@ -15,6 +17,17 @@ class Result: env: str stdout: str passed: bool = True + # values should not be set by the benchmark unit: str = "" name: str = "" lower_is_better: bool = True + git_hash: str = '' + date: Optional[datetime] = None + +@dataclass_json +@dataclass +class BenchmarkRun: + results: list[Result] + name: str = 'This PR' + git_hash: str = '' + date: datetime = None diff --git a/scripts/benchmarks/benches/syclbench.py b/scripts/benchmarks/benches/syclbench.py index b9d6e50623..fbfd009935 100644 --- a/scripts/benchmarks/benches/syclbench.py +++ b/scripts/benchmarks/benches/syclbench.py @@ -7,19 +7,20 @@ import csv import io from utils.utils import run, git_clone, create_build_path -from .base import Benchmark +from .base import Benchmark, Suite from .result import Result from .options import options -class SyclBench: +class SyclBench(Suite): def __init__(self, directory): + if options.sycl is None: + return + self.directory = directory - self.built = False - self.setup() return def setup(self): - if self.built: + if options.sycl is None: return build_path = create_build_path(self.directory, 'sycl-bench-build') @@ -40,6 +41,50 @@ def setup(self): self.built = True + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + return [ + # Blocked_transform(self), # run time < 1ms + DagTaskI(self), + DagTaskS(self), + HostDevBandwidth(self), + LocalMem(self), + Pattern_L2(self), + Reduction(self), + ScalarProd(self), + SegmentReduction(self), + UsmAccLatency(self), + UsmAllocLatency(self), + UsmInstrMix(self), + UsmPinnedOverhead(self), + VecAdd(self), + + # *** sycl-bench single benchmarks + # TwoDConvolution(self), # run time < 1ms + Two_mm(self), + Three_mm(self), + # Arith(self), # run time < 1ms + Atax(self), + # Atomic_reduction(self), # run time < 1ms + 
Bicg(self), + Correlation(self), + Covariance(self), + Gemm(self), + Gesumv(self), + Gramschmidt(self), + KMeans(self), + LinRegCoeff(self), + # LinRegError(self), # run time < 1ms + MatmulChain(self), + MolDyn(self), + Mvt(self), + Sf(self), + Syr2k(self), + Syrk(self), + ] + class SyclBenchmark(Benchmark): def __init__(self, bench, name, test): self.bench = bench @@ -58,7 +103,6 @@ def unit(self): return "ms" def setup(self): - self.bench.setup() self.benchmark_bin = os.path.join(self.directory, 'sycl-bench-build', self.bench_name) def run(self, env_vars) -> list[Result]: diff --git a/scripts/benchmarks/benches/test.py b/scripts/benchmarks/benches/test.py new file mode 100644 index 0000000000..88bc29a649 --- /dev/null +++ b/scripts/benchmarks/benches/test.py @@ -0,0 +1,68 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import random +from utils.utils import git_clone +from .base import Benchmark, Suite +from .result import Result +from utils.utils import run, create_build_path +from .options import options +import os + +class TestSuite(Suite): + def __init__(self): + return + + def setup(self): + return + + def benchmarks(self) -> list[Benchmark]: + bench_configs = [ + ("Memory Bandwidth", 2000, 200), + ("Latency", 100, 20), + ("Throughput", 1500, 150), + ("FLOPS", 3000, 300), + ("Cache Miss Rate", 250, 25), + ] + + result = [] + for base_name, base_value, base_diff in bench_configs: + for variant in range(6): + value_multiplier = 1.0 + (variant * 0.2) + name = f"{base_name} {variant+1}" + value = base_value * value_multiplier + diff = base_diff * value_multiplier + + result.append(TestBench(name, value, diff)) + + return result + +class TestBench(Benchmark): + def __init__(self, name, value, diff): + self.bname = name + self.value = value + self.diff = diff + super().__init__("") + + def name(self): + return self.bname + + def unit(self): + return "ms" + + def lower_is_better(self): + return True + + def setup(self): + return + + def run(self, env_vars) -> list[Result]: + random_value = self.value + random.uniform(-1 * (self.diff), self.diff) + return [ + Result(label=self.name(), value=random_value, command="", env={"A": "B"}, stdout="no output") + ] + + def teardown(self): + return diff --git a/scripts/benchmarks/benches/velocity.py b/scripts/benchmarks/benches/velocity.py index 3c903bf11b..38efa42f56 100644 --- a/scripts/benchmarks/benches/velocity.py +++ b/scripts/benchmarks/benches/velocity.py @@ -3,18 +3,41 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +import re +import shutil from utils.utils import git_clone -from .base import Benchmark +from .base import Benchmark, Suite from .result import Result from utils.utils import run, create_build_path from .options import options import os -class VelocityBench: +class VelocityBench(Suite): def __init__(self, directory): + if options.sycl is None: + return + self.directory = directory + + def setup(self): + if options.sycl is None: + return + self.repo_path = git_clone(self.directory, "velocity-bench-repo", "https://github.com/oneapi-src/Velocity-Bench/", "b22215c16f789100449c34bf4eaa3fb178983d69") + def benchmarks(self) -> list[Benchmark]: + if options.sycl is None: + return [] + + return [ + Hashtable(self), + Bitcracker(self), + CudaSift(self), + Easywave(self), + QuickSilver(self), + SobelFilter(self) + ] + class 
VelocityBase(Benchmark): def __init__(self, name: str, bin_name: str, vb: VelocityBench): super().__init__(vb.directory) @@ -60,7 +83,185 @@ def run(self, env_vars) -> list[Result]: result = self.run_bench(command, env_vars) - return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result, lower_is_better=self.lower_is_better()) ] + return [ Result(label=self.name(), value=self.parse_output(result), command=command, env=env_vars, stdout=result) ] def teardown(self): return + +class Hashtable(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("hashtable", "hashtable_sycl", vb) + + def name(self): + return "Velocity-Bench Hashtable" + + def unit(self): + return "M keys/sec" + + def bin_args(self) -> list[str]: + return ["--no-verify"] + + def lower_is_better(self): + return False + + def parse_output(self, stdout: str) -> float: + match = re.search(r'(\d+\.\d+) million keys/second', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse keys per second from benchmark output.") + + +class Bitcracker(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("bitcracker", "bitcracker", vb) + self.data_path = os.path.join(vb.repo_path, "bitcracker", "hash_pass") + + def name(self): + return "Velocity-Bench Bitcracker" + + def unit(self): + return "s" + + def bin_args(self) -> list[str]: + return ["-f", f"{self.data_path}/img_win8_user_hash.txt", + "-d", f"{self.data_path}/user_passwords_60000.txt", + "-b", "60000"] + + def parse_output(self, stdout: str) -> float: + match = re.search(r'bitcracker - total time for whole calculation: (\d+\.\d+) s', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") + +class SobelFilter(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("sobel_filter", "sobel_filter", vb) + + def download_deps(self): + self.download_untar("sobel_filter", "https://github.com/oneapi-src/Velocity-Bench/raw/main/sobel_filter/res/sobel_filter_data.tgz?download=", "sobel_filter_data.tgz") + return + + def name(self): + return "Velocity-Bench Sobel Filter" + + def unit(self): + return "ms" + + def bin_args(self) -> list[str]: + return ["-i", f"{self.data_path}/sobel_filter_data/silverfalls_32Kx32K.png", + "-n", "5"] + + def extra_env_vars(self) -> dict: + return {"OPENCV_IO_MAX_IMAGE_PIXELS" : "1677721600"} + + def parse_output(self, stdout: str) -> float: + match = re.search(r'sobelfilter - total time for whole calculation: (\d+\.\d+) s', stdout) + if match: + return round(float(match.group(1)) * 1000, 3) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") + + +class QuickSilver(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("QuickSilver", "qs", vb) + self.data_path = os.path.join(vb.repo_path, "QuickSilver", "Examples", "AllScattering") + + def run(self, env_vars) -> list[Result]: + # TODO: fix the crash in QuickSilver when UR_L0_USE_IMMEDIATE_COMMANDLISTS=0 + if 'UR_L0_USE_IMMEDIATE_COMMANDLISTS' in env_vars and env_vars['UR_L0_USE_IMMEDIATE_COMMANDLISTS'] == '0': + return None + + return super().run(env_vars) + + def name(self): + return "Velocity-Bench QuickSilver" + + def unit(self): + return "MMS/CTT" + + def lower_is_better(self): + return False + + def bin_args(self) -> list[str]: + return ["-i", f"{self.data_path}/scatteringOnly.inp"] + + def 
extra_env_vars(self) -> dict: + return {"QS_DEVICE" : "GPU"} + + def parse_output(self, stdout: str) -> float: + match = re.search(r'Figure Of Merit\s+(\d+\.\d+)', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("{self.__class__.__name__}: Failed to parse benchmark output.") + +class Easywave(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("easywave", "easyWave_sycl", vb) + + def download_deps(self): + self.download_untar("easywave", "https://git.gfz-potsdam.de/id2/geoperil/easyWave/-/raw/master/data/examples.tar.gz", "examples.tar.gz") + + def name(self): + return "Velocity-Bench Easywave" + + def unit(self): + return "ms" + + def bin_args(self) -> list[str]: + return ["-grid", f"{self.data_path}/examples/e2Asean.grd", + "-source", f"{self.data_path}/examples/BengkuluSept2007.flt", + "-time", "120"] + + # easywave doesn't output a useful single perf value. Instead, we parse the + # output logs looking for the very last line containing the elapsed time of the + # application. + def get_last_elapsed_time(self, log_file_path) -> float: + elapsed_time_pattern = re.compile(r'Model time = (\d{2}:\d{2}:\d{2}),\s+elapsed: (\d+) msec') + last_elapsed_time = None + + try: + with open(log_file_path, 'r') as file: + for line in file: + match = elapsed_time_pattern.search(line) + if match: + last_elapsed_time = int(match.group(2)) + + if last_elapsed_time is not None: + return last_elapsed_time + else: + raise ValueError("No elapsed time found in the log file.") + except FileNotFoundError: + raise FileNotFoundError(f"The file {log_file_path} does not exist.") + except Exception as e: + raise e + + def parse_output(self, stdout: str) -> float: + return self.get_last_elapsed_time(os.path.join(options.benchmark_cwd, "easywave.log")) + + +class CudaSift(VelocityBase): + def __init__(self, vb: VelocityBench): + super().__init__("cudaSift", "cudaSift", vb) + + def download_deps(self): + images = os.path.join(self.vb.repo_path, self.bench_name, 'inputData') + dest = os.path.join(self.directory, 'inputData') + if not os.path.exists(dest): + shutil.copytree(images, dest) + + def name(self): + return "Velocity-Bench CudaSift" + + def unit(self): + return "ms" + + def parse_output(self, stdout: str) -> float: + match = re.search(r'Avg workload time = (\d+\.\d+) ms', stdout) + if match: + return float(match.group(1)) + else: + raise ValueError("Failed to parse benchmark output.") diff --git a/scripts/benchmarks/history.py b/scripts/benchmarks/history.py new file mode 100644 index 0000000000..5b83ef9479 --- /dev/null +++ b/scripts/benchmarks/history.py @@ -0,0 +1,135 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import os +import json +from pathlib import Path +from benches.result import Result, BenchmarkRun +from benches.options import Compare, options +from datetime import datetime, timezone +from utils.utils import run; + +class BenchmarkHistory: + benchmark_run_index_max = 0 + runs = [] + + def __init__(self, dir): + self.dir = dir + + def load_result(self, file_path: Path) -> BenchmarkRun: + if file_path.exists(): + with file_path.open('r') as file: + data = json.load(file) + return BenchmarkRun.from_json(data) + else: + return None + + def load(self, n: int): + results_dir = Path(self.dir) / 'results' + if not results_dir.exists() or not results_dir.is_dir(): + return [] + + # Get all JSON files in the results directory + benchmark_files = list(results_dir.glob('*.json')) + + # Extract index numbers and sort files by index number + def extract_index(file_path: Path) -> int: + try: + return int(file_path.stem.split('_')[0]) + except (IndexError, ValueError): + return -1 + + benchmark_files = [file for file in benchmark_files if extract_index(file) != -1] + benchmark_files.sort(key=extract_index) + + # Load the first n benchmark files + benchmark_runs = [] + for file_path in benchmark_files[n::-1]: + benchmark_run = self.load_result(file_path) + if benchmark_run: + benchmark_runs.append(benchmark_run) + + if benchmark_files: + self.benchmark_run_index_max = extract_index(benchmark_files[-1]) + + self.runs = benchmark_runs + + def create_run(self, name: str, results: list[Result]) -> BenchmarkRun: + try: + result = run("git rev-parse --short HEAD") + git_hash = result.stdout.decode().strip() + except: + git_hash = 'unknown' + + return BenchmarkRun(name = name, git_hash = git_hash, date = datetime.now(tz=timezone.utc), results = results) + + def save(self, save_name, results: list[Result], to_file = True): + benchmark_data = self.create_run(save_name, results) + self.runs.append(benchmark_data) + + if not to_file: + return + + serialized = benchmark_data.to_json() + results_dir = Path(os.path.join(self.dir, 'results')) + os.makedirs(results_dir, exist_ok=True) + + self.benchmark_run_index_max += 1 + file_path = Path(os.path.join(results_dir, f"{self.benchmark_run_index_max}_{save_name}.json")) + with file_path.open('w') as file: + json.dump(serialized, file, indent=4) + print(f"Benchmark results saved to {file_path}") + + def find_first(self, name: str) -> BenchmarkRun: + for r in self.runs: + if r.name == name: + return r + return None + + def compute_average(self, data: list[BenchmarkRun]): + first_run = data[0] + average_results = [] + + for i in range(len(first_run.results)): + all_values = [run.results[i].value for run in data] + + # Calculate the average value for the current result index + average_value = sum(all_values) / len(all_values) + + average_result = first_run.results[i] + average_result.value = average_value + + average_results.append(average_result) + + average_benchmark_run = BenchmarkRun( + results = average_results, + name = first_run.name, + git_hash = "average", + date = first_run.date # should this be different? 
+ ) + + return average_benchmark_run + + def get_compare(self, name: str) -> BenchmarkRun: + if options.compare == Compare.LATEST: + return self.find_first(name) + + data = [] + for r in self.runs: + if r.name == name: + data.append(r) + if len(data) == options.compare_max: + break + + if len(data) == 0: + return None + + if options.compare == Compare.MEDIAN: + return data[len(data) // 2] + + if options.compare == Compare.AVERAGE: + return self.compute_average(data) + + raise Exception("invalid compare type") diff --git a/scripts/benchmarks/main.py b/scripts/benchmarks/main.py index 85d9b6b608..0756554e77 100755 --- a/scripts/benchmarks/main.py +++ b/scripts/benchmarks/main.py @@ -5,108 +5,53 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -from utils.utils import prepare_workdir, load_benchmark_results, save_benchmark_results; from benches.compute import * -from benches.hashtable import Hashtable -from benches.bitcracker import Bitcracker -from benches.cudaSift import CudaSift -from benches.easywave import Easywave -from benches.quicksilver import QuickSilver -from benches.SobelFilter import SobelFilter from benches.velocity import VelocityBench from benches.syclbench import * -from benches.options import options -from output import generate_markdown +from benches.test import TestSuite +from benches.options import Compare, options +from output_markdown import generate_markdown +from output_html import generate_html +from history import BenchmarkHistory +from utils.utils import prepare_workdir; + import argparse import re -import subprocess # Update this if you are changing the layout of the results files -INTERNAL_WORKDIR_VERSION = '1.7' +INTERNAL_WORKDIR_VERSION = '2.0' def main(directory, additional_env_vars, save_name, compare_names, filter): prepare_workdir(directory, INTERNAL_WORKDIR_VERSION) - cb = ComputeBench(directory) - vb = VelocityBench(directory) - sb = SyclBench(directory) - - benchmarks = [ - # *** Compute benchmarks - SubmitKernelSYCL(cb, 0), - SubmitKernelSYCL(cb, 1), - SubmitKernelUR(cb, 0), - SubmitKernelUR(cb, 1), - QueueInOrderMemcpy(cb, 0, 'Device', 'Device', 1024), - QueueInOrderMemcpy(cb, 0, 'Host', 'Device', 1024), - QueueMemcpy(cb, 'Device', 'Device', 1024), - StreamMemory(cb, 'Triad', 10 * 1024, 'Device'), - ExecImmediateCopyQueue(cb, 0, 1, 'Device', 'Device', 1024), - ExecImmediateCopyQueue(cb, 1, 1, 'Device', 'Host', 1024), - VectorSum(cb), - MemcpyExecute(cb, 400, 8, 1024, 100), - MemcpyExecute(cb, 400, 8, 102400, 10), - MemcpyExecute(cb, 500, 8, 102400, 10), - MemcpyExecute(cb, 400, 1, 1024, 1000), - MemcpyExecute(cb, 10, 16, 1024, 1000), - MemcpyExecute(cb, 10, 16, 102400, 100), - - # *** Velocity benchmarks - Hashtable(vb), - Bitcracker(vb), - CudaSift(vb), - Easywave(vb), - QuickSilver(vb), - SobelFilter(vb), - - # *** sycl-bench multi benchmarks - # Blocked_transform(sb), # run time < 1ms - DagTaskI(sb), - DagTaskS(sb), - HostDevBandwidth(sb), - LocalMem(sb), - Pattern_L2(sb), - Reduction(sb), - ScalarProd(sb), - SegmentReduction(sb), - UsmAccLatency(sb), - UsmAllocLatency(sb), - UsmInstrMix(sb), - UsmPinnedOverhead(sb), - VecAdd(sb), - - # *** sycl-bench single benchmarks - # TwoDConvolution(sb), # run time < 1ms - Two_mm(sb), - Three_mm(sb), - # Arith(sb), # run time < 1ms - Atax(sb), - # Atomic_reduction(sb), # run time < 1ms - Bicg(sb), - Correlation(sb), - Covariance(sb), - Gemm(sb), - Gesumv(sb), - Gramschmidt(sb), - KMeans(sb), - LinRegCoeff(sb), - # LinRegError(sb), # run time < 1ms - MatmulChain(sb), - MolDyn(sb), 
- Mvt(sb), - Sf(sb), - Syr2k(sb), - Syrk(sb), - ] + suites = [ + ComputeBench(directory), + VelocityBench(directory), + SyclBench(directory) + #TestSuite() + ] if not options.dry_run else [] + + benchmarks = [] + + for s in suites: + print(f"Setting up {type(s).__name__}") + s.setup() + print(f"{type(s).__name__} setup complete.") + + for s in suites: + benchmarks += s.benchmarks() if filter: benchmarks = [benchmark for benchmark in benchmarks if filter.search(benchmark.name())] + for b in benchmarks: + print(b.name()) + for benchmark in benchmarks: try: - print(f"setting up {benchmark.name()}... ", end='', flush=True) + print(f"Setting up {benchmark.name()}... ") benchmark.setup() - print("complete.") + print(f"{benchmark.name()} setup complete.") except Exception as e: if options.exit_on_failure: @@ -131,7 +76,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): iteration_results.append(bench_result) else: print(f"did not finish (OK for sycl-bench).") - break; + break if len(iteration_results) == 0: continue @@ -145,6 +90,7 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): median_result.unit = benchmark.unit() median_result.name = label + median_result.lower_is_better = benchmark.lower_is_better() results.append(median_result) except Exception as e: @@ -158,23 +104,44 @@ def main(directory, additional_env_vars, save_name, compare_names, filter): benchmark.teardown() print("complete.") - chart_data = {"This PR" : results} + this_name = "This PR" + + chart_data = {this_name : results} + + history = BenchmarkHistory(directory) + # limit how many files we load. + # should this be configurable? + history.load(1000) for name in compare_names: print(f"compare name: {name}") - compare_result = load_benchmark_results(directory, name) + compare_result = history.get_compare(name) if compare_result: - chart_data[name] = compare_result + chart_data[name] = compare_result.results + + if options.output_markdown: + markdown_content = generate_markdown(this_name, chart_data) + + with open('benchmark_results.md', 'w') as file: + file.write(markdown_content) + + saved_name = save_name if save_name is not None else this_name - if save_name: - save_benchmark_results(directory, save_name, results) + # It's important we don't save the current results into history before + # we calculate historical averages or get latest results for compare. + # Otherwise we might be comparing the results to themselves. 
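+    # When no --save name is given, the results are only added to the
+    # in-memory history (and the HTML report); nothing is written to disk.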
+ if not options.dry_run: + history.save(saved_name, results, save_name is not None) + print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") + compare_names.append(saved_name) - markdown_content = generate_markdown(chart_data) + if options.output_html: + html_content = generate_html(history.runs, 'oneapi-src/unified-runtime', compare_names) - with open('benchmark_results.md', 'w') as file: - file.write(markdown_content) + with open('benchmark_results.html', 'w') as file: + file.write(html_content) - print(f"Markdown with benchmark results has been written to {os.getcwd()}/benchmark_results.md") + print(f"HTML with benchmark results has been written to {os.getcwd()}/benchmark_results.html") def validate_and_parse_env_args(env_args): env_vars = {} @@ -188,9 +155,9 @@ def validate_and_parse_env_args(env_args): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Unified Runtime Benchmark Runner') parser.add_argument('benchmark_directory', type=str, help='Working directory to setup benchmarks.') - parser.add_argument('sycl', type=str, help='Root directory of the SYCL compiler.') - parser.add_argument('ur_dir', type=str, help='UR install prefix path') - parser.add_argument('ur_adapter_name', type=str, help='Options to build the Unified Runtime as part of the benchmark') + parser.add_argument('--sycl', type=str, help='Root directory of the SYCL compiler.', default=None) + parser.add_argument('--ur', type=str, help='UR install prefix path', default=None) + parser.add_argument('--adapter', type=str, help='Options to build the Unified Runtime as part of the benchmark', default="level_zero") parser.add_argument("--no-rebuild", help='Rebuild the benchmarks from scratch.', action="store_true") parser.add_argument("--env", type=str, help='Use env variable for a benchmark run.', action="append", default=[]) parser.add_argument("--save", type=str, help='Save the results for comparison under a specified name.') @@ -200,7 +167,12 @@ def validate_and_parse_env_args(env_args): parser.add_argument("--filter", type=str, help='Regex pattern to filter benchmarks by name.', default=None) parser.add_argument("--epsilon", type=float, help='Threshold to consider change of performance significant', default=0.005) parser.add_argument("--verbose", help='Print output of all the commands.', action="store_true") - parser.add_argument("--exit_on_failure", help='Exit on first failure.', action="store_true") + parser.add_argument("--exit-on-failure", help='Exit on first failure.', action="store_true") + parser.add_argument("--compare-type", type=str, choices=[e.value for e in Compare], help='Compare results against previously saved data.', default=Compare.LATEST.value) + parser.add_argument("--compare-max", type=int, help='How many results to read for comparisions', default=10) + parser.add_argument("--output-html", help='Create HTML output', action="store_true", default=False) + parser.add_argument("--output-markdown", help='Create Markdown output', action="store_true", default=True) + parser.add_argument("--dry-run", help='Do not run any actual benchmarks', action="store_true", default=False) args = parser.parse_args() additional_env_vars = validate_and_parse_env_args(args.env) @@ -211,9 +183,14 @@ def validate_and_parse_env_args(env_args): options.iterations = args.iterations options.timeout = args.timeout options.epsilon = args.epsilon - options.ur_dir = args.ur_dir - options.ur_adapter_name = args.ur_adapter_name + options.ur = args.ur + options.ur_adapter = 
args.adapter options.exit_on_failure = args.exit_on_failure + options.compare = Compare(args.compare_type) + options.compare_max = args.compare_max + options.output_html = args.output_html + options.output_markdown = args.output_markdown + options.dry_run = args.dry_run benchmark_filter = re.compile(args.filter) if args.filter else None diff --git a/scripts/benchmarks/output_html.py b/scripts/benchmarks/output_html.py new file mode 100644 index 0000000000..8249bc75c9 --- /dev/null +++ b/scripts/benchmarks/output_html.py @@ -0,0 +1,358 @@ +# Copyright (C) 2024 Intel Corporation +# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. +# See LICENSE.TXT +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import matplotlib.pyplot as plt +import mpld3 +from collections import defaultdict +from dataclasses import dataclass +import matplotlib.dates as mdates +import numpy as np +from benches.result import BenchmarkRun, Result + +@dataclass +class BenchmarkMetadata: + unit: str + lower_is_better: bool + +@dataclass +class BenchmarkSeries: + label: str + metadata: BenchmarkMetadata + runs: list[BenchmarkRun] + +@dataclass +class LatestResults: + benchmark_label: str + run_values: dict[str, float] + + @classmethod + def from_dict(cls, label: str, values: dict[str, float]) -> 'LatestResults': + return cls(benchmark_label=label, run_values=values) + +def get_latest_results(benchmarks: list[BenchmarkSeries]) -> dict[str, LatestResults]: + latest_results: dict[str, LatestResults] = {} + for benchmark in benchmarks: + run_values = { + run.name: max(run.results, key=lambda x: x.date).value + for run in benchmark.runs + } + latest_results[benchmark.label] = LatestResults.from_dict(benchmark.label, run_values) + return latest_results + +def prepare_normalized_data(latest_results: dict[str, LatestResults], + benchmarks: list[BenchmarkSeries], + group_benchmarks: list[str], + non_baseline_runs: list[str], + baseline_name: str) -> list[list[float]]: + normalized_data = [] + benchmark_map = {b.label: b for b in benchmarks} + + for run_name in non_baseline_runs: + run_data: list[float] = [] + for benchmark_label in group_benchmarks: + benchmark_data = latest_results[benchmark_label].run_values + if run_name not in benchmark_data or baseline_name not in benchmark_data: + run_data.append(None) + continue + + baseline_value = benchmark_data[baseline_name] + current_value = benchmark_data[run_name] + + normalized_value = ((baseline_value / current_value) if benchmark_map[benchmark_label].metadata.lower_is_better + else (current_value / baseline_value)) * 100 + run_data.append(normalized_value) + normalized_data.append(run_data) + return normalized_data + +def format_benchmark_label(label: str) -> list[str]: + words = label.split() + if len(words) <= 2: + return [label] + + mid = len(words) // 2 + return [' '.join(words[:mid]), ' '.join(words[mid:])] + +def create_bar_plot(ax: plt.Axes, + normalized_data: list[list[float]], + group_benchmarks: list[str], + non_baseline_runs: list[str], + latest_results: dict[str, LatestResults], + benchmarks: list[BenchmarkSeries], + baseline_name: str) -> float: + x = np.arange(len(group_benchmarks)) + width = 0.8 / len(non_baseline_runs) + max_height = 0 + benchmark_map = {b.label: b for b in benchmarks} + + for i, (run_name, run_data) in enumerate(zip(non_baseline_runs, normalized_data)): + offset = width * i - width * (len(non_baseline_runs) - 1) / 2 + positions = x + offset + valid_data = [v if v is not None else 0 for v in run_data] 
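+        # Benchmarks with no result for this run are drawn as zero-height
+        # bars; the value labels and tooltips below skip them.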
+ rects = ax.bar(positions, valid_data, width, label=run_name) + + for rect, value, benchmark_label in zip(rects, run_data, group_benchmarks): + if value is not None: + height = rect.get_height() + if height > max_height: + max_height = height + + ax.text(rect.get_x() + rect.get_width()/2., height + 2, + f'{value:.1f}%', + ha='center', va='bottom') + + benchmark_data = latest_results[benchmark_label].run_values + baseline_value = benchmark_data[baseline_name] + current_value = benchmark_data[run_name] + unit = benchmark_map[benchmark_label].metadata.unit + + tooltip_labels = [ + f"Run: {run_name}\n" + f"Benchmark: {benchmark_label}\n" + f"Value: {current_value:.2f} {unit}\n" + f"Baseline ({baseline_name}): {baseline_value:.2f} {unit}\n" + f"Normalized: {value:.1f}%" + ] + tooltip = mpld3.plugins.LineHTMLTooltip(rect, tooltip_labels, css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}') + mpld3.plugins.connect(ax.figure, tooltip) + + return max_height + +def add_chart_elements(ax: plt.Axes, + group_benchmarks: list[str], + group_name: str, + max_height: float) -> None: + top_padding = max_height * 0.2 + ax.set_ylim(0, max_height + top_padding) + ax.set_ylabel('Performance relative to baseline (%)') + ax.set_title(f'Performance Comparison (Normalized to Baseline) - {group_name} Group') + ax.set_xticks([]) + + for idx, label in enumerate(group_benchmarks): + split_labels = format_benchmark_label(label) + for i, sublabel in enumerate(split_labels): + y_pos = max_height + (top_padding * 0.5) + 2 - (i * top_padding * 0.15) + ax.text(idx, y_pos, sublabel, + ha='center', + style='italic', + color='#666666') + + ax.grid(True, axis='y', alpha=0.2) + ax.legend(bbox_to_anchor=(1, 1), loc='upper left') + +def create_normalized_bar_chart(benchmarks: list[BenchmarkSeries], baseline_name: str) -> list[str]: + latest_results = get_latest_results(benchmarks) + + run_names = sorted(list(set( + name for result in latest_results.values() + for name in result.run_values.keys() + ))) + + if baseline_name not in run_names: + return [] + + benchmark_labels = [b.label for b in benchmarks] + + benchmark_groups = defaultdict(list) + for label in benchmark_labels: + group_name = label.split()[0] + benchmark_groups[group_name].append(label) + + html_charts = [] + + for group_name, group_benchmarks in benchmark_groups.items(): + plt.close('all') + non_baseline_runs = [n for n in run_names if n != baseline_name] + + if len(non_baseline_runs) == 0: + continue + + normalized_data = prepare_normalized_data( + latest_results, benchmarks, group_benchmarks, + non_baseline_runs, baseline_name + ) + + fig, ax = plt.subplots(figsize=(10, 6)) + max_height = create_bar_plot( + ax, normalized_data, group_benchmarks, non_baseline_runs, + latest_results, benchmarks, baseline_name + ) + add_chart_elements(ax, group_benchmarks, group_name, max_height) + + plt.tight_layout() + html_charts.append(mpld3.fig_to_html(fig)) + plt.close(fig) + + return html_charts + +def create_time_series_chart(benchmarks: list[BenchmarkSeries], github_repo: str) -> str: + plt.close('all') + + num_benchmarks = len(benchmarks) + if num_benchmarks == 0: + return + + fig, axes = plt.subplots(num_benchmarks, 1, figsize=(10, max(4 * num_benchmarks, 30))) + + if num_benchmarks == 1: + axes = [axes] + + for idx, benchmark in enumerate(benchmarks): + ax = axes[idx] + + for run in benchmark.runs: + sorted_points = sorted(run.results, key=lambda x: x.date) + dates = [point.date for point in 
+            dates = [point.date for point in sorted_points]
+            values = [point.value for point in sorted_points]
+
+            ax.plot_date(dates, values, '-', label=run.name, alpha=0.5)
+            scatter = ax.scatter(dates, values, picker=True)
+
+            tooltip_labels = [
+                f"Date: {point.date.strftime('%Y-%m-%d %H:%M:%S')}\n"
+                f"Value: {point.value:.2f}\n"
+                f"Git Hash: {point.git_hash}"
+                for point in sorted_points
+            ]
+
+            targets = [f"https://github.com/{github_repo}/commit/{point.git_hash}"
+                       for point in sorted_points]
+
+            tooltip = mpld3.plugins.PointHTMLTooltip(scatter, tooltip_labels,
+                                                     css='.mpld3-tooltip{background:white;padding:8px;border:1px solid #ddd;border-radius:4px;font-family:monospace;white-space:pre;}',
+                                                     targets=targets)
+            mpld3.plugins.connect(fig, tooltip)
+
+        ax.set_title(benchmark.label, pad=20)
+        performance_indicator = "lower is better" if benchmark.metadata.lower_is_better else "higher is better"
+        ax.text(0.5, 1.05, f"({performance_indicator})",
+                ha='center',
+                transform=ax.transAxes,
+                style='italic',
+                fontsize=7,
+                color='#666666')
+
+        ax.set_xlabel('')
+        unit = benchmark.metadata.unit
+        ax.set_ylabel(f"Value ({unit})" if unit else "Value")
+        ax.grid(True, alpha=0.2)
+        ax.legend(bbox_to_anchor=(1, 1), loc='upper left')
+        ax.xaxis.set_major_formatter(mdates.ConciseDateFormatter('%Y-%m-%d %H:%M:%S'))
+        ax.xaxis.set_major_locator(mdates.AutoDateLocator())
+
+    plt.tight_layout()
+    html = mpld3.fig_to_html(fig)
+
+    plt.close(fig)
+    return html
+
+def process_benchmark_data(benchmark_runs: list[BenchmarkRun], compare_names: list[str]) -> list[BenchmarkSeries]:
+    benchmark_metadata: dict[str, BenchmarkMetadata] = {}
+    run_map: dict[str, dict[str, list[Result]]] = defaultdict(lambda: defaultdict(list))
+
+    for run in benchmark_runs:
+        if run.name not in compare_names:
+            continue
+
+        for result in run.results:
+            if result.label not in benchmark_metadata:
+                benchmark_metadata[result.label] = BenchmarkMetadata(
+                    unit=result.unit,
+                    lower_is_better=result.lower_is_better
+                )
+
+            result.date = run.date
+            result.git_hash = run.git_hash
+            run_map[result.label][run.name].append(result)
+
+    benchmark_series = []
+    for label, metadata in benchmark_metadata.items():
+        runs = [
+            BenchmarkRun(name=run_name, results=results)
+            for run_name, results in run_map[label].items()
+        ]
+        benchmark_series.append(BenchmarkSeries(
+            label=label,
+            metadata=metadata,
+            runs=runs
+        ))
+
+    return benchmark_series
+
+def generate_html(benchmark_runs: list[BenchmarkRun], github_repo: str, compare_names: list[str]) -> str:
+    baseline_name = compare_names[0]
+    benchmarks = process_benchmark_data(benchmark_runs, compare_names)
+
+    comparison_html_charts = create_normalized_bar_chart(benchmarks, baseline_name)
+    timeseries_html = create_time_series_chart(benchmarks, github_repo)
+    comparison_charts_html = '\n'.join(f'<div class="chart">{chart}</div>' for chart in comparison_html_charts)
+
+    html_template = f"""
+    <!DOCTYPE html>
+    <html>
+    <head>
+        <meta charset="utf-8">
+        <title>Benchmark Results</title>
+    </head>
+    <body>
+        <div class="container">
+            <h1>Benchmark Results</h1>
+            <h2>Latest Results Comparison</h2>
+            <div class="charts">
+                {comparison_charts_html}
+            </div>
+            <h2>Historical Results</h2>
+            <div class="charts">
+                {timeseries_html}
+            </div>
+        </div>
+    </body>
+    </html>
+    """
+
+    return html_template
diff --git a/scripts/benchmarks/output.py b/scripts/benchmarks/output_markdown.py
similarity index 96%
rename from scripts/benchmarks/output.py
rename to scripts/benchmarks/output_markdown.py
index eec8957fe7..177869f8f0 100644
--- a/scripts/benchmarks/output.py
+++ b/scripts/benchmarks/output_markdown.py
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import collections, re
-from benches.base import Result
+from benches.result import Result
 from benches.options import options
 import math
@@ -126,7 +126,7 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
         if oln.diff != None:
             oln.row += f" | {(oln.diff - 1)*100:.2f}%"
             delta = oln.diff - 1
-            oln.bars = round(10*(oln.diff - 1)/max_diff)
+            oln.bars = round(10*(oln.diff - 1)/max_diff) if max_diff != 0.0 else 0
             if oln.bars == 0 or abs(delta) < options.epsilon:
                 oln.row += " | . |"
             elif oln.bars > 0:
@@ -155,7 +155,6 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
         if options.verbose: print(oln.row)
 
         summary_table += oln.row + "\n"
-
     grouped_objects = collections.defaultdict(list)
 
     for oln in output_detailed_list:
@@ -211,7 +210,7 @@ def generate_summary_table_and_chart(chart_data: dict[str, list[Result]]):
 
     return summary_line, summary_table
 
-def generate_markdown(chart_data: dict[str, list[Result]]):
+def generate_markdown(name: str, chart_data: dict[str, list[Result]]):
     (summary_line, summary_table) = generate_summary_table_and_chart(chart_data)
 
     return f"""
@@ -220,5 +219,5 @@ def generate_markdown(chart_data: dict[str, list[Result]]):
 (result is better)\n
 {summary_table}
 # Details
-{generate_markdown_details(chart_data["This PR"])}
+{generate_markdown_details(chart_data[name])}
 """
diff --git a/scripts/benchmarks/utils/utils.py b/scripts/benchmarks/utils/utils.py
index 586837fc6f..0cd10b9513 100644
--- a/scripts/benchmarks/utils/utils.py
+++ b/scripts/benchmarks/utils/utils.py
@@ -4,12 +4,10 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 import os
-import json
 import shutil
 import subprocess # nosec B404
-from pathlib import Path
-from benches.result import Result
 from benches.options import options
+from pathlib import Path
 
 def run(command, env_vars={}, cwd=None, add_sycl=False):
     try:
@@ -51,25 +49,6 @@ def git_clone(dir, name, repo, commit):
         raise Exception(f"The directory {repo_path} exists but is not a git repository.")
     return repo_path
 
-def save_benchmark_results(dir, save_name, benchmark_data: list[Result]):
-    serialized = [res.to_json() for res in benchmark_data]
-    results_dir = Path(os.path.join(dir, 'results'))
-    os.makedirs(results_dir, exist_ok=True)
-
-    file_path = Path(os.path.join(results_dir, f"{save_name}.json"))
-    with file_path.open('w') as file:
-        json.dump(serialized, file, indent=4)
-    print(f"Benchmark results saved to {file_path}")
-
-def load_benchmark_results(dir, compare_name) -> list[Result]:
-    file_path = Path(os.path.join(dir, 'results', f"{compare_name}.json"))
-    if file_path.exists():
-        with file_path.open('r') as file:
-            data = json.load(file)
-        return [Result.from_json(item) for item in data]
-    else:
-        return None
-
 def prepare_bench_cwd(dir):
     # we need 2 deep to workaround a problem with a fixed relative path in cudaSift
     options.benchmark_cwd = os.path.join(dir, 'bcwd', 'bcwd')
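Note on the normalization used by output_html.py (editorial example, not part of the patch): prepare_normalized_data() scales each run's latest value against the run named by compare_names[0], so 100% means "matches the baseline" and higher bars are always better, regardless of whether the underlying metric is lower-is-better. A minimal, self-contained sketch of that arithmetic, using made-up numbers:

    # Illustrative values only; not taken from any real benchmark run.
    baseline_value = 120.0   # baseline run, e.g. in ms (lower is better)
    current_value = 100.0    # run being compared against the baseline
    lower_is_better = True
    normalized = ((baseline_value / current_value) if lower_is_better
                  else (current_value / baseline_value)) * 100
    print(f"{normalized:.1f}%")  # prints "120.0%", i.e. 20% better than the baseline

Missing data points are carried through as None and rendered as zero-height bars, which is why create_bar_plot() only annotates bars whose value is not None.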
diff --git a/third_party/benchmark_requirements.txt b/third_party/benchmark_requirements.txt
new file mode 100644
index 0000000000..c01a2215c5
--- /dev/null
+++ b/third_party/benchmark_requirements.txt
@@ -0,0 +1,43 @@
+six==1.16.0
+matplotlib==3.9.2
+mpld3==0.5.10
+alabaster==0.7.12
+Babel==2.14.0
+bandit==1.6.2
+beautifulsoup4==4.11.1
+breathe==4.33.1
+bs4==0.0.1
+certifi==2024.07.04
+chardet==3.0.4
+clang-format==15.0.7
+colorama==0.4.1
+docutils==0.15.2
+exhale==0.3.0
+idna==3.7
+imagesize==1.1.0
+Jinja2==3.1.4
+lxml==4.9.3
+Mako==1.3.0
+MarkupSafe==2.1.5
+packaging==24.1
+Pygments==2.17.2
+pyparsing==2.4.5
+pytest>=7.0
+pytz==2019.3
+PyYAML==6.0.1
+requests==2.32.2
+rst2pdf==0.102
+snowballstemmer==2.0.0
+soupsieve==1.9.5
+Sphinx==4.5.0
+sphinx-book-theme==0.3.3
+sphinxcontrib-applehelp==1.0.2
+sphinxcontrib-devhelp==1.0.2
+sphinxcontrib-htmlhelp==2.0.0
+sphinxcontrib-jsmath==1.0.1
+sphinxcontrib-qthelp==1.0.3
+sphinxcontrib-serializinghtml==1.1.5
+sphinxcontrib-websupport==1.2.4
+sphinx-rtd-theme==1.0.0
+urllib3==2.2.2
+dataclasses-json==0.6.7
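Usage note (editorial sketch, not part of the patch): with the pinned requirements installed, output_html.generate_html() can also be driven by hand; the main.py driver does the equivalent when HTML output is enabled. The run names, repository string, and output path below are placeholder assumptions, and the empty result lists only keep the example self-contained; real callers pass runs loaded from the stored benchmark history:

    # pip install -r third_party/benchmark_requirements.txt
    # run from scripts/benchmarks/ so the local imports resolve
    from benches.result import BenchmarkRun
    from output_html import generate_html

    runs = [BenchmarkRun(name='baseline', results=[]),   # placeholder runs with no results,
            BenchmarkRun(name='This PR', results=[])]    # just to make the sketch executable
    html = generate_html(runs, 'org/repo', ['baseline', 'This PR'])  # repo only used for commit links
    with open('benchmark_results.html', 'w') as f:
        f.write(html)

The first name in the compare list is treated as the baseline for the normalized bar charts; every other run is plotted relative to it.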