# Copyright Kani Contributors
# SPDX-License-Identifier: Apache-2.0 OR MIT
#
# Benchcomp regression testing suite. This suite uses Python's stdlib unittest
# module, but nevertheless actually runs the binary rather than running unit
# tests.

import pathlib
import subprocess
import tempfile
import textwrap
import unittest
import uuid

import yaml


class Benchcomp:
    """Invocation of benchcomp binary with optional subcommand and flags"""

    def __init__(self, config):
        self.proc, self.stdout, self.stderr = None, None, None

        with tempfile.NamedTemporaryFile(
                mode="w", delete=False, suffix=".yaml") as tmp:
            yaml.dump(config, tmp, default_flow_style=False)
            self.config_file = tmp.name

        self.bc = str(pathlib.Path(__file__).parent.parent /
                      "bin" / "benchcomp")

        wd = tempfile.mkdtemp()
        self.working_directory = pathlib.Path(wd)

    def __call__(self, subcommand=None, default_flags=None, flags=None):
        subcommand = subcommand or []
        default_flags = default_flags or [
            "--out-prefix", "/tmp/benchcomp/test"]
        config_flags = ["--config", str(self.config_file)]

        flags = flags or []

        cmd = [self.bc, *config_flags, *subcommand, *default_flags, *flags]
        self.proc = subprocess.Popen(
            cmd, text=True, stdout=subprocess.PIPE,
            stderr=subprocess.PIPE, cwd=self.working_directory)
        self.stdout, self.stderr = self.proc.communicate()


class RegressionTests(unittest.TestCase):
    def setUp(self):
        self.kani_dir = pathlib.Path(__file__).parent.parent.parent.parent

    def test_kani_perf_fail(self):
        cmd = (
            "rm -rf build target &&"
            "mkdir -p build/tests/perf/Unwind-Attribute/expected &&"
            "kani tests/kani/Unwind-Attribute/fixme_lib.rs > "
            "build/tests/perf/Unwind-Attribute/expected/expected.out"
        )
        self._run_kani_perf_test(cmd, False)

    def test_kani_perf_success(self):
        cmd = (
            "rm -rf build target &&"
            "mkdir -p build/tests/perf/Arbitrary/expected &&"
            "kani tests/kani/Arbitrary/arbitrary_impls.rs > "
            "build/tests/perf/Arbitrary/expected/expected.out"
        )
        self._run_kani_perf_test(cmd, True)

    def _run_kani_perf_test(self, command, expected_pass):
        """Ensure that the kani_perf parser can parse the output of a perf test"""

        # The two variants are identical; we're not actually checking the
        # returned metrics in this test, only checking that the parser works
        run_bc = Benchcomp({
            "variants": {
                "run_1": {
                    "config": {
                        "directory": str(self.kani_dir),
                        "command_line": command,
                    },
                },
                "run_2": {
                    "config": {
                        "directory": str(self.kani_dir),
                        "command_line": command,
                    },
                },
            },
            "run": {
                "suites": {
                    "suite_1": {
                        "parser": {"module": "kani_perf"},
                        "variants": ["run_1", "run_2"]
                    }
                }
            },
            "visualize": [{
                "type": "dump_yaml",
                "out_file": "-"
            }],
        })
        run_bc()
        self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)

        results = yaml.safe_load(run_bc.stdout)

        expected_types = {
            "solver_runtime": float,
            "symex_runtime": float,
            "verification_time": float,
            "success": bool,
            "number_program_steps": int,
            "number_vccs": int,
        }

        all_succeeded = True

        for _, bench in results["benchmarks"].items():
            for _, variant in bench["variants"].items():
                all_succeeded &= variant["metrics"]["success"]
                for metric, ttype in expected_types.items():
                    self.assertIn(
                        metric, variant["metrics"], msg=run_bc.stdout)
                    self.assertTrue(
                        isinstance(variant["metrics"][metric], ttype),
                        msg=run_bc.stdout)

        self.assertEqual(expected_pass, all_succeeded, msg=run_bc.stdout)
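
    # The tests below run benchcomp end-to-end on tiny shell commands that
    # write metric files into a scratch directory. As exercised here, the
    # test_file_to_metric parser treats each directory the command creates as
    # a benchmark and each file inside it as a metric, so e.g.
    # `echo true > bench_1/success` yields metric "success" == True for
    # benchmark "bench_1".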

    def test_error_on_regression_two_benchmarks_previously_failed(self):
        """Ensure that benchcomp terminates with an exit code of 0 when the
        "error_on_regression" visualization is configured and one of the
        benchmarks continues to fail (no regression)."""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "passed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 && "
                                "echo true > bench_1/success &&"
                                "echo false > bench_2/success"
                        },
                    },
                    "failed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 && "
                                "echo true > bench_1/success &&"
                                "echo false > bench_2/success"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["passed", "failed"]
                        }
                    }
                },
                "visualize": [{
                    "type": "error_on_regression",
                    "variant_pairs": [["passed", "failed"]],
                    "checks": [{
                        "metric": "success",
                        "test": "lambda old, new: False if not old else not new"
                    }]
                }]
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 0, msg=run_bc.stderr)

    def test_error_on_regression_two_benchmarks_one_failed(self):
        """Ensure that benchcomp terminates with an exit code of 1 when the
        "error_on_regression" visualization is configured and the success
        metric of one of the benchmarks has regressed"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "passed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 && "
                                "echo true > bench_1/success &&"
                                "echo true > bench_2/success"
                        },
                    },
                    "failed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 && "
                                "echo true > bench_1/success &&"
                                "echo false > bench_2/success"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["passed", "failed"]
                        }
                    }
                },
                "visualize": [{
                    "type": "error_on_regression",
                    "variant_pairs": [["passed", "failed"]],
                    "checks": [{
                        "metric": "success",
                        "test": "lambda old, new: False if not old else not new"
                    }]
                }]
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 1, msg=run_bc.stderr)

    def test_error_on_regression_visualization_success_regressed(self):
        """Ensure that benchcomp terminates with an exit code of 1 when the
        "error_on_regression" visualization is configured and the success
        metric of one of the benchmarks has regressed"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "passed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo true > bench_1/success"
                        },
                    },
                    "failed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo false > bench_1/success"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["passed", "failed"]
                        }
                    }
                },
                "visualize": [{
                    "type": "error_on_regression",
                    "variant_pairs": [["passed", "failed"]],
                    "checks": [{
                        "metric": "success",
                        "test": "lambda old, new: False if not old else not new"
                    }]
                }]
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 1, msg=run_bc.stderr)

    def test_error_on_regression_visualization_success_no_regressed(self):
        """Ensure that benchcomp terminates with an exit code of 0 when the
        "error_on_regression" visualization is configured and none of the
        benchmarks' success metrics have regressed"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "passed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo true > bench_1/success"
                        },
                    },
                    "failed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo true > bench_1/success"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["passed", "failed"]
                        }
                    }
                },
                "visualize": [{
                    "type": "error_on_regression",
                    "variant_pairs": [["passed", "failed"]],
                    "checks": [{
                        "metric": "success",
                        "test": "lambda old, new: False if not old else not new"
                    }]
                }]
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 0, msg=run_bc.stderr)

    def test_error_on_regression_visualization_ratio_no_regressed(self):
        """Ensure that benchcomp terminates with an exit code of 0 when the
        "error_on_regression" visualization is configured and none of the
        metrics regressed"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "more": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo 10 > bench_1/n_bugs"
                        },
                    },
                    "less": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo 5 > bench_1/n_bugs"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["less", "more"]
                        }
                    }
                },
                "visualize": [{
                    "type": "error_on_regression",
                    "variant_pairs": [["more", "less"]],
                    "checks": [{
                        "metric": "n_bugs",
                        "test": "lambda old, new: new / old > 1.75",
                    }]
                }]
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 0, msg=run_bc.stderr)

    def test_error_on_regression_visualization_ratio_regressed(self):
        """Ensure that benchcomp terminates with an exit code of 1 when the
        "error_on_regression" visualization is configured and one of the
        metrics regressed"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "more": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo 10 > bench_1/n_bugs"
                        },
                    },
                    "less": {
                        "config": {
                            "directory": str(tmp),
                            "command_line": "mkdir bench_1 && echo 5 > bench_1/n_bugs"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["less", "more"]
                        }
                    }
                },
                "visualize": [{
                    "type": "error_on_regression",
                    "variant_pairs": [["less", "more"]],
                    "checks": [{
                        "metric": "n_bugs",
                        "test": "lambda old, new: new / old > 1.75",
                    }]
                }]
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 1, msg=run_bc.stderr)

    def test_markdown_results_table(self):
        """Run the markdown results table visualization"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "variant_1": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 bench_3"
                                "&& echo true > bench_1/success"
                                "&& echo true > bench_2/success"
                                "&& echo false > bench_3/success"
                                "&& echo 5 > bench_1/runtime"
                                "&& echo 10 > bench_2/runtime"
                        },
                    },
                    "variant_2": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 bench_3"
                                "&& echo true > bench_1/success"
                                "&& echo false > bench_2/success"
                                "&& echo true > bench_3/success"
                                "&& echo 10 > bench_1/runtime"
                                "&& echo 5 > bench_2/runtime"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["variant_1", "variant_2"]
                        }
                    }
                },
                "visualize": [{
                    "type": "dump_markdown_results_table",
                    "out_file": "-",
                    "extra_columns": {
                        "runtime": [{
                            "column_name": "ratio",
                            "text":
                                "lambda b: str(b['variant_2']/b['variant_1'])"
                                "if b['variant_2'] < 1.5 * b['variant_1'] "
                                "else '**' + str(b['variant_2']/b['variant_1']) + '**'"
                        }],
                        "success": [{
                            "column_name": "notes",
                            "text":
                                "lambda b: '' if b['variant_2'] == b['variant_1']"
                                "else 'newly passing' if b['variant_2'] "
                                "else 'regressed'"
                        }]
                    }
                }]
            })
            run_bc()
            self.assertEqual(run_bc.proc.returncode, 0, msg=run_bc.stderr)
            self.assertEqual(
                run_bc.stdout, textwrap.dedent("""
                    ## runtime
                    | Benchmark | variant_1 | variant_2 | ratio |
                    | --- | --- | --- | --- |
                    | bench_1 | 5 | 10 | **2.0** |
                    | bench_2 | 10 | 5 | 0.5 |

                    ## success
                    | Benchmark | variant_1 | variant_2 | notes |
                    | --- | --- | --- | --- |
                    | bench_1 | True | True |  |
                    | bench_2 | True | False | regressed |
                    | bench_3 | False | True | newly passing |
                    """))
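
    # The next two tests exercise benchcomp's `--only` and `--except` flags,
    # which restrict which of the configured visualizations actually run;
    # result.yaml is still written either way, so both tests re-read it.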

    def test_only_dump_yaml(self):
        """Ensure that benchcomp terminates with return code 0 when `--only
        dump_yaml` is passed, even if the error_on_regression visualization
        would have resulted in a return code of 1"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "passed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 && "
                                "echo true > bench_1/success &&"
                                "echo true > bench_2/success"
                        },
                    },
                    "failed": {
                        "config": {
                            "directory": str(tmp),
                            "command_line":
                                "mkdir bench_1 bench_2 && "
                                "echo true > bench_1/success &&"
                                "echo false > bench_2/success"
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test_file_to_metric"},
                            "variants": ["passed", "failed"]
                        }
                    }
                },
                "visualize": [{
                    "type": "dump_yaml",
                    "out_file": "-",
                }, {
                    "type": "error_on_regression",
                    "variant_pairs": [["passed", "failed"]],
                    "checks": [{
                        "metric": "success",
                        "test": "lambda old, new: True"
                    }]
                }]
            })
            run_bc(flags=["--only", "dump_yaml"])
            self.assertEqual(
                run_bc.proc.returncode, 0, msg=run_bc.stderr)

            with open(run_bc.working_directory / "result.yaml") as handle:
                result = yaml.safe_load(handle)

    def test_ignore_dump_yaml(self):
        """Ensure that benchcomp does not print any YAML output even with the
        dump_yaml visualization when the `--except dump_yaml` flag is passed"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "variant_1": {
                        "config": {
                            "directory": tmp,
                            "command_line": "true",
                        }
                    },
                    "variant_2": {
                        "config": {
                            "directory": tmp,
                            "command_line": "true",
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test"},
                            "variants": ["variant_1", "variant_2"]
                        }
                    }
                },
                "visualize": [{
                    "type": "dump_yaml",
                    "out_file": "-",
                }],
            })
            run_bc(flags=["--except", "dump_yaml"])
            self.assertEqual(
                run_bc.stdout, "", msg=run_bc.stdout)

            with open(run_bc.working_directory / "result.yaml") as handle:
                result = yaml.safe_load(handle)

    def test_return_0(self):
        """Ensure that benchcomp terminates with return code 0"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "variant_1": {
                        "config": {
                            "directory": tmp,
                            "command_line": "true",
                        }
                    },
                    "variant_2": {
                        "config": {
                            "directory": tmp,
                            "command_line": "true",
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test"},
                            "variants": ["variant_1", "variant_2"]
                        }
                    }
                },
                "visualize": [],
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 0, msg=run_bc.stderr)

            with open(run_bc.working_directory / "result.yaml") as handle:
                result = yaml.safe_load(handle)

    def test_return_0_on_fail(self):
        """Ensure that benchcomp terminates with return code 0 even if a suite
        fails"""

        with tempfile.TemporaryDirectory() as tmp:
            run_bc = Benchcomp({
                "variants": {
                    "variant_1": {
                        "config": {
                            "directory": tmp,
                            "command_line": "false",
                        }
                    },
                    "variant_2": {
                        "config": {
                            "directory": tmp,
                            "command_line": "true",
                        }
                    }
                },
                "run": {
                    "suites": {
                        "suite_1": {
                            "parser": {"module": "test"},
                            "variants": ["variant_1", "variant_2"]
                        }
                    }
                },
                "visualize": [],
            })
            run_bc()
            self.assertEqual(
                run_bc.proc.returncode, 0, msg=run_bc.stderr)

            with open(run_bc.working_directory / "result.yaml") as handle:
                result = yaml.safe_load(handle)
"suites": { "suite_1": { "parser": {"module": "test"}, "variants": ["env_unset", "env_set"] } } }, "visualize": [], }) run_bc() self.assertEqual( run_bc.proc.returncode, 0, msg=run_bc.stderr) with open(run_bc.working_directory / "result.yaml") as handle: result = yaml.safe_load(handle) self.assertEqual( result["benchmarks"]["suite_1"]["variants"][ "env_set"]["metrics"]["foos"], 1, msg=yaml.dump(result, default_flow_style=False)) self.assertEqual( result["benchmarks"]["suite_1"]["variants"][ "env_unset"]["metrics"]["foos"], 0, msg=yaml.dump(result, default_flow_style=False)) def test_command_parser(self): """Ensure that CommandParser can execute and read the output of a parser""" with tempfile.TemporaryDirectory() as tmp: run_bc = Benchcomp({ "variants": { "v1": { "config": { "command_line": "true", "directory": tmp, } }, "v2": { "config": { "command_line": "true", "directory": tmp, } } }, "run": { "suites": { "suite_1": { "parser": { "command": """ echo '{ "benchmarks": {}, "metrics": {} }' """ }, "variants": ["v2", "v1"] } } }, "visualize": [], }) run_bc() self.assertEqual( run_bc.proc.returncode, 0, msg=run_bc.stderr) with open(run_bc.working_directory / "result.yaml") as handle: result = yaml.safe_load(handle) for item in ["benchmarks", "metrics"]: self.assertIn(item, result) def test_run_command_visualization(self): """Ensure that the run_command visualization can execute a command""" with tempfile.TemporaryDirectory() as tmp: out_file = pathlib.Path(tmp) / str(uuid.uuid4()) run_bc = Benchcomp({ "variants": { "v1": { "config": { "command_line": "true", "directory": tmp, } }, "v2": { "config": { "command_line": "true", "directory": tmp, } } }, "run": { "suites": { "suite_1": { "parser": { "command": """ echo '{ "benchmarks": {}, "metrics": {} }' """ }, "variants": ["v2", "v1"] } } }, "visualize": [{ "type": "run_command", "command": f"cat - > {out_file}" }], }) run_bc() self.assertEqual( run_bc.proc.returncode, 0, msg=run_bc.stderr) with open(out_file) as handle: result = yaml.safe_load(handle) for item in ["benchmarks", "metrics"]: self.assertIn(item, result) def test_run_failing_command_visualization(self): """Ensure that benchcomp terminates with a non-zero return code when run_command visualization fails""" with tempfile.TemporaryDirectory() as tmp: out_file = pathlib.Path(tmp) / str(uuid.uuid4()) run_bc = Benchcomp({ "variants": { "v1": { "config": { "command_line": "true", "directory": tmp, } }, "v2": { "config": { "command_line": "true", "directory": tmp, } } }, "run": { "suites": { "suite_1": { "parser": { "command": """ echo '{ "benchmarks": {}, "metrics": {} }' """ }, "variants": ["v2", "v1"] } } }, "visualize": [{ "type": "run_command", "command": f"cat - > {out_file}; false" }], }) run_bc() self.assertNotEqual( run_bc.proc.returncode, 0, msg=run_bc.stderr)