#!/usr/bin/env python3 """List stubbed functions and missing functions in a set of CBMC proofs. This script scans the json files produced by cbmc-viewer for a set of proofs and constructs for each proof three lists: * stubbed: the list of stubbed functions used in the proof (those functions defined in code outside the project code itself and under the cbmc directory in most proof projects), * removed: the list of undefined functions removed by the proof author (and listed as as "expected-missing-functions" in the cbmc-viewer configuration file cbmc-viewer.json), and * missing: the list of undefined functions that are simply missing from the proof. All of the needed data is in the files viewer-result.json (listing the undefined functions in the proof), viewer-reachable.json (list the reachable functions in the proof), and cbmc-viewer.json (listing the known undefined functions). The script requires some location information (eg, the paths to the json files) that can be provided with command line flags, but the script tries to guess this location information when it is omitted from the command line. You can view the guesses made with --verbose if the script seems to have guessed wrong. NOTE: All of this information we need is in the goto binary itself, but 'goto-analyzer --reachable-functions' does not include undefined functions in the list of reachable functions. """ import re import argparse import json import logging import os import subprocess import sys ################################################################ # Command line parser def create_parser(): desc = "Summarize the stubs and undefined functions used in a proof." args = [ { "flag": "--proof", "nargs": "*", "help": """ A list of proof names. A proof name is interpreted as a relative path from PROOFDIR to a directory containing a proof (see --proofdir PROOFDIR). By default, if PROOF is not specified, use all proofs discovered under PROOFDIR. """ }, { "flag": "--srcdir", "help": """ The root of the source tree. SRCDIR is used only to guess CBMCPATH if it is not specified (see --cbmcpath CBMCPATH.) By default, if SRCDIR is not specified, look for the Makefile in a proof directory and use the argument to --srcdir used in the invocation of cbmc-viewer by that Makefile. """ }, { "flag": "--proofdir", "default": '.', "help": """ The root of the proof tree. PROOFDIR is used only to compute the full path to a proof directory. A proof name is interpreted as a relative path from PROOFDIR to a directory containing a proof, so PROOFDIR can by any directory containing a proof. By default, PROOFDIR is the current directory. (Default: %(default)s) """ }, { "flag": "--cbmcpath", "help": """ The relative path from the source root SRCDIR to the cbmc root CBMCDIR (see --srcdir SRCDIR), typically 'test/cbmc'. CBMCDIR contains all code written specifically to build the proofs and typically also includes the proofs themselves. All code under CBMCDIR is considered to be a proof stub. By default, if CBMCPATH is not specified, then CBMCDIR is the parent of PROOFDIR (see --proofdir PROOFDIR), and CBMCPATH is the relative path from SRCDIR to CBMCDIR (see --srcdir SRCDIR). """ }, { "flag": "--jsonpath", "help": """ The relative path from the proof directory to the json directory containing cbmc-viewer output, typically 'report/json' where 'report' is the string specified by the --reportdir or --htmldir flag to cbmc-viewer. By default, if JSONPATH is not specified, use the path from a proof directory to a directory containing the file 'viewer-result.json'. """ }, { "flag": "--stubbed-usage", "action": "store_true", "help": """ Summarize stubbed function usage by "name" mapping each function to the proofs using the stub and by "count" giving a histogram mapping each function to the number of times it is used. """ }, { "flag": "--removed-usage", "action": "store_true", "help": """ Summarize removed function usage by "name" mapping each function to the proofs using the stub and by "count" giving a histogram mapping each function to the number of times it is used. """ }, { "flag": "--missing-usage", "action": "store_true", "help": """ Summarize missing function usage by "name" mapping each function to the proofs using the stub and by "count" giving a histogram mapping each function to the number of times it is used. """ }, { "flag": "--config", "default": "cbmc-viewer.json", "help": """ Name of the cbmc-viewer configuration file. (Default: %(default)s) """ }, { "flag": "--viewer-reachable", "default": "viewer-reachable.json", "help": """ Name of the cbmc-viewer summary of reachable functions. (Default: %(default)s) """ }, { "flag": "--viewer-result", "default": "viewer-result.json", "help": """ Name of the cbmc-viewer summary of results. (Default: %(default)s) """ }, { "flag": "--verbose", "action": "store_true", "help": "Verbose output" }, { "flag": "--debug", "action": "store_true", "help": "Debug output" } ] epilog = """ The simplest way to use the script is to run '%(prog)s' in the directory that is the root of the proof tree (usually called 'proofs'). """ parser = argparse.ArgumentParser(description=desc, epilog=epilog) for arg in args: flag = arg['flag'] del arg['flag'] parser.add_argument(flag, **arg) return parser def parse_arguments(args=None): return create_parser().parse_args(args=args) def configure_logger(verbose=False, debug=False): # Logging is configured by the first invocation of logging.basicConfig fmt = '%(levelname)s: %(message)s' if debug: logging.basicConfig(level=logging.DEBUG, format=fmt) if verbose: logging.basicConfig(level=logging.INFO, format=fmt) logging.basicConfig(format=fmt) ################################################################ # Shell out commands def run(cmd, cwd=None, encoding=None): """Run a command in a subshell and return the standard output. Run the command cmd in the directory cwd and use encoding to decode the standard output. """ kwds = { 'cwd': cwd, 'stdout': subprocess.PIPE, 'stderr': subprocess.PIPE, 'text': True, } if sys.version_info >= (3, 6): # encoding introduced in Python 3.6 kwds['encoding'] = encoding logging.debug('Running "%s" in %s', ' '.join(cmd), cwd) result = subprocess.run(cmd, **kwds, check=False) if result.returncode: logging.debug('Failed command: %s', ' '.join(cmd)) logging.debug('Failed return code: %s', result.returncode) logging.debug('Failed stdout: %s', result.stdout.strip()) logging.debug('Failed stderr: %s', result.stderr.strip()) return [] # Remove line continuations before splitting stdout into lines # Running command with text=True converts line endings to \n in stdout lines = result.stdout.replace('\\\n', ' ').splitlines() return [strip_whitespace(line) for line in lines] def strip_whitespace(string): return re.sub(r'\s+', ' ', string).strip() ################################################################ # Load viewer JSON data def load_json(name="", path=None): res = {} try: with open(path, encoding="utf-8") as handle: res = json.load(handle) except TypeError: logging.debug("%s file name %s is not a string", name, path) except FileNotFoundError: logging.debug("Can't open %s file %s", name, path) except json.decoder.JSONDecodeError: logging.debug("Can't parse %s file %s as json", name, path) return res def load_config(path=None): path = path or 'cbmc-viewer.json' return load_json("config", path) def load_viewer_result(path=None): path = path or 'report/json/viewer-result.json' return load_json("viewer result", path) def load_viewer_reachable(path=None): path = path or 'report/json/viewer-reachable.json' return load_json("viewer reachable", path) ################################################################ # Parse viewer JSON data def load_reachable_functions(path=None): reachable = load_viewer_reachable(path) return reachable.get('viewer-reachable', {}).get('reachable', []) def load_missing_functions(path=None): def missing(warning): if warning.strip().startswith('**** WARNING: no body for function'): return warning.strip().split()[-1] return None result = load_viewer_result(path) warnings = result.get('viewer-result', {}).get('warning', []) return [fcn for fcn in [missing(warning) for warning in warnings] if fcn] def load_expected_missing_functions(path=None): config = load_config(path) return config.get('expected-missing-functions', []) ################################################################ # Summarize proof abstraction def is_builtin(path): if not path: logging.info("Can't test for builtin: path %s is empty", path) return False path = os.path.normpath(path) name = os.path.split(path)[-1] return name.startswith(' 0 return proofs def guess_jsonpath_from_result_paths(args, result_paths): if args.jsonpath: # don't guess, already defined return args.jsonpath # CBMC viewer puts viewer-result.json under report/json in every # proof directory where report is the value of the --reportdir # flag to viewer. # # Assume the directory two levels above viewer-result.json is reportdir. reportdirs = [ os.path.normpath( os.path.basename(os.path.dirname(os.path.dirname( os.path.normpath(result_path))))) for result_path in result_paths] reportdirs = sorted(set(reportdirs)) assert len(reportdirs) == 1 reportdir = reportdirs[0] return os.path.normpath(os.path.join(reportdir, 'json')) ################################################################ # Guess missing flags from flags to cbmc-viewer command def guess_jsonpath_from_viewer_command(args, viewer_command): if args.jsonpath: # don't guess, already defined return args.jsonpath reportdir = extract_reportdir(viewer_command) if not reportdir: return args.jsonpath return os.path.normpath(os.path.join(reportdir, 'json')) def guess_srcdir_from_viewer_command(args, viewer_command): if args.srcdir: # don't guess, already defined return args.srcdir assert len(args.proof) > 0 # validated by guess_proof() proof = [args.proofdir, args.proof[0]] viewer_command = get_viewer_command(*proof) if not viewer_command: logging.info("Can't guess srcdir: cbmc-viewer command not found.") return args.cbmcpath srcdir = extract_srcdir(viewer_command) if not srcdir: logging.info("Can't guess srcdir: " "--srcdir not found in cbmc-viewer command: %s", viewer_command) return args.cbmcpath return os.path.abspath(srcdir) ################################################################ # Guess flags from other arguments def guess_cbmcpath_from_args(args): if args.cbmcpath: # don't guess, already defined return args.cbmcpath # Guess cbmcpath from srcdir and proofdir if not args.srcdir: logging.info("Can't guess cbmcpath: srcdir is empty.") return args.cbmcpath if not args.proofdir: logging.info("Can't guess cbmcpath: proofdir is empty.") return args.cbmcpath # Guess that cbmcdir is the parent of proofdir, and guess that # cbmcpath is the path from srcdir to cbmcdir. proofdir = os.path.abspath(args.proofdir) srcdir = os.path.abspath(args.srcdir) cbmcpath = os.path.relpath(os.path.dirname(proofdir), srcdir) if cbmcpath.startswith('..'): logging.info("Can't guess cbmcpath: " "srcdir %s does not contain proofdir %s", srcdir, proofdir) return args.cbmcpath return cbmcpath ################################################################ # Compute histogram def defined_histogram(summary, key='stubbed'): histogram_by_name = {} for proof, proof_summary in summary.items(): for _, stubbed_functions in proof_summary[key].items(): for stub in stubbed_functions: histogram_by_name[stub] = histogram_by_name.get(stub, []) histogram_by_name[stub].append(proof) counts = [(stub, len(users)) for stub, users in histogram_by_name.items()] counts = sorted(counts, key=lambda pair: (pair[1], pair[0]), reverse=True) histogram_by_count = {pair[0]: pair[1] for pair in counts} return {"name": histogram_by_name, "count": histogram_by_count} def undefined_histogram(summary, key): histogram_by_name = {} for proof, proof_summary in summary.items(): for undefined in proof_summary[key]: histogram_by_name[undefined] = histogram_by_name.get(undefined, []) histogram_by_name[undefined].append(proof) counts = [(func, len(users)) for func, users in histogram_by_name.items()] counts = sorted(counts, key=lambda pair: (pair[1], pair[0]), reverse=True) histogram_by_count = {pair[0]: pair[1] for pair in counts} return {"name": histogram_by_name, "count": histogram_by_count} ################################################################ def compute_default_arguments(args): # Ensure absolute paths to important directories if args.proofdir: args.proofdir = os.path.abspath(args.proofdir) if args.srcdir: args.srcdir = os.path.abspath(args.srcdir) if args.proof: args.proof = [os.path.normpath(proof) for proof in args.proof] assert args.proofdir result_paths = viewer_result_paths(args) args.proof = guess_proof_from_result_paths(args, result_paths) args.jsonpath = guess_jsonpath_from_result_paths(args, result_paths) assert args.proofdir assert args.proof viewer_command = get_viewer_command(args.proofdir, args.proof[0]) args.jsonpath = guess_jsonpath_from_viewer_command(args, viewer_command) args.srcdir = guess_srcdir_from_viewer_command(args, viewer_command) args.cbmcpath = guess_cbmcpath_from_args(args) return args def log_arguments(args, text=''): label = ' '.join(['ARG', text]).strip() for arg in dir(args): if not arg.startswith('_'): logging.info('%s: %s: %s', label, arg, getattr(args, arg)) def main(): args = parse_arguments() configure_logger(args.verbose, args.debug) log_arguments(args) args = compute_default_arguments(args) log_arguments(args) summary = scan_proofs(args.proof, args) if args.stubbed_usage: print(json.dumps(defined_histogram(summary, 'stubbed'), indent=2)) return if args.removed_usage: print(json.dumps(undefined_histogram(summary, 'removed'), indent=2)) return if args.missing_usage: print(json.dumps(undefined_histogram(summary, 'missing'), indent=2)) return print(json.dumps(summary, indent=2)) if __name__ == "__main__": main()