# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 """Annotated source code.""" import html import logging import os import re import voluptuous import voluptuous.humanize from cbmc_viewer import markup_link from cbmc_viewer import templates from cbmc_viewer import util ################################################################ # Data passed to jinja to generate annotated code from code.jinja.html VALID_LINE = voluptuous.Schema({ 'num': int, 'status': voluptuous.Any('hit', 'missed', 'both', 'none'), 'code': str, }, required=True) VALID_CODE = voluptuous.Schema({ 'filename': str, 'path_to_root': str, 'lines': [VALID_LINE], 'outdir': str }, required=True) ################################################################ # An annotated file of source code. class Code: """An annotated file of source code.""" def __init__(self, root, path, symbols, coverage, outdir='.'): try: # load code into a string with open(os.path.join(root, path), encoding='utf-8') as source: code = html.escape(untabify_code(source.read()), quote=False) # split code into blocks of code, comments, and string literals blocks = split_code_into_blocks(code) # link symbols in code blocks to symbol definitions linked_blocks = link_symbols_in_code_blocks(path, blocks, symbols) # reform code as a string with symbols linked to definitions linked_code = ''.join(linked_blocks) # break code into lines annotated with line number and line coverage annotated_lines = annotate_code(path, linked_code, coverage) self.lines = annotated_lines except FileNotFoundError: # The goto symbol table occassional refers to header files # like gcc_builtin_headers_types.h that are part of the # CBMC implementation. # * We skip source annotation: We treat the file as a # zero-length file with nothing to annotate. # * We print a simple warning message: The relative path # to the file in the symbol table was interpreted by # the symbol table parser as relative to the working # directory. Printing this path in the warning is # confusing, so we print just the base name. logging.warning("Skipping source file annotation: %s", os.path.basename(path)) self.lines = [] self.filename = path self.path_to_root = markup_link.path_to_file('.', path) self.outdir = outdir self.validate() def __str__(self): """Render annotated code as html.""" return templates.render_code(self.filename, self.path_to_root, self.lines) def validate(self): """Validate members of a code object.""" return voluptuous.humanize.validate_with_humanized_errors( self.__dict__, VALID_CODE ) def dump(self, filename=None, outdir=None): """Write annotated code to a file rendered as html.""" util.dump(self, filename or self.filename + ".html", outdir or self.outdir) ################################################################ # Untabify code: replace tabs with spaces. def untabify_code(code, tabstop=8): """Untabify a block of code.""" return '\n'.join( [untabify_line(line, tabstop) for line in code.splitlines()] ) def untabify_line(line, tabstop=8): """Untabify a line of code.""" strings = [] length = 0 for string in re.split('(\t)', line): if string == '\t': string = ' '*(tabstop - (length % tabstop)) length += len(string) strings.append(string) return ''.join(strings) ################################################################ # Split code into code blocks and strings/comments def split_code_into_blocks(code): """Split code into blocks of code, comments, and string literals.""" def is_noncode_start(code, idx=0): """String starts something other than source code (eg, a comment).""" return (is_quote(code, idx) or is_multiline_comment_start(code, idx) or is_singleline_comment_start(code, idx)) def find_predicate(code, predicate, idx=0): """First position in a string satisfying a predicate.""" while idx < len(code) and not predicate(code, idx): idx += 1 return idx blocks = [] while code: idx = find_predicate(code, is_noncode_start) block, code, idx = code[:idx], code[idx:], 0 if block: blocks.append(block) if not code: break if is_quote(code): idx = find_predicate(code, is_quote, 1) elif is_multiline_comment_start(code): idx = find_predicate(code, is_multiline_comment_end, 2) elif is_singleline_comment_start(code): idx = find_predicate(code, is_singleline_comment_end, 2) block, code, idx = code[:idx+1], code[idx+1:], 0 if block: blocks.append(block) return blocks def is_quote(code, idx=0): """Position in string is an unescaped quotation mark.""" return (0 <= idx < len(code) and code[idx] == '"' and (idx == 0 or code[idx-1] != '\\')) def is_multiline_comment_start(code, idx=0): """Position in string starts a multi-line comment.""" return idx >= 0 and idx+2 <= len(code) and code[idx:idx+2] == '/*' def is_multiline_comment_end(code, idx=0): """Position in string ends a multi-line comment.""" return idx-1 >= 0 and idx+1 <= len(code) and code[idx-1:idx+1] == '*/' def is_singleline_comment_start(code, idx=0): """Position in string starts a one-line comment.""" return idx >= 0 and idx+2 <= len(code) and code[idx:idx+2] == '//' def is_singleline_comment_end(code, idx=0): """Position in string ends a one-line comment.""" return idx >= 0 and idx+1 < len(code) and code[idx+1] == '\n' ################################################################ # Link symbols in code blocks def link_symbols_in_code_blocks(path, blocks, symbols): """Link symbols appearing a sequence of blocks.""" return [link_symbols_in_code_block(path, block, symbols) for block in blocks] def link_symbols_in_code_block(path, block, symbols): """Link symbols appearing a code block.""" if (is_quote(block) or is_multiline_comment_start(block) or is_singleline_comment_start(block)): return block return link_symbols(path, block, symbols) def link_symbols(path, code, symbols): """Link symbols appearing a code string.""" tokens = split_code_into_symbols(code) return ''.join( [markup_link.link_text_to_symbol(tkn, tkn, symbols, from_file=path, escape_text=False) for tkn in tokens] ) def split_code_into_symbols(code): """Split a code string into a list of symbols and nonsymbols.""" return re.split('([_a-zA-Z][_a-zA-Z0-9]*)', code) ################################################################ # Annotate lines of symbol-linked code with line numbers and coverage def annotate_code(path, code, coverage): """Annotate lines of code with line numbers and coverage status.""" return [{ # line_num is 0-based, line numbers are 1-based 'num': line_num+1, 'status': str(coverage.lookup(path, line_num+1)).lower(), 'code': line } for (line_num, line) in enumerate(code.splitlines())]