// Copyright Kani Contributors // SPDX-License-Identifier: Apache-2.0 OR MIT //! This module defines all compiler extensions that form the Kani compiler. //! //! The [KaniCompiler] can be used across multiple rustc driver runs ([RunCompiler::run()]), //! which is used to implement stubs. //! //! In the first run, [KaniCompiler::config] will implement the compiler configuration and it will //! also collect any stubs that may need to be applied. This method will be a no-op for any //! subsequent runs. The [KaniCompiler] will parse options that are passed via `-C llvm-args`. //! //! If no stubs need to be applied, the compiler will proceed to generate goto code, and it won't //! need any extra runs. However, if stubs are required, we will have to restart the rustc driver //! in order to apply the stubs. For the subsequent runs, we add the stub configuration to //! `-C llvm-args`. #[cfg(feature = "cprover")] use crate::codegen_cprover_gotoc::GotocCodegenBackend; use crate::kani_middle::attributes::is_proof_harness; use crate::kani_middle::check_crate_items; use crate::kani_middle::metadata::gen_proof_metadata; use crate::kani_middle::reachability::filter_crate_items; use crate::kani_middle::stubbing::{self, harness_stub_map}; use crate::kani_queries::{QueryDb, ReachabilityType}; use crate::parser::{self, KaniCompilerParser}; use crate::session::init_session; use kani_metadata::{ArtifactType, HarnessMetadata, KaniMetadata}; use rustc_codegen_ssa::traits::CodegenBackend; use rustc_driver::{Callbacks, Compilation, RunCompiler}; use rustc_hir::def_id::LOCAL_CRATE; use rustc_hir::definitions::DefPathHash; use rustc_interface::Config; use rustc_middle::ty::TyCtxt; use rustc_session::config::{ErrorOutputType, OutputType}; use rustc_session::EarlyErrorHandler; use rustc_span::ErrorGuaranteed; use std::collections::{BTreeMap, HashMap}; use std::fs::File; use std::io::BufWriter; use std::mem; use std::path::{Path, PathBuf}; use std::process::ExitCode; use std::sync::{Arc, Mutex}; use tracing::debug; /// Run the Kani flavour of the compiler. /// This may require multiple runs of the rustc driver ([RunCompiler::run]). pub fn run(args: Vec) -> ExitCode { let mut kani_compiler = KaniCompiler::new(); match kani_compiler.run(args) { Ok(()) => ExitCode::SUCCESS, Err(_) => ExitCode::FAILURE, } } /// Configure the cprover backend that generate goto-programs. #[cfg(feature = "cprover")] fn backend(queries: Arc>) -> Box { Box::new(GotocCodegenBackend::new(queries)) } /// Fallback backend. It will trigger an error if no backend has been enabled. #[cfg(not(feature = "cprover"))] fn backend(queries: Arc>) -> Box { compile_error!("No backend is available. Only supported value today is `cprover`"); } /// A stable (across compilation sessions) identifier for the harness function. type HarnessId = DefPathHash; /// A set of stubs. type Stubs = BTreeMap; #[derive(Clone, Debug)] struct HarnessInfo { pub metadata: HarnessMetadata, pub stub_map: Stubs, } /// Store some relevant information about the crate compilation. #[derive(Clone, Debug)] struct CrateInfo { /// The name of the crate being compiled. pub name: String, /// The metadata output path that shall be generated as part of the crate compilation. pub output_path: PathBuf, } /// Represents the current compilation stage. /// /// The Kani compiler may run the Rust compiler multiple times since stubbing has to be applied /// to the entire Rust compiler session. /// /// - We always start in the [CompilationStage::Init]. /// - After [CompilationStage::Init] we transition to either /// - [CompilationStage::CodegenNoStubs] on a regular crate compilation, this will follow Init. /// - [CompilationStage::CompilationSkipped], running the compiler to gather information, such as /// `--version` will skip code generation completely, and there is no work to be done. /// - After the [CompilationStage::CodegenNoStubs], we transition to either /// - [CompilationStage::CodegenWithStubs] when there is at least one harness with stubs. /// - [CompilationStage::Done] where there is no harness left to process. /// - The [CompilationStage::CodegenWithStubs] can last multiple Rust compiler runs. Once there is /// no harness left, we move to [CompilationStage::Done]. /// - The final stages are either [CompilationStage::Done] or [CompilationStage::CompilationSkipped]. /// - [CompilationStage::Done] represents the final state of the compiler after a successful /// compilation. The crate metadata is stored here (even if no codegen was actually performed). /// - [CompilationStage::CompilationSkipped] no compilation was actually performed. /// No work needs to be done. /// - Note: In a scenario where the compilation fails, the compiler will exit immediately, /// independent on the stage. Any artifact produced shouldn't be used. /// I.e.: /// ```dot /// graph CompilationStage { /// Init -> {CodegenNoStubs, CompilationSkipped} /// CodegenNoStubs -> {CodegenStubs, Done} /// // Loop up to N harnesses times. /// CodegenStubs -> {CodegenStubs, Done} /// CompilationSkipped /// Done /// } /// ``` #[allow(dead_code)] #[derive(Debug)] enum CompilationStage { /// Initial state that the compiler is always instantiated with. /// In this stage, we initialize the Query and collect all harnesses. Init, /// State where the compiler ran but didn't actually compile anything (e.g.: --version). CompilationSkipped, /// Stage where the compiler will perform codegen of all harnesses that don't use stub. CodegenNoStubs { target_harnesses: Vec, next_harnesses: Vec>, all_harnesses: HashMap, crate_info: CrateInfo, }, /// Stage where the compiler will codegen harnesses that use stub, one group at a time. /// The harnesses at this stage are grouped according to the stubs they are using. For now, /// we ensure they have the exact same set of stubs. CodegenWithStubs { target_harnesses: Vec, next_harnesses: Vec>, all_harnesses: HashMap, crate_info: CrateInfo, }, Done { metadata: Option<(KaniMetadata, CrateInfo)>, }, } impl CompilationStage { pub fn is_init(&self) -> bool { matches!(self, CompilationStage::Init) } } /// This object controls the compiler behavior. /// /// It is responsible for initializing the query database, as well as controlling the compiler /// state machine. For stubbing, we may require multiple iterations of the rustc driver, which is /// controlled and configured via KaniCompiler. struct KaniCompiler { /// Store the queries database. The queries should be initialized as part of `config` when the /// compiler state is Init. /// Note that we need to share the queries with the backend before `config` is called. pub queries: Arc>, /// The state which the compiler is at. stage: CompilationStage, } impl KaniCompiler { /// Create a new [KaniCompiler] instance. pub fn new() -> KaniCompiler { KaniCompiler { queries: QueryDb::new(), stage: CompilationStage::Init } } /// Compile the current crate with the given arguments. /// /// Since harnesses may have different attributes that affect compilation, Kani compiler can /// actually invoke the rust compiler multiple times. pub fn run(&mut self, orig_args: Vec) -> Result<(), ErrorGuaranteed> { loop { debug!(next=?self.stage, "run"); match &self.stage { CompilationStage::Init => { self.run_compilation_session(&orig_args)?; } CompilationStage::CodegenNoStubs { .. } => { unreachable!("This stage should always run in the same session as Init"); } CompilationStage::CodegenWithStubs { target_harnesses, all_harnesses, .. } => { assert!(!target_harnesses.is_empty(), "expected at least one target harness"); let target_harness = &target_harnesses[0]; let extra_arg = stubbing::mk_rustc_arg(&all_harnesses[&target_harness].stub_map); let mut args = orig_args.clone(); args.push(extra_arg); self.run_compilation_session(&args)?; } CompilationStage::Done { metadata: Some((kani_metadata, crate_info)) } => { // Only store metadata for harnesses for now. // TODO: This should only skip None. // https://github.com/model-checking/kani/issues/2493 if self.queries.lock().unwrap().reachability_analysis == ReachabilityType::Harnesses { // Store metadata file. // We delay storing the metadata so we can include information collected // during codegen. self.store_metadata(&kani_metadata, &crate_info.output_path); } return Ok(()); } CompilationStage::Done { metadata: None } | CompilationStage::CompilationSkipped => { return Ok(()); } }; self.next_stage(); } } /// Set up the next compilation stage after a `rustc` run. fn next_stage(&mut self) { self.stage = match &mut self.stage { CompilationStage::Init => { // This may occur when user passes arguments like --version or --help. CompilationStage::Done { metadata: None } } CompilationStage::CodegenNoStubs { next_harnesses, all_harnesses, crate_info, .. } | CompilationStage::CodegenWithStubs { next_harnesses, all_harnesses, crate_info, .. } => { if let Some(target_harnesses) = next_harnesses.pop() { assert!(!target_harnesses.is_empty(), "expected at least one target harness"); CompilationStage::CodegenWithStubs { target_harnesses, next_harnesses: mem::take(next_harnesses), all_harnesses: mem::take(all_harnesses), crate_info: crate_info.clone(), } } else { CompilationStage::Done { metadata: Some(( generate_metadata(&crate_info, all_harnesses), crate_info.clone(), )), } } } CompilationStage::Done { .. } | CompilationStage::CompilationSkipped => { unreachable!() } }; } /// Run the Rust compiler with the given arguments and pass `&mut self` to handle callbacks. fn run_compilation_session(&mut self, args: &[String]) -> Result<(), ErrorGuaranteed> { debug!(?args, "run_compilation_session"); let queries = self.queries.clone(); let mut compiler = RunCompiler::new(args, self); compiler.set_make_codegen_backend(Some(Box::new(move |_cfg| backend(queries)))); compiler.run() } /// Gather and process all harnesses from this crate that shall be compiled. fn process_harnesses(&self, tcx: TyCtxt) -> CompilationStage { let crate_info = CrateInfo { name: tcx.crate_name(LOCAL_CRATE).as_str().into(), output_path: metadata_output_path(tcx), }; if self.queries.lock().unwrap().reachability_analysis == ReachabilityType::Harnesses { let base_filename = tcx.output_filenames(()).output_path(OutputType::Object); let harnesses = filter_crate_items(tcx, |_, def_id| is_proof_harness(tcx, def_id)); let all_harnesses = harnesses .into_iter() .map(|harness| { let def_id = harness.def_id(); let def_path = tcx.def_path_hash(def_id); let metadata = gen_proof_metadata(tcx, def_id, &base_filename); let stub_map = harness_stub_map(tcx, def_id, &metadata); (def_path, HarnessInfo { metadata, stub_map }) }) .collect::>(); let (no_stubs, with_stubs): (Vec<_>, Vec<_>) = if self.queries.lock().unwrap().stubbing_enabled { // Partition harnesses that don't have stub with the ones with stub. all_harnesses .keys() .cloned() .partition(|harness| all_harnesses[harness].stub_map.is_empty()) } else { // Generate code without stubs. (all_harnesses.keys().cloned().collect(), vec![]) }; // Even if no_stubs is empty we still need to store rustc metadata. CompilationStage::CodegenNoStubs { target_harnesses: no_stubs, next_harnesses: group_by_stubs(with_stubs, &all_harnesses), all_harnesses, crate_info, } } else { // Leave other reachability type handling as is for now. CompilationStage::CodegenNoStubs { target_harnesses: vec![], next_harnesses: vec![], all_harnesses: HashMap::default(), crate_info, } } } /// Prepare the query for the next codegen stage. fn prepare_codegen(&mut self) -> Compilation { debug!(stage=?self.stage, "prepare_codegen"); match &self.stage { CompilationStage::CodegenNoStubs { target_harnesses, all_harnesses, .. } | CompilationStage::CodegenWithStubs { target_harnesses, all_harnesses, .. } => { debug!( harnesses=?target_harnesses .iter() .map(|h| &all_harnesses[h].metadata.pretty_name) .collect::>(), "prepare_codegen" ); let queries = &mut (*self.queries.lock().unwrap()); queries.harnesses_info = target_harnesses .iter() .map(|harness| { (*harness, all_harnesses[harness].metadata.goto_file.clone().unwrap()) }) .collect(); Compilation::Continue } CompilationStage::Init | CompilationStage::Done { .. } | CompilationStage::CompilationSkipped => unreachable!(), } } /// Write the metadata to a file fn store_metadata(&self, metadata: &KaniMetadata, filename: &Path) { debug!(?filename, "write_metadata"); let out_file = File::create(&filename).unwrap(); let writer = BufWriter::new(out_file); if self.queries.lock().unwrap().output_pretty_json { serde_json::to_writer_pretty(writer, &metadata).unwrap(); } else { serde_json::to_writer(writer, &metadata).unwrap(); } } } /// Group the harnesses by their stubs. fn group_by_stubs( harnesses: Vec, all_harnesses: &HashMap, ) -> Vec> { let mut per_stubs: BTreeMap<&Stubs, Vec> = BTreeMap::default(); for harness in harnesses { per_stubs.entry(&all_harnesses[&harness].stub_map).or_default().push(harness) } per_stubs.into_values().collect() } /// Use default function implementations. impl Callbacks for KaniCompiler { /// Configure the [KaniCompiler] `self` object during the [CompilationStage::Init]. fn config(&mut self, config: &mut Config) { if self.stage.is_init() { let mut args = vec!["kani-compiler".to_string()]; args.extend(config.opts.cg.llvm_args.iter().cloned()); let matches = parser::parser().get_matches_from(&args); init_session( &matches, matches!(config.opts.error_format, ErrorOutputType::Json { .. }), ); // Configure queries. let queries = &mut (*self.queries.lock().unwrap()); queries.emit_vtable_restrictions = matches.get_flag(parser::RESTRICT_FN_PTRS); queries.check_assertion_reachability = matches.get_flag(parser::ASSERTION_REACH_CHECKS); queries.check_coverage = matches.get_flag(parser::COVERAGE_CHECKS); queries.output_pretty_json = matches.get_flag(parser::PRETTY_OUTPUT_FILES); queries.ignore_global_asm = matches.get_flag(parser::IGNORE_GLOBAL_ASM); queries.write_json_symtab = cfg!(feature = "write_json_symtab") || matches.get_flag(parser::WRITE_JSON_SYMTAB); queries.reachability_analysis = matches.reachability_type(); if let Some(features) = matches.get_many::(parser::UNSTABLE_FEATURE) { queries.unstable_features = features.cloned().collect::>(); } if matches.get_flag(parser::ENABLE_STUBBING) && queries.reachability_analysis == ReachabilityType::Harnesses { queries.stubbing_enabled = true; } debug!(?queries, "config end"); } } /// During the initialization state, we collect the crate harnesses and prepare for codegen. fn after_analysis<'tcx>( &mut self, _handler: &EarlyErrorHandler, _compiler: &rustc_interface::interface::Compiler, rustc_queries: &'tcx rustc_interface::Queries<'tcx>, ) -> Compilation { if self.stage.is_init() { self.stage = rustc_queries.global_ctxt().unwrap().enter(|tcx| { check_crate_items(tcx, self.queries.lock().unwrap().ignore_global_asm); self.process_harnesses(tcx) }); } self.prepare_codegen() } } /// Generate [KaniMetadata] for the target crate. fn generate_metadata( crate_info: &CrateInfo, all_harnesses: &HashMap, ) -> KaniMetadata { let (proof_harnesses, test_harnesses) = all_harnesses .values() .map(|info| &info.metadata) .cloned() .partition(|md| md.attributes.proof); KaniMetadata { crate_name: crate_info.name.clone(), proof_harnesses, unsupported_features: vec![], test_harnesses, } } /// Extract the filename for the metadata file. fn metadata_output_path(tcx: TyCtxt) -> PathBuf { let mut filename = tcx.output_filenames(()).output_path(OutputType::Object); filename.set_extension(ArtifactType::Metadata); filename } #[cfg(test)] mod tests { use super::*; use kani_metadata::{HarnessAttributes, HarnessMetadata}; use rustc_data_structures::fingerprint::Fingerprint; use rustc_hir::definitions::DefPathHash; use std::collections::HashMap; fn mock_next_id() -> HarnessId { static mut COUNTER: u64 = 0; unsafe { COUNTER += 1 }; let id = unsafe { COUNTER }; DefPathHash(Fingerprint::new(id, 0)) } fn mock_metadata(name: String, krate: String) -> HarnessMetadata { HarnessMetadata { pretty_name: name.clone(), mangled_name: name.clone(), original_file: format!("{}.rs", krate), crate_name: krate, original_start_line: 10, original_end_line: 20, goto_file: None, attributes: HarnessAttributes::default(), } } fn mock_info_with_stubs(stub_map: Stubs) -> HarnessInfo { HarnessInfo { metadata: mock_metadata("dummy".to_string(), "crate".to_string()), stub_map } } #[test] fn test_group_by_stubs_works() { // Set up the inputs let harness_1 = mock_next_id(); let harness_2 = mock_next_id(); let harness_3 = mock_next_id(); let harnesses = vec![harness_1, harness_2, harness_3]; let stub_1 = (mock_next_id(), mock_next_id()); let stub_2 = (mock_next_id(), mock_next_id()); let stub_3 = (mock_next_id(), mock_next_id()); let stub_4 = (stub_3.0, mock_next_id()); let set_1 = Stubs::from([stub_1, stub_2, stub_3]); let set_2 = Stubs::from([stub_1, stub_2, stub_4]); let set_3 = Stubs::from([stub_1, stub_3, stub_2]); assert_eq!(set_1, set_3); assert_ne!(set_1, set_2); let harnesses_info = HashMap::from([ (harness_1, mock_info_with_stubs(set_1)), (harness_2, mock_info_with_stubs(set_2)), (harness_3, mock_info_with_stubs(set_3)), ]); assert_eq!(harnesses_info.len(), 3); // Run the function under test. let grouped = group_by_stubs(harnesses, &harnesses_info); // Verify output. assert_eq!(grouped.len(), 2); assert!( grouped.contains(&vec![harness_1, harness_3]) || grouped.contains(&vec![harness_3, harness_1]) ); assert!(grouped.contains(&vec![harness_2])); } #[test] fn test_generate_metadata() { // Mock inputs. let name = "my_crate".to_string(); let crate_info = CrateInfo { name: name.clone(), output_path: PathBuf::default() }; let mut info = mock_info_with_stubs(Stubs::default()); info.metadata.attributes.proof = true; let id = mock_next_id(); let all_harnesses = HashMap::from([(id, info.clone())]); // Call generate metadata. let metadata = generate_metadata(&crate_info, &all_harnesses); // Check output. assert_eq!(metadata.crate_name, name); assert_eq!(metadata.proof_harnesses.len(), 1); assert_eq!(*metadata.proof_harnesses.first().unwrap(), info.metadata); } #[test] fn test_generate_empty_metadata() { // Mock inputs. let name = "my_crate".to_string(); let crate_info = CrateInfo { name: name.clone(), output_path: PathBuf::default() }; let all_harnesses = HashMap::new(); // Call generate metadata. let metadata = generate_metadata(&crate_info, &all_harnesses); // Check output. assert_eq!(metadata.crate_name, name); assert_eq!(metadata.proof_harnesses.len(), 0); } #[test] fn test_generate_metadata_with_multiple_harness() { // Mock inputs. let krate = "my_crate".to_string(); let crate_info = CrateInfo { name: krate.clone(), output_path: PathBuf::default() }; let harnesses = ["h1", "h2", "h3"]; let infos = harnesses.map(|harness| { let mut metadata = mock_metadata(harness.to_string(), krate.clone()); metadata.attributes.proof = true; (mock_next_id(), HarnessInfo { stub_map: Stubs::default(), metadata }) }); let all_harnesses = HashMap::from(infos.clone()); // Call generate metadata. let metadata = generate_metadata(&crate_info, &all_harnesses); // Check output. assert_eq!(metadata.crate_name, krate); assert_eq!(metadata.proof_harnesses.len(), infos.len()); assert!( metadata .proof_harnesses .iter() .all(|harness| harnesses.contains(&&*harness.pretty_name)) ); } }