/*! apiclient is a client for interacting with the Bottlerocket Update API. Brupop volume mounts apiclient binary into the agent container. Bottlerocket Update API: https://github.com/bottlerocket-os/bottlerocket/tree/develop/sources/updater Bottlerocket apiclient: https://github.com/bottlerocket-os/bottlerocket/tree/develop/sources/api/apiclient */ use semver::Version; use serde::Deserialize; use snafu::{ensure, ResultExt}; use std::process::{Command, Output}; use tokio::time::{sleep, Duration}; use tracing::{event, Level}; const API_CLIENT_BIN: &str = "apiclient"; const UPDATES_STATUS_URI: &str = "/updates/status"; const OS_URI: &str = "/os"; const REFRESH_UPDATES_URI: &str = "/actions/refresh-updates"; const PREPARE_UPDATES_URI: &str = "/actions/prepare-update"; const ACTIVATE_UPDATES_URI: &str = "/actions/activate-update"; const REBOOT_URI: &str = "/actions/reboot"; const MAX_ATTEMPTS: i8 = 5; const UPDATE_API_SLEEP_DURATION: Duration = Duration::from_millis(10000); const UPDATE_API_BUSY_STATUSCODE: &str = "423"; /// The module-wide result type. pub type Result = std::result::Result; /// UpdateState represents four states during system update process #[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)] pub enum UpdateState { // Idle: Unknown Idle, // Available: available versions system can update to Available, // Staged: processing system update (refresh, prepare, activate) Staged, // Ready: successfully complete update commands, ready to reboot Ready, } /// UpdateCommand represents three commands to update system #[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)] #[serde(rename_all = "lowercase")] pub enum UpdateCommand { // Refresh: refresh the list of available updates Refresh, // Prepare: request that the update be downloaded and applied to disk Prepare, // Activate: proceed to "activate" the update Activate, } /// CommandStatus represents three status after running update command #[derive(Debug, Clone, Eq, PartialEq, Hash, Deserialize)] enum CommandStatus { Success, Failed, Unknown, } #[derive(Debug, Deserialize)] pub struct StagedImage { image: Option, next_to_boot: bool, } #[derive(Debug, Deserialize)] pub struct CommandResult { cmd_type: UpdateCommand, cmd_status: CommandStatus, timestamp: String, exit_status: u32, stderr: String, } #[derive(Debug, Deserialize)] pub struct UpdateImage { pub(super) arch: String, pub(super) version: Version, pub(super) variant: String, } #[derive(Debug, Deserialize)] pub struct OsInfo { pub version_id: Version, } #[derive(Debug, Deserialize)] pub struct UpdateStatus { update_state: UpdateState, available_updates: Vec, chosen_update: Option, active_partition: Option, staging_partition: Option, most_recent_command: CommandResult, } fn get_raw_args(mut args: Vec) -> Vec { let mut subcommand_args = vec!["raw".to_string(), "-u".to_string()]; subcommand_args.append(&mut args); subcommand_args } /// Extract error statuscode from stderr string /// Error Example: /// "Failed POST request to '/actions/refresh-updates': Status 423 when POSTing /actions/refresh-updates: Update lock held\n" fn extract_status_code_from_error(error: &str) -> &str { let error_content_split_by_status: Vec<&str> = error.split("Status").collect(); let error_content_split_by_whitespace: Vec<&str> = error_content_split_by_status[1] .split_whitespace() .collect(); error_content_split_by_whitespace[0] } /// Apiclient binary has been volume mounted into the agent container, so agent is able to /// invoke `/bin apiclient` to interact with the Bottlerocket Update API. /// This function helps to invoke apiclient raw command. async fn invoke_apiclient(args: Vec) -> Result { let mut attempts: i8 = 0; // Retry up to 5 times in case the Update API is busy; Waiting 10 seconds between each attempt. while attempts < MAX_ATTEMPTS { let output = Command::new(API_CLIENT_BIN) .args(&args) .output() .context(apiclient_error::ApiClientRawCommandSnafu { args: args.clone() })?; match output.status.success() { true => return Ok(output), false => { // Return value `exit status` is Option. When the value has `some` value, we need extract error info from stderr and handle those errors. // Otherwise, on Unix, this will return `None` if the process was terminated by a signal. Signal termination is not considered a success. // Apiclient `Reboot` command will send signal to terminate the process, so we have to consider this situation and have extra logic to recognize // return value `None` as success and terminate the process properly. match output.status.code() { // when return value has `some` code, this part will handle those errors properly. Some(_code) => { let error_content = String::from_utf8_lossy(&output.stderr).to_string(); let error_statuscode = extract_status_code_from_error(&error_content); match error_statuscode { UPDATE_API_BUSY_STATUSCODE => { event!(Level::INFO, "API server busy, retrying later ..."); // Retry after ten seconds if we get a 423 Locked response (update API busy) sleep(UPDATE_API_SLEEP_DURATION).await; attempts += 1; } _ => { // API response was a non-transient error, bail out return apiclient_error::BadHttpResponseSnafu { args: args, error_content: &error_content, statuscode: error_statuscode, } .fail(); } }; } // when it returns `None`, this part will treat it as success and then gracefully exit brupop agent. _ => { event!( Level::INFO, "Bottlerocket node is terminated by reboot signal" ); std::process::exit(0) } } } } } // Update API is currently unavailable, bail out Err(apiclient_error::Error::UpdateApiUnavailable { args }) } pub async fn get_update_status() -> Result { // Refresh list of updates and check if there are any available refresh_updates().await?; let raw_args = vec![UPDATES_STATUS_URI.to_string()]; let update_status_output = invoke_apiclient(get_raw_args(raw_args)).await?; let update_status_string = String::from_utf8_lossy(&update_status_output.stdout).to_string(); let update_status: UpdateStatus = serde_json::from_str(&update_status_string) .context(apiclient_error::UpdateStatusContentSnafu)?; Ok(update_status) } /// extract OS info from the local system. For now this is just the version id. pub async fn get_os_info() -> Result { let raw_args = vec![OS_URI.to_string()]; let os_info_output = invoke_apiclient(get_raw_args(raw_args)).await?; let os_info_content_string = String::from_utf8_lossy(&os_info_output.stdout).to_string(); let os_info: OsInfo = serde_json::from_str(&os_info_content_string).context(apiclient_error::OsContentSnafu)?; Ok(os_info) } pub async fn refresh_updates() -> Result { let raw_args = vec![ REFRESH_UPDATES_URI.to_string(), "-m".to_string(), "POST".to_string(), ]; invoke_apiclient(get_raw_args(raw_args)).await } pub async fn prepare_update() -> Result<()> { let raw_args = vec![ PREPARE_UPDATES_URI.to_string(), "-m".to_string(), "POST".to_string(), ]; invoke_apiclient(get_raw_args(raw_args)).await?; Ok(()) } pub async fn activate_update() -> Result<()> { let raw_args = vec![ ACTIVATE_UPDATES_URI.to_string(), "-m".to_string(), "POST".to_string(), ]; invoke_apiclient(get_raw_args(raw_args)).await?; Ok(()) } pub async fn reboot() -> Result { let raw_args = vec![REBOOT_URI.to_string(), "-m".to_string(), "POST".to_string()]; invoke_apiclient(get_raw_args(raw_args)).await } // get chosen update which contains latest Bottlerocket OS can update to. pub async fn get_chosen_update() -> Result> { // Refresh list of updates and check if there are any available refresh_updates().await?; let update_status = get_update_status().await?; // Raise error if failed to refresh update or update acton performed out of band ensure!( update_status.most_recent_command.cmd_type == UpdateCommand::Refresh && update_status.most_recent_command.cmd_status == CommandStatus::Success, apiclient_error::RefreshUpdateSnafu ); Ok(update_status.chosen_update) } pub async fn prepare() -> Result<()> { let update_status = get_update_status().await?; ensure!( update_status.update_state == UpdateState::Available || update_status.update_state == UpdateState::Staged, apiclient_error::UpdateStageSnafu { expect_state: "Available or Staged".to_string(), update_state: update_status.update_state, }, ); // Download the update and apply it to the inactive partition prepare_update().await?; // Raise error if failed to prepare update or update action performed out of band let recent_command = get_update_status().await?.most_recent_command; ensure!( recent_command.cmd_type == UpdateCommand::Prepare || recent_command.cmd_status == CommandStatus::Success, apiclient_error::PrepareUpdateSnafu ); Ok(()) } pub async fn update() -> Result<()> { let update_status = get_update_status().await?; ensure!( update_status.update_state == UpdateState::Staged, apiclient_error::UpdateStageSnafu { expect_state: "Staged".to_string(), update_state: update_status.update_state, } ); // Activate the prepared update activate_update().await?; // Raise error if failed to activate update or update action performed out of band let recent_command = get_update_status().await?.most_recent_command; ensure!( recent_command.cmd_type == UpdateCommand::Activate || recent_command.cmd_status == CommandStatus::Success, apiclient_error::UpdateSnafu ); Ok(()) } // Reboot the host into the activated update pub async fn boot_update() -> Result { let update_status = get_update_status().await?; ensure!( update_status.update_state == UpdateState::Ready, apiclient_error::UpdateStageSnafu { expect_state: "Ready".to_string(), update_state: update_status.update_state, } ); reboot().await } pub mod apiclient_error { use crate::apiclient::UpdateState; use snafu::Snafu; #[derive(Debug, Snafu)] #[snafu(visibility(pub))] pub enum Error { #[snafu(display("Failed to run apiclient command apiclient {:?}: {}", args.join(" "), source))] ApiClientRawCommand { args: Vec, source: std::io::Error, }, #[snafu(display("Failed to deserialize Os info: {}", source))] OsContent { source: serde_json::Error }, #[snafu(display("Failed to deserialize update status: {}", source))] UpdateStatusContent { source: serde_json::Error }, #[snafu(display("Failed to refresh updates or update action performed out of band"))] RefreshUpdate {}, #[snafu(display("Failed to prepare update or update action performed out of band"))] PrepareUpdate {}, #[snafu(display("Failed to activate update or update action performed out of band"))] Update {}, #[snafu(display( "Unexpected update state: {:?}, expecting state to be {}. Update action performed out of band?", update_state, expect_state ))] UpdateStage { expect_state: String, update_state: UpdateState, }, #[snafu(display("Bad http response when running command {} due to status code {}. Error output: {}", args.join(" "), statuscode, error_content))] BadHttpResponse { args: Vec, error_content: String, statuscode: String, }, #[snafu(display("Unable to process command apiclient {}: update API unavailable: retries exhausted", args.join(" ")))] UpdateApiUnavailable { args: Vec }, #[snafu(display("Unable to parse version information: '{}'", source))] VersionParseError { source: semver::Error }, } }