'''
EC2 Amazon Linux Kernel Autotuning configuration engine.

Configuration engine picks up the configuration and applies these
tunables onto the running instance. Configurations have to be generated
by invoking ec2_instance_cfg_gen before calling ec2_instance_cfg_engine.
If a valid configuration does not exist, then engine bails out and
fails.

Configuration engine saves system state and upon shutdown of
ec2sys-autotune service, all earlier system settings are restored.
'''

import os
import sys
import stat
import json
import glob
from syslog import syslog
try:
    from configparser import RawConfigParser
except ImportError:
    # Backward compatibility with python versions earlier to 3.0
    from ConfigParser import RawConfigParser
from ec2sys_autotune.ec2_instance_fetch_cfg import fetch_configuration
from ec2sys_autotune.ec2_autotune_utils import exec_cmds
from ec2sys_autotune.ec2_autotune_utils import get_cmd_output
from ec2sys_autotune.ec2_autotune_utils import get_piped_cmd_output
from ec2sys_autotune.ec2_autotune_utils import read_sysfs_file
from ec2sys_autotune.ec2_autotune_utils import write_sysfs_file

# Exception
from ec2sys_autotune.ec2_instance_exception import Ec2AutotuneError

# Types of tunables being tuned by autotune
SERVICE = "service"
SYSCTL = "sysctl"
SYSFS = "sysfs"
CPU = "cpu"


class Ec2InstanceCfgEngine(object):
    '''
    Core configuration engine class to configure and restore system
    settings.

    Need to instantiate by passing in log file, config_dir, profile and
    user_profile. Recovery instances need only log file to replay from.
    '''

    # C-states accepted by set_cpu_value.
    # state : latency (used to build the --disable-by-latency argument)
    _CSTATE_LATENCY = {
        "POLL": 0,
        "C1": 2,
        "C1E": 10,
        "C3": 40,
        "C6": 133}

    def __init__(self, log_file, config_dir=None, profile=None,
                 user_profile=None):
        '''
        Build an engine instance.

        log_file:     path of the json recovery log written by
                      stop_recovery_logging() and read by
                      restore_system_settings().
        config_dir:   directory holding the auto generated profile.
        profile:      name of the auto generated profile; the file used
                      is "<config_dir>/<profile>.ini".
        user_profile: path of the user customized profile.

        When config_dir, profile and user_profile are all None the
        object is a recovery instance and no profile is validated.

        Raises Ec2AutotuneError if either profile file is missing or if
        the dry-run validation of the tunables fails.
        '''
        # In-memory log object to save system settings
        self.log_object = None

        # Dictionary of types of tunables and their service functions
        self.tunables = {
            # tunable : service functions
            SERVICE: {"get": self.get_service_state,
                      "set": self.set_service_state},
            SYSCTL: {"get": self.get_sysctl_value,
                     "set": self.set_sysctl_value},
            SYSFS: {"get": self.get_sysfs_value,
                    "set": self.set_sysfs_value},
            CPU: {"get": self.get_cpu_value,
                  "set": self.set_cpu_value}}

        # Recovery instance do not need config_dir and profiles. However
        # if this instance is going to configure tunables, then log
        # file, config_dir, profile and user_profile need to be passed
        # in. Validate passed in params.
        self.recovery_instance = (config_dir is None and
                                  profile is None and
                                  user_profile is None)

        if (self.recovery_instance is False):
            # Instance to configure tunable
            # Bail out if auto generated profile doesn't exist
            self.auto_profile = "{0}/{1}.ini".format(config_dir, profile)
            if (os.path.isfile(self.auto_profile) is False):
                # Fatal error, auto profile doesn't exist
                raise Ec2AutotuneError(
                    "Auto generated tunables file {0} does not "
                    "exist.".format(self.auto_profile))

            # Bail out if user config profile doesn't exist. A missing
            # (None) user_profile is reported the same way instead of
            # letting os.path.isfile() fail with a TypeError.
            self.user_profile = user_profile
            if (self.user_profile is None or
                    os.path.isfile(self.user_profile) is False):
                # Fatal error, user profile doesn't exist
                raise Ec2AutotuneError(
                    "User customized tunables file {0} "
                    "does not exist.".format(self.user_profile))

            # Validate the tunables to be set (dry run)
            try:
                self.validate_tunables_to_set()
            except Ec2AutotuneError as e:
                raise Ec2AutotuneError(
                    "Validation of tunables to be set "
                    "failed: {0}".format(e.msg))

        # Read in entries from /etc/sysctl.d/*.conf. These are consulted
        # by set_sysctl_value() to avoid overriding them.
        self.sysctl_conf = []
        for filename in glob.glob("/etc/sysctl.d/*.conf"):
            with open(filename) as conf_file:
                for ln in conf_file:
                    ln = ln.rstrip('\n').lstrip()
                    # Strip blank lines and comments ('#' or ';')
                    if (len(ln) == 0 or ln.startswith(("#", ";"))):
                        continue
                    self.sysctl_conf.append(ln)

        self.log_file = log_file
        return

    def start_recovery_logging(self):
        '''
        Start logging all tuned values for recovery at service shutdown
        '''
        self.log_object = {
            SERVICE: [],
            SYSCTL: [],
            SYSFS: [],
            CPU: []}
        return

    def stop_recovery_logging(self):
        '''
        Stop and commit all logs
        '''
        # Save the log in json format on disk
        json_log = json.dumps(self.log_object,
                              indent=4,
                              separators=(',', ': '))

        # Create the file with owner-only permissions from the start so
        # there is no window in which it is readable by anybody else.
        fd = os.open(self.log_file,
                     os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
                     0o600)
        with os.fdopen(fd, "w") as log_file:
            # The mode given to os.open() only applies to newly created
            # files; tighten a pre-existing log file as well.
            os.chmod(self.log_file, 0o600)
            log_file.write(json_log)

        self.log_object = None
        return

    def _apply_tunable(self, tunable, apply_change, get_tunable, log_index,
                       new_value, orig_value):
        '''
        Shared implementation of set_tunable and safe_set_tunable.

        apply_change is a zero-argument callable that performs the
        actual change; everything else (reading the current value,
        deciding whether to touch the tunable, logging for recovery) is
        identical for both entry points.
        '''
        # This is the action of current function invocation
        action = "{0} = {1}".format(tunable, new_value)

        # Fetch current tunable value in the system
        try:
            current_value = get_tunable(tunable)
        except Ec2AutotuneError as e:
            syslog(e.msg)
            return

        # Current value is different from expected original value, then
        # this tunable is being tweaked by system user and should be
        # left untouched.
        if (orig_value is not None and orig_value != current_value):
            return

        # If new value to be set is same as current value, nothing to set
        if (new_value == current_value):
            return

        try:
            # Set new value & log the new value for recovery
            apply_change()
            syslog("set {0}".format(action))
            if (self.recovery_instance is False and
                    current_value is not None):
                # Internal invariant: orig_value is only supplied while
                # restoring, which a configuring instance never does.
                assert(orig_value is None)
                # Configuring system: log system changes
                self.log_object[log_index].append(
                    {"Name": tunable,
                     "Original": current_value,
                     "Changed": new_value})
        except Ec2AutotuneError as e:
            syslog(e.msg)
        return

    def set_tunable(self, tunable, cmd, get_tunable,
                    log_index, new_value, orig_value):
        '''
        Core function that actually does the job of setting tunable.

        cmd is handed to exec_cmds() to apply the change. get_tunable
        is called with tunable to read the current value, and log_index
        selects the section of the recovery log to append to.

        orig_value will be passed only during recovery phase.
        orig_value param will be NULL during system configuration phase.

        Ec2AutotuneError raised while reading or setting the value is
        written to syslog and not propagated.
        '''
        return self._apply_tunable(tunable,
                                   lambda: exec_cmds(cmd),
                                   get_tunable, log_index,
                                   new_value, orig_value)

    def safe_set_tunable(self, tunable, data, get_tunable,
                         log_index, new_value, orig_value):
        '''
        This function has the same functionality as set_tunable. This
        version uses python's internal write library call to set the
        tunable instead of forking to shell. We should deprecate calling
        set_tunable as much as possible in future to avoid forking to
        shell.

        data is written to the file named by tunable through
        write_sysfs_file().

        orig_value will be passed only during recovery phase.
        orig_value param will be NULL during system configuration phase.
        '''
        return self._apply_tunable(tunable,
                                   lambda: write_sysfs_file(tunable, data),
                                   get_tunable, log_index,
                                   new_value, orig_value)

    def parse_tunable_output(self, output):
        '''
        Tunables values are in four formats (for tunables we are working):
        1: number
        2: [foo] bar
        3: foo
        4: number1 number2 number3 (whitespace/tab separated)
        Second format above is tricky as the value set is the one
        enclosed within []. In cases like these, we need to extract the
        string enclosed between [] and return it to caller.

        For all format, convert the output into a list and return.
        Returns None when there is nothing to parse. Tokens are
        converted to int only when every token is made of digits.

        Raises Ec2AutotuneError when a "[" has no matching "]".
        '''
        if (output is None or len(output) == 0):
            return None

        output = output.strip()
        start = output.find("[")
        if (start != -1):
            # Only a "]" that follows the "[" can close the selection
            end = output.find("]", start + 1)
            if (end == -1):
                # Should never happen
                raise Ec2AutotuneError(
                    "Parsing error of {0}".format(output))
            output = output[start + 1:end]

        tokens = output.split()
        if (len(tokens) == 0):
            # Whitespace only output or an empty [] selection
            return None

        # Build a real list (not a lazy map object) so that the result
        # can be compared with == and serialized by json on python 3.
        if (all(token.isdigit() for token in tokens)):
            return [int(token) for token in tokens]
        return tokens

    def convert_input_value(self, value):
        '''
        value will be a list, return string representation.

        Elements are joined with a single space; when there is more
        than one element the result is wrapped in double quotes.

        Raises Ec2AutotuneError if value is not a list.
        '''
        if (isinstance(value, list) is False):
            raise Ec2AutotuneError(
                "input value is in an invalid format.")

        joined = " ".join(map(str, value))

        # Embed multiple values inside strings
        if (len(value) > 1):
            joined = "\"{0}\"".format(joined)
        return joined

    def get_service_state(self, service):
        '''
        Returns the current status of service in the system

        Returns ["start"] when the "Active:" line of systemctl status
        contains "running" and ["stop"] when it contains "dead".
        Raises Ec2AutotuneError otherwise.
        '''
        output = get_piped_cmd_output(
            "/bin/systemctl status {0}".format(service),
            "/bin/grep Active:")
        if ("running" in output):
            return (["start"])
        if ("dead" in output):
            return (["stop"])
        raise Ec2AutotuneError(
            "{0} package not installed.".format(service))

    def set_service_state(self, service, new_state, orig_state=None):
        '''
        Set the passed in service state.

        new_state is a list (as returned by get_service_state).
        orig_state is passed only during recovery phase.
        '''
        try:
            tmp_new_state = self.convert_input_value(new_state)
        except Ec2AutotuneError as e:
            syslog("Failed to set {0} = {1}".format(service, e.msg))
            return

        cmd = ["/bin/systemctl {0} {1}".format(tmp_new_state, service)]
        return self.set_tunable(service, cmd,
                                self.get_service_state,
                                SERVICE, new_state, orig_state)

    def get_sysctl_value(self, sysctl_setting):
        '''
        Get value of a particular kernel setting

        Returns the parsed value (see parse_tunable_output) or None
        when sysctl printed nothing. Raises Ec2AutotuneError when the
        output has no "=" separator.
        '''
        output = get_cmd_output("/sbin/sysctl {0}".format(sysctl_setting))
        if (not output):
            return None

        # Split on the first "=" only so that a value which itself
        # contains "=" is kept intact.
        _, separator, value = output.partition("=")
        if (not separator):
            raise Ec2AutotuneError(
                "Parsing error of {0}".format(output))
        return self.parse_tunable_output(value)

    def set_sysctl_value(self, sysctl_setting, new_value, orig_value=None):
        '''
        Set value of a particular kernel setting.

        The setting is skipped (and a message written to syslog) when
        its name appears in any entry read from /etc/sysctl.d/*.conf.
        orig_value is passed only during recovery phase.
        '''
        # If the value to be configured is also being modified as part of
        # /etc/sysctl.d/*.conf, then this is a conflict. This is a
        # substring match, so it errs on the side of skipping.
        if (any(sysctl_setting in ln for ln in self.sysctl_conf)):
            syslog("Skipping {0} as it conflicts with "
                   "/etc/sysctl.d framework.".format(sysctl_setting))
            return

        try:
            tmp_new_value = self.convert_input_value(new_value)
        except Ec2AutotuneError as e:
            syslog("Failed to set {0} = {1}".format(sysctl_setting, e.msg))
            return

        cmd = ["/sbin/sysctl -q -w {0}={1}".format(sysctl_setting,
                                                   tmp_new_value)]
        return self.set_tunable(sysctl_setting, cmd,
                                self.get_sysctl_value,
                                SYSCTL, new_value, orig_value)

    def get_sysfs_value(self, sysfs_file):
        '''
        Get value of a particular sysfs setting

        Returns the parsed value (see parse_tunable_output) or None
        when the file content is empty.
        '''
        output = read_sysfs_file(sysfs_file)
        if (not output):
            return None
        return self.parse_tunable_output(output)

    def set_sysfs_value(self, sysfs_file, new_value, orig_value=None):
        '''
        Set value of a particular sysfs setting.
        orig_value will be passed only during recovery phase.
        orig_value param will be NULL during system configuring phase.
        '''
        if (os.path.isfile(sysfs_file) is False):
            syslog("invalid sysfs_file={0}".format(sysfs_file))
            return

        try:
            tmp_new_value = self.convert_input_value(new_value)
        except Ec2AutotuneError as e:
            syslog("Failed to set {0} = {1}".format(sysfs_file, e.msg))
            return

        return self.safe_set_tunable(sysfs_file, tmp_new_value,
                                     self.get_sysfs_value,
                                     SYSFS, new_value, orig_value)

    def _get_pstate_governor(self):
        '''
        Return the current frequency governor as a one element list, or
        None when cpupower does not report the intel_pstate driver.
        '''
        # Work only with intel_pstate driver
        driver = get_piped_cmd_output(
            "/bin/cpupower frequency-info --driver",
            "/bin/grep \"driver: intel_pstate\"")
        if (not driver):
            return None

        # Return current governor being used: it is the text between
        # the first pair of double quotes on the matching line.
        output = get_piped_cmd_output(
            "/bin/cpupower frequency-info",
            "/bin/grep \"The governor\"")
        start = output.find("\"")
        end = output.find("\"", start + 1) if (start != -1) else -1
        if (start == -1 or end == -1):
            raise Ec2AutotuneError(
                "Parsing error of current "
                "frequency-info governor")
        return ([output[start + 1:end]])

    def _get_deepest_cstate(self):
        '''
        Return the deepest enabled idle state as a one element list, or
        None when cpupower does not report the intel_idle driver.
        '''
        # Work only with intel_idle driver
        driver = get_piped_cmd_output(
            "/bin/cpupower idle-info --silent",
            "/bin/grep \"driver: intel_idle\"")
        if (not driver):
            return None

        # Number of idle states
        output = get_piped_cmd_output(
            "/bin/cpupower idle-info",
            "/bin/grep \"Number of idle states:\"")
        try:
            max_states = int(output[(output.index("states: ") + 8):])
        except ValueError:
            raise Ec2AutotuneError(
                "Parsing error of number of idle states")

        # Available idle states: peel max_states space separated names
        # off the end of the line, then put them back in order.
        output = get_piped_cmd_output(
            "/bin/cpupower idle-info",
            "/bin/grep \"Available idle states:\"")
        idle_states = []
        end = len(output)
        for _ in range(max_states):
            try:
                idx = output.rindex(" ", 0, end) + 1
            except ValueError:
                raise Ec2AutotuneError(
                    "Parsing error of available idle states")
            idle_states.append(output[idx:end])
            end = idx - 1
        idle_states.reverse()
        if (len(idle_states) == 0):
            raise Ec2AutotuneError(
                "Parsing error of available idle states")

        # Return deepest enabled state
        output = get_piped_cmd_output(
            "/bin/cpupower idle-info",
            "/bin/grep DISABLED")
        if (not output):
            # No state is disabled, return deepest state
            return ([idle_states[-1]])

        # The first word of the first DISABLED line names a disabled
        # state; the state listed just before it is reported.
        try:
            disabled_state = output[:output.index(" ")]
            position = idle_states.index(disabled_state)
        except ValueError:
            raise Ec2AutotuneError(
                "Parsing error of disabled idle states")
        # NOTE(review): if the very first state is the disabled one,
        # position - 1 wraps around to the last state - confirm whether
        # that case can occur.
        return ([idle_states[position - 1]])

    def get_cpu_value(self, cpu_state):
        '''
        Get value of a particular CPU state

        cpu_state is "p-state" (frequency governor) or "c-state"
        (deepest enabled idle state). Returns a one element list, or
        None when the expected intel driver is not reported.

        Raises Ec2AutotuneError for any other cpu_state or when the
        cpupower output cannot be parsed.
        '''
        # Query frequency governor
        if (cpu_state == "p-state"):
            return self._get_pstate_governor()

        # Query CPU idle state
        if (cpu_state == "c-state"):
            return self._get_deepest_cstate()

        # State should always be either p-state or c-state
        raise Ec2AutotuneError(
            "Invalid {0} state".format(cpu_state))

    def set_cpu_value(self, cpu_state, new_value, orig_value=None):
        '''
        Set value of a particular CPU state.
        orig_value will be passed only during recovery phase.
        orig_value param will be NULL during system configuring phase.
        We set this only on systems which has intel drivers
        (no acpi driver).

        Raises Ec2AutotuneError for an unknown cpu_state or an
        unsupported c-state value.
        '''
        try:
            tmp_new_value = self.convert_input_value(new_value)
        except Ec2AutotuneError as e:
            syslog("Failed to set {0} = {1}".format(cpu_state, e.msg))
            return

        if (cpu_state == "p-state"):
            # P-state: value has to be one of the supported governors
            # by intel_pstate driver.
            cmd = ["/bin/cpupower frequency-set -g {0}".format(
                tmp_new_value)]

        elif (cpu_state == "c-state"):
            # C-state: value has to be one of the supported idle states
            # by intel_idle driver
            if (tmp_new_value not in self._CSTATE_LATENCY):
                raise Ec2AutotuneError(
                    "Invalid value for c-state = "
                    "{0}".format(tmp_new_value))
            # Enable everything, then disable the states whose latency
            # is above that of the requested state.
            latency = self._CSTATE_LATENCY[tmp_new_value] + 1
            cmd = (["/bin/cpupower idle-set --enable-all",
                    "/bin/cpupower idle-set --disable-by-latency {0}"
                    .format(str(latency))])

        else:
            raise Ec2AutotuneError(
                "Invalid CPU state {0}".format(cpu_state))

        return self.set_tunable(cpu_state, cmd,
                                self.get_cpu_value,
                                CPU, new_value, orig_value)

    def configure_system(self, section, configure, dry_run=False):
        '''
        Hand one section of the profiles to fetch_configuration().

        configure is the get function of the section for a dry run and
        the set function otherwise; the two modes pass it in different
        argument positions of fetch_configuration().
        '''
        if (dry_run is True):
            fetch_configuration(self.auto_profile,
                                self.user_profile,
                                section,
                                None, None, None,
                                configure)
        else:
            fetch_configuration(self.auto_profile,
                                self.user_profile,
                                section,
                                configure, None, configure,
                                None)
        return

    def validate_tunables_to_set(self):
        '''
        Function that validates the tunable before applying

        Runs configure_system() in dry-run mode for every tunable type.
        Raises Ec2AutotuneError on a recovery instance.
        '''
        # Recovery instance should not be trying to configure
        if (self.recovery_instance is True):
            raise Ec2AutotuneError(
                "Incorrect instantiation and object usage.")

        # Validate tunables
        for tunable, functions in self.tunables.items():
            self.configure_system(tunable, functions["get"], True)
        return

    def configure_system_settings(self):
        '''
        Main function that configures the system

        Applies every tunable type and commits the recovery log.
        Raises Ec2AutotuneError on a recovery instance.
        '''
        # Recovery instance should not be trying to configure. Check
        # this before touching the in-memory log.
        if (self.recovery_instance is True):
            raise Ec2AutotuneError(
                "Incorrect instantiation and object usage.")

        self.start_recovery_logging()

        # Configure tunables and save their defaults
        for tunable, functions in self.tunables.items():
            self.configure_system(tunable, functions["set"])
        self.tunables = None

        self.stop_recovery_logging()
        return

    def restore_system(self, list_log, tunable, restore):
        '''
        Core function used by restore_system_settings() to read-in
        individual log entries and restore.

        For each logged entry restore is called with the name, the
        original value (the value to put back) and the changed value
        (the value expected to be currently set). A section missing
        from the log is treated as empty.
        '''
        for item in list_log.get(tunable, []):
            restore(item["Name"], item["Original"], item["Changed"])
        return

    def restore_system_settings(self):
        '''
        Function called during shutdown to restore system to earlier
        default

        Raises Ec2AutotuneError if the recovery log file does not exist.
        '''
        # Read log file for restoring to earlier state (json format)
        if (os.path.isfile(self.log_file) is False):
            raise Ec2AutotuneError(
                "Recovery log file does not exist, "
                "failed to revert original settings.")
        with open(self.log_file, "r") as log_file:
            list_log = json.load(log_file)

        # Restore tunables to their earlier defaults
        for tunable, functions in self.tunables.items():
            self.restore_system(list_log, tunable, functions["set"])
        self.tunables = None
        return