apiVersion: v1 kind: ConfigMap metadata: name: adot-collector-config namespace: ${Namespace} labels: app: aws-adot component: adot-collector-config data: adot-collector-config: | receivers: prometheus: config: global: scrape_interval: 1m scrape_timeout: 50s scrape_configs: - job_name: 'kubelets-cadvisor-metrics' sample_limit: 10000 scheme: https kubernetes_sd_configs: - role: node tls_config: ca_file: /var/run/secrets/kubernetes.io/serviceaccount/ca.crt insecure_skip_verify: true bearer_token_file: /var/run/secrets/kubernetes.io/serviceaccount/token relabel_configs: - action: labelmap regex: __meta_kubernetes_node_label_(.+) # Only for Kubernetes ^1.7.3. # See: https://github.com/prometheus/prometheus/issues/2916 - target_label: __address__ # Changes the address to Kube API server's default address and port replacement: kubernetes.default.svc:443 - source_labels: [__meta_kubernetes_node_name] regex: (.+) target_label: __metrics_path__ # Changes the default metrics path to kubelet's proxy cadvdisor metrics endpoint replacement: /api/v1/nodes/$$${1}/proxy/metrics/cadvisor metric_relabel_configs: # extract readable container/pod name from id field - action: replace source_labels: [id] regex: '^/machine\.slice/machine-rkt\\x2d([^\\]+)\\.+/([^/]+)\.service$' target_label: rkt_container_name replacement: '$$${2}-$$${1}' - action: replace source_labels: [id] regex: '^/system\.slice/(.+)\.service$' target_label: systemd_service_name replacement: '$$${1}' processors: # rename labels which apply to all metrics and are used in metricstransform/rename processor metricstransform/label_1: transforms: - include: .* match_type: regexp action: update operations: - action: update_label label: name new_label: container_id - action: update_label label: kubernetes_io_hostname new_label: NodeName - action: update_label label: eks_amazonaws_com_compute_type new_label: LaunchType # rename container and pod metrics which we care about. # container metrics are renamed to `new_container_*` to differentiate them with unused container metrics metricstransform/rename: transforms: - include: container_spec_cpu_quota new_name: new_container_cpu_limit_raw action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_spec_cpu_shares new_name: new_container_cpu_request action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_cpu_usage_seconds_total new_name: new_container_cpu_usage_seconds_total action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_spec_memory_limit_bytes new_name: new_container_memory_limit action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_cache new_name: new_container_memory_cache action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_max_usage_bytes new_name: new_container_memory_max_usage action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_usage_bytes new_name: new_container_memory_usage action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_working_set_bytes new_name: new_container_memory_working_set action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_rss new_name: new_container_memory_rss action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_swap new_name: new_container_memory_swap action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_failcnt new_name: new_container_memory_failcnt action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_failures_total new_name: new_container_memory_hierarchical_pgfault action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgfault", "scope": "hierarchy"} - include: container_memory_failures_total new_name: new_container_memory_hierarchical_pgmajfault action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgmajfault", "scope": "hierarchy"} - include: container_memory_failures_total new_name: new_container_memory_pgfault action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgfault", "scope": "container"} - include: container_memory_failures_total new_name: new_container_memory_pgmajfault action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgmajfault", "scope": "container"} - include: container_fs_limit_bytes new_name: new_container_filesystem_capacity action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_fs_usage_bytes new_name: new_container_filesystem_usage action: insert match_type: regexp experimental_match_labels: {"container": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} # POD LEVEL METRICS - include: container_spec_cpu_quota new_name: pod_cpu_limit_raw action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_spec_cpu_shares new_name: pod_cpu_request action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_cpu_usage_seconds_total new_name: pod_cpu_usage_seconds_total action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_spec_memory_limit_bytes new_name: pod_memory_limit action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_cache new_name: pod_memory_cache action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_max_usage_bytes new_name: pod_memory_max_usage action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_usage_bytes new_name: pod_memory_usage action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_working_set_bytes new_name: pod_memory_working_set action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_rss new_name: pod_memory_rss action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_swap new_name: pod_memory_swap action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_failcnt new_name: pod_memory_failcnt action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_memory_failures_total new_name: pod_memory_hierarchical_pgfault action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgfault", "scope": "hierarchy"} - include: container_memory_failures_total new_name: pod_memory_hierarchical_pgmajfault action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgmajfault", "scope": "hierarchy"} - include: container_memory_failures_total new_name: pod_memory_pgfault action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgfault", "scope": "container"} - include: container_memory_failures_total new_name: pod_memory_pgmajfault action: insert match_type: regexp experimental_match_labels: {"image": "^$", "container": "^$", "pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate", "failure_type": "pgmajfault", "scope": "container"} - include: container_network_receive_bytes_total new_name: pod_network_rx_bytes action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_receive_packets_dropped_total new_name: pod_network_rx_dropped action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_receive_errors_total new_name: pod_network_rx_errors action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_receive_packets_total new_name: pod_network_rx_packets action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_transmit_bytes_total new_name: pod_network_tx_bytes action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_transmit_packets_dropped_total new_name: pod_network_tx_dropped action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_transmit_errors_total new_name: pod_network_tx_errors action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} - include: container_network_transmit_packets_total new_name: pod_network_tx_packets action: insert match_type: regexp experimental_match_labels: {"pod": "(.|\\s)*\\S(.|\\s)*", "LaunchType": "fargate"} # filter out only renamed metrics which we care about filter: metrics: include: match_type: regexp metric_names: - new_container_.* - pod_.* # converts cumulative sum to delta cumulativetodelta: include: metrics: - new_container_cpu_usage_seconds_total - pod_cpu_usage_seconds_total - pod_memory_pgfault - pod_memory_pgmajfault - pod_memory_hierarchical_pgfault - pod_memory_hierarchical_pgmajfault - pod_network_rx_bytes - pod_network_rx_dropped - pod_network_rx_errors - pod_network_rx_packets - pod_network_tx_bytes - pod_network_tx_dropped - pod_network_tx_errors - pod_network_tx_packets - new_container_memory_pgfault - new_container_memory_pgmajfault - new_container_memory_hierarchical_pgfault - new_container_memory_hierarchical_pgmajfault match_type: strict # converts cumulative delta to rate deltatorate: metrics: - new_container_cpu_usage_seconds_total - pod_cpu_usage_seconds_total - pod_memory_pgfault - pod_memory_pgmajfault - pod_memory_hierarchical_pgfault - pod_memory_hierarchical_pgmajfault - pod_network_rx_bytes - pod_network_rx_dropped - pod_network_rx_errors - pod_network_rx_packets - pod_network_tx_bytes - pod_network_tx_dropped - pod_network_tx_errors - pod_network_tx_packets - new_container_memory_pgfault - new_container_memory_pgmajfault - new_container_memory_hierarchical_pgfault - new_container_memory_hierarchical_pgmajfault experimental_metricsgeneration/1: rules: - name: pod_network_total_bytes unit: Bytes/Second type: calculate metric1: pod_network_rx_bytes metric2: pod_network_tx_bytes operation: add - name: pod_memory_utilization_over_pod_limit_${TestingId} unit: Percent type: calculate metric1: pod_memory_working_set metric2: pod_memory_limit operation: percent - name: pod_cpu_usage_total unit: Millicore type: scale metric1: pod_cpu_usage_seconds_total operation: multiply # core to millicore: multiply by 1000 # millicore seconds to millicore nanoseconds: multiply by 10^9 scale_by: 1000 - name: pod_cpu_limit unit: Millicore type: scale metric1: pod_cpu_limit_raw operation: divide scale_by: 100 experimental_metricsgeneration/2: rules: - name: pod_cpu_utilization_over_pod_limit_${TestingId} type: calculate unit: Percent metric1: pod_cpu_usage_total metric2: pod_cpu_limit operation: percent # add `Type` and rename metrics and labels metricstransform/label_2: transforms: - include: pod_.* match_type: regexp action: update operations: - action: add_label new_label: Type new_value: "Pod" - include: new_container_.* match_type: regexp action: update operations: - action: add_label new_label: Type new_value: Container - include: .* match_type: regexp action: update operations: - action: update_label label: namespace new_label: Namespace - action: update_label label: pod new_label: PodName - include: ^new_container_(.*)$$ match_type: regexp action: update new_name: container_$$1 # add cluster name from env variable and EKS metadata resourcedetection: detectors: [env, eks] batch: timeout: 60s exporters: awsemf: log_group_name: '/aws/containerinsights/{ClusterName}/performance' log_stream_name: '{PodName}-aug-24-04' namespace: 'ContainerInsightsFargate' region: us-west-2 resource_to_telemetry_conversion: enabled: true eks_fargate_container_insights_enabled: true parse_json_encoded_attr_values: ["kubernetes"] dimension_rollup_option: NoDimensionRollup metric_declarations: - dimensions: [ [ClusterName, LaunchType], [ClusterName, Namespace, LaunchType], [ClusterName, Namespace, PodName, LaunchType]] metric_name_selectors: - pod_cpu_utilization_over_pod_limit_${TestingId} - pod_memory_utilization_over_pod_limit_${TestingId} - pod_memory_working_set - pod_memory_limit - pod_network_rx_bytes - pod_network_tx_bytes logging: verbosity: detailed extensions: health_check: service: pipelines: metrics: receivers: [prometheus] processors: [metricstransform/label_1, resourcedetection, metricstransform/rename, filter, cumulativetodelta, deltatorate, experimental_metricsgeneration/1, experimental_metricsgeneration/2, metricstransform/label_2, batch] exporters: [logging, awsemf] extensions: [health_check]