# NOTE: replace {{cluster_name}} with actual cluster name in both extensions and exporter config.
#
# Pipeline overview (see the `service` section at the bottom):
#   ecs_observer extension -> writes scrape targets to result_file
#   prometheus receiver    -> reads those targets through file_sd_configs
#   resource, metricstransform processors -> awsemf exporter

extensions:
  ecs_observer:
    # cluster name requires manual config
    cluster_name: '{{cluster_name}}'
    # region can be configured directly or use AWS_REGION env var
    cluster_region: 'us-west-2'
    # the directory for the file must already exist
    result_file: '/etc/ecs_sd_targets.yaml'
    # format is https://golang.org/pkg/time/#ParseDuration
    refresh_interval: 60s
    # custom name for 'job' so we can rename it back to 'job' using metricstransform processor
    job_label_name: prometheus_job
    # NGINX https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/ContainerInsights-Prometheus-Setup-nginx-ecs.html
    services:
      - name_pattern: '^.*nginx-plus-service$'
        metrics_ports:
          - 9113
        job_name: nginx-plus-prometheus-exporter
      - name_pattern: '^.*nginx-service$'
        metrics_ports:
          - 9113
        job_name: nginx-prometheus-exporter
    # JMX
    docker_labels:
      - port_label: 'ECS_PROMETHEUS_EXPORTER_PORT'
    # App Mesh port and metrics are from envoy sidecar
    task_definitions:
      - arn_pattern: '.*:task-definition/.*-ColorTeller-(white):[0-9]+'
        metrics_path: '/stats/prometheus'
        metrics_ports:
          - 9901
        job_name: ecs-appmesh-colorteller
      - arn_pattern: '.*:task-definition/.*-ColorGateway:[0-9]+'
        metrics_path: '/stats/prometheus'
        metrics_ports:
          - 9901
        job_name: ecs-appmesh-colorgateway

receivers:
  prometheus:
    config:
      scrape_configs:
        - job_name: "ecssd"
          file_sd_configs:
            - files:
                # MUST match the file name in ecs_observer.result_file
                - '/etc/ecs_sd_targets.yaml'
          # Relabel here because label with __ prefix will be dropped by receiver.
          relabel_configs:
            # ClusterName
            - source_labels: [__meta_ecs_cluster_name]
              action: replace
              target_label: ClusterName
            # ServiceName
            - source_labels: [__meta_ecs_service_name]
              action: replace
              target_label: ServiceName
            # TaskDefinitionFamily
            - source_labels: [__meta_ecs_task_definition_family]
              action: replace
              target_label: TaskDefinitionFamily
            # LaunchType
            - source_labels: [__meta_ecs_task_launch_type]
              action: replace
              target_label: LaunchType
            # container_name
            - source_labels: [__meta_ecs_container_name]
              action: replace
              target_label: container_name
            # Convert docker labels on container to metric labels
            - action: labelmap
              # Capture the key using regex,
              # e.g. __meta_ecs_container_labels_Java_EMF_Metrics -> Java_EMF_Metrics
              regex: '^__meta_ecs_container_labels_(.+)$'
              replacement: '$$1'

processors:
  resource:
    attributes:
      # Insert receiver: prometheus for CloudWatch EMF Exporter to add prom_metric_type
      - key: receiver
        value: "prometheus"
        action: insert
  metricstransform:
    transforms:
      # Rename customized job label back to job
      - include: ".*"
        match_type: regexp
        action: update
        operations:
          # must match the value configured in ecs_observer (job_label_name)
          - label: prometheus_job
            new_label: job
            action: update_label

exporters:
  awsemf:
    # Use the exact namespace for builtin dashboard to work
    namespace: ECS/ContainerInsights/Prometheus
    # Log group name format is fixed as well, the only variable is cluster name
    log_group_name: "/aws/ecs/containerinsights/{{cluster_name}}/prometheus"
    dimension_rollup_option: NoDimensionRollup
    metric_declarations:
      # NGINX
      - dimensions: [[ClusterName, TaskDefinitionFamily, ServiceName]]
        label_matchers:
          - label_names:
              - ServiceName
            regex: '^.*nginx-service$'
        metric_name_selectors:
          - "^nginx_.*$"

      # NGINX Plus
      - dimensions: [[ClusterName, TaskDefinitionFamily, ServiceName]]
        label_matchers:
          - label_names:
              - ServiceName
            regex: '^.*nginx-plus-service$'
        metric_name_selectors:
          - "^nginxplus_connections_accepted$"
          - "^nginxplus_connections_active$"
          - "^nginxplus_connections_dropped$"
          - "^nginxplus_connections_idle$"
          - "^nginxplus_http_requests_total$"
          - "^nginxplus_ssl_handshakes$"
          - "^nginxplus_ssl_handshakes_failed$"
          - "^nginxplus_up$"
          - "^nginxplus_upstream_server_health_checks_fails$"
      - dimensions: [[ClusterName, TaskDefinitionFamily, ServiceName, upstream]]
        label_matchers:
          - label_names:
              - ServiceName
            regex: '^.*nginx-plus-service$'
        metric_name_selectors:
          - "^nginxplus_upstream_server_response_time$"
      - dimensions: [[ClusterName, TaskDefinitionFamily, ServiceName, code]]
        label_matchers:
          - label_names:
              - ServiceName
            regex: '^.*nginx-plus-service$'
        metric_name_selectors:
          - "^nginxplus_upstream_server_responses$"
          - "^nginxplus_server_zone_responses$"

      # JMX
      - dimensions: [[ClusterName, TaskDefinitionFamily, area]]
        label_matchers:
          - label_names:
              - Java_EMF_Metrics
            regex: '^true$'
        metric_name_selectors:
          - "^jvm_memory_bytes_used$"
      - dimensions: [[ClusterName, TaskDefinitionFamily, pool]]
        label_matchers:
          - label_names:
              - Java_EMF_Metrics
            regex: '^true$'
        metric_name_selectors:
          - "^jvm_memory_pool_bytes_used$"
      - dimensions: [[ClusterName, TaskDefinitionFamily]]
        label_matchers:
          - label_names:
              - Java_EMF_Metrics
            regex: '^true$'
        metric_name_selectors:
          - "^jvm_threads_(current|daemon)$"
          - "^jvm_classes_loaded$"
          - "^java_lang_operatingsystem_(freephysicalmemorysize|totalphysicalmemorysize|freeswapspacesize|totalswapspacesize|systemcpuload|processcpuload|availableprocessors|openfiledescriptorcount)$"
          - "^catalina_manager_(rejectedsessions|activesessions)$"
          - "^jvm_gc_collection_seconds_(count|sum)$"
          - "^catalina_globalrequestprocessor_(bytesreceived|bytessent|requestcount|errorcount|processingtime)$"

      # AppMesh Envoy
      - dimensions: [[ClusterName, TaskDefinitionFamily]]
        label_matchers:
          - label_names:
              - container_name
            regex: '^envoy$'
        metric_name_selectors:
          - "^envoy_http_downstream_rq_(total|xx)$"
          - "^envoy_cluster_upstream_cx_(r|t)x_bytes_total$"
          - "^envoy_cluster_membership_(healthy|total)$"
          - "^envoy_server_memory_(allocated|heap_size)$"
          - "^envoy_cluster_upstream_cx_(connect_timeout|destroy_local_with_active_rq)$"
          - "^envoy_cluster_upstream_rq_(pending_failure_eject|pending_overflow|timeout|per_try_timeout|rx_reset|maintenance_mode)$"
          - "^envoy_http_downstream_cx_destroy_remote_active_rq$"
          - "^envoy_cluster_upstream_flow_control_(paused_reading_total|resumed_reading_total|backed_up_total|drained_total)$"
          - "^envoy_cluster_upstream_rq_retry$"
          - "^envoy_cluster_upstream_rq_retry_(success|overflow)$"
          - "^envoy_server_(version|uptime|live)$"
      - dimensions: [[ClusterName, TaskDefinitionFamily, envoy_http_conn_manager_prefix, envoy_response_code_class]]
        label_matchers:
          - label_names:
              - container_name
            regex: '^envoy$'
        metric_name_selectors:
          - "^envoy_http_downstream_rq_xx$"

service:
  extensions: [ecs_observer]
  pipelines:
    metrics:
      receivers: [prometheus]
      processors: [resource, metricstransform]
      exporters: [awsemf]