---
# create amazon-cloudwatch namespace
# Every namespaced resource in this manifest is created in this namespace.
apiVersion: v1
kind: Namespace
metadata:
  name: amazon-cloudwatch
  labels:
    name: amazon-cloudwatch
---
# create configmap for prometheus cwagent config
# The Deployment at the end of this manifest mounts this ConfigMap at
# /etc/cwagentconfig.
apiVersion: v1
data:
  # cwagent json config
  # The value is JSON kept in a literal block scalar, so no comments can be
  # placed inside it. Notes on its content:
  #   - "prometheus_config_path" points at the prometheus.yaml key of the
  #     prometheus-config ConfigMap, which the Deployment mounts at
  #     /etc/prometheusconfig.
  #   - Each "metric_declaration" entry pairs a "label_matcher" regex (applied
  #     to the listed "source_labels"; the memcached entries with several
  #     source labels use ';' between the per-label patterns) with the
  #     "dimensions" sets and "metric_selectors" regexes for that workload.
  #   - The job-based matchers ("kubernetes-pod-jmx", "prometheusdemo-dotnet")
  #     are anchored with ^...$ and correspond to job_name values in the
  #     scrape config.
  cwagentconfig.json: |
    {
      "logs": {
        "metrics_collected": {
          "prometheus": {
            "prometheus_config_path": "/etc/prometheusconfig/prometheus.yaml",
            "emf_processor": {
              "metric_declaration_dedup": false,
              "metric_declaration": [
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*nginx.*",
                  "dimensions": [["Service","Namespace","ClusterName"]],
                  "metric_selectors": [
                    "^nginx_ingress_controller_requests$",
                    "^nginx_ingress_controller_nginx_process_connections$",
                    "^nginx_ingress_controller_nginx_process_connections_total$",
                    "^nginx_ingress_controller_success$",
                    "^nginx_ingress_controller_nginx_process_resident_memory_bytes$",
                    "^nginx_ingress_controller_nginx_process_cpu_seconds_total$",
                    "^nginx_ingress_controller_config_last_reload_successful$"
                  ]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*nginx.*",
                  "dimensions": [["Service","Namespace","ClusterName","ingress"],["Service","Namespace","ClusterName","status"]],
                  "metric_selectors": ["^nginx_ingress_controller_requests$"]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*haproxy-ingress-controller.*",
                  "dimensions": [["Service","Namespace","ClusterName","frontend","code"],["Service","Namespace","ClusterName","backend","code"]],
                  "metric_selectors": [
                    "^haproxy_frontend_http_responses_total$",
                    "^haproxy_backend_http_responses_total$"
                  ]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*haproxy-ingress-controller.*",
                  "dimensions": [["Service","Namespace","ClusterName"]],
                  "metric_selectors": [
                    "^haproxy_backend_up$",
                    "^haproxy_backend_weight$",
                    "^haproxy_backend_bytes_in_total$",
                    "^haproxy_backend_bytes_out_total$",
                    "^haproxy_backend_connections_total$",
                    "^haproxy_backend_connection_errors_total$",
                    "^haproxy_backend_response_errors_total$",
                    "^haproxy_backend_redispatch_warnings_total$",
                    "^haproxy_backend_retry_warnings_total$",
                    "^haproxy_backend_current_queue$",
                    "^haproxy_backend_current_sessions$",
                    "^haproxy_backend_current_session_rate$",
                    "^haproxy_backend_max_queue$",
                    "^haproxy_frontend_http_responses_total$",
                    "^haproxy_backend_http_responses_total$",
                    "^haproxy_backend_max_sessions$",
                    "^haproxy_backend_limit_sessions$",
                    "^haproxy_backend_max_session_rate$",
                    "^haproxy_frontend_bytes_out_total$",
                    "^haproxy_frontend_bytes_in_total$",
                    "^haproxy_frontend_connections_total$",
                    "^haproxy_frontend_http_requests_total$",
                    "^haproxy_frontend_request_errors_total$",
                    "^haproxy_frontend_requests_denied_total$",
                    "^haproxy_frontend_current_sessions$",
                    "^haproxy_frontend_current_session_rate$",
                    "^haproxy_frontend_max_sessions$",
                    "^haproxy_frontend_limit_sessions$",
                    "^haproxy_frontend_max_session_rate$",
                    "^haproxy_frontend_limit_session_rate$"
                  ]
                },
                {
                  "source_labels": ["Service"],
                  "label_matcher": ".*memcached.*",
                  "dimensions": [["Service","Namespace","ClusterName"]],
                  "metric_selectors": [
                    "^memcached_current_bytes$",
                    "^memcached_limit_bytes$",
                    "^memcached_items_evicted_total$",
                    "^memcached_items_reclaimed_total$",
                    "^memcached_current_items$",
                    "^memcached_read_bytes_total$",
                    "^memcached_written_bytes_total$",
                    "^memcached_current_connections$",
                    "^memcached_commands_total$"
                  ]
                },
                {
                  "source_labels": ["Service", "status", "command"],
                  "label_matcher": ".*memcached.*;hit;get",
                  "dimensions": [["Service","Namespace","ClusterName","status","command"]],
                  "metric_selectors": [
                    "^memcached_commands_total$"
                  ]
                },
                {
                  "source_labels": ["Service", "command"],
                  "label_matcher": ".*memcached.*;(get|set)",
                  "dimensions": [["Service","Namespace","ClusterName","command"]],
                  "metric_selectors": [
                    "^memcached_commands_total$"
                  ]
                },
                {
                  "source_labels": ["container_name"],
                  "label_matcher": "^envoy$",
                  "dimensions": [["ClusterName","Namespace"]],
                  "metric_selectors": [
                    "^envoy_http_downstream_rq_total$",
                    "^envoy_http_downstream_rq_xx$",
                    "^envoy_cluster_upstream_cx_rx_bytes_total$",
                    "^envoy_cluster_upstream_cx_tx_bytes_total$",
                    "^envoy_cluster_membership_healthy$",
                    "^envoy_cluster_membership_total$",
                    "^envoy_server_memory_heap_size$",
                    "^envoy_server_memory_allocated$",
                    "^envoy_cluster_upstream_cx_connect_timeout$",
                    "^envoy_cluster_upstream_rq_pending_failure_eject$",
                    "^envoy_cluster_upstream_rq_pending_overflow$",
                    "^envoy_cluster_upstream_rq_timeout$",
                    "^envoy_cluster_upstream_rq_per_try_timeout$",
                    "^envoy_cluster_upstream_rq_rx_reset$",
                    "^envoy_cluster_upstream_cx_destroy_local_with_active_rq$",
                    "^envoy_http_downstream_cx_destroy_remote_active_rq$",
                    "^envoy_cluster_upstream_rq_maintenance_mode$",
                    "^envoy_cluster_upstream_flow_control_paused_reading_total$",
                    "^envoy_cluster_upstream_flow_control_resumed_reading_total$",
                    "^envoy_cluster_upstream_flow_control_backed_up_total$",
                    "^envoy_cluster_upstream_flow_control_drained_total$",
                    "^envoy_cluster_upstream_rq_retry$",
                    "^envoy_cluster_upstream_rq_retry_success$",
                    "^envoy_cluster_upstream_rq_retry_overflow$",
                    "^envoy_server_live$",
                    "^envoy_server_uptime$",
                    "^envoy_server_version$"
                  ]
                },
                {
                  "source_labels": ["container_name"],
                  "label_matcher": "^envoy$",
                  "dimensions": [["ClusterName","Namespace","envoy_http_conn_manager_prefix","envoy_response_code_class"]],
                  "metric_selectors": [
                    "^envoy_http_downstream_rq_xx$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^kubernetes-pod-jmx$",
                  "dimensions": [["ClusterName","Namespace"]],
                  "metric_selectors": [
                    "^jvm_threads_current$",
                    "^jvm_threads_daemon$",
                    "^jvm_classes_loaded$",
                    "^java_lang_operatingsystem_freephysicalmemorysize$",
                    "^java_lang_operatingsystem_totalphysicalmemorysize$",
                    "^java_lang_operatingsystem_freeswapspacesize$",
                    "^java_lang_operatingsystem_totalswapspacesize$",
                    "^java_lang_operatingsystem_systemcpuload$",
                    "^java_lang_operatingsystem_processcpuload$",
                    "^java_lang_operatingsystem_availableprocessors$",
                    "^java_lang_operatingsystem_openfiledescriptorcount$",
                    "^catalina_manager_activesessions$",
                    "^catalina_manager_rejectedsessions$",
                    "^jvm_gc_collection_seconds_sum$",
                    "^jvm_gc_collection_seconds_count$",
                    "^catalina_globalrequestprocessor_bytesreceived$",
                    "^catalina_globalrequestprocessor_bytessent$",
                    "^catalina_globalrequestprocessor_requestcount$",
                    "^catalina_globalrequestprocessor_errorcount$",
                    "^catalina_globalrequestprocessor_processingtime$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^kubernetes-pod-jmx$",
                  "dimensions": [["ClusterName","Namespace","area"]],
                  "metric_selectors": [
                    "^jvm_memory_bytes_used$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^kubernetes-pod-jmx$",
                  "dimensions": [["ClusterName","Namespace","pool"]],
                  "metric_selectors": [
                    "^jvm_memory_pool_bytes_used$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^prometheusdemo-dotnet$",
                  "dimensions": [["ClusterName","Namespace"]],
                  "metric_selectors": [
                    "^process_cpu_seconds_total$",
                    "^process_open_handles$",
                    "^process_virtual_memory_bytes$",
                    "^process_start_time_seconds$",
                    "^process_private_memory_bytes$",
                    "^process_working_set_bytes$",
                    "^process_num_threads$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^prometheusdemo-dotnet$",
                  "dimensions": [["ClusterName","Namespace"]],
                  "metric_selectors": [
                    "^dotnet_total_memory_bytes$",
                    "^dotnet_collection_count_total$",
                    "^dotnet_gc_finalization_queue_length$",
                    "^dotnet_jit_method_seconds_total$",
                    "^dotnet_jit_method_total$",
                    "^dotnet_threadpool_adjustments_total$",
                    "^dotnet_threadpool_io_num_threads$",
                    "^dotnet_threadpool_num_threads$",
                    "^dotnet_gc_pinned_objects$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^prometheusdemo-dotnet$",
                  "dimensions": [["ClusterName","Namespace","gc_heap"]],
                  "metric_selectors": [
                    "^dotnet_gc_allocated_bytes_total$"
                  ]
                },
                {
                  "source_labels": ["job"],
                  "label_matcher": "^prometheusdemo-dotnet$",
                  "dimensions": [["ClusterName","Namespace","app"]],
                  "metric_selectors": [
                    "^PrometheusDemo_HomePage_Hit_Count$",
                    "^PrometheusDemo_SiteVisitors_Gauge$",
                    "^PrometheusDemo_ProductsPage_Hit_Count$"
                  ]
                }
              ]
            }
          }
        },
        "force_flush_interval": 5
      }
    }
kind: ConfigMap
metadata:
  name: prometheus-cwagentconfig
  namespace: amazon-cloudwatch
---
# create configmap for prometheus scrape config
# The Deployment at the end of this manifest mounts this ConfigMap at
# /etc/prometheusconfig, so the key below becomes
# /etc/prometheusconfig/prometheus.yaml (the path named in cwagentconfig.json).
apiVersion: v1
data:
  # prometheus config
  # Four scrape jobs: App Mesh envoy sidecars, annotated service endpoints,
  # JMX exporter pods (port 9404) and the .NET demo pods (port 80).
  prometheus.yaml: |
    global:
      evaluation_interval: 1m
      scrape_interval: 1m
      scrape_timeout: 10s

    scrape_configs:
      # Pods that have a container named exactly "envoy".
      - job_name: 'kubernetes-pod-appmesh-envoy'
        sample_limit: 10000
        metrics_path: /stats/prometheus
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__meta_kubernetes_pod_container_name]
            action: keep
            regex: '^envoy$'
          # Point the scrape address at port 9901 on the pod.
          # NOTE(review): the regex needs a numeric value after the ';', i.e.
          # the pod's prometheus.io/port annotation must be set for this
          # rewrite to apply - confirm that is intended.
          - source_labels: [__address__, __meta_kubernetes_pod_annotation_prometheus_io_port]
            action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: ${1}:9901
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: Namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: pod_controller_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_controller_kind
            target_label: pod_controller_kind
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_phase
            target_label: pod_phase

      # Endpoints of services annotated prometheus.io/scrape=true; the
      # optional scheme, path and port annotations override the defaults.
      - job_name: kubernetes-service-endpoints
        sample_limit: 10000
        kubernetes_sd_configs:
          - role: endpoints
        relabel_configs:
          - action: keep
            regex: true
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scrape
          - action: replace
            regex: (https?)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_scheme
            target_label: __scheme__
          - action: replace
            regex: (.+)
            source_labels:
              - __meta_kubernetes_service_annotation_prometheus_io_path
            target_label: __metrics_path__
          - action: replace
            regex: ([^:]+)(?::\d+)?;(\d+)
            replacement: $1:$2
            source_labels:
              - __address__
              - __meta_kubernetes_service_annotation_prometheus_io_port
            target_label: __address__
          - action: labelmap
            regex: __meta_kubernetes_service_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: Namespace
          # "Service" is the label the nginx/haproxy/memcached metric
          # declarations in cwagentconfig.json match on.
          - action: replace
            source_labels:
              - __meta_kubernetes_service_name
            target_label: Service
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_node_name
            target_label: kubernetes_node
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_name
            target_label: pod_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container_name

      # Pod targets whose address ends in :9404.
      - job_name: 'kubernetes-pod-jmx'
        sample_limit: 10000
        metrics_path: /metrics
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__address__]
            action: keep
            regex: '.*:9404$'
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: Namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: pod_controller_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_controller_kind
            target_label: pod_controller_kind
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_phase
            target_label: pod_phase

      # Pod targets whose address ends in :80.
      - job_name: 'prometheusdemo-dotnet'
        sample_limit: 10000
        metrics_path: /metrics
        kubernetes_sd_configs:
          - role: pod
        relabel_configs:
          - source_labels: [__address__]
            action: keep
            regex: '.*:80$'
          - action: labelmap
            regex: __meta_kubernetes_pod_label_(.+)
          - action: replace
            source_labels:
              - __meta_kubernetes_namespace
            target_label: Namespace
          - source_labels: [__meta_kubernetes_pod_name]
            action: replace
            target_label: pod_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_container_name
            target_label: container_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_controller_name
            target_label: pod_controller_name
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_controller_kind
            target_label: pod_controller_kind
          - action: replace
            source_labels:
              - __meta_kubernetes_pod_phase
            target_label: pod_phase
kind: ConfigMap
metadata:
  name: prometheus-config
  namespace: amazon-cloudwatch
---
# create cwagent service account and role binding
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cwagent-prometheus
  namespace: amazon-cloudwatch
---
# Cluster-wide read-only access for the agent's service account.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cwagent-prometheus-role
rules:
  - apiGroups: [""]
    resources:
      - nodes
      - nodes/proxy
      - services
      - endpoints
      - pods
    verbs: ["get", "list", "watch"]
  # Ingress is served from networking.k8s.io on current clusters; the
  # extensions group is kept so the role still works on older ones.
  - apiGroups:
      - extensions
      - networking.k8s.io
    resources:
      - ingresses
    verbs: ["get", "list", "watch"]
  - nonResourceURLs: ["/metrics"]
    verbs: ["get"]
---
# Binds cwagent-prometheus-role to the cwagent-prometheus service account.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cwagent-prometheus-role-binding
subjects:
  - kind: ServiceAccount
    name: cwagent-prometheus
    namespace: amazon-cloudwatch
roleRef:
  kind: ClusterRole
  name: cwagent-prometheus-role
  apiGroup: rbac.authorization.k8s.io
---
# Deployment
# Single-replica CloudWatch agent that runs under the cwagent-prometheus
# service account and mounts both ConfigMaps defined above.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: cwagent-prometheus
  namespace: amazon-cloudwatch
spec:
  replicas: 1
  selector:
    matchLabels:
      app: cwagent-prometheus
  template:
    metadata:
      labels:
        app: cwagent-prometheus
    spec:
      containers:
        - name: cloudwatch-agent
          image: amazon/cloudwatch-agent:1.243835.0-prometheus-beta
          imagePullPolicy: Always
          resources:
            limits:
              cpu: 1000m
              memory: 1000Mi
            requests:
              cpu: 200m
              memory: 200Mi
          # Please don't change below envs
          env:
            - name: CI_VERSION
              value: "k8s/1.1.0-prometheus-beta"
          # Please don't change the mountPath
          # (/etc/prometheusconfig must match prometheus_config_path in
          # cwagentconfig.json.)
          volumeMounts:
            - name: prometheus-cwagentconfig
              mountPath: /etc/cwagentconfig
            - name: prometheus-config
              mountPath: /etc/prometheusconfig
      volumes:
        - name: prometheus-cwagentconfig
          configMap:
            name: prometheus-cwagentconfig
        - name: prometheus-config
          configMap:
            name: prometheus-config
      terminationGracePeriodSeconds: 60
      serviceAccountName: cwagent-prometheus