---
# Namespace that holds every Container Insights resource defined in this file.
apiVersion: v1
kind: Namespace
metadata:
  name: amazon-cloudwatch
  labels:
    name: amazon-cloudwatch
---
# ServiceAccount used by the CloudWatch agent DaemonSet.
# The role-arn annotation is filled in from the cw_sa_role placeholder; it is
# quoted so the rendered value is always parsed as a string.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
  annotations:
    eks.amazonaws.com/role-arn: "{{cw_sa_role}}"
---
# Cluster-wide permissions for the CloudWatch agent.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cloudwatch-agent-role
rules:
  - apiGroups: [""]
    resources: ["pods", "nodes", "endpoints"]
    verbs: ["list", "watch"]
  - apiGroups: ["apps"]
    resources: ["replicasets"]
    verbs: ["list", "watch"]
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["list", "watch"]
  - apiGroups: [""]
    resources: ["nodes/proxy"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["nodes/stats", "configmaps", "events"]
    verbs: ["create"]
  # get/update is restricted to the single named ConfigMap below.
  - apiGroups: [""]
    resources: ["configmaps"]
    resourceNames: ["cwagent-clusterleader"]
    verbs: ["get", "update"]
---
# Binds cloudwatch-agent-role to the cloudwatch-agent ServiceAccount.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: cloudwatch-agent-role-binding
subjects:
  - kind: ServiceAccount
    name: cloudwatch-agent
    namespace: amazon-cloudwatch
roleRef:
  kind: ClusterRole
  name: cloudwatch-agent-role
  apiGroup: rbac.authorization.k8s.io
---
# ConfigMap holding the CloudWatch agent configuration.
apiVersion: v1
data:
  # The configuration is a JSON document. Whatever change you make,
  # keep the JSON blob valid.
  cwagentconfig.json: |
    {
      "agent": {
        "region": "{{region_name}}"
      },
      "logs": {
        "metrics_collected": {
          "kubernetes": {
            "cluster_name": "{{cluster_name}}",
            "metrics_collection_interval": 60
          }
        },
        "force_flush_interval": 5
      }
    }
kind: ConfigMap
metadata:
  name: cwagentconfig
  namespace: amazon-cloudwatch
---
# CloudWatch agent, deployed as a DaemonSet.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: cloudwatch-agent
  namespace: amazon-cloudwatch
spec:
  selector:
    matchLabels:
      name: cloudwatch-agent
  template:
    metadata:
      labels:
        name: cloudwatch-agent
    spec:
      containers:
        - name: cloudwatch-agent
          image: public.ecr.aws/cloudwatch-agent/cloudwatch-agent:latest
          # ports:
          #   - containerPort: 8125
          #     hostPort: 8125
          #     protocol: UDP
          resources:
            limits:
              cpu: 200m
              memory: 200Mi
            requests:
              cpu: 200m
              memory: 200Mi
          # Please don't change the envs below
          env:
            - name: HOST_IP
              valueFrom:
                fieldRef:
                  fieldPath: status.hostIP
            - name: HOST_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: K8S_NAMESPACE
              valueFrom:
                fieldRef:
                  fieldPath: metadata.namespace
            - name: CI_VERSION
              value: "k8s/1.3.2"
          # Please don't change the mountPath values
          volumeMounts:
            - name: cwagentconfig
              mountPath: /etc/cwagentconfig
            - name: rootfs
              mountPath: /rootfs
              readOnly: true
            - name: dockersock
              mountPath: /var/run/docker.sock
              readOnly: true
            - name: varlibdocker
              mountPath: /var/lib/docker
              readOnly: true
            - name: sys
              mountPath: /sys
              readOnly: true
            - name: devdisk
              mountPath: /dev/disk
              readOnly: true
      volumes:
        - name: cwagentconfig
          configMap:
            name: cwagentconfig
        - name: rootfs
          hostPath:
            path: /
        - name: dockersock
          hostPath:
            path: /var/run/docker.sock
        - name: varlibdocker
          hostPath:
            path: /var/lib/docker
        - name: sys
          hostPath:
            path: /sys
        - name: devdisk
          hostPath:
            path: /dev/disk/
      terminationGracePeriodSeconds: 60
      serviceAccountName: cloudwatch-agent
---
# ConfigMap with the cluster name and AWS region for CloudWatch Logs.
# Need to replace the placeholders {{cluster_name}} and {{region_name}}.
# The HTTP server toggle/port and the read-from-head/read-from-tail settings
# are fixed literal values here rather than placeholders.
# The fluent-bit DaemonSet reads every key below through configMapKeyRef.
apiVersion: v1
data:
  cluster.name: "{{cluster_name}}"
  http.port: "2020"
  http.server: "On"
  logs.region: "{{region_name}}"
  read.head: "Off"
  read.tail: "On"
kind: ConfigMap
metadata:
  name: fluent-bit-cluster-info
  namespace: amazon-cloudwatch
---
# ServiceAccount used by the Fluent Bit DaemonSet.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: fluent-bit
  namespace: amazon-cloudwatch
  annotations:
    eks.amazonaws.com/role-arn: "{{cw_sa_role}}"
---
# Cluster-wide permissions for Fluent Bit.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
metadata:
  name: fluent-bit-role
rules:
  - nonResourceURLs:
      - /metrics
    verbs:
      - get
  - apiGroups: [""]
    resources:
      - namespaces
      - pods
      # NOTE(review): the built-in pod log subresource is spelled `pods/log`;
      # `pods/logs` is kept exactly as written - confirm whether it is intended.
      - pods/logs
    verbs: ["get", "list", "watch"]
---
# Binds fluent-bit-role to the fluent-bit ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: fluent-bit-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: fluent-bit-role
subjects:
  - kind: ServiceAccount
    name: fluent-bit
    namespace: amazon-cloudwatch
---
# Fluent Bit configuration files; the DaemonSet mounts this ConfigMap at
# /fluent-bit/etc/. The ${...} references use the environment variable names
# set on the fluent-bit container.
apiVersion: v1
kind: ConfigMap
metadata:
  name: fluent-bit-config
  namespace: amazon-cloudwatch
  labels:
    k8s-app: fluent-bit
data:
  fluent-bit.conf: |
    [SERVICE]
        Flush                     5
        Log_Level                 info
        Daemon                    off
        Parsers_File              parsers.conf
        HTTP_Server               ${HTTP_SERVER}
        HTTP_Listen               0.0.0.0
        HTTP_Port                 ${HTTP_PORT}
        storage.path              /var/fluent-bit/state/flb-storage/
        storage.sync              normal
        storage.checksum          off
        storage.backlog.mem_limit 5M

    @INCLUDE application-log.conf
    @INCLUDE dataplane-log.conf
    @INCLUDE host-log.conf

  application-log.conf: |
    [INPUT]
        Name                tail
        Tag                 application.*
        Exclude_Path        /var/log/containers/cloudwatch-agent*, /var/log/containers/fluent-bit*, /var/log/containers/aws-node*, /var/log/containers/kube-proxy*
        Path                /var/log/containers/*.log
        Docker_Mode         On
        Docker_Mode_Flush   5
        Docker_Mode_Parser  container_firstline
        Parser              docker
        DB                  /var/fluent-bit/state/flb_container.db
        Mem_Buf_Limit       50MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Rotate_Wait         30
        storage.type        filesystem
        Read_from_Head      ${READ_FROM_HEAD}

    [INPUT]
        Name                tail
        Tag                 application.*
        Path                /var/log/containers/fluent-bit*
        Parser              docker
        DB                  /var/fluent-bit/state/flb_log.db
        Mem_Buf_Limit       5MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Read_from_Head      ${READ_FROM_HEAD}

    [INPUT]
        Name                tail
        Tag                 application.*
        Path                /var/log/containers/cloudwatch-agent*
        Docker_Mode         On
        Docker_Mode_Flush   5
        Docker_Mode_Parser  cwagent_firstline
        Parser              docker
        DB                  /var/fluent-bit/state/flb_cwagent.db
        Mem_Buf_Limit       5MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Read_from_Head      ${READ_FROM_HEAD}

    [FILTER]
        Name                kubernetes
        Match               application.*
        Kube_URL            https://kubernetes.default.svc:443
        Kube_Tag_Prefix     application.var.log.containers.
        Merge_Log           On
        Merge_Log_Key       log_processed
        K8S-Logging.Parser  On
        K8S-Logging.Exclude Off
        Labels              Off
        Annotations         Off

    [OUTPUT]
        Name                cloudwatch_logs
        Match               application.*
        region              ${AWS_REGION}
        log_group_name      /aws/containerinsights/${CLUSTER_NAME}/application
        log_stream_prefix   ${HOST_NAME}-
        auto_create_group   true
        extra_user_agent    container-insights

  dataplane-log.conf: |
    [INPUT]
        Name                systemd
        Tag                 dataplane.systemd.*
        Systemd_Filter      _SYSTEMD_UNIT=docker.service
        DB                  /var/fluent-bit/state/systemd.db
        Path                /var/log/journal
        Read_From_Tail      ${READ_FROM_TAIL}

    [INPUT]
        Name                tail
        Tag                 dataplane.tail.*
        Path                /var/log/containers/aws-node*, /var/log/containers/kube-proxy*
        Docker_Mode         On
        Docker_Mode_Flush   5
        Docker_Mode_Parser  container_firstline
        Parser              docker
        DB                  /var/fluent-bit/state/flb_dataplane_tail.db
        Mem_Buf_Limit       50MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Rotate_Wait         30
        storage.type        filesystem
        Read_from_Head      ${READ_FROM_HEAD}

    [FILTER]
        Name                modify
        Match               dataplane.systemd.*
        Rename              _HOSTNAME                   hostname
        Rename              _SYSTEMD_UNIT               systemd_unit
        Rename              MESSAGE                     message
        Remove_regex        ^((?!hostname|systemd_unit|message).)*$

    [FILTER]
        Name                aws
        Match               dataplane.*
        imds_version        v1

    [OUTPUT]
        Name                cloudwatch_logs
        Match               dataplane.*
        region              ${AWS_REGION}
        log_group_name      /aws/containerinsights/${CLUSTER_NAME}/dataplane
        log_stream_prefix   ${HOST_NAME}-
        auto_create_group   true
        extra_user_agent    container-insights

  host-log.conf: |
    [INPUT]
        Name                tail
        Tag                 host.dmesg
        Path                /var/log/dmesg
        Parser              syslog
        DB                  /var/fluent-bit/state/flb_dmesg.db
        Mem_Buf_Limit       5MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Read_from_Head      ${READ_FROM_HEAD}

    [INPUT]
        Name                tail
        Tag                 host.messages
        Path                /var/log/messages
        Parser              syslog
        DB                  /var/fluent-bit/state/flb_messages.db
        Mem_Buf_Limit       5MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Read_from_Head      ${READ_FROM_HEAD}

    [INPUT]
        Name                tail
        Tag                 host.secure
        Path                /var/log/secure
        Parser              syslog
        DB                  /var/fluent-bit/state/flb_secure.db
        Mem_Buf_Limit       5MB
        Skip_Long_Lines     On
        Refresh_Interval    10
        Read_from_Head      ${READ_FROM_HEAD}

    [FILTER]
        Name                aws
        Match               host.*
        imds_version        v1

    [OUTPUT]
        Name                cloudwatch_logs
        Match               host.*
        region              ${AWS_REGION}
        log_group_name      /aws/containerinsights/${CLUSTER_NAME}/host
        log_stream_prefix   ${HOST_NAME}.
        auto_create_group   true
        extra_user_agent    container-insights

  parsers.conf: |
    [PARSER]
        Name                docker
        Format              json
        Time_Key            time
        Time_Format         %Y-%m-%dT%H:%M:%S.%LZ

    [PARSER]
        Name                syslog
        Format              regex
        Regex               ^(?<time>[^ ]* {1,2}[^ ]* [^ ]*) (?<host>[^ ]*) (?<ident>[a-zA-Z0-9_\/\.\-]*)(?:\[(?<pid>[0-9]+)\])?(?:[^\:]*\:)? *(?<message>.*)$
        Time_Key            time
        Time_Format         %b %d %H:%M:%S

    [PARSER]
        Name                container_firstline
        Format              regex
        Regex               (?<log>(?<="log":")\S(?!\.).*?)(?<!\\)".*(?<stream>(?<="stream":").*?)".*(?<time>\d{4}-\d{1,2}-\d{1,2}T\d{2}:\d{2}:\d{2}\.\w*).*(?=})
        Time_Key            time
        Time_Format         %Y-%m-%dT%H:%M:%S.%LZ

    [PARSER]
        Name                cwagent_firstline
        Format              regex
        Regex               (?<log>(?<="log":")\d{4}[\/-]\d{1,2}[\/-]\d{1,2}[ T]\d{2}:\d{2}:\d{2}(?!\.).*?)(?<!\\)".*(?<stream>(?<="stream":").*?)".*(?<time>\d{4}-\d{1,2}-\d{1,2}T\d{2}:\d{2}:\d{2}\.\w*).*(?=})
        Time_Key            time
        Time_Format         %Y-%m-%dT%H:%M:%S.%LZ
---
# Fluent Bit, deployed as a DaemonSet.
apiVersion: apps/v1
kind: DaemonSet
metadata:
  name: fluent-bit
  namespace: amazon-cloudwatch
  labels:
    k8s-app: fluent-bit
    version: v1
    kubernetes.io/cluster-service: "true"
spec:
  selector:
    matchLabels:
      k8s-app: fluent-bit
  template:
    metadata:
      labels:
        k8s-app: fluent-bit
        version: v1
        kubernetes.io/cluster-service: "true"
    spec:
      containers:
        - name: fluent-bit
          image: public.ecr.aws/aws-observability/aws-for-fluent-bit:stable
          imagePullPolicy: Always
          # Everything except HOST_NAME and CI_VERSION is read from the
          # fluent-bit-cluster-info ConfigMap.
          env:
            - name: AWS_REGION
              valueFrom:
                configMapKeyRef:
                  name: fluent-bit-cluster-info
                  key: logs.region
            - name: CLUSTER_NAME
              valueFrom:
                configMapKeyRef:
                  name: fluent-bit-cluster-info
                  key: cluster.name
            - name: HTTP_SERVER
              valueFrom:
                configMapKeyRef:
                  name: fluent-bit-cluster-info
                  key: http.server
            - name: HTTP_PORT
              valueFrom:
                configMapKeyRef:
                  name: fluent-bit-cluster-info
                  key: http.port
            - name: READ_FROM_HEAD
              valueFrom:
                configMapKeyRef:
                  name: fluent-bit-cluster-info
                  key: read.head
            - name: READ_FROM_TAIL
              valueFrom:
                configMapKeyRef:
                  name: fluent-bit-cluster-info
                  key: read.tail
            - name: HOST_NAME
              valueFrom:
                fieldRef:
                  fieldPath: spec.nodeName
            - name: CI_VERSION
              value: "k8s/1.3.2"
          resources:
            limits:
              memory: 200Mi
            requests:
              cpu: 500m
              memory: 100Mi
          volumeMounts:
            # Please don't change the read-only permissions below
            - name: fluentbitstate
              mountPath: /var/fluent-bit/state
            - name: varlog
              mountPath: /var/log
              readOnly: true
            - name: varlibdockercontainers
              mountPath: /var/lib/docker/containers
              readOnly: true
            - name: fluent-bit-config
              mountPath: /fluent-bit/etc/
            - name: runlogjournal
              mountPath: /run/log/journal
              readOnly: true
            - name: dmesg
              mountPath: /var/log/dmesg
              readOnly: true
      terminationGracePeriodSeconds: 10
      volumes:
        - name: fluentbitstate
          hostPath:
            path: /var/fluent-bit/state
        - name: varlog
          hostPath:
            path: /var/log
        - name: varlibdockercontainers
          hostPath:
            path: /var/lib/docker/containers
        - name: fluent-bit-config
          configMap:
            name: fluent-bit-config
        - name: runlogjournal
          hostPath:
            path: /run/log/journal
        - name: dmesg
          hostPath:
            path: /var/log/dmesg
      serviceAccountName: fluent-bit
      tolerations:
        - key: node-role.kubernetes.io/master
          operator: Exists
          effect: NoSchedule
        - operator: "Exists"
          effect: "NoExecute"
        - operator: "Exists"
          effect: "NoSchedule"
---
# The `cluster-info` configmap in the `amazon-cloudwatch` namespace is used by the current
# Python implementation of the `AwsEksResourceDetector`.
#
# See here for info on AWS resource detectors: https://aws-otel.github.io/docs/getting-started/python-sdk/trace-manual-instr#using-the-aws-resource-detectors
# AwsEksResourceDetector implementation: https://github.com/open-telemetry/opentelemetry-python-contrib/blob/main/sdk-extension/opentelemetry-sdk-extension-aws/src/opentelemetry/sdk/extension/aws/resource/eks.py
# Cluster-info definition borrowed from FluentD setup: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Container-Insights-setup-EKS-quickstart.html#Container-Insights-setup-EKS-quickstart-Fluentd
apiVersion: v1
data:
  cluster.name: "{{cluster_name}}"
  logs.region: "{{region_name}}"
kind: ConfigMap
metadata:
  name: cluster-info
  namespace: amazon-cloudwatch