# -------------------------------------------------------------------------------------------------
#
# Real time visualization and monitoring of network bytes transferred via VPC endpoints,
# NAT Gateway and NAT instances used for Databricks data transfer
#  --- Provisions CloudWatch Contributor Insights Rules for time series analysis of network bytes transferred
#  --- Provisions CloudWatch Metrics based on Contributor Insights rules
#  --- Provisions CloudWatch dashboard for visualizing time series analysis based on Contributor Insights rules
#
#
# @kmmahaj
# -------------------------------------------------------------------------------------------------
AWSTemplateFormatVersion: 2010-09-09
Description: Databricks network transfer visualizations using CloudWatch Contributor Insights

Parameters:
  # State applied to every Contributor Insights rule created by this template.
  ContributorInsightRuleState:
    Type: String
    Description: Select to enable or disable the Contributor Insight Rules on Creation.
    Default: ENABLED
    AllowedValues:
      - ENABLED
      - DISABLED
  # Log group that every rule below reads VPC Flow Log records from.
  VPCFlowLogsCloudWatchLogGroup:
    Description: Name of the CloudWatch Log group for the VPC Flow logs
    Type: String
    Default: vpcloggroup-vpc1
  VPCEndpointInterfaceId:
    Description: ENI identifier (eni-xxxx) for the VPC endpoint (Kinesis)
    Type: String
    Default: eni-01234123412341234
  PrivateLinkVPCEndpointInterfaceId:
    Description: ENI identifier (eni-xxxx) for the Consumer side VPC endpoint (i.e. in the Consumer Managed VPC for Databricks)
    Type: String
    Default: eni-01234123412341234
  NATInstancePrivateIP:
    Description: Private IP address of the NAT instance
    Type: String
    Default: ''
  S3GatewayPrivateIP:
    Description: Private IP address from the S3 Gateway endpoint prefixlist
    Type: String
    Default: ''
  NATGWPrivateIP:
    Description: Private IP address of the NAT GW
    Type: String
    Default: ''
  EmailAddress:
    Description: Email Address for notifications
    Type: String
    Default: admin@yourdomain.com
  TopicName:
    Description: SNS Topic Name
    Type: String
    Default: NetworkSNS

Outputs:
  OutputDashboardURI:
    # The dashboard is a conditional resource, so the output that references it
    # must carry the same condition; otherwise the reference cannot be resolved
    # when the dashboard is not created.
    Condition: CreateDashboardRule
    Description: Link to the CloudWatch Dashboard
    Value: !Sub 'https://console.aws.amazon.com/cloudwatch/home?region=${AWS::Region}#dashboards:name=${DatabricksCWDashboard}'

# Each per-source condition is true when the matching identifier parameter is non-empty.
# In this template only CreateDashboardRule is attached to a resource (the dashboard and
# its output); the Contributor Insights rules themselves are created unconditionally,
# because the dashboard body references every rule's RuleName.
Conditions:
  CreateVPCEndpointRule: !Not
    - !Equals
      - !Ref VPCEndpointInterfaceId
      - ''
  CreatePrivateLinkRule: !Not
    - !Equals
      - !Ref PrivateLinkVPCEndpointInterfaceId
      - ''
  CreateNATInstanceRule: !Not
    - !Equals
      - !Ref NATInstancePrivateIP
      - ''
  CreateNATGatewayRule: !Not
    - !Equals
      - !Ref NATGWPrivateIP
      - ''
  CreateS3GatewayRule: !Not
    - !Equals
      - !Ref S3GatewayPrivateIP
      - ''
  # True when at least one monitored source has been configured.
  CreateDashboardRule: !Or
    - !Condition CreateVPCEndpointRule
    - !Condition CreatePrivateLinkRule
    - !Condition CreateNATInstanceRule
    - !Condition CreateNATGatewayRule
    - !Condition CreateS3GatewayRule

Resources:

  # SNS topic for CloudWatch Alarm Notifications
  AlarmNotificationTopic:
    Type: 'AWS::SNS::Topic'
    Properties:
      DisplayName: !Ref TopicName
      TopicName: !Ref TopicName

  # Email Subscription for SNS topic
  AlarmEmailSubscription:
    Type: 'AWS::SNS::Subscription'
    Properties:
      Protocol: email
      Endpoint: !Ref EmailAddress
      TopicArn: !Ref AlarmNotificationTopic

#===============================================================================================================================
# VPC Interface Endpoint (Kinesis) data transfer monitoring and visualization from AWS Kinesis to AWS Databricks data plane
#   (Sum of bytes transferred indexed on source and destination ip)
#===============================================================================================================================
  VPCEndpointBytesInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Keeps flow log records whose interface_id equals VPCEndpointInterfaceId and
      # sums "bytes" per (srcaddr, dstaddr) pair.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "10": "bytes"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${VPCEndpointInterfaceId}"
                ],
                "Match": "interface_id"
              }
            ],
            "Keys": [
              "srcaddr",
              "dstaddr"
            ],
            "ValueOf": "bytes"
          },
          "AggregateOn": "Sum"
        }
      RuleName: VPCEndpointBytesInsightRule
      RuleState: !Ref ContributorInsightRuleState

#===============================================================================================================================
# NAT instance data transfer monitoring and visualization - Azure Databricks to AWS Databricks data plane
#   (Sum of bytes transferred by source and destination ip)
#===============================================================================================================================
  NATInstanceBytesInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Keeps flow log records whose srcaddr equals NATInstancePrivateIP and
      # sums "bytes" per (srcaddr, dstaddr) pair.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "10": "bytes"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${NATInstancePrivateIP}"
                ],
                "Match": "srcaddr"
              }
            ],
            "Keys": [
              "srcaddr",
              "dstaddr"
            ],
            "ValueOf": "bytes"
          },
          "AggregateOn": "Sum"
        }
      RuleName: NATInstanceBytesInsightRule
      RuleState: !Ref ContributorInsightRuleState

#===============================================================================================================================
# S3 Gateway data transfer monitoring and visualization - AWS Databricks data plane to S3
#   (Sum of bytes transferred by source and destination ip)
#===============================================================================================================================
  S3GatewayBytesInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Keeps flow log records whose srcaddr equals S3GatewayPrivateIP and
      # sums "bytes" per (srcaddr, dstaddr) pair.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "10": "bytes"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${S3GatewayPrivateIP}"
                ],
                "Match": "srcaddr"
              }
            ],
            "Keys": [
              "srcaddr",
              "dstaddr"
            ],
            "ValueOf": "bytes"
          },
          "AggregateOn": "Sum"
        }
      RuleName: S3GatewayBytesInsightRule
      RuleState: !Ref ContributorInsightRuleState

#===============================================================================================================================
# NAT Gateway data transfer monitoring and visualization - outbound from AWS Databricks data plane (to Databricks control plane)
#   (Sum of bytes transferred indexed on source and destination ip)
#===============================================================================================================================
  NATGatewayBytesInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Keeps flow log records whose srcaddr equals NATGWPrivateIP and
      # sums "bytes" per (srcaddr, dstaddr) pair.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "10": "bytes"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${NATGWPrivateIP}"
                ],
                "Match": "srcaddr"
              }
            ],
            "Keys": [
              "srcaddr",
              "dstaddr"
            ],
            "ValueOf": "bytes"
          },
          "AggregateOn": "Sum"
        }
      RuleName: NATGatewayBytesInsightRule
      RuleState: !Ref ContributorInsightRuleState

#===============================================================================================================================
# Private Link data transfer monitoring and visualization from Databricks data plane to Databricks control plane
#   - Monitors total data transfer from the Consumer Endpoint
#   - This should replace NAT GW usage
#   (Sum of bytes transferred from the Consumer Endpoint for each sourceaddr-destaddr pair)
#===============================================================================================================================
  PrivateLinkBytesInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Keeps flow log records whose interface_id equals PrivateLinkVPCEndpointInterfaceId
      # and sums "bytes" per (srcaddr, dstaddr) pair.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "10": "bytes"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${PrivateLinkVPCEndpointInterfaceId}"
                ],
                "Match": "interface_id"
              }
            ],
            "Keys": [
              "srcaddr",
              "dstaddr"
            ],
            "ValueOf": "bytes"
          },
          "AggregateOn": "Sum"
        }
      RuleName: PrivateLinkBytesInsightRule
      RuleState: !Ref ContributorInsightRuleState

#===============================================================================================================================
# Private Link data transfer monitoring and visualization from Databricks data plane to Databricks control plane
#   - Monitors total # of connections from the Consumer Endpoint
#   - This should replace NAT GW usage
#   (Total # of TCP connections from the Consumer Endpoint from each source address)
#===============================================================================================================================
  PrivateLinkConnectionsInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Keeps TCP (protocol 6) flow log records on the PrivateLink endpoint ENI and
      # counts them per (interface_id, srcaddr).
      # "AggregateOn" is "Count" because this rule has no "ValueOf" field to sum;
      # the rule is meant to report the number of matching records.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "8": "protocol",
            "10": "bytes",
            "13": "action"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${PrivateLinkVPCEndpointInterfaceId}"
                ],
                "Match": "interface_id"
              },
              {
                "Match": "protocol",
                "EqualTo": 6
              }
            ],
            "Keys": [
              "interface_id",
              "srcaddr"
            ]
          },
          "AggregateOn": "Count"
        }
      RuleName: PrivateLinkConnectionsInsightRule
      RuleState: !Ref ContributorInsightRuleState

#===============================================================================================================================
# Private Link data transfer monitoring and visualization from Databricks data plane to Databricks control plane
#   - Monitors total # of connections from the Consumer Endpoint
#   - This should replace NAT GW usage
#   (Total # of TCP connections that were rejected from the Consumer Endpoint from each source address)
#===============================================================================================================================
  PrivateLinkRejectConnectionsInsightRule:
    Type: AWS::CloudWatch::InsightRule
    Properties:
      # Same as PrivateLinkConnectionsInsightRule, additionally restricted to records
      # whose action is REJECT. Counts records ("Count") because no "ValueOf" is given.
      RuleBody: !Sub |
        {
          "Schema": {
            "Name": "CloudWatchLogRule",
            "Version": 1
          },
          "LogGroupNames": [
            "${VPCFlowLogsCloudWatchLogGroup}"
          ],
          "LogFormat": "CLF",
          "Fields": {
            "4": "srcaddr",
            "5": "dstaddr",
            "3": "interface_id",
            "8": "protocol",
            "10": "bytes",
            "13": "action"
          },
          "Contribution": {
            "Filters": [
              {
                "In": [
                  "${PrivateLinkVPCEndpointInterfaceId}"
                ],
                "Match": "interface_id"
              },
              {
                "Match": "protocol",
                "EqualTo": 6
              },
              {
                "In": [
                  "REJECT"
                ],
                "Match": "action"
              }
            ],
            "Keys": [
              "interface_id",
              "srcaddr"
            ]
          },
          "AggregateOn": "Count"
        }
      RuleName: PrivateLinkRejectConnectionsInsightRule
      RuleState: !Ref ContributorInsightRuleState

#====================================================================================================================
# CloudWatch Metrics and CloudWatch Alarms based on NAT instance CloudWatch Contributor Insights Rule
#====================================================================================================================
  NATInstanceBytesTransferredAlarm:
    Type: 'AWS::CloudWatch::Alarm'
    # The metric expression refers to the rule by its literal name, which creates no
    # implicit dependency, so the ordering is declared explicitly.
    DependsOn: NATInstanceBytesInsightRule
    Properties:
      EvaluationPeriods: 1
      AlarmDescription: Alarms when there is a high bytes transfer
      AlarmName: 'NATInstanceHighBytesTransferAlarm'
      ComparisonOperator: GreaterThanOrEqualToThreshold
      Threshold: 1000
      AlarmActions:
        - !Ref AlarmNotificationTopic
      Metrics:
        - Expression: 'INSIGHT_RULE_METRIC("NATInstanceBytesInsightRule", "Sum")'
          Id: m1
          Period: 60
          Label: High_Bytes_Transfer
          ReturnData: true

#====================================================================================================================
# Centralized CloudWatch Dashboard
#====================================================================================================================
  DatabricksCWDashboard:
    Type: AWS::CloudWatch::Dashboard
    Condition: CreateDashboardRule
    Properties:
      # One time-series widget per bytes-transfer rule, laid out on a two-column grid
      # (each widget is 12 wide x 6 high) so that no two widgets share a position.
      DashboardBody: !Sub |
        {
          "widgets": [
            {
              "type": "metric",
              "x": 0,
              "y": 0,
              "width": 12,
              "height": 6,
              "properties": {
                "period": 60,
                "insightRule": {
                  "maxContributorCount": 50,
                  "orderBy": "Sum",
                  "ruleName": "${VPCEndpointBytesInsightRule.RuleName}"
                },
                "stacked": false,
                "view": "timeSeries",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  },
                  "right": {
                    "showUnits": false
                  }
                },
                "region": "${AWS::Region}",
                "title": "VPC Endpoints Bytes - Kinesis to Databricks",
                "legend": {
                  "position": "right"
                }
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 0,
              "width": 12,
              "height": 6,
              "properties": {
                "period": 60,
                "insightRule": {
                  "maxContributorCount": 50,
                  "orderBy": "Sum",
                  "ruleName": "${S3GatewayBytesInsightRule.RuleName}"
                },
                "stacked": false,
                "view": "timeSeries",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  },
                  "right": {
                    "showUnits": false
                  }
                },
                "region": "${AWS::Region}",
                "title": "Bytes Transfer - AWS Databricks Data Plane to S3",
                "legend": {
                  "position": "right"
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 6,
              "width": 12,
              "height": 6,
              "properties": {
                "period": 60,
                "insightRule": {
                  "maxContributorCount": 50,
                  "orderBy": "Sum",
                  "ruleName": "${PrivateLinkBytesInsightRule.RuleName}"
                },
                "stacked": false,
                "view": "timeSeries",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  },
                  "right": {
                    "showUnits": false
                  }
                },
                "region": "${AWS::Region}",
                "title": "Bytes Transfer - AWS Databricks to Databricks Control Plane",
                "legend": {
                  "position": "right"
                }
              }
            },
            {
              "type": "metric",
              "x": 12,
              "y": 6,
              "width": 12,
              "height": 6,
              "properties": {
                "period": 60,
                "insightRule": {
                  "maxContributorCount": 50,
                  "orderBy": "Sum",
                  "ruleName": "${NATInstanceBytesInsightRule.RuleName}"
                },
                "stacked": false,
                "view": "timeSeries",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  },
                  "right": {
                    "showUnits": false
                  }
                },
                "region": "${AWS::Region}",
                "title": "Bytes Transfer - Azure to AWS Databricks",
                "legend": {
                  "position": "right"
                }
              }
            },
            {
              "type": "metric",
              "x": 0,
              "y": 12,
              "width": 12,
              "height": 6,
              "properties": {
                "period": 60,
                "insightRule": {
                  "maxContributorCount": 50,
                  "orderBy": "Sum",
                  "ruleName": "${NATGatewayBytesInsightRule.RuleName}"
                },
                "stacked": false,
                "view": "timeSeries",
                "yAxis": {
                  "left": {
                    "showUnits": false
                  },
                  "right": {
                    "showUnits": false
                  }
                },
                "region": "${AWS::Region}",
                "title": "Bytes Transfer - AWS Databricks to Databricks Control Plane",
                "legend": {
                  "position": "right"
                }
              }
            }
          ]
        }
      DashboardName: DatabricksCWDashboard