# BEGIN variables

# ---------------------------------------------------------------------------
# AWS account / provider inputs
# ---------------------------------------------------------------------------

variable "credentials" {
  description = "path to the aws credentials file"
  default     = "~/.aws/credentials"
  type        = string
}

variable "profile" {
  description = "name of the aws config profile"
  default     = "default"
  type        = string
}

# ---------------------------------------------------------------------------
# Cluster identity and placement
# ---------------------------------------------------------------------------

variable "cluster_name" {
  description = "unique name of the eks cluster"
  type        = string
}

variable "k8s_version" {
  description = "kubernetes version"
  default     = "1.27"
  type        = string
}

variable "region" {
  description = "name of aws region to use"
  type        = string
}

# One private and one public subnet is created per entry in this list, so it
# must not be longer than cidr_private / cidr_public.
variable "azs" {
  description = "list of aws availability zones in aws region"
  type        = list(string)
}

# ---------------------------------------------------------------------------
# Network address ranges
# ---------------------------------------------------------------------------

variable "cidr_vpc" {
  description = "RFC 1918 CIDR range for EKS cluster VPC"
  default     = "192.168.0.0/16"
  type        = string
}

variable "cidr_private" {
  description = "RFC 1918 CIDR range list for EKS cluster VPC private subnets"
  default     = ["192.168.64.0/18", "192.168.128.0/18", "192.168.192.0/18"]
  type        = list(string)
}

variable "cidr_public" {
  description = "RFC 1918 CIDR range list for EKS cluster VPC public subnets"
  default     = ["192.168.0.0/24", "192.168.1.0/24", "192.168.2.0/24"]
  type        = list(string)
}

# ---------------------------------------------------------------------------
# Shared storage (EFS / FSx for Lustre)
# ---------------------------------------------------------------------------

variable "efs_performance_mode" {
  description = "EFS performance mode"
  default     = "generalPurpose"
  type        = string
}

variable "efs_throughput_mode" {
  description = "EFS throughput mode"
  default     = "bursting"
  type        = string
}

variable "import_path" {
  description = "fsx for lustre s3 import path"
  type        = string
  default     = ""
}

# ---------------------------------------------------------------------------
# Inference nodes
# ---------------------------------------------------------------------------

variable "inference_max" {
  description = "Maximum inference nodes"
  type        = string
  default     = "2"
}

variable "inference_instance_type" {
  description = "GPU enabled instance types for inference. Must have 1 GPU."
  default     = "g4dn.xlarge,g5.xlarge"
  type        = string
}

# ---------------------------------------------------------------------------
# Training node group
# ---------------------------------------------------------------------------

variable "nodegroup_name" {
  description = "Training node group name in cluster"
  type        = string
  default     = "training"
}

variable "node_volume_size" {
  description = "EKS cluster worker node EBS volume size in GBs"
  default     = "200"
  type        = string
}

variable "node_instance_type" {
  description = "GPU enabled instance types for training. Must have 8 GPUs."
  default     = "p3dn.24xlarge"
  type        = string
}

variable "key_pair" {
  description = "Name of EC2 key pair used to launch EKS cluster worker node EC2 instances"
  type        = string
  default     = ""
}

variable "node_group_desired" {
  description = "EKS worker node auto-scaling group desired size"
  default     = "0"
  type        = string
}

variable "node_group_max" {
  description = "EKS worker node auto-scaling group maximum"
  default     = "8"
  type        = string
}

variable "node_group_min" {
  description = "EKS worker node auto-scaling group minimum"
  default     = "0"
  type        = string
}

variable "capacity_type" {
  description = "ON_DEMAND or SPOT capacity"
  default     = "ON_DEMAND"
  type        = string
}

# END variables

# AWS provider configured from the region, credentials file and profile inputs.
provider "aws" {
  region                   = var.region
  shared_credentials_files = [var.credentials]
  profile                  = var.profile
}

data "aws_caller_identity" "current" {}

# ---------------------------------------------------------------------------
# VPC and subnets
# ---------------------------------------------------------------------------

resource "aws_vpc" "vpc" {
  cidr_block           = var.cidr_vpc
  enable_dns_support   = true
  enable_dns_hostnames = true

  tags = {
    Name = "${var.cluster_name}-vpc",
  }
}

# One private subnet per availability zone; the i-th AZ gets the i-th CIDR
# from cidr_private (indexing fails if cidr_private is shorter than azs).
# NOTE(review): the Name tag below is identical to the one used for the public
# subnets, so subnet names collide - consider distinct names; left unchanged
# here to avoid re-tagging existing deployments.
resource "aws_subnet" "private" {
  count = length(var.azs)

  availability_zone = var.azs[count.index]
  cidr_block        = var.cidr_private[count.index]
  vpc_id            = aws_vpc.vpc.id

  tags = {
    Name                                        = "${var.cluster_name}-subnet-${count.index}",
    "kubernetes.io/cluster/${var.cluster_name}" = "shared"
  }
}

# One public subnet per availability zone; the i-th AZ gets the i-th CIDR
# from cidr_public (indexing fails if cidr_public is shorter than azs).
resource "aws_subnet" "public" {
  count = length(var.azs)

  availability_zone = var.azs[count.index]
  cidr_block        = var.cidr_public[count.index]
  vpc_id            = aws_vpc.vpc.id

  tags = {
    Name                                        = "${var.cluster_name}-subnet-${count.index}",
    "kubernetes.io/cluster/${var.cluster_name}" = "shared"
  }
}

# ---------------------------------------------------------------------------
# Gateways
# ---------------------------------------------------------------------------

resource "aws_internet_gateway" "igw" {
  vpc_id = aws_vpc.vpc.id

  tags = {
    Name = "${var.cluster_name}-igw"
  }
}

# Elastic IP consumed by the NAT gateway below.
resource "aws_eip" "ip" {
}

# Single NAT gateway placed in the first public subnet; all private subnets
# route their outbound traffic through it.
resource "aws_nat_gateway" "ngw" {
  allocation_id = aws_eip.ip.id
  subnet_id     = aws_subnet.public[0].id

  tags = {
    Name = "${var.cluster_name}-ngw"
  }

  # The subnet dependency is implicit via subnet_id; only the internet gateway
  # needs to be ordered explicitly.
  depends_on = [aws_internet_gateway.igw]
}

# ---------------------------------------------------------------------------
# Routing
# ---------------------------------------------------------------------------

# Default route for private subnets goes through the NAT gateway.
resource "aws_route_table" "private" {
  vpc_id = aws_vpc.vpc.id

  route {
    cidr_block = "0.0.0.0/0"
    # A NAT gateway target must be set with nat_gateway_id; gateway_id is for
    # internet / virtual private gateways and causes perpetual plan diffs when
    # given a NAT gateway ID.
    nat_gateway_id = aws_nat_gateway.ngw.id
  }

  tags = {
    Name = "${var.cluster_name}-private"
  }
}

# Default route for public subnets goes straight to the internet gateway.
resource "aws_route_table" "public" {
  vpc_id = aws_vpc.vpc.id

  route {
    cidr_block = "0.0.0.0/0"
    gateway_id = aws_internet_gateway.igw.id
  }

  tags = {
    Name = "${var.cluster_name}-public"
  }
}

resource "aws_route_table_association" "private" {
  count = length(var.azs)

  subnet_id      = aws_subnet.private[count.index].id
  route_table_id = aws_route_table.private.id
}

resource "aws_route_table_association" "public" {
  count = length(var.azs)

  subnet_id      = aws_subnet.public[count.index].id
  route_table_id = aws_route_table.public.id
}

# NOTE(review): this resource continues past the end of the reviewed range;
# its visible tokens are reproduced unchanged.
resource "aws_iam_role" "cluster_role" {
  name = "${var.cluster_name}-control-role"
  assume_role_policy = <