package tinkerbell

import (
	"context"
	"errors"
	"fmt"
	"time"

	etcdv1 "github.com/aws/etcdadm-controller/api/v1beta1"
	tinkv1alpha1 "github.com/tinkerbell/tink/pkg/apis/core/v1alpha1"
	corev1 "k8s.io/api/core/v1"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"

	"github.com/aws/eks-anywhere/pkg/api/v1alpha1"
	"github.com/aws/eks-anywhere/pkg/cluster"
	"github.com/aws/eks-anywhere/pkg/constants"
	"github.com/aws/eks-anywhere/pkg/executables"
	"github.com/aws/eks-anywhere/pkg/filewriter"
	"github.com/aws/eks-anywhere/pkg/networkutils"
	"github.com/aws/eks-anywhere/pkg/providers"
	"github.com/aws/eks-anywhere/pkg/providers/common"
	"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/hardware"
	"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/rufiounreleased"
	"github.com/aws/eks-anywhere/pkg/providers/tinkerbell/stack"
	"github.com/aws/eks-anywhere/pkg/registrymirror"
	"github.com/aws/eks-anywhere/pkg/retrier"
	"github.com/aws/eks-anywhere/pkg/types"
	releasev1alpha1 "github.com/aws/eks-anywhere/release/api/v1alpha1"
	"github.com/aws/eks-anywhere/release/pkg/util/slices"
)

const (
	maxRetries    = 30
	backOffPeriod = 5 * time.Second
)

var (
	eksaTinkerbellDatacenterResourceType = fmt.Sprintf("tinkerbelldatacenterconfigs.%s", v1alpha1.GroupVersion.Group)
	eksaTinkerbellMachineResourceType    = fmt.Sprintf("tinkerbellmachineconfigs.%s", v1alpha1.GroupVersion.Group)
	tinkerbellStackPorts                 = []int{42113, 50051, 50061}

	// errExternalEtcdUnsupported is returned from create or upgrade when the user attempts to
	// create or upgrade a cluster with an external etcd configuration.
	errExternalEtcdUnsupported = errors.New("external etcd configuration is unsupported")
)

// Provider implements EKS Anywhere cluster management for the Tinkerbell (bare metal) provider.
type Provider struct {
	clusterConfig         *v1alpha1.Cluster
	datacenterConfig      *v1alpha1.TinkerbellDatacenterConfig
	machineConfigs        map[string]*v1alpha1.TinkerbellMachineConfig
	stackInstaller        stack.StackInstaller
	providerKubectlClient ProviderKubectlClient
	templateBuilder       *TemplateBuilder
	writer                filewriter.FileWriter
	keyGenerator          SSHAuthKeyGenerator

	hardwareCSVFile string
	catalogue       *hardware.Catalogue
	tinkerbellIP    string

	// TODO(chrisdoherty4) Temporarily depend on the netclient until the validator can be injected.
	// This is already a dependency, just uncached, because we require it during the initializing
	// constructor call for constructing the validator in-line.
	netClient networkutils.NetClient

	forceCleanup bool
	skipIpCheck  bool
	retrier      *retrier.Retrier
}

// ProviderKubectlClient defines the kubectl operations the Tinkerbell provider depends on.
type ProviderKubectlClient interface {
	ApplyKubeSpecFromBytesForce(ctx context.Context, cluster *types.Cluster, data []byte) error
	ApplyKubeSpecFromBytesWithNamespace(ctx context.Context, cluster *types.Cluster, data []byte, namespace string) error
	DeleteEksaDatacenterConfig(ctx context.Context, eksaTinkerbellDatacenterResourceType string, tinkerbellDatacenterConfigName string, kubeconfigFile string, namespace string) error
	DeleteEksaMachineConfig(ctx context.Context, eksaTinkerbellMachineResourceType string, tinkerbellMachineConfigName string, kubeconfigFile string, namespace string) error
	GetMachineDeployment(ctx context.Context, machineDeploymentName string, opts ...executables.KubectlOpt) (*clusterv1.MachineDeployment, error)
	GetEksaCluster(ctx context.Context, cluster *types.Cluster, clusterName string) (*v1alpha1.Cluster, error)
	GetEksaTinkerbellDatacenterConfig(ctx context.Context, tinkerbellDatacenterConfigName string, kubeconfigFile string, namespace string) (*v1alpha1.TinkerbellDatacenterConfig, error)
	GetEksaTinkerbellMachineConfig(ctx context.Context, tinkerbellMachineConfigName string, kubeconfigFile string, namespace string) (*v1alpha1.TinkerbellMachineConfig, error)
	GetKubeadmControlPlane(ctx context.Context, cluster *types.Cluster, clusterName string, opts ...executables.KubectlOpt) (*controlplanev1.KubeadmControlPlane, error)
	GetEtcdadmCluster(ctx context.Context, cluster *types.Cluster, clusterName string, opts ...executables.KubectlOpt) (*etcdv1.EtcdadmCluster, error)
	GetSecret(ctx context.Context, secretObjectName string, opts ...executables.KubectlOpt) (*corev1.Secret, error)
	UpdateAnnotation(ctx context.Context, resourceType, objectName string, annotations map[string]string, opts ...executables.KubectlOpt) error
	WaitForDeployment(ctx context.Context, cluster *types.Cluster, timeout string, condition string, target string, namespace string) error
	GetUnprovisionedTinkerbellHardware(_ context.Context, kubeconfig, namespace string) ([]tinkv1alpha1.Hardware, error)
	GetProvisionedTinkerbellHardware(_ context.Context, kubeconfig, namespace string) ([]tinkv1alpha1.Hardware, error)
	WaitForRufioMachines(ctx context.Context, cluster *types.Cluster, timeout string, condition string, namespace string) error
	SearchTinkerbellMachineConfig(ctx context.Context, name string, kubeconfigFile string, namespace string) ([]*v1alpha1.TinkerbellMachineConfig, error)
	SearchTinkerbellDatacenterConfig(ctx context.Context, name string, kubeconfigFile string, namespace string) ([]*v1alpha1.TinkerbellDatacenterConfig, error)
	AllTinkerbellHardware(ctx context.Context, kubeconfig string) ([]tinkv1alpha1.Hardware, error)
	AllBaseboardManagements(ctx context.Context, kubeconfig string) ([]rufiounreleased.BaseboardManagement, error)
	HasCRD(ctx context.Context, kubeconfig, crd string) (bool, error)
	DeleteCRD(ctx context.Context, kubeconfig, crd string) error
}

// SSHAuthKeyGenerator generates ssh keys and writes them to a FileWriter.
type SSHAuthKeyGenerator interface {
	GenerateSSHAuthKey(filewriter.FileWriter) (string, error)
}

// NewProvider constructs a Tinkerbell Provider from the given configuration and dependencies.
func NewProvider(
	datacenterConfig *v1alpha1.TinkerbellDatacenterConfig,
	machineConfigs map[string]*v1alpha1.TinkerbellMachineConfig,
	clusterConfig *v1alpha1.Cluster,
	hardwareCSVPath string,
	writer filewriter.FileWriter,
	docker stack.Docker,
	helm stack.Helm,
	providerKubectlClient ProviderKubectlClient,
	tinkerbellIP string,
	now types.NowFunc,
	forceCleanup bool,
	skipIpCheck bool,
) (*Provider, error) {
	var controlPlaneMachineSpec, workerNodeGroupMachineSpec, etcdMachineSpec *v1alpha1.TinkerbellMachineConfigSpec
	if clusterConfig.Spec.ControlPlaneConfiguration.MachineGroupRef != nil && machineConfigs[clusterConfig.Spec.ControlPlaneConfiguration.MachineGroupRef.Name] != nil {
		controlPlaneMachineSpec = &machineConfigs[clusterConfig.Spec.ControlPlaneConfiguration.MachineGroupRef.Name].Spec
	}

	workerNodeGroupMachineSpecs := make(map[string]v1alpha1.TinkerbellMachineConfigSpec, len(machineConfigs))
	for _, wnConfig := range clusterConfig.Spec.WorkerNodeGroupConfigurations {
		if wnConfig.MachineGroupRef != nil && machineConfigs[wnConfig.MachineGroupRef.Name] != nil {
			workerNodeGroupMachineSpec = &machineConfigs[wnConfig.MachineGroupRef.Name].Spec
			workerNodeGroupMachineSpecs[wnConfig.MachineGroupRef.Name] = *workerNodeGroupMachineSpec
		}
	}

	if clusterConfig.Spec.ExternalEtcdConfiguration != nil {
		if clusterConfig.Spec.ExternalEtcdConfiguration.MachineGroupRef != nil && machineConfigs[clusterConfig.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name] != nil {
			etcdMachineSpec = &machineConfigs[clusterConfig.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name].Spec
		}
	}

	var proxyConfig *v1alpha1.ProxyConfiguration
	if clusterConfig.Spec.ProxyConfiguration != nil {
		proxyConfig = &v1alpha1.ProxyConfiguration{
			HttpProxy:  clusterConfig.Spec.ProxyConfiguration.HttpProxy,
			HttpsProxy: clusterConfig.Spec.ProxyConfiguration.HttpsProxy,
			NoProxy:    generateNoProxyList(clusterConfig, datacenterConfig.Spec),
		}
		// The local Tinkerbell IP is only needed for the kind cluster during management cluster
		// create and upgrade. generateNoProxyList is used by all cluster operations, so the
		// Tinkerbell IP is appended here instead of inside generateNoProxyList.
		if !slices.SliceContains(proxyConfig.NoProxy, tinkerbellIP) {
			proxyConfig.NoProxy = append(proxyConfig.NoProxy, tinkerbellIP)
		}
	} else {
		proxyConfig = nil
	}

	return &Provider{
		clusterConfig:    clusterConfig,
		datacenterConfig: datacenterConfig,
		machineConfigs:   machineConfigs,
		stackInstaller: stack.NewInstaller(
			docker,
			writer,
			helm,
			constants.EksaSystemNamespace,
			clusterConfig.Spec.ClusterNetwork.Pods.CidrBlocks[0],
			registrymirror.FromCluster(clusterConfig),
			proxyConfig,
		),
		providerKubectlClient: providerKubectlClient,
		templateBuilder: &TemplateBuilder{
			datacenterSpec:              &datacenterConfig.Spec,
			controlPlaneMachineSpec:     controlPlaneMachineSpec,
			WorkerNodeGroupMachineSpecs: workerNodeGroupMachineSpecs,
			etcdMachineSpec:             etcdMachineSpec,
			tinkerbellIP:                tinkerbellIP,
			now:                         now,
		},
		writer:          writer,
		hardwareCSVFile: hardwareCSVPath,

		// TODO(chrisdoherty4) Inject the catalogue dependency so we can dynamically construct the
		// indexing capabilities.
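		// Indexes are registered below for hardware IDs, hardware BMC refs, BMC names, and
		// secret names so catalogue entries can be looked up by those keys.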
		catalogue: hardware.NewCatalogue(
			hardware.WithHardwareIDIndex(),
			hardware.WithHardwareBMCRefIndex(),
			hardware.WithBMCNameIndex(),
			hardware.WithSecretNameIndex(),
		),
		tinkerbellIP: tinkerbellIP,
		netClient:    &networkutils.DefaultNetClient{},
		retrier:      retrier.NewWithMaxRetries(maxRetries, backOffPeriod),

		// (chrisdoherty4) We're hard coding the dependency and monkey patching in testing because the provider
		// isn't very testable right now and we already have tests in the `tinkerbell` package so can monkey patch
		// directly. This is very much a hack for testability.
		keyGenerator: common.SshAuthKeyGenerator{},

		// Behavioral flags.
		forceCleanup: forceCleanup,
		skipIpCheck:  skipIpCheck,
	}, nil
}

// Name returns the name of the Tinkerbell provider.
func (p *Provider) Name() string {
	return constants.TinkerbellProviderName
}

// DatacenterResourceType returns the EKS-A Tinkerbell datacenter config resource type.
func (p *Provider) DatacenterResourceType() string {
	return eksaTinkerbellDatacenterResourceType
}

// MachineResourceType returns the EKS-A Tinkerbell machine config resource type.
func (p *Provider) MachineResourceType() string {
	return eksaTinkerbellMachineResourceType
}

func (p *Provider) UpdateSecrets(ctx context.Context, cluster *types.Cluster, _ *cluster.Spec) error {
	// TODO: implement
	return nil
}

func (p *Provider) UpdateKubeConfig(content *[]byte, clusterName string) error {
	// TODO: Figure out if something is needed here
	return nil
}

// Version returns the Tinkerbell provider version from the cluster spec's control plane versions bundle.
func (p *Provider) Version(clusterSpec *cluster.Spec) string {
	versionsBundle := clusterSpec.ControlPlaneVersionsBundle()
	return versionsBundle.Tinkerbell.Version
}

// EnvMap returns the environment variables used to render the CAPT deployment manifest.
func (p *Provider) EnvMap(spec *cluster.Spec) (map[string]string, error) {
	return map[string]string{
		// TINKERBELL_IP is an input to the CAPT deployment and is used as part of default template
		// generation. However, we use custom templates and leverage the template override
		// functionality of CAPT, so this value never gets used.
		//
		// Deployment manifest requiring the env var for replacement.
		// https://github.com/tinkerbell/cluster-api-provider-tinkerbell/blob/main/config/manager/manager.yaml#L23
		//
		// Template override
		// https://github.com/tinkerbell/cluster-api-provider-tinkerbell/blob/main/controllers/machine.go#L182
		//
		// Env read having set TINKERBELL_IP in the deployment manifest.
		// https://github.com/tinkerbell/cluster-api-provider-tinkerbell/blob/main/controllers/machine.go#L192
		"TINKERBELL_IP":               "IGNORED",
		"KUBEADM_BOOTSTRAP_TOKEN_TTL": "120m",
	}, nil
}

// SetStackInstaller configures p to use installer for Tinkerbell stack install and upgrade.
func (p *Provider) SetStackInstaller(installer stack.StackInstaller) {
	p.stackInstaller = installer
}

// GetDeployments returns the provider's deployments keyed by namespace.
func (p *Provider) GetDeployments() map[string][]string {
	return map[string][]string{
		"capt-system": {"capt-controller-manager"},
	}
}

// GetInfrastructureBundle returns the Tinkerbell infrastructure manifests for the cluster spec's
// control plane versions bundle.
func (p *Provider) GetInfrastructureBundle(clusterSpec *cluster.Spec) *types.InfrastructureBundle {
	versionsBundle := clusterSpec.ControlPlaneVersionsBundle()
	folderName := fmt.Sprintf("infrastructure-tinkerbell/%s/", versionsBundle.Tinkerbell.Version)
	infraBundle := types.InfrastructureBundle{
		FolderName: folderName,
		Manifests: []releasev1alpha1.Manifest{
			versionsBundle.Tinkerbell.Components,
			versionsBundle.Tinkerbell.Metadata,
			versionsBundle.Tinkerbell.ClusterTemplate,
		},
	}
	return &infraBundle
}

// DatacenterConfig returns the provider's TinkerbellDatacenterConfig.
func (p *Provider) DatacenterConfig(_ *cluster.Spec) providers.DatacenterConfig {
	return p.datacenterConfig
}

// MachineConfigs returns the machine configs referenced by the cluster's control plane, external
// etcd, and worker node group configurations, annotated and marked as managed where applicable.
func (p *Provider) MachineConfigs(_ *cluster.Spec) []providers.MachineConfig {
	configs := make(map[string]providers.MachineConfig, len(p.machineConfigs))

	controlPlaneMachineName := p.clusterConfig.Spec.ControlPlaneConfiguration.MachineGroupRef.Name
	p.machineConfigs[controlPlaneMachineName].Annotations = map[string]string{p.clusterConfig.ControlPlaneAnnotation(): "true"}
	if p.clusterConfig.IsManaged() {
		p.machineConfigs[controlPlaneMachineName].SetManagedBy(p.clusterConfig.ManagedBy())
	}
	configs[controlPlaneMachineName] = p.machineConfigs[controlPlaneMachineName]

	if p.clusterConfig.Spec.ExternalEtcdConfiguration != nil {
		etcdMachineName := p.clusterConfig.Spec.ExternalEtcdConfiguration.MachineGroupRef.Name
		p.machineConfigs[etcdMachineName].Annotations = map[string]string{p.clusterConfig.EtcdAnnotation(): "true"}
		if etcdMachineName != controlPlaneMachineName {
			configs[etcdMachineName] = p.machineConfigs[etcdMachineName]
			if p.clusterConfig.IsManaged() {
				p.machineConfigs[etcdMachineName].SetManagedBy(p.clusterConfig.ManagedBy())
			}
		}
	}

	for _, workerNodeGroupConfiguration := range p.clusterConfig.Spec.WorkerNodeGroupConfigurations {
		workerMachineName := workerNodeGroupConfiguration.MachineGroupRef.Name
		if _, ok := configs[workerMachineName]; !ok {
			configs[workerMachineName] = p.machineConfigs[workerMachineName]
			if p.clusterConfig.IsManaged() {
				p.machineConfigs[workerMachineName].SetManagedBy(p.clusterConfig.ManagedBy())
			}
		}
	}

	return providers.ConfigsMapToSlice(configs)
}

// ChangeDiff returns the Tinkerbell component version change between the current and new cluster
// specs, or nil if the version is unchanged.
func (p *Provider) ChangeDiff(currentSpec, newSpec *cluster.Spec) *types.ComponentChangeDiff {
	currentVersionsBundle := currentSpec.ControlPlaneVersionsBundle()
	newVersionsBundle := newSpec.ControlPlaneVersionsBundle()
	if currentVersionsBundle.Tinkerbell.Version == newVersionsBundle.Tinkerbell.Version {
		return nil
	}

	return &types.ComponentChangeDiff{
		ComponentName: constants.TinkerbellProviderName,
		NewVersion:    newVersionsBundle.Tinkerbell.Version,
		OldVersion:    currentVersionsBundle.Tinkerbell.Version,
	}
}

// InstallCustomProviderComponents is a no-op for the Tinkerbell provider.
func (p *Provider) InstallCustomProviderComponents(ctx context.Context, kubeconfigFile string) error {
	return nil
}
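// The sketch below illustrates how a caller might wire the provider together. It is illustrative
// only: the docker, helm, kubectl, writer, and installer values are hypothetical stand-ins for
// the dependencies normally injected by the CLI, and the CSV path and IP are placeholders.
//
//	provider, err := tinkerbell.NewProvider(
//		datacenterConfig, machineConfigs, clusterConfig,
//		"hardware.csv",
//		writer, docker, helm, kubectl,
//		"203.0.113.10", // tinkerbellIP
//		time.Now,       // now
//		false,          // forceCleanup
//		false,          // skipIpCheck
//	)
//	if err != nil {
//		return err
//	}
//	provider.SetStackInstaller(installer)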