From 849aa74da904a28b6778b06736e7eee0b9216475 Mon Sep 17 00:00:00 2001 From: Rajashree Mandaogane Date: Fri, 6 Aug 2021 17:16:39 -0700 Subject: [PATCH 03/34] Unstacked etcd and controlplane upgrade Rename controlplane upgrade annotation variable Add controlplane upgrade complete annotation to sync etcd to v1beta1 Check if controlplane is paused before patching it with paused annotation Patch etcd cluster with upgrade complete annotation only after upgrade After KCP upgrade is completed, the controller checks the condition "MachinesSpecUpToDate" exists and marks it to true. The controller also updates the etcd cluster with an annotation to indicate the controlplane upgrade is complete. But it should annotate etcd cluster only if the MachineSpecUpToDate condition is False, since that will happen only immediately after an upgrade. Without checking for false it will keep annotating the etcd cluster on further reconcile calls. cr: https://code.amazon.com/reviews/CR-55949234 Synchronize upgrade flow Etcd machines need to be rolled out first so that KCP can get the correct set of etcd endpoints. KCP rollout should be stalled till etcd upgrade is done. Cluster controller may not be able to add the paused annotation on time once updated spec is applied, since etcd and KCP controllers could have started reconciling at the same time. So KCP checks if etcd cluster has the upgrading annotation, and does not proceed further until the annotation is removed. Once KCP has rolled out updated machines, it will add an annotation on the etcd cluster to indicate that the KCP upgrade is complete. In absence of static etcd endpoints, currently etcd controller retains older etcd members for KCP upgrade, and once KCP upgrade complete annotation is set, the etcd controller will remove these older etcd members cr: https://code.amazon.com/reviews/CR-54933898 --- api/v1alpha3/common_types.go | 3 ++ api/v1beta1/common_types.go | 3 ++ controllers/external/util.go | 29 +++++++++++++ .../kubeadm/api/v1alpha3/condition_consts.go | 8 ++++ .../kubeadm/api/v1beta1/condition_consts.go | 8 ++++ .../internal/controllers/controller.go | 42 +++++++++++++++++++ .../kubeadm/internal/controllers/upgrade.go | 6 +++ .../kubeadm/internal/workload_cluster.go | 1 + .../kubeadm/internal/workload_cluster_etcd.go | 9 +++- .../cluster/cluster_controller_phases.go | 10 +++-- 10 files changed, 114 insertions(+), 5 deletions(-) diff --git a/api/v1alpha3/common_types.go b/api/v1alpha3/common_types.go index 58ef4a74e..1c7d411bd 100644 --- a/api/v1alpha3/common_types.go +++ b/api/v1alpha3/common_types.go @@ -70,6 +70,9 @@ const ( // ClusterSecretType defines the type of secret created by core components. ClusterSecretType corev1.SecretType = "cluster.x-k8s.io/secret" //nolint:gosec + + // ControlPlaneUpgradeCompletedAnnotation is set by the controlplane on the external etcd object after controlplane upgrade is completed + ControlPlaneUpgradeCompletedAnnotation = "controlplane.cluster.x-k8s.io/upgrade-complete" ) // MachineAddressType describes a valid MachineAddress type. diff --git a/api/v1beta1/common_types.go b/api/v1beta1/common_types.go index 947ce108c..a557464fc 100644 --- a/api/v1beta1/common_types.go +++ b/api/v1beta1/common_types.go @@ -142,6 +142,9 @@ const ( // will receive the resulting object. TopologyDryRunAnnotation = "topology.cluster.x-k8s.io/dry-run" + // ControlPlaneUpgradeCompletedAnnotation is set by the controlplane on the external etcd object after controlplane upgrade is completed. + ControlPlaneUpgradeCompletedAnnotation = "controlplane.cluster.x-k8s.io/upgrade-complete" + // ReplicasManagedByAnnotation is an annotation that indicates external (non-Cluster API) management of infra scaling. // The practical effect of this is that the capi "replica" count should be passively derived from the number of observed infra machines, // instead of being a source of truth for eventual consistency. diff --git a/controllers/external/util.go b/controllers/external/util.go index 7e0fd392a..5b6443c78 100644 --- a/controllers/external/util.go +++ b/controllers/external/util.go @@ -254,3 +254,32 @@ func GetExternalEtcdEndpoints(externalEtcd *unstructured.Unstructured) (string, return endpoints, found, nil } + +func IsExternalEtcdUpgrading(externalEtcd *unstructured.Unstructured) (bool, error) { + annotations, hasAnnotations, err := unstructured.NestedStringMap(externalEtcd.Object, "metadata", "annotations") + if err != nil { + return false, errors.Wrapf(err, "failed to check if external etcd is undergoing upgrade %v %q", externalEtcd.GroupVersionKind(), + externalEtcd.GetName()) + } + + if !hasAnnotations { + return false, nil + } + + _, hasUpgradingAnnotation := annotations["etcdcluster.cluster.x-k8s.io/upgrading"] + return hasUpgradingAnnotation, nil +} + +func SetKCPUpdateCompleteAnnotationOnEtcdadmCluster(externalEtcd *unstructured.Unstructured) error { + annotations, hasAnnotations, err := unstructured.NestedStringMap(externalEtcd.Object, "metadata", "annotations") + if err != nil { + return errors.Wrapf(err, "failed to update external etcd annotation after controlplane upgrade completed %v %q", externalEtcd.GroupVersionKind(), + externalEtcd.GetName()) + } + + if !hasAnnotations { + annotations = make(map[string]string) + } + annotations[clusterv1.ControlPlaneUpgradeCompletedAnnotation] = "true" + return unstructured.SetNestedStringMap(externalEtcd.UnstructuredContent(), annotations, "metadata", "annotations") +} diff --git a/controlplane/kubeadm/api/v1alpha3/condition_consts.go b/controlplane/kubeadm/api/v1alpha3/condition_consts.go index 6d3d96850..84b50124b 100644 --- a/controlplane/kubeadm/api/v1alpha3/condition_consts.go +++ b/controlplane/kubeadm/api/v1alpha3/condition_consts.go @@ -54,6 +54,14 @@ const ( // RollingUpdateInProgressReason (Severity=Warning) documents a KubeadmControlPlane object executing a // rolling upgrade for aligning the machines spec to the desired state. RollingUpdateInProgressReason = "RollingUpdateInProgress" + + // ExternalEtcdEndpointsAvailable documents that the external etcd cluster's endpoints are available, and if KCP spec has changed + // then a KCP rollout can progress. + ExternalEtcdEndpointsAvailable clusterv1alpha3.ConditionType = "ExternalEtcdEndpointsAvailable" + + // ExternalEtcdUndergoingUpgrade (Severity=Info) documents the external etcd cluster being used by current KCP object is + // undergoing an upgrade and that the etcd endpoints will change once the upgrade completes + ExternalEtcdUndergoingUpgrade = "ExternalEtcdUndergoingUpgrade" ) const ( diff --git a/controlplane/kubeadm/api/v1beta1/condition_consts.go b/controlplane/kubeadm/api/v1beta1/condition_consts.go index e9870d34c..adc1b2a0a 100644 --- a/controlplane/kubeadm/api/v1beta1/condition_consts.go +++ b/controlplane/kubeadm/api/v1beta1/condition_consts.go @@ -54,6 +54,14 @@ const ( // RollingUpdateInProgressReason (Severity=Warning) documents a KubeadmControlPlane object executing a // rolling upgrade for aligning the machines spec to the desired state. RollingUpdateInProgressReason = "RollingUpdateInProgress" + + // ExternalEtcdEndpointsAvailable documents that the external etcd cluster's endpoints are available, and if KCP spec has changed + // then a KCP rollout can progress. + ExternalEtcdEndpointsAvailable clusterv1.ConditionType = "ExternalEtcdEndpointsAvailable" + + // ExternalEtcdUndergoingUpgrade (Severity=Info) documents the external etcd cluster being used by current KCP object is + // undergoing an upgrade and that the etcd endpoints will change once the upgrade completes + ExternalEtcdUndergoingUpgrade = "ExternalEtcdUndergoingUpgrade" ) const ( diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index 277cd2e09..0ebace482 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -196,12 +196,35 @@ func (r *KubeadmControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl. sort.Strings(currentEtcdEndpoints) currentKCPEndpoints := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External.Endpoints if !reflect.DeepEqual(currentEtcdEndpoints, currentKCPEndpoints) { + /* During upgrade, KCP spec's endpoints will again be an empty list, and will get populated by the cluster controller once the + external etcd controller has set them. If the KCP controller proceeds without checking whether the etcd cluster is undergoing upgrade, + there is a chance it will get the current un-updated endpoints from the etcd cluster object, and those would end up being endpoints of the + etcd members that will get deleted during upgrade. Hence the controller checks and stalls if the etcd cluster is undergoing upgrade and proceeds + only after the etcd upgrade is completed as that guarantees that the KCP has latest set of endpoints. + */ + etcdUpgradeInProgress, err := external.IsExternalEtcdUpgrading(externalEtcd) + if err != nil { + return ctrl.Result{}, err + } + if etcdUpgradeInProgress { + log.Info("Etcd undergoing upgrade, marking etcd endpoints available condition as false, since new endpoints will be available only after etcd upgrade") + if conditions.IsTrue(kcp, controlplanev1.ExternalEtcdEndpointsAvailable) || conditions.IsUnknown(kcp, controlplanev1.ExternalEtcdEndpointsAvailable) { + conditions.MarkFalse(kcp, controlplanev1.ExternalEtcdEndpointsAvailable, controlplanev1.ExternalEtcdUndergoingUpgrade, clusterv1.ConditionSeverityInfo, "") + if err := patchKubeadmControlPlane(ctx, patchHelper, kcp); err != nil { + return ctrl.Result{}, err + } + } + return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil + } kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External.Endpoints = currentEtcdEndpoints if err := patchHelper.Patch(ctx, kcp); err != nil { log.Error(err, "Failed to patch KubeadmControlPlane to update external etcd endpoints") return ctrl.Result{}, err } } + if conditions.IsFalse(kcp, controlplanev1.ExternalEtcdEndpointsAvailable) { + conditions.MarkTrue(kcp, controlplanev1.ExternalEtcdEndpointsAvailable) + } } // Add finalizer first if not exist to avoid the race condition between init and delete @@ -414,6 +437,25 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * // NOTE: we are checking the condition already exists in order to avoid to set this condition at the first // reconciliation/before a rolling upgrade actually starts. if conditions.Has(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) { + if conditions.IsFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) { + /* Once KCP upgrade has completed, the controller will annotate the external etcd object to indicate that the older KCP machines + are no longer part of the cluster, and so any older out-of-date etcd members and machines can be deleted + */ + if cluster.Spec.ManagedExternalEtcdRef != nil { + etcdRef := cluster.Spec.ManagedExternalEtcdRef + externalEtcd, err := external.Get(ctx, r.Client, etcdRef, cluster.Namespace) + if err != nil { + return ctrl.Result{}, err + } + log.Info("Adding upgrade complete annotation on etcdadmCluster") + if err := external.SetKCPUpdateCompleteAnnotationOnEtcdadmCluster(externalEtcd); err != nil { + return ctrl.Result{}, err + } + if err := r.Client.Update(ctx, externalEtcd); err != nil { + return ctrl.Result{}, err + } + } + } conditions.MarkTrue(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition) } } diff --git a/controlplane/kubeadm/internal/controllers/upgrade.go b/controlplane/kubeadm/internal/controllers/upgrade.go index a3c0a6f8e..cde7f6ba0 100644 --- a/controlplane/kubeadm/internal/controllers/upgrade.go +++ b/controlplane/kubeadm/internal/controllers/upgrade.go @@ -102,6 +102,12 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane( } } + if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil && kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External != nil { + if err := workloadCluster.UpdateExternalEtcdEndpointsInKubeadmConfigMap(ctx, kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.Etcd.External.Endpoints, parsedVersion); err != nil { + return ctrl.Result{}, errors.Wrap(err, "failed to update the external etcd endpoints in the kubeadm config map") + } + } + if kcp.Spec.KubeadmConfigSpec.ClusterConfiguration != nil { if err := workloadCluster.UpdateAPIServerInKubeadmConfigMap(ctx, kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.APIServer, parsedVersion); err != nil { return ctrl.Result{}, errors.Wrap(err, "failed to update api server in the kubeadm config map") diff --git a/controlplane/kubeadm/internal/workload_cluster.go b/controlplane/kubeadm/internal/workload_cluster.go index 4112106f5..329ae81b1 100644 --- a/controlplane/kubeadm/internal/workload_cluster.go +++ b/controlplane/kubeadm/internal/workload_cluster.go @@ -109,6 +109,7 @@ type WorkloadCluster interface { UpdateImageRepositoryInKubeadmConfigMap(ctx context.Context, imageRepository string, version semver.Version) error UpdateEtcdVersionInKubeadmConfigMap(ctx context.Context, imageRepository, imageTag string, version semver.Version) error UpdateEtcdExtraArgsInKubeadmConfigMap(ctx context.Context, extraArgs map[string]string, version semver.Version) error + UpdateExternalEtcdEndpointsInKubeadmConfigMap(ctx context.Context, endpoints []string, version semver.Version) error UpdateAPIServerInKubeadmConfigMap(ctx context.Context, apiServer bootstrapv1.APIServer, version semver.Version) error UpdateControllerManagerInKubeadmConfigMap(ctx context.Context, controllerManager bootstrapv1.ControlPlaneComponent, version semver.Version) error UpdateSchedulerInKubeadmConfigMap(ctx context.Context, scheduler bootstrapv1.ControlPlaneComponent, version semver.Version) error diff --git a/controlplane/kubeadm/internal/workload_cluster_etcd.go b/controlplane/kubeadm/internal/workload_cluster_etcd.go index 3b3662a29..6c814656e 100644 --- a/controlplane/kubeadm/internal/workload_cluster_etcd.go +++ b/controlplane/kubeadm/internal/workload_cluster_etcd.go @@ -18,7 +18,6 @@ package internal import ( "context" - "github.com/blang/semver" "github.com/pkg/errors" kerrors "k8s.io/apimachinery/pkg/util/errors" @@ -111,6 +110,14 @@ func (w *Workload) UpdateEtcdExtraArgsInKubeadmConfigMap(ctx context.Context, ex }, version) } +func (w *Workload) UpdateExternalEtcdEndpointsInKubeadmConfigMap(ctx context.Context, endpoints []string, version semver.Version) error { + return w.updateClusterConfiguration(ctx, func(c *bootstrapv1.ClusterConfiguration) { + if c.Etcd.External != nil { + c.Etcd.External.Endpoints = endpoints + } + }, version) +} + // RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster. // Removing the last remaining member of the cluster is not supported. func (w *Workload) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error { diff --git a/internal/controllers/cluster/cluster_controller_phases.go b/internal/controllers/cluster/cluster_controller_phases.go index 9572a8ebd..7f2a32379 100644 --- a/internal/controllers/cluster/cluster_controller_phases.go +++ b/internal/controllers/cluster/cluster_controller_phases.go @@ -355,10 +355,12 @@ func (r *Reconciler) reconcileEtcdCluster(ctx context.Context, cluster *clusterv } return ctrl.Result{}, err } - unstructured.RemoveNestedField(controlPlane.Object, "metadata", "annotations", clusterv1.PausedAnnotation) - if err := r.Client.Update(ctx, controlPlane, &client.UpdateOptions{}); err != nil { - log.Error(err, "error resuming control plane") - return ctrl.Result{Requeue: true}, err + if annotations.HasPaused(controlPlane) { + unstructured.RemoveNestedField(controlPlane.Object, "metadata", "annotations", clusterv1.PausedAnnotation) + if err := r.Client.Update(ctx, controlPlane, &client.UpdateOptions{}); err != nil { + log.Error(err, "error resuming control plane") + return ctrl.Result{Requeue: true}, err + } } } -- 2.40.0