Fixing the issue where a rapid scale up and scale down can result in a cordoned machine in the cluster.#1087
Fixing the issue where a rapid scale up and scale down can result in a cordoned machine in the cluster. #1087 — r4mek wants to merge 6 commits into gardener:master from
Conversation
|
[APPROVALNOTIFIER] This PR is NOT APPROVED This pull-request has been approved by: The full list of commands accepted by this bot can be found here. DetailsNeeds approval from an approver in each of these files:Approvers can indicate their approval by writing |
|
/kind bug |
| TriggerDeletionByMCM = "node.machine.sapcloud.io/trigger-deletion-by-mcm" | ||
|
|
||
| // LastReplicaChangeAnnotation contains the timestamp of last replica change in the machine deployment. | ||
| // This annotation is used so that MCS only deletes the machines for which it has observed change in replicas.:w |
There was a problem hiding this comment.
| // This annotation is used so that MCS only deletes the machines for which it has observed change in replicas.:w | |
| // This annotation is used so that MCS only deletes the machines for which it has observed change in replicas. |
| } | ||
| }) | ||
|
|
||
| It("It should delete the marked machines with no change in machineSet replicas", func() { |
There was a problem hiding this comment.
You don't need to use "It" in the description.
| It("It should delete the marked machines with no change in machineSet replicas", func() { | |
| It("should delete the marked machines with no change in machineSet replicas", func() { |
| Expect(k8sError.IsNotFound(Err2)).Should(BeTrue()) | ||
| }) | ||
|
|
||
| It("It should delete the marked machines after reducing change in machineSet replicas", func() { |
There was a problem hiding this comment.
Same here. No need to say "It".
| Expect(machine2.Annotations[machineutils.MachinePriority]).To(Equal("1")) | ||
| Expect(machine2.Annotations[machineutils.LastReplicaChangeAnnotation]).To(Equal(ts)) | ||
| }, | ||
| Entry("mark the machines named in TriggerDeletionByMCM annotation for deletion by setting their MachinePriority annotation to 1 and also set the LastReplicaChangeAnnotation on them same as that is passed with the annotation", |
There was a problem hiding this comment.
| Entry("mark the machines named in TriggerDeletionByMCM annotation for deletion by setting their MachinePriority annotation to 1 and also set the LastReplicaChangeAnnotation on them same as that is passed with the annotation", | |
| Entry("mark the machines named in TriggerDeletionByMCM annotation for deletion by setting their MachinePriority annotation to 1 and also set their LastReplicaChangeAnnotation to the same value passed with the annotation", |
| _, err := dc.controlMachineClient.MachineDeployments(mcd.Namespace).Update(ctx, mcdAdjust, metav1.UpdateOptions{}) | ||
| if err != nil { | ||
| klog.Errorf("Failed to update MachineDeployment %q with #%d machine names still pending deletion, triggerDeletionAnnotValue=%q", mcd.Name, len(triggerForDeletionMachineNames), triggerDeletionAnnotValue) | ||
| klog.Errorf("Failed to update MachineDeployment %q with #%d machine names still pending deletion, triggerDeletionAnnotValue=%q", mcd.Name, len(triggerForDeletionMachineNamesWithTS), triggerDeletionAnnotValue) |
There was a problem hiding this comment.
| klog.Errorf("Failed to update MachineDeployment %q with #%d machine names still pending deletion, triggerDeletionAnnotValue=%q", mcd.Name, len(triggerForDeletionMachineNamesWithTS), triggerDeletionAnnotValue) | |
| klog.Errorf("failed to update MachineDeployment %q with #%d machine names still pending deletion, triggerDeletionAnnotValue=%q", mcd.Name, len(triggerForDeletionMachineNamesWithTS), triggerDeletionAnnotValue) |
| scaleDownMachines := []*v1alpha1.Machine{} | ||
| if machineSet.Annotations[machineutils.LastReplicaChangeAnnotation] != "" { | ||
| machineSetLRCA, err := time.Parse(time.RFC3339, machineSet.Annotations[machineutils.LastReplicaChangeAnnotation]) | ||
| if err == nil { | ||
| for _, machine := range allMachines { | ||
| if machine.Annotations[machineutils.LastReplicaChangeAnnotation] == "" || machine.Annotations[machineutils.MachinePriority] != "1" { | ||
| continue | ||
| } | ||
| machineLRCA, err := time.Parse(time.RFC3339, machine.Annotations[machineutils.LastReplicaChangeAnnotation]) | ||
| if err != nil { | ||
| continue | ||
| } | ||
| if machineLRCA.Before(machineSetLRCA) || machineLRCA.Equal(machineSetLRCA) { | ||
| scaleDownMachines = append(scaleDownMachines, machine) | ||
| } | ||
| } |
There was a problem hiding this comment.
Could you please add a comment here explaining the change?
|
|
||
| scaleDownMachines := []*v1alpha1.Machine{} | ||
| if machineSet.Annotations[machineutils.LastReplicaChangeAnnotation] != "" { | ||
| machineSetLRCA, err := time.Parse(time.RFC3339, machineSet.Annotations[machineutils.LastReplicaChangeAnnotation]) |
There was a problem hiding this comment.
The err is not handled. Maybe log the err as a warning here?
| } | ||
| } | ||
| if len(scaleDownMachines) >= 1 { | ||
| klog.V(3).Infof("Deleting stale machines %s", getMachineKeys(scaleDownMachines)) |
There was a problem hiding this comment.
Please change these logs to not call them stale machines.
Similarly for the below error for the terminate call as well.
| for _, machineName := range triggerForDeletionMachineNames { | ||
| klog.V(3).Infof("MachineDeployment %q has #%d machine(s) marked for deletion, triggerForDeletionMachineNames=%v", mcd.Name, len(triggerForDeletionMachineNamesWithTS), triggerForDeletionMachineNamesWithTS) | ||
| for _, machineNameWithTS := range triggerForDeletionMachineNamesWithTS { | ||
| parts := strings.Split(machineNameWithTS, "~") |
There was a problem hiding this comment.
Add a warning if len(parts) != 2 and continue to the next if this is the case
| klog.V(3).Infof("MachineDeployment %q has #%d machine(s) marked for deletion, triggerForDeletionMachineNames=%v", mcd.Name, len(triggerForDeletionMachineNamesWithTS), triggerForDeletionMachineNamesWithTS) | ||
| for _, machineNameWithTS := range triggerForDeletionMachineNamesWithTS { | ||
| parts := strings.Split(machineNameWithTS, "~") | ||
| machineName, machineDeletionTS := parts[0], parts[1] |
There was a problem hiding this comment.
Please parse machineDeletionTS to ensure it is a valid timestamp. if it is not, log it and continue to the next
Also, if annotation is invalid, please add machine to skipTriggerForDeletionMachineNames
| mcAdjust.Annotations = make(map[string]string) | ||
| } | ||
| mcAdjust.Annotations[machineutils.MachinePriority] = "1" | ||
| mcAdjust.Annotations[machineutils.LastReplicaChangeAnnotation] = machineDeletionTS |
There was a problem hiding this comment.
Add a check to not update LastReplicaChangeAnnotation if already set
Also add a log line if the TS is already set and continue
| @@ -643,22 +644,24 @@ func (dc *controller) updateMachineDeploymentFinalizers(ctx context.Context, mac | |||
| } | |||
|
|
|||
| func (dc *controller) setMachinePriorityAnnotationAndUpdateTriggeredForDeletion(ctx context.Context, mcd *v1alpha1.MachineDeployment) error { | |||
There was a problem hiding this comment.
Add another function that is called by this one — `func ComputeMachineTriggerDeletionData(mcd) triggerDeletionData` — that takes care of computing the machineNames/timestamps and other data necessary for application from the trigger-deletion annotation.
The struct can be as follows
type triggerDeletionData struct{
machineNamesWithTS []string
skipMachineNamesWithTS []string
machineMarkedDeletionTimes map[string]time.Time (consider map[string]string based on implementation)
}
Also, add another method on this structure to give you annotations that you should add to the machineDeployment. This method can be called latestTriggerDeletionAnnotationValue()
| @@ -643,22 +644,24 @@ func (dc *controller) updateMachineDeploymentFinalizers(ctx context.Context, mac | |||
| } | |||
|
|
|||
| func (dc *controller) setMachinePriorityAnnotationAndUpdateTriggeredForDeletion(ctx context.Context, mcd *v1alpha1.MachineDeployment) error { | |||
There was a problem hiding this comment.
Please rename this function to be more accurate to what it is doing
| func (dc *controller) setMachinePriorityAnnotationAndUpdateTriggeredForDeletion(ctx context.Context, mcd *v1alpha1.MachineDeployment) error { | |
| func (dc *controller) updateMachineAndMachineDeploymentDeletionAnnotations(ctx context.Context, mcd *v1alpha1.MachineDeployment) error { |
| var err error | ||
| if sizeNeedsUpdate || annotationsNeedUpdate { | ||
| isCopy.Spec.Replicas = newScale | ||
| isCopy.Annotations[machineutils.LastReplicaChangeAnnotation] = deployment.Annotations[machineutils.LastReplicaChangeAnnotation] |
There was a problem hiding this comment.
Please change the annotation name to machine.sapcloud.io/last-deployment-replica-change-time-by-scaler
This is to be used on machineDeployments and machineSets only
| mcAdjust.Annotations = make(map[string]string) | ||
| } | ||
| mcAdjust.Annotations[machineutils.MachinePriority] = "1" | ||
| mcAdjust.Annotations[machineutils.LastReplicaChangeAnnotation] = machineDeletionTS |
There was a problem hiding this comment.
Please add a new annotation to be used on machines. It can be called machine.sapcloud.io/mark-deletion-time
| scaleDownMachines := []*v1alpha1.Machine{} | ||
| if machineSet.Annotations[machineutils.LastReplicaChangeAnnotation] != "" { | ||
| machineSetLRCA, err := time.Parse(time.RFC3339, machineSet.Annotations[machineutils.LastReplicaChangeAnnotation]) | ||
| if err == nil { | ||
| for _, machine := range allMachines { | ||
| if machine.Annotations[machineutils.LastReplicaChangeAnnotation] == "" || machine.Annotations[machineutils.MachinePriority] != "1" { | ||
| continue | ||
| } | ||
| machineLRCA, err := time.Parse(time.RFC3339, machine.Annotations[machineutils.LastReplicaChangeAnnotation]) | ||
| if err != nil { | ||
| continue | ||
| } | ||
| if machineLRCA.Before(machineSetLRCA) || machineLRCA.Equal(machineSetLRCA) { | ||
| scaleDownMachines = append(scaleDownMachines, machine) | ||
| } | ||
| } | ||
| if len(scaleDownMachines) >= 1 { | ||
| klog.V(3).Infof("Deleting stale machines %s", getMachineKeys(scaleDownMachines)) | ||
| if err := c.terminateMachines(ctx, scaleDownMachines, machineSet); err != nil { | ||
| klog.Errorf("failed to terminate stale machines for machineset %s: %v", machineSet.Name, err) | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
There was a problem hiding this comment.
Move this to a function. Accept allMachines and machineSet.Annotations[machineutils.LastReplicaChangeAnnotation], and it should return a list of machineNames to delete
| // manageReplicas checks and updates replicas for the given MachineSet. | ||
| // Does NOT modify <filteredMachines>. | ||
| // It will requeue the machine set in case of an error while creating/deleting machines. | ||
| func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) error { |
There was a problem hiding this comment.
As part of manageReplicas, please make the following 2 changes
- Update the sorting function in
`getMachinesToDelete()` to now consider the timestamp set on the `mark-for-deletion` annotation - Do not delete machines inside the
`machinesWithoutUpdateSuccessfulLabelDiff > 0` block, but rather store all the machines intended to be deleted in a list and delete them later at the end of the method after also computing stale machines
What this PR does / why we need it:
This PR contains a fix for the issue where a rapid scale up and scale down can result in a cordoned machine in the cluster.
The case happens when CA scales down a MCD followed quickly with scaling up the same MCD before the MCD controller notices the replica change and starts reconciliation. The result is the MCD ending up with the same number of replicas that it started off with, causing the MCD controller not to have any work to do since, from its point of view, the MCD replicas have not changed.
As part of the scale down operation, CA will cordon an underutilised node. This node just remains part of the cluster as MCM makes no further attempt to remove it.
Which issue(s) this PR fixes:
Fixes #1085, #1084
Special notes for your reviewer:
There is also a PR raised on `gardener/autoscaler` which has the changes from the autoscaler and is also required to fix the issue: PR gardener/autoscaler#401

Release note: