Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cmd/vsphere/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,13 +165,15 @@ func main() {
// Create a taskIDCache to hold create-task IDs in case they are lost due to a
// network error or stale cache.
taskIDCache := make(map[string]string)
failedProvStatusUpdate := make(map[string]*machinev1.VSphereMachineProviderStatus)

// Initialize machine actuator.
machineActuator := machine.NewActuator(machine.ActuatorParams{
Client: mgr.GetClient(),
APIReader: mgr.GetAPIReader(),
EventRecorder: mgr.GetEventRecorderFor("vspherecontroller"),
TaskIDCache: taskIDCache,
FailedProvStatusUpdate: failedProvStatusUpdate,
FeatureGates: defaultMutableGate,
OpenshiftConfigNamespace: vsphere.OpenshiftConfigNamespace,
})
Expand Down
23 changes: 21 additions & 2 deletions pkg/controller/vsphere/actuator.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type Actuator struct {
apiReader runtimeclient.Reader
eventRecorder record.EventRecorder
TaskIDCache map[string]string
FailedProvStatusUpdate map[string]*machinev1.VSphereMachineProviderStatus
FeatureGates featuregate.MutableFeatureGate
openshiftConfigNamespace string
}
Expand All @@ -43,6 +44,7 @@ type ActuatorParams struct {
APIReader runtimeclient.Reader
EventRecorder record.EventRecorder
TaskIDCache map[string]string
FailedProvStatusUpdate map[string]*machinev1.VSphereMachineProviderStatus
FeatureGates featuregate.MutableFeatureGate
OpenshiftConfigNamespace string
}
Expand All @@ -54,6 +56,7 @@ func NewActuator(params ActuatorParams) *Actuator {
apiReader: params.APIReader,
eventRecorder: params.EventRecorder,
TaskIDCache: params.TaskIDCache,
FailedProvStatusUpdate: params.FailedProvStatusUpdate,
FeatureGates: params.FeatureGates,
openshiftConfigNamespace: params.OpenshiftConfigNamespace,
}
Expand Down Expand Up @@ -90,8 +93,20 @@ func (a *Actuator) Create(ctx context.Context, machine *machinev1.Machine) error
// This is a workaround for a cache race condition.
if val, ok := a.TaskIDCache[machine.Name]; ok {
if val != scope.providerStatus.TaskRef {
klog.Errorf("%s: machine object missing expected provider task ID, requeue", machine.GetName())
return &machinecontroller.RequeueAfterError{RequeueAfter: requeueAfterSeconds * time.Second}
if a.FailedProvStatusUpdate[machine.Name] != nil {
// Attempt to update previous status
klog.Infof("Attempting to re-patch machine %s", machine.Name)
scope.providerStatus = a.FailedProvStatusUpdate[machine.Name]
if err := scope.PatchMachine(); err != nil {
// Still not having any luck. Return the error and retry later.
return err
}
// Update worked. Clear out the failed patch info.
delete(a.FailedProvStatusUpdate, machine.Name)
} else {
klog.Errorf("%s: machine object missing expected provider task ID, requeue", machine.GetName())
return &machinecontroller.RequeueAfterError{RequeueAfter: requeueAfterSeconds * time.Second}
}
}
}

Expand All @@ -109,6 +124,8 @@ func (a *Actuator) Create(ctx context.Context, machine *machinev1.Machine) error
}

if err := scope.PatchMachine(); err != nil {
// An error occurred while saving status fields. Save the provider status and try again later.
a.FailedProvStatusUpdate[scope.machine.Name] = scope.providerStatus
return err
}

Expand All @@ -135,6 +152,7 @@ func (a *Actuator) Update(ctx context.Context, machine *machinev1.Machine) error
klog.Infof("%s: actuator updating machine", machine.GetName())
// Clean up TaskIDCache and FailedProvStatusUpdate so they don't continually grow
delete(a.TaskIDCache, machine.Name)
delete(a.FailedProvStatusUpdate, machine.Name)

scope, err := newMachineScope(machineScopeParams{
Context: ctx,
Expand Down Expand Up @@ -177,6 +195,7 @@ func (a *Actuator) Delete(ctx context.Context, machine *machinev1.Machine) error
// Clean up TaskIDCache and FailedProvStatusUpdate so they don't continually grow.
// Clean up here as well in case Update() was never successfully called.
delete(a.TaskIDCache, machine.Name)
delete(a.FailedProvStatusUpdate, machine.Name)

scope, err := newMachineScope(machineScopeParams{
Context: ctx,
Expand Down
Loading