leader election: use better duration defaults
OpenShift has good defaults for these duration fields that we can
use instead of coming up with them ourselves:
https://github.com/openshift/library-go/blob/e14e06ba8d476429b10cc6f6c0fcfe6ea4f2c591/pkg/config/leaderelection/leaderelection.go#L87-L109
Copied here for easy future reference:
// We want to be able to tolerate 60s of kube-apiserver disruption without causing pod restarts.
// We want the graceful lease re-acquisition fairly quick to avoid waits on new deployments and other rollouts.
// We want a single set of guidance for nearly every lease in openshift. If you're special, we'll let you know.
// 1. clock skew tolerance is leaseDuration-renewDeadline == 30s
// 2. kube-apiserver downtime tolerance is == 78s
// lastRetry=floor(renewDeadline/retryPeriod)*retryPeriod == 104
// downtimeTolerance = lastRetry-retryPeriod == 78s
// 3. worst non-graceful lease acquisition is leaseDuration+retryPeriod == 163s
// 4. worst graceful lease acquisition is retryPeriod == 26s
if ret.LeaseDuration.Duration == 0 {
ret.LeaseDuration.Duration = 137 * time.Second
}
if ret.RenewDeadline.Duration == 0 {
// this gives 107/26=4 retries and allows for 137-107=30 seconds of clock skew
// if the kube-apiserver is unavailable for 60s starting just before t=26 (the first renew),
// then we will retry on 26s intervals until t=104 (kube-apiserver came back up at 86), and there will
// be 33 seconds of extra time before the lease is lost.
ret.RenewDeadline.Duration = 107 * time.Second
}
if ret.RetryPeriod.Duration == 0 {
ret.RetryPeriod.Duration = 26 * time.Second
}
Signed-off-by: Monis Khan <mok@vmware.com>
This commit is contained in:
parent
c0617ceda4
commit
c71ffdcd1e
@@ -136,9 +136,13 @@ func newLeaderElectionConfig(namespace, leaseName, identity string, internalClie
|
||||
identity: identity,
|
||||
},
|
||||
ReleaseOnCancel: true, // semantics for correct release handled by releaseLock.Update and controllersWithLeaderElector below
|
||||
-LeaseDuration: 60 * time.Second,
|
||||
-RenewDeadline: 15 * time.Second,
|
||||
-RetryPeriod: 5 * time.Second,
|
||||
|
||||
+// Copied from defaults used in OpenShift since we want the same semantics:
|
||||
+// https://github.com/openshift/library-go/blob/e14e06ba8d476429b10cc6f6c0fcfe6ea4f2c591/pkg/config/leaderelection/leaderelection.go#L87-L109
|
||||
+LeaseDuration: 137 * time.Second,
|
||||
+RenewDeadline: 107 * time.Second,
|
||||
+RetryPeriod: 26 * time.Second,
|
||||
|
||||
Callbacks: leaderelection.LeaderCallbacks{
|
||||
OnStartedLeading: func(_ context.Context) {
|
||||
plog.Debug("leader gained", "identity", identity)
|
||||
|
@@ -205,7 +205,7 @@ func waitForIdentity(ctx context.Context, t *testing.T, namespace *corev1.Namesp
|
||||
}
|
||||
out = lease
|
||||
return lease.Spec.HolderIdentity != nil && identities.Has(*lease.Spec.HolderIdentity), nil
|
||||
-}, 3*time.Minute, time.Second)
|
||||
+}, 5*time.Minute, time.Second)
|
||||
|
||||
return out
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user