OpenShift has good defaults for these duration fields that we can
use instead of coming up with them ourselves:
e14e06ba8d/pkg/config/leaderelection/leaderelection.go (L87-L109)
Copied here for easy future reference:
// We want to be able to tolerate 60s of kube-apiserver disruption without causing pod restarts.
// We want the graceful lease re-acquisition fairly quick to avoid waits on new deployments and other rollouts.
// We want a single set of guidance for nearly every lease in openshift. If you're special, we'll let you know.
// 1. clock skew tolerance is leaseDuration-renewDeadline == 30s
// 2. kube-apiserver downtime tolerance is == 78s
// lastRetry=floor(renewDeadline/retryPeriod)*retryPeriod == 104
// downtimeTolerance = lastRetry-retryPeriod == 78s
// 3. worst non-graceful lease acquisition is leaseDuration+retryPeriod == 163s
// 4. worst graceful lease acquisition is retryPeriod == 26s
if ret.LeaseDuration.Duration == 0 {
ret.LeaseDuration.Duration = 137 * time.Second
}
if ret.RenewDeadline.Duration == 0 {
// this gives 107/26=4 retries and allows for 137-107=30 seconds of clock skew
// if the kube-apiserver is unavailable for 60s starting just before t=26 (the first renew),
// then we will retry on 26s intervals until t=104 (kube-apiserver came back up at 86), and there will
// be 33 seconds of extra time before the lease is lost.
ret.RenewDeadline.Duration = 107 * time.Second
}
if ret.RetryPeriod.Duration == 0 {
ret.RetryPeriod.Duration = 26 * time.Second
}
Signed-off-by: Monis Khan <mok@vmware.com>