ContainerImage.Pinniped/internal/leaderelection/leaderelection.go

// Copyright 2021 the Pinniped contributors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

package leaderelection

import (
	"context"
	"fmt"
	"time"

	"go.uber.org/atomic"
	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/client-go/tools/leaderelection"
	"k8s.io/client-go/tools/leaderelection/resourcelock"

	"go.pinniped.dev/internal/constable"
	"go.pinniped.dev/internal/downward"
	"go.pinniped.dev/internal/kubeclient"
	"go.pinniped.dev/internal/plog"
)

const ErrNotLeader constable.Error = "write attempt rejected as client is not leader"

// New returns a client that has a leader election middleware injected into it.
// This middleware will prevent all non-read requests to the Kubernetes API when
// the current process does not hold the leader election lock. Unlike normal
// leader election where the process blocks until it acquires the lock, this
// middleware approach lets the process run as normal for all read requests.
// Another difference is that if the process acquires the lock and then loses it
// (i.e. a failed renewal), it will not exit (i.e. restart). Instead, it will
// simply attempt to acquire the lock again.
//
// The returned function is blocking and will run the leader election polling
// logic and will coordinate lease release with the input controller starter function.
func New(podInfo *downward.PodInfo, deployment *appsv1.Deployment, opts ...kubeclient.Option) (
	*kubeclient.Client,
	func(context.Context, func(context.Context)),
	error,
) {
	internalClient, err := kubeclient.New(opts...)
	if err != nil {
		return nil, nil, fmt.Errorf("could not create internal client for leader election: %w", err)
	}

	isLeader := atomic.NewBool(false)

	identity := podInfo.Name
	leaseName := deployment.Name

	leaderElectionConfig := leaderelection.LeaderElectionConfig{
		Lock: &resourcelock.LeaseLock{
			LeaseMeta: metav1.ObjectMeta{
				Namespace: podInfo.Namespace,
				Name:      leaseName,
			},
			Client: internalClient.Kubernetes.CoordinationV1(),
			LockConfig: resourcelock.ResourceLockConfig{
				Identity: identity,
			},
		},
		ReleaseOnCancel: true, // semantics for correct release handled by controllersWithLeaderElector below
		LeaseDuration:   60 * time.Second,
		RenewDeadline:   15 * time.Second,
		RetryPeriod:     5 * time.Second,
		Callbacks: leaderelection.LeaderCallbacks{
			OnStartedLeading: func(_ context.Context) {
				plog.Debug("leader gained", "identity", identity)
				isLeader.Store(true)
			},
			OnStoppedLeading: func() {
				plog.Debug("leader lost", "identity", identity)
				isLeader.Store(false)
			},
			OnNewLeader: func(newLeader string) {
				if newLeader == identity {
					return
				}
				plog.Debug("new leader elected", "newLeader", newLeader)
			},
		},
		Name: leaseName,
		// this must be set to nil because we do not want to associate /healthz with a failed
		// leader election renewal as we do not want to exit the process if the leader changes.
		WatchDog: nil,
	}

	// validate our config here before we rely on it functioning below
	if _, err := leaderelection.NewLeaderElector(leaderElectionConfig); err != nil {
		return nil, nil, fmt.Errorf("invalid config - could not create leader elector: %w", err)
	}

	writeOnlyWhenLeader := kubeclient.MiddlewareFunc(func(_ context.Context, rt kubeclient.RoundTrip) {
		switch rt.Verb() {
		case kubeclient.VerbGet, kubeclient.VerbList, kubeclient.VerbWatch:
			// reads are always allowed.
			// note that while our pods/exec into the kube cert agent pod is a write request from the
			// perspective of the Kube API, it is semantically a read request since no mutation occurs.
			// we simply use it to fill a cache, and we need all pods to have a functioning cache.
			// however, we do not need to handle it here because remotecommand.NewSPDYExecutor uses a
			// kubeclient.Client.JSONConfig as input. since our middleware logic is only injected into
			// the generated clientset code, this JSONConfig simply ignores this middleware altogether.
			return
		}

		if isLeader.Load() { // only perform "expensive" test for writes
			return // we are currently the leader, all actions are permitted
		}

		rt.MutateRequest(func(_ kubeclient.Object) error {
			return ErrNotLeader // we are not the leader, fail the write request
		})
	})

	leaderElectionOpts := append(
		// all middleware are always executed so this being the first middleware is not relevant
		[]kubeclient.Option{kubeclient.WithMiddleware(writeOnlyWhenLeader)},
		opts..., // do not mutate input slice
	)

	client, err := kubeclient.New(leaderElectionOpts...)
	if err != nil {
		return nil, nil, fmt.Errorf("could not create leader election client: %w", err)
	}

	controllersWithLeaderElector := func(ctx context.Context, controllers func(context.Context)) {
		leaderElectorCtx, leaderElectorCancel := context.WithCancel(context.Background()) // purposefully detached context

		go func() {
			controllers(ctx)      // run the controllers with the global context, this blocks until the context is canceled
			leaderElectorCancel() // once the controllers have all stopped, tell the leader elector to release the lock
		}()

		for { // run (and rerun on release) the leader elector with its own context (blocking)
			select {
			case <-leaderElectorCtx.Done():
				return // keep trying to run until process exit

			default:
				// blocks while trying to acquire lease, unblocks on release.
				// note that this creates a new leader elector on each loop to
				// prevent any bugs from reusing that struct across elections.
				// our config was validated above so this should never die.
				leaderelection.RunOrDie(leaderElectorCtx, leaderElectionConfig)
			}
		}
	}

	return client, controllersWithLeaderElector, nil
}