Add a new "legacy pod cleaner" controller.

This controller is responsible for cleaning up kube-cert-agent pods that were deployed by previous versions.

They are easily identified because they use a different `kube-cert-agent.pinniped.dev` label compared to the new agent pods (`true` vs. `v2`).

Signed-off-by: Matt Moyer <moyerm@vmware.com>
This commit is contained in:
Matt Moyer 2021-04-20 14:56:43 -05:00
parent 54a8297cc4
commit e532a88647
No known key found for this signature in database
GPG Key ID: EAE88AD172C5AE2D
5 changed files with 281 additions and 0 deletions

View File

@ -90,6 +90,10 @@ rules:
- apiGroups: [ "" ] - apiGroups: [ "" ]
resources: [ pods/exec ] resources: [ pods/exec ]
verbs: [ create ] verbs: [ create ]
#! We need to be able to delete pods in our namespace so we can clean up legacy kube-cert-agent pods.
- apiGroups: [ "" ]
resources: [ pods ]
verbs: [ delete ]
#! We need to be able to create and update deployments in our namespace so we can manage the kube-cert-agent Deployment. #! We need to be able to create and update deployments in our namespace so we can manage the kube-cert-agent Deployment.
- apiGroups: [ apps ] - apiGroups: [ apps ]
resources: [ deployments ] resources: [ deployments ]

View File

@ -0,0 +1,63 @@
// Copyright 2021 the Pinniped contributors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package kubecertagent
import (
"fmt"
"github.com/go-logr/logr"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
corev1informers "k8s.io/client-go/informers/core/v1"
"k8s.io/klog/v2"
pinnipedcontroller "go.pinniped.dev/internal/controller"
"go.pinniped.dev/internal/controllerlib"
"go.pinniped.dev/internal/kubeclient"
)
// NewLegacyPodCleanerController returns a controller that cleans up legacy kube-cert-agent Pods created by Pinniped v0.7.0 and below.
func NewLegacyPodCleanerController(
cfg AgentConfig,
client *kubeclient.Client,
agentPods corev1informers.PodInformer,
log logr.Logger,
options ...controllerlib.Option,
) controllerlib.Controller {
// legacyAgentLabels are the Kubernetes labels we previously added to agent pods (the new value is "v2").
// We also expect these pods to have the "extra" labels configured on the Concierge.
legacyAgentLabels := map[string]string{"kube-cert-agent.pinniped.dev": "true"}
for k, v := range cfg.Labels {
legacyAgentLabels[k] = v
}
legacyAgentSelector := labels.SelectorFromSet(legacyAgentLabels)
log = log.WithName("legacy-pod-cleaner-controller")
return controllerlib.New(
controllerlib.Config{
Name: "legacy-pod-cleaner-controller",
Syncer: controllerlib.SyncFunc(func(ctx controllerlib.Context) error {
if err := client.Kubernetes.CoreV1().Pods(ctx.Key.Namespace).Delete(ctx.Context, ctx.Key.Name, metav1.DeleteOptions{}); err != nil {
if k8serrors.IsNotFound(err) {
return nil
}
return fmt.Errorf("could not delete legacy agent pod: %w", err)
}
log.Info("deleted legacy kube-cert-agent pod", "pod", klog.KRef(ctx.Key.Namespace, ctx.Key.Name))
return nil
}),
},
append([]controllerlib.Option{
controllerlib.WithInformer(
agentPods,
pinnipedcontroller.SimpleFilter(func(obj metav1.Object) bool {
return obj.GetNamespace() == cfg.Namespace && legacyAgentSelector.Matches(labels.Set(obj.GetLabels()))
}, nil),
controllerlib.InformerOption{},
),
}, options...)...,
)
}

View File

@ -0,0 +1,145 @@
// Copyright 2021 the Pinniped contributors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0
package kubecertagent
import (
"context"
"fmt"
"testing"
"time"
"github.com/stretchr/testify/assert"
corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
kubefake "k8s.io/client-go/kubernetes/fake"
coretesting "k8s.io/client-go/testing"
"go.pinniped.dev/internal/controllerlib"
"go.pinniped.dev/internal/kubeclient"
"go.pinniped.dev/internal/testutil/testlogger"
)
func TestLegacyPodCleanerController(t *testing.T) {
t.Parallel()
legacyAgentPodWithoutExtraLabel := &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
Namespace: "concierge",
Name: "pinniped-concierge-kube-cert-agent-without-extra-label",
Labels: map[string]string{"kube-cert-agent.pinniped.dev": "true"},
},
Spec: corev1.PodSpec{},
Status: corev1.PodStatus{Phase: corev1.PodRunning},
}
legacyAgentPodWithExtraLabel := legacyAgentPodWithoutExtraLabel.DeepCopy()
legacyAgentPodWithExtraLabel.Name = "pinniped-concierge-kube-cert-agent-with-extra-label"
legacyAgentPodWithExtraLabel.Labels["extralabel"] = "labelvalue"
legacyAgentPodWithExtraLabel.Labels["anotherextralabel"] = "labelvalue"
nonLegacyAgentPod := legacyAgentPodWithExtraLabel.DeepCopy()
nonLegacyAgentPod.Name = "pinniped-concierge-kube-cert-agent-not-legacy"
nonLegacyAgentPod.Labels["kube-cert-agent.pinniped.dev"] = "v2"
tests := []struct {
name string
kubeObjects []runtime.Object
addKubeReactions func(*kubefake.Clientset)
wantDistinctErrors []string
wantDistinctLogs []string
wantActions []coretesting.Action
}{
{
name: "no pods",
wantActions: []coretesting.Action{},
},
{
name: "mix of pods",
kubeObjects: []runtime.Object{
legacyAgentPodWithoutExtraLabel, // should not be delete (missing extra label)
legacyAgentPodWithExtraLabel, // should be deleted
nonLegacyAgentPod, // should not be deleted (missing legacy agent label)
},
wantDistinctErrors: []string{""},
wantDistinctLogs: []string{
`legacy-pod-cleaner-controller "level"=0 "msg"="deleted legacy kube-cert-agent pod" "pod"={"name":"pinniped-concierge-kube-cert-agent-with-extra-label","namespace":"concierge"}`,
},
wantActions: []coretesting.Action{ // the first delete triggers the informer again, but the second invocation triggers a Not Found
coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
},
},
{
name: "fail to delete",
kubeObjects: []runtime.Object{
legacyAgentPodWithoutExtraLabel, // should not be delete (missing extra label)
legacyAgentPodWithExtraLabel, // should be deleted
nonLegacyAgentPod, // should not be deleted (missing legacy agent label)
},
addKubeReactions: func(clientset *kubefake.Clientset) {
clientset.PrependReactor("delete", "*", func(action coretesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, fmt.Errorf("some delete error")
})
},
wantDistinctErrors: []string{
"could not delete legacy agent pod: some delete error",
},
wantActions: []coretesting.Action{
coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
},
},
{
name: "fail to delete because of not found error",
kubeObjects: []runtime.Object{
legacyAgentPodWithoutExtraLabel, // should not be delete (missing extra label)
legacyAgentPodWithExtraLabel, // should be deleted
nonLegacyAgentPod, // should not be deleted (missing legacy agent label)
},
addKubeReactions: func(clientset *kubefake.Clientset) {
clientset.PrependReactor("delete", "*", func(action coretesting.Action) (handled bool, ret runtime.Object, err error) {
return true, nil, k8serrors.NewNotFound(action.GetResource().GroupResource(), "")
})
},
wantDistinctErrors: []string{""},
wantActions: []coretesting.Action{
coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
},
},
}
for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) {
t.Parallel()
kubeClientset := kubefake.NewSimpleClientset(tt.kubeObjects...)
if tt.addKubeReactions != nil {
tt.addKubeReactions(kubeClientset)
}
kubeInformers := informers.NewSharedInformerFactory(kubeClientset, 0)
log := testlogger.New(t)
controller := NewLegacyPodCleanerController(
AgentConfig{
Namespace: "concierge",
Labels: map[string]string{"extralabel": "labelvalue"},
},
&kubeclient.Client{Kubernetes: kubeClientset},
kubeInformers.Core().V1().Pods(),
log,
controllerlib.WithMaxRetries(1),
)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
errorMessages := runControllerUntilQuiet(ctx, t, controller, kubeInformers)
assert.Equal(t, tt.wantDistinctErrors, deduplicate(errorMessages), "unexpected errors")
assert.Equal(t, tt.wantDistinctLogs, deduplicate(log.Lines()), "unexpected logs")
assert.Equal(t, tt.wantActions, kubeClientset.Actions()[2:], "unexpected actions")
})
}
}

View File

@ -207,6 +207,17 @@ func PrepareControllers(c *Config) (func(ctx context.Context), error) {
), ),
singletonWorker, singletonWorker,
). ).
// The kube-cert-agent legacy pod cleaner controller is responsible for cleaning up pods that were deployed by
// versions of Pinniped prior to v0.7.0. If we stop supporting upgrades from v0.7.0, we can safely remove this.
WithController(
kubecertagent.NewLegacyPodCleanerController(
agentConfig,
client,
informers.installationNamespaceK8s.Core().V1().Pods(),
klogr.New(),
),
singletonWorker,
).
// The cache filler/cleaner controllers are responsible for keep an in-memory representation of active // The cache filler/cleaner controllers are responsible for keep an in-memory representation of active
// authenticators up to date. // authenticators up to date.
WithController( WithController(

View File

@ -9,8 +9,12 @@ import (
"testing" "testing"
"time" "time"
"github.com/stretchr/testify/require"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/utils/pointer"
conciergev1alpha "go.pinniped.dev/generated/latest/apis/concierge/config/v1alpha1" conciergev1alpha "go.pinniped.dev/generated/latest/apis/concierge/config/v1alpha1"
"go.pinniped.dev/test/library" "go.pinniped.dev/test/library"
@ -88,3 +92,57 @@ func findSuccessfulStrategy(credentialIssuer *conciergev1alpha.CredentialIssuer,
} }
return nil return nil
} }
func TestLegacyPodCleaner(t *testing.T) {
env := library.IntegrationEnv(t).WithCapability(library.ClusterSigningKeyIsAvailable)
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
kubeClient := library.NewKubernetesClientset(t)
// Pick the same labels that the legacy code would have used to run the kube-cert-agent pod.
legacyAgentLabels := map[string]string{}
for k, v := range env.ConciergeCustomLabels {
legacyAgentLabels[k] = v
}
legacyAgentLabels["app"] = env.ConciergeAppName
legacyAgentLabels["kube-cert-agent.pinniped.dev"] = "true"
legacyAgentLabels["pinniped.dev/test"] = ""
// Deploy a fake legacy agent pod using those labels.
pod, err := kubeClient.CoreV1().Pods(env.ConciergeNamespace).Create(ctx, &corev1.Pod{
ObjectMeta: metav1.ObjectMeta{
GenerateName: "test-legacy-kube-cert-agent-",
Labels: legacyAgentLabels,
Annotations: map[string]string{"pinniped.dev/testName": t.Name()},
},
Spec: corev1.PodSpec{
Containers: []corev1.Container{{
Name: "sleeper",
Image: "debian:10.9-slim",
Command: []string{"/bin/sleep", "infinity"},
}},
},
}, metav1.CreateOptions{})
require.NoError(t, err, "failed to create fake legacy agent pod")
t.Logf("deployed fake legacy agent pod %s/%s with labels %s", pod.Namespace, pod.Name, labels.SelectorFromSet(legacyAgentLabels).String())
// No matter what happens, clean up the agent pod at the end of the test (normally it will already have been deleted).
t.Cleanup(func() {
ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
defer cancel()
err := kubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64Ptr(0)})
if !k8serrors.IsNotFound(err) {
require.NoError(t, err, "failed to clean up fake legacy agent pod")
}
})
// Expect the legacy-pod-cleaner controller to delete the pod.
library.RequireEventuallyWithoutError(t, func() (bool, error) {
_, err := kubeClient.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
if k8serrors.IsNotFound(err) {
t.Logf("fake legacy agent pod %s/%s was deleted as expected", pod.Namespace, pod.Name)
return true, nil
}
return false, err
}, 60*time.Second, 1*time.Second)
}