diff --git a/deploy/concierge/rbac.yaml b/deploy/concierge/rbac.yaml
index 6bd56fe0..74e5653e 100644
--- a/deploy/concierge/rbac.yaml
+++ b/deploy/concierge/rbac.yaml
@@ -90,6 +90,10 @@ rules:
   - apiGroups: [ "" ]
     resources: [ pods/exec ]
     verbs: [ create ]
+  #! We need to be able to delete pods in our namespace so we can clean up legacy kube-cert-agent pods.
+  - apiGroups: [ "" ]
+    resources: [ pods ]
+    verbs: [ delete ]
   #! We need to be able to create and update deployments in our namespace so we can manage the kube-cert-agent Deployment.
   - apiGroups: [ apps ]
     resources: [ deployments ]
diff --git a/internal/controller/kubecertagent/legacypodcleaner.go b/internal/controller/kubecertagent/legacypodcleaner.go
new file mode 100644
index 00000000..1a44477e
--- /dev/null
+++ b/internal/controller/kubecertagent/legacypodcleaner.go
@@ -0,0 +1,63 @@
+// Copyright 2021 the Pinniped contributors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package kubecertagent
+
+import (
+	"fmt"
+
+	"github.com/go-logr/logr"
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	corev1informers "k8s.io/client-go/informers/core/v1"
+	"k8s.io/klog/v2"
+
+	pinnipedcontroller "go.pinniped.dev/internal/controller"
+	"go.pinniped.dev/internal/controllerlib"
+	"go.pinniped.dev/internal/kubeclient"
+)
+
+// NewLegacyPodCleanerController returns a controller that cleans up legacy kube-cert-agent Pods created by Pinniped v0.7.0 and below.
+func NewLegacyPodCleanerController(
+	cfg AgentConfig,
+	client *kubeclient.Client,
+	agentPods corev1informers.PodInformer,
+	log logr.Logger,
+	options ...controllerlib.Option,
+) controllerlib.Controller {
+	// legacyAgentLabels are the Kubernetes labels we previously added to agent pods (current agent pods use "v2" as the value of this label).
+	// We also expect these pods to have the "extra" labels configured on the Concierge.
+	legacyAgentLabels := map[string]string{"kube-cert-agent.pinniped.dev": "true"}
+	for k, v := range cfg.Labels {
+		legacyAgentLabels[k] = v
+	}
+	legacyAgentSelector := labels.SelectorFromSet(legacyAgentLabels)
+
+	log = log.WithName("legacy-pod-cleaner-controller")
+
+	return controllerlib.New(
+		controllerlib.Config{
+			Name: "legacy-pod-cleaner-controller",
+			Syncer: controllerlib.SyncFunc(func(ctx controllerlib.Context) error {
+				if err := client.Kubernetes.CoreV1().Pods(ctx.Key.Namespace).Delete(ctx.Context, ctx.Key.Name, metav1.DeleteOptions{}); err != nil {
+					if k8serrors.IsNotFound(err) {
+						return nil
+					}
+					return fmt.Errorf("could not delete legacy agent pod: %w", err)
+				}
+				log.Info("deleted legacy kube-cert-agent pod", "pod", klog.KRef(ctx.Key.Namespace, ctx.Key.Name))
+				return nil
+			}),
+		},
+		append([]controllerlib.Option{
+			controllerlib.WithInformer(
+				agentPods,
+				pinnipedcontroller.SimpleFilter(func(obj metav1.Object) bool {
+					return obj.GetNamespace() == cfg.Namespace && legacyAgentSelector.Matches(labels.Set(obj.GetLabels()))
+				}, nil),
+				controllerlib.InformerOption{},
+			),
+		}, options...)...,
+	)
+}
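A note on the matching logic above, since it is what decides which pods are eligible for deletion: `labels.SelectorFromSet` builds a selector that requires every key/value pair in the set to be present, so pods carrying additional labels still match, while a pod whose `kube-cert-agent.pinniped.dev` label was rewritten to `"v2"` does not. A minimal standalone sketch of that behavior (the `extralabel` pair is a hypothetical stand-in for a Concierge-configured extra label):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/labels"
)

func main() {
	// Mirror of the controller's selector: the legacy marker label plus the
	// operator-configured "extra" labels (hypothetical values here).
	legacySelector := labels.SelectorFromSet(labels.Set{
		"kube-cert-agent.pinniped.dev": "true",
		"extralabel":                   "labelvalue",
	})

	// A current (v2) agent pod does not match: the label value differs.
	fmt.Println(legacySelector.Matches(labels.Set{
		"kube-cert-agent.pinniped.dev": "v2",
		"extralabel":                   "labelvalue",
	})) // false

	// A legacy pod matches even when it carries labels beyond the selector's set.
	fmt.Println(legacySelector.Matches(labels.Set{
		"kube-cert-agent.pinniped.dev": "true",
		"extralabel":                   "labelvalue",
		"app":                          "pinniped-concierge",
	})) // true
}
```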
diff --git a/internal/controller/kubecertagent/legacypodcleaner_test.go b/internal/controller/kubecertagent/legacypodcleaner_test.go
new file mode 100644
index 00000000..b2b1a5e7
--- /dev/null
+++ b/internal/controller/kubecertagent/legacypodcleaner_test.go
@@ -0,0 +1,145 @@
+// Copyright 2021 the Pinniped contributors. All Rights Reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package kubecertagent
+
+import (
+	"context"
+	"fmt"
+	"testing"
+	"time"
+
+	"github.com/stretchr/testify/assert"
+	corev1 "k8s.io/api/core/v1"
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	"k8s.io/client-go/informers"
+	kubefake "k8s.io/client-go/kubernetes/fake"
+	coretesting "k8s.io/client-go/testing"
+
+	"go.pinniped.dev/internal/controllerlib"
+	"go.pinniped.dev/internal/kubeclient"
+	"go.pinniped.dev/internal/testutil/testlogger"
+)
+
+func TestLegacyPodCleanerController(t *testing.T) {
+	t.Parallel()
+
+	legacyAgentPodWithoutExtraLabel := &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: "concierge",
+			Name:      "pinniped-concierge-kube-cert-agent-without-extra-label",
+			Labels:    map[string]string{"kube-cert-agent.pinniped.dev": "true"},
+		},
+		Spec:   corev1.PodSpec{},
+		Status: corev1.PodStatus{Phase: corev1.PodRunning},
+	}
+
+	legacyAgentPodWithExtraLabel := legacyAgentPodWithoutExtraLabel.DeepCopy()
+	legacyAgentPodWithExtraLabel.Name = "pinniped-concierge-kube-cert-agent-with-extra-label"
+	legacyAgentPodWithExtraLabel.Labels["extralabel"] = "labelvalue"
+	legacyAgentPodWithExtraLabel.Labels["anotherextralabel"] = "labelvalue"
+
+	nonLegacyAgentPod := legacyAgentPodWithExtraLabel.DeepCopy()
+	nonLegacyAgentPod.Name = "pinniped-concierge-kube-cert-agent-not-legacy"
+	nonLegacyAgentPod.Labels["kube-cert-agent.pinniped.dev"] = "v2"
+
+	tests := []struct {
+		name               string
+		kubeObjects        []runtime.Object
+		addKubeReactions   func(*kubefake.Clientset)
+		wantDistinctErrors []string
+		wantDistinctLogs   []string
+		wantActions        []coretesting.Action
+	}{
+		{
+			name:        "no pods",
+			wantActions: []coretesting.Action{},
+		},
+		{
+			name: "mix of pods",
+			kubeObjects: []runtime.Object{
+				legacyAgentPodWithoutExtraLabel, // should not be deleted (missing extra label)
+				legacyAgentPodWithExtraLabel,    // should be deleted
+				nonLegacyAgentPod,               // should not be deleted (missing legacy agent label)
+			},
+			wantDistinctErrors: []string{""},
+			wantDistinctLogs: []string{
+				`legacy-pod-cleaner-controller "level"=0 "msg"="deleted legacy kube-cert-agent pod" "pod"={"name":"pinniped-concierge-kube-cert-agent-with-extra-label","namespace":"concierge"}`,
+			},
+			wantActions: []coretesting.Action{ // the first delete re-triggers the informer, and the second delete returns a NotFound error, which is ignored
+				coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
+				coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
+			},
+		},
+		{
+			name: "fail to delete",
+			kubeObjects: []runtime.Object{
+				legacyAgentPodWithoutExtraLabel, // should not be deleted (missing extra label)
+				legacyAgentPodWithExtraLabel,    // should be deleted
+				nonLegacyAgentPod,               // should not be deleted (missing legacy agent label)
+			},
+			addKubeReactions: func(clientset *kubefake.Clientset) {
+				clientset.PrependReactor("delete", "*", func(action coretesting.Action) (handled bool, ret runtime.Object, err error) {
+					return true, nil, fmt.Errorf("some delete error")
+				})
+			},
+			wantDistinctErrors: []string{
+				"could not delete legacy agent pod: some delete error",
+			},
+			wantActions: []coretesting.Action{
+				coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
+				coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
+			},
+		},
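One subtlety in the expected actions above: the fake clientset records every API call, including the ones a prepended reactor forces to fail, which is why the failing cases can still assert an exact sequence of deletes (minus the informer's initial LIST and WATCH, hence the `Actions()[2:]` in the test body below). A self-contained sketch of the reactor mechanism, with a hypothetical pod name and error:

```go
package main

import (
	"context"
	"fmt"

	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/runtime"
	kubefake "k8s.io/client-go/kubernetes/fake"
	coretesting "k8s.io/client-go/testing"
)

func main() {
	clientset := kubefake.NewSimpleClientset(&corev1.Pod{
		ObjectMeta: metav1.ObjectMeta{Namespace: "concierge", Name: "some-pod"},
	})

	// PrependReactor installs this handler ahead of the default object-tracker
	// reactor, so matching calls fail before they touch the tracked objects.
	clientset.PrependReactor("delete", "pods", func(action coretesting.Action) (bool, runtime.Object, error) {
		return true, nil, fmt.Errorf("some delete error")
	})

	err := clientset.CoreV1().Pods("concierge").Delete(context.Background(), "some-pod", metav1.DeleteOptions{})
	fmt.Println(err) // some delete error

	// The failed call was still recorded, so tests can assert on it.
	fmt.Println(len(clientset.Actions())) // 1
}
```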
+		{
+			name: "fail to delete because of not found error",
+			kubeObjects: []runtime.Object{
+				legacyAgentPodWithoutExtraLabel, // should not be deleted (missing extra label)
+				legacyAgentPodWithExtraLabel,    // should be deleted
+				nonLegacyAgentPod,               // should not be deleted (missing legacy agent label)
+			},
+			addKubeReactions: func(clientset *kubefake.Clientset) {
+				clientset.PrependReactor("delete", "*", func(action coretesting.Action) (handled bool, ret runtime.Object, err error) {
+					return true, nil, k8serrors.NewNotFound(action.GetResource().GroupResource(), "")
+				})
+			},
+			wantDistinctErrors: []string{""},
+			wantActions: []coretesting.Action{
+				coretesting.NewDeleteAction(corev1.Resource("pods").WithVersion("v1"), "concierge", legacyAgentPodWithExtraLabel.Name),
+			},
+		},
+	}
+	for _, tt := range tests {
+		tt := tt
+		t.Run(tt.name, func(t *testing.T) {
+			t.Parallel()
+
+			kubeClientset := kubefake.NewSimpleClientset(tt.kubeObjects...)
+			if tt.addKubeReactions != nil {
+				tt.addKubeReactions(kubeClientset)
+			}
+			kubeInformers := informers.NewSharedInformerFactory(kubeClientset, 0)
+			log := testlogger.New(t)
+			controller := NewLegacyPodCleanerController(
+				AgentConfig{
+					Namespace: "concierge",
+					Labels:    map[string]string{"extralabel": "labelvalue"},
+				},
+				&kubeclient.Client{Kubernetes: kubeClientset},
+				kubeInformers.Core().V1().Pods(),
+				log,
+				controllerlib.WithMaxRetries(1),
+			)
+
+			ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
+			defer cancel()
+
+			errorMessages := runControllerUntilQuiet(ctx, t, controller, kubeInformers)
+			assert.Equal(t, tt.wantDistinctErrors, deduplicate(errorMessages), "unexpected errors")
+			assert.Equal(t, tt.wantDistinctLogs, deduplicate(log.Lines()), "unexpected logs")
+			assert.Equal(t, tt.wantActions, kubeClientset.Actions()[2:], "unexpected actions") // skip the informer's initial LIST and WATCH actions
+		})
+	}
+}
diff --git a/internal/controllermanager/prepare_controllers.go b/internal/controllermanager/prepare_controllers.go
index 2383fabc..8990a4ec 100644
--- a/internal/controllermanager/prepare_controllers.go
+++ b/internal/controllermanager/prepare_controllers.go
@@ -207,6 +207,17 @@ func PrepareControllers(c *Config) (func(ctx context.Context), error) {
 			),
 			singletonWorker,
 		).
+		// The kube-cert-agent legacy pod cleaner controller is responsible for cleaning up pods that were deployed by
+		// Pinniped v0.7.0 and below. Once we stop supporting upgrades from those versions, we can safely remove this.
+		WithController(
+			kubecertagent.NewLegacyPodCleanerController(
+				agentConfig,
+				client,
+				informers.installationNamespaceK8s.Core().V1().Pods(),
+				klogr.New(),
+			),
+			singletonWorker,
+		).
 		// The cache filler/cleaner controllers are responsible for keeping an in-memory representation of active
 		// authenticators up to date.
 		WithController(
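For context on the log assertions in the unit test: the controller wraps whatever `logr.Logger` it is given (here `klogr.New()`) with `WithName("legacy-pod-cleaner-controller")`, which produces the prefix seen in `wantDistinctLogs`, and `klog.KRef` renders the namespace/name pair as the structured `"pod"` value. A tiny sketch of that behavior, using a hypothetical pod name (exact output formatting depends on the klog configuration):

```go
package main

import (
	"k8s.io/klog/v2"
	"k8s.io/klog/v2/klogr"
)

func main() {
	// Name the logger the same way the controller does; every entry it emits
	// carries this name as a prefix.
	log := klogr.New().WithName("legacy-pod-cleaner-controller")

	// klog.KRef builds a namespace/name reference that is logged as a
	// structured value, matching the `"pod"={"name":...,"namespace":...}`
	// shape asserted in the unit test.
	log.Info("deleted legacy kube-cert-agent pod", "pod", klog.KRef("concierge", "some-pod"))

	klog.Flush()
}
```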
diff --git a/test/integration/concierge_kubecertagent_test.go b/test/integration/concierge_kubecertagent_test.go
index 8fded736..4b46ef05 100644
--- a/test/integration/concierge_kubecertagent_test.go
+++ b/test/integration/concierge_kubecertagent_test.go
@@ -9,8 +9,12 @@ import (
 	"testing"
 	"time"
 
+	"github.com/stretchr/testify/require"
 	corev1 "k8s.io/api/core/v1"
+	k8serrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/labels"
+	"k8s.io/utils/pointer"
 
 	conciergev1alpha "go.pinniped.dev/generated/latest/apis/concierge/config/v1alpha1"
 	"go.pinniped.dev/test/library"
@@ -88,3 +92,57 @@ func findSuccessfulStrategy(credentialIssuer *conciergev1alpha.CredentialIssuer,
 	}
 	return nil
 }
+
+func TestLegacyPodCleaner(t *testing.T) {
+	env := library.IntegrationEnv(t).WithCapability(library.ClusterSigningKeyIsAvailable)
+	ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
+	defer cancel()
+	kubeClient := library.NewKubernetesClientset(t)
+
+	// Pick the same labels that the legacy code would have used to run the kube-cert-agent pod.
+	legacyAgentLabels := map[string]string{}
+	for k, v := range env.ConciergeCustomLabels {
+		legacyAgentLabels[k] = v
+	}
+	legacyAgentLabels["app"] = env.ConciergeAppName
+	legacyAgentLabels["kube-cert-agent.pinniped.dev"] = "true"
+	legacyAgentLabels["pinniped.dev/test"] = ""
+
+	// Deploy a fake legacy agent pod using those labels.
+	pod, err := kubeClient.CoreV1().Pods(env.ConciergeNamespace).Create(ctx, &corev1.Pod{
+		ObjectMeta: metav1.ObjectMeta{
+			GenerateName: "test-legacy-kube-cert-agent-",
+			Labels:       legacyAgentLabels,
+			Annotations:  map[string]string{"pinniped.dev/testName": t.Name()},
+		},
+		Spec: corev1.PodSpec{
+			Containers: []corev1.Container{{
+				Name:    "sleeper",
+				Image:   "debian:10.9-slim",
+				Command: []string{"/bin/sleep", "infinity"},
+			}},
+		},
+	}, metav1.CreateOptions{})
+	require.NoError(t, err, "failed to create fake legacy agent pod")
+	t.Logf("deployed fake legacy agent pod %s/%s with labels %s", pod.Namespace, pod.Name, labels.SelectorFromSet(legacyAgentLabels).String())
+
+	// No matter what happens, clean up the agent pod at the end of the test (normally it will already have been deleted).
+	t.Cleanup(func() {
+		ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
+		defer cancel()
+		err := kubeClient.CoreV1().Pods(pod.Namespace).Delete(ctx, pod.Name, metav1.DeleteOptions{GracePeriodSeconds: pointer.Int64Ptr(0)})
+		if !k8serrors.IsNotFound(err) {
+			require.NoError(t, err, "failed to clean up fake legacy agent pod")
+		}
+	})
+
+	// Expect the legacy-pod-cleaner controller to delete the pod.
+	library.RequireEventuallyWithoutError(t, func() (bool, error) {
+		_, err := kubeClient.CoreV1().Pods(pod.Namespace).Get(ctx, pod.Name, metav1.GetOptions{})
+		if k8serrors.IsNotFound(err) {
+			t.Logf("fake legacy agent pod %s/%s was deleted as expected", pod.Namespace, pod.Name)
+			return true, nil
+		}
+		return false, err
+	}, 60*time.Second, 1*time.Second)
+}
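Finally, on the integration test's wait loop: `library.RequireEventuallyWithoutError` is Pinniped's own polling helper, but the shape of its condition function matches the stock `wait.PollImmediate` primitive from apimachinery — return `(true, nil)` to stop successfully, `(false, nil)` to keep polling, or a non-nil error to abort. An equivalent sketch using that standard primitive, assuming only a live clientset (the helper name `waitForPodDeleted` is hypothetical):

```go
package main

import (
	"context"
	"fmt"
	"time"

	k8serrors "k8s.io/apimachinery/pkg/api/errors"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
)

// waitForPodDeleted polls until the named pod returns NotFound, mirroring the
// condition used by the integration test above.
func waitForPodDeleted(ctx context.Context, client kubernetes.Interface, namespace, name string) error {
	return wait.PollImmediate(1*time.Second, 60*time.Second, func() (bool, error) {
		_, err := client.CoreV1().Pods(namespace).Get(ctx, name, metav1.GetOptions{})
		switch {
		case k8serrors.IsNotFound(err):
			return true, nil // pod is gone: stop polling successfully
		case err != nil:
			return false, err // unexpected error: abort the poll
		default:
			return false, nil // pod still exists: keep polling
		}
	})
}

func main() {
	// Requires a live cluster client to run; shown for shape only.
	fmt.Println("see waitForPodDeleted")
}
```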