kubecertagent: fix flakey tests

This commit makes the following changes to the kube cert agent tests:

1. Informers are synced on start using the controllerinit code
2. Deployment client and informer are synced per controller sync loop
3. Controller sync loop exits after two consistent errors
4. Use assert instead of require to avoid ending the test early

Signed-off-by: Monis Khan <mok@vmware.com>
This commit is contained in:
Monis Khan 2021-09-16 10:03:43 -04:00
parent 95b9782549
commit 09467d3e24
No known key found for this signature in database
GPG Key ID: 52C90ADA01B269B8
5 changed files with 118 additions and 83 deletions

View File

@ -184,7 +184,6 @@ func newAgentController(
clock clock.Clock, clock clock.Clock,
execCache *cache.Expiring, execCache *cache.Expiring,
log logr.Logger, log logr.Logger,
options ...controllerlib.Option,
) controllerlib.Controller { ) controllerlib.Controller {
return controllerlib.New( return controllerlib.New(
controllerlib.Config{ controllerlib.Config{
@ -204,47 +203,45 @@ func newAgentController(
execCache: execCache, execCache: execCache,
}, },
}, },
append([]controllerlib.Option{ controllerlib.WithInformer(
controllerlib.WithInformer( kubeSystemPods,
kubeSystemPods, pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool {
pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool { return controllerManagerLabels.Matches(labels.Set(obj.GetLabels()))
return controllerManagerLabels.Matches(labels.Set(obj.GetLabels())) }),
}), controllerlib.InformerOption{},
controllerlib.InformerOption{}, ),
), controllerlib.WithInformer(
controllerlib.WithInformer( agentDeployments,
agentDeployments, pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool {
pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool { return obj.GetNamespace() == cfg.Namespace && obj.GetName() == cfg.deploymentName()
return obj.GetNamespace() == cfg.Namespace && obj.GetName() == cfg.deploymentName() }),
}), controllerlib.InformerOption{},
controllerlib.InformerOption{}, ),
), controllerlib.WithInformer(
controllerlib.WithInformer( agentPods,
agentPods, pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool {
pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool { return agentLabels.Matches(labels.Set(obj.GetLabels()))
return agentLabels.Matches(labels.Set(obj.GetLabels())) }),
}), controllerlib.InformerOption{},
controllerlib.InformerOption{}, ),
), controllerlib.WithInformer(
controllerlib.WithInformer( kubePublicConfigMaps,
kubePublicConfigMaps, pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool {
pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool { return obj.GetNamespace() == ClusterInfoNamespace && obj.GetName() == clusterInfoName
return obj.GetNamespace() == ClusterInfoNamespace && obj.GetName() == clusterInfoName }),
}), controllerlib.InformerOption{},
controllerlib.InformerOption{}, ),
), controllerlib.WithInformer(
controllerlib.WithInformer( credentialIssuers,
credentialIssuers, pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool {
pinnipedcontroller.SimpleFilterWithSingletonQueue(func(obj metav1.Object) bool { return obj.GetName() == cfg.CredentialIssuerName
return obj.GetName() == cfg.CredentialIssuerName }),
}), controllerlib.InformerOption{},
controllerlib.InformerOption{}, ),
), // Be sure to run once even to make sure the CredentialIssuer is updated if there are no controller manager
// Be sure to run once even to make sure the CredentialIssuer is updated if there are no controller manager // pods. We should be able to pass an empty key since we don't use the key in the sync (we sync
// pods. We should be able to pass an empty key since we don't use the key in the sync (we sync // the world).
// the world). controllerlib.WithInitialEvent(controllerlib.Key{}),
controllerlib.WithInitialEvent(controllerlib.Key{}),
}, options...)...,
) )
} }

View File

@ -14,6 +14,7 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
appsv1 "k8s.io/api/apps/v1" appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1" corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource" "k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime"
@ -21,6 +22,7 @@ import (
"k8s.io/apimachinery/pkg/util/cache" "k8s.io/apimachinery/pkg/util/cache"
"k8s.io/apimachinery/pkg/util/clock" "k8s.io/apimachinery/pkg/util/clock"
"k8s.io/client-go/informers" "k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes"
kubefake "k8s.io/client-go/kubernetes/fake" kubefake "k8s.io/client-go/kubernetes/fake"
coretesting "k8s.io/client-go/testing" coretesting "k8s.io/client-go/testing"
"k8s.io/utils/pointer" "k8s.io/utils/pointer"
@ -29,11 +31,13 @@ import (
conciergefake "go.pinniped.dev/generated/latest/client/concierge/clientset/versioned/fake" conciergefake "go.pinniped.dev/generated/latest/client/concierge/clientset/versioned/fake"
conciergeinformers "go.pinniped.dev/generated/latest/client/concierge/informers/externalversions" conciergeinformers "go.pinniped.dev/generated/latest/client/concierge/informers/externalversions"
"go.pinniped.dev/internal/controller/kubecertagent/mocks" "go.pinniped.dev/internal/controller/kubecertagent/mocks"
"go.pinniped.dev/internal/controllerinit"
"go.pinniped.dev/internal/controllerlib" "go.pinniped.dev/internal/controllerlib"
"go.pinniped.dev/internal/here" "go.pinniped.dev/internal/here"
"go.pinniped.dev/internal/kubeclient" "go.pinniped.dev/internal/kubeclient"
"go.pinniped.dev/internal/testutil" "go.pinniped.dev/internal/testutil"
"go.pinniped.dev/internal/testutil/testlogger" "go.pinniped.dev/internal/testutil/testlogger"
"go.pinniped.dev/test/testlib"
) )
func TestAgentController(t *testing.T) { func TestAgentController(t *testing.T) {
@ -496,12 +500,10 @@ func TestAgentController(t *testing.T) {
}, },
wantAgentDeployment: healthyAgentDeploymentWithOldStyleSelector, // couldn't be deleted, so it didn't change wantAgentDeployment: healthyAgentDeploymentWithOldStyleSelector, // couldn't be deleted, so it didn't change
// delete to try to recreate deployment when Selector field changes, but delete always fails, so keeps trying to delete // delete to try to recreate deployment when Selector field changes, but delete always fails, so keeps trying to delete
wantDeploymentActionVerbs: []string{"list", "watch", "delete", "delete", "delete", "delete"}, wantDeploymentActionVerbs: []string{"list", "watch", "delete", "delete"},
wantDeploymentDeleteActionOpts: []metav1.DeleteOptions{ wantDeploymentDeleteActionOpts: []metav1.DeleteOptions{
testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion), testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion),
testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion), testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion),
testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion),
testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion),
}, },
wantStrategy: &configv1alpha1.CredentialIssuerStrategy{ wantStrategy: &configv1alpha1.CredentialIssuerStrategy{
Type: configv1alpha1.KubeClusterSigningCertificateStrategyType, Type: configv1alpha1.KubeClusterSigningCertificateStrategyType,
@ -536,7 +538,7 @@ func TestAgentController(t *testing.T) {
}, },
wantAgentDeployment: nil, // was deleted, but couldn't be recreated wantAgentDeployment: nil, // was deleted, but couldn't be recreated
// delete to try to recreate deployment when Selector field changes, but create always fails, so keeps trying to recreate // delete to try to recreate deployment when Selector field changes, but create always fails, so keeps trying to recreate
wantDeploymentActionVerbs: []string{"list", "watch", "delete", "create", "create", "create", "create"}, wantDeploymentActionVerbs: []string{"list", "watch", "delete", "create", "create"},
wantDeploymentDeleteActionOpts: []metav1.DeleteOptions{ wantDeploymentDeleteActionOpts: []metav1.DeleteOptions{
testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion), testutil.NewPreconditions(healthyAgentDeploymentWithOldStyleSelector.UID, healthyAgentDeploymentWithOldStyleSelector.ResourceVersion),
}, },
@ -1029,53 +1031,54 @@ func TestAgentController(t *testing.T) {
fakeClock, fakeClock,
execCache, execCache,
log, log,
controllerlib.WithMaxRetries(1),
) )
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel() defer cancel()
errorMessages := runControllerUntilQuiet(ctx, t, controller, kubeInformers, conciergeInformers) errorMessages := runControllerUntilQuiet(ctx, t, controller, hasDeploymentSynced(trackDeleteKubeClient, kubeInformers), kubeInformers, conciergeInformers)
actualErrors := deduplicate(errorMessages) actualErrors := deduplicate(errorMessages)
require.Subsetf(t, actualErrors, tt.wantDistinctErrors, "required error(s) were not found in the actual errors") assert.Subsetf(t, actualErrors, tt.wantDistinctErrors, "required error(s) were not found in the actual errors")
allAllowedErrors := append([]string{}, tt.wantDistinctErrors...) allAllowedErrors := append([]string{}, tt.wantDistinctErrors...)
allAllowedErrors = append(allAllowedErrors, tt.alsoAllowUndesiredDistinctErrors...) allAllowedErrors = append(allAllowedErrors, tt.alsoAllowUndesiredDistinctErrors...)
require.Subsetf(t, allAllowedErrors, actualErrors, "actual errors contained additional error(s) which is not expected by the test") assert.Subsetf(t, allAllowedErrors, actualErrors, "actual errors contained additional error(s) which is not expected by the test")
assert.Equal(t, tt.wantDistinctLogs, deduplicate(log.Lines()), "unexpected logs") assert.Equal(t, tt.wantDistinctLogs, deduplicate(log.Lines()), "unexpected logs")
// Assert on all actions that happened to deployments. // Assert on all actions that happened to deployments.
var actualDeploymentActionVerbs []string var actualDeploymentActionVerbs []string
for _, a := range kubeClientset.Actions() { for _, a := range kubeClientset.Actions() {
if a.GetResource().Resource == "deployments" { if a.GetResource().Resource == "deployments" && a.GetVerb() != "get" { // ignore gets caused by hasDeploymentSynced
actualDeploymentActionVerbs = append(actualDeploymentActionVerbs, a.GetVerb()) actualDeploymentActionVerbs = append(actualDeploymentActionVerbs, a.GetVerb())
} }
} }
if tt.wantDeploymentActionVerbs != nil { if tt.wantDeploymentActionVerbs != nil {
require.Equal(t, tt.wantDeploymentActionVerbs, actualDeploymentActionVerbs) assert.Equal(t, tt.wantDeploymentActionVerbs, actualDeploymentActionVerbs)
} }
if tt.wantDeploymentDeleteActionOpts != nil { if tt.wantDeploymentDeleteActionOpts != nil {
require.Equal(t, tt.wantDeploymentDeleteActionOpts, *actualDeleteActionOpts) assert.Equal(t, tt.wantDeploymentDeleteActionOpts, *actualDeleteActionOpts)
} }
// Assert that the agent deployment is in the expected final state. // Assert that the agent deployment is in the expected final state.
deployments, err := kubeClientset.AppsV1().Deployments("concierge").List(ctx, metav1.ListOptions{}) deployments, err := kubeClientset.AppsV1().Deployments("concierge").List(ctx, metav1.ListOptions{})
require.NoError(t, err) require.NoError(t, err)
if tt.wantAgentDeployment == nil { if tt.wantAgentDeployment == nil {
require.Empty(t, deployments.Items, "did not expect an agent deployment") assert.Empty(t, deployments.Items, "did not expect an agent deployment")
} else { } else { //nolint: gocritic
require.Len(t, deployments.Items, 1, "expected a single agent deployment") if assert.Len(t, deployments.Items, 1, "expected a single agent deployment") {
require.Equal(t, tt.wantAgentDeployment, &deployments.Items[0]) assert.Equal(t, tt.wantAgentDeployment, &deployments.Items[0])
}
} }
// Assert that the CredentialIssuer is in the expected final state // Assert that the CredentialIssuer is in the expected final state
if tt.wantStrategy != nil { if tt.wantStrategy != nil {
credIssuer, err := conciergeClientset.ConfigV1alpha1().CredentialIssuers().Get(ctx, initialCredentialIssuer.Name, metav1.GetOptions{}) credIssuer, err := conciergeClientset.ConfigV1alpha1().CredentialIssuers().Get(ctx, initialCredentialIssuer.Name, metav1.GetOptions{})
require.NoError(t, err) ok := assert.NoError(t, err)
require.Len(t, credIssuer.Status.Strategies, 1, "expected a single strategy in the CredentialIssuer") if ok && assert.Len(t, credIssuer.Status.Strategies, 1, "expected a single strategy in the CredentialIssuer") {
require.Equal(t, tt.wantStrategy, &credIssuer.Status.Strategies[0]) assert.Equal(t, tt.wantStrategy, &credIssuer.Status.Strategies[0])
}
} }
}) })
} }
@ -1156,24 +1159,43 @@ func deduplicate(strings []string) []string {
return result return result
} }
func runControllerUntilQuiet(ctx context.Context, t *testing.T, controller controllerlib.Controller, informers ...interface{ Start(<-chan struct{}) }) []string { func runControllerUntilQuiet(ctx context.Context, t *testing.T, controller controllerlib.Controller, synced func(ctx context.Context, t *testing.T), informers ...controllerinit.Informer) []string {
ctx, cancel := context.WithCancel(ctx) ctx, cancel := context.WithCancel(ctx)
defer cancel() defer cancel()
errorStream := make(chan error) var syncErrs []error // track the errors we see during each iteration
errorStream := make(chan error, 100) // do not block the controller loop
controllerlib.TestWrap(t, controller, func(syncer controllerlib.Syncer) controllerlib.Syncer { controllerlib.TestWrap(t, controller, func(syncer controllerlib.Syncer) controllerlib.Syncer {
return controllerlib.SyncFunc(func(ctx controllerlib.Context) error { return controllerlib.SyncFunc(func(ctx controllerlib.Context) error {
synced(ctx.Context, t) // make sure that our informer has caught up with our client
// if we got the same error twice in a row, prevent the controller sync loop from running
if len(syncErrs) >= 2 {
lastErr := syncErrs[len(syncErrs)-1]
secondToLastErr := syncErrs[len(syncErrs)-2]
if lastErr != nil && secondToLastErr != nil && lastErr.Error() == secondToLastErr.Error() {
cancel() // not explicitly required but matches our intent
return nil
}
}
err := syncer.Sync(ctx) err := syncer.Sync(ctx)
errorStream <- err errorStream <- err
syncErrs = append(syncErrs, err)
return err return err
}) })
}) })
for _, informer := range informers { // start and sync the informers before running the controller
informer.Start(ctx.Done()) runController, err := controllerinit.Prepare(
} func(ctx context.Context) { controller.Run(ctx, 1) },
func(ctx context.Context, runner controllerinit.Runner) { runner(ctx) },
go controller.Run(ctx, 1) informers...,
)(ctx)
require.NoError(t, err)
go runController(ctx)
// Wait until the controller is quiet for two seconds. // Wait until the controller is quiet for two seconds.
var errorMessages []string var errorMessages []string
@ -1192,3 +1214,24 @@ done:
} }
return errorMessages return errorMessages
} }
func hasDeploymentSynced(client kubernetes.Interface, kubeInformers informers.SharedInformerFactory) func(ctx context.Context, t *testing.T) {
return func(ctx context.Context, t *testing.T) {
testlib.RequireEventually(t, func(requireEventually *require.Assertions) {
realDep, realErr := client.AppsV1().Deployments("concierge").
Get(ctx, "pinniped-concierge-kube-cert-agent", metav1.GetOptions{})
cachedDep, cachedErr := kubeInformers.Apps().V1().Deployments().Lister().Deployments("concierge").
Get("pinniped-concierge-kube-cert-agent")
if errors.IsNotFound(realErr) && errors.IsNotFound(cachedErr) {
return
}
requireEventually.NoError(realErr)
requireEventually.NoError(cachedErr)
requireEventually.Equal(realDep, cachedDep)
}, 2*time.Second, 100*time.Millisecond)
}
}

View File

@ -24,7 +24,6 @@ func NewLegacyPodCleanerController(
client *kubeclient.Client, client *kubeclient.Client,
agentPods corev1informers.PodInformer, agentPods corev1informers.PodInformer,
log logr.Logger, log logr.Logger,
options ...controllerlib.Option,
) controllerlib.Controller { ) controllerlib.Controller {
// legacyAgentLabels are the Kubernetes labels we previously added to agent pods (the new value is "v2"). // legacyAgentLabels are the Kubernetes labels we previously added to agent pods (the new value is "v2").
// We also expect these pods to have the "extra" labels configured on the Concierge. // We also expect these pods to have the "extra" labels configured on the Concierge.
@ -67,14 +66,12 @@ func NewLegacyPodCleanerController(
return nil return nil
}), }),
}, },
append([]controllerlib.Option{ controllerlib.WithInformer(
controllerlib.WithInformer( agentPods,
agentPods, pinnipedcontroller.SimpleFilter(func(obj metav1.Object) bool {
pinnipedcontroller.SimpleFilter(func(obj metav1.Object) bool { return obj.GetNamespace() == cfg.Namespace && legacyAgentSelector.Matches(labels.Set(obj.GetLabels()))
return obj.GetNamespace() == cfg.Namespace && legacyAgentSelector.Matches(labels.Set(obj.GetLabels())) }, nil),
}, nil), controllerlib.InformerOption{},
controllerlib.InformerOption{}, ),
),
}, options...)...,
) )
} }

View File

@ -18,7 +18,6 @@ import (
kubefake "k8s.io/client-go/kubernetes/fake" kubefake "k8s.io/client-go/kubernetes/fake"
coretesting "k8s.io/client-go/testing" coretesting "k8s.io/client-go/testing"
"go.pinniped.dev/internal/controllerlib"
"go.pinniped.dev/internal/kubeclient" "go.pinniped.dev/internal/kubeclient"
"go.pinniped.dev/internal/testutil" "go.pinniped.dev/internal/testutil"
"go.pinniped.dev/internal/testutil/testlogger" "go.pinniped.dev/internal/testutil/testlogger"
@ -175,13 +174,12 @@ func TestLegacyPodCleanerController(t *testing.T) {
&kubeclient.Client{Kubernetes: trackDeleteClient}, &kubeclient.Client{Kubernetes: trackDeleteClient},
kubeInformers.Core().V1().Pods(), kubeInformers.Core().V1().Pods(),
log, log,
controllerlib.WithMaxRetries(1),
) )
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel() defer cancel()
errorMessages := runControllerUntilQuiet(ctx, t, controller, kubeInformers) errorMessages := runControllerUntilQuiet(ctx, t, controller, func(_ context.Context, _ *testing.T) {}, kubeInformers)
assert.Equal(t, tt.wantDistinctErrors, deduplicate(errorMessages), "unexpected errors") assert.Equal(t, tt.wantDistinctErrors, deduplicate(errorMessages), "unexpected errors")
assert.Equal(t, tt.wantDistinctLogs, deduplicate(log.Lines()), "unexpected logs") assert.Equal(t, tt.wantDistinctLogs, deduplicate(log.Lines()), "unexpected logs")
assert.Equal(t, tt.wantActions, kubeClientset.Actions()[2:], "unexpected actions") assert.Equal(t, tt.wantActions, kubeClientset.Actions()[2:], "unexpected actions")

View File

@ -22,8 +22,8 @@ type RunnerWrapper func(context.Context, Runner)
// It is expected to be called in the main go routine since the construction can fail. // It is expected to be called in the main go routine since the construction can fail.
type RunnerBuilder func(context.Context) (Runner, error) type RunnerBuilder func(context.Context) (Runner, error)
// informer is the subset of SharedInformerFactory needed for starting an informer cache and waiting for it to sync. // Informer is the subset of SharedInformerFactory needed for starting an informer cache and waiting for it to sync.
type informer interface { type Informer interface {
Start(stopCh <-chan struct{}) Start(stopCh <-chan struct{})
WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool WaitForCacheSync(stopCh <-chan struct{}) map[reflect.Type]bool
} }
@ -31,7 +31,7 @@ type informer interface {
// Prepare returns RunnerBuilder that, when called: // Prepare returns RunnerBuilder that, when called:
// 1. Starts all provided informers and waits for them sync (and fails if they hang) // 1. Starts all provided informers and waits for them sync (and fails if they hang)
// 2. Returns a Runner that combines the Runner and RunnerWrapper passed into Prepare // 2. Returns a Runner that combines the Runner and RunnerWrapper passed into Prepare
func Prepare(controllers Runner, controllersWrapper RunnerWrapper, informers ...informer) RunnerBuilder { func Prepare(controllers Runner, controllersWrapper RunnerWrapper, informers ...Informer) RunnerBuilder {
return func(ctx context.Context) (Runner, error) { return func(ctx context.Context) (Runner, error) {
for _, informer := range informers { for _, informer := range informers {
informer := informer informer := informer