diff --git a/internal/controller/supervisorstorage/garbage_collector.go b/internal/controller/supervisorstorage/garbage_collector.go index 187f913d..5025060f 100644 --- a/internal/controller/supervisorstorage/garbage_collector.go +++ b/internal/controller/supervisorstorage/garbage_collector.go @@ -121,28 +121,40 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { continue } - if garbageCollectAfterTime.Before(frozenClock.Now()) { - storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey] - if isSessionStorage { - err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret) - if err != nil { - // Log the error for debugging purposes, but still carry on to delete the Secret despite the error. - plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret)) + if !garbageCollectAfterTime.Before(frozenClock.Now()) { + // not old enough yet + continue + } + + storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey] + if isSessionStorage { + err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret) + if err != nil { + // Log the error for debugging purposes, but still carry on to delete the Secret despite the error. + plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret)) + // If the error is of a type that is worth retrying, then not delete the Secret right away. + // A future call to Sync will try revocation again for that secret. However, if the Secret is + // getting too old, then just delete it anyway. We don't want to extend the lifetime of these + // session Secrets by too much time, since the garbage collector is the only thing that is + // cleaning them out of etcd storage. + fourHoursAgo := frozenClock.Now().Add(-4 * time.Hour) + if errors.As(err, &retryableRevocationError{}) && garbageCollectAfterTime.After(fourHoursAgo) { + continue } } - - err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{ - Preconditions: &metav1.Preconditions{ - UID: &secret.UID, - ResourceVersion: &secret.ResourceVersion, - }, - }) - if err != nil { - plog.WarningErr("failed to garbage collect resource", err, logKV(secret)) - continue - } - plog.Info("storage garbage collector deleted resource", logKV(secret)) } + + err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{ + Preconditions: &metav1.Preconditions{ + UID: &secret.UID, + ResourceVersion: &secret.ResourceVersion, + }, + }) + if err != nil { + plog.WarningErr("failed to garbage collect resource", err, logKV(secret)) + continue + } + plog.Info("storage garbage collector deleted resource", logKV(secret)) } return nil @@ -222,6 +234,7 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context. for _, p := range c.idpCache.GetOIDCIdentityProviders() { if p.GetName() == customSessionData.ProviderName && p.GetResourceUID() == customSessionData.ProviderUID { foundOIDCIdentityProviderI = p + break } } if foundOIDCIdentityProviderI == nil { @@ -231,13 +244,29 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context. // Revoke the upstream refresh token. This is a noop if the upstream provider does not offer a revocation endpoint. err := foundOIDCIdentityProviderI.RevokeRefreshToken(ctx, customSessionData.OIDC.UpstreamRefreshToken) if err != nil { - return err + // This could be a network failure, a 503 result which we should retry + // (see https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1), + // or any other non-200 response from the revocation endpoint. + // Regardless of which, it is probably worth retrying. + return retryableRevocationError{wrapped: err} } plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret)) return nil } +type retryableRevocationError struct { + wrapped error +} + +func (e retryableRevocationError) Error() string { + return fmt.Sprintf("retryable revocation error: %v", e.wrapped) +} + +func (e retryableRevocationError) Unwrap() error { + return e.wrapped +} + func logKV(secret *v1.Secret) []interface{} { return []interface{}{ "secretName", secret.Name, diff --git a/internal/controller/supervisorstorage/garbage_collector_test.go b/internal/controller/supervisorstorage/garbage_collector_test.go index 7e446934..042c4d38 100644 --- a/internal/controller/supervisorstorage/garbage_collector_test.go +++ b/internal/controller/supervisorstorage/garbage_collector_test.go @@ -625,7 +625,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there is a valid, expired authcode secret but the upstream revocation fails", func() { + when("there is a valid, recently expired authcode secret but the upstream revocation fails", func() { it.Before(func() { activeOIDCAuthcodeSession := &authorizationcode.Session{ Version: "2", @@ -654,7 +654,8 @@ func TestGarbageCollectorControllerSync(t *testing.T) { UID: "uid-123", ResourceVersion: "rv-123", Annotations: map[string]string{ - "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + // expired almost 4 hours ago, but not quite 4 hours + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add((-time.Hour * 4) + time.Second).Format(time.RFC3339), }, Labels: map[string]string{ "storage.pinniped.dev/type": authorizationcode.TypeLabelValue, @@ -672,7 +673,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) { r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) }) - it("should remove the secret anyway because it has expired", func() { + it("keeps the secret for a while longer so the revocation can be retried on a future sync", func() { happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). WithName("upstream-oidc-provider-name"). WithResourceUID("upstream-oidc-provider-uid"). @@ -691,7 +692,79 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }, ) - // The authcode session secrets is still deleted because it is expired. + // The authcode session secrets is not deleted. + r.Empty(kubeClient.Actions()) + }) + }) + + when("there is a valid, long-since expired authcode secret but the upstream revocation fails", func() { + it.Before(func() { + activeOIDCAuthcodeSession := &authorizationcode.Session{ + Version: "2", + Active: true, + Request: &fosite.Request{ + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamRefreshToken: "fake-upstream-refresh-token", + }, + }, + }, + }, + } + activeOIDCAuthcodeSessionJSON, err := json.Marshal(activeOIDCAuthcodeSession) + r.NoError(err) + activeOIDCAuthcodeSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "activeOIDCAuthcodeSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + // expired just over 4 hours ago + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add((-time.Hour * 4) - time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": authorizationcode.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": activeOIDCAuthcodeSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue, + } + _, err = authorizationcode.ReadFromSecret(activeOIDCAuthcodeSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid authcode secret") + r.NoError(kubeInformerClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) + r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) + }) + + it("deletes the secret because it has probably been retrying revocation for hours without success", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeRefreshTokenError(errors.New("some upstream revocation error")) // the upstream revocation will fail + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // Tried to revoke it, although this revocation will fail. + idpListerBuilder.RequireExactlyOneCallToRevokeRefreshToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeRefreshTokenArgs{ + Ctx: syncContext.Context, + RefreshToken: "fake-upstream-refresh-token", + }, + ) + + // The authcode session secrets is deleted. r.ElementsMatch( []kubetesting.Action{ kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"),