diff --git a/internal/controller/supervisorstorage/garbage_collector.go b/internal/controller/supervisorstorage/garbage_collector.go index 5f388b80..fb48ac4e 100644 --- a/internal/controller/supervisorstorage/garbage_collector.go +++ b/internal/controller/supervisorstorage/garbage_collector.go @@ -112,25 +112,30 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { timeString, ok := secret.Annotations[crud.SecretLifetimeAnnotationKey] if !ok { + // Secret did not request garbage collection via annotations, so skip deletion. continue } garbageCollectAfterTime, err := time.Parse(crud.SecretLifetimeAnnotationDateFormat, timeString) if err != nil { plog.WarningErr("could not parse resource timestamp for garbage collection", err, logKV(secret)...) + // Can't tell if the Secret has expired or not, so skip deletion. continue } if !garbageCollectAfterTime.Before(frozenClock.Now()) { - // not old enough yet + // Secret is not old enough yet, so skip deletion. continue } + // The Secret has expired. Check if it is a downstream session storage Secret, which may require extra processing. storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey] if isSessionStorage { - err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret) - if err != nil { - plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret)...) + revokeErr := c.maybeRevokeUpstreamOIDCToken(ctx.Context, storageType, secret) + if revokeErr != nil { + plog.WarningErr("garbage collector could not revoke upstream OIDC token", revokeErr, logKV(secret)...) + // Note that RevokeToken (called by the private helper) might have returned an error of type + // provider.RetryableRevocationError, in which case we would like to retry the revocation later. // If the error is of a type that is worth retrying, then do not delete the Secret right away. // A future call to Sync will try revocation again for that secret. However, if the Secret is // getting too old, then just delete it anyway. We don't want to extend the lifetime of these @@ -138,13 +143,15 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { // cleaning them out of etcd storage. fourHoursAgo := frozenClock.Now().Add(-4 * time.Hour) nowIsLessThanFourHoursBeyondSecretGCTime := garbageCollectAfterTime.After(fourHoursAgo) - if errors.As(err, &retryableRevocationError{}) && nowIsLessThanFourHoursBeyondSecretGCTime { + if errors.As(revokeErr, &provider.RetryableRevocationError{}) && nowIsLessThanFourHoursBeyondSecretGCTime { // Hasn't been very long since secret expired, so skip deletion to try revocation again later. + plog.Trace("garbage collector keeping Secret to retry upstream OIDC token revocation later", logKV(secret)...) continue } } } + // Garbage collect the Secret. err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{ Preconditions: &metav1.Preconditions{ UID: &secret.UID, @@ -161,30 +168,36 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { return nil } -func (c *garbageCollectorController) maybeRevokeUpstreamOIDCRefreshToken(ctx context.Context, storageType string, secret *v1.Secret) error { - // All session storage types hold upstream refresh tokens when the upstream IDP is an OIDC provider. +func (c *garbageCollectorController) maybeRevokeUpstreamOIDCToken(ctx context.Context, storageType string, secret *v1.Secret) error { + // All downstream session storage types hold upstream tokens when the upstream IDP is an OIDC provider. // However, some of them will be outdated because they are not updated by fosite after creation. // Our goal below is to always revoke the latest upstream refresh token that we are holding for the - // session, and only the latest. + // session, and only the latest, or to revoke the original upstream access token. Note that we don't + // bother to store new upstream access tokens seen during upstream refresh because we only need to store + // the upstream access token when we intend to use it *instead* of an upstream refresh token. + // This implies that all the storage types will contain a copy of the original upstream access token, + // since it is never updated in the session. Thus, we can use the same logic to decide which upstream + // access token to revoke as we use for upstream refresh tokens, which allows us to avoid revoking an + // upstream access token more than once. switch storageType { case authorizationcode.TypeLabelValue: authorizeCodeSession, err := authorizationcode.ReadFromSecret(secret) if err != nil { return err } - // Check if this downstream authcode was already used. If it was already used (i.e. not active anymore), then - // the latest upstream refresh token can be found in one of the other storage types handled below instead. + // Check if this downstream authcode was already used. If it was already used (i.e. not active anymore), + // then the latest upstream token can be found in one of the other storage types handled below instead. if !authorizeCodeSession.Active { return nil } - // When the downstream authcode was never used, then its storage must contain the latest upstream refresh token. - return c.revokeUpstreamOIDCRefreshToken(ctx, authorizeCodeSession.Request.Session.(*psession.PinnipedSession).Custom, secret) + // When the downstream authcode was never used, then its storage must contain the latest upstream token. + return c.tryRevokeUpstreamOIDCToken(ctx, authorizeCodeSession.Request.Session.(*psession.PinnipedSession).Custom, secret) case accesstoken.TypeLabelValue: // For access token storage, check if the "offline_access" scope was granted on the downstream session. - // If it was granted, then the latest upstream refresh token should be found in the refresh token storage instead. + // If it was granted, then the latest upstream token should be found in the refresh token storage instead. // If it was not granted, then the user could not possibly have performed a downstream refresh, so the - // access token storage has the latest version of the upstream refresh token. + // access token storage has the latest version of the upstream token. accessTokenSession, err := accesstoken.ReadFromSecret(secret) if err != nil { return err @@ -193,29 +206,29 @@ func (c *garbageCollectorController) maybeRevokeUpstreamOIDCRefreshToken(ctx con if slices.Contains(accessTokenSession.Request.GetGrantedScopes(), coreosoidc.ScopeOfflineAccess) { return nil } - return c.revokeUpstreamOIDCRefreshToken(ctx, pinnipedSession.Custom, secret) + return c.tryRevokeUpstreamOIDCToken(ctx, pinnipedSession.Custom, secret) case refreshtoken.TypeLabelValue: - // For refresh token storage, always revoke its upstream refresh token. This refresh token storage could - // be the result of the initial downstream authcode exchange, or it could be the result of a downstream - // refresh. Either way, it always contains the latest upstream refresh token when it exists. + // For refresh token storage, always revoke its upstream token. This refresh token storage could be + // the result of the initial downstream authcode exchange, or it could be the result of a downstream + // refresh. Either way, it always contains the latest upstream token when it exists. refreshTokenSession, err := refreshtoken.ReadFromSecret(secret) if err != nil { return err } - return c.revokeUpstreamOIDCRefreshToken(ctx, refreshTokenSession.Request.Session.(*psession.PinnipedSession).Custom, secret) + return c.tryRevokeUpstreamOIDCToken(ctx, refreshTokenSession.Request.Session.(*psession.PinnipedSession).Custom, secret) case pkce.TypeLabelValue: - // For PKCE storage, its very existence means that the authcode was never exchanged, because these - // are deleted during authcode exchange. No need to do anything, since the upstream refresh token - // revocation is handled by authcode storage case above. + // For PKCE storage, its very existence means that the downstream authcode was never exchanged, because + // these are deleted during downstream authcode exchange. No need to do anything, since the upstream + // token revocation is handled by authcode storage case above. return nil case openidconnect.TypeLabelValue: // For OIDC storage, there is no need to do anything for reasons similar to the PKCE storage. - // These are not deleted during authcode exchange, probably due to a bug in fosite, even though it - // will never be read or updated again. However, the refresh token contained inside will be revoked - // by one of the other cases above. + // These are not deleted during downstream authcode exchange, probably due to a bug in fosite, even + // though it will never be read or updated again. However, the upstream token contained inside will + // be revoked by one of the other cases above. return nil default: @@ -224,8 +237,8 @@ func (c *garbageCollectorController) maybeRevokeUpstreamOIDCRefreshToken(ctx con } } -func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context.Context, customSessionData *psession.CustomSessionData, secret *v1.Secret) error { - // When session was for another upstream IDP type, e.g. LDAP, there is no upstream OIDC refresh token involved. +func (c *garbageCollectorController) tryRevokeUpstreamOIDCToken(ctx context.Context, customSessionData *psession.CustomSessionData, secret *v1.Secret) error { + // When session was for another upstream IDP type, e.g. LDAP, there is no upstream OIDC token involved. if customSessionData.ProviderType != psession.ProviderTypeOIDC { return nil } @@ -242,32 +255,29 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context. return fmt.Errorf("could not find upstream OIDC provider named %q with resource UID %q", customSessionData.ProviderName, customSessionData.ProviderUID) } - // Revoke the upstream refresh token. This is a noop if the upstream provider does not offer a revocation endpoint. - err := foundOIDCIdentityProviderI.RevokeToken(ctx, customSessionData.OIDC.UpstreamRefreshToken, provider.RefreshTokenType) - if err != nil { - // This could be a network failure, a 503 result which we should retry - // (see https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1), - // or any other non-200 response from the revocation endpoint. - // Regardless of which, it is probably worth retrying. - return retryableRevocationError{wrapped: err} + // In practice, there should only be one of these tokens saved in the session. + upstreamRefreshToken := customSessionData.OIDC.UpstreamRefreshToken + upstreamAccessToken := customSessionData.OIDC.UpstreamAccessToken + + if upstreamRefreshToken != "" { + err := foundOIDCIdentityProviderI.RevokeToken(ctx, upstreamRefreshToken, provider.RefreshTokenType) + if err != nil { + return err + } + plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret)...) + } + + if upstreamAccessToken != "" { + err := foundOIDCIdentityProviderI.RevokeToken(ctx, upstreamAccessToken, provider.AccessTokenType) + if err != nil { + return err + } + plog.Trace("garbage collector successfully revoked upstream OIDC access token (or provider has no revocation endpoint)", logKV(secret)...) } - plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret)...) return nil } -type retryableRevocationError struct { - wrapped error -} - -func (e retryableRevocationError) Error() string { - return fmt.Sprintf("retryable revocation error: %v", e.wrapped) -} - -func (e retryableRevocationError) Unwrap() error { - return e.wrapped -} - func logKV(secret *v1.Secret) []interface{} { return []interface{}{ "secretName", secret.Name, diff --git a/internal/controller/supervisorstorage/garbage_collector_test.go b/internal/controller/supervisorstorage/garbage_collector_test.go index d442c474..84f293c6 100644 --- a/internal/controller/supervisorstorage/garbage_collector_test.go +++ b/internal/controller/supervisorstorage/garbage_collector_test.go @@ -271,7 +271,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there are valid, expired authcode secrets", func() { + when("there are valid, expired authcode secrets which contain upstream refresh tokens", func() { it.Before(func() { activeOIDCAuthcodeSession := &authorizationcode.Session{ Version: "2", @@ -400,6 +400,135 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) + when("there are valid, expired authcode secrets which contain upstream access tokens", func() { + it.Before(func() { + activeOIDCAuthcodeSession := &authorizationcode.Session{ + Version: "2", + Active: true, + Request: &fosite.Request{ + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "fake-upstream-access-token", + }, + }, + }, + }, + } + activeOIDCAuthcodeSessionJSON, err := json.Marshal(activeOIDCAuthcodeSession) + r.NoError(err) + activeOIDCAuthcodeSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "activeOIDCAuthcodeSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": authorizationcode.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": activeOIDCAuthcodeSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue, + } + _, err = authorizationcode.ReadFromSecret(activeOIDCAuthcodeSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid authcode secret") + r.NoError(kubeInformerClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) + r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) + + inactiveOIDCAuthcodeSession := &authorizationcode.Session{ + Version: "2", + Active: false, + Request: &fosite.Request{ + ID: "request-id-2", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "other-fake-upstream-access-token", + }, + }, + }, + }, + } + inactiveOIDCAuthcodeSessionJSON, err := json.Marshal(inactiveOIDCAuthcodeSession) + r.NoError(err) + inactiveOIDCAuthcodeSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "inactiveOIDCAuthcodeSession", + Namespace: installedInNamespace, + UID: "uid-456", + ResourceVersion: "rv-456", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": authorizationcode.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": inactiveOIDCAuthcodeSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue, + } + _, err = authorizationcode.ReadFromSecret(inactiveOIDCAuthcodeSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid authcode secret") + r.NoError(kubeInformerClient.Tracker().Add(inactiveOIDCAuthcodeSessionSecret)) + r.NoError(kubeClient.Tracker().Add(inactiveOIDCAuthcodeSessionSecret)) + }) + + it("should revoke upstream tokens only from the active authcode secrets and delete them all", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeTokenError(nil) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // The upstream refresh token is only revoked for the active authcode session. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-access-token", + TokenType: provider.AccessTokenType, + }, + ) + + // Both authcode session secrets are deleted. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"), + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "inactiveOIDCAuthcodeSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + testutil.NewPreconditions("uid-456", "rv-456"), + }, + *deleteOptions, + ) + }) + }) + when("there is an invalid, expired authcode secret", func() { it.Before(func() { invalidOIDCAuthcodeSession := &authorizationcode.Session{ @@ -674,11 +803,12 @@ func TestGarbageCollectorControllerSync(t *testing.T) { r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) }) - it("keeps the secret for a while longer so the revocation can be retried on a future sync", func() { + it("keeps the secret for a while longer so the revocation can be retried on a future sync for retryable errors", func() { happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). WithName("upstream-oidc-provider-name"). WithResourceUID("upstream-oidc-provider-uid"). - WithRevokeTokenError(errors.New("some upstream revocation error")) // the upstream revocation will fail + // make the upstream revocation fail in a retryable way + WithRevokeTokenError(provider.NewRetryableRevocationError(errors.New("some retryable upstream revocation error"))) idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) startInformersAndController(idpListerBuilder.Build()) @@ -697,6 +827,42 @@ func TestGarbageCollectorControllerSync(t *testing.T) { // The authcode session secrets is not deleted. r.Empty(kubeClient.Actions()) }) + + it("deletes the secret for non-retryable errors", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + // make the upstream revocation fail in a non-retryable way + WithRevokeTokenError(errors.New("some upstream revocation error not worth retrying")) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // Tried to revoke it, although this revocation will fail. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-refresh-token", + TokenType: provider.RefreshTokenType, + }, + ) + + // The authcode session secrets is still deleted because it is expired and the revocation error is not retryable. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + }, + *deleteOptions, + ) + }) }) when("there is a valid, long-since expired authcode secret but the upstream revocation fails", func() { @@ -783,7 +949,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there are valid, expired access token secrets", func() { + when("there are valid, expired access token secrets which contain upstream refresh tokens", func() { it.Before(func() { offlineAccessGrantedOIDCAccessTokenSession := &accesstoken.Session{ Version: "2", @@ -912,7 +1078,136 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there are valid, expired refresh secrets", func() { + when("there are valid, expired access token secrets which contain upstream access tokens", func() { + it.Before(func() { + offlineAccessGrantedOIDCAccessTokenSession := &accesstoken.Session{ + Version: "2", + Request: &fosite.Request{ + GrantedScope: fosite.Arguments{"scope1", "scope2", "offline_access"}, + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "offline-access-granted-fake-upstream-access-token", + }, + }, + }, + }, + } + offlineAccessGrantedOIDCAccessTokenSessionJSON, err := json.Marshal(offlineAccessGrantedOIDCAccessTokenSession) + r.NoError(err) + offlineAccessGrantedOIDCAccessTokenSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "offlineAccessGrantedOIDCAccessTokenSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": accesstoken.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": offlineAccessGrantedOIDCAccessTokenSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + accesstoken.TypeLabelValue, + } + _, err = accesstoken.ReadFromSecret(offlineAccessGrantedOIDCAccessTokenSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid accesstoken secret") + r.NoError(kubeInformerClient.Tracker().Add(offlineAccessGrantedOIDCAccessTokenSessionSecret)) + r.NoError(kubeClient.Tracker().Add(offlineAccessGrantedOIDCAccessTokenSessionSecret)) + + offlineAccessNotGrantedOIDCAccessTokenSession := &accesstoken.Session{ + Version: "2", + Request: &fosite.Request{ + GrantedScope: fosite.Arguments{"scope1", "scope2"}, + ID: "request-id-2", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "fake-upstream-access-token", + }, + }, + }, + }, + } + offlineAccessNotGrantedOIDCAccessTokenSessionJSON, err := json.Marshal(offlineAccessNotGrantedOIDCAccessTokenSession) + r.NoError(err) + offlineAccessNotGrantedOIDCAccessTokenSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "offlineAccessNotGrantedOIDCAccessTokenSession", + Namespace: installedInNamespace, + UID: "uid-456", + ResourceVersion: "rv-456", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": accesstoken.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": offlineAccessNotGrantedOIDCAccessTokenSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + accesstoken.TypeLabelValue, + } + _, err = accesstoken.ReadFromSecret(offlineAccessNotGrantedOIDCAccessTokenSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid accesstoken secret") + r.NoError(kubeInformerClient.Tracker().Add(offlineAccessNotGrantedOIDCAccessTokenSessionSecret)) + r.NoError(kubeClient.Tracker().Add(offlineAccessNotGrantedOIDCAccessTokenSessionSecret)) + }) + + it("should revoke upstream tokens only from the active authcode secrets and delete them all", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeTokenError(nil) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // The upstream refresh token is only revoked for the downstream session which had offline_access granted. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-access-token", + TokenType: provider.AccessTokenType, + }, + ) + + // Both session secrets are deleted. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "offlineAccessGrantedOIDCAccessTokenSession"), + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "offlineAccessNotGrantedOIDCAccessTokenSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + testutil.NewPreconditions("uid-456", "rv-456"), + }, + *deleteOptions, + ) + }) + }) + + when("there are valid, expired refresh secrets which contain upstream refresh tokens", func() { it.Before(func() { oidcRefreshSession := &refreshtoken.Session{ Version: "2", @@ -994,6 +1289,88 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) + when("there are valid, expired refresh secrets which contain upstream access tokens", func() { + it.Before(func() { + oidcRefreshSession := &refreshtoken.Session{ + Version: "2", + Request: &fosite.Request{ + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "fake-upstream-access-token", + }, + }, + }, + }, + } + oidcRefreshSessionJSON, err := json.Marshal(oidcRefreshSession) + r.NoError(err) + oidcRefreshSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "oidcRefreshSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": refreshtoken.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": oidcRefreshSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + refreshtoken.TypeLabelValue, + } + _, err = refreshtoken.ReadFromSecret(oidcRefreshSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid refresh token secret") + r.NoError(kubeInformerClient.Tracker().Add(oidcRefreshSessionSecret)) + r.NoError(kubeClient.Tracker().Add(oidcRefreshSessionSecret)) + }) + + it("should revoke upstream tokens from the secrets and delete them all", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeTokenError(nil) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // The upstream refresh token is revoked. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-access-token", + TokenType: provider.AccessTokenType, + }, + ) + + // The secret is deleted. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "oidcRefreshSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + }, + *deleteOptions, + ) + }) + }) + when("very little time has passed since the previous sync call", func() { it.Before(func() { // Add a secret that will expire in 20 seconds. diff --git a/internal/oidc/provider/dynamic_upstream_idp_provider.go b/internal/oidc/provider/dynamic_upstream_idp_provider.go index e5be51ff..78ed7998 100644 --- a/internal/oidc/provider/dynamic_upstream_idp_provider.go +++ b/internal/oidc/provider/dynamic_upstream_idp_provider.go @@ -5,6 +5,7 @@ package provider import ( "context" + "fmt" "net/url" "sync" @@ -77,6 +78,9 @@ type UpstreamOIDCIdentityProviderI interface { PerformRefresh(ctx context.Context, refreshToken string) (*oauth2.Token, error) // RevokeToken will attempt to revoke the given token, if the provider has a revocation endpoint. + // It may return an error wrapped by a RetryableRevocationError, which is an error indicating that it may + // be worth trying to revoke the same token again later. Any other error returned should be assumed to + // represent an error such that it is not worth retrying revocation later, even though revocation failed. RevokeToken(ctx context.Context, token string, tokenType RevocableTokenType) error // ValidateToken will validate the ID token. It will also merge the claims from the userinfo endpoint response @@ -163,3 +167,19 @@ func (p *dynamicUpstreamIDPProvider) GetActiveDirectoryIdentityProviders() []Ups defer p.mutex.RUnlock() return p.activeDirectoryUpstreams } + +type RetryableRevocationError struct { + wrapped error +} + +func NewRetryableRevocationError(wrapped error) RetryableRevocationError { + return RetryableRevocationError{wrapped: wrapped} +} + +func (e RetryableRevocationError) Error() string { + return fmt.Sprintf("retryable revocation error: %v", e.wrapped) +} + +func (e RetryableRevocationError) Unwrap() error { + return e.wrapped +} diff --git a/internal/upstreamoidc/upstreamoidc.go b/internal/upstreamoidc/upstreamoidc.go index 310a6df4..d3f24b9f 100644 --- a/internal/upstreamoidc/upstreamoidc.go +++ b/internal/upstreamoidc/upstreamoidc.go @@ -138,6 +138,9 @@ func (p *ProviderConfig) PerformRefresh(ctx context.Context, refreshToken string } // RevokeToken will attempt to revoke the given token, if the provider has a revocation endpoint. +// It may return an error wrapped by a RetryableRevocationError, which is an error indicating that it may +// be worth trying to revoke the same token again later. Any other error returned should be assumed to +// represent an error such that it is not worth retrying revocation later, even though revocation failed. func (p *ProviderConfig) RevokeToken(ctx context.Context, token string, tokenType provider.RevocableTokenType) error { if p.RevocationURL == nil { plog.Trace("RevokeToken() was called but upstream provider has no available revocation endpoint", @@ -197,22 +200,25 @@ func (p *ProviderConfig) tryRevokeToken( resp, err := httpClient.Do(req) if err != nil { // Couldn't connect to the server or some similar error. - return false, err + // Could be a temporary network problem, so it might be worth retrying. + return false, provider.NewRetryableRevocationError(err) } defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: + status := resp.StatusCode + + switch { + case status == http.StatusOK: // Success! plog.Trace("RevokeToken() got 200 OK response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth) return false, nil - case http.StatusBadRequest: + case status == http.StatusBadRequest: // Bad request might be due to bad client auth method. Try to detect that. plog.Trace("RevokeToken() got 400 Bad Request response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth) body, err := io.ReadAll(resp.Body) if err != nil { return false, - fmt.Errorf("error reading response body on response with status code %d: %w", resp.StatusCode, err) + fmt.Errorf("error reading response body on response with status code %d: %w", status, err) } var parsedResp struct { ErrorType string `json:"error"` @@ -222,21 +228,34 @@ func (p *ProviderConfig) tryRevokeToken( err = json.Unmarshal(body, &parsedResp) if err != nil { return false, - fmt.Errorf("error parsing response body %q on response with status code %d: %w", bodyStr, resp.StatusCode, err) + fmt.Errorf("error parsing response body %q on response with status code %d: %w", bodyStr, status, err) } - err = fmt.Errorf("server responded with status %d with body: %s", resp.StatusCode, bodyStr) + err = fmt.Errorf("server responded with status %d with body: %s", status, bodyStr) if parsedResp.ErrorType != "invalid_client" { - // Got an error unrelated to client auth, so not worth trying again. + // Got an error unrelated to client auth, so not worth trying client auth again. Also, these are errors + // of the type where the server is pretty conclusively rejecting our request, so they are generally + // not worth trying again later either. + // These errors could be any of the other errors from https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 + // or "unsupported_token_type" from https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1 + // or could be some unspecified custom error added by the OIDC provider. return false, err } // Got an "invalid_client" response, which might mean client auth failed, so it may be worth trying again // using another client auth method. See https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 plog.Trace("RevokeToken()'s 400 Bad Request response from provider's revocation endpoint was type 'invalid_client'", "providerName", p.Name, "usedBasicAuth", useBasicAuth) return true, err + case status >= 500 && status <= 599: + // The spec says 503 Service Unavailable should be retried by the client later. + // See https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1. + // Other forms of 5xx server errors are not particularly conclusive failures. For example, gateway errors could + // be caused by an underlying problem which could potentially become resolved in the near future. We'll be + // optimistic and call all 5xx errors retryable. + plog.Trace("RevokeToken() got unexpected error response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth, "statusCode", status) + return false, provider.NewRetryableRevocationError(fmt.Errorf("server responded with status %d", status)) default: - // Any other error is probably not due to failed client auth. - plog.Trace("RevokeToken() got unexpected error response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth, "statusCode", resp.StatusCode) - return false, fmt.Errorf("server responded with status %d", resp.StatusCode) + // Any other error is probably not due to failed client auth, and is probably not worth retrying later. + plog.Trace("RevokeToken() got unexpected error response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth, "statusCode", status) + return false, fmt.Errorf("server responded with status %d", status) } } diff --git a/internal/upstreamoidc/upstreamoidc_test.go b/internal/upstreamoidc/upstreamoidc_test.go index ecef00a5..00e6c6e7 100644 --- a/internal/upstreamoidc/upstreamoidc_test.go +++ b/internal/upstreamoidc/upstreamoidc_test.go @@ -458,14 +458,17 @@ func TestProviderConfig(t *testing.T) { t.Run("RevokeToken", func(t *testing.T) { tests := []struct { - name string - tokenType provider.RevocableTokenType - nilRevocationURL bool - statusCodes []int - returnErrBodies []string - wantErr string - wantNumRequests int - wantTokenTypeHint string + name string + tokenType provider.RevocableTokenType + nilRevocationURL bool + unreachableServer bool + returnStatusCodes []int + returnErrBodies []string + wantErr string + wantErrRegexp string // use either wantErr or wantErrRegexp + wantRetryableErrType bool // additionally assert error type when wantErr is non-empty + wantNumRequests int + wantTokenTypeHint string }{ { name: "success without calling the server when there is no revocation URL set for refresh token", @@ -482,88 +485,142 @@ func TestProviderConfig(t *testing.T) { { name: "success when the server returns 200 OK on the first call for refresh token", tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusOK}, + returnStatusCodes: []int{http.StatusOK}, wantNumRequests: 1, wantTokenTypeHint: "refresh_token", }, { name: "success when the server returns 200 OK on the first call for access token", tokenType: provider.AccessTokenType, - statusCodes: []int{http.StatusOK}, + returnStatusCodes: []int{http.StatusOK}, wantNumRequests: 1, wantTokenTypeHint: "access_token", }, { - name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for refresh token", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusOK}, + name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for refresh token", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusOK}, // https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 defines this as the error for client auth failure returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`}, wantNumRequests: 2, wantTokenTypeHint: "refresh_token", }, { - name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for access token", - tokenType: provider.AccessTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusOK}, + name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for access token", + tokenType: provider.AccessTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusOK}, // https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 defines this as the error for client auth failure returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`}, wantNumRequests: 2, wantTokenTypeHint: "access_token", }, { - name: "error when the server returns 400 Bad Request on the first call due to client auth, then any 400 error on second call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusBadRequest}, - returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, `{ "error":"anything", "error_description":"unhappy" }`}, - wantErr: `server responded with status 400 with body: { "error":"anything", "error_description":"unhappy" }`, - wantNumRequests: 2, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request on the first call due to client auth, then any 400 error on second call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusBadRequest}, + returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, `{ "error":"anything", "error_description":"unhappy" }`}, + wantErr: `server responded with status 400 with body: { "error":"anything", "error_description":"unhappy" }`, + wantRetryableErrType: false, + wantNumRequests: 2, + wantTokenTypeHint: "refresh_token", }, { - name: "error when the server returns 400 Bad Request with bad JSON body on the first call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest}, - returnErrBodies: []string{`invalid JSON body`}, - wantErr: `error parsing response body "invalid JSON body" on response with status code 400: invalid character 'i' looking for beginning of value`, - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request with bad JSON body on the first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest}, + returnErrBodies: []string{`invalid JSON body`}, + wantErr: `error parsing response body "invalid JSON body" on response with status code 400: invalid character 'i' looking for beginning of value`, + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", }, { - name: "error when the server returns 400 Bad Request with empty body", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest}, - returnErrBodies: []string{``}, - wantErr: `error parsing response body "" on response with status code 400: unexpected end of JSON input`, - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request with empty body", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest}, + returnErrBodies: []string{``}, + wantErr: `error parsing response body "" on response with status code 400: unexpected end of JSON input`, + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", }, { - name: "error when the server returns 400 Bad Request on the first call due to client auth, then any other error on second call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusForbidden}, - returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, ""}, - wantErr: "server responded with status 403", - wantNumRequests: 2, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request on the first call due to client auth, then any other error on second call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusForbidden}, + returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, ""}, + wantErr: "server responded with status 403", + wantRetryableErrType: false, + wantNumRequests: 2, + wantTokenTypeHint: "refresh_token", }, { - name: "error when server returns any other 400 error on first call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest}, - returnErrBodies: []string{`{ "error":"anything_else", "error_description":"unhappy" }`}, - wantErr: `server responded with status 400 with body: { "error":"anything_else", "error_description":"unhappy" }`, - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when server returns any other 400 error on first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest}, + returnErrBodies: []string{`{ "error":"anything_else", "error_description":"unhappy" }`}, + wantErr: `server responded with status 400 with body: { "error":"anything_else", "error_description":"unhappy" }`, + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", }, { - name: "error when server returns any other error aside from 400 on first call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusForbidden}, - returnErrBodies: []string{""}, - wantErr: "server responded with status 403", - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when server returns any other error aside from 400 on first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusForbidden}, + returnErrBodies: []string{""}, + wantErr: "server responded with status 403", + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when server returns 503 on first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusServiceUnavailable}, // 503 + returnErrBodies: []string{""}, + wantErr: "retryable revocation error: server responded with status 503", + wantRetryableErrType: true, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when the server returns 400 Bad Request on the first call due to client auth, then 503 on second call", + tokenType: provider.AccessTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusServiceUnavailable}, // 400, 503 + returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, ""}, + wantErr: "retryable revocation error: server responded with status 503", + wantRetryableErrType: true, + wantNumRequests: 2, + wantTokenTypeHint: "access_token", + }, + { + name: "retryable error when server returns any 5xx status on first call, testing lower bound of 5xx range", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusInternalServerError}, // 500 + returnErrBodies: []string{""}, + wantErr: "retryable revocation error: server responded with status 500", + wantRetryableErrType: true, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when server returns any 5xx status on first call, testing upper bound of 5xx range", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{599}, // not defined by an RFC, but sometimes considered Network Connect Timeout Error + returnErrBodies: []string{""}, + wantErr: "retryable revocation error: server responded with status 599", + wantRetryableErrType: true, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when the server cannot be reached", + tokenType: provider.AccessTokenType, + unreachableServer: true, + wantErrRegexp: "^retryable revocation error: Post .*: dial tcp .*: connect: connection refused$", + wantRetryableErrType: true, + wantNumRequests: 0, }, } for _, tt := range tests { @@ -592,9 +649,9 @@ func TestProviderConfig(t *testing.T) { require.Equal(t, "test-client-id", username) require.Equal(t, "test-client-secret", password) } - if tt.statusCodes[numRequests-1] != http.StatusOK { + if tt.returnStatusCodes[numRequests-1] != http.StatusOK { w.Header().Set("content-type", "application/json") - http.Error(w, tt.returnErrBodies[numRequests-1], tt.statusCodes[numRequests-1]) + http.Error(w, tt.returnErrBodies[numRequests-1], tt.returnStatusCodes[numRequests-1]) } // Otherwise, responds with 200 OK and empty body by default. })) @@ -616,6 +673,10 @@ func TestProviderConfig(t *testing.T) { p.RevocationURL = nil } + if tt.unreachableServer { + tokenServer.Close() // make the sever unreachable by closing it before making any requests + } + err = p.RevokeToken( context.Background(), "test-upstream-token", @@ -625,8 +686,20 @@ func TestProviderConfig(t *testing.T) { require.Equal(t, tt.wantNumRequests, numRequests, "did not make expected number of requests to revocation endpoint") - if tt.wantErr != "" { - require.EqualError(t, err, tt.wantErr) + if tt.wantErr != "" || tt.wantErrRegexp != "" { // nolint:nestif + if tt.wantErr != "" { + require.EqualError(t, err, tt.wantErr) + } else { + require.Error(t, err) + require.Regexp(t, tt.wantErrRegexp, err.Error()) + } + + if tt.wantRetryableErrType { + require.ErrorAs(t, err, &provider.RetryableRevocationError{}) + } else if errors.As(err, &provider.RetryableRevocationError{}) { + // There is no NotErrorAs() assertion available in the current version of testify, so do the equivalent. + require.Fail(t, "error should not be As RetryableRevocationError") + } } else { require.NoError(t, err) }