From f410d2bd00802cd9ab1f3775d59511b3bf108bc5 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Wed, 8 Dec 2021 14:29:25 -0800 Subject: [PATCH] Add revocation of upstream access tokens to garbage collector Also refactor the code that decides which types of revocation failures are worth retrying. Be more selective by only retrying those types of errors that are likely to be worth retrying. --- .../supervisorstorage/garbage_collector.go | 106 ++--- .../garbage_collector_test.go | 387 +++++++++++++++++- .../provider/dynamic_upstream_idp_provider.go | 20 + internal/upstreamoidc/upstreamoidc.go | 41 +- internal/upstreamoidc/upstreamoidc_test.go | 197 ++++++--- 5 files changed, 625 insertions(+), 126 deletions(-) diff --git a/internal/controller/supervisorstorage/garbage_collector.go b/internal/controller/supervisorstorage/garbage_collector.go index 5f388b80..fb48ac4e 100644 --- a/internal/controller/supervisorstorage/garbage_collector.go +++ b/internal/controller/supervisorstorage/garbage_collector.go @@ -112,25 +112,30 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { timeString, ok := secret.Annotations[crud.SecretLifetimeAnnotationKey] if !ok { + // Secret did not request garbage collection via annotations, so skip deletion. continue } garbageCollectAfterTime, err := time.Parse(crud.SecretLifetimeAnnotationDateFormat, timeString) if err != nil { plog.WarningErr("could not parse resource timestamp for garbage collection", err, logKV(secret)...) + // Can't tell if the Secret has expired or not, so skip deletion. continue } if !garbageCollectAfterTime.Before(frozenClock.Now()) { - // not old enough yet + // Secret is not old enough yet, so skip deletion. continue } + // The Secret has expired. Check if it is a downstream session storage Secret, which may require extra processing. storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey] if isSessionStorage { - err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret) - if err != nil { - plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret)...) + revokeErr := c.maybeRevokeUpstreamOIDCToken(ctx.Context, storageType, secret) + if revokeErr != nil { + plog.WarningErr("garbage collector could not revoke upstream OIDC token", revokeErr, logKV(secret)...) + // Note that RevokeToken (called by the private helper) might have returned an error of type + // provider.RetryableRevocationError, in which case we would like to retry the revocation later. // If the error is of a type that is worth retrying, then do not delete the Secret right away. // A future call to Sync will try revocation again for that secret. However, if the Secret is // getting too old, then just delete it anyway. We don't want to extend the lifetime of these @@ -138,13 +143,15 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { // cleaning them out of etcd storage. fourHoursAgo := frozenClock.Now().Add(-4 * time.Hour) nowIsLessThanFourHoursBeyondSecretGCTime := garbageCollectAfterTime.After(fourHoursAgo) - if errors.As(err, &retryableRevocationError{}) && nowIsLessThanFourHoursBeyondSecretGCTime { + if errors.As(revokeErr, &provider.RetryableRevocationError{}) && nowIsLessThanFourHoursBeyondSecretGCTime { // Hasn't been very long since secret expired, so skip deletion to try revocation again later. + plog.Trace("garbage collector keeping Secret to retry upstream OIDC token revocation later", logKV(secret)...) continue } } } + // Garbage collect the Secret. err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{ Preconditions: &metav1.Preconditions{ UID: &secret.UID, @@ -161,30 +168,36 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error { return nil } -func (c *garbageCollectorController) maybeRevokeUpstreamOIDCRefreshToken(ctx context.Context, storageType string, secret *v1.Secret) error { - // All session storage types hold upstream refresh tokens when the upstream IDP is an OIDC provider. +func (c *garbageCollectorController) maybeRevokeUpstreamOIDCToken(ctx context.Context, storageType string, secret *v1.Secret) error { + // All downstream session storage types hold upstream tokens when the upstream IDP is an OIDC provider. // However, some of them will be outdated because they are not updated by fosite after creation. // Our goal below is to always revoke the latest upstream refresh token that we are holding for the - // session, and only the latest. + // session, and only the latest, or to revoke the original upstream access token. Note that we don't + // bother to store new upstream access tokens seen during upstream refresh because we only need to store + // the upstream access token when we intend to use it *instead* of an upstream refresh token. + // This implies that all the storage types will contain a copy of the original upstream access token, + // since it is never updated in the session. Thus, we can use the same logic to decide which upstream + // access token to revoke as we use for upstream refresh tokens, which allows us to avoid revoking an + // upstream access token more than once. switch storageType { case authorizationcode.TypeLabelValue: authorizeCodeSession, err := authorizationcode.ReadFromSecret(secret) if err != nil { return err } - // Check if this downstream authcode was already used. If it was already used (i.e. not active anymore), then - // the latest upstream refresh token can be found in one of the other storage types handled below instead. + // Check if this downstream authcode was already used. If it was already used (i.e. not active anymore), + // then the latest upstream token can be found in one of the other storage types handled below instead. if !authorizeCodeSession.Active { return nil } - // When the downstream authcode was never used, then its storage must contain the latest upstream refresh token. - return c.revokeUpstreamOIDCRefreshToken(ctx, authorizeCodeSession.Request.Session.(*psession.PinnipedSession).Custom, secret) + // When the downstream authcode was never used, then its storage must contain the latest upstream token. + return c.tryRevokeUpstreamOIDCToken(ctx, authorizeCodeSession.Request.Session.(*psession.PinnipedSession).Custom, secret) case accesstoken.TypeLabelValue: // For access token storage, check if the "offline_access" scope was granted on the downstream session. - // If it was granted, then the latest upstream refresh token should be found in the refresh token storage instead. + // If it was granted, then the latest upstream token should be found in the refresh token storage instead. // If it was not granted, then the user could not possibly have performed a downstream refresh, so the - // access token storage has the latest version of the upstream refresh token. + // access token storage has the latest version of the upstream token. accessTokenSession, err := accesstoken.ReadFromSecret(secret) if err != nil { return err @@ -193,29 +206,29 @@ func (c *garbageCollectorController) maybeRevokeUpstreamOIDCRefreshToken(ctx con if slices.Contains(accessTokenSession.Request.GetGrantedScopes(), coreosoidc.ScopeOfflineAccess) { return nil } - return c.revokeUpstreamOIDCRefreshToken(ctx, pinnipedSession.Custom, secret) + return c.tryRevokeUpstreamOIDCToken(ctx, pinnipedSession.Custom, secret) case refreshtoken.TypeLabelValue: - // For refresh token storage, always revoke its upstream refresh token. This refresh token storage could - // be the result of the initial downstream authcode exchange, or it could be the result of a downstream - // refresh. Either way, it always contains the latest upstream refresh token when it exists. + // For refresh token storage, always revoke its upstream token. This refresh token storage could be + // the result of the initial downstream authcode exchange, or it could be the result of a downstream + // refresh. Either way, it always contains the latest upstream token when it exists. refreshTokenSession, err := refreshtoken.ReadFromSecret(secret) if err != nil { return err } - return c.revokeUpstreamOIDCRefreshToken(ctx, refreshTokenSession.Request.Session.(*psession.PinnipedSession).Custom, secret) + return c.tryRevokeUpstreamOIDCToken(ctx, refreshTokenSession.Request.Session.(*psession.PinnipedSession).Custom, secret) case pkce.TypeLabelValue: - // For PKCE storage, its very existence means that the authcode was never exchanged, because these - // are deleted during authcode exchange. No need to do anything, since the upstream refresh token - // revocation is handled by authcode storage case above. + // For PKCE storage, its very existence means that the downstream authcode was never exchanged, because + // these are deleted during downstream authcode exchange. No need to do anything, since the upstream + // token revocation is handled by authcode storage case above. return nil case openidconnect.TypeLabelValue: // For OIDC storage, there is no need to do anything for reasons similar to the PKCE storage. - // These are not deleted during authcode exchange, probably due to a bug in fosite, even though it - // will never be read or updated again. However, the refresh token contained inside will be revoked - // by one of the other cases above. + // These are not deleted during downstream authcode exchange, probably due to a bug in fosite, even + // though it will never be read or updated again. However, the upstream token contained inside will + // be revoked by one of the other cases above. return nil default: @@ -224,8 +237,8 @@ func (c *garbageCollectorController) maybeRevokeUpstreamOIDCRefreshToken(ctx con } } -func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context.Context, customSessionData *psession.CustomSessionData, secret *v1.Secret) error { - // When session was for another upstream IDP type, e.g. LDAP, there is no upstream OIDC refresh token involved. +func (c *garbageCollectorController) tryRevokeUpstreamOIDCToken(ctx context.Context, customSessionData *psession.CustomSessionData, secret *v1.Secret) error { + // When session was for another upstream IDP type, e.g. LDAP, there is no upstream OIDC token involved. if customSessionData.ProviderType != psession.ProviderTypeOIDC { return nil } @@ -242,32 +255,29 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context. return fmt.Errorf("could not find upstream OIDC provider named %q with resource UID %q", customSessionData.ProviderName, customSessionData.ProviderUID) } - // Revoke the upstream refresh token. This is a noop if the upstream provider does not offer a revocation endpoint. - err := foundOIDCIdentityProviderI.RevokeToken(ctx, customSessionData.OIDC.UpstreamRefreshToken, provider.RefreshTokenType) - if err != nil { - // This could be a network failure, a 503 result which we should retry - // (see https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1), - // or any other non-200 response from the revocation endpoint. - // Regardless of which, it is probably worth retrying. - return retryableRevocationError{wrapped: err} + // In practice, there should only be one of these tokens saved in the session. + upstreamRefreshToken := customSessionData.OIDC.UpstreamRefreshToken + upstreamAccessToken := customSessionData.OIDC.UpstreamAccessToken + + if upstreamRefreshToken != "" { + err := foundOIDCIdentityProviderI.RevokeToken(ctx, upstreamRefreshToken, provider.RefreshTokenType) + if err != nil { + return err + } + plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret)...) + } + + if upstreamAccessToken != "" { + err := foundOIDCIdentityProviderI.RevokeToken(ctx, upstreamAccessToken, provider.AccessTokenType) + if err != nil { + return err + } + plog.Trace("garbage collector successfully revoked upstream OIDC access token (or provider has no revocation endpoint)", logKV(secret)...) } - plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret)...) return nil } -type retryableRevocationError struct { - wrapped error -} - -func (e retryableRevocationError) Error() string { - return fmt.Sprintf("retryable revocation error: %v", e.wrapped) -} - -func (e retryableRevocationError) Unwrap() error { - return e.wrapped -} - func logKV(secret *v1.Secret) []interface{} { return []interface{}{ "secretName", secret.Name, diff --git a/internal/controller/supervisorstorage/garbage_collector_test.go b/internal/controller/supervisorstorage/garbage_collector_test.go index d442c474..84f293c6 100644 --- a/internal/controller/supervisorstorage/garbage_collector_test.go +++ b/internal/controller/supervisorstorage/garbage_collector_test.go @@ -271,7 +271,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there are valid, expired authcode secrets", func() { + when("there are valid, expired authcode secrets which contain upstream refresh tokens", func() { it.Before(func() { activeOIDCAuthcodeSession := &authorizationcode.Session{ Version: "2", @@ -400,6 +400,135 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) + when("there are valid, expired authcode secrets which contain upstream access tokens", func() { + it.Before(func() { + activeOIDCAuthcodeSession := &authorizationcode.Session{ + Version: "2", + Active: true, + Request: &fosite.Request{ + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "fake-upstream-access-token", + }, + }, + }, + }, + } + activeOIDCAuthcodeSessionJSON, err := json.Marshal(activeOIDCAuthcodeSession) + r.NoError(err) + activeOIDCAuthcodeSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "activeOIDCAuthcodeSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": authorizationcode.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": activeOIDCAuthcodeSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue, + } + _, err = authorizationcode.ReadFromSecret(activeOIDCAuthcodeSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid authcode secret") + r.NoError(kubeInformerClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) + r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) + + inactiveOIDCAuthcodeSession := &authorizationcode.Session{ + Version: "2", + Active: false, + Request: &fosite.Request{ + ID: "request-id-2", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "other-fake-upstream-access-token", + }, + }, + }, + }, + } + inactiveOIDCAuthcodeSessionJSON, err := json.Marshal(inactiveOIDCAuthcodeSession) + r.NoError(err) + inactiveOIDCAuthcodeSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "inactiveOIDCAuthcodeSession", + Namespace: installedInNamespace, + UID: "uid-456", + ResourceVersion: "rv-456", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": authorizationcode.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": inactiveOIDCAuthcodeSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue, + } + _, err = authorizationcode.ReadFromSecret(inactiveOIDCAuthcodeSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid authcode secret") + r.NoError(kubeInformerClient.Tracker().Add(inactiveOIDCAuthcodeSessionSecret)) + r.NoError(kubeClient.Tracker().Add(inactiveOIDCAuthcodeSessionSecret)) + }) + + it("should revoke upstream tokens only from the active authcode secrets and delete them all", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeTokenError(nil) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // The upstream refresh token is only revoked for the active authcode session. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-access-token", + TokenType: provider.AccessTokenType, + }, + ) + + // Both authcode session secrets are deleted. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"), + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "inactiveOIDCAuthcodeSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + testutil.NewPreconditions("uid-456", "rv-456"), + }, + *deleteOptions, + ) + }) + }) + when("there is an invalid, expired authcode secret", func() { it.Before(func() { invalidOIDCAuthcodeSession := &authorizationcode.Session{ @@ -674,11 +803,12 @@ func TestGarbageCollectorControllerSync(t *testing.T) { r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) }) - it("keeps the secret for a while longer so the revocation can be retried on a future sync", func() { + it("keeps the secret for a while longer so the revocation can be retried on a future sync for retryable errors", func() { happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). WithName("upstream-oidc-provider-name"). WithResourceUID("upstream-oidc-provider-uid"). - WithRevokeTokenError(errors.New("some upstream revocation error")) // the upstream revocation will fail + // make the upstream revocation fail in a retryable way + WithRevokeTokenError(provider.NewRetryableRevocationError(errors.New("some retryable upstream revocation error"))) idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) startInformersAndController(idpListerBuilder.Build()) @@ -697,6 +827,42 @@ func TestGarbageCollectorControllerSync(t *testing.T) { // The authcode session secrets is not deleted. r.Empty(kubeClient.Actions()) }) + + it("deletes the secret for non-retryable errors", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + // make the upstream revocation fail in a non-retryable way + WithRevokeTokenError(errors.New("some upstream revocation error not worth retrying")) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // Tried to revoke it, although this revocation will fail. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-refresh-token", + TokenType: provider.RefreshTokenType, + }, + ) + + // The authcode session secrets is still deleted because it is expired and the revocation error is not retryable. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + }, + *deleteOptions, + ) + }) }) when("there is a valid, long-since expired authcode secret but the upstream revocation fails", func() { @@ -783,7 +949,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there are valid, expired access token secrets", func() { + when("there are valid, expired access token secrets which contain upstream refresh tokens", func() { it.Before(func() { offlineAccessGrantedOIDCAccessTokenSession := &accesstoken.Session{ Version: "2", @@ -912,7 +1078,136 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) - when("there are valid, expired refresh secrets", func() { + when("there are valid, expired access token secrets which contain upstream access tokens", func() { + it.Before(func() { + offlineAccessGrantedOIDCAccessTokenSession := &accesstoken.Session{ + Version: "2", + Request: &fosite.Request{ + GrantedScope: fosite.Arguments{"scope1", "scope2", "offline_access"}, + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "offline-access-granted-fake-upstream-access-token", + }, + }, + }, + }, + } + offlineAccessGrantedOIDCAccessTokenSessionJSON, err := json.Marshal(offlineAccessGrantedOIDCAccessTokenSession) + r.NoError(err) + offlineAccessGrantedOIDCAccessTokenSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "offlineAccessGrantedOIDCAccessTokenSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": accesstoken.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": offlineAccessGrantedOIDCAccessTokenSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + accesstoken.TypeLabelValue, + } + _, err = accesstoken.ReadFromSecret(offlineAccessGrantedOIDCAccessTokenSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid accesstoken secret") + r.NoError(kubeInformerClient.Tracker().Add(offlineAccessGrantedOIDCAccessTokenSessionSecret)) + r.NoError(kubeClient.Tracker().Add(offlineAccessGrantedOIDCAccessTokenSessionSecret)) + + offlineAccessNotGrantedOIDCAccessTokenSession := &accesstoken.Session{ + Version: "2", + Request: &fosite.Request{ + GrantedScope: fosite.Arguments{"scope1", "scope2"}, + ID: "request-id-2", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "fake-upstream-access-token", + }, + }, + }, + }, + } + offlineAccessNotGrantedOIDCAccessTokenSessionJSON, err := json.Marshal(offlineAccessNotGrantedOIDCAccessTokenSession) + r.NoError(err) + offlineAccessNotGrantedOIDCAccessTokenSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "offlineAccessNotGrantedOIDCAccessTokenSession", + Namespace: installedInNamespace, + UID: "uid-456", + ResourceVersion: "rv-456", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": accesstoken.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": offlineAccessNotGrantedOIDCAccessTokenSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + accesstoken.TypeLabelValue, + } + _, err = accesstoken.ReadFromSecret(offlineAccessNotGrantedOIDCAccessTokenSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid accesstoken secret") + r.NoError(kubeInformerClient.Tracker().Add(offlineAccessNotGrantedOIDCAccessTokenSessionSecret)) + r.NoError(kubeClient.Tracker().Add(offlineAccessNotGrantedOIDCAccessTokenSessionSecret)) + }) + + it("should revoke upstream tokens only from the active authcode secrets and delete them all", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeTokenError(nil) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // The upstream refresh token is only revoked for the downstream session which had offline_access granted. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-access-token", + TokenType: provider.AccessTokenType, + }, + ) + + // Both session secrets are deleted. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "offlineAccessGrantedOIDCAccessTokenSession"), + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "offlineAccessNotGrantedOIDCAccessTokenSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + testutil.NewPreconditions("uid-456", "rv-456"), + }, + *deleteOptions, + ) + }) + }) + + when("there are valid, expired refresh secrets which contain upstream refresh tokens", func() { it.Before(func() { oidcRefreshSession := &refreshtoken.Session{ Version: "2", @@ -994,6 +1289,88 @@ func TestGarbageCollectorControllerSync(t *testing.T) { }) }) + when("there are valid, expired refresh secrets which contain upstream access tokens", func() { + it.Before(func() { + oidcRefreshSession := &refreshtoken.Session{ + Version: "2", + Request: &fosite.Request{ + ID: "request-id-1", + Client: &clientregistry.Client{}, + Session: &psession.PinnipedSession{ + Custom: &psession.CustomSessionData{ + ProviderUID: "upstream-oidc-provider-uid", + ProviderName: "upstream-oidc-provider-name", + ProviderType: psession.ProviderTypeOIDC, + OIDC: &psession.OIDCSessionData{ + UpstreamAccessToken: "fake-upstream-access-token", + }, + }, + }, + }, + } + oidcRefreshSessionJSON, err := json.Marshal(oidcRefreshSession) + r.NoError(err) + oidcRefreshSessionSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: "oidcRefreshSession", + Namespace: installedInNamespace, + UID: "uid-123", + ResourceVersion: "rv-123", + Annotations: map[string]string{ + "storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), + }, + Labels: map[string]string{ + "storage.pinniped.dev/type": refreshtoken.TypeLabelValue, + }, + }, + Data: map[string][]byte{ + "pinniped-storage-data": oidcRefreshSessionJSON, + "pinniped-storage-version": []byte("1"), + }, + Type: "storage.pinniped.dev/" + refreshtoken.TypeLabelValue, + } + _, err = refreshtoken.ReadFromSecret(oidcRefreshSessionSecret) + r.NoError(err, "the test author accidentally formed an invalid refresh token secret") + r.NoError(kubeInformerClient.Tracker().Add(oidcRefreshSessionSecret)) + r.NoError(kubeClient.Tracker().Add(oidcRefreshSessionSecret)) + }) + + it("should revoke upstream tokens from the secrets and delete them all", func() { + happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). + WithName("upstream-oidc-provider-name"). + WithResourceUID("upstream-oidc-provider-uid"). + WithRevokeTokenError(nil) + idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build()) + + startInformersAndController(idpListerBuilder.Build()) + r.NoError(controllerlib.TestSync(t, subject, *syncContext)) + + // The upstream refresh token is revoked. + idpListerBuilder.RequireExactlyOneCallToRevokeToken(t, + "upstream-oidc-provider-name", + &oidctestutil.RevokeTokenArgs{ + Ctx: syncContext.Context, + Token: "fake-upstream-access-token", + TokenType: provider.AccessTokenType, + }, + ) + + // The secret is deleted. + r.ElementsMatch( + []kubetesting.Action{ + kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "oidcRefreshSession"), + }, + kubeClient.Actions(), + ) + r.ElementsMatch( + []metav1.DeleteOptions{ + testutil.NewPreconditions("uid-123", "rv-123"), + }, + *deleteOptions, + ) + }) + }) + when("very little time has passed since the previous sync call", func() { it.Before(func() { // Add a secret that will expire in 20 seconds. diff --git a/internal/oidc/provider/dynamic_upstream_idp_provider.go b/internal/oidc/provider/dynamic_upstream_idp_provider.go index e5be51ff..78ed7998 100644 --- a/internal/oidc/provider/dynamic_upstream_idp_provider.go +++ b/internal/oidc/provider/dynamic_upstream_idp_provider.go @@ -5,6 +5,7 @@ package provider import ( "context" + "fmt" "net/url" "sync" @@ -77,6 +78,9 @@ type UpstreamOIDCIdentityProviderI interface { PerformRefresh(ctx context.Context, refreshToken string) (*oauth2.Token, error) // RevokeToken will attempt to revoke the given token, if the provider has a revocation endpoint. + // It may return an error wrapped by a RetryableRevocationError, which is an error indicating that it may + // be worth trying to revoke the same token again later. Any other error returned should be assumed to + // represent an error such that it is not worth retrying revocation later, even though revocation failed. RevokeToken(ctx context.Context, token string, tokenType RevocableTokenType) error // ValidateToken will validate the ID token. It will also merge the claims from the userinfo endpoint response @@ -163,3 +167,19 @@ func (p *dynamicUpstreamIDPProvider) GetActiveDirectoryIdentityProviders() []Ups defer p.mutex.RUnlock() return p.activeDirectoryUpstreams } + +type RetryableRevocationError struct { + wrapped error +} + +func NewRetryableRevocationError(wrapped error) RetryableRevocationError { + return RetryableRevocationError{wrapped: wrapped} +} + +func (e RetryableRevocationError) Error() string { + return fmt.Sprintf("retryable revocation error: %v", e.wrapped) +} + +func (e RetryableRevocationError) Unwrap() error { + return e.wrapped +} diff --git a/internal/upstreamoidc/upstreamoidc.go b/internal/upstreamoidc/upstreamoidc.go index 310a6df4..d3f24b9f 100644 --- a/internal/upstreamoidc/upstreamoidc.go +++ b/internal/upstreamoidc/upstreamoidc.go @@ -138,6 +138,9 @@ func (p *ProviderConfig) PerformRefresh(ctx context.Context, refreshToken string } // RevokeToken will attempt to revoke the given token, if the provider has a revocation endpoint. +// It may return an error wrapped by a RetryableRevocationError, which is an error indicating that it may +// be worth trying to revoke the same token again later. Any other error returned should be assumed to +// represent an error such that it is not worth retrying revocation later, even though revocation failed. func (p *ProviderConfig) RevokeToken(ctx context.Context, token string, tokenType provider.RevocableTokenType) error { if p.RevocationURL == nil { plog.Trace("RevokeToken() was called but upstream provider has no available revocation endpoint", @@ -197,22 +200,25 @@ func (p *ProviderConfig) tryRevokeToken( resp, err := httpClient.Do(req) if err != nil { // Couldn't connect to the server or some similar error. - return false, err + // Could be a temporary network problem, so it might be worth retrying. + return false, provider.NewRetryableRevocationError(err) } defer resp.Body.Close() - switch resp.StatusCode { - case http.StatusOK: + status := resp.StatusCode + + switch { + case status == http.StatusOK: // Success! plog.Trace("RevokeToken() got 200 OK response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth) return false, nil - case http.StatusBadRequest: + case status == http.StatusBadRequest: // Bad request might be due to bad client auth method. Try to detect that. plog.Trace("RevokeToken() got 400 Bad Request response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth) body, err := io.ReadAll(resp.Body) if err != nil { return false, - fmt.Errorf("error reading response body on response with status code %d: %w", resp.StatusCode, err) + fmt.Errorf("error reading response body on response with status code %d: %w", status, err) } var parsedResp struct { ErrorType string `json:"error"` @@ -222,21 +228,34 @@ func (p *ProviderConfig) tryRevokeToken( err = json.Unmarshal(body, &parsedResp) if err != nil { return false, - fmt.Errorf("error parsing response body %q on response with status code %d: %w", bodyStr, resp.StatusCode, err) + fmt.Errorf("error parsing response body %q on response with status code %d: %w", bodyStr, status, err) } - err = fmt.Errorf("server responded with status %d with body: %s", resp.StatusCode, bodyStr) + err = fmt.Errorf("server responded with status %d with body: %s", status, bodyStr) if parsedResp.ErrorType != "invalid_client" { - // Got an error unrelated to client auth, so not worth trying again. + // Got an error unrelated to client auth, so not worth trying client auth again. Also, these are errors + // of the type where the server is pretty conclusively rejecting our request, so they are generally + // not worth trying again later either. + // These errors could be any of the other errors from https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 + // or "unsupported_token_type" from https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1 + // or could be some unspecified custom error added by the OIDC provider. return false, err } // Got an "invalid_client" response, which might mean client auth failed, so it may be worth trying again // using another client auth method. See https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 plog.Trace("RevokeToken()'s 400 Bad Request response from provider's revocation endpoint was type 'invalid_client'", "providerName", p.Name, "usedBasicAuth", useBasicAuth) return true, err + case status >= 500 && status <= 599: + // The spec says 503 Service Unavailable should be retried by the client later. + // See https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1. + // Other forms of 5xx server errors are not particularly conclusive failures. For example, gateway errors could + // be caused by an underlying problem which could potentially become resolved in the near future. We'll be + // optimistic and call all 5xx errors retryable. + plog.Trace("RevokeToken() got unexpected error response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth, "statusCode", status) + return false, provider.NewRetryableRevocationError(fmt.Errorf("server responded with status %d", status)) default: - // Any other error is probably not due to failed client auth. - plog.Trace("RevokeToken() got unexpected error response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth, "statusCode", resp.StatusCode) - return false, fmt.Errorf("server responded with status %d", resp.StatusCode) + // Any other error is probably not due to failed client auth, and is probably not worth retrying later. + plog.Trace("RevokeToken() got unexpected error response from provider's revocation endpoint", "providerName", p.Name, "usedBasicAuth", useBasicAuth, "statusCode", status) + return false, fmt.Errorf("server responded with status %d", status) } } diff --git a/internal/upstreamoidc/upstreamoidc_test.go b/internal/upstreamoidc/upstreamoidc_test.go index ecef00a5..00e6c6e7 100644 --- a/internal/upstreamoidc/upstreamoidc_test.go +++ b/internal/upstreamoidc/upstreamoidc_test.go @@ -458,14 +458,17 @@ func TestProviderConfig(t *testing.T) { t.Run("RevokeToken", func(t *testing.T) { tests := []struct { - name string - tokenType provider.RevocableTokenType - nilRevocationURL bool - statusCodes []int - returnErrBodies []string - wantErr string - wantNumRequests int - wantTokenTypeHint string + name string + tokenType provider.RevocableTokenType + nilRevocationURL bool + unreachableServer bool + returnStatusCodes []int + returnErrBodies []string + wantErr string + wantErrRegexp string // use either wantErr or wantErrRegexp + wantRetryableErrType bool // additionally assert error type when wantErr is non-empty + wantNumRequests int + wantTokenTypeHint string }{ { name: "success without calling the server when there is no revocation URL set for refresh token", @@ -482,88 +485,142 @@ func TestProviderConfig(t *testing.T) { { name: "success when the server returns 200 OK on the first call for refresh token", tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusOK}, + returnStatusCodes: []int{http.StatusOK}, wantNumRequests: 1, wantTokenTypeHint: "refresh_token", }, { name: "success when the server returns 200 OK on the first call for access token", tokenType: provider.AccessTokenType, - statusCodes: []int{http.StatusOK}, + returnStatusCodes: []int{http.StatusOK}, wantNumRequests: 1, wantTokenTypeHint: "access_token", }, { - name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for refresh token", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusOK}, + name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for refresh token", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusOK}, // https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 defines this as the error for client auth failure returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`}, wantNumRequests: 2, wantTokenTypeHint: "refresh_token", }, { - name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for access token", - tokenType: provider.AccessTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusOK}, + name: "success when the server returns 400 Bad Request on the first call due to client auth, then 200 OK on second call for access token", + tokenType: provider.AccessTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusOK}, // https://datatracker.ietf.org/doc/html/rfc6749#section-5.2 defines this as the error for client auth failure returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`}, wantNumRequests: 2, wantTokenTypeHint: "access_token", }, { - name: "error when the server returns 400 Bad Request on the first call due to client auth, then any 400 error on second call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusBadRequest}, - returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, `{ "error":"anything", "error_description":"unhappy" }`}, - wantErr: `server responded with status 400 with body: { "error":"anything", "error_description":"unhappy" }`, - wantNumRequests: 2, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request on the first call due to client auth, then any 400 error on second call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusBadRequest}, + returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, `{ "error":"anything", "error_description":"unhappy" }`}, + wantErr: `server responded with status 400 with body: { "error":"anything", "error_description":"unhappy" }`, + wantRetryableErrType: false, + wantNumRequests: 2, + wantTokenTypeHint: "refresh_token", }, { - name: "error when the server returns 400 Bad Request with bad JSON body on the first call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest}, - returnErrBodies: []string{`invalid JSON body`}, - wantErr: `error parsing response body "invalid JSON body" on response with status code 400: invalid character 'i' looking for beginning of value`, - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request with bad JSON body on the first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest}, + returnErrBodies: []string{`invalid JSON body`}, + wantErr: `error parsing response body "invalid JSON body" on response with status code 400: invalid character 'i' looking for beginning of value`, + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", }, { - name: "error when the server returns 400 Bad Request with empty body", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest}, - returnErrBodies: []string{``}, - wantErr: `error parsing response body "" on response with status code 400: unexpected end of JSON input`, - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request with empty body", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest}, + returnErrBodies: []string{``}, + wantErr: `error parsing response body "" on response with status code 400: unexpected end of JSON input`, + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", }, { - name: "error when the server returns 400 Bad Request on the first call due to client auth, then any other error on second call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest, http.StatusForbidden}, - returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, ""}, - wantErr: "server responded with status 403", - wantNumRequests: 2, - wantTokenTypeHint: "refresh_token", + name: "error when the server returns 400 Bad Request on the first call due to client auth, then any other error on second call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusForbidden}, + returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, ""}, + wantErr: "server responded with status 403", + wantRetryableErrType: false, + wantNumRequests: 2, + wantTokenTypeHint: "refresh_token", }, { - name: "error when server returns any other 400 error on first call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusBadRequest}, - returnErrBodies: []string{`{ "error":"anything_else", "error_description":"unhappy" }`}, - wantErr: `server responded with status 400 with body: { "error":"anything_else", "error_description":"unhappy" }`, - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when server returns any other 400 error on first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusBadRequest}, + returnErrBodies: []string{`{ "error":"anything_else", "error_description":"unhappy" }`}, + wantErr: `server responded with status 400 with body: { "error":"anything_else", "error_description":"unhappy" }`, + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", }, { - name: "error when server returns any other error aside from 400 on first call", - tokenType: provider.RefreshTokenType, - statusCodes: []int{http.StatusForbidden}, - returnErrBodies: []string{""}, - wantErr: "server responded with status 403", - wantNumRequests: 1, - wantTokenTypeHint: "refresh_token", + name: "error when server returns any other error aside from 400 on first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusForbidden}, + returnErrBodies: []string{""}, + wantErr: "server responded with status 403", + wantRetryableErrType: false, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when server returns 503 on first call", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusServiceUnavailable}, // 503 + returnErrBodies: []string{""}, + wantErr: "retryable revocation error: server responded with status 503", + wantRetryableErrType: true, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when the server returns 400 Bad Request on the first call due to client auth, then 503 on second call", + tokenType: provider.AccessTokenType, + returnStatusCodes: []int{http.StatusBadRequest, http.StatusServiceUnavailable}, // 400, 503 + returnErrBodies: []string{`{ "error":"invalid_client", "error_description":"unhappy" }`, ""}, + wantErr: "retryable revocation error: server responded with status 503", + wantRetryableErrType: true, + wantNumRequests: 2, + wantTokenTypeHint: "access_token", + }, + { + name: "retryable error when server returns any 5xx status on first call, testing lower bound of 5xx range", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{http.StatusInternalServerError}, // 500 + returnErrBodies: []string{""}, + wantErr: "retryable revocation error: server responded with status 500", + wantRetryableErrType: true, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when server returns any 5xx status on first call, testing upper bound of 5xx range", + tokenType: provider.RefreshTokenType, + returnStatusCodes: []int{599}, // not defined by an RFC, but sometimes considered Network Connect Timeout Error + returnErrBodies: []string{""}, + wantErr: "retryable revocation error: server responded with status 599", + wantRetryableErrType: true, + wantNumRequests: 1, + wantTokenTypeHint: "refresh_token", + }, + { + name: "retryable error when the server cannot be reached", + tokenType: provider.AccessTokenType, + unreachableServer: true, + wantErrRegexp: "^retryable revocation error: Post .*: dial tcp .*: connect: connection refused$", + wantRetryableErrType: true, + wantNumRequests: 0, }, } for _, tt := range tests { @@ -592,9 +649,9 @@ func TestProviderConfig(t *testing.T) { require.Equal(t, "test-client-id", username) require.Equal(t, "test-client-secret", password) } - if tt.statusCodes[numRequests-1] != http.StatusOK { + if tt.returnStatusCodes[numRequests-1] != http.StatusOK { w.Header().Set("content-type", "application/json") - http.Error(w, tt.returnErrBodies[numRequests-1], tt.statusCodes[numRequests-1]) + http.Error(w, tt.returnErrBodies[numRequests-1], tt.returnStatusCodes[numRequests-1]) } // Otherwise, responds with 200 OK and empty body by default. })) @@ -616,6 +673,10 @@ func TestProviderConfig(t *testing.T) { p.RevocationURL = nil } + if tt.unreachableServer { + tokenServer.Close() // make the sever unreachable by closing it before making any requests + } + err = p.RevokeToken( context.Background(), "test-upstream-token", @@ -625,8 +686,20 @@ func TestProviderConfig(t *testing.T) { require.Equal(t, tt.wantNumRequests, numRequests, "did not make expected number of requests to revocation endpoint") - if tt.wantErr != "" { - require.EqualError(t, err, tt.wantErr) + if tt.wantErr != "" || tt.wantErrRegexp != "" { // nolint:nestif + if tt.wantErr != "" { + require.EqualError(t, err, tt.wantErr) + } else { + require.Error(t, err) + require.Regexp(t, tt.wantErrRegexp, err.Error()) + } + + if tt.wantRetryableErrType { + require.ErrorAs(t, err, &provider.RetryableRevocationError{}) + } else if errors.As(err, &provider.RetryableRevocationError{}) { + // There is no NotErrorAs() assertion available in the current version of testify, so do the equivalent. + require.Fail(t, "error should not be As RetryableRevocationError") + } } else { require.NoError(t, err) }