GC retries failed upstream revocations for a while, but not forever

This commit is contained in:
Ryan Richard 2021-11-17 15:58:44 -08:00
parent 48518e9513
commit 3b3641568a
2 changed files with 126 additions and 24 deletions

View File

@ -121,28 +121,40 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error {
continue continue
} }
if garbageCollectAfterTime.Before(frozenClock.Now()) { if !garbageCollectAfterTime.Before(frozenClock.Now()) {
storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey] // not old enough yet
if isSessionStorage { continue
err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret) }
if err != nil {
// Log the error for debugging purposes, but still carry on to delete the Secret despite the error. storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey]
plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret)) if isSessionStorage {
err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret)
if err != nil {
// Log the error for debugging purposes, but still carry on to delete the Secret despite the error.
plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret))
// If the error is of a type that is worth retrying, then not delete the Secret right away.
// A future call to Sync will try revocation again for that secret. However, if the Secret is
// getting too old, then just delete it anyway. We don't want to extend the lifetime of these
// session Secrets by too much time, since the garbage collector is the only thing that is
// cleaning them out of etcd storage.
fourHoursAgo := frozenClock.Now().Add(-4 * time.Hour)
if errors.As(err, &retryableRevocationError{}) && garbageCollectAfterTime.After(fourHoursAgo) {
continue
} }
} }
err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{
Preconditions: &metav1.Preconditions{
UID: &secret.UID,
ResourceVersion: &secret.ResourceVersion,
},
})
if err != nil {
plog.WarningErr("failed to garbage collect resource", err, logKV(secret))
continue
}
plog.Info("storage garbage collector deleted resource", logKV(secret))
} }
err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{
Preconditions: &metav1.Preconditions{
UID: &secret.UID,
ResourceVersion: &secret.ResourceVersion,
},
})
if err != nil {
plog.WarningErr("failed to garbage collect resource", err, logKV(secret))
continue
}
plog.Info("storage garbage collector deleted resource", logKV(secret))
} }
return nil return nil
@ -222,6 +234,7 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context.
for _, p := range c.idpCache.GetOIDCIdentityProviders() { for _, p := range c.idpCache.GetOIDCIdentityProviders() {
if p.GetName() == customSessionData.ProviderName && p.GetResourceUID() == customSessionData.ProviderUID { if p.GetName() == customSessionData.ProviderName && p.GetResourceUID() == customSessionData.ProviderUID {
foundOIDCIdentityProviderI = p foundOIDCIdentityProviderI = p
break
} }
} }
if foundOIDCIdentityProviderI == nil { if foundOIDCIdentityProviderI == nil {
@ -231,13 +244,29 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context.
// Revoke the upstream refresh token. This is a noop if the upstream provider does not offer a revocation endpoint. // Revoke the upstream refresh token. This is a noop if the upstream provider does not offer a revocation endpoint.
err := foundOIDCIdentityProviderI.RevokeRefreshToken(ctx, customSessionData.OIDC.UpstreamRefreshToken) err := foundOIDCIdentityProviderI.RevokeRefreshToken(ctx, customSessionData.OIDC.UpstreamRefreshToken)
if err != nil { if err != nil {
return err // This could be a network failure, a 503 result which we should retry
// (see https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1),
// or any other non-200 response from the revocation endpoint.
// Regardless of which, it is probably worth retrying.
return retryableRevocationError{wrapped: err}
} }
plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret)) plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret))
return nil return nil
} }
type retryableRevocationError struct {
wrapped error
}
func (e retryableRevocationError) Error() string {
return fmt.Sprintf("retryable revocation error: %v", e.wrapped)
}
func (e retryableRevocationError) Unwrap() error {
return e.wrapped
}
func logKV(secret *v1.Secret) []interface{} { func logKV(secret *v1.Secret) []interface{} {
return []interface{}{ return []interface{}{
"secretName", secret.Name, "secretName", secret.Name,

View File

@ -625,7 +625,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
}) })
}) })
when("there is a valid, expired authcode secret but the upstream revocation fails", func() { when("there is a valid, recently expired authcode secret but the upstream revocation fails", func() {
it.Before(func() { it.Before(func() {
activeOIDCAuthcodeSession := &authorizationcode.Session{ activeOIDCAuthcodeSession := &authorizationcode.Session{
Version: "2", Version: "2",
@ -654,7 +654,8 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
UID: "uid-123", UID: "uid-123",
ResourceVersion: "rv-123", ResourceVersion: "rv-123",
Annotations: map[string]string{ Annotations: map[string]string{
"storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339), // expired almost 4 hours ago, but not quite 4 hours
"storage.pinniped.dev/garbage-collect-after": frozenNow.Add((-time.Hour * 4) + time.Second).Format(time.RFC3339),
}, },
Labels: map[string]string{ Labels: map[string]string{
"storage.pinniped.dev/type": authorizationcode.TypeLabelValue, "storage.pinniped.dev/type": authorizationcode.TypeLabelValue,
@ -672,7 +673,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret)) r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret))
}) })
it("should remove the secret anyway because it has expired", func() { it("keeps the secret for a while longer so the revocation can be retried on a future sync", func() {
happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder(). happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder().
WithName("upstream-oidc-provider-name"). WithName("upstream-oidc-provider-name").
WithResourceUID("upstream-oidc-provider-uid"). WithResourceUID("upstream-oidc-provider-uid").
@ -691,7 +692,79 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
}, },
) )
// The authcode session secrets is still deleted because it is expired. // The authcode session secrets is not deleted.
r.Empty(kubeClient.Actions())
})
})
when("there is a valid, long-since expired authcode secret but the upstream revocation fails", func() {
it.Before(func() {
activeOIDCAuthcodeSession := &authorizationcode.Session{
Version: "2",
Active: true,
Request: &fosite.Request{
ID: "request-id-1",
Client: &clientregistry.Client{},
Session: &psession.PinnipedSession{
Custom: &psession.CustomSessionData{
ProviderUID: "upstream-oidc-provider-uid",
ProviderName: "upstream-oidc-provider-name",
ProviderType: psession.ProviderTypeOIDC,
OIDC: &psession.OIDCSessionData{
UpstreamRefreshToken: "fake-upstream-refresh-token",
},
},
},
},
}
activeOIDCAuthcodeSessionJSON, err := json.Marshal(activeOIDCAuthcodeSession)
r.NoError(err)
activeOIDCAuthcodeSessionSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "activeOIDCAuthcodeSession",
Namespace: installedInNamespace,
UID: "uid-123",
ResourceVersion: "rv-123",
Annotations: map[string]string{
// expired just over 4 hours ago
"storage.pinniped.dev/garbage-collect-after": frozenNow.Add((-time.Hour * 4) - time.Second).Format(time.RFC3339),
},
Labels: map[string]string{
"storage.pinniped.dev/type": authorizationcode.TypeLabelValue,
},
},
Data: map[string][]byte{
"pinniped-storage-data": activeOIDCAuthcodeSessionJSON,
"pinniped-storage-version": []byte("1"),
},
Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue,
}
_, err = authorizationcode.ReadFromSecret(activeOIDCAuthcodeSessionSecret)
r.NoError(err, "the test author accidentally formed an invalid authcode secret")
r.NoError(kubeInformerClient.Tracker().Add(activeOIDCAuthcodeSessionSecret))
r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret))
})
it("deletes the secret because it has probably been retrying revocation for hours without success", func() {
happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder().
WithName("upstream-oidc-provider-name").
WithResourceUID("upstream-oidc-provider-uid").
WithRevokeRefreshTokenError(errors.New("some upstream revocation error")) // the upstream revocation will fail
idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build())
startInformersAndController(idpListerBuilder.Build())
r.NoError(controllerlib.TestSync(t, subject, *syncContext))
// Tried to revoke it, although this revocation will fail.
idpListerBuilder.RequireExactlyOneCallToRevokeRefreshToken(t,
"upstream-oidc-provider-name",
&oidctestutil.RevokeRefreshTokenArgs{
Ctx: syncContext.Context,
RefreshToken: "fake-upstream-refresh-token",
},
)
// The authcode session secrets is deleted.
r.ElementsMatch( r.ElementsMatch(
[]kubetesting.Action{ []kubetesting.Action{
kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"), kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"),