GC retries failed upstream revocations for a while, but not forever

This commit is contained in:
Ryan Richard 2021-11-17 15:58:44 -08:00
parent 48518e9513
commit 3b3641568a
2 changed files with 126 additions and 24 deletions

View File

@ -121,28 +121,40 @@ func (c *garbageCollectorController) Sync(ctx controllerlib.Context) error {
continue
}
if garbageCollectAfterTime.Before(frozenClock.Now()) {
storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey]
if isSessionStorage {
err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret)
if err != nil {
// Log the error for debugging purposes, but still carry on to delete the Secret despite the error.
plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret))
if !garbageCollectAfterTime.Before(frozenClock.Now()) {
// not old enough yet
continue
}
storageType, isSessionStorage := secret.Labels[crud.SecretLabelKey]
if isSessionStorage {
err := c.maybeRevokeUpstreamOIDCRefreshToken(ctx.Context, storageType, secret)
if err != nil {
// Log the error for debugging purposes, but still carry on to delete the Secret despite the error.
plog.WarningErr("garbage collector could not revoke upstream refresh token", err, logKV(secret))
// If the error is of a type that is worth retrying, then not delete the Secret right away.
// A future call to Sync will try revocation again for that secret. However, if the Secret is
// getting too old, then just delete it anyway. We don't want to extend the lifetime of these
// session Secrets by too much time, since the garbage collector is the only thing that is
// cleaning them out of etcd storage.
fourHoursAgo := frozenClock.Now().Add(-4 * time.Hour)
if errors.As(err, &retryableRevocationError{}) && garbageCollectAfterTime.After(fourHoursAgo) {
continue
}
}
err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{
Preconditions: &metav1.Preconditions{
UID: &secret.UID,
ResourceVersion: &secret.ResourceVersion,
},
})
if err != nil {
plog.WarningErr("failed to garbage collect resource", err, logKV(secret))
continue
}
plog.Info("storage garbage collector deleted resource", logKV(secret))
}
err = c.kubeClient.CoreV1().Secrets(secret.Namespace).Delete(ctx.Context, secret.Name, metav1.DeleteOptions{
Preconditions: &metav1.Preconditions{
UID: &secret.UID,
ResourceVersion: &secret.ResourceVersion,
},
})
if err != nil {
plog.WarningErr("failed to garbage collect resource", err, logKV(secret))
continue
}
plog.Info("storage garbage collector deleted resource", logKV(secret))
}
return nil
@ -222,6 +234,7 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context.
for _, p := range c.idpCache.GetOIDCIdentityProviders() {
if p.GetName() == customSessionData.ProviderName && p.GetResourceUID() == customSessionData.ProviderUID {
foundOIDCIdentityProviderI = p
break
}
}
if foundOIDCIdentityProviderI == nil {
@ -231,13 +244,29 @@ func (c *garbageCollectorController) revokeUpstreamOIDCRefreshToken(ctx context.
// Revoke the upstream refresh token. This is a noop if the upstream provider does not offer a revocation endpoint.
err := foundOIDCIdentityProviderI.RevokeRefreshToken(ctx, customSessionData.OIDC.UpstreamRefreshToken)
if err != nil {
return err
// This could be a network failure, a 503 result which we should retry
// (see https://datatracker.ietf.org/doc/html/rfc7009#section-2.2.1),
// or any other non-200 response from the revocation endpoint.
// Regardless of which, it is probably worth retrying.
return retryableRevocationError{wrapped: err}
}
plog.Trace("garbage collector successfully revoked upstream OIDC refresh token (or provider has no revocation endpoint)", logKV(secret))
return nil
}
type retryableRevocationError struct {
wrapped error
}
func (e retryableRevocationError) Error() string {
return fmt.Sprintf("retryable revocation error: %v", e.wrapped)
}
func (e retryableRevocationError) Unwrap() error {
return e.wrapped
}
func logKV(secret *v1.Secret) []interface{} {
return []interface{}{
"secretName", secret.Name,

View File

@ -625,7 +625,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
})
})
when("there is a valid, expired authcode secret but the upstream revocation fails", func() {
when("there is a valid, recently expired authcode secret but the upstream revocation fails", func() {
it.Before(func() {
activeOIDCAuthcodeSession := &authorizationcode.Session{
Version: "2",
@ -654,7 +654,8 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
UID: "uid-123",
ResourceVersion: "rv-123",
Annotations: map[string]string{
"storage.pinniped.dev/garbage-collect-after": frozenNow.Add(-time.Second).Format(time.RFC3339),
// expired almost 4 hours ago, but not quite 4 hours
"storage.pinniped.dev/garbage-collect-after": frozenNow.Add((-time.Hour * 4) + time.Second).Format(time.RFC3339),
},
Labels: map[string]string{
"storage.pinniped.dev/type": authorizationcode.TypeLabelValue,
@ -672,7 +673,7 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret))
})
it("should remove the secret anyway because it has expired", func() {
it("keeps the secret for a while longer so the revocation can be retried on a future sync", func() {
happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder().
WithName("upstream-oidc-provider-name").
WithResourceUID("upstream-oidc-provider-uid").
@ -691,7 +692,79 @@ func TestGarbageCollectorControllerSync(t *testing.T) {
},
)
// The authcode session secrets is still deleted because it is expired.
// The authcode session secrets is not deleted.
r.Empty(kubeClient.Actions())
})
})
when("there is a valid, long-since expired authcode secret but the upstream revocation fails", func() {
it.Before(func() {
activeOIDCAuthcodeSession := &authorizationcode.Session{
Version: "2",
Active: true,
Request: &fosite.Request{
ID: "request-id-1",
Client: &clientregistry.Client{},
Session: &psession.PinnipedSession{
Custom: &psession.CustomSessionData{
ProviderUID: "upstream-oidc-provider-uid",
ProviderName: "upstream-oidc-provider-name",
ProviderType: psession.ProviderTypeOIDC,
OIDC: &psession.OIDCSessionData{
UpstreamRefreshToken: "fake-upstream-refresh-token",
},
},
},
},
}
activeOIDCAuthcodeSessionJSON, err := json.Marshal(activeOIDCAuthcodeSession)
r.NoError(err)
activeOIDCAuthcodeSessionSecret := &corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "activeOIDCAuthcodeSession",
Namespace: installedInNamespace,
UID: "uid-123",
ResourceVersion: "rv-123",
Annotations: map[string]string{
// expired just over 4 hours ago
"storage.pinniped.dev/garbage-collect-after": frozenNow.Add((-time.Hour * 4) - time.Second).Format(time.RFC3339),
},
Labels: map[string]string{
"storage.pinniped.dev/type": authorizationcode.TypeLabelValue,
},
},
Data: map[string][]byte{
"pinniped-storage-data": activeOIDCAuthcodeSessionJSON,
"pinniped-storage-version": []byte("1"),
},
Type: "storage.pinniped.dev/" + authorizationcode.TypeLabelValue,
}
_, err = authorizationcode.ReadFromSecret(activeOIDCAuthcodeSessionSecret)
r.NoError(err, "the test author accidentally formed an invalid authcode secret")
r.NoError(kubeInformerClient.Tracker().Add(activeOIDCAuthcodeSessionSecret))
r.NoError(kubeClient.Tracker().Add(activeOIDCAuthcodeSessionSecret))
})
it("deletes the secret because it has probably been retrying revocation for hours without success", func() {
happyOIDCUpstream := oidctestutil.NewTestUpstreamOIDCIdentityProviderBuilder().
WithName("upstream-oidc-provider-name").
WithResourceUID("upstream-oidc-provider-uid").
WithRevokeRefreshTokenError(errors.New("some upstream revocation error")) // the upstream revocation will fail
idpListerBuilder := oidctestutil.NewUpstreamIDPListerBuilder().WithOIDC(happyOIDCUpstream.Build())
startInformersAndController(idpListerBuilder.Build())
r.NoError(controllerlib.TestSync(t, subject, *syncContext))
// Tried to revoke it, although this revocation will fail.
idpListerBuilder.RequireExactlyOneCallToRevokeRefreshToken(t,
"upstream-oidc-provider-name",
&oidctestutil.RevokeRefreshTokenArgs{
Ctx: syncContext.Context,
RefreshToken: "fake-upstream-refresh-token",
},
)
// The authcode session secrets is deleted.
r.ElementsMatch(
[]kubetesting.Action{
kubetesting.NewDeleteAction(secretsGVR, installedInNamespace, "activeOIDCAuthcodeSession"),