From f1e63c55d4859b5f7cc53ac8161cb5aede284094 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Wed, 7 Jul 2021 12:50:13 -0700 Subject: [PATCH 1/5] Add `https_proxy` and `no_proxy` settings for the Supervisor - Add new optional ytt params for the Supervisor deployment. - When the Supervisor is making calls to an upstream OIDC provider, use these variables if they were provided. - These settings are integration tested in the main CI pipeline by sometimes setting them on deployments in certain cases, and then letting the existing integration tests (e.g. TestE2EFullIntegration) provide the coverage, so there are no explicit changes to the integration tests themselves in this commit. --- deploy/supervisor/deployment.yaml | 9 +++++++++ deploy/supervisor/values.yaml | 8 ++++++++ .../oidcupstreamwatcher/oidc_upstream_watcher.go | 8 ++++++-- .../oidcupstreamwatcher/oidc_upstream_watcher_test.go | 10 ++++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/deploy/supervisor/deployment.yaml b/deploy/supervisor/deployment.yaml index 48607e3f..783f10aa 100644 --- a/deploy/supervisor/deployment.yaml +++ b/deploy/supervisor/deployment.yaml @@ -102,6 +102,15 @@ spec: protocol: TCP - containerPort: 8443 protocol: TCP + env: + #@ if data.values.https_proxy: + - name: HTTPS_PROXY + value: #@ data.values.https_proxy + #@ end + #@ if data.values.no_proxy: + - name: NO_PROXY + value: #@ data.values.no_proxy + #@ end livenessProbe: httpGet: path: /healthz diff --git a/deploy/supervisor/values.yaml b/deploy/supervisor/values.yaml index 1cbed34c..041d8b70 100644 --- a/deploy/supervisor/values.yaml +++ b/deploy/supervisor/values.yaml @@ -65,3 +65,11 @@ run_as_group: 1001 #! run_as_group specifies the group ID that will own the proc #! authentication.concierge.pinniped.dev, etc. As an example, if this is set to tuna.io, then #! Pinniped API groups will look like foo.tuna.io. authentication.concierge.tuna.io, etc. api_group_suffix: pinniped.dev + +#! Set the standard golang HTTPS_PROXY and NO_PROXY environment variables on the Supervisor containers. +#! These will be used when the Supervisor makes backend-to-backend calls to upstream identity providers using HTTPS, +#! e.g. when the Supervisor fetches discovery documents, JWKS keys, and tokens from an upstream OIDC Provider. +#! The Supervisor never makes insecure HTTP calls, so there is no reason to set HTTP_PROXY. +#! Optional. +https_proxy: #! e.g. http://proxy.example.com +no_proxy: #! e.g. 
127.0.0.1 diff --git a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go index b610a2c6..d5a8d302 100644 --- a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go +++ b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go @@ -263,9 +263,13 @@ func (c *oidcWatcherController) validateIssuer(ctx context.Context, upstream *v1 Message: err.Error(), } } - httpClient = &http.Client{Transport: &http.Transport{TLSClientConfig: tlsConfig}} - discoveredProvider, err = oidc.NewProvider(oidc.ClientContext(ctx, httpClient), upstream.Spec.Issuer) + httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyFromEnvironment, TLSClientConfig: tlsConfig}} + + timeoutCtx, cancelFunc := context.WithTimeout(oidc.ClientContext(ctx, httpClient), time.Minute) + defer cancelFunc() + + discoveredProvider, err = oidc.NewProvider(timeoutCtx, upstream.Spec.Issuer) if err != nil { const klogLevelTrace = 6 c.log.V(klogLevelTrace).WithValues( diff --git a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go index 9370aad3..24cb9b14 100644 --- a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go +++ b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go @@ -9,6 +9,7 @@ import ( "encoding/json" "net/http" "net/url" + "reflect" "strings" "testing" "time" @@ -797,6 +798,15 @@ oidc: issuer did not match the issuer returned by provider, expected "` + testIs require.Equal(t, tt.wantResultingCache[i].GetUsernameClaim(), actualIDP.GetUsernameClaim()) require.Equal(t, tt.wantResultingCache[i].GetGroupsClaim(), actualIDP.GetGroupsClaim()) require.ElementsMatch(t, tt.wantResultingCache[i].GetScopes(), actualIDP.GetScopes()) + + // We always want to use the proxy from env on these clients, so although the following assertions + // are a little hacky, this is a cheap way to test that we are using it. + actualTransport, ok := actualIDP.Client.Transport.(*http.Transport) + require.True(t, ok, "expected cached provider to have client with Transport of type *http.Transport") + httpProxyFromEnvFunction := reflect.ValueOf(http.ProxyFromEnvironment).Pointer() + actualTransportProxyFunction := reflect.ValueOf(actualTransport.Proxy).Pointer() + require.Equal(t, httpProxyFromEnvFunction, actualTransportProxyFunction, + "Transport should have used http.ProxyFromEnvironment as its Proxy func") } actualUpstreams, err := fakePinnipedClient.IDPV1alpha1().OIDCIdentityProviders(testNamespace).List(ctx, metav1.ListOptions{}) From f0d120a6ca4155548e5d52fb6277f2146fd1b17c Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Thu, 8 Jul 2021 09:44:02 -0700 Subject: [PATCH 2/5] Fix broken upstream OIDC discovery timeout added in previous commit After noticing that the upstream OIDC discovery calls can hang indefinitely, I had tried to impose a one minute timeout on them by giving them a timeout context. However, I hadn't noticed that the context also gets passed into the JWKS fetching object, which gets added to our cache and used later. Therefore the timeout context was added to the cache and timed out while sitting in the cache, causing later JWKS fetchers to fail. 
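
To make the failure mode concrete, here is a rough sketch of the broken
pattern. This is not the exact Supervisor code: the issuer URL, the function
names, and the go-oidc v3 import path are stand-ins for illustration only.

    package main

    import (
        "context"
        "fmt"
        "net/http"
        "time"

        "github.com/coreos/go-oidc/v3/oidc"
    )

    // discover validates an issuer once, but the returned provider gets cached and reused.
    func discover(ctx context.Context, httpClient *http.Client, issuer string) (*oidc.Provider, error) {
        // BROKEN: the provider's JWKS fetcher holds on to this context and uses it for
        // later key fetches, long after discover() has returned and the context has been
        // canceled by the deferred cancel (or has passed its one minute deadline).
        timeoutCtx, cancel := context.WithTimeout(oidc.ClientContext(ctx, httpClient), time.Minute)
        defer cancel()
        return oidc.NewProvider(timeoutCtx, issuer)
    }

    // verifyLater stands in for the code paths that use the cached provider much later.
    // If this verification needs to fetch JWKS keys, that fetch still uses the dead
    // context from discover() and therefore fails.
    func verifyLater(ctx context.Context, provider *oidc.Provider, rawIDToken string) error {
        verifier := provider.Verifier(&oidc.Config{SkipClientIDCheck: true})
        _, err := verifier.Verify(ctx, rawIDToken)
        return err
    }

    func main() {
        provider, err := discover(context.Background(), http.DefaultClient, "https://issuer.example.com")
        if err != nil {
            fmt.Println("discovery failed:", err)
            return
        }
        // Imagine this call happening minutes later, via the cache ("header.payload.signature"
        // is only a placeholder for a real ID token).
        err = verifyLater(context.Background(), provider, "header.payload.signature")
        fmt.Println("verification failed:", err)
    }

Because the cached provider (and its JWKS key set) outlives the request that
created it, the deadline needs to be attached to something that is evaluated
per request rather than to a context that is canceled when the original call
returns.
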
This commit is trying again to impose a reasonable timeout on these discovery and JWKS calls, but this time by using http.Client's Timeout field, which is documented to be a timeout for *each* request/response cycle, so hopefully this is a more appropriate way to impose a timeout for this use case. The http.Client instance ends up in the cache on the JWKS fetcher object, so the timeout should apply to each JWKS request as well. Requests that can hang forever are effectively a server-side resource leak, which could theoretically be taken advantage of in a denial of service attempt, so it would be nice to avoid having them. --- .../oidcupstreamwatcher/oidc_upstream_watcher.go | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go index d5a8d302..889d66fc 100644 --- a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go +++ b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher.go @@ -264,12 +264,15 @@ func (c *oidcWatcherController) validateIssuer(ctx context.Context, upstream *v1 } } - httpClient = &http.Client{Transport: &http.Transport{Proxy: http.ProxyFromEnvironment, TLSClientConfig: tlsConfig}} + httpClient = &http.Client{ + Timeout: time.Minute, + Transport: &http.Transport{ + Proxy: http.ProxyFromEnvironment, + TLSClientConfig: tlsConfig, + }, + } - timeoutCtx, cancelFunc := context.WithTimeout(oidc.ClientContext(ctx, httpClient), time.Minute) - defer cancelFunc() - - discoveredProvider, err = oidc.NewProvider(timeoutCtx, upstream.Spec.Issuer) + discoveredProvider, err = oidc.NewProvider(oidc.ClientContext(ctx, httpClient), upstream.Spec.Issuer) if err != nil { const klogLevelTrace = 6 c.log.V(klogLevelTrace).WithValues( From 709c10227f30bfb13508837fbf23d3c4a9428668 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Thu, 8 Jul 2021 11:10:53 -0700 Subject: [PATCH 3/5] Run the LDAP client's integration tests only on Kind TestSimultaneousLDAPRequestsOnSingleProvider proved to be unreliable on AKS due to some kind of kubectl port-forward issue, so only run the LDAP client's integration tests on Kind. They are testing the integration between the client code and the OpenLDAP test server, not testing anything about Kubernetes, so running only on Kind should give us sufficient test coverage. --- test/cluster_capabilities/aks.yaml | 3 +++ test/cluster_capabilities/eks.yaml | 3 +++ test/cluster_capabilities/gke.yaml | 3 +++ test/cluster_capabilities/kind.yaml | 3 +++ test/cluster_capabilities/tkgs.yaml | 5 ++++- test/integration/ldap_client_test.go | 12 ++++++++++-- test/testlib/env.go | 21 +++++++++++++++++++++ 7 files changed, 47 insertions(+), 3 deletions(-) diff --git a/test/cluster_capabilities/aks.yaml b/test/cluster_capabilities/aks.yaml index 2a944bda..8bdfa98e 100644 --- a/test/cluster_capabilities/aks.yaml +++ b/test/cluster_capabilities/aks.yaml @@ -1,6 +1,9 @@ # Copyright 2021 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +# The name of the cluster type. +kubernetesDistribution: AKS + # Describe the capabilities of the cluster against which the integration tests will run. 
capabilities: diff --git a/test/cluster_capabilities/eks.yaml b/test/cluster_capabilities/eks.yaml index 9bce553d..304922f8 100644 --- a/test/cluster_capabilities/eks.yaml +++ b/test/cluster_capabilities/eks.yaml @@ -1,6 +1,9 @@ # Copyright 2021 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +# The name of the cluster type. +kubernetesDistribution: EKS + # Describe the capabilities of the cluster against which the integration tests will run. capabilities: diff --git a/test/cluster_capabilities/gke.yaml b/test/cluster_capabilities/gke.yaml index 080cec4a..a1247788 100644 --- a/test/cluster_capabilities/gke.yaml +++ b/test/cluster_capabilities/gke.yaml @@ -1,6 +1,9 @@ # Copyright 2020-2021 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +# The name of the cluster type. +kubernetesDistribution: GKE + # Describe the capabilities of the cluster against which the integration tests will run. capabilities: diff --git a/test/cluster_capabilities/kind.yaml b/test/cluster_capabilities/kind.yaml index 92759ed9..485ba506 100644 --- a/test/cluster_capabilities/kind.yaml +++ b/test/cluster_capabilities/kind.yaml @@ -1,6 +1,9 @@ # Copyright 2020-2021 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +# The name of the cluster type. +kubernetesDistribution: Kind + # Describe the capabilities of the cluster against which the integration tests will run. capabilities: diff --git a/test/cluster_capabilities/tkgs.yaml b/test/cluster_capabilities/tkgs.yaml index 4c7d05ba..86220291 100644 --- a/test/cluster_capabilities/tkgs.yaml +++ b/test/cluster_capabilities/tkgs.yaml @@ -1,6 +1,9 @@ # Copyright 2020-2021 the Pinniped contributors. All Rights Reserved. # SPDX-License-Identifier: Apache-2.0 +# The name of the cluster type. +kubernetesDistribution: TKGS + # Describe the capabilities of the cluster against which the integration tests will run. capabilities: @@ -15,4 +18,4 @@ capabilities: anonymousAuthenticationSupported: true # Are LDAP ports on the Internet reachable without interference from network firewalls or proxies? - canReachInternetLDAPPorts: false + canReachInternetLDAPPorts: true diff --git a/test/integration/ldap_client_test.go b/test/integration/ldap_client_test.go index 0b41d00d..99a6b7fb 100644 --- a/test/integration/ldap_client_test.go +++ b/test/integration/ldap_client_test.go @@ -25,7 +25,11 @@ import ( ) func TestLDAPSearch(t *testing.T) { - env := testlib.IntegrationEnv(t) + // This test does not interact with Kubernetes itself. It is a test of our LDAP client code, and only interacts + // with our test OpenLDAP server, which is exposed directly to this test via kubectl port-forward. + // Theoretically we should always be able to run this test, but something about the kubectl port forwarding + // was very flaky on AKS, so we'll get the coverage by only running it on kind. + env := testlib.IntegrationEnv(t).WithKubeDistribution(testlib.KindDistro) // Note that these tests depend on the values hard-coded in the LDIF file in test/deploy/tools/ldap.yaml. // It requires the test LDAP server from the tools deployment. @@ -613,7 +617,11 @@ func TestLDAPSearch(t *testing.T) { } func TestSimultaneousLDAPRequestsOnSingleProvider(t *testing.T) { - env := testlib.IntegrationEnv(t) + // This test does not interact with Kubernetes itself. It is a test of our LDAP client code, and only interacts + // with our test OpenLDAP server, which is exposed directly to this test via kubectl port-forward. 
+ // Theoretically we should always be able to run this test, but something about the kubectl port forwarding + // was very flaky on AKS, so we'll get the coverage by only running it on kind. + env := testlib.IntegrationEnv(t).WithKubeDistribution(testlib.KindDistro) // Note that these tests depend on the values hard-coded in the LDIF file in test/deploy/tools/ldap.yaml. // It requires the test LDAP server from the tools deployment. diff --git a/test/testlib/env.go b/test/testlib/env.go index 3ff75f03..ea6834b6 100644 --- a/test/testlib/env.go +++ b/test/testlib/env.go @@ -19,12 +19,19 @@ import ( ) type Capability string +type KubeDistro string const ( ClusterSigningKeyIsAvailable Capability = "clusterSigningKeyIsAvailable" AnonymousAuthenticationSupported Capability = "anonymousAuthenticationSupported" HasExternalLoadBalancerProvider Capability = "hasExternalLoadBalancerProvider" CanReachInternetLDAPPorts Capability = "canReachInternetLDAPPorts" + + KindDistro KubeDistro = "Kind" + GKEDistro KubeDistro = "GKE" + AKSDistro KubeDistro = "AKS" + EKSDistro KubeDistro = "EKS" + TKGSDistro KubeDistro = "TKGS" ) // TestEnv captures all the external parameters consumed by our integration tests. @@ -38,6 +45,7 @@ type TestEnv struct { SupervisorAppName string `json:"supervisorAppName"` SupervisorCustomLabels map[string]string `json:"supervisorCustomLabels"` ConciergeCustomLabels map[string]string `json:"conciergeCustomLabels"` + KubernetesDistribution KubeDistro `json:"kubernetesDistribution"` Capabilities map[Capability]bool `json:"capabilities"` TestWebhook auth1alpha1.WebhookAuthenticatorSpec `json:"testWebhook"` SupervisorHTTPAddress string `json:"supervisorHttpAddress"` @@ -285,3 +293,16 @@ func (e *TestEnv) WithoutCapability(cap Capability) *TestEnv { } return e } + +// WithKubeDistribution skips the test unless it will run on the expected cluster type. +// Please use this sparingly. We would prefer that a test run on every cluster type where it can possibly run, so +// prefer to run everywhere when possible or use cluster capabilities when needed, rather than looking at the +// type of cluster to decide to skip a test. However, there are some tests that do not depend on or interact with +// Kubernetes itself which really only need to run on on a single platform to give us the coverage that we desire. +func (e *TestEnv) WithKubeDistribution(distro KubeDistro) *TestEnv { + e.t.Helper() + if e.KubernetesDistribution != distro { + e.t.Skipf("skipping integration test because test environment is running %q but this test wants %q", e.KubernetesDistribution, distro) + } + return e +} From 2f7dbed321d0f31985dc03d9f2ba2b1073de5ea7 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Thu, 8 Jul 2021 11:17:22 -0700 Subject: [PATCH 4/5] Try increasing the "eventually" timeouts in one integration test There were 10 second timeouts in `TestAPIServingCertificateAutoCreationAndRotation` which fail often on CI. Maybe increasing the timeouts will help? 
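
For context, the assertions being tuned here follow the eventually-consistent
polling pattern sketched below. The sketch uses testify's require.Eventually
directly instead of this repo's testlib helper, and the function name and its
namespace/secretName parameters are made up for illustration.

    package example

    import (
        "context"
        "testing"
        "time"

        "github.com/stretchr/testify/require"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        "k8s.io/client-go/kubernetes"
    )

    // waitForRegeneratedSecret polls every 250ms until the controller has recreated the
    // serving certificate Secret, allowing up to a minute instead of the old 10 seconds.
    func waitForRegeneratedSecret(ctx context.Context, t *testing.T, client kubernetes.Interface, namespace, secretName string) {
        require.Eventually(t, func() bool {
            _, err := client.CoreV1().Secrets(namespace).Get(ctx, secretName, metav1.GetOptions{})
            return err == nil
        }, time.Minute, 250*time.Millisecond, "serving certificate Secret was never regenerated")
    }

The poll interval stays at 250ms so a passing condition is still noticed
quickly; only the overall deadline becomes more generous for slow CI clusters.
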
--- test/integration/concierge_api_serving_certs_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/test/integration/concierge_api_serving_certs_test.go b/test/integration/concierge_api_serving_certs_test.go index 3689ed7a..2bb02f26 100644 --- a/test/integration/concierge_api_serving_certs_test.go +++ b/test/integration/concierge_api_serving_certs_test.go @@ -111,7 +111,7 @@ func TestAPIServingCertificateAutoCreationAndRotation(t *testing.T) { var err error secret, err = kubeClient.CoreV1().Secrets(env.ConciergeNamespace).Get(ctx, defaultServingCertResourceName, metav1.GetOptions{}) requireEventually.NoError(err) - }, 10*time.Second, 250*time.Millisecond) + }, time.Minute, 250*time.Millisecond) regeneratedCACert := secret.Data["caCertificate"] regeneratedPrivateKey := secret.Data["tlsPrivateKey"] regeneratedCertChain := secret.Data["tlsCertificateChain"] @@ -131,7 +131,7 @@ func TestAPIServingCertificateAutoCreationAndRotation(t *testing.T) { apiService, err := aggregatedClient.ApiregistrationV1().APIServices().Get(ctx, apiServiceName, metav1.GetOptions{}) requireEventually.NoErrorf(err, "get for APIService %q returned error", apiServiceName) requireEventually.Equalf(regeneratedCACert, apiService.Spec.CABundle, "CA bundle in APIService %q does not yet have the expected value", apiServiceName) - }, 10*time.Second, 250*time.Millisecond, "never saw CA certificate rotate to expected value") + }, time.Minute, 250*time.Millisecond, "never saw CA certificate rotate to expected value") // Check that we can still make requests to the aggregated API through the kube API server, // because the kube API server uses these certs when proxying requests to the aggregated API server, @@ -150,7 +150,7 @@ func TestAPIServingCertificateAutoCreationAndRotation(t *testing.T) { }, metav1.CreateOptions{}) requireEventually.NoError(err, "dynamiccertificates.Notifier broken?") } - }, 30*time.Second, 250*time.Millisecond) + }, time.Minute, 250*time.Millisecond) }) } } From e130da6daa592c544fce9399cf6b69f75c061f81 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Thu, 8 Jul 2021 11:47:49 -0700 Subject: [PATCH 5/5] Add unit test assertion for new OIDC client request timeout --- .../oidcupstreamwatcher/oidc_upstream_watcher_test.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go index 24cb9b14..8712af26 100644 --- a/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go +++ b/internal/controller/supervisorconfig/oidcupstreamwatcher/oidc_upstream_watcher_test.go @@ -807,6 +807,8 @@ oidc: issuer did not match the issuer returned by provider, expected "` + testIs actualTransportProxyFunction := reflect.ValueOf(actualTransport.Proxy).Pointer() require.Equal(t, httpProxyFromEnvFunction, actualTransportProxyFunction, "Transport should have used http.ProxyFromEnvironment as its Proxy func") + // We also want a reasonable timeout on each request/response cycle for OIDC discovery and JWKS. + require.Equal(t, time.Minute, actualIDP.Client.Timeout) } actualUpstreams, err := fakePinnipedClient.IDPV1alpha1().OIDCIdentityProviders(testNamespace).List(ctx, metav1.ListOptions{})