// Copyright 2020-2023 the Pinniped contributors. All Rights Reserved.
// SPDX-License-Identifier: Apache-2.0

// Package server is the command line entry point for pinniped-concierge.
package server

import (
	"context"
	"fmt"
	"io"
	"os"
	"time"

	"github.com/spf13/cobra"
	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
	"k8s.io/apimachinery/pkg/runtime/serializer"
	apimachineryversion "k8s.io/apimachinery/pkg/version"
	openapinamer "k8s.io/apiserver/pkg/endpoints/openapi"
	genericapiserver "k8s.io/apiserver/pkg/server"
	genericoptions "k8s.io/apiserver/pkg/server/options"
	"k8s.io/client-go/rest"

	conciergeopenapi "go.pinniped.dev/generated/latest/client/concierge/openapi"
	"go.pinniped.dev/internal/certauthority/dynamiccertauthority"
	"go.pinniped.dev/internal/clientcertissuer"
	"go.pinniped.dev/internal/concierge/apiserver"
	conciergescheme "go.pinniped.dev/internal/concierge/scheme"
	"go.pinniped.dev/internal/config/concierge"
	"go.pinniped.dev/internal/controller/authenticator/authncache"
	"go.pinniped.dev/internal/controllerinit"
	"go.pinniped.dev/internal/controllermanager"
	"go.pinniped.dev/internal/crypto/ptls"
	"go.pinniped.dev/internal/downward"
	"go.pinniped.dev/internal/dynamiccert"
	"go.pinniped.dev/internal/here"
	"go.pinniped.dev/internal/kubeclient"
	"go.pinniped.dev/internal/plog"
	"go.pinniped.dev/internal/pversion"
	"go.pinniped.dev/internal/registry/credentialrequest"
)

// App is an object that represents the pinniped-concierge application.
type App struct {
	cmd *cobra.Command

	// CLI flags
	configPath      string
	downwardAPIPath string
}

// New constructs a new App with command line args, stdout and stderr.
func New(ctx context.Context, args []string, stdout, stderr io.Writer) *App {
	app := &App{}
	app.addServerCommand(ctx, args, stdout, stderr)
	return app
}

// Run the server.
func (a *App) Run() error {
	return a.cmd.Execute()
}

// Create the server command and save it into the App.
func (a *App) addServerCommand(ctx context.Context, args []string, stdout, stderr io.Writer) {
	cmd := &cobra.Command{
		Use: "pinniped-concierge",
		Long: here.Doc(`
			pinniped-concierge provides a generic API for mapping an external
			credential from somewhere to an internal credential to be used for
			authenticating to the Kubernetes API.`),
		RunE: func(cmd *cobra.Command, args []string) error { return a.runServer(ctx) },
		Args: cobra.NoArgs,
	}

	cmd.SetArgs(args)
	cmd.SetOut(stdout)
	cmd.SetErr(stderr)
	addCommandlineFlagsToCommand(cmd, a)

	a.cmd = cmd
}

// Define the app's commandline flags.
func addCommandlineFlagsToCommand(cmd *cobra.Command, app *App) {
	cmd.Flags().StringVarP(
		&app.configPath,
		"config",
		"c",
		"pinniped.yaml",
		"path to configuration file",
	)

	cmd.Flags().StringVar(
		&app.downwardAPIPath,
		"downward-api-path",
		"/etc/podinfo",
		"path to Downward API volume mount",
	)
}
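
// An illustrative invocation of this binary, assuming a deployment that mounts the config
// file and the downward API volume at typical paths (the real values come from the
// deployment manifests, not from this file):
//
//	pinniped-concierge --config=/etc/config/pinniped.yaml --downward-api-path=/etc/podinfo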

// Boot the aggregated API server, which will in turn boot the controllers.
// In practice, the ctx passed in should be one which will be cancelled when the process receives SIGTERM or SIGINT.
func (a *App) runServer(ctx context.Context) error {
	// Read the server config file.
	cfg, err := concierge.FromPath(ctx, a.configPath)
	if err != nil {
		return fmt.Errorf("could not load config: %w", err)
	}

	// Discover in which namespace we are installed.
	podInfo, err := downward.Load(a.downwardAPIPath)
	if err != nil {
		return fmt.Errorf("could not read pod metadata: %w", err)
	}
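	// (The downward API volume mounted at downwardAPIPath exposes pod metadata, such as the
	// pod's namespace, name, and labels, as files.)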

	// Initialize the cache of active authenticators.
	authenticators := authncache.New()

	// This cert provider will provide certs to the API server and will
	// be mutated by a controller to keep the certs up to date with what
	// is stored in a k8s Secret. Therefore, it acts as an in-memory cache
	// of what is stored in the k8s Secret, helping to keep incoming requests
	// fast.
	dynamicServingCertProvider := dynamiccert.NewServingCert("concierge-serving-cert")

	// This cert provider will be used to provide the Kube signing key to the
	// cert issuer used to issue certs to Pinniped clients wishing to log in.
	dynamicSigningCertProvider := dynamiccert.NewCA("concierge-kube-signing-cert")

	// This cert provider will be used to provide the impersonation proxy signing key to the
	// cert issuer used to issue certs to Pinniped clients wishing to log in.
	impersonationProxySigningCertProvider := dynamiccert.NewCA("impersonation-proxy-signing-cert")

	// Get the "real" name of the login concierge API group (i.e., the API group name with the
	// injected suffix).
	scheme, loginGV, identityGV := conciergescheme.New(*cfg.APIGroupSuffix)
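	// (For example, with the default APIGroupSuffix of "pinniped.dev", the login API group is
	// "login.concierge.pinniped.dev"; a custom suffix replaces the "pinniped.dev" portion.)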

	// Prepare to start the controllers, but defer actually starting them until the
	// post start hook of the aggregated API server.
	buildControllers, err := controllermanager.PrepareControllers(
		&controllermanager.Config{
			ServerInstallationInfo:           podInfo,
			APIGroupSuffix:                   *cfg.APIGroupSuffix,
			NamesConfig:                      &cfg.NamesConfig,
			Labels:                           cfg.Labels,
			KubeCertAgentConfig:              &cfg.KubeCertAgentConfig,
			DiscoveryURLOverride:             cfg.DiscoveryInfo.URL,
			DynamicServingCertProvider:       dynamicServingCertProvider,
			DynamicSigningCertProvider:       dynamicSigningCertProvider,
			ImpersonationSigningCertProvider: impersonationProxySigningCertProvider,
			ServingCertDuration:              time.Duration(*cfg.APIConfig.ServingCertificateConfig.DurationSeconds) * time.Second,
			ServingCertRenewBefore:           time.Duration(*cfg.APIConfig.ServingCertificateConfig.RenewBeforeSeconds) * time.Second,
			AuthenticatorCache:               authenticators,

			// This port should be safe to cast because the config reader already validated it.
			ImpersonationProxyServerPort: int(*cfg.ImpersonationProxyServerPort),
		},
	)
	if err != nil {
		return fmt.Errorf("could not prepare controllers: %w", err)
	}

	certIssuer := clientcertissuer.ClientCertIssuers{
		dynamiccertauthority.New(dynamicSigningCertProvider),            // attempt to use the real Kube CA if possible
		dynamiccertauthority.New(impersonationProxySigningCertProvider), // fallback to our internal CA if we need to
	}
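	// (Per the inline comments above, ClientCertIssuers appears to try each issuer in order,
	// so the impersonation proxy CA is used only when the real Kube CA is unavailable.)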

	// Get the aggregated API server config.
	aggregatedAPIServerConfig, err := getAggregatedAPIServerConfig(
		dynamicServingCertProvider,
		authenticators,
		certIssuer,
		buildControllers,
		*cfg.APIGroupSuffix,
		*cfg.AggregatedAPIServerPort,
		scheme,
		loginGV,
		identityGV,
	)
	if err != nil {
		return fmt.Errorf("could not configure aggregated API server: %w", err)
	}

	// Complete the aggregated API server config and make a server instance.
	server, err := aggregatedAPIServerConfig.Complete().New()
	if err != nil {
		return fmt.Errorf("could not create aggregated API server: %w", err)
	}

	// Run the server. Its post-start hook will start the controllers. Its pre-shutdown hook will be called when ctx
	// is cancelled, and that hook should gracefully stop the controllers and give up the leader election lease. See
	// the code for these hooks in internal/concierge/apiserver.go.
	return server.GenericAPIServer.PrepareRun().Run(ctx.Done())
}

// Create a configuration for the aggregated API server.
func getAggregatedAPIServerConfig(
	dynamicCertProvider dynamiccert.Private,
	authenticator credentialrequest.TokenCredentialRequestAuthenticator,
	issuer clientcertissuer.ClientCertIssuer,
	buildControllers controllerinit.RunnerBuilder,
	apiGroupSuffix string,
	aggregatedAPIServerPort int64,
	scheme *runtime.Scheme,
	loginConciergeGroupVersion, identityConciergeGroupVersion schema.GroupVersion,
) (*apiserver.Config, error) {
	codecs := serializer.NewCodecFactory(scheme)

	// this is unused for now but it is a safe value that we could use in the future
	defaultEtcdPathPrefix := fmt.Sprintf("/pinniped-concierge-registry/%s", apiGroupSuffix)

	recommendedOptions := genericoptions.NewRecommendedOptions(
		defaultEtcdPathPrefix,
		codecs.LegacyCodec(loginConciergeGroupVersion, identityConciergeGroupVersion),
	)
	recommendedOptions.Etcd = nil // turn off etcd storage because we don't need it yet
	recommendedOptions.SecureServing.ServerCert.GeneratedCert = dynamicCertProvider

	// This port is configurable. It should be safe to cast because the config reader already validated it.
	recommendedOptions.SecureServing.BindPort = int(aggregatedAPIServerPort)
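	// (Turning Etcd off works here because the Concierge's aggregated APIs, such as
	// TokenCredentialRequest, are handled in memory and are never persisted to storage.)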

	// secure TLS for connections coming from and going to the Kube API server
	// this is best effort because not all options provide the right hooks to override TLS config
	// since our only client is the Kube API server, this uses the most secure TLS config
	if err := ptls.SecureRecommendedOptions(recommendedOptions, kubeclient.Secure); err != nil {
		return nil, fmt.Errorf("failed to secure recommended options: %w", err)
	}

	serverConfig := genericapiserver.NewRecommendedConfig(codecs)

	// Add the generated openapi docs to the server config. Publishing openapi docs allows
	// `kubectl explain` to work for the Concierge's aggregated API resources.
	serverConfig.OpenAPIConfig = genericapiserver.DefaultOpenAPIConfig(
		conciergeopenapi.GetOpenAPIDefinitions, openapinamer.NewDefinitionNamer(scheme))
	// serverConfig.OpenAPIConfig.Info.InfoProps.Title = "Pinniped Concierge"
	serverConfig.OpenAPIV3Config = genericapiserver.DefaultOpenAPIV3Config(
		conciergeopenapi.GetOpenAPIDefinitions, openapinamer.NewDefinitionNamer(scheme))
	// serverConfig.OpenAPIV3Config.Info.InfoProps.Title = "Pinniped Concierge"
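	// (For example, against a cluster where the Concierge is installed, one could then run
	// something like `kubectl explain tokencredentialrequests.login.concierge.pinniped.dev`;
	// the resource name shown here is illustrative.)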

	// Note that among other things, this ApplyTo() function copies
	// `recommendedOptions.SecureServing.ServerCert.GeneratedCert` into
	// `serverConfig.SecureServing.Cert` thus making `dynamicCertProvider`
	// the cert provider for the running server. The provider will be called
	// by the API machinery periodically. When the provider returns nil certs,
	// the API server will return "the server is currently unable to
	// handle the request" error responses for all incoming requests.
	// If the provider later starts returning certs, then the API server
	// will use them to handle the incoming requests successfully.
	if err := recommendedOptions.ApplyTo(serverConfig); err != nil {
		return nil, fmt.Errorf("failed to apply recommended options: %w", err)
	}

	apiServerConfig := &apiserver.Config{
		GenericConfig: serverConfig,
		ExtraConfig: apiserver.ExtraConfig{
			Authenticator:                 authenticator,
			Issuer:                        issuer,
			BuildControllersPostStartHook: buildControllers,
			Scheme:                        scheme,
			NegotiatedSerializer:          codecs,
			LoginConciergeGroupVersion:    loginConciergeGroupVersion,
			IdentityConciergeGroupVersion: identityConciergeGroupVersion,
		},
	}
	return apiServerConfig, nil
}

// main returns an error instead of calling plog.Fatal to allow defer statements to run.
func main() error {
	defer plog.Setup()()
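	// (The `defer plog.Setup()()` idiom calls Setup immediately and defers the function it
	// returns, so whatever cleanup that returned function performs happens when main returns.)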

	// Dump out the time since compile (mostly useful for benchmarking our local development cycle latency).
	var timeSinceCompile time.Duration
	if buildDate, err := time.Parse(time.RFC3339, pversion.Get().BuildDate); err == nil {
		timeSinceCompile = time.Since(buildDate).Round(time.Second)
	}
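	// (BuildDate is presumably stamped into pversion at build time, e.g. via -ldflags; when
	// it is missing or unparsable, timeSinceCompile simply stays at its zero value.)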

	plog.Always("Running concierge",
		"user-agent", rest.DefaultKubernetesUserAgent(),
		"version", versionInfo(pversion.Get()),
		"time-since-build", timeSinceCompile,
	)

	// This context will be cancelled upon the first SIGTERM or SIGINT, and the process will
	// be killed via os.Exit() upon a second SIGTERM or SIGINT.
	ctx := genericapiserver.SetupSignalContext()

	// Just for debugging purposes, log when the first signal is received.
	go func() {
		<-ctx.Done() // wait for the Done channel to be closed, indicating that ctx was cancelled by the signal handler
		plog.Debug("concierge shutdown initiated due to process receiving SIGTERM or SIGINT")
	}()

	return New(ctx, os.Args[1:], os.Stdout, os.Stderr).Run()
}

func Main() {
	if err := main(); err != nil {
		plog.Fatal(err)
	}
}
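
// versionInfo exists only to hide apimachineryversion.Info's String() method: a defined
// type does not inherit the underlying type's methods, so structured loggers like plog
// should emit the full struct fields rather than the short version string that
// Info.String() would otherwise produce.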
type versionInfo apimachineryversion.Info // hide .String() method from plog