From ecde8fa8af5a0e1dde962079ac846beafa808a17 Mon Sep 17 00:00:00 2001 From: Ryan Richard Date: Mon, 17 Aug 2020 16:44:42 -0700 Subject: [PATCH] Implement basic liveness and readiness probes - Call the auto-generated /healthz endpoint of our aggregated API server - Use HTTP for liveness even though TCP seems like it might be more appropriate, because TCP probes cause TLS handshake errors to appear in our logs every few seconds - Use conservative timeouts and retries on the liveness probe to avoid having our container get restarted when it is temporarily slow due to running in an environment under resource pressure - Use less conservative timeouts and retries for the readiness probe, so that an unhealthy pod is removed from the service sooner than its container would be restarted - Tuning the settings for retries and timeouts seems to be a mysterious art, so these are just a first draft --- deploy/deployment.yaml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/deploy/deployment.yaml b/deploy/deployment.yaml index f5ef824b..d0a3cac0 100644 --- a/deploy/deployment.yaml +++ b/deploy/deployment.yaml @@ -88,6 +88,24 @@ spec: mountPath: /etc/podinfo - name: k8s-certs mountPath: /etc/kubernetes/pki + livenessProbe: + httpGet: + path: /healthz + port: 443 + scheme: HTTPS + initialDelaySeconds: 20 + timeoutSeconds: 15 + periodSeconds: 10 + failureThreshold: 5 + readinessProbe: + httpGet: + path: /healthz + port: 443 + scheme: HTTPS + initialDelaySeconds: 20 + timeoutSeconds: 3 + periodSeconds: 10 + failureThreshold: 3 volumes: - name: config-volume configMap: