[CASSANDRA] Loosen probe check time
As for main cassandra chart, with Azure and also some internal
deployments, `nodepool status` takes more than 3 seconds and so
cassandra is not coming up or quite randomly.
This patch gives more room to `nodepool status` to answer.
Issue-ID: OOM-2687
Signed-off-by: Sylvain Desbureaux <sylvain.desbureaux@orange.com>
Change-Id: If6a148a432ed3d83a1e89d38f20fe87e89ab0f57
diff --git a/kubernetes/common/music/components/music-cassandra/templates/statefulset.yaml b/kubernetes/common/music/components/music-cassandra/templates/statefulset.yaml
index 665cdaa..2a1fb4f 100644
--- a/kubernetes/common/music/components/music-cassandra/templates/statefulset.yaml
+++ b/kubernetes/common/music/components/music-cassandra/templates/statefulset.yaml
@@ -58,6 +58,9 @@
- nodetool status | grep $POD_IP | awk '$1!="UN" { exit 1; }'
initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
periodSeconds: {{ .Values.liveness.periodSeconds }}
+ timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
+ successThreshold: {{ .Values.liveness.successThreshold }}
+ failureThreshold: {{ .Values.liveness.failureThreshold }}
{{ end -}}
readinessProbe:
exec:
@@ -67,6 +70,9 @@
- nodetool status | grep $POD_IP | awk '$1!="UN" { exit 1; }'
initialDelaySeconds: {{ .Values.readiness.initialDelaySeconds }}
periodSeconds: {{ .Values.readiness.periodSeconds }}
+ timeoutSeconds: {{ .Values.readiness.timeoutSeconds }}
+ successThreshold: {{ .Values.readiness.successThreshold }}
+ failureThreshold: {{ .Values.readiness.failureThreshold }}
lifecycle:
preStop:
exec:
diff --git a/kubernetes/common/music/components/music-cassandra/values.yaml b/kubernetes/common/music/components/music-cassandra/values.yaml
index 317087c..8530172 100644
--- a/kubernetes/common/music/components/music-cassandra/values.yaml
+++ b/kubernetes/common/music/components/music-cassandra/values.yaml
@@ -73,7 +73,10 @@
# probe configuration parameters
liveness:
initialDelaySeconds: 120
- periodSeconds: 10
+ periodSeconds: 20
+ timeoutSeconds: 10
+ successThreshold: 1
+ failureThreshold: 3
# necessary to disable liveness probe when setting breakpoints
# in debugger so K8s doesn't restart unresponsive container
enabled: true
@@ -82,7 +85,10 @@
readiness:
initialDelaySeconds: 10
- periodSeconds: 10
+ periodSeconds: 20
+ timeoutSeconds: 10
+ successThreshold: 1
+ failureThreshold: 3
podManagementPolicy: OrderedReady
updateStrategy:
diff --git a/kubernetes/portal/components/portal-cassandra/templates/deployment.yaml b/kubernetes/portal/components/portal-cassandra/templates/deployment.yaml
index 20c396f..59eace6 100644
--- a/kubernetes/portal/components/portal-cassandra/templates/deployment.yaml
+++ b/kubernetes/portal/components/portal-cassandra/templates/deployment.yaml
@@ -62,6 +62,7 @@
nodetool status
initialDelaySeconds: {{ .Values.liveness.initialDelaySeconds }}
periodSeconds: {{ .Values.liveness.periodSeconds }}
+ timeoutSeconds: {{ .Values.liveness.timeoutSeconds }}
successThreshold: {{ .Values.liveness.successThreshold }}
failureThreshold: {{ .Values.liveness.failureThreshold }}
{{ end }}
@@ -74,6 +75,7 @@
nodetool status | grep -E "^UN\\s+${POD_IP}"
initialDelaySeconds: {{ .Values.readiness.initialDelaySeconds }}
periodSeconds: {{ .Values.readiness.periodSeconds }}
+ timeoutSeconds: {{ .Values.readiness.timeoutSeconds }}
successThreshold: {{ .Values.readiness.successThreshold }}
failureThreshold: {{ .Values.readiness.failureThreshold }}
lifecycle:
diff --git a/kubernetes/portal/components/portal-cassandra/values.yaml b/kubernetes/portal/components/portal-cassandra/values.yaml
index b06761a..bed75e5 100644
--- a/kubernetes/portal/components/portal-cassandra/values.yaml
+++ b/kubernetes/portal/components/portal-cassandra/values.yaml
@@ -53,16 +53,18 @@
# probe configuration parameters
liveness:
initialDelaySeconds: 10
- periodSeconds: 10
+ periodSeconds: 20
+ timeoutSeconds: 10
+ successThreshold: 1
+ failureThreshold: 3
# necessary to disable liveness probe when setting breakpoints
# in debugger so K8s doesn't restart unresponsive container
enabled: true
- successThreshold: 1
- failureThreshold: 3
readiness:
initialDelaySeconds: 10
- periodSeconds: 10
+ periodSeconds: 20
+ timeoutSeconds: 10
successThreshold: 1
failureThreshold: 3