#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © 2020 Orange
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
16
"""
Kubernetes readiness check.

Checks if a container is ready or if a job is finished.
The check is done according to the name of the container, not the name of
its parent (Job, Deployment, StatefulSet, DaemonSet).
"""
24
25import getopt
26import logging
27import os
28import sys
29import time
30import random
31
32from kubernetes import client
33from kubernetes.client.rest import ApiException
34
# Connection settings come from the process environment; indexing os.environ
# raises KeyError at import time when one of them is missing.
namespace = os.environ['NAMESPACE']
cert = os.environ['CERT']
host = os.environ['KUBERNETES_SERVICE_HOST']
token_path = os.environ['TOKEN']

# Newlines are stripped so the token can be used verbatim as a header value.
with open(token_path, 'r') as token_file:
    token = token_file.read().replace('\n', '')

# setup logging: INFO and above, written to stdout
log = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
handler.setLevel(logging.INFO)
log.addHandler(handler)
log.setLevel(logging.INFO)

# API clients authenticate with the bearer token read above and verify the
# server against the CA bundle referenced by CERT.
configuration = client.Configuration()
configuration.host = "https://" + host
configuration.ssl_ca_cert = cert
configuration.api_key['authorization'] = token
configuration.api_key_prefix['authorization'] = 'Bearer'
coreV1Api = client.CoreV1Api(client.ApiClient(configuration))
# The apps/v1 client class is named AppsV1Api; `client.AppsV1` does not exist
# and would raise AttributeError as soon as the module is loaded.
api = client.AppsV1Api(client.ApiClient(configuration))
batchV1Api = client.BatchV1Api(client.ApiClient(configuration))
61
62
def is_job_complete(job_name):
    """
    Check if Job is complete.

    The Job is considered complete when its status reports at least one
    succeeded pod and any of its conditions is ``Complete`` with status
    ``"True"``. All conditions are inspected, not only the first one, and a
    missing condition list is treated as "not complete" instead of raising.

    Args:
        job_name (str): the name of the Job.

    Returns:
        True if job is complete, false otherwise (including when the API
        call fails; the error is logged)
    """
    log.info("Checking if %s is complete", job_name)
    try:
        response = batchV1Api.read_namespaced_job_status(job_name, namespace)
    except ApiException as exc:
        log.error("Exception when calling read_namespaced_job_status: %s\n",
                  exc)
        return False

    status = response.status
    # `succeeded` is None or 0 until a pod has finished successfully; it may
    # exceed 1 for Jobs that require several completions.
    if not status.succeeded:
        log.info("%s has not succeeded yet", job_name)
        return False

    # `conditions` can be None and "Complete" is not guaranteed to be first.
    conditions = status.conditions or []
    complete = any(
        condition.type == "Complete" and condition.status == "True"
        for condition in conditions
    )
    if complete:
        log.info("%s is complete", job_name)
    else:
        log.info("%s is NOT complete", job_name)
    return complete
90
91
def wait_for_statefulset_complete(statefulset_name):
    """
    Tell whether a StatefulSet has reached its desired state.

    The StatefulSet is ready when both its current and ready replica counts
    equal the replica count of its spec, and the observed generation equals
    the generation in its metadata.

    Args:
        statefulset_name (str): the name of the StatefulSet.

    Returns:
        True if the StatefulSet is ready, False otherwise (also when the API
        call raises ApiException, which is logged)
    """
    try:
        statefulset = api.read_namespaced_stateful_set(statefulset_name,
                                                       namespace)
    except ApiException as exc:
        log.error("Exception when waiting for Statefulset status: %s\n", exc)
        return False

    observed = statefulset.status
    desired = statefulset.spec.replicas
    is_ready_now = (
        observed.replicas == desired
        and observed.ready_replicas == desired
        and observed.observed_generation == statefulset.metadata.generation
    )
    if is_ready_now:
        log.info("Statefulset %s is ready", statefulset_name)
    else:
        log.info("Statefulset %s is NOT ready", statefulset_name)
    return is_ready_now
117
118
def wait_for_deployment_complete(deployment_name):
    """
    Tell whether a Deployment has reached its desired state.

    The Deployment is ready when it reports no unavailable replicas, its
    updated replica count is either unset or equal to the spec, its current
    and ready replica counts equal the spec, and the observed generation
    equals the generation in its metadata.

    Args:
        deployment_name (str): the name of the Deployment.

    Returns:
        True if the Deployment is ready, False otherwise (also when the API
        call raises ApiException, which is logged)
    """
    try:
        deployment = api.read_namespaced_deployment(deployment_name,
                                                    namespace)
    except ApiException as exc:
        log.error("Exception when waiting for deployment status: %s\n", exc)
        return False

    observed = deployment.status
    desired = deployment.spec.replicas
    # An unset updated_replicas is accepted as "nothing left to update".
    rollout_done = observed.updated_replicas in (None, desired)
    is_ready_now = (
        observed.unavailable_replicas is None
        and rollout_done
        and observed.replicas == desired
        and observed.ready_replicas == desired
        and observed.observed_generation == deployment.metadata.generation
    )
    if is_ready_now:
        log.info("Deployment %s is ready", deployment_name)
    else:
        log.info("Deployment %s is NOT ready", deployment_name)
    return is_ready_now
146
147
def wait_for_daemonset_complete(daemonset_name):
    """
    Tell whether a DaemonSet is ready on every node it should run on.

    The DaemonSet is ready when the number of ready pods equals the desired
    number of scheduled pods reported in its status.

    Args:
        daemonset_name (str): the name of the DaemonSet.

    Returns:
        True if the DaemonSet is ready, False otherwise (also when the API
        call raises ApiException, which is logged)
    """
    try:
        daemonset = api.read_namespaced_daemon_set(daemonset_name, namespace)
    except ApiException as exc:
        log.error("Exception when waiting for DaemonSet status: %s\n", exc)
        return False

    ready_count = daemonset.status.number_ready
    desired_count = daemonset.status.desired_number_scheduled
    all_ready = desired_count == ready_count
    if all_ready:
        message = "DaemonSet: %s/%s nodes ready --> %s is ready"
    else:
        message = "DaemonSet: %s/%s nodes ready --> %s is NOT ready"
    log.info(message, ready_count, desired_count, daemonset_name)
    return all_ready
175
176
def is_ready(container_name):
    """
    Check if a container is ready.

    Pods of the namespace are listed and the first pod that has a container
    status named ``container_name`` and at least one owner reference decides
    the result, based on the kind of its first owner:

    - StatefulSet, DaemonSet: the owner must be ready.
    - ReplicaSet: the Deployment owning the ReplicaSet must be ready.
    - Job: the Job must be complete.
    - any other kind: reported as not ready.

    Pods without container statuses or without owner references are
    skipped instead of raising.

    Args:
        container_name (str): the name of the container.

    Returns:
        True if container is ready, false otherwise (including when no pod
        matches or an ApiException is raised; the error is logged)
    """
    ready = False
    log.info("Checking if %s is ready", container_name)
    try:
        response = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                 watch=False)
        for item in response.items:
            # container_statuses can be None, which is non-iterable.
            statuses = item.status.container_statuses or []
            if not any(status.name == container_name for status in statuses):
                continue

            # owner_references is None for pods created without a
            # controller; such a pod cannot be mapped to a parent resource.
            owners = item.metadata.owner_references
            if not owners:
                log.info("Pod %s has no owner reference, skipping",
                         item.metadata.name)
                continue

            owner = owners[0]
            if owner.kind == "StatefulSet":
                ready = wait_for_statefulset_complete(owner.name)
            elif owner.kind == "ReplicaSet":
                # Pods of a Deployment are owned by an intermediate
                # ReplicaSet; readiness is evaluated on the Deployment.
                deployment_name = get_deployment_name(owner.name)
                ready = wait_for_deployment_complete(deployment_name)
            elif owner.kind == "Job":
                ready = is_job_complete(owner.name)
            elif owner.kind == "DaemonSet":
                ready = wait_for_daemonset_complete(owner.name)
            return ready
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_pod: %s\n", exc)
    return ready
217
218
def read_name(item):
    """
    Return the name of the first owner of an object.

    Args:
        item: an object exposing ``metadata.owner_references`` as a
            sequence of entries that each have a ``name`` attribute.

    Returns:
        the name of the first owner reference of the item
    """
    owners = item.metadata.owner_references
    first_owner = owners[0]
    return first_owner.name
230
231
def get_deployment_name(replicaset):
    """
    Return the name of the Deployment owning a ReplicaSet.

    The ReplicaSet is read from the API and the name of its first owner
    reference is returned.

    Args:
        replicaset (str): the name of the ReplicaSet.

    Returns:
        the name of the first owner of the ReplicaSet
    """
    replica_set = api.read_namespaced_replica_set_status(replicaset,
                                                         namespace)
    return read_name(replica_set)
246
247
# Default readiness timeout, in minutes.
DEF_TIMEOUT = 10
DESCRIPTION = "Kubernetes container readiness check utility"
# Help text printed with --help and after argument errors.
USAGE = "".join((
    "Usage: ready.py [-t <timeout>] -c <container_name> ",
    "[-c <container_name> ...]\n",
    "where\n",
    "<timeout> - wait for container readiness timeout in min, ",
    "default is ",
    str(DEF_TIMEOUT),
    "\n",
    "<container_name> - name of the container to wait for\n",
))
256
257
def main(argv):
    """
    Checks if a container is ready or if a job is finished.
    The check is done according to the name of the container, not the name of
    its parent (Job, Deployment, StatefulSet, DaemonSet).

    Containers are waited for one after the other, in the order given on the
    command line. Each container gets its own full timeout, counted from the
    moment its wait starts.

    Exit statuses: 0 after ``--help``, 2 on invalid or missing arguments,
    1 when a container is not ready before its timeout expires.

    Args:
        argv: the command line (without the program name)
    """
    # args are a list of container names
    container_names = []
    timeout = DEF_TIMEOUT
    try:
        opts, _args = getopt.getopt(argv, "hc:t:", ["container-name=",
                                                    "timeout=",
                                                    "help"])
        for opt, arg in opts:
            if opt in ("-h", "--help"):
                print("{}\n\n{}".format(DESCRIPTION, USAGE))
                sys.exit()
            elif opt in ("-c", "--container-name"):
                container_names.append(arg)
            elif opt in ("-t", "--timeout"):
                # ValueError from a non-numeric value is reported below.
                timeout = float(arg)
    except (getopt.GetoptError, ValueError) as exc:
        print("Error parsing input parameters: {}\n".format(exc))
        print(USAGE)
        sys.exit(2)
    if not container_names:
        print("Missing required input parameter(s)\n")
        print(USAGE)
        sys.exit(2)

    timeout_seconds = timeout * 60
    for container_name in container_names:
        # The deadline lives in its own variable: overwriting `timeout` with
        # an absolute timestamp would make the deadline of every following
        # container practically unreachable. A monotonic clock keeps the
        # deadline immune to wall-clock adjustments.
        deadline = time.monotonic() + timeout_seconds
        while not is_ready(container_name):
            if time.monotonic() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            container_name)
                sys.exit(1)
            # spread in time potentially parallel execution in multiple
            # containers
            time.sleep(random.randint(5, 11))
305
306
if __name__ == "__main__":
    # Hand over the command line without the program name.
    cli_arguments = sys.argv[1:]
    main(cli_arguments)