#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# Copyright © 2020 Orange
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Kubernetes readiness check.

Checks if a container is ready or if a job is finished.
The check is done according to the name of the container, not the name of
its parent (Job, Deployment, StatefulSet, DaemonSet).
"""
24
import getopt
import logging
import os
import random
import sys
import time

from kubernetes import client
from kubernetes.client.rest import ApiException
34
# Extract the connection settings from the environment. Each lookup raises
# KeyError at import time when the variable is not set.
namespace = os.environ['NAMESPACE']
cert = os.environ['CERT']
host = os.environ['KUBERNETES_SERVICE_HOST']
token_path = os.environ['TOKEN']

# TOKEN is the path of the file holding the bearer token, not the token
# itself; every newline is removed from the file content.
with open(token_path, 'r') as token_file:
    token = token_file.read().replace('\n', '')

# Set up logging: INFO and above, timestamped, written to stdout.
log = logging.getLogger(__name__)
handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
handler.setLevel(logging.INFO)
log.addHandler(handler)
log.setLevel(logging.INFO)

# Build the API clients used by the checks below. They all share the same
# configuration (HTTPS endpoint, CA certificate, bearer token).
configuration = client.Configuration()
configuration.host = "https://" + host
configuration.ssl_ca_cert = cert
configuration.api_key['authorization'] = token
configuration.api_key_prefix['authorization'] = 'Bearer'
coreV1Api = client.CoreV1Api(client.ApiClient(configuration))
# NOTE(review): ExtensionsV1beta1Api and AppsV1beta1Api target beta API
# groups -- confirm that both the installed kubernetes client and the target
# cluster still provide them before upgrading either.
api_instance = client.ExtensionsV1beta1Api(client.ApiClient(configuration))
api = client.AppsV1beta1Api(client.ApiClient(configuration))
batchV1Api = client.BatchV1Api(client.ApiClient(configuration))
62
63
def is_job_complete(job_name):
    """
    Check if Job is complete.

    A Job is reported complete once its status shows at least one succeeded
    pod and carries a "Complete" condition whose status is "True".

    Args:
        job_name (str): the name of the Job.

    Returns:
        True if job is complete, false otherwise (including when the API
        call fails)
    """
    log.info("Checking if %s is complete", job_name)
    try:
        response = batchV1Api.read_namespaced_job_status(job_name, namespace)
    except ApiException as exc:
        log.error("Exception when calling read_namespaced_job_status: %s\n",
                  exc)
        return False

    status = response.status
    # succeeded is None (or 0) while no pod has finished successfully; a Job
    # with several completions reports a count greater than 1 when done.
    if not status.succeeded:
        log.info("%s has not succeeded yet", job_name)
        return False

    # conditions may be None and "Complete" is not guaranteed to be the
    # first entry, so scan the whole list instead of indexing [0].
    for condition in status.conditions or []:
        if condition.type == "Complete" and condition.status == "True":
            log.info("%s is complete", job_name)
            return True

    log.info("%s is NOT complete", job_name)
    return False
91
92
def wait_for_statefulset_complete(statefulset_name):
    """
    Check if StatefulSet is running.

    Despite its name, this does a single status read and returns at once;
    it never blocks or retries.

    Args:
        statefulset_name (str): the name of the StatefulSet.

    Returns:
        True if StatefulSet is running, false otherwise (including when the
        API call fails)
    """
    try:
        response = api.read_namespaced_stateful_set(statefulset_name,
                                                    namespace)
    except ApiException as exc:
        log.error("Exception when waiting for Statefulset status: %s\n", exc)
        return False

    status = response.status
    desired = response.spec.replicas
    # Ready means: every desired replica exists and is ready, and the
    # controller has observed the latest revision of the spec.
    complete = (status.replicas == desired and
                status.ready_replicas == desired and
                status.observed_generation == response.metadata.generation)
    if complete:
        log.info("Statefulset %s is ready", statefulset_name)
    else:
        log.info("Statefulset %s is NOT ready", statefulset_name)
    return complete
118
119
def wait_for_deployment_complete(deployment_name):
    """
    Check if Deployment is running.

    Despite its name, this does a single status read and returns at once;
    it never blocks or retries.

    Args:
        deployment_name (str): the name of the Deployment.

    Returns:
        True if Deployment is running, false otherwise (including when the
        API call fails)
    """
    try:
        response = api.read_namespaced_deployment(deployment_name, namespace)
    except ApiException as exc:
        log.error("Exception when waiting for deployment status: %s\n", exc)
        return False

    status = response.status
    desired = response.spec.replicas
    # Ready means: nothing is reported unavailable, the updated count (when
    # reported) and the total and ready counts all match the desired
    # replicas, and the controller has observed the latest spec.
    complete = (status.unavailable_replicas is None and
                (status.updated_replicas is None or
                 status.updated_replicas == desired) and
                status.replicas == desired and
                status.ready_replicas == desired and
                status.observed_generation == response.metadata.generation)
    if complete:
        log.info("Deployment %s is ready", deployment_name)
    else:
        log.info("Deployment %s is NOT ready", deployment_name)
    return complete
147
148
def wait_for_daemonset_complete(daemonset_name):
    """
    Check if DaemonSet is running.

    Despite its name, this does a single status read and returns at once;
    it never blocks or retries.

    Args:
        daemonset_name (str): the name of the DaemonSet.

    Returns:
        True if DaemonSet is running, false otherwise (including when the
        API call fails)
    """
    try:
        response = api_instance.read_namespaced_daemon_set(
            daemonset_name, namespace)
    except ApiException as exc:
        log.error("Exception when waiting for DaemonSet status: %s\n", exc)
        return False

    status = response.status
    # Ready means the number of ready pods equals the number of nodes that
    # should be running one.
    complete = status.desired_number_scheduled == status.number_ready
    if complete:
        message = "DaemonSet: %s/%s nodes ready --> %s is ready"
    else:
        message = "DaemonSet: %s/%s nodes ready --> %s is NOT ready"
    log.info(message, status.number_ready, status.desired_number_scheduled,
             daemonset_name)
    return complete
176
177
def is_ready(container_name):
    """
    Check if a container is ready.

    For a container owned by a Job, it means the Job is complete.
    Otherwise, it means the parent (Deployment, StatefulSet, DaemonSet) is
    running with the right number of replicas.

    The pods of the namespace are scanned in the order returned by the API
    and the first owned pod running a container with that name decides the
    result. Pods without any owner reference are skipped.

    Args:
        container_name (str): the name of the container.

    Returns:
        True if container is ready, false otherwise (no matching pod, owner
        kind not handled, or API call failure)
    """
    ready = False
    log.info("Checking if %s is ready", container_name)
    try:
        response = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                 watch=False)
        for item in response.items:
            # container_statuses can be None, which is non-iterable.
            statuses = item.status.container_statuses
            if statuses is None:
                continue
            if not any(status.name == container_name for status in statuses):
                continue

            # owner_references is None for a pod created directly; there is
            # no parent to inspect, so look at the remaining pods instead of
            # failing on the [0] lookup.
            owners = item.metadata.owner_references
            if not owners:
                log.warning("Pod %s runs %s but has no owner, skipping it",
                            item.metadata.name, container_name)
                continue

            # Only the first owner is considered.
            owner = owners[0]
            if owner.kind == "StatefulSet":
                ready = wait_for_statefulset_complete(owner.name)
            elif owner.kind == "ReplicaSet":
                # The pod is owned by a ReplicaSet; the Deployment owning
                # that ReplicaSet is the object actually checked.
                deployment_name = get_deployment_name(owner.name)
                ready = wait_for_deployment_complete(deployment_name)
            elif owner.kind == "Job":
                ready = is_job_complete(owner.name)
            elif owner.kind == "DaemonSet":
                ready = wait_for_daemonset_complete(owner.name)
            # Any other owner kind leaves ready at False.
            return ready
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_pod: %s\n", exc)
    return ready
218
219
def read_name(item):
    """
    Return the name of the item's first owner.

    Args:
        item: an object exposing ``metadata.owner_references`` (a pod or a
            ReplicaSet as returned by the Kubernetes client).

    Returns:
        the name of the first entry of ``item.metadata.owner_references``
    """
    first_owner = item.metadata.owner_references[0]
    return first_owner.name
231
232
def get_deployment_name(replicaset):
    """
    Return the name of the Deployment owning the ReplicaSet.

    The ReplicaSet is read from the API and the name of its first owner
    reference is returned. An ApiException raised by the read is not
    handled here and propagates to the caller.

    Args:
        replicaset (str): the name of the ReplicaSet.

    Returns:
        the name of the Deployment owning the ReplicaSet
    """
    api_response = api_instance.read_namespaced_replica_set_status(
        replicaset, namespace)
    return read_name(api_response)
247
248
# Default time to wait for a container to become ready, in minutes.
DEF_TIMEOUT = 10
DESCRIPTION = "Kubernetes container readiness check utility"
# Help text printed for -h/--help and after any argument error.
USAGE = (
    "Usage: ready.py [-t <timeout>] -c <container_name> "
    "[-c <container_name> ...]\n"
    "where\n"
    "<timeout> - wait for container readiness timeout in min, "
    "default is " + str(DEF_TIMEOUT) + "\n"
    "<container_name> - name of the container to wait for\n"
)
257
258
def main(argv):
    """
    Checks if a container is ready or if a job is finished.
    The check is done according to the name of the container, not the name of
    its parent (Job, Deployment, StatefulSet, DaemonSet).

    Containers are waited for one after the other, each with its own
    deadline of ``timeout`` minutes. The process exits with status 2 on a
    usage error and with status 1 as soon as one container times out.

    Args:
        argv: the command line arguments, without the program name
    """
    # args are a list of container names
    container_names = []
    timeout = DEF_TIMEOUT
    try:
        opts, _args = getopt.getopt(argv, "hc:t:", ["container-name=",
                                                    "timeout=",
                                                    "help"])
        for opt, arg in opts:
            if opt in ("-h", "--help"):
                print("{}\n\n{}".format(DESCRIPTION, USAGE))
                sys.exit()
            elif opt in ("-c", "--container-name"):
                container_names.append(arg)
            elif opt in ("-t", "--timeout"):
                timeout = float(arg)
    except (getopt.GetoptError, ValueError) as exc:
        print("Error parsing input parameters: {}\n".format(exc))
        print(USAGE)
        sys.exit(2)
    if not container_names:
        print("Missing required input parameter(s)\n")
        print(USAGE)
        sys.exit(2)

    for container_name in container_names:
        # Keep the deadline in its own variable: overwriting `timeout` here
        # would feed an absolute timestamp into the next container's
        # computation and make its deadline practically unreachable.
        # monotonic() is not affected by wall-clock adjustments.
        deadline = time.monotonic() + timeout * 60
        while not is_ready(container_name):
            if time.monotonic() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            container_name)
                sys.exit(1)
            # spread in time potentially parallel execution in multiple
            # containers
            time.sleep(random.randint(5, 11))
306
307
if __name__ == "__main__":
    # Pass only the real arguments; argv[0] is the script path.
    main(sys.argv[1:])