Blame - ready.py - onap/oom/readiness

blob: 0640ed5fc57b0041cf69337345510947de67878c [file] [log] [blame]

Sylvain Desbureaux	2faa6e6	2020-05-11 15:03:44 +0200	[diff] [blame^]	1	#!/usr/bin/env python3
				2	# -*- coding: utf-8 -*-
				3	# Copyright © 2020 Orange
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the "License");
				6	# you may not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# http://www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an "AS IS" BASIS,
				13	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
				17	"""
				18	Kubernetes readiness check.
				19
				20	Checks if a container is ready or if a job is finished.
				21	The check is done according to the name of the container, not the name of
				22	its parent (Job, Deployment, StatefulSet, DaemonSet).
				23	"""
				24
				25	import getopt
				26	import logging
				27	import os
				28	import sys
				29	import time
				30	import random
				31
				32	from kubernetes import client
				33	from kubernetes.client.rest import ApiException
				34
				35	# extract env variables.
				36	namespace = os.environ['NAMESPACE']
				37	cert = os.environ['CERT']
				38	host = os.environ['KUBERNETES_SERVICE_HOST']
				39	token_path = os.environ['TOKEN']
				40
				41	with open(token_path, 'r') as token_file:
				42	token = token_file.read().replace('\n', '')
				43
				44	# setup logging
				45	log = logging.getLogger(__name__)
				46	handler = logging.StreamHandler(sys.stdout)
				47	formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
				48	handler.setFormatter(formatter)
				49	handler.setLevel(logging.INFO)
				50	log.addHandler(handler)
				51	log.setLevel(logging.INFO)
				52
				53	configuration = client.Configuration()
				54	configuration.host = "https://" + host
				55	configuration.ssl_ca_cert = cert
				56	configuration.api_key['authorization'] = token
				57	configuration.api_key_prefix['authorization'] = 'Bearer'
				58	coreV1Api = client.CoreV1Api(client.ApiClient(configuration))
				59	api_instance = client.ExtensionsV1beta1Api(client.ApiClient(configuration))
				60	api = client.AppsV1beta1Api(client.ApiClient(configuration))
				61	batchV1Api = client.BatchV1Api(client.ApiClient(configuration))
				62
				63
				64	def is_job_complete(job_name):
				65	"""
				66	Check if Job is complete.
				67
				68	Args:
				69	job_name (str): the name of the Job.
				70
				71	Returns:
				72	True if job is complete, false otherwise
				73	"""
				74	complete = False
				75	log.info("Checking if %s is complete", job_name)
				76	try:
				77	response = batchV1Api.read_namespaced_job_status(job_name, namespace)
				78	if response.status.succeeded == 1:
				79	job_status_type = response.status.conditions[0].type
				80	if job_status_type == "Complete":
				81	complete = True
				82	log.info("%s is complete", job_name)
				83	else:
				84	log.info("%s is NOT complete", job_name)
				85	else:
				86	log.info("%s has not succeeded yet", job_name)
				87	except ApiException as exc:
				88	log.error("Exception when calling read_namespaced_job_status: %s\n",
				89	exc)
				90	return complete
				91
				92
				93	def wait_for_statefulset_complete(statefulset_name):
				94	"""
				95	Check if StatefulSet is running.
				96
				97	Args:
				98	statefulset_name (str): the name of the StatefulSet.
				99
				100	Returns:
				101	True if StatefulSet is running, false otherwise
				102	"""
				103	complete = False
				104	try:
				105	response = api.read_namespaced_stateful_set(statefulset_name,
				106	namespace)
				107	status = response.status
				108	if (status.replicas == response.spec.replicas and
				109	status.ready_replicas == response.spec.replicas and
				110	status.observed_generation == response.metadata.generation):
				111	log.info("Statefulset %s is ready", statefulset_name)
				112	complete = True
				113	else:
				114	log.info("Statefulset %s is NOT ready", statefulset_name)
				115	except ApiException as exc:
				116	log.error("Exception when waiting for Statefulset status: %s\n", exc)
				117	return complete
				118
				119
				120	def wait_for_deployment_complete(deployment_name):
				121	"""
				122	Check if Deployment is running.
				123
				124	Args:
				125	deployment_name (str): the name of the Deployment.
				126
				127	Returns:
				128	True if Deployment is running, false otherwise
				129	"""
				130	complete = False
				131	try:
				132	response = api.read_namespaced_deployment(deployment_name, namespace)
				133	status = response.status
				134	if (status.unavailable_replicas is None and
				135	(status.updated_replicas is None or
				136	status.updated_replicas == response.spec.replicas) and
				137	status.replicas == response.spec.replicas and
				138	status.ready_replicas == response.spec.replicas and
				139	status.observed_generation == response.metadata.generation):
				140	log.info("Deployment %s is ready", deployment_name)
				141	complete = True
				142	else:
				143	log.info("Deployment %s is NOT ready", deployment_name)
				144	except ApiException as exc:
				145	log.error("Exception when waiting for deployment status: %s\n", exc)
				146	return complete
				147
				148
				149	def wait_for_daemonset_complete(daemonset_name):
				150	"""
				151	Check if DaemonSet is running.
				152
				153	Args:
				154	daemonset_name (str): the name of the DaemonSet.
				155
				156	Returns:
				157	True if DaemonSet is running, false otherwise
				158	"""
				159	complete = False
				160	try:
				161	response = api_instance.read_namespaced_daemon_set(
				162	daemonset_name, namespace)
				163	status = response.status
				164	if status.desired_number_scheduled == status.number_ready:
				165	log.info("DaemonSet: %s/%s nodes ready --> %s is ready",
				166	status.number_ready, status.desired_number_scheduled,
				167	daemonset_name)
				168	complete = True
				169	else:
				170	log.info("DaemonSet: %s/%s nodes ready --> %s is NOT ready",
				171	status.number_ready, status.desired_number_scheduled,
				172	daemonset_name)
				173	except ApiException as exc:
				174	log.error("Exception when waiting for DaemonSet status: %s\n", exc)
				175	return complete
				176
				177
				178	def is_ready(container_name):
				179	"""
				180	Check if a container is ready.
				181
				182	For a container owned by a Job, it means the Job is complete.
				183	Otherwise, it means the parent (Deployment, StatefulSet, DaemonSet) is
				184	running with the right number of replicas
				185
				186	Args:
				187	container_name (str): the name of the container.
				188
				189	Returns:
				190	True if container is ready, false otherwise
				191	"""
				192	ready = False
				193	log.info("Checking if %s is ready", container_name)
				194	try:
				195	response = coreV1Api.list_namespaced_pod(namespace=namespace,
				196	watch=False)
				197	for item in response.items:
				198	# container_statuses can be None, which is non-iterable.
				199	if item.status.container_statuses is None:
				200	continue
				201	for container in item.status.container_statuses:
				202	if container.name == container_name:
				203	name = read_name(item)
				204	if item.metadata.owner_references[0].kind == "StatefulSet":
				205	ready = wait_for_statefulset_complete(name)
				206	elif item.metadata.owner_references[0].kind == "ReplicaSet":
				207	deployment_name = get_deployment_name(name)
				208	ready = wait_for_deployment_complete(deployment_name)
				209	elif item.metadata.owner_references[0].kind == "Job":
				210	ready = is_job_complete(name)
				211	elif item.metadata.owner_references[0].kind == "DaemonSet":
				212	ready = wait_for_daemonset_complete(
				213	item.metadata.owner_references[0].name)
				214	return ready
				215	except ApiException as exc:
				216	log.error("Exception when calling list_namespaced_pod: %s\n", exc)
				217	return ready
				218
				219
				220	def read_name(item):
				221	"""
				222	Return the name of the owner's item.
				223
				224	Args:
				225	item (str): the item.
				226
				227	Returns:
				228	the name of first owner's item
				229	"""
				230	return item.metadata.owner_references[0].name
				231
				232
				233	def get_deployment_name(replicaset):
				234	"""
				235	Return the name of the Deployment owning the ReplicatSet.
				236
				237	Args:
				238	replicaset (str): the ReplicatSet.
				239
				240	Returns:
				241	the name of the Deployment owning the ReplicatSet
				242	"""
				243	api_response = api_instance.read_namespaced_replica_set_status(replicaset,
				244	namespace)
				245	deployment_name = read_name(api_response)
				246	return deployment_name
				247
				248
				249	DEF_TIMEOUT = 10
				250	DESCRIPTION = "Kubernetes container readiness check utility"
				251	USAGE = "Usage: ready.py [-t <timeout>] -c <container_name> " \
				252	"[-c <container_name> ...]\n" \
				253	"where\n" \
				254	"<timeout> - wait for container readiness timeout in min, " \
				255	"default is " + str(DEF_TIMEOUT) + "\n" \
				256	"<container_name> - name of the container to wait for\n"
				257
				258
				259	def main(argv):
				260	"""
				261	Checks if a container is ready or if a job is finished.
				262	The check is done according to the name of the container, not the name of
				263	its parent (Job, Deployment, StatefulSet, DaemonSet).
				264
				265	Args:
				266	argv: the command line
				267	"""
				268	# args are a list of container names
				269	container_names = []
				270	timeout = DEF_TIMEOUT
				271	try:
				272	opts, _args = getopt.getopt(argv, "hc:t:", ["container-name=",
				273	"timeout=",
				274	"help"])
				275	for opt, arg in opts:
				276	if opt in ("-h", "--help"):
				277	print("{}\n\n{}".format(DESCRIPTION, USAGE))
				278	sys.exit()
				279	elif opt in ("-c", "--container-name"):
				280	container_names.append(arg)
				281	elif opt in ("-t", "--timeout"):
				282	timeout = float(arg)
				283	except (getopt.GetoptError, ValueError) as exc:
				284	print("Error parsing input parameters: {}\n".format(exc))
				285	print(USAGE)
				286	sys.exit(2)
				287	if container_names.__len__() == 0:
				288	print("Missing required input parameter(s)\n")
				289	print(USAGE)
				290	sys.exit(2)
				291
				292	for container_name in container_names:
				293	timeout = time.time() + timeout * 60
				294	while True:
				295	ready = is_ready(container_name)
				296	if ready is True:
				297	break
				298	if time.time() > timeout:
				299	log.warning("timed out waiting for '%s' to be ready",
				300	container_name)
				301	sys.exit(1)
				302	else:
				303	# spread in time potentially parallel execution in multiple
				304	# containers
				305	time.sleep(random.randint(5, 11))
				306
				307
				308	if __name__ == "__main__":
				309	main(sys.argv[1:])