Blame - ready.py - onap/oom/readiness

blob: 2195a49fd340a486e19aba5300cb5485c8e5474b [file] [log] [blame]

Sylvain Desbureaux	2faa6e6	2020-05-11 15:03:44 +0200	[diff] [blame]	1	#!/usr/bin/env python3
				2	# -- coding: utf-8 --
				3	# Copyright © 2020 Orange
				4	#
				5	# Licensed under the Apache License, Version 2.0 (the "License");
				6	# you may not use this file except in compliance with the License.
				7	# You may obtain a copy of the License at
				8	#
				9	# http://www.apache.org/licenses/LICENSE-2.0
				10	#
				11	# Unless required by applicable law or agreed to in writing, software
				12	# distributed under the License is distributed on an "AS IS" BASIS,
				13	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				14	# See the License for the specific language governing permissions and
				15	# limitations under the License.
				16
				17	"""
				18	Kubernetes readiness check.
				19
				20	Checks if a container is ready or if a job is finished.
				21	The check is done according to the name of the container, not the name of
				22	its parent (Job, Deployment, StatefulSet, DaemonSet).
				23	"""
				24
				25	import getopt
				26	import logging
				27	import os
				28	import sys
				29	import time
				30	import random
				31
				32	from kubernetes import client
				33	from kubernetes.client.rest import ApiException
				34
				35	# extract env variables.
				36	namespace = os.environ['NAMESPACE']
				37	cert = os.environ['CERT']
				38	host = os.environ['KUBERNETES_SERVICE_HOST']
				39	token_path = os.environ['TOKEN']
				40
				41	with open(token_path, 'r') as token_file:
				42	token = token_file.read().replace('\n', '')
				43
				44	# setup logging
				45	log = logging.getLogger(__name__)
				46	handler = logging.StreamHandler(sys.stdout)
				47	formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
				48	handler.setFormatter(formatter)
				49	handler.setLevel(logging.INFO)
				50	log.addHandler(handler)
				51	log.setLevel(logging.INFO)
				52
				53	configuration = client.Configuration()
				54	configuration.host = "https://" + host
				55	configuration.ssl_ca_cert = cert
				56	configuration.api_key['authorization'] = token
				57	configuration.api_key_prefix['authorization'] = 'Bearer'
				58	coreV1Api = client.CoreV1Api(client.ApiClient(configuration))
Sylvain Desbureaux	b468f08	2020-07-22 17:47:06 +0200	[diff] [blame^]	59	api = client.AppsV1(client.ApiClient(configuration))
Sylvain Desbureaux	2faa6e6	2020-05-11 15:03:44 +0200	[diff] [blame]	60	batchV1Api = client.BatchV1Api(client.ApiClient(configuration))
				61
				62
				63	def is_job_complete(job_name):
				64	"""
				65	Check if Job is complete.
				66
				67	Args:
				68	job_name (str): the name of the Job.
				69
				70	Returns:
				71	True if job is complete, false otherwise
				72	"""
				73	complete = False
				74	log.info("Checking if %s is complete", job_name)
				75	try:
				76	response = batchV1Api.read_namespaced_job_status(job_name, namespace)
				77	if response.status.succeeded == 1:
				78	job_status_type = response.status.conditions[0].type
				79	if job_status_type == "Complete":
				80	complete = True
				81	log.info("%s is complete", job_name)
				82	else:
				83	log.info("%s is NOT complete", job_name)
				84	else:
				85	log.info("%s has not succeeded yet", job_name)
				86	except ApiException as exc:
				87	log.error("Exception when calling read_namespaced_job_status: %s\n",
				88	exc)
				89	return complete
				90
				91
				92	def wait_for_statefulset_complete(statefulset_name):
				93	"""
				94	Check if StatefulSet is running.
				95
				96	Args:
				97	statefulset_name (str): the name of the StatefulSet.
				98
				99	Returns:
				100	True if StatefulSet is running, false otherwise
				101	"""
				102	complete = False
				103	try:
				104	response = api.read_namespaced_stateful_set(statefulset_name,
				105	namespace)
				106	status = response.status
				107	if (status.replicas == response.spec.replicas and
				108	status.ready_replicas == response.spec.replicas and
				109	status.observed_generation == response.metadata.generation):
				110	log.info("Statefulset %s is ready", statefulset_name)
				111	complete = True
				112	else:
				113	log.info("Statefulset %s is NOT ready", statefulset_name)
				114	except ApiException as exc:
				115	log.error("Exception when waiting for Statefulset status: %s\n", exc)
				116	return complete
				117
				118
				119	def wait_for_deployment_complete(deployment_name):
				120	"""
				121	Check if Deployment is running.
				122
				123	Args:
				124	deployment_name (str): the name of the Deployment.
				125
				126	Returns:
				127	True if Deployment is running, false otherwise
				128	"""
				129	complete = False
				130	try:
				131	response = api.read_namespaced_deployment(deployment_name, namespace)
				132	status = response.status
				133	if (status.unavailable_replicas is None and
				134	(status.updated_replicas is None or
				135	status.updated_replicas == response.spec.replicas) and
				136	status.replicas == response.spec.replicas and
				137	status.ready_replicas == response.spec.replicas and
				138	status.observed_generation == response.metadata.generation):
				139	log.info("Deployment %s is ready", deployment_name)
				140	complete = True
				141	else:
				142	log.info("Deployment %s is NOT ready", deployment_name)
				143	except ApiException as exc:
				144	log.error("Exception when waiting for deployment status: %s\n", exc)
				145	return complete
				146
				147
				148	def wait_for_daemonset_complete(daemonset_name):
				149	"""
				150	Check if DaemonSet is running.
				151
				152	Args:
				153	daemonset_name (str): the name of the DaemonSet.
				154
				155	Returns:
				156	True if DaemonSet is running, false otherwise
				157	"""
				158	complete = False
				159	try:
Sylvain Desbureaux	b468f08	2020-07-22 17:47:06 +0200	[diff] [blame^]	160	response = api.read_namespaced_daemon_set(
Sylvain Desbureaux	2faa6e6	2020-05-11 15:03:44 +0200	[diff] [blame]	161	daemonset_name, namespace)
				162	status = response.status
				163	if status.desired_number_scheduled == status.number_ready:
				164	log.info("DaemonSet: %s/%s nodes ready --> %s is ready",
				165	status.number_ready, status.desired_number_scheduled,
				166	daemonset_name)
				167	complete = True
				168	else:
				169	log.info("DaemonSet: %s/%s nodes ready --> %s is NOT ready",
				170	status.number_ready, status.desired_number_scheduled,
				171	daemonset_name)
				172	except ApiException as exc:
				173	log.error("Exception when waiting for DaemonSet status: %s\n", exc)
				174	return complete
				175
				176
				177	def is_ready(container_name):
				178	"""
				179	Check if a container is ready.
				180
				181	For a container owned by a Job, it means the Job is complete.
				182	Otherwise, it means the parent (Deployment, StatefulSet, DaemonSet) is
				183	running with the right number of replicas
				184
				185	Args:
				186	container_name (str): the name of the container.
				187
				188	Returns:
				189	True if container is ready, false otherwise
				190	"""
				191	ready = False
				192	log.info("Checking if %s is ready", container_name)
				193	try:
				194	response = coreV1Api.list_namespaced_pod(namespace=namespace,
				195	watch=False)
				196	for item in response.items:
				197	# container_statuses can be None, which is non-iterable.
				198	if item.status.container_statuses is None:
				199	continue
				200	for container in item.status.container_statuses:
				201	if container.name == container_name:
				202	name = read_name(item)
				203	if item.metadata.owner_references[0].kind == "StatefulSet":
				204	ready = wait_for_statefulset_complete(name)
				205	elif item.metadata.owner_references[0].kind == "ReplicaSet":
				206	deployment_name = get_deployment_name(name)
				207	ready = wait_for_deployment_complete(deployment_name)
				208	elif item.metadata.owner_references[0].kind == "Job":
				209	ready = is_job_complete(name)
				210	elif item.metadata.owner_references[0].kind == "DaemonSet":
				211	ready = wait_for_daemonset_complete(
				212	item.metadata.owner_references[0].name)
				213	return ready
				214	except ApiException as exc:
				215	log.error("Exception when calling list_namespaced_pod: %s\n", exc)
				216	return ready
				217
				218
				219	def read_name(item):
				220	"""
				221	Return the name of the owner's item.
				222
				223	Args:
				224	item (str): the item.
				225
				226	Returns:
				227	the name of first owner's item
				228	"""
				229	return item.metadata.owner_references[0].name
				230
				231
				232	def get_deployment_name(replicaset):
				233	"""
				234	Return the name of the Deployment owning the ReplicatSet.
				235
				236	Args:
				237	replicaset (str): the ReplicatSet.
				238
				239	Returns:
				240	the name of the Deployment owning the ReplicatSet
				241	"""
Sylvain Desbureaux	b468f08	2020-07-22 17:47:06 +0200	[diff] [blame^]	242	api_response = api.read_namespaced_replica_set_status(replicaset,
				243	namespace)
Sylvain Desbureaux	2faa6e6	2020-05-11 15:03:44 +0200	[diff] [blame]	244	deployment_name = read_name(api_response)
				245	return deployment_name
				246
				247
				248	DEF_TIMEOUT = 10
				249	DESCRIPTION = "Kubernetes container readiness check utility"
				250	USAGE = "Usage: ready.py [-t <timeout>] -c <container_name> " \
				251	"[-c <container_name> ...]\n" \
				252	"where\n" \
				253	"<timeout> - wait for container readiness timeout in min, " \
				254	"default is " + str(DEF_TIMEOUT) + "\n" \
				255	"<container_name> - name of the container to wait for\n"
				256
				257
				258	def main(argv):
				259	"""
				260	Checks if a container is ready or if a job is finished.
				261	The check is done according to the name of the container, not the name of
				262	its parent (Job, Deployment, StatefulSet, DaemonSet).
				263
				264	Args:
				265	argv: the command line
				266	"""
				267	# args are a list of container names
				268	container_names = []
				269	timeout = DEF_TIMEOUT
				270	try:
				271	opts, _args = getopt.getopt(argv, "hc:t:", ["container-name=",
				272	"timeout=",
				273	"help"])
				274	for opt, arg in opts:
				275	if opt in ("-h", "--help"):
				276	print("{}\n\n{}".format(DESCRIPTION, USAGE))
				277	sys.exit()
				278	elif opt in ("-c", "--container-name"):
				279	container_names.append(arg)
				280	elif opt in ("-t", "--timeout"):
				281	timeout = float(arg)
				282	except (getopt.GetoptError, ValueError) as exc:
				283	print("Error parsing input parameters: {}\n".format(exc))
				284	print(USAGE)
				285	sys.exit(2)
				286	if container_names.__len__() == 0:
				287	print("Missing required input parameter(s)\n")
				288	print(USAGE)
				289	sys.exit(2)
				290
				291	for container_name in container_names:
				292	timeout = time.time() + timeout * 60
				293	while True:
				294	ready = is_ready(container_name)
				295	if ready is True:
				296	break
				297	if time.time() > timeout:
				298	log.warning("timed out waiting for '%s' to be ready",
				299	container_name)
				300	sys.exit(1)
				301	else:
				302	# spread in time potentially parallel execution in multiple
				303	# containers
				304	time.sleep(random.randint(5, 11))
				305
				306
				307	if __name__ == "__main__":
				308	main(sys.argv[1:])