Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 1 | #!/usr/bin/env python3 |
| 2 | # -*- coding: utf-8 -*- |
| 3 | # Copyright © 2020 Orange |
Krzysztof Kuzmicki | e342ced | 2020-08-10 15:56:21 +0200 | [diff] [blame] | 4 | # Copyright © 2020 Nokia |
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 5 | # |
| 6 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | # you may not use this file except in compliance with the License. |
| 8 | # You may obtain a copy of the License at |
| 9 | # |
| 10 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | # |
| 12 | # Unless required by applicable law or agreed to in writing, software |
| 13 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | # See the License for the specific language governing permissions and |
| 16 | # limitations under the License. |
| 17 | |
| 18 | """ |
| 19 | Kubernetes readiness check. |
| 20 | |
| 21 | Checks if a container is ready or if a job is finished. |
| 22 | The check is done according to the name of the container, not the name of |
| 23 | its parent (Job, Deployment, StatefulSet, DaemonSet). |
| 24 | """ |
| 25 | |
| 26 | import getopt |
| 27 | import logging |
| 28 | import os |
| 29 | import sys |
| 30 | import time |
| 31 | import random |
othman touijer | 87a99b1 | 2021-11-24 10:41:24 +0100 | [diff] [blame] | 32 | import requests |
Andreas Geissler | 0adc4b0 | 2023-08-28 13:43:28 +0200 | [diff] [blame] | 33 | import socket |
| 34 | from contextlib import closing |
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 35 | |
othman touijer | 5274bc7 | 2021-11-15 11:19:33 +0100 | [diff] [blame] | 36 | from kubernetes import client, config |
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 37 | from kubernetes.client.rest import ApiException |
| 38 | |
# Namespace the checks run against; stays empty until main() assigns it.
namespace = ""

# Logging: INFO and above, timestamped, written to stdout.
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
handler = logging.StreamHandler(sys.stdout)
handler.setLevel(logging.INFO)
handler.setFormatter(formatter)
log = logging.getLogger(__name__)
log.setLevel(logging.INFO)
log.addHandler(handler)

# Load the in-cluster Kubernetes configuration before creating API clients.
# For local testing use config.load_kube_config() instead.
config.load_incluster_config()
coreV1Api = client.CoreV1Api()
api = client.AppsV1Api()
batchV1Api = client.BatchV1Api()
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 56 | |
def is_job_complete(job_name):
    """
    Check if Job is complete.

    The Job counts as complete when its status reports exactly one
    succeeded pod and one of its conditions has the type "Complete".

    Args:
        job_name (str): the name of the Job.

    Returns:
        True if job is complete, false otherwise (also when the API call
        raises an ApiException, which is logged)
    """
    complete = False
    log.info("Checking if job %s is complete", job_name)
    try:
        response = batchV1Api.read_namespaced_job_status(job_name, namespace)
        if response.status.succeeded == 1:
            # conditions may be unset, and "Complete" is not guaranteed to be
            # the first entry, so look through the whole list instead of
            # indexing element 0.
            conditions = response.status.conditions or []
            if any(cond.type == "Complete" for cond in conditions):
                complete = True
                log.info("%s is complete", job_name)
            else:
                log.info("%s is NOT complete", job_name)
        else:
            log.info("%s has not succeeded yet", job_name)
    except ApiException as exc:
        log.error("Exception when calling read_namespaced_job_status: %s\n",
                  exc)
    return complete
| 84 | |
| 85 | |
def wait_for_statefulset_complete(statefulset_name):
    """
    Report whether a StatefulSet has fully rolled out.

    The StatefulSet is ready when the current and the ready replica counts
    both equal the requested replica count and the observed generation
    equals the metadata generation.

    Args:
        statefulset_name (str): the name of the StatefulSet.

    Returns:
        True if the StatefulSet is ready, false otherwise (also when the
        API call raises an ApiException, which is logged)
    """
    try:
        sts = api.read_namespaced_stateful_set(statefulset_name, namespace)
    except ApiException as exc:
        log.error("Exception when waiting for Statefulset status: %s\n", exc)
        return False

    current = sts.status
    rolled_out = (
        current.replicas == sts.spec.replicas
        and current.ready_replicas == sts.spec.replicas
        and current.observed_generation == sts.metadata.generation
    )
    if not rolled_out:
        log.info("Statefulset %s is NOT ready", statefulset_name)
        return False
    log.info("Statefulset %s is ready", statefulset_name)
    return True
| 111 | |
| 112 | |
def wait_for_deployment_complete(deployment_name):
    """
    Report whether a Deployment has fully rolled out.

    The Deployment is ready when it reports no unavailable replicas, its
    updated replica count is either unset or equal to the requested count,
    the current and ready replica counts equal the requested count, and the
    observed generation equals the metadata generation.

    Args:
        deployment_name (str): the name of the Deployment.

    Returns:
        True if the Deployment is ready, false otherwise (also when the
        API call raises an ApiException, which is logged)
    """
    try:
        dep = api.read_namespaced_deployment(deployment_name, namespace)
    except ApiException as exc:
        log.error("Exception when waiting for deployment status: %s\n", exc)
        return False

    state = dep.status
    if state.unavailable_replicas is not None:
        ready = False
    elif (state.updated_replicas is not None
          and state.updated_replicas != dep.spec.replicas):
        ready = False
    else:
        ready = (state.replicas == dep.spec.replicas
                 and state.ready_replicas == dep.spec.replicas
                 and state.observed_generation == dep.metadata.generation)

    if ready:
        log.info("Deployment %s is ready", deployment_name)
        return True
    log.info("Deployment %s is NOT ready", deployment_name)
    return False
| 140 | |
| 141 | |
def wait_for_daemonset_complete(daemonset_name):
    """
    Report whether a DaemonSet is ready on every scheduled node.

    The DaemonSet is ready when the number of ready pods equals the desired
    number of scheduled pods.

    Args:
        daemonset_name (str): the name of the DaemonSet.

    Returns:
        True if the DaemonSet is ready, false otherwise (also when the
        API call raises an ApiException, which is logged)
    """
    try:
        daemonset = api.read_namespaced_daemon_set(daemonset_name, namespace)
    except ApiException as exc:
        log.error("Exception when waiting for DaemonSet status: %s\n", exc)
        return False

    counters = daemonset.status
    if counters.desired_number_scheduled != counters.number_ready:
        log.info("DaemonSet: %s/%s nodes ready --> %s is NOT ready",
                 counters.number_ready, counters.desired_number_scheduled,
                 daemonset_name)
        return False
    log.info("DaemonSet: %s/%s nodes ready --> %s is ready",
             counters.number_ready, counters.desired_number_scheduled,
             daemonset_name)
    return True
| 169 | |
| 170 | |
def is_ready(container_name):
    """
    Check if a container is ready.

    For a container owned by a Job, it means the Job is complete.
    Otherwise, it means the parent (Deployment, StatefulSet, DaemonSet) is
    running with the right number of replicas.

    The first pod in the namespace that has a container with the given name
    and an owner decides the result. Pods without owner references are
    skipped, because readiness is evaluated on the owner.

    Args:
        container_name (str): the name of the container.

    Returns:
        True if container is ready, false otherwise (also when no matching
        pod is found or an ApiException is raised, which is logged)
    """
    ready = False
    log.info("Checking if container %s is ready", container_name)
    try:
        response = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                 watch=False)
        for item in response.items:
            # container_statuses can be None, which is non-iterable.
            statuses = item.status.container_statuses or []
            if not any(status.name == container_name for status in statuses):
                continue
            owners = item.metadata.owner_references
            if not owners:
                # owner_references is unset for pods created directly;
                # indexing it would raise a TypeError nobody catches.
                log.info("Pod %s has no owner, skipping", item.metadata.name)
                continue
            owner = owners[0]
            if owner.kind == "StatefulSet":
                ready = wait_for_statefulset_complete(owner.name)
            elif owner.kind == "ReplicaSet":
                # The pod is owned by a ReplicaSet; the Deployment is the
                # owner of that ReplicaSet.
                deployment_name = get_deployment_name(owner.name)
                ready = wait_for_deployment_complete(deployment_name)
            elif owner.kind == "Job":
                ready = is_job_complete(owner.name)
            elif owner.kind == "DaemonSet":
                ready = wait_for_daemonset_complete(owner.name)
            return ready
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_pod: %s\n", exc)
    return ready
| 211 | |
def is_service_ready(service_name):
    """
    Check if a service is ready.

    The service is ready, if the selected pod is finally deployed.
    It means the parent (Deployment, StatefulSet, DaemonSet) is
    running with the right number of replicas.

    Services are matched by name prefix. For a service with a selector the
    first owned pod matching the selector is checked; for a service without
    selector the first address of the first subset of an Endpoints object
    with the same name prefix is used.

    Args:
        service_name (str): the name (prefix) of the service.

    Returns:
        True if service is ready, false otherwise (also when nothing
        matches or an ApiException is raised, which is logged)
    """
    ready = False
    log.info("Checking if service %s is ready", service_name)
    try:
        services = coreV1Api.list_namespaced_service(namespace=namespace,
                                                     watch=False)
        for svc in services.items:
            if not svc.metadata.name.startswith(service_name):
                continue
            if svc.spec.selector:
                # convert the selector dictionary into a string selector
                # for example: {"app":"redis"} => "app=redis"
                selector = ",".join(
                    "{}={}".format(key, value)
                    for key, value in svc.spec.selector.items())
                log.info("Found Selector %s", selector)
                # Get the pods that match the selector
                pods = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                     label_selector=selector,
                                                     watch=False)
                for item in pods.items:
                    owners = item.metadata.owner_references
                    if not owners:
                        # No owner to evaluate readiness on; try next pod.
                        continue
                    name = owners[0].name
                    log.info("Found pod %s selected by service %s",
                             name, service_name)
                    return is_pod_ready(name)
            else:
                log.info("No Selector found, check Endpoints")
                endpoints = coreV1Api.list_namespaced_endpoints(
                    namespace=namespace, watch=False)
                for ep in endpoints.items:
                    if not ep.metadata.name.startswith(service_name):
                        continue
                    if not ep.subsets:
                        continue
                    addresses = ep.subsets[0].addresses
                    # target_ref is optional on an endpoint address.
                    if not addresses or addresses[0].target_ref is None:
                        continue
                    name = addresses[0].target_ref.name
                    log.info("Found pod %s selected by service %s",
                             name, service_name)
                    return is_pod_ready(name)
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_service: %s\n", exc)
    return ready
| 264 | |
def is_pod_ready(pod_name):
    """
    Check if a pod is ready.

    For a pod owned by a Job, it means the Job is complete.
    Otherwise, it means the parent (Deployment, StatefulSet, DaemonSet) is
    running with the right number of replicas.

    Pods are matched by name prefix; the first matching pod that has an
    owner decides the result. Pods without owner references are skipped.

    Args:
        pod_name (str): the name (prefix) of the pod.

    Returns:
        True if pod is ready, false otherwise (also when no matching pod is
        found or an ApiException is raised, which is logged)
    """
    ready = False
    log.info("Checking if pod %s is ready", pod_name)
    try:
        response = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                 watch=False)
        for item in response.items:
            if not item.metadata.name.startswith(pod_name):
                continue
            owners = item.metadata.owner_references
            if not owners:
                # owner_references is unset for pods created directly;
                # indexing it would raise a TypeError nobody catches.
                log.info("Pod %s has no owner, skipping", item.metadata.name)
                continue
            owner = owners[0]
            log.info("Found pod %s", owner.name)
            if owner.kind == "StatefulSet":
                ready = wait_for_statefulset_complete(owner.name)
            elif owner.kind == "ReplicaSet":
                # The pod is owned by a ReplicaSet; the Deployment is the
                # owner of that ReplicaSet.
                deployment_name = get_deployment_name(owner.name)
                ready = wait_for_deployment_complete(deployment_name)
            elif owner.kind == "Job":
                ready = is_job_complete(owner.name)
            elif owner.kind == "DaemonSet":
                ready = wait_for_daemonset_complete(owner.name)
            return ready
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_pod: %s\n", exc)
    return ready
| 302 | |
def is_app_ready(app_name):
    """
    Check if a pod with app-label is ready.

    The first owned pod whose "app" label equals app_name is looked up and
    its owner name is handed to is_pod_ready(). Pods without labels or
    without owner references are skipped.

    Args:
        app_name (str): the app label of the pod.

    Returns:
        True if pod is ready, false otherwise (also when no matching pod is
        found or an ApiException is raised, which is logged)
    """
    ready = False
    log.info("Checking if pod with app-label %s is ready", app_name)
    try:
        response = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                 watch=False)
        for item in response.items:
            # labels is None for a pod without any label.
            labels = item.metadata.labels or {}
            if labels.get('app', "NOKEY") != app_name:
                continue
            owners = item.metadata.owner_references
            if not owners:
                # No owner to evaluate readiness on; try the next pod.
                continue
            name = owners[0].name
            log.info("Found pod %s", name)
            return is_pod_ready(name)
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_pod: %s\n", exc)
    return ready
| 330 | |
def service_mesh_job_check(container_name):
    """
    Check if a Job's primary container is complete.

    Used for ensuring the sidecar can be killed after Job completion: the
    container counts as complete when it is reported as terminated while
    its pod is still in the "Running" phase.

    Args:
        container_name (str): the name of the Job's primary container.

    Returns:
        True if job's container is in the completed state, false otherwise
        (also when an ApiException is raised, which is logged)
    """
    complete = False
    log.info("Checking if container %s is complete", container_name)
    try:
        response = coreV1Api.list_namespaced_pod(namespace=namespace,
                                                 watch=False)
        for item in response.items:
            # container_statuses can be None, which is non-iterable.
            if item.status.container_statuses is None:
                continue
            for container in item.status.container_statuses:
                if (container.name != container_name
                        or item.status.phase != "Running"):
                    continue
                log.info("Container Details %s ", container)
                log.info("Container Status %s ", container.state.terminated)

                if container.state.terminated:
                    log.info("Container Terminated with reason %s ",
                             container.state.terminated.reason)
                    complete = True
    except ApiException as exc:
        log.error("Exception when calling list_namespaced_pod: %s\n",
                  exc)
    return complete
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 362 | |
def read_name(item):
    """
    Return the name of the first owner of an item.

    Args:
        item: an object exposing metadata.owner_references.

    Returns:
        the name of the first entry in the item's owner references
    """
    first_owner = item.metadata.owner_references[0]
    return first_owner.name
| 374 | |
| 375 | |
def get_deployment_name(replicaset):
    """
    Return the name of the Deployment owning the ReplicaSet.

    Args:
        replicaset (str): the name of the ReplicaSet.

    Returns:
        the name of the first owner of the ReplicaSet
    """
    replica_set = api.read_namespaced_replica_set_status(replicaset,
                                                         namespace)
    return read_name(replica_set)
Andreas Geissler | 68eb3f5 | 2023-08-30 08:41:53 +0200 | [diff] [blame] | 390 | |
def check_socket(host, port):
    """
    Probe whether a TCP connection to host:port can be established.

    Args:
        host (str): address to connect to.
        port (int): TCP port to connect to.

    Returns:
        True if the connection attempt succeeds, false otherwise
    """
    with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as probe:
        # connect_ex returns an error code instead of raising; 0 is success.
        reachable = probe.connect_ex((host, port)) == 0
        if reachable:
            print("Port is open")
        else:
            print("Port is not open")
        return reachable
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 399 | |
def quitquitquit_post(apiurl):
    """
    Ask the sidecar to shut down by POSTing to its quitquitquit endpoint.

    If nothing listens on 127.0.0.1:15020 there is no sidecar to stop and
    the call is treated as successful.

    Args:
        apiurl (str): the URL to send the POST request to.

    Returns:
        True if no sidecar is listening or the POST returned a successful
        status, false if the POST returned an error status or failed
    """
    if check_socket("127.0.0.1", 15020) is False:
        log.info("no sidecar exists, exiting")
        return True
    try:
        # The request itself is what can fail (refused/reset connection),
        # so it has to be inside the try block.
        response = requests.post(url=apiurl)
    except requests.RequestException as exc:
        log.error("quitquitquit call failed with exception: %s", exc)
        return False
    if response.ok:
        log.info("quitquitquit returned True")
        return True
    log.info("quitquitquit returned False")
    return False
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 416 | |
# Default wait per checked item, in minutes.
DEF_TIMEOUT = 10
# Default endpoint used to stop the service mesh sidecar.
DEF_URL = "http://127.0.0.1:15020/quitquitquit"
DESCRIPTION = "Kubernetes container readiness check utility"
# Each option description sits on its own line; the timeout default must
# directly follow the <timeout> text.
USAGE = "Usage: ready.py [-t <timeout>] [-n <namespace>] [-i <interval>] " \
        "-c <container_name> .. \n" \
        "| -s <service_name> .. | -p <pod_name> .. | -a <app_name> .. \n" \
        "| -j <job_name> .. | -m <container_name> .. [-u <url>]\n" \
        "where\n" \
        "<timeout> - wait for container readiness timeout in min, " \
        "default is " + str(DEF_TIMEOUT) + "\n" \
        "<namespace> - K8S namespace the check is done in, " \
        "default is the NAMESPACE environment variable\n" \
        "<interval> - seconds to sleep between two checks, " \
        "default is a random value between 2 and 6\n" \
        "<service_name> - name of the service to wait for\n" \
        "<container_name> - name of the container to wait for\n" \
        "<pod_name> - name of the pod to wait for\n" \
        "<app_name> - app label of the pod to wait for\n" \
        "<job_name> - name of the job to wait for\n" \
        "-m <container_name> - name of the job's main container to wait " \
        "for before stopping the sidecar\n" \
        "<url> - sidecar quitquitquit URL, default is " + DEF_URL + "\n"
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 432 | |
| 433 | |
def main(argv):
    """
    Checks if a container, pod or service is ready,
    if a job is finished or if the main container of a job has completed.
    The check is done according to the name of the container or pod,
    not the name of its parent (Job, Deployment, StatefulSet, DaemonSet).

    The namespace comes from -n/--namespace or, when that is not given,
    from the NAMESPACE environment variable. The process exits with status
    2 on invalid or missing parameters.

    Args:
        argv: the command line
    """
    global namespace
    # every option may be repeated; names are collected per kind
    container_names = []
    service_names = []
    pod_names = []
    app_names = []
    job_names = []
    service_mesh_job_container_names = []
    timeout = DEF_TIMEOUT
    url = DEF_URL
    ns = ""
    interval = None
    try:
        # Each long option is its own list element: without the comma
        # between "interval=" and "help" the two literals are concatenated
        # into one bogus option.
        opts, _args = getopt.getopt(argv, "hj:s:c:p:a:t:m:u:n:i:",
                                    ["service-name=",
                                     "container-name=",
                                     "pod-name=",
                                     "app-name=",
                                     "timeout=",
                                     "service-mesh-check=",
                                     "url=",
                                     "job-name=",
                                     "namespace=",
                                     "interval=",
                                     "help"])
        for opt, arg in opts:
            if opt in ("-h", "--help"):
                print("{}\n\n{}".format(DESCRIPTION, USAGE))
                sys.exit()
            elif opt in ("-s", "--service-name"):
                service_names.append(arg)
            elif opt in ("-c", "--container-name"):
                container_names.append(arg)
            elif opt in ("-p", "--pod-name"):
                pod_names.append(arg)
            elif opt in ("-a", "--app-name"):
                app_names.append(arg)
            elif opt in ("-j", "--job-name"):
                job_names.append(arg)
            elif opt in ("-m", "--service-mesh-check"):
                service_mesh_job_container_names.append(arg)
            elif opt in ("-u", "--url"):
                url = arg
            elif opt in ("-n", "--namespace"):
                ns = arg
            elif opt in ("-t", "--timeout"):
                timeout = float(arg)
            elif opt in ("-i", "--interval"):
                interval = int(arg)
    except (getopt.GetoptError, ValueError) as exc:
        print("Error parsing input parameters: {}\n".format(exc))
        print(USAGE)
        sys.exit(2)
    if not any((container_names, job_names, pod_names, app_names,
                service_mesh_job_container_names, service_names)):
        print("Missing required input parameter(s)\n")
        print(USAGE)
        sys.exit(2)
    if ns == "":
        # extract ns from env variable
        try:
            namespace = os.environ['NAMESPACE']
        except KeyError:
            print("Missing namespace: use -n or set NAMESPACE\n")
            print(USAGE)
            sys.exit(2)
    else:
        namespace = ns

    check_service_readiness(service_names, timeout, interval)
    check_container_readiness(container_names, timeout, interval)
    check_pod_readiness(pod_names, timeout, interval)
    check_app_readiness(app_names, timeout, interval)
    check_job_readiness(job_names, timeout, interval)
    check_service_mesh_job_readiness(service_mesh_job_container_names,
                                     timeout, url)
| 514 | |
def check_service_mesh_job_readiness(service_mesh_job_container_names, timeout, url):
    """
    Wait for each Job main container to complete, then stop the sidecar.

    Once service_mesh_job_check() reports a container as complete, a POST
    is sent to url through quitquitquit_post() and the next name is
    handled. The process exits with status 1 when a container is not
    complete before its deadline.

    Args:
        service_mesh_job_container_names (list): container names to wait
            for, handled in order.
        timeout (float): maximum wait per container, in minutes.
        url (str): the URL handed to quitquitquit_post().
    """
    for service_mesh_job_container_name in service_mesh_job_container_names:
        # Keep the deadline separate from ``timeout``: overwriting the
        # parameter would feed an absolute timestamp into the computation
        # for the next name and practically disable its timeout.
        deadline = time.time() + timeout * 60
        while True:
            ready = service_mesh_job_check(service_mesh_job_container_name)
            if ready is True:
                sideCarKilled = quitquitquit_post(url)
                if sideCarKilled is True:
                    log.info("Side Car Killed through QuitQuitQuit API")
                else:
                    log.info("Side Car Failed to be Killed through QuitQuitQuit API")
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            service_mesh_job_container_name)
                sys.exit(1)
            # spread in time potentially parallel execution in multiple
            # containers
            time.sleep(random.randint(2, 6))
| 535 | |
def check_job_readiness(job_names, timeout, interval=None):
    """
    Block until every listed Job is complete.

    The process exits with status 1 when a Job is not complete before its
    deadline.

    Args:
        job_names (list): names of the Jobs to wait for, handled in order.
        timeout (float): maximum wait per Job, in minutes.
        interval (int): seconds to sleep between two checks; when None a
            random pause of 2 to 6 seconds is used.
    """
    for job_name in job_names:
        # Keep the deadline separate from ``timeout``: overwriting the
        # parameter would feed an absolute timestamp into the computation
        # for the next name and practically disable its timeout.
        deadline = time.time() + timeout * 60
        while True:
            if is_job_complete(job_name) is True:
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            job_name)
                sys.exit(1)
            if interval is not None:
                time.sleep(interval)
            else:
                # spread in time potentially parallel execution in multiple
                # containers
                time.sleep(random.randint(2, 6))
| 554 | |
def check_app_readiness(app_names, timeout, interval=None):
    """
    Block until a pod is ready for every listed app label.

    The process exits with status 1 when an app is not ready before its
    deadline.

    Args:
        app_names (list): app labels to wait for, handled in order.
        timeout (float): maximum wait per app label, in minutes.
        interval (int): seconds to sleep between two checks; when None a
            random pause of 2 to 6 seconds is used.
    """
    for app_name in app_names:
        # Keep the deadline separate from ``timeout``: overwriting the
        # parameter would feed an absolute timestamp into the computation
        # for the next name and practically disable its timeout.
        deadline = time.time() + timeout * 60
        while True:
            if is_app_ready(app_name) is True:
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            app_name)
                sys.exit(1)
            if interval is not None:
                time.sleep(interval)
            else:
                # spread in time potentially parallel execution in multiple
                # containers
                time.sleep(random.randint(2, 6))
| 573 | |
def check_pod_readiness(pod_names, timeout, interval=None):
    """
    Block until every listed pod is ready.

    The process exits with status 1 when a pod is not ready before its
    deadline.

    Args:
        pod_names (list): pod names to wait for, handled in order.
        timeout (float): maximum wait per pod, in minutes.
        interval (int): seconds to sleep between two checks; when None a
            random pause of 2 to 6 seconds is used.
    """
    for pod_name in pod_names:
        # Keep the deadline separate from ``timeout``: overwriting the
        # parameter would feed an absolute timestamp into the computation
        # for the next name and practically disable its timeout.
        deadline = time.time() + timeout * 60
        while True:
            if is_pod_ready(pod_name) is True:
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            pod_name)
                sys.exit(1)
            if interval is not None:
                time.sleep(interval)
            else:
                # spread in time potentially parallel execution in multiple
                # containers
                time.sleep(random.randint(2, 6))
| 592 | |
def check_container_readiness(container_names, timeout, interval=None):
    """
    Block until every listed container is ready.

    The process exits with status 1 when a container is not ready before
    its deadline.

    Args:
        container_names (list): container names to wait for, handled in
            order.
        timeout (float): maximum wait per container, in minutes.
        interval (int): seconds to sleep between two checks; when None a
            random pause of 2 to 6 seconds is used.
    """
    for container_name in container_names:
        # Keep the deadline separate from ``timeout``: overwriting the
        # parameter would feed an absolute timestamp into the computation
        # for the next name and practically disable its timeout.
        deadline = time.time() + timeout * 60
        while True:
            if is_ready(container_name) is True:
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            container_name)
                sys.exit(1)
            if interval is not None:
                time.sleep(interval)
            else:
                # spread in time potentially parallel execution in multiple
                # containers
                time.sleep(random.randint(2, 6))
| 611 | |
def check_service_readiness(service_names, timeout, interval=None):
    """
    Block until every listed service is ready.

    The process exits with status 1 when a service is not ready before its
    deadline.

    Args:
        service_names (list): service names to wait for, handled in order.
        timeout (float): maximum wait per service, in minutes.
        interval (int): seconds to sleep between two checks; when None a
            random pause of 2 to 6 seconds is used.
    """
    for service_name in service_names:
        # Keep the deadline separate from ``timeout``: overwriting the
        # parameter would feed an absolute timestamp into the computation
        # for the next name and practically disable its timeout.
        deadline = time.time() + timeout * 60
        while True:
            if is_service_ready(service_name) is True:
                break
            if time.time() > deadline:
                log.warning("timed out waiting for '%s' to be ready",
                            service_name)
                sys.exit(1)
            if interval is not None:
                time.sleep(interval)
            else:
                # spread in time potentially parallel execution in multiple
                # containers
                time.sleep(random.randint(2, 6))
Sylvain Desbureaux | 2faa6e6 | 2020-05-11 15:03:44 +0200 | [diff] [blame] | 630 | |
if __name__ == "__main__":
    # Pass everything after the script name to main().
    cli_args = sys.argv[1:]
    main(cli_args)