Milan Verespej | 1a23047 | 2019-03-20 13:51:40 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # -*- coding: utf-8 -*- |
| 3 | |
| 4 | # COPYRIGHT NOTICE STARTS HERE |
| 5 | |
| 6 | # Copyright 2019 © Samsung Electronics Co., Ltd. |
| 7 | # |
| 8 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | # you may not use this file except in compliance with the License. |
| 10 | # You may obtain a copy of the License at |
| 11 | # |
| 12 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 13 | # |
| 14 | # Unless required by applicable law or agreed to in writing, software |
| 15 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | # See the License for the specific language governing permissions and |
| 18 | # limitations under the License. |
| 19 | |
| 20 | # COPYRIGHT NOTICE ENDS HERE |
| 21 | |
| 22 | |
| 23 | from __future__ import print_function |
| 24 | import sys |
| 25 | import argparse |
| 26 | import yaml |
| 27 | import requests |
| 28 | import subprocess |
| 29 | import datetime |
| 30 | from time import sleep |
| 31 | from os.path import expanduser |
| 32 | from itertools import chain |
| 33 | import csv |
| 34 | from requests.packages.urllib3.exceptions import InsecureRequestWarning |
| 35 | |
| 36 | |
def add_resource_kind(resources, kind):
    """Stamp every resource in ``resources`` with a ``kind`` key.

    The resources are modified in place and the very same ``resources``
    object is handed back to the caller.
    """
    for resource in resources:
        resource['kind'] = kind
    return resources
| 41 | |
def get_resources(server, namespace, api, kind, ssl_verify=False):
    """Fetch all resources of one kind from a namespace over the REST API.

    Args:
        server: base URL of the API server.
        namespace: namespace to query.
        api: API path prefix, e.g. 'api/v1' or 'apis/apps/v1'.
        kind: plural resource name used in the URL, e.g. 'pods'.
        ssl_verify: forwarded to ``requests.get`` as ``verify``.

    Returns:
        The ``items`` list of the JSON response, each item tagged with a
        ``kind`` key by add_resource_kind().

    The program exits with a message if the server cannot be reached or
    answers with an HTTP error status.
    """
    url = '/'.join([server, api, 'namespaces', namespace, kind])
    try:
        req = requests.get(url, verify=ssl_verify)
    except requests.exceptions.ConnectionError:
        sys.exit('Could not connect to {}'.format(server))
    # An error response carries no 'items' list; fail with a readable
    # message instead of a KeyError traceback further down.
    if not req.ok:
        sys.exit('Request to {} failed with HTTP status {}'.format(
            url, req.status_code))
    payload = req.json()
    # kind is <resource>List in response so [:-4] removes 'List' from value
    return add_resource_kind(payload['items'], payload['kind'][:-4])
| 51 | |
def pods_by_parent(pods, parent):
    """Yield the pods whose ``app`` label equals ``parent``.

    Pods that have no labels at all, or no ``app`` label, are skipped
    instead of aborting the whole scan with a KeyError.
    """
    for pod in pods:
        labels = (pod.get('metadata') or {}).get('labels') or {}
        # Require the label to be present so an unlabeled pod never
        # matches a parent of None.
        if 'app' in labels and labels['app'] == parent:
            yield pod
| 56 | |
def k8s_controller_ready(k8s_controller):
    """Tell whether a controller has reached its desired state.

    A 'Job' is compared by ``status.succeeded`` against
    ``spec.completions``; every other kind by ``status.readyReplicas``
    against ``spec.replicas``. A missing status counter counts as 0.
    """
    if k8s_controller['kind'] == 'Job':
        status_key, spec_key = 'succeeded', 'completions'
    else:
        status_key, spec_key = 'readyReplicas', 'replicas'
    achieved = k8s_controller['status'].get(status_key, 0)
    return achieved == k8s_controller['spec'][spec_key]
| 61 | |
def get_not_ready(data):
    """Return the entries of ``data`` that k8s_controller_ready() rejects."""
    pending = []
    for controller in data:
        if not k8s_controller_ready(controller):
            pending.append(controller)
    return pending
| 64 | |
def get_apps(data):
    """Return the ``app`` label of every resource in ``data``, in order."""
    apps = []
    for resource in data:
        apps.append(resource['metadata']['labels']['app'])
    return apps
| 67 | |
def get_names(data):
    """Return ``metadata.name`` of every resource in ``data`` as a list."""
    return list(map(lambda resource: resource['metadata']['name'], data))
| 70 | |
def pod_ready(pod):
    """Return True when the pod's first 'Ready' condition has status 'True'.

    A pod lacking the expected keys, or without any 'Ready' condition,
    is reported as not ready.
    """
    try:
        ready_states = []
        for condition in pod['status']['conditions']:
            if condition['type'] == 'Ready':
                ready_states.append(condition['status'])
        first_state = ready_states[0]
    except (KeyError, IndexError):
        return False
    return first_state == 'True'
| 77 | |
def not_ready_pods(pods):
    """Lazily yield the pods from ``pods`` that pod_ready() rejects."""
    for candidate in pods:
        if pod_ready(candidate):
            continue
        yield candidate
| 82 | |
def analyze_k8s_controllers(resources_data):
    """Summarize readiness of a list of controllers.

    Returns a dict with ``total_count`` (number of controllers),
    ``not_ready_list`` (app labels of the controllers that are not ready)
    and ``ready_count`` (total minus the length of that list).
    """
    total = len(resources_data)
    not_ready_apps = get_apps(get_not_ready(resources_data))
    return {
        'total_count': total,
        'not_ready_list': not_ready_apps,
        'ready_count': total - len(not_ready_apps),
    }
| 89 | |
def get_k8s_controllers(namespace, k8s_url):
    """Collect deployments, statefulsets and jobs of a namespace.

    Returns a pair: a dict keyed by 'deployments', 'statefulsets' and
    'jobs' (each value holding the raw ``data`` plus the summary from
    analyze_k8s_controllers()), and a flat list of the app labels of all
    controllers that are not ready.
    """
    controller_apis = (
        ('deployments', 'apis/apps/v1'),
        ('statefulsets', 'apis/apps/v1'),
        ('jobs', 'apis/batch/v1'),
    )

    k8s_controllers = {}
    for plural, api in controller_apis:
        entry = {'data': get_resources(k8s_url, namespace, api, plural)}
        entry.update(analyze_k8s_controllers(entry['data']))
        k8s_controllers[plural] = entry

    not_ready_controllers = []
    for plural in k8s_controllers:
        not_ready_controllers.extend(k8s_controllers[plural]['not_ready_list'])

    return k8s_controllers, not_ready_controllers
| 109 | |
def get_k8s_url(kube_config):
    """Return the API server URL of the first cluster in a kubeconfig file.

    Args:
        kube_config: path to the kubeconfig YAML file.

    Returns:
        The ``server`` value of the first entry under ``clusters``.
    """
    # TODO: Get login info
    with open(kube_config) as f:
        # safe_load builds plain data only; yaml.load() without an explicit
        # Loader is deprecated and refused by newer PyYAML releases.
        config = yaml.safe_load(f)
    # TODO: Support cluster by name
    return config['clusters'][0]['cluster']['server']
| 116 | |
def exec_healthcheck(hp_script, namespace):
    """Run the health check script through ``sh`` and capture its output.

    The script is invoked as ``sh <hp_script> <namespace> health`` with
    stderr merged into stdout.

    Returns:
        (return_code, output): 0 and the captured output on success,
        otherwise the script's non-zero exit status and what it printed.
    """
    try:
        # check_output returns the captured output itself, not an object
        # with an ``output`` attribute.
        output = subprocess.check_output(
            ['sh', hp_script, namespace, 'health'],
            stderr=subprocess.STDOUT)
        return 0, output
    except subprocess.CalledProcessError as err:
        return err.returncode, err.output
| 125 | |
def check_readiness(k8s_url, namespace, verbosity):
    """Query the cluster once, print the status and report readiness.

    Args:
        k8s_url: base URL of the API server.
        namespace: namespace to inspect.
        verbosity: output level; above 1 the pods of not-ready
            controllers are fetched and listed as well.

    Returns:
        True when no controller is reported as not ready.
    """
    k8s_controllers, not_ready_controllers = get_k8s_controllers(namespace, k8s_url)

    # A real list (not a lazy chain, which is always truthy) so that
    # print_status can tell "no unready pods" from "some unready pods".
    unready_pods = []
    # check pods only when it is explicitly wanted (judging readiness by deployment status)
    if verbosity > 1:
        pods = get_resources(k8s_url, namespace, 'api/v1', 'pods')
        for controller in not_ready_controllers:
            unready_pods.extend(
                get_names(not_ready_pods(pods_by_parent(pods, controller))))

    print_status(verbosity, k8s_controllers, unready_pods)
    return not not_ready_controllers
| 141 | |
def check_in_loop(k8s_url, namespace, max_time, sleep_time, verbosity):
    """Poll check_readiness() until it succeeds or the time budget runs out.

    ``max_time`` is given in minutes and ``sleep_time`` (the pause after
    each unsuccessful check) in seconds. Returns the result of the last
    check, or False if no check was made at all.
    """
    deadline = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
    outcome = False
    while True:
        if not datetime.datetime.now() < deadline:
            break
        outcome = check_readiness(k8s_url, namespace, verbosity)
        if outcome:
            break
        sleep(sleep_time)
    return outcome
| 151 | |
def check_helm_releases():
    """Run ``helm ls`` and find the releases whose STATUS is FAILED.

    Returns:
        (helm, failed_releases): the text printed by ``helm ls`` and the
        list of NAME values of the failed releases.

    The program exits with a message when ``helm ls`` prints nothing.
    """
    helm = subprocess.check_output(['helm', 'ls'])
    # check_output yields bytes on Python 3; the comparison and the
    # split below need text there. On Python 2 the output already is str.
    if not isinstance(helm, str):
        helm = helm.decode('utf-8')
    if helm == '':
        sys.exit('No Helm releases detected.')
    # Columns are tab-separated; spaces are stripped so header names and
    # values compare exactly.
    helm_releases = csv.DictReader(
        [line.replace(' ', '') for line in helm.split('\n')],
        delimiter='\t')
    failed_releases = [release['NAME'] for release in helm_releases
                       if release['STATUS'] == 'FAILED']
    return helm, failed_releases
| 162 | |
| 163 | |
def create_ready_string(ready, total, prefix):
    """Format one status line: ``prefix`` padded to 12 columns, then ready/total."""
    template = '{:12} {}/{}'
    return template.format(prefix, ready, total)
| 166 | |
def print_status(verbosity, resources, not_ready_pods):
    """Print a readiness summary of the controllers.

    Args:
        verbosity: above 0 one line per resource type is printed before
            the overall 'Ready' line; above 1 the pods still being
            waited for are listed too.
        resources: dict mapping a resource type name to a dict holding
            ``ready_count`` and ``total_count``.
        not_ready_pods: iterable of pod names that are not ready.
    """
    # Materialize first: a lazy iterator (generator, itertools.chain) is
    # always truthy, which would make the emptiness test below useless.
    not_ready_pods = list(not_ready_pods)
    ready_strings = []
    ready = {k: v['ready_count'] for k, v in resources.items()}
    count = {k: v['total_count'] for k, v in resources.items()}
    if verbosity > 0:
        ready_strings += [
            create_ready_string(ready[k], count[k], k.capitalize()) for k in ready
        ]
    total_ready = sum(ready.values())
    total_count = sum(count.values())
    ready_strings.append(create_ready_string(total_ready, total_count, 'Ready'))
    status_strings = ['\n'.join(ready_strings)]
    if verbosity > 1:
        if not_ready_pods:
            status_strings.append('\nWaiting for pods:\n{}'.format('\n'.join(not_ready_pods)))
        else:
            status_strings.append('\nAll pods are ready!')
    print('\n'.join(status_strings), '\n')
| 185 | |
def parse_args():
    """Build the command line parser and parse ``sys.argv``.

    Returns the resulting argparse namespace.
    """
    # (flags, keyword arguments) per option, registered in this order.
    option_specs = [
        (('--namespace', '-n'),
         dict(default='onap', help='Kubernetes namespace of ONAP')),
        (('--server', '-s'),
         dict(help='address of Kubernetes cluster')),
        (('--kubeconfig', '-c'),
         dict(default=expanduser('~') + '/.kube/config',
              help='path to .kube/config file')),
        (('--health-path', '-hp'),
         dict(help='path to ONAP robot ete-k8s.sh')),
        (('--no-helm',),
         dict(action='store_true', help='Do not check Helm')),
        (('--check-frequency', '-w'),
         dict(default=300, type=int,
              help='time between readiness checks in seconds')),
        (('--max-time', '-t'),
         dict(default=120, type=int,
              help='max time to run readiness checks in minutes')),
        (('--single-run', '-1'),
         dict(action='store_true', help='run check loop only once')),
        (('-v',),
         dict(dest='verbosity', action='count', default=0,
              help='increase output verbosity, e.g. -vv is more verbose than -v')),
    ]

    parser = argparse.ArgumentParser(description='Monitor ONAP deployment progress')
    for flags, options in option_specs:
        parser.add_argument(*flags, **options)

    return parser.parse_args()
| 206 | |
def main():
    """Entry point: check Helm releases, wait for readiness, run health check.

    Steps, in order:
      1. Unless --no-helm is given, exit with status 1 if any Helm
         release is FAILED.
      2. Check controller readiness once (--single-run) or in a loop.
      3. If a health check script is given, run it and exit with its
         return code.
      4. Otherwise exit with an error message if the deployment is not
         ready.
    """
    args = parse_args()

    if not args.no_helm:
        try:
            helm_output, failed_releases = check_helm_releases()
            if failed_releases:
                print('Deployment of {} failed.'.format(','.join(failed_releases)))
                sys.exit(1)
            elif args.verbosity > 1:
                print(helm_output)
        except IOError as err:
            sys.exit(err.strerror)

    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
    k8s_url = args.server if args.server is not None else get_k8s_url(args.kubeconfig)

    if args.single_run:
        ready = check_readiness(k8s_url, args.namespace, args.verbosity)
    else:
        # Keep the loop's result: a successful loop must not be reported
        # as "not ready" at the end.
        ready = check_in_loop(k8s_url, args.namespace, args.max_time,
                              args.check_frequency, args.verbosity)
        if not ready:
            # The loop ran out of time: check one last time, verbosely,
            # so the output shows what is still pending.
            ready = check_readiness(k8s_url, args.namespace, 2)

    if args.health_path is not None:
        try:
            hc_rc, hc_output = exec_healthcheck(args.health_path, args.namespace)
        except IOError as err:
            sys.exit(err.strerror)
        if args.verbosity > 1 or hc_rc > 0:
            print(hc_output.decode('utf-8'))
        # The health check result decides the exit status of the script.
        sys.exit(hc_rc)

    if not ready:
        sys.exit('Deployment is not ready')
| 243 | |
# Run the monitor only when this file is executed as a script.
if __name__ == '__main__':
    main()