Milan Verespej | 1a23047 | 2019-03-20 13:51:40 +0100 | [diff] [blame] | 1 | #!/usr/bin/env python |
| 2 | # -*- coding: utf-8 -*- |
| 3 | |
| 4 | # COPYRIGHT NOTICE STARTS HERE |
| 5 | |
| 6 | # Copyright 2019 © Samsung Electronics Co., Ltd. |
| 7 | # |
| 8 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 9 | # you may not use this file except in compliance with the License. |
| 10 | # You may obtain a copy of the License at |
| 11 | # |
| 12 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 13 | # |
| 14 | # Unless required by applicable law or agreed to in writing, software |
| 15 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 17 | # See the License for the specific language governing permissions and |
| 18 | # limitations under the License. |
| 19 | |
| 20 | # COPYRIGHT NOTICE ENDS HERE |
| 21 | |
| 22 | |
| 23 | from __future__ import print_function |
| 24 | import sys |
| 25 | import argparse |
| 26 | import yaml |
| 27 | import requests |
Bartek Grzybowski | 4a0a39a | 2019-06-13 15:44:01 +0200 | [diff] [blame] | 28 | from subprocess import Popen,STDOUT,PIPE,check_output |
Milan Verespej | 1a23047 | 2019-03-20 13:51:40 +0100 | [diff] [blame] | 29 | import datetime |
| 30 | from time import sleep |
| 31 | from os.path import expanduser |
| 32 | from itertools import chain |
| 33 | import csv |
| 34 | from requests.packages.urllib3.exceptions import InsecureRequestWarning |
Bartek Grzybowski | 929f090 | 2019-05-27 14:57:55 +0200 | [diff] [blame] | 35 | from base64 import b64decode |
| 36 | from tempfile import NamedTemporaryFile |
Milan Verespej | 1a23047 | 2019-03-20 13:51:40 +0100 | [diff] [blame] | 37 | |
def add_resource_kind(resources, kind):
    '''Tag every resource dict in ``resources`` with its ``kind``.

    The dicts are modified in place; the very same list object is handed
    back so the call can be used inline.
    '''
    for entry in resources:
        entry.update(kind=kind)
    return resources
| 42 | |
def pods_by_parent(pods, parent):
    '''Yield the pods whose ``app`` label equals ``parent``.

    Pods without a ``labels`` mapping, or without an ``app`` label, are
    skipped instead of aborting the whole scan with ``KeyError``.
    '''
    for pod in pods:
        labels = pod.get('metadata', {}).get('labels') or {}
        # Require the label to be present so a missing label never
        # compares equal to a ``None`` parent.
        if 'app' in labels and labels['app'] == parent:
            yield pod
| 47 | |
def k8s_controller_ready(k8s_controller):
    '''Tell whether a controller has reached its desired state.

    A ``Job`` is compared on ``status.succeeded`` vs ``spec.completions``;
    any other kind on ``status.readyReplicas`` vs ``spec.replicas``.
    A missing status counter counts as 0.
    '''
    if k8s_controller['kind'] == 'Job':
        reached_field, wanted_field = 'succeeded', 'completions'
    else:
        reached_field, wanted_field = 'readyReplicas', 'replicas'
    reached = k8s_controller['status'].get(reached_field, 0)
    return reached == k8s_controller['spec'][wanted_field]
| 52 | |
def get_not_ready(data):
    '''Return the controllers from ``data`` that are not ready yet, in order.'''
    pending = []
    for controller in data:
        if k8s_controller_ready(controller):
            continue
        pending.append(controller)
    return pending
| 55 | |
def get_apps(data):
    '''Return the ``app`` label of every resource in ``data``.

    Resources that carry no ``app`` label are reported by their
    ``metadata.name`` instead of raising ``KeyError``, so one unlabelled
    controller cannot crash the whole readiness check.
    '''
    apps = []
    for resource in data:
        metadata = resource['metadata']
        labels = metadata.get('labels') or {}
        apps.append(labels['app'] if 'app' in labels else metadata['name'])
    return apps
| 58 | |
def get_names(data):
    '''Collect ``metadata.name`` of every resource in ``data`` into a list.'''
    names = []
    for resource in data:
        names.append(resource['metadata']['name'])
    return names
| 61 | |
def pod_ready(pod):
    '''Return True when the first ``Ready`` condition of ``pod`` is ``'True'``.

    A pod with no conditions, no ``Ready`` condition, or a condition entry
    lacking the expected keys is reported as not ready.
    '''
    try:
        ready_states = []
        # Walk all conditions before deciding, so a malformed entry
        # anywhere in the list marks the pod as not ready.
        for condition in pod['status']['conditions']:
            if condition['type'] == 'Ready':
                ready_states.append(condition['status'])
        return ready_states[0] == 'True'
    except (KeyError, IndexError):
        return False
| 68 | |
def not_ready_pods(pods):
    '''Lazily yield those entries of ``pods`` that are not ready.'''
    for candidate in pods:
        if pod_ready(candidate):
            continue
        yield candidate
| 73 | |
def analyze_k8s_controllers(resources_data):
    '''Summarize readiness of a list of controllers.

    Returns a dict with ``total_count``, ``not_ready_list`` (app names of
    the controllers that are not ready) and ``ready_count``.
    '''
    total = len(resources_data)
    pending_apps = get_apps(get_not_ready(resources_data))
    return {
        'total_count': total,
        'not_ready_list': pending_apps,
        'ready_count': total - len(pending_apps),
    }
| 80 | |
def get_k8s_controllers(k8s):
    '''Fetch deployments, statefulsets and jobs and analyze their readiness.

    Returns a tuple ``(controllers, not_ready)``: ``controllers`` maps each
    resource kind to its raw ``data`` plus the summary produced by
    ``analyze_k8s_controllers``; ``not_ready`` is the combined list of app
    names that are not ready across all kinds.
    '''
    # (resource kind, API group path) in the order they are queried.
    sources = (
        ('deployments', 'apis/apps/v1'),
        ('statefulsets', 'apis/apps/v1'),
        ('jobs', 'apis/batch/v1'),
    )

    k8s_controllers = {}
    for kind, api in sources:
        entry = {'data': k8s.get_resources(api, kind)}
        entry.update(analyze_k8s_controllers(entry['data']))
        k8s_controllers[kind] = entry

    pending = []
    for kind in k8s_controllers:
        pending.extend(k8s_controllers[kind]['not_ready_list'])

    return k8s_controllers, pending
| 103 | |
def exec_healthcheck(hp_script, namespace, hp_mode):
    '''Run the healthcheck script through ``sh`` and stream its output.

    The script is invoked as ``sh <hp_script> <namespace> <hp_mode>``; its
    stderr is merged into stdout and each line is printed (stripped) as
    soon as it arrives.

    Returns the exit code of the script.
    '''
    # Text mode makes readline() return str on both Python 2 and 3, so the
    # '' sentinel below really matches end-of-file (with a bytes pipe the
    # loop would never terminate on Python 3).
    hc = Popen(['sh', hp_script, namespace, hp_mode],
               stdout=PIPE, stderr=STDOUT, universal_newlines=True)
    # Trace the output of the subprocess until it closes its stdout
    for line in iter(hc.stdout.readline, ''):
        print(line.strip())
    hc.stdout.close()
    # wait() rather than poll(): the process may still be exiting after it
    # closed stdout, and poll() would then leave returncode as None.
    return hc.wait()
Milan Verespej | 1a23047 | 2019-03-20 13:51:40 +0100 | [diff] [blame] | 112 | |
def check_readiness(k8s, verbosity):
    '''Query the cluster once, print the status and report readiness.

    ``k8s`` must expose ``get_resources(api, kind)``. With ``verbosity``
    above 1 the pods belonging to not-ready controllers are looked up too,
    and the names of those that are not ready are printed.

    Returns True when no controller is reported as not ready.
    '''
    k8s_controllers, not_ready_controllers = get_k8s_controllers(k8s)

    unready_pods = []
    # check pods only when it is explicitly wanted (judging readiness by deployment status)
    if verbosity > 1:
        pods = k8s.get_resources('api/v1', 'pods')
        # Materialize the chain: a bare itertools.chain object is always
        # truthy, which would make print_status report "Waiting for pods"
        # even when there is nothing to wait for.
        unready_pods = list(chain.from_iterable(
            get_names(not_ready_pods(pods_by_parent(pods, x)))
            for x in not_ready_controllers))

    print_status(verbosity, k8s_controllers, unready_pods)
    return not not_ready_controllers
| 128 | |
def check_in_loop(k8s, max_time, sleep_time, verbosity):
    '''Poll readiness until it succeeds or ``max_time`` minutes have passed.

    Sleeps ``sleep_time`` seconds after every unsuccessful check. Returns
    the result of the last check, or False if no check was performed.
    '''
    deadline = datetime.datetime.now() + datetime.timedelta(minutes=max_time)
    last_result = False
    while datetime.datetime.now() < deadline:
        last_result = check_readiness(k8s, verbosity)
        if not last_result:
            sleep(sleep_time)
            continue
        return last_result
    return last_result
| 138 | |
def check_helm_releases():
    '''Run ``helm ls`` and find releases whose STATUS is ``FAILED``.

    Exits the program with a message when helm reports no releases.

    Returns a tuple ``(helm_output, failed_release_names)`` where
    ``helm_output`` is the raw text printed by ``helm ls``.
    '''
    # Text mode so the output is str on both Python 2 and 3; with bytes the
    # comparisons and split below would fail on Python 3.
    helm = check_output(['helm', 'ls'], universal_newlines=True)
    if not helm.strip():
        sys.exit('No Helm releases detected.')
    # Columns are tab separated and space padded; drop the padding so the
    # header names and the values compare exactly.
    helm_releases = csv.DictReader(
        (line.replace(' ', '') for line in helm.splitlines()),
        delimiter='\t')
    failed_releases = [release['NAME'] for release in helm_releases
                       if release.get('STATUS') == 'FAILED']
    return helm, failed_releases
| 149 | |
| 150 | |
def create_ready_string(ready, total, prefix):
    '''Format one status line: ``prefix`` padded to 12 columns, then ``ready/total``.'''
    template = '{:12} {}/{}'
    return template.format(prefix, ready, total)
| 153 | |
def print_status(verbosity, resources, not_ready_pods):
    '''Print a readiness summary.

    ``resources`` maps a resource kind to a dict holding ``ready_count``
    and ``total_count``. The overall ``Ready`` line is always printed;
    per-kind lines are added when ``verbosity`` > 0, and the pod section
    (names from ``not_ready_pods``, or an all-ready note) when
    ``verbosity`` > 1.
    '''
    # Callers may pass a lazy iterator (e.g. itertools.chain), which is
    # always truthy; materialize it so the emptiness test below is real.
    not_ready_pods = list(not_ready_pods)

    ready = {k: v['ready_count'] for k, v in resources.items()}
    count = {k: v['total_count'] for k, v in resources.items()}

    ready_strings = []
    if verbosity > 0:
        ready_strings.extend(
            create_ready_string(ready[k], count[k], k.capitalize())
            for k in ready)
    ready_strings.append(create_ready_string(
        sum(ready.values()), sum(count.values()), 'Ready'))

    status_strings = ['\n'.join(ready_strings)]
    if verbosity > 1:
        if not_ready_pods:
            status_strings.append(
                '\nWaiting for pods:\n{}'.format('\n'.join(not_ready_pods)))
        else:
            status_strings.append('\nAll pods are ready!')
    print('\n'.join(status_strings), '\n')
| 172 | |
def parse_args():
    '''Build the command line parser and parse ``sys.argv``.

    Returns the ``argparse.Namespace`` with the parsed options.
    '''
    parser = argparse.ArgumentParser(
        description='Monitor ONAP deployment progress',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # (option strings, add_argument keyword settings), in help-output order.
    options = [
        (('--namespace', '-n'),
         dict(default='onap', help='Kubernetes namespace of ONAP')),
        (('--server', '-s'),
         dict(help='address of Kubernetes cluster')),
        (('--kubeconfig', '-c'),
         dict(default=expanduser('~') + '/.kube/config',
              help='path to .kube/config file')),
        (('--health-path', '-hp'),
         dict(help='path to ONAP robot ete-k8s.sh')),
        (('--health-mode', '-hm'),
         dict(default='health', help='healthcheck mode')),
        (('--no-helm',),
         dict(action='store_true', help='Do not check Helm')),
        (('--check-frequency', '-w'),
         dict(default=300, type=int,
              help='time between readiness checks in seconds')),
        (('--max-time', '-t'),
         dict(default=120, type=int,
              help='max time to run readiness checks in minutes')),
        (('--single-run', '-1'),
         dict(action='store_true', help='run check loop only once')),
        (('-v',),
         dict(dest='verbosity', action='count', default=0,
              help='increase output verbosity, e.g. -vv is more verbose than -v')),
        (('--no-ssl-auth',),
         dict(action='store_true',
              help='Disable SSL certificate based authentication while connecting to server')),
    ]
    for flags, settings in options:
        parser.add_argument(*flags, **settings)

    return parser.parse_args()
| 197 | |
class Kubernetes(object):
    '''Class exposing get_resources() routine for connecting to kube API.
    It keeps all attributes required by that call as an internal
    object state: server URL, namespace, and (unless SSL authentication
    is disabled) a temporary PEM file built from the kube config.'''

    # Requests made with verify=False would otherwise warn on every call.
    requests.packages.urllib3.disable_warnings(InsecureRequestWarning)

    def __init__(self, args):
        '''Set up the client from parsed command line ``args``.

        Reads ``kubeconfig``, ``server``, ``no_ssl_auth`` and ``namespace``
        from ``args``. The server URL is taken from the kube config when
        ``args.server`` is None.
        '''
        self.config = args.kubeconfig
        self.url = args.server if args.server is not None else \
            self._get_k8s_url()
        self.no_ssl_auth = args.no_ssl_auth
        # _get_k8s_certs() switches no_ssl_auth on when the config holds
        # no usable certificate data.
        self.certs = self._get_k8s_certs() if not self.no_ssl_auth else {}
        self.namespace = args.namespace
        self.crt_tmp_file = None

        # Setup tmp file with ca chain only if certs were gathered successfully
        # and --no-ssl-auth wasn't set
        if self.certs and not self.no_ssl_auth:
            self._setup_cert_files()

    def get_resources(self, api, kind):
        '''Performs actual API call.

        Lists resources of ``kind`` in the configured namespace under the
        ``api`` path and returns the items, each tagged with its kind.
        Exits the program on connection errors and on any non-200 reply.
        '''
        # Strip a trailing slash so the joined URL has no empty segment.
        url = '/'.join([self.url.rstrip('/'), api, 'namespaces',
                        self.namespace, kind])
        try:
            if self.no_ssl_auth:
                req = requests.get(url, verify=False)
            else:
                req = requests.get(url, verify=self.crt_tmp_file.name,
                                   cert=self.crt_tmp_file.name)
        except requests.exceptions.ConnectionError:
            sys.exit('Error: Could not connect to {}'.format(self.url))
        if req.status_code == 200:
            payload = req.json()
            # kind is <resource>List in response so [:-4] removes 'List' from value
            return add_resource_kind(payload['items'], payload['kind'][:-4])
        if req.status_code == 401:
            sys.exit('Error: Server replied with "401 Unauthorized" while making connection')
        sys.exit("Error: There's been an unspecified issue while making a request to the API")

    def _setup_cert_files(self):
        '''Helper function to setup named file for requests.get() call
        in self.get_resources() which is able to read certificates only
        from a file. The file lives as long as this object references it.'''
        ca_chain = NamedTemporaryFile()
        # Write in a fixed order instead of dict order: the same file is
        # used as client certificate chain, where the key and the client
        # certificate are expected ahead of the CA certificate.
        preferred = ('client_key', 'client_cert', 'ca_cert')
        ordered = [name for name in preferred if name in self.certs]
        ordered += [name for name in self.certs if name not in preferred]
        for name in ordered:
            crt = self.certs[name]
            ca_chain.write(crt)
            # Keep PEM blocks on separate lines even if the decoded data
            # lacks a trailing newline.
            if not crt.endswith(b'\n'):
                ca_chain.write(b'\n')
        ca_chain.flush()
        self.crt_tmp_file = ca_chain

    def _load_kubeconfig(self):
        '''Parse the kube config file and return its content.'''
        with open(self.config) as f:
            # safe_load: the config is plain data, and yaml.load() without
            # an explicit Loader is unsafe and rejected by recent PyYAML.
            return yaml.safe_load(f)

    def _get_k8s_url(self):
        '''Return the server URL of the first cluster in the kube config.'''
        # TODO: Get login info
        config = self._load_kubeconfig()
        # TODO: Support cluster by name
        return config['clusters'][0]['cluster']['server']

    def _get_k8s_certs(self):
        '''Helper function to read and decode certificates from kube config.

        Returns a dict with ``ca_cert``, ``client_cert`` and ``client_key``
        (decoded bytes). When any of them cannot be found, a warning is
        printed, SSL authentication is turned off and an empty dict is
        returned.
        '''
        config = self._load_kubeconfig()
        try:
            cluster = config['clusters'][0]['cluster']
            user = config['users'][0]['user']
            return {
                'ca_cert': b64decode(cluster['certificate-authority-data']),
                'client_cert': b64decode(user['client-certificate-data']),
                'client_key': b64decode(user['client-key-data']),
            }
        except (KeyError, IndexError, TypeError):
            # Missing key, empty clusters/users list, or an empty/None
            # section: fall back to unauthenticated access.
            print('Warning: could not get Kubernetes config for certificates. ' \
                  'Turning off SSL authentication.')
            self.no_ssl_auth = True
            return {}
| 272 | |
def main():
    '''Entry point: check Helm releases, wait for readiness, run healthcheck.

    Exits with a non-zero status when a Helm release failed, when the
    healthcheck script returns a non-zero code, or when the deployment is
    not ready.
    '''
    from subprocess import CalledProcessError

    args = parse_args()

    if not args.no_helm:
        try:
            helm_output, failed_releases = check_helm_releases()
        except CalledProcessError as err:
            sys.exit('Error: "helm ls" failed with exit code {}'.format(err.returncode))
        except (IOError, OSError) as err:
            # A missing helm binary raises OSError, which is not an IOError
            # on Python 2. Never pass None to sys.exit(): that would end
            # the program with a success status.
            sys.exit(err.strerror or str(err))
        if failed_releases:
            print('Deployment of {} failed.'.format(','.join(failed_releases)))
            sys.exit(1)
        elif args.verbosity > 1:
            print(helm_output)

    k8s = Kubernetes(args)

    if args.single_run:
        ready = check_readiness(k8s, args.verbosity)
    else:
        # Keep the loop result: a successful wait must count as ready.
        ready = check_in_loop(k8s, args.max_time, args.check_frequency, args.verbosity)
        if not ready:
            # Double-check one last time and write verbosely in case it is not ready
            ready = check_readiness(k8s, 2)

    if args.health_path is not None:
        hc_rc = exec_healthcheck(args.health_path, args.namespace, args.health_mode)
        if hc_rc:
            sys.exit(hc_rc)

    if not ready:
        sys.exit('Deployment is not ready')


if __name__ == '__main__':
    main()