Lott, Christopher (cl778h) | 81084bc | 2020-06-01 20:53:12 -0400 | [diff] [blame] | 1 | # ================================================================================== |
| 2 | # Copyright (c) 2020 AT&T Intellectual Property. |
| 3 | # Copyright (c) 2020 Nokia |
| 4 | # |
| 5 | # Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | # you may not use this file except in compliance with the License. |
| 7 | # You may obtain a copy of the License at |
| 8 | # |
| 9 | # http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | # |
| 11 | # Unless required by applicable law or agreed to in writing, software |
| 12 | # distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | # See the License for the specific language governing permissions and |
| 15 | # limitations under the License. |
| 16 | # ================================================================================== |
| 17 | """ |
| 18 | Provides classes and methods to define, raise, reraise and clear alarms. |
Lott, Christopher (cl778h) | a03c517 | 2020-07-06 15:13:07 -0400 | [diff] [blame] | 19 | All actions are implemented by sending RMR messages to the Alarm Adapter. |
| 20 | The alarm target host and port are set by environment variables. The alarm |
| 21 | message contents comply with the JSON schema in file alarm-schema.json. |
Lott, Christopher (cl778h) | 81084bc | 2020-06-01 20:53:12 -0400 | [diff] [blame] | 22 | """ |
| 23 | |
| 24 | from ctypes import c_void_p |
| 25 | from enum import Enum, auto |
| 26 | import json |
Lott, Christopher (cl778h) | a03c517 | 2020-07-06 15:13:07 -0400 | [diff] [blame] | 27 | import os |
Lott, Christopher (cl778h) | 81084bc | 2020-06-01 20:53:12 -0400 | [diff] [blame] | 28 | import time |
| 29 | from mdclogpy import Logger |
| 30 | from ricxappframe.rmr import rmr |
Lott, Christopher (cl778h) | a03c517 | 2020-07-06 15:13:07 -0400 | [diff] [blame] | 31 | from ricxappframe.alarm.exceptions import InitFailed |
Lott, Christopher (cl778h) | 81084bc | 2020-06-01 20:53:12 -0400 | [diff] [blame] | 32 | |
| 33 | ############## |
| 34 | # PRIVATE API |
| 35 | ############## |
| 36 | |
# Module-level logger shared by everything in this file; named after the module.
mdc_logger = Logger(name=__name__)
# Upper bound on the number of send attempts made by AlarmManager._rmr_send_alarm
# (it is the size of the send loop, so it counts the first try as well).
RETRIES = 4
| 39 | |
| 40 | ############## |
| 41 | # PUBLIC API |
| 42 | ############## |
| 43 | |
# constants
# RMR message type stamped on every alarm message allocated by
# AlarmManager._rmr_send_alarm.
RIC_ALARM_UPDATE = 110  # message type
# Names of the environment variables that AlarmManager.__init__ reads to build
# the "service:port" target for the alarm messages.
ALARM_MGR_SERVICE_NAME_ENV = "ALARM_MGR_SERVICE_NAME"
ALARM_MGR_SERVICE_PORT_ENV = "ALARM_MGR_SERVICE_PORT"

# Publish dict keys as constants for convenience of client code.
# Mixed lower/upper casing to comply with the Adapter JSON requirements.
# NOTE(review): KEY_ALARM is not referenced anywhere in this file; presumably
# it is kept for client code -- confirm before removing.
KEY_ALARM = "alarm"
# The next six keys are the entries stored by AlarmDetail.__init__.
KEY_MANAGED_OBJECT_ID = "managedObjectId"
KEY_APPLICATION_ID = "applicationId"
KEY_SPECIFIC_PROBLEM = "specificProblem"
KEY_PERCEIVED_SEVERITY = "perceivedSeverity"
KEY_ADDITIONAL_INFO = "additionalInfo"
KEY_IDENTIFYING_INFO = "identifyingInfo"
# The last two keys are added by AlarmManager._create_alarm_message when it
# wraps an AlarmDetail into a message.
KEY_ALARM_ACTION = "AlarmAction"
KEY_ALARM_TIME = "AlarmTime"
| 60 | |
| 61 | |
class AlarmAction(Enum):
    """
    Operations that can be requested from the Alarm Adapter.

    The member *name* (not the value) is what gets written into an alarm
    message, so the names must not be changed.
    """
    RAISE = 1      # raise an alarm
    CLEAR = 2      # clear one alarm
    CLEARALL = 3   # clear all alarms
| 69 | |
| 70 | |
class AlarmSeverity(Enum):
    """
    Severity levels that can be attached to an alarm.

    The member *name* (not the value) is what AlarmDetail stores, so the
    names must not be changed.
    """
    UNSPECIFIED = 1
    CRITICAL = 2
    MAJOR = 3
    MINOR = 4
    WARNING = 5
    CLEARED = 6
    DEFAULT = 7
| 82 | |
| 83 | |
class AlarmDetail(dict):
    """
    Dictionary describing one alarm that can be raised or cleared.

    Parameters
    ----------
    managed_object_id: str
        The name of the managed object that is the cause of the fault (required)

    application_id: str
        The name of the process that raised the alarm (required)

    specific_problem: int
        The problem that is the cause of the alarm

    perceived_severity: AlarmSeverity
        The severity of the alarm, a value from the enum; its name is stored.

    identifying_info: str
        Identifying additional information, which is part of alarm identity

    additional_info: str
        Additional information given by the application (optional)
    """
    # pylint: disable=too-many-arguments
    def __init__(self,
                 managed_object_id: str,
                 application_id: str,
                 specific_problem: int,
                 perceived_severity: AlarmSeverity,
                 identifying_info: str,
                 additional_info: str = ""):
        """
        Populates the dictionary with one entry per argument.
        """
        super().__init__()
        # Entries are inserted in this order; the severity is stored by enum name.
        self.update({
            KEY_MANAGED_OBJECT_ID: managed_object_id,
            KEY_APPLICATION_ID: application_id,
            KEY_SPECIFIC_PROBLEM: specific_problem,
            KEY_PERCEIVED_SEVERITY: perceived_severity.name,
            KEY_IDENTIFYING_INFO: identifying_info,
            KEY_ADDITIONAL_INFO: additional_info,
        })
| 126 | |
| 127 | |
class AlarmManager:
    """
    Provides an API for an Xapp to raise and clear alarms by sending messages
    via RMR directly to an Alarm Adapter.  Requires environment variables
    ALARM_MGR_SERVICE_NAME and ALARM_MGR_SERVICE_PORT with the destination host
    (service) name and port number; raises an exception if not found.

    Parameters
    ----------
    vctx: ctypes c_void_p
        Pointer to RMR context obtained by initializing RMR.
        The context is used to allocate space and send messages.

    managed_object_id: str
        The name of the managed object that raises alarms

    application_id: str
        The name of the process that raises alarms

    Raises
    ------
    InitFailed
        If either environment variable is missing or empty, or if the
        wormhole to the alarm target is not in state RMR_OK after opening.
    """
    def __init__(self,
                 vctx: c_void_p,
                 managed_object_id: str,
                 application_id: str):
        """
        Creates an alarm manager and opens the RMR wormhole to the alarm target.
        """
        self.vctx = vctx
        self.managed_object_id = managed_object_id
        self.application_id = application_id
        service = os.environ.get(ALARM_MGR_SERVICE_NAME_ENV)
        port = os.environ.get(ALARM_MGR_SERVICE_PORT_ENV)
        # An empty value cannot yield a usable "host:port" target, so it is
        # rejected here exactly like a missing variable.
        if not service or not port:
            mdc_logger.error("init: missing env var(s) {0}, {1}".format(ALARM_MGR_SERVICE_NAME_ENV, ALARM_MGR_SERVICE_PORT_ENV))
            raise InitFailed
        target = "{0}:{1}".format(service, port)
        self._wormhole_id = rmr.rmr_wh_open(self.vctx, target.encode('utf-8'))
        if rmr.rmr_wh_state(self.vctx, self._wormhole_id) != rmr.RMR_OK:
            mdc_logger.error("init: failed to open wormhole to target {}".format(target))
            raise InitFailed

    def create_alarm(self,
                     specific_problem: int,
                     perceived_severity: AlarmSeverity,
                     identifying_info: str,
                     additional_info: str = "") -> AlarmDetail:
        """
        Convenience method that creates an alarm instance, an AlarmDetail object,
        using cached values for the managed object ID and application ID.

        Parameters
        ----------
        specific_problem: int
            The problem that is the cause of the alarm

        perceived_severity: AlarmSeverity
            The severity of the alarm, a value from the enum.

        identifying_info: str
            Identifying additional information, which is part of alarm identity

        additional_info: str
            Additional information given by the application (optional)

        Returns
        -------
        AlarmDetail
        """
        return AlarmDetail(managed_object_id=self.managed_object_id,
                           application_id=self.application_id,
                           specific_problem=specific_problem, perceived_severity=perceived_severity,
                           identifying_info=identifying_info, additional_info=additional_info)

    @staticmethod
    def _create_alarm_message(alarm: AlarmDetail, action: AlarmAction) -> dict:
        """
        Creates a dict with the specified alarm detail plus action and time.
        Uses the current system time in milliseconds since the Epoch.

        Parameters
        ----------
        alarm: AlarmDetail
            The alarm details.

        action: AlarmAction
            The action to perform at the Alarm Adapter on this alarm.

        Returns
        -------
        dict
            A new dict with all entries of the alarm, plus the action name and
            the time stamp; the alarm argument itself is not modified.
        """
        return {
            **alarm,
            KEY_ALARM_ACTION: action.name,
            KEY_ALARM_TIME: int(round(time.time() * 1000))
        }

    def _rmr_send_alarm(self, msg: dict) -> bool:
        """
        Serializes the dict and sends the result via RMR using a predefined message
        type to the wormhole initialized at start.  The send is repeated, up to
        RETRIES attempts in total, for as long as RMR reports the retry state.

        Parameters
        ----------
        msg: dict
            Dictionary with alarm message to encode and send

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        payload = json.dumps(msg).encode()
        mdc_logger.debug("_rmr_send_alarm: payload is {}".format(payload))
        sbuf = rmr.rmr_alloc_msg(vctx=self.vctx, size=len(payload), payload=payload,
                                 mtype=RIC_ALARM_UPDATE, gen_transaction_id=True)

        state = None   # stays None (treated as failure) if no attempt is made
        attempts = 0
        try:
            for attempt in range(RETRIES):
                # the send call hands back the buffer to use from here on
                sbuf = rmr.rmr_wh_send_msg(self.vctx, self._wormhole_id, sbuf)
                attempts = attempt + 1
                state = rmr.message_summary(sbuf)[rmr.RMR_MS_MSG_STATE]
                mdc_logger.debug("_rmr_send_alarm: try {0} result is {1}".format(attempt, state))
                # stop trying if RMR does not indicate retry
                if state != rmr.RMR_ERR_RETRY:
                    break
        finally:
            # release the buffer on every path, including an exception above
            rmr.rmr_free_msg(sbuf)

        if state != rmr.RMR_OK:
            # report what actually happened: the loop may have stopped early
            # on a state that is not worth retrying
            mdc_logger.warning("_rmr_send_alarm: failed after {0} attempt(s), final state {1}".format(attempts, state))
            return False

        return True

    def raise_alarm(self, detail: AlarmDetail) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to raise an alarm
        with the specified detail.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to raise

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        msg = self._create_alarm_message(detail, AlarmAction.RAISE)
        return self._rmr_send_alarm(msg)

    def clear_alarm(self, detail: AlarmDetail) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to clear the alarm
        with the specified detail.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to clear

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        msg = self._create_alarm_message(detail, AlarmAction.CLEAR)
        return self._rmr_send_alarm(msg)

    def reraise_alarm(self, detail: AlarmDetail) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to clear the alarm with
        the specified detail, then builds and sends a message to raise the alarm
        again.  The raise message is only sent if the clear message was sent
        successfully.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to clear and raise again.

        Returns
        -------
        bool
            True if both sends succeeded (possibly with retries), False otherwise
        """
        success = self.clear_alarm(detail)
        if success:
            success = self.raise_alarm(detail)
        return success

    def clear_all_alarms(self) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to clear all alarms.
        The message carries a placeholder alarm (problem 0, severity DEFAULT,
        empty info strings) with the cached managed object and application IDs.

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        detail = self.create_alarm(0, AlarmSeverity.DEFAULT, "", "")
        msg = self._create_alarm_message(detail, AlarmAction.CLEARALL)
        return self._rmr_send_alarm(msg)