blob: 3711165783a19baf8b59abfe6e242f55b336c6df [file] [log] [blame]
# ==================================================================================
# Copyright (c) 2020 AT&T Intellectual Property.
# Copyright (c) 2020 Nokia
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==================================================================================
"""
Provides classes and methods to define, raise, reraise and clear alarms.
All actions are implemented by sending RMR messages to the Alarm Adapter.
The alarm target host and port are set by environment variables. The alarm
message contents comply with the JSON schema in file alarm-schema.json.
"""
23
24from ctypes import c_void_p
25from enum import Enum, auto
26import json
Lott, Christopher (cl778h)a03c5172020-07-06 15:13:07 -040027import os
Lott, Christopher (cl778h)81084bc2020-06-01 20:53:12 -040028import time
29from mdclogpy import Logger
30from ricxappframe.rmr import rmr
Lott, Christopher (cl778h)a03c5172020-07-06 15:13:07 -040031from ricxappframe.alarm.exceptions import InitFailed
Lott, Christopher (cl778h)81084bc2020-06-01 20:53:12 -040032
##############
# PRIVATE API
##############

# Maximum number of send attempts made by AlarmManager._rmr_send_alarm
# while RMR keeps reporting a retry state.
RETRIES = 4

# Module-wide logger, named after this module.
mdc_logger = Logger(name=__name__)
39
##############
# PUBLIC API
##############

# RMR message type used for every alarm message sent by AlarmManager.
RIC_ALARM_UPDATE = 110

# Names of the environment variables that AlarmManager reads to find the
# destination host (service) name and port.
ALARM_MGR_SERVICE_NAME_ENV = "ALARM_MGR_SERVICE_NAME"
ALARM_MGR_SERVICE_PORT_ENV = "ALARM_MGR_SERVICE_PORT"

# Dict keys, published as constants for the convenience of client code.
# The casing is deliberately mixed (lower and upper camel case) to comply
# with the Adapter JSON requirements.
KEY_ALARM = "alarm"
# Keys populated by AlarmDetail.
KEY_MANAGED_OBJECT_ID = "managedObjectId"
KEY_APPLICATION_ID = "applicationId"
KEY_SPECIFIC_PROBLEM = "specificProblem"
KEY_PERCEIVED_SEVERITY = "perceivedSeverity"
KEY_ADDITIONAL_INFO = "additionalInfo"
KEY_IDENTIFYING_INFO = "identifyingInfo"
# Keys added when an alarm message is built from an AlarmDetail.
KEY_ALARM_ACTION = "AlarmAction"
KEY_ALARM_TIME = "AlarmTime"
60
61
class AlarmAction(Enum):
    """
    Action to perform at the Alarm Adapter.

    Members are numbered by ``auto()`` in declaration order, so their
    order must not be changed.
    """
    RAISE = auto()
    CLEAR = auto()
    CLEARALL = auto()
69
70
class AlarmSeverity(Enum):
    """
    Severity of an alarm.

    Members are numbered by ``auto()`` in declaration order, so their
    order must not be changed.
    """
    UNSPECIFIED = auto()
    CRITICAL = auto()
    MAJOR = auto()
    MINOR = auto()
    WARNING = auto()
    CLEARED = auto()
    DEFAULT = auto()
82
83
class AlarmDetail(dict):
    """
    An alarm that can be raised or cleared, represented as a dict whose keys
    are the KEY_* constants published by this module.

    Parameters
    ----------
    managed_object_id: str
        The name of the managed object that is the cause of the fault (required)

    application_id: str
        The name of the process that raised the alarm (required)

    specific_problem: int
        The problem that is the cause of the alarm

    perceived_severity: AlarmSeverity
        The severity of the alarm, a value from the enum.
        Only the member name is stored in the dict.

    identifying_info: str
        Identifying additional information, which is part of alarm identity

    additional_info: str
        Additional information given by the application (optional)
    """
    # pylint: disable=too-many-arguments
    def __init__(self,
                 managed_object_id: str,
                 application_id: str,
                 specific_problem: int,
                 perceived_severity: AlarmSeverity,
                 identifying_info: str,
                 additional_info: str = ""):
        """
        Populates the dict with one entry per argument.
        """
        super().__init__()
        # Listed in the order the entries are inserted into the dict.
        entries = (
            (KEY_MANAGED_OBJECT_ID, managed_object_id),
            (KEY_APPLICATION_ID, application_id),
            (KEY_SPECIFIC_PROBLEM, specific_problem),
            (KEY_PERCEIVED_SEVERITY, perceived_severity.name),
            (KEY_IDENTIFYING_INFO, identifying_info),
            (KEY_ADDITIONAL_INFO, additional_info),
        )
        for key, value in entries:
            self[key] = value
126
127
class AlarmManager:
    """
    Provides an API for an Xapp to raise and clear alarms by sending messages
    via RMR directly to an Alarm Adapter.  Requires environment variables
    ALARM_MGR_SERVICE_NAME and ALARM_MGR_SERVICE_PORT with the destination host
    (service) name and port number; raises an exception if not found.

    Parameters
    ----------
    vctx: ctypes c_void_p
        Pointer to RMR context obtained by initializing RMR.
        The context is used to allocate space and send messages.

    managed_object_id: str
        The name of the managed object that raises alarms

    application_id: str
        The name of the process that raises alarms

    Raises
    ------
    InitFailed
        If either environment variable is not set, or if the wormhole to the
        alarm target is not in state RMR_OK after being opened.
    """
    def __init__(self,
                 vctx: c_void_p,
                 managed_object_id: str,
                 application_id: str):
        """
        Creates an alarm manager and opens an RMR wormhole to the alarm target.
        """
        self.vctx = vctx
        self.managed_object_id = managed_object_id
        self.application_id = application_id
        service = os.environ.get(ALARM_MGR_SERVICE_NAME_ENV, None)
        port = os.environ.get(ALARM_MGR_SERVICE_PORT_ENV, None)
        if service is None or port is None:
            mdc_logger.error("init: missing env var(s) {0}, {1}".format(ALARM_MGR_SERVICE_NAME_ENV, ALARM_MGR_SERVICE_PORT_ENV))
            raise InitFailed
        target = "{0}:{1}".format(service, port)
        # The wormhole ID is kept for the lifetime of the manager; every alarm
        # message is sent through it.
        self._wormhole_id = rmr.rmr_wh_open(self.vctx, target.encode('utf-8'))
        if rmr.rmr_wh_state(self.vctx, self._wormhole_id) != rmr.RMR_OK:
            mdc_logger.error("init: failed to open wormhole to target {}".format(target))
            raise InitFailed

    def create_alarm(self,
                     specific_problem: int,
                     perceived_severity: AlarmSeverity,
                     identifying_info: str,
                     additional_info: str = "") -> AlarmDetail:
        """
        Convenience method that creates an alarm instance, an AlarmDetail object,
        using cached values for the managed object ID and application ID.

        Parameters
        ----------
        specific_problem: int
            The problem that is the cause of the alarm

        perceived_severity: AlarmSeverity
            The severity of the alarm, a value from the enum.

        identifying_info: str
            Identifying additional information, which is part of alarm identity

        additional_info: str
            Additional information given by the application (optional)

        Returns
        -------
        AlarmDetail
        """
        return AlarmDetail(managed_object_id=self.managed_object_id,
                           application_id=self.application_id,
                           specific_problem=specific_problem, perceived_severity=perceived_severity,
                           identifying_info=identifying_info, additional_info=additional_info)

    @staticmethod
    def _create_alarm_message(alarm: AlarmDetail, action: AlarmAction) -> dict:
        """
        Creates a dict with the specified alarm detail plus action and time.
        Uses the current system time in milliseconds since the Epoch.
        The alarm argument is copied, not modified.

        Parameters
        ----------
        alarm: AlarmDetail
            The alarm details.

        action: AlarmAction
            The action to perform at the Alarm Adapter on this alarm.
            Only the member name is placed in the message.

        Returns
        -------
        dict
            New dict with all entries of the alarm plus the action and time entries
        """
        return {
            **alarm,
            KEY_ALARM_ACTION: action.name,
            KEY_ALARM_TIME: int(round(time.time() * 1000))
        }

    def _rmr_send_alarm(self, msg: dict) -> bool:
        """
        Serializes the dict and sends the result via RMR using a predefined message
        type to the wormhole initialized at start.  The send is attempted up to
        RETRIES times, for as long as RMR reports the retry state.

        Parameters
        ----------
        msg: dict
            Dictionary with alarm message to encode and send

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        payload = json.dumps(msg).encode()
        mdc_logger.debug("_rmr_send_alarm: payload is {}".format(payload))
        sbuf = rmr.rmr_alloc_msg(vctx=self.vctx, size=len(payload), payload=payload,
                                 mtype=RIC_ALARM_UPDATE, gen_transaction_id=True)

        # Start from "no state" so the check after the loop is well defined
        # even if RETRIES is configured as zero and nothing is sent.
        state = None
        attempts = 0
        for attempt in range(RETRIES):
            attempts = attempt + 1
            # The send call hands back a message buffer; keep using the
            # returned one for the next attempt and for the final free.
            sbuf = rmr.rmr_wh_send_msg(self.vctx, self._wormhole_id, sbuf)
            state = rmr.message_summary(sbuf)[rmr.RMR_MS_MSG_STATE]
            mdc_logger.debug("_rmr_send_alarm: try {0} result is {1}".format(attempt, state))
            # stop trying if RMR does not indicate retry
            if state != rmr.RMR_ERR_RETRY:
                break

        rmr.rmr_free_msg(sbuf)
        if state != rmr.RMR_OK:
            # Report what actually happened: a non-retry error ends the loop
            # on the first attempt, so the attempt count is not always RETRIES.
            mdc_logger.warning("_rmr_send_alarm: failed after {0} attempt(s), final state {1}".format(attempts, state))
            return False

        return True

    def raise_alarm(self, detail: AlarmDetail) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to raise an alarm
        with the specified detail.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to raise

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        msg = self._create_alarm_message(detail, AlarmAction.RAISE)
        return self._rmr_send_alarm(msg)

    def clear_alarm(self, detail: AlarmDetail) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to clear the alarm
        with the specified detail.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to clear

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        msg = self._create_alarm_message(detail, AlarmAction.CLEAR)
        return self._rmr_send_alarm(msg)

    def reraise_alarm(self, detail: AlarmDetail) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to clear the alarm with
        the specified detail, then builds and sends a message to raise the alarm
        again.  The raise message is sent only if the clear message was sent
        successfully.

        Parameters
        ----------
        detail: AlarmDetail
            Alarm to clear and raise again.

        Returns
        -------
        bool
            True if both sends succeeded (possibly with retries), False otherwise
        """
        success = self.clear_alarm(detail)
        if success:
            success = self.raise_alarm(detail)
        return success

    def clear_all_alarms(self) -> bool:
        """
        Builds and sends a message to the AlarmAdapter to clear all alarms.
        The message carries a placeholder alarm (problem 0, severity DEFAULT,
        empty info strings) for this manager's managed object and application.

        Returns
        -------
        bool
            True if the send succeeded (possibly with retries), False otherwise
        """
        detail = self.create_alarm(0, AlarmSeverity.DEFAULT, "", "")
        msg = self._create_alarm_message(detail, AlarmAction.CLEARALL)
        return self._rmr_send_alarm(msg)