blob: 660d6372c9065daf751f21e80850a401f0403bdf [file] [log] [blame]
Mohamed Abukar3e038152020-03-04 10:01:45 +02001/*
2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
19 */
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020020
Mohamed Abukar3e038152020-03-04 10:01:45 +020021package main
22
23import (
Mohamed Abukar105030f2020-10-22 18:08:34 +030024 "bytes"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020025 "encoding/json"
26 "fmt"
Anssi Mannila18fd03c2020-10-29 10:01:00 +020027 "io/ioutil"
28 "net/http"
29 "os"
30 "time"
31
vipin14323a92020-09-25 10:03:43 +000032 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
33 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020034 clientruntime "github.com/go-openapi/runtime/client"
35 "github.com/go-openapi/strfmt"
36 "github.com/prometheus/alertmanager/api/v2/client"
37 "github.com/prometheus/alertmanager/api/v2/client/alert"
38 "github.com/prometheus/alertmanager/api/v2/models"
39 "github.com/spf13/viper"
Mohamed Abukar3e038152020-03-04 10:01:45 +020040)
41
Mohamed Abukar3649fae2020-10-30 23:51:39 +020042func (a *AlarmManager) ClearExpiredAlarms(m AlarmNotification, idx int, mLocked bool) bool {
43 d, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]
44 if !ok || d.TimeToLive == 0 {
45 return false
46 }
47
48 elapsed := (time.Now().UnixNano() - m.AlarmTime) / 1e9
49 if int(elapsed) >= d.TimeToLive {
50 app.Logger.Info("Alarm (sp=%d id=%d) with TTL=%d expired, clearing ...", m.Alarm.SpecificProblem, m.AlarmId, d.TimeToLive)
51
52 m.AlarmAction = alarm.AlarmActionClear
53 m.AlarmTime = time.Now().UnixNano()
54
55 if !mLocked { // For testing purpose
56 a.mutex.Lock()
57 }
58 a.ProcessClearAlarm(&m, d, idx)
59 return true
60 }
61 return false
62}
63
64func (a *AlarmManager) StartTTLTimer(interval int) {
65 tick := time.Tick(time.Duration(interval) * time.Second)
66 for range tick {
67 a.mutex.Lock()
68 for idx, m := range a.activeAlarms {
69 if a.ClearExpiredAlarms(m, idx, true) {
70 a.mutex.Lock() // ClearExpiredAlarms unlocks the mutex, so re-lock here
71 continue
72 }
73 }
74 a.mutex.Unlock()
75 }
76}
77
Abukar Mohamed121e8b62020-09-18 11:41:33 +000078func (a *AlarmManager) StartAlertTimer() {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020079 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
80 for range tick {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020081 a.mutex.Lock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020082 for _, m := range a.activeAlarms {
83 app.Logger.Info("Re-raising alarm: %v", m)
vipinba2ef5b2020-11-06 11:24:48 +000084 a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020085 }
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020086 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020087 }
88}
89
Abukar Mohamed121e8b62020-09-18 11:41:33 +000090func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020091 app.Logger.Info("Message received!")
92
93 defer app.Rmr.Free(rp.Mbuf)
94 switch rp.Mtype {
95 case alarm.RIC_ALARM_UPDATE:
96 a.HandleAlarms(rp)
97 default:
98 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
99 }
100
101 return nil
102}
103
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000104func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200105 var m alarm.AlarmMessage
Lott, Christopher (cl778h)3e8e2aa2020-06-03 08:52:14 -0400106 app.Logger.Info("Received JSON: %s", rp.Payload)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200107 if err := json.Unmarshal(rp.Payload, &m); err != nil {
108 app.Logger.Error("json.Unmarshal failed: %v", err)
109 return nil, err
110 }
111 app.Logger.Info("newAlarm: %v", m)
112
Mohamed Abukar105030f2020-10-22 18:08:34 +0300113 return a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300114}
115
Mohamed Abukar105030f2020-10-22 18:08:34 +0300116func (a *AlarmManager) ProcessAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
vipin6f73fa32020-10-06 06:51:53 +0000117 a.mutex.Lock()
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200118 alarmDef := &alarm.AlarmDefinition{}
119 var ok bool
120 if alarmDef, ok = alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300121 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
vipin6f73fa32020-10-06 06:51:53 +0000122 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200123 return nil, nil
124 }
125
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200126 idx, found := a.IsMatchFound(m.Alarm)
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200127 // Suppress duplicate alarms
vipin4cedd502020-09-25 05:58:31 +0000128 if found && m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300129 app.Logger.Info("Duplicate alarm found, suppressing ...")
Anssi Mannilafe07bd12020-09-24 14:02:57 +0300130 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
131 // Duplicate with same severity found
vipin6f73fa32020-10-06 06:51:53 +0000132 a.mutex.Unlock()
Anssi Mannilafe07bd12020-09-24 14:02:57 +0300133 return nil, nil
134 } else {
135 // Remove duplicate with different severity
136 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
137 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200138 }
139
140 // Clear alarm if found from active alarm list
Mohamed Abukar2336a842020-10-30 16:19:38 +0200141 if found && m.AlarmAction == alarm.AlarmActionClear {
142 return a.ProcessClearAlarm(m, alarmDef, idx)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200143 }
144
145 // New alarm -> update active alarms and post to Alert Manager
146 if m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar2336a842020-10-30 16:19:38 +0200147 return a.ProcessRaiseAlarm(m, alarmDef)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200148 }
149
vipin6f73fa32020-10-06 06:51:53 +0000150 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200151 return nil, nil
152}
153
Mohamed Abukar2336a842020-10-30 16:19:38 +0200154func (a *AlarmManager) ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) (*alert.PostAlertsOK, error) {
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200155 app.Logger.Debug("Raise alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
Mohamed Abukar2336a842020-10-30 16:19:38 +0200156
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200157 // RaiseDelay > 0 in an alarm object in active alarm table indicates that raise delay is still ongoing for the alarm
158 m.AlarmDefinition.RaiseDelay = alarmDef.RaiseDelay
159 a.UpdateAlarmFields(a.GenerateAlarmId(), m)
160 a.UpdateActiveAlarmList(m)
161 a.mutex.Unlock()
Mohamed Abukar2336a842020-10-30 16:19:38 +0200162
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200163 if alarmDef.RaiseDelay > 0 {
164 timerDelay(alarmDef.RaiseDelay)
165 a.mutex.Lock()
166 // Alarm may have been deleted from active alarms table during delay or table index may have changed
167 idx, found := a.IsMatchFound(m.Alarm)
168 if found {
169 // Alarm is not showed in active alarms or alarm history via CLI before RaiseDelay has elapsed, i.e the value is 0
170 a.activeAlarms[idx].AlarmDefinition.RaiseDelay = 0
171 app.Logger.Debug("Raise after delay alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
172 a.mutex.Unlock()
173 } else {
174 app.Logger.Debug("Alarm deleted during raise delay. AlarmNotification = %v", *m)
175 a.mutex.Unlock()
Mohamed Abukar2336a842020-10-30 16:19:38 +0200176 return nil, nil
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200177 }
178 }
Mohamed Abukar2336a842020-10-30 16:19:38 +0200179
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200180 m.AlarmDefinition.RaiseDelay = 0
181 a.UpdateAlarmHistoryList(m)
Mohamed Abukar2336a842020-10-30 16:19:38 +0200182 a.WriteAlarmInfoToPersistentVolume()
183
184 // Send alarm notification to NOMA, if enabled
185 if app.Config.GetBool("controls.noma.enabled") {
186 return a.PostAlarm(m)
187 }
vipinba2ef5b2020-11-06 11:24:48 +0000188 return a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200189}
190
Mohamed Abukar2336a842020-10-30 16:19:38 +0200191func (a *AlarmManager) ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) (*alert.PostAlertsOK, error) {
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200192 app.Logger.Debug("Clear alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
193 if alarmDef.ClearDelay > 0 {
194 a.mutex.Unlock()
195 timerDelay(alarmDef.ClearDelay)
196 app.Logger.Debug("Clear after delay alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
197 a.mutex.Lock()
198 // Another alarm clear may have happened during delay and active alarms table index changed
199 var found bool
200 idx, found = a.IsMatchFound(m.Alarm)
201 if !found {
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200202 a.mutex.Unlock()
Mohamed Abukar2336a842020-10-30 16:19:38 +0200203 return nil, nil
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200204 }
205 }
206 a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
207 a.alarmHistory = append(a.alarmHistory, *m)
208 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
209 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
210 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
211 a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
212 }
213
214 if a.exceededActiveAlarmOn && m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD {
215 a.exceededActiveAlarmOn = false
216 }
217
218 if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
219 a.exceededAlarmHistoryOn = false
220 }
Mohamed Abukar2336a842020-10-30 16:19:38 +0200221 a.WriteAlarmInfoToPersistentVolume()
222
223 a.mutex.Unlock()
224 if a.postClear && app.Config.GetBool("controls.noma.enabled") {
225 m.PerceivedSeverity = alarm.SeverityCleared
226 return a.PostAlarm(m)
227 }
228 return nil, nil
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200229}
230
// timerDelay blocks the calling goroutine for delay seconds. A zero or
// negative delay returns immediately.
func timerDelay(delay int) {
	wait := time.Duration(delay) * time.Second
	<-time.After(wait)
}
235
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000236func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200237 for i, m := range a.activeAlarms {
238 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
Mohamed Abukar0c389732020-09-17 14:47:50 +0300239 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200240 return i, true
241 }
242 }
243 return -1, false
244}
245
Mohamed Abukar105030f2020-10-22 18:08:34 +0300246func (a *AlarmManager) RemoveAlarm(alarms []AlarmNotification, i int, listName string) []AlarmNotification {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300247 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200248 copy(alarms[i:], alarms[i+1:])
249 return alarms[:len(alarms)-1]
250}
251
Mohamed Abukar105030f2020-10-22 18:08:34 +0300252func (a *AlarmManager) GenerateAlarmId() int {
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300253 a.uniqueAlarmId++ // @todo: generate a unique ID
Mohamed Abukar105030f2020-10-22 18:08:34 +0300254 return a.uniqueAlarmId
255}
256
257func (a *AlarmManager) UpdateAlarmFields(alarmId int, newAlarm *AlarmNotification) {
258 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
259 newAlarm.AlarmId = alarmId
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300260 newAlarm.AlarmText = alarmDef.AlarmText
261 newAlarm.EventType = alarmDef.EventType
262}
263
Mohamed Abukar105030f2020-10-22 18:08:34 +0300264func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool {
265 thresholdAlarm := a.alarmClient.NewAlarm(sp, alarm.SeverityWarning, "threshold", data)
266 thresholdMessage := alarm.AlarmMessage{
267 Alarm: thresholdAlarm,
268 AlarmAction: alarm.AlarmActionRaise,
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200269 AlarmTime: time.Now().UnixNano(),
Mohamed Abukar105030f2020-10-22 18:08:34 +0300270 }
vipin78b2b0a2020-10-28 10:10:18 +0000271 alarmDef := alarm.RICAlarmDefinitions[sp]
272 alarmId := a.GenerateAlarmId()
273 alarmDef.AlarmId = alarmId
274 a.activeAlarms = append(a.activeAlarms, AlarmNotification{thresholdMessage, *alarmDef})
275 a.alarmHistory = append(a.alarmHistory, AlarmNotification{thresholdMessage, *alarmDef})
Mohamed Abukar105030f2020-10-22 18:08:34 +0300276
277 return true
278}
279
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200280func (a *AlarmManager) UpdateActiveAlarmList(newAlarm *AlarmNotification) {
vipin541eb502020-09-22 12:04:59 +0000281 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200282 The attempt to raise the alarm next time will be suppressed when found as duplicate. */
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300283 if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) {
Mohamed Abukar105030f2020-10-22 18:08:34 +0300284 app.Logger.Warn("active alarm count exceeded maxActiveAlarms threshold")
285 a.exceededActiveAlarmOn = a.GenerateThresholdAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, "active")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300286 }
287
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200288 // @todo: For now just keep the active alarms in-memory. Use SDL later for persistence
289 a.activeAlarms = append(a.activeAlarms, *newAlarm)
290}
291
292func (a *AlarmManager) UpdateAlarmHistoryList(newAlarm *AlarmNotification) {
293 /* If maximum number of events in alarm history is reached, an error log writing is made,
294 and new alarm indicating the problem is raised. The attempt to add new event time will
295 be suppressed */
296
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300297 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
Mohamed Abukar105030f2020-10-22 18:08:34 +0300298 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
299 a.exceededAlarmHistoryOn = a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300300 }
301
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200302 // @todo: For now just keep the alarms history in-memory. Use SDL later for persistence
Mohamed Abukar0c389732020-09-17 14:47:50 +0300303 a.alarmHistory = append(a.alarmHistory, *newAlarm)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200304}
305
Mohamed Abukar105030f2020-10-22 18:08:34 +0300306func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
307 result, err := json.Marshal(m)
308 if err != nil {
309 app.Logger.Info("json.Marshal failed: %v", err)
310 return nil, err
311 }
312
313 fullUrl := fmt.Sprintf("%s/%s", app.Config.GetString("controls.noma.host"), app.Config.GetString("controls.noma.alarmUrl"))
314 app.Logger.Info("Posting alarm to '%s'", fullUrl)
315
316 resp, err := http.Post(fullUrl, "application/json", bytes.NewReader(result))
317 if err != nil || resp == nil {
318 app.Logger.Info("Unable to post alarm to '%s': %v", fullUrl, err)
319 }
320
321 return nil, err
322}
323
vipinba2ef5b2020-11-06 11:24:48 +0000324func (a *AlarmManager) GenerateAlertLabels(alarmId int, newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200325 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200326 amLabels := models.LabelSet{
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200327 "status": string(status),
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200328 "alertname": alarmDef.AlarmText,
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200329 "severity": string(newAlarm.PerceivedSeverity),
Mohamed Abukar105030f2020-10-22 18:08:34 +0300330 "service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar273c8e82021-10-21 10:02:07 +0300331 "info": newAlarm.IdentifyingInfo,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300332 "system_name": "RIC",
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200333 }
334 amAnnotations := models.LabelSet{
vipinba2ef5b2020-11-06 11:24:48 +0000335 "alarm_id": fmt.Sprintf("%d", alarmId),
Mohamed Abukar105030f2020-10-22 18:08:34 +0300336 "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
337 "event_type": alarmDef.EventType,
338 "identifying_info": newAlarm.IdentifyingInfo,
339 "additional_info": newAlarm.AdditionalInfo,
340 "description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
341 "instructions": alarmDef.OperationInstructions,
342 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200343 }
344
345 return amLabels, amAnnotations
346}
347
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000348func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200349 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
350 return client.New(cr, strfmt.Default)
351}
352
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000353func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200354 pa := &models.PostableAlert{
355 Alert: models.Alert{
356 GeneratorURL: strfmt.URI(""),
357 Labels: amLabels,
358 },
359 Annotations: amAnnotations,
360 }
361 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
362
Mohamed Abukar643241f2020-06-09 15:26:00 +0300363 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200364 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
365 if err != nil {
366 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
367 }
368 return ok, err
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200369}
370
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000371func (a *AlarmManager) StatusCB() bool {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200372 if !a.rmrReady {
373 app.Logger.Info("RMR not ready yet!")
374 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200375 return a.rmrReady
Mohamed Abukar3e038152020-03-04 10:01:45 +0200376}
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300377
vipin541eb502020-09-22 12:04:59 +0000378func (a *AlarmManager) ConfigChangeCB(configparam string) {
vipin541eb502020-09-22 12:04:59 +0000379 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
Anssi Mannilaac56b892020-11-20 14:50:00 +0200380 if a.maxActiveAlarms == 0 {
381 a.maxActiveAlarms = 5000
382 }
383
vipin541eb502020-09-22 12:04:59 +0000384 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
Anssi Mannilaac56b892020-11-20 14:50:00 +0200385 if a.maxAlarmHistory == 0 {
386 a.maxAlarmHistory = 20000
387 }
Mohamed Abukar2a74ea52021-03-07 10:10:58 +0200388
Anssi Mannila4450a892020-09-25 10:24:29 +0300389 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
390 a.amHost = viper.GetString("controls.promAlertManager.address")
vipin541eb502020-09-22 12:04:59 +0000391
392 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
393 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
Anssi Mannila4450a892020-09-25 10:24:29 +0300394 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
395 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
vipin541eb502020-09-22 12:04:59 +0000396
397 return
398}
399
vipin14323a92020-09-25 10:03:43 +0000400func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
401
402 filename := os.Getenv("DEF_FILE")
403 file, err := ioutil.ReadFile(filename)
404 if err == nil {
405 data := RicAlarmDefinitions{}
406 err = json.Unmarshal([]byte(file), &data)
407 if err == nil {
408 for _, alarmDefinition := range data.AlarmDefinitions {
409 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
410 if exists {
411 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
412 } else {
413 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
414 ricAlarmDefintion := new(alarm.AlarmDefinition)
415 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
416 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
417 ricAlarmDefintion.EventType = alarmDefinition.EventType
418 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200419 ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay
420 ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200421 ricAlarmDefintion.TimeToLive = alarmDefinition.TimeToLive
vipin14323a92020-09-25 10:03:43 +0000422 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
423 }
424 }
425 } else {
vipin6f73fa32020-10-06 06:51:53 +0000426 app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
vipin14323a92020-09-25 10:03:43 +0000427 }
428 } else {
vipin6f73fa32020-10-06 06:51:53 +0000429 app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
vipin14323a92020-09-25 10:03:43 +0000430 }
431}
432
vipin78b2b0a2020-10-28 10:10:18 +0000433func (a *AlarmManager) ReadAlarmInfoFromPersistentVolume() {
434 var alarmpersistentinfo AlarmPersistentInfo
435 byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile)
436 if rerr != nil {
437 app.Logger.Error("ararminfo.json file read error %v", rerr)
438 } else {
439 err := json.Unmarshal(byteValue, &alarmpersistentinfo)
440 if err != nil {
441 app.Logger.Error("alarmpersistentinfo json unmarshal error %v", err)
442 } else {
443 a.uniqueAlarmId = alarmpersistentinfo.UniqueAlarmId
444 a.activeAlarms = make([]AlarmNotification, len(alarmpersistentinfo.ActiveAlarms))
445 a.alarmHistory = make([]AlarmNotification, len(alarmpersistentinfo.AlarmHistory))
446 copy(a.activeAlarms, alarmpersistentinfo.ActiveAlarms)
447 copy(a.alarmHistory, alarmpersistentinfo.AlarmHistory)
448 }
449 }
450}
451
452func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() {
453 var alarmpersistentinfo AlarmPersistentInfo
454 alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
455 alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
456 alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200457
vipin78b2b0a2020-10-28 10:10:18 +0000458 copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
459 copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200460
vipin78b2b0a2020-10-28 10:10:18 +0000461 wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
462 if err != nil {
463 app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
464 } else {
465 werr := ioutil.WriteFile(a.alarmInfoPvFile, wdata, 0777)
466 if werr != nil {
467 app.Logger.Error("alarminfo.json file write error %v", werr)
468 }
469 }
470}
471
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200472func (a *AlarmManager) Run(sdlcheck bool, ttlInterval int) {
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000473 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300474 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
475 app.Resource.InjectStatusCb(a.StatusCB)
vipin541eb502020-09-22 12:04:59 +0000476 app.AddConfigChangeListener(a.ConfigChangeCB)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300477
vipin54a3a4f2020-09-23 12:19:58 +0000478 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
vipin14323a92020-09-25 10:03:43 +0000479 a.ReadAlarmDefinitionFromJson()
vipin54a3a4f2020-09-23 12:19:58 +0000480
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200481 a.InjectRoutes()
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300482
483 // Start background timer for re-raising alerts
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300484 go a.StartAlertTimer()
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200485 go a.StartTTLTimer(ttlInterval)
486
vipin541eb502020-09-22 12:04:59 +0000487 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300488
vipin78b2b0a2020-10-28 10:10:18 +0000489 a.ReadAlarmInfoFromPersistentVolume()
490
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300491 app.RunWithParams(a, sdlcheck)
492}
493
Mohamed Abukar105030f2020-10-22 18:08:34 +0300494func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300495 if alertInterval == 0 {
496 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
497 }
498
499 if amHost == "" {
500 amHost = viper.GetString("controls.promAlertManager.address")
501 }
502
Anssi Mannilaac56b892020-11-20 14:50:00 +0200503 maxActiveAlarms := app.Config.GetInt("controls.maxActiveAlarms")
504 if maxActiveAlarms == 0 {
505 maxActiveAlarms = 5000
506 }
507
508 maxAlarmHistory := app.Config.GetInt("controls.maxAlarmHistory")
509 if maxAlarmHistory == 0 {
510 maxAlarmHistory = 20000
511 }
512
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000513 return &AlarmManager{
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300514 rmrReady: false,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300515 postClear: clearAlarm,
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300516 amHost: amHost,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300517 amBaseUrl: app.Config.GetString("controls.promAlertManager.baseUrl"),
518 amSchemes: []string{app.Config.GetString("controls.promAlertManager.schemes")},
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300519 alertInterval: alertInterval,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300520 activeAlarms: make([]AlarmNotification, 0),
521 alarmHistory: make([]AlarmNotification, 0),
522 uniqueAlarmId: 0,
Anssi Mannilaac56b892020-11-20 14:50:00 +0200523 maxActiveAlarms: maxActiveAlarms,
524 maxAlarmHistory: maxAlarmHistory,
vipin6f73fa32020-10-06 06:51:53 +0000525 exceededActiveAlarmOn: false,
526 exceededAlarmHistoryOn: false,
vipin78b2b0a2020-10-28 10:10:18 +0000527 alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"),
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300528 }
529}
530
531// Main function
532func main() {
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200533 NewAlarmManager("", 0, true).Run(true, 10)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300534}