blob: dfa6b3df267abed4b7c6fa2bf67200b04f61ba2b [file] [log] [blame]
Mohamed Abukar3e038152020-03-04 10:01:45 +02001/*
2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
19 */
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020020
Mohamed Abukar3e038152020-03-04 10:01:45 +020021package main
22
23import (
Mohamed Abukar105030f2020-10-22 18:08:34 +030024 "bytes"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020025 "encoding/json"
26 "fmt"
Anssi Mannila18fd03c2020-10-29 10:01:00 +020027 "io/ioutil"
28 "net/http"
29 "os"
Mohamed Abukar48ac36c2022-03-27 07:47:08 +000030 "strconv"
31 "strings"
Anssi Mannila18fd03c2020-10-29 10:01:00 +020032 "time"
33
Juha Hyttinen281d0602021-12-23 10:14:40 +020034 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go.git/alarm"
vipin14323a92020-09-25 10:03:43 +000035 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020036 clientruntime "github.com/go-openapi/runtime/client"
37 "github.com/go-openapi/strfmt"
38 "github.com/prometheus/alertmanager/api/v2/client"
39 "github.com/prometheus/alertmanager/api/v2/client/alert"
40 "github.com/prometheus/alertmanager/api/v2/models"
41 "github.com/spf13/viper"
Mohamed Abukar3e038152020-03-04 10:01:45 +020042)
43
Mohamed Abukar3649fae2020-10-30 23:51:39 +020044func (a *AlarmManager) ClearExpiredAlarms(m AlarmNotification, idx int, mLocked bool) bool {
45 d, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]
46 if !ok || d.TimeToLive == 0 {
47 return false
48 }
49
50 elapsed := (time.Now().UnixNano() - m.AlarmTime) / 1e9
51 if int(elapsed) >= d.TimeToLive {
52 app.Logger.Info("Alarm (sp=%d id=%d) with TTL=%d expired, clearing ...", m.Alarm.SpecificProblem, m.AlarmId, d.TimeToLive)
53
54 m.AlarmAction = alarm.AlarmActionClear
55 m.AlarmTime = time.Now().UnixNano()
56
57 if !mLocked { // For testing purpose
58 a.mutex.Lock()
59 }
60 a.ProcessClearAlarm(&m, d, idx)
61 return true
62 }
63 return false
64}
65
66func (a *AlarmManager) StartTTLTimer(interval int) {
67 tick := time.Tick(time.Duration(interval) * time.Second)
68 for range tick {
69 a.mutex.Lock()
70 for idx, m := range a.activeAlarms {
71 if a.ClearExpiredAlarms(m, idx, true) {
72 a.mutex.Lock() // ClearExpiredAlarms unlocks the mutex, so re-lock here
73 continue
74 }
75 }
76 a.mutex.Unlock()
77 }
78}
79
Abukar Mohamed121e8b62020-09-18 11:41:33 +000080func (a *AlarmManager) StartAlertTimer() {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020081 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
82 for range tick {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020083 a.mutex.Lock()
Mohamed Abukar48ac36c2022-03-27 07:47:08 +000084
85 a.ProcessAlerts()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020086 for _, m := range a.activeAlarms {
87 app.Logger.Info("Re-raising alarm: %v", m)
vipinba2ef5b2020-11-06 11:24:48 +000088 a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020089 }
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020090 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020091 }
92}
93
Abukar Mohamed121e8b62020-09-18 11:41:33 +000094func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020095 app.Logger.Info("Message received!")
96
97 defer app.Rmr.Free(rp.Mbuf)
98 switch rp.Mtype {
99 case alarm.RIC_ALARM_UPDATE:
100 a.HandleAlarms(rp)
101 default:
102 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
103 }
104
105 return nil
106}
107
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000108func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200109 var m alarm.AlarmMessage
Lott, Christopher (cl778h)3e8e2aa2020-06-03 08:52:14 -0400110 app.Logger.Info("Received JSON: %s", rp.Payload)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200111 if err := json.Unmarshal(rp.Payload, &m); err != nil {
112 app.Logger.Error("json.Unmarshal failed: %v", err)
113 return nil, err
114 }
115 app.Logger.Info("newAlarm: %v", m)
116
Mohamed Abukar105030f2020-10-22 18:08:34 +0300117 return a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300118}
119
Mohamed Abukar105030f2020-10-22 18:08:34 +0300120func (a *AlarmManager) ProcessAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
vipin6f73fa32020-10-06 06:51:53 +0000121 a.mutex.Lock()
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200122 alarmDef := &alarm.AlarmDefinition{}
123 var ok bool
124 if alarmDef, ok = alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300125 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
vipin6f73fa32020-10-06 06:51:53 +0000126 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200127 return nil, nil
128 }
129
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200130 idx, found := a.IsMatchFound(m.Alarm)
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200131 // Suppress duplicate alarms
vipin4cedd502020-09-25 05:58:31 +0000132 if found && m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300133 app.Logger.Info("Duplicate alarm found, suppressing ...")
Anssi Mannilafe07bd12020-09-24 14:02:57 +0300134 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
135 // Duplicate with same severity found
vipin6f73fa32020-10-06 06:51:53 +0000136 a.mutex.Unlock()
Anssi Mannilafe07bd12020-09-24 14:02:57 +0300137 return nil, nil
138 } else {
139 // Remove duplicate with different severity
140 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
141 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200142 }
143
144 // Clear alarm if found from active alarm list
Mohamed Abukar2336a842020-10-30 16:19:38 +0200145 if found && m.AlarmAction == alarm.AlarmActionClear {
146 return a.ProcessClearAlarm(m, alarmDef, idx)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200147 }
148
149 // New alarm -> update active alarms and post to Alert Manager
150 if m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar2336a842020-10-30 16:19:38 +0200151 return a.ProcessRaiseAlarm(m, alarmDef)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200152 }
153
vipin6f73fa32020-10-06 06:51:53 +0000154 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200155 return nil, nil
156}
157
Mohamed Abukar2336a842020-10-30 16:19:38 +0200158func (a *AlarmManager) ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) (*alert.PostAlertsOK, error) {
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200159 app.Logger.Debug("Raise alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
Mohamed Abukar2336a842020-10-30 16:19:38 +0200160
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200161 // RaiseDelay > 0 in an alarm object in active alarm table indicates that raise delay is still ongoing for the alarm
162 m.AlarmDefinition.RaiseDelay = alarmDef.RaiseDelay
163 a.UpdateAlarmFields(a.GenerateAlarmId(), m)
164 a.UpdateActiveAlarmList(m)
165 a.mutex.Unlock()
Mohamed Abukar2336a842020-10-30 16:19:38 +0200166
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200167 if alarmDef.RaiseDelay > 0 {
168 timerDelay(alarmDef.RaiseDelay)
169 a.mutex.Lock()
170 // Alarm may have been deleted from active alarms table during delay or table index may have changed
171 idx, found := a.IsMatchFound(m.Alarm)
172 if found {
173 // Alarm is not showed in active alarms or alarm history via CLI before RaiseDelay has elapsed, i.e the value is 0
174 a.activeAlarms[idx].AlarmDefinition.RaiseDelay = 0
175 app.Logger.Debug("Raise after delay alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m)
176 a.mutex.Unlock()
177 } else {
178 app.Logger.Debug("Alarm deleted during raise delay. AlarmNotification = %v", *m)
179 a.mutex.Unlock()
Mohamed Abukar2336a842020-10-30 16:19:38 +0200180 return nil, nil
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200181 }
182 }
Mohamed Abukar2336a842020-10-30 16:19:38 +0200183
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200184 m.AlarmDefinition.RaiseDelay = 0
185 a.UpdateAlarmHistoryList(m)
Mohamed Abukar2336a842020-10-30 16:19:38 +0200186 a.WriteAlarmInfoToPersistentVolume()
187
188 // Send alarm notification to NOMA, if enabled
189 if app.Config.GetBool("controls.noma.enabled") {
190 return a.PostAlarm(m)
191 }
vipinba2ef5b2020-11-06 11:24:48 +0000192 return a.PostAlert(a.GenerateAlertLabels(m.AlarmId, m.Alarm, AlertStatusActive, m.AlarmTime))
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200193}
194
Mohamed Abukar2336a842020-10-30 16:19:38 +0200195func (a *AlarmManager) ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) (*alert.PostAlertsOK, error) {
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200196 app.Logger.Debug("Clear alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
197 if alarmDef.ClearDelay > 0 {
198 a.mutex.Unlock()
199 timerDelay(alarmDef.ClearDelay)
200 app.Logger.Debug("Clear after delay alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m)
201 a.mutex.Lock()
202 // Another alarm clear may have happened during delay and active alarms table index changed
203 var found bool
204 idx, found = a.IsMatchFound(m.Alarm)
205 if !found {
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200206 a.mutex.Unlock()
Mohamed Abukar2336a842020-10-30 16:19:38 +0200207 return nil, nil
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200208 }
209 }
210 a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m)
211 a.alarmHistory = append(a.alarmHistory, *m)
212 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
213 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
214 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
215 a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
216 }
217
218 if a.exceededActiveAlarmOn && m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD {
219 a.exceededActiveAlarmOn = false
220 }
221
222 if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD {
223 a.exceededAlarmHistoryOn = false
224 }
Mohamed Abukar2336a842020-10-30 16:19:38 +0200225 a.WriteAlarmInfoToPersistentVolume()
226
227 a.mutex.Unlock()
228 if a.postClear && app.Config.GetBool("controls.noma.enabled") {
229 m.PerceivedSeverity = alarm.SeverityCleared
230 return a.PostAlarm(m)
231 }
232 return nil, nil
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200233}
234
// timerDelay blocks the calling goroutine for delay seconds. A zero or
// negative delay returns immediately.
func timerDelay(delay int) {
	time.Sleep(time.Duration(delay) * time.Second)
}
239
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000240func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200241 for i, m := range a.activeAlarms {
242 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
Mohamed Abukar0c389732020-09-17 14:47:50 +0300243 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200244 return i, true
245 }
246 }
247 return -1, false
248}
249
Mohamed Abukar105030f2020-10-22 18:08:34 +0300250func (a *AlarmManager) RemoveAlarm(alarms []AlarmNotification, i int, listName string) []AlarmNotification {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300251 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200252 copy(alarms[i:], alarms[i+1:])
253 return alarms[:len(alarms)-1]
254}
255
Mohamed Abukar105030f2020-10-22 18:08:34 +0300256func (a *AlarmManager) GenerateAlarmId() int {
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300257 a.uniqueAlarmId++ // @todo: generate a unique ID
Mohamed Abukar105030f2020-10-22 18:08:34 +0300258 return a.uniqueAlarmId
259}
260
261func (a *AlarmManager) UpdateAlarmFields(alarmId int, newAlarm *AlarmNotification) {
262 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
263 newAlarm.AlarmId = alarmId
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300264 newAlarm.AlarmText = alarmDef.AlarmText
265 newAlarm.EventType = alarmDef.EventType
266}
267
Mohamed Abukar105030f2020-10-22 18:08:34 +0300268func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool {
269 thresholdAlarm := a.alarmClient.NewAlarm(sp, alarm.SeverityWarning, "threshold", data)
270 thresholdMessage := alarm.AlarmMessage{
271 Alarm: thresholdAlarm,
272 AlarmAction: alarm.AlarmActionRaise,
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200273 AlarmTime: time.Now().UnixNano(),
Mohamed Abukar105030f2020-10-22 18:08:34 +0300274 }
vipin78b2b0a2020-10-28 10:10:18 +0000275 alarmDef := alarm.RICAlarmDefinitions[sp]
276 alarmId := a.GenerateAlarmId()
277 alarmDef.AlarmId = alarmId
278 a.activeAlarms = append(a.activeAlarms, AlarmNotification{thresholdMessage, *alarmDef})
279 a.alarmHistory = append(a.alarmHistory, AlarmNotification{thresholdMessage, *alarmDef})
Mohamed Abukar105030f2020-10-22 18:08:34 +0300280
281 return true
282}
283
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200284func (a *AlarmManager) UpdateActiveAlarmList(newAlarm *AlarmNotification) {
vipin541eb502020-09-22 12:04:59 +0000285 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200286 The attempt to raise the alarm next time will be suppressed when found as duplicate. */
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300287 if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) {
Mohamed Abukar105030f2020-10-22 18:08:34 +0300288 app.Logger.Warn("active alarm count exceeded maxActiveAlarms threshold")
289 a.exceededActiveAlarmOn = a.GenerateThresholdAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, "active")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300290 }
291
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200292 // @todo: For now just keep the active alarms in-memory. Use SDL later for persistence
293 a.activeAlarms = append(a.activeAlarms, *newAlarm)
294}
295
296func (a *AlarmManager) UpdateAlarmHistoryList(newAlarm *AlarmNotification) {
297 /* If maximum number of events in alarm history is reached, an error log writing is made,
298 and new alarm indicating the problem is raised. The attempt to add new event time will
299 be suppressed */
300
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300301 if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) {
Mohamed Abukar105030f2020-10-22 18:08:34 +0300302 app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold")
303 a.exceededAlarmHistoryOn = a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300304 }
305
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200306 // @todo: For now just keep the alarms history in-memory. Use SDL later for persistence
Mohamed Abukar0c389732020-09-17 14:47:50 +0300307 a.alarmHistory = append(a.alarmHistory, *newAlarm)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200308}
309
Mohamed Abukar105030f2020-10-22 18:08:34 +0300310func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) {
311 result, err := json.Marshal(m)
312 if err != nil {
313 app.Logger.Info("json.Marshal failed: %v", err)
314 return nil, err
315 }
316
317 fullUrl := fmt.Sprintf("%s/%s", app.Config.GetString("controls.noma.host"), app.Config.GetString("controls.noma.alarmUrl"))
318 app.Logger.Info("Posting alarm to '%s'", fullUrl)
319
320 resp, err := http.Post(fullUrl, "application/json", bytes.NewReader(result))
321 if err != nil || resp == nil {
322 app.Logger.Info("Unable to post alarm to '%s': %v", fullUrl, err)
323 }
324
325 return nil, err
326}
327
vipinba2ef5b2020-11-06 11:24:48 +0000328func (a *AlarmManager) GenerateAlertLabels(alarmId int, newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
Mohamed Abukar48ac36c2022-03-27 07:47:08 +0000329 if strings.Contains(newAlarm.ApplicationId, "FM") {
330 app.Logger.Info("Alarm '%d' is originated from FM, ignoring ...", alarmId)
331 return models.LabelSet{}, models.LabelSet{}
332 }
333
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200334 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200335 amLabels := models.LabelSet{
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200336 "status": string(status),
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200337 "alertname": alarmDef.AlarmText,
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200338 "severity": string(newAlarm.PerceivedSeverity),
Mohamed Abukar105030f2020-10-22 18:08:34 +0300339 "service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar273c8e82021-10-21 10:02:07 +0300340 "info": newAlarm.IdentifyingInfo,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300341 "system_name": "RIC",
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200342 }
343 amAnnotations := models.LabelSet{
vipinba2ef5b2020-11-06 11:24:48 +0000344 "alarm_id": fmt.Sprintf("%d", alarmId),
Mohamed Abukar105030f2020-10-22 18:08:34 +0300345 "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem),
346 "event_type": alarmDef.EventType,
347 "identifying_info": newAlarm.IdentifyingInfo,
348 "additional_info": newAlarm.AdditionalInfo,
349 "description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
Mohamed Abukar48ac36c2022-03-27 07:47:08 +0000350 "summary": newAlarm.IdentifyingInfo,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300351 "instructions": alarmDef.OperationInstructions,
352 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200353 }
354
355 return amLabels, amAnnotations
356}
357
Juha Hyttinen5f9b88b2023-06-02 10:04:15 +0300358func (a *AlarmManager) NewAlertmanagerClient() *client.AlertmanagerAPI {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200359 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
360 return client.New(cr, strfmt.Default)
361}
362
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000363func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
Mohamed Abukar48ac36c2022-03-27 07:47:08 +0000364 if len(amLabels) == 0 || len(amAnnotations) == 0 {
365 return &alert.PostAlertsOK{}, nil
366 }
367
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200368 pa := &models.PostableAlert{
369 Alert: models.Alert{
Mohamed Abukar48ac36c2022-03-27 07:47:08 +0000370 GeneratorURL: strfmt.URI("http://service-ricplt-alarmmanager-http.ricplt:8080/ric/v1/alarms"),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200371 Labels: amLabels,
372 },
373 Annotations: amAnnotations,
374 }
375 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
376
Mohamed Abukar643241f2020-06-09 15:26:00 +0300377 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200378 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
379 if err != nil {
Mohamed Abukar48ac36c2022-03-27 07:47:08 +0000380 app.Logger.Error("Posting alerts to '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200381 }
382 return ok, err
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200383}
384
Mohamed Abukar48ac36c2022-03-27 07:47:08 +0000385func (a *AlarmManager) GetAlerts() (*alert.GetAlertsOK, error) {
386 active := true
387 alertParams := alert.NewGetAlertsParams()
388 alertParams.Active = &active
389 resp, err := a.NewAlertmanagerClient().Alert.GetAlerts(alertParams)
390 if err != nil {
391 app.Logger.Error("Getting alerts from '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
392 return resp, nil
393 }
394 app.Logger.Info("GetAlerts: %+v", resp)
395
396 return resp, err
397}
398
399func (a *AlarmManager) ProcessAlerts() {
400 resp, err := a.GetAlerts()
401 if err != nil || resp == nil {
402 app.Logger.Error("Getting alerts from '%s/%s' failed: %v", a.amHost, a.amBaseUrl, err)
403 return
404 }
405
406 var buildAlarm = func(alert *models.GettableAlert) alarm.Alarm {
407 a := alarm.Alarm{ManagedObjectId: "SEP", ApplicationId: "FM"}
408
409 if v, ok := alert.Alert.Labels["specific_problem"]; ok {
410 sp, _ := strconv.Atoi(v)
411 a.SpecificProblem = sp
412 }
413
414 if v, ok := alert.Alert.Labels["severity"]; ok {
415 a.PerceivedSeverity = alarm.Severity(fmt.Sprint(v))
416 }
417
418 if v, ok := alert.Alert.Labels["name"]; ok {
419 a.AdditionalInfo = v
420 }
421
422 if v, ok := alert.Annotations["description"]; ok {
423 a.IdentifyingInfo = v
424 }
425
426 return a
427 }
428
429 // Remove cleared alerts first
430 for _, m := range a.activeAlarms {
431 if m.ApplicationId != "FM" {
432 continue
433 }
434
435 found := false
436 for _, alert := range resp.Payload {
437 v, ok := alert.Alert.Labels["service"]
438 if !ok || !strings.Contains(v, "FM") {
439 continue
440 }
441
442 a := buildAlarm(alert)
443 if m.ManagedObjectId == a.ManagedObjectId && m.ApplicationId == a.ApplicationId &&
444 m.SpecificProblem == a.SpecificProblem && m.IdentifyingInfo == a.IdentifyingInfo {
445 found = true
446 break
447 }
448 }
449
450 if !found {
451 m.AlarmAction = alarm.AlarmActionClear
452 go a.ProcessAlarm(&m)
453 }
454 }
455
456 for _, alert := range resp.Payload {
457 v, ok := alert.Alert.Labels["service"]
458 if ok && strings.Contains(v, "FM") {
459 m := alarm.AlarmMessage{Alarm: buildAlarm(alert), AlarmAction: alarm.AlarmActionRaise, AlarmTime: time.Now().UnixNano()}
460 go a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}})
461 }
462 }
463}
464
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000465func (a *AlarmManager) StatusCB() bool {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200466 if !a.rmrReady {
467 app.Logger.Info("RMR not ready yet!")
468 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200469 return a.rmrReady
Mohamed Abukar3e038152020-03-04 10:01:45 +0200470}
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300471
vipin541eb502020-09-22 12:04:59 +0000472func (a *AlarmManager) ConfigChangeCB(configparam string) {
vipin541eb502020-09-22 12:04:59 +0000473 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
Anssi Mannilaac56b892020-11-20 14:50:00 +0200474 if a.maxActiveAlarms == 0 {
475 a.maxActiveAlarms = 5000
476 }
477
vipin541eb502020-09-22 12:04:59 +0000478 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
Anssi Mannilaac56b892020-11-20 14:50:00 +0200479 if a.maxAlarmHistory == 0 {
480 a.maxAlarmHistory = 20000
481 }
Mohamed Abukar2a74ea52021-03-07 10:10:58 +0200482
Anssi Mannila4450a892020-09-25 10:24:29 +0300483 a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
484 a.amHost = viper.GetString("controls.promAlertManager.address")
vipin541eb502020-09-22 12:04:59 +0000485
486 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
487 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
Anssi Mannila4450a892020-09-25 10:24:29 +0300488 app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval)
489 app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost)
vipin541eb502020-09-22 12:04:59 +0000490
491 return
492}
493
vipin14323a92020-09-25 10:03:43 +0000494func (a *AlarmManager) ReadAlarmDefinitionFromJson() {
495
496 filename := os.Getenv("DEF_FILE")
497 file, err := ioutil.ReadFile(filename)
498 if err == nil {
499 data := RicAlarmDefinitions{}
500 err = json.Unmarshal([]byte(file), &data)
501 if err == nil {
502 for _, alarmDefinition := range data.AlarmDefinitions {
503 _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId]
504 if exists {
505 app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId)
506 } else {
507 app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId)
508 ricAlarmDefintion := new(alarm.AlarmDefinition)
509 ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId
510 ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText
511 ricAlarmDefintion.EventType = alarmDefinition.EventType
512 ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions
Anssi Mannila18fd03c2020-10-29 10:01:00 +0200513 ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay
514 ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200515 ricAlarmDefintion.TimeToLive = alarmDefinition.TimeToLive
vipin14323a92020-09-25 10:03:43 +0000516 alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion
517 }
518 }
519 } else {
vipin6f73fa32020-10-06 06:51:53 +0000520 app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err)
vipin14323a92020-09-25 10:03:43 +0000521 }
522 } else {
vipin6f73fa32020-10-06 06:51:53 +0000523 app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err)
vipin14323a92020-09-25 10:03:43 +0000524 }
525}
526
vipin78b2b0a2020-10-28 10:10:18 +0000527func (a *AlarmManager) ReadAlarmInfoFromPersistentVolume() {
528 var alarmpersistentinfo AlarmPersistentInfo
529 byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile)
530 if rerr != nil {
manoj166f87262022-05-27 08:06:58 -0400531 app.Logger.Info("Unable to read alarminfo.json : %v", rerr)
vipin78b2b0a2020-10-28 10:10:18 +0000532 } else {
533 err := json.Unmarshal(byteValue, &alarmpersistentinfo)
534 if err != nil {
535 app.Logger.Error("alarmpersistentinfo json unmarshal error %v", err)
536 } else {
537 a.uniqueAlarmId = alarmpersistentinfo.UniqueAlarmId
538 a.activeAlarms = make([]AlarmNotification, len(alarmpersistentinfo.ActiveAlarms))
539 a.alarmHistory = make([]AlarmNotification, len(alarmpersistentinfo.AlarmHistory))
540 copy(a.activeAlarms, alarmpersistentinfo.ActiveAlarms)
541 copy(a.alarmHistory, alarmpersistentinfo.AlarmHistory)
542 }
543 }
544}
545
546func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() {
547 var alarmpersistentinfo AlarmPersistentInfo
548 alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId
549 alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms))
550 alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory))
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200551
vipin78b2b0a2020-10-28 10:10:18 +0000552 copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms)
553 copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory)
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200554
vipin78b2b0a2020-10-28 10:10:18 +0000555 wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ")
556 if err != nil {
557 app.Logger.Error("alarmpersistentinfo json marshal error %v", err)
558 } else {
559 werr := ioutil.WriteFile(a.alarmInfoPvFile, wdata, 0777)
560 if werr != nil {
561 app.Logger.Error("alarminfo.json file write error %v", werr)
562 }
563 }
564}
565
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200566func (a *AlarmManager) Run(sdlcheck bool, ttlInterval int) {
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000567 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300568 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
569 app.Resource.InjectStatusCb(a.StatusCB)
vipin541eb502020-09-22 12:04:59 +0000570 app.AddConfigChangeListener(a.ConfigChangeCB)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300571
vipin54a3a4f2020-09-23 12:19:58 +0000572 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
vipin14323a92020-09-25 10:03:43 +0000573 a.ReadAlarmDefinitionFromJson()
vipin54a3a4f2020-09-23 12:19:58 +0000574
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200575 a.InjectRoutes()
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300576
577 // Start background timer for re-raising alerts
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300578 go a.StartAlertTimer()
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200579 go a.StartTTLTimer(ttlInterval)
580
vipin541eb502020-09-22 12:04:59 +0000581 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300582
vipin78b2b0a2020-10-28 10:10:18 +0000583 a.ReadAlarmInfoFromPersistentVolume()
584
Juha Hyttinen5f9b88b2023-06-02 10:04:15 +0300585 time.Sleep(8 * time.Second)
586 app.RunWithRunParams(a, app.RunParams{SdlCheck: sdlcheck, DisableAlarmClient: true})
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300587}
588
Mohamed Abukar105030f2020-10-22 18:08:34 +0300589func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300590 if alertInterval == 0 {
591 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
592 }
593
594 if amHost == "" {
595 amHost = viper.GetString("controls.promAlertManager.address")
596 }
597
Anssi Mannilaac56b892020-11-20 14:50:00 +0200598 maxActiveAlarms := app.Config.GetInt("controls.maxActiveAlarms")
599 if maxActiveAlarms == 0 {
600 maxActiveAlarms = 5000
601 }
602
603 maxAlarmHistory := app.Config.GetInt("controls.maxAlarmHistory")
604 if maxAlarmHistory == 0 {
605 maxAlarmHistory = 20000
606 }
607
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000608 return &AlarmManager{
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300609 rmrReady: false,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300610 postClear: clearAlarm,
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300611 amHost: amHost,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300612 amBaseUrl: app.Config.GetString("controls.promAlertManager.baseUrl"),
613 amSchemes: []string{app.Config.GetString("controls.promAlertManager.schemes")},
Mohamed Abukarf5a8e712020-10-19 16:58:17 +0300614 alertInterval: alertInterval,
Mohamed Abukar105030f2020-10-22 18:08:34 +0300615 activeAlarms: make([]AlarmNotification, 0),
616 alarmHistory: make([]AlarmNotification, 0),
617 uniqueAlarmId: 0,
Anssi Mannilaac56b892020-11-20 14:50:00 +0200618 maxActiveAlarms: maxActiveAlarms,
619 maxAlarmHistory: maxAlarmHistory,
vipin6f73fa32020-10-06 06:51:53 +0000620 exceededActiveAlarmOn: false,
621 exceededAlarmHistoryOn: false,
vipin78b2b0a2020-10-28 10:10:18 +0000622 alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"),
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300623 }
624}
625
626// Main function
627func main() {
Mohamed Abukar3649fae2020-10-30 23:51:39 +0200628 NewAlarmManager("", 0, true).Run(true, 10)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300629}