Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2020 AT&T Intellectual Property. |
| 3 | * Copyright (c) 2020 Nokia. |
| 4 | * |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | * you may not use this file except in compliance with the License. |
| 7 | * You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | * |
| 17 | * This source code is part of the near-RT RIC (RAN Intelligent Controller) |
| 18 | * platform project (RICP). |
| 19 | */ |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 20 | |
Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 21 | package main |
| 22 | |
| 23 | import ( |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 24 | "bytes" |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 25 | "encoding/json" |
| 26 | "fmt" |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 27 | "io/ioutil" |
| 28 | "net/http" |
| 29 | "os" |
| 30 | "time" |
| 31 | |
vipin | 14323a9 | 2020-09-25 10:03:43 +0000 | [diff] [blame] | 32 | "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm" |
| 33 | app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp" |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 34 | clientruntime "github.com/go-openapi/runtime/client" |
| 35 | "github.com/go-openapi/strfmt" |
| 36 | "github.com/prometheus/alertmanager/api/v2/client" |
| 37 | "github.com/prometheus/alertmanager/api/v2/client/alert" |
| 38 | "github.com/prometheus/alertmanager/api/v2/models" |
| 39 | "github.com/spf13/viper" |
Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 40 | ) |
| 41 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 42 | func (a *AlarmManager) StartAlertTimer() { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 43 | tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond) |
| 44 | for range tick { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 45 | a.mutex.Lock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 46 | for _, m := range a.activeAlarms { |
| 47 | app.Logger.Info("Re-raising alarm: %v", m) |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame] | 48 | a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime)) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 49 | } |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 50 | a.mutex.Unlock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 51 | } |
| 52 | } |
| 53 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 54 | func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 55 | app.Logger.Info("Message received!") |
| 56 | |
| 57 | defer app.Rmr.Free(rp.Mbuf) |
| 58 | switch rp.Mtype { |
| 59 | case alarm.RIC_ALARM_UPDATE: |
| 60 | a.HandleAlarms(rp) |
| 61 | default: |
| 62 | app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype) |
| 63 | } |
| 64 | |
| 65 | return nil |
| 66 | } |
| 67 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 68 | func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 69 | var m alarm.AlarmMessage |
Lott, Christopher (cl778h) | 3e8e2aa | 2020-06-03 08:52:14 -0400 | [diff] [blame] | 70 | app.Logger.Info("Received JSON: %s", rp.Payload) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 71 | if err := json.Unmarshal(rp.Payload, &m); err != nil { |
| 72 | app.Logger.Error("json.Unmarshal failed: %v", err) |
| 73 | return nil, err |
| 74 | } |
| 75 | app.Logger.Info("newAlarm: %v", m) |
| 76 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 77 | return a.ProcessAlarm(&AlarmNotification{m, alarm.AlarmDefinition{}}) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 78 | } |
| 79 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 80 | func (a *AlarmManager) ProcessAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) { |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 81 | a.mutex.Lock() |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 82 | alarmDef := &alarm.AlarmDefinition{} |
| 83 | var ok bool |
| 84 | if alarmDef, ok = alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 85 | app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem) |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 86 | a.mutex.Unlock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 87 | return nil, nil |
| 88 | } |
| 89 | |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 90 | idx, found := a.IsMatchFound(m.Alarm) |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 91 | // Suppress duplicate alarms |
vipin | 4cedd50 | 2020-09-25 05:58:31 +0000 | [diff] [blame] | 92 | if found && m.AlarmAction == alarm.AlarmActionRaise { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 93 | app.Logger.Info("Duplicate alarm found, suppressing ...") |
Anssi Mannila | fe07bd1 | 2020-09-24 14:02:57 +0300 | [diff] [blame] | 94 | if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity { |
| 95 | // Duplicate with same severity found |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 96 | a.mutex.Unlock() |
Anssi Mannila | fe07bd1 | 2020-09-24 14:02:57 +0300 | [diff] [blame] | 97 | return nil, nil |
| 98 | } else { |
| 99 | // Remove duplicate with different severity |
| 100 | a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active") |
| 101 | } |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 102 | } |
| 103 | |
| 104 | // Clear alarm if found from active alarm list |
| 105 | if m.AlarmAction == alarm.AlarmActionClear { |
| 106 | if found { |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 107 | if a.ProcessClearAlarm(m, alarmDef, idx) == false { |
| 108 | return nil, nil |
vipin | 887efa3 | 2020-09-28 13:26:28 +0000 | [diff] [blame] | 109 | } |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 110 | if a.postClear { |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 111 | a.mutex.Unlock() |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 112 | |
| 113 | // Send alarm notification to NOMA, if enabled |
| 114 | if app.Config.GetBool("controls.noma.enabled") { |
| 115 | m.PerceivedSeverity = alarm.SeverityCleared |
| 116 | return a.PostAlarm(m) |
| 117 | } |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame] | 118 | return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime)) |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 119 | } |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 120 | } |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 121 | app.Logger.Info("No matching active alarm found, suppressing ...") |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 122 | a.mutex.Unlock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 123 | return nil, nil |
| 124 | } |
| 125 | |
| 126 | // New alarm -> update active alarms and post to Alert Manager |
| 127 | if m.AlarmAction == alarm.AlarmActionRaise { |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 128 | if a.ProcessRaiseAlarm(m, alarmDef) == false { |
| 129 | return nil, nil |
| 130 | } |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 131 | // Send alarm notification to NOMA, if enabled |
| 132 | if app.Config.GetBool("controls.noma.enabled") { |
| 133 | return a.PostAlarm(m) |
| 134 | } |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame] | 135 | return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime)) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 136 | } |
| 137 | |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 138 | a.mutex.Unlock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 139 | return nil, nil |
| 140 | } |
| 141 | |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 142 | func (a *AlarmManager)ProcessRaiseAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition) bool { |
| 143 | app.Logger.Debug("Raise alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m) |
| 144 | // RaiseDelay > 0 in an alarm object in active alarm table indicates that raise delay is still ongoing for the alarm |
| 145 | m.AlarmDefinition.RaiseDelay = alarmDef.RaiseDelay |
| 146 | a.UpdateAlarmFields(a.GenerateAlarmId(), m) |
| 147 | a.UpdateActiveAlarmList(m) |
| 148 | a.mutex.Unlock() |
| 149 | if alarmDef.RaiseDelay > 0 { |
| 150 | timerDelay(alarmDef.RaiseDelay) |
| 151 | a.mutex.Lock() |
| 152 | // Alarm may have been deleted from active alarms table during delay or table index may have changed |
| 153 | idx, found := a.IsMatchFound(m.Alarm) |
| 154 | if found { |
| 155 | // Alarm is not showed in active alarms or alarm history via CLI before RaiseDelay has elapsed, i.e the value is 0 |
| 156 | a.activeAlarms[idx].AlarmDefinition.RaiseDelay = 0 |
| 157 | app.Logger.Debug("Raise after delay alarmDef.RaiseDelay = %v, AlarmNotification = %v", alarmDef.RaiseDelay, *m) |
| 158 | a.mutex.Unlock() |
| 159 | } else { |
| 160 | app.Logger.Debug("Alarm deleted during raise delay. AlarmNotification = %v", *m) |
| 161 | a.mutex.Unlock() |
| 162 | return false |
| 163 | } |
| 164 | } |
| 165 | m.AlarmDefinition.RaiseDelay = 0 |
| 166 | a.UpdateAlarmHistoryList(m) |
| 167 | a.WriteAlarmInfoToPersistentVolume() |
| 168 | return true |
| 169 | } |
| 170 | |
| 171 | func (a *AlarmManager)ProcessClearAlarm(m *AlarmNotification, alarmDef *alarm.AlarmDefinition, idx int) bool { |
| 172 | app.Logger.Debug("Clear alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m) |
| 173 | if alarmDef.ClearDelay > 0 { |
| 174 | a.mutex.Unlock() |
| 175 | timerDelay(alarmDef.ClearDelay) |
| 176 | app.Logger.Debug("Clear after delay alarmDef.ClearDelay = %v, AlarmNotification = %v", alarmDef.ClearDelay, *m) |
| 177 | a.mutex.Lock() |
| 178 | // Another alarm clear may have happened during delay and active alarms table index changed |
| 179 | var found bool |
| 180 | idx, found = a.IsMatchFound(m.Alarm) |
| 181 | if !found { |
| 182 | app.Logger.Debug("Alarm not anymore in the active alarms table. AlarmNotification = %v", *m) |
| 183 | a.mutex.Unlock() |
| 184 | return false |
| 185 | } |
| 186 | } |
| 187 | a.UpdateAlarmFields(a.activeAlarms[idx].AlarmId, m) |
| 188 | a.alarmHistory = append(a.alarmHistory, *m) |
| 189 | a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active") |
| 190 | if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) { |
| 191 | app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold") |
| 192 | a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history") |
| 193 | } |
| 194 | |
| 195 | if a.exceededActiveAlarmOn && m.Alarm.SpecificProblem == alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD { |
| 196 | a.exceededActiveAlarmOn = false |
| 197 | } |
| 198 | |
| 199 | if a.exceededAlarmHistoryOn && m.Alarm.SpecificProblem == alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD { |
| 200 | a.exceededAlarmHistoryOn = false |
| 201 | } |
| 202 | a.WriteAlarmInfoToPersistentVolume() |
| 203 | return true |
| 204 | } |
| 205 | |
// timerDelay blocks the calling goroutine for the given number of seconds.
// A zero or negative delay returns immediately.
func timerDelay(delay int) {
	time.Sleep(time.Duration(delay) * time.Second)
}
| 210 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 211 | func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 212 | for i, m := range a.activeAlarms { |
| 213 | if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId && |
Mohamed Abukar | 0c38973 | 2020-09-17 14:47:50 +0300 | [diff] [blame] | 214 | m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 215 | return i, true |
| 216 | } |
| 217 | } |
| 218 | return -1, false |
| 219 | } |
| 220 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 221 | func (a *AlarmManager) RemoveAlarm(alarms []AlarmNotification, i int, listName string) []AlarmNotification { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 222 | app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 223 | copy(alarms[i:], alarms[i+1:]) |
| 224 | return alarms[:len(alarms)-1] |
| 225 | } |
| 226 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 227 | func (a *AlarmManager) GenerateAlarmId() int { |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 228 | a.uniqueAlarmId++ // @todo: generate a unique ID |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 229 | return a.uniqueAlarmId |
| 230 | } |
| 231 | |
| 232 | func (a *AlarmManager) UpdateAlarmFields(alarmId int, newAlarm *AlarmNotification) { |
| 233 | alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem] |
| 234 | newAlarm.AlarmId = alarmId |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 235 | newAlarm.AlarmText = alarmDef.AlarmText |
| 236 | newAlarm.EventType = alarmDef.EventType |
| 237 | } |
| 238 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 239 | func (a *AlarmManager) GenerateThresholdAlarm(sp int, data string) bool { |
| 240 | thresholdAlarm := a.alarmClient.NewAlarm(sp, alarm.SeverityWarning, "threshold", data) |
| 241 | thresholdMessage := alarm.AlarmMessage{ |
| 242 | Alarm: thresholdAlarm, |
| 243 | AlarmAction: alarm.AlarmActionRaise, |
| 244 | AlarmTime: (time.Now().UnixNano()), |
| 245 | } |
vipin | 78b2b0a | 2020-10-28 10:10:18 +0000 | [diff] [blame] | 246 | alarmDef := alarm.RICAlarmDefinitions[sp] |
| 247 | alarmId := a.GenerateAlarmId() |
| 248 | alarmDef.AlarmId = alarmId |
| 249 | a.activeAlarms = append(a.activeAlarms, AlarmNotification{thresholdMessage, *alarmDef}) |
| 250 | a.alarmHistory = append(a.alarmHistory, AlarmNotification{thresholdMessage, *alarmDef}) |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 251 | |
| 252 | return true |
| 253 | } |
| 254 | |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 255 | func (a *AlarmManager) UpdateActiveAlarmList(newAlarm *AlarmNotification) { |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 256 | /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised. |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 257 | The attempt to raise the alarm next time will be suppressed when found as duplicate. */ |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 258 | if (len(a.activeAlarms) >= a.maxActiveAlarms) && (a.exceededActiveAlarmOn == false) { |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 259 | app.Logger.Warn("active alarm count exceeded maxActiveAlarms threshold") |
| 260 | a.exceededActiveAlarmOn = a.GenerateThresholdAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, "active") |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 261 | } |
| 262 | |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 263 | // @todo: For now just keep the active alarms in-memory. Use SDL later for persistence |
| 264 | a.activeAlarms = append(a.activeAlarms, *newAlarm) |
| 265 | } |
| 266 | |
| 267 | func (a *AlarmManager) UpdateAlarmHistoryList(newAlarm *AlarmNotification) { |
| 268 | /* If maximum number of events in alarm history is reached, an error log writing is made, |
| 269 | and new alarm indicating the problem is raised. The attempt to add new event time will |
| 270 | be suppressed */ |
| 271 | |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 272 | if (len(a.alarmHistory) >= a.maxAlarmHistory) && (a.exceededAlarmHistoryOn == false) { |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 273 | app.Logger.Warn("alarm history count exceeded maxAlarmHistory threshold") |
| 274 | a.exceededAlarmHistoryOn = a.GenerateThresholdAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, "history") |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 275 | } |
| 276 | |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 277 | // @todo: For now just keep the alarms history in-memory. Use SDL later for persistence |
Mohamed Abukar | 0c38973 | 2020-09-17 14:47:50 +0300 | [diff] [blame] | 278 | a.alarmHistory = append(a.alarmHistory, *newAlarm) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 279 | } |
| 280 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 281 | func (a *AlarmManager) PostAlarm(m *AlarmNotification) (*alert.PostAlertsOK, error) { |
| 282 | result, err := json.Marshal(m) |
| 283 | if err != nil { |
| 284 | app.Logger.Info("json.Marshal failed: %v", err) |
| 285 | return nil, err |
| 286 | } |
| 287 | |
| 288 | fullUrl := fmt.Sprintf("%s/%s", app.Config.GetString("controls.noma.host"), app.Config.GetString("controls.noma.alarmUrl")) |
| 289 | app.Logger.Info("Posting alarm to '%s'", fullUrl) |
| 290 | |
| 291 | resp, err := http.Post(fullUrl, "application/json", bytes.NewReader(result)) |
| 292 | if err != nil || resp == nil { |
| 293 | app.Logger.Info("Unable to post alarm to '%s': %v", fullUrl, err) |
| 294 | } |
| 295 | |
| 296 | return nil, err |
| 297 | } |
| 298 | |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame] | 299 | func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 300 | alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem] |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 301 | amLabels := models.LabelSet{ |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 302 | "status": string(status), |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 303 | "alertname": alarmDef.AlarmText, |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 304 | "severity": string(newAlarm.PerceivedSeverity), |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 305 | "service": fmt.Sprintf("%s/%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId), |
| 306 | "system_name": "RIC", |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 307 | } |
| 308 | amAnnotations := models.LabelSet{ |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 309 | "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId), |
| 310 | "specific_problem": fmt.Sprintf("%d", newAlarm.SpecificProblem), |
| 311 | "event_type": alarmDef.EventType, |
| 312 | "identifying_info": newAlarm.IdentifyingInfo, |
| 313 | "additional_info": newAlarm.AdditionalInfo, |
| 314 | "description": fmt.Sprintf("%s:%s", newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo), |
| 315 | "instructions": alarmDef.OperationInstructions, |
| 316 | "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")), |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 317 | } |
| 318 | |
| 319 | return amLabels, amAnnotations |
| 320 | } |
| 321 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 322 | func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 323 | cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes) |
| 324 | return client.New(cr, strfmt.Default) |
| 325 | } |
| 326 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 327 | func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 328 | pa := &models.PostableAlert{ |
| 329 | Alert: models.Alert{ |
| 330 | GeneratorURL: strfmt.URI(""), |
| 331 | Labels: amLabels, |
| 332 | }, |
| 333 | Annotations: amAnnotations, |
| 334 | } |
| 335 | alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa}) |
| 336 | |
Mohamed Abukar | 643241f | 2020-06-09 15:26:00 +0300 | [diff] [blame] | 337 | app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations) |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 338 | ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams) |
| 339 | if err != nil { |
| 340 | app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err) |
| 341 | } |
| 342 | return ok, err |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 343 | } |
| 344 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 345 | func (a *AlarmManager) StatusCB() bool { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 346 | if !a.rmrReady { |
| 347 | app.Logger.Info("RMR not ready yet!") |
| 348 | } |
| 349 | |
| 350 | return a.rmrReady |
Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 351 | } |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 352 | |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 353 | func (a *AlarmManager) ConfigChangeCB(configparam string) { |
| 354 | |
| 355 | a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms") |
| 356 | a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory") |
Anssi Mannila | 4450a89 | 2020-09-25 10:24:29 +0300 | [diff] [blame] | 357 | a.alertInterval = viper.GetInt("controls.promAlertManager.alertInterval") |
| 358 | a.amHost = viper.GetString("controls.promAlertManager.address") |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 359 | |
| 360 | app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms) |
| 361 | app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory) |
Anssi Mannila | 4450a89 | 2020-09-25 10:24:29 +0300 | [diff] [blame] | 362 | app.Logger.Debug("ConfigChangeCB: alertInterval %v", a.alertInterval) |
| 363 | app.Logger.Debug("ConfigChangeCB: amHost = %v", a.amHost) |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 364 | |
| 365 | return |
| 366 | } |
| 367 | |
vipin | 14323a9 | 2020-09-25 10:03:43 +0000 | [diff] [blame] | 368 | func (a *AlarmManager) ReadAlarmDefinitionFromJson() { |
| 369 | |
| 370 | filename := os.Getenv("DEF_FILE") |
| 371 | file, err := ioutil.ReadFile(filename) |
| 372 | if err == nil { |
| 373 | data := RicAlarmDefinitions{} |
| 374 | err = json.Unmarshal([]byte(file), &data) |
| 375 | if err == nil { |
| 376 | for _, alarmDefinition := range data.AlarmDefinitions { |
| 377 | _, exists := alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] |
| 378 | if exists { |
| 379 | app.Logger.Error("ReadAlarmDefinitionFromJson: alarm definition already exists for %v", alarmDefinition.AlarmId) |
| 380 | } else { |
| 381 | app.Logger.Debug("ReadAlarmDefinitionFromJson: alarm %v", alarmDefinition.AlarmId) |
| 382 | ricAlarmDefintion := new(alarm.AlarmDefinition) |
| 383 | ricAlarmDefintion.AlarmId = alarmDefinition.AlarmId |
| 384 | ricAlarmDefintion.AlarmText = alarmDefinition.AlarmText |
| 385 | ricAlarmDefintion.EventType = alarmDefinition.EventType |
| 386 | ricAlarmDefintion.OperationInstructions = alarmDefinition.OperationInstructions |
Anssi Mannila | 18fd03c | 2020-10-29 10:01:00 +0200 | [diff] [blame^] | 387 | ricAlarmDefintion.RaiseDelay = alarmDefinition.RaiseDelay |
| 388 | ricAlarmDefintion.ClearDelay = alarmDefinition.ClearDelay |
vipin | 14323a9 | 2020-09-25 10:03:43 +0000 | [diff] [blame] | 389 | alarm.RICAlarmDefinitions[alarmDefinition.AlarmId] = ricAlarmDefintion |
| 390 | } |
| 391 | } |
| 392 | } else { |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 393 | app.Logger.Error("ReadAlarmDefinitionFromJson: json.Unmarshal failed with error %v", err) |
vipin | 14323a9 | 2020-09-25 10:03:43 +0000 | [diff] [blame] | 394 | } |
| 395 | } else { |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 396 | app.Logger.Error("ReadAlarmDefinitionFromJson: ioutil.ReadFile failed with error %v", err) |
vipin | 14323a9 | 2020-09-25 10:03:43 +0000 | [diff] [blame] | 397 | } |
| 398 | } |
| 399 | |
vipin | 78b2b0a | 2020-10-28 10:10:18 +0000 | [diff] [blame] | 400 | func (a *AlarmManager) ReadAlarmInfoFromPersistentVolume() { |
| 401 | var alarmpersistentinfo AlarmPersistentInfo |
| 402 | byteValue, rerr := ioutil.ReadFile(a.alarmInfoPvFile) |
| 403 | if rerr != nil { |
| 404 | app.Logger.Error("ararminfo.json file read error %v", rerr) |
| 405 | } else { |
| 406 | err := json.Unmarshal(byteValue, &alarmpersistentinfo) |
| 407 | if err != nil { |
| 408 | app.Logger.Error("alarmpersistentinfo json unmarshal error %v", err) |
| 409 | } else { |
| 410 | a.uniqueAlarmId = alarmpersistentinfo.UniqueAlarmId |
| 411 | a.activeAlarms = make([]AlarmNotification, len(alarmpersistentinfo.ActiveAlarms)) |
| 412 | a.alarmHistory = make([]AlarmNotification, len(alarmpersistentinfo.AlarmHistory)) |
| 413 | copy(a.activeAlarms, alarmpersistentinfo.ActiveAlarms) |
| 414 | copy(a.alarmHistory, alarmpersistentinfo.AlarmHistory) |
| 415 | } |
| 416 | } |
| 417 | } |
| 418 | |
| 419 | func (a *AlarmManager) WriteAlarmInfoToPersistentVolume() { |
| 420 | var alarmpersistentinfo AlarmPersistentInfo |
| 421 | alarmpersistentinfo.UniqueAlarmId = a.uniqueAlarmId |
| 422 | alarmpersistentinfo.ActiveAlarms = make([]AlarmNotification, len(a.activeAlarms)) |
| 423 | alarmpersistentinfo.AlarmHistory = make([]AlarmNotification, len(a.alarmHistory)) |
| 424 | copy(alarmpersistentinfo.ActiveAlarms, a.activeAlarms) |
| 425 | copy(alarmpersistentinfo.AlarmHistory, a.alarmHistory) |
| 426 | wdata, err := json.MarshalIndent(alarmpersistentinfo, "", " ") |
| 427 | if err != nil { |
| 428 | app.Logger.Error("alarmpersistentinfo json marshal error %v", err) |
| 429 | } else { |
| 430 | werr := ioutil.WriteFile(a.alarmInfoPvFile, wdata, 0777) |
| 431 | if werr != nil { |
| 432 | app.Logger.Error("alarminfo.json file write error %v", werr) |
| 433 | } |
| 434 | } |
| 435 | } |
| 436 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 437 | func (a *AlarmManager) Run(sdlcheck bool) { |
| 438 | app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash)) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 439 | app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true) |
| 440 | app.Resource.InjectStatusCb(a.StatusCB) |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 441 | app.AddConfigChangeListener(a.ConfigChangeCB) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 442 | |
vipin | 54a3a4f | 2020-09-23 12:19:58 +0000 | [diff] [blame] | 443 | alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition) |
vipin | 14323a9 | 2020-09-25 10:03:43 +0000 | [diff] [blame] | 444 | a.ReadAlarmDefinitionFromJson() |
vipin | 54a3a4f | 2020-09-23 12:19:58 +0000 | [diff] [blame] | 445 | |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 446 | app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST") |
| 447 | app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE") |
| 448 | app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET") |
| 449 | app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET") |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 450 | app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST") |
| 451 | app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET") |
vipin | 54a3a4f | 2020-09-23 12:19:58 +0000 | [diff] [blame] | 452 | app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST") |
| 453 | app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE") |
| 454 | app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET") |
vipin | 4cedd50 | 2020-09-25 05:58:31 +0000 | [diff] [blame] | 455 | app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.GetAlarmDefinition, "GET") |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 456 | |
| 457 | // Start background timer for re-raising alerts |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 458 | go a.StartAlertTimer() |
vipin | 541eb50 | 2020-09-22 12:04:59 +0000 | [diff] [blame] | 459 | a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER") |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 460 | |
vipin | 78b2b0a | 2020-10-28 10:10:18 +0000 | [diff] [blame] | 461 | a.ReadAlarmInfoFromPersistentVolume() |
| 462 | |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 463 | app.RunWithParams(a, sdlcheck) |
| 464 | } |
| 465 | |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 466 | func NewAlarmManager(amHost string, alertInterval int, clearAlarm bool) *AlarmManager { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 467 | if alertInterval == 0 { |
| 468 | alertInterval = viper.GetInt("controls.promAlertManager.alertInterval") |
| 469 | } |
| 470 | |
| 471 | if amHost == "" { |
| 472 | amHost = viper.GetString("controls.promAlertManager.address") |
| 473 | } |
| 474 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 475 | return &AlarmManager{ |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 476 | rmrReady: false, |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 477 | postClear: clearAlarm, |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 478 | amHost: amHost, |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 479 | amBaseUrl: app.Config.GetString("controls.promAlertManager.baseUrl"), |
| 480 | amSchemes: []string{app.Config.GetString("controls.promAlertManager.schemes")}, |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 481 | alertInterval: alertInterval, |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 482 | activeAlarms: make([]AlarmNotification, 0), |
| 483 | alarmHistory: make([]AlarmNotification, 0), |
| 484 | uniqueAlarmId: 0, |
Mohamed Abukar | f5a8e71 | 2020-10-19 16:58:17 +0300 | [diff] [blame] | 485 | maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"), |
| 486 | maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"), |
vipin | 6f73fa3 | 2020-10-06 06:51:53 +0000 | [diff] [blame] | 487 | exceededActiveAlarmOn: false, |
| 488 | exceededAlarmHistoryOn: false, |
vipin | 78b2b0a | 2020-10-28 10:10:18 +0000 | [diff] [blame] | 489 | alarmInfoPvFile: app.Config.GetString("controls.alarmInfoPvFile"), |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 490 | } |
| 491 | } |
| 492 | |
| 493 | // Main function |
| 494 | func main() { |
Mohamed Abukar | 105030f | 2020-10-22 18:08:34 +0300 | [diff] [blame] | 495 | NewAlarmManager("", 0, true).Run(true) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 496 | } |