Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2020 AT&T Intellectual Property. |
| 3 | * Copyright (c) 2020 Nokia. |
| 4 | * |
| 5 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | * you may not use this file except in compliance with the License. |
| 7 | * You may obtain a copy of the License at |
| 8 | * |
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | * |
| 11 | * Unless required by applicable law or agreed to in writing, software |
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | * See the License for the specific language governing permissions and |
| 15 | * limitations under the License. |
| 16 | * |
| 17 | * This source code is part of the near-RT RIC (RAN Intelligent Controller) |
| 18 | * platform project (RICP). |
| 19 | */ |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 20 | |
Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 21 | package main |
| 22 | |
| 23 | import ( |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 24 | "encoding/json" |
| 25 | "fmt" |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 26 | "time" |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 27 | |
| 28 | clientruntime "github.com/go-openapi/runtime/client" |
| 29 | "github.com/go-openapi/strfmt" |
| 30 | "github.com/prometheus/alertmanager/api/v2/client" |
| 31 | "github.com/prometheus/alertmanager/api/v2/client/alert" |
| 32 | "github.com/prometheus/alertmanager/api/v2/models" |
| 33 | "github.com/spf13/viper" |
| 34 | |
| 35 | "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm" |
| 36 | app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp" |
Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 37 | ) |
| 38 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 39 | func (a *AlarmManager) StartAlertTimer() { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 40 | tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond) |
| 41 | for range tick { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 42 | a.mutex.Lock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 43 | for _, m := range a.activeAlarms { |
| 44 | app.Logger.Info("Re-raising alarm: %v", m) |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame^] | 45 | a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime)) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 46 | } |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 47 | a.mutex.Unlock() |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 48 | } |
| 49 | } |
| 50 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 51 | func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 52 | app.Logger.Info("Message received!") |
| 53 | |
| 54 | defer app.Rmr.Free(rp.Mbuf) |
| 55 | switch rp.Mtype { |
| 56 | case alarm.RIC_ALARM_UPDATE: |
| 57 | a.HandleAlarms(rp) |
| 58 | default: |
| 59 | app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype) |
| 60 | } |
| 61 | |
| 62 | return nil |
| 63 | } |
| 64 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 65 | func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 66 | var m alarm.AlarmMessage |
Lott, Christopher (cl778h) | 3e8e2aa | 2020-06-03 08:52:14 -0400 | [diff] [blame] | 67 | app.Logger.Info("Received JSON: %s", rp.Payload) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 68 | if err := json.Unmarshal(rp.Payload, &m); err != nil { |
| 69 | app.Logger.Error("json.Unmarshal failed: %v", err) |
| 70 | return nil, err |
| 71 | } |
| 72 | app.Logger.Info("newAlarm: %v", m) |
| 73 | |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 74 | return a.ProcessAlarm(&m) |
| 75 | } |
| 76 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 77 | func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 78 | if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 79 | app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 80 | return nil, nil |
| 81 | } |
| 82 | |
| 83 | // Suppress duplicate alarms |
| 84 | idx, found := a.IsMatchFound(m.Alarm) |
| 85 | if found && m.AlarmAction != alarm.AlarmActionClear { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 86 | app.Logger.Info("Duplicate alarm found, suppressing ...") |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 87 | return nil, nil |
| 88 | } |
| 89 | |
| 90 | // Clear alarm if found from active alarm list |
| 91 | if m.AlarmAction == alarm.AlarmActionClear { |
| 92 | if found { |
Mohamed Abukar | 3873c22 | 2020-09-21 09:54:57 +0300 | [diff] [blame] | 93 | a.alarmHistory = append(a.alarmHistory, *m) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 94 | a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active") |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 95 | |
| 96 | if a.postClear { |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame^] | 97 | return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime)) |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 98 | } |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 99 | } |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 100 | app.Logger.Info("No matching active alarm found, suppressing ...") |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 101 | return nil, nil |
| 102 | } |
| 103 | |
| 104 | // New alarm -> update active alarms and post to Alert Manager |
| 105 | if m.AlarmAction == alarm.AlarmActionRaise { |
Mohamed Abukar | 0c38973 | 2020-09-17 14:47:50 +0300 | [diff] [blame] | 106 | a.UpdateAlarmLists(m) |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame^] | 107 | return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime)) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 108 | } |
| 109 | |
| 110 | return nil, nil |
| 111 | } |
| 112 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 113 | func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 114 | for i, m := range a.activeAlarms { |
| 115 | if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId && |
Mohamed Abukar | 0c38973 | 2020-09-17 14:47:50 +0300 | [diff] [blame] | 116 | m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 117 | return i, true |
| 118 | } |
| 119 | } |
| 120 | return -1, false |
| 121 | } |
| 122 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 123 | func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 124 | a.mutex.Lock() |
| 125 | defer a.mutex.Unlock() |
| 126 | |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 127 | app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 128 | copy(alarms[i:], alarms[i+1:]) |
| 129 | return alarms[:len(alarms)-1] |
| 130 | } |
| 131 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 132 | func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 133 | a.mutex.Lock() |
| 134 | defer a.mutex.Unlock() |
| 135 | |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 136 | // If maximum number of active alarms is reached, purge the oldest alarm |
| 137 | if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") { |
| 138 | a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active") |
| 139 | } |
| 140 | |
| 141 | if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") { |
| 142 | a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history") |
| 143 | } |
| 144 | |
| 145 | // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence |
Mohamed Abukar | 0c38973 | 2020-09-17 14:47:50 +0300 | [diff] [blame] | 146 | a.activeAlarms = append(a.activeAlarms, *newAlarm) |
| 147 | a.alarmHistory = append(a.alarmHistory, *newAlarm) |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 148 | } |
| 149 | |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame^] | 150 | func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) { |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 151 | alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem] |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 152 | amLabels := models.LabelSet{ |
Mohamed Abukar | b2f29a8 | 2020-03-17 09:31:55 +0200 | [diff] [blame] | 153 | "status": string(status), |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 154 | "alertname": alarmDef.AlarmText, |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 155 | "severity": string(newAlarm.PerceivedSeverity), |
| 156 | "service": fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId), |
Mohamed Abukar | 643241f | 2020-06-09 15:26:00 +0300 | [diff] [blame] | 157 | "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId), |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 158 | } |
| 159 | amAnnotations := models.LabelSet{ |
Mohamed Abukar | 643241f | 2020-06-09 15:26:00 +0300 | [diff] [blame] | 160 | "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId), |
| 161 | "description": fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo), |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 162 | "additional_info": newAlarm.AdditionalInfo, |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 163 | "summary": alarmDef.EventType, |
| 164 | "instructions": alarmDef.OperationInstructions, |
Anssi Mannila | 2be5ac5 | 2020-09-22 12:17:05 +0300 | [diff] [blame^] | 165 | "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")), |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 166 | } |
| 167 | |
| 168 | return amLabels, amAnnotations |
| 169 | } |
| 170 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 171 | func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 172 | cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes) |
| 173 | return client.New(cr, strfmt.Default) |
| 174 | } |
| 175 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 176 | func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 177 | pa := &models.PostableAlert{ |
| 178 | Alert: models.Alert{ |
| 179 | GeneratorURL: strfmt.URI(""), |
| 180 | Labels: amLabels, |
| 181 | }, |
| 182 | Annotations: amAnnotations, |
| 183 | } |
| 184 | alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa}) |
| 185 | |
Mohamed Abukar | 643241f | 2020-06-09 15:26:00 +0300 | [diff] [blame] | 186 | app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations) |
Mohamed Abukar | af0c570 | 2020-03-11 10:29:40 +0200 | [diff] [blame] | 187 | ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams) |
| 188 | if err != nil { |
| 189 | app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err) |
| 190 | } |
| 191 | return ok, err |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 192 | } |
| 193 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 194 | func (a *AlarmManager) StatusCB() bool { |
Mohamed Abukar | 4e7e712 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 195 | if !a.rmrReady { |
| 196 | app.Logger.Info("RMR not ready yet!") |
| 197 | } |
| 198 | |
| 199 | return a.rmrReady |
Mohamed Abukar | 3e03815 | 2020-03-04 10:01:45 +0200 | [diff] [blame] | 200 | } |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 201 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 202 | func (a *AlarmManager) Run(sdlcheck bool) { |
| 203 | app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash)) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 204 | app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true) |
| 205 | app.Resource.InjectStatusCb(a.StatusCB) |
| 206 | |
| 207 | app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST") |
| 208 | app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE") |
| 209 | app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET") |
| 210 | app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET") |
| 211 | |
| 212 | // Start background timer for re-raising alerts |
| 213 | a.postClear = sdlcheck |
| 214 | go a.StartAlertTimer() |
| 215 | |
| 216 | app.RunWithParams(a, sdlcheck) |
| 217 | } |
| 218 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 219 | func NewAlarmManager(amHost string, alertInterval int) *AlarmManager { |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 220 | if alertInterval == 0 { |
| 221 | alertInterval = viper.GetInt("controls.promAlertManager.alertInterval") |
| 222 | } |
| 223 | |
| 224 | if amHost == "" { |
| 225 | amHost = viper.GetString("controls.promAlertManager.address") |
| 226 | } |
| 227 | |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 228 | return &AlarmManager{ |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 229 | rmrReady: false, |
| 230 | amHost: amHost, |
| 231 | amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"), |
| 232 | amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")}, |
| 233 | alertInterval: alertInterval, |
Mohamed Abukar | 0c38973 | 2020-09-17 14:47:50 +0300 | [diff] [blame] | 234 | activeAlarms: make([]alarm.AlarmMessage, 0), |
| 235 | alarmHistory: make([]alarm.AlarmMessage, 0), |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 236 | } |
| 237 | } |
| 238 | |
| 239 | // Main function |
| 240 | func main() { |
Abukar Mohamed | 121e8b6 | 2020-09-18 11:41:33 +0000 | [diff] [blame] | 241 | NewAlarmManager("", 0).Run(true) |
Mohamed Abukar | 540ceee | 2020-09-09 08:07:40 +0300 | [diff] [blame] | 242 | } |