blob: 8b4ac748337c598a14c132fd67f81c23ff66420a [file] [log] [blame]
Mohamed Abukar3e038152020-03-04 10:01:45 +02001/*
2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
19 */
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020020
Mohamed Abukar3e038152020-03-04 10:01:45 +020021package main
22
23import (
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020024 "encoding/json"
25 "fmt"
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020026 "time"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020027
28 clientruntime "github.com/go-openapi/runtime/client"
29 "github.com/go-openapi/strfmt"
30 "github.com/prometheus/alertmanager/api/v2/client"
31 "github.com/prometheus/alertmanager/api/v2/client/alert"
32 "github.com/prometheus/alertmanager/api/v2/models"
33 "github.com/spf13/viper"
34
35 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
36 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
Mohamed Abukar3e038152020-03-04 10:01:45 +020037)
38
Abukar Mohamed121e8b62020-09-18 11:41:33 +000039func (a *AlarmManager) StartAlertTimer() {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020040 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
41 for range tick {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020042 a.mutex.Lock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020043 for _, m := range a.activeAlarms {
44 app.Logger.Info("Re-raising alarm: %v", m)
Anssi Mannila2be5ac52020-09-22 12:17:05 +030045 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020046 }
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020047 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020048 }
49}
50
Abukar Mohamed121e8b62020-09-18 11:41:33 +000051func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020052 app.Logger.Info("Message received!")
53
54 defer app.Rmr.Free(rp.Mbuf)
55 switch rp.Mtype {
56 case alarm.RIC_ALARM_UPDATE:
57 a.HandleAlarms(rp)
58 default:
59 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
60 }
61
62 return nil
63}
64
Abukar Mohamed121e8b62020-09-18 11:41:33 +000065func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020066 var m alarm.AlarmMessage
Lott, Christopher (cl778h)3e8e2aa2020-06-03 08:52:14 -040067 app.Logger.Info("Received JSON: %s", rp.Payload)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020068 if err := json.Unmarshal(rp.Payload, &m); err != nil {
69 app.Logger.Error("json.Unmarshal failed: %v", err)
70 return nil, err
71 }
72 app.Logger.Info("newAlarm: %v", m)
73
Mohamed Abukar540ceee2020-09-09 08:07:40 +030074 return a.ProcessAlarm(&m)
75}
76
Abukar Mohamed121e8b62020-09-18 11:41:33 +000077func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020078 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
Mohamed Abukar540ceee2020-09-09 08:07:40 +030079 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020080 return nil, nil
81 }
82
83 // Suppress duplicate alarms
84 idx, found := a.IsMatchFound(m.Alarm)
Anssi Mannilafe07bd12020-09-24 14:02:57 +030085 if found && m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar540ceee2020-09-09 08:07:40 +030086 app.Logger.Info("Duplicate alarm found, suppressing ...")
Anssi Mannilafe07bd12020-09-24 14:02:57 +030087 if m.PerceivedSeverity == a.activeAlarms[idx].PerceivedSeverity {
88 // Duplicate with same severity found
89 return nil, nil
90 } else {
91 // Remove duplicate with different severity
92 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
93 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020094 }
95
Anssi Mannilafe07bd12020-09-24 14:02:57 +030096
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020097 // Clear alarm if found from active alarm list
98 if m.AlarmAction == alarm.AlarmActionClear {
99 if found {
Mohamed Abukar3873c222020-09-21 09:54:57 +0300100 a.alarmHistory = append(a.alarmHistory, *m)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300101 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200102
103 if a.postClear {
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300104 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200105 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200106 }
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300107 app.Logger.Info("No matching active alarm found, suppressing ...")
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200108 return nil, nil
109 }
110
111 // New alarm -> update active alarms and post to Alert Manager
112 if m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar0c389732020-09-17 14:47:50 +0300113 a.UpdateAlarmLists(m)
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300114 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200115 }
116
117 return nil, nil
118}
119
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000120func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200121 for i, m := range a.activeAlarms {
122 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
Mohamed Abukar0c389732020-09-17 14:47:50 +0300123 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200124 return i, true
125 }
126 }
127 return -1, false
128}
129
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000130func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200131 a.mutex.Lock()
132 defer a.mutex.Unlock()
133
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300134 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200135 copy(alarms[i:], alarms[i+1:])
136 return alarms[:len(alarms)-1]
137}
138
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000139func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200140 a.mutex.Lock()
141 defer a.mutex.Unlock()
142
vipin541eb502020-09-22 12:04:59 +0000143 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
144 The attempt to raise the alarm next time will be supressed when found as duplicate. */
145 if len(a.activeAlarms) >= a.maxActiveAlarms {
146 app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
147 actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
148 actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
149 a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
150 a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300151 }
152
vipin541eb502020-09-22 12:04:59 +0000153 if len(a.alarmHistory) >= a.maxAlarmHistory {
154 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
155 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
156 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
157 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
158 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300159 }
160
161 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
Mohamed Abukar0c389732020-09-17 14:47:50 +0300162 a.activeAlarms = append(a.activeAlarms, *newAlarm)
163 a.alarmHistory = append(a.alarmHistory, *newAlarm)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200164}
165
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300166func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200167 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200168 amLabels := models.LabelSet{
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200169 "status": string(status),
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200170 "alertname": alarmDef.AlarmText,
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200171 "severity": string(newAlarm.PerceivedSeverity),
172 "service": fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar643241f2020-06-09 15:26:00 +0300173 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200174 }
175 amAnnotations := models.LabelSet{
Mohamed Abukar643241f2020-06-09 15:26:00 +0300176 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
177 "description": fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200178 "additional_info": newAlarm.AdditionalInfo,
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200179 "summary": alarmDef.EventType,
180 "instructions": alarmDef.OperationInstructions,
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300181 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200182 }
183
184 return amLabels, amAnnotations
185}
186
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000187func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200188 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
189 return client.New(cr, strfmt.Default)
190}
191
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000192func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200193 pa := &models.PostableAlert{
194 Alert: models.Alert{
195 GeneratorURL: strfmt.URI(""),
196 Labels: amLabels,
197 },
198 Annotations: amAnnotations,
199 }
200 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
201
Mohamed Abukar643241f2020-06-09 15:26:00 +0300202 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200203 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
204 if err != nil {
205 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
206 }
207 return ok, err
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200208}
209
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000210func (a *AlarmManager) StatusCB() bool {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200211 if !a.rmrReady {
212 app.Logger.Info("RMR not ready yet!")
213 }
214
215 return a.rmrReady
Mohamed Abukar3e038152020-03-04 10:01:45 +0200216}
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300217
vipin541eb502020-09-22 12:04:59 +0000218func (a *AlarmManager) ConfigChangeCB(configparam string) {
219
220 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
221 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
222
223 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
224 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
225
226 return
227}
228
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000229func (a *AlarmManager) Run(sdlcheck bool) {
230 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300231 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
232 app.Resource.InjectStatusCb(a.StatusCB)
vipin541eb502020-09-22 12:04:59 +0000233 app.AddConfigChangeListener(a.ConfigChangeCB)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300234
vipin54a3a4f2020-09-23 12:19:58 +0000235 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
236
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300237 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
238 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
239 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
240 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
vipin541eb502020-09-22 12:04:59 +0000241 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
242 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
vipin54a3a4f2020-09-23 12:19:58 +0000243 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
244 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
245 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300246
247 // Start background timer for re-raising alerts
248 a.postClear = sdlcheck
249 go a.StartAlertTimer()
vipin541eb502020-09-22 12:04:59 +0000250 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300251
252 app.RunWithParams(a, sdlcheck)
253}
254
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000255func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300256 if alertInterval == 0 {
257 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
258 }
259
260 if amHost == "" {
261 amHost = viper.GetString("controls.promAlertManager.address")
262 }
263
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000264 return &AlarmManager{
vipin541eb502020-09-22 12:04:59 +0000265 rmrReady: false,
266 amHost: amHost,
267 amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
268 amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
269 alertInterval: alertInterval,
270 activeAlarms: make([]alarm.AlarmMessage, 0),
271 alarmHistory: make([]alarm.AlarmMessage, 0),
272 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
273 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300274 }
275}
276
277// Main function
278func main() {
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000279 NewAlarmManager("", 0).Run(true)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300280}