blob: 397cd3e472e8bc12abf2f366d85bfb1496912eb8 [file] [log] [blame]
/*
 *  Copyright (c) 2020 AT&T Intellectual Property.
 *  Copyright (c) 2020 Nokia.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 *  This source code is part of the near-RT RIC (RAN Intelligent Controller)
 *  platform project (RICP).
 */
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020020
Mohamed Abukar3e038152020-03-04 10:01:45 +020021package main
22
23import (
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020024 "encoding/json"
25 "fmt"
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020026 "time"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020027
28 clientruntime "github.com/go-openapi/runtime/client"
29 "github.com/go-openapi/strfmt"
30 "github.com/prometheus/alertmanager/api/v2/client"
31 "github.com/prometheus/alertmanager/api/v2/client/alert"
32 "github.com/prometheus/alertmanager/api/v2/models"
33 "github.com/spf13/viper"
34
35 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
36 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
Mohamed Abukar3e038152020-03-04 10:01:45 +020037)
38
Abukar Mohamed121e8b62020-09-18 11:41:33 +000039func (a *AlarmManager) StartAlertTimer() {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020040 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
41 for range tick {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020042 a.mutex.Lock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020043 for _, m := range a.activeAlarms {
44 app.Logger.Info("Re-raising alarm: %v", m)
Anssi Mannila2be5ac52020-09-22 12:17:05 +030045 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020046 }
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020047 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020048 }
49}
50
Abukar Mohamed121e8b62020-09-18 11:41:33 +000051func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020052 app.Logger.Info("Message received!")
53
54 defer app.Rmr.Free(rp.Mbuf)
55 switch rp.Mtype {
56 case alarm.RIC_ALARM_UPDATE:
57 a.HandleAlarms(rp)
58 default:
59 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
60 }
61
62 return nil
63}
64
Abukar Mohamed121e8b62020-09-18 11:41:33 +000065func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020066 var m alarm.AlarmMessage
Lott, Christopher (cl778h)3e8e2aa2020-06-03 08:52:14 -040067 app.Logger.Info("Received JSON: %s", rp.Payload)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020068 if err := json.Unmarshal(rp.Payload, &m); err != nil {
69 app.Logger.Error("json.Unmarshal failed: %v", err)
70 return nil, err
71 }
72 app.Logger.Info("newAlarm: %v", m)
73
Mohamed Abukar540ceee2020-09-09 08:07:40 +030074 return a.ProcessAlarm(&m)
75}
76
Abukar Mohamed121e8b62020-09-18 11:41:33 +000077func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020078 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
Mohamed Abukar540ceee2020-09-09 08:07:40 +030079 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020080 return nil, nil
81 }
82
83 // Suppress duplicate alarms
84 idx, found := a.IsMatchFound(m.Alarm)
85 if found && m.AlarmAction != alarm.AlarmActionClear {
Mohamed Abukar540ceee2020-09-09 08:07:40 +030086 app.Logger.Info("Duplicate alarm found, suppressing ...")
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020087 return nil, nil
88 }
89
90 // Clear alarm if found from active alarm list
91 if m.AlarmAction == alarm.AlarmActionClear {
92 if found {
Mohamed Abukar3873c222020-09-21 09:54:57 +030093 a.alarmHistory = append(a.alarmHistory, *m)
Mohamed Abukar540ceee2020-09-09 08:07:40 +030094 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020095
96 if a.postClear {
Anssi Mannila2be5ac52020-09-22 12:17:05 +030097 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020098 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020099 }
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300100 app.Logger.Info("No matching active alarm found, suppressing ...")
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200101 return nil, nil
102 }
103
104 // New alarm -> update active alarms and post to Alert Manager
105 if m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar0c389732020-09-17 14:47:50 +0300106 a.UpdateAlarmLists(m)
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300107 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200108 }
109
110 return nil, nil
111}
112
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000113func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200114 for i, m := range a.activeAlarms {
115 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
Mohamed Abukar0c389732020-09-17 14:47:50 +0300116 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200117 return i, true
118 }
119 }
120 return -1, false
121}
122
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000123func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200124 a.mutex.Lock()
125 defer a.mutex.Unlock()
126
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300127 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200128 copy(alarms[i:], alarms[i+1:])
129 return alarms[:len(alarms)-1]
130}
131
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000132func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200133 a.mutex.Lock()
134 defer a.mutex.Unlock()
135
vipin541eb502020-09-22 12:04:59 +0000136 /* If maximum number of active alarms is reached, an error log writing is made, and new alarm indicating the problem is raised.
137 The attempt to raise the alarm next time will be supressed when found as duplicate. */
138 if len(a.activeAlarms) >= a.maxActiveAlarms {
139 app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
140 actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
141 actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
142 a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
143 a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300144 }
145
vipin541eb502020-09-22 12:04:59 +0000146 if len(a.alarmHistory) >= a.maxAlarmHistory {
147 app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
148 histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
149 histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
150 a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
151 a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300152 }
153
154 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
Mohamed Abukar0c389732020-09-17 14:47:50 +0300155 a.activeAlarms = append(a.activeAlarms, *newAlarm)
156 a.alarmHistory = append(a.alarmHistory, *newAlarm)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200157}
158
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300159func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200160 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200161 amLabels := models.LabelSet{
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200162 "status": string(status),
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200163 "alertname": alarmDef.AlarmText,
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200164 "severity": string(newAlarm.PerceivedSeverity),
165 "service": fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar643241f2020-06-09 15:26:00 +0300166 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200167 }
168 amAnnotations := models.LabelSet{
Mohamed Abukar643241f2020-06-09 15:26:00 +0300169 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
170 "description": fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200171 "additional_info": newAlarm.AdditionalInfo,
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200172 "summary": alarmDef.EventType,
173 "instructions": alarmDef.OperationInstructions,
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300174 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200175 }
176
177 return amLabels, amAnnotations
178}
179
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000180func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200181 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
182 return client.New(cr, strfmt.Default)
183}
184
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000185func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200186 pa := &models.PostableAlert{
187 Alert: models.Alert{
188 GeneratorURL: strfmt.URI(""),
189 Labels: amLabels,
190 },
191 Annotations: amAnnotations,
192 }
193 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
194
Mohamed Abukar643241f2020-06-09 15:26:00 +0300195 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200196 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
197 if err != nil {
198 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
199 }
200 return ok, err
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200201}
202
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000203func (a *AlarmManager) StatusCB() bool {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200204 if !a.rmrReady {
205 app.Logger.Info("RMR not ready yet!")
206 }
207
208 return a.rmrReady
Mohamed Abukar3e038152020-03-04 10:01:45 +0200209}
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300210
vipin541eb502020-09-22 12:04:59 +0000211func (a *AlarmManager) ConfigChangeCB(configparam string) {
212
213 a.maxActiveAlarms = app.Config.GetInt("controls.maxActiveAlarms")
214 a.maxAlarmHistory = app.Config.GetInt("controls.maxAlarmHistory")
215
216 app.Logger.Debug("ConfigChangeCB: maxActiveAlarms %v", a.maxActiveAlarms)
217 app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", a.maxAlarmHistory)
218
219 return
220}
221
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000222func (a *AlarmManager) Run(sdlcheck bool) {
223 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300224 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
225 app.Resource.InjectStatusCb(a.StatusCB)
vipin541eb502020-09-22 12:04:59 +0000226 app.AddConfigChangeListener(a.ConfigChangeCB)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300227
vipin54a3a4f2020-09-23 12:19:58 +0000228 alarm.RICAlarmDefinitions = make(map[int]*alarm.AlarmDefinition)
229
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300230 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
231 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
232 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
233 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
vipin541eb502020-09-22 12:04:59 +0000234 app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
235 app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
vipin54a3a4f2020-09-23 12:19:58 +0000236 app.Resource.InjectRoute("/ric/v1/alarms/define", a.SetAlarmDefinition, "POST")
237 app.Resource.InjectRoute("/ric/v1/alarms/define/{alarmId}", a.DeleteAlarmDefinition, "DELETE")
238 app.Resource.InjectRoute("/ric/v1/alarms/define", a.GetAlarmDefinition, "GET")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300239
240 // Start background timer for re-raising alerts
241 a.postClear = sdlcheck
242 go a.StartAlertTimer()
vipin541eb502020-09-22 12:04:59 +0000243 a.alarmClient, _ = alarm.InitAlarm("SEP", "ALARMMANAGER")
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300244
245 app.RunWithParams(a, sdlcheck)
246}
247
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000248func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300249 if alertInterval == 0 {
250 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
251 }
252
253 if amHost == "" {
254 amHost = viper.GetString("controls.promAlertManager.address")
255 }
256
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000257 return &AlarmManager{
vipin541eb502020-09-22 12:04:59 +0000258 rmrReady: false,
259 amHost: amHost,
260 amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
261 amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
262 alertInterval: alertInterval,
263 activeAlarms: make([]alarm.AlarmMessage, 0),
264 alarmHistory: make([]alarm.AlarmMessage, 0),
265 maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
266 maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300267 }
268}
269
270// Main function
271func main() {
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000272 NewAlarmManager("", 0).Run(true)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300273}