blob: 3ca2d84c411b71f296a0db285e8d41bfa4551713 [file] [log] [blame]
Mohamed Abukar3e038152020-03-04 10:01:45 +02001/*
2 * Copyright (c) 2020 AT&T Intellectual Property.
3 * Copyright (c) 2020 Nokia.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 * http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 *
17 * This source code is part of the near-RT RIC (RAN Intelligent Controller)
18 * platform project (RICP).
19 */
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020020
Mohamed Abukar3e038152020-03-04 10:01:45 +020021package main
22
23import (
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020024 "encoding/json"
25 "fmt"
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020026 "time"
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020027
28 clientruntime "github.com/go-openapi/runtime/client"
29 "github.com/go-openapi/strfmt"
30 "github.com/prometheus/alertmanager/api/v2/client"
31 "github.com/prometheus/alertmanager/api/v2/client/alert"
32 "github.com/prometheus/alertmanager/api/v2/models"
33 "github.com/spf13/viper"
34
35 "gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
36 app "gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
Mohamed Abukar3e038152020-03-04 10:01:45 +020037)
38
Abukar Mohamed121e8b62020-09-18 11:41:33 +000039func (a *AlarmManager) StartAlertTimer() {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020040 tick := time.Tick(time.Duration(a.alertInterval) * time.Millisecond)
41 for range tick {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020042 a.mutex.Lock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020043 for _, m := range a.activeAlarms {
44 app.Logger.Info("Re-raising alarm: %v", m)
Anssi Mannila2be5ac52020-09-22 12:17:05 +030045 a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020046 }
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020047 a.mutex.Unlock()
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020048 }
49}
50
Abukar Mohamed121e8b62020-09-18 11:41:33 +000051func (a *AlarmManager) Consume(rp *app.RMRParams) (err error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020052 app.Logger.Info("Message received!")
53
54 defer app.Rmr.Free(rp.Mbuf)
55 switch rp.Mtype {
56 case alarm.RIC_ALARM_UPDATE:
57 a.HandleAlarms(rp)
58 default:
59 app.Logger.Info("Unknown Message Type '%d', discarding", rp.Mtype)
60 }
61
62 return nil
63}
64
Abukar Mohamed121e8b62020-09-18 11:41:33 +000065func (a *AlarmManager) HandleAlarms(rp *app.RMRParams) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020066 var m alarm.AlarmMessage
Lott, Christopher (cl778h)3e8e2aa2020-06-03 08:52:14 -040067 app.Logger.Info("Received JSON: %s", rp.Payload)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020068 if err := json.Unmarshal(rp.Payload, &m); err != nil {
69 app.Logger.Error("json.Unmarshal failed: %v", err)
70 return nil, err
71 }
72 app.Logger.Info("newAlarm: %v", m)
73
Mohamed Abukar540ceee2020-09-09 08:07:40 +030074 return a.ProcessAlarm(&m)
75}
76
Abukar Mohamed121e8b62020-09-18 11:41:33 +000077func (a *AlarmManager) ProcessAlarm(m *alarm.AlarmMessage) (*alert.PostAlertsOK, error) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +020078 if _, ok := alarm.RICAlarmDefinitions[m.Alarm.SpecificProblem]; !ok {
Mohamed Abukar540ceee2020-09-09 08:07:40 +030079 app.Logger.Warn("Alarm (SP='%d') not recognized, suppressing ...", m.Alarm.SpecificProblem)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020080 return nil, nil
81 }
82
83 // Suppress duplicate alarms
84 idx, found := a.IsMatchFound(m.Alarm)
85 if found && m.AlarmAction != alarm.AlarmActionClear {
Mohamed Abukar540ceee2020-09-09 08:07:40 +030086 app.Logger.Info("Duplicate alarm found, suppressing ...")
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020087 return nil, nil
88 }
89
90 // Clear alarm if found from active alarm list
91 if m.AlarmAction == alarm.AlarmActionClear {
92 if found {
Mohamed Abukar3873c222020-09-21 09:54:57 +030093 a.alarmHistory = append(a.alarmHistory, *m)
Mohamed Abukar540ceee2020-09-09 08:07:40 +030094 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, idx, "active")
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020095
96 if a.postClear {
Anssi Mannila2be5ac52020-09-22 12:17:05 +030097 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusResolved, m.AlarmTime))
Mohamed Abukarb2f29a82020-03-17 09:31:55 +020098 }
Mohamed Abukar4e7e7122020-03-04 10:01:45 +020099 }
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300100 app.Logger.Info("No matching active alarm found, suppressing ...")
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200101 return nil, nil
102 }
103
104 // New alarm -> update active alarms and post to Alert Manager
105 if m.AlarmAction == alarm.AlarmActionRaise {
Mohamed Abukar0c389732020-09-17 14:47:50 +0300106 a.UpdateAlarmLists(m)
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300107 return a.PostAlert(a.GenerateAlertLabels(m.Alarm, AlertStatusActive, m.AlarmTime))
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200108 }
109
110 return nil, nil
111}
112
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000113func (a *AlarmManager) IsMatchFound(newAlarm alarm.Alarm) (int, bool) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200114 for i, m := range a.activeAlarms {
115 if m.ManagedObjectId == newAlarm.ManagedObjectId && m.ApplicationId == newAlarm.ApplicationId &&
Mohamed Abukar0c389732020-09-17 14:47:50 +0300116 m.SpecificProblem == newAlarm.SpecificProblem && m.IdentifyingInfo == newAlarm.IdentifyingInfo {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200117 return i, true
118 }
119 }
120 return -1, false
121}
122
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000123func (a *AlarmManager) RemoveAlarm(alarms []alarm.AlarmMessage, i int, listName string) []alarm.AlarmMessage {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200124 a.mutex.Lock()
125 defer a.mutex.Unlock()
126
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300127 app.Logger.Info("Alarm '%+v' deleted from the '%s' list", alarms[i], listName)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200128 copy(alarms[i:], alarms[i+1:])
129 return alarms[:len(alarms)-1]
130}
131
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000132func (a *AlarmManager) UpdateAlarmLists(newAlarm *alarm.AlarmMessage) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200133 a.mutex.Lock()
134 defer a.mutex.Unlock()
135
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300136 // If maximum number of active alarms is reached, purge the oldest alarm
137 if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
138 a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
139 }
140
141 if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
142 a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
143 }
144
145 // @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
Mohamed Abukar0c389732020-09-17 14:47:50 +0300146 a.activeAlarms = append(a.activeAlarms, *newAlarm)
147 a.alarmHistory = append(a.alarmHistory, *newAlarm)
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200148}
149
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300150func (a *AlarmManager) GenerateAlertLabels(newAlarm alarm.Alarm, status AlertStatus, alarmTime int64) (models.LabelSet, models.LabelSet) {
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200151 alarmDef := alarm.RICAlarmDefinitions[newAlarm.SpecificProblem]
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200152 amLabels := models.LabelSet{
Mohamed Abukarb2f29a82020-03-17 09:31:55 +0200153 "status": string(status),
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200154 "alertname": alarmDef.AlarmText,
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200155 "severity": string(newAlarm.PerceivedSeverity),
156 "service": fmt.Sprintf("%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar643241f2020-06-09 15:26:00 +0300157 "system_name": fmt.Sprintf("RIC:%s:%s", newAlarm.ManagedObjectId, newAlarm.ApplicationId),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200158 }
159 amAnnotations := models.LabelSet{
Mohamed Abukar643241f2020-06-09 15:26:00 +0300160 "alarm_id": fmt.Sprintf("%d", alarmDef.AlarmId),
161 "description": fmt.Sprintf("%d:%s:%s", newAlarm.SpecificProblem, newAlarm.IdentifyingInfo, newAlarm.AdditionalInfo),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200162 "additional_info": newAlarm.AdditionalInfo,
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200163 "summary": alarmDef.EventType,
164 "instructions": alarmDef.OperationInstructions,
Anssi Mannila2be5ac52020-09-22 12:17:05 +0300165 "timestamp": fmt.Sprintf("%s", time.Unix(0, alarmTime).Format("02/01/2006, 15:04:05")),
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200166 }
167
168 return amLabels, amAnnotations
169}
170
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000171func (a *AlarmManager) NewAlertmanagerClient() *client.Alertmanager {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200172 cr := clientruntime.New(a.amHost, a.amBaseUrl, a.amSchemes)
173 return client.New(cr, strfmt.Default)
174}
175
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000176func (a *AlarmManager) PostAlert(amLabels, amAnnotations models.LabelSet) (*alert.PostAlertsOK, error) {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200177 pa := &models.PostableAlert{
178 Alert: models.Alert{
179 GeneratorURL: strfmt.URI(""),
180 Labels: amLabels,
181 },
182 Annotations: amAnnotations,
183 }
184 alertParams := alert.NewPostAlertsParams().WithAlerts(models.PostableAlerts{pa})
185
Mohamed Abukar643241f2020-06-09 15:26:00 +0300186 app.Logger.Info("Posting alerts: labels: %+v, annotations: %+v", amLabels, amAnnotations)
Mohamed Abukaraf0c5702020-03-11 10:29:40 +0200187 ok, err := a.NewAlertmanagerClient().Alert.PostAlerts(alertParams)
188 if err != nil {
189 app.Logger.Error("Posting alerts to '%s/%s' failed with error: %v", a.amHost, a.amBaseUrl, err)
190 }
191 return ok, err
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200192}
193
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000194func (a *AlarmManager) StatusCB() bool {
Mohamed Abukar4e7e7122020-03-04 10:01:45 +0200195 if !a.rmrReady {
196 app.Logger.Info("RMR not ready yet!")
197 }
198
199 return a.rmrReady
Mohamed Abukar3e038152020-03-04 10:01:45 +0200200}
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300201
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000202func (a *AlarmManager) Run(sdlcheck bool) {
203 app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300204 app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
205 app.Resource.InjectStatusCb(a.StatusCB)
206
207 app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
208 app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
209 app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
210 app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
211
212 // Start background timer for re-raising alerts
213 a.postClear = sdlcheck
214 go a.StartAlertTimer()
215
216 app.RunWithParams(a, sdlcheck)
217}
218
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000219func NewAlarmManager(amHost string, alertInterval int) *AlarmManager {
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300220 if alertInterval == 0 {
221 alertInterval = viper.GetInt("controls.promAlertManager.alertInterval")
222 }
223
224 if amHost == "" {
225 amHost = viper.GetString("controls.promAlertManager.address")
226 }
227
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000228 return &AlarmManager{
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300229 rmrReady: false,
230 amHost: amHost,
231 amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
232 amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
233 alertInterval: alertInterval,
Mohamed Abukar0c389732020-09-17 14:47:50 +0300234 activeAlarms: make([]alarm.AlarmMessage, 0),
235 alarmHistory: make([]alarm.AlarmMessage, 0),
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300236 }
237}
238
239// Main function
240func main() {
Abukar Mohamed121e8b62020-09-18 11:41:33 +0000241 NewAlarmManager("", 0).Run(true)
Mohamed Abukar540ceee2020-09-09 08:07:40 +0300242}