LN0739_FM_FR8: relaxing the active alarm and alarm history restrictions
- New alarms can still be added after the max active alarm threshold or the
alarm history threshold has been reached. The alarm manager raises a new
alarm in such situations.
- Review comments closed.
- Review comments closed.
Change-Id: I885418dcc19c587d1139f8251eda735b4a2bba00
Signed-off-by: vipin <vipin.mavila@nokia.com>
diff --git a/alarm/types.go b/alarm/types.go
index dc77d43..149671a 100755
--- a/alarm/types.go
+++ b/alarm/types.go
@@ -67,6 +67,11 @@
AlarmTime int64
}
+// AlarmConfigParams holds the alarm manager's two configurable thresholds.
+// It is the JSON document exchanged on the /ric/v1/alarms/config endpoint.
+type AlarmConfigParams struct {
+ // MaxActiveAlarms is the active-alarm threshold (JSON key "maxactivealarms").
+ MaxActiveAlarms int `json:"maxactivealarms"`
+ // MaxAlarmHistory is the alarm-history threshold (JSON key "maxalarmhistory").
+ MaxAlarmHistory int `json:"maxalarmhistory"`
+}
+
// RICAlarm is an alarm instance
type RICAlarm struct {
moId string
@@ -88,6 +93,8 @@
TCP_CONNECTIVITY_LOST_TO_DBAAS int = 8005
E2_CONNECTIVITY_LOST_TO_GNODEB int = 8006
E2_CONNECTIVITY_LOST_TO_ENODEB int = 8007
+ ACTIVE_ALARM_EXCEED_MAX_THRESHOLD int = 8008
+ ALARM_HISTORY_EXCEED_MAX_THRESHOLD int = 8009
)
type AlarmDefinition struct {
@@ -122,6 +129,18 @@
EventType: "Communication error",
OperationInstructions: "Not defined",
},
+ ACTIVE_ALARM_EXCEED_MAX_THRESHOLD: {
+ AlarmId: ACTIVE_ALARM_EXCEED_MAX_THRESHOLD,
+ AlarmText: "ACTIVE ALARM EXCEED MAX THRESHOLD",
+ EventType: "Warning",
+ OperationInstructions: "Not defined",
+ },
+ ALARM_HISTORY_EXCEED_MAX_THRESHOLD: {
+ AlarmId: ALARM_HISTORY_EXCEED_MAX_THRESHOLD,
+ AlarmText: "ALARM HISTORY EXCEED MAX THRESHOLD",
+ EventType: "Warning",
+ OperationInstructions: "Not defined",
+ },
}
const (
diff --git a/build/build_ubuntu.sh b/build/build_ubuntu.sh
index 3e94016..29b9d77 100755
--- a/build/build_ubuntu.sh
+++ b/build/build_ubuntu.sh
@@ -22,11 +22,11 @@
echo "--> build_ubuntu.sh starts"
# Install RMR from deb packages at packagecloud.io
-rmr=rmr_4.0.2_amd64.deb
+rmr=rmr_4.1.2_amd64.deb
wget --content-disposition https://packagecloud.io/o-ran-sc/release/packages/debian/stretch/$rmr/download.deb
sudo dpkg -i $rmr
rm $rmr
-rmrdev=rmr-dev_4.0.2_amd64.deb
+rmrdev=rmr-dev_4.1.2_amd64.deb
wget --content-disposition https://packagecloud.io/o-ran-sc/release/packages/debian/stretch/$rmrdev/download.deb
sudo dpkg -i $rmrdev
rm $rmrdev
diff --git a/cli/alarm-cli.go b/cli/alarm-cli.go
index 665f968..97ccb91 100755
--- a/cli/alarm-cli.go
+++ b/cli/alarm-cli.go
@@ -74,6 +74,18 @@
postAlarm(flags, readAlarmParams(flags, true), alarm.AlarmActionClear)
})
+ // Configure an alarm manager
+ commando.
+ Register("configure").
+ SetShortDescription("Configure alarm manager with given parameters").
+ AddFlag("mal", "max active alarms", commando.Int, nil).
+ AddFlag("mah", "max alarm history", commando.Int, nil).
+ AddFlag("host", "Alarm manager host address", commando.String, "localhost").
+ AddFlag("port", "Alarm manager host address", commando.String, "8080").
+ SetAction(func(args map[string]commando.ArgValue, flags map[string]commando.FlagValue) {
+ postAlarmConfig(flags)
+ })
+
// parse command-line arguments
commando.Parse(nil)
}
@@ -162,3 +174,24 @@
t.SetStyle(table.StyleColoredBright)
t.Render()
}
+
+// postAlarmConfig sends the thresholds given by the "mal" (max active alarms)
+// and "mah" (max alarm history) flags to the alarm manager selected by the
+// "host" and "port" flags. The values are JSON-encoded as an
+// alarm.AlarmConfigParams and POSTed to /ric/v1/alarms/config. Failures are
+// reported on standard output; nothing is returned to the caller.
+func postAlarmConfig(flags map[string]commando.FlagValue) {
+	// NOTE(review): flag conversion errors are ignored here, so a flag that
+	// cannot be read is sent as its zero value — confirm this is intended.
+	host, _ := flags["host"].GetString()
+	port, _ := flags["port"].GetString()
+	maxactivealarms, _ := flags["mal"].GetInt()
+	maxalarmhistory, _ := flags["mah"].GetInt()
+	targetUrl := fmt.Sprintf("http://%s:%s/ric/v1/alarms/config", host, port)
+
+	m := alarm.AlarmConfigParams{MaxActiveAlarms: maxactivealarms, MaxAlarmHistory: maxalarmhistory}
+	jsonData, err := json.Marshal(m)
+	if err != nil {
+		// Printf, not Println: the message contains a format verb.
+		fmt.Printf("json.Marshal failed: %v\n", err)
+		return
+	}
+
+	resp, err := http.Post(targetUrl, "application/json", bytes.NewBuffer(jsonData))
+	if err != nil || resp == nil {
+		fmt.Printf("Couldn't fetch post alarm configuration due to error: %v\n", err)
+		return
+	}
+	// The body must be closed so the underlying connection is released.
+	defer resp.Body.Close()
+
+	// A transport-level success can still carry an HTTP error status.
+	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
+		fmt.Printf("Alarm manager rejected the configuration: %s\n", resp.Status)
+	}
+}
diff --git a/go.mod b/go.mod
index 7d78729..2827b3e 100644
--- a/go.mod
+++ b/go.mod
@@ -16,7 +16,8 @@
gerrit.o-ran-sc.org/r/ric-plt/xapp-frame v0.0.0-00010101000000-000000000000
github.com/go-openapi/runtime v0.19.11
github.com/go-openapi/strfmt v0.19.4
- github.com/jedib0t/go-pretty v4.3.0+incompatible
+ github.com/gorilla/mux v1.7.1
+ github.com/jedib0t/go-pretty v4.3.0+incompatible // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/prometheus/alertmanager v0.20.0
github.com/spf13/viper v1.6.2
diff --git a/manager/cmd/manager.go b/manager/cmd/manager.go
index 3ca2d84..9a42187 100755
--- a/manager/cmd/manager.go
+++ b/manager/cmd/manager.go
@@ -133,13 +133,22 @@
a.mutex.Lock()
defer a.mutex.Unlock()
- // If maximum number of active alarms is reached, purge the oldest alarm
- if len(a.activeAlarms) >= viper.GetInt("controls.maxActiveAlarms") {
- a.activeAlarms = a.RemoveAlarm(a.activeAlarms, 0, "active")
+ /* If the maximum number of active alarms is reached, an error is logged and a new alarm indicating the problem is raised.
+ The next attempt to raise that alarm will be suppressed when it is found to be a duplicate. */
+ if len(a.activeAlarms) >= a.maxActiveAlarms {
+ app.Logger.Error("active alarm count exceeded maxActiveAlarms threshold")
+ actAlarm := a.alarmClient.NewAlarm(alarm.ACTIVE_ALARM_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "active alarms full")
+ actAlarmMessage := alarm.AlarmMessage{Alarm: actAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+ a.activeAlarms = append(a.activeAlarms, actAlarmMessage)
+ a.alarmHistory = append(a.alarmHistory, actAlarmMessage)
}
- if len(a.alarmHistory) >= viper.GetInt("controls.maxAlarmHistory") {
- a.alarmHistory = a.RemoveAlarm(a.alarmHistory, 0, "history")
+ if len(a.alarmHistory) >= a.maxAlarmHistory {
+ app.Logger.Error("alarm history count exceeded maxAlarmHistory threshold")
+ histAlarm := a.alarmClient.NewAlarm(alarm.ALARM_HISTORY_EXCEED_MAX_THRESHOLD, alarm.SeverityWarning, "clear alarms or raise threshold", "alarm history full")
+ histAlarmMessage := alarm.AlarmMessage{Alarm: histAlarm, AlarmAction: alarm.AlarmActionRaise, AlarmTime: (time.Now().UnixNano())}
+ a.activeAlarms = append(a.activeAlarms, histAlarmMessage)
+ a.alarmHistory = append(a.alarmHistory, histAlarmMessage)
}
// @todo: For now just keep the alarms (both active and history) in-memory. Use SDL later for persistence
@@ -199,19 +208,34 @@
return a.rmrReady
}
+// ConfigChangeCB is the configuration-change listener registered in Run. It
+// re-reads controls.maxActiveAlarms and controls.maxAlarmHistory from the
+// application configuration, stores them as the manager's thresholds and
+// logs the values now in effect. The configparam argument is not used.
+func (a *AlarmManager) ConfigChangeCB(configparam string) {
+	maxActive := app.Config.GetInt("controls.maxActiveAlarms")
+	maxHistory := app.Config.GetInt("controls.maxAlarmHistory")
+
+	// The thresholds are read under a.mutex when alarms are processed, so
+	// the new values are published under the same lock.
+	a.mutex.Lock()
+	a.maxActiveAlarms = maxActive
+	a.maxAlarmHistory = maxHistory
+	a.mutex.Unlock()
+
+	app.Logger.Debug("ConfigChangeCB: maxActiveAlarms = %v", maxActive)
+	app.Logger.Debug("ConfigChangeCB: maxAlarmHistory = %v", maxHistory)
+}
+
+// Run wires the alarm manager into the application framework and starts it.
+// It sets the logger MDC, installs the ready and status callbacks, registers
+// the configuration-change listener and the REST routes, starts the alert
+// timer goroutine, creates the manager's own alarm client and finally calls
+// app.RunWithParams. sdlcheck is stored in postClear and passed on to
+// app.RunWithParams.
func (a *AlarmManager) Run(sdlcheck bool) {
app.Logger.SetMdc("alarmManager", fmt.Sprintf("%s:%s", Version, Hash))
app.SetReadyCB(func(d interface{}) { a.rmrReady = true }, true)
app.Resource.InjectStatusCb(a.StatusCB)
+ app.AddConfigChangeListener(a.ConfigChangeCB)
app.Resource.InjectRoute("/ric/v1/alarms", a.RaiseAlarm, "POST")
app.Resource.InjectRoute("/ric/v1/alarms", a.ClearAlarm, "DELETE")
app.Resource.InjectRoute("/ric/v1/alarms/active", a.GetActiveAlarms, "GET")
app.Resource.InjectRoute("/ric/v1/alarms/history", a.GetAlarmHistory, "GET")
+ app.Resource.InjectRoute("/ric/v1/alarms/config", a.SetAlarmConfig, "POST")
+ app.Resource.InjectRoute("/ric/v1/alarms/config", a.GetAlarmConfig, "GET")
// Start background timer for re-raising alerts
a.postClear = sdlcheck
go a.StartAlertTimer()
+ // The alarm client is used to build the manager's own threshold alarms,
+ // so a failure to create it is logged instead of being silently dropped.
+ var err error
+ if a.alarmClient, err = alarm.InitAlarm("SEP", "ALARMMANAGER"); err != nil {
+ app.Logger.Error("alarm.InitAlarm failed: %v", err)
+ }
app.RunWithParams(a, sdlcheck)
}
@@ -226,13 +250,15 @@
}
return &AlarmManager{
- rmrReady: false,
- amHost: amHost,
- amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
- amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
- alertInterval: alertInterval,
- activeAlarms: make([]alarm.AlarmMessage, 0),
- alarmHistory: make([]alarm.AlarmMessage, 0),
+ rmrReady: false,
+ amHost: amHost,
+ amBaseUrl: viper.GetString("controls.promAlertManager.baseUrl"),
+ amSchemes: []string{viper.GetString("controls.promAlertManager.schemes")},
+ alertInterval: alertInterval,
+ activeAlarms: make([]alarm.AlarmMessage, 0),
+ alarmHistory: make([]alarm.AlarmMessage, 0),
+ maxActiveAlarms: app.Config.GetInt("controls.maxActiveAlarms"),
+ maxAlarmHistory: app.Config.GetInt("controls.maxAlarmHistory"),
}
}
diff --git a/manager/cmd/manager_test.go b/manager/cmd/manager_test.go
index c048f4b..26cf5c5 100755
--- a/manager/cmd/manager_test.go
+++ b/manager/cmd/manager_test.go
@@ -33,7 +33,7 @@
"strings"
"testing"
"time"
-
+ "github.com/gorilla/mux"
"gerrit.o-ran-sc.org/r/ric-plt/alarm-go/alarm"
"gerrit.o-ran-sc.org/r/ric-plt/xapp-frame/pkg/xapp"
"github.com/prometheus/alertmanager/api/v2/models"
@@ -56,6 +56,7 @@
}
alarmer, _ = alarm.InitAlarm("my-pod", "my-app")
+ alarmManager.alarmClient = alarmer
time.Sleep(time.Duration(5) * time.Second)
eventChan = make(chan string)
@@ -130,8 +131,10 @@
assert.Nil(t, alarmer.Raise(a), "raise failed")
VerifyAlarm(t, a, 1)
+ assert.Nil(t, alarmer.Clear(a), "clear failed")
}
+
func TestInvalidAlarms(t *testing.T) {
a := alarmer.NewAlarm(1111, alarm.SeverityMajor, "Some App data", "eth 0 1")
assert.Nil(t, alarmer.Raise(a), "raise failed")
@@ -153,6 +156,38 @@
assert.Equal(t, true, alarmManager.StatusCB())
}
+// TestActiveAlarmMaxThresholds lowers the manager's thresholds, raises an
+// alarm, checks that the GET /ric/v1/alarms/config handler reports the
+// lowered thresholds, and then waits for the alerts posted to the Prometheus
+// alert manager simulator.
+func TestActiveAlarmMaxThresholds(t *testing.T) {
+ xapp.Logger.Info("TestActiveAlarmMaxThresholds")
+ ts := CreatePromAlertSimulator(t, "POST", "/api/v2/alerts", http.StatusOK, models.LabelSet{})
+ // With a threshold of 0 the active-alarm limit counts as reached for any
+ // alarm the manager processes.
+ alarmManager.maxActiveAlarms = 0
+ alarmManager.maxAlarmHistory = 10
+
+ a := alarmer.NewAlarm(alarm.E2_CONNECTIVITY_LOST_TO_GNODEB, alarm.SeverityCritical, "Some Application data", "eth 0 2")
+ assert.Nil(t, alarmer.Raise(a), "raise failed")
+
+ // Call the config handler directly, without going through the router.
+ var alarmConfigParams alarm.AlarmConfigParams
+ req, _ := http.NewRequest("GET", "/ric/v1/alarms/config", nil)
+ req = mux.SetURLVars(req, nil)
+ handleFunc := http.HandlerFunc(alarmManager.GetAlarmConfig)
+ response := executeRequest(req, handleFunc)
+
+ // Check HTTP Status Code
+ checkResponseCode(t, http.StatusOK, response.Code)
+
+ // Decode the json output from handler
+ // NOTE(review): the Decode error is not checked; a malformed body would
+ // leave alarmConfigParams at its zero value.
+ json.NewDecoder(response.Body).Decode(&alarmConfigParams)
+ if alarmConfigParams.MaxActiveAlarms != 0 || alarmConfigParams.MaxAlarmHistory != 10 {
+ t.Errorf("Incorrect alarm thresholds")
+ }
+
+ // Raise the thresholds again after a one-second pause.
+ time.Sleep(time.Duration(1) * time.Second)
+ alarmManager.maxActiveAlarms = 5000
+ alarmManager.maxAlarmHistory = 20000
+ // NOTE(review): two events are awaited, presumably one for the raised
+ // alarm and one for the threshold alarm — confirm against VerifyAlarm.
+ VerifyAlarm(t, a, 2)
+ VerifyAlarm(t, a, 2)
+ ts.Close()
+}
+
func VerifyAlarm(t *testing.T, a alarm.Alarm, expectedCount int) string {
receivedAlert := waitForEvent()
@@ -204,3 +239,20 @@
eventChan <- fmt.Sprintf("%s", reqBody)
}
+
+// executeRequest invokes handleR with req against a fresh
+// httptest.ResponseRecorder and returns that recorder, so the caller can
+// inspect the status code and body the handler wrote.
+func executeRequest(req *http.Request, handleR http.HandlerFunc) *httptest.ResponseRecorder {
+	recorder := httptest.NewRecorder()
+	handleR(recorder, req)
+	return recorder
+}
+
+// checkResponseCode reports whether actual equals expected. On a mismatch it
+// also records a test error on t naming both codes.
+func checkResponseCode(t *testing.T, expected, actual int) bool {
+	matched := expected == actual
+	if !matched {
+		t.Errorf("Expected response code %d. Got %d\n", expected, actual)
+	}
+	return matched
+}
+
diff --git a/manager/cmd/restapi.go b/manager/cmd/restapi.go
index 6463a2c..c455e87 100755
--- a/manager/cmd/restapi.go
+++ b/manager/cmd/restapi.go
@@ -104,3 +104,26 @@
return nil
}
+
+// SetAlarmConfig handles POST /ric/v1/alarms/config. The request body is
+// decoded as a JSON alarm.AlarmConfigParams and its values replace the
+// manager's maxActiveAlarms and maxAlarmHistory thresholds; the reply is
+// 200 OK with a nil payload. A body that cannot be decoded is logged and
+// answered with 400 Bad Request, and the thresholds are left unchanged.
+func (a *AlarmManager) SetAlarmConfig(w http.ResponseWriter, r *http.Request) {
+	var m alarm.AlarmConfigParams
+	if err := json.NewDecoder(r.Body).Decode(&m); err != nil {
+		app.Logger.Error("json.NewDecoder failed: %v", err)
+		// Tell the client the request failed instead of leaving it with
+		// an implicit 200 and an empty body.
+		http.Error(w, "invalid alarm configuration", http.StatusBadRequest)
+		return
+	}
+
+	// The thresholds are read under a.mutex when alarms are processed, so
+	// the new values are published under the same lock.
+	a.mutex.Lock()
+	a.maxActiveAlarms = m.MaxActiveAlarms
+	a.maxAlarmHistory = m.MaxAlarmHistory
+	a.mutex.Unlock()
+
+	app.Logger.Debug("new maxActiveAlarms = %v", m.MaxActiveAlarms)
+	app.Logger.Debug("new maxAlarmHistory = %v", m.MaxAlarmHistory)
+	a.respondWithJSON(w, http.StatusOK, nil)
+}
+
+// GetAlarmConfig handles GET /ric/v1/alarms/config. It replies with 200 OK
+// and the manager's current maxActiveAlarms and maxAlarmHistory thresholds
+// as an alarm.AlarmConfigParams payload. The request itself is not inspected.
+func (a *AlarmManager) GetAlarmConfig(w http.ResponseWriter, r *http.Request) {
+	// Take a consistent snapshot: the thresholds are read under a.mutex
+	// when alarms are processed and may be changed by other goroutines.
+	a.mutex.Lock()
+	m := alarm.AlarmConfigParams{
+		MaxActiveAlarms: a.maxActiveAlarms,
+		MaxAlarmHistory: a.maxAlarmHistory,
+	}
+	a.mutex.Unlock()
+
+	a.respondWithJSON(w, http.StatusOK, m)
+}
diff --git a/manager/cmd/types.go b/manager/cmd/types.go
index 68b9e0d..40a110f 100755
--- a/manager/cmd/types.go
+++ b/manager/cmd/types.go
@@ -27,15 +27,18 @@
)
+// AlarmManager holds the alarm manager's state: the Prometheus alert manager
+// connection settings, the in-memory active-alarm and alarm-history lists,
+// the thresholds applied to those lists and the client used to build the
+// manager's own alarms.
type AlarmManager struct {
- amHost string
- amBaseUrl string
- amSchemes []string
- alertInterval int
- activeAlarms []alarm.AlarmMessage
- alarmHistory []alarm.AlarmMessage
- mutex sync.Mutex
- rmrReady bool
- postClear bool
+ amHost string
+ // amBaseUrl and amSchemes are read from controls.promAlertManager.* when
+ // the manager is created.
+ amBaseUrl string
+ amSchemes []string
+ alertInterval int
+ // activeAlarms and alarmHistory are kept in memory only.
+ activeAlarms []alarm.AlarmMessage
+ alarmHistory []alarm.AlarmMessage
+ // mutex is locked before the thresholds are checked and the two lists
+ // are appended to.
+ mutex sync.Mutex
+ // rmrReady is set to true by the ready callback installed in Run.
+ rmrReady bool
+ // postClear is set from the sdlcheck argument of Run.
+ postClear bool
+ // maxActiveAlarms and maxAlarmHistory are the list-length thresholds at
+ // which the manager raises its own "exceed max threshold" alarms.
+ maxActiveAlarms int
+ maxAlarmHistory int
+ // alarmClient is created in Run and used to build the threshold alarms.
+ alarmClient *alarm.RICAlarm
}
type AlertStatus string
@@ -47,3 +50,4 @@
var Version string
var Hash string
+
diff --git a/schemas/alarm-schema.json b/schemas/alarm-schema.json
index 5e999ad..ca01b1c 100644
--- a/schemas/alarm-schema.json
+++ b/schemas/alarm-schema.json
@@ -70,7 +70,7 @@
"identifyingInfo": {
"type": "string",
"title": "The identifyingInfo schema",
- "description": "Identifying additional information, which is part of alarm identity.",
+ "description": "Identifying additional information which is part of alarm identity.",
"default": ""
},
"AlarmAction": {