[STRIMZI] Monitoring chart improvement
Add Monitoring into charts of Strimzi
Issue-ID: OOM-3150
Signed-off-by: miroslavmasaryk <miroslav.masaryk@telekom.com>
Change-Id: I0621399f5f555f40f96d52f6c64e404bd91f119b
diff --git a/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml
new file mode 100644
index 0000000..12c742e
--- /dev/null
+++ b/kubernetes/strimzi/resources/metrics/cruisecontrol-metrics-config.yml
@@ -0,0 +1,20 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License..
+*/}}
+lowercaseOutputName: true
+rules:
+ - pattern: kafka.cruisecontrol<name=(.+)><>(\w+)
+ name: kafka_cruisecontrol_$1_$2
+ type: GAUGE
\ No newline at end of file
diff --git a/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml
new file mode 100644
index 0000000..7ad971f
--- /dev/null
+++ b/kubernetes/strimzi/resources/metrics/kafka-metrics-config.yml
@@ -0,0 +1,137 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License..
+*/}}
+lowercaseOutputName: true
+rules:
+ # Special cases and very specific rules
+ - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), topic=(.+), partition=(.*)><>Value
+ name: kafka_server_$1_$2
+ type: GAUGE
+ labels:
+ clientId: "$3"
+ topic: "$4"
+ partition: "$5"
+ - pattern: kafka.server<type=(.+), name=(.+), clientId=(.+), brokerHost=(.+), brokerPort=(.+)><>Value
+ name: kafka_server_$1_$2
+ type: GAUGE
+ labels:
+ clientId: "$3"
+ broker: "$4:$5"
+ - pattern: kafka.server<type=(.+), cipher=(.+), protocol=(.+), listener=(.+), networkProcessor=(.+)><>connections
+ name: kafka_server_$1_connections_tls_info
+ type: GAUGE
+ labels:
+ cipher: "$2"
+ protocol: "$3"
+ listener: "$4"
+ networkProcessor: "$5"
+ - pattern: kafka.server<type=(.+), clientSoftwareName=(.+), clientSoftwareVersion=(.+), listener=(.+), networkProcessor=(.+)><>connections
+ name: kafka_server_$1_connections_software
+ type: GAUGE
+ labels:
+ clientSoftwareName: "$2"
+ clientSoftwareVersion: "$3"
+ listener: "$4"
+ networkProcessor: "$5"
+ - pattern: "kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+):"
+ name: kafka_server_$1_$4
+ type: GAUGE
+ labels:
+ listener: "$2"
+ networkProcessor: "$3"
+ - pattern: kafka.server<type=(.+), listener=(.+), networkProcessor=(.+)><>(.+)
+ name: kafka_server_$1_$4
+ type: GAUGE
+ labels:
+ listener: "$2"
+ networkProcessor: "$3"
+ # Some percent metrics use MeanRate attribute
+ # Ex) kafka.server<type=(KafkaRequestHandlerPool), name=(RequestHandlerAvgIdlePercent)><>MeanRate
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>MeanRate
+ name: kafka_$1_$2_$3_percent
+ type: GAUGE
+ # Generic gauges for percents
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*><>Value
+ name: kafka_$1_$2_$3_percent
+ type: GAUGE
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)Percent\w*, (.+)=(.+)><>Value
+ name: kafka_$1_$2_$3_percent
+ type: GAUGE
+ labels:
+ "$4": "$5"
+ # Generic per-second counters with 0-2 key/value pairs
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+), (.+)=(.+)><>Count
+ name: kafka_$1_$2_$3_total
+ type: COUNTER
+ labels:
+ "$4": "$5"
+ "$6": "$7"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*, (.+)=(.+)><>Count
+ name: kafka_$1_$2_$3_total
+ type: COUNTER
+ labels:
+ "$4": "$5"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)PerSec\w*><>Count
+ name: kafka_$1_$2_$3_total
+ type: COUNTER
+ # Generic gauges with 0-2 key/value pairs
+ - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Value
+ name: kafka_$1_$2_$3
+ type: GAUGE
+ labels:
+ "$4": "$5"
+ "$6": "$7"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Value
+ name: kafka_$1_$2_$3
+ type: GAUGE
+ labels:
+ "$4": "$5"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Value
+ name: kafka_$1_$2_$3
+ type: GAUGE
+ # Emulate Prometheus 'Summary' metrics for the exported 'Histogram's.
+ # Note that these are missing the '_sum' metric!
+ - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+), (.+)=(.+)><>Count
+ name: kafka_$1_$2_$3_count
+ type: COUNTER
+ labels:
+ "$4": "$5"
+ "$6": "$7"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*), (.+)=(.+)><>(\d+)thPercentile
+ name: kafka_$1_$2_$3
+ type: GAUGE
+ labels:
+ "$4": "$5"
+ "$6": "$7"
+ quantile: "0.$8"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.+)><>Count
+ name: kafka_$1_$2_$3_count
+ type: COUNTER
+ labels:
+ "$4": "$5"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+), (.+)=(.*)><>(\d+)thPercentile
+ name: kafka_$1_$2_$3
+ type: GAUGE
+ labels:
+ "$4": "$5"
+ quantile: "0.$6"
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)><>Count
+ name: kafka_$1_$2_$3_count
+ type: COUNTER
+ - pattern: kafka.(\w+)<type=(.+), name=(.+)><>(\d+)thPercentile
+ name: kafka_$1_$2_$3
+ type: GAUGE
+ labels:
+ quantile: "0.$4"
\ No newline at end of file
diff --git a/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml
new file mode 100644
index 0000000..6a1eab7
--- /dev/null
+++ b/kubernetes/strimzi/resources/metrics/zookeeper-metrics-config.yml
@@ -0,0 +1,44 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License..
+*/}}
+lowercaseOutputName: true
+rules:
+ # replicated Zookeeper
+ - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+)><>(\\w+)"
+ name: "zookeeper_$2"
+ type: GAUGE
+ - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+)><>(\\w+)"
+ name: "zookeeper_$3"
+ type: GAUGE
+ labels:
+ replicaId: "$2"
+ - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(Packets\\w+)"
+ name: "zookeeper_$4"
+ type: COUNTER
+ labels:
+ replicaId: "$2"
+ memberType: "$3"
+ - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+)><>(\\w+)"
+ name: "zookeeper_$4"
+ type: GAUGE
+ labels:
+ replicaId: "$2"
+ memberType: "$3"
+ - pattern: "org.apache.ZooKeeperService<name0=ReplicatedServer_id(\\d+), name1=replica.(\\d+), name2=(\\w+), name3=(\\w+)><>(\\w+)"
+ name: "zookeeper_$4_$5"
+ type: GAUGE
+ labels:
+ replicaId: "$2"
+ memberType: "$3"
\ No newline at end of file
diff --git a/kubernetes/strimzi/templates/configmap.yaml b/kubernetes/strimzi/templates/configmap.yaml
new file mode 100644
index 0000000..ace51f7
--- /dev/null
+++ b/kubernetes/strimzi/templates/configmap.yaml
@@ -0,0 +1,21 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License..
+*/}}
+{{- if .Values.metrics.enabled }}
+apiVersion: v1
+kind: ConfigMap
+metadata: {{- include "common.resourceMetadata" . | nindent 2 }}
+data: {{ tpl (.Files.Glob "resources/metrics/*").AsConfig . | nindent 2 }}
+{{ end }}
diff --git a/kubernetes/strimzi/templates/kafka-rebalance.yaml b/kubernetes/strimzi/templates/kafka-rebalance.yaml
new file mode 100644
index 0000000..6d5f143
--- /dev/null
+++ b/kubernetes/strimzi/templates/kafka-rebalance.yaml
@@ -0,0 +1,24 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License..
+*/}}
+{{- if .Values.cruiseControl.kafkaRebalance.enabled }}
+apiVersion: kafka.strimzi.io/v1beta2
+kind: KafkaRebalance
+metadata:
+ name: {{ include "common.fullname" . }}-kafka-rebalance
+ labels:
+ strimzi.io/cluster: {{ include "common.release" . }}-strimzi
+spec: {}
+{{- end }}
diff --git a/kubernetes/strimzi/templates/pod-monitor.yaml b/kubernetes/strimzi/templates/pod-monitor.yaml
new file mode 100644
index 0000000..be288a4
--- /dev/null
+++ b/kubernetes/strimzi/templates/pod-monitor.yaml
@@ -0,0 +1,45 @@
+{{/*
+# Copyright (c) 2023 Deutsche Telekom
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License..
+*/}}
+{{- if .Values.metrics.podMonitor.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PodMonitor
+metadata:
+ name: {{ include "common.fullname" . }}-podmonitor
+ ## podMonitor labels for prometheus to pick up the podMonitor
+ ## ref: https://github.com/prometheus-operator/prometheus-operator/blob/main/Documentation/api.md#monitoring.coreos.com/v1.PodMonitor
+ ##
+ # labels:
+ # prometheus: kube-prometheus
+ labels: {{- toYaml $.Values.metrics.podMonitor.labels | nindent 4 }}
+spec:
+ selector:
+ matchLabels:
+ strimzi.io/cluster: {{ include "common.release" . }}-strimzi
+ podMetricsEndpoints:
+ - port: {{ .Values.metrics.podMonitor.port }}
+ {{- if .Values.metrics.podMonitor.relabelings }}
+ ## RelabelConfigs to apply to samples before scraping
+ ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+ ## Value is evalued as a template
+ relabelings: {{- toYaml .Values.metrics.podMonitor.relabelings | nindent 6 }}
+ {{- end }}
+ {{- if .Values.metrics.podMonitor.metricRelabelings }}
+ metricRelabelings: {{- toYaml .Values.metrics.podMonitor.metricRelabelings | nindent 6 }}
+ ## MetricRelabelConfigs to apply to samples before ingestion
+ ## ref: https://github.com/coreos/prometheus-operator/blob/master/Documentation/api.md#relabelconfig
+ ## Value is evalued as a template
+ {{- end }}
+{{- end }}
diff --git a/kubernetes/strimzi/templates/strimzi-kafka.yaml b/kubernetes/strimzi/templates/strimzi-kafka.yaml
index 3ce7b1d..421d93a 100644
--- a/kubernetes/strimzi/templates/strimzi-kafka.yaml
+++ b/kubernetes/strimzi/templates/strimzi-kafka.yaml
@@ -89,6 +89,14 @@
size: {{ .Values.persistence.kafka.size }}
deleteClaim: true
class: {{ include "common.storageClass" (dict "dot" . "suffix" "kafka" "persistenceInfos" .Values.persistence.kafka) }}
+ {{- if .Values.metrics.kafkaExporter.enabled }}
+ metricsConfig:
+ type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }}
+ valueFrom:
+ configMapKeyRef:
+ name: {{ include "common.fullname" . }}
+ key: kafka-metrics-config.yml
+ {{- end }}
zookeeper:
template:
pod:
@@ -107,7 +115,43 @@
size: {{ .Values.persistence.zookeeper.size }}
deleteClaim: true
class: {{ include "common.storageClass" (dict "dot" . "suffix" "zk" "persistenceInfos" .Values.persistence.zookeeper) }}
+ {{- if .Values.metrics.kafkaExporter.enabled }}
+ metricsConfig:
+ type: {{ .Values.metrics.kafkaExporter.metricsConfig.type }}
+ valueFrom:
+ configMapKeyRef:
+ name: {{ include "common.fullname" . }}
+ key: zookeeper-metrics-config.yml
+ {{- end }}
entityOperator:
topicOperator: {}
userOperator: {}
-
+ {{- if .Values.cruiseControl.enabled }}
+ cruiseControl:
+ metricsConfig:
+ type: {{ .Values.cruiseControl.metricsConfig.type }}
+ valueFrom:
+ configMapKeyRef:
+ name: {{ include "common.fullname" . }}
+ key: cruisecontrol-metrics-config.yml
+ {{- end }}
+ {{- if .Values.metrics.kafkaExporter.enabled }}
+ kafkaExporter:
+ topicRegex: {{ .Values.metrics.kafkaExporter.topicRegex }}
+ groupRegex: {{ .Values.metrics.kafkaExporter.groupRegex }}
+ resources:
+ requests:
+ cpu: {{ .Values.metrics.kafkaExporter.resources.requests.cpu }}
+ memory: {{ .Values.metrics.kafkaExporter.resources.requests.memory }}
+ limits:
+ cpu: {{ .Values.metrics.kafkaExporter.resources.limits.cpu }}
+ memory: {{ .Values.metrics.kafkaExporter.resources.limits.memory }}
+ logging: {{ .Values.metrics.kafkaExporter.logging }}
+ enableSaramaLogging: {{ .Values.metrics.kafkaExporter.enableSaramaLogging }}
+ readinessProbe:
+ initialDelaySeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.initialDelaySeconds }}
+ timeoutSeconds: {{ .Values.metrics.kafkaExporter.readinessProbe.timeoutSeconds }}
+ livenessProbe:
+ initialDelaySeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.initialDelaySeconds }}
+ timeoutSeconds: {{ .Values.metrics.kafkaExporter.livenessProbe.timeoutSeconds }}
+ {{- end }}
diff --git a/kubernetes/strimzi/values.yaml b/kubernetes/strimzi/values.yaml
index 057f200..8963cf3 100644
--- a/kubernetes/strimzi/values.yaml
+++ b/kubernetes/strimzi/values.yaml
@@ -90,6 +90,54 @@
exposedPort: *advertizedPortBroker2
exposedProtocol: TLS
+# Kafka Exporter for metrics
+metrics:
+ enabled: false
+ kafkaExporter:
+ enabled: false
+ metricsConfig:
+ type: jmxPrometheusExporter
+ topicRegex: ".*"
+ groupRegex: ".*"
+ resources:
+ requests:
+ cpu: 2000m
+ memory: 640Mi
+ limits:
+ cpu: 5000m
+ memory: 1280Mi
+ logging: debug
+ enableSaramaLogging: true
+ readinessProbe:
+ initialDelaySeconds: 15
+ timeoutSeconds: 5
+ livenessProbe:
+ initialDelaySeconds: 15
+ timeoutSeconds: 5
+ podMonitor:
+ # Prometheus pre requisite. Currently an optional addon in the OOM docs
+ enabled: false
+ # default port for strimzi metrics
+ port: "tcp-prometheus"
+ # podMonitor labels for prometheus to pick up the podMonitor
+ # dummy value
+ labels:
+ release: dummy
+ relabelings: []
+ metricRelabelings: []
+
+cruiseControl:
+## Cruise Control provides a Kafka metrics reporter implementation
+## once installed into the Kafka brokers, filters and records a wide range of metrics provided by the brokers themselves.
+## pre requisite is having 2 or more broker nodes
+ enabled: false
+ metricsConfig:
+ type: jmxPrometheusExporter
+ ## Custom resource for Kafka that can rebalance your cluster
+ # ref. https://strimzi.io/blog/2020/06/15/cruise-control/
+ kafkaRebalance:
+ enabled: false
+
######################
# Component overrides
######################