Skip to content

Commit b637a59

Browse files
committed
feat: cluster/component health backend & resources update
1 parent 529d684 commit b637a59

File tree

9 files changed

+250
-42
lines changed

9 files changed

+250
-42
lines changed

bundle/manifests/observability-operator.clusterserviceversion.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,13 @@ spec:
408408
- get
409409
- list
410410
- watch
411+
- apiGroups:
412+
- config.openshift.io
413+
resources:
414+
- clusteroperators
415+
verbs:
416+
- get
417+
- list
411418
- apiGroups:
412419
- config.openshift.io
413420
resources:
@@ -437,6 +444,13 @@ spec:
437444
- get
438445
- list
439446
- watch
447+
- apiGroups:
448+
- kubevirt.io
449+
resources:
450+
- kubevirts
451+
verbs:
452+
- get
453+
- list
440454
- apiGroups:
441455
- loki.grafana.com
442456
resources:
@@ -453,6 +467,13 @@ spec:
453467
verbs:
454468
- get
455469
- list
470+
- apiGroups:
471+
- machineconfiguration.openshift.io
472+
resources:
473+
- machineconfigpools
474+
verbs:
475+
- get
476+
- list
456477
- apiGroups:
457478
- monitoring.coreos.com
458479
resourceNames:

deploy/operator/observability-operator-cluster-role.yaml

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,13 @@ rules:
9292
- get
9393
- list
9494
- watch
95+
- apiGroups:
96+
- config.openshift.io
97+
resources:
98+
- clusteroperators
99+
verbs:
100+
- get
101+
- list
95102
- apiGroups:
96103
- config.openshift.io
97104
resources:
@@ -121,6 +128,13 @@ rules:
121128
- get
122129
- list
123130
- watch
131+
- apiGroups:
132+
- kubevirt.io
133+
resources:
134+
- kubevirts
135+
verbs:
136+
- get
137+
- list
124138
- apiGroups:
125139
- loki.grafana.com
126140
resources:
@@ -137,6 +151,13 @@ rules:
137151
verbs:
138152
- get
139153
- list
154+
- apiGroups:
155+
- machineconfiguration.openshift.io
156+
resources:
157+
- machineconfigpools
158+
verbs:
159+
- get
160+
- list
140161
- apiGroups:
141162
- monitoring.coreos.com
142163
resourceNames:

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ require (
2020
github.com/prometheus/common v0.67.4
2121
github.com/rhobs/obo-prometheus-operator v0.87.0-rhobs1
2222
github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.87.0-rhobs1
23-
github.com/rhobs/observability-operator/pkg/apis v0.0.0-20251009091129-76135c924ed6
23+
github.com/rhobs/observability-operator/pkg/apis v0.0.0-20260115120443-7527133cfea4
2424
github.com/rhobs/perses v0.0.0-20260113083341-bce6f0039b5d
2525
github.com/rhobs/perses-operator v0.1.10-0.20260119104604-801af29f7716
2626
github.com/stretchr/testify v1.11.1

go.sum

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -440,16 +440,8 @@ github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.87.0-rhobs1 h1:f
440440
github.com/rhobs/obo-prometheus-operator/pkg/apis/monitoring v0.87.0-rhobs1/go.mod h1:amf29isbT7UfgLWDCamPG6jaELHKoXyCd25Nx3DBlNY=
441441
github.com/rhobs/obo-prometheus-operator/pkg/client v0.87.0-rhobs1 h1:/3w2Mky/kVuzNH19uT6vsSgmCuZwEFftjF0A7LuX+Mo=
442442
github.com/rhobs/obo-prometheus-operator/pkg/client v0.87.0-rhobs1/go.mod h1:FMfzfWcCGBtflg+gT1SNchG243iywu015rJskh1U+/c=
443-
github.com/rhobs/perses v0.0.0-20260108135452-fa4ca8ac8e73 h1:oVpyW74yXZzixJlsCprgnIkG1QnSd3t8XQIQRe84vQc=
444-
github.com/rhobs/perses v0.0.0-20260108135452-fa4ca8ac8e73/go.mod h1:t5HVls3hnD2zn9pg/sjVd3n1ULHUizAn69lX7Wf9wUE=
445443
github.com/rhobs/perses v0.0.0-20260113083341-bce6f0039b5d h1:hagDxkfSvy6vGxH0qPFIOQoPjdblKCfNAhjqCgsuy7Q=
446444
github.com/rhobs/perses v0.0.0-20260113083341-bce6f0039b5d/go.mod h1:t5HVls3hnD2zn9pg/sjVd3n1ULHUizAn69lX7Wf9wUE=
447-
github.com/rhobs/perses-operator v0.1.10-0.20260108143425-6efe058ff3ec h1:qDlJf6dGLi3DqVB6Tb7msGEM9AD9HhVOW+f724Az7fc=
448-
github.com/rhobs/perses-operator v0.1.10-0.20260108143425-6efe058ff3ec/go.mod h1:ciSWn2z4DA7/47GHN65UDDNvVXE/PSPlQ27csHytKRI=
449-
github.com/rhobs/perses-operator v0.1.10-0.20260113122614-fbdfa948934b h1:G7rsnn43gSfd/81rbZqbZwzD0wpGg6tqZ8VR+5fKMtA=
450-
github.com/rhobs/perses-operator v0.1.10-0.20260113122614-fbdfa948934b/go.mod h1:ciSWn2z4DA7/47GHN65UDDNvVXE/PSPlQ27csHytKRI=
451-
github.com/rhobs/perses-operator v0.1.10-0.20260113145446-31dca3123509 h1:qsacWU0fdzSvbLQEHnhsk5Pi0hwPZKQVdcbTHKhjM1E=
452-
github.com/rhobs/perses-operator v0.1.10-0.20260113145446-31dca3123509/go.mod h1:ciSWn2z4DA7/47GHN65UDDNvVXE/PSPlQ27csHytKRI=
453445
github.com/rhobs/perses-operator v0.1.10-0.20260119104604-801af29f7716 h1:fFiorvTDcBHhT/FSfrlEHrtHKg+p8h7nhPzP6bRY93c=
454446
github.com/rhobs/perses-operator v0.1.10-0.20260119104604-801af29f7716/go.mod h1:7XmXHWocDHqOerp3lmggbQzbBZ9VkK1+FCv/LGGMHvE=
455447
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=

pkg/controllers/uiplugin/components.go

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -121,15 +121,30 @@ func pluginComponentReconcilers(plugin *uiv1alpha1.UIPlugin, pluginInfo UIPlugin
121121
monitoringConfig.Incidents != nil &&
122122
monitoringConfig.Incidents.Enabled &&
123123
pluginInfo.HealthAnalyzerImage != ""
124+
125+
healthAnalyzerEnabled := monitoringConfig != nil &&
126+
monitoringConfig.ClusterHealthAnalyzer != nil &&
127+
monitoringConfig.ClusterHealthAnalyzer.Enabled &&
128+
pluginInfo.HealthAnalyzerImage != ""
129+
130+
components = append(components,
131+
reconciler.NewOptionalUpdater(componentsHealthClusterRole("components-health-view"), plugin, healthAnalyzerEnabled),
132+
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "components-health-view", plugin.Name+"-"+"components-health-view"), plugin, healthAnalyzerEnabled),
133+
reconciler.NewOptionalUpdater(newComponentHealthConfig(namespace), plugin, healthAnalyzerEnabled),
134+
)
135+
136+
deployHealthAnalyzer := incidentsEnabled || healthAnalyzerEnabled
137+
124138
components = append(components,
125-
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, monitorClusterroleName, plugin.Name+"-"+monitorClusterroleName), plugin, incidentsEnabled),
126-
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "system:auth-delegator", serviceAccountName+"-system-auth-delegator"), plugin, incidentsEnabled),
127-
reconciler.NewOptionalUpdater(newAlertManagerViewRoleBinding(serviceAccountName, namespace), plugin, incidentsEnabled),
128-
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRole(namespace), plugin, incidentsEnabled),
129-
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRoleBinding(namespace), plugin, incidentsEnabled),
130-
reconciler.NewOptionalUpdater(newHealthAnalyzerService(namespace), plugin, incidentsEnabled),
131-
reconciler.NewOptionalUpdater(newHealthAnalyzerDeployment(namespace, serviceAccountName, pluginInfo), plugin, incidentsEnabled),
132-
reconciler.NewOptionalUpdater(newHealthAnalyzerServiceMonitor(namespace), plugin, incidentsEnabled),
139+
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "cluster-monitoring-view", plugin.Name+"cluster-monitoring-view"), plugin, deployHealthAnalyzer),
140+
reconciler.NewOptionalUpdater(newClusterRoleBinding(namespace, serviceAccountName, "system:auth-delegator", serviceAccountName+"-system-auth-delegator"), plugin, deployHealthAnalyzer),
141+
reconciler.NewOptionalUpdater(newAlertManagerViewRoleBinding(serviceAccountName, namespace), plugin, deployHealthAnalyzer),
142+
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRole(namespace), plugin, deployHealthAnalyzer),
143+
reconciler.NewOptionalUpdater(newHealthAnalyzerPrometheusRoleBinding(namespace), plugin, deployHealthAnalyzer),
144+
reconciler.NewOptionalUpdater(newHealthAnalyzerService(namespace), plugin, deployHealthAnalyzer),
145+
reconciler.NewOptionalUpdater(newHealthAnalyzerDeployment(namespace, serviceAccountName, pluginInfo.HealthAnalyzerImage, healthAnalyzerEnabled),
146+
plugin, deployHealthAnalyzer),
147+
reconciler.NewOptionalUpdater(newHealthAnalyzerServiceMonitor(namespace), plugin, deployHealthAnalyzer),
133148
)
134149

135150
persesServiceAccountName := "perses" + serviceAccountSuffix
@@ -436,6 +451,43 @@ func newService(info UIPluginInfo, namespace string) *corev1.Service {
436451
}
437452
}
438453

454+
// componentsHealthClusterRole returns a ClusterRole with the provided name that
455+
// grants read-only access ("get" and "list") to nodes, clusteroperators,
456+
// machineconfigpools and kubevirts, as required for the component health evaluation.
457+
func componentsHealthClusterRole(name string) *rbacv1.ClusterRole {
458+
return &rbacv1.ClusterRole{
459+
TypeMeta: metav1.TypeMeta{
460+
APIVersion: rbacv1.SchemeGroupVersion.String(),
461+
Kind: "ClusterRole",
462+
},
463+
ObjectMeta: metav1.ObjectMeta{
464+
Name: name,
465+
},
466+
Rules: []rbacv1.PolicyRule{
467+
{
468+
APIGroups: []string{""},
469+
Resources: []string{"nodes"},
470+
Verbs: []string{"get", "list"},
471+
},
472+
{
473+
APIGroups: []string{"config.openshift.io"},
474+
Resources: []string{"clusteroperators"},
475+
Verbs: []string{"get", "list"},
476+
},
477+
{
478+
APIGroups: []string{"machineconfiguration.openshift.io"},
479+
Resources: []string{"machineconfigpools"},
480+
Verbs: []string{"get", "list"},
481+
},
482+
{
483+
APIGroups: []string{"kubevirt.io"},
484+
Resources: []string{"kubevirts"},
485+
Verbs: []string{"get", "list"},
486+
},
487+
},
488+
}
489+
}
490+
439491
func newKorrel8rDeployment(name string, namespace string, info UIPluginInfo) *appsv1.Deployment {
440492
volumes := []corev1.Volume{
441493
{
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# Default definition of the component tree used to evaluate component health
2+
# by the cluster-health-analyzer.
3+
components:
4+
- name: control-plane
5+
children:
6+
- name: nodes
7+
objects:
8+
- resource: nodes
9+
selectors:
10+
- matchLabels:
11+
node-role.kubernetes.io/control-plane: []
12+
- resource: machineconfigpools
13+
group: machineconfiguration.openshift.io
14+
selectors:
15+
- matchLabels:
16+
pools.operator.machineconfiguration.openshift.io/master: []
17+
- name: capacity
18+
children:
19+
- name: cpu
20+
alerts:
21+
selectors:
22+
- matchLabels:
23+
alertname: ["KubeCPUOvercommit","HighOverallControlPlaneCPU", "ExtremelyHighIndividualControlPlaneCPU"]
24+
- name: memory
25+
alerts:
26+
selectors:
27+
- matchLabels:
28+
alertname: ["HighOverallControlPlaneMemory", "ExtremelyHighIndividualControlPlaneMemory", "SystemMemoryExceedsReservation"]
29+
- name: operators
30+
children:
31+
- name: etcd
32+
alerts:
33+
selectors:
34+
- matchLabels:
35+
namespace: ["openshift-etcd","openshift-etcd-operator"]
36+
- name: addons
37+
children:
38+
- name: kubevirt
39+
alerts:
40+
selectors:
41+
- matchLabels:
42+
kubernetes_operator_part_of: ["kubevirt"]
43+
- matchLabels:
44+
namespace: ["openshift-cnv"]
45+
objects:
46+
- group: kubevirt.io
47+
resource: kubevirts

pkg/controllers/uiplugin/controller.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,9 @@ const (
106106
//+kubebuilder:rbac:groups=authentication.k8s.io,resources=tokenreviews,verbs=create
107107
//+kubebuilder:rbac:groups=authorization.k8s.io,resources=subjectaccessreviews,verbs=create
108108
//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors,verbs=get;create;update;patch;delete
109+
//+kubebuilder:rbac:groups=config.openshift.io,resources=clusteroperators,verbs=get;list
110+
//+kubebuilder:rbac:groups=machineconfiguration.openshift.io,resources=machineconfigpools,verbs=get;list
111+
//+kubebuilder:rbac:groups=kubevirt.io,resources=kubevirts,verbs=get;list
109112

110113
const finalizerName = "uiplugin.observability.openshift.io/finalizer"
111114

pkg/controllers/uiplugin/health_analyzer.go

Lines changed: 78 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
package uiplugin
22

33
import (
4+
_ "embed"
5+
46
monv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
57
appsv1 "k8s.io/api/apps/v1"
68
corev1 "k8s.io/api/core/v1"
9+
v1 "k8s.io/api/core/v1"
710
rbacv1 "k8s.io/api/rbac/v1"
811
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
912
"k8s.io/apimachinery/pkg/util/intstr"
@@ -15,6 +18,9 @@ const (
1518
volumeMountName = name + "-tls"
1619
)
1720

21+
//go:embed config/health-analyzer.yaml
22+
var componentHealthConfig string
23+
1824
func newHealthAnalyzerPrometheusRole(namespace string) *rbacv1.Role {
1925
role := &rbacv1.Role{
2026
TypeMeta: metav1.TypeMeta{
@@ -94,7 +100,53 @@ func newHealthAnalyzerService(namespace string) *corev1.Service {
94100
return service
95101
}
96102

97-
func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pluginInfo UIPluginInfo) *appsv1.Deployment {
103+
func newHealthAnalyzerDeployment(namespace string,
104+
serviceAccountName string,
105+
image string,
106+
healthAnalyzerEnabled bool) *appsv1.Deployment {
107+
args := []string{
108+
"serve",
109+
"--tls-cert-file=/etc/tls/private/tls.crt",
110+
"--tls-private-key-file=/etc/tls/private/tls.key",
111+
}
112+
volumes := []corev1.Volume{
113+
{
114+
Name: volumeMountName,
115+
VolumeSource: corev1.VolumeSource{
116+
Secret: &corev1.SecretVolumeSource{
117+
SecretName: volumeMountName,
118+
},
119+
},
120+
},
121+
}
122+
123+
volumeMounts := []corev1.VolumeMount{
124+
{
125+
MountPath: "/etc/tls/private",
126+
Name: volumeMountName,
127+
ReadOnly: true,
128+
},
129+
}
130+
131+
if healthAnalyzerEnabled {
132+
args = append(args, "--enable-components-health")
133+
volumes = append(volumes, corev1.Volume{
134+
Name: "components-health-config",
135+
VolumeSource: corev1.VolumeSource{
136+
ConfigMap: &corev1.ConfigMapVolumeSource{
137+
LocalObjectReference: corev1.LocalObjectReference{
138+
Name: "components-config",
139+
},
140+
},
141+
},
142+
})
143+
volumeMounts = append(volumeMounts, corev1.VolumeMount{
144+
Name: "components-health-config",
145+
MountPath: "/etc/config",
146+
ReadOnly: true,
147+
})
148+
}
149+
98150
deploy := &appsv1.Deployment{
99151
TypeMeta: metav1.TypeMeta{
100152
APIVersion: appsv1.SchemeGroupVersion.String(),
@@ -122,13 +174,9 @@ func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pl
122174
Containers: []corev1.Container{
123175
{
124176
Name: name,
125-
Image: pluginInfo.HealthAnalyzerImage,
177+
Image: image,
126178
ImagePullPolicy: corev1.PullAlways,
127-
Args: []string{
128-
"serve",
129-
"--tls-cert-file=/etc/tls/private/tls.crt",
130-
"--tls-private-key-file=/etc/tls/private/tls.key",
131-
},
179+
Args: args,
132180
Env: []corev1.EnvVar{
133181
{
134182
Name: "PROM_URL",
@@ -156,25 +204,10 @@ func newHealthAnalyzerDeployment(namespace string, serviceAccountName string, pl
156204
},
157205
},
158206
TerminationMessagePolicy: corev1.TerminationMessageFallbackToLogsOnError,
159-
VolumeMounts: []corev1.VolumeMount{
160-
{
161-
MountPath: "/etc/tls/private",
162-
Name: volumeMountName,
163-
ReadOnly: true,
164-
},
165-
},
166-
},
167-
},
168-
Volumes: []corev1.Volume{
169-
{
170-
Name: volumeMountName,
171-
VolumeSource: corev1.VolumeSource{
172-
Secret: &corev1.SecretVolumeSource{
173-
SecretName: volumeMountName,
174-
},
175-
},
207+
VolumeMounts: volumeMounts,
176208
},
177209
},
210+
Volumes: volumes,
178211
},
179212
},
180213
},
@@ -218,3 +251,24 @@ func newHealthAnalyzerServiceMonitor(namespace string) *monv1.ServiceMonitor {
218251

219252
return serviceMonitor
220253
}
254+
255+
// newComponentHealthConfig returns the "components-config" ConfigMap in the given namespace;
256+
// its "components.yaml" entry carries the definition of the components whose health is evaluated.
257+
func newComponentHealthConfig(namespace string) *v1.ConfigMap {
258+
cm := v1.ConfigMap{
259+
TypeMeta: metav1.TypeMeta{
260+
APIVersion: v1.SchemeGroupVersion.String(),
261+
Kind: "ConfigMap",
262+
},
263+
ObjectMeta: metav1.ObjectMeta{
264+
Namespace: namespace,
265+
Name: "components-config",
266+
Labels: componentLabels("monitoring"),
267+
},
268+
Data: map[string]string{
269+
"components.yaml": componentHealthConfig,
270+
},
271+
}
272+
273+
return &cm
274+
}

0 commit comments

Comments
 (0)