Skip to content
This repository was archived by the owner on Mar 24, 2025. It is now read-only.

Commit 9822b61

Browse files
authored
✨ Operator endpoints for cluster insights (#1312)
1 parent bb2f76a commit 9822b61

File tree

4 files changed

+169
-0
lines changed

4 files changed

+169
-0
lines changed

cmd/rig-operator/main.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"connectrpc.com/grpcreflect"
1616
"github.com/go-logr/logr"
1717
"github.com/rigdev/rig-go-api/operator/api/v1/capabilities/capabilitiesconnect"
18+
"github.com/rigdev/rig-go-api/operator/api/v1/cluster/clusterconnect"
1819
"github.com/rigdev/rig-go-api/operator/api/v1/pipeline/pipelineconnect"
1920
"github.com/rigdev/rig/cmd/rig-operator/apichecker"
2021
"github.com/rigdev/rig/cmd/rig-operator/certgen"
@@ -23,10 +24,12 @@ import (
2324
"github.com/rigdev/rig/pkg/build"
2425
"github.com/rigdev/rig/pkg/controller/plugin"
2526
"github.com/rigdev/rig/pkg/handler/api/capabilities"
27+
"github.com/rigdev/rig/pkg/handler/api/cluster"
2628
"github.com/rigdev/rig/pkg/handler/api/pipeline"
2729
"github.com/rigdev/rig/pkg/manager"
2830
"github.com/rigdev/rig/pkg/scheme"
2931
svccapabilities "github.com/rigdev/rig/pkg/service/capabilities"
32+
svccluster "github.com/rigdev/rig/pkg/service/cluster"
3033
"github.com/rigdev/rig/pkg/service/config"
3134
"github.com/rigdev/rig/pkg/service/objectstatus"
3235
svcpipeline "github.com/rigdev/rig/pkg/service/pipeline"
@@ -132,6 +135,8 @@ func run(cmd *cobra.Command, _ []string) error {
132135
svcpipeline.NewService,
133136
objectstatus.NewService,
134137
pipeline.NewHandler,
138+
svccluster.New,
139+
cluster.NewHandler,
135140
manager.New,
136141
),
137142
fx.Invoke(
@@ -143,10 +148,12 @@ func run(cmd *cobra.Command, _ []string) error {
143148
sh fx.Shutdowner,
144149
cap capabilitiesconnect.ServiceHandler,
145150
pip pipelineconnect.ServiceHandler,
151+
cluster clusterconnect.ServiceHandler,
146152
) {
147153
mux := http.NewServeMux()
148154
mux.Handle(capabilitiesconnect.NewServiceHandler(cap))
149155
mux.Handle(pipelineconnect.NewServiceHandler(pip))
156+
mux.Handle(clusterconnect.NewServiceHandler(cluster))
150157
mux.Handle(grpcreflect.NewHandlerV1(grpcreflect.NewStaticReflector(
151158
capabilitiesconnect.ServiceName,
152159
pipelineconnect.ServiceName,

deploy/charts/rig-operator/templates/clusterrole.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ rules:
1212
- secrets
1313
- pods
1414
- events
15+
- nodes
1516
verbs:
1617
- get
1718
- list
@@ -153,6 +154,14 @@ rules:
153154
- patch
154155
- update
155156
- watch
157+
- apiGroups:
158+
- metrics.k8s.io
159+
resources:
160+
- nodes
161+
verbs:
162+
- get
163+
- list
164+
- watch
156165
{{- range .Values.rbac.rules }}
157166
- apiGroups:
158167
{{- range .apiGroups }}

pkg/handler/api/cluster/handler.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package cluster
2+
3+
import (
4+
"context"
5+
6+
"connectrpc.com/connect"
7+
api_cluster "github.com/rigdev/rig-go-api/operator/api/v1/cluster"
8+
"github.com/rigdev/rig-go-api/operator/api/v1/cluster/clusterconnect"
9+
"github.com/rigdev/rig/pkg/service/cluster"
10+
)
11+
12+
func NewHandler(
13+
cluster cluster.Service,
14+
) clusterconnect.ServiceHandler {
15+
return &handler{
16+
cluster: cluster,
17+
}
18+
}
19+
20+
type handler struct {
21+
cluster cluster.Service
22+
}
23+
24+
func (h *handler) GetNodes(
25+
ctx context.Context, _ *connect.Request[api_cluster.GetNodesRequest],
26+
) (*connect.Response[api_cluster.GetNodesResponse], error) {
27+
nodes, err := h.cluster.GetNodes(ctx)
28+
if err != nil {
29+
return nil, err
30+
}
31+
32+
return connect.NewResponse(&api_cluster.GetNodesResponse{
33+
Nodes: nodes,
34+
}), nil
35+
}
36+
37+
func (h *handler) GetNodePods(
38+
ctx context.Context, req *connect.Request[api_cluster.GetNodePodsRequest],
39+
) (*connect.Response[api_cluster.GetNodePodsResponse], error) {
40+
pods, err := h.cluster.GetNodePods(ctx, req.Msg.GetNodeName())
41+
if err != nil {
42+
return nil, err
43+
}
44+
45+
return connect.NewResponse(&api_cluster.GetNodePodsResponse{
46+
Pods: pods,
47+
}), nil
48+
}

pkg/service/cluster/service.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package cluster
2+
3+
import (
4+
"context"
5+
"fmt"
6+
"maps"
7+
"slices"
8+
9+
"github.com/rigdev/rig-go-api/model"
10+
api_cluster "github.com/rigdev/rig-go-api/operator/api/v1/cluster"
11+
"github.com/rigdev/rig/pkg/pipeline"
12+
corev1 "k8s.io/api/core/v1"
13+
metricsv1beta1 "k8s.io/metrics/pkg/apis/metrics/v1beta1"
14+
"sigs.k8s.io/controller-runtime/pkg/client"
15+
)
16+
17+
type Service interface {
18+
GetNodes(ctx context.Context) ([]*api_cluster.Node, error)
19+
GetNodePods(ctx context.Context, nodeName string) ([]*api_cluster.Pod, error)
20+
}
21+
22+
func New(client client.Client) Service {
23+
return &service{
24+
client: client,
25+
}
26+
}
27+
28+
type service struct {
29+
client client.Client
30+
}
31+
32+
func (s *service) GetNodes(ctx context.Context) ([]*api_cluster.Node, error) {
33+
listReq := corev1.NodeList{}
34+
if err := s.client.List(ctx, &listReq); err != nil {
35+
return nil, fmt.Errorf("failed to list nodes: %w", err)
36+
}
37+
38+
nodes := map[string]*api_cluster.Node{}
39+
40+
for _, node := range listReq.Items {
41+
nodes[node.GetName()] = &api_cluster.Node{
42+
NodeName: node.GetName(),
43+
Allocateable: &model.Resources{
44+
CpuMillis: uint64(node.Status.Allocatable.Cpu().MilliValue()),
45+
MemoryBytes: uint64(node.Status.Allocatable.Memory().Value()),
46+
},
47+
MaxPods: uint64(node.Status.Allocatable.Pods().Value()),
48+
}
49+
}
50+
51+
list := metricsv1beta1.NodeMetricsList{}
52+
if err := s.client.List(ctx, &list); err != nil {
53+
return nil, fmt.Errorf("failed to list node metrics: %w", err)
54+
}
55+
56+
for _, node := range list.Items {
57+
n, ok := nodes[node.GetName()]
58+
if !ok {
59+
n = &api_cluster.Node{
60+
NodeName: node.GetName(),
61+
Allocateable: &model.Resources{},
62+
Usage: &model.Resources{},
63+
MaxPods: 0,
64+
}
65+
nodes[node.GetName()] = n
66+
}
67+
n.Usage = &model.Resources{
68+
CpuMillis: uint64(node.Usage.Cpu().MilliValue()),
69+
MemoryBytes: uint64(node.Usage.Memory().Value()),
70+
}
71+
}
72+
73+
keys := slices.Sorted((maps.Keys(nodes)))
74+
var res []*api_cluster.Node
75+
for _, k := range keys {
76+
res = append(res, nodes[k])
77+
}
78+
return res, nil
79+
}
80+
81+
func (s *service) GetNodePods(ctx context.Context, nodeName string) ([]*api_cluster.Pod, error) {
82+
listReq := corev1.PodList{}
83+
if err := s.client.List(ctx, &listReq, client.MatchingFields{
84+
"spec.nodeName": nodeName,
85+
}); err != nil {
86+
return nil, err
87+
}
88+
89+
var res []*api_cluster.Pod
90+
for _, pod := range listReq.Items {
91+
req := &model.Resources{}
92+
for _, c := range pod.Spec.Containers {
93+
req.CpuMillis += uint64(c.Resources.Requests.Cpu().MilliValue())
94+
req.MemoryBytes += uint64(c.Resources.Requests.Memory().Value())
95+
}
96+
res = append(res, &api_cluster.Pod{
97+
PodName: pod.GetName(),
98+
Namespace: pod.GetNamespace(),
99+
Requested: req,
100+
CapsuleName: pod.Labels[pipeline.LabelCapsule],
101+
})
102+
}
103+
104+
return res, nil
105+
}

0 commit comments

Comments
 (0)