Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion internal/controller/node/labels.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ import (
const (
nodeLabel = "sei.io/node"
dataDir = "/sei"
defaultSidecarImage = "189176372795.dkr.ecr.us-east-2.amazonaws.com/sei/seictl@sha256:d3962ce60c1a466d57086d6dff54750c2ea5c9227d40a6224e85f94de35b7379"
defaultSidecarImage = "ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e"
)

// resourceLabelsForNode returns labels for the StatefulSet pod template.
Expand Down
48 changes: 48 additions & 0 deletions internal/controller/node/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (

seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1"
"github.com/sei-protocol/sei-k8s-controller/internal/controller/observability"
"github.com/sei-protocol/sei-k8s-controller/internal/planner"
)

var allNodePhases = []string{
Expand All @@ -16,6 +17,12 @@ var allNodePhases = []string{
string(seiv1alpha1.PhaseTerminating),
}

// allTaskStatuses lists the task status values used as the "status" label of
// the monitor task status gauge. emitMonitorTaskStatus writes one series per
// entry, and cleanupMonitorTaskMetrics deletes one series per entry.
var allTaskStatuses = []string{
string(seiv1alpha1.TaskPending),
string(seiv1alpha1.TaskComplete),
string(seiv1alpha1.TaskFailed),
}

var (
nodePhaseGauge = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Expand Down Expand Up @@ -66,6 +73,22 @@ var (
},
[]string{"namespace", "node"},
)

monitorTaskCompletedTotal = prometheus.NewCounterVec(
prometheus.CounterOpts{
Name: "sei_controller_monitor_task_completed_total",
Help: "Monitor task terminal state transitions (DivergenceDetected, TaskFailed, TaskLost)",
},
[]string{"namespace", "node", "task_type", "reason"},
)

monitorTaskStatus = prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "sei_controller_monitor_task_status",
Help: "Current status of each monitor task (1=active, 0=inactive)",
},
[]string{"namespace", "node", "task_type", "status"},
)
)

func init() {
Expand All @@ -76,16 +99,41 @@ func init() {
nodeLastInitDuration,
sidecarRequestDuration,
sidecarUnreachableTotal,
monitorTaskCompletedTotal,
monitorTaskStatus,
)
}

// emitNodePhase reports the given phase for the node identified by ns/name on
// nodePhaseGauge, delegating to observability.EmitPhaseGauge together with the
// full list of known node phases.
func emitNodePhase(ns, name string, phase seiv1alpha1.SeiNodePhase) {
	current := string(phase)
	observability.EmitPhaseGauge(nodePhaseGauge, ns, name, current, allNodePhases)
}

// emitMonitorTaskTerminal increments monitorTaskCompletedTotal by one for the
// series identified by namespace, node, task type and reason.
func emitMonitorTaskTerminal(ns, node, taskType, reason string) {
	counter := monitorTaskCompletedTotal.WithLabelValues(ns, node, taskType, reason)
	counter.Inc()
}

// emitMonitorTaskStatus updates monitorTaskStatus for every entry in
// allTaskStatuses: the series whose status label equals status is set to 1
// and every other series is set to 0. If status matches no entry, all series
// for the task end up at 0.
func emitMonitorTaskStatus(ns, node, taskType, status string) {
	for i := range allTaskStatuses {
		candidate := allTaskStatuses[i]
		gauge := monitorTaskStatus.WithLabelValues(ns, node, taskType, candidate)

		var v float64
		if candidate == status {
			v = 1
		}
		gauge.Set(v)
	}
}

// cleanupMonitorTaskMetrics removes the monitorTaskStatus series of the node
// identified by ns/name for every combination of the given task types and the
// statuses in allTaskStatuses.
func cleanupMonitorTaskMetrics(ns, name string, taskTypes []string) {
	for i := range taskTypes {
		taskType := taskTypes[i]
		for j := range allTaskStatuses {
			monitorTaskStatus.DeleteLabelValues(ns, name, taskType, allTaskStatuses[j])
		}
	}
}

// cleanupNodeMetrics deletes every per-node metric series for the node
// identified by namespace/name so that no stale series stay exported once the
// node is gone.
//
// It covers the phase gauge, the last-init-duration gauge, the
// sidecar-unreachable counter, the reconcile-error counter, the monitor task
// status gauge and the monitor task terminal-transition counter.
func cleanupNodeMetrics(namespace, name string) {
	observability.DeletePhaseGauge(nodePhaseGauge, namespace, name, allNodePhases)
	nodeLastInitDuration.DeleteLabelValues(namespace, name)
	sidecarUnreachableTotal.DeleteLabelValues(namespace, name)
	observability.ReconcileErrorsTotal.DeleteLabelValues(seiNodeControllerName, namespace, name)
	cleanupMonitorTaskMetrics(namespace, name, []string{planner.TaskResultExport})

	// monitorTaskCompletedTotal also carries task_type and reason labels whose
	// values are not enumerated here, so match on the node identity alone to
	// drop every series belonging to this node.
	monitorTaskCompletedTotal.DeletePartialMatch(prometheus.Labels{
		"namespace": namespace,
		"node":      name,
	})
}
7 changes: 7 additions & 0 deletions internal/controller/node/monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func (r *SeiNodeReconciler) ensureMonitorTask(ctx context.Context, node *seiv1al
Status: seiv1alpha1.TaskPending,
SubmittedAt: now,
}
emitMonitorTaskStatus(node.Namespace, node.Name, req.Type, string(seiv1alpha1.TaskPending))
return r.Status().Patch(ctx, node, patch)
}

Expand Down Expand Up @@ -121,6 +122,9 @@ func (r *SeiNodeReconciler) pollMonitorTasks(ctx context.Context, node *seiv1alp
patched = true
terminal = true

emitMonitorTaskTerminal(node.Namespace, node.Name, key, ReasonDivergenceDetected)
emitMonitorTaskStatus(node.Namespace, node.Name, key, string(seiv1alpha1.TaskComplete))

meta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{
Type: ConditionResultExportComplete,
Status: metav1.ConditionTrue,
Expand Down Expand Up @@ -157,6 +161,9 @@ func (r *SeiNodeReconciler) failMonitorTask(node *seiv1alpha1.SeiNode, key strin
mt.Error = errMsg
node.Status.MonitorTasks[key] = mt

emitMonitorTaskTerminal(node.Namespace, node.Name, key, reason)
emitMonitorTaskStatus(node.Namespace, node.Name, key, string(seiv1alpha1.TaskFailed))

meta.SetStatusCondition(&node.Status.Conditions, metav1.Condition{
Type: ConditionResultExportComplete,
Status: metav1.ConditionFalse,
Expand Down
2 changes: 1 addition & 1 deletion internal/task/bootstrap_resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (
const (
bootstrapTerminationGracePeriod = int64(120)
bootstrapDataDir = "/sei"
bootstrapDefaultSidecarImage = "189176372795.dkr.ecr.us-east-2.amazonaws.com/sei/seictl@sha256:d3962ce60c1a466d57086d6dff54750c2ea5c9227d40a6224e85f94de35b7379"
bootstrapDefaultSidecarImage = "ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e"
bootstrapNodeLabel = "sei.io/node"
bootstrapComponentLabel = "sei.io/component"
)
Expand Down
2 changes: 1 addition & 1 deletion manifests/samples/seinode/pacific-1-full-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ spec:
args: ["start", "--home", "/sei"]

sidecar:
image: ghcr.io/sei-protocol/seictl@sha256:6314a5a05cf532841a181a4cd55c6d501db4c4c19bfe8173d887f8e435bc490c
image: ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e

genesis:
s3:
Expand Down
10 changes: 5 additions & 5 deletions manifests/samples/seinode/pacific-1-shadow-replayer.yaml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
# SeiNode — Pacific-1 Shadow Replayer
#
# Identical to pacific-1-replay except the entrypoint passes
# --skip-app-hash-validation so the node can block sync with a
# different execution engine (V2 vs Giga). The node is queryable
# via RPC for external comparison against the canonical chain.
# Replays the canonical pacific-1 chain with a different execution engine
# (Giga) via --skip-app-hash-validation. The sidecar continuously compares
# block results against the canonical RPC and completes when app-hash
# divergence is detected, uploading a DivergenceReport to S3.
apiVersion: sei.io/v1alpha1
kind: SeiNode
metadata:
Expand All @@ -14,7 +14,7 @@ spec:
image: ghcr.io/bdchatham/sei-shadow@sha256:e29af54effc908c3b998655f93713765ae476e40e39765530dc7a271648257ca

sidecar:
image: ghcr.io/sei-protocol/seictl@sha256:6314a5a05cf532841a181a4cd55c6d501db4c4c19bfe8173d887f8e435bc490c
image: ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e

entrypoint:
command: ["seid"]
Expand Down
2 changes: 1 addition & 1 deletion manifests/samples/seinode/pacific-1-snapshotter.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ spec:
image: "ghcr.io/sei-protocol/sei:v6.3.0"

sidecar:
image: ghcr.io/sei-protocol/seictl@sha256:6314a5a05cf532841a181a4cd55c6d501db4c4c19bfe8173d887f8e435bc490c
image: ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e

entrypoint:
command: ["seid"]
Expand Down
2 changes: 1 addition & 1 deletion manifests/samples/seinode/pacific-1-state-syncer.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ spec:
image: "ghcr.io/sei-protocol/sei:v6.3.0"

sidecar:
image: ghcr.io/sei-protocol/seictl@sha256:6314a5a05cf532841a181a4cd55c6d501db4c4c19bfe8173d887f8e435bc490c
image: ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e

entrypoint:
command: ["seid"]
Expand Down
2 changes: 1 addition & 1 deletion manifests/samples/seinodegroup/pacific-1-rpc-group.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ spec:
args: ["start", "--home", "/sei"]

sidecar:
image: ghcr.io/sei-protocol/seictl@sha256:6314a5a05cf532841a181a4cd55c6d501db4c4c19bfe8173d887f8e435bc490c
image: ghcr.io/sei-protocol/seictl@sha256:63860a7cf1810e70cc8647d72ff705f87a203250b12bbdec2f88f26b850b628e

genesis:
s3:
Expand Down
Loading