From 58df4b501b75893bbc3a85d250b8029e322cc243 Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Mon, 23 Feb 2026 18:56:56 +0100 Subject: [PATCH 1/3] fix infinite loop on reconciliation --- .../controller/postgrescluster_controller.go | 95 +++++++++++++------ 1 file changed, 66 insertions(+), 29 deletions(-) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index ed14df3bb..741c4ff47 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -32,7 +32,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" client "sigs.k8s.io/controller-runtime/pkg/client" logs "sigs.k8s.io/controller-runtime/pkg/log" - "time" ) // PostgresClusterReconciler reconciles a PostgresCluster object @@ -41,9 +40,20 @@ type PostgresClusterReconciler struct { Scheme *runtime.Scheme } -const ( - retryDelaytimer = time.Second * 15 -) +// This struct is used to compare the merged configuration from PostgresClusterClass and PostgresClusterSpec +// in a normalized way, and not to use CNPG-default values which are causing false positive diff state while reconciliation loop. +// It contains only the fields that are relevant for our reconciliation and that we want to compare when deciding whether to update the CNPG Cluster spec or not. +type normalizedCNPGClusterSpec struct { + ImageName string + Instances int + // Parameters we set, instead of complete spec from CNPG + CustomDefinedParameters map[string]string + PgHBA []string + DefaultDatabase string + Owner string + StorageSize string + Resources corev1.ResourceRequirements +} // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=enterprise.splunk.com,resources=postgresclusters/status,verbs=get;update;patch @@ -93,35 +103,31 @@ func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ // 4. Build the desired CNPG Cluster spec based on the merged configuration. desiredSpec := r.buildCNPGClusterSpec(mergedConfig) - // 5. If CNPG cluster doesn't exist, create it. + // 5. Fetch existing CNPG Cluster or create it if it doesn't exist yet. existingCNPG := &cnpgv1.Cluster{} - getCNPGClusterErr := r.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) - if apierrors.IsNotFound(getCNPGClusterErr) { - logger.Info("CNPG Cluster not found, creating:", "name", postgresCluster.Name) - cnpgCluster = r.buildCNPGCluster(postgresCluster, mergedConfig) - if buildCNPGClusterErr := r.Create(ctx, cnpgCluster); buildCNPGClusterErr != nil { - logger.Error(buildCNPGClusterErr, "Failed to create CNPG Cluster") - r.setCondition(postgresCluster, metav1.ConditionFalse, "ClusterBuildFailed", buildCNPGClusterErr.Error()) - return res, buildCNPGClusterErr + err = r.Get(ctx, types.NamespacedName{Name: postgresCluster.Name, Namespace: postgresCluster.Namespace}, existingCNPG) + switch { + case apierrors.IsNotFound(err): + logger.Info("CNPG Cluster not found, creating", "name", postgresCluster.Name) + newCluster := r.buildCNPGCluster(postgresCluster, mergedConfig) + if err = r.Create(ctx, newCluster); err != nil { + logger.Error(err, "Failed to create CNPG Cluster") + r.setCondition(postgresCluster, metav1.ConditionFalse, "ClusterBuildFailed", err.Error()) + return res, err } - r.setCondition(postgresCluster, metav1.ConditionTrue, "ClusterBuildSucceeded", fmt.Sprintf("CNPG cluster build Succeeded: %s", postgresCluster.Name)) logger.Info("CNPG Cluster created successfully, requeueing for status update", "name", postgresCluster.Name) - return ctrl.Result{RequeueAfter: retryDelaytimer}, nil - } else if getCNPGClusterErr != nil { - logger.Error(getCNPGClusterErr, "Failed to get CNPG Cluster") - r.setCondition(postgresCluster, metav1.ConditionFalse, "ClusterGetFailed", getCNPGClusterErr.Error()) - return res, getCNPGClusterErr - } else { - cnpgCluster = existingCNPG - } - - // 6. Synchronize the existing CNPG Cluster spec with the desired configuration. - if err := r.Get(ctx, types.NamespacedName{Name: cnpgCluster.Name, Namespace: cnpgCluster.Namespace}, cnpgCluster); err != nil { - logger.Error(err, "Failed to fetch CNPG Cluster for update check", "name", cnpgCluster.Name) - r.setCondition(postgresCluster, metav1.ConditionFalse, "ClusterGetFailed", fmt.Sprintf("Failed to fetch CNPG Cluster for update check: %v", err)) + return ctrl.Result{RequeueAfter: retryDelay}, nil + case err != nil: + logger.Error(err, "Failed to get CNPG Cluster") + r.setCondition(postgresCluster, metav1.ConditionFalse, "ClusterGetFailed", err.Error()) return res, err } - if !equality.Semantic.DeepEqual(cnpgCluster.Spec, desiredSpec) { + cnpgCluster = existingCNPG + + // 6. If CNPG Cluster exists, compare the current spec with the desired spec and update if necessary. + currentNormalizedSpec := normalizeCNPGClusterSpec(cnpgCluster.Spec, mergedConfig.PostgreSQLConfig) + desiredNormalizedSpec := normalizeCNPGClusterSpec(desiredSpec, mergedConfig.PostgreSQLConfig) + if !equality.Semantic.DeepEqual(currentNormalizedSpec, desiredNormalizedSpec) { logger.Info("Desired CNPG Cluster spec is different from the current spec, need to patch", "name", cnpgCluster.Name) originalCluster := cnpgCluster.DeepCopy() cnpgCluster.Spec = desiredSpec @@ -134,6 +140,8 @@ func (r *PostgresClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ r.setCondition(postgresCluster, metav1.ConditionFalse, "ClusterUpdateFailed", patchCNPGClusterErr.Error()) return res, patchCNPGClusterErr } + logger.Info("CNPG Cluster patched successfully, requeueing for status update", "name", cnpgCluster.Name) + return ctrl.Result{RequeueAfter: retryDelay}, nil } // 7. Report progress back to the user and manage the reconciliation lifecycle. logger.Info("Reconciliation completed successfully", "name", postgresCluster.Name) @@ -180,7 +188,9 @@ func (r *PostgresClusterReconciler) getMergedConfig(clusterClass *enterprisev4.P return resultConfig } -// buildCNPGClusterSpec builds the desired CNPG ClusterSpec and returns an error if mandatory fields are missing. +// buildCNPGClusterSpec builds the desired CNPG ClusterSpec. +// IMPORTANT: any field added here must also be added to normalizedCNPGClusterSpec and normalizeCNPGClusterSpec, +// otherwise it will not be included in drift detection and changes will be silently ignored. func (r *PostgresClusterReconciler) buildCNPGClusterSpec(mergedConfig *enterprisev4.PostgresClusterSpec) cnpgv1.ClusterSpec { // 3. Build the Spec @@ -277,6 +287,33 @@ func (r *PostgresClusterReconciler) setCondition(postgresCluster *enterprisev4.P }) } +func normalizeCNPGClusterSpec(spec cnpgv1.ClusterSpec, customDefinedParameters map[string]string) normalizedCNPGClusterSpec { + normalizedConf := normalizedCNPGClusterSpec{ + ImageName: spec.ImageName, + Instances: spec.Instances, + // Parameters intentionally excluded — CNPG injects defaults that we don't change + StorageSize: spec.StorageConfiguration.Size, + Resources: spec.Resources, + } + + if len(customDefinedParameters) > 0 { + normalizedConf.CustomDefinedParameters = make(map[string]string) + for k := range customDefinedParameters { + normalizedConf.CustomDefinedParameters[k] = spec.PostgresConfiguration.Parameters[k] + } + } + if len(spec.PostgresConfiguration.PgHBA) > 0 { + normalizedConf.PgHBA = spec.PostgresConfiguration.PgHBA + } + + // To prevent nil pointer dereference in case Bootstrap or InitDB is nil, we need to check for that before accessing the fields. + if spec.Bootstrap != nil && spec.Bootstrap.InitDB != nil { + normalizedConf.DefaultDatabase = spec.Bootstrap.InitDB.Database + normalizedConf.Owner = spec.Bootstrap.InitDB.Owner + } + return normalizedConf +} + // SetupWithManager sets up the controller with the Manager. func (r *PostgresClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). From 3d3d57908764bb419245e5071d99b047e6cade5b Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Tue, 24 Feb 2026 16:46:31 +0100 Subject: [PATCH 2/3] Removed redundant comment. --- internal/controller/postgrescluster_controller.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index 741c4ff47..4d8da0d8b 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -306,7 +306,6 @@ func normalizeCNPGClusterSpec(spec cnpgv1.ClusterSpec, customDefinedParameters m normalizedConf.PgHBA = spec.PostgresConfiguration.PgHBA } - // To prevent nil pointer dereference in case Bootstrap or InitDB is nil, we need to check for that before accessing the fields. if spec.Bootstrap != nil && spec.Bootstrap.InitDB != nil { normalizedConf.DefaultDatabase = spec.Bootstrap.InitDB.Database normalizedConf.Owner = spec.Bootstrap.InitDB.Owner From 48ba1c1e9bab559b93507ea845b520f939de894c Mon Sep 17 00:00:00 2001 From: dpishchenkov Date: Wed, 25 Feb 2026 18:01:59 +0100 Subject: [PATCH 3/3] Resolve merge conflicts --- internal/controller/postgrescluster_controller.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/internal/controller/postgrescluster_controller.go b/internal/controller/postgrescluster_controller.go index d1faeb9b7..aa4d47294 100644 --- a/internal/controller/postgrescluster_controller.go +++ b/internal/controller/postgrescluster_controller.go @@ -20,8 +20,6 @@ import ( "context" "errors" "fmt" - "time" - cnpgv1 "github.com/cloudnative-pg/cloudnative-pg/api/v1" enterprisev4 "github.com/splunk/splunk-operator/api/v4" corev1 "k8s.io/api/core/v1"