Go.Rig-Operator/deploy/rig-operator/internal/controller/clusterblueprint_controller.go

package controller

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/types"
	"k8s.io/client-go/tools/record"
	ctrl "sigs.k8s.io/controller-runtime"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
	"sigs.k8s.io/controller-runtime/pkg/log"

	rigv1 "vanderlande.com/ittp/appstack/rig-operator/api/v1alpha1"
	"vanderlande.com/ittp/appstack/rig-operator/internal/builder"
	"vanderlande.com/ittp/appstack/rig-operator/internal/helm"
	"vanderlande.com/ittp/appstack/rig-operator/internal/provider"
	"vanderlande.com/ittp/appstack/rig-operator/internal/provider/harvester"
	"vanderlande.com/ittp/appstack/rig-operator/internal/provider/vsphere"
	harvesterTemplate "vanderlande.com/ittp/appstack/rig-operator/internal/templates/harvester"
	vsphereTemplate "vanderlande.com/ittp/appstack/rig-operator/internal/templates/vsphere"
)

const (
	rigFinalizer = "rig.appstack.io/finalizer"
)

// ClusterBlueprintReconciler reconciles a ClusterBlueprint object
type ClusterBlueprintReconciler struct {
	client.Client
	Scheme   *runtime.Scheme
	Recorder record.EventRecorder
}

// +kubebuilder:rbac:groups=rig.appstack.io,resources=clusterblueprints,verbs=get;list;watch;create;update;patch;delete
// +kubebuilder:rbac:groups=rig.appstack.io,resources=clusterblueprints/status,verbs=get;update;patch
// +kubebuilder:rbac:groups=rig.appstack.io,resources=clusterblueprints/finalizers,verbs=update
// +kubebuilder:rbac:groups=rig.appstack.io,resources=infrablueprints,verbs=get;list;watch
// +kubebuilder:rbac:groups=rig.appstack.io,resources=harvesterblueprints,verbs=get;list;watch
// +kubebuilder:rbac:groups="",resources=secrets,verbs=get;list;watch;create;update;patch;delete
func (r *ClusterBlueprintReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
	l := log.FromContext(ctx)

	// 1. Fetch ClusterBlueprint (CBP)
	cbp := &rigv1.ClusterBlueprint{}
	if err := r.Get(ctx, req.NamespacedName, cbp); err != nil {
		return ctrl.Result{}, client.IgnoreNotFound(err)
	}

	// 2. Handle deletion
	if !cbp.ObjectMeta.DeletionTimestamp.IsZero() {
		return r.handleDelete(ctx, cbp)
	}

	// 3. Ensure finalizer
	if !controllerutil.ContainsFinalizer(cbp, rigFinalizer) {
		controllerutil.AddFinalizer(cbp, rigFinalizer)
		if err := r.Update(ctx, cbp); err != nil {
			return ctrl.Result{}, err
		}
	}

	// 4. Fetch InfraBlueprint (IBP)
	ibp := &rigv1.InfraBlueprint{}
	if err := r.Get(ctx, types.NamespacedName{Name: cbp.Spec.InfraBlueprintRef, Namespace: cbp.Namespace}, ibp); err != nil {
		l.Error(err, "InfraBlueprint not found", "Infra", cbp.Spec.InfraBlueprintRef)
		r.updateStatus(ctx, cbp, "PendingInfra", false)
		return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil
	}

	// =====================================================================
	// 4.5. QUOTA CHECK (The Gatekeeper)
	// Only check quota if we are NOT already deployed.
	// (Existing clusters keep running even if the quota shrinks later.)
	// =====================================================================
	if cbp.Status.Phase != "Deployed" {
		if err := r.checkQuota(cbp, ibp); err != nil {
			l.Error(err, "Quota Exceeded")
			// We stop here: the Helm apply will not run.
			r.updateStatus(ctx, cbp, "QuotaExceeded", false)
			// Requeue slowly to check whether resources freed up later.
			return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
		}
	}

	// 5. Select Strategy based on the Infra ProviderRef
	var selectedStrategy provider.Strategy
	var baseTemplate []byte
	var credentialSecret string

	switch ibp.Spec.ProviderRef.Kind {
	case "HarvesterBlueprint":
		// A. Fetch the specific Harvester Config (HBP)
		hbp := &rigv1.HarvesterBlueprint{}
		hbpName := types.NamespacedName{Name: ibp.Spec.ProviderRef.Name, Namespace: cbp.Namespace}
		if err := r.Get(ctx, hbpName, hbp); err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to load HarvesterBlueprint: %w", err)
		}

		// B. Ensure Identity (Mint ServiceAccount/Secret)
		idMgr := harvester.NewIdentityManager(r.Client, r.Scheme)
		secretName, err := idMgr.Ensure(ctx, cbp, ibp, hbp)
		if err != nil {
			l.Error(err, "Failed to ensure identity")
			r.updateStatus(ctx, cbp, "ProvisioningFailed", false)
			return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
		}
		credentialSecret = secretName

		// C. Load Defaults & Init Strategy, passing ibp.Spec.RancherURL to the factory.
		defaults, err := harvesterTemplate.GetDefaults()
		if err != nil {
			return ctrl.Result{}, err
		}
		baseTemplate = harvesterTemplate.GetBaseValues()
		selectedStrategy = harvester.NewStrategy(
			hbp,
			ibp.Spec.UserData,
			ibp.Spec.RancherURL,
			defaults,
		)

	case "VsphereBlueprint":
		// A. Fetch the specific vSphere Config (VBP)
		vbp := &rigv1.VsphereBlueprint{}
		vbpName := types.NamespacedName{Name: ibp.Spec.ProviderRef.Name, Namespace: cbp.Namespace}
		if err := r.Get(ctx, vbpName, vbp); err != nil {
			return ctrl.Result{}, fmt.Errorf("failed to load VsphereBlueprint: %w", err)
		}

		// B. Load Defaults (CPU/RAM sizing safety nets)
		defaults, err := vsphereTemplate.GetDefaults()
		if err != nil {
			return ctrl.Result{}, err
		}
		baseTemplate = vsphereTemplate.GetBaseValues()

		// C. Init Strategy
		// Note: vSphere typically uses the global cloudCredentialSecret defined in the
		// InfraBlueprint rather than minting dynamic per-cluster tokens like Harvester does.
		credentialSecret = ibp.Spec.CloudCredentialSecret
		selectedStrategy = vsphere.NewStrategy(
			vbp,
			ibp.Spec.UserData,
			ibp.Spec.RancherURL,
			defaults,
		)

	default:
		return ctrl.Result{}, fmt.Errorf("unsupported provider kind: %s", ibp.Spec.ProviderRef.Kind)
	}

	// 6. Build Helm Values (Generic Engine)
	masterBuilder := builder.NewMasterBuilder(selectedStrategy, baseTemplate)
	values, err := masterBuilder.Build(ctx, cbp, credentialSecret)
	if err != nil {
		l.Error(err, "Failed to build helm values")
		r.updateStatus(ctx, cbp, "ConfigGenerationFailed", false)
		return ctrl.Result{}, nil // Fatal error, don't retry until the config changes
	}

	// 7. Apply Helm Chart
	// We use the ChartConfig extracted by the MasterBuilder (from the YAML defaults).
	chartCfg := masterBuilder.GetChartConfig()
	helmConfig := helm.Config{
		Namespace:   cbp.Namespace,
		ReleaseName: cbp.Name, // We use the Cluster name as the Release name.
		RepoURL:     chartCfg.Repo,
		ChartName:   chartCfg.Name,
		Version:     chartCfg.Version,
		Values:      values,
	}
	l.Info("Applying Helm Release", "Release", cbp.Name, "Chart", chartCfg.Name)
	if err := helm.Apply(helmConfig); err != nil {
		l.Error(err, "Helm Install/Upgrade failed")
		r.updateStatus(ctx, cbp, "HelmApplyFailed", false)
		return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil
	}

	// 8. Success!
	r.updateStatus(ctx, cbp, "Deployed", true)
	return ctrl.Result{RequeueAfter: 10 * time.Minute}, nil // Re-sync periodically
}

func (r *ClusterBlueprintReconciler) handleDelete(ctx context.Context, cbp *rigv1.ClusterBlueprint) (ctrl.Result, error) {
	if controllerutil.ContainsFinalizer(cbp, rigFinalizer) {
		// 1. Uninstall Helm Release (best effort)
		helmCfg := helm.Config{
			Namespace:   cbp.Namespace,
			ReleaseName: cbp.Name,
		}
		if err := helm.Uninstall(helmCfg); err != nil {
			log.FromContext(ctx).Error(err, "Failed to uninstall helm release during cleanup")
		}

		// 2. Cleanup Identity (Harvester SA)
		// We look up IBP -> HBP again to know WHERE to clean up; if either is already
		// gone, the identity cleanup is skipped.
		ibp := &rigv1.InfraBlueprint{}
		if err := r.Get(ctx, types.NamespacedName{Name: cbp.Spec.InfraBlueprintRef, Namespace: cbp.Namespace}, ibp); err == nil {
			if ibp.Spec.ProviderRef.Kind == "HarvesterBlueprint" {
				hbp := &rigv1.HarvesterBlueprint{}
				if err := r.Get(ctx, types.NamespacedName{Name: ibp.Spec.ProviderRef.Name, Namespace: cbp.Namespace}, hbp); err == nil {
					idMgr := harvester.NewIdentityManager(r.Client, r.Scheme)
					idMgr.Cleanup(ctx, cbp, ibp, hbp)
				}
			}
		}

		// 3. Remove Finalizer
		controllerutil.RemoveFinalizer(cbp, rigFinalizer)
		if err := r.Update(ctx, cbp); err != nil {
			return ctrl.Result{}, err
		}
	}
	return ctrl.Result{}, nil
}

func (r *ClusterBlueprintReconciler) updateStatus(ctx context.Context, cbp *rigv1.ClusterBlueprint, phase string, ready bool) {
	cbp.Status.Phase = phase
	cbp.Status.Ready = ready
	if err := r.Status().Update(ctx, cbp); err != nil {
		log.FromContext(ctx).Error(err, "Failed to update status")
	}
}

// SetupWithManager sets up the controller with the Manager.
func (r *ClusterBlueprintReconciler) SetupWithManager(mgr ctrl.Manager) error {
	return ctrl.NewControllerManagedBy(mgr).
		For(&rigv1.ClusterBlueprint{}).
		Complete(r)
}
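
// Optional extension (sketch only, not part of the original controller): the reconciler
// above relies on RequeueAfter to notice later changes to the referenced InfraBlueprint,
// e.g. a quota being raised after a QuotaExceeded phase. An event-driven alternative would
// map InfraBlueprint events back onto the ClusterBlueprints that reference them, roughly
// (assuming the controller-runtime v0.15+ builder API):
//
//	Watches(&rigv1.InfraBlueprint{},
//		handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []reconcile.Request {
//			// List ClusterBlueprints in obj's namespace and enqueue those whose
//			// Spec.InfraBlueprintRef equals obj.GetName().
//			return nil // placeholder
//		}))
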
// checkQuota calculates the resources this cluster requires and compares them against the
// quota and current usage reported on the InfraBlueprint.
func (r *ClusterBlueprintReconciler) checkQuota(cbp *rigv1.ClusterBlueprint, ibp *rigv1.InfraBlueprint) error {
	// 1. Calculate what this cluster needs.
	var reqCpu, reqMem, reqDisk int

	// Control Plane sizing (safe defaults; ideally kept in sync with the template/strategy defaults).
	cpCount := 1
	if cbp.Spec.ControlPlaneHA {
		cpCount = 3
	}
	reqCpu += cpCount * 4
	reqMem += cpCount * 8
	reqDisk += cpCount * 40

	// Worker Pools sizing
	for _, pool := range cbp.Spec.WorkerPools {
		reqCpu += pool.Quantity * pool.CpuCores
		reqMem += pool.Quantity * pool.MemoryGB
		reqDisk += pool.Quantity * pool.DiskGB
	}

	// 2. Check against the limits.
	// Status.Usage is calculated asynchronously by the InfraController: it covers the other
	// clusters, but may or may not yet include this one. Checking (Usage + Request) > Max is
	// therefore the safest admission check for a new provisioning; at worst it is slightly
	// conservative if this cluster was already counted.
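	//
	// Worked example (illustrative numbers only): with MaxCPU=100 and UsedCPU=88, an HA
	// control plane (3*4 = 12 vCPU) plus one worker pool of 2 nodes with 4 cores each
	// (2*4 = 8 vCPU) requests 20 vCPU; 88+20 = 108 > 100, so provisioning is refused.
	// With UsedCPU=80 the same request passes, since 80+20 = 100 does not exceed the limit.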
	q := ibp.Spec.Quota
	u := ibp.Status.Usage
	if q.MaxCPU > 0 && (u.UsedCPU+reqCpu) > q.MaxCPU {
		return fmt.Errorf("requested CPU %d exceeds remaining quota (Max: %d, Used: %d)", reqCpu, q.MaxCPU, u.UsedCPU)
	}
	if q.MaxMemoryGB > 0 && (u.UsedMemoryGB+reqMem) > q.MaxMemoryGB {
		return fmt.Errorf("requested Mem %dGB exceeds remaining quota (Max: %d, Used: %d)", reqMem, q.MaxMemoryGB, u.UsedMemoryGB)
	}
	if q.MaxDiskGB > 0 && (u.UsedDiskGB+reqDisk) > q.MaxDiskGB {
		return fmt.Errorf("requested Disk %dGB exceeds remaining quota (Max: %d, Used: %d)", reqDisk, q.MaxDiskGB, u.UsedDiskGB)
	}
	return nil
}
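
// checkQuotaSketch is an illustrative sketch added for documentation, not part of the
// original controller: it shows checkQuota rejecting a request with concrete numbers.
// It only relies on field names already used above (ControlPlaneHA, Quota.MaxCPU,
// Usage.UsedCPU) and assumes Quota and Usage are plain (non-pointer) structs.
func (r *ClusterBlueprintReconciler) checkQuotaSketch() error {
	var cbp rigv1.ClusterBlueprint
	cbp.Spec.ControlPlaneHA = true // HA => 3 control-plane nodes => 3*4 = 12 vCPU requested

	var ibp rigv1.InfraBlueprint
	ibp.Spec.Quota.MaxCPU = 16   // quota ceiling
	ibp.Status.Usage.UsedCPU = 8 // already consumed by other clusters

	// 8 used + 12 requested = 20 > 16, so checkQuota returns a quota error here.
	return r.checkQuota(&cbp, &ibp)
}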