diff --git a/charts/openstack-hypervisor-operator/templates/manager-rbac.yaml b/charts/openstack-hypervisor-operator/templates/manager-rbac.yaml
index 53af34e..da15994 100644
--- a/charts/openstack-hypervisor-operator/templates/manager-rbac.yaml
+++ b/charts/openstack-hypervisor-operator/templates/manager-rbac.yaml
@@ -13,7 +13,6 @@ rules:
   - get
   - list
   - patch
-  - update
   - watch
 - apiGroups:
   - ""
diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml
index 528d2cb..d769277 100644
--- a/config/rbac/role.yaml
+++ b/config/rbac/role.yaml
@@ -12,7 +12,6 @@ rules:
   - get
   - list
   - patch
-  - update
   - watch
 - apiGroups:
   - ""
diff --git a/internal/controller/gardener_node_lifecycle_controller.go b/internal/controller/gardener_node_lifecycle_controller.go
index fb1a113..0f30146 100644
--- a/internal/controller/gardener_node_lifecycle_controller.go
+++ b/internal/controller/gardener_node_lifecycle_controller.go
@@ -32,7 +32,6 @@ import (
 	corev1ac "k8s.io/client-go/applyconfigurations/core/v1"
 	v1 "k8s.io/client-go/applyconfigurations/meta/v1"
 	policyv1ac "k8s.io/client-go/applyconfigurations/policy/v1"
-	"k8s.io/client-go/util/retry"
 	ctrl "sigs.k8s.io/controller-runtime"
 	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/client/apiutil"
@@ -51,14 +50,13 @@ const (
 	labelDeployment          = "cobaltcore-maintenance-controller"
 	maintenancePodsNamespace = "kube-system"
 	labelCriticalComponent   = "node.gardener.cloud/critical-component"
-	valueReasonTerminating   = "terminating"

 	MaintenanceControllerName = "maintenance"
 )

 // The counter-side in gardener is here:
 // https://github.com/gardener/machine-controller-manager/blob/rel-v0.56/pkg/util/provider/machinecontroller/machine.go#L646

-// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;patch;update;watch
+// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch;update;patch
 // +kubebuilder:rbac:groups="apps",resources=deployments,verbs=create;delete;get;list;patch;update;watch
 // +kubebuilder:rbac:groups="policy",resources=poddisruptionbudgets,verbs=create;delete;get;list;patch;update;watch

@@ -66,63 +64,45 @@ func (r *GardenerNodeLifecycleController) Reconcile(ctx context.Context, req ctr
 	log := logger.FromContext(ctx).WithName(req.Name)
 	ctx = logger.IntoContext(ctx, log)

-	node := &corev1.Node{}
-	if err := r.Get(ctx, req.NamespacedName, node); err != nil {
-		// ignore not found errors, could be deleted
+	hv := &kvmv1.Hypervisor{}
+	if err := r.Get(ctx, req.NamespacedName, hv); err != nil {
 		return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
 	}

-	hv := kvmv1.Hypervisor{}
-	if err := r.Get(ctx, k8sclient.ObjectKey{Name: req.Name}, &hv); k8sclient.IgnoreNotFound(err) != nil {
-		return ctrl.Result{}, err
-	}
 	if !hv.Spec.LifecycleEnabled {
 		// Nothing to be done
 		return ctrl.Result{}, nil
 	}

-	if isTerminating(node) {
-		changed, err := setNodeLabels(ctx, r.Client, node, map[string]string{labelEvictionRequired: valueReasonTerminating})
-		if changed || err != nil {
-			return ctrl.Result{}, err
-		}
-	}
-
-	// We do not care about the particular value, as long as it isn't an error
-	var minAvailable int32 = 1
-	evictionValue, found := node.Labels[labelEvictionApproved]
-	if found && evictionValue != "false" {
-		minAvailable = 0
+	var minAvailable int32 = 0
+	if !meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) {
+		// Evicting condition is either not present or is true (i.e. ongoing)
+		minAvailable = 1 // Do not allow draining of the pod
 	}

-	if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
-		return r.ensureBlockingPodDisruptionBudget(ctx, node, minAvailable)
-	}); err != nil {
+	if err := r.ensureBlockingPodDisruptionBudget(ctx, hv, minAvailable); err != nil {
 		return ctrl.Result{}, err
 	}

 	onboardingCompleted := meta.IsStatusConditionFalse(hv.Status.Conditions, ConditionTypeOnboarding)
-
-	if err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
-		return r.ensureSignallingDeployment(ctx, node, minAvailable, onboardingCompleted)
-	}); err != nil {
+	if err := r.ensureSignallingDeployment(ctx, hv, minAvailable, onboardingCompleted); err != nil {
 		return ctrl.Result{}, err
 	}

 	return ctrl.Result{}, nil
 }

-func (r *GardenerNodeLifecycleController) ensureBlockingPodDisruptionBudget(ctx context.Context, node *corev1.Node, minAvailable int32) error {
-	name := nameForNode(node)
-	nodeLabels := labelsForNode(node)
-	gvk, err := apiutil.GVKForObject(node, r.Scheme)
+func (r *GardenerNodeLifecycleController) ensureBlockingPodDisruptionBudget(ctx context.Context, hypervisor *kvmv1.Hypervisor, minAvailable int32) error {
+	name := nameForHypervisor(hypervisor)
+	nodeLabels := labelsForHypervisor(hypervisor)
+	gvk, err := apiutil.GVKForObject(hypervisor, r.Scheme)
 	if err != nil {
 		return err
 	}

 	podDisruptionBudget := policyv1ac.PodDisruptionBudget(name, maintenancePodsNamespace).
 		WithLabels(nodeLabels).
-		WithOwnerReferences(OwnerReference(node, &gvk)).
+		WithOwnerReferences(OwnerReference(hypervisor, &gvk)).
 		WithSpec(policyv1ac.PodDisruptionBudgetSpec().
 			WithMinAvailable(intstr.FromInt32(minAvailable)).
 			WithSelector(v1.LabelSelector().WithMatchLabels(nodeLabels)))
@@ -130,35 +110,19 @@ func (r *GardenerNodeLifecycleController) ensureBlockingPodDisruptionBudget(ctx
 	return r.Apply(ctx, podDisruptionBudget, k8sclient.FieldOwner(MaintenanceControllerName))
 }

-func isTerminating(node *corev1.Node) bool {
-	conditions := node.Status.Conditions
-	if conditions == nil {
-		return false
-	}
-
-	// See: https://github.com/gardener/machine-controller-manager/blob/rel-v0.56/pkg/util/provider/machinecontroller/machine.go#L658-L659
-	for _, condition := range conditions {
-		if condition.Type == "Terminating" {
-			return true
-		}
-	}
-
-	return false
-}
-
-func nameForNode(node *corev1.Node) string {
-	return fmt.Sprintf("maint-%v", node.Name)
+func nameForHypervisor(hypervisor *kvmv1.Hypervisor) string {
+	return fmt.Sprintf("maint-%v", hypervisor.Name)
 }

-func labelsForNode(node *corev1.Node) map[string]string {
+func labelsForHypervisor(hypervisor *kvmv1.Hypervisor) map[string]string {
 	return map[string]string{
-		labelDeployment: nameForNode(node),
+		labelDeployment: nameForHypervisor(hypervisor),
 	}
 }

-func (r *GardenerNodeLifecycleController) ensureSignallingDeployment(ctx context.Context, node *corev1.Node, scale int32, ready bool) error {
-	name := nameForNode(node)
-	labels := labelsForNode(node)
+func (r *GardenerNodeLifecycleController) ensureSignallingDeployment(ctx context.Context, hypervisor *kvmv1.Hypervisor, scale int32, ready bool) error {
+	name := nameForHypervisor(hypervisor)
+	labels := labelsForHypervisor(hypervisor)
 	podLabels := maps.Clone(labels)
 	podLabels[labelCriticalComponent] = "true"

@@ -170,13 +134,13 @@ func (r *GardenerNodeLifecycleController) ensureSignallingDeployment(ctx context
 		command = "/bin/false"
 	}

-	gvk, err := apiutil.GVKForObject(node, r.Scheme)
+	gvk, err := apiutil.GVKForObject(hypervisor, r.Scheme)
 	if err != nil {
 		return err
 	}

 	deployment := apps1ac.Deployment(name, maintenancePodsNamespace).
-		WithOwnerReferences(OwnerReference(node, &gvk)).
+		WithOwnerReferences(OwnerReference(hypervisor, &gvk)).
 		WithLabels(labels).
 		WithSpec(apps1ac.DeploymentSpec().
 			WithReplicas(scale).
@@ -191,7 +155,7 @@ func (r *GardenerNodeLifecycleController) ensureSignallingDeployment(ctx context
 			WithSpec(corev1ac.PodSpec().
 				WithHostNetwork(true).
 				WithNodeSelector(map[string]string{
-					corev1.LabelHostname: node.Labels[corev1.LabelHostname],
+					corev1.LabelHostname: hypervisor.Labels[corev1.LabelHostname],
 				}).
 				WithTerminationGracePeriodSeconds(1).
 				WithTolerations(
@@ -225,7 +189,7 @@ func (r *GardenerNodeLifecycleController) SetupWithManager(mgr ctrl.Manager, nam

 	return ctrl.NewControllerManagedBy(mgr).
 		Named(MaintenanceControllerName).
-		For(&corev1.Node{}).
+		For(&kvmv1.Hypervisor{}).
 		Owns(&appsv1.Deployment{}). // trigger the r.Reconcile whenever an Own-ed deployment is created/updated/deleted
 		Owns(&policyv1.PodDisruptionBudget{}).
 		Complete(r)
diff --git a/internal/controller/gardener_node_lifecycle_controller_test.go b/internal/controller/gardener_node_lifecycle_controller_test.go
index fdfafad..0cea528 100644
--- a/internal/controller/gardener_node_lifecycle_controller_test.go
+++ b/internal/controller/gardener_node_lifecycle_controller_test.go
@@ -18,18 +18,27 @@ limitations under the License.
 */

 package controller

 import (
+	"context"
+	"fmt"
+
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
-	corev1 "k8s.io/api/core/v1"
+	appsv1 "k8s.io/api/apps/v1"
+	policyv1 "k8s.io/api/policy/v1"
+	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
-	"sigs.k8s.io/controller-runtime/pkg/client"
+
+	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
 )

-var _ = Describe("Gardener Maintenance Controller", func() {
-	const nodeName = "node-test"
-	var controller *GardenerNodeLifecycleController
+var _ = Describe("GardenerNodeLifecycleController", func() {
+	var (
+		controller     *GardenerNodeLifecycleController
+		hypervisorName = types.NamespacedName{Name: "hv-test"}
+		podName        = types.NamespacedName{Name: fmt.Sprintf("maint-%v", hypervisorName.Name), Namespace: "kube-system"}
+	)

 	BeforeEach(func() {
 		controller = &GardenerNodeLifecycleController{
@@ -37,35 +46,99 @@ var _ = Describe("Gardener Maintenance Controller", func() {
 			Scheme: k8sClient.Scheme(),
 		}

-		By("creating the namespace for the reconciler")
-		ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "monsoon3"}}
-		Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, ns))).To(Succeed())
-
-		By("creating the core resource for the Kind Node")
-		resource := &corev1.Node{
+		By("Creating a Hypervisor resource with LifecycleEnabled")
+		hypervisor := &kvmv1.Hypervisor{
 			ObjectMeta: metav1.ObjectMeta{
-				Name:   nodeName,
-				Labels: map[string]string{labelEvictionRequired: "true"},
+				Name:      hypervisorName.Name,
+				Namespace: hypervisorName.Namespace,
+			},
+			Spec: kvmv1.HypervisorSpec{
+				LifecycleEnabled: true,
 			},
 		}
-		Expect(k8sClient.Create(ctx, resource)).To(Succeed())
+		Expect(k8sClient.Create(ctx, hypervisor)).To(Succeed())
+		DeferCleanup(func(ctx context.Context) {
+			By("Deleting the Hypervisor resource")
+			hypervisor := &kvmv1.Hypervisor{}
+			Expect(k8sClient.Get(ctx, hypervisorName, hypervisor)).To(Succeed())
+			Expect(k8sClient.Delete(ctx, hypervisor)).To(Succeed())
+		})
 	})
+
+	// After all BeforeEach setup has run, reconcile a few times so the applied objects settle
+	JustBeforeEach(func(ctx context.Context) {
+		req := ctrl.Request{NamespacedName: hypervisorName}
+		for range 3 {
+			_, err := controller.Reconcile(ctx, req)
+			Expect(err).NotTo(HaveOccurred())
+		}
 	})

-	AfterEach(func() {
-		node := &corev1.Node{ObjectMeta: metav1.ObjectMeta{Name: nodeName}}
-		By("Cleanup the specific node")
-		Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, node))).To(Succeed())
+	Context("setting the pod disruption budget", func() {
+		When("not evicted", func() {
+			It("should set the pdb to minimum 1", func() {
+				pdb := &policyv1.PodDisruptionBudget{}
+				Expect(k8sClient.Get(ctx, podName, pdb)).To(Succeed())
+				Expect(pdb.Spec.MinAvailable).To(HaveValue(HaveField("IntVal", BeEquivalentTo(1))))
+			})
+		})
+		When("evicted", func() {
+			BeforeEach(func() {
+				hv := &kvmv1.Hypervisor{}
+				Expect(k8sClient.Get(ctx, hypervisorName, hv)).To(Succeed())
+				meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
+					Type:    kvmv1.ConditionTypeEvicting,
+					Status:  metav1.ConditionFalse,
+					Reason:  "dontcare",
+					Message: "dontcare",
+				})
+				Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed())
+			})
+
+			It("should set the pdb to minimum 0", func() {
+				pdb := &policyv1.PodDisruptionBudget{}
+				Expect(k8sClient.Get(ctx, podName, pdb)).To(Succeed())
+				Expect(pdb.Spec.MinAvailable).To(HaveValue(HaveField("IntVal", BeEquivalentTo(0))))
+			})
+		})
 	})

-	Context("When reconciling a node", func() {
-		It("should successfully reconcile the resource", func() {
-			req := ctrl.Request{
-				NamespacedName: types.NamespacedName{Name: nodeName},
-			}
+	Context("creating the signalling deployment", func() {
+		When("onboarding not completed", func() {
+			It("should create a failing deployment for the node", func() {
+				deployment := &appsv1.Deployment{}
+				Expect(k8sClient.Get(ctx, podName, deployment)).To(Succeed())
+				Expect(deployment.Spec.Template.Spec.Containers).To(
+					ContainElement(
+						HaveField("StartupProbe",
+							HaveField("ProbeHandler",
+								HaveField("Exec",
+									HaveField("Command", ContainElements("/bin/false")))))))
+			})
+		})
+		When("onboarding is completed", func() {
+			BeforeEach(func() {
+				hv := &kvmv1.Hypervisor{}
+				Expect(k8sClient.Get(ctx, hypervisorName, hv)).To(Succeed())
+				meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
+					Type:    ConditionTypeOnboarding,
+					Status:  metav1.ConditionFalse,
+					Reason:  "dontcare",
+					Message: "dontcare",
+				})
+				Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed())
+			})

-			By("Reconciling the created resource")
-			_, err := controller.Reconcile(ctx, req)
-			Expect(err).NotTo(HaveOccurred())
+			It("should create a succeeding deployment for the node", func() {
+				deployment := &appsv1.Deployment{}
+				Expect(k8sClient.Get(ctx, podName, deployment)).To(Succeed())
+				Expect(deployment.Spec.Template.Spec.Containers).To(
+					ContainElement(
+						HaveField("StartupProbe",
+							HaveField("ProbeHandler",
+								HaveField("Exec",
+									HaveField("Command", ContainElements("/bin/true")))))))
+			})
 		})
 	})
 })
diff --git a/internal/controller/utils.go b/internal/controller/utils.go
index 5548684..9c9e3b3 100644
--- a/internal/controller/utils.go
+++ b/internal/controller/utils.go
@@ -19,11 +19,9 @@ package controller

 import (
 	"bytes"
-	"context"
 	"errors"
 	"fmt"
 	"io"
-	"maps"
 	"net/http"
 	"os"
 	"slices"
@@ -32,22 +30,10 @@
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	v1ac "k8s.io/client-go/applyconfigurations/meta/v1"
-	"sigs.k8s.io/controller-runtime/pkg/client"

 	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
 )

-// setNodeLabels sets the labels on the node.
-func setNodeLabels(ctx context.Context, writer client.Writer, node *corev1.Node, labels map[string]string) (bool, error) {
-	newNode := node.DeepCopy()
-	maps.Copy(newNode.Labels, labels)
-	if maps.Equal(node.Labels, newNode.Labels) {
-		return false, nil
-	}
-
-	return true, writer.Patch(ctx, newNode, client.MergeFrom(node))
-}
-
 func InstanceHaUrl(region, zone, hostname string) string {
 	if haURL, found := os.LookupEnv("KVM_HA_SERVICE_URL"); found {
 		return haURL
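
For context, the two gating decisions the reconciler now derives from the Hypervisor status can be summarized in a small, self-contained sketch. The helper names (minAvailableFor, probeCommand) and the literal condition strings are illustrative only — the real constants live in kvmv1 and the controller package — but the condition handling mirrors Reconcile and ensureSignallingDeployment above: only an explicit Evicting=False releases the drain-blocking PDB, and the signalling pod's startup probe only succeeds once onboarding is completed.

package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// Assumed string values; the patch uses kvmv1.ConditionTypeEvicting
// and the controller package's ConditionTypeOnboarding instead.
const (
	conditionEvicting   = "Evicting"
	conditionOnboarding = "Onboarding"
)

// minAvailableFor returns the PDB minAvailable: an absent or still-true
// Evicting condition keeps the signalling pod undrainable (blocking the
// gardener node drain); an explicit Evicting=False releases it.
func minAvailableFor(conds []metav1.Condition) int32 {
	if meta.IsStatusConditionFalse(conds, conditionEvicting) {
		return 0 // eviction finished, allow the node drain
	}
	return 1 // keep the blocking PDB in place
}

// probeCommand picks the startup-probe command for the signalling pod,
// which is labelled as a gardener critical component: it only becomes
// Ready once Onboarding=False, i.e. onboarding has completed.
func probeCommand(conds []metav1.Condition) string {
	if meta.IsStatusConditionFalse(conds, conditionOnboarding) {
		return "/bin/true"
	}
	return "/bin/false"
}

func main() {
	evicted := []metav1.Condition{{Type: conditionEvicting, Status: metav1.ConditionFalse}}
	fmt.Println(minAvailableFor(nil), minAvailableFor(evicted)) // 1 0
	fmt.Println(probeCommand(nil))                              // /bin/false
}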