Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -491,10 +491,13 @@ post-install-hook:
x509-cert: ## Create X.509 cert at path tmp/x509/ (see docs/x509-user.md)
go run scripts/create_x509.go

clean: ## Clean built binaries
.PHONY: clean-gen-crds
clean-gen-crds: ## Clean only generated CRD files
rm -f config/generated/crd/bases/crds.yaml

clean: clean-gen-crds ## Clean built binaries
rm -rf bin/*
rm -rf config/manifests/bases/
rm -f config/generated/crd/bases/crds.yaml
rm -f config/crd/bases/*.yaml
rm -f helm-charts/atlas-operator-crds/templates/*.yaml
rm -f config/rbac/clusterwide/role.yaml
Expand Down Expand Up @@ -607,7 +610,7 @@ clear-e2e-leftovers: ## Clear the e2e test leftovers quickly
install-crds: manifests ## Install CRDs in Kubernetes
kubectl apply -k config/crd
ifdef EXPERIMENTAL
$(MAKE) clean gen-crds
$(MAKE) regen-crds
kubectl apply -f config/generated/crd/bases/crds.yaml
endif

Expand Down Expand Up @@ -881,6 +884,9 @@ gen-crds: tools/openapi2crd/bin/openapi2crd
--output $(realpath .)/config/generated/crd/bases/crds.yaml
cp $(realpath .)/config/generated/crd/bases/crds.yaml $(realpath .)/internal/generated/crds/crds.yaml

.PHONY: regen-crds
regen-crds: clean-gen-crds gen-crds ## Clean and regenerate CRDs

gen-go-types:
@echo "==> Generating Go models from CRDs..."
$(CRD2GO) --input $(realpath .)/config/generated/crd/bases/crds.yaml \
Expand Down
16 changes: 16 additions & 0 deletions config/samples/atlas_generated_v1_flexcluster.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: atlas.generated.mongodb.com/v1
kind: FlexCluster
metadata:
name: flexy
spec:
connectionSecretRef:
name: mongodb-atlas-operator-api-key
v20250312:
groupId: "60f1b3c4e4b0e8b8c8b8c8b"
entry:
name: flexy
terminationProtectionEnabled: true
providerSettings:
backingProviderName: GCP
regionName: CENTRAL_US

Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
apiVersion: atlas.generated.mongodb.com/v1
kind: Group
metadata:
name: my-group-for-flexcluster
spec:
connectionSecretRef:
name: mongodb-atlas-operator-api-key
v20250312:
entry:
orgId: "60f1b3c4e4b0e8b8c8b8c8b"
name: my-group-for-flexcluster
---
apiVersion: atlas.generated.mongodb.com/v1
kind: FlexCluster
metadata:
name: flexy-with-groupref
annotations:
some-tag: tag
spec:
v20250312:
groupRef:
name: my-group-for-flexcluster
entry:
name: flexy-with-groupref
terminationProtectionEnabled: true
providerSettings:
backingProviderName: GCP
regionName: CENTRAL_US

12 changes: 12 additions & 0 deletions config/samples/atlas_generated_v1_group.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
apiVersion: atlas.generated.mongodb.com/v1
kind: Group
metadata:
name: my-group
spec:
connectionSecretRef:
name: mongodb-atlas-operator-api-key
v20250312:
entry:
orgId: "60f1b3c4e4b0e8b8c8b8c8b"
name: my-group

3 changes: 3 additions & 0 deletions config/samples/kustomization.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,7 @@ resources:
- atlas_v1_atlasbackupcompliancepolicy.yaml
- atlas_v1_atlascustomrole.yaml
- atlas_v1_atlasthirdpartyintegration.yaml
- atlas_generated_v1_group.yaml
- atlas_generated_v1_flexcluster.yaml
- atlas_generated_v1_flexcluster_with_groupref.yaml
# +kubebuilder:scaffold:manifestskustomizesamples
47 changes: 45 additions & 2 deletions internal/generated/controller/flexcluster/handler_v20250312.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"fmt"

v20250312sdk "go.mongodb.org/atlas-sdk/v20250312009/admin"
apierrors "k8s.io/apimachinery/pkg/api/errors"
controllerruntime "sigs.k8s.io/controller-runtime"
builder "sigs.k8s.io/controller-runtime/pkg/builder"
client "sigs.k8s.io/controller-runtime/pkg/client"
Expand Down Expand Up @@ -71,6 +72,26 @@ func (h *Handlerv20250312) getDependencies(ctx context.Context, flexcluster *ako
return result, nil
}

// getMinimalGroupFromStatusOrSpec builds a stand-in Group that carries nothing
// but a group ID, so callers can continue when the referenced Group CR can no
// longer be fetched from Kubernetes.
//
// The ID is read from the FlexCluster status when that pointer is set; the
// spec is consulted only when the status yields a nil ID (an empty status ID
// does not trigger the fallback). The result is nil when no ID is found or the
// selected ID is the empty string.
func (h *Handlerv20250312) getMinimalGroupFromStatusOrSpec(flexcluster *akov2generated.FlexCluster) *akov2generated.Group {
	var id *string
	if status := flexcluster.Status.V20250312; status != nil {
		id = status.GroupId
	}
	if spec := flexcluster.Spec.V20250312; id == nil && spec != nil {
		id = spec.GroupId
	}

	if id != nil && *id != "" {
		// Only the status ID is populated; every other Group field stays zero.
		groupStatus := akov2generated.GroupStatus{
			V20250312: &akov2generated.GroupStatusV20250312{Id: id},
		}
		return &akov2generated.Group{Status: groupStatus}
	}
	return nil
}

// HandleInitial handles the initial state for version v20250312
func (h *Handlerv20250312) HandleInitial(ctx context.Context, flexcluster *akov2generated.FlexCluster) (ctrlstate.Result, error) {
deps, err := h.getDependencies(ctx, flexcluster)
Expand Down Expand Up @@ -165,7 +186,18 @@ func (h *Handlerv20250312) HandleDeletionRequested(ctx context.Context, flexclus

deps, err := h.getDependencies(ctx, flexcluster)
if err != nil {
return result.Error(state.StateDeletionRequested, fmt.Errorf("failed to get dependencies: %w", err))
// Race condition: Group CR may be deleted from K8s before FlexCluster finishes deletion.
// If Group is not found but we have group ID in status, use it to proceed with deletion.
var statusErr *apierrors.StatusError
if errors.As(err, &statusErr) && apierrors.IsNotFound(statusErr) {
if group := h.getMinimalGroupFromStatusOrSpec(flexcluster); group != nil {
Copy link
Collaborator

@s-urbaniak s-urbaniak Dec 22, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is doable in this case, but not all objects have Group IDs (e.g. DatabaseUsers). I would rather suggest blocking deletion of the Group in the group reconciler in the first place:

func (h *Handlerv20250312) HandleDeletionRequested(ctx context.Context, group *akov2generated.Group) (ctrlstate.Result, error) {
	var dependents []reconcile.Request
	dependents = append(dependents, indexers.NewFlexClusterByGroupMapFunc(h.kubeClient)(ctx, group)...)
	dependents = append(dependents, indexers.NewDatabaseUserByGroupMapFunc(h.kubeClient)(ctx, group)...)
	dependents = append(dependents, indexers.NewClusterByGroupMapFunc(h.kubeClient)(ctx, group)...)
	if len(dependends) > 0 {
		return result.NextState(state.StateDeletionRequested, fmt.Sprintf("failed to delete group because %v resources depend on it.", len(dependents)))
	}

	if customresource.IsResourcePolicyKeepOrDefault(group, h.deletionProtection) {
		return result.NextState(state.StateDeleted, "Group deleted.")
	}

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The good news is that we have the mapping functions auto-generated already!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I just tried the above code and it works nicely. I first:

  1. Tried to delete the group. This blocks because a Cluster, a FlexCluster, and a DatabaseUser depend on it:
Screenshot 2025-12-22 at 20 51 37
  1. Deleted the Cluster, the FlexCluster, and the DatabaseUser. In the meantime, the count of dependent resources went down:
Screenshot 2025-12-22 at 20 53 14
  1. Finally the deletion in Atlas was initiated and executed:
Screenshot 2025-12-22 at 20 53 33

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

side note: this is a pretty canonical use of indexes in combination with finalizers (in our case nicely gated via the universal state machine)

deps = []client.Object{group}
} else {
return result.Error(state.StateDeletionRequested, fmt.Errorf("failed to get dependencies: %w", err))
}
} else {
return result.Error(state.StateDeletionRequested, fmt.Errorf("failed to get dependencies: %w", err))
}
}

params := &v20250312sdk.DeleteFlexClusterApiParams{}
Expand All @@ -189,7 +221,18 @@ func (h *Handlerv20250312) HandleDeletionRequested(ctx context.Context, flexclus
func (h *Handlerv20250312) HandleDeleting(ctx context.Context, flexcluster *akov2generated.FlexCluster) (ctrlstate.Result, error) {
deps, err := h.getDependencies(ctx, flexcluster)
if err != nil {
return result.Error(state.StateDeleting, fmt.Errorf("failed to get dependencies: %w", err))
// Race condition: Group CR may be deleted from K8s before FlexCluster finishes deletion.
// If Group is not found but we have group ID in status, use it to proceed with deletion.
var statusErr *apierrors.StatusError
if errors.As(err, &statusErr) && apierrors.IsNotFound(statusErr) {
if group := h.getMinimalGroupFromStatusOrSpec(flexcluster); group != nil {
deps = []client.Object{group}
} else {
return result.Error(state.StateDeleting, fmt.Errorf("failed to get dependencies: %w", err))
}
} else {
return result.Error(state.StateDeleting, fmt.Errorf("failed to get dependencies: %w", err))
}
}

params := &v20250312sdk.GetFlexClusterApiParams{}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1114,18 +1114,20 @@ func withGeneration(flexCluster *akov2generated.FlexCluster, generation int64) *
// withObservedGeneration appends a state condition carrying observedGen to the
// FlexCluster's status conditions and returns the same object for chaining.
//
// The appended condition has Type state.StateCondition and Status
// metav1.ConditionTrue. A nil Status.Conditions is treated as an empty list.
// The updated list always lives in a freshly allocated slice, so the slice
// previously referenced by Status.Conditions is never written to.
func withObservedGeneration(flexCluster *akov2generated.FlexCluster, observedGen int64) *akov2generated.FlexCluster {
	var existing []metav1.Condition
	if flexCluster.Status.Conditions != nil {
		existing = *flexCluster.Status.Conditions
	}

	// Build the result in its own backing array rather than appending to the
	// existing slice, which could write into spare capacity shared with others.
	updated := make([]metav1.Condition, 0, len(existing)+1)
	updated = append(updated, existing...)
	updated = append(updated, metav1.Condition{
		Type:               state.StateCondition,
		ObservedGeneration: observedGen,
		Status:             metav1.ConditionTrue,
	})

	// Taking the address of a local is safe in Go; the variable outlives the
	// call for as long as the pointer is reachable.
	flexCluster.Status.Conditions = &updated
	return flexCluster
}

Expand Down
12 changes: 8 additions & 4 deletions test/e2e2/ako_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,14 @@ var _ = Describe("Atlas Operator Start and Stop test", Ordered, Label("ako-start
ako = runTestAKO(DefaultGlobalCredentials, control.MustEnvVar("OPERATOR_NAMESPACE"), deletionProtectionOff)
ako.Start(GinkgoT())

// Register cleanup - this should run even when the process is interrupted with Ctrl+C
// AfterAll is not reliable in such cases.
DeferCleanup(func() {
if ako != nil {
ako.Stop(GinkgoT())
}
})

ctx = context.Background()
client, err := kube.NewTestClient()
Expect(err).To(Succeed())
Expand All @@ -57,10 +65,6 @@ var _ = Describe("Atlas Operator Start and Stop test", Ordered, Label("ako-start
})).To(Succeed())
})

_ = AfterAll(func() {
ako.Stop(GinkgoT())
})

_ = BeforeEach(func() {
testNamespace = &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{
Name: utils.RandomName("ako-ns"),
Expand Down
1 change: 1 addition & 0 deletions test/e2e2/e2e2_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ func initTestLogging(t *testing.T) {
ctrllog.SetLogger(logrLogger.WithName("test"))
}

// nolint:unparam
func runTestAKO(globalCreds, ns string, deletionprotection bool) operator.Operator {
args := []string{
"--log-level=-9",
Expand Down
14 changes: 8 additions & 6 deletions test/e2e2/flex_to_dedicated_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,18 +51,20 @@ var _ = Describe("Flex to Dedicated Upgrade", Ordered, Label("flex-to-dedicated"
ako = runTestAKO(DefaultGlobalCredentials, control.MustEnvVar("OPERATOR_NAMESPACE"), false)
ako.Start(GinkgoT())

// Register cleanup - this should run even when the process is interrupted with Ctrl+C
// AfterAll is not reliable in such cases.
DeferCleanup(func() {
if ako != nil {
ako.Stop(GinkgoT())
}
})

ctx = context.Background()
client, err := kube.NewTestClient()
Expect(err).ToNot(HaveOccurred())
kubeClient = client
})

_ = AfterAll(func() {
if ako != nil {
ako.Stop(GinkgoT())
}
})

_ = BeforeEach(func() {
resourcePrefix = utils.RandomName("flex-to-dedicated")
testNamespace = &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{
Expand Down
Loading