From 9eb45c72662685890723c3e5d675dbdeb6f5ae64 Mon Sep 17 00:00:00 2001 From: Jason Lynch Date: Mon, 19 Jan 2026 14:25:09 -0500 Subject: [PATCH 1/2] refactor: add event apply method Adds a new `Event.Apply` method that's responsible for calling the correct lifecycle method on the resource, managing the resource data fields, and determining when to return an error. This change helps to consolidate the resource model logic in the `resource` package. PLAT-212 --- server/internal/resource/event.go | 124 ++++++++++++++++ server/internal/resource/event_test.go | 105 ++++++++++++++ server/internal/resource/state.go | 37 ----- server/internal/resource/state_test.go | 132 +++++++++++------- .../workflows/activities/apply_event.go | 87 ++++-------- 5 files changed, 331 insertions(+), 154 deletions(-) create mode 100644 server/internal/resource/event.go create mode 100644 server/internal/resource/event_test.go diff --git a/server/internal/resource/event.go b/server/internal/resource/event.go new file mode 100644 index 00000000..4be34a95 --- /dev/null +++ b/server/internal/resource/event.go @@ -0,0 +1,124 @@ +package resource + +import ( + "context" + "errors" + "fmt" + + "github.com/wI2L/jsondiff" +) + +type EventType string + +const ( + EventTypeRefresh EventType = "refresh" + EventTypeCreate EventType = "create" + EventTypeUpdate EventType = "update" + EventTypeDelete EventType = "delete" +) + +type EventReason string + +const ( + EventReasonDoesNotExist EventReason = "does_not_exist" + EventReasonNeedsRecreate EventReason = "needs_recreate" + EventReasonHasDiff EventReason = "has_diff" + EventReasonForceUpdate EventReason = "force_update" + EventReasonDependencyUpdated EventReason = "dependency_updated" + EventReasonHasError EventReason = "has_error" +) + +type Event struct { + Type EventType `json:"type"` + Resource *ResourceData `json:"resource"` + Reason EventReason `json:"reason,omitempty"` + Diff jsondiff.Patch `json:"diff,omitempty"` +} + +// Apply applies this event to its resource. It does not modify the state in the +// given Context. +func (e *Event) Apply(ctx context.Context, rc *Context) error { + resource, err := rc.Registry.Resource(e.Resource) + if err != nil { + return err + } + + switch e.Type { + case EventTypeRefresh: + return e.refresh(ctx, rc, resource) + case EventTypeCreate: + return e.create(ctx, rc, resource) + case EventTypeUpdate: + return e.update(ctx, rc, resource) + case EventTypeDelete: + return e.delete(ctx, rc, resource) + default: + return fmt.Errorf("unknown event type: %s", e.Type) + } +} + +func (e *Event) refresh(ctx context.Context, rc *Context, resource Resource) error { + var needsRecreate bool + + err := resource.Refresh(ctx, rc) + if errors.Is(err, ErrNotFound) { + needsRecreate = true + } else if err != nil { + return fmt.Errorf("failed to refresh resource %s: %w", resource.Identifier(), err) + } + + updated, err := ToResourceData(resource) + if err != nil { + return err + } + updated.NeedsRecreate = needsRecreate + + e.Resource = updated + + return nil +} + +func (e *Event) create(ctx context.Context, rc *Context, resource Resource) error { + if err := resource.Create(ctx, rc); err != nil { + return fmt.Errorf("failed to create resource %s: %w", resource.Identifier(), err) + } + + updated, err := ToResourceData(resource) + if err != nil { + return err + } + + e.Resource = updated + + return nil +} + +func (e *Event) update(ctx context.Context, rc *Context, resource Resource) error { + if err := resource.Update(ctx, rc); err != nil { + return fmt.Errorf("failed to update resource %s: %w", resource.Identifier(), err) + } + + updated, err := ToResourceData(resource) + if err != nil { + return err + } + + e.Resource = updated + + return nil +} + +func (e *Event) delete(ctx context.Context, rc *Context, resource Resource) error { + if err := resource.Delete(ctx, rc); err != nil { + return fmt.Errorf("failed to delete resource %s: %w", resource.Identifier(), err) + } + + updated, err := ToResourceData(resource) + if err != nil { + return err + } + + e.Resource = updated + + return nil +} diff --git a/server/internal/resource/event_test.go b/server/internal/resource/event_test.go new file mode 100644 index 00000000..4557b735 --- /dev/null +++ b/server/internal/resource/event_test.go @@ -0,0 +1,105 @@ +package resource_test + +import ( + "testing" + + "github.com/pgEdge/control-plane/server/internal/resource" + "github.com/samber/do" + "github.com/stretchr/testify/assert" +) + +func TestEvent(t *testing.T) { + t.Run("Apply", func(t *testing.T) { + registry := resource.NewRegistry() + resource.RegisterResourceType[*testResource](registry, testResourceType) + + rc := &resource.Context{ + State: resource.NewState(), + Registry: registry, + Injector: do.New(), + } + + for _, tc := range []struct { + name string + eventType resource.EventType + notFound bool + lifecycleError string + expectedErr string + expectedResourceNeedsRecreate bool + }{ + { + name: "refresh success", + eventType: resource.EventTypeRefresh, + }, + { + name: "refresh not found", + eventType: resource.EventTypeRefresh, + notFound: true, + expectedResourceNeedsRecreate: true, + }, + { + name: "refresh failed", + eventType: resource.EventTypeRefresh, + lifecycleError: "some error", + expectedErr: "failed to refresh resource test_resource::test: some error", + }, + { + name: "create success", + eventType: resource.EventTypeCreate, + }, + { + name: "create failed", + eventType: resource.EventTypeCreate, + lifecycleError: "some error", + expectedErr: "failed to create resource test_resource::test: some error", + }, + { + name: "update success", + eventType: resource.EventTypeUpdate, + }, + { + name: "update failed", + eventType: resource.EventTypeUpdate, + lifecycleError: "some error", + expectedErr: "failed to update resource test_resource::test: some error", + }, + { + name: "delete success", + eventType: resource.EventTypeDelete, + }, + { + name: "delete failed", + eventType: resource.EventTypeDelete, + lifecycleError: "some error", + expectedErr: "failed to delete resource test_resource::test: some error", + }, + } { + t.Run(tc.name, func(t *testing.T) { + r := &testResource{ + ID: "test", + NotFound: tc.notFound, + Error: tc.lifecycleError, + } + + original := r.data(t) + + expected := r.data(t) + expected.NeedsRecreate = tc.expectedResourceNeedsRecreate + + event := &resource.Event{ + Type: tc.eventType, + Resource: original, + } + + err := event.Apply(t.Context(), rc) + + if tc.expectedErr != "" { + assert.ErrorContains(t, err, tc.expectedErr) + } else { + assert.NoError(t, err) + assert.Equal(t, expected, event.Resource) + } + }) + } + }) +} diff --git a/server/internal/resource/state.go b/server/internal/resource/state.go index d47fadc2..41244d48 100644 --- a/server/internal/resource/state.go +++ b/server/internal/resource/state.go @@ -6,48 +6,11 @@ import ( "maps" "slices" - "github.com/wI2L/jsondiff" "gonum.org/v1/gonum/graph/simple" "github.com/pgEdge/control-plane/server/internal/ds" ) -type EventType string - -const ( - EventTypeRefresh EventType = "refresh" - EventTypeCreate EventType = "create" - EventTypeUpdate EventType = "update" - EventTypeDelete EventType = "delete" -) - -type EventReason string - -const ( - EventReasonDoesNotExist EventReason = "does_not_exist" - EventReasonNeedsRecreate EventReason = "needs_recreate" - EventReasonHasDiff EventReason = "has_diff" - EventReasonForceUpdate EventReason = "force_update" - EventReasonDependencyUpdated EventReason = "dependency_updated" -) - -type Event struct { - Type EventType `json:"type"` - Resource *ResourceData `json:"resource"` - Reason EventReason `json:"reason,omitempty"` - Diff jsondiff.Patch `json:"diff,omitempty"` -} - -// WithData returns a clone of this event with the given data. -func (e *Event) WithData(data *ResourceData) *Event { - return &Event{ - Type: e.Type, - Resource: data, - Reason: e.Reason, - Diff: e.Diff, - } -} - type State struct { Resources map[Type]map[string]*ResourceData `json:"resources"` } diff --git a/server/internal/resource/state_test.go b/server/internal/resource/state_test.go index 79fd5146..ca939abb 100644 --- a/server/internal/resource/state_test.go +++ b/server/internal/resource/state_test.go @@ -2,6 +2,7 @@ package resource_test import ( "context" + "errors" "testing" "github.com/pgEdge/control-plane/server/internal/resource" @@ -13,8 +14,8 @@ func TestState(t *testing.T) { t.Run("PlanRefresh", func(t *testing.T) { t.Run("from empty state", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -22,8 +23,8 @@ func TestState(t *testing.T) { require.NoError(t, err) resource2 := &testResource{ - identifier: testResourceID("test2"), - dependencies: []resource.Identifier{ + ID: "test2", + TestDependencies: []resource.Identifier{ testResourceID("test3"), }, } @@ -31,7 +32,7 @@ func TestState(t *testing.T) { require.NoError(t, err) resource3 := &testResource{ - identifier: testResourceID("test3"), + ID: "test3", } resource3Data, err := resource.ToResourceData(resource3) require.NoError(t, err) @@ -72,8 +73,8 @@ func TestState(t *testing.T) { t.Run("Plan", func(t *testing.T) { t.Run("from empty state", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -81,8 +82,8 @@ func TestState(t *testing.T) { require.NoError(t, err) resource2 := &testResource{ - identifier: testResourceID("test2"), - dependencies: []resource.Identifier{ + ID: "test2", + TestDependencies: []resource.Identifier{ testResourceID("test3"), }, } @@ -90,7 +91,7 @@ func TestState(t *testing.T) { require.NoError(t, err) resource3 := &testResource{ - identifier: testResourceID("test3"), + ID: "test3", } resource3Data, err := resource.ToResourceData(resource3) require.NoError(t, err) @@ -133,8 +134,8 @@ func TestState(t *testing.T) { }) t.Run("from nonempty state", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -142,8 +143,8 @@ func TestState(t *testing.T) { require.NoError(t, err) resource2 := &testResource{ - identifier: testResourceID("test2"), - dependencies: []resource.Identifier{ + ID: "test2", + TestDependencies: []resource.Identifier{ testResourceID("test3"), }, } @@ -151,7 +152,7 @@ func TestState(t *testing.T) { require.NoError(t, err) resource3 := &testResource{ - identifier: testResourceID("test3"), + ID: "test3", } current := resource.NewState() @@ -187,8 +188,8 @@ func TestState(t *testing.T) { }) t.Run("with update", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -196,8 +197,8 @@ func TestState(t *testing.T) { require.NoError(t, err) resource2 := &testResource{ - identifier: testResourceID("test2"), - dependencies: []resource.Identifier{ + ID: "test2", + TestDependencies: []resource.Identifier{ testResourceID("test3"), }, } @@ -205,13 +206,13 @@ func TestState(t *testing.T) { require.NoError(t, err) resource3 := &testResource{ - identifier: testResourceID("test3"), + ID: "test3", } updatedResource2 := &testResource{ SomeAttribute: "updated", - identifier: testResourceID("test2"), - dependencies: []resource.Identifier{ + ID: "test2", + TestDependencies: []resource.Identifier{ testResourceID("test3"), }, } @@ -259,8 +260,8 @@ func TestState(t *testing.T) { }) t.Run("to empty state", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -268,8 +269,8 @@ func TestState(t *testing.T) { require.NoError(t, err) resource2 := &testResource{ - identifier: testResourceID("test2"), - dependencies: []resource.Identifier{ + ID: "test2", + TestDependencies: []resource.Identifier{ testResourceID("test3"), }, } @@ -277,7 +278,7 @@ func TestState(t *testing.T) { require.NoError(t, err) resource3 := &testResource{ - identifier: testResourceID("test3"), + ID: "test3", } resource3Data, err := resource.ToResourceData(resource3) require.NoError(t, err) @@ -317,8 +318,8 @@ func TestState(t *testing.T) { }) t.Run("mixed creates and deletes", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -326,14 +327,14 @@ func TestState(t *testing.T) { require.NoError(t, err) resource2 := &testResource{ - identifier: testResourceID("test2"), + ID: "test2", } resource2Data, err := resource.ToResourceData(resource2) require.NoError(t, err) resource3 := &testResource{ - identifier: testResourceID("test3"), - dependencies: []resource.Identifier{ + ID: "test3", + TestDependencies: []resource.Identifier{ testResourceID("test4"), }, } @@ -341,20 +342,20 @@ func TestState(t *testing.T) { require.NoError(t, err) resource4 := &testResource{ - identifier: testResourceID("test4"), + ID: "test4", } resource4Data, err := resource.ToResourceData(resource4) require.NoError(t, err) resource5 := &testResource{ - identifier: testResourceID("test5"), + ID: "test5", } resource5Data, err := resource.ToResourceData(resource5) require.NoError(t, err) resource6 := &testResource{ - identifier: testResourceID("test6"), - dependencies: []resource.Identifier{ + ID: "test6", + TestDependencies: []resource.Identifier{ testResourceID("test5"), }, } @@ -423,8 +424,8 @@ func TestState(t *testing.T) { }) t.Run("missing create dependency", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -442,8 +443,8 @@ func TestState(t *testing.T) { t.Run("missing delete dependency", func(t *testing.T) { resource1 := &testResource{ - identifier: testResourceID("test1"), - dependencies: []resource.Identifier{ + ID: "test1", + TestDependencies: []resource.Identifier{ testResourceID("test2"), }, } @@ -473,10 +474,10 @@ func TestState(t *testing.T) { t.Run("ignored attributes", func(t *testing.T) { currentResource := &testResource{ SomeIgnoredAttribute: "ignored", - identifier: testResourceID("test1"), + ID: "test1", } desiredResource := &testResource{ - identifier: testResourceID("test1"), + ID: "test1", } current := resource.NewState() @@ -493,18 +494,22 @@ func TestState(t *testing.T) { }) } +const testResourceType = resource.Type("test_resource") + func testResourceID(id string) resource.Identifier { return resource.Identifier{ - Type: "test_resource", + Type: testResourceType, ID: id, } } type testResource struct { - SomeAttribute string `json:"some_attribute"` - SomeIgnoredAttribute string `json:"some_ignored_attribute"` - identifier resource.Identifier - dependencies []resource.Identifier + SomeAttribute string `json:"some_attribute"` + SomeIgnoredAttribute string `json:"some_ignored_attribute"` + ID string `json:"id"` + TestDependencies []resource.Identifier `json:"test_dependencies"` + NotFound bool `json:"not_found"` + Error string `json:"error"` } func (r *testResource) ResourceVersion() string { @@ -522,29 +527,48 @@ func (r *testResource) Executor() resource.Executor { } func (r *testResource) Identifier() resource.Identifier { - return r.identifier + return testResourceID(r.ID) } func (r *testResource) Dependencies() []resource.Identifier { - return r.dependencies -} - -func (r *testResource) Validate() error { - return nil + return r.TestDependencies } func (r *testResource) Refresh(ctx context.Context, rc *resource.Context) error { - return nil + switch { + case r.NotFound: + return resource.ErrNotFound + case r.Error != "": + return errors.New(r.Error) + default: + return nil + } } func (r *testResource) Create(ctx context.Context, rc *resource.Context) error { + if r.Error != "" { + return errors.New(r.Error) + } return nil } func (r *testResource) Update(ctx context.Context, rc *resource.Context) error { + if r.Error != "" { + return errors.New(r.Error) + } return nil } func (r *testResource) Delete(ctx context.Context, rc *resource.Context) error { + if r.Error != "" { + return errors.New(r.Error) + } return nil } + +func (r *testResource) data(t testing.TB) *resource.ResourceData { + data, err := resource.ToResourceData(r) + require.NoError(t, err) + + return data +} diff --git a/server/internal/workflows/activities/apply_event.go b/server/internal/workflows/activities/apply_event.go index 052b7f3b..8325a349 100644 --- a/server/internal/workflows/activities/apply_event.go +++ b/server/internal/workflows/activities/apply_event.go @@ -72,77 +72,38 @@ func (a *Activities) ApplyEvent(ctx context.Context, input *ApplyEventInput) (*A return nil, err } - r, err := registry.Resource(input.Event.Resource) - if err != nil { - return nil, err - } - rc := &resource.Context{ State: input.State, Injector: a.Injector, Registry: registry, } - var needsCreate bool - - ctxWithCancel, cancel := context.WithCancel(ctx) - defer cancel() - - resultCh := make(chan error, 1) - - go func() { - defer close(resultCh) - - switch input.Event.Type { - case resource.EventTypeRefresh: - err := r.Refresh(ctxWithCancel, rc) - if errors.Is(err, resource.ErrNotFound) { - needsCreate = true - } else if err != nil { - resultCh <- fmt.Errorf("failed to refresh resource %s: %w", r.Identifier().String(), err) - } - case resource.EventTypeCreate: - err := a.logResourceEvent(ctxWithCancel, input.DatabaseID, input.TaskID, "creating", r, func() error { - return r.Create(ctxWithCancel, rc) - }) - if err != nil { - resultCh <- fmt.Errorf("failed to create resource %s: %w", r.Identifier().String(), err) - } - case resource.EventTypeUpdate: - err := a.logResourceEvent(ctxWithCancel, input.DatabaseID, input.TaskID, "updating", r, func() error { - return r.Update(ctxWithCancel, rc) - }) - if err != nil { - resultCh <- fmt.Errorf("failed to update resource %s: %w", r.Identifier().String(), err) - } - case resource.EventTypeDelete: - err := a.logResourceEvent(ctxWithCancel, input.DatabaseID, input.TaskID, "deleting", r, func() error { - return r.Delete(ctxWithCancel, rc) - }) - if err != nil { - resultCh <- fmt.Errorf("failed to delete resource %s: %w", r.Identifier().String(), err) - } - default: - resultCh <- fmt.Errorf("unknown event type: %s", input.Event.Type) - } - }() + event := input.Event + apply := func() error { + return event.Apply(ctx, rc) + } - select { - case <-ctx.Done(): - return nil, fmt.Errorf("activity canceled: %w", ctx.Err()) - case err := <-resultCh: - if err != nil { - return nil, err + switch input.Event.Type { + case resource.EventTypeCreate: + apply = func() error { + return a.logResourceEvent(ctx, input.DatabaseID, input.TaskID, "creating", event, rc) + } + case resource.EventTypeUpdate: + apply = func() error { + return a.logResourceEvent(ctx, input.DatabaseID, input.TaskID, "updating", event, rc) + } + case resource.EventTypeDelete: + apply = func() error { + return a.logResourceEvent(ctx, input.DatabaseID, input.TaskID, "deleting", event, rc) } } - data, err := resource.ToResourceData(r) - if err != nil { - return nil, fmt.Errorf("failed to prepare resource for serialization: %w", err) + + if err := apply(); err != nil { + return nil, err } - data.NeedsRecreate = needsCreate return &ApplyEventOutput{ - Event: input.Event.WithData(data), + Event: event, }, nil } @@ -151,10 +112,10 @@ func (a *Activities) logResourceEvent( databaseID string, taskID uuid.UUID, verb string, - resource resource.Resource, - apply func() error, + event *resource.Event, + rc *resource.Context, ) error { - resourceIdentifier := resource.Identifier() + resourceIdentifier := event.Resource.Identifier fields := map[string]any{ "resource_type": resourceIdentifier.Type, "resource_id": resourceIdentifier.ID, @@ -173,7 +134,7 @@ func (a *Activities) logResourceEvent( } start := time.Now() - applyErr := apply() + applyErr := event.Apply(ctx, rc) duration := time.Since(start) fields["duration_ms"] = duration.Milliseconds() From 65e3adf1e9d0d3df5b0d185f4ff370a0f254c105 Mon Sep 17 00:00:00 2001 From: Jason Lynch Date: Mon, 19 Jan 2026 14:30:38 -0500 Subject: [PATCH 2/2] fix: record failed lifecycle operations Adds the ability to record partial failures from resource lifecycle methods by adding a new `ResourceData.Error` field. This field is populated when an error occurs during `Create` and `Update` events. In this case, we'll persist failed resource back to the state before we terminate the plan. Combined with the existing `NeedsCreate` field, we can record when an operation needs to be retried on the next run. Recording partial failures also makes it so resources can be properly deleted if the user opts to perform a delete rather than a retry. PLAT-212 --- changes/unreleased/Fixed-20260119-095716.yaml | 3 + server/internal/resource/event.go | 31 ++++++- server/internal/resource/event_test.go | 49 ++++++++-- server/internal/resource/resource.go | 2 + server/internal/resource/state.go | 6 ++ server/internal/resource/state_test.go | 91 +++++++++++++++++++ .../workflows/activities/apply_event.go | 33 ++++--- server/internal/workflows/common.go | 7 ++ 8 files changed, 198 insertions(+), 24 deletions(-) create mode 100644 changes/unreleased/Fixed-20260119-095716.yaml diff --git a/changes/unreleased/Fixed-20260119-095716.yaml b/changes/unreleased/Fixed-20260119-095716.yaml new file mode 100644 index 00000000..8cbbcf3c --- /dev/null +++ b/changes/unreleased/Fixed-20260119-095716.yaml @@ -0,0 +1,3 @@ +kind: Fixed +body: Fixed a bug that prevented database deletion when we failed to create the Swarm service. +time: 2026-01-19T09:57:16.746487-05:00 diff --git a/server/internal/resource/event.go b/server/internal/resource/event.go index 4be34a95..724539dc 100644 --- a/server/internal/resource/event.go +++ b/server/internal/resource/event.go @@ -35,6 +35,13 @@ type Event struct { Diff jsondiff.Patch `json:"diff,omitempty"` } +func (e *Event) ResourceError() error { + if e.Resource != nil && e.Resource.Error != "" { + return errors.New(e.Resource.Error) + } + return nil +} + // Apply applies this event to its resource. It does not modify the state in the // given Context. func (e *Event) Apply(ctx context.Context, rc *Context) error { @@ -58,7 +65,10 @@ func (e *Event) Apply(ctx context.Context, rc *Context) error { } func (e *Event) refresh(ctx context.Context, rc *Context, resource Resource) error { - var needsRecreate bool + // Retain the original Error and NeedsRecreate fields so that they're + // available for planCreates. + needsRecreate := e.Resource.NeedsRecreate + applyErr := e.Resource.Error err := resource.Refresh(ctx, rc) if errors.Is(err, ErrNotFound) { @@ -71,7 +81,9 @@ func (e *Event) refresh(ctx context.Context, rc *Context, resource Resource) err if err != nil { return err } + updated.NeedsRecreate = needsRecreate + updated.Error = applyErr e.Resource = updated @@ -79,14 +91,20 @@ func (e *Event) refresh(ctx context.Context, rc *Context, resource Resource) err } func (e *Event) create(ctx context.Context, rc *Context, resource Resource) error { + var needsRecreate bool + var applyErr string + if err := resource.Create(ctx, rc); err != nil { - return fmt.Errorf("failed to create resource %s: %w", resource.Identifier(), err) + needsRecreate = true + applyErr = fmt.Sprintf("failed to create resource %s: %s", resource.Identifier(), err.Error()) } updated, err := ToResourceData(resource) if err != nil { return err } + updated.NeedsRecreate = needsRecreate + updated.Error = applyErr e.Resource = updated @@ -94,14 +112,17 @@ func (e *Event) create(ctx context.Context, rc *Context, resource Resource) erro } func (e *Event) update(ctx context.Context, rc *Context, resource Resource) error { + var applyErr string + if err := resource.Update(ctx, rc); err != nil { - return fmt.Errorf("failed to update resource %s: %w", resource.Identifier(), err) + applyErr = fmt.Sprintf("failed to update resource %s: %s", resource.Identifier(), err.Error()) } updated, err := ToResourceData(resource) if err != nil { return err } + updated.Error = applyErr e.Resource = updated @@ -110,6 +131,10 @@ func (e *Event) update(ctx context.Context, rc *Context, resource Resource) erro func (e *Event) delete(ctx context.Context, rc *Context, resource Resource) error { if err := resource.Delete(ctx, rc); err != nil { + // We need to return an error here to indicate that this event should + // not be applied to the state. Applying a delete event to the state + // removes the resource, so if we didn't return the error it would be + // impossible to retry this operation. return fmt.Errorf("failed to delete resource %s: %w", resource.Identifier(), err) } diff --git a/server/internal/resource/event_test.go b/server/internal/resource/event_test.go index 4557b735..91545b85 100644 --- a/server/internal/resource/event_test.go +++ b/server/internal/resource/event_test.go @@ -24,13 +24,24 @@ func TestEvent(t *testing.T) { eventType resource.EventType notFound bool lifecycleError string + originalResourceError string + originalResourceNeedsRecreate bool expectedErr string + expectedResourceError string expectedResourceNeedsRecreate bool }{ { name: "refresh success", eventType: resource.EventTypeRefresh, }, + { + name: "refresh success retains Error and NeedsRecreate", + eventType: resource.EventTypeRefresh, + originalResourceError: "some error", + originalResourceNeedsRecreate: true, + expectedResourceError: "some error", + expectedResourceNeedsRecreate: true, + }, { name: "refresh not found", eventType: resource.EventTypeRefresh, @@ -48,25 +59,44 @@ func TestEvent(t *testing.T) { eventType: resource.EventTypeCreate, }, { - name: "create failed", - eventType: resource.EventTypeCreate, - lifecycleError: "some error", - expectedErr: "failed to create resource test_resource::test: some error", + name: "create success clears Error and NeedsRecreate", + eventType: resource.EventTypeCreate, + originalResourceError: "some error", + originalResourceNeedsRecreate: true, + }, + { + name: "create failed", + eventType: resource.EventTypeCreate, + lifecycleError: "some error", + expectedResourceError: "failed to create resource test_resource::test: some error", + expectedResourceNeedsRecreate: true, }, { name: "update success", eventType: resource.EventTypeUpdate, }, { - name: "update failed", - eventType: resource.EventTypeUpdate, - lifecycleError: "some error", - expectedErr: "failed to update resource test_resource::test: some error", + name: "update success clears Error and NeedsRecreate", + eventType: resource.EventTypeUpdate, + originalResourceError: "some error", + originalResourceNeedsRecreate: true, + }, + { + name: "update failed", + eventType: resource.EventTypeUpdate, + lifecycleError: "some error", + expectedResourceError: "failed to update resource test_resource::test: some error", }, { name: "delete success", eventType: resource.EventTypeDelete, }, + { + name: "delete success clears Error and NeedsRecreate", + eventType: resource.EventTypeDelete, + originalResourceError: "some error", + originalResourceNeedsRecreate: true, + }, { name: "delete failed", eventType: resource.EventTypeDelete, @@ -82,8 +112,11 @@ func TestEvent(t *testing.T) { } original := r.data(t) + original.Error = tc.originalResourceError + original.NeedsRecreate = tc.originalResourceNeedsRecreate expected := r.data(t) + expected.Error = tc.expectedResourceError expected.NeedsRecreate = tc.expectedResourceNeedsRecreate event := &resource.Event{ diff --git a/server/internal/resource/resource.go b/server/internal/resource/resource.go index e598e51f..d0a18f4b 100644 --- a/server/internal/resource/resource.go +++ b/server/internal/resource/resource.go @@ -43,6 +43,7 @@ type ResourceData struct { DiffIgnore []string `json:"diff_ignore"` ResourceVersion string `json:"resource_version"` PendingDeletion bool `json:"pending_deletion"` + Error string `json:"error"` } func (r *ResourceData) Diff(other *ResourceData) (jsondiff.Patch, error) { @@ -70,6 +71,7 @@ func (r *ResourceData) Clone() *ResourceData { DiffIgnore: slices.Clone(r.DiffIgnore), ResourceVersion: r.ResourceVersion, PendingDeletion: r.PendingDeletion, + Error: r.Error, } } diff --git a/server/internal/resource/state.go b/server/internal/resource/state.go index 41244d48..3c0171e0 100644 --- a/server/internal/resource/state.go +++ b/server/internal/resource/state.go @@ -297,6 +297,12 @@ func (s *State) planCreates(options PlanOptions, desired *State) (Plan, error) { Resource: resource, Reason: EventReasonNeedsRecreate, } + case currentResource.Error != "": + event = &Event{ + Type: EventTypeUpdate, + Resource: resource, + Reason: EventReasonHasError, + } case options.ForceUpdate: event = &Event{ Type: EventTypeUpdate, diff --git a/server/internal/resource/state_test.go b/server/internal/resource/state_test.go index ca939abb..45bb0240 100644 --- a/server/internal/resource/state_test.go +++ b/server/internal/resource/state_test.go @@ -422,6 +422,97 @@ func TestState(t *testing.T) { assert.ElementsMatch(t, expected[i], phase) } }) + t.Run("error from previous create", func(t *testing.T) { + resource1Data, err := resource.ToResourceData(&testResource{ + ID: "test1", + TestDependencies: []resource.Identifier{ + testResourceID("test2"), + }, + }) + require.NoError(t, err) + + resource2Data, err := resource.ToResourceData(&testResource{ + ID: "test2", + }) + require.NoError(t, err) + + current := resource.NewState() + desired := resource.NewState() + + resource2WithError := resource2Data.Clone() + resource2WithError.NeedsRecreate = true + resource2WithError.Error = "some error" + + current.Add(resource2WithError) + desired.Add(resource1Data, resource2Data) + + plan, err := current.Plan(resource.PlanOptions{}, desired) + assert.NoError(t, err) + + expected := resource.Plan{ + { + { + Type: resource.EventTypeCreate, + Resource: resource2Data, + Reason: resource.EventReasonNeedsRecreate, + }, + }, + { + { + Type: resource.EventTypeCreate, + Resource: resource1Data, + Reason: resource.EventReasonDoesNotExist, + }, + }, + } + + assert.Equal(t, expected, plan) + }) + t.Run("error from previous update", func(t *testing.T) { + resource1Data, err := resource.ToResourceData(&testResource{ + ID: "test1", + TestDependencies: []resource.Identifier{ + testResourceID("test2"), + }, + }) + require.NoError(t, err) + + resource2Data, err := resource.ToResourceData(&testResource{ + ID: "test2", + }) + require.NoError(t, err) + + current := resource.NewState() + desired := resource.NewState() + + resource2WithError := resource2Data.Clone() + resource2WithError.Error = "some error" + + current.Add(resource1Data, resource2WithError) + desired.Add(resource1Data, resource2Data) + + plan, err := current.Plan(resource.PlanOptions{}, desired) + assert.NoError(t, err) + + expected := resource.Plan{ + { + { + Type: resource.EventTypeUpdate, + Resource: resource2Data, + Reason: resource.EventReasonHasError, + }, + }, + { + { + Type: resource.EventTypeUpdate, + Resource: resource1Data, + Reason: resource.EventReasonDependencyUpdated, + }, + }, + } + + assert.Equal(t, expected, plan) + }) t.Run("missing create dependency", func(t *testing.T) { resource1 := &testResource{ ID: "test1", diff --git a/server/internal/workflows/activities/apply_event.go b/server/internal/workflows/activities/apply_event.go index 8325a349..6599bd86 100644 --- a/server/internal/workflows/activities/apply_event.go +++ b/server/internal/workflows/activities/apply_event.go @@ -139,9 +139,17 @@ func (a *Activities) logResourceEvent( fields["duration_ms"] = duration.Milliseconds() - if applyErr != nil { + var applyErrStr string + switch { + case applyErr != nil: + applyErrStr = applyErr.Error() + case event.Type != resource.EventTypeRefresh: + applyErrStr = event.Resource.Error + } + + if applyErrStr != "" { fields["success"] = false - fields["error"] = applyErr.Error() + fields["error"] = applyErrStr err := log(task.LogEntry{ Message: fmt.Sprintf("error while %s resource %s", verb, resourceIdentifier), @@ -153,18 +161,17 @@ func (a *Activities) logResourceEvent( fmt.Errorf("failed to record event error: %w", err), ) } - return applyErr - } - - fields["success"] = true + } else { + fields["success"] = true - err = log(task.LogEntry{ - Message: fmt.Sprintf("finished %s resource %s (took %s)", verb, resourceIdentifier, duration), - Fields: fields, - }) - if err != nil { - return fmt.Errorf("failed to record event completion: %w", err) + err = log(task.LogEntry{ + Message: fmt.Sprintf("finished %s resource %s (took %s)", verb, resourceIdentifier, duration), + Fields: fields, + }) + if err != nil { + return fmt.Errorf("failed to record event completion: %w", err) + } } - return nil + return applyErr } diff --git a/server/internal/workflows/common.go b/server/internal/workflows/common.go index b098abae..9f6d8da6 100644 --- a/server/internal/workflows/common.go +++ b/server/internal/workflows/common.go @@ -75,6 +75,13 @@ func (w *Workflows) applyEvents( event := phase[i] errs = append(errs, fmt.Errorf("failed to apply %s event from %s to state: %w", event.Type, event.Resource.Identifier, err)) } + if err := out.Event.ResourceError(); err != nil && out.Event.Type != resource.EventTypeRefresh { + // Returns errors that originated from the resource's lifecycle + // method. They're already formatted with the event type and the + // resource identifier. We still want to apply the event to the + // state to record partial creates/updates. + errs = append(errs, err) + } } if err := errors.Join(errs...); err != nil { return fmt.Errorf("failed while modifying resources: %w", err)