diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 09f61bf3a3..ff697d77a8 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -355,6 +355,20 @@ export class Task extends EventEmitter implements TaskLike { userMessageContent: (Anthropic.TextBlockParam | Anthropic.ImageBlockParam | Anthropic.ToolResultBlockParam)[] = [] userMessageContentReady = false + /** + * Flag indicating whether the assistant message for the current streaming session + * has been saved to API conversation history. + * + * This is critical for parallel tool calling: tools should NOT execute until + * the assistant message is saved. Otherwise, if a tool like `new_task` triggers + * `flushPendingToolResultsToHistory()`, the user message with tool_results would + * appear BEFORE the assistant message with tool_uses, causing API errors. + * + * Reset to `false` at the start of each API request. + * Set to `true` after the assistant message is saved in `recursivelyMakeClineRequests`. + */ + assistantMessageSavedToHistory = false + /** * Push a tool_result block to userMessageContent, preventing duplicates. * Duplicate tool_use_ids cause API errors. @@ -1063,6 +1077,36 @@ export class Task extends EventEmitter implements TaskLike { return } + // CRITICAL: Wait for the assistant message to be saved to API history first. + // Without this, tool_result blocks would appear BEFORE tool_use blocks in the + // conversation history, causing API errors like: + // "unexpected `tool_use_id` found in `tool_result` blocks" + // + // This can happen when parallel tools are called (e.g., update_todo_list + new_task). + // Tools execute during streaming via presentAssistantMessage, BEFORE the assistant + // message is saved. When new_task triggers delegation, it calls this method to + // flush pending results - but the assistant message hasn't been saved yet. + // + // The assistantMessageSavedToHistory flag is: + // - Reset to false at the start of each API request + // - Set to true after the assistant message is saved in recursivelyMakeClineRequests + if (!this.assistantMessageSavedToHistory) { + await pWaitFor(() => this.assistantMessageSavedToHistory || this.abort, { + interval: 50, + timeout: 30_000, // 30 second timeout as safety net + }).catch(() => { + // If timeout or abort, log and proceed anyway to avoid hanging + console.warn( + `[Task#${this.taskId}] flushPendingToolResultsToHistory: timed out waiting for assistant message to be saved`, + ) + }) + } + + // If task was aborted while waiting, don't flush + if (this.abort) { + return + } + // Save the user message with tool_result blocks const userMessage: Anthropic.MessageParam = { role: "user", @@ -2707,6 +2751,7 @@ export class Task extends EventEmitter implements TaskLike { this.userMessageContentReady = false this.didRejectTool = false this.didAlreadyUseTool = false + this.assistantMessageSavedToHistory = false // Reset tool failure flag for each new assistant turn - this ensures that tool failures // only prevent attempt_completion within the same assistant message, not across turns // (e.g., if a tool fails, then user sends a message saying "just complete anyway") @@ -3488,6 +3533,7 @@ export class Task extends EventEmitter implements TaskLike { { role: "assistant", content: assistantContent }, reasoningMessage || undefined, ) + this.assistantMessageSavedToHistory = true TelemetryService.instance.captureConversationMessage(this.taskId, "assistant") } diff --git a/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts b/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts index 4f6f79970e..ca68347cbd 100644 --- a/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts +++ b/src/core/task/__tests__/flushPendingToolResultsToHistory.spec.ts @@ -38,8 +38,12 @@ vi.mock("fs/promises", async (importOriginal) => { } }) +const { mockPWaitFor } = vi.hoisted(() => { + return { mockPWaitFor: vi.fn().mockImplementation(async () => Promise.resolve()) } +}) + vi.mock("p-wait-for", () => ({ - default: vi.fn().mockImplementation(async () => Promise.resolve()), + default: mockPWaitFor, })) vi.mock("vscode", () => { @@ -344,4 +348,99 @@ describe("flushPendingToolResultsToHistory", () => { expect((task.apiConversationHistory[0] as any).ts).toBeGreaterThanOrEqual(beforeTs) expect((task.apiConversationHistory[0] as any).ts).toBeLessThanOrEqual(afterTs) }) + + it("should skip waiting for assistantMessageSavedToHistory when flag is already true", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Set flag to true (assistant message already saved) + task.assistantMessageSavedToHistory = true + + // Set up pending tool result + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-skip-wait", + content: "Result when flag is true", + }, + ] + + // Clear mock call history + mockPWaitFor.mockClear() + + await task.flushPendingToolResultsToHistory() + + // Should not have called pWaitFor since flag was already true + expect(mockPWaitFor).not.toHaveBeenCalled() + + // Should still save the message + expect(task.apiConversationHistory.length).toBe(1) + expect((task.apiConversationHistory[0].content as any[])[0].tool_use_id).toBe("tool-skip-wait") + }) + + it("should wait for assistantMessageSavedToHistory when flag is false", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Flag is false by default - assistant message not yet saved + expect(task.assistantMessageSavedToHistory).toBe(false) + + // Set up pending tool result + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-wait", + content: "Result when flag is false", + }, + ] + + // Clear mock call history + mockPWaitFor.mockClear() + + await task.flushPendingToolResultsToHistory() + + // Should have called pWaitFor since flag was false + expect(mockPWaitFor).toHaveBeenCalled() + + // Should still save the message (mock resolves immediately) + expect(task.apiConversationHistory.length).toBe(1) + }) + + it("should not flush when task is aborted during wait", async () => { + const task = new Task({ + provider: mockProvider, + apiConfiguration: mockApiConfig, + task: "test task", + startTask: false, + }) + + // Flag is false - will need to wait + task.assistantMessageSavedToHistory = false + + // Set up pending tool result + task.userMessageContent = [ + { + type: "tool_result", + tool_use_id: "tool-aborted", + content: "Should not be saved", + }, + ] + + // Set abort flag - this will cause the condition in pWaitFor to return true + // AND will cause early return after the wait + task.abort = true + + await task.flushPendingToolResultsToHistory() + + // Should not have saved anything since task was aborted + expect(task.apiConversationHistory.length).toBe(0) + }) })