microsoft · robotdad · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025
diff --git a/.gitignore b/.gitignore
@@ -71,3 +71,4 @@ next-steps.md
 
 # Working folders
 ai_working/tmp
+tests/recipes/DECISIONS.md
diff --git a/tests/recipes/features/agent-delegation.yaml b/tests/recipes/features/agent-delegation.yaml
@@ -0,0 +1,100 @@
+name: "agent-delegation"
+description: "Phase 1 Feature Test: Sub-session spawning via task tool (agent delegation)"
+version: "1.0.0"
+author: "Amplifier Recipes Team"
+created: "2025-12-10T15:48:00Z"
+tags: ["phase1", "cli", "agent-delegation", "task-tool", "sub-session", "feature-test"]  # "phase1" (no hyphen) is the spelling most feature recipes use
+
+# Test context: values referenced from the step prompts as {{test_word}} / {{expected_count}}
+context:
+  test_word: "testing"
+  expected_count: 7  # number of letters in test_word ("testing")
+
+# Test Steps
+steps:
+  # Step 1: Delegate the letter-counting task to a sub-agent via the task tool.
+  # The expected count is deliberately NOT given in this prompt, so the answer
+  # checked in step 2 has to come from the sub-agent rather than being echoed.
+  - id: "delegate-task"
+    agent: "foundation:explorer"
+    timeout: 60
+    prompt: |
+      Use the task tool to delegate this counting task to a sub-agent:
+
+      "Count the number of letters in the word '{{test_word}}'"
+
+      The sub-agent should analyze the word and return the exact count.
+      Do not count the letters yourself; report only what the sub-agent returns.
+
+      After the sub-agent completes, capture its response in your output.
+    output: "delegation_result"  # referenced by the validate step as {{delegation_result}}
+    retry:
+      max_attempts: 2
+      backoff: "exponential"
+      initial_delay: 5
+
+  # Step 2: Validate delegation behavior. The result-validator is given step 1's
+  # output ({{delegation_result}}) and asked for a PASS:/FAIL: verdict on four criteria.
+  - id: "validate"
+    agent: "recipes:result-validator"
+    timeout: 60
+    prompt: |
+      Validate the agent delegation test results:
+
+      TEST: Agent Delegation via Task Tool
+
+      Delegation Result:
+      {{delegation_result}}
+
+      VALIDATION CRITERIA (all must pass):
+
+      1. Task Tool Invocation
+         - Verify the parent agent (foundation:explorer) successfully invoked the task tool
+         - Evidence: delegation_result should contain task tool usage
+
+      2. Sub-Agent Session Spawned
+         - Verify a sub-agent session was created
+         - Evidence: delegation_result should show sub-agent activity
+
+      3. Correct Answer Produced
+         - Verify sub-agent counted letters in "{{test_word}}" correctly
+         - Expected: {{expected_count}} letters
+         - Evidence: delegation_result should contain the number {{expected_count}}
+
+      4. Result Returned to Parent
+         - Verify the sub-agent's answer flowed back to the parent agent
+         - Evidence: delegation_result contains sub-agent's complete response
+
+      VERDICT FORMAT:
+      - Start with "PASS:" if all criteria met
+      - Start with "FAIL:" if any criteria failed
+      - Provide evidence for each criterion
+      - List which criteria passed/failed
+
+      What we're testing:
+      - CLI spawns sub-sessions correctly
+      - Task tool integration works end-to-end
+      - Parent-child session communication works
+      - Results flow back properly from sub-agent to parent
+    output: "validation_verdict"  # the usage notes at the end of this recipe expect it to start with "PASS:"
+    on_error: "fail"  # NOTE(review): presumably fails the recipe when this step errors — confirm against the recipe schema
+
+# Recipe Usage:
+#
+# Execute test (from the repository root):
+#   amplifier recipes execute tests/recipes/features/agent-delegation.yaml
+#
+# Expected behavior:
+#   - Step 1: foundation:explorer delegates counting task to sub-agent
+#   - Sub-agent counts 7 letters in "testing"
+#   - Result flows back to parent agent
+#   - Step 2: result-validator verifies all delegation mechanisms worked
+#   - Final verdict: PASS or FAIL
+#
+# Success criteria:
+#   - validation_verdict starts with "PASS:"
+#   - All 4 validation criteria met
+#   - Sub-agent correctly identified 7 letters
+#
+# This test validates core CLI capability:
+#   Agent delegation via task tool sub-session spawning
diff --git a/tests/recipes/features/collection-resolution.yaml b/tests/recipes/features/collection-resolution.yaml
@@ -0,0 +1,36 @@
+name: "collection-resolution"
+description: "Test @mention path resolution for collection files via CLI"
+version: "1.0.0"
+author: "Amplifier Recipes Team"
+created: "2025-12-10T15:48:54Z"
+tags: ["phase1", "feature-test", "collection-resolution", "@mention"]
+
+steps:
+  - id: "read-collection-file"  # step 1: explorer agent is asked to read a file through an @mention path
+    agent: "foundation:explorer"
+    prompt: |
+      Use the read_file tool to read the file at this exact @mention path:
+      @foundation:context/IMPLEMENTATION_PHILOSOPHY.md
+
+      This tests whether the CLI correctly resolves @collection: style paths.
+
+      Return the following in your response:
+      1. Whether read_file was successfully invoked with the @mention path
+      2. The first 500 characters of the file content (to verify successful read)
+      3. Whether the content contains expected philosophy keywords like "simplicity"
+    output: "collection_content"  # referenced by the validate step as {{collection_content}}
+    timeout: 30
+
+  - id: "validate"  # step 2: result-validator judges {{collection_content}} against the PASS criteria
+    agent: "recipes:result-validator"
+    prompt: |
+      Validate this collection file resolution test result:
+      {{collection_content}}
+
+      PASS criteria (ALL must be true):
+      1. read_file tool was invoked with @mention path (@foundation:context/IMPLEMENTATION_PHILOSOPHY.md)
+      2. File content was successfully returned (not an error)
+      3. Content contains philosophy-related terms (e.g., "simplicity", "philosophy", "principles")
+
+      Provide clear PASS or FAIL verdict with evidence.
+    timeout: 30  # NOTE(review): no `output:` key on this step, unlike the validate steps in agent-delegation and tool-filesystem — confirm intended
diff --git a/tests/recipes/features/tool-bash.yaml b/tests/recipes/features/tool-bash.yaml
@@ -0,0 +1,33 @@
+name: "tool-bash-test"  # NOTE(review): the other feature recipes use the file stem as name (no "-test" suffix) — confirm intended
+description: "Phase 1 feature test: Validates bash tool execution capability"
+version: "1.0.0"
+author: "Amplifier Recipes Team"
+tags: ["phase1", "feature-test", "bash", "tool-test"]
+
+steps:
+  - id: "execute-bash"  # step 1: explorer agent runs a fixed shell pipeline and reports its output
+    agent: "foundation:explorer"
+    prompt: |
+      Execute this bash command and return the output:
+
+      echo "test-output" | wc -c
+
+      This should return "12" (11 characters plus newline).
+      Use the bash tool to execute this command and report the exact output.
+    output: "bash_result"  # referenced by the validate step as {{bash_result}}
+    timeout: 30
+
+  - id: "validate"  # step 2: result-validator checks {{bash_result}} for a clean run and the value "12"
+    agent: "recipes:result-validator"
+    prompt: |
+      Validate the bash tool execution from the previous step.
+
+      Result to validate: {{bash_result}}
+
+      Check that:
+      1. The bash tool was successfully invoked
+      2. The command executed without errors
+      3. The output contains "12" (indicating 11 chars + newline)
+
+      Provide a clear PASS or FAIL verdict.
+    timeout: 30
diff --git a/tests/recipes/features/tool-filesystem.yaml b/tests/recipes/features/tool-filesystem.yaml
@@ -0,0 +1,84 @@
+name: "tool-filesystem"
+description: "Phase 1 feature test: Validates filesystem tool operations (read_file, write_file, glob)"
+version: "1.0.0"
+author: "Amplifier Recipes Team"
+created: "2025-12-10T15:55:30Z"
+tags: ["phase1", "feature-test", "filesystem", "tools"]
+
+context:
+  fixture_dir: "tests/recipes/fixtures"  # relative path; also used to build the glob pattern {{fixture_dir}}/*.txt
+  fixture_file: "tests/recipes/fixtures/sample.txt"  # NOTE(review): fixture not visible in this diff — confirm it exists and contains the three lines the validate step requires
+  output_file: "tests/recipes/fixtures/test-output.txt"  # NOTE(review): written inside the fixtures dir on every run — confirm whether it should be gitignored or cleaned up
+  test_content: "Filesystem test output from recipe execution"  # exact text the test-write step is asked to write
+
+steps:
+  - id: "test-read"  # read_file check against {{fixture_file}}
+    agent: "foundation:explorer"
+    prompt: |
+      Use the read_file tool to read the file at: {{fixture_file}}
+
+      Return the complete content of the file exactly as it appears.
+    output: "read_result"  # referenced by the validate step as {{read_result}}
+    timeout: 30
+
+  - id: "test-glob"  # glob check; listed before test-write in this file
+    agent: "foundation:explorer"
+    prompt: |
+      Use the glob tool to find all .txt files in: {{fixture_dir}}
+
+      Pattern to use: {{fixture_dir}}/*.txt
+
+      Return the list of matching files found.
+    output: "glob_result"  # referenced by the validate step as {{glob_result}}
+    timeout: 30
+
+  - id: "test-write"  # write_file check; creates {{output_file}} with {{test_content}}
+    agent: "foundation:explorer"
+    prompt: |
+      Use the write_file tool to create a new file at: {{output_file}}
+
+      Write this exact content: {{test_content}}
+
+      Confirm the write operation completed successfully.
+    output: "write_result"  # referenced by the validate step as {{write_result}}
+    timeout: 30
+
+  - id: "validate"  # final step: result-validator judges read_result, glob_result and write_result together
+    agent: "recipes:result-validator"
+    prompt: |
+      Validate the filesystem tool operations completed successfully.
+
+      **Validation Criteria:**
+
+      1. READ OPERATION:
+         - read_result should contain content from sample.txt
+         - Must include: "Sample content for filesystem testing"
+         - Must include: "This file tests read_file operations"
+         - Must include: "Line three contains test data"
+
+      2. GLOB OPERATION:
+         - glob_result should list files matching *.txt pattern
+         - Must include: sample.txt in the results
+         - Results should be from {{fixture_dir}} directory
+
+      3. WRITE OPERATION:
+         - write_result should confirm successful write
+         - File {{output_file}} should have been created
+         - Content written: "{{test_content}}"
+
+      **Results to validate:**
+
+      Read result: {{read_result}}
+
+      Glob result: {{glob_result}}
+
+      Write result: {{write_result}}
+
+      **Verdict Required:**
+      Respond with clear PASS or FAIL verdict.
+      - PASS if all three operations completed successfully
+      - FAIL if any operation failed or returned incorrect results
+
+      Provide evidence for your verdict.
+    output: "validation_result"  # name under which the PASS/FAIL verdict text is stored
+    timeout: 30
diff --git a/tests/recipes/features/tool-search.yaml b/tests/recipes/features/tool-search.yaml
@@ -0,0 +1,64 @@
+name: "tool-search"
+description: "Phase 1 feature test: Validates grep/search tool operations in CLI"
+version: "1.0.0"
+author: "Amplifier Recipes Team"
+created: "2025-12-10T15:49:07Z"
+tags: ["phase1", "feature-test", "tools", "grep", "search", "cli"]
+
+context:
+  fixtures_dir: "tests/recipes/fixtures"  # relative directory the grep step is told to search
+  search_pattern: "def test_function"  # literal text the grep step looks for
+  expected_file: "sample-code.py"  # NOTE(review): fixture not visible in this diff — confirm it exists under fixtures_dir and contains search_pattern
+
+steps:
+  - id: "test-grep"  # step 1: explorer agent searches {{fixtures_dir}} for {{search_pattern}} with the grep tool
+    agent: "foundation:explorer"
+    prompt: |
+      Search for the pattern "{{search_pattern}}" in the directory {{fixtures_dir}}.
+
+      Use the grep tool to find this pattern. Report back:
+      1. Whether the search was successful
+      2. Which file(s) contain the pattern
+      3. The matching lines found
+
+      Be specific about the grep operation and results.
+    output: "grep_result"  # referenced by the validate step as {{grep_result}}
+    timeout: 30
+
+  - id: "validate"  # step 2: result-validator checks {{grep_result}} against the four criteria
+    agent: "recipes:result-validator"
+    prompt: |
+      Validate the grep tool execution from the previous step.
+
+      RESULT TO VALIDATE:
+      {{grep_result}}
+
+      VALIDATION CRITERIA:
+      1. The grep tool was successfully invoked
+      2. The search pattern "{{search_pattern}}" was found
+      3. The results include the file path (should contain "{{expected_file}}")
+      4. The matching content includes the actual pattern text
+
+      Provide a clear PASS or FAIL verdict.
+
+      PASS if: All criteria met - grep tool worked and found the pattern
+      FAIL if: Grep tool failed, pattern not found, or incomplete results
+
+      Format your response with:
+      - Evidence for each criterion
+      - Final verdict: PASS or FAIL
+    timeout: 30
+
+# Test Execution (from the repository root, so the relative fixtures_dir resolves):
+#   amplifier recipes execute tests/recipes/features/tool-search.yaml
+#
+# What This Tests:
+#   - CLI loads grep/search tool module correctly
+#   - Search tool is accessible to agents (foundation:explorer)
+#   - Pattern search operations work correctly
+#   - Results are returned with file paths and match content
+#
+# Expected Outcome:
+#   - Step 1: foundation:explorer uses grep to find "def test_function" in fixtures
+#   - Step 2: result-validator confirms PASS verdict
+#   - Overall: Validates grep tool integration in CLI
diff --git a/tests/recipes/features/tool-web.yaml b/tests/recipes/features/tool-web.yaml
@@ -0,0 +1,38 @@
+name: "tool-web"
+description: "Phase 1 feature test: Web tool operations (web_fetch)"
+version: "1.0.0"
+author: "Amplifier Recipes Team"
+tags: ["test", "feature-test", "phase1", "web-tools"]  # "phase1" (no hyphen) is the spelling most feature recipes use
+
+context:
+  test_url: "https://httpbin.org/get"  # returns a JSON body; /status/200 replies with an empty body, which cannot satisfy the "content was returned" check
+
+steps:
+  - id: "test-fetch"  # step 1: explorer agent fetches {{test_url}} with the web_fetch tool
+    agent: "foundation:explorer"
+    prompt: |
+      Test the web_fetch tool by fetching this URL: {{test_url}}
+
+      Use the web_fetch tool to retrieve the content. Verify the operation succeeds.
+
+      Report:
+      - Whether the fetch succeeded
+      - Any content retrieved
+      - Any errors encountered
+    output: "fetch_result"  # referenced by the validate step as {{fetch_result}}
+    timeout: 45
+
+  - id: "validate"  # step 2: result-validator checks {{fetch_result}} for invocation, success and returned content
+    agent: "recipes:result-validator"
+    prompt: |
+      Validate the web tool test results from the previous step.
+
+      Test execution: {{fetch_result}}
+
+      Verify:
+      1. The web_fetch tool was invoked
+      2. The fetch operation succeeded (no errors)
+      3. Content was returned from {{test_url}}
+
+      Provide a clear PASS or FAIL verdict with evidence.
+    timeout: 45
Original file line number	Diff line number	Diff line change
Expand Up		@@ -71,3 +71,4 @@ next-steps.md

		# Working folders
		ai_working/tmp
		tests/recipes/DECISIONS.md