clockworklabs · cloutiertyler · Jan 13, 2026 · Jan 13, 2026 · Jan 13, 2026 · Jan 12, 2026
diff --git a/.github/workflows/llm-benchmark-update.yml b/.github/workflows/llm-benchmark-update.yml
@@ -28,7 +28,11 @@ jobs:
     if: |
       (github.event_name == 'issue_comment' && github.event.issue.pull_request && startsWith(github.event.comment.body, '/update-llm-benchmark')) ||
       (github.event_name == 'workflow_dispatch')
-    runs-on: ubuntu-latest
+    runs-on: spacetimedb-new-runner
+    container:
+      image: localhost:5000/spacetimedb-ci:latest
+      options: >-
+        --privileged
     steps:
       # Here we install the spacetime CLI for faster execution of the tests
       # SpacetimeDB itself is not under test here, rather it's the docs.
@@ -201,6 +205,18 @@ jobs:
           llm_benchmark ci-quickfix
           llm_benchmark ci-check
 
+      # Generate failure analysis for any failing benchmarks (best-effort: `|| true` keeps analysis errors from failing the job)
+      - name: Generate failure analysis
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+        run: |
+          llm_benchmark analyze -o docs/llms/docs-benchmark-analysis.md || true
+
+      # Generate PR comment markdown (compares against master baseline)
+      - name: Generate PR comment markdown
+        run: |
+          llm_benchmark ci-comment
+
       - name: Ensure only docs/llms changed
         run: |
           set -euo pipefail
@@ -226,77 +242,41 @@ jobs:
           github-token: ${{ secrets.CLOCKWORK_LABS_BOT_PAT }}
           script: |
             const fs = require('fs');
-            // docs-benchmark files are used for CI (testing documentation quality)
-            const summaryPath = 'docs/llms/docs-benchmark-summary.json';
-            const summary = JSON.parse(fs.readFileSync(summaryPath, 'utf8'));
-
-            // Extract results for the modes checked by ci-check
-            // Rust: rustdoc_json, C#: docs
-            const rustResults = summary.by_language?.rust?.modes?.rustdoc_json?.models?.['GPT-5'];
-            const csharpResults = summary.by_language?.csharp?.modes?.docs?.models?.['GPT-5'];
-
-            const formatPct = (val) => val !== undefined ? `${val.toFixed(1)}%` : 'N/A';
-
-            let table = `## LLM Benchmark Results (ci-quickfix)\n\n`;
-            table += `| Language | Mode | Category | Tests Passed | Pass % | Task Pass % |\n`;
-            table += `|----------|------|----------|--------------|--------|-------------|\n`;
-
-            if (rustResults) {
-              const cats = rustResults.categories || {};
-              if (cats.basics) {
-                const c = cats.basics;
-                table += `| Rust | rustdoc_json | basics | ${c.passed_tests}/${c.total_tests} | ${formatPct(c.pass_pct)} | ${formatPct(c.task_pass_pct)} |\n`;
-              }
-              if (cats.schema) {
-                const c = cats.schema;
-                table += `| Rust | rustdoc_json | schema | ${c.passed_tests}/${c.total_tests} | ${formatPct(c.pass_pct)} | ${formatPct(c.task_pass_pct)} |\n`;
-              }
-              const t = rustResults.totals;
-              table += `| Rust | rustdoc_json | **total** | ${t.passed_tests}/${t.total_tests} | ${formatPct(t.pass_pct)} | ${formatPct(t.task_pass_pct)} |\n`;
-            }
 
-            if (csharpResults) {
-              const cats = csharpResults.categories || {};
-              if (cats.basics) {
-                const c = cats.basics;
-                table += `| C# | docs | basics | ${c.passed_tests}/${c.total_tests} | ${formatPct(c.pass_pct)} | ${formatPct(c.task_pass_pct)} |\n`;
-              }
-              if (cats.schema) {
-                const c = cats.schema;
-                table += `| C# | docs | schema | ${c.passed_tests}/${c.total_tests} | ${formatPct(c.pass_pct)} | ${formatPct(c.task_pass_pct)} |\n`;
+            // Read the pre-generated comment markdown
+            const commentPath = 'docs/llms/docs-benchmark-comment.md';
+            if (!fs.existsSync(commentPath)) {
+              core.setFailed(`Comment file not found: ${commentPath}`);
+              return;
+            }
+            let body = fs.readFileSync(commentPath, 'utf8');
+
+            // Check if failure analysis exists and append it
+            const analysisPath = 'docs/llms/docs-benchmark-analysis.md';
+            if (fs.existsSync(analysisPath)) {
+              const analysis = fs.readFileSync(analysisPath, 'utf8');
+              // Only include the analysis when it does not contain the "No failures found" text
+              if (!analysis.includes('No failures found')) {
+                body += `\n<details>\n<summary>Failure Analysis (click to expand)</summary>\n\n${analysis}\n</details>`;
               }
-              const t = csharpResults.totals;
-              table += `| C# | docs | **total** | ${t.passed_tests}/${t.total_tests} | ${formatPct(t.pass_pct)} | ${formatPct(t.task_pass_pct)} |\n`;
             }
 
-            table += `\n<sub>Generated at: ${summary.generated_at}</sub>`;
-
             const issue_number = Number(process.env.PR_NUMBER);
 
-            // Find and update existing comment or create new one
-            const comments = await github.rest.issues.listComments({
-              owner: context.repo.owner,
-              repo: context.repo.repo,
-              issue_number,
-            });
-
-            const marker = '## LLM Benchmark Results (ci-quickfix)';
-            const existingComment = comments.data.find(c => c.body.startsWith(marker));
-
-            if (existingComment) {
-              await github.rest.issues.updateComment({
-                owner: context.repo.owner,
-                repo: context.repo.repo,
-                comment_id: existingComment.id,
-                body: table,
-              });
-            } else {
+            // Always post a new comment
+            console.log(`Posting new comment on PR #${issue_number}...`);
+            try {
               await github.rest.issues.createComment({
                 owner: context.repo.owner,
                 repo: context.repo.repo,
                 issue_number,
-                body: table,
+                body,
               });
+              console.log('Comment created successfully');
+            } catch (err) {
+              console.error('Failed to post comment:', err.message);
+              console.error('Full error:', JSON.stringify(err, null, 2));
+              throw err;
             }
 
       # The benchmarks only modify the docs/llms directory.

diff --git a/Cargo.lock b/Cargo.lock
diff --git a/crates/cli/build.rs b/crates/cli/build.rs
@@ -58,6 +58,8 @@ fn get_manifest_dir() -> PathBuf {
 //                            templates list at templates/templates-list.json
 //   * `get_ai_rules_base` - returns base AI rules for all languages
 //   * `get_ai_rules_typescript` - returns TypeScript-specific AI rules
+//   * `get_ai_rules_rust` - returns Rust-specific AI rules
+//   * `get_ai_rules_csharp` - returns C#-specific AI rules
 fn generate_template_files() {
     let manifest_dir = get_manifest_dir();
     let repo_root = get_repo_root();
@@ -140,6 +142,34 @@ fn generate_template_files() {
         panic!("Could not find \"docs/static/ai-rules/spacetimedb-typescript.mdc\" file.");
     }
 
+    // Rust-specific rules
+    let rust_rules_path = ai_rules_dir.join("spacetimedb-rust.mdc");
+    if rust_rules_path.exists() {
+        generated_code.push_str("pub fn get_ai_rules_rust() -> &'static str {\n");
+        generated_code.push_str(&format!(
+            "    include_str!(\"{}\")\n",
+            rust_rules_path.to_str().unwrap().replace('\\', "\\\\")
+        ));
+        generated_code.push_str("}\n\n");
+        println!("cargo:rerun-if-changed={}", rust_rules_path.display());
+    } else {
+        panic!("Could not find \"docs/static/ai-rules/spacetimedb-rust.mdc\" file.");
+    }
+
+    // C#-specific rules
+    let csharp_rules_path = ai_rules_dir.join("spacetimedb-csharp.mdc");
+    if csharp_rules_path.exists() {
+        generated_code.push_str("pub fn get_ai_rules_csharp() -> &'static str {\n");
+        generated_code.push_str(&format!(
+            "    include_str!(\"{}\")\n",
+            csharp_rules_path.to_str().unwrap().replace('\\', "\\\\")
+        ));
+        generated_code.push_str("}\n\n");
+        println!("cargo:rerun-if-changed={}", csharp_rules_path.display());
+    } else {
+        panic!("Could not find \"docs/static/ai-rules/spacetimedb-csharp.mdc\" file.");
+    }
+
     // Expose workspace metadata so `spacetime init` can rewrite template manifests without hardcoding versions.
     generated_code.push_str("pub fn get_workspace_edition() -> &'static str {\n");
     generated_code.push_str(&format!("    \"{}\"\n", workspace_edition.escape_default()));

@@ -1667,15 +1667,22 @@ fn set_dependency_version(item: &mut Item, version: &str, remove_path: bool) {
 /// Writes rules to:
 /// - .cursor/rules/ (Cursor)
 /// - CLAUDE.md (Claude Code)
+/// - AGENTS.md (Opencode)
 /// - .windsurfrules (Windsurf)
 /// - .github/copilot-instructions.md (VS Code Copilot)
 fn install_ai_rules(config: &TemplateConfig, project_path: &Path) -> anyhow::Result<()> {
     let base_rules = embedded::get_ai_rules_base();
     let ts_rules = embedded::get_ai_rules_typescript();
+    let rust_rules = embedded::get_ai_rules_rust();
+    let csharp_rules = embedded::get_ai_rules_csharp();
 
-    // Check if TypeScript is used in either server or client
+    // Check which languages are used in server or client
     let uses_typescript = config.server_lang == Some(ServerLanguage::TypeScript)
         || config.client_lang == Some(ClientLanguage::TypeScript);
+    let uses_rust =
+        config.server_lang == Some(ServerLanguage::Rust) || config.client_lang == Some(ClientLanguage::Rust);
+    let uses_csharp =
+        config.server_lang == Some(ServerLanguage::Csharp) || config.client_lang == Some(ClientLanguage::Csharp);
 
     // 1. Cursor: .cursor/rules/ directory with separate files
     let cursor_dir = project_path.join(".cursor/rules");
@@ -1684,24 +1691,44 @@ fn install_ai_rules(config: &TemplateConfig, project_path: &Path) -> anyhow::Res
     if uses_typescript {
         fs::write(cursor_dir.join("spacetimedb-typescript.mdc"), ts_rules)?;
     }
+    if uses_rust {
+        fs::write(cursor_dir.join("spacetimedb-rust.mdc"), rust_rules)?;
+    }
+    if uses_csharp {
+        fs::write(cursor_dir.join("spacetimedb-csharp.mdc"), csharp_rules)?;
+    }
 
     // Build combined content for single-file AI assistants
     // Strip the YAML frontmatter from the .mdc files for non-Cursor tools
     let base_content = strip_mdc_frontmatter(base_rules);
-    let combined_content = if uses_typescript {
+    let mut combined_content = base_content.to_string();
+
+    if uses_typescript {
         let ts_content = strip_mdc_frontmatter(ts_rules);
-        format!("{}\n\n{}", base_content, ts_content)
-    } else {
-        base_content.to_string()
-    };
+        combined_content.push_str("\n\n");
+        combined_content.push_str(ts_content);
+    }
+    if uses_rust {
+        let rust_content = strip_mdc_frontmatter(rust_rules);
+        combined_content.push_str("\n\n");
+        combined_content.push_str(rust_content);
+    }
+    if uses_csharp {
+        let csharp_content = strip_mdc_frontmatter(csharp_rules);
+        combined_content.push_str("\n\n");
+        combined_content.push_str(csharp_content);
+    }
 
     // 2. Claude Code: CLAUDE.md
     fs::write(project_path.join("CLAUDE.md"), &combined_content)?;
 
-    // 3. Windsurf: .windsurfrules
+    // 3. Opencode: AGENTS.md
+    fs::write(project_path.join("AGENTS.md"), &combined_content)?;
+
+    // 4. Windsurf: .windsurfrules
     fs::write(project_path.join(".windsurfrules"), &combined_content)?;
 
-    // 4. VS Code Copilot: .github/copilot-instructions.md
+    // 5. VS Code Copilot: .github/copilot-instructions.md
     let github_dir = project_path.join(".github");
     fs::create_dir_all(&github_dir)?;
     fs::write(github_dir.join("copilot-instructions.md"), &combined_content)?;

diff --git a/docs/DEVELOP.md b/docs/DEVELOP.md
@@ -9,7 +9,8 @@ This document explains how to configure the environment, run the LLM benchmark t
 1. [Quick Checks & Fixes](#quick-checks-fixes)
 2. [Environment Variables](#environment-variables)
 3. [Benchmark Suite](#benchmark-suite)
-4. [Troubleshooting](#troubleshooting)
+4. [Context Construction](#context-construction)
+5. [Troubleshooting](#troubleshooting)
 ---
 
 ## Quick Checks & Fixes
@@ -231,6 +232,11 @@ cargo llm run --force
 cargo llm ci-check --lang rust
 cargo llm ci-check --lang csharp
 
+# Generate PR comment markdown (compares against master baseline)
+cargo llm ci-comment
+# With custom baseline ref
+cargo llm ci-comment --baseline-ref origin/main
+
 ```
 
 Outputs:
@@ -239,6 +245,62 @@ Outputs:
 
 ---
 
+## Context Construction
+
+The benchmark tool constructs a context (documentation) that is sent to the LLM along with each task prompt. The context varies by language and mode.
+
+### Modes
+
+| Mode | Language | Source | Description |
+|------|----------|--------|-------------|
+| `rustdoc_json` | Rust | `crates/bindings` | Generates rustdoc JSON and extracts documentation from the spacetimedb crate |
+| `docs` | C# | `docs/docs/**/*.md` | Concatenates all markdown files from the documentation |
+
+### Tab Filtering
+
+When building context for a specific language, the tool filters `<Tabs>` components to only include content relevant to the target language. This reduces noise and helps the LLM focus on the correct syntax.
+
+**Filtered tab groupIds:**
+
+| groupId | Purpose | Tab Values |
+|---------|---------|------------|
+| `server-language` | Server module code examples | `rust`, `csharp`, `typescript` |
+| `client-language` | Client SDK code examples | `rust`, `csharp`, `typescript`, `cpp`, `blueprint` |
+
+**Filtering behavior:**
+- For C# tests: Only `value="csharp"` tabs are kept
+- For Rust tests: Only `value="rust"` tabs are kept
+- If no matching tab exists (e.g., `client-language` with only `cpp`/`blueprint`), the entire tabs block is removed
+
+**Example transformation:**
+
+Before (in markdown):
+```html
+<Tabs groupId="server-language" queryString>
+<TabItem value="csharp" label="C#">
+C# code here
+</TabItem>
+<TabItem value="rust" label="Rust">
+Rust code here
+</TabItem>
+</Tabs>
+```
+
+After (for C# context):
+```
+C# code here
+```
+
+### Documentation Best Practices
+
+When writing documentation that will be used by the benchmark:
+
+1. **Use consistent tab groupIds**: Always use `server-language` for server module code and `client-language` for client SDK code
+2. **Include all supported languages**: Ensure each `<Tabs>` block has tabs for all languages you want to test
+3. **Use consistent naming conventions**: The benchmark compares LLM output against golden answers, so documentation should reflect the expected conventions (e.g., PascalCase table names for C#)
+
+---
+
 ## Troubleshooting
 
 **HTTP 400/404 from providers**

diff --git a/docs/docs/00100-intro/00100-getting-started/00400-key-architecture.md b/docs/docs/00100-intro/00100-getting-started/00400-key-architecture.md
@@ -47,7 +47,7 @@ const players = table(
 <TabItem value="csharp" label="C#">
 
 ```csharp
-[SpacetimeDB.Table(Name = "players", Public = true)]
+[SpacetimeDB.Table(Name = "Player", Public = true)]
 public partial struct Player
 {
     [SpacetimeDB.PrimaryKey]
@@ -193,7 +193,7 @@ spacetimedb.reducer('world', (ctx) => {
 ```
 
 While SpacetimeDB doesn't support nested transactions,
-a reducer can [schedule another reducer](/tables/scheduled-tables) to run at an interval,
+a reducer can [schedule another reducer](/tables/schedule-tables) to run at an interval,
 or at a specific time.
 
 </TabItem>
@@ -218,7 +218,7 @@ public static void World(ReducerContext ctx)
 ```
 
 While SpacetimeDB doesn't support nested transactions,
-a reducer can [schedule another reducer](/tables/scheduled-tables) to run at an interval,
+a reducer can [schedule another reducer](/tables/schedule-tables) to run at an interval,
 or at a specific time.
 
 </TabItem>