From ef9bf93673b294b27d6193b973f63cd4580a19e5 Mon Sep 17 00:00:00 2001 From: Zeke Foppa Date: Wed, 14 Jan 2026 10:36:48 -0800 Subject: [PATCH 1/3] [bfops/fix-llm-hint]: Fix hint for fixing llm benchmarks --- docs/DEVELOP.md | 15 --------------- .../xtask-llm-benchmark/src/bin/llm_benchmark.rs | 6 +++--- 2 files changed, 3 insertions(+), 18 deletions(-) diff --git a/docs/DEVELOP.md b/docs/DEVELOP.md index f4e614823fc..db844db4674 100644 --- a/docs/DEVELOP.md +++ b/docs/DEVELOP.md @@ -12,21 +12,6 @@ This document explains how to configure the environment, run the LLM benchmark t 4. [Troubleshooting](#troubleshooting) --- -## Quick Checks & Fixes - -Use this single command to quickly unblock CI by regenerating hashes and running only GPT-5 for the minimal Rust + C# passes. This is not the full benchmark suite. - -`cargo llm ci-quickfix` -What this does: -1. Runs Rust rustdoc_json pass for GPT-5 only. -2. Runs C# docs pass for GPT-5 only. -3. Writes updated results & summary. - ---- - -> Model IDs passed to `--models` must match configured routes (see `model_routes.rs`), e.g. `"openai:gpt-5"`. - - ### Spacetime CLI Publishing is performed via the `spacetime` CLI (`spacetime publish -c -y --server `). Ensure: - `spacetime` is on PATH diff --git a/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs b/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs index 53e7eae176a..83b3f799393 100644 --- a/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs +++ b/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs @@ -283,7 +283,7 @@ fn cmd_ci_check(args: CiCheckArgs) -> Result<()> { }; // Debug hint for how to (re)generate entries - let hint_for = |_lang: Lang| -> &'static str { "cargo llm ci-quickfix" }; + let hint_for = |_lang: Lang| -> &'static str { "posting /update-llm-benchmark on the PR" }; // Load docs-benchmark summary to compare hashes against let summary_path = docs_benchmark_summary(); @@ -298,13 +298,13 @@ fn cmd_ci_check(args: CiCheckArgs) -> Result<()> { match xtask_llm_benchmark::context::resolve_mode_paths(mode) { Ok(paths) if !paths.is_empty() => {} Ok(_) => bail!( - "CI check FAILED: {}/{} resolved to 0 paths.\n→ Try: {}", + "CI check FAILED: {}/{} resolved to 0 paths.\n→ Try {}", mode, lang_str, hint_for(lang) ), Err(e) => bail!( - "CI check FAILED: {}/{} not available: {}.\n→ Try: {}", + "CI check FAILED: {}/{} not available: {}.\n→ Try {}", mode, lang_str, e, From 2105c4d6f71725e0f4c79e28e0c1563932bdc6c9 Mon Sep 17 00:00:00 2001 From: Zeke Foppa Date: Thu, 15 Jan 2026 12:43:23 -0800 Subject: [PATCH 2/3] [bfops/fix-llm-hint]: update --- docs/DEVELOP.md | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/docs/DEVELOP.md b/docs/DEVELOP.md index db844db4674..5617ece18a0 100644 --- a/docs/DEVELOP.md +++ b/docs/DEVELOP.md @@ -12,6 +12,23 @@ This document explains how to configure the environment, run the LLM benchmark t 4. [Troubleshooting](#troubleshooting) --- +## Quick Checks & Fixes + +Use this single command to quickly unblock CI by regenerating hashes and running only GPT-5 for the minimal Rust + C# passes. This is not the full benchmark suite. + +**Note: You will need OpenAI API keys to run this locally**. Alternatively, any SpacetimeDB member can comment `/update-llm-benchmark` on a PR to start a CI job to do this. + +`cargo llm ci-quickfix` +What this does: +1. Runs Rust rustdoc_json pass for GPT-5 only. +2. Runs C# docs pass for GPT-5 only. +3. Writes updated results & summary. + +--- + +> Model IDs passed to `--models` must match configured routes (see `model_routes.rs`), e.g. `"openai:gpt-5"`. + + ### Spacetime CLI Publishing is performed via the `spacetime` CLI (`spacetime publish -c -y --server `). Ensure: - `spacetime` is on PATH From e58d492f160c9f94429ee5c670df2d0f6dc3af3b Mon Sep 17 00:00:00 2001 From: Zeke Foppa Date: Thu, 15 Jan 2026 13:17:35 -0800 Subject: [PATCH 3/3] [bfops/fix-llm-hint]: update cli hint --- tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs b/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs index 83b3f799393..be7d49a8970 100644 --- a/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs +++ b/tools/xtask-llm-benchmark/src/bin/llm_benchmark.rs @@ -283,7 +283,7 @@ fn cmd_ci_check(args: CiCheckArgs) -> Result<()> { }; // Debug hint for how to (re)generate entries - let hint_for = |_lang: Lang| -> &'static str { "posting /update-llm-benchmark on the PR" }; + let hint_for = |_lang: Lang| -> &'static str { "Check DEVELOP.md for instructions on how to proceed." }; // Load docs-benchmark summary to compare hashes against let summary_path = docs_benchmark_summary(); @@ -298,13 +298,13 @@ fn cmd_ci_check(args: CiCheckArgs) -> Result<()> { match xtask_llm_benchmark::context::resolve_mode_paths(mode) { Ok(paths) if !paths.is_empty() => {} Ok(_) => bail!( - "CI check FAILED: {}/{} resolved to 0 paths.\n→ Try {}", + "CI check FAILED: {}/{} resolved to 0 paths.\n→ {}", mode, lang_str, hint_for(lang) ), Err(e) => bail!( - "CI check FAILED: {}/{} not available: {}.\n→ Try {}", + "CI check FAILED: {}/{} not available: {}.\n→ {}", mode, lang_str, e,