From 6b50c440dba8a6aebe5519c5030d3d2d9d1e69c0 Mon Sep 17 00:00:00 2001 From: daniel-lxs Date: Wed, 28 Jan 2026 18:55:21 -0500 Subject: [PATCH 1/3] feat: add OpenAI-compatible base provider and migrate Moonshot to AI SDK - Add OpenAICompatibleHandler base class using @ai-sdk/openai-compatible - Migrate MoonshotHandler to extend OpenAICompatibleHandler - Leverage AI SDK utilities (convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart) - Update moonshot tests to mock AI SDK functions (streamText, generateText) - Add @ai-sdk/openai-compatible dependency to src/package.json - All 19 moonshot tests pass --- pnpm-lock.yaml | 36 ++ src/api/providers/__tests__/moonshot.spec.ts | 373 ++++++++++++------- src/api/providers/index.ts | 2 + src/api/providers/moonshot.ts | 69 ++-- src/api/providers/openai-compatible.ts | 212 +++++++++++ src/package.json | 1 + 6 files changed, 539 insertions(+), 154 deletions(-) create mode 100644 src/api/providers/openai-compatible.ts diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b8ca01240be..e57f20b536e 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -990,6 +990,9 @@ importers: specifier: 3.25.76 version: 3.25.76 devDependencies: + '@ai-sdk/openai-compatible': + specifier: ^1.0.0 + version: 1.0.31(zod@3.25.76) '@openrouter/ai-sdk-provider': specifier: ^2.0.4 version: 2.1.1(ai@6.0.57(zod@3.25.76))(zod@3.25.76) @@ -1384,12 +1387,28 @@ packages: peerDependencies: zod: 3.25.76 + '@ai-sdk/openai-compatible@1.0.31': + resolution: {integrity: sha512-znBvaVHM0M6yWNerIEy3hR+O8ZK2sPcE7e2cxfb6kYLEX3k//JH5VDnRnajseVofg7LXtTCFFdjsB7WLf1BdeQ==} + engines: {node: '>=18'} + peerDependencies: + zod: 3.25.76 + + '@ai-sdk/provider-utils@3.0.20': + resolution: {integrity: sha512-iXHVe0apM2zUEzauqJwqmpC37A5rihrStAih5Ks+JE32iTe4LZ58y17UGBjpQQTCRw9YxMeo2UFLxLpBluyvLQ==} + engines: {node: '>=18'} + peerDependencies: + zod: 3.25.76 + '@ai-sdk/provider-utils@4.0.10': resolution: {integrity: sha512-VeDAiCH+ZK8Xs4hb9Cw7pHlujWNL52RKe8TExOkrw6Ir1AmfajBZTb9XUdKOZO08RwQElIKA8+Ltm+Gqfo8djQ==} engines: {node: '>=18'} peerDependencies: zod: 3.25.76 + '@ai-sdk/provider@2.0.1': + resolution: {integrity: sha512-KCUwswvsC5VsW2PWFqF8eJgSCu5Ysj7m1TxiHTVA6g7k360bk0RNQENT8KTMAYEs+8fWPD3Uu4dEmzGHc+jGng==} + engines: {node: '>=18'} + '@ai-sdk/provider@3.0.5': resolution: {integrity: sha512-2Xmoq6DBJqmSl80U6V9z5jJSJP7ehaJJQMy2iFUqTay06wdCqTnPVBBQbtEL8RCChenL+q5DC5H5WzU3vV3v8w==} engines: {node: '>=18'} @@ -10784,6 +10803,19 @@ snapshots: '@vercel/oidc': 3.1.0 zod: 3.25.76 + '@ai-sdk/openai-compatible@1.0.31(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 2.0.1 + '@ai-sdk/provider-utils': 3.0.20(zod@3.25.76) + zod: 3.25.76 + + '@ai-sdk/provider-utils@3.0.20(zod@3.25.76)': + dependencies: + '@ai-sdk/provider': 2.0.1 + '@standard-schema/spec': 1.1.0 + eventsource-parser: 3.0.6 + zod: 3.25.76 + '@ai-sdk/provider-utils@4.0.10(zod@3.25.76)': dependencies: '@ai-sdk/provider': 3.0.5 @@ -10791,6 +10823,10 @@ snapshots: eventsource-parser: 3.0.6 zod: 3.25.76 + '@ai-sdk/provider@2.0.1': + dependencies: + json-schema: 0.4.0 + '@ai-sdk/provider@3.0.5': dependencies: json-schema: 0.4.0 diff --git a/src/api/providers/__tests__/moonshot.spec.ts b/src/api/providers/__tests__/moonshot.spec.ts index ab919c53c23..9040ed23ca7 100644 --- a/src/api/providers/__tests__/moonshot.spec.ts +++ b/src/api/providers/__tests__/moonshot.spec.ts @@ -1,67 +1,28 @@ -// Mocks must come first, before imports -const mockCreate = vi.fn() -vi.mock("openai", () => { +// Use vi.hoisted to define mock functions that can be 
referenced in hoisted vi.mock() calls +const { mockStreamText, mockGenerateText } = vi.hoisted(() => ({ + mockStreamText: vi.fn(), + mockGenerateText: vi.fn(), +})) + +vi.mock("ai", async (importOriginal) => { + const actual = await importOriginal() return { - __esModule: true, - default: vi.fn().mockImplementation(() => ({ - chat: { - completions: { - create: mockCreate.mockImplementation(async (options) => { - if (!options.stream) { - return { - id: "test-completion", - choices: [ - { - message: { role: "assistant", content: "Test response", refusal: null }, - finish_reason: "stop", - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - cached_tokens: 2, - }, - } - } - - // Return async iterator for streaming - return { - [Symbol.asyncIterator]: async function* () { - yield { - choices: [ - { - delta: { content: "Test response" }, - index: 0, - }, - ], - usage: null, - } - yield { - choices: [ - { - delta: {}, - index: 0, - }, - ], - usage: { - prompt_tokens: 10, - completion_tokens: 5, - total_tokens: 15, - cached_tokens: 2, - }, - } - }, - } - }), - }, - }, - })), + ...actual, + streamText: mockStreamText, + generateText: mockGenerateText, } }) -import OpenAI from "openai" +vi.mock("@ai-sdk/openai-compatible", () => ({ + createOpenAICompatible: vi.fn(() => { + // Return a function that returns a mock language model + return vi.fn(() => ({ + modelId: "moonshot-chat", + provider: "moonshot", + })) + }), +})) + import type { Anthropic } from "@anthropic-ai/sdk" import { moonshotDefaultModelId } from "@roo-code/types" @@ -90,15 +51,6 @@ describe("MoonshotHandler", () => { expect(handler.getModel().id).toBe(mockOptions.apiModelId) }) - it.skip("should throw error if API key is missing", () => { - expect(() => { - new MoonshotHandler({ - ...mockOptions, - moonshotApiKey: undefined, - }) - }).toThrow("Moonshot API key is required") - }) - it("should use default model ID if not provided", () => { const handlerWithoutModel = new MoonshotHandler({ ...mockOptions, @@ -113,12 +65,6 @@ describe("MoonshotHandler", () => { moonshotBaseUrl: undefined, }) expect(handlerWithoutBaseUrl).toBeInstanceOf(MoonshotHandler) - // The base URL is passed to OpenAI client internally - expect(OpenAI).toHaveBeenCalledWith( - expect.objectContaining({ - baseURL: "https://api.moonshot.ai/v1", - }), - ) }) it("should use chinese base URL if provided", () => { @@ -128,18 +74,6 @@ describe("MoonshotHandler", () => { moonshotBaseUrl: customBaseUrl, }) expect(handlerWithCustomUrl).toBeInstanceOf(MoonshotHandler) - // The custom base URL is passed to OpenAI client - expect(OpenAI).toHaveBeenCalledWith( - expect.objectContaining({ - baseURL: customBaseUrl, - }), - ) - }) - - it("should set includeMaxTokens to true", () => { - // Create a new handler and verify OpenAI client was called with includeMaxTokens - const _handler = new MoonshotHandler(mockOptions) - expect(OpenAI).toHaveBeenCalledWith(expect.objectContaining({ apiKey: mockOptions.moonshotApiKey })) }) }) @@ -151,7 +85,7 @@ describe("MoonshotHandler", () => { expect(model.info.maxTokens).toBe(16384) expect(model.info.contextWindow).toBe(262144) expect(model.info.supportsImages).toBe(false) - expect(model.info.supportsPromptCache).toBe(true) // Should be true now + expect(model.info.supportsPromptCache).toBe(true) }) it("should return provided model ID with default model info if model does not exist", () => { @@ -162,11 +96,8 @@ describe("MoonshotHandler", () => { const model = handlerWithInvalidModel.getModel() 
expect(model.id).toBe("invalid-model") // Returns provided ID expect(model.info).toBeDefined() - // With the current implementation, it's the same object reference when using default model info - expect(model.info).toBe(handler.getModel().info) - // Should have the same base properties + // Should have the same base properties as default model expect(model.info.contextWindow).toBe(handler.getModel().info.contextWindow) - // And should have supportsPromptCache set to true expect(model.info.supportsPromptCache).toBe(true) }) @@ -203,6 +134,24 @@ describe("MoonshotHandler", () => { ] it("should handle streaming responses", async () => { + // Mock the fullStream async generator + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + // Mock usage promise + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + details: { cachedInputTokens: undefined }, + raw: { cached_tokens: 2 }, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + }) + const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] for await (const chunk of stream) { @@ -216,6 +165,22 @@ describe("MoonshotHandler", () => { }) it("should include usage information", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + details: {}, + raw: { cached_tokens: 2 }, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + }) + const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] for await (const chunk of stream) { @@ -229,6 +194,22 @@ describe("MoonshotHandler", () => { }) it("should include cache metrics in usage information", async () => { + async function* mockFullStream() { + yield { type: "text-delta", text: "Test response" } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + details: {}, + raw: { cached_tokens: 2 }, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + }) + const stream = handler.createMessage(systemPrompt, messages) const chunks: any[] = [] for await (const chunk of stream) { @@ -242,6 +223,23 @@ describe("MoonshotHandler", () => { }) }) + describe("completePrompt", () => { + it("should complete a prompt using generateText", async () => { + mockGenerateText.mockResolvedValue({ + text: "Test completion", + }) + + const result = await handler.completePrompt("Test prompt") + + expect(result).toBe("Test completion") + expect(mockGenerateText).toHaveBeenCalledWith( + expect.objectContaining({ + prompt: "Test prompt", + }), + ) + }) + }) + describe("processUsageMetrics", () => { it("should correctly process usage metrics including cache information", () => { // We need to access the protected method, so we'll create a test subclass @@ -254,10 +252,12 @@ describe("MoonshotHandler", () => { const testHandler = new TestMoonshotHandler(mockOptions) const usage = { - prompt_tokens: 100, - completion_tokens: 50, - total_tokens: 150, - cached_tokens: 20, + inputTokens: 100, + outputTokens: 50, + details: {}, + raw: { + cached_tokens: 20, + }, } const result = testHandler.testProcessUsageMetrics(usage) @@ -279,10 +279,10 @@ describe("MoonshotHandler", () => { const testHandler = new TestMoonshotHandler(mockOptions) const usage = { - prompt_tokens: 100, - completion_tokens: 50, - total_tokens: 150, - // No cached_tokens 
+ inputTokens: 100, + outputTokens: 50, + details: {}, + raw: {}, } const result = testHandler.testProcessUsageMetrics(usage) @@ -295,31 +295,25 @@ describe("MoonshotHandler", () => { }) }) - describe("addMaxTokensIfNeeded", () => { - it("should always add max_tokens regardless of includeMaxTokens option", () => { - // Create a test subclass to access the protected method + describe("getMaxOutputTokens", () => { + it("should return maxTokens from model info", () => { class TestMoonshotHandler extends MoonshotHandler { - public testAddMaxTokensIfNeeded(requestOptions: any, modelInfo: any) { - this.addMaxTokensIfNeeded(requestOptions, modelInfo) + public testGetMaxOutputTokens() { + return this.getMaxOutputTokens() } } const testHandler = new TestMoonshotHandler(mockOptions) - const requestOptions: any = {} - const modelInfo = { - maxTokens: 32_000, - } - - // Test with includeMaxTokens set to false - should still add max tokens - testHandler.testAddMaxTokensIfNeeded(requestOptions, modelInfo) + const result = testHandler.testGetMaxOutputTokens() - expect(requestOptions.max_tokens).toBe(32_000) + // Default model maxTokens is 16384 + expect(result).toBe(16384) }) it("should use modelMaxTokens when provided", () => { class TestMoonshotHandler extends MoonshotHandler { - public testAddMaxTokensIfNeeded(requestOptions: any, modelInfo: any) { - this.addMaxTokensIfNeeded(requestOptions, modelInfo) + public testGetMaxOutputTokens() { + return this.getMaxOutputTokens() } } @@ -328,32 +322,153 @@ describe("MoonshotHandler", () => { ...mockOptions, modelMaxTokens: customMaxTokens, }) - const requestOptions: any = {} - const modelInfo = { - maxTokens: 32_000, - } - testHandler.testAddMaxTokensIfNeeded(requestOptions, modelInfo) - - expect(requestOptions.max_tokens).toBe(customMaxTokens) + const result = testHandler.testGetMaxOutputTokens() + expect(result).toBe(customMaxTokens) }) it("should fall back to modelInfo.maxTokens when modelMaxTokens is not provided", () => { class TestMoonshotHandler extends MoonshotHandler { - public testAddMaxTokensIfNeeded(requestOptions: any, modelInfo: any) { - this.addMaxTokensIfNeeded(requestOptions, modelInfo) + public testGetMaxOutputTokens() { + return this.getMaxOutputTokens() } } const testHandler = new TestMoonshotHandler(mockOptions) - const requestOptions: any = {} - const modelInfo = { - maxTokens: 16_000, + const result = testHandler.testGetMaxOutputTokens() + + // moonshot-chat has maxTokens of 16384 + expect(result).toBe(16384) + }) + }) + + describe("tool handling", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello!" 
}], + }, + ] + + it("should handle tool calls in streaming", async () => { + async function* mockFullStream() { + yield { + type: "tool-input-start", + id: "tool-call-1", + toolName: "read_file", + } + yield { + type: "tool-input-delta", + id: "tool-call-1", + delta: '{"path":"test.ts"}', + } + yield { + type: "tool-input-end", + id: "tool-call-1", + } } - testHandler.testAddMaxTokensIfNeeded(requestOptions, modelInfo) + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + details: {}, + raw: {}, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + }) + + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file", + parameters: { + type: "object", + properties: { path: { type: "string" } }, + required: ["path"], + }, + }, + }, + ], + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const toolCallStartChunks = chunks.filter((c) => c.type === "tool_call_start") + const toolCallDeltaChunks = chunks.filter((c) => c.type === "tool_call_delta") + const toolCallEndChunks = chunks.filter((c) => c.type === "tool_call_end") + + expect(toolCallStartChunks.length).toBe(1) + expect(toolCallStartChunks[0].id).toBe("tool-call-1") + expect(toolCallStartChunks[0].name).toBe("read_file") + + expect(toolCallDeltaChunks.length).toBe(1) + expect(toolCallDeltaChunks[0].delta).toBe('{"path":"test.ts"}') + + expect(toolCallEndChunks.length).toBe(1) + expect(toolCallEndChunks[0].id).toBe("tool-call-1") + }) + + it("should handle complete tool calls", async () => { + async function* mockFullStream() { + yield { + type: "tool-call", + toolCallId: "tool-call-1", + toolName: "read_file", + input: { path: "test.ts" }, + } + } + + const mockUsage = Promise.resolve({ + inputTokens: 10, + outputTokens: 5, + details: {}, + raw: {}, + }) + + mockStreamText.mockReturnValue({ + fullStream: mockFullStream(), + usage: mockUsage, + }) + + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file", + parameters: { + type: "object", + properties: { path: { type: "string" } }, + required: ["path"], + }, + }, + }, + ], + }) + + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } - expect(requestOptions.max_tokens).toBe(16_000) + const toolCallChunks = chunks.filter((c) => c.type === "tool_call") + expect(toolCallChunks.length).toBe(1) + expect(toolCallChunks[0].id).toBe("tool-call-1") + expect(toolCallChunks[0].name).toBe("read_file") + expect(toolCallChunks[0].arguments).toBe('{"path":"test.ts"}') }) }) }) diff --git a/src/api/providers/index.ts b/src/api/providers/index.ts index 141839e29f9..cf49f75f189 100644 --- a/src/api/providers/index.ts +++ b/src/api/providers/index.ts @@ -17,6 +17,8 @@ export { MistralHandler } from "./mistral" export { OpenAiCodexHandler } from "./openai-codex" export { OpenAiNativeHandler } from "./openai-native" export { OpenAiHandler } from "./openai" +export { OpenAICompatibleHandler } from "./openai-compatible" +export type { OpenAICompatibleConfig } from "./openai-compatible" export { OpenRouterHandler } from "./openrouter" export { QwenCodeHandler } from "./qwen-code" export { RequestyHandler } from "./requesty" diff --git a/src/api/providers/moonshot.ts b/src/api/providers/moonshot.ts index 
d29a10a3b3e..f7a849cc025 100644
--- a/src/api/providers/moonshot.ts
+++ b/src/api/providers/moonshot.ts
@@ -1,4 +1,3 @@
-import OpenAI from "openai"
 import { moonshotModels, moonshotDefaultModelId, type ModelInfo } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
@@ -6,18 +5,25 @@ import type { ApiHandlerOptions } from "../../shared/api"
 import type { ApiStreamUsageChunk } from "../transform/stream"
 import { getModelParams } from "../transform/model-params"
 
-import { OpenAiHandler } from "./openai"
+import { OpenAICompatibleHandler, OpenAICompatibleConfig } from "./openai-compatible"
 
-export class MoonshotHandler extends OpenAiHandler {
+export class MoonshotHandler extends OpenAICompatibleHandler {
 	constructor(options: ApiHandlerOptions) {
-		super({
-			...options,
-			openAiApiKey: options.moonshotApiKey ?? "not-provided",
-			openAiModelId: options.apiModelId ?? moonshotDefaultModelId,
-			openAiBaseUrl: options.moonshotBaseUrl ?? "https://api.moonshot.ai/v1",
-			openAiStreamingEnabled: true,
-			includeMaxTokens: true,
-		})
+		const modelId = options.apiModelId ?? moonshotDefaultModelId
+		const modelInfo =
+			moonshotModels[modelId as keyof typeof moonshotModels] || moonshotModels[moonshotDefaultModelId]
+
+		const config: OpenAICompatibleConfig = {
+			providerName: "moonshot",
+			baseURL: options.moonshotBaseUrl ?? "https://api.moonshot.ai/v1",
+			apiKey: options.moonshotApiKey ?? "not-provided",
+			modelId,
+			modelInfo,
+			modelMaxTokens: options.modelMaxTokens ?? undefined,
+			temperature: options.modelTemperature ?? undefined,
+		}
+
+		super(options, config)
 	}
 
 	override getModel() {
@@ -27,25 +33,38 @@ export class MoonshotHandler extends OpenAiHandler {
 		return { id, info, ...params }
 	}
 
-	// Override to handle Moonshot's usage metrics, including caching.
-	protected override processUsageMetrics(usage: any): ApiStreamUsageChunk {
+	/**
+	 * Override to handle Moonshot's usage metrics, including caching.
+	 * Moonshot returns cached_tokens in a different location than standard OpenAI.
+	 */
+	protected override processUsageMetrics(usage: {
+		inputTokens?: number
+		outputTokens?: number
+		details?: {
+			cachedInputTokens?: number
+			reasoningTokens?: number
+		}
+		raw?: Record<string, unknown>
+	}): ApiStreamUsageChunk {
+		// Moonshot uses cached_tokens at the top level of raw usage data
+		const rawUsage = usage.raw as { cached_tokens?: number } | undefined
+
 		return {
 			type: "usage",
-			inputTokens: usage?.prompt_tokens || 0,
-			outputTokens: usage?.completion_tokens || 0,
+			inputTokens: usage.inputTokens || 0,
+			outputTokens: usage.outputTokens || 0,
 			cacheWriteTokens: 0,
-			cacheReadTokens: usage?.cached_tokens,
+			cacheReadTokens: rawUsage?.cached_tokens ?? usage.details?.cachedInputTokens,
 		}
 	}
 
-	// Override to always include max_tokens for Moonshot (not max_completion_tokens)
-	protected override addMaxTokensIfNeeded(
-		requestOptions:
-			| OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming
-			| OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming,
-		modelInfo: ModelInfo,
-	): void {
-		// Moonshot uses max_tokens instead of max_completion_tokens
-		requestOptions.max_tokens = this.options.modelMaxTokens || modelInfo.maxTokens
+	/**
+	 * Override to always include max_tokens for Moonshot (not max_completion_tokens).
+	 * Moonshot requires max_tokens parameter to be sent.
+	 */
+	protected override getMaxOutputTokens(): number | undefined {
+		const modelInfo = this.config.modelInfo
+		// Moonshot always requires max_tokens
+		return this.options.modelMaxTokens || modelInfo.maxTokens || undefined
 	}
 }
diff --git a/src/api/providers/openai-compatible.ts b/src/api/providers/openai-compatible.ts
new file mode 100644
index 00000000000..d129e72452f
--- /dev/null
+++ b/src/api/providers/openai-compatible.ts
@@ -0,0 +1,212 @@
+/**
+ * OpenAI-compatible provider base class using Vercel AI SDK.
+ * This provides a parallel implementation to OpenAiHandler using @ai-sdk/openai-compatible.
+ */
+
+import { Anthropic } from "@anthropic-ai/sdk"
+import OpenAI from "openai"
+import { createOpenAICompatible } from "@ai-sdk/openai-compatible"
+import { streamText, generateText, LanguageModel, ToolSet } from "ai"
+
+import type { ModelInfo } from "@roo-code/types"
+
+import type { ApiHandlerOptions } from "../../shared/api"
+
+import { convertToAiSdkMessages, convertToolsForAiSdk, processAiSdkStreamPart } from "../transform/ai-sdk"
+import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
+
+import { DEFAULT_HEADERS } from "./constants"
+import { BaseProvider } from "./base-provider"
+import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
+
+/**
+ * Configuration options for creating an OpenAI-compatible provider.
+ */
+export interface OpenAICompatibleConfig {
+	/** Provider name for identification */
+	providerName: string
+	/** Base URL for the API endpoint */
+	baseURL: string
+	/** API key for authentication */
+	apiKey: string
+	/** Model ID to use */
+	modelId: string
+	/** Model information */
+	modelInfo: ModelInfo
+	/** Optional custom headers */
+	headers?: Record<string, string>
+	/** Whether to include max_tokens in requests (default: false uses max_completion_tokens) */
+	useMaxTokens?: boolean
+	/** User-configured max tokens override */
+	modelMaxTokens?: number
+	/** Temperature setting */
+	temperature?: number
+}
+
+/**
+ * Base class for OpenAI-compatible API providers using Vercel AI SDK.
+ * Extends BaseProvider and implements SingleCompletionHandler.
+ */
+export abstract class OpenAICompatibleHandler extends BaseProvider implements SingleCompletionHandler {
+	protected options: ApiHandlerOptions
+	protected config: OpenAICompatibleConfig
+	protected provider: ReturnType<typeof createOpenAICompatible>
+
+	constructor(options: ApiHandlerOptions, config: OpenAICompatibleConfig) {
+		super()
+		this.options = options
+		this.config = config
+
+		// Create the OpenAI-compatible provider using AI SDK
+		this.provider = createOpenAICompatible({
+			name: config.providerName,
+			baseURL: config.baseURL,
+			apiKey: config.apiKey,
+			headers: {
+				...DEFAULT_HEADERS,
+				...(config.headers || {}),
+			},
+		})
+	}
+
+	/**
+	 * Get the language model for the configured model ID.
+	 */
+	protected getLanguageModel(): LanguageModel {
+		return this.provider(this.config.modelId)
+	}
+
+	/**
+	 * Get the model information. Must be implemented by subclasses.
+	 */
+	abstract override getModel(): { id: string; info: ModelInfo; maxTokens?: number; temperature?: number }
+
+	/**
+	 * Process usage metrics from the AI SDK response.
+	 * Can be overridden by subclasses to handle provider-specific usage formats.
+	 */
+	protected processUsageMetrics(usage: {
+		inputTokens?: number
+		outputTokens?: number
+		details?: {
+			cachedInputTokens?: number
+			reasoningTokens?: number
+		}
+		raw?: Record<string, unknown>
+	}): ApiStreamUsageChunk {
+		return {
+			type: "usage",
+			inputTokens: usage.inputTokens || 0,
+			outputTokens: usage.outputTokens || 0,
+			cacheReadTokens: usage.details?.cachedInputTokens,
+			reasoningTokens: usage.details?.reasoningTokens,
+		}
+	}
+
+	/**
+	 * Map OpenAI tool_choice to AI SDK toolChoice format.
+	 */
+	protected mapToolChoice(
+		toolChoice: OpenAI.Chat.ChatCompletionCreateParams["tool_choice"],
+	): "auto" | "none" | "required" | { type: "tool"; toolName: string } | undefined {
+		if (!toolChoice) {
+			return undefined
+		}
+
+		// Handle string values
+		if (typeof toolChoice === "string") {
+			switch (toolChoice) {
+				case "auto":
+					return "auto"
+				case "none":
+					return "none"
+				case "required":
+					return "required"
+				default:
+					return "auto"
+			}
+		}
+
+		// Handle object values (OpenAI ChatCompletionNamedToolChoice format)
+		if (typeof toolChoice === "object" && "type" in toolChoice) {
+			if (toolChoice.type === "function" && "function" in toolChoice && toolChoice.function?.name) {
+				return { type: "tool", toolName: toolChoice.function.name }
+			}
+		}
+
+		return undefined
+	}
+
+	/**
+	 * Get the max tokens parameter to include in the request.
+	 */
+	protected getMaxOutputTokens(): number | undefined {
+		const modelInfo = this.config.modelInfo
+		const maxTokens = this.config.modelMaxTokens || modelInfo.maxTokens
+
+		return maxTokens ?? undefined
+	}
+
+	/**
+	 * Create a message stream using the AI SDK.
+	 */
+	override async *createMessage(
+		systemPrompt: string,
+		messages: Anthropic.Messages.MessageParam[],
+		metadata?: ApiHandlerCreateMessageMetadata,
+	): ApiStream {
+		const model = this.getModel()
+		const languageModel = this.getLanguageModel()
+
+		// Convert messages to AI SDK format
+		const aiSdkMessages = convertToAiSdkMessages(messages)
+
+		// Convert tools to OpenAI format first, then to AI SDK format
+		const openAiTools = this.convertToolsForOpenAI(metadata?.tools)
+		const aiSdkTools = convertToolsForAiSdk(openAiTools) as ToolSet | undefined
+
+		// Build the request options
+		const requestOptions: Parameters<typeof streamText>[0] = {
+			model: languageModel,
+			system: systemPrompt,
+			messages: aiSdkMessages,
+			temperature: model.temperature ?? this.config.temperature ?? 0,
+			maxOutputTokens: this.getMaxOutputTokens(),
+			tools: aiSdkTools,
+			toolChoice: this.mapToolChoice(metadata?.tool_choice),
+		}
+
+		// Use streamText for streaming responses
+		const result = streamText(requestOptions)
+
+		// Process the full stream to get all events
+		for await (const part of result.fullStream) {
+			// Use the processAiSdkStreamPart utility to convert stream parts
+			for (const chunk of processAiSdkStreamPart(part)) {
+				yield chunk
+			}
+		}
+
+		// Yield usage metrics at the end
+		const usage = await result.usage
+		if (usage) {
+			yield this.processUsageMetrics(usage)
+		}
+	}
+
+	/**
+	 * Complete a prompt using the AI SDK generateText.
+	 */
+	async completePrompt(prompt: string): Promise<string> {
+		const languageModel = this.getLanguageModel()
+
+		const { text } = await generateText({
+			model: languageModel,
+			prompt,
+			maxOutputTokens: this.getMaxOutputTokens(),
+			temperature: this.config.temperature ?? 0,
+		})
+
+		return text
+	}
+}
diff --git a/src/package.json b/src/package.json
index bf4a009a946..736ffdea131 100644
--- a/src/package.json
+++ b/src/package.json
@@ -532,6 +532,7 @@
 		"zod": "3.25.76"
 	},
 	"devDependencies": {
+		"@ai-sdk/openai-compatible": "^1.0.0",
 		"@openrouter/ai-sdk-provider": "^2.0.4",
 		"@roo-code/build": "workspace:^",
 		"@roo-code/config-eslint": "workspace:^",

From d64a3927ef3bb8df01610d409f8c4f6f444c0bd7 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Wed, 28 Jan 2026 19:28:36 -0500
Subject: [PATCH 2/3] feat: add kimi-k2.5 model to Moonshot provider

- Add new kimi-k2.5 model with 262k context window
- Input price: $0.60/1M tokens (cache miss), $0.10/1M tokens (cache hit)
- Output price: $3.00/1M tokens
- Supports prompt caching
---
 packages/types/src/providers/moonshot.ts | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/packages/types/src/providers/moonshot.ts b/packages/types/src/providers/moonshot.ts
index 7ddafab76b7..14d8d37a5f4 100644
--- a/packages/types/src/providers/moonshot.ts
+++ b/packages/types/src/providers/moonshot.ts
@@ -53,6 +53,17 @@ export const moonshotModels = {
 		defaultTemperature: 1.0,
 		description: `The kimi-k2-thinking model is a general-purpose agentic reasoning model developed by Moonshot AI. Thanks to its strength in deep reasoning and multi-turn tool use, it can solve even the hardest problems.`,
 	},
+	"kimi-k2.5": {
+		maxTokens: 16_384,
+		contextWindow: 262_144,
+		supportsImages: false,
+		supportsPromptCache: true,
+		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
+		outputPrice: 3.0, // $3.00 per million tokens
+		cacheReadsPrice: 0.1, // $0.10 per million tokens (cache hit)
+		description:
+			"Kimi K2.5 is the latest generation of Moonshot AI's Kimi series, featuring improved reasoning capabilities and enhanced performance across diverse tasks.",
+	},
 } as const satisfies Record<string, ModelInfo>
 
 export const MOONSHOT_DEFAULT_TEMPERATURE = 0.6

From 4782b7bcb1ccd3774d2b51bb788e6368f5343385 Mon Sep 17 00:00:00 2001
From: daniel-lxs
Date: Wed, 28 Jan 2026 19:33:37 -0500
Subject: [PATCH 3/3] feat(types): enforce temperature 1.0 for kimi-k2.5 model

---
 packages/types/src/providers/moonshot.ts | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/packages/types/src/providers/moonshot.ts b/packages/types/src/providers/moonshot.ts
index 14d8d37a5f4..a825475644b 100644
--- a/packages/types/src/providers/moonshot.ts
+++ b/packages/types/src/providers/moonshot.ts
@@ -61,6 +61,8 @@ export const moonshotModels = {
 		inputPrice: 0.6, // $0.60 per million tokens (cache miss)
 		outputPrice: 3.0, // $3.00 per million tokens
 		cacheReadsPrice: 0.1, // $0.10 per million tokens (cache hit)
+		supportsTemperature: true,
+		defaultTemperature: 1.0,
 		description:
 			"Kimi K2.5 is the latest generation of Moonshot AI's Kimi series, featuring improved reasoning capabilities and enhanced performance across diverse tasks.",
 	},
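
Editor's note: the new OpenAICompatibleHandler in PATCH 1/3 is meant to be subclassed per provider. The sketch below (not part of this series) shows how a follow-up provider could be wired up the same way MoonshotHandler is above; the provider name "example", its endpoint, and the model table are hypothetical and only the config fields and methods shown in the patch are used.

import type { ModelInfo } from "@roo-code/types"
import type { ApiHandlerOptions } from "../../shared/api"
import { OpenAICompatibleHandler, OpenAICompatibleConfig } from "./openai-compatible"

// Hypothetical model table; a real provider would define this in @roo-code/types.
const exampleModels: Record<string, ModelInfo> = {
	"example-chat": {
		maxTokens: 8_192,
		contextWindow: 131_072,
		supportsImages: false,
		supportsPromptCache: false,
	},
}
const exampleDefaultModelId = "example-chat"

export class ExampleHandler extends OpenAICompatibleHandler {
	constructor(options: ApiHandlerOptions) {
		const modelId = options.apiModelId ?? exampleDefaultModelId
		const modelInfo = exampleModels[modelId] ?? exampleModels[exampleDefaultModelId]

		const config: OpenAICompatibleConfig = {
			providerName: "example", // hypothetical provider name
			baseURL: "https://api.example.com/v1", // hypothetical endpoint
			apiKey: "not-provided", // a real provider would read its own key from options
			modelId,
			modelInfo,
			modelMaxTokens: options.modelMaxTokens ?? undefined,
			temperature: options.modelTemperature ?? undefined,
		}

		super(options, config)
	}

	// getModel() is abstract in the base class, so each provider reports its own id/info.
	override getModel() {
		return { id: this.config.modelId, info: this.config.modelInfo }
	}
}

Streaming, tool-call conversion, usage reporting, and completePrompt() then come from the base class; a subclass only overrides processUsageMetrics() or getMaxOutputTokens() when the provider deviates from the defaults, as Moonshot does for cached_tokens and max_tokens.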