diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fc3d769ae2a..5720837c285 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -3,7 +3,7 @@ import OpenAI from "openai" import type { ModelInfo } from "@roo-code/types" -import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api" +import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api" import { TagMatcher } from "../../utils/tag-matcher" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" @@ -14,6 +14,7 @@ import { BaseProvider } from "./base-provider" import { handleOpenAIError } from "./utils/openai-error-handler" import { calculateApiCostOpenAI } from "../../shared/cost" import { getApiRequestTimeout } from "./utils/timeout-config" +import { getGlmModelOptions } from "./utils/glm-model-detection" type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & { providerName: string @@ -23,6 +24,11 @@ type BaseOpenAiCompatibleProviderOptions = ApiHandlerO defaultTemperature?: number } +// Extended chat completion params to support thinking mode for GLM-4.7+ +type ChatCompletionParamsWithThinking = OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming & { + thinking?: { type: "enabled" | "disabled" } +} + export abstract class BaseOpenAiCompatibleProvider extends BaseProvider implements SingleCompletionHandler @@ -75,6 +81,9 @@ export abstract class BaseOpenAiCompatibleProvider ) { const { id: model, info } = this.getModel() + // Check if this is a GLM model and get recommended options + const glmOptions = getGlmModelOptions(model) + // Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply) const max_tokens = getModelMaxOutputTokens({ @@ -86,21 +95,35 @@ export abstract class BaseOpenAiCompatibleProvider const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature - const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { + // For GLM models, disable parallel_tool_calls as they may not support it + const parallelToolCalls = glmOptions?.disableParallelToolCalls ? false : (metadata?.parallelToolCalls ?? true) + + const params: ChatCompletionParamsWithThinking = { model, max_tokens, temperature, - messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], + messages: [ + { role: "system", content: systemPrompt }, + ...convertToOpenAiMessages(messages, { + mergeToolResultText: glmOptions?.mergeToolResultText ?? false, + }), + ], stream: true, stream_options: { include_usage: true }, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + parallel_tool_calls: parallelToolCalls, } // Add thinking parameter if reasoning is enabled and model supports it if (this.options.enableReasoningEffort && info.supportsReasoningBinary) { - ;(params as any).thinking = { type: "enabled" } + params.thinking = { type: "enabled" } + } + + // For GLM-4.7+ models, add thinking mode support similar to Z.ai + if (glmOptions?.supportsThinking) { + const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) + params.thinking = useReasoning ? 
{ type: "enabled" } : { type: "disabled" } } try { diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index a771394c535..9ba354c0c65 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -4,7 +4,7 @@ import axios from "axios" import { type ModelInfo, openAiModelInfoSaneDefaults, LMSTUDIO_DEFAULT_TEMPERATURE } from "@roo-code/types" -import type { ApiHandlerOptions } from "../../shared/api" +import { type ApiHandlerOptions, shouldUseReasoningEffort } from "../../shared/api" import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser" import { TagMatcher } from "../../utils/tag-matcher" @@ -17,6 +17,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". import { getModelsFromCache } from "./fetchers/modelCache" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { getGlmModelOptions } from "./utils/glm-model-detection" + +// Extended chat completion params to support thinking mode for GLM-4.7+ +type ChatCompletionParamsWithThinking = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { + thinking?: { type: "enabled" | "disabled" } + draft_model?: string +} export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions @@ -42,9 +49,16 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const { id: modelId, info: modelInfo } = this.getModel() + + // Check if this is a GLM model and get recommended options + const glmOptions = getGlmModelOptions(modelId) + const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ { role: "system", content: systemPrompt }, - ...convertToOpenAiMessages(messages), + ...convertToOpenAiMessages(messages, { + mergeToolResultText: glmOptions?.mergeToolResultText ?? false, + }), ] // ------------------------- @@ -83,20 +97,31 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan let assistantText = "" try { - const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = { - model: this.getModel().id, + // For GLM models, disable parallel_tool_calls as they may not support it + const parallelToolCalls = glmOptions?.disableParallelToolCalls + ? false + : (metadata?.parallelToolCalls ?? true) + + const params: ChatCompletionParamsWithThinking = { + model: modelId, messages: openAiMessages, temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE, stream: true, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + parallel_tool_calls: parallelToolCalls, } if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) { params.draft_model = this.options.lmStudioDraftModelId } + // For GLM-4.7+ models, add thinking mode support similar to Z.ai + if (glmOptions?.supportsThinking) { + const useReasoning = shouldUseReasoningEffort({ model: modelInfo, settings: this.options }) + params.thinking = useReasoning ? 
{ type: "enabled" } : { type: "disabled" } + } + let results try { results = await this.client.chat.completions.create(params) diff --git a/src/api/providers/utils/__tests__/glm-model-detection.spec.ts b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts new file mode 100644 index 00000000000..a249bf04d14 --- /dev/null +++ b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts @@ -0,0 +1,296 @@ +import { isGlmModel, isGlm47Plus, getGlmModelOptions } from "../glm-model-detection" + +describe("GLM Model Detection", () => { + describe("isGlmModel", () => { + describe("should detect GLM models", () => { + const validGlmModels = [ + // Standard Z.ai format + "glm-4.5", + "glm-4.6", + "glm-4.7", + "glm-4.7-flash", + "glm-4.7-flashx", + "glm-4.5-air", + "glm-4.5v", + // MLX format (from user's report) + "mlx-community/GLM-4.5-4bit", + "mlx-community/GLM-4.5-8bit", + "mlx-community/GLM-4.7-4bit", + // GGUF format (from user's report) + "GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf", + "GLM-4.5-UD-Q4_K_M.gguf", + "GLM-4.7-UD-Q8_K_XL.gguf", + // HuggingFace format + "THUDM/glm-4-9b-chat", + "THUDM/glm-4v-9b", + "THUDM/glm-4.7-chat", + // ChatGLM variants + "chatglm-6b", + "chatglm2-6b", + "chatglm3-6b", + "ChatGLM-6B", + // Without hyphen + "glm4", + "GLM4", + "glm47", + "GLM4.7", + // Mixed case + "GLM-4.5", + "Glm-4.5", + "Glm-4.7", + ] + + test.each(validGlmModels)('should detect "%s" as a GLM model', (modelId) => { + expect(isGlmModel(modelId)).toBe(true) + }) + }) + + describe("should NOT detect non-GLM models", () => { + const nonGlmModels = [ + // OpenAI models + "gpt-4", + "gpt-4-turbo", + "gpt-3.5-turbo", + "o1-preview", + // Anthropic models + "claude-3-opus", + "claude-3.5-sonnet", + // Llama models + "llama-3.1-70b", + "meta-llama/Llama-3.1-8B-Instruct", + // Mistral models + "mistral-7b", + "mixtral-8x7b", + // DeepSeek models + "deepseek-coder", + "deepseek-reasoner", + // Qwen models + "qwen-2.5-72b", + "qwen-coder", + // Empty/undefined + "", + ] + + test.each(nonGlmModels)('should NOT detect "%s" as a GLM model', (modelId) => { + expect(isGlmModel(modelId)).toBe(false) + }) + }) + + it("should return false for undefined modelId", () => { + expect(isGlmModel(undefined)).toBe(false) + }) + }) + + describe("isGlm47Plus", () => { + describe("should detect GLM-4.7+ models", () => { + const glm47PlusModels = [ + // Standard GLM-4.7 variants (with dot separator) + "glm-4.7", + "GLM-4.7", + "glm-4.7-flash", + "glm-4.7-flashx", + "GLM-4.7-Flash", + // GLM-4.8, 4.9, etc. 
+ "glm-4.8", + "glm-4.9", + "glm-4.10", + "glm-4.99", + // GLM-5.0 and above + "glm-5.0", + "glm-6.0", + "glm-9.0", + // With underscores + "glm_4.7", + "glm_4.8", + // MLX format + "mlx-community/GLM-4.7-4bit", + "mlx-community/GLM-4.8-8bit", + // GGUF format + "GLM-4.7-UD-Q8_K_XL.gguf", + "GLM-4.9-UD-Q4_K_M.gguf", + // HuggingFace format + "THUDM/glm-4.7-chat", + "THUDM/glm-5.0-instruct", + ] + + test.each(glm47PlusModels)('should detect "%s" as GLM-4.7+', (modelId) => { + expect(isGlm47Plus(modelId)).toBe(true) + }) + }) + + describe("should NOT detect GLM-4.6 and lower versions", () => { + const olderGlmModels = [ + // GLM-4.6 and lower + "glm-4.6", + "glm-4.5", + "glm-4.0", + "GLM-4.6", + "glm-4.5-air", + "glm-4.6v", + // Without proper version separator (ambiguous format) + "glm47", + "glm48", + "glm4.7", + "GLM4.7", + // ChatGLM older versions (different model series) + "chatglm-6b", + "chatglm2-6b", + "chatglm3-6b", + // MLX format with older versions + "mlx-community/GLM-4.5-4bit", + "mlx-community/GLM-4.6-8bit", + // GGUF format with older versions + "GLM-4.5-UD-Q8_K_XL.gguf", + "GLM-4.6-UD-Q4_K_M.gguf", + // Non-GLM models + "gpt-4.7", + "llama-4.7", + "", + ] + + test.each(olderGlmModels)('should NOT detect "%s" as GLM-4.7+', (modelId) => { + expect(isGlm47Plus(modelId)).toBe(false) + }) + }) + + it("should return false for undefined modelId", () => { + expect(isGlm47Plus(undefined)).toBe(false) + }) + }) + + describe("getGlmModelOptions", () => { + describe("GLM-4.7+ models", () => { + it("should return options with thinking support for glm-4.7", () => { + const options = getGlmModelOptions("glm-4.7") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: true, + }) + }) + + it("should return options with thinking support for GLM-4.7-flash", () => { + const options = getGlmModelOptions("GLM-4.7-flash") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: true, + }) + }) + + it("should return options with thinking support for MLX GLM-4.7 models", () => { + const options = getGlmModelOptions("mlx-community/GLM-4.7-4bit") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: true, + }) + }) + + it("should return options with thinking support for GGUF GLM-4.7 models", () => { + const options = getGlmModelOptions("GLM-4.7-UD-Q8_K_XL-00001-of-00008.gguf") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: true, + }) + }) + + it("should return options with thinking support for GLM-4.8+", () => { + const options = getGlmModelOptions("glm-4.8") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: true, + }) + }) + + it("should return options with thinking support for GLM-5.0+", () => { + const options = getGlmModelOptions("glm-5.0") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: true, + }) + }) + }) + + describe("GLM-4.6 and lower models", () => { + it("should return options WITHOUT thinking support for glm-4.5", () => { + const options = getGlmModelOptions("glm-4.5") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: false, + }) + }) + + it("should return options WITHOUT thinking support for glm-4.6", () => { + const options = getGlmModelOptions("glm-4.6") + 
expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: false, + }) + }) + + it("should return options WITHOUT thinking support for MLX GLM-4.5 models", () => { + const options = getGlmModelOptions("mlx-community/GLM-4.5-4bit") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: false, + }) + }) + + it("should return options WITHOUT thinking support for GGUF GLM-4.5 models", () => { + const options = getGlmModelOptions("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: false, + }) + }) + + it("should return options WITHOUT thinking support for chatglm (different series)", () => { + const options = getGlmModelOptions("chatglm-6b") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: false, + }) + }) + + it("should return options WITHOUT thinking support for ambiguous formats", () => { + const options = getGlmModelOptions("glm47") + expect(options).toEqual({ + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking: false, + }) + }) + }) + + describe("non-GLM models", () => { + it("should return undefined for gpt-4", () => { + expect(getGlmModelOptions("gpt-4")).toBeUndefined() + }) + + it("should return undefined for llama-3.1", () => { + expect(getGlmModelOptions("llama-3.1")).toBeUndefined() + }) + + it("should return undefined for claude-3", () => { + expect(getGlmModelOptions("claude-3")).toBeUndefined() + }) + + it("should return undefined for undefined modelId", () => { + expect(getGlmModelOptions(undefined)).toBeUndefined() + }) + + it("should return undefined for empty string", () => { + expect(getGlmModelOptions("")).toBeUndefined() + }) + }) + }) +}) diff --git a/src/api/providers/utils/glm-model-detection.ts b/src/api/providers/utils/glm-model-detection.ts new file mode 100644 index 00000000000..b8f141a66c9 --- /dev/null +++ b/src/api/providers/utils/glm-model-detection.ts @@ -0,0 +1,122 @@ +/** + * Utility functions for detecting GLM (General Language Model) models. + * + * GLM models from Z.ai/THUDM may require special handling: + * - mergeToolResultText: true - prevents conversation flow disruption + * - parallel_tool_calls: false - some GLM models do not support this parameter + * - For GLM-4.7+: thinking mode support with reasoning effort + */ + +/** + * Pattern to detect GLM models in model IDs. + * + * This regex matches "glm" anywhere in the model ID (case-insensitive), + * including common variations like: + * - "glm-4.5" (standard Z.ai format) + * - "glm4" (without hyphen) + * - "chatglm" (ChatGLM variants) + * - "mlx-community/GLM-4.5-4bit" (MLX format with prefix) + * - "GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf" (GGUF format) + * - "THUDM/glm-4-9b-chat" (HuggingFace format) + */ +const GLM_MODEL_PATTERN = /glm/i + +/** + * Pattern to detect GLM-4.7 or higher versions. + * Matches variations like: glm-4.7, GLM-4.7, glm_4.7, GLM-4.8, glm-5.0, etc. + * Does NOT match: chatglm-6b, glm47 (without dot separator) + */ +const GLM_4_7_PLUS_PATTERN = /glm[-_]4\.([7-9]|\d{2,})|glm[-_][5-9]\./i + +/** + * Detects if a model ID represents a GLM (General Language Model) model. 
+ * + * @param modelId - The model ID to check (e.g., "glm-4.5", "mlx-community/GLM-4.5-4bit") + * @returns true if the model ID indicates a GLM model, false otherwise + * + * @example + * ```typescript + * isGlmModel("glm-4.5") // true + * isGlmModel("mlx-community/GLM-4.5-4bit") // true + * isGlmModel("GLM-4.5-UD-Q8_K_XL.gguf") // true + * isGlmModel("chatglm-6b") // true + * isGlmModel("gpt-4") // false + * isGlmModel("llama-3.1") // false + * ``` + */ +export function isGlmModel(modelId: string | undefined): boolean { + if (!modelId) { + return false + } + return GLM_MODEL_PATTERN.test(modelId) +} + +/** + * Detects if a model ID represents GLM-4.7 or higher version. + * + * @param modelId - The model ID to check + * @returns true if the model is GLM-4.7 or higher + * + * @example + * ```typescript + * isGlm47Plus("glm-4.7") // true + * isGlm47Plus("GLM-4.7-flash") // true + * isGlm47Plus("mlx-community/GLM-4.7-4bit") // true + * isGlm47Plus("glm-4.8") // true + * isGlm47Plus("glm-5.0") // true + * isGlm47Plus("glm-4.6") // false + * isGlm47Plus("glm-4.5") // false + * ``` + */ +export function isGlm47Plus(modelId: string | undefined): boolean { + if (!modelId) { + return false + } + return GLM_4_7_PLUS_PATTERN.test(modelId) +} + +/** + * Configuration options for GLM models when used via LM Studio + * or OpenAI-compatible endpoints. + */ +export interface GlmModelOptions { + /** + * If true, merge text content after tool_results into the last tool message + * instead of creating a separate user message. This prevents GLM models from + * losing context or reasoning_content after tool results. + */ + mergeToolResultText: boolean + + /** + * If true, disable parallel_tool_calls parameter for GLM models + * since they may not support it. + */ + disableParallelToolCalls: boolean + + /** + * If true, the model supports thinking mode (GLM-4.7+). + * This enables the `thinking` parameter to be sent in API requests. + */ + supportsThinking: boolean +} + +/** + * Returns the recommended configuration options for a GLM model. + * + * @param modelId - The model ID to check + * @returns GlmModelOptions if GLM model detected, undefined otherwise + */ +export function getGlmModelOptions(modelId: string | undefined): GlmModelOptions | undefined { + if (!isGlmModel(modelId)) { + return undefined + } + + // Check if this is GLM-4.7 or higher (supports thinking mode) + const supportsThinking = isGlm47Plus(modelId) + + return { + mergeToolResultText: true, + disableParallelToolCalls: true, + supportsThinking, + } +}
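
For reviewers, a minimal sketch of how the new `getGlmModelOptions` helper is meant to be consumed when building a streaming request. It condenses the branching this PR adds to `base-openai-compatible-provider.ts` and `lm-studio.ts`; the standalone `buildGlmRequestOverrides` function and its `RequestOverrides` shape are illustrative only and are not part of the changeset.

```typescript
import { getGlmModelOptions } from "./glm-model-detection"

// Illustrative shape for the request fields the GLM handling touches.
// In the PR these are set directly on ChatCompletionParamsWithThinking
// and on the convertToOpenAiMessages() options, not on a separate object.
interface RequestOverrides {
	parallel_tool_calls: boolean
	mergeToolResultText: boolean
	thinking?: { type: "enabled" | "disabled" }
}

// Hypothetical helper: derive GLM-specific request overrides from a model ID.
// `reasoningEnabled` stands in for shouldUseReasoningEffort({ model, settings }).
function buildGlmRequestOverrides(modelId: string, reasoningEnabled: boolean): RequestOverrides {
	const glmOptions = getGlmModelOptions(modelId)

	// Non-GLM models keep the existing defaults: parallel tool calls stay on,
	// no thinking parameter is sent, and tool-result text is not merged.
	if (!glmOptions) {
		return { parallel_tool_calls: true, mergeToolResultText: false }
	}

	const overrides: RequestOverrides = {
		// Some GLM models reject parallel_tool_calls, so it is forced off.
		parallel_tool_calls: !glmOptions.disableParallelToolCalls,
		// Passed to convertToOpenAiMessages() so text following tool results is
		// merged into the last tool message instead of a separate user message.
		mergeToolResultText: glmOptions.mergeToolResultText,
	}

	// Only GLM-4.7+ understands the thinking parameter; it is sent explicitly
	// as enabled or disabled, while older GLM versions omit it entirely.
	if (glmOptions.supportsThinking) {
		overrides.thinking = reasoningEnabled ? { type: "enabled" } : { type: "disabled" }
	}

	return overrides
}

// buildGlmRequestOverrides("mlx-community/GLM-4.7-4bit", true)
//   => { parallel_tool_calls: false, mergeToolResultText: true, thinking: { type: "enabled" } }
// buildGlmRequestOverrides("GLM-4.5-UD-Q4_K_M.gguf", true)
//   => { parallel_tool_calls: false, mergeToolResultText: true }   (no thinking below 4.7)
// buildGlmRequestOverrides("gpt-4", true)
//   => { parallel_tool_calls: true, mergeToolResultText: false }
```

This mirrors the order of operations in both providers: detect the model first, then apply the merge option during message conversion, then adjust `parallel_tool_calls` and the `thinking` parameter on the request params.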