57 changes: 55 additions & 2 deletions src/api/providers/base-openai-compatible-provider.ts
@@ -7,13 +7,15 @@ import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/ap
import { TagMatcher } from "../../utils/tag-matcher"
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToZAiFormat } from "../transform/zai-format"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { calculateApiCostOpenAI } from "../../shared/cost"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"

type BaseOpenAiCompatibleProviderOptions<ModelName extends string> = ApiHandlerOptions & {
providerName: string
@@ -36,6 +38,7 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
protected readonly options: ApiHandlerOptions

protected client: OpenAI
protected glmConfig: GlmModelConfig | null = null

constructor({
providerName,
@@ -65,6 +68,13 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
defaultHeaders: DEFAULT_HEADERS,
timeout: getApiRequestTimeout(),
})

// Detect GLM model on construction if model ID is available
const modelId = this.options.apiModelId || ""
if (modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}
}

protected createStream(
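
Reviewer note: both files import detectGlmModel, logGlmDetection, and GlmModelConfig from ./utils/glm-model-detection, which is not part of this diff. A minimal sketch of the shape those call sites imply is below; the field names come straight from the usage in this PR, but the matching rule and log format are assumptions, not the actual implementation.

// Sketch only: inferred from how the config is consumed in this diff.
export interface GlmModelConfig {
	isGlmModel: boolean
	originalModelId: string
	displayName: string
	supportsThinking: boolean
	mergeToolResultText: boolean
	disableParallelToolCalls: boolean
}

export function detectGlmModel(modelId: string): GlmModelConfig {
	// Assumption: GLM models are recognized by a "glm" substring in the ID, e.g. "glm-4.7".
	const normalized = modelId.toLowerCase()
	const isGlmModel = normalized.includes("glm")
	return {
		isGlmModel,
		originalModelId: modelId,
		displayName: modelId,
		supportsThinking: isGlmModel && normalized.includes("4.7"),
		mergeToolResultText: isGlmModel,
		disableParallelToolCalls: isGlmModel,
	}
}

export function logGlmDetection(providerName: string, modelId: string, config: GlmModelConfig | null): void {
	if (config?.isGlmModel) {
		console.log(`[${providerName}] Detected GLM model ${modelId} (thinking: ${config.supportsThinking})`)
	}
}
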
@@ -75,6 +85,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
) {
const { id: model, info } = this.getModel()

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== model) {
this.glmConfig = detectGlmModel(model)
logGlmDetection(this.providerName, model, this.glmConfig)
}

// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
const max_tokens =
getModelMaxOutputTokens({
@@ -86,23 +102,48 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>

const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature

// Convert messages based on whether this is a GLM model
// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
const convertedMessages = this.glmConfig.isGlmModel
? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
: convertToOpenAiMessages(messages)

// Determine parallel_tool_calls setting
// Disable for GLM models as they may not support it properly
let parallelToolCalls: boolean
if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) {
parallelToolCalls = false
console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
} else {
parallelToolCalls = metadata?.parallelToolCalls ?? true
}

const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
stream: true,
stream_options: { include_usage: true },
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

// Add thinking parameter if reasoning is enabled and model supports it
if (this.options.enableReasoningEffort && info.supportsReasoningBinary) {
;(params as any).thinking = { type: "enabled" }
}

// For GLM-4.7 models with thinking support, add thinking parameter
if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7
;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
console.log(
`[${this.providerName}] GLM thinking mode: ${useReasoning ? "enabled" : "disabled"} for ${this.glmConfig.displayName}`,
)
}

try {
return this.client.chat.completions.create(params, requestOptions)
} catch (error) {
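
One subtlety in the hunk above: a model can match both the supportsReasoningBinary block and the GLM thinking block, in which case the GLM block runs second and wins, and it defaults thinking to enabled unless enableReasoningEffort is explicitly false. A small self-contained restatement of that combined resolution, offered as a reading aid rather than code from the PR:

// Reading aid: how the two thinking blocks above resolve, in order.
type ThinkingParam = { type: "enabled" } | { type: "disabled" } | undefined

function resolveThinking(
	enableReasoningEffort: boolean | undefined,
	supportsReasoningBinary: boolean,
	glm: { isGlmModel: boolean; supportsThinking: boolean },
): ThinkingParam {
	let thinking: ThinkingParam
	// Generic reasoning toggle for models flagged supportsReasoningBinary.
	if (enableReasoningEffort && supportsReasoningBinary) {
		thinking = { type: "enabled" }
	}
	// GLM thinking models default to enabled unless reasoning is explicitly disabled.
	if (glm.isGlmModel && glm.supportsThinking) {
		thinking = enableReasoningEffort !== false ? { type: "enabled" } : { type: "disabled" }
	}
	return thinking
}
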
@@ -222,6 +263,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
async completePrompt(prompt: string): Promise<string> {
const { id: modelId, info: modelInfo } = this.getModel()

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}

const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
model: modelId,
messages: [{ role: "user", content: prompt }],
@@ -232,6 +279,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
;(params as any).thinking = { type: "enabled" }
}

// For GLM-4.7 models with thinking support, add thinking parameter
if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
const useReasoning = this.options.enableReasoningEffort !== false
;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
}

try {
const response = await this.client.chat.completions.create(params)

62 changes: 58 additions & 4 deletions src/api/providers/lm-studio.ts
@@ -10,18 +10,21 @@ import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCal
import { TagMatcher } from "../../utils/tag-matcher"

import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToZAiFormat } from "../transform/zai-format"
import { ApiStream } from "../transform/stream"

import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { getModelsFromCache } from "./fetchers/modelCache"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"

export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
protected options: ApiHandlerOptions
private client: OpenAI
private readonly providerName = "LM Studio"
private glmConfig: GlmModelConfig | null = null

constructor(options: ApiHandlerOptions) {
super()
@@ -35,16 +38,37 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
apiKey: apiKey,
timeout: getApiRequestTimeout(),
})

// Detect GLM model on construction if model ID is available
const modelId = this.options.lmStudioModelId || ""
if (modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}
}

override async *createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const modelId = this.getModel().id

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}

// Convert messages based on whether this is a GLM model
// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
const convertedMessages = this.glmConfig.isGlmModel
? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
: convertToOpenAiMessages(messages)

const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
...convertToOpenAiMessages(messages),
...convertedMessages,
]

// -------------------------
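
The convertToZAiFormat helper and its mergeToolResultText option live in ../transform/zai-format and are not shown in this diff. The comment above says the option prevents reasoning_content loss by merging tool-result text; a rough, hypothetical sketch of what that merging could mean for a single Anthropic tool_result block (not the PR's actual implementation):

import { Anthropic } from "@anthropic-ai/sdk"

// Hypothetical illustration of the mergeToolResultText idea: collapse a tool
// result's text blocks into one string instead of an array of content parts.
function mergeToolResultText(block: Anthropic.Messages.ToolResultBlockParam): string {
	if (typeof block.content === "string") {
		return block.content
	}
	return (block.content ?? [])
		.filter((part): part is Anthropic.Messages.TextBlockParam => part.type === "text")
		.map((part) => part.text)
		.join("\n")
}
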
@@ -83,14 +107,24 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
let assistantText = ""

try {
// Determine parallel_tool_calls setting
// Disable for GLM models as they may not support it properly
let parallelToolCalls: boolean
if (this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls) {
parallelToolCalls = false
console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
} else {
parallelToolCalls = metadata?.parallelToolCalls ?? true
}

const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
model: this.getModel().id,
model: modelId,
messages: openAiMessages,
temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
stream: true,
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
@@ -124,6 +158,14 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
}
}

// Handle reasoning_content for GLM models with thinking support
if (delta && this.glmConfig?.supportsThinking) {
const deltaAny = delta as any
if (deltaAny.reasoning_content) {
yield { type: "reasoning", text: deltaAny.reasoning_content }
}
}

// Handle tool calls in stream - emit partial chunks for NativeToolCallParser
if (delta?.tool_calls) {
for (const toolCall of delta.tool_calls) {
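
For context on the reasoning_content check a few lines above: that field is not part of the OpenAI SDK's delta typings, which is why the delta is cast to any before reading it. A GLM-style chunk and the value this handler yields for it would look roughly like this (payload text is illustrative):

// Illustrative only: a GLM-style streaming delta carrying reasoning_content.
const exampleDelta: any = {
	role: "assistant",
	content: null,
	reasoning_content: "Comparing both file versions before proposing an edit...",
}

// The check above surfaces it to the rest of the pipeline as a reasoning chunk:
const yielded = { type: "reasoning", text: exampleDelta.reasoning_content }
console.log(yielded)
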
@@ -186,10 +228,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
}

async completePrompt(prompt: string): Promise<string> {
const modelId = this.getModel().id

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}

try {
// Determine parallel_tool_calls setting for GLM models
const parallelToolCalls =
this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls ? false : true

// Create params object with optional draft model
const params: any = {
model: this.getModel().id,
model: modelId,
messages: [{ role: "user", content: prompt }],
temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
stream: false,