25 changes: 23 additions & 2 deletions src/api/providers/base-openai-compatible-provider.ts
@@ -14,6 +14,7 @@ import { BaseProvider } from "./base-provider"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { calculateApiCostOpenAI } from "../../shared/cost"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { getGlmModelOptions } from "./utils/model-detection"

type BaseOpenAiCompatibleProviderOptions<ModelName extends string> = ApiHandlerOptions & {
providerName: string
@@ -75,6 +76,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
) {
const { id: model, info } = this.getModel()

// Get model-specific options for GLM models (applies Z.ai optimizations)
// This allows third-party GLM models via OpenAI-compatible endpoints to benefit
// from the same optimizations used by Z.ai
console.log(`[${this.providerName}] Using model ID: "${model}"`)
const glmOptions = getGlmModelOptions(model)

// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
const max_tokens =
getModelMaxOutputTokens({
@@ -86,16 +93,30 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>

const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature

// For GLM models, disable parallel_tool_calls by default as they may not support it
// Users can still explicitly enable it via metadata if their model supports it
const parallelToolCalls = glmOptions.disableParallelToolCalls
? (metadata?.parallelToolCalls ?? false)
: (metadata?.parallelToolCalls ?? true)

console.log(`[${this.providerName}] parallel_tool_calls set to: ${parallelToolCalls}`)

// Convert messages with GLM-specific handling when applicable
// mergeToolResultText prevents GLM models from dropping reasoning_content
const convertedMessages = convertToOpenAiMessages(messages, {
mergeToolResultText: glmOptions.mergeToolResultText,
})

const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
stream: true,
stream_options: { include_usage: true },
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

// Add thinking parameter if reasoning is enabled and model supports it
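A minimal sketch of the fallback implemented by the ternary above: GLM models default parallel_tool_calls to false, other models default to true, and an explicit metadata value always wins. The helper name is illustrative and not part of this change.

import { getGlmModelOptions } from "./utils/model-detection"

// Illustrative helper, equivalent to the inline ternary added in this hunk.
function resolveParallelToolCalls(modelId: string, explicit?: boolean): boolean {
    const { disableParallelToolCalls } = getGlmModelOptions(modelId)
    // An explicit caller preference always takes precedence over the model default.
    return explicit ?? !disableParallelToolCalls
}

resolveParallelToolCalls("glm-4.5") // false: GLM default
resolveParallelToolCalls("glm-4.5", true) // true: explicit override
resolveParallelToolCalls("gpt-4") // true: non-GLM default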
22 changes: 19 additions & 3 deletions src/api/providers/lm-studio.ts
@@ -17,6 +17,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
import { getModelsFromCache } from "./fetchers/modelCache"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { getGlmModelOptions } from "./utils/model-detection"

export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
protected options: ApiHandlerOptions
Expand All @@ -42,9 +43,16 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
// Get model-specific options for GLM models (applies Z.ai optimizations)
const modelId = this.getModel().id
console.log(`[LM Studio] Using model ID: "${modelId}"`)
const glmOptions = getGlmModelOptions(modelId)

// Convert messages with GLM-specific handling when applicable
// mergeToolResultText prevents GLM models from dropping reasoning_content
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
...convertToOpenAiMessages(messages),
...convertToOpenAiMessages(messages, { mergeToolResultText: glmOptions.mergeToolResultText }),
]

// -------------------------
@@ -83,14 +91,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler
let assistantText = ""

try {
// For GLM models, disable parallel_tool_calls by default as they may not support it
// Users can still explicitly enable it via metadata if their model supports it
const parallelToolCalls = glmOptions.disableParallelToolCalls
? (metadata?.parallelToolCalls ?? false)
: (metadata?.parallelToolCalls ?? true)

console.log(`[LM Studio] parallel_tool_calls set to: ${parallelToolCalls}`)

const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
model: this.getModel().id,
model: modelId,
messages: openAiMessages,
temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
stream: true,
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
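For context, the intent behind mergeToolResultText, as described in the comments here and in model-detection.ts, is roughly the following. This is a hypothetical illustration of the resulting message shape, not the actual output of convertToOpenAiMessages.

// Hypothetical illustration only: trailing user text after a tool result is
// folded into the tool message, so the GLM model never sees a separate user
// turn that would cause it to drop reasoning_content.

// Without merging (default behaviour):
const unmerged = [
    { role: "tool", tool_call_id: "call_1", content: "42 files found" },
    { role: "user", content: "Continue with the next step." },
]

// With merging (GLM handling):
const merged = [
    { role: "tool", tool_call_id: "call_1", content: "42 files found\n\nContinue with the next step." },
]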
113 changes: 113 additions & 0 deletions src/api/providers/utils/__tests__/model-detection.spec.ts
@@ -0,0 +1,113 @@
import { isGlmModel, getGlmModelOptions, GlmModelOptions } from "../model-detection"

describe("isGlmModel", () => {
describe("GLM model detection", () => {
it("should detect official GLM model names with dash", () => {
expect(isGlmModel("glm-4")).toBe(true)
expect(isGlmModel("glm-4.5")).toBe(true)
expect(isGlmModel("glm-4.7")).toBe(true)
expect(isGlmModel("glm-4-plus")).toBe(true)
})

it("should detect GLM models with uppercase", () => {
expect(isGlmModel("GLM-4")).toBe(true)
expect(isGlmModel("GLM-4.5")).toBe(true)
expect(isGlmModel("GLM-4.7")).toBe(true)
})

it("should detect compact GLM model names without dash", () => {
expect(isGlmModel("glm4")).toBe(true)
expect(isGlmModel("GLM4")).toBe(true)
expect(isGlmModel("glm4-9b")).toBe(true)
})

it("should detect LM Studio GGUF model names", () => {
expect(isGlmModel("GLM4-9B-Chat-GGUF")).toBe(true)
expect(isGlmModel("glm4-9b-chat-gguf")).toBe(true)
})

it("should detect ChatGLM models", () => {
expect(isGlmModel("chatglm")).toBe(true)
expect(isGlmModel("ChatGLM")).toBe(true)
expect(isGlmModel("chatglm-6b")).toBe(true)
expect(isGlmModel("chatglm3-6b")).toBe(true)
})
})

describe("non-GLM model detection", () => {
it("should not detect OpenAI models as GLM", () => {
expect(isGlmModel("gpt-4")).toBe(false)
expect(isGlmModel("gpt-4-turbo")).toBe(false)
expect(isGlmModel("gpt-3.5-turbo")).toBe(false)
expect(isGlmModel("o1-preview")).toBe(false)
})

it("should not detect Anthropic models as GLM", () => {
expect(isGlmModel("claude-3")).toBe(false)
expect(isGlmModel("claude-3-sonnet")).toBe(false)
expect(isGlmModel("claude-3-opus")).toBe(false)
})

it("should not detect DeepSeek models as GLM", () => {
expect(isGlmModel("deepseek-coder")).toBe(false)
expect(isGlmModel("deepseek-reasoner")).toBe(false)
})

it("should not detect Gemini models as GLM", () => {
expect(isGlmModel("gemini-pro")).toBe(false)
expect(isGlmModel("gemini-2-flash")).toBe(false)
})

it("should not detect Qwen models as GLM", () => {
expect(isGlmModel("qwen-7b")).toBe(false)
expect(isGlmModel("qwen2-7b")).toBe(false)
})

it("should not detect Llama models as GLM", () => {
expect(isGlmModel("llama-2-7b")).toBe(false)
expect(isGlmModel("llama-3-8b")).toBe(false)
expect(isGlmModel("codellama")).toBe(false)
})
})

describe("edge cases", () => {
it("should handle empty string", () => {
expect(isGlmModel("")).toBe(false)
})

it("should handle undefined-like values", () => {
expect(isGlmModel(null as unknown as string)).toBe(false)
expect(isGlmModel(undefined as unknown as string)).toBe(false)
})

it("should not match 'glm' in the middle of unrelated model names", () => {
// This tests that we're not accidentally matching "glm" as a substring
// in unrelated contexts
expect(isGlmModel("myglmodel")).toBe(false)
expect(isGlmModel("some-glm-inspired-model")).toBe(false)
})
})
})

describe("getGlmModelOptions", () => {
it("should return GLM-optimized options for GLM models", () => {
const options = getGlmModelOptions("glm-4.5")

expect(options.mergeToolResultText).toBe(true)
expect(options.disableParallelToolCalls).toBe(true)
})

it("should return default options for non-GLM models", () => {
const options = getGlmModelOptions("gpt-4")

expect(options.mergeToolResultText).toBe(false)
expect(options.disableParallelToolCalls).toBe(false)
})

it("should return the correct type", () => {
const options: GlmModelOptions = getGlmModelOptions("glm-4")

expect(options).toHaveProperty("mergeToolResultText")
expect(options).toHaveProperty("disableParallelToolCalls")
})
})
86 changes: 86 additions & 0 deletions src/api/providers/utils/model-detection.ts
@@ -0,0 +1,86 @@
/**
* Utility functions for detecting model types based on model ID patterns.
* These functions help providers apply model-specific handling for third-party
* models running on LM Studio, OpenAI-compatible endpoints, etc.
*/

/**
* Detects if a model ID represents a GLM (General Language Model) from Zhipu AI.
*
* GLM models (like GLM-4, GLM-4.5, GLM-4.7) have specific requirements:
* - They benefit from `mergeToolResultText: true` to avoid dropping reasoning_content
* - They may not support `parallel_tool_calls` parameter
*
* This detection allows LM Studio and OpenAI-compatible providers to apply
* the same optimizations that Z.ai uses for GLM models.
*
* @param modelId - The model identifier (e.g., "glm-4.5", "GLM4-9B-Chat-GGUF")
* @returns true if the model is a GLM model, false otherwise
*
* @example
* ```typescript
* isGlmModel("glm-4.5") // true
* isGlmModel("GLM4-9B-Chat-GGUF") // true
* isGlmModel("glm-4.7") // true
* isGlmModel("gpt-4") // false
* isGlmModel("claude-3") // false
* ```
*/
export function isGlmModel(modelId: string): boolean {
if (!modelId) {
return false
}

// Case-insensitive check for "glm" prefix or pattern
// Matches: glm-4, glm-4.5, glm-4.7, GLM4-9B-Chat, glm4, etc.
const lowerModelId = modelId.toLowerCase()

// Check for common GLM model patterns:
// - "glm-" prefix (official naming: glm-4, glm-4.5, glm-4.7)
// - "glm4" (compact naming without dash)
// - "chatglm" (older ChatGLM models)
return lowerModelId.startsWith("glm-") || lowerModelId.startsWith("glm4") || lowerModelId.includes("chatglm")
}

/**
* Configuration options for GLM model-specific handling.
* These options are derived from Z.ai's optimizations for GLM models.
*/
export interface GlmModelOptions {
/**
* Whether to merge text content after tool_results into the last tool message.
* This prevents GLM models from dropping reasoning_content when they see
* a user message after tool results.
*/
mergeToolResultText: boolean

/**
* Whether to disable parallel_tool_calls for this model.
* GLM models may not support this parameter and can behave unexpectedly
* when it's enabled.
*/
disableParallelToolCalls: boolean
}

/**
* Returns the recommended configuration options for a GLM model.
* Non-GLM models will receive default options that maintain existing behavior.
*
* @param modelId - The model identifier
* @returns Configuration options for the model
*/
export function getGlmModelOptions(modelId: string): GlmModelOptions {
const isGlm = isGlmModel(modelId)

// Log GLM model detection result for diagnostics
if (isGlm) {
console.log(`[GLM Detection] ✓ GLM model detected: "${modelId}"`)
console.log(`[GLM Detection] - mergeToolResultText: true`)
console.log(`[GLM Detection] - disableParallelToolCalls: true`)
}

return {
mergeToolResultText: isGlm,
disableParallelToolCalls: isGlm,
}
}
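Taken together, a provider other than the two updated in this PR could consume the utility the same way. A minimal sketch mirroring the handler changes above, assuming the consumer sits alongside model-detection.ts; variable names are illustrative:

import { getGlmModelOptions } from "./model-detection"

const modelId = "glm-4.5"
const glm = getGlmModelOptions(modelId)

// Feed the detection result into message conversion and request parameters,
// along the lines of the base OpenAI-compatible and LM Studio handlers above.
const conversionOptions = { mergeToolResultText: glm.mergeToolResultText }

// Respect an explicit caller preference (metadata?.parallelToolCalls) when present.
const explicit: boolean | undefined = undefined
const parallelToolCalls = glm.disableParallelToolCalls ? (explicit ?? false) : (explicit ?? true)

console.log(conversionOptions, parallelToolCalls) // { mergeToolResultText: true } false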