From 96b68450b287b441e9b3957da0eb0e417f97e868 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 30 Jan 2026 01:09:28 +0000 Subject: [PATCH 1/3] fix: combine tool call sync fix with GLM model detection for issue #11071 This PR combines: 1. PR #11093 fix: NativeToolCallParser processFinishReason hasStarted check 2. GLM model detection utility for LM Studio and OpenAI-compatible providers 3. mergeToolResultText optimization for GLM models 4. Disable parallel_tool_calls for GLM models 5. GLM-4.7 thinking parameter support 6. Diagnostic logging for GLM detection Closes #11071 --- .../base-openai-compatible-provider.ts | 43 ++- src/api/providers/lm-studio.ts | 60 ++++- .../__tests__/glm-model-detection.spec.ts | 254 ++++++++++++++++++ .../providers/utils/glm-model-detection.ts | 199 ++++++++++++++ .../assistant-message/NativeToolCallParser.ts | 27 +- .../__tests__/NativeToolCallParser.spec.ts | 103 +++++++ 6 files changed, 677 insertions(+), 9 deletions(-) create mode 100644 src/api/providers/utils/__tests__/glm-model-detection.spec.ts create mode 100644 src/api/providers/utils/glm-model-detection.ts diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fc3d769ae2a..7db09ac9f36 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -7,6 +7,7 @@ import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/ap import { TagMatcher } from "../../utils/tag-matcher" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { convertToOpenAiMessages } from "../transform/openai-format" +import { convertToZAiFormat } from "../transform/zai-format" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { DEFAULT_HEADERS } from "./constants" @@ -14,6 +15,7 @@ import { BaseProvider } from "./base-provider" import { handleOpenAIError } from "./utils/openai-error-handler" import { calculateApiCostOpenAI } from "../../shared/cost" import { getApiRequestTimeout } from "./utils/timeout-config" +import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection" type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & { providerName: string @@ -36,6 +38,7 @@ export abstract class BaseOpenAiCompatibleProvider protected readonly options: ApiHandlerOptions protected client: OpenAI + protected glmConfig: GlmModelConfig | null = null constructor({ providerName, @@ -65,6 +68,13 @@ export abstract class BaseOpenAiCompatibleProvider defaultHeaders: DEFAULT_HEADERS, timeout: getApiRequestTimeout(), }) + + // Detect GLM model on construction if model ID is available + const modelId = this.options.apiModelId || "" + if (modelId) { + this.glmConfig = detectGlmModel(modelId) + logGlmDetection(this.providerName, modelId, this.glmConfig) + } } protected createStream( @@ -75,6 +85,12 @@ export abstract class BaseOpenAiCompatibleProvider ) { const { id: model, info } = this.getModel() + // Re-detect GLM model if not already done or if model ID changed + if (!this.glmConfig || this.glmConfig.originalModelId !== model) { + this.glmConfig = detectGlmModel(model) + logGlmDetection(this.providerName, model, this.glmConfig) + } + // Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply) const max_tokens = getModelMaxOutputTokens({ @@ -86,16 +102,32 @@ export abstract class BaseOpenAiCompatibleProvider const temperature = 
this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature + // Convert messages based on whether this is a GLM model + // GLM models benefit from mergeToolResultText to prevent reasoning_content loss + const convertedMessages = this.glmConfig.isGlmModel + ? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText }) + : convertToOpenAiMessages(messages) + + // Determine parallel_tool_calls setting + // Disable for GLM models as they may not support it properly + let parallelToolCalls: boolean + if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) { + parallelToolCalls = false + console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`) + } else { + parallelToolCalls = metadata?.parallelToolCalls ?? true + } + const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model, max_tokens, temperature, - messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], + messages: [{ role: "system", content: systemPrompt }, ...convertedMessages], stream: true, stream_options: { include_usage: true }, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + parallel_tool_calls: parallelToolCalls, } // Add thinking parameter if reasoning is enabled and model supports it @@ -103,6 +135,13 @@ export abstract class BaseOpenAiCompatibleProvider ;(params as any).thinking = { type: "enabled" } } + // For GLM-4.7 models with thinking support, add thinking parameter + if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) { + const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7 + ;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" } + console.log(`[${this.providerName}] GLM-4.7 thinking mode: ${useReasoning ? "enabled" : "disabled"}`) + } + try { return this.client.chat.completions.create(params, requestOptions) } catch (error) { diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index a771394c535..480919af1aa 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -10,6 +10,7 @@ import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCal import { TagMatcher } from "../../utils/tag-matcher" import { convertToOpenAiMessages } from "../transform/openai-format" +import { convertToZAiFormat } from "../transform/zai-format" import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" @@ -17,11 +18,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". 
import { getModelsFromCache } from "./fetchers/modelCache" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection" export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions private client: OpenAI private readonly providerName = "LM Studio" + private glmConfig: GlmModelConfig | null = null constructor(options: ApiHandlerOptions) { super() @@ -35,6 +38,13 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan apiKey: apiKey, timeout: getApiRequestTimeout(), }) + + // Detect GLM model on construction if model ID is available + const modelId = this.options.lmStudioModelId || "" + if (modelId) { + this.glmConfig = detectGlmModel(modelId) + logGlmDetection(this.providerName, modelId, this.glmConfig) + } } override async *createMessage( @@ -42,9 +52,23 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, ): ApiStream { + const model = this.getModel() + + // Re-detect GLM model if not already done or if model ID changed + if (!this.glmConfig || this.glmConfig.originalModelId !== model.id) { + this.glmConfig = detectGlmModel(model.id) + logGlmDetection(this.providerName, model.id, this.glmConfig) + } + + // Convert messages based on whether this is a GLM model + // GLM models benefit from mergeToolResultText to prevent reasoning_content loss + const convertedMessages = this.glmConfig.isGlmModel + ? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText }) + : convertToOpenAiMessages(messages) + const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [ { role: "system", content: systemPrompt }, - ...convertToOpenAiMessages(messages), + ...convertedMessages, ] // ------------------------- @@ -83,20 +107,37 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan let assistantText = "" try { + // Determine parallel_tool_calls setting + // Disable for GLM models as they may not support it properly + let parallelToolCalls: boolean + if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) { + parallelToolCalls = false + console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`) + } else { + parallelToolCalls = metadata?.parallelToolCalls ?? true + } + const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = { - model: this.getModel().id, + model: model.id, messages: openAiMessages, temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE, stream: true, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + parallel_tool_calls: parallelToolCalls, } if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) { params.draft_model = this.options.lmStudioDraftModelId } + // For GLM-4.7 models with thinking support, add thinking parameter + if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) { + const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7 + ;(params as any).thinking = useReasoning ? 
{ type: "enabled" } : { type: "disabled" } + console.log(`[${this.providerName}] GLM-4.7 thinking mode: ${useReasoning ? "enabled" : "disabled"}`) + } + let results try { results = await this.client.chat.completions.create(params) @@ -124,6 +165,19 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan } } + // Handle reasoning_content for GLM models (similar to Z.ai) + if (delta) { + for (const key of ["reasoning_content", "reasoning"] as const) { + if (key in delta) { + const reasoning_content = ((delta as any)[key] as string | undefined) || "" + if (reasoning_content?.trim()) { + yield { type: "reasoning", text: reasoning_content } + } + break + } + } + } + // Handle tool calls in stream - emit partial chunks for NativeToolCallParser if (delta?.tool_calls) { for (const toolCall of delta.tool_calls) { diff --git a/src/api/providers/utils/__tests__/glm-model-detection.spec.ts b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts new file mode 100644 index 00000000000..78f134e9fac --- /dev/null +++ b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts @@ -0,0 +1,254 @@ +import { detectGlmModel, logGlmDetection, isGlmModel } from "../glm-model-detection" + +describe("GLM Model Detection", () => { + describe("detectGlmModel", () => { + describe("non-GLM models", () => { + it("should return isGlmModel=false for non-GLM models", () => { + expect(detectGlmModel("gpt-4").isGlmModel).toBe(false) + expect(detectGlmModel("claude-3-opus").isGlmModel).toBe(false) + expect(detectGlmModel("llama-3.1").isGlmModel).toBe(false) + expect(detectGlmModel("qwen-2.5").isGlmModel).toBe(false) + }) + + it("should NOT enable GLM optimizations for non-GLM models", () => { + const config = detectGlmModel("gpt-4") + expect(config.mergeToolResultText).toBe(false) + expect(config.disableParallelToolCalls).toBe(false) + }) + }) + + describe("GLM model detection", () => { + it("should detect standard GLM model IDs", () => { + expect(detectGlmModel("glm-4.5").isGlmModel).toBe(true) + expect(detectGlmModel("glm-4.6").isGlmModel).toBe(true) + expect(detectGlmModel("glm-4.7").isGlmModel).toBe(true) + expect(detectGlmModel("GLM-4.5").isGlmModel).toBe(true) + }) + + it("should detect GLM models with various prefixes", () => { + expect(detectGlmModel("mlx-community/GLM-4.5-4bit").isGlmModel).toBe(true) + expect(detectGlmModel("local/glm-4.7-flash").isGlmModel).toBe(true) + }) + + it("should detect GGUF GLM models", () => { + const result = detectGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf") + expect(result.isGlmModel).toBe(true) + expect(result.version).toBe("4.5") + }) + + it("should detect ChatGLM models", () => { + expect(detectGlmModel("chatglm-6b").isGlmModel).toBe(true) + expect(detectGlmModel("chatglm3-6b").isGlmModel).toBe(true) + }) + }) + + describe("version detection", () => { + it("should detect GLM-4.5 version", () => { + expect(detectGlmModel("glm-4.5").version).toBe("4.5") + expect(detectGlmModel("glm-4-5-flash").version).toBe("4.5") + expect(detectGlmModel("accounts/fireworks/models/glm-4p5").version).toBe("4.5") + }) + + it("should detect GLM-4.6 version", () => { + expect(detectGlmModel("glm-4.6").version).toBe("4.6") + expect(detectGlmModel("GLM-4.6V").version).toBe("4.6") + expect(detectGlmModel("glm-4-6-flash").version).toBe("4.6") + }) + + it("should detect GLM-4.7 version", () => { + expect(detectGlmModel("glm-4.7").version).toBe("4.7") + expect(detectGlmModel("GLM-4.7-Flash").version).toBe("4.7") + 
expect(detectGlmModel("glm-4-7-flashx").version).toBe("4.7") + }) + }) + + describe("variant detection", () => { + describe("base variant", () => { + it("should detect base variant", () => { + expect(detectGlmModel("glm-4.5").variant).toBe("base") + expect(detectGlmModel("glm-4.6").variant).toBe("base") + expect(detectGlmModel("glm-4.7").variant).toBe("base") + }) + }) + + describe("air variants", () => { + it("should detect air variant", () => { + expect(detectGlmModel("glm-4.5-air").variant).toBe("air") + expect(detectGlmModel("GLM-4.5-Air").variant).toBe("air") + }) + + it("should detect airx variant", () => { + expect(detectGlmModel("glm-4.5-airx").variant).toBe("airx") + expect(detectGlmModel("GLM-4.5-AirX").variant).toBe("airx") + }) + }) + + describe("flash variants", () => { + it("should detect flash variant", () => { + expect(detectGlmModel("glm-4.5-flash").variant).toBe("flash") + expect(detectGlmModel("glm-4.7-flash").variant).toBe("flash") + }) + + it("should detect flashx variant", () => { + expect(detectGlmModel("glm-4.7-flashx").variant).toBe("flashx") + expect(detectGlmModel("GLM-4.7-FlashX").variant).toBe("flashx") + }) + }) + + describe("x variant", () => { + it("should detect x variant", () => { + expect(detectGlmModel("glm-4.5-x").variant).toBe("x") + expect(detectGlmModel("GLM-4.5-X").variant).toBe("x") + }) + }) + + describe("vision variants", () => { + it("should detect v (vision) variant for 4.5", () => { + const result = detectGlmModel("glm-4.5v") + expect(result.variant).toBe("v") + expect(result.supportsVision).toBe(true) + }) + + it("should detect v (vision) variant for 4.6", () => { + const result = detectGlmModel("glm-4.6v") + expect(result.variant).toBe("v") + expect(result.supportsVision).toBe(true) + }) + + it("should detect v-flash variant", () => { + const result = detectGlmModel("glm-4.6v-flash") + expect(result.variant).toBe("v-flash") + expect(result.supportsVision).toBe(true) + }) + + it("should detect v-flashx variant", () => { + const result = detectGlmModel("glm-4.6v-flashx") + expect(result.variant).toBe("v-flashx") + expect(result.supportsVision).toBe(true) + }) + }) + }) + + describe("thinking support detection", () => { + it("should detect thinking support for GLM-4.7", () => { + expect(detectGlmModel("glm-4.7").supportsThinking).toBe(true) + expect(detectGlmModel("glm-4.7-flash").supportsThinking).toBe(true) + expect(detectGlmModel("GLM-4.7-FlashX").supportsThinking).toBe(true) + }) + + it("should NOT detect thinking support for GLM-4.5 and GLM-4.6", () => { + expect(detectGlmModel("glm-4.5").supportsThinking).toBe(false) + expect(detectGlmModel("glm-4.6").supportsThinking).toBe(false) + expect(detectGlmModel("glm-4.5-air").supportsThinking).toBe(false) + expect(detectGlmModel("glm-4.6v").supportsThinking).toBe(false) + }) + }) + + describe("configuration flags", () => { + it("should enable mergeToolResultText for all GLM models", () => { + expect(detectGlmModel("glm-4.5").mergeToolResultText).toBe(true) + expect(detectGlmModel("glm-4.6").mergeToolResultText).toBe(true) + expect(detectGlmModel("glm-4.7").mergeToolResultText).toBe(true) + }) + + it("should disable parallel tool calls for all GLM models", () => { + expect(detectGlmModel("glm-4.5").disableParallelToolCalls).toBe(true) + expect(detectGlmModel("glm-4.6").disableParallelToolCalls).toBe(true) + expect(detectGlmModel("glm-4.7").disableParallelToolCalls).toBe(true) + }) + }) + + describe("display name generation", () => { + it("should generate correct display names for base variants", 
() => { + expect(detectGlmModel("glm-4.5").displayName).toBe("GLM-4.5") + expect(detectGlmModel("glm-4.6").displayName).toBe("GLM-4.6") + expect(detectGlmModel("glm-4.7").displayName).toBe("GLM-4.7") + }) + + it("should generate correct display names for variants", () => { + expect(detectGlmModel("glm-4.5-air").displayName).toBe("GLM-4.5 AIR") + expect(detectGlmModel("glm-4.5-flash").displayName).toBe("GLM-4.5 FLASH") + expect(detectGlmModel("glm-4.7-flashx").displayName).toBe("GLM-4.7 FLASHX") + expect(detectGlmModel("glm-4.6v").displayName).toBe("GLM-4.6 V") + expect(detectGlmModel("glm-4.6v-flash").displayName).toBe("GLM-4.6 V FLASH") + }) + + it("should handle unknown version", () => { + // ChatGLM doesn't have a specific version number + const result = detectGlmModel("chatglm-6b") + expect(result.displayName).toBe("GLM-4.x") + }) + }) + + describe("real-world model ID formats", () => { + it("should correctly detect MLX community models", () => { + const result = detectGlmModel("mlx-community/GLM-4.5-4bit") + expect(result.isGlmModel).toBe(true) + expect(result.version).toBe("4.5") + expect(result.variant).toBe("base") + }) + + it("should correctly detect GGUF models", () => { + const result = detectGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf") + expect(result.isGlmModel).toBe(true) + expect(result.version).toBe("4.5") + }) + + it("should correctly detect Fireworks models", () => { + const result = detectGlmModel("accounts/fireworks/models/glm-4p5") + expect(result.isGlmModel).toBe(true) + expect(result.version).toBe("4.5") + }) + + it("should correctly detect Fireworks air models", () => { + const result = detectGlmModel("accounts/fireworks/models/glm-4p5-air") + expect(result.isGlmModel).toBe(true) + expect(result.version).toBe("4.5") + expect(result.variant).toBe("air") + }) + }) + }) + + describe("logGlmDetection", () => { + let consoleLogSpy: any + + beforeEach(() => { + consoleLogSpy = vi.spyOn(console, "log").mockImplementation(() => {}) + }) + + afterEach(() => { + consoleLogSpy.mockRestore() + }) + + it("should log detection results for GLM models", () => { + const config = detectGlmModel("glm-4.5") + logGlmDetection("LM Studio", "glm-4.5", config) + + expect(consoleLogSpy).toHaveBeenCalledWith('[LM Studio] Using model ID: "glm-4.5"') + expect(consoleLogSpy).toHaveBeenCalledWith('[GLM Detection] ✓ GLM model detected: "glm-4.5"') + expect(consoleLogSpy).toHaveBeenCalledWith("[GLM Detection] - Version: 4.5") + expect(consoleLogSpy).toHaveBeenCalledWith("[GLM Detection] - Variant: base") + }) + + it("should log when model is NOT a GLM model", () => { + const config = detectGlmModel("gpt-4") + logGlmDetection("OpenAI-compatible", "gpt-4", config) + + expect(consoleLogSpy).toHaveBeenCalledWith('[OpenAI-compatible] Using model ID: "gpt-4"') + expect(consoleLogSpy).toHaveBeenCalledWith('[GLM Detection] ✗ Not a GLM model: "gpt-4"') + }) + }) + + describe("isGlmModel", () => { + it("should return true for GLM models", () => { + expect(isGlmModel("glm-4.5")).toBe(true) + expect(isGlmModel("GLM-4.7-Flash")).toBe(true) + expect(isGlmModel("chatglm-6b")).toBe(true) + }) + + it("should return false for non-GLM models", () => { + expect(isGlmModel("gpt-4")).toBe(false) + expect(isGlmModel("claude-3-opus")).toBe(false) + }) + }) +}) diff --git a/src/api/providers/utils/glm-model-detection.ts b/src/api/providers/utils/glm-model-detection.ts new file mode 100644 index 00000000000..dddd2da281d --- /dev/null +++ b/src/api/providers/utils/glm-model-detection.ts @@ -0,0 +1,199 @@ +/** + * GLM 
Model Detection Utility + * + * Detects GLM models from Z.ai (Zhipu AI) and returns appropriate configuration + * for optimal interaction. This utility supports various model ID formats from + * different providers like LM Studio and OpenAI-compatible endpoints. + * + * GLM Model Family: + * - GLM-4.5: Base model with 355B parameters + * - GLM-4.5-Air: Lightweight version balancing performance and cost + * - GLM-4.5-X: High-performance variant with ultra-fast responses + * - GLM-4.5-AirX: Lightweight ultra-fast variant + * - GLM-4.5-Flash: Free high-speed model + * - GLM-4.5V: Multimodal visual model + * - GLM-4.6: Extended 200k context window + * - GLM-4.6V: Multimodal vision model + * - GLM-4.6V-Flash: Free high-speed vision model + * - GLM-4.7: Built-in thinking capabilities + * - GLM-4.7-Flash: Free high-speed variant of GLM-4.7 + * - GLM-4.7-FlashX: Ultra-fast variant + */ + +/** + * GLM model version enumeration + */ +export type GlmVersion = "4.5" | "4.6" | "4.7" | "unknown" + +/** + * GLM model variant - specific model within a version + */ +export type GlmVariant = + | "base" + | "air" + | "x" + | "airx" + | "flash" + | "flashx" + | "v" // vision + | "v-flash" + | "v-flashx" + +/** + * Configuration options for GLM models + */ +export interface GlmModelConfig { + /** Whether this is a GLM model */ + isGlmModel: boolean + /** The detected GLM version (4.5, 4.6, 4.7) */ + version: GlmVersion + /** The detected variant (base, air, flash, v, etc.) */ + variant: GlmVariant | "unknown" + /** Whether this model supports vision/images */ + supportsVision: boolean + /** Whether this model has built-in thinking/reasoning support */ + supportsThinking: boolean + /** Whether to merge tool result text into tool messages */ + mergeToolResultText: boolean + /** Whether to disable parallel tool calls */ + disableParallelToolCalls: boolean + /** The original model ID */ + originalModelId: string + /** A normalized/canonical model name for display */ + displayName: string +} + +/** + * Detects if a model ID represents a GLM model and returns its configuration. + * + * Supports various model ID formats: + * - Standard: "glm-4.5", "glm-4.7-flash" + * - With prefix: "mlx-community/GLM-4.5-4bit" + * - GGUF files: "GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf" + * - ChatGLM: "chatglm-6b", "chatglm3-6b" + * + * @param modelId The model identifier string + * @returns GLM model configuration + */ +export function detectGlmModel(modelId: string): GlmModelConfig { + const lowerModelId = modelId.toLowerCase() + + // Check if this is a GLM model using case-insensitive matching + // Match patterns: "glm-", "glm4", "chatglm", or "glm" followed by a version number + const isGlm = /glm[-_]?4|chatglm|\/glm[-_]|^glm[-_]/i.test(modelId) + + if (!isGlm) { + return { + isGlmModel: false, + version: "unknown", + variant: "unknown", + supportsVision: false, + supportsThinking: false, + mergeToolResultText: false, + disableParallelToolCalls: false, + originalModelId: modelId, + displayName: modelId, + } + } + + // Detect version (4.5, 4.6, 4.7) + let version: GlmVersion = "unknown" + if (/4\.7|4-7|47/i.test(lowerModelId)) { + version = "4.7" + } else if (/4\.6|4-6|46/i.test(lowerModelId)) { + version = "4.6" + } else if (/4\.5|4-5|45|4p5/i.test(lowerModelId)) { + version = "4.5" + } + + // Detect variant + let variant: GlmVariant = "base" + let supportsVision = false + + // Check for vision variants first (they may also have flash/etc.) 
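+	// Order matters in the chain below: the more specific variants are checked before the
+	// general ones (v-flashx before v-flash before v, flashx before flash, airx before air).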
+ if (/4\.5v|4-5v|45v|4p5v|glm-4\.5v/i.test(lowerModelId)) { + variant = "v" + supportsVision = true + } else if (/4\.6v[-_]?flashx|4-6v[-_]?flashx/i.test(lowerModelId)) { + variant = "v-flashx" + supportsVision = true + } else if (/4\.6v[-_]?flash|4-6v[-_]?flash/i.test(lowerModelId)) { + variant = "v-flash" + supportsVision = true + } else if (/4\.6v|4-6v|46v/i.test(lowerModelId)) { + variant = "v" + supportsVision = true + } + // Non-vision variants + else if (/flashx/i.test(lowerModelId)) { + variant = "flashx" + } else if (/flash/i.test(lowerModelId)) { + variant = "flash" + } else if (/airx/i.test(lowerModelId)) { + variant = "airx" + } else if (/air/i.test(lowerModelId)) { + variant = "air" + } else if (/[-_]x\b/i.test(lowerModelId)) { + // Match "-x" or "_x" at word boundary (to avoid matching "flashx", "airx") + variant = "x" + } + + // GLM-4.7 has built-in thinking support + const supportsThinking = version === "4.7" + + // Generate display name + let displayName = `GLM-${version !== "unknown" ? version : "4.x"}` + if (variant !== "base") { + const variantName = variant.toUpperCase().replace("-", " ") + displayName += ` ${variantName}` + } + + return { + isGlmModel: true, + version, + variant, + supportsVision, + supportsThinking, + // All GLM models benefit from mergeToolResultText to prevent reasoning_content loss + mergeToolResultText: true, + // Disable parallel tool calls for GLM models as they may not support it properly + disableParallelToolCalls: true, + originalModelId: modelId, + displayName, + } +} + +/** + * Logs GLM model detection results to the console for debugging. + * + * @param providerName The name of the provider (e.g., "LM Studio", "OpenAI-compatible") + * @param modelId The model ID being used + * @param config The detected GLM configuration + */ +export function logGlmDetection(providerName: string, modelId: string, config: GlmModelConfig): void { + console.log(`[${providerName}] Using model ID: "${modelId}"`) + + if (config.isGlmModel) { + console.log(`[GLM Detection] ✓ GLM model detected: "${modelId}"`) + console.log(`[GLM Detection] - Version: ${config.version}`) + console.log(`[GLM Detection] - Variant: ${config.variant}`) + console.log(`[GLM Detection] - Display name: ${config.displayName}`) + console.log(`[GLM Detection] - Supports vision: ${config.supportsVision}`) + console.log(`[GLM Detection] - Supports thinking: ${config.supportsThinking}`) + console.log(`[GLM Detection] - mergeToolResultText: ${config.mergeToolResultText}`) + console.log(`[GLM Detection] - disableParallelToolCalls: ${config.disableParallelToolCalls}`) + } else { + console.log(`[GLM Detection] ✗ Not a GLM model: "${modelId}"`) + } +} + +/** + * Simple check if a model ID is a GLM model without full configuration. + * Use this for quick checks where you only need a boolean. + * + * @param modelId The model identifier string + * @returns true if the model is a GLM model + */ +export function isGlmModel(modelId: string): boolean { + return /glm[-_]?4|chatglm|\/glm[-_]|^glm[-_]/i.test(modelId) +} diff --git a/src/core/assistant-message/NativeToolCallParser.ts b/src/core/assistant-message/NativeToolCallParser.ts index 72c34f94a07..02d24c6b3b1 100644 --- a/src/core/assistant-message/NativeToolCallParser.ts +++ b/src/core/assistant-message/NativeToolCallParser.ts @@ -166,16 +166,35 @@ export class NativeToolCallParser { /** * Process stream finish reason. * Emits end events when finish_reason is 'tool_calls'. 
+ * + * IMPORTANT: Only emits tool_call_end for tool calls that have actually started + * (i.e., where tool_call_start was emitted). This prevents finalizeStreamingToolCall + * from receiving IDs that were never registered via startStreamingToolCall, which + * would cause tool results to be silently dropped and trigger infinite retry loops. */ public static processFinishReason(finishReason: string | null | undefined): ToolCallStreamEvent[] { const events: ToolCallStreamEvent[] = [] if (finishReason === "tool_calls" && this.rawChunkTracker.size > 0) { for (const [, tracked] of this.rawChunkTracker.entries()) { - events.push({ - type: "tool_call_end", - id: tracked.id, - }) + // Only emit tool_call_end for tool calls that have actually started. + // Tool calls without hasStarted=true never had a tool_call_start emitted + // (likely due to missing tool name), so they were never registered in + // streamingToolCalls. Emitting tool_call_end for these would cause + // finalizeStreamingToolCall to fail, resulting in no tool_result being + // sent to the model and triggering infinite retry loops. + if (tracked.hasStarted) { + events.push({ + type: "tool_call_end", + id: tracked.id, + }) + } else { + // Log a warning for tool calls that were tracked but never started. + // This helps diagnose issues with models that send malformed tool calls. + console.warn( + `[NativeToolCallParser] Skipping tool_call_end for unstarted tool call: ${tracked.id} (no name received)`, + ) + } } } diff --git a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts index db0dc00de41..1b4b1a7e179 100644 --- a/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts +++ b/src/core/assistant-message/__tests__/NativeToolCallParser.spec.ts @@ -343,4 +343,107 @@ describe("NativeToolCallParser", () => { }) }) }) + + describe("processFinishReason", () => { + describe("tool call tracking synchronization", () => { + it("should emit tool_call_end only for tool calls that have started", () => { + // Simulate a tool call with both ID and name (will start) + NativeToolCallParser.processRawChunk({ + index: 0, + id: "call_started_123", + name: "read_file", + }) + + const events = NativeToolCallParser.processFinishReason("tool_calls") + + expect(events).toHaveLength(1) + expect(events[0]).toEqual({ + type: "tool_call_end", + id: "call_started_123", + }) + }) + + it("should NOT emit tool_call_end for tool calls without a name (never started)", () => { + // Simulate a tool call with ID but NO name - this happens when models + // send malformed tool calls or split ID/name across chunks incorrectly + NativeToolCallParser.processRawChunk({ + index: 0, + id: "call_no_name_456", + // No name provided - tool_call_start will not be emitted + }) + + // Capture console.warn to verify warning is logged + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}) + + const events = NativeToolCallParser.processFinishReason("tool_calls") + + // Should NOT emit tool_call_end since tool was never started + expect(events).toHaveLength(0) + + // Should log a warning about the unstarted tool call + expect(warnSpy).toHaveBeenCalledWith( + expect.stringContaining("Skipping tool_call_end for unstarted tool call"), + ) + + warnSpy.mockRestore() + }) + + it("should handle mixed started and unstarted tool calls correctly", () => { + // Tool call with ID and name (will start) + NativeToolCallParser.processRawChunk({ + index: 0, + id: "call_with_name", + name: 
"read_file", + }) + + // Tool call with only ID (will not start) + NativeToolCallParser.processRawChunk({ + index: 1, + id: "call_without_name", + // No name + }) + + // Another tool call with ID and name (will start) + NativeToolCallParser.processRawChunk({ + index: 2, + id: "call_also_with_name", + name: "write_to_file", + }) + + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}) + + const events = NativeToolCallParser.processFinishReason("tool_calls") + + // Should only emit tool_call_end for the two started tool calls + expect(events).toHaveLength(2) + expect(events.map((e) => e.id)).toContain("call_with_name") + expect(events.map((e) => e.id)).toContain("call_also_with_name") + expect(events.map((e) => e.id)).not.toContain("call_without_name") + + // Should log warning for the unstarted tool call + expect(warnSpy).toHaveBeenCalledTimes(1) + expect(warnSpy).toHaveBeenCalledWith(expect.stringContaining("call_without_name")) + + warnSpy.mockRestore() + }) + + it("should return empty array when finish_reason is not tool_calls", () => { + NativeToolCallParser.processRawChunk({ + index: 0, + id: "call_123", + name: "read_file", + }) + + const events = NativeToolCallParser.processFinishReason("stop") + + expect(events).toHaveLength(0) + }) + + it("should return empty array when no tool calls are tracked", () => { + const events = NativeToolCallParser.processFinishReason("tool_calls") + + expect(events).toHaveLength(0) + }) + }) + }) }) From 9e96578df455792aa056aeaf2434d14222f267e6 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 30 Jan 2026 12:12:44 +0000 Subject: [PATCH 2/3] fix: add GLM model detection to OpenAI Compatible provider This commit adds GLM model detection and related optimizations to the OpenAI Compatible provider (OpenAiHandler), which handles the "OpenAI Compatible" option in the UI when users set a custom base URL. Changes: - Import GLM detection utilities and Z.ai format converter - Add glmConfig property to track GLM model configuration - Detect GLM model on construction when model ID is available - Re-detect GLM model in createMessage if model ID changes - For GLM models: - Use convertToZAiFormat with mergeToolResultText to prevent conversation flow disruption - Disable parallel_tool_calls as GLM models may not support it - Add thinking parameter for GLM-4.7 models - Add console logging for detection results and applied optimizations This addresses the feedback in issue #11071 where the user clarified they are using the "OpenAI Compatible" provider, not the "OpenAI" provider. The previous changes in this PR only affected the base-openai-compatible-provider.ts (used by Z.ai, Groq, etc.) and lm-studio.ts, but not openai.ts which handles OpenAI Compatible. 
--- src/api/providers/openai.ts | 77 ++++++++++++++++++++++++++++++++++--- 1 file changed, 71 insertions(+), 6 deletions(-) diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 87589b93960..37ac133ec76 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -15,6 +15,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { TagMatcher } from "../../utils/tag-matcher" import { convertToOpenAiMessages } from "../transform/openai-format" +import { convertToZAiFormat } from "../transform/zai-format" import { convertToR1Format } from "../transform/r1-format" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" @@ -24,6 +25,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection" // TODO: Rename this to OpenAICompatibleHandler. Also, I think the // `OpenAINativeHandler` can subclass from this, since it's obviously @@ -31,7 +33,8 @@ import { handleOpenAIError } from "./utils/openai-error-handler" export class OpenAiHandler extends BaseProvider implements SingleCompletionHandler { protected options: ApiHandlerOptions protected client: OpenAI - private readonly providerName = "OpenAI" + private readonly providerName = "OpenAI Compatible" + private glmConfig: GlmModelConfig | null = null constructor(options: ApiHandlerOptions) { super() @@ -77,6 +80,13 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl timeout, }) } + + // Detect GLM model on construction if model ID is available + const modelId = this.options.openAiModelId || "" + if (modelId) { + this.glmConfig = detectGlmModel(modelId) + logGlmDetection(this.providerName, modelId, this.glmConfig) + } } override async *createMessage( @@ -91,6 +101,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const isAzureAiInference = this._isAzureAiInference(modelUrl) const deepseekReasoner = modelId.includes("deepseek-reasoner") || enabledR1Format + // Re-detect GLM model if not already done or if model ID changed + if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) { + this.glmConfig = detectGlmModel(modelId) + logGlmDetection(this.providerName, modelId, this.glmConfig) + } + if (modelId.includes("o1") || modelId.includes("o3") || modelId.includes("o4")) { yield* this.handleO3FamilyMessage(modelId, systemPrompt, messages, metadata) return @@ -106,6 +122,12 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl if (deepseekReasoner) { convertedMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) + } else if (this.glmConfig.isGlmModel) { + // GLM models benefit from mergeToolResultText to prevent reasoning_content loss + const glmConvertedMessages = convertToZAiFormat(messages, { + mergeToolResultText: this.glmConfig.mergeToolResultText, + }) + convertedMessages = [systemMessage, ...glmConvertedMessages] } else { if (modelInfo.supportsPromptCache) { systemMessage = { @@ -152,6 +174,16 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl) + // Determine parallel_tool_calls setting + // 
Disable for GLM models as they may not support it properly + let parallelToolCalls: boolean + if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) { + parallelToolCalls = false + console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`) + } else { + parallelToolCalls = metadata?.parallelToolCalls ?? true + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelId, temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), @@ -161,12 +193,19 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(reasoning && reasoning), tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + parallel_tool_calls: parallelToolCalls, } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) + // For GLM-4.7 models with thinking support, add thinking parameter + if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) { + const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7 + ;(requestOptions as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" } + console.log(`[${this.providerName}] GLM-4.7 thinking mode: ${useReasoning ? "enabled" : "disabled"}`) + } + let stream try { stream = await this.client.chat.completions.create( @@ -221,20 +260,46 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl yield this.processUsageMetrics(lastUsage, modelInfo) } } else { + // Determine message conversion based on model type + let nonStreamingMessages + if (deepseekReasoner) { + nonStreamingMessages = convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) + } else if (this.glmConfig.isGlmModel) { + // GLM models benefit from mergeToolResultText to prevent reasoning_content loss + const glmConvertedMessages = convertToZAiFormat(messages, { + mergeToolResultText: this.glmConfig.mergeToolResultText, + }) + nonStreamingMessages = [systemMessage, ...glmConvertedMessages] + } else { + nonStreamingMessages = [systemMessage, ...convertToOpenAiMessages(messages)] + } + + // Determine parallel_tool_calls setting for non-streaming + let nonStreamingParallelToolCalls: boolean + if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) { + nonStreamingParallelToolCalls = false + } else { + nonStreamingParallelToolCalls = metadata?.parallelToolCalls ?? true + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, - messages: deepseekReasoner - ? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) - : [systemMessage, ...convertToOpenAiMessages(messages)], + messages: nonStreamingMessages, // Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS) tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + parallel_tool_calls: nonStreamingParallelToolCalls, } // Add max_tokens if needed this.addMaxTokensIfNeeded(requestOptions, modelInfo) + // For GLM-4.7 models with thinking support, add thinking parameter + if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) { + const useReasoning = this.options.enableReasoningEffort !== false + ;(requestOptions as any).thinking = useReasoning ? 
{ type: "enabled" } : { type: "disabled" } + } + let response try { response = await this.client.chat.completions.create( From 23256cd6daa4b03541d0b195919cc7f195af3181 Mon Sep 17 00:00:00 2001 From: Roo Code Date: Fri, 30 Jan 2026 16:22:33 +0000 Subject: [PATCH 3/3] feat: add thinking mode support for GLM-4.6 and GLM-4.6V models - Add supportsReasoningEffort to GLM-4.6 and GLM-4.6V model definitions - Update GLM detection logic to recognize GLM-4.6/4.6V thinking support - Update Z.ai provider to handle thinking mode for all GLM models with reasoning - Update tests to reflect GLM-4.6/4.6V thinking support Addresses issue #11071 where Z.ai documentation shows GLM-4.6 and GLM-4.6V support thinking mode, but this was not reflected in the codebase. --- packages/types/src/providers/zai.ts | 12 ++++++++++++ .../__tests__/glm-model-detection.spec.ts | 19 +++++++++++++++---- .../providers/utils/glm-model-detection.ts | 8 ++++++-- src/api/providers/zai.ts | 16 ++++++++-------- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/packages/types/src/providers/zai.ts b/packages/types/src/providers/zai.ts index 41a6a808ca0..0fc175deb96 100644 --- a/packages/types/src/providers/zai.ts +++ b/packages/types/src/providers/zai.ts @@ -86,6 +86,9 @@ export const internationalZAiModels = { contextWindow: 131_072, supportsImages: true, supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, inputPrice: 0.3, outputPrice: 0.9, cacheWritesPrice: 0, @@ -98,6 +101,9 @@ export const internationalZAiModels = { contextWindow: 200_000, supportsImages: false, supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, inputPrice: 0.6, outputPrice: 2.2, cacheWritesPrice: 0, @@ -259,6 +265,9 @@ export const mainlandZAiModels = { contextWindow: 204_800, supportsImages: false, supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, inputPrice: 0.29, outputPrice: 1.14, cacheWritesPrice: 0, @@ -310,6 +319,9 @@ export const mainlandZAiModels = { contextWindow: 131_072, supportsImages: true, supportsPromptCache: true, + supportsReasoningEffort: ["disable", "medium"], + reasoningEffort: "medium", + preserveReasoning: true, inputPrice: 0.15, outputPrice: 0.45, cacheWritesPrice: 0, diff --git a/src/api/providers/utils/__tests__/glm-model-detection.spec.ts b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts index 78f134e9fac..dfcbe3a8abc 100644 --- a/src/api/providers/utils/__tests__/glm-model-detection.spec.ts +++ b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts @@ -130,17 +130,28 @@ describe("GLM Model Detection", () => { }) describe("thinking support detection", () => { - it("should detect thinking support for GLM-4.7", () => { + it("should detect thinking support for GLM-4.7 variants", () => { expect(detectGlmModel("glm-4.7").supportsThinking).toBe(true) expect(detectGlmModel("glm-4.7-flash").supportsThinking).toBe(true) expect(detectGlmModel("GLM-4.7-FlashX").supportsThinking).toBe(true) }) - it("should NOT detect thinking support for GLM-4.5 and GLM-4.6", () => { + it("should detect thinking support for GLM-4.6 base model", () => { + expect(detectGlmModel("glm-4.6").supportsThinking).toBe(true) + }) + + it("should detect thinking support for GLM-4.6V vision variants", () => { + expect(detectGlmModel("glm-4.6v").supportsThinking).toBe(true) + 
expect(detectGlmModel("GLM-4.6V").supportsThinking).toBe(true) + expect(detectGlmModel("glm-4.6v-flash").supportsThinking).toBe(true) + expect(detectGlmModel("glm-4.6v-flashx").supportsThinking).toBe(true) + }) + + it("should NOT detect thinking support for GLM-4.5 variants", () => { expect(detectGlmModel("glm-4.5").supportsThinking).toBe(false) - expect(detectGlmModel("glm-4.6").supportsThinking).toBe(false) expect(detectGlmModel("glm-4.5-air").supportsThinking).toBe(false) - expect(detectGlmModel("glm-4.6v").supportsThinking).toBe(false) + expect(detectGlmModel("glm-4.5-flash").supportsThinking).toBe(false) + expect(detectGlmModel("glm-4.5v").supportsThinking).toBe(false) }) }) diff --git a/src/api/providers/utils/glm-model-detection.ts b/src/api/providers/utils/glm-model-detection.ts index dddd2da281d..ac7c3a538f1 100644 --- a/src/api/providers/utils/glm-model-detection.ts +++ b/src/api/providers/utils/glm-model-detection.ts @@ -138,8 +138,12 @@ export function detectGlmModel(modelId: string): GlmModelConfig { variant = "x" } - // GLM-4.7 has built-in thinking support - const supportsThinking = version === "4.7" + // GLM-4.6, GLM-4.6V, and GLM-4.7 have built-in thinking support + // For GLM-4.6, only the base model and vision variants support thinking + const supportsThinking = + version === "4.7" || + (version === "4.6" && + (variant === "base" || variant === "v" || variant === "v-flash" || variant === "v-flashx")) // Generate display name let displayName = `GLM-${version !== "unknown" ? version : "4.x"}` diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index a2e3740c56f..7e9a362c68a 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -40,9 +40,9 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { } /** - * Override createStream to handle GLM-4.7's thinking mode. - * GLM-4.7 has thinking enabled by default in the API, so we need to - * explicitly send { type: "disabled" } when the user turns off reasoning. + * Override createStream to handle thinking mode for GLM models. + * GLM-4.6, GLM-4.6V, and GLM-4.7 have thinking enabled by default in the API, + * so we need to explicitly send { type: "disabled" } when the user turns off reasoning. */ protected override createStream( systemPrompt: string, @@ -52,11 +52,11 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { ) { const { id: modelId, info } = this.getModel() - // Check if this is a GLM-4.7 model with thinking support - const isThinkingModel = modelId === "glm-4.7" && Array.isArray(info.supportsReasoningEffort) + // Check if this is a GLM model with thinking support (GLM-4.6, GLM-4.6V, GLM-4.7) + const isThinkingModel = Array.isArray(info.supportsReasoningEffort) if (isThinkingModel) { - // For GLM-4.7, thinking is ON by default in the API. + // For GLM thinking models, thinking is ON by default in the API. // We need to explicitly disable it when reasoning is off. 
const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) @@ -69,7 +69,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { } /** - * Creates a stream with explicit thinking control for GLM-4.7 + * Creates a stream with explicit thinking control for GLM thinking models (4.6, 4.6V, 4.7) */ private createStreamWithThinking( systemPrompt: string, @@ -99,7 +99,7 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { messages: [{ role: "system", content: systemPrompt }, ...convertedMessages], stream: true, stream_options: { include_usage: true }, - // For GLM-4.7: thinking is ON by default, so we explicitly disable when needed + // For GLM thinking models: thinking is ON by default, so we explicitly disable when needed thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice,