From d0100159057e3564bbf47a7c0b40f306c9a350ac Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 30 Jan 2026 00:41:05 +0000
Subject: [PATCH] feat: add strategic GLM model family detection for LM Studio
 and OpenAI-compatible providers

This PR addresses Issue #11071 by implementing a comprehensive GLM model
detection system:

1. Created glm-model-detection.ts utility that:
   - Detects GLM family models (GLM-4.5, 4.6, 4.7 and variants)
   - Supports various model ID formats (standard, MLX, GGUF, ChatGLM)
   - Identifies version (4.5, 4.6, 4.7) and variant (base, air, flash, v, etc.)
   - Returns appropriate configuration for each model

2. Updated LmStudioHandler to:
   - Detect GLM models and log detection results to console
   - Use convertToZAiFormat with mergeToolResultText for GLM models
   - Disable parallel_tool_calls for GLM models
   - Handle reasoning_content for GLM-4.7 models

3. Updated BaseOpenAiCompatibleProvider similarly

4. Added 33 comprehensive tests for the detection utility

The detection uses flexible regex patterns to match model IDs like:
- mlx-community/GLM-4.5-4bit
- GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf
- glm-4.5, glm-4.7-flash, etc.
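As a rough illustration of the intended behaviour (values mirror the new test
suite in glm-model-detection.spec.ts; the config shape is defined in
glm-model-detection.ts):

    import { detectGlmModel, isGlmModel } from "./utils/glm-model-detection"

    // Quantized GGUF build of GLM-4.5 served locally, e.g. through LM Studio
    const config = detectGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf")
    // config.isGlmModel === true, config.version === "4.5"
    // config.mergeToolResultText === true, config.disableParallelToolCalls === true

    detectGlmModel("glm-4.7-flash").supportsThinking // true (GLM-4.7 family only)
    isGlmModel("llama-3-70b")                        // false - non-GLM models are untouched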
---
 .../base-openai-compatible-provider.ts        |  57 +++-
 src/api/providers/lm-studio.ts                |  62 ++++-
 .../__tests__/glm-model-detection.spec.ts     | 254 ++++++++++++++++++
 .../providers/utils/glm-model-detection.ts    | 199 ++++++++++++++
 4 files changed, 566 insertions(+), 6 deletions(-)
 create mode 100644 src/api/providers/utils/__tests__/glm-model-detection.spec.ts
 create mode 100644 src/api/providers/utils/glm-model-detection.ts

diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts
index fc3d769ae2a..31cfb49a320 100644
--- a/src/api/providers/base-openai-compatible-provider.ts
+++ b/src/api/providers/base-openai-compatible-provider.ts
@@ -7,6 +7,7 @@ import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/ap
 import { TagMatcher } from "../../utils/tag-matcher"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToZAiFormat } from "../transform/zai-format"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { DEFAULT_HEADERS } from "./constants"
@@ -14,6 +15,7 @@ import { BaseProvider } from "./base-provider"
 import { handleOpenAIError } from "./utils/openai-error-handler"
 import { calculateApiCostOpenAI } from "../../shared/cost"
 import { getApiRequestTimeout } from "./utils/timeout-config"
+import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"
 
 type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & {
 	providerName: string
@@ -36,6 +38,7 @@ export abstract class BaseOpenAiCompatibleProvider
 	protected readonly options: ApiHandlerOptions
 
 	protected client: OpenAI
+	protected glmConfig: GlmModelConfig | null = null
 
 	constructor({
 		providerName,
@@ -65,6 +68,13 @@ export abstract class BaseOpenAiCompatibleProvider
 			defaultHeaders: DEFAULT_HEADERS,
 			timeout: getApiRequestTimeout(),
 		})
+
+		// Detect GLM model on construction if model ID is available
+		const modelId = this.options.apiModelId || ""
+		if (modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
 	}
 
 	protected createStream(
@@ -75,6 +85,12 @@ export abstract class BaseOpenAiCompatibleProvider
 	) {
 		const { id: model, info } = this.getModel()
 
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== model) {
+			this.glmConfig = detectGlmModel(model)
+			logGlmDetection(this.providerName, model, this.glmConfig)
+		}
+
 		// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
 		const max_tokens =
 			getModelMaxOutputTokens({
@@ -86,16 +102,32 @@ export abstract class BaseOpenAiCompatibleProvider
 		const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature
 
+		// Convert messages based on whether this is a GLM model
+		// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
+		const convertedMessages = this.glmConfig.isGlmModel
+			? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
+			: convertToOpenAiMessages(messages)
+
+		// Determine parallel_tool_calls setting
+		// Disable for GLM models as they may not support it properly
+		let parallelToolCalls: boolean
+		if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) {
+			parallelToolCalls = false
+			console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
+		} else {
+			parallelToolCalls = metadata?.parallelToolCalls ?? true
+		}
+
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
 			temperature,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
-			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+			parallel_tool_calls: parallelToolCalls,
 		}
 
 		// Add thinking parameter if reasoning is enabled and model supports it
@@ -103,6 +135,15 @@ export abstract class BaseOpenAiCompatibleProvider
 			;(params as any).thinking = { type: "enabled" }
 		}
 
+		// For GLM-4.7 models with thinking support, add thinking parameter
+		if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
+			const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7
+			;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
+			console.log(
+				`[${this.providerName}] GLM thinking mode: ${useReasoning ? "enabled" : "disabled"} for ${this.glmConfig.displayName}`,
+			)
+		}
+
 		try {
 			return this.client.chat.completions.create(params, requestOptions)
 		} catch (error) {
@@ -222,6 +263,12 @@ export abstract class BaseOpenAiCompatibleProvider
 	async completePrompt(prompt: string): Promise<string> {
 		const { id: modelId, info: modelInfo } = this.getModel()
 
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
+
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
 			model: modelId,
 			messages: [{ role: "user", content: prompt }],
@@ -232,6 +279,12 @@ export abstract class BaseOpenAiCompatibleProvider
 			;(params as any).thinking = { type: "enabled" }
 		}
 
+		// For GLM-4.7 models with thinking support, add thinking parameter
+		if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
+			const useReasoning = this.options.enableReasoningEffort !== false
+			;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
+		}
+
 		try {
 			const response = await this.client.chat.completions.create(params)
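Taken together, for a detected GLM-4.7 model the streaming request built above
ends up shaped roughly like the sketch below (illustrative placeholder values;
max_tokens, tools and tool_choice still come from the model info and caller
metadata as before):

    import { convertToZAiFormat } from "../transform/zai-format"

    const systemPrompt = "You are Roo."            // placeholder
    const params = {
        model: "glm-4.7",
        max_tokens: 8192,                          // placeholder; normally from getModelMaxOutputTokens()
        temperature: 0,
        messages: [
            { role: "system", content: systemPrompt },
            ...convertToZAiFormat([{ role: "user", content: "List the repo files" }], { mergeToolResultText: true }),
        ],
        stream: true,
        stream_options: { include_usage: true },
        parallel_tool_calls: false,                // disableParallelToolCalls is true for detected GLM models
        thinking: { type: "enabled" },             // GLM-4.7 only; "disabled" when enableReasoningEffort === false
    }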
diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts
index a771394c535..223969e25bd 100644
--- a/src/api/providers/lm-studio.ts
+++ b/src/api/providers/lm-studio.ts
@@ -10,6 +10,7 @@ import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCal
 import { TagMatcher } from "../../utils/tag-matcher"
 
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToZAiFormat } from "../transform/zai-format"
 import { ApiStream } from "../transform/stream"
 
 import { BaseProvider } from "./base-provider"
@@ -17,11 +18,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 import { getModelsFromCache } from "./fetchers/modelCache"
 import { getApiRequestTimeout } from "./utils/timeout-config"
 import { handleOpenAIError } from "./utils/openai-error-handler"
+import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"
 
 export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
 	private readonly providerName = "LM Studio"
+	private glmConfig: GlmModelConfig | null = null
 
 	constructor(options: ApiHandlerOptions) {
 		super()
@@ -35,6 +38,13 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			apiKey: apiKey,
 			timeout: getApiRequestTimeout(),
 		})
+
+		// Detect GLM model on construction if model ID is available
+		const modelId = this.options.lmStudioModelId || ""
+		if (modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
 	}
 
 	override async *createMessage(
@@ -42,9 +52,23 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		const modelId = this.getModel().id
+
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
+
+		// Convert messages based on whether this is a GLM model
+		// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
+		const convertedMessages = this.glmConfig.isGlmModel
+			? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
+			: convertToOpenAiMessages(messages)
+
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
+			...convertedMessages,
 		]
 
 		// -------------------------
@@ -83,14 +107,24 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		let assistantText = ""
 
 		try {
+			// Determine parallel_tool_calls setting
+			// Disable for GLM models as they may not support it properly
+			let parallelToolCalls: boolean
+			if (this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls) {
+				parallelToolCalls = false
+				console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
+			} else {
+				parallelToolCalls = metadata?.parallelToolCalls ?? true
+			}
+
 			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
-				model: this.getModel().id,
+				model: modelId,
 				messages: openAiMessages,
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
 				stream: true,
 				tools: this.convertToolsForOpenAI(metadata?.tools),
 				tool_choice: metadata?.tool_choice,
-				parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+				parallel_tool_calls: parallelToolCalls,
 			}
 
 			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
@@ -124,6 +158,14 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 				}
 			}
 
+			// Handle reasoning_content for GLM models with thinking support
+			if (delta && this.glmConfig?.supportsThinking) {
+				const deltaAny = delta as any
+				if (deltaAny.reasoning_content) {
+					yield { type: "reasoning", text: deltaAny.reasoning_content }
+				}
+			}
+
 			// Handle tool calls in stream - emit partial chunks for NativeToolCallParser
 			if (delta?.tool_calls) {
 				for (const toolCall of delta.tool_calls) {
@@ -186,10 +228,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
+		const modelId = this.getModel().id
+
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
+
 		try {
+			// Determine parallel_tool_calls setting for GLM models
+			const parallelToolCalls =
+				this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls ? false : true
+
 			// Create params object with optional draft model
 			const params: any = {
-				model: this.getModel().id,
+				model: modelId,
 				messages: [{ role: "user", content: prompt }],
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
 				stream: false,
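The reasoning_content handling added above forwards GLM "thinking" tokens as
reasoning chunks. A minimal sketch of what a single streamed delta turns into
(field name as read by the handler; purely illustrative):

    // Delta an LM Studio stream might carry for a GLM-4.7 model
    const delta = { content: "", reasoning_content: "First inspect package.json ..." } as any

    if (delta?.reasoning_content) {
        // Yielded into the ApiStream ahead of the usual "text" chunks for this delta
        const chunk = { type: "reasoning", text: delta.reasoning_content }
    }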
diff --git a/src/api/providers/utils/__tests__/glm-model-detection.spec.ts b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts
new file mode 100644
index 00000000000..21b03e952df
--- /dev/null
+++ b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts
@@ -0,0 +1,254 @@
+// cd src && npx vitest run api/providers/utils/__tests__/glm-model-detection.spec.ts
+
+import { detectGlmModel, isGlmModel, logGlmDetection, type GlmModelConfig } from "../glm-model-detection"
+
+describe("glm-model-detection", () => {
+	describe("isGlmModel", () => {
+		it("should detect standard GLM model IDs", () => {
+			expect(isGlmModel("glm-4.5")).toBe(true)
+			expect(isGlmModel("glm-4.6")).toBe(true)
+			expect(isGlmModel("glm-4.7")).toBe(true)
+			expect(isGlmModel("glm-4.7-flash")).toBe(true)
+		})
+
+		it("should detect GLM models with prefix paths", () => {
+			expect(isGlmModel("mlx-community/GLM-4.5-4bit")).toBe(true)
+			expect(isGlmModel("THUDM/glm-4-9b-chat")).toBe(true)
+			expect(isGlmModel("some-user/GLM-4.5-Air")).toBe(true)
+		})
+
+		it("should detect GGUF file format GLM models", () => {
+			expect(isGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf")).toBe(true)
+			expect(isGlmModel("glm-4.7-flash-Q4_K_M.gguf")).toBe(true)
+		})
+
+		it("should detect ChatGLM models", () => {
+			expect(isGlmModel("chatglm-6b")).toBe(true)
+			expect(isGlmModel("chatglm3-6b")).toBe(true)
+		})
+
+		it("should be case-insensitive", () => {
+			expect(isGlmModel("GLM-4.5")).toBe(true)
+			expect(isGlmModel("Glm-4.6")).toBe(true)
+			expect(isGlmModel("CHATGLM-6B")).toBe(true)
+		})
+
+		it("should NOT detect non-GLM models", () => {
+			expect(isGlmModel("gpt-4")).toBe(false)
+			expect(isGlmModel("claude-3-sonnet")).toBe(false)
+			expect(isGlmModel("llama-3-70b")).toBe(false)
+			expect(isGlmModel("mistral-7b")).toBe(false)
+			expect(isGlmModel("qwen2-7b")).toBe(false)
+		})
+	})
+
+	describe("detectGlmModel", () => {
+		describe("non-GLM models", () => {
+			it("should return isGlmModel: false for non-GLM models", () => {
+				const result = detectGlmModel("gpt-4")
+				expect(result.isGlmModel).toBe(false)
+				expect(result.version).toBe("unknown")
+				expect(result.variant).toBe("unknown")
+				expect(result.mergeToolResultText).toBe(false)
+				expect(result.disableParallelToolCalls).toBe(false)
+			})
+		})
+
+		describe("version detection", () => {
+			it("should detect GLM-4.5 version", () => {
+				expect(detectGlmModel("glm-4.5").version).toBe("4.5")
+				expect(detectGlmModel("GLM-4.5-Air").version).toBe("4.5")
+				expect(detectGlmModel("glm-4-5-flash").version).toBe("4.5")
+				expect(detectGlmModel("accounts/fireworks/models/glm-4p5").version).toBe("4.5")
+			})
+
+			it("should detect GLM-4.6 version", () => {
+				expect(detectGlmModel("glm-4.6").version).toBe("4.6")
+				expect(detectGlmModel("GLM-4.6V").version).toBe("4.6")
+				expect(detectGlmModel("glm-4-6-flash").version).toBe("4.6")
+			})
+
+			it("should detect GLM-4.7 version", () => {
+				expect(detectGlmModel("glm-4.7").version).toBe("4.7")
+				expect(detectGlmModel("GLM-4.7-Flash").version).toBe("4.7")
+				expect(detectGlmModel("glm-4-7-flashx").version).toBe("4.7")
+			})
+		})
+
+		describe("variant detection", () => {
+			describe("base variant", () => {
+				it("should detect base variant", () => {
+					expect(detectGlmModel("glm-4.5").variant).toBe("base")
+					expect(detectGlmModel("glm-4.6").variant).toBe("base")
+					expect(detectGlmModel("glm-4.7").variant).toBe("base")
+				})
+			})
+
+			describe("air variants", () => {
+				it("should detect air variant", () => {
+					expect(detectGlmModel("glm-4.5-air").variant).toBe("air")
+					expect(detectGlmModel("GLM-4.5-Air").variant).toBe("air")
+				})
+
+				it("should detect airx variant", () => {
+					expect(detectGlmModel("glm-4.5-airx").variant).toBe("airx")
+					expect(detectGlmModel("GLM-4.5-AirX").variant).toBe("airx")
+				})
+			})
+
+			describe("flash variants", () => {
+				it("should detect flash variant", () => {
+					expect(detectGlmModel("glm-4.5-flash").variant).toBe("flash")
+					expect(detectGlmModel("glm-4.7-flash").variant).toBe("flash")
+				})
+
+				it("should detect flashx variant", () => {
+					expect(detectGlmModel("glm-4.7-flashx").variant).toBe("flashx")
+					expect(detectGlmModel("GLM-4.7-FlashX").variant).toBe("flashx")
+				})
+			})
+
+			describe("x variant", () => {
+				it("should detect x variant", () => {
+					expect(detectGlmModel("glm-4.5-x").variant).toBe("x")
+					expect(detectGlmModel("GLM-4.5-X").variant).toBe("x")
+				})
+			})
+
+			describe("vision variants", () => {
+				it("should detect v (vision) variant for 4.5", () => {
+					const result = detectGlmModel("glm-4.5v")
+					expect(result.variant).toBe("v")
+					expect(result.supportsVision).toBe(true)
+				})
+
+				it("should detect v (vision) variant for 4.6", () => {
+					const result = detectGlmModel("glm-4.6v")
+					expect(result.variant).toBe("v")
+					expect(result.supportsVision).toBe(true)
+				})
+
+				it("should detect v-flash variant", () => {
+					const result = detectGlmModel("glm-4.6v-flash")
+					expect(result.variant).toBe("v-flash")
+					expect(result.supportsVision).toBe(true)
+				})
+
+				it("should detect v-flashx variant", () => {
+					const result = detectGlmModel("glm-4.6v-flashx")
+					expect(result.variant).toBe("v-flashx")
+					expect(result.supportsVision).toBe(true)
+				})
+			})
+		})
+
+		describe("thinking support detection", () => {
+			it("should detect thinking support for GLM-4.7", () => {
+				expect(detectGlmModel("glm-4.7").supportsThinking).toBe(true)
+				expect(detectGlmModel("glm-4.7-flash").supportsThinking).toBe(true)
+				expect(detectGlmModel("GLM-4.7-FlashX").supportsThinking).toBe(true)
+			})
+
+			it("should NOT detect thinking support for GLM-4.5 and GLM-4.6", () => {
+				expect(detectGlmModel("glm-4.5").supportsThinking).toBe(false)
+				expect(detectGlmModel("glm-4.6").supportsThinking).toBe(false)
+				expect(detectGlmModel("glm-4.5-air").supportsThinking).toBe(false)
+				expect(detectGlmModel("glm-4.6v").supportsThinking).toBe(false)
+			})
+		})
+
+		describe("configuration flags", () => {
+			it("should enable mergeToolResultText for all GLM models", () => {
+				expect(detectGlmModel("glm-4.5").mergeToolResultText).toBe(true)
+				expect(detectGlmModel("glm-4.6").mergeToolResultText).toBe(true)
+				expect(detectGlmModel("glm-4.7").mergeToolResultText).toBe(true)
+			})
+
+			it("should disable parallel tool calls for all GLM models", () => {
+				expect(detectGlmModel("glm-4.5").disableParallelToolCalls).toBe(true)
+				expect(detectGlmModel("glm-4.6").disableParallelToolCalls).toBe(true)
+				expect(detectGlmModel("glm-4.7").disableParallelToolCalls).toBe(true)
+			})
+		})
+
+		describe("display name generation", () => {
+			it("should generate correct display names for base variants", () => {
+				expect(detectGlmModel("glm-4.5").displayName).toBe("GLM-4.5")
+				expect(detectGlmModel("glm-4.6").displayName).toBe("GLM-4.6")
+				expect(detectGlmModel("glm-4.7").displayName).toBe("GLM-4.7")
+			})
+
+			it("should generate correct display names for variants", () => {
+				expect(detectGlmModel("glm-4.5-air").displayName).toBe("GLM-4.5 AIR")
+				expect(detectGlmModel("glm-4.5-flash").displayName).toBe("GLM-4.5 FLASH")
+				expect(detectGlmModel("glm-4.7-flashx").displayName).toBe("GLM-4.7 FLASHX")
+				expect(detectGlmModel("glm-4.6v").displayName).toBe("GLM-4.6 V")
+				expect(detectGlmModel("glm-4.6v-flash").displayName).toBe("GLM-4.6 V FLASH")
+			})
+
+			it("should handle unknown version", () => {
+				// ChatGLM doesn't have a specific version number
+				const result = detectGlmModel("chatglm-6b")
+				expect(result.displayName).toBe("GLM-4.x")
+			})
+		})
+
+		describe("real-world model ID formats", () => {
+			it("should correctly detect MLX community models", () => {
+				const result = detectGlmModel("mlx-community/GLM-4.5-4bit")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+				expect(result.variant).toBe("base")
+			})
+
+			it("should correctly detect GGUF models", () => {
+				const result = detectGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+			})
+
+			it("should correctly detect Fireworks models", () => {
+				const result = detectGlmModel("accounts/fireworks/models/glm-4p5")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+			})
+
+			it("should correctly detect Fireworks air models", () => {
+				const result = detectGlmModel("accounts/fireworks/models/glm-4p5-air")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+				expect(result.variant).toBe("air")
+			})
+		})
+	})
+
+	describe("logGlmDetection", () => {
+		let consoleLogSpy: any
+
+		beforeEach(() => {
+			consoleLogSpy = vi.spyOn(console, "log").mockImplementation(() => {})
+		})
+
+		afterEach(() => {
+			consoleLogSpy.mockRestore()
+		})
+
+		it("should log detection results for GLM models", () => {
+			const config = detectGlmModel("glm-4.5")
+			logGlmDetection("LM Studio", "glm-4.5", config)
+
+			expect(consoleLogSpy).toHaveBeenCalledWith('[LM Studio] Using model ID: "glm-4.5"')
+			expect(consoleLogSpy).toHaveBeenCalledWith('[GLM Detection] ✓ GLM model detected: "glm-4.5"')
+			expect(consoleLogSpy).toHaveBeenCalledWith("[GLM Detection] - Version: 4.5")
+			expect(consoleLogSpy).toHaveBeenCalledWith("[GLM Detection] - Variant: base")
+		})
+
+		it("should log when model is NOT a GLM model", () => {
+			const config = detectGlmModel("gpt-4")
+			logGlmDetection("OpenAI-compatible", "gpt-4", config)
+
+			expect(consoleLogSpy).toHaveBeenCalledWith('[OpenAI-compatible] Using model ID: "gpt-4"')
+			expect(consoleLogSpy).toHaveBeenCalledWith('[GLM Detection] ✗ Not a GLM model: "gpt-4"')
+		})
+	})
+})
diff --git a/src/api/providers/utils/glm-model-detection.ts b/src/api/providers/utils/glm-model-detection.ts
new file mode 100644
index 00000000000..dddd2da281d
--- /dev/null
+++ b/src/api/providers/utils/glm-model-detection.ts
@@ -0,0 +1,199 @@
+/**
+ * GLM Model Detection Utility
+ *
+ * Detects GLM models from Z.ai (Zhipu AI) and returns appropriate configuration
+ * for optimal interaction. This utility supports various model ID formats from
+ * different providers like LM Studio and OpenAI-compatible endpoints.
+ *
+ * GLM Model Family:
+ * - GLM-4.5: Base model with 355B parameters
+ * - GLM-4.5-Air: Lightweight version balancing performance and cost
+ * - GLM-4.5-X: High-performance variant with ultra-fast responses
+ * - GLM-4.5-AirX: Lightweight ultra-fast variant
+ * - GLM-4.5-Flash: Free high-speed model
+ * - GLM-4.5V: Multimodal visual model
+ * - GLM-4.6: Extended 200k context window
+ * - GLM-4.6V: Multimodal vision model
+ * - GLM-4.6V-Flash: Free high-speed vision model
+ * - GLM-4.7: Built-in thinking capabilities
+ * - GLM-4.7-Flash: Free high-speed variant of GLM-4.7
+ * - GLM-4.7-FlashX: Ultra-fast variant
+ */
+
+/**
+ * GLM model version enumeration
+ */
+export type GlmVersion = "4.5" | "4.6" | "4.7" | "unknown"
+
+/**
+ * GLM model variant - specific model within a version
+ */
+export type GlmVariant =
+	| "base"
+	| "air"
+	| "x"
+	| "airx"
+	| "flash"
+	| "flashx"
+	| "v" // vision
+	| "v-flash"
+	| "v-flashx"
+
+/**
+ * Configuration options for GLM models
+ */
+export interface GlmModelConfig {
+	/** Whether this is a GLM model */
+	isGlmModel: boolean
+	/** The detected GLM version (4.5, 4.6, 4.7) */
+	version: GlmVersion
+	/** The detected variant (base, air, flash, v, etc.) */
+	variant: GlmVariant | "unknown"
+	/** Whether this model supports vision/images */
+	supportsVision: boolean
+	/** Whether this model has built-in thinking/reasoning support */
+	supportsThinking: boolean
+	/** Whether to merge tool result text into tool messages */
+	mergeToolResultText: boolean
+	/** Whether to disable parallel tool calls */
+	disableParallelToolCalls: boolean
+	/** The original model ID */
+	originalModelId: string
+	/** A normalized/canonical model name for display */
+	displayName: string
+}
+
+/**
+ * Detects if a model ID represents a GLM model and returns its configuration.
+ *
+ * Supports various model ID formats:
+ * - Standard: "glm-4.5", "glm-4.7-flash"
+ * - With prefix: "mlx-community/GLM-4.5-4bit"
+ * - GGUF files: "GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf"
+ * - ChatGLM: "chatglm-6b", "chatglm3-6b"
+ *
+ * @param modelId The model identifier string
+ * @returns GLM model configuration
+ */
+export function detectGlmModel(modelId: string): GlmModelConfig {
+	const lowerModelId = modelId.toLowerCase()
+
+	// Check if this is a GLM model using case-insensitive matching
+	// Match patterns: "glm-", "glm4", "chatglm", or "glm" followed by a version number
+	const isGlm = /glm[-_]?4|chatglm|\/glm[-_]|^glm[-_]/i.test(modelId)
+
+	if (!isGlm) {
+		return {
+			isGlmModel: false,
+			version: "unknown",
+			variant: "unknown",
+			supportsVision: false,
+			supportsThinking: false,
+			mergeToolResultText: false,
+			disableParallelToolCalls: false,
+			originalModelId: modelId,
+			displayName: modelId,
+		}
+	}
+
+	// Detect version (4.5, 4.6, 4.7)
+	let version: GlmVersion = "unknown"
+	if (/4\.7|4-7|47/i.test(lowerModelId)) {
+		version = "4.7"
+	} else if (/4\.6|4-6|46/i.test(lowerModelId)) {
+		version = "4.6"
+	} else if (/4\.5|4-5|45|4p5/i.test(lowerModelId)) {
+		version = "4.5"
+	}
+
+	// Detect variant
+	let variant: GlmVariant = "base"
+	let supportsVision = false
+
+	// Check for vision variants first (they may also have flash/etc.)
+	if (/4\.5v|4-5v|45v|4p5v|glm-4\.5v/i.test(lowerModelId)) {
+		variant = "v"
+		supportsVision = true
+	} else if (/4\.6v[-_]?flashx|4-6v[-_]?flashx/i.test(lowerModelId)) {
+		variant = "v-flashx"
+		supportsVision = true
+	} else if (/4\.6v[-_]?flash|4-6v[-_]?flash/i.test(lowerModelId)) {
+		variant = "v-flash"
+		supportsVision = true
+	} else if (/4\.6v|4-6v|46v/i.test(lowerModelId)) {
+		variant = "v"
+		supportsVision = true
+	}
+	// Non-vision variants
+	else if (/flashx/i.test(lowerModelId)) {
+		variant = "flashx"
+	} else if (/flash/i.test(lowerModelId)) {
+		variant = "flash"
+	} else if (/airx/i.test(lowerModelId)) {
+		variant = "airx"
+	} else if (/air/i.test(lowerModelId)) {
+		variant = "air"
+	} else if (/[-_]x\b/i.test(lowerModelId)) {
+		// Match "-x" or "_x" at word boundary (to avoid matching "flashx", "airx")
+		variant = "x"
+	}
+
+	// GLM-4.7 has built-in thinking support
+	const supportsThinking = version === "4.7"
+
+	// Generate display name
+	let displayName = `GLM-${version !== "unknown" ? version : "4.x"}`
+	if (variant !== "base") {
+		const variantName = variant.toUpperCase().replace("-", " ")
+		displayName += ` ${variantName}`
+	}
+
+	return {
+		isGlmModel: true,
+		version,
+		variant,
+		supportsVision,
+		supportsThinking,
+		// All GLM models benefit from mergeToolResultText to prevent reasoning_content loss
+		mergeToolResultText: true,
+		// Disable parallel tool calls for GLM models as they may not support it properly
+		disableParallelToolCalls: true,
+		originalModelId: modelId,
+		displayName,
+	}
+}
+
+/**
+ * Logs GLM model detection results to the console for debugging.
+ *
+ * @param providerName The name of the provider (e.g., "LM Studio", "OpenAI-compatible")
+ * @param modelId The model ID being used
+ * @param config The detected GLM configuration
+ */
+export function logGlmDetection(providerName: string, modelId: string, config: GlmModelConfig): void {
+	console.log(`[${providerName}] Using model ID: "${modelId}"`)
+
+	if (config.isGlmModel) {
+		console.log(`[GLM Detection] ✓ GLM model detected: "${modelId}"`)
+		console.log(`[GLM Detection] - Version: ${config.version}`)
+		console.log(`[GLM Detection] - Variant: ${config.variant}`)
+		console.log(`[GLM Detection] - Display name: ${config.displayName}`)
+		console.log(`[GLM Detection] - Supports vision: ${config.supportsVision}`)
+		console.log(`[GLM Detection] - Supports thinking: ${config.supportsThinking}`)
+		console.log(`[GLM Detection] - mergeToolResultText: ${config.mergeToolResultText}`)
+		console.log(`[GLM Detection] - disableParallelToolCalls: ${config.disableParallelToolCalls}`)
+	} else {
+		console.log(`[GLM Detection] ✗ Not a GLM model: "${modelId}"`)
+	}
+}
+
+/**
+ * Simple check if a model ID is a GLM model without full configuration.
+ * Use this for quick checks where you only need a boolean.
+ *
+ * @param modelId The model identifier string
+ * @returns true if the model is a GLM model
+ */
+export function isGlmModel(modelId: string): boolean {
+	return /glm[-_]?4|chatglm|\/glm[-_]|^glm[-_]/i.test(modelId)
+}
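For reference, the quick boolean check and the full detection path combine the
same way the two handlers above use them (a minimal usage sketch; import path
relative to src/api/providers/utils):

    import { detectGlmModel, isGlmModel, logGlmDetection } from "./glm-model-detection"

    const modelId = "mlx-community/GLM-4.5-4bit"

    if (isGlmModel(modelId)) {
        const config = detectGlmModel(modelId)
        logGlmDetection("LM Studio", modelId, config)
        // config.mergeToolResultText drives convertToZAiFormat(...) and
        // config.disableParallelToolCalls forces parallel_tool_calls: false,
        // exactly as wired into the handlers in this PR.
    }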