From d0100159057e3564bbf47a7c0b40f306c9a350ac Mon Sep 17 00:00:00 2001
From: Roo Code
Date: Fri, 30 Jan 2026 00:41:05 +0000
Subject: [PATCH] feat: add strategic GLM model family detection for LM Studio
 and OpenAI-compatible providers

This PR addresses Issue #11071 by implementing a comprehensive GLM model
detection system:

1. Created glm-model-detection.ts utility that:
   - Detects GLM family models (GLM-4.5, 4.6, 4.7 and variants)
   - Supports various model ID formats (standard, MLX, GGUF, ChatGLM)
   - Identifies version (4.5, 4.6, 4.7) and variant (base, air, flash, v, etc.)
   - Returns appropriate configuration for each model

2. Updated LmStudioHandler to:
   - Detect GLM models and log detection results to console
   - Use convertToZAiFormat with mergeToolResultText for GLM models
   - Disable parallel_tool_calls for GLM models
   - Handle reasoning_content for GLM-4.7 models

3. Updated BaseOpenAiCompatibleProvider similarly

4. Added 33 comprehensive tests for the detection utility

The detection uses flexible regex patterns to match model IDs like:
- mlx-community/GLM-4.5-4bit
- GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf
- glm-4.5, glm-4.7-flash, etc.
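As a rough illustration of the intended behaviour (values mirror the new test
suite in glm-model-detection.spec.ts; the config shape is defined in
glm-model-detection.ts):

    import { detectGlmModel, isGlmModel } from "./utils/glm-model-detection"

    // Quantized GGUF build of GLM-4.5 served locally, e.g. through LM Studio
    const config = detectGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf")
    // config.isGlmModel === true, config.version === "4.5"
    // config.mergeToolResultText === true, config.disableParallelToolCalls === true

    detectGlmModel("glm-4.7-flash").supportsThinking // true (GLM-4.7 family only)
    isGlmModel("llama-3-70b")                        // false - non-GLM models are untouched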
---
 .../base-openai-compatible-provider.ts        |  57 +++-
 src/api/providers/lm-studio.ts                |  62 ++++-
 .../__tests__/glm-model-detection.spec.ts     | 254 ++++++++++++++++++
 .../providers/utils/glm-model-detection.ts    | 199 ++++++++++++++
 4 files changed, 566 insertions(+), 6 deletions(-)
 create mode 100644 src/api/providers/utils/__tests__/glm-model-detection.spec.ts
 create mode 100644 src/api/providers/utils/glm-model-detection.ts

diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts
index fc3d769ae2a..31cfb49a320 100644
--- a/src/api/providers/base-openai-compatible-provider.ts
+++ b/src/api/providers/base-openai-compatible-provider.ts
@@ -7,6 +7,7 @@ import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/ap
 import { TagMatcher } from "../../utils/tag-matcher"
 import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToZAiFormat } from "../transform/zai-format"
 
 import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { DEFAULT_HEADERS } from "./constants"
@@ -14,6 +15,7 @@ import { BaseProvider } from "./base-provider"
 import { handleOpenAIError } from "./utils/openai-error-handler"
 import { calculateApiCostOpenAI } from "../../shared/cost"
 import { getApiRequestTimeout } from "./utils/timeout-config"
+import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"
 
 type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & {
 	providerName: string
@@ -36,6 +38,7 @@ export abstract class BaseOpenAiCompatibleProvider
 	protected readonly options: ApiHandlerOptions
 
 	protected client: OpenAI
+	protected glmConfig: GlmModelConfig | null = null
 
 	constructor({
 		providerName,
@@ -65,6 +68,13 @@ export abstract class BaseOpenAiCompatibleProvider
 			defaultHeaders: DEFAULT_HEADERS,
 			timeout: getApiRequestTimeout(),
 		})
+
+		// Detect GLM model on construction if model ID is available
+		const modelId = this.options.apiModelId || ""
+		if (modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
 	}
 
 	protected createStream(
@@ -75,6 +85,12 @@ export abstract class BaseOpenAiCompatibleProvider
 	) {
 		const { id: model, info } = this.getModel()
 
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== model) {
+			this.glmConfig = detectGlmModel(model)
+			logGlmDetection(this.providerName, model, this.glmConfig)
+		}
+
 		// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
 		const max_tokens =
 			getModelMaxOutputTokens({
@@ -86,16 +102,32 @@ export abstract class BaseOpenAiCompatibleProvider
 		const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature
 
+		// Convert messages based on whether this is a GLM model
+		// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
+		const convertedMessages = this.glmConfig.isGlmModel
+			? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
+			: convertToOpenAiMessages(messages)
+
+		// Determine parallel_tool_calls setting
+		// Disable for GLM models as they may not support it properly
+		let parallelToolCalls: boolean
+		if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) {
+			parallelToolCalls = false
+			console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
+		} else {
+			parallelToolCalls = metadata?.parallelToolCalls ?? true
+		}
+
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
 			temperature,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
-			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+			parallel_tool_calls: parallelToolCalls,
 		}
 
 		// Add thinking parameter if reasoning is enabled and model supports it
@@ -103,6 +135,15 @@ export abstract class BaseOpenAiCompatibleProvider
 			;(params as any).thinking = { type: "enabled" }
 		}
 
+		// For GLM-4.7 models with thinking support, add thinking parameter
+		if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
+			const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7
+			;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
+			console.log(
+				`[${this.providerName}] GLM thinking mode: ${useReasoning ? "enabled" : "disabled"} for ${this.glmConfig.displayName}`,
+			)
+		}
+
 		try {
 			return this.client.chat.completions.create(params, requestOptions)
 		} catch (error) {
@@ -222,6 +263,12 @@ export abstract class BaseOpenAiCompatibleProvider
 	async completePrompt(prompt: string): Promise<string> {
 		const { id: modelId, info: modelInfo } = this.getModel()
 
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
+
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
 			model: modelId,
 			messages: [{ role: "user", content: prompt }],
@@ -232,6 +279,12 @@ export abstract class BaseOpenAiCompatibleProvider
 			;(params as any).thinking = { type: "enabled" }
 		}
 
+		// For GLM-4.7 models with thinking support, add thinking parameter
+		if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
+			const useReasoning = this.options.enableReasoningEffort !== false
+			;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
+		}
+
 		try {
 			const response = await this.client.chat.completions.create(params)
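Taken together, for a detected GLM-4.7 model the streaming request built above
ends up shaped roughly like the sketch below (illustrative placeholder values;
max_tokens, tools and tool_choice still come from the model info and caller
metadata as before):

    import { convertToZAiFormat } from "../transform/zai-format"

    const systemPrompt = "You are Roo."            // placeholder
    const params = {
        model: "glm-4.7",
        max_tokens: 8192,                          // placeholder; normally from getModelMaxOutputTokens()
        temperature: 0,
        messages: [
            { role: "system", content: systemPrompt },
            ...convertToZAiFormat([{ role: "user", content: "List the repo files" }], { mergeToolResultText: true }),
        ],
        stream: true,
        stream_options: { include_usage: true },
        parallel_tool_calls: false,                // disableParallelToolCalls is true for detected GLM models
        thinking: { type: "enabled" },             // GLM-4.7 only; "disabled" when enableReasoningEffort === false
    }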
diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts
index a771394c535..223969e25bd 100644
--- a/src/api/providers/lm-studio.ts
+++ b/src/api/providers/lm-studio.ts
@@ -10,6 +10,7 @@ import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCal
 import { TagMatcher } from "../../utils/tag-matcher"
 
 import { convertToOpenAiMessages } from "../transform/openai-format"
+import { convertToZAiFormat } from "../transform/zai-format"
 import { ApiStream } from "../transform/stream"
 
 import { BaseProvider } from "./base-provider"
@@ -17,11 +18,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 import { getModelsFromCache } from "./fetchers/modelCache"
 import { getApiRequestTimeout } from "./utils/timeout-config"
 import { handleOpenAIError } from "./utils/openai-error-handler"
+import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"
 
 export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
 	private client: OpenAI
 	private readonly providerName = "LM Studio"
+	private glmConfig: GlmModelConfig | null = null
 
 	constructor(options: ApiHandlerOptions) {
 		super()
@@ -35,6 +38,13 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 			apiKey: apiKey,
 			timeout: getApiRequestTimeout(),
 		})
+
+		// Detect GLM model on construction if model ID is available
+		const modelId = this.options.lmStudioModelId || ""
+		if (modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
 	}
 
 	override async *createMessage(
@@ -42,9 +52,23 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		const modelId = this.getModel().id
+
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
+
+		// Convert messages based on whether this is a GLM model
+		// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
+		const convertedMessages = this.glmConfig.isGlmModel
+			? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
+			: convertToOpenAiMessages(messages)
+
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
+			...convertedMessages,
 		]
 
 		// -------------------------
@@ -83,14 +107,24 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		let assistantText = ""
 
 		try {
+			// Determine parallel_tool_calls setting
+			// Disable for GLM models as they may not support it properly
+			let parallelToolCalls: boolean
+			if (this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls) {
+				parallelToolCalls = false
+				console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
+			} else {
+				parallelToolCalls = metadata?.parallelToolCalls ?? true
+			}
+
 			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
-				model: this.getModel().id,
+				model: modelId,
 				messages: openAiMessages,
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
 				stream: true,
 				tools: this.convertToolsForOpenAI(metadata?.tools),
 				tool_choice: metadata?.tool_choice,
-				parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+				parallel_tool_calls: parallelToolCalls,
 			}
 
 			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
@@ -124,6 +158,14 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 				}
 			}
 
+			// Handle reasoning_content for GLM models with thinking support
+			if (delta && this.glmConfig?.supportsThinking) {
+				const deltaAny = delta as any
+				if (deltaAny.reasoning_content) {
+					yield { type: "reasoning", text: deltaAny.reasoning_content }
+				}
+			}
+
 			// Handle tool calls in stream - emit partial chunks for NativeToolCallParser
 			if (delta?.tool_calls) {
 				for (const toolCall of delta.tool_calls) {
@@ -186,10 +228,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	async completePrompt(prompt: string): Promise<string> {
+		const modelId = this.getModel().id
+
+		// Re-detect GLM model if not already done or if model ID changed
+		if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
+			this.glmConfig = detectGlmModel(modelId)
+			logGlmDetection(this.providerName, modelId, this.glmConfig)
+		}
+
 		try {
+			// Determine parallel_tool_calls setting for GLM models
+			const parallelToolCalls =
+				this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls ? false : true
+
 			// Create params object with optional draft model
 			const params: any = {
-				model: this.getModel().id,
+				model: modelId,
 				messages: [{ role: "user", content: prompt }],
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
 				stream: false,
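The reasoning_content handling added above forwards GLM "thinking" tokens as
reasoning chunks. A minimal sketch of what a single streamed delta turns into
(field name as read by the handler; purely illustrative):

    // Delta an LM Studio stream might carry for a GLM-4.7 model
    const delta = { content: "", reasoning_content: "First inspect package.json ..." } as any

    if (delta?.reasoning_content) {
        // Yielded into the ApiStream ahead of the usual "text" chunks for this delta
        const chunk = { type: "reasoning", text: delta.reasoning_content }
    }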
diff --git a/src/api/providers/utils/__tests__/glm-model-detection.spec.ts b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts
new file mode 100644
index 00000000000..21b03e952df
--- /dev/null
+++ b/src/api/providers/utils/__tests__/glm-model-detection.spec.ts
@@ -0,0 +1,254 @@
+// cd src && npx vitest run api/providers/utils/__tests__/glm-model-detection.spec.ts
+
+import { detectGlmModel, isGlmModel, logGlmDetection, type GlmModelConfig } from "../glm-model-detection"
+
+describe("glm-model-detection", () => {
+	describe("isGlmModel", () => {
+		it("should detect standard GLM model IDs", () => {
+			expect(isGlmModel("glm-4.5")).toBe(true)
+			expect(isGlmModel("glm-4.6")).toBe(true)
+			expect(isGlmModel("glm-4.7")).toBe(true)
+			expect(isGlmModel("glm-4.7-flash")).toBe(true)
+		})
+
+		it("should detect GLM models with prefix paths", () => {
+			expect(isGlmModel("mlx-community/GLM-4.5-4bit")).toBe(true)
+			expect(isGlmModel("THUDM/glm-4-9b-chat")).toBe(true)
+			expect(isGlmModel("some-user/GLM-4.5-Air")).toBe(true)
+		})
+
+		it("should detect GGUF file format GLM models", () => {
+			expect(isGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf")).toBe(true)
+			expect(isGlmModel("glm-4.7-flash-Q4_K_M.gguf")).toBe(true)
+		})
+
+		it("should detect ChatGLM models", () => {
+			expect(isGlmModel("chatglm-6b")).toBe(true)
+			expect(isGlmModel("chatglm3-6b")).toBe(true)
+		})
+
+		it("should be case-insensitive", () => {
+			expect(isGlmModel("GLM-4.5")).toBe(true)
+			expect(isGlmModel("Glm-4.6")).toBe(true)
+			expect(isGlmModel("CHATGLM-6B")).toBe(true)
+		})
+
+		it("should NOT detect non-GLM models", () => {
+			expect(isGlmModel("gpt-4")).toBe(false)
+			expect(isGlmModel("claude-3-sonnet")).toBe(false)
+			expect(isGlmModel("llama-3-70b")).toBe(false)
+			expect(isGlmModel("mistral-7b")).toBe(false)
+			expect(isGlmModel("qwen2-7b")).toBe(false)
+		})
+	})
+
+	describe("detectGlmModel", () => {
+		describe("non-GLM models", () => {
+			it("should return isGlmModel: false for non-GLM models", () => {
+				const result = detectGlmModel("gpt-4")
+				expect(result.isGlmModel).toBe(false)
+				expect(result.version).toBe("unknown")
+				expect(result.variant).toBe("unknown")
+				expect(result.mergeToolResultText).toBe(false)
+				expect(result.disableParallelToolCalls).toBe(false)
+			})
+		})
+
+		describe("version detection", () => {
+			it("should detect GLM-4.5 version", () => {
+				expect(detectGlmModel("glm-4.5").version).toBe("4.5")
+				expect(detectGlmModel("GLM-4.5-Air").version).toBe("4.5")
+				expect(detectGlmModel("glm-4-5-flash").version).toBe("4.5")
+				expect(detectGlmModel("accounts/fireworks/models/glm-4p5").version).toBe("4.5")
+			})
+
+			it("should detect GLM-4.6 version", () => {
+				expect(detectGlmModel("glm-4.6").version).toBe("4.6")
+				expect(detectGlmModel("GLM-4.6V").version).toBe("4.6")
+				expect(detectGlmModel("glm-4-6-flash").version).toBe("4.6")
+			})
+
+			it("should detect GLM-4.7 version", () => {
+				expect(detectGlmModel("glm-4.7").version).toBe("4.7")
+				expect(detectGlmModel("GLM-4.7-Flash").version).toBe("4.7")
+				expect(detectGlmModel("glm-4-7-flashx").version).toBe("4.7")
+			})
+		})
+
+		describe("variant detection", () => {
+			describe("base variant", () => {
+				it("should detect base variant", () => {
+					expect(detectGlmModel("glm-4.5").variant).toBe("base")
+					expect(detectGlmModel("glm-4.6").variant).toBe("base")
+					expect(detectGlmModel("glm-4.7").variant).toBe("base")
+				})
+			})
+
+			describe("air variants", () => {
+				it("should detect air variant", () => {
+					expect(detectGlmModel("glm-4.5-air").variant).toBe("air")
+					expect(detectGlmModel("GLM-4.5-Air").variant).toBe("air")
+				})
+
+				it("should detect airx variant", () => {
+					expect(detectGlmModel("glm-4.5-airx").variant).toBe("airx")
+					expect(detectGlmModel("GLM-4.5-AirX").variant).toBe("airx")
+				})
+			})
+
+			describe("flash variants", () => {
+				it("should detect flash variant", () => {
+					expect(detectGlmModel("glm-4.5-flash").variant).toBe("flash")
+					expect(detectGlmModel("glm-4.7-flash").variant).toBe("flash")
+				})
+
+				it("should detect flashx variant", () => {
+					expect(detectGlmModel("glm-4.7-flashx").variant).toBe("flashx")
+					expect(detectGlmModel("GLM-4.7-FlashX").variant).toBe("flashx")
+				})
+			})
+
+			describe("x variant", () => {
+				it("should detect x variant", () => {
+					expect(detectGlmModel("glm-4.5-x").variant).toBe("x")
+					expect(detectGlmModel("GLM-4.5-X").variant).toBe("x")
+				})
+			})
+
+			describe("vision variants", () => {
+				it("should detect v (vision) variant for 4.5", () => {
+					const result = detectGlmModel("glm-4.5v")
+					expect(result.variant).toBe("v")
+					expect(result.supportsVision).toBe(true)
+				})
+
+				it("should detect v (vision) variant for 4.6", () => {
+					const result = detectGlmModel("glm-4.6v")
+					expect(result.variant).toBe("v")
+					expect(result.supportsVision).toBe(true)
+				})
+
+				it("should detect v-flash variant", () => {
+					const result = detectGlmModel("glm-4.6v-flash")
+					expect(result.variant).toBe("v-flash")
+					expect(result.supportsVision).toBe(true)
+				})
+
+				it("should detect v-flashx variant", () => {
+					const result = detectGlmModel("glm-4.6v-flashx")
+					expect(result.variant).toBe("v-flashx")
+					expect(result.supportsVision).toBe(true)
+				})
+			})
+		})
+
+		describe("thinking support detection", () => {
+			it("should detect thinking support for GLM-4.7", () => {
+				expect(detectGlmModel("glm-4.7").supportsThinking).toBe(true)
+				expect(detectGlmModel("glm-4.7-flash").supportsThinking).toBe(true)
+				expect(detectGlmModel("GLM-4.7-FlashX").supportsThinking).toBe(true)
+			})
+
+			it("should NOT detect thinking support for GLM-4.5 and GLM-4.6", () => {
+				expect(detectGlmModel("glm-4.5").supportsThinking).toBe(false)
+				expect(detectGlmModel("glm-4.6").supportsThinking).toBe(false)
+				expect(detectGlmModel("glm-4.5-air").supportsThinking).toBe(false)
+				expect(detectGlmModel("glm-4.6v").supportsThinking).toBe(false)
+			})
+		})
+
+		describe("configuration flags", () => {
+			it("should enable mergeToolResultText for all GLM models", () => {
+				expect(detectGlmModel("glm-4.5").mergeToolResultText).toBe(true)
+				expect(detectGlmModel("glm-4.6").mergeToolResultText).toBe(true)
+				expect(detectGlmModel("glm-4.7").mergeToolResultText).toBe(true)
+			})
+
+			it("should disable parallel tool calls for all GLM models", () => {
+				expect(detectGlmModel("glm-4.5").disableParallelToolCalls).toBe(true)
+				expect(detectGlmModel("glm-4.6").disableParallelToolCalls).toBe(true)
+				expect(detectGlmModel("glm-4.7").disableParallelToolCalls).toBe(true)
+			})
+		})
+
+		describe("display name generation", () => {
+			it("should generate correct display names for base variants", () => {
+				expect(detectGlmModel("glm-4.5").displayName).toBe("GLM-4.5")
+				expect(detectGlmModel("glm-4.6").displayName).toBe("GLM-4.6")
+				expect(detectGlmModel("glm-4.7").displayName).toBe("GLM-4.7")
+			})
+
+			it("should generate correct display names for variants", () => {
+				expect(detectGlmModel("glm-4.5-air").displayName).toBe("GLM-4.5 AIR")
+				expect(detectGlmModel("glm-4.5-flash").displayName).toBe("GLM-4.5 FLASH")
+				expect(detectGlmModel("glm-4.7-flashx").displayName).toBe("GLM-4.7 FLASHX")
+				expect(detectGlmModel("glm-4.6v").displayName).toBe("GLM-4.6 V")
+				expect(detectGlmModel("glm-4.6v-flash").displayName).toBe("GLM-4.6 V FLASH")
+			})
+
+			it("should handle unknown version", () => {
+				// ChatGLM doesn't have a specific version number
+				const result = detectGlmModel("chatglm-6b")
+				expect(result.displayName).toBe("GLM-4.x")
+			})
+		})
+
+		describe("real-world model ID formats", () => {
+			it("should correctly detect MLX community models", () => {
+				const result = detectGlmModel("mlx-community/GLM-4.5-4bit")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+				expect(result.variant).toBe("base")
+			})
+
+			it("should correctly detect GGUF models", () => {
+				const result = detectGlmModel("GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+			})
+
+			it("should correctly detect Fireworks models", () => {
+				const result = detectGlmModel("accounts/fireworks/models/glm-4p5")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+			})
+
+			it("should correctly detect Fireworks air models", () => {
+				const result = detectGlmModel("accounts/fireworks/models/glm-4p5-air")
+				expect(result.isGlmModel).toBe(true)
+				expect(result.version).toBe("4.5")
+				expect(result.variant).toBe("air")
+			})
+		})
+	})
+
+	describe("logGlmDetection", () => {
+		let consoleLogSpy: any
+
+		beforeEach(() => {
+			consoleLogSpy = vi.spyOn(console, "log").mockImplementation(() => {})
+		})
+
+		afterEach(() => {
+			consoleLogSpy.mockRestore()
+		})
+
+		it("should log detection results for GLM models", () => {
+			const config = detectGlmModel("glm-4.5")
+			logGlmDetection("LM Studio", "glm-4.5", config)
+
+			expect(consoleLogSpy).toHaveBeenCalledWith('[LM Studio] Using model ID: "glm-4.5"')
+			expect(consoleLogSpy).toHaveBeenCalledWith('[GLM Detection] ✓ GLM model detected: "glm-4.5"')
+			expect(consoleLogSpy).toHaveBeenCalledWith("[GLM Detection] - Version: 4.5")
+			expect(consoleLogSpy).toHaveBeenCalledWith("[GLM Detection] - Variant: base")
+		})
+
+		it("should log when model is NOT a GLM model", () => {
+			const config = detectGlmModel("gpt-4")
+			logGlmDetection("OpenAI-compatible", "gpt-4", config)
+
+			expect(consoleLogSpy).toHaveBeenCalledWith('[OpenAI-compatible] Using model ID: "gpt-4"')
+			expect(consoleLogSpy).toHaveBeenCalledWith('[GLM Detection] ✗ Not a GLM model: "gpt-4"')
+		})
+	})
+})
diff --git a/src/api/providers/utils/glm-model-detection.ts b/src/api/providers/utils/glm-model-detection.ts
new file mode 100644
index 00000000000..dddd2da281d
--- /dev/null
+++ b/src/api/providers/utils/glm-model-detection.ts
@@ -0,0 +1,199 @@
+/**
+ * GLM Model Detection Utility
+ *
+ * Detects GLM models from Z.ai (Zhipu AI) and returns appropriate configuration
+ * for optimal interaction. This utility supports various model ID formats from
+ * different providers like LM Studio and OpenAI-compatible endpoints.
+ *
+ * GLM Model Family:
+ * - GLM-4.5: Base model with 355B parameters
+ * - GLM-4.5-Air: Lightweight version balancing performance and cost
+ * - GLM-4.5-X: High-performance variant with ultra-fast responses
+ * - GLM-4.5-AirX: Lightweight ultra-fast variant
+ * - GLM-4.5-Flash: Free high-speed model
+ * - GLM-4.5V: Multimodal visual model
+ * - GLM-4.6: Extended 200k context window
+ * - GLM-4.6V: Multimodal vision model
+ * - GLM-4.6V-Flash: Free high-speed vision model
+ * - GLM-4.7: Built-in thinking capabilities
+ * - GLM-4.7-Flash: Free high-speed variant of GLM-4.7
+ * - GLM-4.7-FlashX: Ultra-fast variant
+ */
+
+/**
+ * GLM model version enumeration
+ */
+export type GlmVersion = "4.5" | "4.6" | "4.7" | "unknown"
+
+/**
+ * GLM model variant - specific model within a version
+ */
+export type GlmVariant =
+	| "base"
+	| "air"
+	| "x"
+	| "airx"
+	| "flash"
+	| "flashx"
+	| "v" // vision
+	| "v-flash"
+	| "v-flashx"
+
+/**
+ * Configuration options for GLM models
+ */
+export interface GlmModelConfig {
+	/** Whether this is a GLM model */
+	isGlmModel: boolean
+	/** The detected GLM version (4.5, 4.6, 4.7) */
+	version: GlmVersion
+	/** The detected variant (base, air, flash, v, etc.) */
+	variant: GlmVariant | "unknown"
+	/** Whether this model supports vision/images */
+	supportsVision: boolean
+	/** Whether this model has built-in thinking/reasoning support */
+	supportsThinking: boolean
+	/** Whether to merge tool result text into tool messages */
+	mergeToolResultText: boolean
+	/** Whether to disable parallel tool calls */
+	disableParallelToolCalls: boolean
+	/** The original model ID */
+	originalModelId: string
+	/** A normalized/canonical model name for display */
+	displayName: string
+}
+
+/**
+ * Detects if a model ID represents a GLM model and returns its configuration.
+ *
+ * Supports various model ID formats:
+ * - Standard: "glm-4.5", "glm-4.7-flash"
+ * - With prefix: "mlx-community/GLM-4.5-4bit"
+ * - GGUF files: "GLM-4.5-UD-Q8_K_XL-00001-of-00008.gguf"
+ * - ChatGLM: "chatglm-6b", "chatglm3-6b"
+ *
+ * @param modelId The model identifier string
+ * @returns GLM model configuration
+ */
+export function detectGlmModel(modelId: string): GlmModelConfig {
+	const lowerModelId = modelId.toLowerCase()
+
+	// Check if this is a GLM model using case-insensitive matching
+	// Match patterns: "glm-", "glm4", "chatglm", or "glm" followed by a version number
+	const isGlm = /glm[-_]?4|chatglm|\/glm[-_]|^glm[-_]/i.test(modelId)
+
+	if (!isGlm) {
+		return {
+			isGlmModel: false,
+			version: "unknown",
+			variant: "unknown",
+			supportsVision: false,
+			supportsThinking: false,
+			mergeToolResultText: false,
+			disableParallelToolCalls: false,
+			originalModelId: modelId,
+			displayName: modelId,
+		}
+	}
+
+	// Detect version (4.5, 4.6, 4.7)
+	let version: GlmVersion = "unknown"
+	if (/4\.7|4-7|47/i.test(lowerModelId)) {
+		version = "4.7"
+	} else if (/4\.6|4-6|46/i.test(lowerModelId)) {
+		version = "4.6"
+	} else if (/4\.5|4-5|45|4p5/i.test(lowerModelId)) {
+		version = "4.5"
+	}
+
+	// Detect variant
+	let variant: GlmVariant = "base"
+	let supportsVision = false
+
+	// Check for vision variants first (they may also have flash/etc.)
+	if (/4\.5v|4-5v|45v|4p5v|glm-4\.5v/i.test(lowerModelId)) {
+		variant = "v"
+		supportsVision = true
+	} else if (/4\.6v[-_]?flashx|4-6v[-_]?flashx/i.test(lowerModelId)) {
+		variant = "v-flashx"
+		supportsVision = true
+	} else if (/4\.6v[-_]?flash|4-6v[-_]?flash/i.test(lowerModelId)) {
+		variant = "v-flash"
+		supportsVision = true
+	} else if (/4\.6v|4-6v|46v/i.test(lowerModelId)) {
+		variant = "v"
+		supportsVision = true
+	}
+	// Non-vision variants
+	else if (/flashx/i.test(lowerModelId)) {
+		variant = "flashx"
+	} else if (/flash/i.test(lowerModelId)) {
+		variant = "flash"
+	} else if (/airx/i.test(lowerModelId)) {
+		variant = "airx"
+	} else if (/air/i.test(lowerModelId)) {
+		variant = "air"
+	} else if (/[-_]x\b/i.test(lowerModelId)) {
+		// Match "-x" or "_x" at word boundary (to avoid matching "flashx", "airx")
+		variant = "x"
+	}
+
+	// GLM-4.7 has built-in thinking support
+	const supportsThinking = version === "4.7"
+
+	// Generate display name
+	let displayName = `GLM-${version !== "unknown" ? version : "4.x"}`
+	if (variant !== "base") {
+		const variantName = variant.toUpperCase().replace("-", " ")
+		displayName += ` ${variantName}`
+	}
+
+	return {
+		isGlmModel: true,
+		version,
+		variant,
+		supportsVision,
+		supportsThinking,
+		// All GLM models benefit from mergeToolResultText to prevent reasoning_content loss
+		mergeToolResultText: true,
+		// Disable parallel tool calls for GLM models as they may not support it properly
+		disableParallelToolCalls: true,
+		originalModelId: modelId,
+		displayName,
+	}
+}
+
+/**
+ * Logs GLM model detection results to the console for debugging.
+ *
+ * @param providerName The name of the provider (e.g., "LM Studio", "OpenAI-compatible")
+ * @param modelId The model ID being used
+ * @param config The detected GLM configuration
+ */
+export function logGlmDetection(providerName: string, modelId: string, config: GlmModelConfig): void {
+	console.log(`[${providerName}] Using model ID: "${modelId}"`)
+
+	if (config.isGlmModel) {
+		console.log(`[GLM Detection] ✓ GLM model detected: "${modelId}"`)
+		console.log(`[GLM Detection] - Version: ${config.version}`)
+		console.log(`[GLM Detection] - Variant: ${config.variant}`)
+		console.log(`[GLM Detection] - Display name: ${config.displayName}`)
+		console.log(`[GLM Detection] - Supports vision: ${config.supportsVision}`)
+		console.log(`[GLM Detection] - Supports thinking: ${config.supportsThinking}`)
+		console.log(`[GLM Detection] - mergeToolResultText: ${config.mergeToolResultText}`)
+		console.log(`[GLM Detection] - disableParallelToolCalls: ${config.disableParallelToolCalls}`)
+	} else {
+		console.log(`[GLM Detection] ✗ Not a GLM model: "${modelId}"`)
+	}
+}
+
+/**
+ * Simple check if a model ID is a GLM model without full configuration.
+ * Use this for quick checks where you only need a boolean.
+ *
+ * @param modelId The model identifier string
+ * @returns true if the model is a GLM model
+ */
+export function isGlmModel(modelId: string): boolean {
+	return /glm[-_]?4|chatglm|\/glm[-_]|^glm[-_]/i.test(modelId)
+}
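For reference, the quick boolean check and the full detection path combine the
same way the two handlers above use them (a minimal usage sketch; import path
relative to src/api/providers/utils):

    import { detectGlmModel, isGlmModel, logGlmDetection } from "./glm-model-detection"

    const modelId = "mlx-community/GLM-4.5-4bit"

    if (isGlmModel(modelId)) {
        const config = detectGlmModel(modelId)
        logGlmDetection("LM Studio", modelId, config)
        // config.mergeToolResultText drives convertToZAiFormat(...) and
        // config.disableParallelToolCalls forces parallel_tool_calls: false,
        // exactly as wired into the handlers in this PR.
    }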