diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts
index fc3d769ae2a..be985128953 100644
--- a/src/api/providers/base-openai-compatible-provider.ts
+++ b/src/api/providers/base-openai-compatible-provider.ts
@@ -14,6 +14,7 @@ import { BaseProvider } from "./base-provider"
 import { handleOpenAIError } from "./utils/openai-error-handler"
 import { calculateApiCostOpenAI } from "../../shared/cost"
 import { getApiRequestTimeout } from "./utils/timeout-config"
+import { getGlmModelOptions } from "./utils/model-detection"
 
 type BaseOpenAiCompatibleProviderOptions = ApiHandlerOptions & {
 	providerName: string
@@ -75,6 +76,12 @@ export abstract class BaseOpenAiCompatibleProvider
 	) {
 		const { id: model, info } = this.getModel()
 
+		// Get model-specific options for GLM models (applies Z.ai optimizations)
+		// This allows third-party GLM models via OpenAI-compatible endpoints to benefit
+		// from the same optimizations used by Z.ai
+		console.log(`[${this.providerName}] Using model ID: "${model}"`)
+		const glmOptions = getGlmModelOptions(model)
+
 		// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
 		const max_tokens =
 			getModelMaxOutputTokens({
@@ -86,16 +93,30 @@ export abstract class BaseOpenAiCompatibleProvider
 
 		const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature
 
+		// For GLM models, disable parallel_tool_calls by default as they may not support it
+		// Users can still explicitly enable it via metadata if their model supports it
+		const parallelToolCalls = glmOptions.disableParallelToolCalls
+			? (metadata?.parallelToolCalls ?? false)
+			: (metadata?.parallelToolCalls ?? true)
+
+		console.log(`[${this.providerName}] parallel_tool_calls set to: ${parallelToolCalls}`)
+
+		// Convert messages with GLM-specific handling when applicable
+		// mergeToolResultText prevents GLM models from dropping reasoning_content
+		const convertedMessages = convertToOpenAiMessages(messages, {
+			mergeToolResultText: glmOptions.mergeToolResultText,
+		})
+
 		const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
 			model,
 			max_tokens,
 			temperature,
-			messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
+			messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
 			stream: true,
 			stream_options: { include_usage: true },
 			tools: this.convertToolsForOpenAI(metadata?.tools),
 			tool_choice: metadata?.tool_choice,
-			parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+			parallel_tool_calls: parallelToolCalls,
 		}
 
 		// Add thinking parameter if reasoning is enabled and model supports it
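The ternary above flips only the default: an explicit caller preference in `metadata` always wins, and only GLM models default to `false`. A minimal standalone sketch of that resolution logic (the function name here is hypothetical; the diff keeps the logic inlined in `createMessage`):

```typescript
// Hypothetical extraction of the inlined ternary, shown for illustration.
// An explicit override always wins; only the default flips for GLM models.
function resolveParallelToolCalls(disableByDefault: boolean, override?: boolean): boolean {
	return disableByDefault ? (override ?? false) : (override ?? true)
}

resolveParallelToolCalls(true) // false: GLM model, caller expressed no preference
resolveParallelToolCalls(true, true) // true:  caller explicitly opted in
resolveParallelToolCalls(false) // true:  non-GLM default is unchanged
resolveParallelToolCalls(false, false) // false: caller can still opt out
```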
diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts
index a771394c535..9ff105f4294 100644
--- a/src/api/providers/lm-studio.ts
+++ b/src/api/providers/lm-studio.ts
@@ -17,6 +17,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
 import { getModelsFromCache } from "./fetchers/modelCache"
 import { getApiRequestTimeout } from "./utils/timeout-config"
 import { handleOpenAIError } from "./utils/openai-error-handler"
+import { getGlmModelOptions } from "./utils/model-detection"
 
 export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
@@ -42,9 +43,16 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		// Get model-specific options for GLM models (applies Z.ai optimizations)
+		const modelId = this.getModel().id
+		console.log(`[LM Studio] Using model ID: "${modelId}"`)
+		const glmOptions = getGlmModelOptions(modelId)
+
+		// Convert messages with GLM-specific handling when applicable
+		// mergeToolResultText prevents GLM models from dropping reasoning_content
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
+			...convertToOpenAiMessages(messages, { mergeToolResultText: glmOptions.mergeToolResultText }),
 		]
 
 		// -------------------------
@@ -83,14 +91,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 		let assistantText = ""
 
 		try {
+			// For GLM models, disable parallel_tool_calls by default as they may not support it
+			// Users can still explicitly enable it via metadata if their model supports it
+			const parallelToolCalls = glmOptions.disableParallelToolCalls
+				? (metadata?.parallelToolCalls ?? false)
+				: (metadata?.parallelToolCalls ?? true)
+
+			console.log(`[LM Studio] parallel_tool_calls set to: ${parallelToolCalls}`)
+
 			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
-				model: this.getModel().id,
+				model: modelId,
 				messages: openAiMessages,
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
 				stream: true,
 				tools: this.convertToolsForOpenAI(metadata?.tools),
 				tool_choice: metadata?.tool_choice,
-				parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+				parallel_tool_calls: parallelToolCalls,
 			}
 
 			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
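Both providers thread `mergeToolResultText` into `convertToOpenAiMessages`; the converter itself is not part of this diff. The sketch below only illustrates the documented intent (folding user text that trails a tool result into the last tool message), using hand-written literals rather than real converter output:

```typescript
import OpenAI from "openai"

// Illustration only: hand-written literals, not actual convertToOpenAiMessages output.
// Without the option, a user text turn follows the tool result...
const withoutMerge: OpenAI.Chat.ChatCompletionMessageParam[] = [
	{ role: "tool", tool_call_id: "call_1", content: "42" },
	{ role: "user", content: "Continue with the next step." },
]

// ...and with mergeToolResultText: true, that text rides inside the tool message,
// so GLM models never see the user turn that makes them drop reasoning_content.
const withMerge: OpenAI.Chat.ChatCompletionMessageParam[] = [
	{ role: "tool", tool_call_id: "call_1", content: "42\n\nContinue with the next step." },
]
```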
diff --git a/src/api/providers/utils/__tests__/model-detection.spec.ts b/src/api/providers/utils/__tests__/model-detection.spec.ts
new file mode 100644
index 00000000000..b551b4670b0
--- /dev/null
+++ b/src/api/providers/utils/__tests__/model-detection.spec.ts
@@ -0,0 +1,113 @@
+import { isGlmModel, getGlmModelOptions, GlmModelOptions } from "../model-detection"
+
+describe("isGlmModel", () => {
+	describe("GLM model detection", () => {
+		it("should detect official GLM model names with dash", () => {
+			expect(isGlmModel("glm-4")).toBe(true)
+			expect(isGlmModel("glm-4.5")).toBe(true)
+			expect(isGlmModel("glm-4.7")).toBe(true)
+			expect(isGlmModel("glm-4-plus")).toBe(true)
+		})
+
+		it("should detect GLM models with uppercase", () => {
+			expect(isGlmModel("GLM-4")).toBe(true)
+			expect(isGlmModel("GLM-4.5")).toBe(true)
+			expect(isGlmModel("GLM-4.7")).toBe(true)
+		})
+
+		it("should detect compact GLM model names without dash", () => {
+			expect(isGlmModel("glm4")).toBe(true)
+			expect(isGlmModel("GLM4")).toBe(true)
+			expect(isGlmModel("glm4-9b")).toBe(true)
+		})
+
+		it("should detect LM Studio GGUF model names", () => {
+			expect(isGlmModel("GLM4-9B-Chat-GGUF")).toBe(true)
+			expect(isGlmModel("glm4-9b-chat-gguf")).toBe(true)
+		})
+
+		it("should detect ChatGLM models", () => {
+			expect(isGlmModel("chatglm")).toBe(true)
+			expect(isGlmModel("ChatGLM")).toBe(true)
+			expect(isGlmModel("chatglm-6b")).toBe(true)
+			expect(isGlmModel("chatglm3-6b")).toBe(true)
+		})
+	})
+
+	describe("non-GLM model detection", () => {
+		it("should not detect OpenAI models as GLM", () => {
+			expect(isGlmModel("gpt-4")).toBe(false)
+			expect(isGlmModel("gpt-4-turbo")).toBe(false)
+			expect(isGlmModel("gpt-3.5-turbo")).toBe(false)
+			expect(isGlmModel("o1-preview")).toBe(false)
+		})
+
+		it("should not detect Anthropic models as GLM", () => {
+			expect(isGlmModel("claude-3")).toBe(false)
+			expect(isGlmModel("claude-3-sonnet")).toBe(false)
+			expect(isGlmModel("claude-3-opus")).toBe(false)
+		})
+
+		it("should not detect DeepSeek models as GLM", () => {
+			expect(isGlmModel("deepseek-coder")).toBe(false)
+			expect(isGlmModel("deepseek-reasoner")).toBe(false)
+		})
+
+		it("should not detect Gemini models as GLM", () => {
+			expect(isGlmModel("gemini-pro")).toBe(false)
+			expect(isGlmModel("gemini-2-flash")).toBe(false)
+		})
+
+		it("should not detect Qwen models as GLM", () => {
+			expect(isGlmModel("qwen-7b")).toBe(false)
+			expect(isGlmModel("qwen2-7b")).toBe(false)
+		})
+
+		it("should not detect Llama models as GLM", () => {
+			expect(isGlmModel("llama-2-7b")).toBe(false)
+			expect(isGlmModel("llama-3-8b")).toBe(false)
+			expect(isGlmModel("codellama")).toBe(false)
+		})
+	})
+
+	describe("edge cases", () => {
+		it("should handle empty string", () => {
+			expect(isGlmModel("")).toBe(false)
+		})
+
+		it("should handle undefined-like values", () => {
+			expect(isGlmModel(null as unknown as string)).toBe(false)
+			expect(isGlmModel(undefined as unknown as string)).toBe(false)
+		})
+
+		it("should not match 'glm' in the middle of unrelated model names", () => {
+			// This tests that we're not accidentally matching "glm" as a substring
+			// in unrelated contexts
+			expect(isGlmModel("myglmodel")).toBe(false)
+			expect(isGlmModel("some-glm-inspired-model")).toBe(false)
+		})
+	})
+})
+
+describe("getGlmModelOptions", () => {
+	it("should return GLM-optimized options for GLM models", () => {
+		const options = getGlmModelOptions("glm-4.5")
+
+		expect(options.mergeToolResultText).toBe(true)
+		expect(options.disableParallelToolCalls).toBe(true)
+	})
+
+	it("should return default options for non-GLM models", () => {
+		const options = getGlmModelOptions("gpt-4")
+
+		expect(options.mergeToolResultText).toBe(false)
+		expect(options.disableParallelToolCalls).toBe(false)
+	})
+
+	it("should return the correct type", () => {
+		const options: GlmModelOptions = getGlmModelOptions("glm-4")
+
+		expect(options).toHaveProperty("mergeToolResultText")
+		expect(options).toHaveProperty("disableParallelToolCalls")
+	})
+})
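One boundary the edge cases above imply but never pin down: because the check is prefix-based, org-qualified model IDs are not detected. A quick spot check against the implementation (these model IDs are illustrative, not taken from the diff):

```typescript
import { isGlmModel } from "../model-detection" // same relative path as the spec file

isGlmModel("glm-4.6") // true:  any "glm-" prefix matches, not only the versions tested above
isGlmModel("THUDM/chatglm3-6b") // true:  "chatglm" matches anywhere in the ID
isGlmModel("zai-org/GLM-4.5-Air") // false: "glm-" must be a prefix, so org-qualified IDs miss
```

If a server reports HuggingFace-style paths, callers may want to strip the org prefix before detection.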
getGlmModelOptions("glm-4.5") + + expect(options.mergeToolResultText).toBe(true) + expect(options.disableParallelToolCalls).toBe(true) + }) + + it("should return default options for non-GLM models", () => { + const options = getGlmModelOptions("gpt-4") + + expect(options.mergeToolResultText).toBe(false) + expect(options.disableParallelToolCalls).toBe(false) + }) + + it("should return the correct type", () => { + const options: GlmModelOptions = getGlmModelOptions("glm-4") + + expect(options).toHaveProperty("mergeToolResultText") + expect(options).toHaveProperty("disableParallelToolCalls") + }) +}) diff --git a/src/api/providers/utils/model-detection.ts b/src/api/providers/utils/model-detection.ts new file mode 100644 index 00000000000..3ba660e8961 --- /dev/null +++ b/src/api/providers/utils/model-detection.ts @@ -0,0 +1,86 @@ +/** + * Utility functions for detecting model types based on model ID patterns. + * These functions help providers apply model-specific handling for third-party + * models running on LM Studio, OpenAI-compatible endpoints, etc. + */ + +/** + * Detects if a model ID represents a GLM (General Language Model) from Zhipu AI. + * + * GLM models (like GLM-4, GLM-4.5, GLM-4.7) have specific requirements: + * - They benefit from `mergeToolResultText: true` to avoid dropping reasoning_content + * - They may not support `parallel_tool_calls` parameter + * + * This detection allows LM Studio and OpenAI-compatible providers to apply + * the same optimizations that Z.ai uses for GLM models. + * + * @param modelId - The model identifier (e.g., "glm-4.5", "GLM4-9B-Chat-GGUF") + * @returns true if the model is a GLM model, false otherwise + * + * @example + * ```typescript + * isGlmModel("glm-4.5") // true + * isGlmModel("GLM4-9B-Chat-GGUF") // true + * isGlmModel("glm-4.7") // true + * isGlmModel("gpt-4") // false + * isGlmModel("claude-3") // false + * ``` + */ +export function isGlmModel(modelId: string): boolean { + if (!modelId) { + return false + } + + // Case-insensitive check for "glm" prefix or pattern + // Matches: glm-4, glm-4.5, glm-4.7, GLM4-9B-Chat, glm4, etc. + const lowerModelId = modelId.toLowerCase() + + // Check for common GLM model patterns: + // - "glm-" prefix (official naming: glm-4, glm-4.5, glm-4.7) + // - "glm4" (compact naming without dash) + // - "chatglm" (older ChatGLM models) + return lowerModelId.startsWith("glm-") || lowerModelId.startsWith("glm4") || lowerModelId.includes("chatglm") +} + +/** + * Configuration options for GLM model-specific handling. + * These options are derived from Z.ai's optimizations for GLM models. + */ +export interface GlmModelOptions { + /** + * Whether to merge text content after tool_results into the last tool message. + * This prevents GLM models from dropping reasoning_content when they see + * a user message after tool results. + */ + mergeToolResultText: boolean + + /** + * Whether to disable parallel_tool_calls for this model. + * GLM models may not support this parameter and can behave unexpectedly + * when it's enabled. + */ + disableParallelToolCalls: boolean +} + +/** + * Returns the recommended configuration options for a GLM model. + * Non-GLM models will receive default options that maintain existing behavior. 
+ *
+ * @param modelId - The model identifier
+ * @returns Configuration options for the model
+ */
+export function getGlmModelOptions(modelId: string): GlmModelOptions {
+	const isGlm = isGlmModel(modelId)
+
+	// Log GLM model detection result for diagnostics
+	if (isGlm) {
+		console.log(`[GLM Detection] ✓ GLM model detected: "${modelId}"`)
+		console.log(`[GLM Detection] - mergeToolResultText: true`)
+		console.log(`[GLM Detection] - disableParallelToolCalls: true`)
+	}
+
+	return {
+		mergeToolResultText: isGlm,
+		disableParallelToolCalls: isGlm,
+	}
+}
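Taken together, a provider consumes the helper at two points: message conversion and request parameters. A condensed wiring sketch (the `buildParams` helper is hypothetical and elides the real OpenAI params type; it only mirrors how both providers in this diff use the options):

```typescript
import { getGlmModelOptions } from "./model-detection" // path assumes a sibling module

// Hypothetical helper condensing the per-provider wiring shown in this diff.
function buildParams(modelId: string, callerParallelToolCalls?: boolean) {
	const glmOptions = getGlmModelOptions(modelId)

	return {
		model: modelId,
		// messages would come from convertToOpenAiMessages(messages, {
		//   mergeToolResultText: glmOptions.mergeToolResultText,
		// })
		parallel_tool_calls: glmOptions.disableParallelToolCalls
			? (callerParallelToolCalls ?? false)
			: (callerParallelToolCalls ?? true),
	}
}

buildParams("GLM4-9B-Chat-GGUF").parallel_tool_calls // false: GLM detected via "glm4" prefix
buildParams("gpt-4").parallel_tool_calls // true:  non-GLM behavior is unchanged
```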