25 changes: 23 additions & 2 deletions src/api/providers/base-openai-compatible-provider.ts
@@ -14,6 +14,7 @@ import { BaseProvider } from "./base-provider"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { calculateApiCostOpenAI } from "../../shared/cost"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { getGlmModelOptions } from "./utils/model-detection"

type BaseOpenAiCompatibleProviderOptions<ModelName extends string> = ApiHandlerOptions & {
providerName: string
@@ -75,6 +76,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
) {
const { id: model, info } = this.getModel()

// Get model-specific options for GLM models (applies Z.ai optimizations)
// This allows third-party GLM models via OpenAI-compatible endpoints to benefit
// from the same optimizations used by Z.ai
console.log(`[${this.providerName}] Using model ID: "${model}"`)
const glmOptions = getGlmModelOptions(model)

// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
const max_tokens =
getModelMaxOutputTokens({
@@ -86,16 +93,30 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>

const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature

// For GLM models, disable parallel_tool_calls by default as they may not support it
// Users can still explicitly enable it via metadata if their model supports it
const parallelToolCalls = glmOptions.disableParallelToolCalls
? (metadata?.parallelToolCalls ?? false)
: (metadata?.parallelToolCalls ?? true)

console.log(`[${this.providerName}] parallel_tool_calls set to: ${parallelToolCalls}`)

// Convert messages with GLM-specific handling when applicable
// mergeToolResultText prevents GLM models from dropping reasoning_content
const convertedMessages = convertToOpenAiMessages(messages, {
mergeToolResultText: glmOptions.mergeToolResultText,
})

const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
stream: true,
stream_options: { include_usage: true },
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

// Add thinking parameter if reasoning is enabled and model supports it
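A minimal sketch of the fallback implemented by the ternary above: GLM models default parallel_tool_calls to false, other models default to true, and an explicit metadata value always wins. The helper name is illustrative and not part of this change.

import { getGlmModelOptions } from "./utils/model-detection"

// Illustrative helper, equivalent to the inline ternary added in this hunk.
function resolveParallelToolCalls(modelId: string, explicit?: boolean): boolean {
    const { disableParallelToolCalls } = getGlmModelOptions(modelId)
    // An explicit caller preference always takes precedence over the model default.
    return explicit ?? !disableParallelToolCalls
}

resolveParallelToolCalls("glm-4.5") // false: GLM default
resolveParallelToolCalls("glm-4.5", true) // true: explicit override
resolveParallelToolCalls("gpt-4") // true: non-GLM default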
22 changes: 19 additions & 3 deletions src/api/providers/lm-studio.ts
@@ -17,6 +17,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
import { getModelsFromCache } from "./fetchers/modelCache"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { getGlmModelOptions } from "./utils/model-detection"

export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
protected options: ApiHandlerOptions
Expand All @@ -42,9 +43,16 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
// Get model-specific options for GLM models (applies Z.ai optimizations)
const modelId = this.getModel().id
console.log(`[LM Studio] Using model ID: "${modelId}"`)
const glmOptions = getGlmModelOptions(modelId)

// Convert messages with GLM-specific handling when applicable
// mergeToolResultText prevents GLM models from dropping reasoning_content
const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
...convertToOpenAiMessages(messages),
...convertToOpenAiMessages(messages, { mergeToolResultText: glmOptions.mergeToolResultText }),
]

// -------------------------
@@ -83,14 +91,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler
let assistantText = ""

try {
// For GLM models, disable parallel_tool_calls by default as they may not support it
// Users can still explicitly enable it via metadata if their model supports it
const parallelToolCalls = glmOptions.disableParallelToolCalls
? (metadata?.parallelToolCalls ?? false)
: (metadata?.parallelToolCalls ?? true)

console.log(`[LM Studio] parallel_tool_calls set to: ${parallelToolCalls}`)

const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
model: this.getModel().id,
model: modelId,
messages: openAiMessages,
temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
stream: true,
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
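For context, the intent behind mergeToolResultText, as described in the comments here and in model-detection.ts, is roughly the following. This is a hypothetical illustration of the resulting message shape, not the actual output of convertToOpenAiMessages.

// Hypothetical illustration only: trailing user text after a tool result is
// folded into the tool message, so the GLM model never sees a separate user
// turn that would cause it to drop reasoning_content.

// Without merging (default behaviour):
const unmerged = [
    { role: "tool", tool_call_id: "call_1", content: "42 files found" },
    { role: "user", content: "Continue with the next step." },
]

// With merging (GLM handling):
const merged = [
    { role: "tool", tool_call_id: "call_1", content: "42 files found\n\nContinue with the next step." },
]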
113 changes: 113 additions & 0 deletions src/api/providers/utils/__tests__/model-detection.spec.ts
@@ -0,0 +1,113 @@
import { isGlmModel, getGlmModelOptions, GlmModelOptions } from "../model-detection"

describe("isGlmModel", () => {
describe("GLM model detection", () => {
it("should detect official GLM model names with dash", () => {
expect(isGlmModel("glm-4")).toBe(true)
expect(isGlmModel("glm-4.5")).toBe(true)
expect(isGlmModel("glm-4.7")).toBe(true)
expect(isGlmModel("glm-4-plus")).toBe(true)
})

it("should detect GLM models with uppercase", () => {
expect(isGlmModel("GLM-4")).toBe(true)
expect(isGlmModel("GLM-4.5")).toBe(true)
expect(isGlmModel("GLM-4.7")).toBe(true)
})

it("should detect compact GLM model names without dash", () => {
expect(isGlmModel("glm4")).toBe(true)
expect(isGlmModel("GLM4")).toBe(true)
expect(isGlmModel("glm4-9b")).toBe(true)
})

it("should detect LM Studio GGUF model names", () => {
expect(isGlmModel("GLM4-9B-Chat-GGUF")).toBe(true)
expect(isGlmModel("glm4-9b-chat-gguf")).toBe(true)
})

it("should detect ChatGLM models", () => {
expect(isGlmModel("chatglm")).toBe(true)
expect(isGlmModel("ChatGLM")).toBe(true)
expect(isGlmModel("chatglm-6b")).toBe(true)
expect(isGlmModel("chatglm3-6b")).toBe(true)
})
})

describe("non-GLM model detection", () => {
it("should not detect OpenAI models as GLM", () => {
expect(isGlmModel("gpt-4")).toBe(false)
expect(isGlmModel("gpt-4-turbo")).toBe(false)
expect(isGlmModel("gpt-3.5-turbo")).toBe(false)
expect(isGlmModel("o1-preview")).toBe(false)
})

it("should not detect Anthropic models as GLM", () => {
expect(isGlmModel("claude-3")).toBe(false)
expect(isGlmModel("claude-3-sonnet")).toBe(false)
expect(isGlmModel("claude-3-opus")).toBe(false)
})

it("should not detect DeepSeek models as GLM", () => {
expect(isGlmModel("deepseek-coder")).toBe(false)
expect(isGlmModel("deepseek-reasoner")).toBe(false)
})

it("should not detect Gemini models as GLM", () => {
expect(isGlmModel("gemini-pro")).toBe(false)
expect(isGlmModel("gemini-2-flash")).toBe(false)
})

it("should not detect Qwen models as GLM", () => {
expect(isGlmModel("qwen-7b")).toBe(false)
expect(isGlmModel("qwen2-7b")).toBe(false)
})

it("should not detect Llama models as GLM", () => {
expect(isGlmModel("llama-2-7b")).toBe(false)
expect(isGlmModel("llama-3-8b")).toBe(false)
expect(isGlmModel("codellama")).toBe(false)
})
})

describe("edge cases", () => {
it("should handle empty string", () => {
expect(isGlmModel("")).toBe(false)
})

it("should handle undefined-like values", () => {
expect(isGlmModel(null as unknown as string)).toBe(false)
expect(isGlmModel(undefined as unknown as string)).toBe(false)
})

it("should not match 'glm' in the middle of unrelated model names", () => {
// This tests that we're not accidentally matching "glm" as a substring
// in unrelated contexts
expect(isGlmModel("myglmodel")).toBe(false)
expect(isGlmModel("some-glm-inspired-model")).toBe(false)
})
})
})

describe("getGlmModelOptions", () => {
it("should return GLM-optimized options for GLM models", () => {
const options = getGlmModelOptions("glm-4.5")

expect(options.mergeToolResultText).toBe(true)
expect(options.disableParallelToolCalls).toBe(true)
})

it("should return default options for non-GLM models", () => {
const options = getGlmModelOptions("gpt-4")

expect(options.mergeToolResultText).toBe(false)
expect(options.disableParallelToolCalls).toBe(false)
})

it("should return the correct type", () => {
const options: GlmModelOptions = getGlmModelOptions("glm-4")

expect(options).toHaveProperty("mergeToolResultText")
expect(options).toHaveProperty("disableParallelToolCalls")
})
})
86 changes: 86 additions & 0 deletions src/api/providers/utils/model-detection.ts
@@ -0,0 +1,86 @@
/**
* Utility functions for detecting model types based on model ID patterns.
* These functions help providers apply model-specific handling for third-party
* models running on LM Studio, OpenAI-compatible endpoints, etc.
*/

/**
* Detects if a model ID represents a GLM (General Language Model) from Zhipu AI.
*
* GLM models (like GLM-4, GLM-4.5, GLM-4.7) have specific requirements:
* - They benefit from `mergeToolResultText: true` to avoid dropping reasoning_content
* - They may not support `parallel_tool_calls` parameter
*
* This detection allows LM Studio and OpenAI-compatible providers to apply
* the same optimizations that Z.ai uses for GLM models.
*
* @param modelId - The model identifier (e.g., "glm-4.5", "GLM4-9B-Chat-GGUF")
* @returns true if the model is a GLM model, false otherwise
*
* @example
* ```typescript
* isGlmModel("glm-4.5") // true
* isGlmModel("GLM4-9B-Chat-GGUF") // true
* isGlmModel("glm-4.7") // true
* isGlmModel("gpt-4") // false
* isGlmModel("claude-3") // false
* ```
*/
export function isGlmModel(modelId: string): boolean {
if (!modelId) {
return false
}

// Case-insensitive check for "glm" prefix or pattern
// Matches: glm-4, glm-4.5, glm-4.7, GLM4-9B-Chat, glm4, etc.
const lowerModelId = modelId.toLowerCase()

// Check for common GLM model patterns:
// - "glm-" prefix (official naming: glm-4, glm-4.5, glm-4.7)
// - "glm4" (compact naming without dash)
// - "chatglm" (older ChatGLM models)
return lowerModelId.startsWith("glm-") || lowerModelId.startsWith("glm4") || lowerModelId.includes("chatglm")
}

/**
* Configuration options for GLM model-specific handling.
* These options are derived from Z.ai's optimizations for GLM models.
*/
export interface GlmModelOptions {
/**
* Whether to merge text content after tool_results into the last tool message.
* This prevents GLM models from dropping reasoning_content when they see
* a user message after tool results.
*/
mergeToolResultText: boolean

/**
* Whether to disable parallel_tool_calls for this model.
* GLM models may not support this parameter and can behave unexpectedly
* when it's enabled.
*/
disableParallelToolCalls: boolean
}

/**
* Returns the recommended configuration options for a GLM model.
* Non-GLM models will receive default options that maintain existing behavior.
*
* @param modelId - The model identifier
* @returns Configuration options for the model
*/
export function getGlmModelOptions(modelId: string): GlmModelOptions {
const isGlm = isGlmModel(modelId)

// Log GLM model detection result for diagnostics
if (isGlm) {
console.log(`[GLM Detection] ✓ GLM model detected: "${modelId}"`)
console.log(`[GLM Detection] - mergeToolResultText: true`)
console.log(`[GLM Detection] - disableParallelToolCalls: true`)
}

return {
mergeToolResultText: isGlm,
disableParallelToolCalls: isGlm,
}
}
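Taken together, a provider other than the two updated in this PR could consume the utility the same way. A minimal sketch mirroring the handler changes above, assuming the consumer sits alongside model-detection.ts; variable names are illustrative:

import { getGlmModelOptions } from "./model-detection"

const modelId = "glm-4.5"
const glm = getGlmModelOptions(modelId)

// Feed the detection result into message conversion and request parameters,
// along the lines of the base OpenAI-compatible and LM Studio handlers above.
const conversionOptions = { mergeToolResultText: glm.mergeToolResultText }

// Respect an explicit caller preference (metadata?.parallelToolCalls) when present.
const explicit: boolean | undefined = undefined
const parallelToolCalls = glm.disableParallelToolCalls ? (explicit ?? false) : (explicit ?? true)

console.log(conversionOptions, parallelToolCalls) // { mergeToolResultText: true } false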