57 changes: 55 additions & 2 deletions src/api/providers/base-openai-compatible-provider.ts
@@ -7,13 +7,15 @@ import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/ap
import { TagMatcher } from "../../utils/tag-matcher"
import { ApiStream, ApiStreamUsageChunk } from "../transform/stream"
import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToZAiFormat } from "../transform/zai-format"

import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { DEFAULT_HEADERS } from "./constants"
import { BaseProvider } from "./base-provider"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { calculateApiCostOpenAI } from "../../shared/cost"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"

type BaseOpenAiCompatibleProviderOptions<ModelName extends string> = ApiHandlerOptions & {
providerName: string
@@ -36,6 +38,7 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
protected readonly options: ApiHandlerOptions

protected client: OpenAI
protected glmConfig: GlmModelConfig | null = null

constructor({
providerName,
@@ -65,6 +68,13 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
defaultHeaders: DEFAULT_HEADERS,
timeout: getApiRequestTimeout(),
})

// Detect GLM model on construction if model ID is available
const modelId = this.options.apiModelId || ""
if (modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}
}

protected createStream(
@@ -75,6 +85,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
) {
const { id: model, info } = this.getModel()

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== model) {
this.glmConfig = detectGlmModel(model)
logGlmDetection(this.providerName, model, this.glmConfig)
}

// Centralized cap: clamp to 20% of the context window (unless provider-specific exceptions apply)
const max_tokens =
getModelMaxOutputTokens({
@@ -86,23 +102,48 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>

const temperature = this.options.modelTemperature ?? info.defaultTemperature ?? this.defaultTemperature

// Convert messages based on whether this is a GLM model
// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
const convertedMessages = this.glmConfig.isGlmModel
? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
: convertToOpenAiMessages(messages)

// Determine parallel_tool_calls setting
// Disable for GLM models as they may not support it properly
let parallelToolCalls: boolean
if (this.glmConfig.isGlmModel && this.glmConfig.disableParallelToolCalls) {
parallelToolCalls = false
console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
} else {
parallelToolCalls = metadata?.parallelToolCalls ?? true
}

const params: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = {
model,
max_tokens,
temperature,
messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)],
messages: [{ role: "system", content: systemPrompt }, ...convertedMessages],
stream: true,
stream_options: { include_usage: true },
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}

// Add thinking parameter if reasoning is enabled and model supports it
if (this.options.enableReasoningEffort && info.supportsReasoningBinary) {
;(params as any).thinking = { type: "enabled" }
}

// For GLM-4.7 models with thinking support, add thinking parameter
if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
const useReasoning = this.options.enableReasoningEffort !== false // Default to enabled for GLM-4.7
;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
console.log(
`[${this.providerName}] GLM thinking mode: ${useReasoning ? "enabled" : "disabled"} for ${this.glmConfig.displayName}`,
)
}

try {
return this.client.chat.completions.create(params, requestOptions)
} catch (error) {
@@ -222,6 +263,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
async completePrompt(prompt: string): Promise<string> {
const { id: modelId, info: modelInfo } = this.getModel()

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}

const params: OpenAI.Chat.Completions.ChatCompletionCreateParams = {
model: modelId,
messages: [{ role: "user", content: prompt }],
Expand All @@ -232,6 +279,12 @@ export abstract class BaseOpenAiCompatibleProvider<ModelName extends string>
;(params as any).thinking = { type: "enabled" }
}

// For GLM-4.7 models with thinking support, add thinking parameter
if (this.glmConfig.isGlmModel && this.glmConfig.supportsThinking) {
const useReasoning = this.options.enableReasoningEffort !== false
;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
}

try {
const response = await this.client.chat.completions.create(params)

62 changes: 58 additions & 4 deletions src/api/providers/lm-studio.ts
@@ -10,18 +10,21 @@ import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCal
import { TagMatcher } from "../../utils/tag-matcher"

import { convertToOpenAiMessages } from "../transform/openai-format"
import { convertToZAiFormat } from "../transform/zai-format"
import { ApiStream } from "../transform/stream"

import { BaseProvider } from "./base-provider"
import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index"
import { getModelsFromCache } from "./fetchers/modelCache"
import { getApiRequestTimeout } from "./utils/timeout-config"
import { handleOpenAIError } from "./utils/openai-error-handler"
import { detectGlmModel, logGlmDetection, type GlmModelConfig } from "./utils/glm-model-detection"

export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
protected options: ApiHandlerOptions
private client: OpenAI
private readonly providerName = "LM Studio"
private glmConfig: GlmModelConfig | null = null

constructor(options: ApiHandlerOptions) {
super()
@@ -35,16 +38,37 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
apiKey: apiKey,
timeout: getApiRequestTimeout(),
})

// Detect GLM model on construction if model ID is available
const modelId = this.options.lmStudioModelId || ""
if (modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}
}

override async *createMessage(
systemPrompt: string,
messages: Anthropic.Messages.MessageParam[],
metadata?: ApiHandlerCreateMessageMetadata,
): ApiStream {
const modelId = this.getModel().id

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}

// Convert messages based on whether this is a GLM model
// GLM models benefit from mergeToolResultText to prevent reasoning_content loss
const convertedMessages = this.glmConfig.isGlmModel
? convertToZAiFormat(messages, { mergeToolResultText: this.glmConfig.mergeToolResultText })
: convertToOpenAiMessages(messages)

const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
{ role: "system", content: systemPrompt },
...convertToOpenAiMessages(messages),
...convertedMessages,
]

// -------------------------
@@ -83,14 +107,24 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
let assistantText = ""

try {
// Determine parallel_tool_calls setting
// Disable for GLM models as they may not support it properly
let parallelToolCalls: boolean
if (this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls) {
parallelToolCalls = false
console.log(`[${this.providerName}] parallel_tool_calls disabled for GLM model`)
} else {
parallelToolCalls = metadata?.parallelToolCalls ?? true
}

const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
model: this.getModel().id,
model: modelId,
messages: openAiMessages,
temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
stream: true,
tools: this.convertToolsForOpenAI(metadata?.tools),
tool_choice: metadata?.tool_choice,
parallel_tool_calls: metadata?.parallelToolCalls ?? true,
parallel_tool_calls: parallelToolCalls,
}
Review comment (Contributor Author):
Missing thinking parameter for GLM-4.7 models. Unlike BaseOpenAiCompatibleProvider, this handler does not add the thinking parameter to requests when this.glmConfig.supportsThinking is true. While the code correctly handles reasoning_content in responses (lines 161-167), without sending the thinking parameter, GLM-4.7's thinking mode won't be activated. Consider adding the same logic used in BaseOpenAiCompatibleProvider (lines 138-145) here.


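A minimal sketch of that fix, assuming the same GlmModelConfig fields already used in BaseOpenAiCompatibleProvider above, and placed right after the params object is built in createMessage:

if (this.glmConfig?.isGlmModel && this.glmConfig.supportsThinking) {
	// Mirror the base provider: default to enabled unless reasoning effort is explicitly disabled.
	const useReasoning = this.options.enableReasoningEffort !== false
	;(params as any).thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
	console.log(
		`[${this.providerName}] GLM thinking mode: ${useReasoning ? "enabled" : "disabled"} for ${this.glmConfig.displayName}`,
	)
}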

if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
@@ -124,6 +158,14 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
}
}

// Handle reasoning_content for GLM models with thinking support
if (delta && this.glmConfig?.supportsThinking) {
const deltaAny = delta as any
if (deltaAny.reasoning_content) {
yield { type: "reasoning", text: deltaAny.reasoning_content }
}
}

// Handle tool calls in stream - emit partial chunks for NativeToolCallParser
if (delta?.tool_calls) {
for (const toolCall of delta.tool_calls) {
@@ -186,10 +228,22 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
}

async completePrompt(prompt: string): Promise<string> {
const modelId = this.getModel().id

// Re-detect GLM model if not already done or if model ID changed
if (!this.glmConfig || this.glmConfig.originalModelId !== modelId) {
this.glmConfig = detectGlmModel(modelId)
logGlmDetection(this.providerName, modelId, this.glmConfig)
}

try {
// Determine parallel_tool_calls setting for GLM models
const parallelToolCalls =
this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls ? false : true
Review comment (Contributor Author) on lines +240 to +242:
Dead code: parallelToolCalls is computed here but never used. The params object below does not include parallel_tool_calls. Either add it to params or remove this computation.

Suggested change (delete these three lines):
// Determine parallel_tool_calls setting for GLM models
const parallelToolCalls =
this.glmConfig?.isGlmModel && this.glmConfig.disableParallelToolCalls ? false : true

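If the first option were preferred instead, a minimal sketch would be to attach the flag once the params object below has been built:

// Hypothetical wiring for the "add it to params" option. completePrompt currently sends
// no tools, and some OpenAI-compatible servers reject parallel_tool_calls without a
// tools array, so the removal suggested above is likely the safer fix.
params.parallel_tool_calls = parallelToolCalls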


// Create params object with optional draft model
const params: any = {
model: this.getModel().id,
model: modelId,
messages: [{ role: "user", content: prompt }],
temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
stream: false,
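For reference, both handlers rely on the following fields of GlmModelConfig from src/api/providers/utils/glm-model-detection.ts, which is not shown in this diff. The sketch below is inferred from the call sites above, not the actual declaration; types and any additional fields are assumptions.

// Shape inferred from usage: detectGlmModel(modelId) returns a config (isGlmModel appears to be
// false for non-GLM models, given the branching above), and logGlmDetection(providerName, modelId,
// config) logs the detection result.
interface GlmModelConfig {
	isGlmModel: boolean
	originalModelId: string
	displayName: string
	supportsThinking: boolean
	mergeToolResultText: boolean
	disableParallelToolCalls: boolean
}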