feat: add GLM-4.7+ thinking mode support for LM Studio and OpenAI-compatible endpoints (#11071) #11090
Changes from all commits
```diff
@@ -4,7 +4,7 @@ import axios from "axios"
 import { type ModelInfo, openAiModelInfoSaneDefaults, LMSTUDIO_DEFAULT_TEMPERATURE } from "@roo-code/types"
-import type { ApiHandlerOptions } from "../../shared/api"
+import { type ApiHandlerOptions, shouldUseReasoningEffort } from "../../shared/api"
 import { NativeToolCallParser } from "../../core/assistant-message/NativeToolCallParser"
 import { TagMatcher } from "../../utils/tag-matcher"
@@ -17,6 +17,13 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ".
 import { getModelsFromCache } from "./fetchers/modelCache"
 import { getApiRequestTimeout } from "./utils/timeout-config"
 import { handleOpenAIError } from "./utils/openai-error-handler"
+import { getGlmModelOptions } from "./utils/glm-model-detection"
+
+// Extended chat completion params to support thinking mode for GLM-4.7+
+type ChatCompletionParamsWithThinking = OpenAI.Chat.ChatCompletionCreateParamsStreaming & {
+	thinking?: { type: "enabled" | "disabled" }
+	draft_model?: string
+}
 
 export class LmStudioHandler extends BaseProvider implements SingleCompletionHandler {
 	protected options: ApiHandlerOptions
@@ -42,9 +49,16 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		const { id: modelId, info: modelInfo } = this.getModel()
+
+		// Check if this is a GLM model and get recommended options
+		const glmOptions = getGlmModelOptions(modelId)
+
 		const openAiMessages: OpenAI.Chat.ChatCompletionMessageParam[] = [
 			{ role: "system", content: systemPrompt },
-			...convertToOpenAiMessages(messages),
+			...convertToOpenAiMessages(messages, {
+				mergeToolResultText: glmOptions?.mergeToolResultText ?? false,
+			}),
 		]
 
 		// -------------------------
@@ -83,20 +97,31 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan
 		let assistantText = ""
 
 		try {
-			const params: OpenAI.Chat.ChatCompletionCreateParamsStreaming & { draft_model?: string } = {
-				model: this.getModel().id,
+			// For GLM models, disable parallel_tool_calls as they may not support it
+			const parallelToolCalls = glmOptions?.disableParallelToolCalls
+				? false
+				: (metadata?.parallelToolCalls ?? true)
+
+			const params: ChatCompletionParamsWithThinking = {
+				model: modelId,
 				messages: openAiMessages,
 				temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE,
 				stream: true,
 				tools: this.convertToolsForOpenAI(metadata?.tools),
 				tool_choice: metadata?.tool_choice,
-				parallel_tool_calls: metadata?.parallelToolCalls ?? true,
+				parallel_tool_calls: parallelToolCalls,
 			}
 
 			if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) {
 				params.draft_model = this.options.lmStudioDraftModelId
 			}
 
+			// For GLM-4.7+ models, add thinking mode support similar to Z.ai
+			if (glmOptions?.supportsThinking) {
+				const useReasoning = shouldUseReasoningEffort({ model: modelInfo, settings: this.options })
+				params.thinking = useReasoning ? { type: "enabled" } : { type: "disabled" }
+			}
+
```
Comment on lines +119 to +123 (Contributor, Author)

Fix it with Roo Code or mention @roomote and request a fix.
The hunk then continues:

```diff
 			let results
 			try {
 				results = await this.client.chat.completions.create(params)
```
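The handler changes above lean on the new `getGlmModelOptions` helper imported from `./utils/glm-model-detection`, whose implementation is not part of the hunks shown here. Purely as an illustration of the shape the handler expects, it might look roughly like the sketch below; the field names come from how `LmStudioHandler` uses the result, while the model-ID matching and the chosen values are assumptions, not the PR's actual code.

```typescript
// Hypothetical sketch only: the real helper lives in ./utils/glm-model-detection and is
// not shown in this diff. Field names are taken from how LmStudioHandler uses the result;
// the model-ID matching and the returned values are assumptions.
export interface GlmModelOptions {
	supportsThinking: boolean // GLM-4.7+ accepts a vendor `thinking` request parameter
	mergeToolResultText: boolean // merge tool-result text blocks for GLM chat templates
	disableParallelToolCalls: boolean // GLM models may not support parallel_tool_calls
}

export function getGlmModelOptions(modelId: string): GlmModelOptions | undefined {
	const id = modelId.toLowerCase()
	if (!id.includes("glm")) {
		return undefined // not a GLM model: leave the handler's defaults untouched
	}
	// Assumed: only GLM 4.7 and newer get thinking mode; older GLM builds still get the
	// message-conversion and tool-call compatibility tweaks.
	const isGlm47OrNewer = /glm[-_ ]?4\.(?:[7-9]|\d{2,})/.test(id) || /glm[-_ ]?[5-9]/.test(id)
	return {
		supportsThinking: isGlm47OrNewer,
		mergeToolResultText: true,
		disableParallelToolCalls: true,
	}
}
```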
Same issue as in `lm-studio.ts`: `shouldUseReasoningEffort()` will always return `false` for models that use default model info (e.g., dynamic providers) because `openAiModelInfoSaneDefaults` lacks the `supportsReasoningEffort` and `reasoningEffort` properties. The thinking parameter will always be set to `disabled` for GLM-4.7+ models using generic OpenAI-compatible providers.

Fix it with Roo Code or mention @roomote and request a fix.
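To make that failure mode concrete, here is a minimal sketch of the gate the comment describes; it is not the actual `shouldUseReasoningEffort` from `../../shared/api`, only an illustration of why default model info can never enable thinking.

```typescript
// Illustration only, not the shared/api implementation. Per the review comment, the real
// check keys off two optional ModelInfo fields that the OpenAI-compatible defaults omit.
interface ModelInfoLike {
	supportsReasoningEffort?: boolean
	reasoningEffort?: "low" | "medium" | "high"
}

function shouldUseReasoningEffortSketch(model: ModelInfoLike): boolean {
	return Boolean(model.supportsReasoningEffort || model.reasoningEffort)
}

// Dynamic/OpenAI-compatible providers fall back to openAiModelInfoSaneDefaults, which
// defines neither field, so the gate is false no matter what the user configured:
const defaultsLikeModelInfo: ModelInfoLike = {}
const useReasoning = shouldUseReasoningEffortSketch(defaultsLikeModelInfo)
console.log(useReasoning) // false, so the handler sends thinking: { type: "disabled" }
```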