9 changes: 7 additions & 2 deletions pnpm-lock.yaml

Some generated files are not rendered by default.

90 changes: 90 additions & 0 deletions src/api/providers/__tests__/vscode-lm.spec.ts
@@ -1,4 +1,5 @@
import type { Mock } from "vitest"
import { checkModelSupportsImages, IMAGE_CAPABLE_MODEL_PREFIXES } from "../vscode-lm"

// Mocks must come first, before imports
vi.mock("vscode", () => {
@@ -537,3 +538,92 @@ describe("VsCodeLmHandler", () => {
})
})
})

describe("checkModelSupportsImages", () => {
describe("OpenAI GPT models", () => {
it("should return true for all gpt-* models (GitHub Copilot)", () => {
// All GPT models in GitHub Copilot support images
expect(checkModelSupportsImages("gpt", "gpt-4o")).toBe(true)
expect(checkModelSupportsImages("gpt", "gpt-4.1")).toBe(true)
expect(checkModelSupportsImages("gpt", "gpt-5")).toBe(true)
expect(checkModelSupportsImages("gpt", "gpt-5.1")).toBe(true)
expect(checkModelSupportsImages("gpt", "gpt-5.2")).toBe(true)
expect(checkModelSupportsImages("gpt-mini", "gpt-5-mini")).toBe(true)
expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex")).toBe(true)
expect(checkModelSupportsImages("gpt-codex", "gpt-5.2-codex")).toBe(true)
expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex-max")).toBe(true)
expect(checkModelSupportsImages("gpt-codex", "gpt-5.1-codex-mini")).toBe(true)
})

it("should return true for o1 and o3 reasoning models", () => {
expect(checkModelSupportsImages("o1", "o1-preview")).toBe(true)
expect(checkModelSupportsImages("o1", "o1-mini")).toBe(true)
expect(checkModelSupportsImages("o3", "o3")).toBe(true)
})
})

describe("Anthropic Claude models", () => {
it("should return true for all claude-* models (GitHub Copilot)", () => {
// All Claude models in GitHub Copilot support images
expect(checkModelSupportsImages("claude-haiku", "claude-haiku-4.5")).toBe(true)
expect(checkModelSupportsImages("claude-opus", "claude-opus-4.5")).toBe(true)
expect(checkModelSupportsImages("claude-sonnet", "claude-sonnet-4")).toBe(true)
expect(checkModelSupportsImages("claude-sonnet", "claude-sonnet-4.5")).toBe(true)
})
})

describe("Google Gemini models", () => {
it("should return true for all gemini-* models (GitHub Copilot)", () => {
// All Gemini models in GitHub Copilot support images
expect(checkModelSupportsImages("gemini-pro", "gemini-2.5-pro")).toBe(true)
expect(checkModelSupportsImages("gemini-flash", "gemini-3-flash-preview")).toBe(true)
expect(checkModelSupportsImages("gemini-pro", "gemini-3-pro-preview")).toBe(true)
})
})

describe("non-vision models", () => {
it("should return false for grok models (text-only in GitHub Copilot)", () => {
// Grok is the only model family in GitHub Copilot that doesn't support images
expect(checkModelSupportsImages("grok", "grok-code-fast-1")).toBe(false)
})

it("should return false for models with non-matching prefixes", () => {
// Models that don't start with gpt, claude, gemini, o1, or o3
expect(checkModelSupportsImages("mistral", "mistral-large")).toBe(false)
expect(checkModelSupportsImages("llama", "llama-3-70b")).toBe(false)
expect(checkModelSupportsImages("unknown", "some-random-model")).toBe(false)
})
})

describe("case insensitivity", () => {
it("should match regardless of case", () => {
expect(checkModelSupportsImages("GPT", "GPT-4O")).toBe(true)
expect(checkModelSupportsImages("CLAUDE", "CLAUDE-SONNET-4")).toBe(true)
expect(checkModelSupportsImages("GEMINI", "GEMINI-2.5-PRO")).toBe(true)
})
})

describe("prefix matching", () => {
it("should only match IDs that start with known prefixes", () => {
// ID must START with the prefix, not just contain it
expect(checkModelSupportsImages("custom", "gpt-4o")).toBe(true) // ID starts with gpt
expect(checkModelSupportsImages("custom", "my-gpt-model")).toBe(false) // gpt not at start
expect(checkModelSupportsImages("custom", "not-claude-model")).toBe(false) // claude not at start
})
})
})

describe("IMAGE_CAPABLE_MODEL_PREFIXES", () => {
it("should export the model prefixes array", () => {
expect(Array.isArray(IMAGE_CAPABLE_MODEL_PREFIXES)).toBe(true)
expect(IMAGE_CAPABLE_MODEL_PREFIXES.length).toBeGreaterThan(0)
})

it("should include key model prefixes", () => {
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("gpt")
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("claude")
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("gemini")
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("o1")
expect(IMAGE_CAPABLE_MODEL_PREFIXES).toContain("o3")
})
})
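
For reference, the check exercised above is a straight prefix match on the lowercased model ID; the family argument is ignored. A minimal restatement of the exported function (mirroring the implementation shown further down in src/api/providers/vscode-lm.ts):

const IMAGE_CAPABLE_MODEL_PREFIXES = ["gpt", "claude", "gemini", "o1", "o3"]

// True when the lowercased model ID starts with any image-capable prefix.
function checkModelSupportsImages(_family: string, id: string): boolean {
	const idLower = id.toLowerCase()
	return IMAGE_CAPABLE_MODEL_PREFIXES.some((prefix) => idLower.startsWith(prefix))
}

checkModelSupportsImages("custom", "gpt-4o") // true: ID starts with "gpt"
checkModelSupportsImages("custom", "my-gpt-model") // false: "gpt" not at the start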
45 changes: 42 additions & 3 deletions src/api/providers/vscode-lm.ts
@@ -529,14 +529,18 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan

const modelId = this.client.id || modelParts.join(SELECTOR_SEPARATOR)

// Check if the model supports images based on known model families
// VS Code Language Model API 1.106+ supports image inputs via LanguageModelDataPart
const supportsImages = checkModelSupportsImages(this.client.family, this.client.id)

// Build model info with conservative defaults for missing values
const modelInfo: ModelInfo = {
maxTokens: -1, // Unlimited tokens by default
contextWindow:
typeof this.client.maxInputTokens === "number"
? Math.max(0, this.client.maxInputTokens)
: openAiModelInfoSaneDefaults.contextWindow,
supportsImages: false, // VSCode Language Model API currently doesn't support image inputs
supportsImages,
supportsPromptCache: true,
inputPrice: 0,
outputPrice: 0,
@@ -586,8 +590,43 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
}
}

// Static blacklist of VS Code Language Model IDs that should be excluded from the model list e.g. because they will never work
const VSCODE_LM_STATIC_BLACKLIST: string[] = ["claude-3.7-sonnet", "claude-3.7-sonnet-thought"]
/**
* Model ID prefixes that support image inputs via VS Code Language Model API.
* These models support the LanguageModelDataPart.image() API introduced in VS Code 1.106+.
*
* All GitHub Copilot models with these prefixes support images.
* Only grok-* models don't support images (text only).
*
* Source: https://models.dev/api.json (github-copilot provider models)
*/
export const IMAGE_CAPABLE_MODEL_PREFIXES = [
"gpt", // All GPT models (gpt-4o, gpt-4.1, gpt-5, gpt-5.1, gpt-5.2, gpt-5-mini, gpt-5.1-codex, etc.)
"claude", // All Claude models (claude-haiku-4.5, claude-opus-4.5, claude-sonnet-4, claude-sonnet-4.5)
"gemini", // All Gemini models (gemini-2.5-pro, gemini-3-flash-preview, gemini-3-pro-preview)
"o1", // OpenAI o1 reasoning models
"o3", // OpenAI o3 reasoning models
]

/**
* Checks if a model supports image inputs based on its model ID.
* Uses prefix matching against known image-capable model families.
*
* @param _family The model family (unused, kept for API compatibility)
* @param id The model ID
* @returns true if the model supports image inputs
*/
export function checkModelSupportsImages(_family: string, id: string): boolean {
const idLower = id.toLowerCase()
return IMAGE_CAPABLE_MODEL_PREFIXES.some((prefix) => idLower.startsWith(prefix))
}

// Static blacklist of VS Code Language Model IDs that should be excluded from the model list
// e.g. because they don't support native tool calling or will never work
const VSCODE_LM_STATIC_BLACKLIST: string[] = [
"claude-3.7-sonnet",
"claude-3.7-sonnet-thought",
"claude-opus-41", // Does not support native tool calling
]

export async function getVsCodeLmModels() {
try {
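
The body of getVsCodeLmModels is collapsed in the hunk above. A hedged sketch of how the static blacklist would typically be applied (assumption: the function filters the result of vscode.lm.selectChatModels by ID; the real implementation may differ):

import * as vscode from "vscode"

// Sketch only — assumes available models are queried, then filtered against the blacklist.
export async function getVsCodeLmModels(): Promise<vscode.LanguageModelChat[]> {
	try {
		const models = (await vscode.lm.selectChatModels({})) ?? []
		return models.filter((model) => !VSCODE_LM_STATIC_BLACKLIST.includes(model.id))
	} catch {
		return []
	}
}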
154 changes: 149 additions & 5 deletions src/api/transform/__tests__/vscode-lm-format.spec.ts
@@ -26,7 +26,13 @@ interface MockLanguageModelToolCallPart {
interface MockLanguageModelToolResultPart {
type: "tool_result"
callId: string
content: MockLanguageModelTextPart[]
content: (MockLanguageModelTextPart | MockLanguageModelDataPart)[]
}

interface MockLanguageModelDataPart {
type: "data"
data: Uint8Array
mimeType: string
}

// Mock vscode namespace
@@ -54,10 +60,32 @@ vitest.mock("vscode", () => {
type = "tool_result"
constructor(
public callId: string,
public content: MockLanguageModelTextPart[],
public content: (MockLanguageModelTextPart | MockLanguageModelDataPart)[],
) {}
}

class MockLanguageModelDataPart {
type = "data"
constructor(
public data: Uint8Array,
public mimeType: string,
) {}

static image(data: Uint8Array, mime: string) {
return new MockLanguageModelDataPart(data, mime)
}

static json(value: any, mime?: string) {
const bytes = new TextEncoder().encode(JSON.stringify(value))
return new MockLanguageModelDataPart(bytes, mime || "application/json")
}

static text(value: string, mime?: string) {
const bytes = new TextEncoder().encode(value)
return new MockLanguageModelDataPart(bytes, mime || "text/plain")
}
}

return {
LanguageModelChatMessage: {
Assistant: vitest.fn((content) => ({
@@ -75,6 +103,7 @@
LanguageModelTextPart: MockLanguageModelTextPart,
LanguageModelToolCallPart: MockLanguageModelToolCallPart,
LanguageModelToolResultPart: MockLanguageModelToolResultPart,
LanguageModelDataPart: MockLanguageModelDataPart,
}
})
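
The mock's static factories (image, json, text) mirror those on the real vscode.LanguageModelDataPart (VS Code 1.106+). A minimal sketch of the base64-to-DataPart conversion the tests below pin down (the helper name is hypothetical; the real converter lives in src/api/transform/vscode-lm-format.ts):

import * as vscode from "vscode"

// Hypothetical helper: decode an Anthropic base64 image block into a DataPart.
function base64ImageToDataPart(mediaType: string, base64Data: string): vscode.LanguageModelDataPart {
	const bytes = new Uint8Array(Buffer.from(base64Data, "base64"))
	return vscode.LanguageModelDataPart.image(bytes, mediaType)
}

// base64ImageToDataPart("image/png", "dGVzdA==") → DataPart with mimeType "image/png" and data as Uint8Array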

@@ -150,7 +179,7 @@ describe("convertToVsCodeLmMessages", () => {
expect(toolCall.type).toBe("tool_call")
})

it("should handle image blocks with appropriate placeholders", () => {
it("should convert image blocks to LanguageModelDataPart", () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
@@ -161,7 +190,7 @@
source: {
type: "base64",
media_type: "image/png",
data: "base64data",
data: "dGVzdA==", // "test" in base64
},
},
],
@@ -171,8 +200,123 @@
const result = convertToVsCodeLmMessages(messages)

expect(result).toHaveLength(1)
expect(result[0].content).toHaveLength(2)

// First part should be text
const textPart = result[0].content[0] as MockLanguageModelTextPart
expect(textPart.type).toBe("text")
expect(textPart.value).toBe("Look at this:")

// Second part should be a LanguageModelDataPart for the image
const imagePart = result[0].content[1] as unknown as MockLanguageModelDataPart
expect(imagePart.type).toBe("data")
expect(imagePart.mimeType).toBe("image/png")
expect(imagePart.data).toBeInstanceOf(Uint8Array)
})

it("should handle images in tool results", () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{
type: "tool_result",
tool_use_id: "tool-1",
content: [
{ type: "text", text: "Screenshot result:" },
{
type: "image",
source: {
type: "base64",
media_type: "image/jpeg",
data: "dGVzdA==",
},
},
],
},
],
},
]

const result = convertToVsCodeLmMessages(messages)

expect(result).toHaveLength(1)
expect(result[0].content).toHaveLength(1)

const toolResult = result[0].content[0] as MockLanguageModelToolResultPart
expect(toolResult.type).toBe("tool_result")
expect(toolResult.content).toHaveLength(2)

// First item in tool result should be text
const textPart = toolResult.content[0] as MockLanguageModelTextPart
expect(textPart.type).toBe("text")

// Second item should be image data
const imagePart = toolResult.content[1] as MockLanguageModelDataPart
expect(imagePart.type).toBe("data")
expect(imagePart.mimeType).toBe("image/jpeg")
})

it("should return text placeholder for URL-based images", () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{ type: "text", text: "Check this image:" },
{
type: "image",
source: {
type: "url",
url: "https://example.com/image.png",
} as any,
},
],
},
]

const result = convertToVsCodeLmMessages(messages)

expect(result).toHaveLength(1)
expect(result[0].content).toHaveLength(2)

// First part should be text
const textPart = result[0].content[0] as MockLanguageModelTextPart
expect(textPart.type).toBe("text")
expect(textPart.value).toBe("Check this image:")

// Second part should be a text placeholder (not an empty DataPart)
const imagePlaceholder = result[0].content[1] as MockLanguageModelTextPart
expect(imagePlaceholder.type).toBe("text")
expect(imagePlaceholder.value).toContain("URL not supported")
expect(imagePlaceholder.value).toContain("https://example.com/image.png")
})

it("should return text placeholder for unknown image source types", () => {
const messages: Anthropic.Messages.MessageParam[] = [
{
role: "user",
content: [
{
type: "image",
source: {
type: "unknown",
media_type: "image/png",
data: "", // Required by type but ignored for unknown source types
} as any,
},
],
},
]

const result = convertToVsCodeLmMessages(messages)

expect(result).toHaveLength(1)
expect(result[0].content).toHaveLength(1)

// Should return a text placeholder for unknown source types
const placeholder = result[0].content[0] as MockLanguageModelTextPart
expect(placeholder.type).toBe("text")
expect(placeholder.value).toContain("unsupported source type")
})
})
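
For image sources the converter cannot decode (URLs, unknown source types), it falls back to a text placeholder rather than emitting a DataPart. A sketch consistent with the assertions above — only the "URL not supported" and "unsupported source type" substrings are pinned down by the tests; the full wording here is a guess:

// Hypothetical fallback for image sources that cannot be converted to bytes.
function imagePlaceholderText(source: { type: string; url?: string }): string {
	if (source.type === "url") {
		return `[Image: URL not supported by VSCode LM API: ${source.url}]`
	}
	return `[Image: unsupported source type "${source.type}" for VSCode LM API]`
}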
