Diff: src/api/providers/native-ollama.ts (42 additions & 4 deletions)
@@ -14,6 +14,10 @@ interface OllamaChatOptions {
 	num_ctx?: number
 }
 
+// Default timeout for Ollama requests (5 minutes to accommodate slow model loading)
+// Ollama models can take 30-60+ seconds to load into memory on first use
+const DEFAULT_OLLAMA_TIMEOUT_MS = 300_000 // 5 minutes

Review comment on lines +17 to +19:

DEFAULT_OLLAMA_TIMEOUT_MS is defined but never used. The constant and its comments suggest timeout handling exists for slow model loading, but it's not passed to the Ollama client anywhere. Either remove this dead code or apply the timeout to the client configuration if that was the intent.
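One way to act on that suggestion, rather than deleting the constant: a minimal sketch, not part of this PR, assuming the ollama npm package's client config accepts a custom fetch implementation (as current ollama-js releases do).

import { Ollama } from "ollama"

const DEFAULT_OLLAMA_TIMEOUT_MS = 300_000 // 5 minutes

// Sketch: enforce the timeout by aborting each underlying HTTP request.
// AbortSignal.timeout() requires Node 17.3+.
const client = new Ollama({
	host: "http://localhost:11434",
	fetch: (input, init) =>
		fetch(input, {
			...init,
			signal: AbortSignal.timeout(DEFAULT_OLLAMA_TIMEOUT_MS),
		}),
})

Caveat: overriding signal like this would clobber any abort signal the library itself passes (for example, to cancel a stream); a production version might combine the two with AbortSignal.any, available from Node 20.3.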


+
 function convertToOllamaMessages(anthropicMessages: Anthropic.Messages.MessageParam[]): Message[] {
 	const ollamaMessages: Message[] = []
 
@@ -158,9 +162,11 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 	private ensureClient(): Ollama {
 		if (!this.client) {
 			try {
+				const baseUrl = this.options.ollamaBaseUrl || "http://localhost:11434"
+				console.log(`[Ollama] Creating client for host: ${baseUrl}`)
+
 				const clientOptions: OllamaOptions = {
-					host: this.options.ollamaBaseUrl || "http://localhost:11434",
+					// Note: The ollama npm package handles timeouts internally
+					host: baseUrl,
 				}
 
 				// Add API key if provided (for Ollama cloud or authenticated instances)
@@ -172,6 +178,7 @@
 
 				this.client = new Ollama(clientOptions)
 			} catch (error: any) {
+				console.error(`[Ollama] Error creating client: ${error.message}`)
 				throw new Error(`Error creating Ollama client: ${error.message}`)
 			}
 		}
@@ -205,8 +212,29 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 		messages: Anthropic.Messages.MessageParam[],
 		metadata?: ApiHandlerCreateMessageMetadata,
 	): ApiStream {
+		const baseUrl = this.options.ollamaBaseUrl || "http://localhost:11434"
+		const requestStartTime = Date.now()
+
+		console.log(`[Ollama] createMessage: Starting request at ${new Date().toISOString()}`)
+
 		const client = this.ensureClient()
-		const { id: modelId } = await this.fetchModel()
+
+		console.log(`[Ollama] createMessage: Fetching model info...`)
+		const { id: modelId, info: modelInfo } = await this.fetchModel()

Review comment on the line above:

modelInfo is destructured here but never used in the method. Consider removing it to avoid the unused variable.

Suggested change:
-		const { id: modelId, info: modelInfo } = await this.fetchModel()
+		const { id: modelId } = await this.fetchModel()
+		console.log(
+			`[Ollama] createMessage: Model '${modelId}' fetched in ${Date.now() - requestStartTime}ms, ` +
+				`found in cache: ${!!this.models[modelId]}`,
+		)
+
+		// Warn if model wasn't found in the tool-capable models list
+		if (!this.models[modelId]) {
+			console.warn(
+				`[Ollama] Warning: Model '${modelId}' was not found in the list of tool-capable models. ` +
+					`This may indicate the model does not support native tool calling, or your Ollama version ` +
+					`does not report capabilities. Check with: ollama show ${modelId}`,
+			)
+		}
+
 		const useR1Format = modelId.toLowerCase().includes("deepseek-r1")
 
 		const ollamaMessages: Message[] = [
@@ -234,15 +262,25 @@ export class NativeOllamaHandler extends BaseProvider implements SingleCompletio
 			chatOptions.num_ctx = this.options.ollamaNumCtx
 		}
 
+		const toolsToSend = this.convertToolsToOllama(metadata?.tools)
+		console.log(
+			`[Ollama] createMessage: Sending chat request to ${baseUrl}/api/chat with model '${modelId}', ` +
+				`${ollamaMessages.length} messages, ${toolsToSend?.length ?? 0} tools`,
+		)
+
+		const chatStartTime = Date.now()
+
+		// Create the actual API request promise
 		const stream = await client.chat({
 			model: modelId,
 			messages: ollamaMessages,
 			stream: true,
 			options: chatOptions,
-			tools: this.convertToolsToOllama(metadata?.tools),
+			tools: toolsToSend,
 		})
 
+		console.log(`[Ollama] createMessage: Stream started after ${Date.now() - chatStartTime}ms`)
+
 		let totalInputTokens = 0
 		let totalOutputTokens = 0
 		// Track tool calls across chunks (Ollama may send complete tool_calls in final chunk)
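
For context on the token counters these lines lead into: the Ollama /api/chat endpoint reports prompt_eval_count and eval_count on the final streamed chunk, which is where such totals come from. A minimal, self-contained consumption sketch, assuming the ollama npm package's streaming chat API; the model name is a placeholder.

import { Ollama } from "ollama"

async function main() {
	const client = new Ollama({ host: "http://localhost:11434" })

	// With stream: true, chat() resolves to an async iterable of chunks.
	const stream = await client.chat({
		model: "llama3.1", // placeholder model name
		messages: [{ role: "user", content: "Hello" }],
		stream: true,
	})

	let inputTokens = 0
	let outputTokens = 0

	for await (const chunk of stream) {
		if (chunk.message?.content) process.stdout.write(chunk.message.content)
		// Token counts arrive on the final (done) chunk.
		if (chunk.done) {
			inputTokens = chunk.prompt_eval_count ?? 0
			outputTokens = chunk.eval_count ?? 0
		}
	}

	console.log(`\n[tokens] in=${inputTokens} out=${outputTokens}`)
}

main()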