Skip to main content

construct/providers/
ollama.rs

1use crate::multimodal;
2use crate::providers::traits::{
3    ChatMessage, ChatResponse, Provider, ProviderCapabilities, TokenUsage, ToolCall,
4};
5use async_trait::async_trait;
6use reqwest::Client;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9
/// Chat provider backed by a local or remote Ollama server.
pub struct OllamaProvider {
    // Normalized server root (trailing `/`, `/api`, `/api/chat` stripped),
    // e.g. "http://localhost:11434".
    base_url: String,
    // Bearer token; `None` when unset or blank. Only attached to requests
    // against non-local endpoints (see `resolve_request_details`).
    api_key: Option<String>,
    // Tri-state reasoning toggle mapped to the request's `think` field:
    // Some(true)/Some(false) send it explicitly, None omits it.
    reasoning_enabled: Option<bool>,
}
15
16// ─── Request Structures ───────────────────────────────────────────────────────
17
/// Request body for POST `/api/chat`.
#[derive(Debug, Serialize)]
struct ChatRequest {
    model: String,
    messages: Vec<Message>,
    // Always set to false here; the response is read as one JSON document.
    stream: bool,
    options: Options,
    // Reasoning toggle; omitted from the JSON when None so servers/models
    // that don't understand `think` still accept the request.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<bool>,
    // Pre-formatted OpenAI/Ollama-compatible tool specs, passed through verbatim.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<serde_json::Value>>,
}
29
/// One outgoing chat message in Ollama's native schema.
#[derive(Debug, Clone, Serialize)]
struct Message {
    // "system", "user", "assistant", or "tool".
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    // Inline image payloads extracted from user messages (vision models).
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    // Structured tool calls replayed on assistant turns.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OutgoingToolCall>>,
    // Name of the tool that produced this message; only set for role "tool".
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_name: Option<String>,
}
42
/// Tool call replayed back to Ollama on an assistant turn.
#[derive(Debug, Clone, Serialize)]
struct OutgoingToolCall {
    // Serialized as "type"; always "function" in practice (see convert_messages).
    #[serde(rename = "type")]
    kind: String,
    function: OutgoingFunction,
}

/// Function name plus already-parsed JSON arguments for an outgoing tool call.
#[derive(Debug, Clone, Serialize)]
struct OutgoingFunction {
    name: String,
    arguments: serde_json::Value,
}
55
/// Generation options forwarded under the request's `options` key.
#[derive(Debug, Serialize)]
struct Options {
    temperature: f64,
}
60
61// ─── Response Structures ──────────────────────────────────────────────────────
62
/// Top-level (non-streaming) response from `/api/chat`.
#[derive(Debug, Deserialize)]
struct ApiChatResponse {
    message: ResponseMessage,
    // Prompt token count; mapped to TokenUsage::input_tokens when present.
    #[serde(default)]
    prompt_eval_count: Option<u64>,
    // Generated token count; mapped to TokenUsage::output_tokens when present.
    #[serde(default)]
    eval_count: Option<u64>,
}
71
/// The assistant message inside an `ApiChatResponse`.
#[derive(Debug, Deserialize)]
struct ResponseMessage {
    // May be empty when the model only produced tool calls or thinking.
    #[serde(default)]
    content: String,
    // Native tool calls; empty vec when the field is absent.
    #[serde(default)]
    tool_calls: Vec<OllamaToolCall>,
    /// Some models return a "thinking" field with internal reasoning
    #[serde(default)]
    thinking: Option<String>,
}
82
/// A native tool call as returned by Ollama; `id` may be absent (a UUID is
/// generated downstream in that case).
#[derive(Debug, Deserialize)]
struct OllamaToolCall {
    id: Option<String>,
    function: OllamaFunction,
}

/// Tool name plus arguments, tolerating arguments delivered either as a
/// JSON object or a JSON-encoded string (see `deserialize_args`).
#[derive(Debug, Deserialize)]
struct OllamaFunction {
    name: String,
    #[serde(default, deserialize_with = "deserialize_args")]
    arguments: serde_json::Value,
}
95
96// ─── serde Helpers ───────────────────────────────────────────────────────────
97fn deserialize_args<'de, D>(deserializer: D) -> Result<serde_json::Value, D::Error>
98where
99    D: serde::Deserializer<'de>,
100{
101    let value = serde_json::Value::deserialize(deserializer)?;
102
103    if let Some(s) = value.as_str() {
104        match serde_json::from_str::<serde_json::Value>(s) {
105            Ok(v) => Ok(v),
106            Err(_) => Ok(serde_json::json!({})),
107        }
108    } else {
109        Ok(value)
110    }
111}
112// ─── Implementation ───────────────────────────────────────────────────────────
113
114impl OllamaProvider {
115    fn normalize_base_url(raw_url: &str) -> String {
116        let trimmed = raw_url.trim().trim_end_matches('/');
117        if trimmed.is_empty() {
118            return String::new();
119        }
120
121        trimmed
122            .strip_suffix("/api/chat")
123            .or_else(|| trimmed.strip_suffix("/api"))
124            .unwrap_or(trimmed)
125            .trim_end_matches('/')
126            .to_string()
127    }
128
    /// Construct a provider with the reasoning (`think`) setting left unset,
    /// so the field is omitted from requests entirely.
    pub fn new(base_url: Option<&str>, api_key: Option<&str>) -> Self {
        Self::new_with_reasoning(base_url, api_key, None)
    }
132
133    pub fn new_with_reasoning(
134        base_url: Option<&str>,
135        api_key: Option<&str>,
136        reasoning_enabled: Option<bool>,
137    ) -> Self {
138        let api_key = api_key.and_then(|value| {
139            let trimmed = value.trim();
140            (!trimmed.is_empty()).then(|| trimmed.to_string())
141        });
142
143        Self {
144            base_url: Self::normalize_base_url(base_url.unwrap_or("http://localhost:11434")),
145            api_key,
146            reasoning_enabled,
147        }
148    }
149
150    fn is_local_endpoint(&self) -> bool {
151        reqwest::Url::parse(&self.base_url)
152            .ok()
153            .and_then(|url| url.host_str().map(|host| host.to_string()))
154            .is_some_and(|host| matches!(host.as_str(), "localhost" | "127.0.0.1" | "::1"))
155    }
156
    /// Build an HTTP client via the crate-wide proxy-aware factory.
    // NOTE(review): the 300/10 arguments are presumably request and connect
    // timeouts in seconds (LLM generations can be slow) — confirm against
    // `build_runtime_proxy_client_with_timeouts` in crate::config.
    fn http_client(&self) -> Client {
        crate::config::build_runtime_proxy_client_with_timeouts("provider.ollama", 300, 10)
    }
160
161    fn resolve_request_details(&self, model: &str) -> anyhow::Result<(String, bool)> {
162        let requests_cloud = model.ends_with(":cloud");
163        let normalized_model = model.strip_suffix(":cloud").unwrap_or(model).to_string();
164
165        if requests_cloud && self.is_local_endpoint() {
166            anyhow::bail!(
167                "Model '{}' requested cloud routing, but Ollama endpoint is local. Configure api_url with a remote Ollama endpoint.",
168                model
169            );
170        }
171
172        if requests_cloud && self.api_key.is_none() {
173            anyhow::bail!(
174                "Model '{}' requested cloud routing, but no API key is configured. Set OLLAMA_API_KEY or config api_key.",
175                model
176            );
177        }
178
179        let should_auth = self.api_key.is_some() && !self.is_local_endpoint();
180
181        Ok((normalized_model, should_auth))
182    }
183
184    fn parse_tool_arguments(arguments: &str) -> serde_json::Value {
185        serde_json::from_str(arguments).unwrap_or_else(|_| serde_json::json!({}))
186    }
187
188    fn normalize_response_text(content: String) -> Option<String> {
189        let stripped = Self::strip_think_tags(&content);
190        if stripped.trim().is_empty() {
191            None
192        } else {
193            Some(stripped)
194        }
195    }
196
197    /// Remove `<think>...</think>` blocks from model output.
198    /// Qwen and other reasoning models may embed chain-of-thought inline
199    /// in the `content` field using `<think>` tags.  These must be stripped
200    /// before returning text to the user or parsing for tool calls.
201    fn strip_think_tags(s: &str) -> String {
202        let mut result = String::with_capacity(s.len());
203        let mut rest = s;
204        loop {
205            if let Some(start) = rest.find("<think>") {
206                result.push_str(&rest[..start]);
207                if let Some(end) = rest[start..].find("</think>") {
208                    rest = &rest[start + end + "</think>".len()..];
209                } else {
210                    // Unclosed tag: drop the rest to avoid leaking partial reasoning.
211                    break;
212                }
213            } else {
214                result.push_str(rest);
215                break;
216            }
217        }
218        result.trim().to_string()
219    }
220
221    /// Derive the effective text content from a response, stripping `<think>` tags
222    /// and falling back to the `thinking` field when `content` is empty after
223    /// stripping.  This ensures that tool-call XML tags embedded alongside (or
224    /// after) thinking blocks are preserved for downstream parsing.
225    fn effective_content(content: &str, thinking: Option<&str>) -> Option<String> {
226        // First try the content field with think tags stripped.
227        let stripped = Self::strip_think_tags(content);
228        if !stripped.trim().is_empty() {
229            return Some(stripped);
230        }
231
232        // Content was empty or only thinking — check the thinking field.
233        // Some models (Qwen) put the full output including tool-call XML in
234        // the thinking field when `think: true` is set.
235        if let Some(thinking) = thinking.map(str::trim).filter(|t| !t.is_empty()) {
236            let stripped_thinking = Self::strip_think_tags(thinking);
237            if !stripped_thinking.trim().is_empty() {
238                tracing::debug!(
239                    "Ollama: using thinking field as effective content ({} chars)",
240                    stripped_thinking.len()
241                );
242                return Some(stripped_thinking);
243            }
244        }
245
246        None
247    }
248
249    fn fallback_text_for_empty_content(model: &str, thinking: Option<&str>) -> String {
250        if let Some(thinking) = thinking.map(str::trim).filter(|value| !value.is_empty()) {
251            let thinking_log_excerpt: String = thinking.chars().take(100).collect();
252            let thinking_reply_excerpt: String = thinking.chars().take(200).collect();
253            tracing::warn!(
254                "Ollama returned empty content with only thinking for model '{}': '{}'. Model may have stopped prematurely.",
255                model,
256                thinking_log_excerpt
257            );
258            return format!(
259                "I was thinking about this: {}... but I didn't complete my response. Could you try asking again?",
260                thinking_reply_excerpt
261            );
262        }
263
264        tracing::warn!(
265            "Ollama returned empty or whitespace content with no tool calls for model '{}'",
266            model
267        );
268        "I couldn't get a complete response from Ollama. Please try again or switch to a different model."
269            .to_string()
270    }
271
    /// Build a chat request using the provider's configured reasoning
    /// (`think`) setting.
    fn build_chat_request(
        &self,
        messages: Vec<Message>,
        model: &str,
        temperature: f64,
        tools: Option<&[serde_json::Value]>,
    ) -> ChatRequest {
        self.build_chat_request_with_think(
            messages,
            model,
            temperature,
            tools,
            self.reasoning_enabled,
        )
    }
287
    /// Build a chat request with an explicit `think` value.
    ///
    /// `think: None` omits the field from the serialized JSON (via
    /// `skip_serializing_if`) so servers that don't understand it still
    /// accept the request. Streaming is always disabled.
    fn build_chat_request_with_think(
        &self,
        messages: Vec<Message>,
        model: &str,
        temperature: f64,
        tools: Option<&[serde_json::Value]>,
        think: Option<bool>,
    ) -> ChatRequest {
        ChatRequest {
            model: model.to_string(),
            messages,
            stream: false,
            options: Options { temperature },
            think,
            tools: tools.map(|t| t.to_vec()),
        }
    }
306
307    fn convert_user_message_content(&self, content: &str) -> (Option<String>, Option<Vec<String>>) {
308        let (cleaned, image_refs) = multimodal::parse_image_markers(content);
309        if image_refs.is_empty() {
310            return (Some(content.to_string()), None);
311        }
312
313        let images: Vec<String> = image_refs
314            .iter()
315            .filter_map(|reference| multimodal::extract_ollama_image_payload(reference))
316            .collect();
317
318        if images.is_empty() {
319            return (Some(content.to_string()), None);
320        }
321
322        let cleaned = cleaned.trim();
323        let content = if cleaned.is_empty() {
324            None
325        } else {
326            Some(cleaned.to_string())
327        };
328
329        (content, Some(images))
330    }
331
    /// Convert internal chat history format to Ollama's native tool-call message schema.
    ///
    /// `run_tool_call_loop` stores native assistant/tool entries as JSON strings in
    /// `ChatMessage.content`. We decode those payloads here so follow-up requests send
    /// structured `assistant.tool_calls` and `tool.tool_name`, as expected by Ollama.
    ///
    /// Assistant/tool entries whose content isn't the expected JSON, and every
    /// other role, fall through to a plain text message; user messages
    /// additionally get inline image markers extracted.
    fn convert_messages(&self, messages: &[ChatMessage]) -> Vec<Message> {
        // tool_call_id -> tool name, recorded while walking assistant turns so
        // later "tool" results can recover the name when they only carry the id.
        let mut tool_name_by_id: HashMap<String, String> = HashMap::new();

        messages
            .iter()
            .map(|message| {
                if message.role == "assistant" {
                    // Assistant turns that issued tool calls are stored as JSON:
                    // {"content": ..., "tool_calls": [...]}. Re-emit them natively.
                    if let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content) {
                        if let Some(tool_calls_value) = value.get("tool_calls") {
                            if let Ok(parsed_calls) =
                                serde_json::from_value::<Vec<ToolCall>>(tool_calls_value.clone())
                            {
                                let outgoing_calls: Vec<OutgoingToolCall> = parsed_calls
                                    .into_iter()
                                    .map(|call| {
                                        // Remember the name for the matching tool result.
                                        tool_name_by_id.insert(call.id.clone(), call.name.clone());
                                        OutgoingToolCall {
                                            kind: "function".to_string(),
                                            function: OutgoingFunction {
                                                name: call.name,
                                                arguments: Self::parse_tool_arguments(
                                                    &call.arguments,
                                                ),
                                            },
                                        }
                                    })
                                    .collect();
                                let content = value
                                    .get("content")
                                    .and_then(serde_json::Value::as_str)
                                    .map(ToString::to_string);
                                return Message {
                                    role: "assistant".to_string(),
                                    content,
                                    images: None,
                                    tool_calls: Some(outgoing_calls),
                                    tool_name: None,
                                };
                            }
                        }
                    }
                }

                if message.role == "tool" {
                    // Tool results are stored as JSON carrying tool_name and/or
                    // tool_call_id plus the textual result under "content".
                    if let Ok(value) = serde_json::from_str::<serde_json::Value>(&message.content) {
                        let tool_name = value
                            .get("tool_name")
                            .and_then(serde_json::Value::as_str)
                            .map(ToString::to_string)
                            .or_else(|| {
                                // Fall back to the name recorded from the
                                // assistant turn that issued this call id.
                                value
                                    .get("tool_call_id")
                                    .and_then(serde_json::Value::as_str)
                                    .and_then(|id| tool_name_by_id.get(id))
                                    .cloned()
                            });
                        let content = value
                            .get("content")
                            .and_then(serde_json::Value::as_str)
                            .map(ToString::to_string)
                            .or_else(|| {
                                // No "content" key: forward the raw payload if non-blank.
                                (!message.content.trim().is_empty())
                                    .then_some(message.content.clone())
                            });

                        return Message {
                            role: "tool".to_string(),
                            content,
                            images: None,
                            tool_calls: None,
                            tool_name,
                        };
                    }
                }

                if message.role == "user" {
                    // User text may carry inline image markers; split them out.
                    let (content, images) = self.convert_user_message_content(&message.content);
                    return Message {
                        role: "user".to_string(),
                        content,
                        images,
                        tool_calls: None,
                        tool_name: None,
                    };
                }

                // System and any other roles pass through as plain text.
                Message {
                    role: message.role.clone(),
                    content: Some(message.content.clone()),
                    images: None,
                    tool_calls: None,
                    tool_name: None,
                }
            })
            .collect()
    }
433
    /// Send a single HTTP request to Ollama and parse the response.
    ///
    /// POSTs to `{base_url}/api/chat` with the given `think` value, attaching
    /// bearer auth only when `should_auth` is set. Errors from the transport,
    /// a non-2xx status, or body deserialization are returned to the caller
    /// (which may retry without `think`; see `send_request`). Error bodies
    /// are sanitized before logging/reporting.
    async fn send_request_inner(
        &self,
        messages: &[Message],
        model: &str,
        temperature: f64,
        should_auth: bool,
        tools: Option<&[serde_json::Value]>,
        think: Option<bool>,
    ) -> anyhow::Result<ApiChatResponse> {
        let request =
            self.build_chat_request_with_think(messages.to_vec(), model, temperature, tools, think);

        let url = format!("{}/api/chat", self.base_url);

        tracing::debug!(
            "Ollama request: url={} model={} message_count={} temperature={} think={:?} tool_count={}",
            url,
            model,
            request.messages.len(),
            temperature,
            request.think,
            request.tools.as_ref().map_or(0, |t| t.len()),
        );

        let mut request_builder = self.http_client().post(&url).json(&request);

        // Bearer auth is only attached for remote endpoints with a configured key.
        if should_auth {
            if let Some(key) = self.api_key.as_ref() {
                request_builder = request_builder.bearer_auth(key);
            }
        }

        let response = match request_builder.send().await {
            Ok(r) => r,
            Err(e) => {
                tracing::error!(
                    url = %url,
                    model = model,
                    "Ollama HTTP send failed: {:#} (debug: {:?})",
                    e,
                    e,
                );
                return Err(e.into());
            }
        };
        let status = response.status();
        tracing::debug!("Ollama response status: {}", status);

        // Read the full body up front so error bodies can be logged too.
        let body = response.bytes().await?;
        tracing::debug!("Ollama response body length: {} bytes", body.len());

        if !status.is_success() {
            let raw = String::from_utf8_lossy(&body);
            let sanitized = super::sanitize_api_error(&raw);
            tracing::error!(
                "Ollama error response: status={} body_excerpt={}",
                status,
                sanitized
            );
            anyhow::bail!(
                "Ollama API error ({}): {}. Is Ollama running? (brew install ollama && ollama serve)",
                status,
                sanitized
            );
        }

        let chat_response: ApiChatResponse = match serde_json::from_slice(&body) {
            Ok(r) => r,
            Err(e) => {
                let raw = String::from_utf8_lossy(&body);
                let sanitized = super::sanitize_api_error(&raw);
                tracing::error!(
                    "Ollama response deserialization failed: {e}. body_excerpt={}",
                    sanitized
                );
                anyhow::bail!("Failed to parse Ollama response: {e}");
            }
        };

        Ok(chat_response)
    }
516
    /// Send a request to Ollama and get the parsed response.
    /// Pass `tools` to enable native function-calling for models that support it.
    ///
    /// When `reasoning_enabled` (`think`) is set to `true`, the first request
    /// includes `think: true`.  If that request fails (the model may not support
    /// the `think` parameter), we automatically retry once with `think` omitted
    /// so the call succeeds instead of entering an infinite retry loop.
    /// If both attempts fail, the ORIGINAL error is returned (the retry error
    /// is only logged) so diagnostics point at the first failure.
    async fn send_request(
        &self,
        messages: Vec<Message>,
        model: &str,
        temperature: f64,
        should_auth: bool,
        tools: Option<&[serde_json::Value]>,
    ) -> anyhow::Result<ApiChatResponse> {
        // First attempt uses the provider's configured reasoning setting.
        let result = self
            .send_request_inner(
                &messages,
                model,
                temperature,
                should_auth,
                tools,
                self.reasoning_enabled,
            )
            .await;

        match result {
            Ok(resp) => Ok(resp),
            // Only retry when the failure could plausibly be caused by `think: true`.
            Err(first_err) if self.reasoning_enabled == Some(true) => {
                tracing::warn!(
                    model = model,
                    error = %first_err,
                    "Ollama request failed with think=true; retrying without reasoning \
                     (model may not support it)"
                );
                // Retry with think omitted from the request entirely.
                self.send_request_inner(&messages, model, temperature, should_auth, tools, None)
                    .await
                    .map_err(|retry_err| {
                        // Both attempts failed — return the original error for clarity.
                        tracing::error!(
                            model = model,
                            original_error = %first_err,
                            retry_error = %retry_err,
                            "Ollama request also failed without think; returning original error"
                        );
                        first_err
                    })
            }
            Err(e) => Err(e),
        }
    }
569
570    /// Convert Ollama tool calls to the JSON format expected by parse_tool_calls in loop_.rs
571    ///
572    /// Handles quirky model behavior where tool calls are wrapped:
573    /// - `{"name": "tool_call", "arguments": {"name": "shell", "arguments": {...}}}`
574    /// - `{"name": "tool.shell", "arguments": {...}}`
575    fn format_tool_calls_for_loop(&self, tool_calls: &[OllamaToolCall]) -> String {
576        let formatted_calls: Vec<serde_json::Value> = tool_calls
577            .iter()
578            .map(|tc| {
579                let (tool_name, tool_args) = self.extract_tool_name_and_args(tc);
580
581                // Arguments must be a JSON string for parse_tool_calls compatibility
582                let args_str =
583                    serde_json::to_string(&tool_args).unwrap_or_else(|_| "{}".to_string());
584
585                serde_json::json!({
586                    "id": tc.id,
587                    "type": "function",
588                    "function": {
589                        "name": tool_name,
590                        "arguments": args_str
591                    }
592                })
593            })
594            .collect();
595
596        serde_json::json!({
597            "content": "",
598            "tool_calls": formatted_calls
599        })
600        .to_string()
601    }
602
603    /// Extract the actual tool name and arguments from potentially nested structures
604    fn extract_tool_name_and_args(&self, tc: &OllamaToolCall) -> (String, serde_json::Value) {
605        let name = &tc.function.name;
606        let args = &tc.function.arguments;
607
608        // Pattern 1: Nested tool_call wrapper (various malformed versions)
609        // {"name": "tool_call", "arguments": {"name": "shell", "arguments": {"command": "date"}}}
610        // {"name": "tool_call><json", "arguments": {"name": "shell", ...}}
611        // {"name": "tool.call", "arguments": {"name": "shell", ...}}
612        if name == "tool_call"
613            || name == "tool.call"
614            || name.starts_with("tool_call>")
615            || name.starts_with("tool_call<")
616        {
617            if let Some(nested_name) = args.get("name").and_then(|v| v.as_str()) {
618                let nested_args = args
619                    .get("arguments")
620                    .cloned()
621                    .unwrap_or(serde_json::json!({}));
622                tracing::debug!(
623                    "Unwrapped nested tool call: {} -> {} with args {:?}",
624                    name,
625                    nested_name,
626                    nested_args
627                );
628                return (nested_name.to_string(), nested_args);
629            }
630        }
631
632        // Pattern 2: Prefixed tool name (tool.shell, tool.file_read, etc.)
633        if let Some(stripped) = name.strip_prefix("tool.") {
634            return (stripped.to_string(), args.clone());
635        }
636
637        // Pattern 3: Normal tool call
638        (name.clone(), args.clone())
639    }
640}
641
642#[async_trait]
643impl Provider for OllamaProvider {
    /// Static capability flags: vision is supported; native tool calling is
    /// reported false on purpose (XML-based tool calling is used instead —
    /// see `supports_native_tools`); Ollama has no prompt caching.
    fn capabilities(&self) -> ProviderCapabilities {
        ProviderCapabilities {
            native_tool_calling: false,
            vision: true,
            prompt_caching: false,
        }
    }
651
    /// One-shot chat: an optional system prompt plus a single user message.
    ///
    /// Native tool calls in the reply are re-encoded into the JSON envelope
    /// consumed by loop_.rs's parse_tool_calls; otherwise `<think>` tags are
    /// stripped and the `thinking` field serves as a fallback, with a canned
    /// apology when nothing usable remains.
    async fn chat_with_system(
        &self,
        system_prompt: Option<&str>,
        message: &str,
        model: &str,
        temperature: f64,
    ) -> anyhow::Result<String> {
        // Validates `:cloud` routing and decides whether to attach auth.
        let (normalized_model, should_auth) = self.resolve_request_details(model)?;

        let mut messages = Vec::new();

        if let Some(sys) = system_prompt {
            messages.push(Message {
                role: "system".to_string(),
                content: Some(sys.to_string()),
                images: None,
                tool_calls: None,
                tool_name: None,
            });
        }

        // User text may carry inline image markers; split them out.
        let (user_content, user_images) = self.convert_user_message_content(message);
        messages.push(Message {
            role: "user".to_string(),
            content: user_content,
            images: user_images,
            tool_calls: None,
            tool_name: None,
        });

        let response = self
            .send_request(messages, &normalized_model, temperature, should_auth, None)
            .await?;

        // If model returned tool calls, format them for loop_.rs's parse_tool_calls
        if !response.message.tool_calls.is_empty() {
            tracing::debug!(
                "Ollama returned {} tool call(s), formatting for loop parser",
                response.message.tool_calls.len()
            );
            return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
        }

        // Plain text response — strip <think> tags and fall back to thinking field.
        if let Some(content) = Self::effective_content(
            &response.message.content,
            response.message.thinking.as_deref(),
        ) {
            return Ok(content);
        }

        // Nothing usable at all: return friendly fallback text instead of erroring.
        Ok(Self::fallback_text_for_empty_content(
            &normalized_model,
            response.message.thinking.as_deref(),
        ))
    }
708
    /// Multi-turn chat over an existing history, without native tools.
    ///
    /// History entries are converted to Ollama's native schema (including
    /// replayed assistant tool calls and tool results); the reply is handled
    /// exactly like `chat_with_system`'s.
    async fn chat_with_history(
        &self,
        messages: &[crate::providers::ChatMessage],
        model: &str,
        temperature: f64,
    ) -> anyhow::Result<String> {
        // Validates `:cloud` routing and decides whether to attach auth.
        let (normalized_model, should_auth) = self.resolve_request_details(model)?;

        let api_messages = self.convert_messages(messages);

        let response = self
            .send_request(
                api_messages,
                &normalized_model,
                temperature,
                should_auth,
                None,
            )
            .await?;

        // If model returned tool calls, format them for loop_.rs's parse_tool_calls
        if !response.message.tool_calls.is_empty() {
            tracing::debug!(
                "Ollama returned {} tool call(s), formatting for loop parser",
                response.message.tool_calls.len()
            );
            return Ok(self.format_tool_calls_for_loop(&response.message.tool_calls));
        }

        // Plain text response — strip <think> tags and fall back to thinking field.
        if let Some(content) = Self::effective_content(
            &response.message.content,
            response.message.thinking.as_deref(),
        ) {
            return Ok(content);
        }

        // Nothing usable at all: return friendly fallback text instead of erroring.
        Ok(Self::fallback_text_for_empty_content(
            &normalized_model,
            response.message.thinking.as_deref(),
        ))
    }
751
    /// Chat with native function-calling enabled.
    ///
    /// Sends the pre-formatted tool specs through to Ollama, maps token
    /// counts into `TokenUsage`, converts any native tool calls (generating
    /// UUIDs for missing ids), and otherwise returns the effective text —
    /// preserving embedded `<tool_call>` XML for downstream parsing.
    async fn chat_with_tools(
        &self,
        messages: &[ChatMessage],
        tools: &[serde_json::Value],
        model: &str,
        temperature: f64,
    ) -> anyhow::Result<ChatResponse> {
        // Validates `:cloud` routing and decides whether to attach auth.
        let (normalized_model, should_auth) = self.resolve_request_details(model)?;

        let api_messages = self.convert_messages(messages);

        // Tools arrive pre-formatted in OpenAI/Ollama-compatible JSON from
        // tools_to_openai_format() in loop_.rs — pass them through directly.
        let tools_opt = if tools.is_empty() { None } else { Some(tools) };

        let response = self
            .send_request(
                api_messages,
                &normalized_model,
                temperature,
                should_auth,
                tools_opt,
            )
            .await?;

        // Token usage is reported only when Ollama supplied at least one count.
        let usage = if response.prompt_eval_count.is_some() || response.eval_count.is_some() {
            Some(TokenUsage {
                input_tokens: response.prompt_eval_count,
                output_tokens: response.eval_count,
                cached_input_tokens: None,
            })
        } else {
            None
        };

        // Native tool calls returned by the model.
        if !response.message.tool_calls.is_empty() {
            let tool_calls: Vec<ToolCall> = response
                .message
                .tool_calls
                .iter()
                .map(|tc| {
                    // Unwrap nested/prefixed names before handing off.
                    let (name, args) = self.extract_tool_name_and_args(tc);
                    ToolCall {
                        // Ollama may omit ids; synthesize one so callers can
                        // correlate tool results with calls.
                        id: tc
                            .id
                            .clone()
                            .unwrap_or_else(|| uuid::Uuid::new_v4().to_string()),
                        name,
                        arguments: serde_json::to_string(&args)
                            .unwrap_or_else(|_| "{}".to_string()),
                    }
                })
                .collect();
            let text = Self::normalize_response_text(response.message.content);
            return Ok(ChatResponse {
                text,
                tool_calls,
                usage,
                reasoning_content: None,
            });
        }

        // No native tool calls — use the effective content (content with
        // `<think>` tags stripped, falling back to thinking field).
        // The loop_.rs `parse_tool_calls` will extract any XML-style tool
        // calls from the text, so preserve `<tool_call>` tags here.
        let effective = Self::effective_content(
            &response.message.content,
            response.message.thinking.as_deref(),
        );
        let text = if let Some(content) = effective {
            content
        } else {
            Self::fallback_text_for_empty_content(
                &normalized_model,
                response.message.thinking.as_deref(),
            )
        };
        Ok(ChatResponse {
            text: Some(text),
            tool_calls: vec![],
            usage,
            reasoning_content: None,
        })
    }
838
839    fn supports_native_tools(&self) -> bool {
840        // Use XML-based tool calling (<tool_call> tags in system prompt)
841        // instead of Ollama's native `tools` API parameter.
842        //
843        // Local models (Gemma4, etc.) often ignore native tool calling
844        // entirely — they chat without invoking tools.  XML instructions
845        // in the system prompt are more explicit and harder to skip.
846        //
847        // The chat() override below respects this flag: when false it
848        // does NOT send tools via the API, avoiding the double-instruction
849        // bug that previously confused models.
850        false
851    }
852
853    async fn chat(
854        &self,
855        request: crate::providers::traits::ChatRequest<'_>,
856        model: &str,
857        temperature: f64,
858    ) -> anyhow::Result<ChatResponse> {
859        // Only send native tools via the API when supports_native_tools()
860        // is true.  When false, tool instructions are already in the system
861        // prompt as XML — sending native tools too causes double instructions
862        // that confuse local models.
863        if self.supports_native_tools() {
864            if let Some(specs) = request.tools {
865                if !specs.is_empty() {
866                    let tools: Vec<serde_json::Value> = specs
867                        .iter()
868                        .map(|s| {
869                            serde_json::json!({
870                                "type": "function",
871                                "function": {
872                                    "name": s.name,
873                                    "description": s.description,
874                                    "parameters": s.parameters
875                                }
876                            })
877                        })
878                        .collect();
879                    return self
880                        .chat_with_tools(request.messages, &tools, model, temperature)
881                        .await;
882                }
883            }
884        }
885
886        // No native tools — plain text chat.  The agent loop will parse
887        // <tool_call> tags from the response text.
888        let text = self
889            .chat_with_history(request.messages, model, temperature)
890            .await?;
891        Ok(ChatResponse {
892            text: Some(text),
893            tool_calls: vec![],
894            usage: None,
895            reasoning_content: None,
896        })
897    }
898}
899
900// ─── Tests ────────────────────────────────────────────────────────────────────
901
#[cfg(test)]
mod tests {
    //! Unit tests for the Ollama provider: base-URL normalization, cloud
    //! routing / auth resolution, request construction (`think` flag),
    //! response deserialization, `<think>`-tag stripping, effective-content
    //! selection, and native tool-call extraction.

    use super::*;

    // ─── Base-URL normalization (OllamaProvider::new) ────────────────────

    #[test]
    fn default_url() {
        // No URL configured → local default endpoint.
        let p = OllamaProvider::new(None, None);
        assert_eq!(p.base_url, "http://localhost:11434");
    }

    #[test]
    fn custom_url_trailing_slash() {
        // A trailing slash is stripped during construction.
        let p = OllamaProvider::new(Some("http://192.168.1.100:11434/"), None);
        assert_eq!(p.base_url, "http://192.168.1.100:11434");
    }

    #[test]
    fn custom_url_no_trailing_slash() {
        // Already-normalized URLs pass through unchanged.
        let p = OllamaProvider::new(Some("http://myserver:11434"), None);
        assert_eq!(p.base_url, "http://myserver:11434");
    }

    #[test]
    fn custom_url_strips_api_suffix() {
        // "/api/" suffixes are removed so endpoints are built consistently.
        let p = OllamaProvider::new(Some("https://ollama.com/api/"), None);
        assert_eq!(p.base_url, "https://ollama.com");
    }

    #[test]
    fn custom_url_strips_api_chat_suffix() {
        // A full "/api/chat" path pasted by the user is also normalized.
        let p = OllamaProvider::new(Some("http://172.30.30.50:11434/api/chat"), None);
        assert_eq!(p.base_url, "http://172.30.30.50:11434");
    }

    #[test]
    fn empty_url_uses_empty() {
        // Documents current behavior: an explicitly empty URL is kept
        // as-is rather than falling back to the default.
        let p = OllamaProvider::new(Some(""), None);
        assert_eq!(p.base_url, "");
    }

    // ─── Cloud routing and auth (resolve_request_details) ────────────────

    #[test]
    fn cloud_suffix_strips_model_name() {
        // ":cloud" suffix is removed from the model and auth is enabled.
        let p = OllamaProvider::new(Some("https://ollama.com"), Some("ollama-key"));
        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
        assert_eq!(model, "qwen3");
        assert!(should_auth);
    }

    #[test]
    fn cloud_suffix_with_local_endpoint_errors() {
        // Cloud routing against a local endpoint is a configuration error.
        let p = OllamaProvider::new(None, Some("ollama-key"));
        let error = p
            .resolve_request_details("qwen3:cloud")
            .expect_err("cloud suffix should fail on local endpoint");
        assert!(
            error
                .to_string()
                .contains("requested cloud routing, but Ollama endpoint is local")
        );
    }

    #[test]
    fn cloud_suffix_without_api_key_errors() {
        // Cloud routing requires an API key even when the endpoint is remote.
        let p = OllamaProvider::new(Some("https://ollama.com"), None);
        let error = p
            .resolve_request_details("qwen3:cloud")
            .expect_err("cloud suffix should require API key");
        assert!(
            error
                .to_string()
                .contains("requested cloud routing, but no API key is configured")
        );
    }

    #[test]
    fn remote_endpoint_auth_enabled_when_key_present() {
        // Remote endpoint + key → auth header even without ":cloud" suffix.
        let p = OllamaProvider::new(Some("https://ollama.com"), Some("ollama-key"));
        let (_model, should_auth) = p.resolve_request_details("qwen3").unwrap();
        assert!(should_auth);
    }

    #[test]
    fn remote_endpoint_with_api_suffix_still_allows_cloud_models() {
        // URL normalization (stripping "/api") must not break cloud detection.
        let p = OllamaProvider::new(Some("https://ollama.com/api"), Some("ollama-key"));
        let (model, should_auth) = p.resolve_request_details("qwen3:cloud").unwrap();
        assert_eq!(model, "qwen3");
        assert!(should_auth);
    }

    #[test]
    fn local_endpoint_auth_disabled_even_with_key() {
        // Local endpoints never send the API key.
        let p = OllamaProvider::new(None, Some("ollama-key"));
        let (_model, should_auth) = p.resolve_request_details("llama3").unwrap();
        assert!(!should_auth);
    }

    // ─── Request construction: `think` flag serialization ────────────────

    #[test]
    fn request_omits_think_when_reasoning_not_configured() {
        // With no reasoning preference, `think` must be absent from the
        // JSON (skip_serializing_if on the Option field).
        let provider = OllamaProvider::new(None, None);
        let request = provider.build_chat_request(
            vec![Message {
                role: "user".to_string(),
                content: Some("hello".to_string()),
                images: None,
                tool_calls: None,
                tool_name: None,
            }],
            "llama3",
            0.7,
            None,
        );

        let json = serde_json::to_value(request).unwrap();
        assert!(json.get("think").is_none());
    }

    #[test]
    fn request_includes_think_when_reasoning_configured() {
        // An explicit reasoning setting (here: disabled) is serialized
        // as `"think": false`.
        let provider = OllamaProvider::new_with_reasoning(None, None, Some(false));
        let request = provider.build_chat_request(
            vec![Message {
                role: "user".to_string(),
                content: Some("hello".to_string()),
                images: None,
                tool_calls: None,
                tool_name: None,
            }],
            "llama3",
            0.7,
            None,
        );

        let json = serde_json::to_value(request).unwrap();
        assert_eq!(json.get("think"), Some(&serde_json::json!(false)));
    }

    // ─── Response deserialization ────────────────────────────────────────

    #[test]
    fn response_deserializes() {
        let json = r#"{"message":{"role":"assistant","content":"Hello from Ollama!"}}"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(resp.message.content, "Hello from Ollama!");
    }

    #[test]
    fn response_with_empty_content() {
        let json = r#"{"message":{"role":"assistant","content":""}}"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert!(resp.message.content.is_empty());
    }

    // ─── Response text normalization ─────────────────────────────────────

    #[test]
    fn normalize_response_text_rejects_whitespace_only_content() {
        // Whitespace-only content normalizes to None; real content is trimmed.
        assert_eq!(
            OllamaProvider::normalize_response_text("\n \t".to_string()),
            None
        );
        assert_eq!(
            OllamaProvider::normalize_response_text(" hello ".to_string()),
            Some("hello".to_string())
        );
    }

    #[test]
    fn normalize_response_text_strips_think_tags() {
        assert_eq!(
            OllamaProvider::normalize_response_text("<think>reasoning</think> hello".to_string()),
            Some("hello".to_string())
        );
    }

    #[test]
    fn normalize_response_text_rejects_think_only_content() {
        // Content that is nothing but a <think> block is treated as empty.
        assert_eq!(
            OllamaProvider::normalize_response_text(
                "<think>only thinking here</think>".to_string()
            ),
            None
        );
    }

    #[test]
    fn fallback_text_for_empty_content_without_thinking_is_generic() {
        let text = OllamaProvider::fallback_text_for_empty_content("qwen3-coder", None);
        assert!(text.contains("couldn't get a complete response from Ollama"));
    }

    #[test]
    fn response_with_missing_content_defaults_to_empty() {
        // `content` is optional in the wire format; missing → empty string.
        let json = r#"{"message":{"role":"assistant"}}"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert!(resp.message.content.is_empty());
    }

    #[test]
    fn response_with_thinking_field_extracts_content() {
        // A separate `thinking` field must not displace real content.
        let json =
            r#"{"message":{"role":"assistant","content":"hello","thinking":"internal reasoning"}}"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(resp.message.content, "hello");
    }

    // ─── Native tool-call parsing and extraction ─────────────────────────

    #[test]
    fn response_with_tool_calls_parses_correctly() {
        let json = r#"{"message":{"role":"assistant","content":"","tool_calls":[{"id":"call_123","function":{"name":"shell","arguments":{"command":"date"}}}]}}"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert!(resp.message.content.is_empty());
        assert_eq!(resp.message.tool_calls.len(), 1);
        assert_eq!(resp.message.tool_calls[0].function.name, "shell");
    }

    #[test]
    fn extract_tool_name_handles_nested_tool_call() {
        // Some models emit a literal "tool_call" wrapper whose arguments
        // carry the real name/arguments pair — it must be unwrapped.
        let provider = OllamaProvider::new(None, None);
        let tc = OllamaToolCall {
            id: Some("call_123".into()),
            function: OllamaFunction {
                name: "tool_call".into(),
                arguments: serde_json::json!({
                    "name": "shell",
                    "arguments": {"command": "date"}
                }),
            },
        };
        let (name, args) = provider.extract_tool_name_and_args(&tc);
        assert_eq!(name, "shell");
        assert_eq!(args.get("command").unwrap(), "date");
    }

    #[test]
    fn extract_tool_name_handles_prefixed_name() {
        // Dotted prefixes like "tool.shell" are stripped to the bare name.
        let provider = OllamaProvider::new(None, None);
        let tc = OllamaToolCall {
            id: Some("call_123".into()),
            function: OllamaFunction {
                name: "tool.shell".into(),
                arguments: serde_json::json!({"command": "ls"}),
            },
        };
        let (name, args) = provider.extract_tool_name_and_args(&tc);
        assert_eq!(name, "shell");
        assert_eq!(args.get("command").unwrap(), "ls");
    }

    #[test]
    fn extract_tool_name_handles_normal_call() {
        // Well-formed calls pass through unchanged.
        let provider = OllamaProvider::new(None, None);
        let tc = OllamaToolCall {
            id: Some("call_123".into()),
            function: OllamaFunction {
                name: "file_read".into(),
                arguments: serde_json::json!({"path": "/tmp/test"}),
            },
        };
        let (name, args) = provider.extract_tool_name_and_args(&tc);
        assert_eq!(name, "file_read");
        assert_eq!(args.get("path").unwrap(), "/tmp/test");
    }

    #[test]
    fn format_tool_calls_produces_valid_json() {
        let provider = OllamaProvider::new(None, None);
        let tool_calls = vec![OllamaToolCall {
            id: Some("call_abc".into()),
            function: OllamaFunction {
                name: "shell".into(),
                arguments: serde_json::json!({"command": "date"}),
            },
        }];

        let formatted = provider.format_tool_calls_for_loop(&tool_calls);
        let parsed: serde_json::Value = serde_json::from_str(&formatted).unwrap();

        assert!(parsed.get("tool_calls").is_some());
        let calls = parsed.get("tool_calls").unwrap().as_array().unwrap();
        assert_eq!(calls.len(), 1);

        let func = calls[0].get("function").unwrap();
        assert_eq!(func.get("name").unwrap(), "shell");
        // arguments should be a string (JSON-encoded)
        assert!(func.get("arguments").unwrap().is_string());
    }

    // ─── History conversion (convert_messages) ───────────────────────────

    #[test]
    fn convert_messages_parses_native_assistant_tool_calls() {
        // An assistant turn stored as serialized JSON is re-expanded into
        // structured OutgoingToolCall entries with null content.
        let provider = OllamaProvider::new(None, None);
        let messages = vec![ChatMessage {
            role: "assistant".into(),
            content: r#"{"content":null,"tool_calls":[{"id":"call_1","name":"shell","arguments":"{\"command\":\"ls\"}"}]}"#.into(),
        }];

        let converted = provider.convert_messages(&messages);

        assert_eq!(converted.len(), 1);
        assert_eq!(converted[0].role, "assistant");
        assert!(converted[0].content.is_none());
        let calls = converted[0]
            .tool_calls
            .as_ref()
            .expect("tool calls expected");
        assert_eq!(calls.len(), 1);
        assert_eq!(calls[0].kind, "function");
        assert_eq!(calls[0].function.name, "shell");
        assert_eq!(calls[0].function.arguments.get("command").unwrap(), "ls");
    }

    #[test]
    fn convert_messages_maps_tool_result_call_id_to_tool_name() {
        // A tool result references its call by id; conversion resolves the
        // id back to the tool's name from the preceding assistant turn.
        let provider = OllamaProvider::new(None, None);
        let messages = vec![
            ChatMessage {
                role: "assistant".into(),
                content: r#"{"content":null,"tool_calls":[{"id":"call_7","name":"file_read","arguments":"{\"path\":\"README.md\"}"}]}"#.into(),
            },
            ChatMessage {
                role: "tool".into(),
                content: r#"{"tool_call_id":"call_7","content":"ok"}"#.into(),
            },
        ];

        let converted = provider.convert_messages(&messages);

        assert_eq!(converted.len(), 2);
        assert_eq!(converted[1].role, "tool");
        assert_eq!(converted[1].tool_name.as_deref(), Some("file_read"));
        assert_eq!(converted[1].content.as_deref(), Some("ok"));
        assert!(converted[1].tool_calls.is_none());
    }

    #[test]
    fn convert_messages_extracts_images_from_user_marker() {
        // Inline [IMAGE:...] markers become the `images` array, and the
        // marker is removed from the text content.
        let provider = OllamaProvider::new(None, None);
        let messages = vec![ChatMessage {
            role: "user".into(),
            content: "Inspect this screenshot [IMAGE:data:image/png;base64,abcd==]".into(),
        }];

        let converted = provider.convert_messages(&messages);
        assert_eq!(converted.len(), 1);
        assert_eq!(converted[0].role, "user");
        assert_eq!(
            converted[0].content.as_deref(),
            Some("Inspect this screenshot")
        );
        let images = converted[0]
            .images
            .as_ref()
            .expect("images should be present");
        assert_eq!(images, &vec!["abcd==".to_string()]);
    }

    // ─── Capabilities and token accounting ───────────────────────────────

    #[test]
    fn capabilities_disable_native_tools_and_enable_vision() {
        let provider = OllamaProvider::new(None, None);
        let caps = <OllamaProvider as Provider>::capabilities(&provider);
        assert!(
            !caps.native_tool_calling,
            "Ollama should default to prompt-guided tool calling"
        );
        assert!(caps.vision);
    }

    #[test]
    fn api_response_parses_eval_counts() {
        let json = r#"{
            "message": {"content": "Hello", "tool_calls": []},
            "prompt_eval_count": 50,
            "eval_count": 25
        }"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert_eq!(resp.prompt_eval_count, Some(50));
        assert_eq!(resp.eval_count, Some(25));
    }

    #[test]
    fn api_response_parses_without_eval_counts() {
        // Eval counts are optional; absence must not fail deserialization.
        let json = r#"{"message": {"content": "Hello", "tool_calls": []}}"#;
        let resp: ApiChatResponse = serde_json::from_str(json).unwrap();
        assert!(resp.prompt_eval_count.is_none());
        assert!(resp.eval_count.is_none());
    }

    // ═══════════════════════════════════════════════════════════════════════
    // <think> tag stripping tests
    // ═══════════════════════════════════════════════════════════════════════

    #[test]
    fn strip_think_tags_removes_single_block() {
        let input = "<think>internal reasoning</think>Hello world";
        assert_eq!(OllamaProvider::strip_think_tags(input), "Hello world");
    }

    #[test]
    fn strip_think_tags_removes_multiple_blocks() {
        let input = "<think>first</think>A<think>second</think>B";
        assert_eq!(OllamaProvider::strip_think_tags(input), "AB");
    }

    #[test]
    fn strip_think_tags_handles_unclosed_block() {
        // An unterminated <think> swallows everything after it.
        let input = "visible<think>hidden tail";
        assert_eq!(OllamaProvider::strip_think_tags(input), "visible");
    }

    #[test]
    fn strip_think_tags_preserves_text_without_tags() {
        let input = "plain text response";
        assert_eq!(
            OllamaProvider::strip_think_tags(input),
            "plain text response"
        );
    }

    #[test]
    fn strip_think_tags_returns_empty_for_think_only() {
        let input = "<think>only thinking</think>";
        assert_eq!(OllamaProvider::strip_think_tags(input), "");
    }

    // ═══════════════════════════════════════════════════════════════════════
    // effective_content tests
    // ═══════════════════════════════════════════════════════════════════════

    #[test]
    fn effective_content_strips_think_and_returns_rest() {
        // <think> is removed but <tool_call> must survive for the agent
        // loop's parser.
        let result = OllamaProvider::effective_content(
            "<think>reasoning</think>\n<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}</tool_call>",
            None,
        );
        assert!(result.is_some());
        let text = result.unwrap();
        assert!(text.contains("<tool_call>"));
        assert!(!text.contains("<think>"));
    }

    #[test]
    fn effective_content_falls_back_to_thinking_field() {
        // Empty content → the thinking field is used instead.
        let result = OllamaProvider::effective_content(
            "",
            Some(
                "<tool_call>{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}</tool_call>",
            ),
        );
        assert!(result.is_some());
        assert!(result.unwrap().contains("<tool_call>"));
    }

    #[test]
    fn effective_content_returns_none_when_both_empty() {
        assert!(OllamaProvider::effective_content("", None).is_none());
        assert!(OllamaProvider::effective_content("", Some("")).is_none());
        assert!(
            OllamaProvider::effective_content(
                "<think>only thinking</think>",
                Some("<think>also only thinking</think>")
            )
            .is_none()
        );
    }

    #[test]
    fn effective_content_prefers_content_over_thinking() {
        let result = OllamaProvider::effective_content("content text", Some("thinking text"));
        assert_eq!(result, Some("content text".to_string()));
    }

    #[test]
    fn effective_content_uses_thinking_when_content_is_think_only() {
        let result = OllamaProvider::effective_content(
            "<think>just reasoning</think>",
            Some("actual useful text from thinking field"),
        );
        assert_eq!(
            result,
            Some("actual useful text from thinking field".to_string())
        );
    }

    // ═══════════════════════════════════════════════════════════════════════
    // Qwen tool-call regression scenario tests
    // ═══════════════════════════════════════════════════════════════════════

    #[test]
    fn qwen_think_with_tool_call_in_content_preserved() {
        // Qwen produces <think> tags followed by <tool_call> in content,
        // with no structured tool_calls. The <tool_call> tags must survive
        // for downstream parse_tool_calls to extract them.
        let content = "<think>I should list files</think>\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"ls\"}}\n</tool_call>";
        let result = OllamaProvider::effective_content(content, None);
        assert!(result.is_some());
        let text = result.unwrap();
        assert!(text.contains("<tool_call>"));
        assert!(text.contains("shell"));
        assert!(!text.contains("<think>"));
    }

    #[test]
    fn qwen_thinking_field_with_tool_call_xml_extracted() {
        // When think=true, Ollama separates thinking, but Qwen may put tool
        // call XML in the thinking field with empty content.
        let content = "";
        let thinking = "I need to check the date\n<tool_call>\n{\"name\":\"shell\",\"arguments\":{\"command\":\"date\"}}\n</tool_call>";
        let result = OllamaProvider::effective_content(content, Some(thinking));
        assert!(result.is_some());
        let text = result.unwrap();
        assert!(text.contains("<tool_call>"));
        assert!(text.contains("date"));
    }
}