Skip to main content

vtcode_core/llm/providers/
ollama.rs

1use crate::config::TimeoutsConfig;
2use crate::config::constants::{env_vars, models, urls};
3use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
4use crate::llm::client::LLMClient;
5use crate::llm::provider::{
6    ContentPart, FinishReason, LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream,
7    LLMStreamEvent, Message, MessageContent, MessageRole, ToolCall, ToolChoice, ToolDefinition,
8    Usage,
9};
10use crate::utils::http_client;
11use anyhow::Result;
12use async_stream::try_stream;
13use async_trait::async_trait;
14use futures::StreamExt;
15use hashbrown::HashMap;
16use reqwest::Client as HttpClient;
17use serde::{Deserialize, Serialize};
18use serde_json::{Map, Value};
19
20pub mod client;
21pub mod parser;
22pub mod pull;
23pub mod url;
24
25pub use client::OllamaClient;
26pub use parser::pull_events_from_value;
27pub use pull::{
28    CliPullProgressReporter, OllamaPullEvent, OllamaPullProgressReporter, TuiPullProgressReporter,
29};
30pub use url::{base_url_to_host_root, is_openai_compatible_base_url};
31
32use semver::Version;
33
34use super::common::{
35    assistant_interleaved_history_text, collect_history_system_directives,
36    extract_reasoning_text_from_detail_values, extract_reasoning_text_from_serialized_details,
37    is_minimax_m2_model, merge_system_prompt_with_history_directives, override_base_url,
38    parse_client_prompt_common, resolve_model, serialize_reasoning_detail_values,
39};
40use super::error_handling::{format_network_error, format_parse_error};
41
42// ============================================================================
43// Wire API Detection (adapted from OpenAI Codex's codex-ollama/src/lib.rs)
44// ============================================================================
45
/// Wire protocol spoken by an Ollama server.
///
/// Modeled on the `WireApi` enum from OpenAI Codex.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OllamaWireApi {
    /// OpenAI-compatible Responses API, served at `/v1/responses`.
    Responses,
    /// Chat Completions API compatible with `/v1/chat/completions`.
    Chat,
}
55
56/// Result of detecting which wire API the Ollama server supports.
57pub struct WireApiDetection {
58    pub wire_api: OllamaWireApi,
59    pub version: Option<Version>,
60}
61
62/// Minimum Ollama version that supports the Responses API.
63/// Ollama versions >= 0.13.3 support the Responses API.
64fn min_responses_version() -> Version {
65    Version::new(0, 13, 3)
66}
67
68/// Determine which wire API to use based on the Ollama server version.
69fn wire_api_for_version(version: &Version) -> OllamaWireApi {
70    // Version 0.0.0 is used for development builds, which typically support latest features
71    if *version == Version::new(0, 0, 0) || *version >= min_responses_version() {
72        OllamaWireApi::Responses
73    } else {
74        OllamaWireApi::Chat
75    }
76}
77
78/// Detect which wire API the running Ollama server supports based on its version.
79/// Returns `Ok(None)` when the version endpoint is missing or unparsable; callers
80/// should keep the configured default in that case.
81///
82/// Adapted from OpenAI Codex's codex-ollama/src/lib.rs
83pub async fn detect_wire_api(
84    base_url: Option<String>,
85) -> std::io::Result<Option<WireApiDetection>> {
86    let resolved_base_url = override_base_url(
87        urls::OLLAMA_API_BASE,
88        base_url,
89        Some(env_vars::OLLAMA_BASE_URL),
90    );
91
92    let client = match OllamaClient::try_from_base_url(&resolved_base_url).await {
93        Ok(c) => c,
94        Err(e) => {
95            tracing::debug!("Failed to connect to Ollama server for version detection: {e}");
96            return Ok(None);
97        }
98    };
99
100    let Some(version) = client.fetch_version().await? else {
101        return Ok(None);
102    };
103
104    let wire_api = wire_api_for_version(&version);
105
106    Ok(Some(WireApiDetection {
107        wire_api,
108        version: Some(version),
109    }))
110}
111
112/// Prepare the local OSS environment when using Ollama.
113///
114/// - Ensures a local Ollama server is reachable.
115/// - Checks if the model exists locally and pulls it if missing.
116///
117/// Adapted from OpenAI Codex's codex-ollama/src/lib.rs
118pub async fn ensure_oss_ready(
119    model: Option<&str>,
120    base_url: Option<String>,
121) -> std::io::Result<()> {
122    let target_model = model.unwrap_or(models::ollama::DEFAULT_MODEL);
123
124    let resolved_base_url = override_base_url(
125        urls::OLLAMA_API_BASE,
126        base_url,
127        Some(env_vars::OLLAMA_BASE_URL),
128    );
129
130    // Verify local Ollama is reachable
131    let ollama_client = OllamaClient::try_from_base_url(&resolved_base_url).await?;
132
133    // If the model is not present locally, pull it
134    match ollama_client.fetch_models().await {
135        Ok(existing_models) => {
136            if !existing_models.iter().any(|m| m == target_model) {
137                tracing::info!("Model '{target_model}' not found locally, pulling...");
138                let mut reporter = CliPullProgressReporter::new();
139                ollama_client
140                    .pull_with_reporter(target_model, &mut reporter)
141                    .await?;
142            }
143        }
144        Err(e) => {
145            tracing::warn!("Failed to list Ollama models: {e}");
146            // Continue anyway; model might exist but listing failed
147        }
148    }
149
150    Ok(())
151}
152
153#[derive(Debug, Deserialize, Serialize)]
154struct OllamaTagsResponse {
155    models: Vec<OllamaTag>,
156}
157
158#[derive(Debug, Deserialize, Serialize)]
159struct OllamaTag {
160    name: Option<String>,
161    model: Option<String>,
162    modified_at: Option<String>,
163    size: Option<u64>,
164    digest: Option<String>,
165    details: Option<OllamaModelDetails>,
166}
167
168#[derive(Debug, Deserialize, Serialize)]
169struct OllamaModelDetails {
170    format: Option<String>,
171    family: Option<String>,
172    families: Option<Vec<String>>,
173    parameter_size: Option<String>,
174    quantization_level: Option<String>,
175}
176
177pub(super) fn ollama_model_name_from_fields<'a>(
178    name: Option<&'a str>,
179    model: Option<&'a str>,
180) -> Option<&'a str> {
181    name.or(model)
182        .map(str::trim)
183        .filter(|value| !value.is_empty())
184}
185
186pub(super) const OLLAMA_CONNECTION_ERROR: &str = "No running Ollama server detected. Start it with: `ollama serve` (after installing)\n\
187     Install instructions: https://github.com/ollama/ollama?tab=readme-ov-file";
188
189/// Fetches available local Ollama models from the Ollama API endpoint
190pub async fn fetch_ollama_models(base_url: Option<String>) -> Result<Vec<String>, anyhow::Error> {
191    use crate::config::constants::{env_vars, urls};
192
193    let resolved_base_url = override_base_url(
194        urls::OLLAMA_API_BASE,
195        base_url,
196        Some(env_vars::OLLAMA_BASE_URL),
197    );
198
199    // Construct the tags endpoint URL
200    let tags_url = format!("{}/api/tags", resolved_base_url);
201
202    // Create HTTP client with connection timeout
203    let client = http_client::create_client_with_timeout(std::time::Duration::from_secs(5));
204
205    // Make GET request to fetch models
206    let response = client
207        .get(&tags_url)
208        .header("Content-Type", "application/json")
209        .send()
210        .await
211        .map_err(|e| {
212            tracing::warn!("Failed to connect to Ollama server: {e:?}");
213            anyhow::anyhow!(OLLAMA_CONNECTION_ERROR)
214        })?;
215
216    if !response.status().is_success() {
217        return Err(anyhow::anyhow!(
218            "Failed to fetch Ollama models: HTTP {}. {}",
219            response.status(),
220            if response.status() == reqwest::StatusCode::NOT_FOUND {
221                "Ensure Ollama server is running."
222            } else {
223                ""
224            }
225        ));
226    }
227
228    // Parse the response
229    let tags_response: OllamaTagsResponse = response
230        .json()
231        .await
232        .map_err(|e| anyhow::anyhow!("Failed to parse Ollama models response: {}", e))?;
233
234    // Extract model names
235    let model_names: Vec<String> = tags_response
236        .models
237        .into_iter()
238        .filter_map(|model| {
239            ollama_model_name_from_fields(model.name.as_deref(), model.model.as_deref())
240                .map(str::to_string)
241        })
242        .collect();
243
244    Ok(model_names)
245}
246
247pub struct OllamaProvider {
248    http_client: HttpClient,
249    base_url: String,
250    model: String,
251    api_key: Option<String>,
252    model_behavior: Option<ModelConfig>,
253}
254
255impl OllamaProvider {
256    fn merged_system_prompt(request: &LLMRequest) -> Option<String> {
257        const HISTORY_DIRECTIVES_SECTION_HEADER: &str = "[History Directives]";
258        let directives = collect_history_system_directives(request);
259        merge_system_prompt_with_history_directives(
260            request.system_prompt.as_ref().map(|prompt| prompt.as_str()),
261            &directives,
262            HISTORY_DIRECTIVES_SECTION_HEADER,
263        )
264    }
265
266    pub fn new(api_key: String) -> Self {
267        Self::with_model(api_key, models::ollama::DEFAULT_MODEL.to_string())
268    }
269
270    pub fn with_model(api_key: String, model: String) -> Self {
271        Self::with_model_internal(model, None, Some(api_key), None)
272    }
273
274    pub fn new_with_client(
275        api_key: String,
276        model: String,
277        http_client: reqwest::Client,
278        base_url: String,
279        _timeouts: TimeoutsConfig,
280    ) -> Self {
281        Self {
282            http_client,
283            base_url,
284            model,
285            api_key: Some(api_key),
286            model_behavior: None,
287        }
288    }
289
290    pub fn from_config(
291        api_key: Option<String>,
292        model: Option<String>,
293        base_url: Option<String>,
294        _prompt_cache: Option<PromptCachingConfig>,
295        _timeouts: Option<TimeoutsConfig>,
296        _anthropic: Option<AnthropicConfig>,
297        model_behavior: Option<ModelConfig>,
298    ) -> Self {
299        let resolved_model = resolve_model(model, models::ollama::DEFAULT_MODEL);
300        Self::with_model_internal(resolved_model, base_url, api_key, model_behavior)
301    }
302
303    fn normalize_api_key(api_key: Option<String>) -> Option<String> {
304        api_key.and_then(|value| {
305            let trimmed = value.trim();
306            if trimmed.is_empty() {
307                None
308            } else {
309                Some(trimmed.to_string())
310            }
311        })
312    }
313
314    fn is_local_base_url(base_url: &str) -> bool {
315        let lowered = base_url.trim().to_ascii_lowercase();
316        const LOCAL_PREFIXES: &[&str] = &[
317            "http://localhost",
318            "https://localhost",
319            "http://127.",
320            "https://127.",
321            "http://0.0.0.0",
322            "https://0.0.0.0",
323            "http://[::1]",
324            "https://[::1]",
325        ];
326
327        LOCAL_PREFIXES
328            .iter()
329            .any(|prefix| lowered.starts_with(prefix))
330    }
331
332    fn with_model_internal(
333        model: String,
334        base_url: Option<String>,
335        api_key: Option<String>,
336        model_behavior: Option<ModelConfig>,
337    ) -> Self {
338        let normalized_api_key = Self::normalize_api_key(api_key);
339        let is_cloud_model = model.contains(":cloud") || model.contains("-cloud");
340
341        let default_base = if is_cloud_model {
342            urls::OLLAMA_CLOUD_API_BASE
343        } else {
344            urls::OLLAMA_API_BASE
345        };
346
347        let resolved_base =
348            override_base_url(default_base, base_url, Some(env_vars::OLLAMA_BASE_URL));
349        let target_is_local = Self::is_local_base_url(&resolved_base);
350
351        // Never send API keys to local endpoints; keep keys for cloud/remote targets
352        let effective_api_key = if target_is_local {
353            None
354        } else {
355            normalized_api_key
356        };
357
358        Self {
359            http_client: http_client::create_default_client(),
360            base_url: resolved_base,
361            model,
362            api_key: effective_api_key,
363            model_behavior,
364        }
365    }
366
367    fn chat_url(&self) -> String {
368        format!("{}/api/chat", self.base_url.trim_end_matches('/'))
369    }
370
371    fn authorized_post(&self, url: String) -> reqwest::RequestBuilder {
372        let builder = self.http_client.post(url);
373        if let Some(api_key) = &self.api_key {
374            builder.bearer_auth(api_key)
375        } else {
376            builder
377        }
378    }
379
380    fn parse_client_prompt(&self, prompt: &str) -> LLMRequest {
381        parse_client_prompt_common(prompt, &self.model, |value| self.parse_chat_request(value))
382    }
383
384    fn parse_chat_request(&self, value: &Value) -> Option<LLMRequest> {
385        let messages_value = value.get("messages")?.as_array()?;
386        let mut system_prompt = value
387            .get("system")
388            .and_then(|entry| entry.as_str())
389            .filter(|text| !text.trim().is_empty())
390            .map(|text| text.to_string());
391        let mut messages = Vec::new();
392
393        for entry in messages_value {
394            let role = entry
395                .get("role")
396                .and_then(|r| r.as_str())
397                .unwrap_or(crate::config::constants::message_roles::USER);
398            let content = entry
399                .get("content")
400                .map(|c| match c {
401                    Value::String(text) => text.to_string(),
402                    other => other.to_string(),
403                })
404                .unwrap_or_default();
405
406            if content.trim().is_empty() {
407                continue;
408            }
409
410            match role {
411                "system" => {
412                    if system_prompt.is_none() {
413                        system_prompt = Some(content);
414                    }
415                }
416                "assistant" => messages.push(Message::assistant(content)),
417                "user" => messages.push(Message::user(content)),
418                _ => {}
419            }
420        }
421
422        if messages.is_empty() {
423            return None;
424        }
425
426        let tools = value
427            .get("tools")
428            .and_then(|entry| serde_json::from_value::<Vec<ToolDefinition>>(entry.clone()).ok());
429
430        Some(LLMRequest {
431            messages,
432            system_prompt: system_prompt.map(std::sync::Arc::new),
433            tools: tools.map(std::sync::Arc::new),
434            model: value
435                .get("model")
436                .and_then(|m| m.as_str())
437                .filter(|m| !m.trim().is_empty())
438                .map(|m| m.to_string())
439                .unwrap_or_else(|| self.model.clone()),
440            max_tokens: value
441                .get("max_tokens")
442                .and_then(|entry| entry.as_u64())
443                .map(|value| value as u32),
444            temperature: value
445                .get("temperature")
446                .and_then(|entry| entry.as_f64())
447                .map(|value| value as f32),
448            stream: value
449                .get("stream")
450                .and_then(|entry| entry.as_bool())
451                .unwrap_or(false),
452            ..Default::default()
453        })
454    }
455
456    fn build_payload(
457        &self,
458        request: &LLMRequest,
459        stream: bool,
460    ) -> Result<OllamaChatRequest, LLMError> {
461        let mut messages = Vec::new();
462        let mut tool_names: HashMap<String, String> = HashMap::new();
463        let minimax_tool_followup_compat = Self::minimax_tool_followup_compat_mode(request);
464
465        if let Some(system) = Self::merged_system_prompt(request) {
466            messages.push(OllamaChatMessage {
467                role: "system".to_string(),
468                content: Some(system),
469                thinking: None,
470                tool_calls: None,
471                tool_call_id: None,
472                tool_name: None,
473                images: None,
474            });
475        }
476
477        for message in &request.messages {
478            let interleaved_content = assistant_interleaved_history_text(message, &request.model);
479            let used_interleaved_content = interleaved_content.is_some();
480            let (content_text, images) = if let Some(interleaved_content) = interleaved_content {
481                (interleaved_content, None)
482            } else {
483                Self::extract_content_and_images(&message.content)
484            };
485            match message.role {
486                MessageRole::System => continue,
487                MessageRole::Tool => {
488                    let tool_name = message
489                        .tool_call_id
490                        .as_ref()
491                        .and_then(|id| tool_names.get(id).cloned());
492                    let tool_name = tool_name.or_else(|| message.origin_tool.clone());
493                    let tool_call_id = if minimax_tool_followup_compat && tool_name.is_some() {
494                        None
495                    } else {
496                        message.tool_call_id.clone()
497                    };
498                    messages.push(OllamaChatMessage {
499                        role: "tool".to_string(),
500                        content: Some(content_text),
501                        thinking: None,
502                        tool_calls: None,
503                        tool_call_id,
504                        tool_name,
505                        images: None,
506                    });
507                }
508                _ => {
509                    let thinking = if used_interleaved_content {
510                        None
511                    } else {
512                        Self::assistant_thinking_history_text(message)
513                    };
514                    let mut payload_message = OllamaChatMessage {
515                        role: message.role.as_generic_str().to_string(),
516                        content: Some(content_text),
517                        thinking,
518                        tool_calls: None,
519                        tool_call_id: None,
520                        tool_name: None,
521                        images,
522                    };
523
524                    if let Some(tool_calls) = message.get_tool_calls() {
525                        let mut converted = Vec::new();
526                        for (index, tool_call) in tool_calls.iter().enumerate() {
527                            if let Some(ref func) = tool_call.function {
528                                if !tool_call.id.is_empty() {
529                                    tool_names
530                                        .entry(tool_call.id.clone())
531                                        .or_insert_with(|| func.name.clone());
532                                }
533
534                                let arguments = tool_call.execution_arguments().map_err(|err| {
535                                    LLMError::InvalidRequest {
536                                        message: format!(
537                                            "Failed to parse tool arguments for Ollama: {err}"
538                                        ),
539                                        metadata: None,
540                                    }
541                                })?;
542                                converted.push(OllamaToolCall {
543                                    call_type: tool_call.call_type.clone(),
544                                    function: OllamaToolFunctionCall {
545                                        name: func.name.clone(),
546                                        arguments: Some(arguments),
547                                        index: Some(index as u32),
548                                    },
549                                });
550                            }
551                        }
552
553                        if !converted.is_empty() {
554                            payload_message.tool_calls = Some(converted);
555                            if payload_message.content.is_none() {
556                                payload_message.content = Some(String::new());
557                            }
558                        }
559                    }
560
561                    messages.push(payload_message);
562                }
563            }
564        }
565
566        let options = if request.temperature.is_some() || request.max_tokens.is_some() {
567            Some(OllamaChatOptions {
568                temperature: request.temperature,
569                num_predict: request.max_tokens,
570            })
571        } else {
572            None
573        };
574
575        let tools = match request.tool_choice {
576            Some(ToolChoice::None) => None,
577            _ => request.tools.as_ref().map(|tools| {
578                tools
579                    .iter()
580                    .filter_map(|tool| {
581                        // Normalize all tools to function type for Ollama compatibility
582                        tool.function.as_ref().map(|func| {
583                            ToolDefinition::function(
584                                func.name.clone(),
585                                func.description.clone(),
586                                func.parameters.clone(),
587                            )
588                        })
589                    })
590                    .collect()
591            }),
592        };
593
594        Ok(OllamaChatRequest {
595            model: request.model.clone(),
596            messages,
597            stream,
598            format: request.output_format.clone(),
599            options,
600            tools,
601            think: Self::think_value(request),
602        })
603    }
604
605    fn assistant_thinking_history_text(message: &Message) -> Option<String> {
606        if message.role != MessageRole::Assistant {
607            return None;
608        }
609
610        message
611            .reasoning
612            .as_deref()
613            .map(str::trim)
614            .filter(|value| !value.is_empty())
615            .map(str::to_owned)
616            .or_else(|| {
617                message
618                    .reasoning_details
619                    .as_deref()
620                    .and_then(extract_reasoning_text_from_detail_values)
621            })
622    }
623
624    fn extract_content_and_images(content: &MessageContent) -> (String, Option<Vec<String>>) {
625        let mut images = Vec::new();
626        if let MessageContent::Parts(parts) = content {
627            for part in parts {
628                if let ContentPart::Image { data, .. } = part {
629                    images.push(data.clone());
630                }
631            }
632        }
633
634        let text = content.as_text().into_owned();
635        let images = if images.is_empty() {
636            None
637        } else {
638            Some(images)
639        };
640        (text, images)
641    }
642
643    fn think_value(request: &LLMRequest) -> Option<Value> {
644        let model_id = request.model.as_str();
645        if Self::minimax_tool_followup_compat_mode(request) {
646            return None;
647        }
648        if !models::ollama::REASONING_MODELS.contains(&model_id) {
649            return None;
650        }
651
652        if models::ollama::REASONING_LEVEL_MODELS.contains(&model_id) {
653            request
654                .reasoning_effort
655                .map(|effort| Value::String(effort.to_string()))
656        } else {
657            Some(Value::Bool(true))
658        }
659    }
660
661    fn minimax_tool_followup_compat_mode(request: &LLMRequest) -> bool {
662        is_minimax_m2_model(&request.model)
663            && request
664                .messages
665                .iter()
666                .any(|message| message.role == MessageRole::Tool || message.has_tool_calls())
667    }
668
669    fn convert_tool_calls(
670        tool_calls: Option<Vec<OllamaResponseToolCall>>,
671    ) -> Result<Option<Vec<ToolCall>>, LLMError> {
672        let Some(tool_calls) = tool_calls else {
673            return Ok(None);
674        };
675
676        if tool_calls.is_empty() {
677            return Ok(None);
678        }
679
680        let mut converted = Vec::new();
681        for (index, call) in tool_calls.into_iter().enumerate() {
682            let function = call.function.ok_or_else(|| LLMError::Provider {
683                message: "Ollama response missing function details for tool call".to_string(),
684                metadata: None,
685            })?;
686
687            let name = function.name.ok_or_else(|| LLMError::Provider {
688                message: "Ollama response missing tool function name".to_string(),
689                metadata: None,
690            })?;
691
692            let arguments_value = function
693                .arguments
694                .unwrap_or_else(|| Value::Object(Map::new()));
695            let arguments = match arguments_value {
696                Value::String(raw) => raw,
697                other => serde_json::to_string(&other).map_err(|err| LLMError::Provider {
698                    message: format!("Failed to serialize Ollama tool arguments: {err}"),
699                    metadata: None,
700                })?,
701            };
702
703            let id = function
704                .index
705                .map(|value| format!("tool_call_{value}"))
706                .unwrap_or_else(|| format!("tool_call_{index}"));
707
708            converted.push(ToolCall::function(id, name, arguments));
709        }
710
711        Ok(Some(converted))
712    }
713
714    fn usage_from_counts(
715        prompt_tokens: Option<u32>,
716        completion_tokens: Option<u32>,
717    ) -> Option<Usage> {
718        if prompt_tokens.is_none() && completion_tokens.is_none() {
719            return None;
720        }
721
722        let prompt = prompt_tokens.unwrap_or_default();
723        let completion = completion_tokens.unwrap_or_default();
724        Some(Usage {
725            prompt_tokens: prompt,
726            completion_tokens: completion,
727            total_tokens: prompt + completion,
728            cached_prompt_tokens: None,
729            cache_creation_tokens: None,
730            cache_read_tokens: None,
731        })
732    }
733
734    fn finish_reason_from(reason: Option<&str>) -> FinishReason {
735        match reason {
736            Some("stop") | None => FinishReason::Stop,
737            Some("length") => FinishReason::Length,
738            Some("tool_calls") => FinishReason::ToolCalls,
739            Some(other) => FinishReason::Error(other.to_string()),
740        }
741    }
742
743    fn build_response(
744        content: Option<String>,
745        tool_calls: Option<Vec<ToolCall>>,
746        reasoning: Option<String>,
747        reasoning_details: Option<Vec<String>>,
748        model: String,
749        finish_reason: Option<&str>,
750        prompt_tokens: Option<u32>,
751        completion_tokens: Option<u32>,
752    ) -> LLMResponse {
753        let mut finish = Self::finish_reason_from(finish_reason);
754        if tool_calls.as_ref().is_some_and(|calls| !calls.is_empty()) {
755            finish = FinishReason::ToolCalls;
756        }
757
758        LLMResponse {
759            content,
760            tool_calls,
761            model,
762            usage: Self::usage_from_counts(prompt_tokens, completion_tokens),
763            finish_reason: finish,
764            reasoning,
765            reasoning_details,
766            tool_references: Vec::new(),
767            request_id: None,
768            organization_id: None,
769            compaction: None,
770        }
771    }
772
773    fn response_from_chat_payload(
774        model: String,
775        parsed: OllamaChatResponse,
776    ) -> Result<LLMResponse, LLMError> {
777        if let Some(error) = parsed.error {
778            return Err(LLMError::Provider {
779                message: error,
780                metadata: None,
781            });
782        }
783
784        let (content, reasoning, tool_calls, native_reasoning_details) =
785            if let Some(message) = parsed.message {
786                let content = message
787                    .content
788                    .and_then(|value| (!value.is_empty()).then_some(value));
789                let reasoning = message
790                    .thinking
791                    .and_then(|value| (!value.is_empty()).then_some(value));
792                let tool_calls = Self::convert_tool_calls(message.tool_calls)?;
793                let native_reasoning_details = message.reasoning_details.filter(|d| !d.is_empty());
794                (content, reasoning, tool_calls, native_reasoning_details)
795            } else {
796                (None, None, None, None)
797            };
798
799        let reasoning = reasoning.or_else(|| {
800            native_reasoning_details
801                .as_deref()
802                .and_then(extract_reasoning_text_from_detail_values)
803        });
804        let mut reasoning_details = native_reasoning_details
805            .as_deref()
806            .and_then(serialize_reasoning_detail_values);
807
808        // Fallback: Extract reasoning from content if not provided natively
809        // This handles MiniMax-M2.5 cloud models that use <think></think> tags
810        let (final_reasoning, final_content) = if reasoning.is_none() {
811            if let Some(ref content_str) = content {
812                let (reasoning_parts, cleaned_content) =
813                    crate::llm::utils::extract_reasoning_content(content_str);
814                if reasoning_parts.is_empty() {
815                    (None, content)
816                } else {
817                    super::common::preserve_interleaved_content_in_reasoning_details(
818                        &mut reasoning_details,
819                        content_str,
820                    );
821                    (
822                        Some(reasoning_parts.join("\n\n")),
823                        cleaned_content.or(content),
824                    )
825                }
826            } else {
827                (None, content)
828            }
829        } else {
830            (reasoning, content)
831        };
832
833        Ok(Self::build_response(
834            final_content,
835            tool_calls,
836            final_reasoning,
837            reasoning_details,
838            model,
839            parsed.done_reason.as_deref(),
840            parsed.prompt_eval_count,
841            parsed.eval_count,
842        ))
843    }
844
845    fn authorized_post_with_key(
846        http_client: &HttpClient,
847        url: &str,
848        api_key: Option<&str>,
849    ) -> reqwest::RequestBuilder {
850        let builder = http_client.post(url.to_string());
851        if let Some(value) = api_key {
852            builder.bearer_auth(value)
853        } else {
854            builder
855        }
856    }
857
858    async fn request_non_stream_response(
859        http_client: &HttpClient,
860        url: &str,
861        api_key: Option<&str>,
862        payload: &OllamaChatRequest,
863        model: String,
864    ) -> Result<LLMResponse, LLMError> {
865        let response = Self::authorized_post_with_key(http_client, url, api_key)
866            .json(payload)
867            .send()
868            .await
869            .map_err(|e| format_network_error("Ollama", &e))?;
870
871        if !response.status().is_success() {
872            let status = response.status();
873            let body = response.text().await.unwrap_or_default();
874            let error_message = Self::extract_error(&body)
875                .unwrap_or_else(|| format!("Ollama request failed ({status}): {body}"));
876            return Err(LLMError::Provider {
877                message: error_message,
878                metadata: None,
879            });
880        }
881
882        let parsed = response
883            .json::<OllamaChatResponse>()
884            .await
885            .map_err(|e| format_parse_error("Ollama", &e))?;
886        Self::response_from_chat_payload(model, parsed)
887    }
888
889    fn extract_error(body: &str) -> Option<String> {
890        serde_json::from_str::<OllamaErrorResponse>(body)
891            .ok()
892            .and_then(|resp| resp.error)
893    }
894}
895
/// JSON body posted to the chat endpoint.
///
/// Every `Option` field is omitted from the serialized JSON when it is `None`.
#[derive(Debug, Serialize)]
struct OllamaChatRequest {
    /// Model identifier the request targets.
    model: String,
    /// Conversation messages, in the order they are sent.
    messages: Vec<OllamaChatMessage>,
    // NOTE(review): presumably mirrors the `stream` flag passed to
    // `build_payload` — confirm there.
    stream: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<Value>,
    /// Optional generation options (see [`OllamaChatOptions`]).
    #[serde(skip_serializing_if = "Option::is_none")]
    options: Option<OllamaChatOptions>,
    /// Tool definitions offered to the model, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<ToolDefinition>>,
    /// Reasoning control value; the tests in this file show it as the string
    /// `"low"` for a low reasoning effort on a non-MiniMax model and as `None`
    /// for the MiniMax cloud model.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<Value>,
}
910
/// One message inside an [`OllamaChatRequest`].
///
/// Every `Option` field is omitted from the serialized JSON when it is `None`.
#[derive(Debug, Serialize)]
struct OllamaChatMessage {
    /// Role string; the tests in this file observe `"system"`, `"user"` and `"tool"`.
    role: String,
    /// Text content of the message.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Reasoning text replayed alongside an assistant message.
    #[serde(skip_serializing_if = "Option::is_none")]
    thinking: Option<String>,
    /// Image payloads as strings; the tests pass the image data string through
    /// unchanged.
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    /// Tool calls attached to an assistant message.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OllamaToolCall>>,
    /// Identifier linking a tool reply to its call; the tests show it omitted
    /// for the MiniMax cloud model and kept for other models.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
    /// Name of the tool a tool reply belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_name: Option<String>,
}
927
/// Optional generation options nested under `options` in an
/// [`OllamaChatRequest`]; unset fields are omitted from the JSON.
#[derive(Debug, Serialize)]
struct OllamaChatOptions {
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    // NOTE(review): presumably the output-token limit — confirm against the
    // code that fills this struct.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<u32>,
}
935
/// Tool call attached to an outgoing [`OllamaChatMessage`].
#[derive(Debug, Serialize)]
struct OllamaToolCall {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    call_type: String,
    /// The function invocation this call describes.
    function: OllamaToolFunctionCall,
}
942
/// Function invocation carried by an [`OllamaToolCall`].
#[derive(Debug, Serialize)]
struct OllamaToolFunctionCall {
    /// Name of the function being called.
    name: String,
    /// Call arguments as a JSON value; omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    arguments: Option<Value>,
    /// Omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    index: Option<u32>,
}
951
/// Reply body from the chat endpoint.
///
/// The same shape is decoded for a whole non-streaming reply and for each
/// line of a streamed reply.
#[derive(Debug, Deserialize)]
struct OllamaChatResponse {
    /// Assistant message (or message fragment) carried by this reply, if any.
    message: Option<OllamaResponseMessage>,
    /// Marks the chunk that completes a streamed reply; `false` when absent.
    #[serde(default)]
    done: bool,
    /// Raw finish-reason string reported with the reply.
    #[serde(default)]
    done_reason: Option<String>,
    /// Used by callers as the prompt token count.
    #[serde(default)]
    prompt_eval_count: Option<u32>,
    /// Used by callers as the completion token count.
    #[serde(default)]
    eval_count: Option<u32>,
    /// Error text; the streaming path turns a present value into a provider error.
    #[serde(default)]
    error: Option<String>,
}
966
/// Assistant message inside an [`OllamaChatResponse`]; every field is optional
/// and defaults to `None` when missing.
#[derive(Debug, Deserialize)]
struct OllamaResponseMessage {
    /// Deserialized but never read, hence the `dead_code` expectation.
    #[serde(default)]
    #[expect(dead_code)]
    role: Option<String>,
    /// Visible answer text (or a fragment of it when streaming).
    #[serde(default)]
    content: Option<String>,
    /// Explicit reasoning text supplied by the server.
    #[serde(default)]
    thinking: Option<String>,
    /// Structured reasoning details, kept as raw JSON values.
    #[serde(default)]
    reasoning_details: Option<Vec<Value>>,
    /// Tool calls requested by the model.
    #[serde(default)]
    tool_calls: Option<Vec<OllamaResponseToolCall>>,
}
981
/// Tool call as received in a reply.
///
/// It also derives `Serialize` because the streaming path converts each call
/// back into a `serde_json::Value` before handing it to the aggregator.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct OllamaResponseToolCall {
    /// Read from / written to the JSON key `type`.
    #[serde(default)]
    #[serde(rename = "type")]
    call_type: Option<String>,
    /// The function invocation, when present.
    #[serde(default)]
    function: Option<OllamaResponseFunctionCall>,
}
990
/// Function invocation inside an [`OllamaResponseToolCall`]; all fields are
/// optional and default to `None` when missing.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct OllamaResponseFunctionCall {
    /// Name of the function the model wants to call.
    #[serde(default)]
    name: Option<String>,
    /// Call arguments as a raw JSON value.
    #[serde(default)]
    arguments: Option<Value>,
    #[serde(default)]
    index: Option<u32>,
}
1000
/// Minimal shape of an error body; `extract_error` decodes it to recover the
/// server's error text.
#[derive(Debug, Deserialize)]
struct OllamaErrorResponse {
    /// Error message, if the body contains one.
    error: Option<String>,
}
1005
1006fn parse_stream_chunk(line: &str) -> Result<OllamaChatResponse, LLMError> {
1007    serde_json::from_str::<OllamaChatResponse>(line).map_err(|err| LLMError::Provider {
1008        message: format!("Failed to parse Ollama stream chunk: {err}"),
1009        metadata: None,
1010    })
1011}
1012
1013#[async_trait]
1014impl LLMProvider for OllamaProvider {
    /// Identifier string reported for this provider.
    fn name(&self) -> &str {
        "ollama"
    }
1018
    /// Streaming is always reported as supported.
    fn supports_streaming(&self) -> bool {
        true
    }
1022
    /// Tool calling is reported as supported for every model; `_model` is not
    /// consulted.
    fn supports_tools(&self, _model: &str) -> bool {
        true
    }
1026
1027    fn supports_reasoning(&self, model: &str) -> bool {
1028        // Codex-inspired robustness: Setting model_supports_reasoning to false
1029        // does NOT disable it for known reasoning models.
1030        models::ollama::REASONING_MODELS.contains(&model)
1031            || self
1032                .model_behavior
1033                .as_ref()
1034                .and_then(|b| b.model_supports_reasoning)
1035                .unwrap_or(false)
1036    }
1037
1038    fn supports_reasoning_effort(&self, model: &str) -> bool {
1039        // Same robustness logic for reasoning effort
1040        models::ollama::REASONING_LEVEL_MODELS.contains(&model)
1041            || self
1042                .model_behavior
1043                .as_ref()
1044                .and_then(|b| b.model_supports_reasoning_effort)
1045                .unwrap_or(false)
1046    }
1047
1048    async fn generate(&self, mut request: LLMRequest) -> Result<LLMResponse, LLMError> {
1049        self.validate_request(&request)?;
1050        if request.model.is_empty() {
1051            request.model = self.model.clone();
1052        }
1053        let model = request.model.clone();
1054        let payload = self.build_payload(&request, false)?;
1055        let url = self.chat_url();
1056        Self::request_non_stream_response(
1057            &self.http_client,
1058            &url,
1059            self.api_key.as_deref(),
1060            &payload,
1061            model,
1062        )
1063        .await
1064    }
1065
1066    async fn stream(&self, mut request: LLMRequest) -> Result<LLMStream, LLMError> {
1067        self.validate_request(&request)?;
1068        if request.model.is_empty() {
1069            request.model = self.model.clone();
1070        }
1071        let model = request.model.clone();
1072        let payload = self.build_payload(&request, true)?;
1073        let fallback_payload = self.build_payload(&request, false)?;
1074        let url = self.chat_url();
1075
1076        let response = self
1077            .authorized_post(url.clone())
1078            .header(reqwest::header::ACCEPT_ENCODING, "identity")
1079            .json(&payload)
1080            .send()
1081            .await
1082            .map_err(|e| format_network_error("Ollama", &e))?;
1083
1084        if !response.status().is_success() {
1085            let status = response.status();
1086            let body = response.text().await.unwrap_or_default();
1087            let error_message = Self::extract_error(&body)
1088                .unwrap_or_else(|| format!("Ollama streaming request failed ({status}): {body}"));
1089            return Err(LLMError::Provider {
1090                message: error_message,
1091                metadata: None,
1092            });
1093        }
1094
1095        let byte_stream = response.bytes_stream();
1096        let mut buffer: Vec<u8> = Vec::new();
1097        let mut aggregator = crate::llm::providers::shared::StreamAggregator::new(model.clone());
1098        let fallback_http_client = self.http_client.clone();
1099        let fallback_api_key = self.api_key.clone();
1100        let fallback_model = model.clone();
1101        let fallback_url = url.clone();
1102        let any_interleaved = request
1103            .messages
1104            .iter()
1105            .any(|msg| assistant_interleaved_history_text(msg, &request.model).is_some());
1106        let stream = try_stream! {
1107            let mut prompt_tokens: Option<u32> = None;
1108            let mut completion_tokens: Option<u32> = None;
1109            let mut finish_reason: Option<String> = None;
1110            let mut completed = false;
1111            let mut saw_stream_chunk = false;
1112
1113            futures::pin_mut!(byte_stream);
1114            while let Some(chunk_result) = byte_stream.next().await {
1115                let chunk = match chunk_result {
1116                    Ok(chunk) => {
1117                        saw_stream_chunk = true;
1118                        chunk
1119                    }
1120                    Err(err) if !saw_stream_chunk => {
1121                        tracing::warn!(
1122                            model = %fallback_model,
1123                            url = %fallback_url,
1124                            error = %err,
1125                            "Ollama stream failed before first chunk; retrying once as non-stream response"
1126                        );
1127                        let fallback_response = Self::request_non_stream_response(
1128                            &fallback_http_client,
1129                            &fallback_url,
1130                            fallback_api_key.as_deref(),
1131                            &fallback_payload,
1132                            fallback_model.clone(),
1133                        ).await?;
1134                        yield LLMStreamEvent::Completed { response: Box::new(fallback_response) };
1135                        return;
1136                    }
1137                    Err(err) => Err(format_network_error("Ollama", &err))?,
1138                };
1139                buffer.extend_from_slice(&chunk);
1140
1141                while let Some(pos) = buffer.iter().position(|b| *b == b'\n') {
1142                    let line_bytes: Vec<u8> = buffer.drain(..=pos).collect();
1143                    let line = std::str::from_utf8(&line_bytes)
1144                        .map_err(|err| LLMError::Provider {
1145                            message: format!("Invalid UTF-8 in Ollama stream: {err}"),
1146                            metadata: None,
1147                        })?;
1148                    let line = line.trim();
1149
1150                    if line.is_empty() {
1151                        continue;
1152                    }
1153
1154                    let parsed = parse_stream_chunk(line)?;
1155
1156                    if let Some(error) = parsed.error {
1157                        Err(LLMError::Provider {
1158                            message: error,
1159                            metadata: None,
1160                        })?;
1161                    }
1162
1163                    if let Some(message) = parsed.message {
1164                        if let Some(reasoning_details) = message.reasoning_details.as_deref() {
1165                            aggregator.set_reasoning_details(reasoning_details);
1166                        }
1167
1168                        let has_explicit_thinking = message
1169                            .thinking
1170                            .as_ref()
1171                            .map(|v| !v.is_empty())
1172                            .unwrap_or(false);
1173
1174                        if let Some(thinking) = message.thinking
1175                            && let Some(delta) = aggregator.handle_reasoning(&thinking) {
1176                                yield LLMStreamEvent::Reasoning { delta };
1177                            }
1178
1179                        if let Some(content) = message.content {
1180                            for event in aggregator.handle_content(&content) {
1181                                match &event {
1182                                    LLMStreamEvent::Reasoning { .. }
1183                                        if has_explicit_thinking || any_interleaved =>
1184                                    {
1185                                    }
1186                                    _ => yield event,
1187                                }
1188                            }
1189                        }
1190
1191                        if let Some(tool_calls) = message.tool_calls {
1192                            let tool_calls_json: Vec<Value> = tool_calls
1193                                .into_iter()
1194                                .map(|tc| serde_json::to_value(tc).unwrap_or(Value::Null))
1195                                .filter(|v| !v.is_null())
1196                                .collect();
1197                            aggregator.handle_tool_calls(&tool_calls_json);
1198                        }
1199                    }
1200
1201                    if parsed.done {
1202                        prompt_tokens = parsed.prompt_eval_count;
1203                        completion_tokens = parsed.eval_count;
1204                        finish_reason = parsed.done_reason;
1205                        completed = true;
1206                    }
1207                }
1208
1209                if completed {
1210                    break;
1211                }
1212            }
1213
1214            if !completed {
1215                Err(LLMError::Provider {
1216                    message: "Ollama stream ended without completion signal".to_string(),
1217                    metadata: None,
1218                })?;
1219            }
1220
1221            let mut response = aggregator.finalize();
1222            if let Some(pt) = prompt_tokens {
1223                let mut usage = response.usage.unwrap_or_default();
1224                usage.prompt_tokens = pt;
1225                if let Some(ct) = completion_tokens {
1226                    usage.completion_tokens = ct;
1227                    usage.total_tokens = pt + ct;
1228                }
1229                response.usage = Some(usage);
1230            }
1231            if let Some(fr) = finish_reason {
1232                response.finish_reason = crate::llm::providers::common::map_finish_reason_common(&fr);
1233            }
1234            if response.reasoning.is_none()
1235                && let Some(details) = response.reasoning_details.as_ref()
1236            {
1237                response.reasoning = extract_reasoning_text_from_serialized_details(details);
1238            }
1239
1240            yield LLMStreamEvent::Completed { response: Box::new(response) };
1241        };
1242
1243        Ok(Box::pin(stream))
1244    }
1245
1246    fn supported_models(&self) -> Vec<String> {
1247        models::ollama::SUPPORTED_MODELS
1248            .iter()
1249            .map(|model| model.to_string())
1250            .collect()
1251    }
1252
1253    fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
1254        if let Some(tool_choice) = &request.tool_choice {
1255            match tool_choice {
1256                ToolChoice::Auto | ToolChoice::None => {}
1257                _ => {
1258                    return Err(LLMError::InvalidRequest {
1259                        message: "Ollama does not support explicit tool_choice overrides"
1260                            .to_string(),
1261                        metadata: None,
1262                    });
1263                }
1264            }
1265        }
1266
1267        if request.parallel_tool_calls.is_some() || request.parallel_tool_config.is_some() {
1268            return Err(LLMError::InvalidRequest {
1269                message: "Ollama does not support parallel tool configuration".to_string(),
1270                metadata: None,
1271            });
1272        }
1273
1274        for message in &request.messages {
1275            if matches!(message.role, MessageRole::Tool) && message.tool_call_id.is_none() {
1276                return Err(LLMError::InvalidRequest {
1277                    message: "Ollama tool responses must include tool_call_id".to_string(),
1278                    metadata: None,
1279                });
1280            }
1281        }
1282
1283        Ok(())
1284    }
1285}
1286
1287#[async_trait]
1288impl LLMClient for OllamaProvider {
1289    async fn generate(&mut self, prompt: &str) -> Result<LLMResponse, LLMError> {
1290        let mut request = self.parse_client_prompt(prompt);
1291        if request.model.is_empty() {
1292            request.model = self.model.clone();
1293        }
1294        Ok(LLMProvider::generate(self, request).await?)
1295    }
1296
1297    fn model_id(&self) -> &str {
1298        &self.model
1299    }
1300}
1301
1302#[cfg(test)]
1303mod tests {
1304    use super::*;
1305    use crate::config::types::ReasoningEffortLevel;
1306    use crate::llm::provider::{ContentPart, Message, MessageContent};
1307    use serde_json::json;
1308
1309    fn test_provider() -> OllamaProvider {
1310        OllamaProvider::from_config(
1311            None,
1312            Some("test-model".to_string()),
1313            Some("http://localhost".to_string()),
1314            None,
1315            None,
1316            None,
1317            None,
1318        )
1319    }
1320
1321    #[test]
1322    fn build_payload_includes_images() {
1323        let provider = test_provider();
1324        let parts = vec![
1325            ContentPart::text("see ".to_string()),
1326            ContentPart::image("BASE64DATA".to_string(), "image/png".to_string()),
1327        ];
1328        let request = LLMRequest {
1329            model: "test-model".to_string(),
1330            messages: vec![Message::user_with_parts(parts)],
1331            ..Default::default()
1332        };
1333
1334        let payload = provider.build_payload(&request, false).unwrap();
1335        assert_eq!(payload.messages.len(), 1);
1336        let message = &payload.messages[0];
1337        assert_eq!(message.content.as_deref(), Some("see "));
1338        assert_eq!(
1339            message.images.as_ref(),
1340            Some(&vec!["BASE64DATA".to_string()])
1341        );
1342    }
1343
1344    #[test]
1345    fn build_payload_omits_images_when_none_present() {
1346        let provider = test_provider();
1347        let content = MessageContent::text("no images".to_string());
1348        let request = LLMRequest {
1349            model: "test-model".to_string(),
1350            messages: vec![Message::user(content.as_text().into_owned())],
1351            ..Default::default()
1352        };
1353
1354        let payload = provider.build_payload(&request, false).unwrap();
1355        assert_eq!(payload.messages.len(), 1);
1356        let message = &payload.messages[0];
1357        assert_eq!(message.content.as_deref(), Some("no images"));
1358        assert!(message.images.is_none());
1359    }
1360
1361    #[test]
1362    fn build_payload_minimax_tool_followup_omits_tool_call_id() {
1363        let provider = test_provider();
1364        let tool_call_id = "direct_run_pty_cmd_1".to_string();
1365        let request = LLMRequest {
1366            model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1367            messages: vec![
1368                Message::assistant_with_tools(
1369                    String::new(),
1370                    vec![ToolCall::function(
1371                        tool_call_id.clone(),
1372                        "run_pty_cmd".to_string(),
1373                        "{\"command\":\"cargo fmt\"}".to_string(),
1374                    )],
1375                ),
1376                Message::tool_response(
1377                    tool_call_id,
1378                    "{\"output\":\"\",\"exit_code\":0}".to_string(),
1379                ),
1380            ],
1381            reasoning_effort: Some(ReasoningEffortLevel::Low),
1382            ..Default::default()
1383        };
1384
1385        let payload = provider.build_payload(&request, false).unwrap();
1386        assert_eq!(payload.messages.len(), 2);
1387        assert_eq!(payload.messages[1].role, "tool");
1388        assert_eq!(
1389            payload.messages[1].tool_name.as_deref(),
1390            Some("run_pty_cmd")
1391        );
1392        assert!(payload.messages[1].tool_call_id.is_none());
1393        assert!(payload.think.is_none());
1394    }
1395
1396    #[test]
1397    fn build_payload_non_minimax_tool_followup_keeps_tool_call_id() {
1398        let provider = test_provider();
1399        let tool_call_id = "direct_run_pty_cmd_1".to_string();
1400        let request = LLMRequest {
1401            model: models::ollama::GPT_OSS_20B_CLOUD.to_string(),
1402            messages: vec![
1403                Message::assistant_with_tools(
1404                    String::new(),
1405                    vec![ToolCall::function(
1406                        tool_call_id.clone(),
1407                        "run_pty_cmd".to_string(),
1408                        "{\"command\":\"cargo fmt\"}".to_string(),
1409                    )],
1410                ),
1411                Message::tool_response(
1412                    tool_call_id.clone(),
1413                    "{\"output\":\"\",\"exit_code\":0}".to_string(),
1414                ),
1415            ],
1416            reasoning_effort: Some(ReasoningEffortLevel::Low),
1417            ..Default::default()
1418        };
1419
1420        let payload = provider.build_payload(&request, false).unwrap();
1421        assert_eq!(payload.messages.len(), 2);
1422        assert_eq!(payload.messages[1].role, "tool");
1423        assert_eq!(
1424            payload.messages[1].tool_name.as_deref(),
1425            Some("run_pty_cmd")
1426        );
1427        assert_eq!(
1428            payload.messages[1].tool_call_id.as_deref(),
1429            Some(tool_call_id.as_str())
1430        );
1431        assert_eq!(payload.think, Some(Value::String("low".to_string())));
1432    }
1433
1434    #[test]
1435    fn build_payload_hoists_history_system_directives_into_system_prompt() {
1436        let provider = test_provider();
1437        let request = LLMRequest {
1438            model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1439            system_prompt: Some(std::sync::Arc::new(
1440                "stable system instructions".to_string(),
1441            )),
1442            messages: vec![
1443                Message::user("explore architecture".to_string()),
1444                Message::system(
1445                    "Previous turn already completed tool execution. Reuse the latest tool outputs in history instead of rerunning the same exploration.".to_string(),
1446                ),
1447            ],
1448            ..Default::default()
1449        };
1450
1451        let payload = provider.build_payload(&request, false).unwrap();
1452        assert_eq!(payload.messages.len(), 2);
1453        assert_eq!(payload.messages[0].role, "system");
1454        assert!(
1455            payload.messages[0]
1456                .content
1457                .as_deref()
1458                .unwrap_or("")
1459                .contains("stable system instructions")
1460        );
1461        assert!(
1462            payload.messages[0]
1463                .content
1464                .as_deref()
1465                .unwrap_or("")
1466                .contains("[History Directives]")
1467        );
1468        assert!(
1469            payload.messages[0]
1470                .content
1471                .as_deref()
1472                .unwrap_or("")
1473                .contains("Previous turn already completed tool execution")
1474        );
1475        assert_eq!(payload.messages[1].role, "user");
1476        assert_eq!(
1477            payload.messages[1].content.as_deref(),
1478            Some("explore architecture")
1479        );
1480    }
1481
1482    #[test]
1483    fn build_payload_promotes_history_system_directive_without_base_system_prompt() {
1484        let provider = test_provider();
1485        let request = LLMRequest {
1486            model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1487            messages: vec![
1488                Message::system(
1489                    "Repeated read-only exploration hit the per-turn family cap. Scheduling a final recovery pass without more tools.".to_string(),
1490                ),
1491                Message::user("summarize the architecture".to_string()),
1492            ],
1493            ..Default::default()
1494        };
1495
1496        let payload = provider.build_payload(&request, false).unwrap();
1497        assert_eq!(payload.messages.len(), 2);
1498        assert_eq!(payload.messages[0].role, "system");
1499        assert!(
1500            payload.messages[0]
1501                .content
1502                .as_deref()
1503                .unwrap_or("")
1504                .contains("[History Directives]")
1505        );
1506        assert!(
1507            payload.messages[0]
1508                .content
1509                .as_deref()
1510                .unwrap_or("")
1511                .contains("Repeated read-only exploration hit the per-turn family cap")
1512        );
1513        assert_eq!(payload.messages[1].role, "user");
1514    }
1515
1516    #[test]
1517    fn build_payload_recovers_balanced_prefix_from_malformed_history_tool_arguments() {
1518        let provider = test_provider();
1519        let request = LLMRequest {
1520            model: "test-model".to_string(),
1521            messages: vec![Message::assistant_with_tools(
1522                String::new(),
1523                vec![ToolCall::function(
1524                    "tool_call_0".to_string(),
1525                    "unified_file".to_string(),
1526                    "{\"action\":\"read\",\"path\":\"docs/ARCHITECTURE.md\",\"offset\":1,\"limit\":100}{\"action\":\"read\",\"path\":\"README.md\"}"
1527                        .to_string(),
1528                )],
1529            )],
1530            ..Default::default()
1531        };
1532
1533        let payload = provider
1534            .build_payload(&request, false)
1535            .expect("payload should recover malformed history tool arguments");
1536
1537        let tool_calls = payload.messages[0]
1538            .tool_calls
1539            .as_ref()
1540            .expect("tool calls should be present");
1541        assert_eq!(tool_calls.len(), 1);
1542        assert_eq!(
1543            tool_calls[0].function.arguments,
1544            Some(json!({
1545                "action": "read",
1546                "path": "docs/ARCHITECTURE.md",
1547                "offset": 1,
1548                "limit": 100
1549            }))
1550        );
1551    }
1552
1553    #[test]
1554    fn build_payload_rehydrates_glm_interleaved_history_into_content() {
1555        let provider = test_provider();
1556        let request = LLMRequest {
1557            model: models::ollama::GLM_5_CLOUD.to_string(),
1558            messages: vec![
1559                Message::assistant("done".to_string()).with_reasoning(Some("trace".to_string())),
1560            ],
1561            ..Default::default()
1562        };
1563
1564        let payload = provider.build_payload(&request, false).unwrap();
1565
1566        assert_eq!(
1567            payload.messages[0].content.as_deref(),
1568            Some("<think>trace</think>done")
1569        );
1570        assert!(payload.messages[0].thinking.is_none());
1571    }
1572
1573    #[test]
1574    fn build_payload_replays_assistant_reasoning_as_ollama_thinking() {
1575        let provider = test_provider();
1576        let request = LLMRequest {
1577            model: models::ollama::GPT_OSS_20B.to_string(),
1578            messages: vec![
1579                Message::assistant("need a tool".to_string())
1580                    .with_reasoning(Some("reasoning trace".to_string())),
1581            ],
1582            ..Default::default()
1583        };
1584
1585        let payload = provider.build_payload(&request, false).unwrap();
1586
1587        assert_eq!(payload.messages[0].content.as_deref(), Some("need a tool"));
1588        assert_eq!(
1589            payload.messages[0].thinking.as_deref(),
1590            Some("reasoning trace")
1591        );
1592    }
1593
1594    #[test]
1595    fn build_payload_includes_apply_patch_as_normal_tool() {
1596        let provider = test_provider();
1597        let request = LLMRequest {
1598            model: "test-model".to_string(),
1599            messages: vec![Message::user("patch this file".to_string())],
1600            tools: Some(std::sync::Arc::new(vec![ToolDefinition::apply_patch(
1601                "Apply VT Code patches".to_string(),
1602            )])),
1603            ..Default::default()
1604        };
1605
1606        let payload = provider.build_payload(&request, false).unwrap();
1607        let tools = payload.tools.expect("tools should be present");
1608        assert_eq!(tools.len(), 1);
1609        assert_eq!(tools[0].function_name(), "apply_patch");
1610    }
1611
1612    #[test]
1613    fn response_payload_preserves_reasoning_details() {
1614        let parsed = OllamaChatResponse {
1615            message: Some(OllamaResponseMessage {
1616                role: Some("assistant".to_string()),
1617                content: Some("answer".to_string()),
1618                thinking: None,
1619                reasoning_details: Some(vec![json!({
1620                    "type": "reasoning.text",
1621                    "text": "step one"
1622                })]),
1623                tool_calls: None,
1624            }),
1625            done: true,
1626            done_reason: Some("stop".to_string()),
1627            prompt_eval_count: Some(1),
1628            eval_count: Some(2),
1629            error: None,
1630        };
1631
1632        let response = OllamaProvider::response_from_chat_payload("test-model".to_string(), parsed)
1633            .expect("response should parse");
1634        assert_eq!(response.reasoning.as_deref(), Some("step one"));
1635        assert!(response.reasoning_details.is_some());
1636
1637        let first_detail = response
1638            .reasoning_details
1639            .as_ref()
1640            .and_then(|details| details.first())
1641            .expect("reasoning detail should exist");
1642        let parsed_detail: Value =
1643            serde_json::from_str(first_detail).expect("reasoning detail should be json");
1644        assert_eq!(parsed_detail["type"], "reasoning.text");
1645    }
1646
1647    #[test]
1648    fn tags_response_accepts_partial_model_summaries() {
1649        let parsed: OllamaTagsResponse = serde_json::from_value(json!({
1650            "models": [
1651                { "model": "qwen3:8b" }
1652            ]
1653        }))
1654        .expect("partial model summaries should parse");
1655
1656        let names: Vec<String> = parsed
1657            .models
1658            .into_iter()
1659            .filter_map(|model| model.name.or(model.model))
1660            .collect();
1661        assert_eq!(names, vec!["qwen3:8b".to_string()]);
1662    }
1663}