Skip to main content

vtcode_core/llm/providers/
ollama.rs

1use crate::config::TimeoutsConfig;
2use crate::config::constants::{env_vars, models, urls};
3use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
4use crate::llm::client::LLMClient;
5use crate::llm::provider::{
6    ContentPart, FinishReason, LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream,
7    LLMStreamEvent, Message, MessageContent, MessageRole, ToolCall, ToolChoice, ToolDefinition,
8    Usage,
9};
10use crate::utils::http_client;
11use anyhow::Result;
12use async_stream::try_stream;
13use async_trait::async_trait;
14use futures::StreamExt;
15use hashbrown::HashMap;
16use reqwest::Client as HttpClient;
17use serde::{Deserialize, Serialize};
18use serde_json::{Map, Value};
19
20pub mod client;
21pub mod parser;
22pub mod pull;
23pub mod url;
24
25pub use client::OllamaClient;
26pub use parser::pull_events_from_value;
27pub use pull::{
28    CliPullProgressReporter, OllamaPullEvent, OllamaPullProgressReporter, TuiPullProgressReporter,
29};
30pub use url::{base_url_to_host_root, is_openai_compatible_base_url};
31
32use semver::{Version, VersionReq};
33
34use super::common::{
35    assistant_interleaved_history_text, collect_history_system_directives,
36    extract_reasoning_text_from_detail_values, extract_reasoning_text_from_serialized_details,
37    is_minimax_m2_model, merge_system_prompt_with_history_directives, override_base_url,
38    parse_client_prompt_common, resolve_model, serialize_reasoning_detail_values,
39};
40use super::error_handling::{format_network_error, format_parse_error};
41
42// ============================================================================
43// Wire API Detection (adapted from OpenAI Codex's codex-ollama/src/lib.rs)
44// ============================================================================
45
/// Wire protocol that the Ollama server supports.
/// Based on OpenAI Codex's WireApi enum.
///
/// The variant is chosen from the server version by `wire_api_for_version`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OllamaWireApi {
    /// The Responses API (OpenAI-compatible at `/v1/responses`).
    Responses,
    /// Regular Chat Completions compatible with `/v1/chat/completions`.
    Chat,
}
55
/// Result of detecting which wire API the Ollama server supports.
///
/// Produced by `detect_wire_api`.
pub struct WireApiDetection {
    /// Wire API selected for the detected server version.
    pub wire_api: OllamaWireApi,
    /// Server version the selection was based on; `detect_wire_api` always
    /// fills this with `Some(..)` when it returns a detection.
    pub version: Option<Version>,
}
61
/// Version requirement for Ollama servers that support the Responses API.
/// Release versions >= 0.13.3 support the Responses API.
///
/// Parsed once on first access; the `expect` can only fire if the literal
/// below is not a valid semver requirement, which would be a programming error.
static RESPONSES_API_VERSION_REQ: std::sync::LazyLock<VersionReq> =
    std::sync::LazyLock::new(|| {
        VersionReq::parse(">=0.13.3").expect("valid version requirement literal")
    });
68
69/// Determine which wire API to use based on the Ollama server version.
70///
71/// Version 0.0.0 is used for development builds, which typically support the
72/// latest features.
73fn wire_api_for_version(version: &Version) -> OllamaWireApi {
74    if *version == Version::new(0, 0, 0) || RESPONSES_API_VERSION_REQ.matches(version) {
75        OllamaWireApi::Responses
76    } else {
77        OllamaWireApi::Chat
78    }
79}
80
81/// Detect which wire API the running Ollama server supports based on its version.
82/// Returns `Ok(None)` when the version endpoint is missing or unparsable; callers
83/// should keep the configured default in that case.
84///
85/// Adapted from OpenAI Codex's codex-ollama/src/lib.rs
86pub async fn detect_wire_api(
87    base_url: Option<String>,
88) -> std::io::Result<Option<WireApiDetection>> {
89    let resolved_base_url = override_base_url(
90        urls::OLLAMA_API_BASE,
91        base_url,
92        Some(env_vars::OLLAMA_BASE_URL),
93    );
94
95    let client = match OllamaClient::try_from_base_url(&resolved_base_url).await {
96        Ok(c) => c,
97        Err(e) => {
98            tracing::debug!("Failed to connect to Ollama server for version detection: {e}");
99            return Ok(None);
100        }
101    };
102
103    let Some(version) = client.fetch_version().await? else {
104        return Ok(None);
105    };
106
107    let wire_api = wire_api_for_version(&version);
108
109    Ok(Some(WireApiDetection {
110        wire_api,
111        version: Some(version),
112    }))
113}
114
115/// Prepare the local OSS environment when using Ollama.
116///
117/// - Ensures a local Ollama server is reachable.
118/// - Checks if the model exists locally and pulls it if missing.
119///
120/// Adapted from OpenAI Codex's codex-ollama/src/lib.rs
121pub async fn ensure_oss_ready(
122    model: Option<&str>,
123    base_url: Option<String>,
124) -> std::io::Result<()> {
125    let target_model = model.unwrap_or(models::ollama::DEFAULT_MODEL);
126
127    let resolved_base_url = override_base_url(
128        urls::OLLAMA_API_BASE,
129        base_url,
130        Some(env_vars::OLLAMA_BASE_URL),
131    );
132
133    // Verify local Ollama is reachable
134    let ollama_client = OllamaClient::try_from_base_url(&resolved_base_url).await?;
135
136    // If the model is not present locally, pull it
137    match ollama_client.fetch_models().await {
138        Ok(existing_models) => {
139            if !existing_models.iter().any(|m| m == target_model) {
140                tracing::info!("Model '{target_model}' not found locally, pulling...");
141                let mut reporter = CliPullProgressReporter::new();
142                ollama_client
143                    .pull_with_reporter(target_model, &mut reporter)
144                    .await?;
145            }
146        }
147        Err(e) => {
148            tracing::warn!("Failed to list Ollama models: {e}");
149            // Continue anyway; model might exist but listing failed
150        }
151    }
152
153    Ok(())
154}
155
/// JSON body returned by the `/api/tags` endpoint, as parsed by
/// `fetch_ollama_models`.
#[derive(Debug, Deserialize, Serialize)]
struct OllamaTagsResponse {
    /// One entry per model reported by the server.
    models: Vec<OllamaTag>,
}
160
/// A single model entry inside `OllamaTagsResponse`.
///
/// Every field is optional so that a partially populated entry still
/// deserializes. Only `name` and `model` are read in this file (to derive the
/// model identifier); the remaining fields are carried along as-is.
#[derive(Debug, Deserialize, Serialize)]
struct OllamaTag {
    // Preferred source of the model identifier.
    name: Option<String>,
    // Fallback source of the model identifier.
    model: Option<String>,
    // NOTE(review): presumably a timestamp string; format not verified here.
    modified_at: Option<String>,
    // NOTE(review): presumably a size in bytes; unit not verified here.
    size: Option<u64>,
    digest: Option<String>,
    details: Option<OllamaModelDetails>,
}
170
/// Optional descriptive metadata attached to an `OllamaTag`.
///
/// None of these fields are interpreted in this file; they are only
/// (de)serialized.
#[derive(Debug, Deserialize, Serialize)]
struct OllamaModelDetails {
    format: Option<String>,
    family: Option<String>,
    families: Option<Vec<String>>,
    parameter_size: Option<String>,
    quantization_level: Option<String>,
}
179
180pub(super) fn ollama_model_name_from_fields<'a>(
181    name: Option<&'a str>,
182    model: Option<&'a str>,
183) -> Option<&'a str> {
184    name.or(model)
185        .map(str::trim)
186        .filter(|value| !value.is_empty())
187}
188
/// User-facing error text returned by `fetch_ollama_models` when the request
/// to the Ollama server cannot be sent.
pub(super) const OLLAMA_CONNECTION_ERROR: &str = "No running Ollama server detected. Start it with: `ollama serve` (after installing)\n\
     Install instructions: https://github.com/ollama/ollama?tab=readme-ov-file";
191
192/// Fetches available local Ollama models from the Ollama API endpoint
193pub async fn fetch_ollama_models(base_url: Option<String>) -> Result<Vec<String>, anyhow::Error> {
194    use crate::config::constants::{env_vars, urls};
195
196    let resolved_base_url = override_base_url(
197        urls::OLLAMA_API_BASE,
198        base_url,
199        Some(env_vars::OLLAMA_BASE_URL),
200    );
201
202    // Construct the tags endpoint URL
203    let tags_url = format!("{}/api/tags", resolved_base_url);
204
205    // Create HTTP client with connection timeout
206    let client = http_client::create_client_with_timeout(std::time::Duration::from_secs(5));
207
208    // Make GET request to fetch models
209    let response = client
210        .get(&tags_url)
211        .header("Content-Type", "application/json")
212        .send()
213        .await
214        .map_err(|e| {
215            tracing::warn!("Failed to connect to Ollama server: {e:?}");
216            anyhow::anyhow!(OLLAMA_CONNECTION_ERROR)
217        })?;
218
219    if !response.status().is_success() {
220        return Err(anyhow::anyhow!(
221            "Failed to fetch Ollama models: HTTP {}. {}",
222            response.status(),
223            if response.status() == reqwest::StatusCode::NOT_FOUND {
224                "Ensure Ollama server is running."
225            } else {
226                ""
227            }
228        ));
229    }
230
231    // Parse the response
232    let tags_response: OllamaTagsResponse = response
233        .json()
234        .await
235        .map_err(|e| anyhow::anyhow!("Failed to parse Ollama models response: {}", e))?;
236
237    // Extract model names
238    let model_names: Vec<String> = tags_response
239        .models
240        .into_iter()
241        .filter_map(|model| {
242            ollama_model_name_from_fields(model.name.as_deref(), model.model.as_deref())
243                .map(str::to_string)
244        })
245        .collect();
246
247    Ok(model_names)
248}
249
/// LLM provider backed by an Ollama server's `/api/chat` endpoint.
pub struct OllamaProvider {
    // HTTP client used for all requests issued by this provider.
    http_client: HttpClient,
    // Server base URL; trailing slashes are stripped when URLs are built.
    base_url: String,
    // Default model for this provider.
    model: String,
    // Bearer token attached to requests when present.
    api_key: Option<String>,
    // Optional model configuration; stored here but not read in the visible part of this file.
    model_behavior: Option<ModelConfig>,
}
257
258impl OllamaProvider {
259    fn merged_system_prompt(request: &LLMRequest) -> Option<String> {
260        const HISTORY_DIRECTIVES_SECTION_HEADER: &str = "[History Directives]";
261        let directives = collect_history_system_directives(request);
262        merge_system_prompt_with_history_directives(
263            request.system_prompt.as_ref().map(|prompt| prompt.as_str()),
264            &directives,
265            HISTORY_DIRECTIVES_SECTION_HEADER,
266        )
267    }
268
    /// Create a provider for the default Ollama model using `api_key`.
    ///
    /// Delegates to [`Self::with_model`].
    pub fn new(api_key: String) -> Self {
        Self::with_model(api_key, models::ollama::DEFAULT_MODEL.to_string())
    }
272
    /// Create a provider for `model` using `api_key`, with no explicit base
    /// URL and no model behavior configuration.
    pub fn with_model(api_key: String, model: String) -> Self {
        Self::with_model_internal(model, None, Some(api_key), None)
    }
276
    /// Create a provider around a caller-supplied HTTP client and base URL.
    ///
    /// `_timeouts` is accepted but not used.
    ///
    /// NOTE(review): unlike `with_model_internal`, this constructor stores
    /// `api_key` and `base_url` exactly as given — the key is not trimmed and
    /// is not dropped when `base_url` points at a local endpoint. Confirm
    /// this difference is intentional.
    pub fn new_with_client(
        api_key: String,
        model: String,
        http_client: reqwest::Client,
        base_url: String,
        _timeouts: TimeoutsConfig,
    ) -> Self {
        Self {
            http_client,
            base_url,
            model,
            api_key: Some(api_key),
            model_behavior: None,
        }
    }
292
    /// Create a provider from optional configuration values.
    ///
    /// `model` is resolved against the default Ollama model via
    /// `resolve_model`. `_prompt_cache`, `_timeouts` and `_anthropic` are
    /// accepted but not used by this provider.
    pub fn from_config(
        api_key: Option<String>,
        model: Option<String>,
        base_url: Option<String>,
        _prompt_cache: Option<PromptCachingConfig>,
        _timeouts: Option<TimeoutsConfig>,
        _anthropic: Option<AnthropicConfig>,
        model_behavior: Option<ModelConfig>,
    ) -> Self {
        let resolved_model = resolve_model(model, models::ollama::DEFAULT_MODEL);
        Self::with_model_internal(resolved_model, base_url, api_key, model_behavior)
    }
305
306    fn normalize_api_key(api_key: Option<String>) -> Option<String> {
307        api_key.and_then(|value| {
308            let trimmed = value.trim();
309            if trimmed.is_empty() {
310                None
311            } else {
312                Some(trimmed.to_string())
313            }
314        })
315    }
316
317    fn is_local_base_url(base_url: &str) -> bool {
318        let lowered = base_url.trim().to_ascii_lowercase();
319        const LOCAL_PREFIXES: &[&str] = &[
320            "http://localhost",
321            "https://localhost",
322            "http://127.",
323            "https://127.",
324            "http://0.0.0.0",
325            "https://0.0.0.0",
326            "http://[::1]",
327            "https://[::1]",
328        ];
329
330        LOCAL_PREFIXES
331            .iter()
332            .any(|prefix| lowered.starts_with(prefix))
333    }
334
335    fn with_model_internal(
336        model: String,
337        base_url: Option<String>,
338        api_key: Option<String>,
339        model_behavior: Option<ModelConfig>,
340    ) -> Self {
341        let normalized_api_key = Self::normalize_api_key(api_key);
342        let is_cloud_model = model.contains(":cloud") || model.contains("-cloud");
343
344        let default_base = if is_cloud_model {
345            urls::OLLAMA_CLOUD_API_BASE
346        } else {
347            urls::OLLAMA_API_BASE
348        };
349
350        let resolved_base =
351            override_base_url(default_base, base_url, Some(env_vars::OLLAMA_BASE_URL));
352        let target_is_local = Self::is_local_base_url(&resolved_base);
353
354        // Never send API keys to local endpoints; keep keys for cloud/remote targets
355        let effective_api_key = if target_is_local {
356            None
357        } else {
358            normalized_api_key
359        };
360
361        Self {
362            http_client: http_client::create_default_client(),
363            base_url: resolved_base,
364            model,
365            api_key: effective_api_key,
366            model_behavior,
367        }
368    }
369
370    fn chat_url(&self) -> String {
371        format!("{}/api/chat", self.base_url.trim_end_matches('/'))
372    }
373
374    fn authorized_post(&self, url: String) -> reqwest::RequestBuilder {
375        let builder = self.http_client.post(url);
376        if let Some(api_key) = &self.api_key {
377            builder.bearer_auth(api_key)
378        } else {
379            builder
380        }
381    }
382
    /// Turn a raw client prompt into an `LLMRequest` for this provider's
    /// model, delegating to `parse_client_prompt_common` and supplying
    /// `parse_chat_request` as the parser for JSON values.
    fn parse_client_prompt(&self, prompt: &str) -> LLMRequest {
        parse_client_prompt_common(prompt, &self.model, |value| self.parse_chat_request(value))
    }
386
387    fn parse_chat_request(&self, value: &Value) -> Option<LLMRequest> {
388        let messages_value = value.get("messages")?.as_array()?;
389        let mut system_prompt = value
390            .get("system")
391            .and_then(|entry| entry.as_str())
392            .filter(|text| !text.trim().is_empty())
393            .map(|text| text.to_string());
394        let mut messages = Vec::new();
395
396        for entry in messages_value {
397            let role = entry
398                .get("role")
399                .and_then(|r| r.as_str())
400                .unwrap_or(crate::config::constants::message_roles::USER);
401            let content = entry
402                .get("content")
403                .map(|c| match c {
404                    Value::String(text) => text.to_string(),
405                    other => other.to_string(),
406                })
407                .unwrap_or_default();
408
409            if content.trim().is_empty() {
410                continue;
411            }
412
413            match role {
414                "system" => {
415                    if system_prompt.is_none() {
416                        system_prompt = Some(content);
417                    }
418                }
419                "assistant" => messages.push(Message::assistant(content)),
420                "user" => messages.push(Message::user(content)),
421                _ => {}
422            }
423        }
424
425        if messages.is_empty() {
426            return None;
427        }
428
429        let tools = value
430            .get("tools")
431            .and_then(|entry| serde_json::from_value::<Vec<ToolDefinition>>(entry.clone()).ok());
432
433        Some(LLMRequest {
434            messages,
435            system_prompt: system_prompt.map(std::sync::Arc::new),
436            tools: tools.map(std::sync::Arc::new),
437            model: value
438                .get("model")
439                .and_then(|m| m.as_str())
440                .filter(|m| !m.trim().is_empty())
441                .map(|m| m.to_string())
442                .unwrap_or_else(|| self.model.clone()),
443            max_tokens: value
444                .get("max_tokens")
445                .and_then(|entry| entry.as_u64())
446                .map(|value| value as u32),
447            temperature: value
448                .get("temperature")
449                .and_then(|entry| entry.as_f64())
450                .map(|value| value as f32),
451            stream: value
452                .get("stream")
453                .and_then(|entry| entry.as_bool())
454                .unwrap_or(false),
455            ..Default::default()
456        })
457    }
458
    /// Convert an `LLMRequest` into the JSON payload for Ollama's chat endpoint.
    ///
    /// `stream` is copied into the payload as-is. Returns
    /// `LLMError::InvalidRequest` when the arguments of a tool call in the
    /// history cannot be turned into execution arguments.
    fn build_payload(
        &self,
        request: &LLMRequest,
        stream: bool,
    ) -> Result<OllamaChatRequest, LLMError> {
        let mut messages = Vec::new();
        // Maps tool call id -> function name, filled while walking assistant
        // tool calls so that later tool-result messages can be labelled.
        let mut tool_names: HashMap<String, String> = HashMap::new();
        let minimax_tool_followup_compat = Self::minimax_tool_followup_compat_mode(request);

        // The merged system prompt, if any, always goes first.
        if let Some(system) = Self::merged_system_prompt(request) {
            messages.push(OllamaChatMessage {
                role: "system".to_string(),
                content: Some(system),
                thinking: None,
                tool_calls: None,
                tool_call_id: None,
                tool_name: None,
                images: None,
            });
        }

        for message in &request.messages {
            // When interleaved history text is available it replaces both the
            // plain content and any images for this message.
            let interleaved_content = assistant_interleaved_history_text(message, &request.model);
            let used_interleaved_content = interleaved_content.is_some();
            let (content_text, images) = if let Some(interleaved_content) = interleaved_content {
                (interleaved_content, None)
            } else {
                Self::extract_content_and_images(&message.content)
            };
            match message.role {
                // System messages from history are not emitted individually
                // (presumably already covered by the merged system prompt above).
                MessageRole::System => continue,
                MessageRole::Tool => {
                    // Prefer the name recorded from the matching tool call,
                    // then fall back to the message's own origin tool.
                    let tool_name = message
                        .tool_call_id
                        .as_ref()
                        .and_then(|id| tool_names.get(id).cloned());
                    let tool_name = tool_name.or_else(|| message.origin_tool.clone());
                    // In MiniMax compat mode the call id is omitted whenever
                    // the tool name is known.
                    let tool_call_id = if minimax_tool_followup_compat && tool_name.is_some() {
                        None
                    } else {
                        message.tool_call_id.clone()
                    };
                    messages.push(OllamaChatMessage {
                        role: "tool".to_string(),
                        content: Some(content_text),
                        thinking: None,
                        tool_calls: None,
                        tool_call_id,
                        tool_name,
                        images: None,
                    });
                }
                _ => {
                    // Interleaved content is sent without a separate thinking field.
                    let thinking = if used_interleaved_content {
                        None
                    } else {
                        Self::assistant_thinking_history_text(message)
                    };
                    let mut payload_message = OllamaChatMessage {
                        role: message.role.as_generic_str().to_string(),
                        content: Some(content_text),
                        thinking,
                        tool_calls: None,
                        tool_call_id: None,
                        tool_name: None,
                        images,
                    };

                    if let Some(tool_calls) = message.get_tool_calls() {
                        let mut converted = Vec::new();
                        for (index, tool_call) in tool_calls.iter().enumerate() {
                            // Tool calls without function details are skipped.
                            if let Some(ref func) = tool_call.function {
                                // First name seen for an id wins; empty ids are not tracked.
                                if !tool_call.id.is_empty() {
                                    tool_names
                                        .entry(tool_call.id.clone())
                                        .or_insert_with(|| func.name.clone());
                                }

                                let arguments = tool_call.execution_arguments().map_err(|err| {
                                    LLMError::InvalidRequest {
                                        message: format!(
                                            "Failed to parse tool arguments for Ollama: {err}"
                                        ),
                                        metadata: None,
                                    }
                                })?;
                                converted.push(OllamaToolCall {
                                    call_type: tool_call.call_type.clone(),
                                    function: OllamaToolFunctionCall {
                                        name: func.name.clone(),
                                        arguments: Some(arguments),
                                        // Position within this message's tool call list.
                                        index: Some(index as u32),
                                    },
                                });
                            }
                        }

                        if !converted.is_empty() {
                            payload_message.tool_calls = Some(converted);
                            // NOTE(review): `content` is always `Some` at this point
                            // (set when `payload_message` was built), so this branch
                            // appears to be a defensive no-op.
                            if payload_message.content.is_none() {
                                payload_message.content = Some(String::new());
                            }
                        }
                    }

                    messages.push(payload_message);
                }
            }
        }

        // Only send an options object when at least one option is set.
        let options = if request.temperature.is_some() || request.max_tokens.is_some() {
            Some(OllamaChatOptions {
                temperature: request.temperature,
                num_predict: request.max_tokens,
            })
        } else {
            None
        };

        // `ToolChoice::None` suppresses tools entirely; otherwise every tool
        // that has function details is forwarded, and the rest are dropped.
        let tools = match request.tool_choice {
            Some(ToolChoice::None) => None,
            _ => request.tools.as_ref().map(|tools| {
                tools
                    .iter()
                    .filter_map(|tool| {
                        // Normalize all tools to function type for Ollama compatibility
                        tool.function.as_ref().map(|func| {
                            ToolDefinition::function(
                                func.name.clone(),
                                func.description.clone(),
                                func.parameters.clone(),
                            )
                        })
                    })
                    .collect()
            }),
        };

        Ok(OllamaChatRequest {
            model: request.model.clone(),
            messages,
            stream,
            format: request.output_format.clone(),
            options,
            tools,
            think: Self::think_value(request),
        })
    }
607
608    fn assistant_thinking_history_text(message: &Message) -> Option<String> {
609        if message.role != MessageRole::Assistant {
610            return None;
611        }
612
613        message
614            .reasoning
615            .as_deref()
616            .map(str::trim)
617            .filter(|value| !value.is_empty())
618            .map(str::to_owned)
619            .or_else(|| {
620                message
621                    .reasoning_details
622                    .as_deref()
623                    .and_then(extract_reasoning_text_from_detail_values)
624            })
625    }
626
627    fn extract_content_and_images(content: &MessageContent) -> (String, Option<Vec<String>>) {
628        let mut images = Vec::new();
629        if let MessageContent::Parts(parts) = content {
630            for part in parts {
631                if let ContentPart::Image { data, .. } = part {
632                    images.push(data.clone());
633                }
634            }
635        }
636
637        let text = content.as_text().into_owned();
638        let images = if images.is_empty() {
639            None
640        } else {
641            Some(images)
642        };
643        (text, images)
644    }
645
646    fn think_value(request: &LLMRequest) -> Option<Value> {
647        let model_id = request.model.as_str();
648        if Self::minimax_tool_followup_compat_mode(request) {
649            return None;
650        }
651        if !models::ollama::REASONING_MODELS.contains(&model_id) {
652            return None;
653        }
654
655        if models::ollama::REASONING_LEVEL_MODELS.contains(&model_id) {
656            request
657                .reasoning_effort
658                .map(|effort| Value::String(effort.to_string()))
659        } else {
660            Some(Value::Bool(true))
661        }
662    }
663
664    fn minimax_tool_followup_compat_mode(request: &LLMRequest) -> bool {
665        is_minimax_m2_model(&request.model)
666            && request
667                .messages
668                .iter()
669                .any(|message| message.role == MessageRole::Tool || message.has_tool_calls())
670    }
671
672    fn convert_tool_calls(
673        tool_calls: Option<Vec<OllamaResponseToolCall>>,
674    ) -> Result<Option<Vec<ToolCall>>, LLMError> {
675        let Some(tool_calls) = tool_calls else {
676            return Ok(None);
677        };
678
679        if tool_calls.is_empty() {
680            return Ok(None);
681        }
682
683        let mut converted = Vec::new();
684        for (index, call) in tool_calls.into_iter().enumerate() {
685            let function = call.function.ok_or_else(|| LLMError::Provider {
686                message: "Ollama response missing function details for tool call".to_string(),
687                metadata: None,
688            })?;
689
690            let name = function.name.ok_or_else(|| LLMError::Provider {
691                message: "Ollama response missing tool function name".to_string(),
692                metadata: None,
693            })?;
694
695            let arguments_value = function
696                .arguments
697                .unwrap_or_else(|| Value::Object(Map::new()));
698            let arguments = match arguments_value {
699                Value::String(raw) => raw,
700                other => serde_json::to_string(&other).map_err(|err| LLMError::Provider {
701                    message: format!("Failed to serialize Ollama tool arguments: {err}"),
702                    metadata: None,
703                })?,
704            };
705
706            let id = function
707                .index
708                .map(|value| format!("tool_call_{value}"))
709                .unwrap_or_else(|| format!("tool_call_{index}"));
710
711            converted.push(ToolCall::function(id, name, arguments));
712        }
713
714        Ok(Some(converted))
715    }
716
717    fn usage_from_counts(
718        prompt_tokens: Option<u32>,
719        completion_tokens: Option<u32>,
720    ) -> Option<Usage> {
721        if prompt_tokens.is_none() && completion_tokens.is_none() {
722            return None;
723        }
724
725        let prompt = prompt_tokens.unwrap_or_default();
726        let completion = completion_tokens.unwrap_or_default();
727        Some(Usage {
728            prompt_tokens: prompt,
729            completion_tokens: completion,
730            total_tokens: prompt + completion,
731            cached_prompt_tokens: None,
732            cache_creation_tokens: None,
733            cache_read_tokens: None,
734            iterations: None,
735        })
736    }
737
738    fn finish_reason_from(reason: Option<&str>) -> FinishReason {
739        match reason {
740            Some("stop") | None => FinishReason::Stop,
741            Some("length") => FinishReason::Length,
742            Some("tool_calls") => FinishReason::ToolCalls,
743            Some(other) => FinishReason::Error(other.to_string()),
744        }
745    }
746
747    fn build_response(
748        content: Option<String>,
749        tool_calls: Option<Vec<ToolCall>>,
750        reasoning: Option<String>,
751        reasoning_details: Option<Vec<String>>,
752        model: String,
753        finish_reason: Option<&str>,
754        prompt_tokens: Option<u32>,
755        completion_tokens: Option<u32>,
756    ) -> LLMResponse {
757        let mut finish = Self::finish_reason_from(finish_reason);
758        if tool_calls.as_ref().is_some_and(|calls| !calls.is_empty()) {
759            finish = FinishReason::ToolCalls;
760        }
761
762        LLMResponse {
763            content,
764            tool_calls,
765            model,
766            usage: Self::usage_from_counts(prompt_tokens, completion_tokens),
767            finish_reason: finish,
768            reasoning,
769            reasoning_details,
770            tool_references: Vec::new(),
771            request_id: None,
772            organization_id: None,
773            compaction: None,
774        }
775    }
776
    /// Convert a parsed Ollama chat response into an `LLMResponse`.
    ///
    /// Returns `LLMError::Provider` when the payload carries an `error` field
    /// or when its tool calls cannot be converted. Reasoning is taken, in
    /// order of preference, from the message's `thinking` field, from its
    /// native reasoning details, or — as a last resort — from reasoning markup
    /// embedded in the content.
    fn response_from_chat_payload(
        model: String,
        parsed: OllamaChatResponse,
    ) -> Result<LLMResponse, LLMError> {
        // An explicit error in the body takes precedence over everything else.
        if let Some(error) = parsed.error {
            return Err(LLMError::Provider {
                message: error,
                metadata: None,
            });
        }

        // Empty strings / empty detail lists are normalized to `None`.
        let (content, reasoning, tool_calls, native_reasoning_details) =
            if let Some(message) = parsed.message {
                let content = message
                    .content
                    .and_then(|value| (!value.is_empty()).then_some(value));
                let reasoning = message
                    .thinking
                    .and_then(|value| (!value.is_empty()).then_some(value));
                let tool_calls = Self::convert_tool_calls(message.tool_calls)?;
                let native_reasoning_details = message.reasoning_details.filter(|d| !d.is_empty());
                (content, reasoning, tool_calls, native_reasoning_details)
            } else {
                (None, None, None, None)
            };

        // Fall back to text extracted from the native details when `thinking` is absent.
        let reasoning = reasoning.or_else(|| {
            native_reasoning_details
                .as_deref()
                .and_then(extract_reasoning_text_from_detail_values)
        });
        let mut reasoning_details = native_reasoning_details
            .as_deref()
            .and_then(serialize_reasoning_detail_values);

        // Fallback: Extract reasoning from content if not provided natively
        // This handles MiniMax-M2.5 cloud models that use <think></think> tags
        let (final_reasoning, final_content) = if reasoning.is_none() {
            if let Some(ref content_str) = content {
                let (reasoning_parts, cleaned_content) =
                    crate::llm::utils::extract_reasoning_content(content_str);
                if reasoning_parts.is_empty() {
                    // No embedded reasoning: content passes through unchanged.
                    (None, content)
                } else {
                    // Record the original (interleaved) content in the reasoning
                    // details before the visible content is replaced.
                    super::common::preserve_interleaved_content_in_reasoning_details(
                        &mut reasoning_details,
                        content_str,
                    );
                    (
                        Some(reasoning_parts.join("\n\n")),
                        // Keep the raw content if extraction yields no cleaned content.
                        cleaned_content.or(content),
                    )
                }
            } else {
                (None, content)
            }
        } else {
            (reasoning, content)
        };

        Ok(Self::build_response(
            final_content,
            tool_calls,
            final_reasoning,
            reasoning_details,
            model,
            parsed.done_reason.as_deref(),
            parsed.prompt_eval_count,
            parsed.eval_count,
        ))
    }
848
849    fn authorized_post_with_key(
850        http_client: &HttpClient,
851        url: &str,
852        api_key: Option<&str>,
853    ) -> reqwest::RequestBuilder {
854        let builder = http_client.post(url.to_string());
855        if let Some(value) = api_key {
856            builder.bearer_auth(value)
857        } else {
858            builder
859        }
860    }
861
862    async fn request_non_stream_response(
863        http_client: &HttpClient,
864        url: &str,
865        api_key: Option<&str>,
866        payload: &OllamaChatRequest,
867        model: String,
868    ) -> Result<LLMResponse, LLMError> {
869        let response = Self::authorized_post_with_key(http_client, url, api_key)
870            .json(payload)
871            .send()
872            .await
873            .map_err(|e| format_network_error("Ollama", &e))?;
874
875        if !response.status().is_success() {
876            let status = response.status();
877            let body = response.text().await.unwrap_or_default();
878            let error_message = Self::extract_error(&body)
879                .unwrap_or_else(|| format!("Ollama request failed ({status}): {body}"));
880            return Err(LLMError::Provider {
881                message: error_message,
882                metadata: None,
883            });
884        }
885
886        let parsed = response
887            .json::<OllamaChatResponse>()
888            .await
889            .map_err(|e| format_parse_error("Ollama", &e))?;
890        Self::response_from_chat_payload(model, parsed)
891    }
892
893    fn extract_error(body: &str) -> Option<String> {
894        serde_json::from_str::<OllamaErrorResponse>(body)
895            .ok()
896            .and_then(|resp| resp.error)
897    }
898}
899
/// JSON request body posted to the Ollama chat URL.
///
/// Every `Option` field is left out of the serialized JSON when it is `None`.
#[derive(Debug, Serialize)]
struct OllamaChatRequest {
    /// Name of the model the request targets.
    model: String,
    /// Conversation messages sent with the request.
    messages: Vec<OllamaChatMessage>,
    /// Streaming flag. NOTE(review): presumably set from the second argument
    /// of `build_payload` — confirm there.
    stream: bool,
    /// Optional `format` value, passed through as arbitrary JSON.
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<Value>,
    /// Optional sampling/generation options.
    #[serde(skip_serializing_if = "Option::is_none")]
    options: Option<OllamaChatOptions>,
    /// Tool definitions offered to the model, if any.
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<ToolDefinition>>,
    /// Optional `think` value; the tests in this file show a string level
    /// such as `"low"` for one model and no value at all for another.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<Value>,
}
914
/// One message inside an [`OllamaChatRequest`].
///
/// Every `Option` field is left out of the serialized JSON when it is `None`.
#[derive(Debug, Serialize)]
struct OllamaChatMessage {
    /// Role string; the tests in this file exercise `"system"`, `"user"` and
    /// `"tool"`.
    role: String,
    /// Text content of the message.
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Reasoning text replayed with an assistant message.
    #[serde(skip_serializing_if = "Option::is_none")]
    thinking: Option<String>,
    /// Image payloads as strings; the tests pass the raw data string without
    /// its MIME type.
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    /// Tool calls carried by the message.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OllamaToolCall>>,
    /// Identifier of the tool call a tool message answers; the tests show it
    /// is omitted for the MiniMax cloud model and kept for another model.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
    /// Name of the tool a tool message belongs to.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_name: Option<String>,
}
931
/// Value of the `options` field of an [`OllamaChatRequest`]; unset fields are
/// omitted from the JSON.
#[derive(Debug, Serialize)]
struct OllamaChatOptions {
    /// Sampling temperature.
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    /// Ollama `num_predict` option. NOTE(review): presumably the cap on
    /// generated tokens — confirm against the Ollama API reference.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<u32>,
}
939
/// Tool call attached to an outgoing [`OllamaChatMessage`].
#[derive(Debug, Serialize)]
struct OllamaToolCall {
    /// Call kind, serialized under the JSON key `type`.
    #[serde(rename = "type")]
    call_type: String,
    /// The function being invoked and its arguments.
    function: OllamaToolFunctionCall,
}
946
/// Function portion of an outgoing [`OllamaToolCall`].
#[derive(Debug, Serialize)]
struct OllamaToolFunctionCall {
    /// Name of the function/tool.
    name: String,
    /// Call arguments as a JSON value; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    arguments: Option<Value>,
    /// Optional call index; omitted when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    index: Option<u32>,
}
955
/// Chat response as deserialized from Ollama: either a complete non-stream
/// body or one line of a streamed reply. Missing fields fall back to their
/// defaults.
#[derive(Debug, Deserialize)]
struct OllamaChatResponse {
    /// Assistant message (or message fragment) carried by this response.
    message: Option<OllamaResponseMessage>,
    /// Completion marker; the streaming path stops once a chunk sets it.
    #[serde(default)]
    done: bool,
    /// Reason reported for completion, later mapped to a finish reason.
    #[serde(default)]
    done_reason: Option<String>,
    /// Count recorded as prompt tokens in the usage data.
    #[serde(default)]
    prompt_eval_count: Option<u32>,
    /// Count recorded as completion tokens in the usage data.
    #[serde(default)]
    eval_count: Option<u32>,
    /// Error text; the streaming path turns it into a provider error.
    #[serde(default)]
    error: Option<String>,
}
970
/// Message object inside an [`OllamaChatResponse`]; every field is optional.
#[derive(Debug, Deserialize)]
struct OllamaResponseMessage {
    /// Role reported by the server. Deserialized but never read, hence the
    /// `dead_code` expectation.
    #[serde(default)]
    #[expect(dead_code)]
    role: Option<String>,
    /// Text content (a delta when streaming).
    #[serde(default)]
    content: Option<String>,
    /// Natively reported thinking/reasoning text.
    #[serde(default)]
    thinking: Option<String>,
    /// Structured reasoning details, kept as raw JSON values.
    #[serde(default)]
    reasoning_details: Option<Vec<Value>>,
    /// Tool calls requested by the model.
    #[serde(default)]
    tool_calls: Option<Vec<OllamaResponseToolCall>>,
}
985
/// Tool call as received from Ollama. It is also `Serialize` because the
/// streaming path converts it back into a JSON value for the aggregator.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct OllamaResponseToolCall {
    /// Call kind, read from / written to the JSON key `type`.
    #[serde(default)]
    #[serde(rename = "type")]
    call_type: Option<String>,
    /// Function name and arguments, when present.
    #[serde(default)]
    function: Option<OllamaResponseFunctionCall>,
}
994
/// Function portion of an [`OllamaResponseToolCall`]; every field is optional.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct OllamaResponseFunctionCall {
    /// Name of the function/tool.
    #[serde(default)]
    name: Option<String>,
    /// Call arguments as a raw JSON value.
    #[serde(default)]
    arguments: Option<Value>,
    /// Optional call index.
    #[serde(default)]
    index: Option<u32>,
}
1004
/// Minimal view of an error body, used by `extract_error` to pull out the
/// server-provided message.
#[derive(Debug, Deserialize)]
struct OllamaErrorResponse {
    /// Error text, if the body carries one.
    error: Option<String>,
}
1009
1010fn parse_stream_chunk(line: &str) -> Result<OllamaChatResponse, LLMError> {
1011    serde_json::from_str::<OllamaChatResponse>(line).map_err(|err| LLMError::Provider {
1012        message: format!("Failed to parse Ollama stream chunk: {err}"),
1013        metadata: None,
1014    })
1015}
1016
1017#[async_trait]
1018impl LLMProvider for OllamaProvider {
    /// Provider identifier: always `"ollama"`.
    fn name(&self) -> &str {
        "ollama"
    }
1022
    /// Streaming is always reported as supported.
    fn supports_streaming(&self) -> bool {
        true
    }
1026
    /// Tool calling is reported as supported for every model; the model name
    /// is not consulted.
    fn supports_tools(&self, _model: &str) -> bool {
        true
    }
1030
1031    fn supports_reasoning(&self, model: &str) -> bool {
1032        // Codex-inspired robustness: Setting model_supports_reasoning to false
1033        // does NOT disable it for known reasoning models.
1034        models::ollama::REASONING_MODELS.contains(&model)
1035            || self
1036                .model_behavior
1037                .as_ref()
1038                .and_then(|b| b.model_supports_reasoning)
1039                .unwrap_or(false)
1040    }
1041
1042    fn supports_reasoning_effort(&self, model: &str) -> bool {
1043        // Same robustness logic for reasoning effort
1044        models::ollama::REASONING_LEVEL_MODELS.contains(&model)
1045            || self
1046                .model_behavior
1047                .as_ref()
1048                .and_then(|b| b.model_supports_reasoning_effort)
1049                .unwrap_or(false)
1050    }
1051
1052    async fn generate(&self, mut request: LLMRequest) -> Result<LLMResponse, LLMError> {
1053        self.validate_request(&request)?;
1054        if request.model.is_empty() {
1055            request.model = self.model.clone();
1056        }
1057        let model = request.model.clone();
1058        let payload = self.build_payload(&request, false)?;
1059        let url = self.chat_url();
1060        Self::request_non_stream_response(
1061            &self.http_client,
1062            &url,
1063            self.api_key.as_deref(),
1064            &payload,
1065            model,
1066        )
1067        .await
1068    }
1069
1070    async fn stream(&self, mut request: LLMRequest) -> Result<LLMStream, LLMError> {
1071        self.validate_request(&request)?;
1072        if request.model.is_empty() {
1073            request.model = self.model.clone();
1074        }
1075        let model = request.model.clone();
1076        let payload = self.build_payload(&request, true)?;
1077        let fallback_payload = self.build_payload(&request, false)?;
1078        let url = self.chat_url();
1079
1080        let response = self
1081            .authorized_post(url.clone())
1082            .header(reqwest::header::ACCEPT_ENCODING, "identity")
1083            .json(&payload)
1084            .send()
1085            .await
1086            .map_err(|e| format_network_error("Ollama", &e))?;
1087
1088        if !response.status().is_success() {
1089            let status = response.status();
1090            let body = response.text().await.unwrap_or_default();
1091            let error_message = Self::extract_error(&body)
1092                .unwrap_or_else(|| format!("Ollama streaming request failed ({status}): {body}"));
1093            return Err(LLMError::Provider {
1094                message: error_message,
1095                metadata: None,
1096            });
1097        }
1098
1099        let byte_stream = response.bytes_stream();
1100        let mut buffer: Vec<u8> = Vec::new();
1101        let mut aggregator = crate::llm::providers::shared::StreamAggregator::new(model.clone());
1102        let fallback_http_client = self.http_client.clone();
1103        let fallback_api_key = self.api_key.clone();
1104        let fallback_model = model.clone();
1105        let fallback_url = url.clone();
1106        let any_interleaved = request
1107            .messages
1108            .iter()
1109            .any(|msg| assistant_interleaved_history_text(msg, &request.model).is_some());
1110        let stream = try_stream! {
1111            let mut prompt_tokens: Option<u32> = None;
1112            let mut completion_tokens: Option<u32> = None;
1113            let mut finish_reason: Option<String> = None;
1114            let mut completed = false;
1115            let mut saw_stream_chunk = false;
1116
1117            futures::pin_mut!(byte_stream);
1118            while let Some(chunk_result) = byte_stream.next().await {
1119                let chunk = match chunk_result {
1120                    Ok(chunk) => {
1121                        saw_stream_chunk = true;
1122                        chunk
1123                    }
1124                    Err(err) if !saw_stream_chunk => {
1125                        tracing::warn!(
1126                            model = %fallback_model,
1127                            url = %fallback_url,
1128                            error = %err,
1129                            "Ollama stream failed before first chunk; retrying once as non-stream response"
1130                        );
1131                        let fallback_response = Self::request_non_stream_response(
1132                            &fallback_http_client,
1133                            &fallback_url,
1134                            fallback_api_key.as_deref(),
1135                            &fallback_payload,
1136                            fallback_model.clone(),
1137                        ).await?;
1138                        yield LLMStreamEvent::Completed { response: Box::new(fallback_response) };
1139                        return;
1140                    }
1141                    Err(err) => Err(format_network_error("Ollama", &err))?,
1142                };
1143                buffer.extend_from_slice(&chunk);
1144
1145                while let Some(pos) = buffer.iter().position(|b| *b == b'\n') {
1146                    let line_bytes: Vec<u8> = buffer.drain(..=pos).collect();
1147                    let line = std::str::from_utf8(&line_bytes)
1148                        .map_err(|err| LLMError::Provider {
1149                            message: format!("Invalid UTF-8 in Ollama stream: {err}"),
1150                            metadata: None,
1151                        })?;
1152                    let line = line.trim();
1153
1154                    if line.is_empty() {
1155                        continue;
1156                    }
1157
1158                    let parsed = parse_stream_chunk(line)?;
1159
1160                    if let Some(error) = parsed.error {
1161                        Err(LLMError::Provider {
1162                            message: error,
1163                            metadata: None,
1164                        })?;
1165                    }
1166
1167                    if let Some(message) = parsed.message {
1168                        if let Some(reasoning_details) = message.reasoning_details.as_deref() {
1169                            aggregator.set_reasoning_details(reasoning_details);
1170                        }
1171
1172                        let has_explicit_thinking = message
1173                            .thinking
1174                            .as_ref()
1175                            .map(|v| !v.is_empty())
1176                            .unwrap_or(false);
1177
1178                        if let Some(thinking) = message.thinking
1179                            && let Some(delta) = aggregator.handle_reasoning(&thinking) {
1180                                yield LLMStreamEvent::Reasoning { delta };
1181                            }
1182
1183                        if let Some(content) = message.content {
1184                            for event in aggregator.handle_content(&content) {
1185                                match &event {
1186                                    LLMStreamEvent::Reasoning { .. }
1187                                        if has_explicit_thinking || any_interleaved =>
1188                                    {
1189                                    }
1190                                    _ => yield event,
1191                                }
1192                            }
1193                        }
1194
1195                        if let Some(tool_calls) = message.tool_calls {
1196                            let tool_calls_json: Vec<Value> = tool_calls
1197                                .into_iter()
1198                                .map(|tc| serde_json::to_value(tc).unwrap_or(Value::Null))
1199                                .filter(|v| !v.is_null())
1200                                .collect();
1201                            aggregator.handle_tool_calls(&tool_calls_json);
1202                        }
1203                    }
1204
1205                    if parsed.done {
1206                        prompt_tokens = parsed.prompt_eval_count;
1207                        completion_tokens = parsed.eval_count;
1208                        finish_reason = parsed.done_reason;
1209                        completed = true;
1210                    }
1211                }
1212
1213                if completed {
1214                    break;
1215                }
1216            }
1217
1218            if !completed {
1219                Err(LLMError::Provider {
1220                    message: "Ollama stream ended without completion signal".to_string(),
1221                    metadata: None,
1222                })?;
1223            }
1224
1225            let mut response = aggregator.finalize();
1226            if let Some(pt) = prompt_tokens {
1227                let mut usage = response.usage.unwrap_or_default();
1228                usage.prompt_tokens = pt;
1229                if let Some(ct) = completion_tokens {
1230                    usage.completion_tokens = ct;
1231                    usage.total_tokens = pt + ct;
1232                }
1233                response.usage = Some(usage);
1234            }
1235            if let Some(fr) = finish_reason {
1236                response.finish_reason = crate::llm::providers::common::map_finish_reason_common(&fr);
1237            }
1238            if response.reasoning.is_none()
1239                && let Some(details) = response.reasoning_details.as_ref()
1240            {
1241                response.reasoning = extract_reasoning_text_from_serialized_details(details);
1242            }
1243
1244            yield LLMStreamEvent::Completed { response: Box::new(response) };
1245        };
1246
1247        Ok(Box::pin(stream))
1248    }
1249
1250    fn supported_models(&self) -> Vec<String> {
1251        models::ollama::SUPPORTED_MODELS
1252            .iter()
1253            .map(|model| model.to_string())
1254            .collect()
1255    }
1256
1257    fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
1258        if let Some(tool_choice) = &request.tool_choice {
1259            match tool_choice {
1260                ToolChoice::Auto | ToolChoice::None => {}
1261                _ => {
1262                    return Err(LLMError::InvalidRequest {
1263                        message: "Ollama does not support explicit tool_choice overrides"
1264                            .to_string(),
1265                        metadata: None,
1266                    });
1267                }
1268            }
1269        }
1270
1271        if request.parallel_tool_calls.is_some() || request.parallel_tool_config.is_some() {
1272            return Err(LLMError::InvalidRequest {
1273                message: "Ollama does not support parallel tool configuration".to_string(),
1274                metadata: None,
1275            });
1276        }
1277
1278        for message in &request.messages {
1279            if matches!(message.role, MessageRole::Tool) && message.tool_call_id.is_none() {
1280                return Err(LLMError::InvalidRequest {
1281                    message: "Ollama tool responses must include tool_call_id".to_string(),
1282                    metadata: None,
1283                });
1284            }
1285        }
1286
1287        Ok(())
1288    }
1289}
1290
1291#[async_trait]
1292impl LLMClient for OllamaProvider {
1293    async fn generate(&mut self, prompt: &str) -> Result<LLMResponse, LLMError> {
1294        let mut request = self.parse_client_prompt(prompt);
1295        if request.model.is_empty() {
1296            request.model = self.model.clone();
1297        }
1298        Ok(LLMProvider::generate(self, request).await?)
1299    }
1300
1301    fn model_id(&self) -> &str {
1302        &self.model
1303    }
1304}
1305
1306#[cfg(test)]
1307mod tests {
1308    use super::*;
1309    use crate::config::types::ReasoningEffortLevel;
1310    use crate::llm::provider::{ContentPart, Message, MessageContent};
1311    use serde_json::json;
1312
    /// Builds the provider used by the tests below. `test-model` and
    /// `http://localhost` are the only non-`None` arguments (presumably the
    /// model and base URL — confirm against `from_config`).
    fn test_provider() -> OllamaProvider {
        OllamaProvider::from_config(
            None,
            Some("test-model".to_string()),
            Some("http://localhost".to_string()),
            None,
            None,
            None,
            None,
        )
    }
1324
1325    #[test]
1326    fn build_payload_includes_images() {
1327        let provider = test_provider();
1328        let parts = vec![
1329            ContentPart::text("see ".to_string()),
1330            ContentPart::image("BASE64DATA".to_string(), "image/png".to_string()),
1331        ];
1332        let request = LLMRequest {
1333            model: "test-model".to_string(),
1334            messages: vec![Message::user_with_parts(parts)],
1335            ..Default::default()
1336        };
1337
1338        let payload = provider.build_payload(&request, false).unwrap();
1339        assert_eq!(payload.messages.len(), 1);
1340        let message = &payload.messages[0];
1341        assert_eq!(message.content.as_deref(), Some("see "));
1342        assert_eq!(
1343            message.images.as_ref(),
1344            Some(&vec!["BASE64DATA".to_string()])
1345        );
1346    }
1347
1348    #[test]
1349    fn build_payload_omits_images_when_none_present() {
1350        let provider = test_provider();
1351        let content = MessageContent::text("no images".to_string());
1352        let request = LLMRequest {
1353            model: "test-model".to_string(),
1354            messages: vec![Message::user(content.as_text().into_owned())],
1355            ..Default::default()
1356        };
1357
1358        let payload = provider.build_payload(&request, false).unwrap();
1359        assert_eq!(payload.messages.len(), 1);
1360        let message = &payload.messages[0];
1361        assert_eq!(message.content.as_deref(), Some("no images"));
1362        assert!(message.images.is_none());
1363    }
1364
    /// For the MiniMax cloud model, a tool reply keeps `tool_name` but drops
    /// `tool_call_id`, and no `think` value is sent even though a reasoning
    /// effort was requested.
    #[test]
    fn build_payload_minimax_tool_followup_omits_tool_call_id() {
        let provider = test_provider();
        let tool_call_id = "direct_run_pty_cmd_1".to_string();
        // History: an assistant tool call followed by the matching tool reply.
        let request = LLMRequest {
            model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
            messages: vec![
                Message::assistant_with_tools(
                    String::new(),
                    vec![ToolCall::function(
                        tool_call_id.clone(),
                        "run_pty_cmd".to_string(),
                        "{\"command\":\"cargo fmt\"}".to_string(),
                    )],
                ),
                Message::tool_response(
                    tool_call_id,
                    "{\"output\":\"\",\"exit_code\":0}".to_string(),
                ),
            ],
            reasoning_effort: Some(ReasoningEffortLevel::Low),
            ..Default::default()
        };

        let payload = provider.build_payload(&request, false).unwrap();
        assert_eq!(payload.messages.len(), 2);
        assert_eq!(payload.messages[1].role, "tool");
        // The tool name is resolved even though the reply itself only carried
        // the call id.
        assert_eq!(
            payload.messages[1].tool_name.as_deref(),
            Some("run_pty_cmd")
        );
        assert!(payload.messages[1].tool_call_id.is_none());
        assert!(payload.think.is_none());
    }
1399
    /// For a non-MiniMax model, a tool reply keeps both `tool_name` and
    /// `tool_call_id`, and the requested reasoning effort is sent as `think`.
    #[test]
    fn build_payload_non_minimax_tool_followup_keeps_tool_call_id() {
        let provider = test_provider();
        let tool_call_id = "direct_run_pty_cmd_1".to_string();
        // History: an assistant tool call followed by the matching tool reply.
        let request = LLMRequest {
            model: models::ollama::GPT_OSS_20B_CLOUD.to_string(),
            messages: vec![
                Message::assistant_with_tools(
                    String::new(),
                    vec![ToolCall::function(
                        tool_call_id.clone(),
                        "run_pty_cmd".to_string(),
                        "{\"command\":\"cargo fmt\"}".to_string(),
                    )],
                ),
                Message::tool_response(
                    tool_call_id.clone(),
                    "{\"output\":\"\",\"exit_code\":0}".to_string(),
                ),
            ],
            reasoning_effort: Some(ReasoningEffortLevel::Low),
            ..Default::default()
        };

        let payload = provider.build_payload(&request, false).unwrap();
        assert_eq!(payload.messages.len(), 2);
        assert_eq!(payload.messages[1].role, "tool");
        assert_eq!(
            payload.messages[1].tool_name.as_deref(),
            Some("run_pty_cmd")
        );
        assert_eq!(
            payload.messages[1].tool_call_id.as_deref(),
            Some(tool_call_id.as_str())
        );
        // `Low` effort is serialized as the string "low".
        assert_eq!(payload.think, Some(Value::String("low".to_string())));
    }
1437
1438    #[test]
1439    fn build_payload_hoists_history_system_directives_into_system_prompt() {
1440        let provider = test_provider();
1441        let request = LLMRequest {
1442            model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1443            system_prompt: Some(std::sync::Arc::new(
1444                "stable system instructions".to_string(),
1445            )),
1446            messages: vec![
1447                Message::user("explore architecture".to_string()),
1448                Message::system(
1449                    "Previous turn already completed tool execution. Reuse the latest tool outputs in history instead of rerunning the same exploration.".to_string(),
1450                ),
1451            ],
1452            ..Default::default()
1453        };
1454
1455        let payload = provider.build_payload(&request, false).unwrap();
1456        assert_eq!(payload.messages.len(), 2);
1457        assert_eq!(payload.messages[0].role, "system");
1458        assert!(
1459            payload.messages[0]
1460                .content
1461                .as_deref()
1462                .unwrap_or("")
1463                .contains("stable system instructions")
1464        );
1465        assert!(
1466            payload.messages[0]
1467                .content
1468                .as_deref()
1469                .unwrap_or("")
1470                .contains("[History Directives]")
1471        );
1472        assert!(
1473            payload.messages[0]
1474                .content
1475                .as_deref()
1476                .unwrap_or("")
1477                .contains("Previous turn already completed tool execution")
1478        );
1479        assert_eq!(payload.messages[1].role, "user");
1480        assert_eq!(
1481            payload.messages[1].content.as_deref(),
1482            Some("explore architecture")
1483        );
1484    }
1485
1486    #[test]
1487    fn build_payload_promotes_history_system_directive_without_base_system_prompt() {
1488        let provider = test_provider();
1489        let request = LLMRequest {
1490            model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1491            messages: vec![
1492                Message::system(
1493                    "Repeated read-only exploration hit the per-turn family cap. Scheduling a final recovery pass without more tools.".to_string(),
1494                ),
1495                Message::user("summarize the architecture".to_string()),
1496            ],
1497            ..Default::default()
1498        };
1499
1500        let payload = provider.build_payload(&request, false).unwrap();
1501        assert_eq!(payload.messages.len(), 2);
1502        assert_eq!(payload.messages[0].role, "system");
1503        assert!(
1504            payload.messages[0]
1505                .content
1506                .as_deref()
1507                .unwrap_or("")
1508                .contains("[History Directives]")
1509        );
1510        assert!(
1511            payload.messages[0]
1512                .content
1513                .as_deref()
1514                .unwrap_or("")
1515                .contains("Repeated read-only exploration hit the per-turn family cap")
1516        );
1517        assert_eq!(payload.messages[1].role, "user");
1518    }
1519
    /// Tool-call arguments in history that consist of two concatenated JSON
    /// objects do not fail payload building: the first balanced object is
    /// kept as the call's arguments.
    #[test]
    fn build_payload_recovers_balanced_prefix_from_malformed_history_tool_arguments() {
        let provider = test_provider();
        let request = LLMRequest {
            model: "test-model".to_string(),
            messages: vec![Message::assistant_with_tools(
                String::new(),
                vec![ToolCall::function(
                    "tool_call_0".to_string(),
                    "unified_file".to_string(),
                    // Two objects back to back: not valid JSON as a whole.
                    "{\"action\":\"read\",\"path\":\"docs/ARCHITECTURE.md\",\"offset\":1,\"limit\":100}{\"action\":\"read\",\"path\":\"README.md\"}"
                        .to_string(),
                )],
            )],
            ..Default::default()
        };

        let payload = provider
            .build_payload(&request, false)
            .expect("payload should recover malformed history tool arguments");

        let tool_calls = payload.messages[0]
            .tool_calls
            .as_ref()
            .expect("tool calls should be present");
        assert_eq!(tool_calls.len(), 1);
        // Only the first object survives; the trailing one is dropped.
        assert_eq!(
            tool_calls[0].function.arguments,
            Some(json!({
                "action": "read",
                "path": "docs/ARCHITECTURE.md",
                "offset": 1,
                "limit": 100
            }))
        );
    }
1556
1557    #[test]
1558    fn build_payload_rehydrates_glm_interleaved_history_into_content() {
1559        let provider = test_provider();
1560        let request = LLMRequest {
1561            model: models::ollama::GLM_5_CLOUD.to_string(),
1562            messages: vec![
1563                Message::assistant("done".to_string()).with_reasoning(Some("trace".to_string())),
1564            ],
1565            ..Default::default()
1566        };
1567
1568        let payload = provider.build_payload(&request, false).unwrap();
1569
1570        assert_eq!(
1571            payload.messages[0].content.as_deref(),
1572            Some("<think>trace</think>done")
1573        );
1574        assert!(payload.messages[0].thinking.is_none());
1575    }
1576
1577    #[test]
1578    fn build_payload_replays_assistant_reasoning_as_ollama_thinking() {
1579        let provider = test_provider();
1580        let request = LLMRequest {
1581            model: models::ollama::GPT_OSS_20B.to_string(),
1582            messages: vec![
1583                Message::assistant("need a tool".to_string())
1584                    .with_reasoning(Some("reasoning trace".to_string())),
1585            ],
1586            ..Default::default()
1587        };
1588
1589        let payload = provider.build_payload(&request, false).unwrap();
1590
1591        assert_eq!(payload.messages[0].content.as_deref(), Some("need a tool"));
1592        assert_eq!(
1593            payload.messages[0].thinking.as_deref(),
1594            Some("reasoning trace")
1595        );
1596    }
1597
1598    #[test]
1599    fn build_payload_includes_apply_patch_as_normal_tool() {
1600        let provider = test_provider();
1601        let request = LLMRequest {
1602            model: "test-model".to_string(),
1603            messages: vec![Message::user("patch this file".to_string())],
1604            tools: Some(std::sync::Arc::new(vec![ToolDefinition::apply_patch(
1605                "Apply VT Code patches".to_string(),
1606            )])),
1607            ..Default::default()
1608        };
1609
1610        let payload = provider.build_payload(&request, false).unwrap();
1611        let tools = payload.tools.expect("tools should be present");
1612        assert_eq!(tools.len(), 1);
1613        assert_eq!(tools[0].function_name(), "apply_patch");
1614    }
1615
    /// When a response carries `reasoning_details` but no `thinking`, the
    /// reasoning text is taken from the details and the details themselves
    /// are kept on the response as JSON strings.
    #[test]
    fn response_payload_preserves_reasoning_details() {
        let parsed = OllamaChatResponse {
            message: Some(OllamaResponseMessage {
                role: Some("assistant".to_string()),
                content: Some("answer".to_string()),
                thinking: None,
                reasoning_details: Some(vec![json!({
                    "type": "reasoning.text",
                    "text": "step one"
                })]),
                tool_calls: None,
            }),
            done: true,
            done_reason: Some("stop".to_string()),
            prompt_eval_count: Some(1),
            eval_count: Some(2),
            error: None,
        };

        let response = OllamaProvider::response_from_chat_payload("test-model".to_string(), parsed)
            .expect("response should parse");
        assert_eq!(response.reasoning.as_deref(), Some("step one"));
        assert!(response.reasoning_details.is_some());

        // Each stored detail must itself be parseable JSON that keeps its type.
        let first_detail = response
            .reasoning_details
            .as_ref()
            .and_then(|details| details.first())
            .expect("reasoning detail should exist");
        let parsed_detail: Value =
            serde_json::from_str(first_detail).expect("reasoning detail should be json");
        assert_eq!(parsed_detail["type"], "reasoning.text");
    }
1650
1651    #[test]
1652    fn tags_response_accepts_partial_model_summaries() {
1653        let parsed: OllamaTagsResponse = serde_json::from_value(json!({
1654            "models": [
1655                { "model": "qwen3:8b" }
1656            ]
1657        }))
1658        .expect("partial model summaries should parse");
1659
1660        let names: Vec<String> = parsed
1661            .models
1662            .into_iter()
1663            .filter_map(|model| model.name.or(model.model))
1664            .collect();
1665        assert_eq!(names, vec!["qwen3:8b".to_string()]);
1666    }
1667
1668    #[test]
1669    fn wire_api_responses_for_dev_build() {
1670        assert_eq!(
1671            wire_api_for_version(&Version::new(0, 0, 0)),
1672            OllamaWireApi::Responses,
1673        );
1674    }
1675
1676    #[test]
1677    fn wire_api_responses_for_exact_threshold() {
1678        assert_eq!(
1679            wire_api_for_version(&Version::new(0, 13, 3)),
1680            OllamaWireApi::Responses,
1681        );
1682    }
1683
1684    #[test]
1685    fn wire_api_responses_for_above_threshold() {
1686        assert_eq!(
1687            wire_api_for_version(&Version::new(0, 14, 0)),
1688            OllamaWireApi::Responses,
1689        );
1690        assert_eq!(
1691            wire_api_for_version(&Version::new(1, 0, 0)),
1692            OllamaWireApi::Responses,
1693        );
1694    }
1695
1696    #[test]
1697    fn wire_api_chat_for_below_threshold() {
1698        assert_eq!(
1699            wire_api_for_version(&Version::new(0, 13, 2)),
1700            OllamaWireApi::Chat,
1701        );
1702        assert_eq!(
1703            wire_api_for_version(&Version::new(0, 12, 0)),
1704            OllamaWireApi::Chat,
1705        );
1706        assert_eq!(
1707            wire_api_for_version(&Version::new(0, 1, 0)),
1708            OllamaWireApi::Chat,
1709        );
1710    }
1711}