1use crate::config::TimeoutsConfig;
2use crate::config::constants::{env_vars, models, urls};
3use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
4use crate::llm::client::LLMClient;
5use crate::llm::provider::{
6 ContentPart, FinishReason, LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream,
7 LLMStreamEvent, Message, MessageContent, MessageRole, ToolCall, ToolChoice, ToolDefinition,
8 Usage,
9};
10use crate::utils::http_client;
11use anyhow::Result;
12use async_stream::try_stream;
13use async_trait::async_trait;
14use futures::StreamExt;
15use hashbrown::HashMap;
16use reqwest::Client as HttpClient;
17use serde::{Deserialize, Serialize};
18use serde_json::{Map, Value};
19
20pub mod client;
21pub mod parser;
22pub mod pull;
23pub mod url;
24
25pub use client::OllamaClient;
26pub use parser::pull_events_from_value;
27pub use pull::{
28 CliPullProgressReporter, OllamaPullEvent, OllamaPullProgressReporter, TuiPullProgressReporter,
29};
30pub use url::{base_url_to_host_root, is_openai_compatible_base_url};
31
32use semver::Version;
33
34use super::common::{
35 assistant_interleaved_history_text, collect_history_system_directives,
36 extract_reasoning_text_from_detail_values, extract_reasoning_text_from_serialized_details,
37 is_minimax_m2_model, merge_system_prompt_with_history_directives, override_base_url,
38 parse_client_prompt_common, resolve_model, serialize_reasoning_detail_values,
39};
40use super::error_handling::{format_network_error, format_parse_error};
41
/// Wire protocol variant used when talking to an Ollama server.
///
/// `wire_api_for_version` selects a variant from the server's reported version.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum OllamaWireApi {
    /// Selected for version `0.0.0` and for any version at or above
    /// `min_responses_version()`.
    Responses,
    /// Selected for every other server version.
    Chat,
}
55
56pub struct WireApiDetection {
58 pub wire_api: OllamaWireApi,
59 pub version: Option<Version>,
60}
61
62fn min_responses_version() -> Version {
65 Version::new(0, 13, 3)
66}
67
68fn wire_api_for_version(version: &Version) -> OllamaWireApi {
70 if *version == Version::new(0, 0, 0) || *version >= min_responses_version() {
72 OllamaWireApi::Responses
73 } else {
74 OllamaWireApi::Chat
75 }
76}
77
78pub async fn detect_wire_api(
84 base_url: Option<String>,
85) -> std::io::Result<Option<WireApiDetection>> {
86 let resolved_base_url = override_base_url(
87 urls::OLLAMA_API_BASE,
88 base_url,
89 Some(env_vars::OLLAMA_BASE_URL),
90 );
91
92 let client = match OllamaClient::try_from_base_url(&resolved_base_url).await {
93 Ok(c) => c,
94 Err(e) => {
95 tracing::debug!("Failed to connect to Ollama server for version detection: {e}");
96 return Ok(None);
97 }
98 };
99
100 let Some(version) = client.fetch_version().await? else {
101 return Ok(None);
102 };
103
104 let wire_api = wire_api_for_version(&version);
105
106 Ok(Some(WireApiDetection {
107 wire_api,
108 version: Some(version),
109 }))
110}
111
112pub async fn ensure_oss_ready(
119 model: Option<&str>,
120 base_url: Option<String>,
121) -> std::io::Result<()> {
122 let target_model = model.unwrap_or(models::ollama::DEFAULT_MODEL);
123
124 let resolved_base_url = override_base_url(
125 urls::OLLAMA_API_BASE,
126 base_url,
127 Some(env_vars::OLLAMA_BASE_URL),
128 );
129
130 let ollama_client = OllamaClient::try_from_base_url(&resolved_base_url).await?;
132
133 match ollama_client.fetch_models().await {
135 Ok(existing_models) => {
136 if !existing_models.iter().any(|m| m == target_model) {
137 tracing::info!("Model '{target_model}' not found locally, pulling...");
138 let mut reporter = CliPullProgressReporter::new();
139 ollama_client
140 .pull_with_reporter(target_model, &mut reporter)
141 .await?;
142 }
143 }
144 Err(e) => {
145 tracing::warn!("Failed to list Ollama models: {e}");
146 }
148 }
149
150 Ok(())
151}
152
153#[derive(Debug, Deserialize, Serialize)]
154struct OllamaTagsResponse {
155 models: Vec<OllamaTag>,
156}
157
158#[derive(Debug, Deserialize, Serialize)]
159struct OllamaTag {
160 name: Option<String>,
161 model: Option<String>,
162 modified_at: Option<String>,
163 size: Option<u64>,
164 digest: Option<String>,
165 details: Option<OllamaModelDetails>,
166}
167
168#[derive(Debug, Deserialize, Serialize)]
169struct OllamaModelDetails {
170 format: Option<String>,
171 family: Option<String>,
172 families: Option<Vec<String>>,
173 parameter_size: Option<String>,
174 quantization_level: Option<String>,
175}
176
177pub(super) fn ollama_model_name_from_fields<'a>(
178 name: Option<&'a str>,
179 model: Option<&'a str>,
180) -> Option<&'a str> {
181 name.or(model)
182 .map(str::trim)
183 .filter(|value| !value.is_empty())
184}
185
186pub(super) const OLLAMA_CONNECTION_ERROR: &str = "No running Ollama server detected. Start it with: `ollama serve` (after installing)\n\
187 Install instructions: https://github.com/ollama/ollama?tab=readme-ov-file";
188
189pub async fn fetch_ollama_models(base_url: Option<String>) -> Result<Vec<String>, anyhow::Error> {
191 use crate::config::constants::{env_vars, urls};
192
193 let resolved_base_url = override_base_url(
194 urls::OLLAMA_API_BASE,
195 base_url,
196 Some(env_vars::OLLAMA_BASE_URL),
197 );
198
199 let tags_url = format!("{}/api/tags", resolved_base_url);
201
202 let client = http_client::create_client_with_timeout(std::time::Duration::from_secs(5));
204
205 let response = client
207 .get(&tags_url)
208 .header("Content-Type", "application/json")
209 .send()
210 .await
211 .map_err(|e| {
212 tracing::warn!("Failed to connect to Ollama server: {e:?}");
213 anyhow::anyhow!(OLLAMA_CONNECTION_ERROR)
214 })?;
215
216 if !response.status().is_success() {
217 return Err(anyhow::anyhow!(
218 "Failed to fetch Ollama models: HTTP {}. {}",
219 response.status(),
220 if response.status() == reqwest::StatusCode::NOT_FOUND {
221 "Ensure Ollama server is running."
222 } else {
223 ""
224 }
225 ));
226 }
227
228 let tags_response: OllamaTagsResponse = response
230 .json()
231 .await
232 .map_err(|e| anyhow::anyhow!("Failed to parse Ollama models response: {}", e))?;
233
234 let model_names: Vec<String> = tags_response
236 .models
237 .into_iter()
238 .filter_map(|model| {
239 ollama_model_name_from_fields(model.name.as_deref(), model.model.as_deref())
240 .map(str::to_string)
241 })
242 .collect();
243
244 Ok(model_names)
245}
246
247pub struct OllamaProvider {
248 http_client: HttpClient,
249 base_url: String,
250 model: String,
251 api_key: Option<String>,
252 model_behavior: Option<ModelConfig>,
253}
254
255impl OllamaProvider {
256 fn merged_system_prompt(request: &LLMRequest) -> Option<String> {
257 const HISTORY_DIRECTIVES_SECTION_HEADER: &str = "[History Directives]";
258 let directives = collect_history_system_directives(request);
259 merge_system_prompt_with_history_directives(
260 request.system_prompt.as_ref().map(|prompt| prompt.as_str()),
261 &directives,
262 HISTORY_DIRECTIVES_SECTION_HEADER,
263 )
264 }
265
266 pub fn new(api_key: String) -> Self {
267 Self::with_model(api_key, models::ollama::DEFAULT_MODEL.to_string())
268 }
269
270 pub fn with_model(api_key: String, model: String) -> Self {
271 Self::with_model_internal(model, None, Some(api_key), None)
272 }
273
274 pub fn new_with_client(
275 api_key: String,
276 model: String,
277 http_client: reqwest::Client,
278 base_url: String,
279 _timeouts: TimeoutsConfig,
280 ) -> Self {
281 Self {
282 http_client,
283 base_url,
284 model,
285 api_key: Some(api_key),
286 model_behavior: None,
287 }
288 }
289
290 pub fn from_config(
291 api_key: Option<String>,
292 model: Option<String>,
293 base_url: Option<String>,
294 _prompt_cache: Option<PromptCachingConfig>,
295 _timeouts: Option<TimeoutsConfig>,
296 _anthropic: Option<AnthropicConfig>,
297 model_behavior: Option<ModelConfig>,
298 ) -> Self {
299 let resolved_model = resolve_model(model, models::ollama::DEFAULT_MODEL);
300 Self::with_model_internal(resolved_model, base_url, api_key, model_behavior)
301 }
302
/// Trims the API key and treats a missing or blank key as absent.
fn normalize_api_key(api_key: Option<String>) -> Option<String> {
    let raw = api_key?;
    match raw.trim() {
        "" => None,
        key => Some(key.to_owned()),
    }
}
313
/// Reports whether `base_url` points at the local machine.
///
/// The URL must use the `http` or `https` scheme. The host is then extracted from
/// the authority and compared exactly, so look-alike hosts such as
/// `localhost.example.com` or `127.evil.com` are not treated as local. Previously a
/// plain prefix match accepted them, which made `with_model_internal` drop the API
/// key for a remote server.
///
/// Local hosts are `localhost`, `0.0.0.0`, any IPv4 loopback address
/// (`127.0.0.0/8`) and the IPv6 loopback literal `[::1]`. Matching is
/// case-insensitive and ignores surrounding whitespace.
fn is_local_base_url(base_url: &str) -> bool {
    let lowered = base_url.trim().to_ascii_lowercase();

    let Some(rest) = lowered
        .strip_prefix("http://")
        .or_else(|| lowered.strip_prefix("https://"))
    else {
        return false;
    };

    // The authority ends at the first path, query or fragment delimiter.
    let authority = rest.split(['/', '?', '#']).next().unwrap_or(rest);

    // Bracketed IPv6 literal: compare the text between the brackets.
    if let Some(bracketed) = authority.strip_prefix('[') {
        return bracketed.split(']').next() == Some("::1");
    }

    // Drop an optional `:port` suffix.
    let host = authority.split(':').next().unwrap_or(authority);

    host == "localhost"
        || host == "0.0.0.0"
        || host
            .parse::<std::net::Ipv4Addr>()
            .is_ok_and(|ip| ip.is_loopback())
}
331
332 fn with_model_internal(
333 model: String,
334 base_url: Option<String>,
335 api_key: Option<String>,
336 model_behavior: Option<ModelConfig>,
337 ) -> Self {
338 let normalized_api_key = Self::normalize_api_key(api_key);
339 let is_cloud_model = model.contains(":cloud") || model.contains("-cloud");
340
341 let default_base = if is_cloud_model {
342 urls::OLLAMA_CLOUD_API_BASE
343 } else {
344 urls::OLLAMA_API_BASE
345 };
346
347 let resolved_base =
348 override_base_url(default_base, base_url, Some(env_vars::OLLAMA_BASE_URL));
349 let target_is_local = Self::is_local_base_url(&resolved_base);
350
351 let effective_api_key = if target_is_local {
353 None
354 } else {
355 normalized_api_key
356 };
357
358 Self {
359 http_client: http_client::create_default_client(),
360 base_url: resolved_base,
361 model,
362 api_key: effective_api_key,
363 model_behavior,
364 }
365 }
366
367 fn chat_url(&self) -> String {
368 format!("{}/api/chat", self.base_url.trim_end_matches('/'))
369 }
370
371 fn authorized_post(&self, url: String) -> reqwest::RequestBuilder {
372 let builder = self.http_client.post(url);
373 if let Some(api_key) = &self.api_key {
374 builder.bearer_auth(api_key)
375 } else {
376 builder
377 }
378 }
379
380 fn parse_client_prompt(&self, prompt: &str) -> LLMRequest {
381 parse_client_prompt_common(prompt, &self.model, |value| self.parse_chat_request(value))
382 }
383
384 fn parse_chat_request(&self, value: &Value) -> Option<LLMRequest> {
385 let messages_value = value.get("messages")?.as_array()?;
386 let mut system_prompt = value
387 .get("system")
388 .and_then(|entry| entry.as_str())
389 .filter(|text| !text.trim().is_empty())
390 .map(|text| text.to_string());
391 let mut messages = Vec::new();
392
393 for entry in messages_value {
394 let role = entry
395 .get("role")
396 .and_then(|r| r.as_str())
397 .unwrap_or(crate::config::constants::message_roles::USER);
398 let content = entry
399 .get("content")
400 .map(|c| match c {
401 Value::String(text) => text.to_string(),
402 other => other.to_string(),
403 })
404 .unwrap_or_default();
405
406 if content.trim().is_empty() {
407 continue;
408 }
409
410 match role {
411 "system" => {
412 if system_prompt.is_none() {
413 system_prompt = Some(content);
414 }
415 }
416 "assistant" => messages.push(Message::assistant(content)),
417 "user" => messages.push(Message::user(content)),
418 _ => {}
419 }
420 }
421
422 if messages.is_empty() {
423 return None;
424 }
425
426 let tools = value
427 .get("tools")
428 .and_then(|entry| serde_json::from_value::<Vec<ToolDefinition>>(entry.clone()).ok());
429
430 Some(LLMRequest {
431 messages,
432 system_prompt: system_prompt.map(std::sync::Arc::new),
433 tools: tools.map(std::sync::Arc::new),
434 model: value
435 .get("model")
436 .and_then(|m| m.as_str())
437 .filter(|m| !m.trim().is_empty())
438 .map(|m| m.to_string())
439 .unwrap_or_else(|| self.model.clone()),
440 max_tokens: value
441 .get("max_tokens")
442 .and_then(|entry| entry.as_u64())
443 .map(|value| value as u32),
444 temperature: value
445 .get("temperature")
446 .and_then(|entry| entry.as_f64())
447 .map(|value| value as f32),
448 stream: value
449 .get("stream")
450 .and_then(|entry| entry.as_bool())
451 .unwrap_or(false),
452 ..Default::default()
453 })
454 }
455
456 fn build_payload(
457 &self,
458 request: &LLMRequest,
459 stream: bool,
460 ) -> Result<OllamaChatRequest, LLMError> {
461 let mut messages = Vec::new();
462 let mut tool_names: HashMap<String, String> = HashMap::new();
463 let minimax_tool_followup_compat = Self::minimax_tool_followup_compat_mode(request);
464
465 if let Some(system) = Self::merged_system_prompt(request) {
466 messages.push(OllamaChatMessage {
467 role: "system".to_string(),
468 content: Some(system),
469 thinking: None,
470 tool_calls: None,
471 tool_call_id: None,
472 tool_name: None,
473 images: None,
474 });
475 }
476
477 for message in &request.messages {
478 let interleaved_content = assistant_interleaved_history_text(message, &request.model);
479 let used_interleaved_content = interleaved_content.is_some();
480 let (content_text, images) = if let Some(interleaved_content) = interleaved_content {
481 (interleaved_content, None)
482 } else {
483 Self::extract_content_and_images(&message.content)
484 };
485 match message.role {
486 MessageRole::System => continue,
487 MessageRole::Tool => {
488 let tool_name = message
489 .tool_call_id
490 .as_ref()
491 .and_then(|id| tool_names.get(id).cloned());
492 let tool_name = tool_name.or_else(|| message.origin_tool.clone());
493 let tool_call_id = if minimax_tool_followup_compat && tool_name.is_some() {
494 None
495 } else {
496 message.tool_call_id.clone()
497 };
498 messages.push(OllamaChatMessage {
499 role: "tool".to_string(),
500 content: Some(content_text),
501 thinking: None,
502 tool_calls: None,
503 tool_call_id,
504 tool_name,
505 images: None,
506 });
507 }
508 _ => {
509 let thinking = if used_interleaved_content {
510 None
511 } else {
512 Self::assistant_thinking_history_text(message)
513 };
514 let mut payload_message = OllamaChatMessage {
515 role: message.role.as_generic_str().to_string(),
516 content: Some(content_text),
517 thinking,
518 tool_calls: None,
519 tool_call_id: None,
520 tool_name: None,
521 images,
522 };
523
524 if let Some(tool_calls) = message.get_tool_calls() {
525 let mut converted = Vec::new();
526 for (index, tool_call) in tool_calls.iter().enumerate() {
527 if let Some(ref func) = tool_call.function {
528 if !tool_call.id.is_empty() {
529 tool_names
530 .entry(tool_call.id.clone())
531 .or_insert_with(|| func.name.clone());
532 }
533
534 let arguments = tool_call.execution_arguments().map_err(|err| {
535 LLMError::InvalidRequest {
536 message: format!(
537 "Failed to parse tool arguments for Ollama: {err}"
538 ),
539 metadata: None,
540 }
541 })?;
542 converted.push(OllamaToolCall {
543 call_type: tool_call.call_type.clone(),
544 function: OllamaToolFunctionCall {
545 name: func.name.clone(),
546 arguments: Some(arguments),
547 index: Some(index as u32),
548 },
549 });
550 }
551 }
552
553 if !converted.is_empty() {
554 payload_message.tool_calls = Some(converted);
555 if payload_message.content.is_none() {
556 payload_message.content = Some(String::new());
557 }
558 }
559 }
560
561 messages.push(payload_message);
562 }
563 }
564 }
565
566 let options = if request.temperature.is_some() || request.max_tokens.is_some() {
567 Some(OllamaChatOptions {
568 temperature: request.temperature,
569 num_predict: request.max_tokens,
570 })
571 } else {
572 None
573 };
574
575 let tools = match request.tool_choice {
576 Some(ToolChoice::None) => None,
577 _ => request.tools.as_ref().map(|tools| {
578 tools
579 .iter()
580 .filter_map(|tool| {
581 tool.function.as_ref().map(|func| {
583 ToolDefinition::function(
584 func.name.clone(),
585 func.description.clone(),
586 func.parameters.clone(),
587 )
588 })
589 })
590 .collect()
591 }),
592 };
593
594 Ok(OllamaChatRequest {
595 model: request.model.clone(),
596 messages,
597 stream,
598 format: request.output_format.clone(),
599 options,
600 tools,
601 think: Self::think_value(request),
602 })
603 }
604
605 fn assistant_thinking_history_text(message: &Message) -> Option<String> {
606 if message.role != MessageRole::Assistant {
607 return None;
608 }
609
610 message
611 .reasoning
612 .as_deref()
613 .map(str::trim)
614 .filter(|value| !value.is_empty())
615 .map(str::to_owned)
616 .or_else(|| {
617 message
618 .reasoning_details
619 .as_deref()
620 .and_then(extract_reasoning_text_from_detail_values)
621 })
622 }
623
624 fn extract_content_and_images(content: &MessageContent) -> (String, Option<Vec<String>>) {
625 let mut images = Vec::new();
626 if let MessageContent::Parts(parts) = content {
627 for part in parts {
628 if let ContentPart::Image { data, .. } = part {
629 images.push(data.clone());
630 }
631 }
632 }
633
634 let text = content.as_text().into_owned();
635 let images = if images.is_empty() {
636 None
637 } else {
638 Some(images)
639 };
640 (text, images)
641 }
642
643 fn think_value(request: &LLMRequest) -> Option<Value> {
644 let model_id = request.model.as_str();
645 if Self::minimax_tool_followup_compat_mode(request) {
646 return None;
647 }
648 if !models::ollama::REASONING_MODELS.contains(&model_id) {
649 return None;
650 }
651
652 if models::ollama::REASONING_LEVEL_MODELS.contains(&model_id) {
653 request
654 .reasoning_effort
655 .map(|effort| Value::String(effort.to_string()))
656 } else {
657 Some(Value::Bool(true))
658 }
659 }
660
661 fn minimax_tool_followup_compat_mode(request: &LLMRequest) -> bool {
662 is_minimax_m2_model(&request.model)
663 && request
664 .messages
665 .iter()
666 .any(|message| message.role == MessageRole::Tool || message.has_tool_calls())
667 }
668
669 fn convert_tool_calls(
670 tool_calls: Option<Vec<OllamaResponseToolCall>>,
671 ) -> Result<Option<Vec<ToolCall>>, LLMError> {
672 let Some(tool_calls) = tool_calls else {
673 return Ok(None);
674 };
675
676 if tool_calls.is_empty() {
677 return Ok(None);
678 }
679
680 let mut converted = Vec::new();
681 for (index, call) in tool_calls.into_iter().enumerate() {
682 let function = call.function.ok_or_else(|| LLMError::Provider {
683 message: "Ollama response missing function details for tool call".to_string(),
684 metadata: None,
685 })?;
686
687 let name = function.name.ok_or_else(|| LLMError::Provider {
688 message: "Ollama response missing tool function name".to_string(),
689 metadata: None,
690 })?;
691
692 let arguments_value = function
693 .arguments
694 .unwrap_or_else(|| Value::Object(Map::new()));
695 let arguments = match arguments_value {
696 Value::String(raw) => raw,
697 other => serde_json::to_string(&other).map_err(|err| LLMError::Provider {
698 message: format!("Failed to serialize Ollama tool arguments: {err}"),
699 metadata: None,
700 })?,
701 };
702
703 let id = function
704 .index
705 .map(|value| format!("tool_call_{value}"))
706 .unwrap_or_else(|| format!("tool_call_{index}"));
707
708 converted.push(ToolCall::function(id, name, arguments));
709 }
710
711 Ok(Some(converted))
712 }
713
714 fn usage_from_counts(
715 prompt_tokens: Option<u32>,
716 completion_tokens: Option<u32>,
717 ) -> Option<Usage> {
718 if prompt_tokens.is_none() && completion_tokens.is_none() {
719 return None;
720 }
721
722 let prompt = prompt_tokens.unwrap_or_default();
723 let completion = completion_tokens.unwrap_or_default();
724 Some(Usage {
725 prompt_tokens: prompt,
726 completion_tokens: completion,
727 total_tokens: prompt + completion,
728 cached_prompt_tokens: None,
729 cache_creation_tokens: None,
730 cache_read_tokens: None,
731 })
732 }
733
734 fn finish_reason_from(reason: Option<&str>) -> FinishReason {
735 match reason {
736 Some("stop") | None => FinishReason::Stop,
737 Some("length") => FinishReason::Length,
738 Some("tool_calls") => FinishReason::ToolCalls,
739 Some(other) => FinishReason::Error(other.to_string()),
740 }
741 }
742
743 fn build_response(
744 content: Option<String>,
745 tool_calls: Option<Vec<ToolCall>>,
746 reasoning: Option<String>,
747 reasoning_details: Option<Vec<String>>,
748 model: String,
749 finish_reason: Option<&str>,
750 prompt_tokens: Option<u32>,
751 completion_tokens: Option<u32>,
752 ) -> LLMResponse {
753 let mut finish = Self::finish_reason_from(finish_reason);
754 if tool_calls.as_ref().is_some_and(|calls| !calls.is_empty()) {
755 finish = FinishReason::ToolCalls;
756 }
757
758 LLMResponse {
759 content,
760 tool_calls,
761 model,
762 usage: Self::usage_from_counts(prompt_tokens, completion_tokens),
763 finish_reason: finish,
764 reasoning,
765 reasoning_details,
766 tool_references: Vec::new(),
767 request_id: None,
768 organization_id: None,
769 compaction: None,
770 }
771 }
772
773 fn response_from_chat_payload(
774 model: String,
775 parsed: OllamaChatResponse,
776 ) -> Result<LLMResponse, LLMError> {
777 if let Some(error) = parsed.error {
778 return Err(LLMError::Provider {
779 message: error,
780 metadata: None,
781 });
782 }
783
784 let (content, reasoning, tool_calls, native_reasoning_details) =
785 if let Some(message) = parsed.message {
786 let content = message
787 .content
788 .and_then(|value| (!value.is_empty()).then_some(value));
789 let reasoning = message
790 .thinking
791 .and_then(|value| (!value.is_empty()).then_some(value));
792 let tool_calls = Self::convert_tool_calls(message.tool_calls)?;
793 let native_reasoning_details = message.reasoning_details.filter(|d| !d.is_empty());
794 (content, reasoning, tool_calls, native_reasoning_details)
795 } else {
796 (None, None, None, None)
797 };
798
799 let reasoning = reasoning.or_else(|| {
800 native_reasoning_details
801 .as_deref()
802 .and_then(extract_reasoning_text_from_detail_values)
803 });
804 let mut reasoning_details = native_reasoning_details
805 .as_deref()
806 .and_then(serialize_reasoning_detail_values);
807
808 let (final_reasoning, final_content) = if reasoning.is_none() {
811 if let Some(ref content_str) = content {
812 let (reasoning_parts, cleaned_content) =
813 crate::llm::utils::extract_reasoning_content(content_str);
814 if reasoning_parts.is_empty() {
815 (None, content)
816 } else {
817 super::common::preserve_interleaved_content_in_reasoning_details(
818 &mut reasoning_details,
819 content_str,
820 );
821 (
822 Some(reasoning_parts.join("\n\n")),
823 cleaned_content.or(content),
824 )
825 }
826 } else {
827 (None, content)
828 }
829 } else {
830 (reasoning, content)
831 };
832
833 Ok(Self::build_response(
834 final_content,
835 tool_calls,
836 final_reasoning,
837 reasoning_details,
838 model,
839 parsed.done_reason.as_deref(),
840 parsed.prompt_eval_count,
841 parsed.eval_count,
842 ))
843 }
844
845 fn authorized_post_with_key(
846 http_client: &HttpClient,
847 url: &str,
848 api_key: Option<&str>,
849 ) -> reqwest::RequestBuilder {
850 let builder = http_client.post(url.to_string());
851 if let Some(value) = api_key {
852 builder.bearer_auth(value)
853 } else {
854 builder
855 }
856 }
857
858 async fn request_non_stream_response(
859 http_client: &HttpClient,
860 url: &str,
861 api_key: Option<&str>,
862 payload: &OllamaChatRequest,
863 model: String,
864 ) -> Result<LLMResponse, LLMError> {
865 let response = Self::authorized_post_with_key(http_client, url, api_key)
866 .json(payload)
867 .send()
868 .await
869 .map_err(|e| format_network_error("Ollama", &e))?;
870
871 if !response.status().is_success() {
872 let status = response.status();
873 let body = response.text().await.unwrap_or_default();
874 let error_message = Self::extract_error(&body)
875 .unwrap_or_else(|| format!("Ollama request failed ({status}): {body}"));
876 return Err(LLMError::Provider {
877 message: error_message,
878 metadata: None,
879 });
880 }
881
882 let parsed = response
883 .json::<OllamaChatResponse>()
884 .await
885 .map_err(|e| format_parse_error("Ollama", &e))?;
886 Self::response_from_chat_payload(model, parsed)
887 }
888
889 fn extract_error(body: &str) -> Option<String> {
890 serde_json::from_str::<OllamaErrorResponse>(body)
891 .ok()
892 .and_then(|resp| resp.error)
893 }
894}
895
896#[derive(Debug, Serialize)]
897struct OllamaChatRequest {
898 model: String,
899 messages: Vec<OllamaChatMessage>,
900 stream: bool,
901 #[serde(skip_serializing_if = "Option::is_none")]
902 format: Option<Value>,
903 #[serde(skip_serializing_if = "Option::is_none")]
904 options: Option<OllamaChatOptions>,
905 #[serde(skip_serializing_if = "Option::is_none")]
906 tools: Option<Vec<ToolDefinition>>,
907 #[serde(skip_serializing_if = "Option::is_none")]
908 think: Option<Value>,
909}
910
911#[derive(Debug, Serialize)]
912struct OllamaChatMessage {
913 role: String,
914 #[serde(skip_serializing_if = "Option::is_none")]
915 content: Option<String>,
916 #[serde(skip_serializing_if = "Option::is_none")]
917 thinking: Option<String>,
918 #[serde(skip_serializing_if = "Option::is_none")]
919 images: Option<Vec<String>>,
920 #[serde(skip_serializing_if = "Option::is_none")]
921 tool_calls: Option<Vec<OllamaToolCall>>,
922 #[serde(skip_serializing_if = "Option::is_none")]
923 tool_call_id: Option<String>,
924 #[serde(skip_serializing_if = "Option::is_none")]
925 tool_name: Option<String>,
926}
927
928#[derive(Debug, Serialize)]
929struct OllamaChatOptions {
930 #[serde(skip_serializing_if = "Option::is_none")]
931 temperature: Option<f32>,
932 #[serde(skip_serializing_if = "Option::is_none")]
933 num_predict: Option<u32>,
934}
935
936#[derive(Debug, Serialize)]
937struct OllamaToolCall {
938 #[serde(rename = "type")]
939 call_type: String,
940 function: OllamaToolFunctionCall,
941}
942
943#[derive(Debug, Serialize)]
944struct OllamaToolFunctionCall {
945 name: String,
946 #[serde(skip_serializing_if = "Option::is_none")]
947 arguments: Option<Value>,
948 #[serde(skip_serializing_if = "Option::is_none")]
949 index: Option<u32>,
950}
951
952#[derive(Debug, Deserialize)]
953struct OllamaChatResponse {
954 message: Option<OllamaResponseMessage>,
955 #[serde(default)]
956 done: bool,
957 #[serde(default)]
958 done_reason: Option<String>,
959 #[serde(default)]
960 prompt_eval_count: Option<u32>,
961 #[serde(default)]
962 eval_count: Option<u32>,
963 #[serde(default)]
964 error: Option<String>,
965}
966
967#[derive(Debug, Deserialize)]
968struct OllamaResponseMessage {
969 #[serde(default)]
970 #[expect(dead_code)]
971 role: Option<String>,
972 #[serde(default)]
973 content: Option<String>,
974 #[serde(default)]
975 thinking: Option<String>,
976 #[serde(default)]
977 reasoning_details: Option<Vec<Value>>,
978 #[serde(default)]
979 tool_calls: Option<Vec<OllamaResponseToolCall>>,
980}
981
982#[derive(Debug, Deserialize, Serialize, Clone)]
983struct OllamaResponseToolCall {
984 #[serde(default)]
985 #[serde(rename = "type")]
986 call_type: Option<String>,
987 #[serde(default)]
988 function: Option<OllamaResponseFunctionCall>,
989}
990
991#[derive(Debug, Deserialize, Serialize, Clone)]
992struct OllamaResponseFunctionCall {
993 #[serde(default)]
994 name: Option<String>,
995 #[serde(default)]
996 arguments: Option<Value>,
997 #[serde(default)]
998 index: Option<u32>,
999}
1000
1001#[derive(Debug, Deserialize)]
1002struct OllamaErrorResponse {
1003 error: Option<String>,
1004}
1005
1006fn parse_stream_chunk(line: &str) -> Result<OllamaChatResponse, LLMError> {
1007 serde_json::from_str::<OllamaChatResponse>(line).map_err(|err| LLMError::Provider {
1008 message: format!("Failed to parse Ollama stream chunk: {err}"),
1009 metadata: None,
1010 })
1011}
1012
1013#[async_trait]
1014impl LLMProvider for OllamaProvider {
1015 fn name(&self) -> &str {
1016 "ollama"
1017 }
1018
1019 fn supports_streaming(&self) -> bool {
1020 true
1021 }
1022
1023 fn supports_tools(&self, _model: &str) -> bool {
1024 true
1025 }
1026
1027 fn supports_reasoning(&self, model: &str) -> bool {
1028 models::ollama::REASONING_MODELS.contains(&model)
1031 || self
1032 .model_behavior
1033 .as_ref()
1034 .and_then(|b| b.model_supports_reasoning)
1035 .unwrap_or(false)
1036 }
1037
1038 fn supports_reasoning_effort(&self, model: &str) -> bool {
1039 models::ollama::REASONING_LEVEL_MODELS.contains(&model)
1041 || self
1042 .model_behavior
1043 .as_ref()
1044 .and_then(|b| b.model_supports_reasoning_effort)
1045 .unwrap_or(false)
1046 }
1047
1048 async fn generate(&self, mut request: LLMRequest) -> Result<LLMResponse, LLMError> {
1049 self.validate_request(&request)?;
1050 if request.model.is_empty() {
1051 request.model = self.model.clone();
1052 }
1053 let model = request.model.clone();
1054 let payload = self.build_payload(&request, false)?;
1055 let url = self.chat_url();
1056 Self::request_non_stream_response(
1057 &self.http_client,
1058 &url,
1059 self.api_key.as_deref(),
1060 &payload,
1061 model,
1062 )
1063 .await
1064 }
1065
1066 async fn stream(&self, mut request: LLMRequest) -> Result<LLMStream, LLMError> {
1067 self.validate_request(&request)?;
1068 if request.model.is_empty() {
1069 request.model = self.model.clone();
1070 }
1071 let model = request.model.clone();
1072 let payload = self.build_payload(&request, true)?;
1073 let fallback_payload = self.build_payload(&request, false)?;
1074 let url = self.chat_url();
1075
1076 let response = self
1077 .authorized_post(url.clone())
1078 .header(reqwest::header::ACCEPT_ENCODING, "identity")
1079 .json(&payload)
1080 .send()
1081 .await
1082 .map_err(|e| format_network_error("Ollama", &e))?;
1083
1084 if !response.status().is_success() {
1085 let status = response.status();
1086 let body = response.text().await.unwrap_or_default();
1087 let error_message = Self::extract_error(&body)
1088 .unwrap_or_else(|| format!("Ollama streaming request failed ({status}): {body}"));
1089 return Err(LLMError::Provider {
1090 message: error_message,
1091 metadata: None,
1092 });
1093 }
1094
1095 let byte_stream = response.bytes_stream();
1096 let mut buffer: Vec<u8> = Vec::new();
1097 let mut aggregator = crate::llm::providers::shared::StreamAggregator::new(model.clone());
1098 let fallback_http_client = self.http_client.clone();
1099 let fallback_api_key = self.api_key.clone();
1100 let fallback_model = model.clone();
1101 let fallback_url = url.clone();
1102 let any_interleaved = request
1103 .messages
1104 .iter()
1105 .any(|msg| assistant_interleaved_history_text(msg, &request.model).is_some());
1106 let stream = try_stream! {
1107 let mut prompt_tokens: Option<u32> = None;
1108 let mut completion_tokens: Option<u32> = None;
1109 let mut finish_reason: Option<String> = None;
1110 let mut completed = false;
1111 let mut saw_stream_chunk = false;
1112
1113 futures::pin_mut!(byte_stream);
1114 while let Some(chunk_result) = byte_stream.next().await {
1115 let chunk = match chunk_result {
1116 Ok(chunk) => {
1117 saw_stream_chunk = true;
1118 chunk
1119 }
1120 Err(err) if !saw_stream_chunk => {
1121 tracing::warn!(
1122 model = %fallback_model,
1123 url = %fallback_url,
1124 error = %err,
1125 "Ollama stream failed before first chunk; retrying once as non-stream response"
1126 );
1127 let fallback_response = Self::request_non_stream_response(
1128 &fallback_http_client,
1129 &fallback_url,
1130 fallback_api_key.as_deref(),
1131 &fallback_payload,
1132 fallback_model.clone(),
1133 ).await?;
1134 yield LLMStreamEvent::Completed { response: Box::new(fallback_response) };
1135 return;
1136 }
1137 Err(err) => Err(format_network_error("Ollama", &err))?,
1138 };
1139 buffer.extend_from_slice(&chunk);
1140
1141 while let Some(pos) = buffer.iter().position(|b| *b == b'\n') {
1142 let line_bytes: Vec<u8> = buffer.drain(..=pos).collect();
1143 let line = std::str::from_utf8(&line_bytes)
1144 .map_err(|err| LLMError::Provider {
1145 message: format!("Invalid UTF-8 in Ollama stream: {err}"),
1146 metadata: None,
1147 })?;
1148 let line = line.trim();
1149
1150 if line.is_empty() {
1151 continue;
1152 }
1153
1154 let parsed = parse_stream_chunk(line)?;
1155
1156 if let Some(error) = parsed.error {
1157 Err(LLMError::Provider {
1158 message: error,
1159 metadata: None,
1160 })?;
1161 }
1162
1163 if let Some(message) = parsed.message {
1164 if let Some(reasoning_details) = message.reasoning_details.as_deref() {
1165 aggregator.set_reasoning_details(reasoning_details);
1166 }
1167
1168 let has_explicit_thinking = message
1169 .thinking
1170 .as_ref()
1171 .map(|v| !v.is_empty())
1172 .unwrap_or(false);
1173
1174 if let Some(thinking) = message.thinking
1175 && let Some(delta) = aggregator.handle_reasoning(&thinking) {
1176 yield LLMStreamEvent::Reasoning { delta };
1177 }
1178
1179 if let Some(content) = message.content {
1180 for event in aggregator.handle_content(&content) {
1181 match &event {
1182 LLMStreamEvent::Reasoning { .. }
1183 if has_explicit_thinking || any_interleaved =>
1184 {
1185 }
1186 _ => yield event,
1187 }
1188 }
1189 }
1190
1191 if let Some(tool_calls) = message.tool_calls {
1192 let tool_calls_json: Vec<Value> = tool_calls
1193 .into_iter()
1194 .map(|tc| serde_json::to_value(tc).unwrap_or(Value::Null))
1195 .filter(|v| !v.is_null())
1196 .collect();
1197 aggregator.handle_tool_calls(&tool_calls_json);
1198 }
1199 }
1200
1201 if parsed.done {
1202 prompt_tokens = parsed.prompt_eval_count;
1203 completion_tokens = parsed.eval_count;
1204 finish_reason = parsed.done_reason;
1205 completed = true;
1206 }
1207 }
1208
1209 if completed {
1210 break;
1211 }
1212 }
1213
1214 if !completed {
1215 Err(LLMError::Provider {
1216 message: "Ollama stream ended without completion signal".to_string(),
1217 metadata: None,
1218 })?;
1219 }
1220
1221 let mut response = aggregator.finalize();
1222 if let Some(pt) = prompt_tokens {
1223 let mut usage = response.usage.unwrap_or_default();
1224 usage.prompt_tokens = pt;
1225 if let Some(ct) = completion_tokens {
1226 usage.completion_tokens = ct;
1227 usage.total_tokens = pt + ct;
1228 }
1229 response.usage = Some(usage);
1230 }
1231 if let Some(fr) = finish_reason {
1232 response.finish_reason = crate::llm::providers::common::map_finish_reason_common(&fr);
1233 }
1234 if response.reasoning.is_none()
1235 && let Some(details) = response.reasoning_details.as_ref()
1236 {
1237 response.reasoning = extract_reasoning_text_from_serialized_details(details);
1238 }
1239
1240 yield LLMStreamEvent::Completed { response: Box::new(response) };
1241 };
1242
1243 Ok(Box::pin(stream))
1244 }
1245
1246 fn supported_models(&self) -> Vec<String> {
1247 models::ollama::SUPPORTED_MODELS
1248 .iter()
1249 .map(|model| model.to_string())
1250 .collect()
1251 }
1252
1253 fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
1254 if let Some(tool_choice) = &request.tool_choice {
1255 match tool_choice {
1256 ToolChoice::Auto | ToolChoice::None => {}
1257 _ => {
1258 return Err(LLMError::InvalidRequest {
1259 message: "Ollama does not support explicit tool_choice overrides"
1260 .to_string(),
1261 metadata: None,
1262 });
1263 }
1264 }
1265 }
1266
1267 if request.parallel_tool_calls.is_some() || request.parallel_tool_config.is_some() {
1268 return Err(LLMError::InvalidRequest {
1269 message: "Ollama does not support parallel tool configuration".to_string(),
1270 metadata: None,
1271 });
1272 }
1273
1274 for message in &request.messages {
1275 if matches!(message.role, MessageRole::Tool) && message.tool_call_id.is_none() {
1276 return Err(LLMError::InvalidRequest {
1277 message: "Ollama tool responses must include tool_call_id".to_string(),
1278 metadata: None,
1279 });
1280 }
1281 }
1282
1283 Ok(())
1284 }
1285}
1286
1287#[async_trait]
1288impl LLMClient for OllamaProvider {
1289 async fn generate(&mut self, prompt: &str) -> Result<LLMResponse, LLMError> {
1290 let mut request = self.parse_client_prompt(prompt);
1291 if request.model.is_empty() {
1292 request.model = self.model.clone();
1293 }
1294 Ok(LLMProvider::generate(self, request).await?)
1295 }
1296
1297 fn model_id(&self) -> &str {
1298 &self.model
1299 }
1300}
1301
1302#[cfg(test)]
1303mod tests {
1304 use super::*;
1305 use crate::config::types::ReasoningEffortLevel;
1306 use crate::llm::provider::{ContentPart, Message, MessageContent};
1307 use serde_json::json;
1308
1309 fn test_provider() -> OllamaProvider {
1310 OllamaProvider::from_config(
1311 None,
1312 Some("test-model".to_string()),
1313 Some("http://localhost".to_string()),
1314 None,
1315 None,
1316 None,
1317 None,
1318 )
1319 }
1320
/// A user message with a text part and an image part yields one payload
/// message whose content is the text and whose `images` holds the image data.
#[test]
fn build_payload_includes_images() {
    let provider = test_provider();
    let user_message = Message::user_with_parts(vec![
        ContentPart::text("see ".to_string()),
        ContentPart::image("BASE64DATA".to_string(), "image/png".to_string()),
    ]);
    let request = LLMRequest {
        model: "test-model".to_string(),
        messages: vec![user_message],
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    assert_eq!(payload.messages.len(), 1);
    let first = &payload.messages[0];
    assert_eq!(first.content.as_deref(), Some("see "));

    let expected_images = vec!["BASE64DATA".to_string()];
    assert_eq!(first.images.as_ref(), Some(&expected_images));
}
1343
/// A text-only user message produces a payload message with no `images` field.
#[test]
fn build_payload_omits_images_when_none_present() {
    let provider = test_provider();
    // Round-trip through `MessageContent` so the text comes from `as_text`.
    let text = MessageContent::text("no images".to_string())
        .as_text()
        .into_owned();
    let request = LLMRequest {
        model: "test-model".to_string(),
        messages: vec![Message::user(text)],
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    assert_eq!(payload.messages.len(), 1);
    let first = &payload.messages[0];
    assert_eq!(first.content.as_deref(), Some("no images"));
    assert!(first.images.is_none());
}
1360
/// For the MiniMax cloud model, the tool follow-up message keeps its tool
/// name but carries no `tool_call_id`, and `think` is unset even though a
/// reasoning effort was requested.
#[test]
fn build_payload_minimax_tool_followup_omits_tool_call_id() {
    let provider = test_provider();
    let tool_call_id = "direct_run_pty_cmd_1".to_string();

    let assistant_turn = Message::assistant_with_tools(
        String::new(),
        vec![ToolCall::function(
            tool_call_id.clone(),
            "run_pty_cmd".to_string(),
            "{\"command\":\"cargo fmt\"}".to_string(),
        )],
    );
    let tool_turn = Message::tool_response(
        tool_call_id,
        "{\"output\":\"\",\"exit_code\":0}".to_string(),
    );
    let request = LLMRequest {
        model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
        messages: vec![assistant_turn, tool_turn],
        reasoning_effort: Some(ReasoningEffortLevel::Low),
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    assert_eq!(payload.messages.len(), 2);
    let followup = &payload.messages[1];
    assert_eq!(followup.role, "tool");
    assert_eq!(followup.tool_name.as_deref(), Some("run_pty_cmd"));
    assert!(followup.tool_call_id.is_none());
    assert!(payload.think.is_none());
}
1395
/// For a non-MiniMax model (GPT-OSS 20B cloud), the tool follow-up message
/// keeps both its tool name and `tool_call_id`, and the low reasoning effort
/// is emitted as `think: "low"`.
#[test]
fn build_payload_non_minimax_tool_followup_keeps_tool_call_id() {
    let provider = test_provider();
    let tool_call_id = "direct_run_pty_cmd_1".to_string();

    let assistant_turn = Message::assistant_with_tools(
        String::new(),
        vec![ToolCall::function(
            tool_call_id.clone(),
            "run_pty_cmd".to_string(),
            "{\"command\":\"cargo fmt\"}".to_string(),
        )],
    );
    let tool_turn = Message::tool_response(
        tool_call_id.clone(),
        "{\"output\":\"\",\"exit_code\":0}".to_string(),
    );
    let request = LLMRequest {
        model: models::ollama::GPT_OSS_20B_CLOUD.to_string(),
        messages: vec![assistant_turn, tool_turn],
        reasoning_effort: Some(ReasoningEffortLevel::Low),
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    assert_eq!(payload.messages.len(), 2);
    let followup = &payload.messages[1];
    assert_eq!(followup.role, "tool");
    assert_eq!(followup.tool_name.as_deref(), Some("run_pty_cmd"));
    assert_eq!(followup.tool_call_id.as_deref(), Some(tool_call_id.as_str()));

    let expected_think = Some(Value::String("low".to_string()));
    assert_eq!(payload.think, expected_think);
}
1433
/// A system message found in the history is merged into the leading system
/// payload message (alongside the base system prompt, under a
/// `[History Directives]` marker), leaving the user message as the second entry.
#[test]
fn build_payload_hoists_history_system_directives_into_system_prompt() {
    let provider = test_provider();
    let base_prompt = std::sync::Arc::new("stable system instructions".to_string());
    let history = vec![
        Message::user("explore architecture".to_string()),
        Message::system(
            "Previous turn already completed tool execution. Reuse the latest tool outputs in history instead of rerunning the same exploration.".to_string(),
        ),
    ];
    let request = LLMRequest {
        model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
        system_prompt: Some(base_prompt),
        messages: history,
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    assert_eq!(payload.messages.len(), 2);
    assert_eq!(payload.messages[0].role, "system");

    // The merged system text must contain the base prompt, the marker, and
    // the hoisted directive.
    let system_text = payload.messages[0].content.as_deref().unwrap_or("");
    for needle in [
        "stable system instructions",
        "[History Directives]",
        "Previous turn already completed tool execution",
    ] {
        assert!(system_text.contains(needle));
    }

    let user_entry = &payload.messages[1];
    assert_eq!(user_entry.role, "user");
    assert_eq!(user_entry.content.as_deref(), Some("explore architecture"));
}
1481
/// Without a base system prompt, a system message in the history still ends
/// up as the leading system payload message, tagged with
/// `[History Directives]`, followed by the user message.
#[test]
fn build_payload_promotes_history_system_directive_without_base_system_prompt() {
    let provider = test_provider();
    let history = vec![
        Message::system(
            "Repeated read-only exploration hit the per-turn family cap. Scheduling a final recovery pass without more tools.".to_string(),
        ),
        Message::user("summarize the architecture".to_string()),
    ];
    let request = LLMRequest {
        model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
        messages: history,
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    assert_eq!(payload.messages.len(), 2);
    assert_eq!(payload.messages[0].role, "system");

    let system_text = payload.messages[0].content.as_deref().unwrap_or("");
    for needle in [
        "[History Directives]",
        "Repeated read-only exploration hit the per-turn family cap",
    ] {
        assert!(system_text.contains(needle));
    }

    assert_eq!(payload.messages[1].role, "user");
}
1515
/// When a history tool call's argument string contains two JSON objects
/// concatenated back to back, the payload keeps a single tool call whose
/// arguments equal the first (balanced) object.
#[test]
fn build_payload_recovers_balanced_prefix_from_malformed_history_tool_arguments() {
    let provider = test_provider();
    // Two JSON objects glued together: not valid JSON as a whole.
    let malformed_arguments = "{\"action\":\"read\",\"path\":\"docs/ARCHITECTURE.md\",\"offset\":1,\"limit\":100}{\"action\":\"read\",\"path\":\"README.md\"}"
        .to_string();
    let assistant_turn = Message::assistant_with_tools(
        String::new(),
        vec![ToolCall::function(
            "tool_call_0".to_string(),
            "unified_file".to_string(),
            malformed_arguments,
        )],
    );
    let request = LLMRequest {
        model: "test-model".to_string(),
        messages: vec![assistant_turn],
        ..Default::default()
    };

    let payload = provider
        .build_payload(&request, false)
        .expect("payload should recover malformed history tool arguments");

    let tool_calls = payload.messages[0]
        .tool_calls
        .as_ref()
        .expect("tool calls should be present");
    assert_eq!(tool_calls.len(), 1);

    let expected_arguments = Some(json!({
        "action": "read",
        "path": "docs/ARCHITECTURE.md",
        "offset": 1,
        "limit": 100
    }));
    assert_eq!(tool_calls[0].function.arguments, expected_arguments);
}
1552
/// For the GLM-5 cloud model, assistant reasoning from history is inlined
/// into the content as a `<think>…</think>` prefix and the `thinking` field
/// stays empty.
#[test]
fn build_payload_rehydrates_glm_interleaved_history_into_content() {
    let provider = test_provider();
    let assistant_turn =
        Message::assistant("done".to_string()).with_reasoning(Some("trace".to_string()));
    let request = LLMRequest {
        model: models::ollama::GLM_5_CLOUD.to_string(),
        messages: vec![assistant_turn],
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    let first = &payload.messages[0];
    assert_eq!(first.content.as_deref(), Some("<think>trace</think>done"));
    assert!(first.thinking.is_none());
}
1572
/// For the GPT-OSS 20B model, assistant reasoning from history is sent in the
/// separate `thinking` field while the content is left untouched.
#[test]
fn build_payload_replays_assistant_reasoning_as_ollama_thinking() {
    let provider = test_provider();
    let assistant_turn = Message::assistant("need a tool".to_string())
        .with_reasoning(Some("reasoning trace".to_string()));
    let request = LLMRequest {
        model: models::ollama::GPT_OSS_20B.to_string(),
        messages: vec![assistant_turn],
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    let first = &payload.messages[0];
    assert_eq!(first.content.as_deref(), Some("need a tool"));
    assert_eq!(first.thinking.as_deref(), Some("reasoning trace"));
}
1593
/// An `apply_patch` tool definition is forwarded in the payload as a single
/// tool whose function name is `apply_patch`.
#[test]
fn build_payload_includes_apply_patch_as_normal_tool() {
    let provider = test_provider();
    let apply_patch = ToolDefinition::apply_patch("Apply VT Code patches".to_string());
    let request = LLMRequest {
        model: "test-model".to_string(),
        messages: vec![Message::user("patch this file".to_string())],
        tools: Some(std::sync::Arc::new(vec![apply_patch])),
        ..Default::default()
    };

    let payload = provider.build_payload(&request, false).unwrap();

    let tools = payload.tools.expect("tools should be present");
    assert_eq!(tools.len(), 1);
    assert_eq!(tools[0].function_name(), "apply_patch");
}
1611
/// A chat response carrying only `reasoning_details` (no `thinking`) yields a
/// response whose `reasoning` is the detail text and whose
/// `reasoning_details` entries remain parseable JSON with the original type.
#[test]
fn response_payload_preserves_reasoning_details() {
    let detail = json!({
        "type": "reasoning.text",
        "text": "step one"
    });
    let assistant_message = OllamaResponseMessage {
        role: Some("assistant".to_string()),
        content: Some("answer".to_string()),
        thinking: None,
        reasoning_details: Some(vec![detail]),
        tool_calls: None,
    };
    let parsed = OllamaChatResponse {
        message: Some(assistant_message),
        done: true,
        done_reason: Some("stop".to_string()),
        prompt_eval_count: Some(1),
        eval_count: Some(2),
        error: None,
    };

    let response = OllamaProvider::response_from_chat_payload("test-model".to_string(), parsed)
        .expect("response should parse");

    assert_eq!(response.reasoning.as_deref(), Some("step one"));
    assert!(response.reasoning_details.is_some());

    // The stored detail is a serialized JSON string; decode it to inspect it.
    let first_detail = response
        .reasoning_details
        .as_ref()
        .and_then(|entries| entries.first())
        .expect("reasoning detail should exist");
    let decoded: Value =
        serde_json::from_str(first_detail).expect("reasoning detail should be json");
    assert_eq!(decoded["type"], "reasoning.text");
}
1646
/// A tags response whose model entry has only a `model` key still
/// deserializes, and the identifier is recovered by preferring `name` and
/// falling back to `model`.
#[test]
fn tags_response_accepts_partial_model_summaries() {
    let raw = json!({
        "models": [
            { "model": "qwen3:8b" }
        ]
    });
    let parsed: OllamaTagsResponse =
        serde_json::from_value(raw).expect("partial model summaries should parse");

    let mut names: Vec<String> = Vec::new();
    for summary in parsed.models {
        if let Some(identifier) = summary.name.or(summary.model) {
            names.push(identifier);
        }
    }

    assert_eq!(names, vec!["qwen3:8b".to_string()]);
}
1663}