1use crate::config::TimeoutsConfig;
2use crate::config::constants::{env_vars, models, urls};
3use crate::config::core::{AnthropicConfig, ModelConfig, PromptCachingConfig};
4use crate::llm::client::LLMClient;
5use crate::llm::provider::{
6 ContentPart, FinishReason, LLMError, LLMProvider, LLMRequest, LLMResponse, LLMStream,
7 LLMStreamEvent, Message, MessageContent, MessageRole, ToolCall, ToolChoice, ToolDefinition,
8 Usage,
9};
10use crate::utils::http_client;
11use anyhow::Result;
12use async_stream::try_stream;
13use async_trait::async_trait;
14use futures::StreamExt;
15use hashbrown::HashMap;
16use reqwest::Client as HttpClient;
17use serde::{Deserialize, Serialize};
18use serde_json::{Map, Value};
19
20pub mod client;
21pub mod parser;
22pub mod pull;
23pub mod url;
24
25pub use client::OllamaClient;
26pub use parser::pull_events_from_value;
27pub use pull::{
28 CliPullProgressReporter, OllamaPullEvent, OllamaPullProgressReporter, TuiPullProgressReporter,
29};
30pub use url::{base_url_to_host_root, is_openai_compatible_base_url};
31
32use semver::{Version, VersionReq};
33
34use super::common::{
35 assistant_interleaved_history_text, collect_history_system_directives,
36 extract_reasoning_text_from_detail_values, extract_reasoning_text_from_serialized_details,
37 is_minimax_m2_model, merge_system_prompt_with_history_directives, override_base_url,
38 parse_client_prompt_common, resolve_model, serialize_reasoning_detail_values,
39};
40use super::error_handling::{format_network_error, format_parse_error};
41
/// Wire protocol variant selected for an Ollama server.
///
/// The variant is chosen from the server version by `wire_api_for_version`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OllamaWireApi {
    /// Chosen when the server reports version `0.0.0` or satisfies `>=0.13.3`.
    Responses,
    /// Chosen for every other server version.
    Chat,
}
55
56pub struct WireApiDetection {
58 pub wire_api: OllamaWireApi,
59 pub version: Option<Version>,
60}
61
/// Lazily parsed version requirement (`>=0.13.3`) used by `wire_api_for_version`
/// to decide whether a server is driven through [`OllamaWireApi::Responses`].
static RESPONSES_API_VERSION_REQ: std::sync::LazyLock<VersionReq> =
    std::sync::LazyLock::new(|| {
        // The literal is a compile-time constant, so a parse failure is a programming error.
        VersionReq::parse(">=0.13.3").expect("valid version requirement literal")
    });
68
69fn wire_api_for_version(version: &Version) -> OllamaWireApi {
74 if *version == Version::new(0, 0, 0) || RESPONSES_API_VERSION_REQ.matches(version) {
75 OllamaWireApi::Responses
76 } else {
77 OllamaWireApi::Chat
78 }
79}
80
81pub async fn detect_wire_api(
87 base_url: Option<String>,
88) -> std::io::Result<Option<WireApiDetection>> {
89 let resolved_base_url = override_base_url(
90 urls::OLLAMA_API_BASE,
91 base_url,
92 Some(env_vars::OLLAMA_BASE_URL),
93 );
94
95 let client = match OllamaClient::try_from_base_url(&resolved_base_url).await {
96 Ok(c) => c,
97 Err(e) => {
98 tracing::debug!("Failed to connect to Ollama server for version detection: {e}");
99 return Ok(None);
100 }
101 };
102
103 let Some(version) = client.fetch_version().await? else {
104 return Ok(None);
105 };
106
107 let wire_api = wire_api_for_version(&version);
108
109 Ok(Some(WireApiDetection {
110 wire_api,
111 version: Some(version),
112 }))
113}
114
115pub async fn ensure_oss_ready(
122 model: Option<&str>,
123 base_url: Option<String>,
124) -> std::io::Result<()> {
125 let target_model = model.unwrap_or(models::ollama::DEFAULT_MODEL);
126
127 let resolved_base_url = override_base_url(
128 urls::OLLAMA_API_BASE,
129 base_url,
130 Some(env_vars::OLLAMA_BASE_URL),
131 );
132
133 let ollama_client = OllamaClient::try_from_base_url(&resolved_base_url).await?;
135
136 match ollama_client.fetch_models().await {
138 Ok(existing_models) => {
139 if !existing_models.iter().any(|m| m == target_model) {
140 tracing::info!("Model '{target_model}' not found locally, pulling...");
141 let mut reporter = CliPullProgressReporter::new();
142 ollama_client
143 .pull_with_reporter(target_model, &mut reporter)
144 .await?;
145 }
146 }
147 Err(e) => {
148 tracing::warn!("Failed to list Ollama models: {e}");
149 }
151 }
152
153 Ok(())
154}
155
/// JSON body deserialized from the `/api/tags` request made by `fetch_ollama_models`.
#[derive(Debug, Deserialize, Serialize)]
struct OllamaTagsResponse {
    /// One entry per model listed by the server.
    models: Vec<OllamaTag>,
}
160
/// A single model entry inside [`OllamaTagsResponse`]. Every field is optional.
#[derive(Debug, Deserialize, Serialize)]
struct OllamaTag {
    /// Preferred source for the model identifier (see `ollama_model_name_from_fields`).
    name: Option<String>,
    /// Fallback source for the model identifier when `name` is absent.
    model: Option<String>,
    modified_at: Option<String>,
    size: Option<u64>,
    digest: Option<String>,
    details: Option<OllamaModelDetails>,
}
170
/// Optional `details` object attached to an [`OllamaTag`].
///
/// These fields are only deserialized here; nothing in this part of the file reads them.
#[derive(Debug, Deserialize, Serialize)]
struct OllamaModelDetails {
    format: Option<String>,
    family: Option<String>,
    families: Option<Vec<String>>,
    parameter_size: Option<String>,
    quantization_level: Option<String>,
}
179
180pub(super) fn ollama_model_name_from_fields<'a>(
181 name: Option<&'a str>,
182 model: Option<&'a str>,
183) -> Option<&'a str> {
184 name.or(model)
185 .map(str::trim)
186 .filter(|value| !value.is_empty())
187}
188
/// User-facing message returned by `fetch_ollama_models` when the HTTP request
/// to the Ollama server cannot be sent.
pub(super) const OLLAMA_CONNECTION_ERROR: &str = "No running Ollama server detected. Start it with: `ollama serve` (after installing)\n\
    Install instructions: https://github.com/ollama/ollama?tab=readme-ov-file";
191
192pub async fn fetch_ollama_models(base_url: Option<String>) -> Result<Vec<String>, anyhow::Error> {
194 use crate::config::constants::{env_vars, urls};
195
196 let resolved_base_url = override_base_url(
197 urls::OLLAMA_API_BASE,
198 base_url,
199 Some(env_vars::OLLAMA_BASE_URL),
200 );
201
202 let tags_url = format!("{}/api/tags", resolved_base_url);
204
205 let client = http_client::create_client_with_timeout(std::time::Duration::from_secs(5));
207
208 let response = client
210 .get(&tags_url)
211 .header("Content-Type", "application/json")
212 .send()
213 .await
214 .map_err(|e| {
215 tracing::warn!("Failed to connect to Ollama server: {e:?}");
216 anyhow::anyhow!(OLLAMA_CONNECTION_ERROR)
217 })?;
218
219 if !response.status().is_success() {
220 return Err(anyhow::anyhow!(
221 "Failed to fetch Ollama models: HTTP {}. {}",
222 response.status(),
223 if response.status() == reqwest::StatusCode::NOT_FOUND {
224 "Ensure Ollama server is running."
225 } else {
226 ""
227 }
228 ));
229 }
230
231 let tags_response: OllamaTagsResponse = response
233 .json()
234 .await
235 .map_err(|e| anyhow::anyhow!("Failed to parse Ollama models response: {}", e))?;
236
237 let model_names: Vec<String> = tags_response
239 .models
240 .into_iter()
241 .filter_map(|model| {
242 ollama_model_name_from_fields(model.name.as_deref(), model.model.as_deref())
243 .map(str::to_string)
244 })
245 .collect();
246
247 Ok(model_names)
248}
249
/// LLM provider that talks to an Ollama server through its `/api/chat` endpoint.
pub struct OllamaProvider {
    /// HTTP client used for every request.
    http_client: HttpClient,
    /// Server base URL; `chat_url` strips any trailing `/` before appending the path.
    base_url: String,
    /// Model used when a request does not name one.
    model: String,
    /// Bearer token attached to requests when present.
    api_key: Option<String>,
    /// Optional overrides consulted by `supports_reasoning` / `supports_reasoning_effort`.
    model_behavior: Option<ModelConfig>,
}
257
258impl OllamaProvider {
259 fn merged_system_prompt(request: &LLMRequest) -> Option<String> {
260 const HISTORY_DIRECTIVES_SECTION_HEADER: &str = "[History Directives]";
261 let directives = collect_history_system_directives(request);
262 merge_system_prompt_with_history_directives(
263 request.system_prompt.as_ref().map(|prompt| prompt.as_str()),
264 &directives,
265 HISTORY_DIRECTIVES_SECTION_HEADER,
266 )
267 }
268
269 pub fn new(api_key: String) -> Self {
270 Self::with_model(api_key, models::ollama::DEFAULT_MODEL.to_string())
271 }
272
273 pub fn with_model(api_key: String, model: String) -> Self {
274 Self::with_model_internal(model, None, Some(api_key), None)
275 }
276
277 pub fn new_with_client(
278 api_key: String,
279 model: String,
280 http_client: reqwest::Client,
281 base_url: String,
282 _timeouts: TimeoutsConfig,
283 ) -> Self {
284 Self {
285 http_client,
286 base_url,
287 model,
288 api_key: Some(api_key),
289 model_behavior: None,
290 }
291 }
292
293 pub fn from_config(
294 api_key: Option<String>,
295 model: Option<String>,
296 base_url: Option<String>,
297 _prompt_cache: Option<PromptCachingConfig>,
298 _timeouts: Option<TimeoutsConfig>,
299 _anthropic: Option<AnthropicConfig>,
300 model_behavior: Option<ModelConfig>,
301 ) -> Self {
302 let resolved_model = resolve_model(model, models::ollama::DEFAULT_MODEL);
303 Self::with_model_internal(resolved_model, base_url, api_key, model_behavior)
304 }
305
306 fn normalize_api_key(api_key: Option<String>) -> Option<String> {
307 api_key.and_then(|value| {
308 let trimmed = value.trim();
309 if trimmed.is_empty() {
310 None
311 } else {
312 Some(trimmed.to_string())
313 }
314 })
315 }
316
317 fn is_local_base_url(base_url: &str) -> bool {
318 let lowered = base_url.trim().to_ascii_lowercase();
319 const LOCAL_PREFIXES: &[&str] = &[
320 "http://localhost",
321 "https://localhost",
322 "http://127.",
323 "https://127.",
324 "http://0.0.0.0",
325 "https://0.0.0.0",
326 "http://[::1]",
327 "https://[::1]",
328 ];
329
330 LOCAL_PREFIXES
331 .iter()
332 .any(|prefix| lowered.starts_with(prefix))
333 }
334
335 fn with_model_internal(
336 model: String,
337 base_url: Option<String>,
338 api_key: Option<String>,
339 model_behavior: Option<ModelConfig>,
340 ) -> Self {
341 let normalized_api_key = Self::normalize_api_key(api_key);
342 let is_cloud_model = model.contains(":cloud") || model.contains("-cloud");
343
344 let default_base = if is_cloud_model {
345 urls::OLLAMA_CLOUD_API_BASE
346 } else {
347 urls::OLLAMA_API_BASE
348 };
349
350 let resolved_base =
351 override_base_url(default_base, base_url, Some(env_vars::OLLAMA_BASE_URL));
352 let target_is_local = Self::is_local_base_url(&resolved_base);
353
354 let effective_api_key = if target_is_local {
356 None
357 } else {
358 normalized_api_key
359 };
360
361 Self {
362 http_client: http_client::create_default_client(),
363 base_url: resolved_base,
364 model,
365 api_key: effective_api_key,
366 model_behavior,
367 }
368 }
369
370 fn chat_url(&self) -> String {
371 format!("{}/api/chat", self.base_url.trim_end_matches('/'))
372 }
373
374 fn authorized_post(&self, url: String) -> reqwest::RequestBuilder {
375 let builder = self.http_client.post(url);
376 if let Some(api_key) = &self.api_key {
377 builder.bearer_auth(api_key)
378 } else {
379 builder
380 }
381 }
382
383 fn parse_client_prompt(&self, prompt: &str) -> LLMRequest {
384 parse_client_prompt_common(prompt, &self.model, |value| self.parse_chat_request(value))
385 }
386
387 fn parse_chat_request(&self, value: &Value) -> Option<LLMRequest> {
388 let messages_value = value.get("messages")?.as_array()?;
389 let mut system_prompt = value
390 .get("system")
391 .and_then(|entry| entry.as_str())
392 .filter(|text| !text.trim().is_empty())
393 .map(|text| text.to_string());
394 let mut messages = Vec::new();
395
396 for entry in messages_value {
397 let role = entry
398 .get("role")
399 .and_then(|r| r.as_str())
400 .unwrap_or(crate::config::constants::message_roles::USER);
401 let content = entry
402 .get("content")
403 .map(|c| match c {
404 Value::String(text) => text.to_string(),
405 other => other.to_string(),
406 })
407 .unwrap_or_default();
408
409 if content.trim().is_empty() {
410 continue;
411 }
412
413 match role {
414 "system" => {
415 if system_prompt.is_none() {
416 system_prompt = Some(content);
417 }
418 }
419 "assistant" => messages.push(Message::assistant(content)),
420 "user" => messages.push(Message::user(content)),
421 _ => {}
422 }
423 }
424
425 if messages.is_empty() {
426 return None;
427 }
428
429 let tools = value
430 .get("tools")
431 .and_then(|entry| serde_json::from_value::<Vec<ToolDefinition>>(entry.clone()).ok());
432
433 Some(LLMRequest {
434 messages,
435 system_prompt: system_prompt.map(std::sync::Arc::new),
436 tools: tools.map(std::sync::Arc::new),
437 model: value
438 .get("model")
439 .and_then(|m| m.as_str())
440 .filter(|m| !m.trim().is_empty())
441 .map(|m| m.to_string())
442 .unwrap_or_else(|| self.model.clone()),
443 max_tokens: value
444 .get("max_tokens")
445 .and_then(|entry| entry.as_u64())
446 .map(|value| value as u32),
447 temperature: value
448 .get("temperature")
449 .and_then(|entry| entry.as_f64())
450 .map(|value| value as f32),
451 stream: value
452 .get("stream")
453 .and_then(|entry| entry.as_bool())
454 .unwrap_or(false),
455 ..Default::default()
456 })
457 }
458
459 fn build_payload(
460 &self,
461 request: &LLMRequest,
462 stream: bool,
463 ) -> Result<OllamaChatRequest, LLMError> {
464 let mut messages = Vec::new();
465 let mut tool_names: HashMap<String, String> = HashMap::new();
466 let minimax_tool_followup_compat = Self::minimax_tool_followup_compat_mode(request);
467
468 if let Some(system) = Self::merged_system_prompt(request) {
469 messages.push(OllamaChatMessage {
470 role: "system".to_string(),
471 content: Some(system),
472 thinking: None,
473 tool_calls: None,
474 tool_call_id: None,
475 tool_name: None,
476 images: None,
477 });
478 }
479
480 for message in &request.messages {
481 let interleaved_content = assistant_interleaved_history_text(message, &request.model);
482 let used_interleaved_content = interleaved_content.is_some();
483 let (content_text, images) = if let Some(interleaved_content) = interleaved_content {
484 (interleaved_content, None)
485 } else {
486 Self::extract_content_and_images(&message.content)
487 };
488 match message.role {
489 MessageRole::System => continue,
490 MessageRole::Tool => {
491 let tool_name = message
492 .tool_call_id
493 .as_ref()
494 .and_then(|id| tool_names.get(id).cloned());
495 let tool_name = tool_name.or_else(|| message.origin_tool.clone());
496 let tool_call_id = if minimax_tool_followup_compat && tool_name.is_some() {
497 None
498 } else {
499 message.tool_call_id.clone()
500 };
501 messages.push(OllamaChatMessage {
502 role: "tool".to_string(),
503 content: Some(content_text),
504 thinking: None,
505 tool_calls: None,
506 tool_call_id,
507 tool_name,
508 images: None,
509 });
510 }
511 _ => {
512 let thinking = if used_interleaved_content {
513 None
514 } else {
515 Self::assistant_thinking_history_text(message)
516 };
517 let mut payload_message = OllamaChatMessage {
518 role: message.role.as_generic_str().to_string(),
519 content: Some(content_text),
520 thinking,
521 tool_calls: None,
522 tool_call_id: None,
523 tool_name: None,
524 images,
525 };
526
527 if let Some(tool_calls) = message.get_tool_calls() {
528 let mut converted = Vec::new();
529 for (index, tool_call) in tool_calls.iter().enumerate() {
530 if let Some(ref func) = tool_call.function {
531 if !tool_call.id.is_empty() {
532 tool_names
533 .entry(tool_call.id.clone())
534 .or_insert_with(|| func.name.clone());
535 }
536
537 let arguments = tool_call.execution_arguments().map_err(|err| {
538 LLMError::InvalidRequest {
539 message: format!(
540 "Failed to parse tool arguments for Ollama: {err}"
541 ),
542 metadata: None,
543 }
544 })?;
545 converted.push(OllamaToolCall {
546 call_type: tool_call.call_type.clone(),
547 function: OllamaToolFunctionCall {
548 name: func.name.clone(),
549 arguments: Some(arguments),
550 index: Some(index as u32),
551 },
552 });
553 }
554 }
555
556 if !converted.is_empty() {
557 payload_message.tool_calls = Some(converted);
558 if payload_message.content.is_none() {
559 payload_message.content = Some(String::new());
560 }
561 }
562 }
563
564 messages.push(payload_message);
565 }
566 }
567 }
568
569 let options = if request.temperature.is_some() || request.max_tokens.is_some() {
570 Some(OllamaChatOptions {
571 temperature: request.temperature,
572 num_predict: request.max_tokens,
573 })
574 } else {
575 None
576 };
577
578 let tools = match request.tool_choice {
579 Some(ToolChoice::None) => None,
580 _ => request.tools.as_ref().map(|tools| {
581 tools
582 .iter()
583 .filter_map(|tool| {
584 tool.function.as_ref().map(|func| {
586 ToolDefinition::function(
587 func.name.clone(),
588 func.description.clone(),
589 func.parameters.clone(),
590 )
591 })
592 })
593 .collect()
594 }),
595 };
596
597 Ok(OllamaChatRequest {
598 model: request.model.clone(),
599 messages,
600 stream,
601 format: request.output_format.clone(),
602 options,
603 tools,
604 think: Self::think_value(request),
605 })
606 }
607
608 fn assistant_thinking_history_text(message: &Message) -> Option<String> {
609 if message.role != MessageRole::Assistant {
610 return None;
611 }
612
613 message
614 .reasoning
615 .as_deref()
616 .map(str::trim)
617 .filter(|value| !value.is_empty())
618 .map(str::to_owned)
619 .or_else(|| {
620 message
621 .reasoning_details
622 .as_deref()
623 .and_then(extract_reasoning_text_from_detail_values)
624 })
625 }
626
627 fn extract_content_and_images(content: &MessageContent) -> (String, Option<Vec<String>>) {
628 let mut images = Vec::new();
629 if let MessageContent::Parts(parts) = content {
630 for part in parts {
631 if let ContentPart::Image { data, .. } = part {
632 images.push(data.clone());
633 }
634 }
635 }
636
637 let text = content.as_text().into_owned();
638 let images = if images.is_empty() {
639 None
640 } else {
641 Some(images)
642 };
643 (text, images)
644 }
645
646 fn think_value(request: &LLMRequest) -> Option<Value> {
647 let model_id = request.model.as_str();
648 if Self::minimax_tool_followup_compat_mode(request) {
649 return None;
650 }
651 if !models::ollama::REASONING_MODELS.contains(&model_id) {
652 return None;
653 }
654
655 if models::ollama::REASONING_LEVEL_MODELS.contains(&model_id) {
656 request
657 .reasoning_effort
658 .map(|effort| Value::String(effort.to_string()))
659 } else {
660 Some(Value::Bool(true))
661 }
662 }
663
664 fn minimax_tool_followup_compat_mode(request: &LLMRequest) -> bool {
665 is_minimax_m2_model(&request.model)
666 && request
667 .messages
668 .iter()
669 .any(|message| message.role == MessageRole::Tool || message.has_tool_calls())
670 }
671
672 fn convert_tool_calls(
673 tool_calls: Option<Vec<OllamaResponseToolCall>>,
674 ) -> Result<Option<Vec<ToolCall>>, LLMError> {
675 let Some(tool_calls) = tool_calls else {
676 return Ok(None);
677 };
678
679 if tool_calls.is_empty() {
680 return Ok(None);
681 }
682
683 let mut converted = Vec::new();
684 for (index, call) in tool_calls.into_iter().enumerate() {
685 let function = call.function.ok_or_else(|| LLMError::Provider {
686 message: "Ollama response missing function details for tool call".to_string(),
687 metadata: None,
688 })?;
689
690 let name = function.name.ok_or_else(|| LLMError::Provider {
691 message: "Ollama response missing tool function name".to_string(),
692 metadata: None,
693 })?;
694
695 let arguments_value = function
696 .arguments
697 .unwrap_or_else(|| Value::Object(Map::new()));
698 let arguments = match arguments_value {
699 Value::String(raw) => raw,
700 other => serde_json::to_string(&other).map_err(|err| LLMError::Provider {
701 message: format!("Failed to serialize Ollama tool arguments: {err}"),
702 metadata: None,
703 })?,
704 };
705
706 let id = function
707 .index
708 .map(|value| format!("tool_call_{value}"))
709 .unwrap_or_else(|| format!("tool_call_{index}"));
710
711 converted.push(ToolCall::function(id, name, arguments));
712 }
713
714 Ok(Some(converted))
715 }
716
717 fn usage_from_counts(
718 prompt_tokens: Option<u32>,
719 completion_tokens: Option<u32>,
720 ) -> Option<Usage> {
721 if prompt_tokens.is_none() && completion_tokens.is_none() {
722 return None;
723 }
724
725 let prompt = prompt_tokens.unwrap_or_default();
726 let completion = completion_tokens.unwrap_or_default();
727 Some(Usage {
728 prompt_tokens: prompt,
729 completion_tokens: completion,
730 total_tokens: prompt + completion,
731 cached_prompt_tokens: None,
732 cache_creation_tokens: None,
733 cache_read_tokens: None,
734 iterations: None,
735 })
736 }
737
738 fn finish_reason_from(reason: Option<&str>) -> FinishReason {
739 match reason {
740 Some("stop") | None => FinishReason::Stop,
741 Some("length") => FinishReason::Length,
742 Some("tool_calls") => FinishReason::ToolCalls,
743 Some(other) => FinishReason::Error(other.to_string()),
744 }
745 }
746
747 fn build_response(
748 content: Option<String>,
749 tool_calls: Option<Vec<ToolCall>>,
750 reasoning: Option<String>,
751 reasoning_details: Option<Vec<String>>,
752 model: String,
753 finish_reason: Option<&str>,
754 prompt_tokens: Option<u32>,
755 completion_tokens: Option<u32>,
756 ) -> LLMResponse {
757 let mut finish = Self::finish_reason_from(finish_reason);
758 if tool_calls.as_ref().is_some_and(|calls| !calls.is_empty()) {
759 finish = FinishReason::ToolCalls;
760 }
761
762 LLMResponse {
763 content,
764 tool_calls,
765 model,
766 usage: Self::usage_from_counts(prompt_tokens, completion_tokens),
767 finish_reason: finish,
768 reasoning,
769 reasoning_details,
770 tool_references: Vec::new(),
771 request_id: None,
772 organization_id: None,
773 compaction: None,
774 }
775 }
776
777 fn response_from_chat_payload(
778 model: String,
779 parsed: OllamaChatResponse,
780 ) -> Result<LLMResponse, LLMError> {
781 if let Some(error) = parsed.error {
782 return Err(LLMError::Provider {
783 message: error,
784 metadata: None,
785 });
786 }
787
788 let (content, reasoning, tool_calls, native_reasoning_details) =
789 if let Some(message) = parsed.message {
790 let content = message
791 .content
792 .and_then(|value| (!value.is_empty()).then_some(value));
793 let reasoning = message
794 .thinking
795 .and_then(|value| (!value.is_empty()).then_some(value));
796 let tool_calls = Self::convert_tool_calls(message.tool_calls)?;
797 let native_reasoning_details = message.reasoning_details.filter(|d| !d.is_empty());
798 (content, reasoning, tool_calls, native_reasoning_details)
799 } else {
800 (None, None, None, None)
801 };
802
803 let reasoning = reasoning.or_else(|| {
804 native_reasoning_details
805 .as_deref()
806 .and_then(extract_reasoning_text_from_detail_values)
807 });
808 let mut reasoning_details = native_reasoning_details
809 .as_deref()
810 .and_then(serialize_reasoning_detail_values);
811
812 let (final_reasoning, final_content) = if reasoning.is_none() {
815 if let Some(ref content_str) = content {
816 let (reasoning_parts, cleaned_content) =
817 crate::llm::utils::extract_reasoning_content(content_str);
818 if reasoning_parts.is_empty() {
819 (None, content)
820 } else {
821 super::common::preserve_interleaved_content_in_reasoning_details(
822 &mut reasoning_details,
823 content_str,
824 );
825 (
826 Some(reasoning_parts.join("\n\n")),
827 cleaned_content.or(content),
828 )
829 }
830 } else {
831 (None, content)
832 }
833 } else {
834 (reasoning, content)
835 };
836
837 Ok(Self::build_response(
838 final_content,
839 tool_calls,
840 final_reasoning,
841 reasoning_details,
842 model,
843 parsed.done_reason.as_deref(),
844 parsed.prompt_eval_count,
845 parsed.eval_count,
846 ))
847 }
848
849 fn authorized_post_with_key(
850 http_client: &HttpClient,
851 url: &str,
852 api_key: Option<&str>,
853 ) -> reqwest::RequestBuilder {
854 let builder = http_client.post(url.to_string());
855 if let Some(value) = api_key {
856 builder.bearer_auth(value)
857 } else {
858 builder
859 }
860 }
861
862 async fn request_non_stream_response(
863 http_client: &HttpClient,
864 url: &str,
865 api_key: Option<&str>,
866 payload: &OllamaChatRequest,
867 model: String,
868 ) -> Result<LLMResponse, LLMError> {
869 let response = Self::authorized_post_with_key(http_client, url, api_key)
870 .json(payload)
871 .send()
872 .await
873 .map_err(|e| format_network_error("Ollama", &e))?;
874
875 if !response.status().is_success() {
876 let status = response.status();
877 let body = response.text().await.unwrap_or_default();
878 let error_message = Self::extract_error(&body)
879 .unwrap_or_else(|| format!("Ollama request failed ({status}): {body}"));
880 return Err(LLMError::Provider {
881 message: error_message,
882 metadata: None,
883 });
884 }
885
886 let parsed = response
887 .json::<OllamaChatResponse>()
888 .await
889 .map_err(|e| format_parse_error("Ollama", &e))?;
890 Self::response_from_chat_payload(model, parsed)
891 }
892
893 fn extract_error(body: &str) -> Option<String> {
894 serde_json::from_str::<OllamaErrorResponse>(body)
895 .ok()
896 .and_then(|resp| resp.error)
897 }
898}
899
/// JSON body sent to the chat endpoint. Optional fields are omitted when `None`.
#[derive(Debug, Serialize)]
struct OllamaChatRequest {
    model: String,
    messages: Vec<OllamaChatMessage>,
    /// Whether the server should stream its reply.
    stream: bool,
    /// Copied from the request's `output_format`.
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<Value>,
    #[serde(skip_serializing_if = "Option::is_none")]
    options: Option<OllamaChatOptions>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tools: Option<Vec<ToolDefinition>>,
    /// Either `true` or a reasoning-effort string; see `think_value`.
    #[serde(skip_serializing_if = "Option::is_none")]
    think: Option<Value>,
}
914
/// One message of an [`OllamaChatRequest`]. Optional fields are omitted when `None`.
#[derive(Debug, Serialize)]
struct OllamaChatMessage {
    role: String,
    #[serde(skip_serializing_if = "Option::is_none")]
    content: Option<String>,
    /// Reasoning text replayed for assistant history messages.
    #[serde(skip_serializing_if = "Option::is_none")]
    thinking: Option<String>,
    /// Data of the image parts taken from the message content.
    #[serde(skip_serializing_if = "Option::is_none")]
    images: Option<Vec<String>>,
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_calls: Option<Vec<OllamaToolCall>>,
    /// Set on tool-result messages only.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_call_id: Option<String>,
    /// Set on tool-result messages only.
    #[serde(skip_serializing_if = "Option::is_none")]
    tool_name: Option<String>,
}
931
/// Generation options of an [`OllamaChatRequest`].
#[derive(Debug, Serialize)]
struct OllamaChatOptions {
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f32>,
    /// Filled from the request's `max_tokens`.
    #[serde(skip_serializing_if = "Option::is_none")]
    num_predict: Option<u32>,
}
939
/// Assistant tool call replayed to the server as part of the history.
#[derive(Debug, Serialize)]
struct OllamaToolCall {
    /// Serialized under the JSON key `type`.
    #[serde(rename = "type")]
    call_type: String,
    function: OllamaToolFunctionCall,
}
946
/// Function part of an [`OllamaToolCall`].
#[derive(Debug, Serialize)]
struct OllamaToolFunctionCall {
    name: String,
    /// Parsed call arguments, sent as a JSON value.
    #[serde(skip_serializing_if = "Option::is_none")]
    arguments: Option<Value>,
    /// Position of the call within its assistant message.
    #[serde(skip_serializing_if = "Option::is_none")]
    index: Option<u32>,
}
955
/// Chat response object: either the whole non-streamed reply or one line of a
/// streamed reply (see `parse_stream_chunk`).
#[derive(Debug, Deserialize)]
struct OllamaChatResponse {
    message: Option<OllamaResponseMessage>,
    /// Marks the final object of a stream; defaults to `false` when absent.
    #[serde(default)]
    done: bool,
    #[serde(default)]
    done_reason: Option<String>,
    /// Used as the prompt-token count.
    #[serde(default)]
    prompt_eval_count: Option<u32>,
    /// Used as the completion-token count.
    #[serde(default)]
    eval_count: Option<u32>,
    /// When present, the response is surfaced as a provider error.
    #[serde(default)]
    error: Option<String>,
}
970
/// Message payload of an [`OllamaChatResponse`].
#[derive(Debug, Deserialize)]
struct OllamaResponseMessage {
    /// Deserialized but never read, hence the `dead_code` expectation.
    #[serde(default)]
    #[expect(dead_code)]
    role: Option<String>,
    #[serde(default)]
    content: Option<String>,
    /// Explicit reasoning text, when the server provides it separately from `content`.
    #[serde(default)]
    thinking: Option<String>,
    #[serde(default)]
    reasoning_details: Option<Vec<Value>>,
    #[serde(default)]
    tool_calls: Option<Vec<OllamaResponseToolCall>>,
}
985
/// Tool call as it appears in a server response.
///
/// It is also `Serialize` because the streaming path re-serializes it to JSON
/// before handing it to the stream aggregator.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct OllamaResponseToolCall {
    /// Read from / written to the JSON key `type`.
    #[serde(default)]
    #[serde(rename = "type")]
    call_type: Option<String>,
    #[serde(default)]
    function: Option<OllamaResponseFunctionCall>,
}
994
/// Function part of an [`OllamaResponseToolCall`]. Every field is optional.
#[derive(Debug, Deserialize, Serialize, Clone)]
struct OllamaResponseFunctionCall {
    #[serde(default)]
    name: Option<String>,
    /// Either a JSON string or a structured JSON value; `convert_tool_calls` handles both.
    #[serde(default)]
    arguments: Option<Value>,
    /// When present, used to derive the tool-call id.
    #[serde(default)]
    index: Option<u32>,
}
1004
/// Minimal view of an error body, used by `extract_error`.
#[derive(Debug, Deserialize)]
struct OllamaErrorResponse {
    error: Option<String>,
}
1009
1010fn parse_stream_chunk(line: &str) -> Result<OllamaChatResponse, LLMError> {
1011 serde_json::from_str::<OllamaChatResponse>(line).map_err(|err| LLMError::Provider {
1012 message: format!("Failed to parse Ollama stream chunk: {err}"),
1013 metadata: None,
1014 })
1015}
1016
1017#[async_trait]
1018impl LLMProvider for OllamaProvider {
    /// Provider identifier: always `"ollama"`.
    fn name(&self) -> &str {
        "ollama"
    }
1022
    /// Streaming is always reported as supported; see `stream`.
    fn supports_streaming(&self) -> bool {
        true
    }
1026
    /// Tool calling is reported as supported for every model; the argument is ignored.
    fn supports_tools(&self, _model: &str) -> bool {
        true
    }
1030
1031 fn supports_reasoning(&self, model: &str) -> bool {
1032 models::ollama::REASONING_MODELS.contains(&model)
1035 || self
1036 .model_behavior
1037 .as_ref()
1038 .and_then(|b| b.model_supports_reasoning)
1039 .unwrap_or(false)
1040 }
1041
1042 fn supports_reasoning_effort(&self, model: &str) -> bool {
1043 models::ollama::REASONING_LEVEL_MODELS.contains(&model)
1045 || self
1046 .model_behavior
1047 .as_ref()
1048 .and_then(|b| b.model_supports_reasoning_effort)
1049 .unwrap_or(false)
1050 }
1051
1052 async fn generate(&self, mut request: LLMRequest) -> Result<LLMResponse, LLMError> {
1053 self.validate_request(&request)?;
1054 if request.model.is_empty() {
1055 request.model = self.model.clone();
1056 }
1057 let model = request.model.clone();
1058 let payload = self.build_payload(&request, false)?;
1059 let url = self.chat_url();
1060 Self::request_non_stream_response(
1061 &self.http_client,
1062 &url,
1063 self.api_key.as_deref(),
1064 &payload,
1065 model,
1066 )
1067 .await
1068 }
1069
1070 async fn stream(&self, mut request: LLMRequest) -> Result<LLMStream, LLMError> {
1071 self.validate_request(&request)?;
1072 if request.model.is_empty() {
1073 request.model = self.model.clone();
1074 }
1075 let model = request.model.clone();
1076 let payload = self.build_payload(&request, true)?;
1077 let fallback_payload = self.build_payload(&request, false)?;
1078 let url = self.chat_url();
1079
1080 let response = self
1081 .authorized_post(url.clone())
1082 .header(reqwest::header::ACCEPT_ENCODING, "identity")
1083 .json(&payload)
1084 .send()
1085 .await
1086 .map_err(|e| format_network_error("Ollama", &e))?;
1087
1088 if !response.status().is_success() {
1089 let status = response.status();
1090 let body = response.text().await.unwrap_or_default();
1091 let error_message = Self::extract_error(&body)
1092 .unwrap_or_else(|| format!("Ollama streaming request failed ({status}): {body}"));
1093 return Err(LLMError::Provider {
1094 message: error_message,
1095 metadata: None,
1096 });
1097 }
1098
1099 let byte_stream = response.bytes_stream();
1100 let mut buffer: Vec<u8> = Vec::new();
1101 let mut aggregator = crate::llm::providers::shared::StreamAggregator::new(model.clone());
1102 let fallback_http_client = self.http_client.clone();
1103 let fallback_api_key = self.api_key.clone();
1104 let fallback_model = model.clone();
1105 let fallback_url = url.clone();
1106 let any_interleaved = request
1107 .messages
1108 .iter()
1109 .any(|msg| assistant_interleaved_history_text(msg, &request.model).is_some());
1110 let stream = try_stream! {
1111 let mut prompt_tokens: Option<u32> = None;
1112 let mut completion_tokens: Option<u32> = None;
1113 let mut finish_reason: Option<String> = None;
1114 let mut completed = false;
1115 let mut saw_stream_chunk = false;
1116
1117 futures::pin_mut!(byte_stream);
1118 while let Some(chunk_result) = byte_stream.next().await {
1119 let chunk = match chunk_result {
1120 Ok(chunk) => {
1121 saw_stream_chunk = true;
1122 chunk
1123 }
1124 Err(err) if !saw_stream_chunk => {
1125 tracing::warn!(
1126 model = %fallback_model,
1127 url = %fallback_url,
1128 error = %err,
1129 "Ollama stream failed before first chunk; retrying once as non-stream response"
1130 );
1131 let fallback_response = Self::request_non_stream_response(
1132 &fallback_http_client,
1133 &fallback_url,
1134 fallback_api_key.as_deref(),
1135 &fallback_payload,
1136 fallback_model.clone(),
1137 ).await?;
1138 yield LLMStreamEvent::Completed { response: Box::new(fallback_response) };
1139 return;
1140 }
1141 Err(err) => Err(format_network_error("Ollama", &err))?,
1142 };
1143 buffer.extend_from_slice(&chunk);
1144
1145 while let Some(pos) = buffer.iter().position(|b| *b == b'\n') {
1146 let line_bytes: Vec<u8> = buffer.drain(..=pos).collect();
1147 let line = std::str::from_utf8(&line_bytes)
1148 .map_err(|err| LLMError::Provider {
1149 message: format!("Invalid UTF-8 in Ollama stream: {err}"),
1150 metadata: None,
1151 })?;
1152 let line = line.trim();
1153
1154 if line.is_empty() {
1155 continue;
1156 }
1157
1158 let parsed = parse_stream_chunk(line)?;
1159
1160 if let Some(error) = parsed.error {
1161 Err(LLMError::Provider {
1162 message: error,
1163 metadata: None,
1164 })?;
1165 }
1166
1167 if let Some(message) = parsed.message {
1168 if let Some(reasoning_details) = message.reasoning_details.as_deref() {
1169 aggregator.set_reasoning_details(reasoning_details);
1170 }
1171
1172 let has_explicit_thinking = message
1173 .thinking
1174 .as_ref()
1175 .map(|v| !v.is_empty())
1176 .unwrap_or(false);
1177
1178 if let Some(thinking) = message.thinking
1179 && let Some(delta) = aggregator.handle_reasoning(&thinking) {
1180 yield LLMStreamEvent::Reasoning { delta };
1181 }
1182
1183 if let Some(content) = message.content {
1184 for event in aggregator.handle_content(&content) {
1185 match &event {
1186 LLMStreamEvent::Reasoning { .. }
1187 if has_explicit_thinking || any_interleaved =>
1188 {
1189 }
1190 _ => yield event,
1191 }
1192 }
1193 }
1194
1195 if let Some(tool_calls) = message.tool_calls {
1196 let tool_calls_json: Vec<Value> = tool_calls
1197 .into_iter()
1198 .map(|tc| serde_json::to_value(tc).unwrap_or(Value::Null))
1199 .filter(|v| !v.is_null())
1200 .collect();
1201 aggregator.handle_tool_calls(&tool_calls_json);
1202 }
1203 }
1204
1205 if parsed.done {
1206 prompt_tokens = parsed.prompt_eval_count;
1207 completion_tokens = parsed.eval_count;
1208 finish_reason = parsed.done_reason;
1209 completed = true;
1210 }
1211 }
1212
1213 if completed {
1214 break;
1215 }
1216 }
1217
1218 if !completed {
1219 Err(LLMError::Provider {
1220 message: "Ollama stream ended without completion signal".to_string(),
1221 metadata: None,
1222 })?;
1223 }
1224
1225 let mut response = aggregator.finalize();
1226 if let Some(pt) = prompt_tokens {
1227 let mut usage = response.usage.unwrap_or_default();
1228 usage.prompt_tokens = pt;
1229 if let Some(ct) = completion_tokens {
1230 usage.completion_tokens = ct;
1231 usage.total_tokens = pt + ct;
1232 }
1233 response.usage = Some(usage);
1234 }
1235 if let Some(fr) = finish_reason {
1236 response.finish_reason = crate::llm::providers::common::map_finish_reason_common(&fr);
1237 }
1238 if response.reasoning.is_none()
1239 && let Some(details) = response.reasoning_details.as_ref()
1240 {
1241 response.reasoning = extract_reasoning_text_from_serialized_details(details);
1242 }
1243
1244 yield LLMStreamEvent::Completed { response: Box::new(response) };
1245 };
1246
1247 Ok(Box::pin(stream))
1248 }
1249
1250 fn supported_models(&self) -> Vec<String> {
1251 models::ollama::SUPPORTED_MODELS
1252 .iter()
1253 .map(|model| model.to_string())
1254 .collect()
1255 }
1256
1257 fn validate_request(&self, request: &LLMRequest) -> Result<(), LLMError> {
1258 if let Some(tool_choice) = &request.tool_choice {
1259 match tool_choice {
1260 ToolChoice::Auto | ToolChoice::None => {}
1261 _ => {
1262 return Err(LLMError::InvalidRequest {
1263 message: "Ollama does not support explicit tool_choice overrides"
1264 .to_string(),
1265 metadata: None,
1266 });
1267 }
1268 }
1269 }
1270
1271 if request.parallel_tool_calls.is_some() || request.parallel_tool_config.is_some() {
1272 return Err(LLMError::InvalidRequest {
1273 message: "Ollama does not support parallel tool configuration".to_string(),
1274 metadata: None,
1275 });
1276 }
1277
1278 for message in &request.messages {
1279 if matches!(message.role, MessageRole::Tool) && message.tool_call_id.is_none() {
1280 return Err(LLMError::InvalidRequest {
1281 message: "Ollama tool responses must include tool_call_id".to_string(),
1282 metadata: None,
1283 });
1284 }
1285 }
1286
1287 Ok(())
1288 }
1289}
1290
1291#[async_trait]
1292impl LLMClient for OllamaProvider {
1293 async fn generate(&mut self, prompt: &str) -> Result<LLMResponse, LLMError> {
1294 let mut request = self.parse_client_prompt(prompt);
1295 if request.model.is_empty() {
1296 request.model = self.model.clone();
1297 }
1298 Ok(LLMProvider::generate(self, request).await?)
1299 }
1300
1301 fn model_id(&self) -> &str {
1302 &self.model
1303 }
1304}
1305
1306#[cfg(test)]
1307mod tests {
1308 use super::*;
1309 use crate::config::types::ReasoningEffortLevel;
1310 use crate::llm::provider::{ContentPart, Message, MessageContent};
1311 use serde_json::json;
1312
1313 fn test_provider() -> OllamaProvider {
1314 OllamaProvider::from_config(
1315 None,
1316 Some("test-model".to_string()),
1317 Some("http://localhost".to_string()),
1318 None,
1319 None,
1320 None,
1321 None,
1322 )
1323 }
1324
1325 #[test]
1326 fn build_payload_includes_images() {
1327 let provider = test_provider();
1328 let parts = vec![
1329 ContentPart::text("see ".to_string()),
1330 ContentPart::image("BASE64DATA".to_string(), "image/png".to_string()),
1331 ];
1332 let request = LLMRequest {
1333 model: "test-model".to_string(),
1334 messages: vec![Message::user_with_parts(parts)],
1335 ..Default::default()
1336 };
1337
1338 let payload = provider.build_payload(&request, false).unwrap();
1339 assert_eq!(payload.messages.len(), 1);
1340 let message = &payload.messages[0];
1341 assert_eq!(message.content.as_deref(), Some("see "));
1342 assert_eq!(
1343 message.images.as_ref(),
1344 Some(&vec!["BASE64DATA".to_string()])
1345 );
1346 }
1347
1348 #[test]
1349 fn build_payload_omits_images_when_none_present() {
1350 let provider = test_provider();
1351 let content = MessageContent::text("no images".to_string());
1352 let request = LLMRequest {
1353 model: "test-model".to_string(),
1354 messages: vec![Message::user(content.as_text().into_owned())],
1355 ..Default::default()
1356 };
1357
1358 let payload = provider.build_payload(&request, false).unwrap();
1359 assert_eq!(payload.messages.len(), 1);
1360 let message = &payload.messages[0];
1361 assert_eq!(message.content.as_deref(), Some("no images"));
1362 assert!(message.images.is_none());
1363 }
1364
1365 #[test]
1366 fn build_payload_minimax_tool_followup_omits_tool_call_id() {
1367 let provider = test_provider();
1368 let tool_call_id = "direct_run_pty_cmd_1".to_string();
1369 let request = LLMRequest {
1370 model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1371 messages: vec![
1372 Message::assistant_with_tools(
1373 String::new(),
1374 vec![ToolCall::function(
1375 tool_call_id.clone(),
1376 "run_pty_cmd".to_string(),
1377 "{\"command\":\"cargo fmt\"}".to_string(),
1378 )],
1379 ),
1380 Message::tool_response(
1381 tool_call_id,
1382 "{\"output\":\"\",\"exit_code\":0}".to_string(),
1383 ),
1384 ],
1385 reasoning_effort: Some(ReasoningEffortLevel::Low),
1386 ..Default::default()
1387 };
1388
1389 let payload = provider.build_payload(&request, false).unwrap();
1390 assert_eq!(payload.messages.len(), 2);
1391 assert_eq!(payload.messages[1].role, "tool");
1392 assert_eq!(
1393 payload.messages[1].tool_name.as_deref(),
1394 Some("run_pty_cmd")
1395 );
1396 assert!(payload.messages[1].tool_call_id.is_none());
1397 assert!(payload.think.is_none());
1398 }
1399
1400 #[test]
1401 fn build_payload_non_minimax_tool_followup_keeps_tool_call_id() {
1402 let provider = test_provider();
1403 let tool_call_id = "direct_run_pty_cmd_1".to_string();
1404 let request = LLMRequest {
1405 model: models::ollama::GPT_OSS_20B_CLOUD.to_string(),
1406 messages: vec![
1407 Message::assistant_with_tools(
1408 String::new(),
1409 vec![ToolCall::function(
1410 tool_call_id.clone(),
1411 "run_pty_cmd".to_string(),
1412 "{\"command\":\"cargo fmt\"}".to_string(),
1413 )],
1414 ),
1415 Message::tool_response(
1416 tool_call_id.clone(),
1417 "{\"output\":\"\",\"exit_code\":0}".to_string(),
1418 ),
1419 ],
1420 reasoning_effort: Some(ReasoningEffortLevel::Low),
1421 ..Default::default()
1422 };
1423
1424 let payload = provider.build_payload(&request, false).unwrap();
1425 assert_eq!(payload.messages.len(), 2);
1426 assert_eq!(payload.messages[1].role, "tool");
1427 assert_eq!(
1428 payload.messages[1].tool_name.as_deref(),
1429 Some("run_pty_cmd")
1430 );
1431 assert_eq!(
1432 payload.messages[1].tool_call_id.as_deref(),
1433 Some(tool_call_id.as_str())
1434 );
1435 assert_eq!(payload.think, Some(Value::String("low".to_string())));
1436 }
1437
1438 #[test]
1439 fn build_payload_hoists_history_system_directives_into_system_prompt() {
1440 let provider = test_provider();
1441 let request = LLMRequest {
1442 model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1443 system_prompt: Some(std::sync::Arc::new(
1444 "stable system instructions".to_string(),
1445 )),
1446 messages: vec![
1447 Message::user("explore architecture".to_string()),
1448 Message::system(
1449 "Previous turn already completed tool execution. Reuse the latest tool outputs in history instead of rerunning the same exploration.".to_string(),
1450 ),
1451 ],
1452 ..Default::default()
1453 };
1454
1455 let payload = provider.build_payload(&request, false).unwrap();
1456 assert_eq!(payload.messages.len(), 2);
1457 assert_eq!(payload.messages[0].role, "system");
1458 assert!(
1459 payload.messages[0]
1460 .content
1461 .as_deref()
1462 .unwrap_or("")
1463 .contains("stable system instructions")
1464 );
1465 assert!(
1466 payload.messages[0]
1467 .content
1468 .as_deref()
1469 .unwrap_or("")
1470 .contains("[History Directives]")
1471 );
1472 assert!(
1473 payload.messages[0]
1474 .content
1475 .as_deref()
1476 .unwrap_or("")
1477 .contains("Previous turn already completed tool execution")
1478 );
1479 assert_eq!(payload.messages[1].role, "user");
1480 assert_eq!(
1481 payload.messages[1].content.as_deref(),
1482 Some("explore architecture")
1483 );
1484 }
1485
1486 #[test]
1487 fn build_payload_promotes_history_system_directive_without_base_system_prompt() {
1488 let provider = test_provider();
1489 let request = LLMRequest {
1490 model: models::ollama::MINIMAX_M25_CLOUD.to_string(),
1491 messages: vec![
1492 Message::system(
1493 "Repeated read-only exploration hit the per-turn family cap. Scheduling a final recovery pass without more tools.".to_string(),
1494 ),
1495 Message::user("summarize the architecture".to_string()),
1496 ],
1497 ..Default::default()
1498 };
1499
1500 let payload = provider.build_payload(&request, false).unwrap();
1501 assert_eq!(payload.messages.len(), 2);
1502 assert_eq!(payload.messages[0].role, "system");
1503 assert!(
1504 payload.messages[0]
1505 .content
1506 .as_deref()
1507 .unwrap_or("")
1508 .contains("[History Directives]")
1509 );
1510 assert!(
1511 payload.messages[0]
1512 .content
1513 .as_deref()
1514 .unwrap_or("")
1515 .contains("Repeated read-only exploration hit the per-turn family cap")
1516 );
1517 assert_eq!(payload.messages[1].role, "user");
1518 }
1519
1520 #[test]
1521 fn build_payload_recovers_balanced_prefix_from_malformed_history_tool_arguments() {
1522 let provider = test_provider();
1523 let request = LLMRequest {
1524 model: "test-model".to_string(),
1525 messages: vec![Message::assistant_with_tools(
1526 String::new(),
1527 vec![ToolCall::function(
1528 "tool_call_0".to_string(),
1529 "unified_file".to_string(),
1530 "{\"action\":\"read\",\"path\":\"docs/ARCHITECTURE.md\",\"offset\":1,\"limit\":100}{\"action\":\"read\",\"path\":\"README.md\"}"
1531 .to_string(),
1532 )],
1533 )],
1534 ..Default::default()
1535 };
1536
1537 let payload = provider
1538 .build_payload(&request, false)
1539 .expect("payload should recover malformed history tool arguments");
1540
1541 let tool_calls = payload.messages[0]
1542 .tool_calls
1543 .as_ref()
1544 .expect("tool calls should be present");
1545 assert_eq!(tool_calls.len(), 1);
1546 assert_eq!(
1547 tool_calls[0].function.arguments,
1548 Some(json!({
1549 "action": "read",
1550 "path": "docs/ARCHITECTURE.md",
1551 "offset": 1,
1552 "limit": 100
1553 }))
1554 );
1555 }
1556
1557 #[test]
1558 fn build_payload_rehydrates_glm_interleaved_history_into_content() {
1559 let provider = test_provider();
1560 let request = LLMRequest {
1561 model: models::ollama::GLM_5_CLOUD.to_string(),
1562 messages: vec![
1563 Message::assistant("done".to_string()).with_reasoning(Some("trace".to_string())),
1564 ],
1565 ..Default::default()
1566 };
1567
1568 let payload = provider.build_payload(&request, false).unwrap();
1569
1570 assert_eq!(
1571 payload.messages[0].content.as_deref(),
1572 Some("<think>trace</think>done")
1573 );
1574 assert!(payload.messages[0].thinking.is_none());
1575 }
1576
1577 #[test]
1578 fn build_payload_replays_assistant_reasoning_as_ollama_thinking() {
1579 let provider = test_provider();
1580 let request = LLMRequest {
1581 model: models::ollama::GPT_OSS_20B.to_string(),
1582 messages: vec![
1583 Message::assistant("need a tool".to_string())
1584 .with_reasoning(Some("reasoning trace".to_string())),
1585 ],
1586 ..Default::default()
1587 };
1588
1589 let payload = provider.build_payload(&request, false).unwrap();
1590
1591 assert_eq!(payload.messages[0].content.as_deref(), Some("need a tool"));
1592 assert_eq!(
1593 payload.messages[0].thinking.as_deref(),
1594 Some("reasoning trace")
1595 );
1596 }
1597
1598 #[test]
1599 fn build_payload_includes_apply_patch_as_normal_tool() {
1600 let provider = test_provider();
1601 let request = LLMRequest {
1602 model: "test-model".to_string(),
1603 messages: vec![Message::user("patch this file".to_string())],
1604 tools: Some(std::sync::Arc::new(vec![ToolDefinition::apply_patch(
1605 "Apply VT Code patches".to_string(),
1606 )])),
1607 ..Default::default()
1608 };
1609
1610 let payload = provider.build_payload(&request, false).unwrap();
1611 let tools = payload.tools.expect("tools should be present");
1612 assert_eq!(tools.len(), 1);
1613 assert_eq!(tools[0].function_name(), "apply_patch");
1614 }
1615
1616 #[test]
1617 fn response_payload_preserves_reasoning_details() {
1618 let parsed = OllamaChatResponse {
1619 message: Some(OllamaResponseMessage {
1620 role: Some("assistant".to_string()),
1621 content: Some("answer".to_string()),
1622 thinking: None,
1623 reasoning_details: Some(vec![json!({
1624 "type": "reasoning.text",
1625 "text": "step one"
1626 })]),
1627 tool_calls: None,
1628 }),
1629 done: true,
1630 done_reason: Some("stop".to_string()),
1631 prompt_eval_count: Some(1),
1632 eval_count: Some(2),
1633 error: None,
1634 };
1635
1636 let response = OllamaProvider::response_from_chat_payload("test-model".to_string(), parsed)
1637 .expect("response should parse");
1638 assert_eq!(response.reasoning.as_deref(), Some("step one"));
1639 assert!(response.reasoning_details.is_some());
1640
1641 let first_detail = response
1642 .reasoning_details
1643 .as_ref()
1644 .and_then(|details| details.first())
1645 .expect("reasoning detail should exist");
1646 let parsed_detail: Value =
1647 serde_json::from_str(first_detail).expect("reasoning detail should be json");
1648 assert_eq!(parsed_detail["type"], "reasoning.text");
1649 }
1650
1651 #[test]
1652 fn tags_response_accepts_partial_model_summaries() {
1653 let parsed: OllamaTagsResponse = serde_json::from_value(json!({
1654 "models": [
1655 { "model": "qwen3:8b" }
1656 ]
1657 }))
1658 .expect("partial model summaries should parse");
1659
1660 let names: Vec<String> = parsed
1661 .models
1662 .into_iter()
1663 .filter_map(|model| model.name.or(model.model))
1664 .collect();
1665 assert_eq!(names, vec!["qwen3:8b".to_string()]);
1666 }
1667
1668 #[test]
1669 fn wire_api_responses_for_dev_build() {
1670 assert_eq!(
1671 wire_api_for_version(&Version::new(0, 0, 0)),
1672 OllamaWireApi::Responses,
1673 );
1674 }
1675
1676 #[test]
1677 fn wire_api_responses_for_exact_threshold() {
1678 assert_eq!(
1679 wire_api_for_version(&Version::new(0, 13, 3)),
1680 OllamaWireApi::Responses,
1681 );
1682 }
1683
1684 #[test]
1685 fn wire_api_responses_for_above_threshold() {
1686 assert_eq!(
1687 wire_api_for_version(&Version::new(0, 14, 0)),
1688 OllamaWireApi::Responses,
1689 );
1690 assert_eq!(
1691 wire_api_for_version(&Version::new(1, 0, 0)),
1692 OllamaWireApi::Responses,
1693 );
1694 }
1695
1696 #[test]
1697 fn wire_api_chat_for_below_threshold() {
1698 assert_eq!(
1699 wire_api_for_version(&Version::new(0, 13, 2)),
1700 OllamaWireApi::Chat,
1701 );
1702 assert_eq!(
1703 wire_api_for_version(&Version::new(0, 12, 0)),
1704 OllamaWireApi::Chat,
1705 );
1706 assert_eq!(
1707 wire_api_for_version(&Version::new(0, 1, 0)),
1708 OllamaWireApi::Chat,
1709 );
1710 }
1711}