// rig_core/providers/ollama.rs

//! Ollama API client and Rig integration
//!
//! # Example
//! ```rust,ignore
//! use rig_core::client::{Nothing, CompletionClient};
//! use rig_core::completion::Prompt;
//! use rig_core::providers::ollama;
//!
//! // Create a new Ollama client (defaults to http://localhost:11434, no auth)
//! let client = ollama::Client::new(Nothing).unwrap();
//!
//! // Or connect to a remote/proxied Ollama instance with authentication
//! let client = ollama::Client::builder()
//!     .api_key("my-secret-key")
//!     .base_url("http://remote-ollama:11434")
//!     .build()
//!     .unwrap();
//!
//! // Create an agent with a preamble
//! let comedian_agent = client
//!     .agent("qwen2.5:14b")
//!     .preamble("You are a comedian here to entertain the user using humour and jokes.")
//!     .build();
//!
//! // Prompt the agent and print the response
//! let response = comedian_agent.prompt("Entertain me!").await?;
//! println!("{response}");
//!
//! // Create an embedding model using the "all-minilm" model
//! let emb_model = client.embedding_model("all-minilm", 384);
//! let embeddings = emb_model.embed_texts(vec![
//!     "Why is the sky blue?".to_owned(),
//!     "Why is the grass green?".to_owned()
//! ]).await?;
//! println!("Embedding response: {:?}", embeddings);
//!
//! // Create an extractor if needed
//! let extractor = client.extractor::<serde_json::Value>("llama3.2").build();
//! ```
use crate::client::{
    self, ApiKey, Capabilities, Capable, DebugExt, ModelLister, Nothing, Provider, ProviderBuilder,
    ProviderClient,
};
use crate::completion::{GetTokenUsage, Usage};
use crate::http_client::{self, HttpClientExt};
use crate::message::DocumentSourceKind;
use crate::model::{Model, ModelList, ModelListingError};
use crate::streaming::RawStreamingChoice;
use crate::{
    OneOrMany,
    completion::{self, CompletionError, CompletionRequest},
    embeddings::{self, EmbeddingError},
    json_utils, message,
    message::{ImageDetail, Text},
    streaming,
    wasm_compat::{WasmCompatSend, WasmCompatSync},
};
use async_stream::try_stream;
use bytes::Bytes;
use futures::StreamExt;
use serde::{Deserialize, Serialize};
use serde_json::{Value, json};
use std::{convert::TryFrom, str::FromStr};
use tracing::info_span;
use tracing_futures::Instrument;

// ---------- Main Client ----------

const OLLAMA_API_BASE_URL: &str = "http://localhost:11434";

/// Optional API key for Ollama. By default Ollama requires no authentication,
/// but proxied or secured deployments may require a Bearer token.
#[derive(Debug, Default, Clone)]
pub struct OllamaApiKey(Option<String>);

impl ApiKey for OllamaApiKey {
    fn into_header(
        self,
    ) -> Option<http_client::Result<(http::header::HeaderName, http::header::HeaderValue)>> {
        self.0.map(http_client::make_auth_header)
    }
}

impl From<Nothing> for OllamaApiKey {
    fn from(_: Nothing) -> Self {
        Self(None)
    }
}

impl From<String> for OllamaApiKey {
    fn from(key: String) -> Self {
        if key.is_empty() {
            Self(None)
        } else {
            Self(Some(key))
        }
    }
}

impl From<&str> for OllamaApiKey {
    fn from(key: &str) -> Self {
        if key.is_empty() {
            Self(None)
        } else {
            Self(Some(key.to_owned()))
        }
    }
}
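
// A quick illustrative sketch (not part of the upstream source) of how the
// conversions above behave. Note that empty strings collapse to "no auth",
// which keeps `from_env` well-defined when `OLLAMA_API_KEY` is set but blank:
//
// ```rust,ignore
// let anonymous = OllamaApiKey::from(Nothing); // OllamaApiKey(None): no Authorization header
// let blank = OllamaApiKey::from("");          // also OllamaApiKey(None)
// let bearer = OllamaApiKey::from("secret");   // OllamaApiKey(Some(_)): sent as a Bearer token
// ```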

#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaExt;

#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaBuilder;

impl Provider for OllamaExt {
    type Builder = OllamaBuilder;
    const VERIFY_PATH: &'static str = "api/tags";
}

impl<H> Capabilities<H> for OllamaExt {
    type Completion = Capable<CompletionModel<H>>;
    type Transcription = Nothing;
    type Embeddings = Capable<EmbeddingModel<H>>;
    type ModelListing = Capable<OllamaModelLister<H>>;
    #[cfg(feature = "image")]
    type ImageGeneration = Nothing;

    #[cfg(feature = "audio")]
    type AudioGeneration = Nothing;
}

impl DebugExt for OllamaExt {}

impl ProviderBuilder for OllamaBuilder {
    type Extension<H>
        = OllamaExt
    where
        H: HttpClientExt;
    type ApiKey = OllamaApiKey;

    const BASE_URL: &'static str = OLLAMA_API_BASE_URL;

    fn build<H>(
        _builder: &client::ClientBuilder<Self, Self::ApiKey, H>,
    ) -> http_client::Result<Self::Extension<H>>
    where
        H: HttpClientExt,
    {
        Ok(OllamaExt)
    }
}

pub type Client<H = reqwest::Client> = client::Client<OllamaExt, H>;
pub type ClientBuilder<H = crate::markers::Missing> =
    client::ClientBuilder<OllamaBuilder, OllamaApiKey, H>;

impl ProviderClient for Client {
    type Input = OllamaApiKey;
    type Error = crate::client::ProviderClientError;

    fn from_env() -> Result<Self, Self::Error> {
        let api_base = crate::client::optional_env_var("OLLAMA_API_BASE_URL")?
            .unwrap_or_else(|| OLLAMA_API_BASE_URL.to_string());

        let api_key = crate::client::optional_env_var("OLLAMA_API_KEY")?
            .map(OllamaApiKey::from)
            .unwrap_or_default();

        Self::builder()
            .api_key(api_key)
            .base_url(&api_base)
            .build()
            .map_err(Into::into)
    }

    fn from_val(api_key: Self::Input) -> Result<Self, Self::Error> {
        Self::builder().api_key(api_key).build().map_err(Into::into)
    }
}
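
// Example (sketch): configuring the client entirely from the environment via
// the two variables read by `from_env` above; the values shown are
// illustrative:
//
// ```rust,ignore
// // export OLLAMA_API_BASE_URL=http://remote-ollama:11434
// // export OLLAMA_API_KEY=my-secret-key   # optional; unset or empty means no auth
// let client = Client::from_env()?;
// ```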

// ---------- API Error and Response Structures ----------

#[derive(Debug, Deserialize)]
struct ApiErrorResponse {
    message: String,
}

#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum ApiResponse<T> {
    Ok(T),
    Err(ApiErrorResponse),
}

// ---------- Embedding API ----------

pub const ALL_MINILM: &str = "all-minilm";
pub const NOMIC_EMBED_TEXT: &str = "nomic-embed-text";

fn model_dimensions_from_identifier(identifier: &str) -> Option<usize> {
    match identifier {
        ALL_MINILM => Some(384),
        NOMIC_EMBED_TEXT => Some(768),
        _ => None,
    }
}
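
// The fallback behaviour, sketched: known models resolve to their published
// dimension counts, while unknown identifiers yield `None`, in which case
// `EmbeddingModel::make` below falls back to the caller-supplied `dims`
// (or 0 if none was given):
//
// ```rust,ignore
// assert_eq!(model_dimensions_from_identifier(ALL_MINILM), Some(384));
// assert_eq!(model_dimensions_from_identifier(NOMIC_EMBED_TEXT), Some(768));
// assert_eq!(model_dimensions_from_identifier("some-custom-model"), None);
// ```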

#[derive(Debug, Serialize, Deserialize)]
pub struct EmbeddingResponse {
    pub model: String,
    pub embeddings: Vec<Vec<f64>>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
}

impl From<ApiErrorResponse> for EmbeddingError {
    fn from(err: ApiErrorResponse) -> Self {
        EmbeddingError::ProviderError(err.message)
    }
}

impl From<ApiResponse<EmbeddingResponse>> for Result<EmbeddingResponse, EmbeddingError> {
    fn from(value: ApiResponse<EmbeddingResponse>) -> Self {
        match value {
            ApiResponse::Ok(response) => Ok(response),
            ApiResponse::Err(err) => Err(EmbeddingError::ProviderError(err.message)),
        }
    }
}

// ---------- Embedding Model ----------

#[derive(Clone)]
pub struct EmbeddingModel<T = reqwest::Client> {
    client: Client<T>,
    pub model: String,
    ndims: usize,
}

impl<T> EmbeddingModel<T> {
    pub fn new(client: Client<T>, model: impl Into<String>, ndims: usize) -> Self {
        Self {
            client,
            model: model.into(),
            ndims,
        }
    }

    pub fn with_model(client: Client<T>, model: &str, ndims: usize) -> Self {
        Self {
            client,
            model: model.into(),
            ndims,
        }
    }
}

impl<T> embeddings::EmbeddingModel for EmbeddingModel<T>
where
    T: HttpClientExt + Clone + 'static,
{
    type Client = Client<T>;

    fn make(client: &Self::Client, model: impl Into<String>, dims: Option<usize>) -> Self {
        let model = model.into();
        let dims = dims
            .or(model_dimensions_from_identifier(&model))
            .unwrap_or_default();
        Self::new(client.clone(), model, dims)
    }

    const MAX_DOCUMENTS: usize = 1024;
    fn ndims(&self) -> usize {
        self.ndims
    }

    async fn embed_texts(
        &self,
        documents: impl IntoIterator<Item = String>,
    ) -> Result<Vec<embeddings::Embedding>, EmbeddingError> {
        let docs: Vec<String> = documents.into_iter().collect();

        let body = serde_json::to_vec(&json!({
            "model": self.model,
            "input": docs
        }))?;

        let req = self
            .client
            .post("api/embed")?
            .body(body)
            .map_err(|e| EmbeddingError::HttpError(e.into()))?;

        let response = self.client.send::<_, Vec<u8>>(req).await?;

        if !response.status().is_success() {
            let text = http_client::text(response).await?;
            return Err(EmbeddingError::ProviderError(text));
        }

        let bytes: Vec<u8> = response.into_body().await?;

        let api_resp: EmbeddingResponse = serde_json::from_slice(&bytes)?;

        if api_resp.embeddings.len() != docs.len() {
            return Err(EmbeddingError::ResponseError(
                "Number of returned embeddings does not match input".into(),
            ));
        }
        Ok(api_resp
            .embeddings
            .into_iter()
            .zip(docs.into_iter())
            .map(|(vec, document)| embeddings::Embedding { document, vec })
            .collect())
    }
}
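
// Usage sketch for the embedding path (mirrors the module-level example):
// `embed_texts` posts `{"model": ..., "input": [...]}` to `api/embed` and
// zips the returned vectors back with their input documents in order:
//
// ```rust,ignore
// let emb_model = client.embedding_model("all-minilm", 384);
// let embeddings = emb_model
//     .embed_texts(vec!["Why is the sky blue?".to_owned()])
//     .await?;
// assert_eq!(embeddings.len(), 1); // one embedding per input document, in order
// ```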

// ---------- Completion API ----------

pub const LLAMA3_2: &str = "llama3.2";
pub const LLAVA: &str = "llava";
pub const MISTRAL: &str = "mistral";

#[derive(Debug, Serialize, Deserialize)]
pub struct CompletionResponse {
    pub model: String,
    pub created_at: String,
    pub message: Message,
    pub done: bool,
    #[serde(default)]
    pub done_reason: Option<String>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
    #[serde(default)]
    pub prompt_eval_duration: Option<u64>,
    #[serde(default)]
    pub eval_count: Option<u64>,
    #[serde(default)]
    pub eval_duration: Option<u64>,
}

impl TryFrom<CompletionResponse> for completion::CompletionResponse<CompletionResponse> {
    type Error = CompletionError;
    fn try_from(resp: CompletionResponse) -> Result<Self, Self::Error> {
        match resp.message {
            // Process only if an assistant message is present.
            Message::Assistant {
                content,
                thinking,
                tool_calls,
                ..
            } => {
                let mut assistant_contents = Vec::new();
                // Add the assistant's text content if any.
                if !content.is_empty() {
                    assistant_contents.push(completion::AssistantContent::text(&content));
                }
                // Map tool calls from Ollama's chat response. Ollama tool calls
                // carry no id, so the function name is reused as the call id.
                for tc in tool_calls.iter() {
                    assistant_contents.push(completion::AssistantContent::tool_call(
                        tc.function.name.clone(),
                        tc.function.name.clone(),
                        tc.function.arguments.clone(),
                    ));
                }
                let choice = OneOrMany::many(assistant_contents).map_err(|_| {
                    CompletionError::ResponseError("No content provided".to_owned())
                })?;
                let prompt_tokens = resp.prompt_eval_count.unwrap_or(0);
                let completion_tokens = resp.eval_count.unwrap_or(0);

                let raw_response = CompletionResponse {
                    model: resp.model,
                    created_at: resp.created_at,
                    done: resp.done,
                    done_reason: resp.done_reason,
                    total_duration: resp.total_duration,
                    load_duration: resp.load_duration,
                    prompt_eval_count: resp.prompt_eval_count,
                    prompt_eval_duration: resp.prompt_eval_duration,
                    eval_count: resp.eval_count,
                    eval_duration: resp.eval_duration,
                    message: Message::Assistant {
                        content,
                        thinking,
                        images: None,
                        name: None,
                        tool_calls,
                    },
                };

                Ok(completion::CompletionResponse {
                    choice,
                    usage: Usage {
                        input_tokens: prompt_tokens,
                        output_tokens: completion_tokens,
                        total_tokens: prompt_tokens + completion_tokens,
                        cached_input_tokens: 0,
                        cache_creation_input_tokens: 0,
                        reasoning_tokens: 0,
                    },
                    raw_response,
                    message_id: None,
                })
            }
            _ => Err(CompletionError::ResponseError(
                "Chat response does not include an assistant message".into(),
            )),
        }
    }
}

#[derive(Debug, Serialize, Deserialize)]
pub(super) struct OllamaCompletionRequest {
    model: String,
    pub messages: Vec<Message>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    tools: Vec<ToolDefinition>,
    pub stream: bool,
    think: Think,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    keep_alive: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<schemars::Schema>,
    options: serde_json::Value,
}

impl TryFrom<(&str, CompletionRequest)> for OllamaCompletionRequest {
    type Error = CompletionError;

    fn try_from((model, req): (&str, CompletionRequest)) -> Result<Self, Self::Error> {
        let model = req.model.clone().unwrap_or_else(|| model.to_string());
        if req.tool_choice.is_some() {
            tracing::warn!("`tool_choice` is not supported by Ollama and will be ignored");
        }
        // Build up the order of messages (context, chat_history, prompt)
        let mut partial_history = vec![];
        if let Some(docs) = req.normalized_documents() {
            partial_history.push(docs);
        }
        partial_history.extend(req.chat_history);

        // Add preamble to chat history (if available)
        let mut full_history: Vec<Message> = match &req.preamble {
            Some(preamble) => vec![Message::system(preamble)],
            None => vec![],
        };

        // Convert and extend the rest of the history
        full_history.extend(
            partial_history
                .into_iter()
                .map(message::Message::try_into)
                .collect::<Result<Vec<Vec<Message>>, _>>()?
                .into_iter()
                .flatten()
                .collect::<Vec<_>>(),
        );

        let mut think = Think::Bool(false);
        let mut keep_alive: Option<String> = None;

        let options = if let Some(mut extra) = req.additional_params {
            // Extract top-level parameters that should not be in `options`
            if let Some(obj) = extra.as_object_mut() {
                // Extract `think` parameter
                if let Some(think_val) = obj.remove("think") {
                    think = match think_val {
                        Value::Bool(think) => Think::Bool(think),
                        Value::String(think) => Think::Level(match think.to_lowercase().as_str() {
                            "low" => Level::Low,
                            "medium" => Level::Medium,
                            "high" => Level::High,
                            _ => {
                                return Err(CompletionError::RequestError(
                                    "`think` must be 'low', 'medium', 'high', or a bool".into(),
                                ));
                            }
                        }),
                        _ => {
                            return Err(CompletionError::RequestError(
                                "`think` must be 'low', 'medium', 'high', or a bool".into(),
                            ));
                        }
                    };
                }

                // Extract `keep_alive` parameter
                if let Some(keep_alive_val) = obj.remove("keep_alive") {
                    keep_alive = Some(
                        keep_alive_val
                            .as_str()
                            .ok_or_else(|| {
                                CompletionError::RequestError(
                                    "`keep_alive` must be a string".into(),
                                )
                            })?
                            .to_string(),
                    );
                }
            }

            json_utils::merge(json!({ "temperature": req.temperature }), extra)
        } else {
            json!({ "temperature": req.temperature })
        };

        Ok(Self {
            model: model.to_string(),
            messages: full_history,
            temperature: req.temperature,
            max_tokens: req.max_tokens,
            stream: false,
            think,
            keep_alive,
            format: req.output_schema,
            tools: req
                .tools
                .clone()
                .into_iter()
                .map(ToolDefinition::from)
                .collect::<Vec<_>>(),
            options,
        })
    }
}
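
// A sketch of how `additional_params` is split by the conversion above:
// `think` and `keep_alive` are hoisted into top-level request fields, while
// everything else is merged (together with `temperature`) into Ollama's
// `options` object. Assuming `base` is an existing `CompletionRequest`:
//
// ```rust,ignore
// base.additional_params = Some(json!({
//     "think": "high",     // hoisted -> Think::Level(Level::High)
//     "keep_alive": "5m",  // hoisted -> keep_alive: Some("5m".into())
//     "num_ctx": 8192,     // left in place -> merged into `options`
// }));
// let ollama_req = OllamaCompletionRequest::try_from(("llama3.2", base))?;
// ```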

#[derive(Clone)]
pub struct CompletionModel<T = reqwest::Client> {
    client: Client<T>,
    pub model: String,
}

impl<T> CompletionModel<T> {
    pub fn new(client: Client<T>, model: &str) -> Self {
        Self {
            client,
            model: model.to_owned(),
        }
    }
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
enum Think {
    Bool(bool),
    Level(Level),
}

#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum Level {
    Low,
    Medium,
    High,
}
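
// Because `Think` is untagged and `Level` is lowercased, the field serializes
// either as a bare bool or as a level string:
//
// ```rust,ignore
// assert_eq!(serde_json::to_value(Think::Bool(false))?, json!(false));
// assert_eq!(serde_json::to_value(Think::Level(Level::High))?, json!("high"));
// ```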

// ---------- CompletionModel Implementation ----------

#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct StreamingCompletionResponse {
    pub done_reason: Option<String>,
    pub total_duration: Option<u64>,
    pub load_duration: Option<u64>,
    pub prompt_eval_count: Option<u64>,
    pub prompt_eval_duration: Option<u64>,
    pub eval_count: Option<u64>,
    pub eval_duration: Option<u64>,
}

impl GetTokenUsage for StreamingCompletionResponse {
    fn token_usage(&self) -> Option<crate::completion::Usage> {
        let mut usage = crate::completion::Usage::new();
        let input_tokens = self.prompt_eval_count.unwrap_or_default();
        let output_tokens = self.eval_count.unwrap_or_default();
        usage.input_tokens = input_tokens;
        usage.output_tokens = output_tokens;
        usage.total_tokens = input_tokens + output_tokens;

        Some(usage)
    }
}

impl<T> completion::CompletionModel for CompletionModel<T>
where
    T: HttpClientExt + Clone + Default + std::fmt::Debug + Send + 'static,
{
    type Response = CompletionResponse;
    type StreamingResponse = StreamingCompletionResponse;

    type Client = Client<T>;

    fn make(client: &Self::Client, model: impl Into<String>) -> Self {
        Self::new(client.clone(), model.into().as_str())
    }

    async fn completion(
        &self,
        completion_request: CompletionRequest,
    ) -> Result<completion::CompletionResponse<Self::Response>, CompletionError> {
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat",
                gen_ai.operation.name = "chat",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = tracing::field::Empty,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        span.record("gen_ai.system_instructions", &completion_request.preamble);
        let request = OllamaCompletionRequest::try_from((self.model.as_ref(), completion_request))?;

        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        let async_block = async move {
            let response = self.client.send::<_, Bytes>(req).await?;
            let status = response.status();
            let response_body = response.into_body().into_future().await?.to_vec();

            if !status.is_success() {
                return Err(CompletionError::ProviderError(
                    String::from_utf8_lossy(&response_body).to_string(),
                ));
            }

            let response: CompletionResponse = serde_json::from_slice(&response_body)?;
            let span = tracing::Span::current();
            span.record("gen_ai.response.model", &response.model);
            span.record(
                "gen_ai.usage.input_tokens",
                response.prompt_eval_count.unwrap_or_default(),
            );
            span.record(
                "gen_ai.usage.output_tokens",
                response.eval_count.unwrap_or_default(),
            );

            if tracing::enabled!(tracing::Level::TRACE) {
                tracing::trace!(target: "rig::completions",
                    "Ollama completion response: {}",
                    serde_json::to_string_pretty(&response)?
                );
            }

            let response: completion::CompletionResponse<CompletionResponse> =
                response.try_into()?;

            Ok(response)
        };

        tracing::Instrument::instrument(async_block, span).await
    }

    async fn stream(
        &self,
        request: CompletionRequest,
    ) -> Result<streaming::StreamingCompletionResponse<Self::StreamingResponse>, CompletionError>
    {
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat_streaming",
                gen_ai.operation.name = "chat_streaming",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = self.model,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        span.record("gen_ai.system_instructions", &request.preamble);

        let mut request = OllamaCompletionRequest::try_from((self.model.as_ref(), request))?;
        request.stream = true;

        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama streaming completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        let response = self.client.send_streaming(req).await?;
        let status = response.status();
        let mut byte_stream = response.into_body();

        if !status.is_success() {
            return Err(CompletionError::ProviderError(format!(
                "Got error status code trying to send a request to Ollama: {status}"
            )));
        }

        let stream = try_stream! {
            let span = tracing::Span::current();
            let mut tool_calls_final = Vec::new();
            let mut text_response = String::new();
            let mut thinking_response = String::new();

            while let Some(chunk) = byte_stream.next().await {
                let bytes = chunk.map_err(|e| http_client::Error::Instance(e.into()))?;

                for line in bytes.split(|&b| b == b'\n') {
                    if line.is_empty() {
                        continue;
                    }

                    tracing::debug!(target: "rig", "Received NDJSON line from Ollama: {}", String::from_utf8_lossy(line));

                    let response: CompletionResponse = serde_json::from_slice(line)?;

                    if let Message::Assistant { content, thinking, tool_calls, .. } = response.message {
                        if let Some(thinking_content) = thinking && !thinking_content.is_empty() {
                            thinking_response += &thinking_content;
                            yield RawStreamingChoice::ReasoningDelta {
                                id: None,
                                reasoning: thinking_content,
                            };
                        }

                        if !content.is_empty() {
                            text_response += &content;
                            yield RawStreamingChoice::Message(content);
                        }

                        for tool_call in tool_calls {
                            tool_calls_final.push(tool_call.clone());
                            yield RawStreamingChoice::ToolCall(
                                crate::streaming::RawStreamingToolCall::new(String::new(), tool_call.function.name, tool_call.function.arguments)
                            );
                        }
                    }

                    if response.done {
                        span.record("gen_ai.usage.input_tokens", response.prompt_eval_count);
                        span.record("gen_ai.usage.output_tokens", response.eval_count);
                        let message = Message::Assistant {
                            content: text_response.clone(),
                            thinking: if thinking_response.is_empty() { None } else { Some(thinking_response.clone()) },
                            images: None,
                            name: None,
                            tool_calls: tool_calls_final.clone()
                        };
                        if let Ok(serialized_message) = serde_json::to_string(&vec![message]) {
                            span.record("gen_ai.output.messages", serialized_message);
                        }
                        yield RawStreamingChoice::FinalResponse(
                            StreamingCompletionResponse {
                                total_duration: response.total_duration,
                                load_duration: response.load_duration,
                                prompt_eval_count: response.prompt_eval_count,
                                prompt_eval_duration: response.prompt_eval_duration,
                                eval_count: response.eval_count,
                                eval_duration: response.eval_duration,
                                done_reason: response.done_reason,
                            }
                        );
                        break;
                    }
                }
            }
        }.instrument(span);

        Ok(streaming::StreamingCompletionResponse::stream(Box::pin(
            stream,
        )))
    }
}
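
// Usage sketch for the streaming path. Each NDJSON line from `api/chat` is a
// `CompletionResponse` chunk; text, reasoning, and tool-call deltas are
// yielded as they arrive, and the chunk with `done: true` carries the final
// usage counters. `handle_chunk` below is a hypothetical placeholder:
//
// ```rust,ignore
// let mut response = model.stream(request).await?;
// while let Some(chunk) = response.next().await {
//     handle_chunk(chunk?);
// }
// ```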

// ---------- Model Listing ----------

#[derive(Debug, Deserialize)]
struct ListModelsResponse {
    models: Vec<ListModelEntry>,
}

#[derive(Debug, Deserialize)]
struct ListModelEntry {
    name: String,
    model: String,
}

impl From<ListModelEntry> for Model {
    fn from(value: ListModelEntry) -> Self {
        Model::new(value.model, value.name)
    }
}

/// [`ModelLister`] implementation for the Ollama API (`GET /api/tags`).
#[derive(Clone)]
pub struct OllamaModelLister<H = reqwest::Client> {
    client: Client<H>,
}

impl<H> ModelLister<H> for OllamaModelLister<H>
where
    H: HttpClientExt + WasmCompatSend + WasmCompatSync + 'static,
{
    type Client = Client<H>;

    fn new(client: Self::Client) -> Self {
        Self { client }
    }

    async fn list_all(&self) -> Result<ModelList, ModelListingError> {
        let path = "/api/tags";
        let req = self.client.get(path)?.body(http_client::NoBody)?;
        let response = self.client.send::<_, Vec<u8>>(req).await?;

        if !response.status().is_success() {
            let status_code = response.status().as_u16();
            let body = response.into_body().await?;
            return Err(ModelListingError::api_error_with_context(
                "Ollama",
                path,
                status_code,
                &body,
            ));
        }

        let body = response.into_body().await?;
        let api_resp: ListModelsResponse = serde_json::from_slice(&body).map_err(|error| {
            ModelListingError::parse_error_with_context("Ollama", path, &error, &body)
        })?;
        let models = api_resp.models.into_iter().map(Model::from).collect();

        Ok(ModelList::new(models))
    }
}

// ---------- Tool Definition Conversion ----------

/// Ollama-required tool definition format.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolDefinition {
    #[serde(rename = "type")]
    pub type_field: String, // Fixed as "function"
    pub function: completion::ToolDefinition,
}

/// Convert internal ToolDefinition (from the completion module) into Ollama's tool definition.
impl From<crate::completion::ToolDefinition> for ToolDefinition {
    fn from(tool: crate::completion::ToolDefinition) -> Self {
        ToolDefinition {
            type_field: "function".to_owned(),
            function: completion::ToolDefinition {
                name: tool.name,
                description: tool.description,
                parameters: tool.parameters,
            },
        }
    }
}
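
// Serialization sketch: the wrapper adds the fixed `"type": "function"`
// discriminator around rig's own tool definition, which passes through
// unchanged under the `function` key (names here are illustrative):
//
// ```rust,ignore
// let v = serde_json::to_value(&ToolDefinition::from(internal_tool))?;
// assert_eq!(v["type"], "function");
// assert_eq!(v["function"]["name"], "get_current_weather");
// ```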

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolCall {
    #[serde(default, rename = "type")]
    pub r#type: ToolType,
    pub function: Function,
}

#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    #[default]
    Function,
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct Function {
    pub name: String,
    pub arguments: Value,
}

// ---------- Provider Message Definition ----------

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum Message {
    User {
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    Assistant {
        #[serde(default)]
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        thinking: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        #[serde(default, deserialize_with = "json_utils::null_or_vec")]
        tool_calls: Vec<ToolCall>,
    },
    System {
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    #[serde(rename = "tool")]
    ToolResult {
        #[serde(rename = "tool_name")]
        name: String,
        content: String,
    },
}
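
// The serde attributes above pin down the wire format: `role` is the tag,
// variant names are lowercased, and tool results serialize with role "tool"
// and a `tool_name` field. A round-trip sketch:
//
// ```rust,ignore
// let msg = Message::ToolResult {
//     name: "get_current_weather".to_owned(),
//     content: "{\"temp_c\": 21}".to_owned(),
// };
// let v = serde_json::to_value(&msg)?;
// assert_eq!(v["role"], "tool");
// assert_eq!(v["tool_name"], "get_current_weather");
// ```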

// ---------- Provider Message Conversions ----------

// Conversion from an internal Rig message (crate::message::Message) to one or
// more provider messages. System, User, and Assistant variants are handled;
// user tool results map to `Message::ToolResult`.
impl TryFrom<crate::message::Message> for Vec<Message> {
    type Error = crate::message::MessageError;
    fn try_from(internal_msg: crate::message::Message) -> Result<Self, Self::Error> {
        use crate::message::Message as InternalMessage;
        match internal_msg {
            InternalMessage::System { content } => Ok(vec![Message::System {
                content,
                images: None,
                name: None,
            }]),
            InternalMessage::User { content, .. } => {
                let (tool_results, other_content): (Vec<_>, Vec<_>) =
                    content.into_iter().partition(|content| {
                        matches!(content, crate::message::UserContent::ToolResult(_))
                    });

                if !tool_results.is_empty() {
                    tool_results
                        .into_iter()
                        .map(|content| match content {
                            crate::message::UserContent::ToolResult(
                                crate::message::ToolResult { id, content, .. },
                            ) => {
                                // Ollama expects a single string for tool results, so we concatenate
                                let content_string = content
                                    .into_iter()
                                    .map(|content| match content {
                                        crate::message::ToolResultContent::Text(text) => text.text,
                                        _ => "[Non-text content]".to_string(),
                                    })
                                    .collect::<Vec<_>>()
                                    .join("\n");

                                Ok::<_, crate::message::MessageError>(Message::ToolResult {
                                    name: id,
                                    content: content_string,
                                })
                            }
                            _ => Err(crate::message::MessageError::ConversionError(
                                "expected tool result content while converting Ollama input".into(),
                            )),
                        })
                        .collect::<Result<Vec<_>, _>>()
                } else {
                    // Ollama requires separate text content and images array
                    let (texts, images) = other_content.into_iter().fold(
                        (Vec::new(), Vec::new()),
                        |(mut texts, mut images), content| {
                            match content {
                                crate::message::UserContent::Text(crate::message::Text {
                                    text,
                                }) => texts.push(text),
                                crate::message::UserContent::Image(crate::message::Image {
                                    data: DocumentSourceKind::Base64(data),
                                    ..
                                }) => images.push(data),
                                crate::message::UserContent::Document(
                                    crate::message::Document {
                                        data:
                                            DocumentSourceKind::Base64(data)
                                            | DocumentSourceKind::String(data),
                                        ..
                                    },
                                ) => texts.push(data),
                                _ => {} // Audio not supported by Ollama
                            }
                            (texts, images)
                        },
                    );

                    Ok(vec![Message::User {
                        content: texts.join(" "),
                        images: if images.is_empty() {
                            None
                        } else {
                            Some(
                                images
                                    .into_iter()
                                    .map(|x| x.to_string())
                                    .collect::<Vec<String>>(),
                            )
                        },
                        name: None,
                    }])
                }
            }
            InternalMessage::Assistant { content, .. } => {
                let mut thinking: Option<String> = None;
                let mut text_content = Vec::new();
                let mut tool_calls = Vec::new();

                for content in content.into_iter() {
                    match content {
                        crate::message::AssistantContent::Text(text) => {
                            text_content.push(text.text)
                        }
                        crate::message::AssistantContent::ToolCall(tool_call) => {
                            tool_calls.push(tool_call)
                        }
                        crate::message::AssistantContent::Reasoning(reasoning) => {
                            let display = reasoning.display_text();
                            if !display.is_empty() {
                                thinking = Some(display);
                            }
                        }
                        crate::message::AssistantContent::Image(_) => {
                            return Err(crate::message::MessageError::ConversionError(
                                "Ollama doesn't support image content in assistant messages.".into(),
                            ));
                        }
                    }
                }

                // `OneOrMany` ensures at least one `AssistantContent::Text` or `ToolCall` exists,
                //  so either `content` or `tool_calls` will have some content.
                Ok(vec![Message::Assistant {
                    content: text_content.join(" "),
                    thinking,
                    images: None,
                    name: None,
                    tool_calls: tool_calls
                        .into_iter()
                        .map(|tool_call| tool_call.into())
                        .collect::<Vec<_>>(),
                }])
            }
        }
    }
}

/// Conversion from provider Message to a completion message.
/// This is needed so that responses can be converted back into chat history.
impl From<Message> for crate::completion::Message {
    fn from(msg: Message) -> Self {
        match msg {
            Message::User { content, .. } => crate::completion::Message::User {
                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text {
                    text: content,
                })),
            },
            Message::Assistant {
                content,
                tool_calls,
                ..
            } => {
                let mut assistant_contents =
                    vec![crate::completion::message::AssistantContent::Text(Text {
                        text: content,
                    })];
                for tc in tool_calls {
                    assistant_contents.push(
                        crate::completion::message::AssistantContent::tool_call(
                            tc.function.name.clone(),
                            tc.function.name,
                            tc.function.arguments,
                        ),
                    );
                }
                let content =
                    OneOrMany::from_iter_optional(assistant_contents).unwrap_or_else(|| {
                        OneOrMany::one(crate::completion::message::AssistantContent::Text(Text {
                            text: String::new(),
                        }))
                    });

                crate::completion::Message::Assistant { id: None, content }
            }
            // System and ToolResult are converted to User message as needed.
            Message::System { content, .. } => crate::completion::Message::User {
                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text {
                    text: content,
                })),
            },
            Message::ToolResult { name, content } => crate::completion::Message::User {
                content: OneOrMany::one(message::UserContent::tool_result(
                    name,
                    OneOrMany::one(message::ToolResultContent::text(content)),
                )),
            },
        }
    }
}

impl Message {
    /// Constructs a system message.
    pub fn system(content: &str) -> Self {
        Message::System {
            content: content.to_owned(),
            images: None,
            name: None,
        }
    }
}

// ---------- Additional Message Types ----------

impl From<crate::message::ToolCall> for ToolCall {
    fn from(tool_call: crate::message::ToolCall) -> Self {
        Self {
            r#type: ToolType::Function,
            function: Function {
                name: tool_call.function.name,
                arguments: tool_call.function.arguments,
            },
        }
    }
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct SystemContent {
    #[serde(default)]
    r#type: SystemContentType,
    text: String,
}

#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum SystemContentType {
    #[default]
    Text,
}

impl From<String> for SystemContent {
    fn from(s: String) -> Self {
        SystemContent {
            r#type: SystemContentType::default(),
            text: s,
        }
    }
}

impl FromStr for SystemContent {
    type Err = std::convert::Infallible;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(SystemContent {
            r#type: SystemContentType::default(),
            text: s.to_string(),
        })
    }
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct AssistantContent {
    pub text: String,
}

impl FromStr for AssistantContent {
    type Err = std::convert::Infallible;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(AssistantContent { text: s.to_owned() })
    }
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum UserContent {
    Text { text: String },
    Image { image_url: ImageUrl },
    // Audio variant removed as Ollama API does not support audio input.
}

impl FromStr for UserContent {
    type Err = std::convert::Infallible;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Ok(UserContent::Text { text: s.to_owned() })
    }
}

#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ImageUrl {
    pub url: String,
    #[serde(default)]
    pub detail: ImageDetail,
}

// =================================================================
// Tests
// =================================================================

#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    // Test deserialization and conversion for the /api/chat endpoint.
    #[tokio::test]
    async fn test_chat_completion() {
        // Sample JSON response from /api/chat (non-streaming) based on Ollama docs.
        let sample_chat_response = json!({
            "model": "llama3.2",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "The sky is blue because of Rayleigh scattering.",
                "images": null,
                "tool_calls": [
                    {
                        "type": "function",
                        "function": {
                            "name": "get_current_weather",
                            "arguments": {
                                "location": "San Francisco, CA",
                                "format": "celsius"
                            }
                        }
                    }
                ]
            },
            "done": true,
            "total_duration": 8000000000u64,
            "load_duration": 6000000u64,
            "prompt_eval_count": 61u64,
            "prompt_eval_duration": 400000000u64,
            "eval_count": 468u64,
            "eval_duration": 7700000000u64
        });
        let sample_text = sample_chat_response.to_string();

        let chat_resp: CompletionResponse =
            serde_json::from_str(&sample_text).expect("Invalid JSON structure");
        let conv: completion::CompletionResponse<CompletionResponse> =
            chat_resp.try_into().unwrap();
        assert!(
            !conv.choice.is_empty(),
            "Expected non-empty choice in chat response"
        );
    }

    // Test conversion from provider Message to completion Message.
    #[test]
    fn test_message_conversion() {
        // Construct a provider Message (User variant with String content).
        let provider_msg = Message::User {
            content: "Test message".to_owned(),
            images: None,
            name: None,
        };
        // Convert it into a completion::Message.
        let comp_msg: crate::completion::Message = provider_msg.into();
        match comp_msg {
            crate::completion::Message::User { content } => {
                // `OneOrMany::first()` returns the first element of the collection.
1307                let first_content = content.first();
1308                // The expected type is crate::completion::message::UserContent::Text wrapping a Text struct.
1309                match first_content {
1310                    crate::completion::message::UserContent::Text(text_struct) => {
1311                        assert_eq!(text_struct.text, "Test message");
1312                    }
1313                    _ => panic!("Expected text content in conversion"),
1314                }
1315            }
1316            _ => panic!("Conversion from provider Message to completion Message failed"),
1317        }
1318    }
1319
1320    // Test conversion of internal tool definition to Ollama's ToolDefinition format.
1321    #[test]
1322    fn test_tool_definition_conversion() {
1323        // Internal tool definition from the completion module.
1324        let internal_tool = crate::completion::ToolDefinition {
1325            name: "get_current_weather".to_owned(),
1326            description: "Get the current weather for a location".to_owned(),
1327            parameters: json!({
1328                "type": "object",
1329                "properties": {
1330                    "location": {
1331                        "type": "string",
1332                        "description": "The location to get the weather for, e.g. San Francisco, CA"
1333                    },
1334                    "format": {
1335                        "type": "string",
1336                        "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
1337                        "enum": ["celsius", "fahrenheit"]
1338                    }
1339                },
1340                "required": ["location", "format"]
1341            }),
1342        };
1343        // Convert internal tool to Ollama's tool definition.
1344        let ollama_tool: ToolDefinition = internal_tool.into();
1345        assert_eq!(ollama_tool.type_field, "function");
1346        assert_eq!(ollama_tool.function.name, "get_current_weather");
1347        assert_eq!(
1348            ollama_tool.function.description,
1349            "Get the current weather for a location"
1350        );
1351        // Check JSON fields in parameters.
1352        let params = &ollama_tool.function.parameters;
1353        assert_eq!(params["properties"]["location"]["type"], "string");
1354    }
1355
1356    // Test deserialization of chat response with thinking content
1357    #[tokio::test]
1358    async fn test_chat_completion_with_thinking() {
1359        let sample_response = json!({
1360            "model": "qwen-thinking",
1361            "created_at": "2023-08-04T19:22:45.499127Z",
1362            "message": {
1363                "role": "assistant",
1364                "content": "The answer is 42.",
1365                "thinking": "Let me think about this carefully. The question asks for the meaning of life...",
1366                "images": null,
1367                "tool_calls": []
1368            },
1369            "done": true,
1370            "total_duration": 8000000000u64,
1371            "load_duration": 6000000u64,
1372            "prompt_eval_count": 61u64,
1373            "prompt_eval_duration": 400000000u64,
1374            "eval_count": 468u64,
1375            "eval_duration": 7700000000u64
1376        });
1377
1378        let chat_resp: CompletionResponse =
1379            serde_json::from_value(sample_response).expect("Failed to deserialize");
1380
1381        // Verify thinking field is present
1382        if let Message::Assistant {
1383            thinking, content, ..
1384        } = &chat_resp.message
1385        {
1386            assert_eq!(
1387                thinking.as_ref().unwrap(),
1388                "Let me think about this carefully. The question asks for the meaning of life..."
1389            );
1390            assert_eq!(content, "The answer is 42.");
1391        } else {
1392            panic!("Expected Assistant message");
1393        }
1394    }
1395
1396    // Test deserialization of chat response without thinking content
1397    #[tokio::test]
1398    async fn test_chat_completion_without_thinking() {
1399        let sample_response = json!({
1400            "model": "llama3.2",
1401            "created_at": "2023-08-04T19:22:45.499127Z",
1402            "message": {
1403                "role": "assistant",
1404                "content": "Hello!",
1405                "images": null,
1406                "tool_calls": []
1407            },
1408            "done": true,
1409            "total_duration": 8000000000u64,
1410            "load_duration": 6000000u64,
1411            "prompt_eval_count": 10u64,
1412            "prompt_eval_duration": 400000000u64,
1413            "eval_count": 5u64,
1414            "eval_duration": 7700000000u64
1415        });
1416
1417        let chat_resp: CompletionResponse =
1418            serde_json::from_value(sample_response).expect("Failed to deserialize");
1419
1420        // Verify thinking field is None when not provided
1421        if let Message::Assistant {
1422            thinking, content, ..
1423        } = &chat_resp.message
1424        {
1425            assert!(thinking.is_none());
1426            assert_eq!(content, "Hello!");
1427        } else {
1428            panic!("Expected Assistant message");
1429        }
1430    }
1431
1432    // Test deserialization of streaming response with thinking content
    #[test]
    fn test_streaming_response_with_thinking() {
        let sample_chunk = json!({
            "model": "qwen-thinking",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "",
                "thinking": "Analyzing the problem...",
                "images": null,
                "tool_calls": []
            },
            "done": false
        });

        let chunk: CompletionResponse =
            serde_json::from_value(sample_chunk).expect("Failed to deserialize");

        if let Message::Assistant {
            thinking, content, ..
        } = &chunk.message
        {
            assert_eq!(thinking.as_ref().unwrap(), "Analyzing the problem...");
            assert_eq!(content, "");
        } else {
            panic!("Expected Assistant message");
        }
    }

    // Test message conversion with thinking content
    #[test]
    fn test_message_conversion_with_thinking() {
        // Create an internal message with reasoning content
        let reasoning_content = crate::message::Reasoning::new("Step 1: Consider the problem");

        let internal_msg = crate::message::Message::Assistant {
            id: None,
            content: crate::OneOrMany::many(vec![
                crate::message::AssistantContent::Reasoning(reasoning_content),
                crate::message::AssistantContent::Text(crate::message::Text {
                    text: "The answer is X".to_string(),
                }),
            ])
            .unwrap(),
        };

        // Convert to provider Message
        let provider_msgs: Vec<Message> = internal_msg.try_into().unwrap();
        assert_eq!(provider_msgs.len(), 1);

        if let Message::Assistant {
            thinking, content, ..
        } = &provider_msgs[0]
        {
            assert_eq!(thinking.as_ref().unwrap(), "Step 1: Consider the problem");
            assert_eq!(content, "The answer is X");
        } else {
            panic!("Expected Assistant message with thinking");
        }
    }

    // Test empty thinking content is handled correctly
    #[test]
    fn test_empty_thinking_content() {
        let sample_response = json!({
            "model": "llama3.2",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "Response",
                "thinking": "",
                "images": null,
                "tool_calls": []
            },
            "done": true,
            "total_duration": 8000000000u64,
            "load_duration": 6000000u64,
            "prompt_eval_count": 10u64,
            "prompt_eval_duration": 400000000u64,
            "eval_count": 5u64,
            "eval_duration": 7700000000u64
        });

        let chat_resp: CompletionResponse =
            serde_json::from_value(sample_response).expect("Failed to deserialize");

        if let Message::Assistant {
            thinking, content, ..
        } = &chat_resp.message
        {
            // Empty string should still deserialize as Some("")
            assert_eq!(thinking.as_ref().unwrap(), "");
            assert_eq!(content, "Response");
        } else {
            panic!("Expected Assistant message");
        }
    }

    // Test thinking with tool calls
    #[test]
    fn test_thinking_with_tool_calls() {
        let sample_response = json!({
            "model": "qwen-thinking",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "Let me check the weather.",
                "thinking": "User wants weather info, I should use the weather tool",
                "images": null,
                "tool_calls": [
                    {
                        "type": "function",
                        "function": {
                            "name": "get_weather",
                            "arguments": {
                                "location": "San Francisco"
                            }
                        }
                    }
                ]
            },
            "done": true,
            "total_duration": 8000000000u64,
            "load_duration": 6000000u64,
            "prompt_eval_count": 30u64,
            "prompt_eval_duration": 400000000u64,
            "eval_count": 50u64,
            "eval_duration": 7700000000u64
        });

        let chat_resp: CompletionResponse =
            serde_json::from_value(sample_response).expect("Failed to deserialize");

        if let Message::Assistant {
            thinking,
            content,
            tool_calls,
            ..
        } = &chat_resp.message
        {
            assert_eq!(
                thinking.as_ref().unwrap(),
                "User wants weather info, I should use the weather tool"
            );
            assert_eq!(content, "Let me check the weather.");
            assert_eq!(tool_calls.len(), 1);
            assert_eq!(tool_calls[0].function.name, "get_weather");
        } else {
            panic!("Expected Assistant message with thinking and tool calls");
        }
    }

    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
    #[test]
    fn test_completion_request_with_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": true,
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        // Convert to OllamaCompletionRequest
        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        // Serialize to JSON
        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // Assert equality with expected JSON
        // - "tools" is skipped when empty (skip_serializing_if)
        // - "think" should be a top-level boolean, NOT in options
        // - "keep_alive" should be a top-level string, NOT in options
        // - "num_ctx" should be in options (it's a model parameter)
        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": true,
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }
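
    // Usage sketch (ignored by default): how a caller would set these params from
    // user code. This assumes a running local Ollama instance with a
    // thinking-capable model pulled, and relies on `AgentBuilder::additional_params`
    // forwarding the JSON object into the request conversion tested above; the
    // model name and prompt are illustrative.
    #[tokio::test]
    #[ignore = "requires a running Ollama instance"]
    async fn usage_sketch_think_via_additional_params() {
        use crate::client::CompletionClient;
        use crate::completion::Prompt;

        let client = crate::providers::ollama::Client::new(Nothing).expect("client");
        let agent = client
            .agent("qwen3:8b")
            .preamble("You are a helpful assistant.")
            // Serialized as the top-level `think` and `keep_alive` request fields.
            .additional_params(json!({ "think": true, "keep_alive": "-1m" }))
            .build();

        let answer = agent.prompt("What is 2 + 2?").await.expect("prompt");
        println!("{answer}");
    }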

    // Test that a "low" `think` level is passed through as a top-level string param
    #[test]
    fn test_completion_request_with_level_low_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        // Create a CompletionRequest with "think": "low", "keep_alive", and "num_ctx" in additional_params
        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "low",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        // Convert to OllamaCompletionRequest
        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        // Serialize to JSON
        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // Assert equality with expected JSON
        // - "tools" is skipped when empty (skip_serializing_if)
        // - "think" should be a top-level string ("low"), NOT in options
        // - "keep_alive" should be a top-level string, NOT in options
        // - "num_ctx" should be in options (it's a model parameter)
        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": "low",
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }

    // Test that a "medium" `think` level is passed through as a top-level string param
    #[test]
    fn test_completion_request_with_level_medium_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        // Create a CompletionRequest with "think": "medium", "keep_alive", and "num_ctx" in additional_params
        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "medium",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        // Convert to OllamaCompletionRequest
        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        // Serialize to JSON
        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // Assert equality with expected JSON
        // - "tools" is skipped when empty (skip_serializing_if)
        // - "think" should be a top-level string ("medium"), NOT in options
        // - "keep_alive" should be a top-level string, NOT in options
        // - "num_ctx" should be in options (it's a model parameter)
        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": "medium",
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }

    // Test that a "high" `think` level is passed through as a top-level string param
    #[test]
    fn test_completion_request_with_level_high_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        // Create a CompletionRequest with "think": "high", "keep_alive", and "num_ctx" in additional_params
        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "high",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        // Convert to OllamaCompletionRequest
        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        // Serialize to JSON
        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // Assert equality with expected JSON
        // - "tools" is skipped when empty (skip_serializing_if)
        // - "think" should be a top-level string ("high"), NOT in options
        // - "keep_alive" should be a top-level string, NOT in options
        // - "num_ctx" should be in options (it's a model parameter)
        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": "high",
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }

    // Test that an invalid `think` level is rejected during request conversion
    #[test]
    fn test_completion_request_with_level_invalid_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        // Create a CompletionRequest with an unsupported "think" value; only booleans
        // and the levels "low"/"medium"/"high" are accepted (see the tests above)
        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "invalid",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        // Conversion should fail rather than forward a bad value to Ollama
        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request));

        assert!(ollama_request.is_err());
    }

    // Test that `think` defaults to false when not specified
    #[test]
    fn test_completion_request_with_think_false_default() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        // Create a CompletionRequest WITHOUT "think" in additional_params
        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "Hello!".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.5),
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: None,
        };

        // Convert to OllamaCompletionRequest
        let ollama_request = OllamaCompletionRequest::try_from(("llama3.2", completion_request))
            .expect("Failed to create Ollama request");

        // Serialize to JSON
        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // Assert that "think" defaults to false and "keep_alive" is not present
        let expected = json!({
            "model": "llama3.2",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "Hello!"
                }
            ],
            "temperature": 0.5,
            "stream": false,
            "think": false,
            "options": {
                "temperature": 0.5
            }
        });

        assert_eq!(serialized, expected);
    }

    #[test]
    fn test_completion_request_with_output_schema() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let schema: schemars::Schema = serde_json::from_value(json!({
            "type": "object",
            "properties": {
                "age": { "type": "integer" },
                "available": { "type": "boolean" }
            },
            "required": ["age", "available"]
        }))
        .expect("Failed to parse schema");

        let completion_request = CompletionRequest {
            model: Some("llama3.1".to_string()),
            preamble: None,
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "How old is Ollama?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: None,
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: Some(schema),
        };

        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        let format = serialized
            .get("format")
            .expect("format field should be present");
        assert_eq!(
            *format,
            json!({
                "type": "object",
                "properties": {
                    "age": { "type": "integer" },
                    "available": { "type": "boolean" }
                },
                "required": ["age", "available"]
            })
        );
    }
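
    // Sketch: deriving the output schema with `schemars` instead of hand-writing
    // the JSON above. Assumes the crate's `schemars` dependency enables the
    // `derive` feature and that `schema_for!` returns a `Schema` (schemars 1.x);
    // the struct name is illustrative.
    #[test]
    fn usage_sketch_output_schema_from_derive() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        #[derive(schemars::JsonSchema)]
        #[allow(dead_code)]
        struct Availability {
            age: i64,
            available: bool,
        }

        let completion_request = CompletionRequest {
            model: Some("llama3.1".to_string()),
            preamble: None,
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "How old is Ollama?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: None,
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: Some(schemars::schema_for!(Availability)),
        };

        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
            .expect("Failed to create Ollama request");
        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // The derived schema should land under Ollama's top-level `format` field.
        assert!(serialized.get("format").is_some());
    }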

    #[test]
    fn test_completion_request_without_output_schema() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: Some("llama3.1".to_string()),
            preamble: None,
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "Hello!".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: None,
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        assert!(
            serialized.get("format").is_none(),
            "format field should be absent when output_schema is None"
        );
    }

    #[test]
    fn test_client_initialization() {
        let _client = crate::providers::ollama::Client::new(Nothing).expect("Client::new() failed");
        let _client_from_builder = crate::providers::ollama::Client::builder()
            .api_key(Nothing)
            .build()
            .expect("Client::builder() failed");
    }
}