Skip to main content

rig/providers/
ollama.rs

1//! Ollama API client and Rig integration
2//!
3//! # Example
4//! ```rust,ignore
5//! use rig::client::{Nothing, CompletionClient};
6//! use rig::completion::Prompt;
7//! use rig::providers::ollama;
8//!
9//! // Create a new Ollama client (defaults to http://localhost:11434)
10//! // In the case of ollama, no API key is necessary, so we use the `Nothing` struct
11//! let client = ollama::Client::new(Nothing).unwrap();
12//!
13//! // Create an agent with a preamble
14//! let comedian_agent = client
15//!     .agent("qwen2.5:14b")
16//!     .preamble("You are a comedian here to entertain the user using humour and jokes.")
17//!     .build();
18//!
19//! // Prompt the agent and print the response
20//! let response = comedian_agent.prompt("Entertain me!").await?;
21//! println!("{response}");
22//!
23//! // Create an embedding model using the "all-minilm" model
24//! let emb_model = client.embedding_model("all-minilm", 384);
25//! let embeddings = emb_model.embed_texts(vec![
26//!     "Why is the sky blue?".to_owned(),
27//!     "Why is the grass green?".to_owned()
28//! ]).await?;
29//! println!("Embedding response: {:?}", embeddings);
30//!
31//! // Create an extractor if needed
32//! let extractor = client.extractor::<serde_json::Value>("llama3.2").build();
33//! ```
34use crate::client::{
35    self, Capabilities, Capable, DebugExt, ModelLister, Nothing, Provider, ProviderBuilder,
36    ProviderClient,
37};
38use crate::completion::{GetTokenUsage, Usage};
39use crate::http_client::{self, HttpClientExt};
40use crate::message::DocumentSourceKind;
41use crate::model::{Model, ModelList, ModelListingError};
42use crate::streaming::RawStreamingChoice;
43use crate::{
44    OneOrMany,
45    completion::{self, CompletionError, CompletionRequest},
46    embeddings::{self, EmbeddingError},
47    json_utils, message,
48    message::{ImageDetail, Text},
49    streaming,
50};
51use async_stream::try_stream;
52use bytes::Bytes;
53use futures::StreamExt;
54use serde::{Deserialize, Serialize};
55use serde_json::{Value, json};
56use std::{convert::TryFrom, str::FromStr};
57use tracing::info_span;
58use tracing_futures::Instrument;
59// ---------- Main Client ----------
60
/// Default base URL for a locally running Ollama server.
const OLLAMA_API_BASE_URL: &str = "http://localhost:11434";
62
/// Zero-sized marker carrying the Ollama provider's capability set.
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaExt;
65
/// Builder marker used to construct an Ollama [`Client`] through the generic client builder.
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaBuilder;
68
impl Provider for OllamaExt {
    type Builder = OllamaBuilder;
    // Endpoint probed to verify the server is reachable (`GET api/tags`).
    const VERIFY_PATH: &'static str = "api/tags";
}
73
// Ollama supports chat completions, embeddings and model listing; the remaining
// capabilities (transcription, image/audio generation) are marked `Nothing`.
impl<H> Capabilities<H> for OllamaExt {
    type Completion = Capable<CompletionModel<H>>;
    type Transcription = Nothing;
    type Embeddings = Capable<EmbeddingModel<H>>;
    type ModelListing = Capable<OllamaModelLister<H>>;
    #[cfg(feature = "image")]
    type ImageGeneration = Nothing;

    #[cfg(feature = "audio")]
    type AudioGeneration = Nothing;
}
85
// Use the trait's default debug formatting; no provider-specific state to show.
impl DebugExt for OllamaExt {}
87
impl ProviderBuilder for OllamaBuilder {
    type Extension<H>
        = OllamaExt
    where
        H: HttpClientExt;
    // Ollama requires no API key, hence `Nothing`.
    type ApiKey = Nothing;

    const BASE_URL: &'static str = OLLAMA_API_BASE_URL;

    /// Produces the stateless Ollama extension; this cannot fail.
    fn build<H>(
        _builder: &client::ClientBuilder<Self, Self::ApiKey, H>,
    ) -> http_client::Result<Self::Extension<H>>
    where
        H: HttpClientExt,
    {
        Ok(OllamaExt)
    }
}
106
/// Ollama client over a generic HTTP backend (defaults to `reqwest`).
pub type Client<H = reqwest::Client> = client::Client<OllamaExt, H>;
/// Builder for [`Client`]; the API-key slot is `Nothing` since Ollama needs none.
pub type ClientBuilder<H = reqwest::Client> = client::ClientBuilder<OllamaBuilder, Nothing, H>;
109
impl ProviderClient for Client {
    type Input = Nothing;

    /// Builds a client from the `OLLAMA_API_BASE_URL` environment variable.
    ///
    /// # Panics
    /// Panics if `OLLAMA_API_BASE_URL` is unset or the builder fails.
    fn from_env() -> Self {
        let api_base = std::env::var("OLLAMA_API_BASE_URL").expect("OLLAMA_API_BASE_URL not set");

        Self::builder()
            .api_key(Nothing)
            .base_url(&api_base)
            .build()
            .unwrap()
    }

    /// Builds a client against the default base URL; the input is ignored.
    fn from_val(_: Self::Input) -> Self {
        Self::builder().api_key(Nothing).build().unwrap()
    }
}
127
128// ---------- API Error and Response Structures ----------
129
/// Error payload returned by the Ollama API.
#[derive(Debug, Deserialize)]
struct ApiErrorResponse {
    message: String,
}
134
/// Either a successful payload or an API error; `untagged` makes serde try the
/// success shape first and fall back to the error shape.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum ApiResponse<T> {
    Ok(T),
    Err(ApiErrorResponse),
}
141
142// ---------- Embedding API ----------
143
pub const ALL_MINILM: &str = "all-minilm";
pub const NOMIC_EMBED_TEXT: &str = "nomic-embed-text";

/// Returns the embedding dimensionality of a well-known Ollama embedding model,
/// or `None` when the identifier is not recognised.
fn model_dimensions_from_identifier(identifier: &str) -> Option<usize> {
    // Small fixed table of known models and their output dimensions.
    let known: [(&str, usize); 2] = [(ALL_MINILM, 384), (NOMIC_EMBED_TEXT, 768)];
    known
        .into_iter()
        .find_map(|(name, dims)| (name == identifier).then_some(dims))
}
154
/// Response body of Ollama's `POST api/embed` endpoint.
#[derive(Debug, Serialize, Deserialize)]
pub struct EmbeddingResponse {
    pub model: String,
    /// One embedding vector per input document, in input order.
    pub embeddings: Vec<Vec<f64>>,
    // Timing/usage fields are optional; servers may omit them.
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
}
166
167impl From<ApiErrorResponse> for EmbeddingError {
168    fn from(err: ApiErrorResponse) -> Self {
169        EmbeddingError::ProviderError(err.message)
170    }
171}
172
173impl From<ApiResponse<EmbeddingResponse>> for Result<EmbeddingResponse, EmbeddingError> {
174    fn from(value: ApiResponse<EmbeddingResponse>) -> Self {
175        match value {
176            ApiResponse::Ok(response) => Ok(response),
177            ApiResponse::Err(err) => Err(EmbeddingError::ProviderError(err.message)),
178        }
179    }
180}
181
182// ---------- Embedding Model ----------
183
/// Ollama embedding model bound to a [`Client`].
#[derive(Clone)]
pub struct EmbeddingModel<T = reqwest::Client> {
    client: Client<T>,
    /// Model identifier, e.g. `"all-minilm"`.
    pub model: String,
    // Dimensionality reported by `ndims()`; not validated against the server.
    ndims: usize,
}
190
191impl<T> EmbeddingModel<T> {
192    pub fn new(client: Client<T>, model: impl Into<String>, ndims: usize) -> Self {
193        Self {
194            client,
195            model: model.into(),
196            ndims,
197        }
198    }
199
200    pub fn with_model(client: Client<T>, model: &str, ndims: usize) -> Self {
201        Self {
202            client,
203            model: model.into(),
204            ndims,
205        }
206    }
207}
208
impl<T> embeddings::EmbeddingModel for EmbeddingModel<T>
where
    T: HttpClientExt + Clone + 'static,
{
    type Client = Client<T>;

    /// Builds the model; when `dims` is `None`, falls back to the known
    /// dimensionality of well-known models, or 0 for unrecognised models.
    fn make(client: &Self::Client, model: impl Into<String>, dims: Option<usize>) -> Self {
        let model = model.into();
        let dims = dims
            .or(model_dimensions_from_identifier(&model))
            .unwrap_or_default();
        Self::new(client.clone(), model, dims)
    }

    const MAX_DOCUMENTS: usize = 1024;
    fn ndims(&self) -> usize {
        self.ndims
    }

    /// Embeds `documents` via `POST api/embed`.
    ///
    /// # Errors
    /// Returns an error on HTTP failure, non-success status, a malformed body,
    /// or when the server returns a different number of embeddings than
    /// documents submitted.
    async fn embed_texts(
        &self,
        documents: impl IntoIterator<Item = String>,
    ) -> Result<Vec<embeddings::Embedding>, EmbeddingError> {
        let docs: Vec<String> = documents.into_iter().collect();

        let body = serde_json::to_vec(&json!({
            "model": self.model,
            "input": docs
        }))?;

        let req = self
            .client
            .post("api/embed")?
            .body(body)
            .map_err(|e| EmbeddingError::HttpError(e.into()))?;

        let response = self.client.send::<_, Vec<u8>>(req).await?;

        if !response.status().is_success() {
            // Surface the raw error body as the provider error message.
            let text = http_client::text(response).await?;
            return Err(EmbeddingError::ProviderError(text));
        }

        let bytes: Vec<u8> = response.into_body().await?;

        let api_resp: EmbeddingResponse = serde_json::from_slice(&bytes)?;

        // The server must return exactly one embedding per input document.
        if api_resp.embeddings.len() != docs.len() {
            return Err(EmbeddingError::ResponseError(
                "Number of returned embeddings does not match input".into(),
            ));
        }
        // Pair each returned vector with its source document, preserving order.
        Ok(api_resp
            .embeddings
            .into_iter()
            .zip(docs.into_iter())
            .map(|(vec, document)| embeddings::Embedding { document, vec })
            .collect())
    }
}
269
270// ---------- Completion API ----------
271
/// Well-known Ollama chat model identifiers.
pub const LLAMA3_2: &str = "llama3.2";
pub const LLAVA: &str = "llava";
pub const MISTRAL: &str = "mistral";
275
/// Raw body of Ollama's `POST api/chat` response (one NDJSON chunk when
/// streaming, the full body otherwise).
#[derive(Debug, Serialize, Deserialize)]
pub struct CompletionResponse {
    pub model: String,
    pub created_at: String,
    pub message: Message,
    /// `true` on the final (or only) chunk.
    pub done: bool,
    // Timing/usage metadata; only present on the final chunk / full response.
    #[serde(default)]
    pub done_reason: Option<String>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
    #[serde(default)]
    pub prompt_eval_duration: Option<u64>,
    #[serde(default)]
    pub eval_count: Option<u64>,
    #[serde(default)]
    pub eval_duration: Option<u64>,
}
/// Converts a raw Ollama chat response into Rig's generic completion response.
impl TryFrom<CompletionResponse> for completion::CompletionResponse<CompletionResponse> {
    type Error = CompletionError;
    /// # Errors
    /// Fails when the message is not an assistant message, or carries neither
    /// text content nor tool calls.
    fn try_from(resp: CompletionResponse) -> Result<Self, Self::Error> {
        match resp.message {
            // Process only if an assistant message is present.
            Message::Assistant {
                content,
                thinking,
                tool_calls,
                ..
            } => {
                let mut assistant_contents = Vec::new();
                // Add the assistant's text content if any.
                if !content.is_empty() {
                    assistant_contents.push(completion::AssistantContent::text(&content));
                }
                // Process tool_calls following Ollama's chat response definition.
                // Each ToolCall has an id, a type, and a function field.
                // NOTE(review): Ollama supplies no tool-call id, so the function
                // name is reused as the id (first argument) here.
                for tc in tool_calls.iter() {
                    assistant_contents.push(completion::AssistantContent::tool_call(
                        tc.function.name.clone(),
                        tc.function.name.clone(),
                        tc.function.arguments.clone(),
                    ));
                }
                // `OneOrMany::many` rejects an empty list, so a response with no
                // text and no tool calls becomes a ResponseError.
                let choice = OneOrMany::many(assistant_contents).map_err(|_| {
                    CompletionError::ResponseError("No content provided".to_owned())
                })?;
                let prompt_tokens = resp.prompt_eval_count.unwrap_or(0);
                let completion_tokens = resp.eval_count.unwrap_or(0);

                // Rebuild the raw response; the original message was partially
                // moved out of above.
                let raw_response = CompletionResponse {
                    model: resp.model,
                    created_at: resp.created_at,
                    done: resp.done,
                    done_reason: resp.done_reason,
                    total_duration: resp.total_duration,
                    load_duration: resp.load_duration,
                    prompt_eval_count: resp.prompt_eval_count,
                    prompt_eval_duration: resp.prompt_eval_duration,
                    eval_count: resp.eval_count,
                    eval_duration: resp.eval_duration,
                    message: Message::Assistant {
                        content,
                        thinking,
                        images: None,
                        name: None,
                        tool_calls,
                    },
                };

                Ok(completion::CompletionResponse {
                    choice,
                    usage: Usage {
                        input_tokens: prompt_tokens,
                        output_tokens: completion_tokens,
                        total_tokens: prompt_tokens + completion_tokens,
                        cached_input_tokens: 0,
                        cache_creation_input_tokens: 0,
                    },
                    raw_response,
                    message_id: None,
                })
            }
            _ => Err(CompletionError::ResponseError(
                "Chat response does not include an assistant message".into(),
            )),
        }
    }
}
367
/// Serialized request body for Ollama's `POST api/chat`.
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct OllamaCompletionRequest {
    model: String,
    pub messages: Vec<Message>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    tools: Vec<ToolDefinition>,
    /// Whether the server should stream NDJSON chunks.
    pub stream: bool,
    /// Enables "thinking" output on models that support it.
    think: bool,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    keep_alive: Option<String>,
    /// Structured-output JSON schema (Ollama's `format` field).
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<schemars::Schema>,
    /// Remaining model options (temperature plus pass-through extra params).
    options: serde_json::Value,
}
386
387impl TryFrom<(&str, CompletionRequest)> for OllamaCompletionRequest {
388    type Error = CompletionError;
389
390    fn try_from((model, req): (&str, CompletionRequest)) -> Result<Self, Self::Error> {
391        let model = req.model.clone().unwrap_or_else(|| model.to_string());
392        if req.tool_choice.is_some() {
393            tracing::warn!("WARNING: `tool_choice` not supported for Ollama");
394        }
395        // Build up the order of messages (context, chat_history, prompt)
396        let mut partial_history = vec![];
397        if let Some(docs) = req.normalized_documents() {
398            partial_history.push(docs);
399        }
400        partial_history.extend(req.chat_history);
401
402        // Add preamble to chat history (if available)
403        let mut full_history: Vec<Message> = match &req.preamble {
404            Some(preamble) => vec![Message::system(preamble)],
405            None => vec![],
406        };
407
408        // Convert and extend the rest of the history
409        full_history.extend(
410            partial_history
411                .into_iter()
412                .map(message::Message::try_into)
413                .collect::<Result<Vec<Vec<Message>>, _>>()?
414                .into_iter()
415                .flatten()
416                .collect::<Vec<_>>(),
417        );
418
419        let mut think = false;
420        let mut keep_alive: Option<String> = None;
421
422        let options = if let Some(mut extra) = req.additional_params {
423            // Extract top-level parameters that should not be in `options`
424            if let Some(obj) = extra.as_object_mut() {
425                // Extract `think` parameter
426                if let Some(think_val) = obj.remove("think") {
427                    think = think_val.as_bool().ok_or_else(|| {
428                        CompletionError::RequestError("`think` must be a bool".into())
429                    })?;
430                }
431
432                // Extract `keep_alive` parameter
433                if let Some(keep_alive_val) = obj.remove("keep_alive") {
434                    keep_alive = Some(
435                        keep_alive_val
436                            .as_str()
437                            .ok_or_else(|| {
438                                CompletionError::RequestError(
439                                    "`keep_alive` must be a string".into(),
440                                )
441                            })?
442                            .to_string(),
443                    );
444                }
445            }
446
447            json_utils::merge(json!({ "temperature": req.temperature }), extra)
448        } else {
449            json!({ "temperature": req.temperature })
450        };
451
452        Ok(Self {
453            model: model.to_string(),
454            messages: full_history,
455            temperature: req.temperature,
456            max_tokens: req.max_tokens,
457            stream: false,
458            think,
459            keep_alive,
460            format: req.output_schema,
461            tools: req
462                .tools
463                .clone()
464                .into_iter()
465                .map(ToolDefinition::from)
466                .collect::<Vec<_>>(),
467            options,
468        })
469    }
470}
471
/// Ollama chat-completion model bound to a [`Client`].
#[derive(Clone)]
pub struct CompletionModel<T = reqwest::Client> {
    client: Client<T>,
    /// Model identifier, e.g. `"llama3.2"`.
    pub model: String,
}
477
478impl<T> CompletionModel<T> {
479    pub fn new(client: Client<T>, model: &str) -> Self {
480        Self {
481            client,
482            model: model.to_owned(),
483        }
484    }
485}
486
487// ---------- CompletionModel Implementation ----------
488
/// Final-chunk metadata emitted at the end of an Ollama streaming completion.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct StreamingCompletionResponse {
    pub done_reason: Option<String>,
    pub total_duration: Option<u64>,
    pub load_duration: Option<u64>,
    /// Tokens consumed by the prompt (input tokens).
    pub prompt_eval_count: Option<u64>,
    pub prompt_eval_duration: Option<u64>,
    /// Tokens generated by the model (output tokens).
    pub eval_count: Option<u64>,
    pub eval_duration: Option<u64>,
}
499
500impl GetTokenUsage for StreamingCompletionResponse {
501    fn token_usage(&self) -> Option<crate::completion::Usage> {
502        let mut usage = crate::completion::Usage::new();
503        let input_tokens = self.prompt_eval_count.unwrap_or_default();
504        let output_tokens = self.eval_count.unwrap_or_default();
505        usage.input_tokens = input_tokens;
506        usage.output_tokens = output_tokens;
507        usage.total_tokens = input_tokens + output_tokens;
508
509        Some(usage)
510    }
511}
512
513impl<T> completion::CompletionModel for CompletionModel<T>
514where
515    T: HttpClientExt + Clone + Default + std::fmt::Debug + Send + 'static,
516{
517    type Response = CompletionResponse;
518    type StreamingResponse = StreamingCompletionResponse;
519
520    type Client = Client<T>;
521
522    fn make(client: &Self::Client, model: impl Into<String>) -> Self {
523        Self::new(client.clone(), model.into().as_str())
524    }
525
526    async fn completion(
527        &self,
528        completion_request: CompletionRequest,
529    ) -> Result<completion::CompletionResponse<Self::Response>, CompletionError> {
530        let span = if tracing::Span::current().is_disabled() {
531            info_span!(
532                target: "rig::completions",
533                "chat",
534                gen_ai.operation.name = "chat",
535                gen_ai.provider.name = "ollama",
536                gen_ai.request.model = self.model,
537                gen_ai.system_instructions = tracing::field::Empty,
538                gen_ai.response.id = tracing::field::Empty,
539                gen_ai.response.model = tracing::field::Empty,
540                gen_ai.usage.output_tokens = tracing::field::Empty,
541                gen_ai.usage.input_tokens = tracing::field::Empty,
542                gen_ai.usage.cached_tokens = tracing::field::Empty,
543            )
544        } else {
545            tracing::Span::current()
546        };
547
548        span.record("gen_ai.system_instructions", &completion_request.preamble);
549        let request = OllamaCompletionRequest::try_from((self.model.as_ref(), completion_request))?;
550
551        if tracing::enabled!(tracing::Level::TRACE) {
552            tracing::trace!(target: "rig::completions",
553                "Ollama completion request: {}",
554                serde_json::to_string_pretty(&request)?
555            );
556        }
557
558        let body = serde_json::to_vec(&request)?;
559
560        let req = self
561            .client
562            .post("api/chat")?
563            .body(body)
564            .map_err(http_client::Error::from)?;
565
566        let async_block = async move {
567            let response = self.client.send::<_, Bytes>(req).await?;
568            let status = response.status();
569            let response_body = response.into_body().into_future().await?.to_vec();
570
571            if !status.is_success() {
572                return Err(CompletionError::ProviderError(
573                    String::from_utf8_lossy(&response_body).to_string(),
574                ));
575            }
576
577            let response: CompletionResponse = serde_json::from_slice(&response_body)?;
578            let span = tracing::Span::current();
579            span.record("gen_ai.response.model_name", &response.model);
580            span.record(
581                "gen_ai.usage.input_tokens",
582                response.prompt_eval_count.unwrap_or_default(),
583            );
584            span.record(
585                "gen_ai.usage.output_tokens",
586                response.eval_count.unwrap_or_default(),
587            );
588
589            if tracing::enabled!(tracing::Level::TRACE) {
590                tracing::trace!(target: "rig::completions",
591                    "Ollama completion response: {}",
592                    serde_json::to_string_pretty(&response)?
593                );
594            }
595
596            let response: completion::CompletionResponse<CompletionResponse> =
597                response.try_into()?;
598
599            Ok(response)
600        };
601
602        tracing::Instrument::instrument(async_block, span).await
603    }
604
605    async fn stream(
606        &self,
607        request: CompletionRequest,
608    ) -> Result<streaming::StreamingCompletionResponse<Self::StreamingResponse>, CompletionError>
609    {
610        let span = if tracing::Span::current().is_disabled() {
611            info_span!(
612                target: "rig::completions",
613                "chat_streaming",
614                gen_ai.operation.name = "chat_streaming",
615                gen_ai.provider.name = "ollama",
616                gen_ai.request.model = self.model,
617                gen_ai.system_instructions = tracing::field::Empty,
618                gen_ai.response.id = tracing::field::Empty,
619                gen_ai.response.model = self.model,
620                gen_ai.usage.output_tokens = tracing::field::Empty,
621                gen_ai.usage.input_tokens = tracing::field::Empty,
622                gen_ai.usage.cached_tokens = tracing::field::Empty,
623            )
624        } else {
625            tracing::Span::current()
626        };
627
628        span.record("gen_ai.system_instructions", &request.preamble);
629
630        let mut request = OllamaCompletionRequest::try_from((self.model.as_ref(), request))?;
631        request.stream = true;
632
633        if tracing::enabled!(tracing::Level::TRACE) {
634            tracing::trace!(target: "rig::completions",
635                "Ollama streaming completion request: {}",
636                serde_json::to_string_pretty(&request)?
637            );
638        }
639
640        let body = serde_json::to_vec(&request)?;
641
642        let req = self
643            .client
644            .post("api/chat")?
645            .body(body)
646            .map_err(http_client::Error::from)?;
647
648        let response = self.client.send_streaming(req).await?;
649        let status = response.status();
650        let mut byte_stream = response.into_body();
651
652        if !status.is_success() {
653            return Err(CompletionError::ProviderError(format!(
654                "Got error status code trying to send a request to Ollama: {status}"
655            )));
656        }
657
658        let stream = try_stream! {
659            let span = tracing::Span::current();
660            let mut tool_calls_final = Vec::new();
661            let mut text_response = String::new();
662            let mut thinking_response = String::new();
663
664            while let Some(chunk) = byte_stream.next().await {
665                let bytes = chunk.map_err(|e| http_client::Error::Instance(e.into()))?;
666
667                for line in bytes.split(|&b| b == b'\n') {
668                    if line.is_empty() {
669                        continue;
670                    }
671
672                    tracing::debug!(target: "rig", "Received NDJSON line from Ollama: {}", String::from_utf8_lossy(line));
673
674                    let response: CompletionResponse = serde_json::from_slice(line)?;
675
676                    if let Message::Assistant { content, thinking, tool_calls, .. } = response.message {
677                        if let Some(thinking_content) = thinking && !thinking_content.is_empty() {
678                            thinking_response += &thinking_content;
679                            yield RawStreamingChoice::ReasoningDelta {
680                                id: None,
681                                reasoning: thinking_content,
682                            };
683                        }
684
685                        if !content.is_empty() {
686                            text_response += &content;
687                            yield RawStreamingChoice::Message(content);
688                        }
689
690                        for tool_call in tool_calls {
691                            tool_calls_final.push(tool_call.clone());
692                            yield RawStreamingChoice::ToolCall(
693                                crate::streaming::RawStreamingToolCall::new(String::new(), tool_call.function.name, tool_call.function.arguments)
694                            );
695                        }
696                    }
697
698                    if response.done {
699                        span.record("gen_ai.usage.input_tokens", response.prompt_eval_count);
700                        span.record("gen_ai.usage.output_tokens", response.eval_count);
701                        let message = Message::Assistant {
702                            content: text_response.clone(),
703                            thinking: if thinking_response.is_empty() { None } else { Some(thinking_response.clone()) },
704                            images: None,
705                            name: None,
706                            tool_calls: tool_calls_final.clone()
707                        };
708                        span.record("gen_ai.output.messages", serde_json::to_string(&vec![message]).unwrap());
709                        yield RawStreamingChoice::FinalResponse(
710                            StreamingCompletionResponse {
711                                total_duration: response.total_duration,
712                                load_duration: response.load_duration,
713                                prompt_eval_count: response.prompt_eval_count,
714                                prompt_eval_duration: response.prompt_eval_duration,
715                                eval_count: response.eval_count,
716                                eval_duration: response.eval_duration,
717                                done_reason: response.done_reason,
718                            }
719                        );
720                        break;
721                    }
722                }
723            }
724        }.instrument(span);
725
726        Ok(streaming::StreamingCompletionResponse::stream(Box::pin(
727            stream,
728        )))
729    }
730}
731
732// ---------- Model Listing  ----------
733
/// Response body of `GET api/tags`.
#[derive(Debug, Deserialize)]
struct ListModelsResponse {
    models: Vec<ListModelEntry>,
}
738
/// Single entry in the `GET api/tags` model list.
#[derive(Debug, Deserialize)]
struct ListModelEntry {
    name: String,
    model: String,
}
744
745impl From<ListModelEntry> for Model {
746    fn from(value: ListModelEntry) -> Self {
747        Model::new(value.model, value.name)
748    }
749}
750
/// [`ModelLister`] implementation for the Ollama API (`GET /api/tags`).
#[derive(Clone)]
pub struct OllamaModelLister<H = reqwest::Client> {
    // Client used to issue the listing request.
    client: Client<H>,
}
756
757impl<H> ModelLister<H> for OllamaModelLister<H>
758where
759    H: HttpClientExt + Send + Sync + 'static,
760{
761    type Client = Client<H>;
762
763    fn new(client: Self::Client) -> Self {
764        Self { client }
765    }
766
767    async fn list_all(&self) -> Result<ModelList, ModelListingError> {
768        let path = "/api/tags";
769        let req = self.client.get(path)?.body(http_client::NoBody)?;
770        let response = self.client.send::<_, Vec<u8>>(req).await?;
771
772        if !response.status().is_success() {
773            let status_code = response.status().as_u16();
774            let body = response.into_body().await?;
775            return Err(ModelListingError::api_error_with_context(
776                "Ollama",
777                path,
778                status_code,
779                &body,
780            ));
781        }
782
783        let body = response.into_body().await?;
784        let api_resp: ListModelsResponse = serde_json::from_slice(&body).map_err(|error| {
785            ModelListingError::parse_error_with_context("Ollama", path, &error, &body)
786        })?;
787        let models = api_resp.models.into_iter().map(Model::from).collect();
788
789        Ok(ModelList::new(models))
790    }
791}
792
793// ---------- Tool Definition Conversion ----------
794
/// Ollama-required tool definition format.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolDefinition {
    /// Always serialized as `"function"`.
    #[serde(rename = "type")]
    pub type_field: String, // Fixed as "function"
    pub function: completion::ToolDefinition,
}
802
803/// Convert internal ToolDefinition (from the completion module) into Ollama's tool definition.
804impl From<crate::completion::ToolDefinition> for ToolDefinition {
805    fn from(tool: crate::completion::ToolDefinition) -> Self {
806        ToolDefinition {
807            type_field: "function".to_owned(),
808            function: completion::ToolDefinition {
809                name: tool.name,
810                description: tool.description,
811                parameters: tool.parameters,
812            },
813        }
814    }
815}
816
/// Tool invocation emitted by the model inside an assistant message.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolCall {
    #[serde(default, rename = "type")]
    pub r#type: ToolType,
    pub function: Function,
}
/// Tool kind; only `function` is modelled here.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    #[default]
    Function,
}
/// Function name and JSON arguments of a tool call.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct Function {
    pub name: String,
    pub arguments: Value,
}
834
835// ---------- Provider Message Definition ----------
836
/// Provider-side chat message; serialized with a `role` tag
/// (`user` / `assistant` / `system` / `tool`).
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum Message {
    User {
        content: String,
        // NOTE(review): presumably base64-encoded image payloads per Ollama's
        // multimodal API — confirm against the server version in use.
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    Assistant {
        // Defaults to "" when the server omits `content` (e.g. tool-call-only chunks).
        #[serde(default)]
        content: String,
        /// Model "thinking" output, produced when the `think` request flag is set.
        #[serde(skip_serializing_if = "Option::is_none")]
        thinking: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        // A JSON `null` deserializes to an empty vec via `null_or_vec`.
        #[serde(default, deserialize_with = "json_utils::null_or_vec")]
        tool_calls: Vec<ToolCall>,
    },
    System {
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    #[serde(rename = "tool")]
    ToolResult {
        /// Serialized as `tool_name` on the wire.
        #[serde(rename = "tool_name")]
        name: String,
        content: String,
    },
}
873
874/// -----------------------------
875/// Provider Message Conversions
876/// -----------------------------
877/// Conversion from an internal Rig message (crate::message::Message) to a provider Message.
878/// (Only User and Assistant variants are supported.)
879impl TryFrom<crate::message::Message> for Vec<Message> {
880    type Error = crate::message::MessageError;
881    fn try_from(internal_msg: crate::message::Message) -> Result<Self, Self::Error> {
882        use crate::message::Message as InternalMessage;
883        match internal_msg {
884            InternalMessage::System { content } => Ok(vec![Message::System {
885                content,
886                images: None,
887                name: None,
888            }]),
889            InternalMessage::User { content, .. } => {
890                let (tool_results, other_content): (Vec<_>, Vec<_>) =
891                    content.into_iter().partition(|content| {
892                        matches!(content, crate::message::UserContent::ToolResult(_))
893                    });
894
895                if !tool_results.is_empty() {
896                    tool_results
897                        .into_iter()
898                        .map(|content| match content {
899                            crate::message::UserContent::ToolResult(
900                                crate::message::ToolResult { id, content, .. },
901                            ) => {
902                                // Ollama expects a single string for tool results, so we concatenate
903                                let content_string = content
904                                    .into_iter()
905                                    .map(|content| match content {
906                                        crate::message::ToolResultContent::Text(text) => text.text,
907                                        _ => "[Non-text content]".to_string(),
908                                    })
909                                    .collect::<Vec<_>>()
910                                    .join("\n");
911
912                                Ok::<_, crate::message::MessageError>(Message::ToolResult {
913                                    name: id,
914                                    content: content_string,
915                                })
916                            }
917                            _ => unreachable!(),
918                        })
919                        .collect::<Result<Vec<_>, _>>()
920                } else {
921                    // Ollama requires separate text content and images array
922                    let (texts, images) = other_content.into_iter().fold(
923                        (Vec::new(), Vec::new()),
924                        |(mut texts, mut images), content| {
925                            match content {
926                                crate::message::UserContent::Text(crate::message::Text {
927                                    text,
928                                }) => texts.push(text),
929                                crate::message::UserContent::Image(crate::message::Image {
930                                    data: DocumentSourceKind::Base64(data),
931                                    ..
932                                }) => images.push(data),
933                                crate::message::UserContent::Document(
934                                    crate::message::Document {
935                                        data:
936                                            DocumentSourceKind::Base64(data)
937                                            | DocumentSourceKind::String(data),
938                                        ..
939                                    },
940                                ) => texts.push(data),
941                                _ => {} // Audio not supported by Ollama
942                            }
943                            (texts, images)
944                        },
945                    );
946
947                    Ok(vec![Message::User {
948                        content: texts.join(" "),
949                        images: if images.is_empty() {
950                            None
951                        } else {
952                            Some(
953                                images
954                                    .into_iter()
955                                    .map(|x| x.to_string())
956                                    .collect::<Vec<String>>(),
957                            )
958                        },
959                        name: None,
960                    }])
961                }
962            }
963            InternalMessage::Assistant { content, .. } => {
964                let mut thinking: Option<String> = None;
965                let mut text_content = Vec::new();
966                let mut tool_calls = Vec::new();
967
968                for content in content.into_iter() {
969                    match content {
970                        crate::message::AssistantContent::Text(text) => {
971                            text_content.push(text.text)
972                        }
973                        crate::message::AssistantContent::ToolCall(tool_call) => {
974                            tool_calls.push(tool_call)
975                        }
976                        crate::message::AssistantContent::Reasoning(reasoning) => {
977                            let display = reasoning.display_text();
978                            if !display.is_empty() {
979                                thinking = Some(display);
980                            }
981                        }
982                        crate::message::AssistantContent::Image(_) => {
983                            return Err(crate::message::MessageError::ConversionError(
984                                "Ollama currently doesn't support images.".into(),
985                            ));
986                        }
987                    }
988                }
989
990                // `OneOrMany` ensures at least one `AssistantContent::Text` or `ToolCall` exists,
991                //  so either `content` or `tool_calls` will have some content.
992                Ok(vec![Message::Assistant {
993                    content: text_content.join(" "),
994                    thinking,
995                    images: None,
996                    name: None,
997                    tool_calls: tool_calls
998                        .into_iter()
999                        .map(|tool_call| tool_call.into())
1000                        .collect::<Vec<_>>(),
1001                }])
1002            }
1003        }
1004    }
1005}
1006
1007/// Conversion from provider Message to a completion message.
1008/// This is needed so that responses can be converted back into chat history.
1009impl From<Message> for crate::completion::Message {
1010    fn from(msg: Message) -> Self {
1011        match msg {
1012            Message::User { content, .. } => crate::completion::Message::User {
1013                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text {
1014                    text: content,
1015                })),
1016            },
1017            Message::Assistant {
1018                content,
1019                tool_calls,
1020                ..
1021            } => {
1022                let mut assistant_contents =
1023                    vec![crate::completion::message::AssistantContent::Text(Text {
1024                        text: content,
1025                    })];
1026                for tc in tool_calls {
1027                    assistant_contents.push(
1028                        crate::completion::message::AssistantContent::tool_call(
1029                            tc.function.name.clone(),
1030                            tc.function.name,
1031                            tc.function.arguments,
1032                        ),
1033                    );
1034                }
1035                crate::completion::Message::Assistant {
1036                    id: None,
1037                    content: OneOrMany::many(assistant_contents).unwrap(),
1038                }
1039            }
1040            // System and ToolResult are converted to User message as needed.
1041            Message::System { content, .. } => crate::completion::Message::User {
1042                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text {
1043                    text: content,
1044                })),
1045            },
1046            Message::ToolResult { name, content } => crate::completion::Message::User {
1047                content: OneOrMany::one(message::UserContent::tool_result(
1048                    name,
1049                    OneOrMany::one(message::ToolResultContent::text(content)),
1050                )),
1051            },
1052        }
1053    }
1054}
1055
1056impl Message {
1057    /// Constructs a system message.
1058    pub fn system(content: &str) -> Self {
1059        Message::System {
1060            content: content.to_owned(),
1061            images: None,
1062            name: None,
1063        }
1064    }
1065}
1066
1067// ---------- Additional Message Types ----------
1068
1069impl From<crate::message::ToolCall> for ToolCall {
1070    fn from(tool_call: crate::message::ToolCall) -> Self {
1071        Self {
1072            r#type: ToolType::Function,
1073            function: Function {
1074                name: tool_call.function.name,
1075                arguments: tool_call.function.arguments,
1076            },
1077        }
1078    }
1079}
1080
/// Typed text content used for system messages.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct SystemContent {
    // Content kind; defaults to `text` when the field is absent.
    #[serde(default)]
    r#type: SystemContentType,
    // The message text itself.
    text: String,
}
1087
/// Discriminator for [`SystemContent`]; only plain text is defined.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum SystemContentType {
    /// Serialized as `"text"`; also the default.
    #[default]
    Text,
}
1094
1095impl From<String> for SystemContent {
1096    fn from(s: String) -> Self {
1097        SystemContent {
1098            r#type: SystemContentType::default(),
1099            text: s,
1100        }
1101    }
1102}
1103
1104impl FromStr for SystemContent {
1105    type Err = std::convert::Infallible;
1106    fn from_str(s: &str) -> Result<Self, Self::Err> {
1107        Ok(SystemContent {
1108            r#type: SystemContentType::default(),
1109            text: s.to_string(),
1110        })
1111    }
1112}
1113
/// Plain-text content of an assistant message.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct AssistantContent {
    /// The text produced by the assistant.
    pub text: String,
}
1118
1119impl FromStr for AssistantContent {
1120    type Err = std::convert::Infallible;
1121    fn from_str(s: &str) -> Result<Self, Self::Err> {
1122        Ok(AssistantContent { text: s.to_owned() })
1123    }
1124}
1125
/// User-supplied content, externally distinguished by a lowercase `"type"` tag.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum UserContent {
    /// Plain text.
    Text { text: String },
    /// An image reference.
    Image { image_url: ImageUrl },
    // Audio variant removed as Ollama API does not support audio input.
}
1133
1134impl FromStr for UserContent {
1135    type Err = std::convert::Infallible;
1136    fn from_str(s: &str) -> Result<Self, Self::Err> {
1137        Ok(UserContent::Text { text: s.to_owned() })
1138    }
1139}
1140
/// An image reference with an optional level-of-detail hint.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ImageUrl {
    /// Location of the image.
    pub url: String,
    /// Detail hint; falls back to `ImageDetail`'s default when absent.
    #[serde(default)]
    pub detail: ImageDetail,
}
1147
1148// =================================================================
1149// Tests
1150// =================================================================
1151
1152#[cfg(test)]
1153mod tests {
1154    use super::*;
1155    use serde_json::json;
1156
1157    // Test deserialization and conversion for the /api/chat endpoint.
1158    #[tokio::test]
1159    async fn test_chat_completion() {
1160        // Sample JSON response from /api/chat (non-streaming) based on Ollama docs.
1161        let sample_chat_response = json!({
1162            "model": "llama3.2",
1163            "created_at": "2023-08-04T19:22:45.499127Z",
1164            "message": {
1165                "role": "assistant",
1166                "content": "The sky is blue because of Rayleigh scattering.",
1167                "images": null,
1168                "tool_calls": [
1169                    {
1170                        "type": "function",
1171                        "function": {
1172                            "name": "get_current_weather",
1173                            "arguments": {
1174                                "location": "San Francisco, CA",
1175                                "format": "celsius"
1176                            }
1177                        }
1178                    }
1179                ]
1180            },
1181            "done": true,
1182            "total_duration": 8000000000u64,
1183            "load_duration": 6000000u64,
1184            "prompt_eval_count": 61u64,
1185            "prompt_eval_duration": 400000000u64,
1186            "eval_count": 468u64,
1187            "eval_duration": 7700000000u64
1188        });
1189        let sample_text = sample_chat_response.to_string();
1190
1191        let chat_resp: CompletionResponse =
1192            serde_json::from_str(&sample_text).expect("Invalid JSON structure");
1193        let conv: completion::CompletionResponse<CompletionResponse> =
1194            chat_resp.try_into().unwrap();
1195        assert!(
1196            !conv.choice.is_empty(),
1197            "Expected non-empty choice in chat response"
1198        );
1199    }
1200
1201    // Test conversion from provider Message to completion Message.
1202    #[test]
1203    fn test_message_conversion() {
1204        // Construct a provider Message (User variant with String content).
1205        let provider_msg = Message::User {
1206            content: "Test message".to_owned(),
1207            images: None,
1208            name: None,
1209        };
1210        // Convert it into a completion::Message.
1211        let comp_msg: crate::completion::Message = provider_msg.into();
1212        match comp_msg {
1213            crate::completion::Message::User { content } => {
1214                // Assume OneOrMany<T> has a method first() to access the first element.
1215                let first_content = content.first();
1216                // The expected type is crate::completion::message::UserContent::Text wrapping a Text struct.
1217                match first_content {
1218                    crate::completion::message::UserContent::Text(text_struct) => {
1219                        assert_eq!(text_struct.text, "Test message");
1220                    }
1221                    _ => panic!("Expected text content in conversion"),
1222                }
1223            }
1224            _ => panic!("Conversion from provider Message to completion Message failed"),
1225        }
1226    }
1227
1228    // Test conversion of internal tool definition to Ollama's ToolDefinition format.
1229    #[test]
1230    fn test_tool_definition_conversion() {
1231        // Internal tool definition from the completion module.
1232        let internal_tool = crate::completion::ToolDefinition {
1233            name: "get_current_weather".to_owned(),
1234            description: "Get the current weather for a location".to_owned(),
1235            parameters: json!({
1236                "type": "object",
1237                "properties": {
1238                    "location": {
1239                        "type": "string",
1240                        "description": "The location to get the weather for, e.g. San Francisco, CA"
1241                    },
1242                    "format": {
1243                        "type": "string",
1244                        "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
1245                        "enum": ["celsius", "fahrenheit"]
1246                    }
1247                },
1248                "required": ["location", "format"]
1249            }),
1250        };
1251        // Convert internal tool to Ollama's tool definition.
1252        let ollama_tool: ToolDefinition = internal_tool.into();
1253        assert_eq!(ollama_tool.type_field, "function");
1254        assert_eq!(ollama_tool.function.name, "get_current_weather");
1255        assert_eq!(
1256            ollama_tool.function.description,
1257            "Get the current weather for a location"
1258        );
1259        // Check JSON fields in parameters.
1260        let params = &ollama_tool.function.parameters;
1261        assert_eq!(params["properties"]["location"]["type"], "string");
1262    }
1263
1264    // Test deserialization of chat response with thinking content
1265    #[tokio::test]
1266    async fn test_chat_completion_with_thinking() {
1267        let sample_response = json!({
1268            "model": "qwen-thinking",
1269            "created_at": "2023-08-04T19:22:45.499127Z",
1270            "message": {
1271                "role": "assistant",
1272                "content": "The answer is 42.",
1273                "thinking": "Let me think about this carefully. The question asks for the meaning of life...",
1274                "images": null,
1275                "tool_calls": []
1276            },
1277            "done": true,
1278            "total_duration": 8000000000u64,
1279            "load_duration": 6000000u64,
1280            "prompt_eval_count": 61u64,
1281            "prompt_eval_duration": 400000000u64,
1282            "eval_count": 468u64,
1283            "eval_duration": 7700000000u64
1284        });
1285
1286        let chat_resp: CompletionResponse =
1287            serde_json::from_value(sample_response).expect("Failed to deserialize");
1288
1289        // Verify thinking field is present
1290        if let Message::Assistant {
1291            thinking, content, ..
1292        } = &chat_resp.message
1293        {
1294            assert_eq!(
1295                thinking.as_ref().unwrap(),
1296                "Let me think about this carefully. The question asks for the meaning of life..."
1297            );
1298            assert_eq!(content, "The answer is 42.");
1299        } else {
1300            panic!("Expected Assistant message");
1301        }
1302    }
1303
1304    // Test deserialization of chat response without thinking content
1305    #[tokio::test]
1306    async fn test_chat_completion_without_thinking() {
1307        let sample_response = json!({
1308            "model": "llama3.2",
1309            "created_at": "2023-08-04T19:22:45.499127Z",
1310            "message": {
1311                "role": "assistant",
1312                "content": "Hello!",
1313                "images": null,
1314                "tool_calls": []
1315            },
1316            "done": true,
1317            "total_duration": 8000000000u64,
1318            "load_duration": 6000000u64,
1319            "prompt_eval_count": 10u64,
1320            "prompt_eval_duration": 400000000u64,
1321            "eval_count": 5u64,
1322            "eval_duration": 7700000000u64
1323        });
1324
1325        let chat_resp: CompletionResponse =
1326            serde_json::from_value(sample_response).expect("Failed to deserialize");
1327
1328        // Verify thinking field is None when not provided
1329        if let Message::Assistant {
1330            thinking, content, ..
1331        } = &chat_resp.message
1332        {
1333            assert!(thinking.is_none());
1334            assert_eq!(content, "Hello!");
1335        } else {
1336            panic!("Expected Assistant message");
1337        }
1338    }
1339
1340    // Test deserialization of streaming response with thinking content
1341    #[test]
1342    fn test_streaming_response_with_thinking() {
1343        let sample_chunk = json!({
1344            "model": "qwen-thinking",
1345            "created_at": "2023-08-04T19:22:45.499127Z",
1346            "message": {
1347                "role": "assistant",
1348                "content": "",
1349                "thinking": "Analyzing the problem...",
1350                "images": null,
1351                "tool_calls": []
1352            },
1353            "done": false
1354        });
1355
1356        let chunk: CompletionResponse =
1357            serde_json::from_value(sample_chunk).expect("Failed to deserialize");
1358
1359        if let Message::Assistant {
1360            thinking, content, ..
1361        } = &chunk.message
1362        {
1363            assert_eq!(thinking.as_ref().unwrap(), "Analyzing the problem...");
1364            assert_eq!(content, "");
1365        } else {
1366            panic!("Expected Assistant message");
1367        }
1368    }
1369
1370    // Test message conversion with thinking content
1371    #[test]
1372    fn test_message_conversion_with_thinking() {
1373        // Create an internal message with reasoning content
1374        let reasoning_content = crate::message::Reasoning::new("Step 1: Consider the problem");
1375
1376        let internal_msg = crate::message::Message::Assistant {
1377            id: None,
1378            content: crate::OneOrMany::many(vec![
1379                crate::message::AssistantContent::Reasoning(reasoning_content),
1380                crate::message::AssistantContent::Text(crate::message::Text {
1381                    text: "The answer is X".to_string(),
1382                }),
1383            ])
1384            .unwrap(),
1385        };
1386
1387        // Convert to provider Message
1388        let provider_msgs: Vec<Message> = internal_msg.try_into().unwrap();
1389        assert_eq!(provider_msgs.len(), 1);
1390
1391        if let Message::Assistant {
1392            thinking, content, ..
1393        } = &provider_msgs[0]
1394        {
1395            assert_eq!(thinking.as_ref().unwrap(), "Step 1: Consider the problem");
1396            assert_eq!(content, "The answer is X");
1397        } else {
1398            panic!("Expected Assistant message with thinking");
1399        }
1400    }
1401
1402    // Test empty thinking content is handled correctly
1403    #[test]
1404    fn test_empty_thinking_content() {
1405        let sample_response = json!({
1406            "model": "llama3.2",
1407            "created_at": "2023-08-04T19:22:45.499127Z",
1408            "message": {
1409                "role": "assistant",
1410                "content": "Response",
1411                "thinking": "",
1412                "images": null,
1413                "tool_calls": []
1414            },
1415            "done": true,
1416            "total_duration": 8000000000u64,
1417            "load_duration": 6000000u64,
1418            "prompt_eval_count": 10u64,
1419            "prompt_eval_duration": 400000000u64,
1420            "eval_count": 5u64,
1421            "eval_duration": 7700000000u64
1422        });
1423
1424        let chat_resp: CompletionResponse =
1425            serde_json::from_value(sample_response).expect("Failed to deserialize");
1426
1427        if let Message::Assistant {
1428            thinking, content, ..
1429        } = &chat_resp.message
1430        {
1431            // Empty string should still deserialize as Some("")
1432            assert_eq!(thinking.as_ref().unwrap(), "");
1433            assert_eq!(content, "Response");
1434        } else {
1435            panic!("Expected Assistant message");
1436        }
1437    }
1438
1439    // Test thinking with tool calls
1440    #[test]
1441    fn test_thinking_with_tool_calls() {
1442        let sample_response = json!({
1443            "model": "qwen-thinking",
1444            "created_at": "2023-08-04T19:22:45.499127Z",
1445            "message": {
1446                "role": "assistant",
1447                "content": "Let me check the weather.",
1448                "thinking": "User wants weather info, I should use the weather tool",
1449                "images": null,
1450                "tool_calls": [
1451                    {
1452                        "type": "function",
1453                        "function": {
1454                            "name": "get_weather",
1455                            "arguments": {
1456                                "location": "San Francisco"
1457                            }
1458                        }
1459                    }
1460                ]
1461            },
1462            "done": true,
1463            "total_duration": 8000000000u64,
1464            "load_duration": 6000000u64,
1465            "prompt_eval_count": 30u64,
1466            "prompt_eval_duration": 400000000u64,
1467            "eval_count": 50u64,
1468            "eval_duration": 7700000000u64
1469        });
1470
1471        let chat_resp: CompletionResponse =
1472            serde_json::from_value(sample_response).expect("Failed to deserialize");
1473
1474        if let Message::Assistant {
1475            thinking,
1476            content,
1477            tool_calls,
1478            ..
1479        } = &chat_resp.message
1480        {
1481            assert_eq!(
1482                thinking.as_ref().unwrap(),
1483                "User wants weather info, I should use the weather tool"
1484            );
1485            assert_eq!(content, "Let me check the weather.");
1486            assert_eq!(tool_calls.len(), 1);
1487            assert_eq!(tool_calls[0].function.name, "get_weather");
1488        } else {
1489            panic!("Expected Assistant message with thinking and tool calls");
1490        }
1491    }
1492
1493    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
1494    #[test]
1495    fn test_completion_request_with_think_param() {
1496        use crate::OneOrMany;
1497        use crate::completion::Message as CompletionMessage;
1498        use crate::message::{Text, UserContent};
1499
1500        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
1501        let completion_request = CompletionRequest {
1502            model: None,
1503            preamble: Some("You are a helpful assistant.".to_string()),
1504            chat_history: OneOrMany::one(CompletionMessage::User {
1505                content: OneOrMany::one(UserContent::Text(Text {
1506                    text: "What is 2 + 2?".to_string(),
1507                })),
1508            }),
1509            documents: vec![],
1510            tools: vec![],
1511            temperature: Some(0.7),
1512            max_tokens: Some(1024),
1513            tool_choice: None,
1514            additional_params: Some(json!({
1515                "think": true,
1516                "keep_alive": "-1m",
1517                "num_ctx": 4096
1518            })),
1519            output_schema: None,
1520        };
1521
1522        // Convert to OllamaCompletionRequest
1523        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1524            .expect("Failed to create Ollama request");
1525
1526        // Serialize to JSON
1527        let serialized =
1528            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1529
1530        // Assert equality with expected JSON
1531        // - "tools" is skipped when empty (skip_serializing_if)
1532        // - "think" should be a top-level boolean, NOT in options
1533        // - "keep_alive" should be a top-level string, NOT in options
1534        // - "num_ctx" should be in options (it's a model parameter)
1535        let expected = json!({
1536            "model": "qwen3:8b",
1537            "messages": [
1538                {
1539                    "role": "system",
1540                    "content": "You are a helpful assistant."
1541                },
1542                {
1543                    "role": "user",
1544                    "content": "What is 2 + 2?"
1545                }
1546            ],
1547            "temperature": 0.7,
1548            "stream": false,
1549            "think": true,
1550            "max_tokens": 1024,
1551            "keep_alive": "-1m",
1552            "options": {
1553                "temperature": 0.7,
1554                "num_ctx": 4096
1555            }
1556        });
1557
1558        assert_eq!(serialized, expected);
1559    }
1560
1561    // Test that `think` defaults to false when not specified
1562    #[test]
1563    fn test_completion_request_with_think_false_default() {
1564        use crate::OneOrMany;
1565        use crate::completion::Message as CompletionMessage;
1566        use crate::message::{Text, UserContent};
1567
1568        // Create a CompletionRequest WITHOUT "think" in additional_params
1569        let completion_request = CompletionRequest {
1570            model: None,
1571            preamble: Some("You are a helpful assistant.".to_string()),
1572            chat_history: OneOrMany::one(CompletionMessage::User {
1573                content: OneOrMany::one(UserContent::Text(Text {
1574                    text: "Hello!".to_string(),
1575                })),
1576            }),
1577            documents: vec![],
1578            tools: vec![],
1579            temperature: Some(0.5),
1580            max_tokens: None,
1581            tool_choice: None,
1582            additional_params: None,
1583            output_schema: None,
1584        };
1585
1586        // Convert to OllamaCompletionRequest
1587        let ollama_request = OllamaCompletionRequest::try_from(("llama3.2", completion_request))
1588            .expect("Failed to create Ollama request");
1589
1590        // Serialize to JSON
1591        let serialized =
1592            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1593
1594        // Assert that "think" defaults to false and "keep_alive" is not present
1595        let expected = json!({
1596            "model": "llama3.2",
1597            "messages": [
1598                {
1599                    "role": "system",
1600                    "content": "You are a helpful assistant."
1601                },
1602                {
1603                    "role": "user",
1604                    "content": "Hello!"
1605                }
1606            ],
1607            "temperature": 0.5,
1608            "stream": false,
1609            "think": false,
1610            "options": {
1611                "temperature": 0.5
1612            }
1613        });
1614
1615        assert_eq!(serialized, expected);
1616    }
1617
1618    #[test]
1619    fn test_completion_request_with_output_schema() {
1620        use crate::OneOrMany;
1621        use crate::completion::Message as CompletionMessage;
1622        use crate::message::{Text, UserContent};
1623
1624        let schema: schemars::Schema = serde_json::from_value(json!({
1625            "type": "object",
1626            "properties": {
1627                "age": { "type": "integer" },
1628                "available": { "type": "boolean" }
1629            },
1630            "required": ["age", "available"]
1631        }))
1632        .expect("Failed to parse schema");
1633
1634        let completion_request = CompletionRequest {
1635            model: Some("llama3.1".to_string()),
1636            preamble: None,
1637            chat_history: OneOrMany::one(CompletionMessage::User {
1638                content: OneOrMany::one(UserContent::Text(Text {
1639                    text: "How old is Ollama?".to_string(),
1640                })),
1641            }),
1642            documents: vec![],
1643            tools: vec![],
1644            temperature: None,
1645            max_tokens: None,
1646            tool_choice: None,
1647            additional_params: None,
1648            output_schema: Some(schema),
1649        };
1650
1651        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
1652            .expect("Failed to create Ollama request");
1653
1654        let serialized =
1655            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1656
1657        let format = serialized
1658            .get("format")
1659            .expect("format field should be present");
1660        assert_eq!(
1661            *format,
1662            json!({
1663                "type": "object",
1664                "properties": {
1665                    "age": { "type": "integer" },
1666                    "available": { "type": "boolean" }
1667                },
1668                "required": ["age", "available"]
1669            })
1670        );
1671    }
1672
1673    #[test]
1674    fn test_completion_request_without_output_schema() {
1675        use crate::OneOrMany;
1676        use crate::completion::Message as CompletionMessage;
1677        use crate::message::{Text, UserContent};
1678
1679        let completion_request = CompletionRequest {
1680            model: Some("llama3.1".to_string()),
1681            preamble: None,
1682            chat_history: OneOrMany::one(CompletionMessage::User {
1683                content: OneOrMany::one(UserContent::Text(Text {
1684                    text: "Hello!".to_string(),
1685                })),
1686            }),
1687            documents: vec![],
1688            tools: vec![],
1689            temperature: None,
1690            max_tokens: None,
1691            tool_choice: None,
1692            additional_params: None,
1693            output_schema: None,
1694        };
1695
1696        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
1697            .expect("Failed to create Ollama request");
1698
1699        let serialized =
1700            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1701
1702        assert!(
1703            serialized.get("format").is_none(),
1704            "format field should be absent when output_schema is None"
1705        );
1706    }
1707
1708    #[test]
1709    fn test_client_initialization() {
1710        let _client = crate::providers::ollama::Client::new(Nothing).expect("Client::new() failed");
1711        let _client_from_builder = crate::providers::ollama::Client::builder()
1712            .api_key(Nothing)
1713            .build()
1714            .expect("Client::builder() failed");
1715    }
1716}