Skip to main content

rig_core/providers/
ollama.rs

1//! Ollama API client and Rig integration
2//!
3//! # Example
4//! ```rust,ignore
5//! use rig_core::client::{Nothing, CompletionClient};
6//! use rig_core::completion::Prompt;
7//! use rig_core::providers::ollama;
8//!
9//! // Create a new Ollama client (defaults to http://localhost:11434, no auth)
10//! let client = ollama::Client::new(Nothing).unwrap();
11//!
12//! // Or connect to a remote/proxied Ollama instance with authentication
13//! let client = ollama::Client::builder()
14//!     .api_key("my-secret-key")
15//!     .base_url("http://remote-ollama:11434")
16//!     .build()
17//!     .unwrap();
18//!
19//! // Create an agent with a preamble
20//! let comedian_agent = client
21//!     .agent("qwen2.5:14b")
22//!     .preamble("You are a comedian here to entertain the user using humour and jokes.")
23//!     .build();
24//!
25//! // Prompt the agent and print the response
26//! let response = comedian_agent.prompt("Entertain me!").await?;
27//! println!("{response}");
28//!
29//! // Create an embedding model using the "all-minilm" model
30//! let emb_model = client.embedding_model("all-minilm", 384);
31//! let embeddings = emb_model.embed_texts(vec![
32//!     "Why is the sky blue?".to_owned(),
33//!     "Why is the grass green?".to_owned()
34//! ]).await?;
35//! println!("Embedding response: {:?}", embeddings);
36//!
37//! // Create an extractor if needed
38//! let extractor = client.extractor::<serde_json::Value>("llama3.2").build();
39//! ```
40use crate::client::{
41    self, ApiKey, Capabilities, Capable, DebugExt, ModelLister, Nothing, Provider, ProviderBuilder,
42    ProviderClient,
43};
44use crate::completion::{GetTokenUsage, Usage};
45use crate::http_client::{self, HttpClientExt};
46use crate::message::DocumentSourceKind;
47use crate::model::{Model, ModelList, ModelListingError};
48use crate::streaming::RawStreamingChoice;
49use crate::{
50    OneOrMany,
51    completion::{self, CompletionError, CompletionRequest},
52    embeddings::{self, EmbeddingError},
53    json_utils, message,
54    message::{ImageDetail, Text},
55    streaming,
56    wasm_compat::{WasmCompatSend, WasmCompatSync},
57};
58use async_stream::try_stream;
59use bytes::Bytes;
60use futures::StreamExt;
61use serde::{Deserialize, Serialize};
62use serde_json::{Value, json};
63use std::{convert::TryFrom, str::FromStr};
64use tracing::info_span;
65use tracing_futures::Instrument;
66// ---------- Main Client ----------
67
68const OLLAMA_API_BASE_URL: &str = "http://localhost:11434";
69
/// Optional API key for Ollama.
///
/// By default Ollama requires no authentication, but proxied or secured
/// deployments may require a Bearer token. The wrapped `Option` is `None`
/// when no key is configured; the `From<String>` and `From<&str>` conversions
/// in this module map an empty string to `None`.
#[derive(Debug, Default, Clone)]
pub struct OllamaApiKey(Option<String>);
74
75impl ApiKey for OllamaApiKey {
76    fn into_header(
77        self,
78    ) -> Option<http_client::Result<(http::header::HeaderName, http::header::HeaderValue)>> {
79        self.0.map(http_client::make_auth_header)
80    }
81}
82
83impl From<Nothing> for OllamaApiKey {
84    fn from(_: Nothing) -> Self {
85        Self(None)
86    }
87}
88
89impl From<String> for OllamaApiKey {
90    fn from(key: String) -> Self {
91        if key.is_empty() {
92            Self(None)
93        } else {
94            Self(Some(key))
95        }
96    }
97}
98
99impl From<&str> for OllamaApiKey {
100    fn from(key: &str) -> Self {
101        if key.is_empty() {
102            Self(None)
103        } else {
104            Self(Some(key.to_owned()))
105        }
106    }
107}
108
/// Zero-sized provider marker for Ollama.
///
/// It carries no data; this module implements `Provider`, `Capabilities`
/// and `DebugExt` for it.
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaExt;
111
/// Zero-sized builder marker for Ollama.
///
/// Its `ProviderBuilder` implementation in this module produces [`OllamaExt`].
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaBuilder;
114
/// Ties the Ollama provider marker to its builder type.
impl Provider for OllamaExt {
    type Builder = OllamaBuilder;
    // Relative path of the tags endpoint; the model lister in this module
    // queries the same endpoint. NOTE(review): presumably used by the client
    // to verify connectivity — confirm against the `Provider` trait docs.
    const VERIFY_PATH: &'static str = "api/tags";
}
119
/// Declares which capabilities this provider exposes.
///
/// Completion, embeddings and model listing are backed by the concrete types
/// defined in this module; transcription, image generation and audio
/// generation are declared as `Nothing`.
impl<H> Capabilities<H> for OllamaExt {
    type Completion = Capable<CompletionModel<H>>;
    type Transcription = Nothing;
    type Embeddings = Capable<EmbeddingModel<H>>;
    type ModelListing = Capable<OllamaModelLister<H>>;
    // Only present when the crate is built with the `image` feature.
    #[cfg(feature = "image")]
    type ImageGeneration = Nothing;

    // Only present when the crate is built with the `audio` feature.
    #[cfg(feature = "audio")]
    type AudioGeneration = Nothing;
}
131
// Empty impl: no trait items are overridden here, so whatever defaults
// `DebugExt` provides apply.
impl DebugExt for OllamaExt {}
133
/// Builder glue for the Ollama provider.
///
/// The extension type is always [`OllamaExt`] regardless of the HTTP client,
/// the key type is [`OllamaApiKey`], and the default base URL is
/// `OLLAMA_API_BASE_URL`.
impl ProviderBuilder for OllamaBuilder {
    type Extension<H>
        = OllamaExt
    where
        H: HttpClientExt;
    type ApiKey = OllamaApiKey;

    const BASE_URL: &'static str = OLLAMA_API_BASE_URL;

    /// Builds the provider extension.
    ///
    /// The builder argument is unused and this never fails: `OllamaExt`
    /// carries no state.
    fn build<H>(
        _builder: &client::ClientBuilder<Self, Self::ApiKey, H>,
    ) -> http_client::Result<Self::Extension<H>>
    where
        H: HttpClientExt,
    {
        Ok(OllamaExt)
    }
}
152
/// Ollama client: the generic `client::Client` specialised to [`OllamaExt`],
/// using `reqwest::Client` as the HTTP backend unless another is given.
pub type Client<H = reqwest::Client> = client::Client<OllamaExt, H>;
/// Builder for [`Client`], specialised to [`OllamaBuilder`] and
/// [`OllamaApiKey`]. The HTTP client parameter defaults to the
/// `crate::markers::Missing` marker.
pub type ClientBuilder<H = crate::markers::Missing> =
    client::ClientBuilder<OllamaBuilder, OllamaApiKey, H>;
156
157impl ProviderClient for Client {
158    type Input = OllamaApiKey;
159    type Error = crate::client::ProviderClientError;
160
161    fn from_env() -> Result<Self, Self::Error> {
162        let api_base = crate::client::optional_env_var("OLLAMA_API_BASE_URL")?
163            .unwrap_or_else(|| OLLAMA_API_BASE_URL.to_string());
164
165        let api_key = crate::client::optional_env_var("OLLAMA_API_KEY")?
166            .map(OllamaApiKey::from)
167            .unwrap_or_default();
168
169        Self::builder()
170            .api_key(api_key)
171            .base_url(&api_base)
172            .build()
173            .map_err(Into::into)
174    }
175
176    fn from_val(api_key: Self::Input) -> Result<Self, Self::Error> {
177        Self::builder().api_key(api_key).build().map_err(Into::into)
178    }
179}
180
181// ---------- API Error and Response Structures ----------
182
/// Error payload shape: a JSON object with a single `message` string.
#[derive(Debug, Deserialize)]
struct ApiErrorResponse {
    message: String,
}
187
/// Either a successful payload `T` or an [`ApiErrorResponse`].
///
/// The enum is `untagged`, so deserialization tries the variants in
/// declaration order: `Ok(T)` first, then `Err`.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum ApiResponse<T> {
    Ok(T),
    Err(ApiErrorResponse),
}
194
195// ---------- Embedding API ----------
196
/// Identifier of the `all-minilm` embedding model.
pub const ALL_MINILM: &str = "all-minilm";
/// Identifier of the `nomic-embed-text` embedding model.
pub const NOMIC_EMBED_TEXT: &str = "nomic-embed-text";

/// Looks up the embedding dimensionality for a known model identifier.
///
/// Returns `None` for any identifier not listed in the table below; matching
/// is exact and case-sensitive.
fn model_dimensions_from_identifier(identifier: &str) -> Option<usize> {
    const KNOWN_DIMENSIONS: [(&str, usize); 2] = [(ALL_MINILM, 384), (NOMIC_EMBED_TEXT, 768)];

    KNOWN_DIMENSIONS
        .iter()
        .find(|entry| entry.0 == identifier)
        .map(|entry| entry.1)
}
207
208#[derive(Debug, Serialize, Deserialize)]
209pub struct EmbeddingResponse {
210    pub model: String,
211    pub embeddings: Vec<Vec<f64>>,
212    #[serde(default)]
213    pub total_duration: Option<u64>,
214    #[serde(default)]
215    pub load_duration: Option<u64>,
216    #[serde(default)]
217    pub prompt_eval_count: Option<u64>,
218}
219
220impl From<ApiErrorResponse> for EmbeddingError {
221    fn from(err: ApiErrorResponse) -> Self {
222        EmbeddingError::ProviderError(err.message)
223    }
224}
225
226impl From<ApiResponse<EmbeddingResponse>> for Result<EmbeddingResponse, EmbeddingError> {
227    fn from(value: ApiResponse<EmbeddingResponse>) -> Self {
228        match value {
229            ApiResponse::Ok(response) => Ok(response),
230            ApiResponse::Err(err) => Err(EmbeddingError::ProviderError(err.message)),
231        }
232    }
233}
234
235// ---------- Embedding Model ----------
236
/// Handle for an Ollama embedding model bound to a specific client.
#[derive(Clone)]
pub struct EmbeddingModel<T = reqwest::Client> {
    // Client used to send requests to the `api/embed` endpoint.
    client: Client<T>,
    /// Model identifier sent as the `model` field of each request.
    pub model: String,
    // Dimensionality reported by `ndims()`; supplied by the caller or looked
    // up from the model identifier, and 0 when neither is available.
    ndims: usize,
}
243
244impl<T> EmbeddingModel<T> {
245    pub fn new(client: Client<T>, model: impl Into<String>, ndims: usize) -> Self {
246        Self {
247            client,
248            model: model.into(),
249            ndims,
250        }
251    }
252
253    pub fn with_model(client: Client<T>, model: &str, ndims: usize) -> Self {
254        Self {
255            client,
256            model: model.into(),
257            ndims,
258        }
259    }
260}
261
262impl<T> embeddings::EmbeddingModel for EmbeddingModel<T>
263where
264    T: HttpClientExt + Clone + 'static,
265{
266    type Client = Client<T>;
267
268    fn make(client: &Self::Client, model: impl Into<String>, dims: Option<usize>) -> Self {
269        let model = model.into();
270        let dims = dims
271            .or(model_dimensions_from_identifier(&model))
272            .unwrap_or_default();
273        Self::new(client.clone(), model, dims)
274    }
275
276    const MAX_DOCUMENTS: usize = 1024;
277    fn ndims(&self) -> usize {
278        self.ndims
279    }
280
281    async fn embed_texts(
282        &self,
283        documents: impl IntoIterator<Item = String>,
284    ) -> Result<Vec<embeddings::Embedding>, EmbeddingError> {
285        let docs: Vec<String> = documents.into_iter().collect();
286
287        let body = serde_json::to_vec(&json!({
288            "model": self.model,
289            "input": docs
290        }))?;
291
292        let req = self
293            .client
294            .post("api/embed")?
295            .body(body)
296            .map_err(|e| EmbeddingError::HttpError(e.into()))?;
297
298        let response = self.client.send::<_, Vec<u8>>(req).await?;
299
300        if !response.status().is_success() {
301            let text = http_client::text(response).await?;
302            return Err(EmbeddingError::ProviderError(text));
303        }
304
305        let bytes: Vec<u8> = response.into_body().await?;
306
307        let api_resp: EmbeddingResponse = serde_json::from_slice(&bytes)?;
308
309        if api_resp.embeddings.len() != docs.len() {
310            return Err(EmbeddingError::ResponseError(
311                "Number of returned embeddings does not match input".into(),
312            ));
313        }
314        Ok(api_resp
315            .embeddings
316            .into_iter()
317            .zip(docs.into_iter())
318            .map(|(vec, document)| embeddings::Embedding { document, vec })
319            .collect())
320    }
321}
322
323// ---------- Completion API ----------
324
/// Model identifier string `llama3.2`.
pub const LLAMA3_2: &str = "llama3.2";
/// Model identifier string `llava`.
pub const LLAVA: &str = "llava";
/// Model identifier string `mistral`.
pub const MISTRAL: &str = "mistral";
328
329#[derive(Debug, Serialize, Deserialize)]
330pub struct CompletionResponse {
331    pub model: String,
332    pub created_at: String,
333    pub message: Message,
334    pub done: bool,
335    #[serde(default)]
336    pub done_reason: Option<String>,
337    #[serde(default)]
338    pub total_duration: Option<u64>,
339    #[serde(default)]
340    pub load_duration: Option<u64>,
341    #[serde(default)]
342    pub prompt_eval_count: Option<u64>,
343    #[serde(default)]
344    pub prompt_eval_duration: Option<u64>,
345    #[serde(default)]
346    pub eval_count: Option<u64>,
347    #[serde(default)]
348    pub eval_duration: Option<u64>,
349}
350impl TryFrom<CompletionResponse> for completion::CompletionResponse<CompletionResponse> {
351    type Error = CompletionError;
352    fn try_from(resp: CompletionResponse) -> Result<Self, Self::Error> {
353        match resp.message {
354            // Process only if an assistant message is present.
355            Message::Assistant {
356                content,
357                thinking,
358                tool_calls,
359                ..
360            } => {
361                let mut assistant_contents = Vec::new();
362                // Add the assistant's text content if any.
363                if !content.is_empty() {
364                    assistant_contents.push(completion::AssistantContent::text(&content));
365                }
366                // Process tool_calls following Ollama's chat response definition.
367                // Each ToolCall has an id, a type, and a function field.
368                for tc in tool_calls.iter() {
369                    assistant_contents.push(completion::AssistantContent::tool_call(
370                        tc.function.name.clone(),
371                        tc.function.name.clone(),
372                        tc.function.arguments.clone(),
373                    ));
374                }
375                let choice = OneOrMany::many(assistant_contents).map_err(|_| {
376                    CompletionError::ResponseError("No content provided".to_owned())
377                })?;
378                let prompt_tokens = resp.prompt_eval_count.unwrap_or(0);
379                let completion_tokens = resp.eval_count.unwrap_or(0);
380
381                let raw_response = CompletionResponse {
382                    model: resp.model,
383                    created_at: resp.created_at,
384                    done: resp.done,
385                    done_reason: resp.done_reason,
386                    total_duration: resp.total_duration,
387                    load_duration: resp.load_duration,
388                    prompt_eval_count: resp.prompt_eval_count,
389                    prompt_eval_duration: resp.prompt_eval_duration,
390                    eval_count: resp.eval_count,
391                    eval_duration: resp.eval_duration,
392                    message: Message::Assistant {
393                        content,
394                        thinking,
395                        images: None,
396                        name: None,
397                        tool_calls,
398                    },
399                };
400
401                Ok(completion::CompletionResponse {
402                    choice,
403                    usage: Usage {
404                        input_tokens: prompt_tokens,
405                        output_tokens: completion_tokens,
406                        total_tokens: prompt_tokens + completion_tokens,
407                        cached_input_tokens: 0,
408                        cache_creation_input_tokens: 0,
409                        tool_use_prompt_tokens: 0,
410                        reasoning_tokens: 0,
411                    },
412                    raw_response,
413                    message_id: None,
414                })
415            }
416            _ => Err(CompletionError::ResponseError(
417                "Chat response does not include an assistant message".into(),
418            )),
419        }
420    }
421}
422
423#[derive(Debug, Serialize, Deserialize)]
424pub(super) struct OllamaCompletionRequest {
425    model: String,
426    pub messages: Vec<Message>,
427    #[serde(skip_serializing_if = "Option::is_none")]
428    temperature: Option<f64>,
429    #[serde(skip_serializing_if = "Vec::is_empty")]
430    tools: Vec<ToolDefinition>,
431    pub stream: bool,
432    think: Think,
433    #[serde(skip_serializing_if = "Option::is_none")]
434    max_tokens: Option<u64>,
435    #[serde(skip_serializing_if = "Option::is_none")]
436    keep_alive: Option<String>,
437    #[serde(skip_serializing_if = "Option::is_none")]
438    format: Option<schemars::Schema>,
439    options: serde_json::Value,
440}
441
442impl TryFrom<(&str, CompletionRequest)> for OllamaCompletionRequest {
443    type Error = CompletionError;
444
445    fn try_from((model, req): (&str, CompletionRequest)) -> Result<Self, Self::Error> {
446        let model = req.model.clone().unwrap_or_else(|| model.to_string());
447        if req.tool_choice.is_some() {
448            tracing::warn!("WARNING: `tool_choice` not supported for Ollama");
449        }
450        // Build up the order of messages (context, chat_history, prompt)
451        let mut partial_history = vec![];
452        if let Some(docs) = req.normalized_documents() {
453            partial_history.push(docs);
454        }
455        partial_history.extend(req.chat_history);
456
457        // Add preamble to chat history (if available)
458        let mut full_history: Vec<Message> = match &req.preamble {
459            Some(preamble) => vec![Message::system(preamble)],
460            None => vec![],
461        };
462
463        // Convert and extend the rest of the history
464        full_history.extend(
465            partial_history
466                .into_iter()
467                .map(message::Message::try_into)
468                .collect::<Result<Vec<Vec<Message>>, _>>()?
469                .into_iter()
470                .flatten()
471                .collect::<Vec<_>>(),
472        );
473
474        let mut think = Think::Bool(false);
475        let mut keep_alive: Option<String> = None;
476
477        let options = if let Some(mut extra) = req.additional_params {
478            // Extract top-level parameters that should not be in `options`
479            if let Some(obj) = extra.as_object_mut() {
480                // Extract `think` parameter
481                if let Some(think_val) = obj.remove("think") {
482                    think = match think_val {
483                        Value::Bool(think) => Think::Bool(think),
484                        Value::String(think) => Think::Level(match think.to_lowercase().as_str() {
485                            "low" => Level::Low,
486                            "medium" => Level::Medium,
487                            "high" => Level::High,
488                            _ => {
489                                return Err(CompletionError::RequestError(
490                                    "`think` must be a 'low', 'medium', 'high', or bool".into(),
491                                ));
492                            }
493                        }),
494                        _ => {
495                            return Err(CompletionError::RequestError(
496                                "`think` must be a 'low', 'medium', 'high', or bool".into(),
497                            ));
498                        }
499                    };
500                }
501
502                // Extract `keep_alive` parameter
503                if let Some(keep_alive_val) = obj.remove("keep_alive") {
504                    keep_alive = Some(
505                        keep_alive_val
506                            .as_str()
507                            .ok_or_else(|| {
508                                CompletionError::RequestError(
509                                    "`keep_alive` must be a string".into(),
510                                )
511                            })?
512                            .to_string(),
513                    );
514                }
515            }
516
517            json_utils::merge(json!({ "temperature": req.temperature }), extra)
518        } else {
519            json!({ "temperature": req.temperature })
520        };
521
522        Ok(Self {
523            model: model.to_string(),
524            messages: full_history,
525            temperature: req.temperature,
526            max_tokens: req.max_tokens,
527            stream: false,
528            think,
529            keep_alive,
530            format: req.output_schema,
531            tools: req
532                .tools
533                .clone()
534                .into_iter()
535                .map(ToolDefinition::from)
536                .collect::<Vec<_>>(),
537            options,
538        })
539    }
540}
541
/// Handle for an Ollama chat/completion model bound to a specific client.
#[derive(Clone)]
pub struct CompletionModel<T = reqwest::Client> {
    // Client used to send requests to the `api/chat` endpoint.
    client: Client<T>,
    /// Default model name; a request's own `model` takes precedence when set.
    pub model: String,
}
547
548impl<T> CompletionModel<T> {
549    pub fn new(client: Client<T>, model: &str) -> Self {
550        Self {
551            client,
552            model: model.to_owned(),
553        }
554    }
555}
556
/// Value of the request's `think` field.
///
/// The enum is `untagged`, so it serializes as either a bare boolean or a
/// bare [`Level`] string.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
enum Think {
    Bool(bool),
    Level(Level),
}
563
/// Named level for [`Think`]; serialized in lowercase (`"low"`, `"medium"`,
/// `"high"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum Level {
    Low,
    Medium,
    High,
}
571
572// ---------- CompletionModel Implementation ----------
573
/// Final statistics emitted at the end of a streamed completion.
///
/// Populated from the chunk whose `done` flag is set; every field is copied
/// from that chunk as-is.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct StreamingCompletionResponse {
    pub done_reason: Option<String>,
    pub total_duration: Option<u64>,
    pub load_duration: Option<u64>,
    /// Reported as input token usage.
    pub prompt_eval_count: Option<u64>,
    pub prompt_eval_duration: Option<u64>,
    /// Reported as output token usage.
    pub eval_count: Option<u64>,
    pub eval_duration: Option<u64>,
}
584
585impl GetTokenUsage for StreamingCompletionResponse {
586    fn token_usage(&self) -> Option<crate::completion::Usage> {
587        let mut usage = crate::completion::Usage::new();
588        let input_tokens = self.prompt_eval_count.unwrap_or_default();
589        let output_tokens = self.eval_count.unwrap_or_default();
590        usage.input_tokens = input_tokens;
591        usage.output_tokens = output_tokens;
592        usage.total_tokens = input_tokens + output_tokens;
593
594        Some(usage)
595    }
596}
597
/// Reassembles newline-delimited JSON lines from a chunked HTTP byte stream.
///
/// `bytes_stream` makes no promises about chunk boundaries, so a single NDJSON
/// line can be split across multiple chunks. `NdjsonBuffer` holds the trailing
/// fragment between calls and yields only fully terminated lines.
///
/// Invariant: between calls, `buf` never contains a `\n` byte.
#[derive(Default)]
struct NdjsonBuffer {
    buf: Vec<u8>,
}

impl NdjsonBuffer {
    /// Creates an empty buffer.
    fn new() -> Self {
        Self::default()
    }

    /// Appends `chunk` to the buffer and returns any newly completed lines.
    ///
    /// Lines are returned without their terminator; both `\n` and `\r\n` are
    /// accepted. Empty lines are skipped; trailing partial data is retained
    /// for the next call.
    fn decode(&mut self, chunk: &[u8]) -> Vec<Vec<u8>> {
        // The retained fragment holds no newline (see the struct invariant),
        // so only the freshly appended bytes need to be scanned.
        let mut search_from = self.buf.len();
        self.buf.extend_from_slice(chunk);

        let mut lines = Vec::new();
        let mut line_start = 0;
        while let Some(offset) = self.buf[search_from..].iter().position(|&b| b == b'\n') {
            let line_end = search_from + offset;
            let line = &self.buf[line_start..line_end];
            let line = line.strip_suffix(b"\r").unwrap_or(line);
            if !line.is_empty() {
                lines.push(line.to_vec());
            }
            line_start = line_end + 1;
            search_from = line_start;
        }

        // Drop everything consumed in one shift instead of once per line.
        self.buf.drain(..line_start);
        lines
    }
}
629
630impl<T> completion::CompletionModel for CompletionModel<T>
631where
632    T: HttpClientExt + Clone + Default + std::fmt::Debug + Send + 'static,
633{
634    type Response = CompletionResponse;
635    type StreamingResponse = StreamingCompletionResponse;
636
637    type Client = Client<T>;
638
639    fn make(client: &Self::Client, model: impl Into<String>) -> Self {
640        Self::new(client.clone(), model.into().as_str())
641    }
642
    /// Sends a non-streaming chat request to `api/chat` and converts the
    /// response.
    ///
    /// # Errors
    /// * Request conversion, serialization and HTTP errors are propagated.
    /// * `CompletionError::ProviderError` carrying the response body (lossily
    ///   decoded as UTF-8) on a non-success status.
    /// * Any error from parsing or converting the response body.
    async fn completion(
        &self,
        completion_request: CompletionRequest,
    ) -> Result<completion::CompletionResponse<Self::Response>, CompletionError> {
        // Reuse the caller's span when one is active; otherwise open a new
        // `chat` span with the fields recorded later declared as empty.
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat",
                gen_ai.operation.name = "chat",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = tracing::field::Empty,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        // Recorded before the request is consumed by the conversion below.
        span.record("gen_ai.system_instructions", &completion_request.preamble);
        let request = OllamaCompletionRequest::try_from((self.model.as_ref(), completion_request))?;

        // Pretty-printing is only paid for when TRACE is enabled.
        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        let async_block = async move {
            let response = self.client.send::<_, Bytes>(req).await?;
            // Capture the status first: reading the body consumes the response.
            let status = response.status();
            let response_body = response.into_body().into_future().await?.to_vec();

            if !status.is_success() {
                return Err(CompletionError::ProviderError(
                    String::from_utf8_lossy(&response_body).to_string(),
                ));
            }

            let response: CompletionResponse = serde_json::from_slice(&response_body)?;
            // This block runs instrumented with `span`, so the current span
            // here is the one selected above.
            let span = tracing::Span::current();
            span.record("gen_ai.response.model", &response.model);
            span.record(
                "gen_ai.usage.input_tokens",
                response.prompt_eval_count.unwrap_or_default(),
            );
            span.record(
                "gen_ai.usage.output_tokens",
                response.eval_count.unwrap_or_default(),
            );

            if tracing::enabled!(tracing::Level::TRACE) {
                tracing::trace!(target: "rig::completions",
                    "Ollama completion response: {}",
                    serde_json::to_string_pretty(&response)?
                );
            }

            let response: completion::CompletionResponse<CompletionResponse> =
                response.try_into()?;

            Ok(response)
        };

        tracing::Instrument::instrument(async_block, span).await
    }
721
722    async fn stream(
723        &self,
724        request: CompletionRequest,
725    ) -> Result<streaming::StreamingCompletionResponse<Self::StreamingResponse>, CompletionError>
726    {
727        let span = if tracing::Span::current().is_disabled() {
728            info_span!(
729                target: "rig::completions",
730                "chat_streaming",
731                gen_ai.operation.name = "chat_streaming",
732                gen_ai.provider.name = "ollama",
733                gen_ai.request.model = self.model,
734                gen_ai.system_instructions = tracing::field::Empty,
735                gen_ai.response.id = tracing::field::Empty,
736                gen_ai.response.model = self.model,
737                gen_ai.usage.output_tokens = tracing::field::Empty,
738                gen_ai.usage.input_tokens = tracing::field::Empty,
739                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
740            )
741        } else {
742            tracing::Span::current()
743        };
744
745        span.record("gen_ai.system_instructions", &request.preamble);
746
747        let mut request = OllamaCompletionRequest::try_from((self.model.as_ref(), request))?;
748        request.stream = true;
749
750        if tracing::enabled!(tracing::Level::TRACE) {
751            tracing::trace!(target: "rig::completions",
752                "Ollama streaming completion request: {}",
753                serde_json::to_string_pretty(&request)?
754            );
755        }
756
757        let body = serde_json::to_vec(&request)?;
758
759        let req = self
760            .client
761            .post("api/chat")?
762            .body(body)
763            .map_err(http_client::Error::from)?;
764
765        let response = self.client.send_streaming(req).await?;
766        let status = response.status();
767        let mut byte_stream = response.into_body();
768
769        if !status.is_success() {
770            return Err(CompletionError::ProviderError(format!(
771                "Got error status code trying to send a request to Ollama: {status}"
772            )));
773        }
774
775        let stream = try_stream! {
776            let span = tracing::Span::current();
777            let mut tool_calls_final = Vec::new();
778            let mut text_response = String::new();
779            let mut thinking_response = String::new();
780            let mut line_buf = NdjsonBuffer::new();
781
782            while let Some(chunk) = byte_stream.next().await {
783                let bytes = chunk.map_err(|e| http_client::Error::Instance(e.into()))?;
784
785                for line in line_buf.decode(&bytes) {
786                    tracing::debug!(target: "rig", "Received NDJSON line from Ollama: {}", String::from_utf8_lossy(&line));
787
788                    let response: CompletionResponse = serde_json::from_slice(&line)?;
789
790                    if let Message::Assistant { content, thinking, tool_calls, .. } = response.message {
791                        if let Some(thinking_content) = thinking && !thinking_content.is_empty() {
792                            thinking_response += &thinking_content;
793                            yield RawStreamingChoice::ReasoningDelta {
794                                id: None,
795                                reasoning: thinking_content,
796                            };
797                        }
798
799                        if !content.is_empty() {
800                            text_response += &content;
801                            yield RawStreamingChoice::Message(content);
802                        }
803
804                        for tool_call in tool_calls {
805                            tool_calls_final.push(tool_call.clone());
806                            yield RawStreamingChoice::ToolCall(
807                                crate::streaming::RawStreamingToolCall::new(String::new(), tool_call.function.name, tool_call.function.arguments)
808                            );
809                        }
810                    }
811
812                    if response.done {
813                        span.record("gen_ai.usage.input_tokens", response.prompt_eval_count);
814                        span.record("gen_ai.usage.output_tokens", response.eval_count);
815                        let message = Message::Assistant {
816                            content: text_response.clone(),
817                            thinking: if thinking_response.is_empty() { None } else { Some(thinking_response.clone()) },
818                            images: None,
819                            name: None,
820                            tool_calls: tool_calls_final.clone()
821                        };
822                        if let Ok(serialized_message) = serde_json::to_string(&vec![message]) {
823                            span.record("gen_ai.output.messages", serialized_message);
824                        }
825                        yield RawStreamingChoice::FinalResponse(
826                            StreamingCompletionResponse {
827                                total_duration: response.total_duration,
828                                load_duration: response.load_duration,
829                                prompt_eval_count: response.prompt_eval_count,
830                                prompt_eval_duration: response.prompt_eval_duration,
831                                eval_count: response.eval_count,
832                                eval_duration: response.eval_duration,
833                                done_reason: response.done_reason,
834                            }
835                        );
836                        break;
837                    }
838                }
839            }
840        }.instrument(span);
841
842        Ok(streaming::StreamingCompletionResponse::stream(Box::pin(
843            stream,
844        )))
845    }
846}
847
848// ---------- Model Listing  ----------
849
/// Body of the `/api/tags` response: an object with a `models` array.
#[derive(Debug, Deserialize)]
struct ListModelsResponse {
    models: Vec<ListModelEntry>,
}
854
/// One entry of the `models` array; only the fields used here are declared.
#[derive(Debug, Deserialize)]
struct ListModelEntry {
    // Passed as the second argument of `Model::new`.
    name: String,
    // Passed as the first argument of `Model::new`.
    model: String,
}
860
861impl From<ListModelEntry> for Model {
862    fn from(value: ListModelEntry) -> Self {
863        Model::new(value.model, value.name)
864    }
865}
866
/// [`ModelLister`] implementation for the Ollama API (`GET /api/tags`).
///
/// Holds the client used to issue the listing request.
#[derive(Clone)]
pub struct OllamaModelLister<H = reqwest::Client> {
    client: Client<H>,
}
872
873impl<H> ModelLister<H> for OllamaModelLister<H>
874where
875    H: HttpClientExt + WasmCompatSend + WasmCompatSync + 'static,
876{
877    type Client = Client<H>;
878
879    fn new(client: Self::Client) -> Self {
880        Self { client }
881    }
882
883    async fn list_all(&self) -> Result<ModelList, ModelListingError> {
884        let path = "/api/tags";
885        let req = self.client.get(path)?.body(http_client::NoBody)?;
886        let response = self.client.send::<_, Vec<u8>>(req).await?;
887
888        if !response.status().is_success() {
889            let status_code = response.status().as_u16();
890            let body = response.into_body().await?;
891            return Err(ModelListingError::api_error_with_context(
892                "Ollama",
893                path,
894                status_code,
895                &body,
896            ));
897        }
898
899        let body = response.into_body().await?;
900        let api_resp: ListModelsResponse = serde_json::from_slice(&body).map_err(|error| {
901            ModelListingError::parse_error_with_context("Ollama", path, &error, &body)
902        })?;
903        let models = api_resp.models.into_iter().map(Model::from).collect();
904
905        Ok(ModelList::new(models))
906    }
907}
908
909// ---------- Tool Definition Conversion ----------
910
/// Ollama-required tool definition format.
///
/// Wraps the crate-level tool definition in an object that also carries a `type` key.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolDefinition {
    /// Serialized under the key `type`.
    #[serde(rename = "type")]
    pub type_field: String, // Fixed as "function"
    /// The wrapped tool definition (name, description, parameters).
    pub function: completion::ToolDefinition,
}
918
919/// Convert internal ToolDefinition (from the completion module) into Ollama's tool definition.
920impl From<crate::completion::ToolDefinition> for ToolDefinition {
921    fn from(tool: crate::completion::ToolDefinition) -> Self {
922        ToolDefinition {
923            type_field: "function".to_owned(),
924            function: completion::ToolDefinition {
925                name: tool.name,
926                description: tool.description,
927                parameters: tool.parameters,
928            },
929        }
930    }
931}
932
/// A tool invocation attached to an assistant message.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolCall {
    /// Kind of tool call; takes the default [`ToolType`] when the `type` key is absent.
    #[serde(default, rename = "type")]
    pub r#type: ToolType,
    /// Name and arguments of the function being called.
    pub function: Function,
}
/// Kind of a [`ToolCall`]; variant names are serialized in lowercase.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    /// A function call (serialized as `function`); this is the default.
    #[default]
    Function,
}
/// The function part of a [`ToolCall`].
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct Function {
    /// Name of the function to call.
    pub name: String,
    /// Arguments for the call, kept as an arbitrary JSON value.
    pub arguments: Value,
}
950
951// ---------- Provider Message Definition ----------
952
/// A chat message in the shape used by this provider.
///
/// Serialized with an internal `role` tag whose value is the lowercased variant name, except
/// for [`Message::ToolResult`], which is tagged `tool`.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum Message {
    /// Message tagged `role: "user"`.
    User {
        /// Text of the message.
        content: String,
        /// Optional image payloads; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        /// Optional name; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    /// Message tagged `role: "assistant"`.
    Assistant {
        /// Text of the message; defaults to an empty string when the key is absent.
        #[serde(default)]
        content: String,
        /// Optional thinking text; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        thinking: Option<String>,
        /// Optional image payloads; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        /// Optional name; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        /// Tool calls made by the assistant. Empty when the key is absent; deserialized through
        /// `json_utils::null_or_vec` (presumably so an explicit `null` also yields an empty
        /// list — confirm against that helper).
        #[serde(default, deserialize_with = "json_utils::null_or_vec")]
        tool_calls: Vec<ToolCall>,
    },
    /// Message tagged `role: "system"`.
    System {
        /// Text of the message.
        content: String,
        /// Optional image payloads; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        /// Optional name; left out of the serialized form when `None`.
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    /// Result of a tool invocation, tagged `role: "tool"`.
    #[serde(rename = "tool")]
    ToolResult {
        /// Serialized under the key `tool_name`.
        #[serde(rename = "tool_name")]
        name: String,
        /// Text of the tool result.
        content: String,
    },
}
989
990/// -----------------------------
991/// Provider Message Conversions
992/// -----------------------------
993/// Conversion from an internal Rig message (crate::message::Message) to a provider Message.
994/// (Only User and Assistant variants are supported.)
995impl TryFrom<crate::message::Message> for Vec<Message> {
996    type Error = crate::message::MessageError;
997    fn try_from(internal_msg: crate::message::Message) -> Result<Self, Self::Error> {
998        use crate::message::Message as InternalMessage;
999        match internal_msg {
1000            InternalMessage::System { content } => Ok(vec![Message::System {
1001                content,
1002                images: None,
1003                name: None,
1004            }]),
1005            InternalMessage::User { content, .. } => {
1006                let (tool_results, other_content): (Vec<_>, Vec<_>) =
1007                    content.into_iter().partition(|content| {
1008                        matches!(content, crate::message::UserContent::ToolResult(_))
1009                    });
1010
1011                if !tool_results.is_empty() {
1012                    tool_results
1013                        .into_iter()
1014                        .map(|content| match content {
1015                            crate::message::UserContent::ToolResult(
1016                                crate::message::ToolResult { id, content, .. },
1017                            ) => {
1018                                // Ollama expects a single string for tool results, so we concatenate
1019                                let content_string = content
1020                                    .into_iter()
1021                                    .map(|content| match content {
1022                                        crate::message::ToolResultContent::Text(text) => text.text,
1023                                        _ => "[Non-text content]".to_string(),
1024                                    })
1025                                    .collect::<Vec<_>>()
1026                                    .join("\n");
1027
1028                                Ok::<_, crate::message::MessageError>(Message::ToolResult {
1029                                    name: id,
1030                                    content: content_string,
1031                                })
1032                            }
1033                            _ => Err(crate::message::MessageError::ConversionError(
1034                                "expected tool result content while converting Ollama input".into(),
1035                            )),
1036                        })
1037                        .collect::<Result<Vec<_>, _>>()
1038                } else {
1039                    // Ollama requires separate text content and images array
1040                    let (texts, images) = other_content.into_iter().fold(
1041                        (Vec::new(), Vec::new()),
1042                        |(mut texts, mut images), content| {
1043                            match content {
1044                                crate::message::UserContent::Text(crate::message::Text {
1045                                    text,
1046                                    ..
1047                                }) => texts.push(text),
1048                                crate::message::UserContent::Image(crate::message::Image {
1049                                    data: DocumentSourceKind::Base64(data),
1050                                    ..
1051                                }) => images.push(data),
1052                                crate::message::UserContent::Document(
1053                                    crate::message::Document {
1054                                        data:
1055                                            DocumentSourceKind::Base64(data)
1056                                            | DocumentSourceKind::String(data),
1057                                        ..
1058                                    },
1059                                ) => texts.push(data),
1060                                _ => {} // Audio not supported by Ollama
1061                            }
1062                            (texts, images)
1063                        },
1064                    );
1065
1066                    Ok(vec![Message::User {
1067                        content: texts.join(" "),
1068                        images: if images.is_empty() {
1069                            None
1070                        } else {
1071                            Some(
1072                                images
1073                                    .into_iter()
1074                                    .map(|x| x.to_string())
1075                                    .collect::<Vec<String>>(),
1076                            )
1077                        },
1078                        name: None,
1079                    }])
1080                }
1081            }
1082            InternalMessage::Assistant { content, .. } => {
1083                let mut thinking: Option<String> = None;
1084                let mut text_content = Vec::new();
1085                let mut tool_calls = Vec::new();
1086
1087                for content in content.into_iter() {
1088                    match content {
1089                        crate::message::AssistantContent::Text(text) => {
1090                            text_content.push(text.text)
1091                        }
1092                        crate::message::AssistantContent::ToolCall(tool_call) => {
1093                            tool_calls.push(tool_call)
1094                        }
1095                        crate::message::AssistantContent::Reasoning(reasoning) => {
1096                            let display = reasoning.display_text();
1097                            if !display.is_empty() {
1098                                thinking = Some(display);
1099                            }
1100                        }
1101                        crate::message::AssistantContent::Image(_) => {
1102                            return Err(crate::message::MessageError::ConversionError(
1103                                "Ollama currently doesn't support images.".into(),
1104                            ));
1105                        }
1106                    }
1107                }
1108
1109                // `OneOrMany` ensures at least one `AssistantContent::Text` or `ToolCall` exists,
1110                //  so either `content` or `tool_calls` will have some content.
1111                Ok(vec![Message::Assistant {
1112                    content: text_content.join(" "),
1113                    thinking,
1114                    images: None,
1115                    name: None,
1116                    tool_calls: tool_calls
1117                        .into_iter()
1118                        .map(|tool_call| tool_call.into())
1119                        .collect::<Vec<_>>(),
1120                }])
1121            }
1122        }
1123    }
1124}
1125
1126/// Conversion from provider Message to a completion message.
1127/// This is needed so that responses can be converted back into chat history.
1128impl From<Message> for crate::completion::Message {
1129    fn from(msg: Message) -> Self {
1130        match msg {
1131            Message::User { content, .. } => crate::completion::Message::User {
1132                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text::new(
1133                    content,
1134                ))),
1135            },
1136            Message::Assistant {
1137                content,
1138                tool_calls,
1139                ..
1140            } => {
1141                let mut assistant_contents =
1142                    vec![crate::completion::message::AssistantContent::Text(
1143                        Text::new(content),
1144                    )];
1145                for tc in tool_calls {
1146                    assistant_contents.push(
1147                        crate::completion::message::AssistantContent::tool_call(
1148                            tc.function.name.clone(),
1149                            tc.function.name,
1150                            tc.function.arguments,
1151                        ),
1152                    );
1153                }
1154                let content =
1155                    OneOrMany::from_iter_optional(assistant_contents).unwrap_or_else(|| {
1156                        OneOrMany::one(crate::completion::message::AssistantContent::Text(
1157                            Text::new(String::new()),
1158                        ))
1159                    });
1160
1161                crate::completion::Message::Assistant { id: None, content }
1162            }
1163            // System and ToolResult are converted to User message as needed.
1164            Message::System { content, .. } => crate::completion::Message::User {
1165                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text::new(
1166                    content,
1167                ))),
1168            },
1169            Message::ToolResult { name, content } => crate::completion::Message::User {
1170                content: OneOrMany::one(message::UserContent::tool_result(
1171                    name,
1172                    OneOrMany::one(message::ToolResultContent::text(content)),
1173                )),
1174            },
1175        }
1176    }
1177}
1178
1179impl Message {
1180    /// Constructs a system message.
1181    pub fn system(content: &str) -> Self {
1182        Message::System {
1183            content: content.to_owned(),
1184            images: None,
1185            name: None,
1186        }
1187    }
1188}
1189
1190// ---------- Additional Message Types ----------
1191
1192impl From<crate::message::ToolCall> for ToolCall {
1193    fn from(tool_call: crate::message::ToolCall) -> Self {
1194        Self {
1195            r#type: ToolType::Function,
1196            function: Function {
1197                name: tool_call.function.name,
1198                arguments: tool_call.function.arguments,
1199            },
1200        }
1201    }
1202}
1203
/// A typed piece of system content.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct SystemContent {
    /// Content kind; takes the default [`SystemContentType`] when absent during deserialization.
    #[serde(default)]
    r#type: SystemContentType,
    /// The text itself.
    text: String,
}
1210
/// Kind of a [`SystemContent`]; variant names are serialized in lowercase.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum SystemContentType {
    /// Plain text (serialized as `text`); this is the default.
    #[default]
    Text,
}
1217
1218impl From<String> for SystemContent {
1219    fn from(s: String) -> Self {
1220        SystemContent {
1221            r#type: SystemContentType::default(),
1222            text: s,
1223        }
1224    }
1225}
1226
1227impl FromStr for SystemContent {
1228    type Err = std::convert::Infallible;
1229    fn from_str(s: &str) -> Result<Self, Self::Err> {
1230        Ok(SystemContent {
1231            r#type: SystemContentType::default(),
1232            text: s.to_string(),
1233        })
1234    }
1235}
1236
/// Text-only assistant content.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct AssistantContent {
    /// The text itself.
    pub text: String,
}
1241
1242impl FromStr for AssistantContent {
1243    type Err = std::convert::Infallible;
1244    fn from_str(s: &str) -> Result<Self, Self::Err> {
1245        Ok(AssistantContent { text: s.to_owned() })
1246    }
1247}
1248
/// A piece of user content, serialized with an internal `type` tag holding the lowercased
/// variant name.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "type", rename_all = "lowercase")]
pub enum UserContent {
    /// Plain text, tagged `text`.
    Text { text: String },
    /// An image reference, tagged `image`.
    Image { image_url: ImageUrl },
    // Audio variant removed as Ollama API does not support audio input.
}
1256
1257impl FromStr for UserContent {
1258    type Err = std::convert::Infallible;
1259    fn from_str(s: &str) -> Result<Self, Self::Err> {
1260        Ok(UserContent::Text { text: s.to_owned() })
1261    }
1262}
1263
/// Image reference used by [`UserContent::Image`].
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ImageUrl {
    /// Location of the image.
    pub url: String,
    /// Detail setting; takes the default `ImageDetail` when absent during deserialization.
    #[serde(default)]
    pub detail: ImageDetail,
}
1270
1271// =================================================================
1272// Tests
1273// =================================================================
1274
1275#[cfg(test)]
1276mod tests {
1277    use super::*;
1278    use serde_json::json;
1279
1280    // Test deserialization and conversion for the /api/chat endpoint.
1281    #[tokio::test]
1282    async fn test_chat_completion() {
1283        // Sample JSON response from /api/chat (non-streaming) based on Ollama docs.
1284        let sample_chat_response = json!({
1285            "model": "llama3.2",
1286            "created_at": "2023-08-04T19:22:45.499127Z",
1287            "message": {
1288                "role": "assistant",
1289                "content": "The sky is blue because of Rayleigh scattering.",
1290                "images": null,
1291                "tool_calls": [
1292                    {
1293                        "type": "function",
1294                        "function": {
1295                            "name": "get_current_weather",
1296                            "arguments": {
1297                                "location": "San Francisco, CA",
1298                                "format": "celsius"
1299                            }
1300                        }
1301                    }
1302                ]
1303            },
1304            "done": true,
1305            "total_duration": 8000000000u64,
1306            "load_duration": 6000000u64,
1307            "prompt_eval_count": 61u64,
1308            "prompt_eval_duration": 400000000u64,
1309            "eval_count": 468u64,
1310            "eval_duration": 7700000000u64
1311        });
1312        let sample_text = sample_chat_response.to_string();
1313
1314        let chat_resp: CompletionResponse =
1315            serde_json::from_str(&sample_text).expect("Invalid JSON structure");
1316        let conv: completion::CompletionResponse<CompletionResponse> =
1317            chat_resp.try_into().unwrap();
1318        assert!(
1319            !conv.choice.is_empty(),
1320            "Expected non-empty choice in chat response"
1321        );
1322    }
1323
1324    // Test conversion from provider Message to completion Message.
1325    #[test]
1326    fn test_message_conversion() {
1327        // Construct a provider Message (User variant with String content).
1328        let provider_msg = Message::User {
1329            content: "Test message".to_owned(),
1330            images: None,
1331            name: None,
1332        };
1333        // Convert it into a completion::Message.
1334        let comp_msg: crate::completion::Message = provider_msg.into();
1335        match comp_msg {
1336            crate::completion::Message::User { content } => {
1337                // Assume OneOrMany<T> has a method first() to access the first element.
1338                let first_content = content.first();
1339                // The expected type is crate::completion::message::UserContent::Text wrapping a Text struct.
1340                match first_content {
1341                    crate::completion::message::UserContent::Text(text_struct) => {
1342                        assert_eq!(text_struct.text, "Test message");
1343                    }
1344                    _ => panic!("Expected text content in conversion"),
1345                }
1346            }
1347            _ => panic!("Conversion from provider Message to completion Message failed"),
1348        }
1349    }
1350
1351    // Test conversion of internal tool definition to Ollama's ToolDefinition format.
1352    #[test]
1353    fn test_tool_definition_conversion() {
1354        // Internal tool definition from the completion module.
1355        let internal_tool = crate::completion::ToolDefinition {
1356            name: "get_current_weather".to_owned(),
1357            description: "Get the current weather for a location".to_owned(),
1358            parameters: json!({
1359                "type": "object",
1360                "properties": {
1361                    "location": {
1362                        "type": "string",
1363                        "description": "The location to get the weather for, e.g. San Francisco, CA"
1364                    },
1365                    "format": {
1366                        "type": "string",
1367                        "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
1368                        "enum": ["celsius", "fahrenheit"]
1369                    }
1370                },
1371                "required": ["location", "format"]
1372            }),
1373        };
1374        // Convert internal tool to Ollama's tool definition.
1375        let ollama_tool: ToolDefinition = internal_tool.into();
1376        assert_eq!(ollama_tool.type_field, "function");
1377        assert_eq!(ollama_tool.function.name, "get_current_weather");
1378        assert_eq!(
1379            ollama_tool.function.description,
1380            "Get the current weather for a location"
1381        );
1382        // Check JSON fields in parameters.
1383        let params = &ollama_tool.function.parameters;
1384        assert_eq!(params["properties"]["location"]["type"], "string");
1385    }
1386
1387    // Test deserialization of chat response with thinking content
1388    #[tokio::test]
1389    async fn test_chat_completion_with_thinking() {
1390        let sample_response = json!({
1391            "model": "qwen-thinking",
1392            "created_at": "2023-08-04T19:22:45.499127Z",
1393            "message": {
1394                "role": "assistant",
1395                "content": "The answer is 42.",
1396                "thinking": "Let me think about this carefully. The question asks for the meaning of life...",
1397                "images": null,
1398                "tool_calls": []
1399            },
1400            "done": true,
1401            "total_duration": 8000000000u64,
1402            "load_duration": 6000000u64,
1403            "prompt_eval_count": 61u64,
1404            "prompt_eval_duration": 400000000u64,
1405            "eval_count": 468u64,
1406            "eval_duration": 7700000000u64
1407        });
1408
1409        let chat_resp: CompletionResponse =
1410            serde_json::from_value(sample_response).expect("Failed to deserialize");
1411
1412        // Verify thinking field is present
1413        if let Message::Assistant {
1414            thinking, content, ..
1415        } = &chat_resp.message
1416        {
1417            assert_eq!(
1418                thinking.as_ref().unwrap(),
1419                "Let me think about this carefully. The question asks for the meaning of life..."
1420            );
1421            assert_eq!(content, "The answer is 42.");
1422        } else {
1423            panic!("Expected Assistant message");
1424        }
1425    }
1426
1427    // Test deserialization of chat response without thinking content
1428    #[tokio::test]
1429    async fn test_chat_completion_without_thinking() {
1430        let sample_response = json!({
1431            "model": "llama3.2",
1432            "created_at": "2023-08-04T19:22:45.499127Z",
1433            "message": {
1434                "role": "assistant",
1435                "content": "Hello!",
1436                "images": null,
1437                "tool_calls": []
1438            },
1439            "done": true,
1440            "total_duration": 8000000000u64,
1441            "load_duration": 6000000u64,
1442            "prompt_eval_count": 10u64,
1443            "prompt_eval_duration": 400000000u64,
1444            "eval_count": 5u64,
1445            "eval_duration": 7700000000u64
1446        });
1447
1448        let chat_resp: CompletionResponse =
1449            serde_json::from_value(sample_response).expect("Failed to deserialize");
1450
1451        // Verify thinking field is None when not provided
1452        if let Message::Assistant {
1453            thinking, content, ..
1454        } = &chat_resp.message
1455        {
1456            assert!(thinking.is_none());
1457            assert_eq!(content, "Hello!");
1458        } else {
1459            panic!("Expected Assistant message");
1460        }
1461    }
1462
1463    // Test deserialization of streaming response with thinking content
1464    #[test]
1465    fn test_streaming_response_with_thinking() {
1466        let sample_chunk = json!({
1467            "model": "qwen-thinking",
1468            "created_at": "2023-08-04T19:22:45.499127Z",
1469            "message": {
1470                "role": "assistant",
1471                "content": "",
1472                "thinking": "Analyzing the problem...",
1473                "images": null,
1474                "tool_calls": []
1475            },
1476            "done": false
1477        });
1478
1479        let chunk: CompletionResponse =
1480            serde_json::from_value(sample_chunk).expect("Failed to deserialize");
1481
1482        if let Message::Assistant {
1483            thinking, content, ..
1484        } = &chunk.message
1485        {
1486            assert_eq!(thinking.as_ref().unwrap(), "Analyzing the problem...");
1487            assert_eq!(content, "");
1488        } else {
1489            panic!("Expected Assistant message");
1490        }
1491    }
1492
1493    // Test message conversion with thinking content
1494    #[test]
1495    fn test_message_conversion_with_thinking() {
1496        // Create an internal message with reasoning content
1497        let reasoning_content = crate::message::Reasoning::new("Step 1: Consider the problem");
1498
1499        let internal_msg = crate::message::Message::Assistant {
1500            id: None,
1501            content: crate::OneOrMany::many(vec![
1502                crate::message::AssistantContent::Reasoning(reasoning_content),
1503                crate::message::AssistantContent::Text(crate::message::Text::new(
1504                    "The answer is X".to_string(),
1505                )),
1506            ])
1507            .unwrap(),
1508        };
1509
1510        // Convert to provider Message
1511        let provider_msgs: Vec<Message> = internal_msg.try_into().unwrap();
1512        assert_eq!(provider_msgs.len(), 1);
1513
1514        if let Message::Assistant {
1515            thinking, content, ..
1516        } = &provider_msgs[0]
1517        {
1518            assert_eq!(thinking.as_ref().unwrap(), "Step 1: Consider the problem");
1519            assert_eq!(content, "The answer is X");
1520        } else {
1521            panic!("Expected Assistant message with thinking");
1522        }
1523    }
1524
1525    // Test empty thinking content is handled correctly
1526    #[test]
1527    fn test_empty_thinking_content() {
1528        let sample_response = json!({
1529            "model": "llama3.2",
1530            "created_at": "2023-08-04T19:22:45.499127Z",
1531            "message": {
1532                "role": "assistant",
1533                "content": "Response",
1534                "thinking": "",
1535                "images": null,
1536                "tool_calls": []
1537            },
1538            "done": true,
1539            "total_duration": 8000000000u64,
1540            "load_duration": 6000000u64,
1541            "prompt_eval_count": 10u64,
1542            "prompt_eval_duration": 400000000u64,
1543            "eval_count": 5u64,
1544            "eval_duration": 7700000000u64
1545        });
1546
1547        let chat_resp: CompletionResponse =
1548            serde_json::from_value(sample_response).expect("Failed to deserialize");
1549
1550        if let Message::Assistant {
1551            thinking, content, ..
1552        } = &chat_resp.message
1553        {
1554            // Empty string should still deserialize as Some("")
1555            assert_eq!(thinking.as_ref().unwrap(), "");
1556            assert_eq!(content, "Response");
1557        } else {
1558            panic!("Expected Assistant message");
1559        }
1560    }
1561
1562    // Test thinking with tool calls
1563    #[test]
1564    fn test_thinking_with_tool_calls() {
1565        let sample_response = json!({
1566            "model": "qwen-thinking",
1567            "created_at": "2023-08-04T19:22:45.499127Z",
1568            "message": {
1569                "role": "assistant",
1570                "content": "Let me check the weather.",
1571                "thinking": "User wants weather info, I should use the weather tool",
1572                "images": null,
1573                "tool_calls": [
1574                    {
1575                        "type": "function",
1576                        "function": {
1577                            "name": "get_weather",
1578                            "arguments": {
1579                                "location": "San Francisco"
1580                            }
1581                        }
1582                    }
1583                ]
1584            },
1585            "done": true,
1586            "total_duration": 8000000000u64,
1587            "load_duration": 6000000u64,
1588            "prompt_eval_count": 30u64,
1589            "prompt_eval_duration": 400000000u64,
1590            "eval_count": 50u64,
1591            "eval_duration": 7700000000u64
1592        });
1593
1594        let chat_resp: CompletionResponse =
1595            serde_json::from_value(sample_response).expect("Failed to deserialize");
1596
1597        if let Message::Assistant {
1598            thinking,
1599            content,
1600            tool_calls,
1601            ..
1602        } = &chat_resp.message
1603        {
1604            assert_eq!(
1605                thinking.as_ref().unwrap(),
1606                "User wants weather info, I should use the weather tool"
1607            );
1608            assert_eq!(content, "Let me check the weather.");
1609            assert_eq!(tool_calls.len(), 1);
1610            assert_eq!(tool_calls[0].function.name, "get_weather");
1611        } else {
1612            panic!("Expected Assistant message with thinking and tool calls");
1613        }
1614    }
1615
1616    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
1617    #[test]
1618    fn test_completion_request_with_think_param() {
1619        use crate::OneOrMany;
1620        use crate::completion::Message as CompletionMessage;
1621        use crate::message::{Text, UserContent};
1622
1623        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
1624        let completion_request = CompletionRequest {
1625            model: None,
1626            preamble: Some("You are a helpful assistant.".to_string()),
1627            chat_history: OneOrMany::one(CompletionMessage::User {
1628                content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1629            }),
1630            documents: vec![],
1631            tools: vec![],
1632            temperature: Some(0.7),
1633            max_tokens: Some(1024),
1634            tool_choice: None,
1635            additional_params: Some(json!({
1636                "think": true,
1637                "keep_alive": "-1m",
1638                "num_ctx": 4096
1639            })),
1640            output_schema: None,
1641        };
1642
1643        // Convert to OllamaCompletionRequest
1644        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1645            .expect("Failed to create Ollama request");
1646
1647        // Serialize to JSON
1648        let serialized =
1649            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1650
1651        // Assert equality with expected JSON
1652        // - "tools" is skipped when empty (skip_serializing_if)
1653        // - "think" should be a top-level boolean, NOT in options
1654        // - "keep_alive" should be a top-level string, NOT in options
1655        // - "num_ctx" should be in options (it's a model parameter)
1656        let expected = json!({
1657            "model": "qwen3:8b",
1658            "messages": [
1659                {
1660                    "role": "system",
1661                    "content": "You are a helpful assistant."
1662                },
1663                {
1664                    "role": "user",
1665                    "content": "What is 2 + 2?"
1666                }
1667            ],
1668            "temperature": 0.7,
1669            "stream": false,
1670            "think": true,
1671            "max_tokens": 1024,
1672            "keep_alive": "-1m",
1673            "options": {
1674                "temperature": 0.7,
1675                "num_ctx": 4096
1676            }
1677        });
1678
1679        assert_eq!(serialized, expected);
1680    }
1681
1682    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
1683    #[test]
1684    fn test_completion_request_with_level_low_think_param() {
1685        use crate::OneOrMany;
1686        use crate::completion::Message as CompletionMessage;
1687        use crate::message::{Text, UserContent};
1688
1689        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
1690        let completion_request = CompletionRequest {
1691            model: None,
1692            preamble: Some("You are a helpful assistant.".to_string()),
1693            chat_history: OneOrMany::one(CompletionMessage::User {
1694                content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1695            }),
1696            documents: vec![],
1697            tools: vec![],
1698            temperature: Some(0.7),
1699            max_tokens: Some(1024),
1700            tool_choice: None,
1701            additional_params: Some(json!({
1702                "think": "low",
1703                "keep_alive": "-1m",
1704                "num_ctx": 4096
1705            })),
1706            output_schema: None,
1707        };
1708
1709        // Convert to OllamaCompletionRequest
1710        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1711            .expect("Failed to create Ollama request");
1712
1713        // Serialize to JSON
1714        let serialized =
1715            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1716
1717        // Assert equality with expected JSON
1718        // - "tools" is skipped when empty (skip_serializing_if)
1719        // - "think" should be a top-level boolean, NOT in options
1720        // - "keep_alive" should be a top-level string, NOT in options
1721        // - "num_ctx" should be in options (it's a model parameter)
1722        let expected = json!({
1723            "model": "qwen3:8b",
1724            "messages": [
1725                {
1726                    "role": "system",
1727                    "content": "You are a helpful assistant."
1728                },
1729                {
1730                    "role": "user",
1731                    "content": "What is 2 + 2?"
1732                }
1733            ],
1734            "temperature": 0.7,
1735            "stream": false,
1736            "think": "low",
1737            "max_tokens": 1024,
1738            "keep_alive": "-1m",
1739            "options": {
1740                "temperature": 0.7,
1741                "num_ctx": 4096
1742            }
1743        });
1744
1745        assert_eq!(serialized, expected);
1746    }
1747
1748    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
1749    #[test]
1750    fn test_completion_request_with_level_medium_think_param() {
1751        use crate::OneOrMany;
1752        use crate::completion::Message as CompletionMessage;
1753        use crate::message::{Text, UserContent};
1754
1755        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
1756        let completion_request = CompletionRequest {
1757            model: None,
1758            preamble: Some("You are a helpful assistant.".to_string()),
1759            chat_history: OneOrMany::one(CompletionMessage::User {
1760                content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1761            }),
1762            documents: vec![],
1763            tools: vec![],
1764            temperature: Some(0.7),
1765            max_tokens: Some(1024),
1766            tool_choice: None,
1767            additional_params: Some(json!({
1768                "think": "medium",
1769                "keep_alive": "-1m",
1770                "num_ctx": 4096
1771            })),
1772            output_schema: None,
1773        };
1774
1775        // Convert to OllamaCompletionRequest
1776        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1777            .expect("Failed to create Ollama request");
1778
1779        // Serialize to JSON
1780        let serialized =
1781            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1782
1783        // Assert equality with expected JSON
1784        // - "tools" is skipped when empty (skip_serializing_if)
1785        // - "think" should be a top-level boolean, NOT in options
1786        // - "keep_alive" should be a top-level string, NOT in options
1787        // - "num_ctx" should be in options (it's a model parameter)
1788        let expected = json!({
1789            "model": "qwen3:8b",
1790            "messages": [
1791                {
1792                    "role": "system",
1793                    "content": "You are a helpful assistant."
1794                },
1795                {
1796                    "role": "user",
1797                    "content": "What is 2 + 2?"
1798                }
1799            ],
1800            "temperature": 0.7,
1801            "stream": false,
1802            "think": "medium",
1803            "max_tokens": 1024,
1804            "keep_alive": "-1m",
1805            "options": {
1806                "temperature": 0.7,
1807                "num_ctx": 4096
1808            }
1809        });
1810
1811        assert_eq!(serialized, expected);
1812    }
1813
1814    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
1815    #[test]
1816    fn test_completion_request_with_level_high_think_param() {
1817        use crate::OneOrMany;
1818        use crate::completion::Message as CompletionMessage;
1819        use crate::message::{Text, UserContent};
1820
1821        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
1822        let completion_request = CompletionRequest {
1823            model: None,
1824            preamble: Some("You are a helpful assistant.".to_string()),
1825            chat_history: OneOrMany::one(CompletionMessage::User {
1826                content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1827            }),
1828            documents: vec![],
1829            tools: vec![],
1830            temperature: Some(0.7),
1831            max_tokens: Some(1024),
1832            tool_choice: None,
1833            additional_params: Some(json!({
1834                "think": "high",
1835                "keep_alive": "-1m",
1836                "num_ctx": 4096
1837            })),
1838            output_schema: None,
1839        };
1840
1841        // Convert to OllamaCompletionRequest
1842        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1843            .expect("Failed to create Ollama request");
1844
1845        // Serialize to JSON
1846        let serialized =
1847            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1848
1849        // Assert equality with expected JSON
1850        // - "tools" is skipped when empty (skip_serializing_if)
1851        // - "think" should be a top-level boolean, NOT in options
1852        // - "keep_alive" should be a top-level string, NOT in options
1853        // - "num_ctx" should be in options (it's a model parameter)
1854        let expected = json!({
1855            "model": "qwen3:8b",
1856            "messages": [
1857                {
1858                    "role": "system",
1859                    "content": "You are a helpful assistant."
1860                },
1861                {
1862                    "role": "user",
1863                    "content": "What is 2 + 2?"
1864                }
1865            ],
1866            "temperature": 0.7,
1867            "stream": false,
1868            "think": "high",
1869            "max_tokens": 1024,
1870            "keep_alive": "-1m",
1871            "options": {
1872                "temperature": 0.7,
1873                "num_ctx": 4096
1874            }
1875        });
1876
1877        assert_eq!(serialized, expected);
1878    }
1879
1880    // Test that `think` and `keep_alive` are extracted as top-level params, not in `options`
1881    #[test]
1882    fn test_completion_request_with_level_invalid_think_param() {
1883        use crate::OneOrMany;
1884        use crate::completion::Message as CompletionMessage;
1885        use crate::message::{Text, UserContent};
1886
1887        // Create a CompletionRequest with "think": true, "keep_alive", and "num_ctx" in additional_params
1888        let completion_request = CompletionRequest {
1889            model: None,
1890            preamble: Some("You are a helpful assistant.".to_string()),
1891            chat_history: OneOrMany::one(CompletionMessage::User {
1892                content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1893            }),
1894            documents: vec![],
1895            tools: vec![],
1896            temperature: Some(0.7),
1897            max_tokens: Some(1024),
1898            tool_choice: None,
1899            additional_params: Some(json!({
1900                "think": "invalid",
1901                "keep_alive": "-1m",
1902                "num_ctx": 4096
1903            })),
1904            output_schema: None,
1905        };
1906
1907        // Convert to OllamaCompletionRequest
1908        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request));
1909
1910        assert!(ollama_request.is_err())
1911    }
1912
1913    // Test that `think` defaults to false when not specified
1914    #[test]
1915    fn test_completion_request_with_think_false_default() {
1916        use crate::OneOrMany;
1917        use crate::completion::Message as CompletionMessage;
1918        use crate::message::{Text, UserContent};
1919
1920        // Create a CompletionRequest WITHOUT "think" in additional_params
1921        let completion_request = CompletionRequest {
1922            model: None,
1923            preamble: Some("You are a helpful assistant.".to_string()),
1924            chat_history: OneOrMany::one(CompletionMessage::User {
1925                content: OneOrMany::one(UserContent::Text(Text::new("Hello!".to_string()))),
1926            }),
1927            documents: vec![],
1928            tools: vec![],
1929            temperature: Some(0.5),
1930            max_tokens: None,
1931            tool_choice: None,
1932            additional_params: None,
1933            output_schema: None,
1934        };
1935
1936        // Convert to OllamaCompletionRequest
1937        let ollama_request = OllamaCompletionRequest::try_from(("llama3.2", completion_request))
1938            .expect("Failed to create Ollama request");
1939
1940        // Serialize to JSON
1941        let serialized =
1942            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1943
1944        // Assert that "think" defaults to false and "keep_alive" is not present
1945        let expected = json!({
1946            "model": "llama3.2",
1947            "messages": [
1948                {
1949                    "role": "system",
1950                    "content": "You are a helpful assistant."
1951                },
1952                {
1953                    "role": "user",
1954                    "content": "Hello!"
1955                }
1956            ],
1957            "temperature": 0.5,
1958            "stream": false,
1959            "think": false,
1960            "options": {
1961                "temperature": 0.5
1962            }
1963        });
1964
1965        assert_eq!(serialized, expected);
1966    }
1967
1968    #[test]
1969    fn test_completion_request_with_output_schema() {
1970        use crate::OneOrMany;
1971        use crate::completion::Message as CompletionMessage;
1972        use crate::message::{Text, UserContent};
1973
1974        let schema: schemars::Schema = serde_json::from_value(json!({
1975            "type": "object",
1976            "properties": {
1977                "age": { "type": "integer" },
1978                "available": { "type": "boolean" }
1979            },
1980            "required": ["age", "available"]
1981        }))
1982        .expect("Failed to parse schema");
1983
1984        let completion_request = CompletionRequest {
1985            model: Some("llama3.1".to_string()),
1986            preamble: None,
1987            chat_history: OneOrMany::one(CompletionMessage::User {
1988                content: OneOrMany::one(UserContent::Text(Text::new(
1989                    "How old is Ollama?".to_string(),
1990                ))),
1991            }),
1992            documents: vec![],
1993            tools: vec![],
1994            temperature: None,
1995            max_tokens: None,
1996            tool_choice: None,
1997            additional_params: None,
1998            output_schema: Some(schema),
1999        };
2000
2001        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
2002            .expect("Failed to create Ollama request");
2003
2004        let serialized =
2005            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
2006
2007        let format = serialized
2008            .get("format")
2009            .expect("format field should be present");
2010        assert_eq!(
2011            *format,
2012            json!({
2013                "type": "object",
2014                "properties": {
2015                    "age": { "type": "integer" },
2016                    "available": { "type": "boolean" }
2017                },
2018                "required": ["age", "available"]
2019            })
2020        );
2021    }
2022
2023    #[test]
2024    fn test_completion_request_without_output_schema() {
2025        use crate::OneOrMany;
2026        use crate::completion::Message as CompletionMessage;
2027        use crate::message::{Text, UserContent};
2028
2029        let completion_request = CompletionRequest {
2030            model: Some("llama3.1".to_string()),
2031            preamble: None,
2032            chat_history: OneOrMany::one(CompletionMessage::User {
2033                content: OneOrMany::one(UserContent::Text(Text::new("Hello!".to_string()))),
2034            }),
2035            documents: vec![],
2036            tools: vec![],
2037            temperature: None,
2038            max_tokens: None,
2039            tool_choice: None,
2040            additional_params: None,
2041            output_schema: None,
2042        };
2043
2044        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
2045            .expect("Failed to create Ollama request");
2046
2047        let serialized =
2048            serde_json::to_value(&ollama_request).expect("Failed to serialize request");
2049
2050        assert!(
2051            serialized.get("format").is_none(),
2052            "format field should be absent when output_schema is None"
2053        );
2054    }
2055
2056    #[test]
2057    fn test_client_initialization() {
2058        let _client = crate::providers::ollama::Client::new(Nothing).expect("Client::new() failed");
2059        let _client_from_builder = crate::providers::ollama::Client::builder()
2060            .api_key(Nothing)
2061            .build()
2062            .expect("Client::builder() failed");
2063    }
2064
2065    #[test]
2066    fn ndjson_buffer_returns_complete_lines_in_single_chunk() {
2067        let mut buf = NdjsonBuffer::new();
2068        let lines = buf.decode(b"{\"a\":1}\n{\"b\":2}\n");
2069        assert_eq!(lines, vec![b"{\"a\":1}".to_vec(), b"{\"b\":2}".to_vec()]);
2070    }
2071
2072    #[test]
2073    fn ndjson_buffer_reassembles_line_split_across_chunks() {
2074        let mut buf = NdjsonBuffer::new();
2075
2076        assert!(buf.decode(b"{\"model\":\"llama\",\"mes").is_empty());
2077
2078        let lines = buf.decode(b"sage\":\"hi\"}\n{\"done\"");
2079        assert_eq!(
2080            lines,
2081            vec![b"{\"model\":\"llama\",\"message\":\"hi\"}".to_vec()]
2082        );
2083
2084        let lines = buf.decode(b":true}\n");
2085        assert_eq!(lines, vec![b"{\"done\":true}".to_vec()]);
2086    }
2087
2088    #[test]
2089    fn ndjson_buffer_skips_blank_lines() {
2090        let mut buf = NdjsonBuffer::new();
2091        let lines = buf.decode(b"\n{\"a\":1}\n\n");
2092        assert_eq!(lines, vec![b"{\"a\":1}".to_vec()]);
2093    }
2094
2095    #[test]
2096    fn ndjson_buffer_retains_unterminated_trailing_data() {
2097        let mut buf = NdjsonBuffer::new();
2098        let lines = buf.decode(b"{\"a\":1}\n{\"b\":2");
2099        assert_eq!(lines, vec![b"{\"a\":1}".to_vec()]);
2100        let lines = buf.decode(b"}\n");
2101        assert_eq!(lines, vec![b"{\"b\":2}".to_vec()]);
2102    }
2103
2104    #[test]
2105    fn ndjson_buffer_handles_empty_chunk() {
2106        let mut buf = NdjsonBuffer::new();
2107        assert!(buf.decode(b"").is_empty());
2108
2109        buf.decode(b"{\"a\":1");
2110        assert!(buf.decode(b"").is_empty());
2111
2112        let lines = buf.decode(b"}\n");
2113        assert_eq!(lines, vec![b"{\"a\":1}".to_vec()]);
2114    }
2115
2116    #[test]
2117    fn ndjson_buffer_handles_multi_byte_utf8_split_across_chunks() {
2118        // `\n` (0x0A) cannot appear inside any UTF-8 continuation byte, so a
2119        // byte-wise newline scan is always safe — but verify explicitly that a
2120        // multi-byte sequence reassembles correctly when split across chunks.
2121        let mut buf = NdjsonBuffer::new();
2122        assert!(buf.decode(&[0xd0]).is_empty());
2123        assert!(buf.decode(&[0xb8, 0xd0, 0xb7, 0xd0]).is_empty());
2124        assert!(
2125            buf.decode(&[
2126                0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xb8
2127            ])
2128            .is_empty()
2129        );
2130
2131        let lines = buf.decode(b"\n");
2132        assert_eq!(lines.len(), 1);
2133        assert_eq!(std::str::from_utf8(&lines[0]).unwrap(), "известни");
2134    }
2135
2136    #[test]
2137    fn ndjson_buffer_yields_parseable_chunks_when_split_arbitrarily() {
2138        let original = concat!(
2139            "{\"model\":\"llama3.2\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"},\"done\":false}\n",
2140            "{\"model\":\"llama3.2\",\"message\":{\"role\":\"assistant\",\"content\":\"\"},\"done\":true}\n",
2141        );
2142
2143        let mut buf = NdjsonBuffer::new();
2144        let mut received = Vec::new();
2145        for byte in original.as_bytes() {
2146            for line in buf.decode(std::slice::from_ref(byte)) {
2147                let parsed: serde_json::Value =
2148                    serde_json::from_slice(&line).expect("each drained line must be valid JSON");
2149                received.push(parsed);
2150            }
2151        }
2152
2153        assert_eq!(received.len(), 2);
2154        assert_eq!(received[0]["message"]["content"], "hi");
2155        assert_eq!(received[1]["done"], true);
2156    }
2157}