1use crate::client::{
41 self, ApiKey, Capabilities, Capable, DebugExt, ModelLister, Nothing, Provider, ProviderBuilder,
42 ProviderClient,
43};
44use crate::completion::{GetTokenUsage, Usage};
45use crate::http_client::{self, HttpClientExt};
46use crate::message::DocumentSourceKind;
47use crate::model::{Model, ModelList, ModelListingError};
48use crate::streaming::RawStreamingChoice;
49use crate::{
50 OneOrMany,
51 completion::{self, CompletionError, CompletionRequest},
52 embeddings::{self, EmbeddingError},
53 json_utils, message,
54 message::{ImageDetail, Text},
55 streaming,
56 wasm_compat::{WasmCompatSend, WasmCompatSync},
57};
58use async_stream::try_stream;
59use bytes::Bytes;
60use futures::StreamExt;
61use serde::{Deserialize, Serialize};
62use serde_json::{Value, json};
63use std::{convert::TryFrom, str::FromStr};
64use tracing::info_span;
65use tracing_futures::Instrument;
/// Base URL of the Ollama HTTP API that this module falls back to when no other URL is given.
const OLLAMA_API_BASE_URL: &str = "http://localhost:11434";
69
/// Optional API key for an Ollama endpoint.
///
/// The wrapped value is `None` when no key is configured, which is also the `Default`.
#[derive(Debug, Default, Clone)]
pub struct OllamaApiKey(Option<String>);
74
75impl ApiKey for OllamaApiKey {
76 fn into_header(
77 self,
78 ) -> Option<http_client::Result<(http::header::HeaderName, http::header::HeaderValue)>> {
79 self.0.map(http_client::make_auth_header)
80 }
81}
82
83impl From<Nothing> for OllamaApiKey {
84 fn from(_: Nothing) -> Self {
85 Self(None)
86 }
87}
88
89impl From<String> for OllamaApiKey {
90 fn from(key: String) -> Self {
91 if key.is_empty() {
92 Self(None)
93 } else {
94 Self(Some(key))
95 }
96 }
97}
98
99impl From<&str> for OllamaApiKey {
100 fn from(key: &str) -> Self {
101 if key.is_empty() {
102 Self(None)
103 } else {
104 Self(Some(key.to_owned()))
105 }
106 }
107}
108
/// Provider extension marker for Ollama; it is a unit struct and carries no state.
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaExt;
111
/// Builder marker whose `ProviderBuilder::build` implementation yields [`OllamaExt`].
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaBuilder;
114
impl Provider for OllamaExt {
    type Builder = OllamaBuilder;
    // Relative path of the tags endpoint. Presumably requested to verify that the server is
    // reachable — confirm against the `Provider` trait definition.
    const VERIFY_PATH: &'static str = "api/tags";
}
119
// Capability table for Ollama: completion, embeddings and model listing are bound to the
// concrete types defined in this file; transcription and the feature-gated image/audio
// generation capabilities are declared as `Nothing`.
impl<H> Capabilities<H> for OllamaExt {
    type Completion = Capable<CompletionModel<H>>;
    type Transcription = Nothing;
    type Embeddings = Capable<EmbeddingModel<H>>;
    type ModelListing = Capable<OllamaModelLister<H>>;
    #[cfg(feature = "image")]
    type ImageGeneration = Nothing;

    #[cfg(feature = "audio")]
    type AudioGeneration = Nothing;
}
131
// Empty impl: nothing from `DebugExt` is overridden for Ollama.
impl DebugExt for OllamaExt {}
133
impl ProviderBuilder for OllamaBuilder {
    // The extension type is the same for every HTTP client type `H`.
    type Extension<H>
        = OllamaExt
    where
        H: HttpClientExt;
    type ApiKey = OllamaApiKey;

    const BASE_URL: &'static str = OLLAMA_API_BASE_URL;

    /// Produces the Ollama extension value.
    ///
    /// The builder argument is not inspected, and this implementation always returns `Ok`.
    fn build<H>(
        _builder: &client::ClientBuilder<Self, Self::ApiKey, H>,
    ) -> http_client::Result<Self::Extension<H>>
    where
        H: HttpClientExt,
    {
        Ok(OllamaExt)
    }
}
152
/// Ollama client; the HTTP backend defaults to `reqwest::Client`.
pub type Client<H = reqwest::Client> = client::Client<OllamaExt, H>;
/// Builder for [`Client`]; the HTTP backend parameter defaults to the `Missing` marker.
pub type ClientBuilder<H = crate::markers::Missing> =
    client::ClientBuilder<OllamaBuilder, OllamaApiKey, H>;
156
157impl ProviderClient for Client {
158 type Input = OllamaApiKey;
159 type Error = crate::client::ProviderClientError;
160
161 fn from_env() -> Result<Self, Self::Error> {
162 let api_base = crate::client::optional_env_var("OLLAMA_API_BASE_URL")?
163 .unwrap_or_else(|| OLLAMA_API_BASE_URL.to_string());
164
165 let api_key = crate::client::optional_env_var("OLLAMA_API_KEY")?
166 .map(OllamaApiKey::from)
167 .unwrap_or_default();
168
169 Self::builder()
170 .api_key(api_key)
171 .base_url(&api_base)
172 .build()
173 .map_err(Into::into)
174 }
175
176 fn from_val(api_key: Self::Input) -> Result<Self, Self::Error> {
177 Self::builder().api_key(api_key).build().map_err(Into::into)
178 }
179}
180
/// Error payload shape: a JSON object with a `message` string.
#[derive(Debug, Deserialize)]
struct ApiErrorResponse {
    message: String,
}
187
/// Either a successful payload `T` or an [`ApiErrorResponse`].
///
/// Being `untagged`, deserialization tries the variants in declaration order, so `Ok(T)` is
/// attempted before `Err`.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum ApiResponse<T> {
    Ok(T),
    Err(ApiErrorResponse),
}
194
/// Identifier of the `all-minilm` embedding model.
pub const ALL_MINILM: &str = "all-minilm";
/// Identifier of the `nomic-embed-text` embedding model.
pub const NOMIC_EMBED_TEXT: &str = "nomic-embed-text";

/// Looks up the embedding dimensionality of a known model identifier.
///
/// The comparison is exact (case-sensitive); identifiers missing from the built-in table
/// yield `None`.
fn model_dimensions_from_identifier(identifier: &str) -> Option<usize> {
    const KNOWN_DIMENSIONS: [(&str, usize); 2] = [(ALL_MINILM, 384), (NOMIC_EMBED_TEXT, 768)];

    KNOWN_DIMENSIONS
        .iter()
        .find(|(name, _)| *name == identifier)
        .map(|&(_, dims)| dims)
}
207
/// Body of a successful `api/embed` reply as parsed by this module.
///
/// `embeddings` holds one vector per input; the optional fields default to `None` when the
/// server omits them.
#[derive(Debug, Serialize, Deserialize)]
pub struct EmbeddingResponse {
    pub model: String,
    pub embeddings: Vec<Vec<f64>>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
}
219
220impl From<ApiErrorResponse> for EmbeddingError {
221 fn from(err: ApiErrorResponse) -> Self {
222 EmbeddingError::ProviderError(err.message)
223 }
224}
225
226impl From<ApiResponse<EmbeddingResponse>> for Result<EmbeddingResponse, EmbeddingError> {
227 fn from(value: ApiResponse<EmbeddingResponse>) -> Self {
228 match value {
229 ApiResponse::Ok(response) => Ok(response),
230 ApiResponse::Err(err) => Err(EmbeddingError::ProviderError(err.message)),
231 }
232 }
233}
234
/// Handle for requesting embeddings from one Ollama model.
///
/// Stores the client, the model name, and the dimensionality reported by `ndims()`.
#[derive(Clone)]
pub struct EmbeddingModel<T = reqwest::Client> {
    client: Client<T>,
    pub model: String,
    ndims: usize,
}
243
244impl<T> EmbeddingModel<T> {
245 pub fn new(client: Client<T>, model: impl Into<String>, ndims: usize) -> Self {
246 Self {
247 client,
248 model: model.into(),
249 ndims,
250 }
251 }
252
253 pub fn with_model(client: Client<T>, model: &str, ndims: usize) -> Self {
254 Self {
255 client,
256 model: model.into(),
257 ndims,
258 }
259 }
260}
261
262impl<T> embeddings::EmbeddingModel for EmbeddingModel<T>
263where
264 T: HttpClientExt + Clone + 'static,
265{
266 type Client = Client<T>;
267
268 fn make(client: &Self::Client, model: impl Into<String>, dims: Option<usize>) -> Self {
269 let model = model.into();
270 let dims = dims
271 .or(model_dimensions_from_identifier(&model))
272 .unwrap_or_default();
273 Self::new(client.clone(), model, dims)
274 }
275
276 const MAX_DOCUMENTS: usize = 1024;
277 fn ndims(&self) -> usize {
278 self.ndims
279 }
280
281 async fn embed_texts(
282 &self,
283 documents: impl IntoIterator<Item = String>,
284 ) -> Result<Vec<embeddings::Embedding>, EmbeddingError> {
285 let docs: Vec<String> = documents.into_iter().collect();
286
287 let body = serde_json::to_vec(&json!({
288 "model": self.model,
289 "input": docs
290 }))?;
291
292 let req = self
293 .client
294 .post("api/embed")?
295 .body(body)
296 .map_err(|e| EmbeddingError::HttpError(e.into()))?;
297
298 let response = self.client.send::<_, Vec<u8>>(req).await?;
299
300 if !response.status().is_success() {
301 let text = http_client::text(response).await?;
302 return Err(EmbeddingError::ProviderError(text));
303 }
304
305 let bytes: Vec<u8> = response.into_body().await?;
306
307 let api_resp: EmbeddingResponse = serde_json::from_slice(&bytes)?;
308
309 if api_resp.embeddings.len() != docs.len() {
310 return Err(EmbeddingError::ResponseError(
311 "Number of returned embeddings does not match input".into(),
312 ));
313 }
314 Ok(api_resp
315 .embeddings
316 .into_iter()
317 .zip(docs.into_iter())
318 .map(|(vec, document)| embeddings::Embedding { document, vec })
319 .collect())
320 }
321}
322
/// Identifier of the `llama3.2` model.
pub const LLAMA3_2: &str = "llama3.2";
/// Identifier of the `llava` model.
pub const LLAVA: &str = "llava";
/// Identifier of the `mistral` model.
pub const MISTRAL: &str = "mistral";
328
/// One `api/chat` reply object as parsed by this module.
///
/// The same shape is used for the whole non-streaming body and for each NDJSON line of a
/// streamed reply. Every optional field defaults to `None` when absent.
#[derive(Debug, Serialize, Deserialize)]
pub struct CompletionResponse {
    pub model: String,
    pub created_at: String,
    pub message: Message,
    // In streaming mode the reader stops at the first object with `done == true`.
    pub done: bool,
    #[serde(default)]
    pub done_reason: Option<String>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    // Used as the input-token count when reporting usage.
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
    #[serde(default)]
    pub prompt_eval_duration: Option<u64>,
    // Used as the output-token count when reporting usage.
    #[serde(default)]
    pub eval_count: Option<u64>,
    #[serde(default)]
    pub eval_duration: Option<u64>,
}
350impl TryFrom<CompletionResponse> for completion::CompletionResponse<CompletionResponse> {
351 type Error = CompletionError;
352 fn try_from(resp: CompletionResponse) -> Result<Self, Self::Error> {
353 match resp.message {
354 Message::Assistant {
356 content,
357 thinking,
358 tool_calls,
359 ..
360 } => {
361 let mut assistant_contents = Vec::new();
362 if !content.is_empty() {
364 assistant_contents.push(completion::AssistantContent::text(&content));
365 }
366 for tc in tool_calls.iter() {
369 assistant_contents.push(completion::AssistantContent::tool_call(
370 tc.function.name.clone(),
371 tc.function.name.clone(),
372 tc.function.arguments.clone(),
373 ));
374 }
375 let choice = OneOrMany::many(assistant_contents).map_err(|_| {
376 CompletionError::ResponseError("No content provided".to_owned())
377 })?;
378 let prompt_tokens = resp.prompt_eval_count.unwrap_or(0);
379 let completion_tokens = resp.eval_count.unwrap_or(0);
380
381 let raw_response = CompletionResponse {
382 model: resp.model,
383 created_at: resp.created_at,
384 done: resp.done,
385 done_reason: resp.done_reason,
386 total_duration: resp.total_duration,
387 load_duration: resp.load_duration,
388 prompt_eval_count: resp.prompt_eval_count,
389 prompt_eval_duration: resp.prompt_eval_duration,
390 eval_count: resp.eval_count,
391 eval_duration: resp.eval_duration,
392 message: Message::Assistant {
393 content,
394 thinking,
395 images: None,
396 name: None,
397 tool_calls,
398 },
399 };
400
401 Ok(completion::CompletionResponse {
402 choice,
403 usage: Usage {
404 input_tokens: prompt_tokens,
405 output_tokens: completion_tokens,
406 total_tokens: prompt_tokens + completion_tokens,
407 cached_input_tokens: 0,
408 cache_creation_input_tokens: 0,
409 tool_use_prompt_tokens: 0,
410 reasoning_tokens: 0,
411 },
412 raw_response,
413 message_id: None,
414 })
415 }
416 _ => Err(CompletionError::ResponseError(
417 "Chat response does not include an assistant message".into(),
418 )),
419 }
420 }
421}
422
/// JSON body sent to `api/chat`.
///
/// Fields marked `skip_serializing_if` are left out of the JSON when they are `None` or empty.
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct OllamaCompletionRequest {
    model: String,
    pub messages: Vec<Message>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    tools: Vec<ToolDefinition>,
    // `false` after conversion; the streaming path sets it to `true` before sending.
    pub stream: bool,
    think: Think,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    keep_alive: Option<String>,
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<schemars::Schema>,
    // The request temperature merged with whatever remains of the caller's additional params.
    options: serde_json::Value,
}
441
442impl TryFrom<(&str, CompletionRequest)> for OllamaCompletionRequest {
443 type Error = CompletionError;
444
445 fn try_from((model, req): (&str, CompletionRequest)) -> Result<Self, Self::Error> {
446 let model = req.model.clone().unwrap_or_else(|| model.to_string());
447 if req.tool_choice.is_some() {
448 tracing::warn!("WARNING: `tool_choice` not supported for Ollama");
449 }
450 let mut partial_history = vec![];
452 if let Some(docs) = req.normalized_documents() {
453 partial_history.push(docs);
454 }
455 partial_history.extend(req.chat_history);
456
457 let mut full_history: Vec<Message> = match &req.preamble {
459 Some(preamble) => vec![Message::system(preamble)],
460 None => vec![],
461 };
462
463 full_history.extend(
465 partial_history
466 .into_iter()
467 .map(message::Message::try_into)
468 .collect::<Result<Vec<Vec<Message>>, _>>()?
469 .into_iter()
470 .flatten()
471 .collect::<Vec<_>>(),
472 );
473
474 let mut think = Think::Bool(false);
475 let mut keep_alive: Option<String> = None;
476
477 let options = if let Some(mut extra) = req.additional_params {
478 if let Some(obj) = extra.as_object_mut() {
480 if let Some(think_val) = obj.remove("think") {
482 think = match think_val {
483 Value::Bool(think) => Think::Bool(think),
484 Value::String(think) => Think::Level(match think.to_lowercase().as_str() {
485 "low" => Level::Low,
486 "medium" => Level::Medium,
487 "high" => Level::High,
488 _ => {
489 return Err(CompletionError::RequestError(
490 "`think` must be a 'low', 'medium', 'high', or bool".into(),
491 ));
492 }
493 }),
494 _ => {
495 return Err(CompletionError::RequestError(
496 "`think` must be a 'low', 'medium', 'high', or bool".into(),
497 ));
498 }
499 };
500 }
501
502 if let Some(keep_alive_val) = obj.remove("keep_alive") {
504 keep_alive = Some(
505 keep_alive_val
506 .as_str()
507 .ok_or_else(|| {
508 CompletionError::RequestError(
509 "`keep_alive` must be a string".into(),
510 )
511 })?
512 .to_string(),
513 );
514 }
515 }
516
517 json_utils::merge(json!({ "temperature": req.temperature }), extra)
518 } else {
519 json!({ "temperature": req.temperature })
520 };
521
522 Ok(Self {
523 model: model.to_string(),
524 messages: full_history,
525 temperature: req.temperature,
526 max_tokens: req.max_tokens,
527 stream: false,
528 think,
529 keep_alive,
530 format: req.output_schema,
531 tools: req
532 .tools
533 .clone()
534 .into_iter()
535 .map(ToolDefinition::from)
536 .collect::<Vec<_>>(),
537 options,
538 })
539 }
540}
541
/// Handle for running chat completions against one Ollama model.
#[derive(Clone)]
pub struct CompletionModel<T = reqwest::Client> {
    client: Client<T>,
    // Default model name; a request that carries its own `model` overrides it.
    pub model: String,
}
547
548impl<T> CompletionModel<T> {
549 pub fn new(client: Client<T>, model: &str) -> Self {
550 Self {
551 client,
552 model: model.to_owned(),
553 }
554 }
555}
556
/// Value of the request's `think` field.
///
/// Being `untagged`, it serializes as either a bare boolean or a bare level string.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
enum Think {
    Bool(bool),
    Level(Level),
}
563
/// Named thinking levels; serialized in lowercase (`"low"`, `"medium"`, `"high"`).
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum Level {
    Low,
    Medium,
    High,
}
571
/// Final statistics of a streamed chat reply, copied from the stream object whose `done`
/// flag is set.
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct StreamingCompletionResponse {
    pub done_reason: Option<String>,
    pub total_duration: Option<u64>,
    pub load_duration: Option<u64>,
    // Reported as input tokens by `token_usage`.
    pub prompt_eval_count: Option<u64>,
    pub prompt_eval_duration: Option<u64>,
    // Reported as output tokens by `token_usage`.
    pub eval_count: Option<u64>,
    pub eval_duration: Option<u64>,
}
584
585impl GetTokenUsage for StreamingCompletionResponse {
586 fn token_usage(&self) -> Option<crate::completion::Usage> {
587 let mut usage = crate::completion::Usage::new();
588 let input_tokens = self.prompt_eval_count.unwrap_or_default();
589 let output_tokens = self.eval_count.unwrap_or_default();
590 usage.input_tokens = input_tokens;
591 usage.output_tokens = output_tokens;
592 usage.total_tokens = input_tokens + output_tokens;
593
594 Some(usage)
595 }
596}
597
/// Reassembles newline-delimited JSON records from arbitrarily split byte chunks.
///
/// Invariant: between calls, `buf` holds only the unterminated tail of the stream and
/// therefore never contains a `\n`.
#[derive(Default)]
struct NdjsonBuffer {
    buf: Vec<u8>,
}

impl NdjsonBuffer {
    /// Creates an empty buffer.
    fn new() -> Self {
        Self::default()
    }

    /// Appends `chunk` and returns every line completed by it, in order.
    ///
    /// Returned lines have their `\n` terminator removed, along with a trailing `\r` from a
    /// CRLF terminator. Empty lines are skipped. Bytes after the last newline stay buffered
    /// until a later chunk terminates them.
    fn decode(&mut self, chunk: &[u8]) -> Vec<Vec<u8>> {
        let carried = self.buf.len();
        self.buf.extend_from_slice(chunk);

        // Only the new chunk can contain a newline (see the struct invariant), so the
        // carried-over tail is never rescanned.
        let Some(last_newline) = chunk
            .iter()
            .rposition(|&byte| byte == b'\n')
            .map(|idx| carried + idx)
        else {
            return Vec::new();
        };

        // Split all completed lines in one pass instead of draining the front once per line.
        let lines: Vec<Vec<u8>> = self.buf[..last_newline]
            .split(|&byte| byte == b'\n')
            .map(|line| line.strip_suffix(b"\r").unwrap_or(line))
            .filter(|line| !line.is_empty())
            .map(|line| line.to_vec())
            .collect();

        // Keep only the unterminated tail for the next call.
        self.buf.drain(..=last_newline);
        lines
    }
}
629
impl<T> completion::CompletionModel for CompletionModel<T>
where
    T: HttpClientExt + Clone + Default + std::fmt::Debug + Send + 'static,
{
    type Response = CompletionResponse;
    type StreamingResponse = StreamingCompletionResponse;

    type Client = Client<T>;

    /// Creates a completion model bound to a clone of `client` for the given model name.
    fn make(client: &Self::Client, model: impl Into<String>) -> Self {
        Self::new(client.clone(), model.into().as_str())
    }

    /// Sends a non-streaming chat request to `api/chat` and converts the reply.
    ///
    /// # Errors
    /// * `ProviderError` carrying the (lossily decoded) response body when the status is not
    ///   a success.
    /// * Request-building, HTTP, JSON and response-conversion failures are propagated.
    async fn completion(
        &self,
        completion_request: CompletionRequest,
    ) -> Result<completion::CompletionResponse<Self::Response>, CompletionError> {
        // Reuse the caller's span when one is active; otherwise open a dedicated "chat" span.
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat",
                gen_ai.operation.name = "chat",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = tracing::field::Empty,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        span.record("gen_ai.system_instructions", &completion_request.preamble);
        let request = OllamaCompletionRequest::try_from((self.model.as_ref(), completion_request))?;

        // Pretty-printing the request is only done when TRACE output is enabled.
        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        let async_block = async move {
            let response = self.client.send::<_, Bytes>(req).await?;
            let status = response.status();
            let response_body = response.into_body().into_future().await?.to_vec();

            // On failure the body is returned verbatim as the provider error text.
            if !status.is_success() {
                return Err(CompletionError::ProviderError(
                    String::from_utf8_lossy(&response_body).to_string(),
                ));
            }

            let response: CompletionResponse = serde_json::from_slice(&response_body)?;
            // This block runs instrumented with `span`, so the current span is the one
            // selected above.
            let span = tracing::Span::current();
            span.record("gen_ai.response.model", &response.model);
            span.record(
                "gen_ai.usage.input_tokens",
                response.prompt_eval_count.unwrap_or_default(),
            );
            span.record(
                "gen_ai.usage.output_tokens",
                response.eval_count.unwrap_or_default(),
            );

            if tracing::enabled!(tracing::Level::TRACE) {
                tracing::trace!(target: "rig::completions",
                    "Ollama completion response: {}",
                    serde_json::to_string_pretty(&response)?
                );
            }

            let response: completion::CompletionResponse<CompletionResponse> =
                response.try_into()?;

            Ok(response)
        };

        tracing::Instrument::instrument(async_block, span).await
    }

    /// Sends a streaming chat request to `api/chat` and returns a stream of decoded pieces.
    ///
    /// The reply is read as newline-delimited JSON. Each object may yield a reasoning delta,
    /// a text message and tool calls; the first object with `done == true` yields the final
    /// statistics and ends the stream.
    ///
    /// # Errors
    /// * `ProviderError` naming the status code when the status is not a success (the
    ///   response body is not read in that case).
    /// * Request-building and HTTP failures are returned up front; transport and JSON
    ///   failures that occur while reading are surfaced through the stream.
    async fn stream(
        &self,
        request: CompletionRequest,
    ) -> Result<streaming::StreamingCompletionResponse<Self::StreamingResponse>, CompletionError>
    {
        // Reuse the caller's span when one is active; otherwise open a "chat_streaming" span.
        // NOTE(review): `gen_ai.output.messages` is recorded further down but is not declared
        // on the span created here; tracing appears to ignore values for undeclared fields —
        // confirm whether the field should be declared or the record call removed.
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat_streaming",
                gen_ai.operation.name = "chat_streaming",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = self.model,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        span.record("gen_ai.system_instructions", &request.preamble);

        // The conversion always produces `stream: false`; switch it on for this path.
        let mut request = OllamaCompletionRequest::try_from((self.model.as_ref(), request))?;
        request.stream = true;

        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama streaming completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        let response = self.client.send_streaming(req).await?;
        let status = response.status();
        let mut byte_stream = response.into_body();

        if !status.is_success() {
            return Err(CompletionError::ProviderError(format!(
                "Got error status code trying to send a request to Ollama: {status}"
            )));
        }

        let stream = try_stream! {
            let span = tracing::Span::current();
            // Accumulators used to rebuild the complete assistant message once `done` arrives.
            let mut tool_calls_final = Vec::new();
            let mut text_response = String::new();
            let mut thinking_response = String::new();
            let mut line_buf = NdjsonBuffer::new();

            while let Some(chunk) = byte_stream.next().await {
                let bytes = chunk.map_err(|e| http_client::Error::Instance(e.into()))?;

                // Only newline-terminated lines are decoded; an unterminated tail still in
                // the buffer when the byte stream ends is never parsed.
                for line in line_buf.decode(&bytes) {
                    tracing::debug!(target: "rig", "Received NDJSON line from Ollama: {}", String::from_utf8_lossy(&line));

                    let response: CompletionResponse = serde_json::from_slice(&line)?;

                    if let Message::Assistant { content, thinking, tool_calls, .. } = response.message {
                        if let Some(thinking_content) = thinking && !thinking_content.is_empty() {
                            thinking_response += &thinking_content;
                            yield RawStreamingChoice::ReasoningDelta {
                                id: None,
                                reasoning: thinking_content,
                            };
                        }

                        if !content.is_empty() {
                            text_response += &content;
                            yield RawStreamingChoice::Message(content);
                        }

                        // Tool calls are emitted with an empty id string.
                        for tool_call in tool_calls {
                            tool_calls_final.push(tool_call.clone());
                            yield RawStreamingChoice::ToolCall(
                                crate::streaming::RawStreamingToolCall::new(String::new(), tool_call.function.name, tool_call.function.arguments)
                            );
                        }
                    }

                    if response.done {
                        span.record("gen_ai.usage.input_tokens", response.prompt_eval_count);
                        span.record("gen_ai.usage.output_tokens", response.eval_count);
                        let message = Message::Assistant {
                            content: text_response.clone(),
                            thinking: if thinking_response.is_empty() { None } else { Some(thinking_response.clone()) },
                            images: None,
                            name: None,
                            tool_calls: tool_calls_final.clone()
                        };
                        if let Ok(serialized_message) = serde_json::to_string(&vec![message]) {
                            span.record("gen_ai.output.messages", serialized_message);
                        }
                        yield RawStreamingChoice::FinalResponse(
                            StreamingCompletionResponse {
                                total_duration: response.total_duration,
                                load_duration: response.load_duration,
                                prompt_eval_count: response.prompt_eval_count,
                                prompt_eval_duration: response.prompt_eval_duration,
                                eval_count: response.eval_count,
                                eval_duration: response.eval_duration,
                                done_reason: response.done_reason,
                            }
                        );
                        // Leaves only the per-line loop; the outer loop keeps polling the byte
                        // stream until it is exhausted.
                        break;
                    }
                }
            }
        }.instrument(span);

        Ok(streaming::StreamingCompletionResponse::stream(Box::pin(
            stream,
        )))
    }
}
847
/// Body of the tags endpoint reply: an object with a `models` array.
#[derive(Debug, Deserialize)]
struct ListModelsResponse {
    models: Vec<ListModelEntry>,
}
854
/// One entry of the model list; only the `name` and `model` strings are read.
#[derive(Debug, Deserialize)]
struct ListModelEntry {
    name: String,
    model: String,
}
860
861impl From<ListModelEntry> for Model {
862 fn from(value: ListModelEntry) -> Self {
863 Model::new(value.model, value.name)
864 }
865}
866
/// Lists the models an Ollama server reports through its tags endpoint.
#[derive(Clone)]
pub struct OllamaModelLister<H = reqwest::Client> {
    client: Client<H>,
}
872
873impl<H> ModelLister<H> for OllamaModelLister<H>
874where
875 H: HttpClientExt + WasmCompatSend + WasmCompatSync + 'static,
876{
877 type Client = Client<H>;
878
879 fn new(client: Self::Client) -> Self {
880 Self { client }
881 }
882
883 async fn list_all(&self) -> Result<ModelList, ModelListingError> {
884 let path = "/api/tags";
885 let req = self.client.get(path)?.body(http_client::NoBody)?;
886 let response = self.client.send::<_, Vec<u8>>(req).await?;
887
888 if !response.status().is_success() {
889 let status_code = response.status().as_u16();
890 let body = response.into_body().await?;
891 return Err(ModelListingError::api_error_with_context(
892 "Ollama",
893 path,
894 status_code,
895 &body,
896 ));
897 }
898
899 let body = response.into_body().await?;
900 let api_resp: ListModelsResponse = serde_json::from_slice(&body).map_err(|error| {
901 ModelListingError::parse_error_with_context("Ollama", path, &error, &body)
902 })?;
903 let models = api_resp.models.into_iter().map(Model::from).collect();
904
905 Ok(ModelList::new(models))
906 }
907}
908
/// Tool description in the shape sent to Ollama: a `type` string plus the generic
/// function definition.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct ToolDefinition {
    // Serialized under the JSON key `type`; the conversion in this file sets it to "function".
    #[serde(rename = "type")]
    pub type_field: String,
    pub function: completion::ToolDefinition,
}
918
919impl From<crate::completion::ToolDefinition> for ToolDefinition {
921 fn from(tool: crate::completion::ToolDefinition) -> Self {
922 ToolDefinition {
923 type_field: "function".to_owned(),
924 function: completion::ToolDefinition {
925 name: tool.name,
926 description: tool.description,
927 parameters: tool.parameters,
928 },
929 }
930 }
931}
932
/// A tool invocation attached to an assistant message.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolCall {
    // Falls back to `ToolType::Function` when the JSON omits `type`.
    #[serde(default, rename = "type")]
    pub r#type: ToolType,
    pub function: Function,
}
/// Kind of tool call; `Function` is the only variant and serializes as `"function"`.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    #[default]
    Function,
}
/// Function name and JSON arguments of a tool call.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct Function {
    pub name: String,
    pub arguments: Value,
}
950
/// Chat message in Ollama's wire format.
///
/// The variant is selected by the JSON `role` field (`"user"`, `"assistant"`, `"system"`,
/// `"tool"`). Optional fields are omitted from the JSON when they are `None`.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum Message {
    User {
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    Assistant {
        // An absent `content` deserializes as the empty string.
        #[serde(default)]
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        thinking: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        // An absent `tool_calls` becomes an empty list; `json_utils::null_or_vec` presumably
        // also maps `null` to an empty list — confirm against that helper.
        #[serde(default, deserialize_with = "json_utils::null_or_vec")]
        tool_calls: Vec<ToolCall>,
    },
    System {
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    // Role `"tool"`; the `name` field travels under the JSON key `tool_name`.
    #[serde(rename = "tool")]
    ToolResult {
        #[serde(rename = "tool_name")]
        name: String,
        content: String,
    },
}
989
990impl TryFrom<crate::message::Message> for Vec<Message> {
996 type Error = crate::message::MessageError;
997 fn try_from(internal_msg: crate::message::Message) -> Result<Self, Self::Error> {
998 use crate::message::Message as InternalMessage;
999 match internal_msg {
1000 InternalMessage::System { content } => Ok(vec![Message::System {
1001 content,
1002 images: None,
1003 name: None,
1004 }]),
1005 InternalMessage::User { content, .. } => {
1006 let (tool_results, other_content): (Vec<_>, Vec<_>) =
1007 content.into_iter().partition(|content| {
1008 matches!(content, crate::message::UserContent::ToolResult(_))
1009 });
1010
1011 if !tool_results.is_empty() {
1012 tool_results
1013 .into_iter()
1014 .map(|content| match content {
1015 crate::message::UserContent::ToolResult(
1016 crate::message::ToolResult { id, content, .. },
1017 ) => {
1018 let content_string = content
1020 .into_iter()
1021 .map(|content| match content {
1022 crate::message::ToolResultContent::Text(text) => text.text,
1023 _ => "[Non-text content]".to_string(),
1024 })
1025 .collect::<Vec<_>>()
1026 .join("\n");
1027
1028 Ok::<_, crate::message::MessageError>(Message::ToolResult {
1029 name: id,
1030 content: content_string,
1031 })
1032 }
1033 _ => Err(crate::message::MessageError::ConversionError(
1034 "expected tool result content while converting Ollama input".into(),
1035 )),
1036 })
1037 .collect::<Result<Vec<_>, _>>()
1038 } else {
1039 let (texts, images) = other_content.into_iter().fold(
1041 (Vec::new(), Vec::new()),
1042 |(mut texts, mut images), content| {
1043 match content {
1044 crate::message::UserContent::Text(crate::message::Text {
1045 text,
1046 ..
1047 }) => texts.push(text),
1048 crate::message::UserContent::Image(crate::message::Image {
1049 data: DocumentSourceKind::Base64(data),
1050 ..
1051 }) => images.push(data),
1052 crate::message::UserContent::Document(
1053 crate::message::Document {
1054 data:
1055 DocumentSourceKind::Base64(data)
1056 | DocumentSourceKind::String(data),
1057 ..
1058 },
1059 ) => texts.push(data),
1060 _ => {} }
1062 (texts, images)
1063 },
1064 );
1065
1066 Ok(vec![Message::User {
1067 content: texts.join(" "),
1068 images: if images.is_empty() {
1069 None
1070 } else {
1071 Some(
1072 images
1073 .into_iter()
1074 .map(|x| x.to_string())
1075 .collect::<Vec<String>>(),
1076 )
1077 },
1078 name: None,
1079 }])
1080 }
1081 }
1082 InternalMessage::Assistant { content, .. } => {
1083 let mut thinking: Option<String> = None;
1084 let mut text_content = Vec::new();
1085 let mut tool_calls = Vec::new();
1086
1087 for content in content.into_iter() {
1088 match content {
1089 crate::message::AssistantContent::Text(text) => {
1090 text_content.push(text.text)
1091 }
1092 crate::message::AssistantContent::ToolCall(tool_call) => {
1093 tool_calls.push(tool_call)
1094 }
1095 crate::message::AssistantContent::Reasoning(reasoning) => {
1096 let display = reasoning.display_text();
1097 if !display.is_empty() {
1098 thinking = Some(display);
1099 }
1100 }
1101 crate::message::AssistantContent::Image(_) => {
1102 return Err(crate::message::MessageError::ConversionError(
1103 "Ollama currently doesn't support images.".into(),
1104 ));
1105 }
1106 }
1107 }
1108
1109 Ok(vec![Message::Assistant {
1112 content: text_content.join(" "),
1113 thinking,
1114 images: None,
1115 name: None,
1116 tool_calls: tool_calls
1117 .into_iter()
1118 .map(|tool_call| tool_call.into())
1119 .collect::<Vec<_>>(),
1120 }])
1121 }
1122 }
1123 }
1124}
1125
1126impl From<Message> for crate::completion::Message {
1129 fn from(msg: Message) -> Self {
1130 match msg {
1131 Message::User { content, .. } => crate::completion::Message::User {
1132 content: OneOrMany::one(crate::completion::message::UserContent::Text(Text::new(
1133 content,
1134 ))),
1135 },
1136 Message::Assistant {
1137 content,
1138 tool_calls,
1139 ..
1140 } => {
1141 let mut assistant_contents =
1142 vec![crate::completion::message::AssistantContent::Text(
1143 Text::new(content),
1144 )];
1145 for tc in tool_calls {
1146 assistant_contents.push(
1147 crate::completion::message::AssistantContent::tool_call(
1148 tc.function.name.clone(),
1149 tc.function.name,
1150 tc.function.arguments,
1151 ),
1152 );
1153 }
1154 let content =
1155 OneOrMany::from_iter_optional(assistant_contents).unwrap_or_else(|| {
1156 OneOrMany::one(crate::completion::message::AssistantContent::Text(
1157 Text::new(String::new()),
1158 ))
1159 });
1160
1161 crate::completion::Message::Assistant { id: None, content }
1162 }
1163 Message::System { content, .. } => crate::completion::Message::User {
1165 content: OneOrMany::one(crate::completion::message::UserContent::Text(Text::new(
1166 content,
1167 ))),
1168 },
1169 Message::ToolResult { name, content } => crate::completion::Message::User {
1170 content: OneOrMany::one(message::UserContent::tool_result(
1171 name,
1172 OneOrMany::one(message::ToolResultContent::text(content)),
1173 )),
1174 },
1175 }
1176 }
1177}
1178
1179impl Message {
1180 pub fn system(content: &str) -> Self {
1182 Message::System {
1183 content: content.to_owned(),
1184 images: None,
1185 name: None,
1186 }
1187 }
1188}
1189
1190impl From<crate::message::ToolCall> for ToolCall {
1193 fn from(tool_call: crate::message::ToolCall) -> Self {
1194 Self {
1195 r#type: ToolType::Function,
1196 function: Function {
1197 name: tool_call.function.name,
1198 arguments: tool_call.function.arguments,
1199 },
1200 }
1201 }
1202}
1203
/// Typed system content: a `type` tag plus the text.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct SystemContent {
    // Falls back to `SystemContentType::Text` when the JSON omits `type`.
    #[serde(default)]
    r#type: SystemContentType,
    text: String,
}
1210
/// Kind of system content; `Text` is the only variant and serializes as `"text"`.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum SystemContentType {
    #[default]
    Text,
}
1217
1218impl From<String> for SystemContent {
1219 fn from(s: String) -> Self {
1220 SystemContent {
1221 r#type: SystemContentType::default(),
1222 text: s,
1223 }
1224 }
1225}
1226
1227impl FromStr for SystemContent {
1228 type Err = std::convert::Infallible;
1229 fn from_str(s: &str) -> Result<Self, Self::Err> {
1230 Ok(SystemContent {
1231 r#type: SystemContentType::default(),
1232 text: s.to_string(),
1233 })
1234 }
1235}
1236
/// Assistant content consisting of a single text field.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct AssistantContent {
    pub text: String,
}
1241
1242impl FromStr for AssistantContent {
1243 type Err = std::convert::Infallible;
1244 fn from_str(s: &str) -> Result<Self, Self::Err> {
1245 Ok(AssistantContent { text: s.to_owned() })
1246 }
1247}
1248
1249#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
1250#[serde(tag = "type", rename_all = "lowercase")]
1251pub enum UserContent {
1252 Text { text: String },
1253 Image { image_url: ImageUrl },
1254 }
1256
1257impl FromStr for UserContent {
1258 type Err = std::convert::Infallible;
1259 fn from_str(s: &str) -> Result<Self, Self::Err> {
1260 Ok(UserContent::Text { text: s.to_owned() })
1261 }
1262}
1263
1264#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
1265pub struct ImageUrl {
1266 pub url: String,
1267 #[serde(default)]
1268 pub detail: ImageDetail,
1269}
1270
1271#[cfg(test)]
1276mod tests {
1277 use super::*;
1278 use serde_json::json;
1279
1280 #[tokio::test]
1282 async fn test_chat_completion() {
1283 let sample_chat_response = json!({
1285 "model": "llama3.2",
1286 "created_at": "2023-08-04T19:22:45.499127Z",
1287 "message": {
1288 "role": "assistant",
1289 "content": "The sky is blue because of Rayleigh scattering.",
1290 "images": null,
1291 "tool_calls": [
1292 {
1293 "type": "function",
1294 "function": {
1295 "name": "get_current_weather",
1296 "arguments": {
1297 "location": "San Francisco, CA",
1298 "format": "celsius"
1299 }
1300 }
1301 }
1302 ]
1303 },
1304 "done": true,
1305 "total_duration": 8000000000u64,
1306 "load_duration": 6000000u64,
1307 "prompt_eval_count": 61u64,
1308 "prompt_eval_duration": 400000000u64,
1309 "eval_count": 468u64,
1310 "eval_duration": 7700000000u64
1311 });
1312 let sample_text = sample_chat_response.to_string();
1313
1314 let chat_resp: CompletionResponse =
1315 serde_json::from_str(&sample_text).expect("Invalid JSON structure");
1316 let conv: completion::CompletionResponse<CompletionResponse> =
1317 chat_resp.try_into().unwrap();
1318 assert!(
1319 !conv.choice.is_empty(),
1320 "Expected non-empty choice in chat response"
1321 );
1322 }
1323
1324 #[test]
1326 fn test_message_conversion() {
1327 let provider_msg = Message::User {
1329 content: "Test message".to_owned(),
1330 images: None,
1331 name: None,
1332 };
1333 let comp_msg: crate::completion::Message = provider_msg.into();
1335 match comp_msg {
1336 crate::completion::Message::User { content } => {
1337 let first_content = content.first();
1339 match first_content {
1341 crate::completion::message::UserContent::Text(text_struct) => {
1342 assert_eq!(text_struct.text, "Test message");
1343 }
1344 _ => panic!("Expected text content in conversion"),
1345 }
1346 }
1347 _ => panic!("Conversion from provider Message to completion Message failed"),
1348 }
1349 }
1350
1351 #[test]
1353 fn test_tool_definition_conversion() {
1354 let internal_tool = crate::completion::ToolDefinition {
1356 name: "get_current_weather".to_owned(),
1357 description: "Get the current weather for a location".to_owned(),
1358 parameters: json!({
1359 "type": "object",
1360 "properties": {
1361 "location": {
1362 "type": "string",
1363 "description": "The location to get the weather for, e.g. San Francisco, CA"
1364 },
1365 "format": {
1366 "type": "string",
1367 "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
1368 "enum": ["celsius", "fahrenheit"]
1369 }
1370 },
1371 "required": ["location", "format"]
1372 }),
1373 };
1374 let ollama_tool: ToolDefinition = internal_tool.into();
1376 assert_eq!(ollama_tool.type_field, "function");
1377 assert_eq!(ollama_tool.function.name, "get_current_weather");
1378 assert_eq!(
1379 ollama_tool.function.description,
1380 "Get the current weather for a location"
1381 );
1382 let params = &ollama_tool.function.parameters;
1384 assert_eq!(params["properties"]["location"]["type"], "string");
1385 }
1386
1387 #[tokio::test]
1389 async fn test_chat_completion_with_thinking() {
1390 let sample_response = json!({
1391 "model": "qwen-thinking",
1392 "created_at": "2023-08-04T19:22:45.499127Z",
1393 "message": {
1394 "role": "assistant",
1395 "content": "The answer is 42.",
1396 "thinking": "Let me think about this carefully. The question asks for the meaning of life...",
1397 "images": null,
1398 "tool_calls": []
1399 },
1400 "done": true,
1401 "total_duration": 8000000000u64,
1402 "load_duration": 6000000u64,
1403 "prompt_eval_count": 61u64,
1404 "prompt_eval_duration": 400000000u64,
1405 "eval_count": 468u64,
1406 "eval_duration": 7700000000u64
1407 });
1408
1409 let chat_resp: CompletionResponse =
1410 serde_json::from_value(sample_response).expect("Failed to deserialize");
1411
1412 if let Message::Assistant {
1414 thinking, content, ..
1415 } = &chat_resp.message
1416 {
1417 assert_eq!(
1418 thinking.as_ref().unwrap(),
1419 "Let me think about this carefully. The question asks for the meaning of life..."
1420 );
1421 assert_eq!(content, "The answer is 42.");
1422 } else {
1423 panic!("Expected Assistant message");
1424 }
1425 }
1426
1427 #[tokio::test]
1429 async fn test_chat_completion_without_thinking() {
1430 let sample_response = json!({
1431 "model": "llama3.2",
1432 "created_at": "2023-08-04T19:22:45.499127Z",
1433 "message": {
1434 "role": "assistant",
1435 "content": "Hello!",
1436 "images": null,
1437 "tool_calls": []
1438 },
1439 "done": true,
1440 "total_duration": 8000000000u64,
1441 "load_duration": 6000000u64,
1442 "prompt_eval_count": 10u64,
1443 "prompt_eval_duration": 400000000u64,
1444 "eval_count": 5u64,
1445 "eval_duration": 7700000000u64
1446 });
1447
1448 let chat_resp: CompletionResponse =
1449 serde_json::from_value(sample_response).expect("Failed to deserialize");
1450
1451 if let Message::Assistant {
1453 thinking, content, ..
1454 } = &chat_resp.message
1455 {
1456 assert!(thinking.is_none());
1457 assert_eq!(content, "Hello!");
1458 } else {
1459 panic!("Expected Assistant message");
1460 }
1461 }
1462
1463 #[test]
1465 fn test_streaming_response_with_thinking() {
1466 let sample_chunk = json!({
1467 "model": "qwen-thinking",
1468 "created_at": "2023-08-04T19:22:45.499127Z",
1469 "message": {
1470 "role": "assistant",
1471 "content": "",
1472 "thinking": "Analyzing the problem...",
1473 "images": null,
1474 "tool_calls": []
1475 },
1476 "done": false
1477 });
1478
1479 let chunk: CompletionResponse =
1480 serde_json::from_value(sample_chunk).expect("Failed to deserialize");
1481
1482 if let Message::Assistant {
1483 thinking, content, ..
1484 } = &chunk.message
1485 {
1486 assert_eq!(thinking.as_ref().unwrap(), "Analyzing the problem...");
1487 assert_eq!(content, "");
1488 } else {
1489 panic!("Expected Assistant message");
1490 }
1491 }
1492
1493 #[test]
1495 fn test_message_conversion_with_thinking() {
1496 let reasoning_content = crate::message::Reasoning::new("Step 1: Consider the problem");
1498
1499 let internal_msg = crate::message::Message::Assistant {
1500 id: None,
1501 content: crate::OneOrMany::many(vec![
1502 crate::message::AssistantContent::Reasoning(reasoning_content),
1503 crate::message::AssistantContent::Text(crate::message::Text::new(
1504 "The answer is X".to_string(),
1505 )),
1506 ])
1507 .unwrap(),
1508 };
1509
1510 let provider_msgs: Vec<Message> = internal_msg.try_into().unwrap();
1512 assert_eq!(provider_msgs.len(), 1);
1513
1514 if let Message::Assistant {
1515 thinking, content, ..
1516 } = &provider_msgs[0]
1517 {
1518 assert_eq!(thinking.as_ref().unwrap(), "Step 1: Consider the problem");
1519 assert_eq!(content, "The answer is X");
1520 } else {
1521 panic!("Expected Assistant message with thinking");
1522 }
1523 }
1524
1525 #[test]
1527 fn test_empty_thinking_content() {
1528 let sample_response = json!({
1529 "model": "llama3.2",
1530 "created_at": "2023-08-04T19:22:45.499127Z",
1531 "message": {
1532 "role": "assistant",
1533 "content": "Response",
1534 "thinking": "",
1535 "images": null,
1536 "tool_calls": []
1537 },
1538 "done": true,
1539 "total_duration": 8000000000u64,
1540 "load_duration": 6000000u64,
1541 "prompt_eval_count": 10u64,
1542 "prompt_eval_duration": 400000000u64,
1543 "eval_count": 5u64,
1544 "eval_duration": 7700000000u64
1545 });
1546
1547 let chat_resp: CompletionResponse =
1548 serde_json::from_value(sample_response).expect("Failed to deserialize");
1549
1550 if let Message::Assistant {
1551 thinking, content, ..
1552 } = &chat_resp.message
1553 {
1554 assert_eq!(thinking.as_ref().unwrap(), "");
1556 assert_eq!(content, "Response");
1557 } else {
1558 panic!("Expected Assistant message");
1559 }
1560 }
1561
1562 #[test]
1564 fn test_thinking_with_tool_calls() {
1565 let sample_response = json!({
1566 "model": "qwen-thinking",
1567 "created_at": "2023-08-04T19:22:45.499127Z",
1568 "message": {
1569 "role": "assistant",
1570 "content": "Let me check the weather.",
1571 "thinking": "User wants weather info, I should use the weather tool",
1572 "images": null,
1573 "tool_calls": [
1574 {
1575 "type": "function",
1576 "function": {
1577 "name": "get_weather",
1578 "arguments": {
1579 "location": "San Francisco"
1580 }
1581 }
1582 }
1583 ]
1584 },
1585 "done": true,
1586 "total_duration": 8000000000u64,
1587 "load_duration": 6000000u64,
1588 "prompt_eval_count": 30u64,
1589 "prompt_eval_duration": 400000000u64,
1590 "eval_count": 50u64,
1591 "eval_duration": 7700000000u64
1592 });
1593
1594 let chat_resp: CompletionResponse =
1595 serde_json::from_value(sample_response).expect("Failed to deserialize");
1596
1597 if let Message::Assistant {
1598 thinking,
1599 content,
1600 tool_calls,
1601 ..
1602 } = &chat_resp.message
1603 {
1604 assert_eq!(
1605 thinking.as_ref().unwrap(),
1606 "User wants weather info, I should use the weather tool"
1607 );
1608 assert_eq!(content, "Let me check the weather.");
1609 assert_eq!(tool_calls.len(), 1);
1610 assert_eq!(tool_calls[0].function.name, "get_weather");
1611 } else {
1612 panic!("Expected Assistant message with thinking and tool calls");
1613 }
1614 }
1615
1616 #[test]
1618 fn test_completion_request_with_think_param() {
1619 use crate::OneOrMany;
1620 use crate::completion::Message as CompletionMessage;
1621 use crate::message::{Text, UserContent};
1622
1623 let completion_request = CompletionRequest {
1625 model: None,
1626 preamble: Some("You are a helpful assistant.".to_string()),
1627 chat_history: OneOrMany::one(CompletionMessage::User {
1628 content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1629 }),
1630 documents: vec![],
1631 tools: vec![],
1632 temperature: Some(0.7),
1633 max_tokens: Some(1024),
1634 tool_choice: None,
1635 additional_params: Some(json!({
1636 "think": true,
1637 "keep_alive": "-1m",
1638 "num_ctx": 4096
1639 })),
1640 output_schema: None,
1641 };
1642
1643 let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1645 .expect("Failed to create Ollama request");
1646
1647 let serialized =
1649 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1650
1651 let expected = json!({
1657 "model": "qwen3:8b",
1658 "messages": [
1659 {
1660 "role": "system",
1661 "content": "You are a helpful assistant."
1662 },
1663 {
1664 "role": "user",
1665 "content": "What is 2 + 2?"
1666 }
1667 ],
1668 "temperature": 0.7,
1669 "stream": false,
1670 "think": true,
1671 "max_tokens": 1024,
1672 "keep_alive": "-1m",
1673 "options": {
1674 "temperature": 0.7,
1675 "num_ctx": 4096
1676 }
1677 });
1678
1679 assert_eq!(serialized, expected);
1680 }
1681
1682 #[test]
1684 fn test_completion_request_with_level_low_think_param() {
1685 use crate::OneOrMany;
1686 use crate::completion::Message as CompletionMessage;
1687 use crate::message::{Text, UserContent};
1688
1689 let completion_request = CompletionRequest {
1691 model: None,
1692 preamble: Some("You are a helpful assistant.".to_string()),
1693 chat_history: OneOrMany::one(CompletionMessage::User {
1694 content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1695 }),
1696 documents: vec![],
1697 tools: vec![],
1698 temperature: Some(0.7),
1699 max_tokens: Some(1024),
1700 tool_choice: None,
1701 additional_params: Some(json!({
1702 "think": "low",
1703 "keep_alive": "-1m",
1704 "num_ctx": 4096
1705 })),
1706 output_schema: None,
1707 };
1708
1709 let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1711 .expect("Failed to create Ollama request");
1712
1713 let serialized =
1715 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1716
1717 let expected = json!({
1723 "model": "qwen3:8b",
1724 "messages": [
1725 {
1726 "role": "system",
1727 "content": "You are a helpful assistant."
1728 },
1729 {
1730 "role": "user",
1731 "content": "What is 2 + 2?"
1732 }
1733 ],
1734 "temperature": 0.7,
1735 "stream": false,
1736 "think": "low",
1737 "max_tokens": 1024,
1738 "keep_alive": "-1m",
1739 "options": {
1740 "temperature": 0.7,
1741 "num_ctx": 4096
1742 }
1743 });
1744
1745 assert_eq!(serialized, expected);
1746 }
1747
1748 #[test]
1750 fn test_completion_request_with_level_medium_think_param() {
1751 use crate::OneOrMany;
1752 use crate::completion::Message as CompletionMessage;
1753 use crate::message::{Text, UserContent};
1754
1755 let completion_request = CompletionRequest {
1757 model: None,
1758 preamble: Some("You are a helpful assistant.".to_string()),
1759 chat_history: OneOrMany::one(CompletionMessage::User {
1760 content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1761 }),
1762 documents: vec![],
1763 tools: vec![],
1764 temperature: Some(0.7),
1765 max_tokens: Some(1024),
1766 tool_choice: None,
1767 additional_params: Some(json!({
1768 "think": "medium",
1769 "keep_alive": "-1m",
1770 "num_ctx": 4096
1771 })),
1772 output_schema: None,
1773 };
1774
1775 let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1777 .expect("Failed to create Ollama request");
1778
1779 let serialized =
1781 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1782
1783 let expected = json!({
1789 "model": "qwen3:8b",
1790 "messages": [
1791 {
1792 "role": "system",
1793 "content": "You are a helpful assistant."
1794 },
1795 {
1796 "role": "user",
1797 "content": "What is 2 + 2?"
1798 }
1799 ],
1800 "temperature": 0.7,
1801 "stream": false,
1802 "think": "medium",
1803 "max_tokens": 1024,
1804 "keep_alive": "-1m",
1805 "options": {
1806 "temperature": 0.7,
1807 "num_ctx": 4096
1808 }
1809 });
1810
1811 assert_eq!(serialized, expected);
1812 }
1813
1814 #[test]
1816 fn test_completion_request_with_level_high_think_param() {
1817 use crate::OneOrMany;
1818 use crate::completion::Message as CompletionMessage;
1819 use crate::message::{Text, UserContent};
1820
1821 let completion_request = CompletionRequest {
1823 model: None,
1824 preamble: Some("You are a helpful assistant.".to_string()),
1825 chat_history: OneOrMany::one(CompletionMessage::User {
1826 content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1827 }),
1828 documents: vec![],
1829 tools: vec![],
1830 temperature: Some(0.7),
1831 max_tokens: Some(1024),
1832 tool_choice: None,
1833 additional_params: Some(json!({
1834 "think": "high",
1835 "keep_alive": "-1m",
1836 "num_ctx": 4096
1837 })),
1838 output_schema: None,
1839 };
1840
1841 let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
1843 .expect("Failed to create Ollama request");
1844
1845 let serialized =
1847 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1848
1849 let expected = json!({
1855 "model": "qwen3:8b",
1856 "messages": [
1857 {
1858 "role": "system",
1859 "content": "You are a helpful assistant."
1860 },
1861 {
1862 "role": "user",
1863 "content": "What is 2 + 2?"
1864 }
1865 ],
1866 "temperature": 0.7,
1867 "stream": false,
1868 "think": "high",
1869 "max_tokens": 1024,
1870 "keep_alive": "-1m",
1871 "options": {
1872 "temperature": 0.7,
1873 "num_ctx": 4096
1874 }
1875 });
1876
1877 assert_eq!(serialized, expected);
1878 }
1879
1880 #[test]
1882 fn test_completion_request_with_level_invalid_think_param() {
1883 use crate::OneOrMany;
1884 use crate::completion::Message as CompletionMessage;
1885 use crate::message::{Text, UserContent};
1886
1887 let completion_request = CompletionRequest {
1889 model: None,
1890 preamble: Some("You are a helpful assistant.".to_string()),
1891 chat_history: OneOrMany::one(CompletionMessage::User {
1892 content: OneOrMany::one(UserContent::Text(Text::new("What is 2 + 2?".to_string()))),
1893 }),
1894 documents: vec![],
1895 tools: vec![],
1896 temperature: Some(0.7),
1897 max_tokens: Some(1024),
1898 tool_choice: None,
1899 additional_params: Some(json!({
1900 "think": "invalid",
1901 "keep_alive": "-1m",
1902 "num_ctx": 4096
1903 })),
1904 output_schema: None,
1905 };
1906
1907 let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request));
1909
1910 assert!(ollama_request.is_err())
1911 }
1912
1913 #[test]
1915 fn test_completion_request_with_think_false_default() {
1916 use crate::OneOrMany;
1917 use crate::completion::Message as CompletionMessage;
1918 use crate::message::{Text, UserContent};
1919
1920 let completion_request = CompletionRequest {
1922 model: None,
1923 preamble: Some("You are a helpful assistant.".to_string()),
1924 chat_history: OneOrMany::one(CompletionMessage::User {
1925 content: OneOrMany::one(UserContent::Text(Text::new("Hello!".to_string()))),
1926 }),
1927 documents: vec![],
1928 tools: vec![],
1929 temperature: Some(0.5),
1930 max_tokens: None,
1931 tool_choice: None,
1932 additional_params: None,
1933 output_schema: None,
1934 };
1935
1936 let ollama_request = OllamaCompletionRequest::try_from(("llama3.2", completion_request))
1938 .expect("Failed to create Ollama request");
1939
1940 let serialized =
1942 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
1943
1944 let expected = json!({
1946 "model": "llama3.2",
1947 "messages": [
1948 {
1949 "role": "system",
1950 "content": "You are a helpful assistant."
1951 },
1952 {
1953 "role": "user",
1954 "content": "Hello!"
1955 }
1956 ],
1957 "temperature": 0.5,
1958 "stream": false,
1959 "think": false,
1960 "options": {
1961 "temperature": 0.5
1962 }
1963 });
1964
1965 assert_eq!(serialized, expected);
1966 }
1967
1968 #[test]
1969 fn test_completion_request_with_output_schema() {
1970 use crate::OneOrMany;
1971 use crate::completion::Message as CompletionMessage;
1972 use crate::message::{Text, UserContent};
1973
1974 let schema: schemars::Schema = serde_json::from_value(json!({
1975 "type": "object",
1976 "properties": {
1977 "age": { "type": "integer" },
1978 "available": { "type": "boolean" }
1979 },
1980 "required": ["age", "available"]
1981 }))
1982 .expect("Failed to parse schema");
1983
1984 let completion_request = CompletionRequest {
1985 model: Some("llama3.1".to_string()),
1986 preamble: None,
1987 chat_history: OneOrMany::one(CompletionMessage::User {
1988 content: OneOrMany::one(UserContent::Text(Text::new(
1989 "How old is Ollama?".to_string(),
1990 ))),
1991 }),
1992 documents: vec![],
1993 tools: vec![],
1994 temperature: None,
1995 max_tokens: None,
1996 tool_choice: None,
1997 additional_params: None,
1998 output_schema: Some(schema),
1999 };
2000
2001 let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
2002 .expect("Failed to create Ollama request");
2003
2004 let serialized =
2005 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
2006
2007 let format = serialized
2008 .get("format")
2009 .expect("format field should be present");
2010 assert_eq!(
2011 *format,
2012 json!({
2013 "type": "object",
2014 "properties": {
2015 "age": { "type": "integer" },
2016 "available": { "type": "boolean" }
2017 },
2018 "required": ["age", "available"]
2019 })
2020 );
2021 }
2022
2023 #[test]
2024 fn test_completion_request_without_output_schema() {
2025 use crate::OneOrMany;
2026 use crate::completion::Message as CompletionMessage;
2027 use crate::message::{Text, UserContent};
2028
2029 let completion_request = CompletionRequest {
2030 model: Some("llama3.1".to_string()),
2031 preamble: None,
2032 chat_history: OneOrMany::one(CompletionMessage::User {
2033 content: OneOrMany::one(UserContent::Text(Text::new("Hello!".to_string()))),
2034 }),
2035 documents: vec![],
2036 tools: vec![],
2037 temperature: None,
2038 max_tokens: None,
2039 tool_choice: None,
2040 additional_params: None,
2041 output_schema: None,
2042 };
2043
2044 let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
2045 .expect("Failed to create Ollama request");
2046
2047 let serialized =
2048 serde_json::to_value(&ollama_request).expect("Failed to serialize request");
2049
2050 assert!(
2051 serialized.get("format").is_none(),
2052 "format field should be absent when output_schema is None"
2053 );
2054 }
2055
2056 #[test]
2057 fn test_client_initialization() {
2058 let _client = crate::providers::ollama::Client::new(Nothing).expect("Client::new() failed");
2059 let _client_from_builder = crate::providers::ollama::Client::builder()
2060 .api_key(Nothing)
2061 .build()
2062 .expect("Client::builder() failed");
2063 }
2064
2065 #[test]
2066 fn ndjson_buffer_returns_complete_lines_in_single_chunk() {
2067 let mut buf = NdjsonBuffer::new();
2068 let lines = buf.decode(b"{\"a\":1}\n{\"b\":2}\n");
2069 assert_eq!(lines, vec![b"{\"a\":1}".to_vec(), b"{\"b\":2}".to_vec()]);
2070 }
2071
2072 #[test]
2073 fn ndjson_buffer_reassembles_line_split_across_chunks() {
2074 let mut buf = NdjsonBuffer::new();
2075
2076 assert!(buf.decode(b"{\"model\":\"llama\",\"mes").is_empty());
2077
2078 let lines = buf.decode(b"sage\":\"hi\"}\n{\"done\"");
2079 assert_eq!(
2080 lines,
2081 vec![b"{\"model\":\"llama\",\"message\":\"hi\"}".to_vec()]
2082 );
2083
2084 let lines = buf.decode(b":true}\n");
2085 assert_eq!(lines, vec![b"{\"done\":true}".to_vec()]);
2086 }
2087
2088 #[test]
2089 fn ndjson_buffer_skips_blank_lines() {
2090 let mut buf = NdjsonBuffer::new();
2091 let lines = buf.decode(b"\n{\"a\":1}\n\n");
2092 assert_eq!(lines, vec![b"{\"a\":1}".to_vec()]);
2093 }
2094
2095 #[test]
2096 fn ndjson_buffer_retains_unterminated_trailing_data() {
2097 let mut buf = NdjsonBuffer::new();
2098 let lines = buf.decode(b"{\"a\":1}\n{\"b\":2");
2099 assert_eq!(lines, vec![b"{\"a\":1}".to_vec()]);
2100 let lines = buf.decode(b"}\n");
2101 assert_eq!(lines, vec![b"{\"b\":2}".to_vec()]);
2102 }
2103
2104 #[test]
2105 fn ndjson_buffer_handles_empty_chunk() {
2106 let mut buf = NdjsonBuffer::new();
2107 assert!(buf.decode(b"").is_empty());
2108
2109 buf.decode(b"{\"a\":1");
2110 assert!(buf.decode(b"").is_empty());
2111
2112 let lines = buf.decode(b"}\n");
2113 assert_eq!(lines, vec![b"{\"a\":1}".to_vec()]);
2114 }
2115
2116 #[test]
2117 fn ndjson_buffer_handles_multi_byte_utf8_split_across_chunks() {
2118 let mut buf = NdjsonBuffer::new();
2122 assert!(buf.decode(&[0xd0]).is_empty());
2123 assert!(buf.decode(&[0xb8, 0xd0, 0xb7, 0xd0]).is_empty());
2124 assert!(
2125 buf.decode(&[
2126 0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xbd, 0xd0, 0xb8
2127 ])
2128 .is_empty()
2129 );
2130
2131 let lines = buf.decode(b"\n");
2132 assert_eq!(lines.len(), 1);
2133 assert_eq!(std::str::from_utf8(&lines[0]).unwrap(), "известни");
2134 }
2135
2136 #[test]
2137 fn ndjson_buffer_yields_parseable_chunks_when_split_arbitrarily() {
2138 let original = concat!(
2139 "{\"model\":\"llama3.2\",\"message\":{\"role\":\"assistant\",\"content\":\"hi\"},\"done\":false}\n",
2140 "{\"model\":\"llama3.2\",\"message\":{\"role\":\"assistant\",\"content\":\"\"},\"done\":true}\n",
2141 );
2142
2143 let mut buf = NdjsonBuffer::new();
2144 let mut received = Vec::new();
2145 for byte in original.as_bytes() {
2146 for line in buf.decode(std::slice::from_ref(byte)) {
2147 let parsed: serde_json::Value =
2148 serde_json::from_slice(&line).expect("each drained line must be valid JSON");
2149 received.push(parsed);
2150 }
2151 }
2152
2153 assert_eq!(received.len(), 2);
2154 assert_eq!(received[0]["message"]["content"], "hi");
2155 assert_eq!(received[1]["done"], true);
2156 }
2157}