1use crate::client::{
41 self, ApiKey, Capabilities, Capable, DebugExt, ModelLister, Nothing, Provider, ProviderBuilder,
42 ProviderClient,
43};
44use crate::completion::{GetTokenUsage, Usage};
45use crate::http_client::{self, HttpClientExt};
46use crate::message::DocumentSourceKind;
47use crate::model::{Model, ModelList, ModelListingError};
48use crate::streaming::RawStreamingChoice;
49use crate::{
50 OneOrMany,
51 completion::{self, CompletionError, CompletionRequest},
52 embeddings::{self, EmbeddingError},
53 json_utils, message,
54 message::{ImageDetail, Text},
55 streaming,
56 wasm_compat::{WasmCompatSend, WasmCompatSync},
57};
58use async_stream::try_stream;
59use bytes::Bytes;
60use futures::StreamExt;
61use serde::{Deserialize, Serialize};
62use serde_json::{Value, json};
63use std::{convert::TryFrom, str::FromStr};
64use tracing::info_span;
65use tracing_futures::Instrument;
/// Default base URL of a locally running Ollama server.
const OLLAMA_API_BASE_URL: &str = "http://localhost:11434";
69
/// Optional API key for Ollama.
///
/// A local Ollama server needs no key, so `None` (the default) is valid;
/// `Some` is used when talking to an authenticated remote deployment.
#[derive(Debug, Default, Clone)]
pub struct OllamaApiKey(Option<String>);
74
75impl ApiKey for OllamaApiKey {
76 fn into_header(
77 self,
78 ) -> Option<http_client::Result<(http::header::HeaderName, http::header::HeaderValue)>> {
79 self.0.map(http_client::make_auth_header)
80 }
81}
82
/// The `Nothing` marker (no key supplied) maps to an absent API key.
impl From<Nothing> for OllamaApiKey {
    fn from(_: Nothing) -> Self {
        Self(None)
    }
}
88
89impl From<String> for OllamaApiKey {
90 fn from(key: String) -> Self {
91 if key.is_empty() {
92 Self(None)
93 } else {
94 Self(Some(key))
95 }
96 }
97}
98
99impl From<&str> for OllamaApiKey {
100 fn from(key: &str) -> Self {
101 if key.is_empty() {
102 Self(None)
103 } else {
104 Self(Some(key.to_owned()))
105 }
106 }
107}
108
/// Zero-sized provider extension marker identifying Ollama to the generic client.
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaExt;
111
/// Zero-sized builder marker for constructing an Ollama client.
#[derive(Debug, Default, Clone, Copy)]
pub struct OllamaBuilder;
114
impl Provider for OllamaExt {
    type Builder = OllamaBuilder;
    // `api/tags` is a cheap endpoint used to verify the server is reachable.
    const VERIFY_PATH: &'static str = "api/tags";
}
119
/// Capability matrix for Ollama: completions, embeddings, and model listing
/// are supported; transcription and (feature-gated) image/audio generation
/// are not (`Nothing`).
impl<H> Capabilities<H> for OllamaExt {
    type Completion = Capable<CompletionModel<H>>;
    type Transcription = Nothing;
    type Embeddings = Capable<EmbeddingModel<H>>;
    type ModelListing = Capable<OllamaModelLister<H>>;
    #[cfg(feature = "image")]
    type ImageGeneration = Nothing;

    #[cfg(feature = "audio")]
    type AudioGeneration = Nothing;
}
131
// Default debug formatting is sufficient for this stateless extension.
impl DebugExt for OllamaExt {}
133
impl ProviderBuilder for OllamaBuilder {
    type Extension<H>
        = OllamaExt
    where
        H: HttpClientExt;
    type ApiKey = OllamaApiKey;

    const BASE_URL: &'static str = OLLAMA_API_BASE_URL;

    /// The extension is stateless, so building it ignores the builder
    /// contents and cannot fail.
    fn build<H>(
        _builder: &client::ClientBuilder<Self, Self::ApiKey, H>,
    ) -> http_client::Result<Self::Extension<H>>
    where
        H: HttpClientExt,
    {
        Ok(OllamaExt)
    }
}
152
/// Ollama client, generic over the HTTP backend (reqwest by default).
pub type Client<H = reqwest::Client> = client::Client<OllamaExt, H>;
/// Builder for [`Client`]; `H` is `Missing` until an HTTP backend is chosen.
pub type ClientBuilder<H = crate::markers::Missing> =
    client::ClientBuilder<OllamaBuilder, OllamaApiKey, H>;
156
157impl ProviderClient for Client {
158 type Input = OllamaApiKey;
159 type Error = crate::client::ProviderClientError;
160
161 fn from_env() -> Result<Self, Self::Error> {
162 let api_base = crate::client::optional_env_var("OLLAMA_API_BASE_URL")?
163 .unwrap_or_else(|| OLLAMA_API_BASE_URL.to_string());
164
165 let api_key = crate::client::optional_env_var("OLLAMA_API_KEY")?
166 .map(OllamaApiKey::from)
167 .unwrap_or_default();
168
169 Self::builder()
170 .api_key(api_key)
171 .base_url(&api_base)
172 .build()
173 .map_err(Into::into)
174 }
175
176 fn from_val(api_key: Self::Input) -> Result<Self, Self::Error> {
177 Self::builder().api_key(api_key).build().map_err(Into::into)
178 }
179}
180
/// Error payload returned by the Ollama API.
#[derive(Debug, Deserialize)]
struct ApiErrorResponse {
    message: String,
}
187
/// Untagged success-or-error envelope: serde tries `T` first, then the
/// error shape, since the API signals failures in the body.
#[derive(Debug, Deserialize)]
#[serde(untagged)]
enum ApiResponse<T> {
    Ok(T),
    Err(ApiErrorResponse),
}
194
/// `all-minilm` embedding model identifier (384 dimensions).
pub const ALL_MINILM: &str = "all-minilm";
/// `nomic-embed-text` embedding model identifier (768 dimensions).
pub const NOMIC_EMBED_TEXT: &str = "nomic-embed-text";
199
200fn model_dimensions_from_identifier(identifier: &str) -> Option<usize> {
201 match identifier {
202 ALL_MINILM => Some(384),
203 NOMIC_EMBED_TEXT => Some(768),
204 _ => None,
205 }
206}
207
/// Response body of Ollama's `api/embed` endpoint: one vector per input,
/// plus optional timing/usage metadata.
#[derive(Debug, Serialize, Deserialize)]
pub struct EmbeddingResponse {
    pub model: String,
    pub embeddings: Vec<Vec<f64>>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
}
219
220impl From<ApiErrorResponse> for EmbeddingError {
221 fn from(err: ApiErrorResponse) -> Self {
222 EmbeddingError::ProviderError(err.message)
223 }
224}
225
226impl From<ApiResponse<EmbeddingResponse>> for Result<EmbeddingResponse, EmbeddingError> {
227 fn from(value: ApiResponse<EmbeddingResponse>) -> Self {
228 match value {
229 ApiResponse::Ok(response) => Ok(response),
230 ApiResponse::Err(err) => Err(EmbeddingError::ProviderError(err.message)),
231 }
232 }
233}
234
/// Handle to an Ollama embedding model, generic over the HTTP backend.
#[derive(Clone)]
pub struct EmbeddingModel<T = reqwest::Client> {
    client: Client<T>,
    pub model: String,
    // Expected output dimensionality; 0 when unknown.
    ndims: usize,
}
243
244impl<T> EmbeddingModel<T> {
245 pub fn new(client: Client<T>, model: impl Into<String>, ndims: usize) -> Self {
246 Self {
247 client,
248 model: model.into(),
249 ndims,
250 }
251 }
252
253 pub fn with_model(client: Client<T>, model: &str, ndims: usize) -> Self {
254 Self {
255 client,
256 model: model.into(),
257 ndims,
258 }
259 }
260}
261
impl<T> embeddings::EmbeddingModel for EmbeddingModel<T>
where
    T: HttpClientExt + Clone + 'static,
{
    type Client = Client<T>;

    /// Build a model from a client. When `dims` is not given, fall back to
    /// the known-model dimension table, then to 0 (unknown).
    fn make(client: &Self::Client, model: impl Into<String>, dims: Option<usize>) -> Self {
        let model = model.into();
        let dims = dims
            .or(model_dimensions_from_identifier(&model))
            .unwrap_or_default();
        Self::new(client.clone(), model, dims)
    }

    const MAX_DOCUMENTS: usize = 1024;
    fn ndims(&self) -> usize {
        self.ndims
    }

    /// POST the documents to Ollama's `api/embed` endpoint and pair each
    /// returned vector with its source document, in order.
    ///
    /// # Errors
    /// Fails on HTTP/transport errors, non-success status codes (the body is
    /// surfaced as a `ProviderError`), JSON decode failures, or when the
    /// server returns a different number of embeddings than inputs.
    async fn embed_texts(
        &self,
        documents: impl IntoIterator<Item = String>,
    ) -> Result<Vec<embeddings::Embedding>, EmbeddingError> {
        let docs: Vec<String> = documents.into_iter().collect();

        let body = serde_json::to_vec(&json!({
            "model": self.model,
            "input": docs
        }))?;

        let req = self
            .client
            .post("api/embed")?
            .body(body)
            .map_err(|e| EmbeddingError::HttpError(e.into()))?;

        let response = self.client.send::<_, Vec<u8>>(req).await?;

        if !response.status().is_success() {
            let text = http_client::text(response).await?;
            return Err(EmbeddingError::ProviderError(text));
        }

        let bytes: Vec<u8> = response.into_body().await?;

        let api_resp: EmbeddingResponse = serde_json::from_slice(&bytes)?;

        // Guard the zip below: a mismatched count would silently drop data.
        if api_resp.embeddings.len() != docs.len() {
            return Err(EmbeddingError::ResponseError(
                "Number of returned embeddings does not match input".into(),
            ));
        }
        Ok(api_resp
            .embeddings
            .into_iter()
            .zip(docs.into_iter())
            .map(|(vec, document)| embeddings::Embedding { document, vec })
            .collect())
    }
}
322
/// `llama3.2` chat model identifier.
pub const LLAMA3_2: &str = "llama3.2";
/// `llava` multimodal model identifier.
pub const LLAVA: &str = "llava";
/// `mistral` chat model identifier.
pub const MISTRAL: &str = "mistral";
328
/// A single response object from Ollama's `api/chat` endpoint. In streaming
/// mode each NDJSON line deserializes into one of these; the final chunk has
/// `done == true` and carries the timing/usage fields.
#[derive(Debug, Serialize, Deserialize)]
pub struct CompletionResponse {
    pub model: String,
    pub created_at: String,
    pub message: Message,
    pub done: bool,
    #[serde(default)]
    pub done_reason: Option<String>,
    #[serde(default)]
    pub total_duration: Option<u64>,
    #[serde(default)]
    pub load_duration: Option<u64>,
    #[serde(default)]
    pub prompt_eval_count: Option<u64>,
    #[serde(default)]
    pub prompt_eval_duration: Option<u64>,
    #[serde(default)]
    pub eval_count: Option<u64>,
    #[serde(default)]
    pub eval_duration: Option<u64>,
}
impl TryFrom<CompletionResponse> for completion::CompletionResponse<CompletionResponse> {
    type Error = CompletionError;
    /// Convert the raw Ollama response into the generic completion response.
    ///
    /// # Errors
    /// Fails when the message is not an assistant message, or when it carries
    /// neither text content nor tool calls ("No content provided").
    fn try_from(resp: CompletionResponse) -> Result<Self, Self::Error> {
        match resp.message {
            Message::Assistant {
                content,
                thinking,
                tool_calls,
                ..
            } => {
                let mut assistant_contents = Vec::new();
                // Empty text is skipped so tool-call-only responses stay valid.
                if !content.is_empty() {
                    assistant_contents.push(completion::AssistantContent::text(&content));
                }
                // Ollama does not assign tool-call ids, so the function name
                // is reused as both id and name.
                for tc in tool_calls.iter() {
                    assistant_contents.push(completion::AssistantContent::tool_call(
                        tc.function.name.clone(),
                        tc.function.name.clone(),
                        tc.function.arguments.clone(),
                    ));
                }
                let choice = OneOrMany::many(assistant_contents).map_err(|_| {
                    CompletionError::ResponseError("No content provided".to_owned())
                })?;
                let prompt_tokens = resp.prompt_eval_count.unwrap_or(0);
                let completion_tokens = resp.eval_count.unwrap_or(0);

                // Rebuild the raw response by hand: the assistant message was
                // partially moved out of `resp` by the match above.
                let raw_response = CompletionResponse {
                    model: resp.model,
                    created_at: resp.created_at,
                    done: resp.done,
                    done_reason: resp.done_reason,
                    total_duration: resp.total_duration,
                    load_duration: resp.load_duration,
                    prompt_eval_count: resp.prompt_eval_count,
                    prompt_eval_duration: resp.prompt_eval_duration,
                    eval_count: resp.eval_count,
                    eval_duration: resp.eval_duration,
                    message: Message::Assistant {
                        content,
                        thinking,
                        images: None,
                        name: None,
                        tool_calls,
                    },
                };

                Ok(completion::CompletionResponse {
                    choice,
                    usage: Usage {
                        input_tokens: prompt_tokens,
                        output_tokens: completion_tokens,
                        total_tokens: prompt_tokens + completion_tokens,
                        cached_input_tokens: 0,
                        cache_creation_input_tokens: 0,
                        reasoning_tokens: 0,
                    },
                    raw_response,
                    message_id: None,
                })
            }
            _ => Err(CompletionError::ResponseError(
                "Chat response does not include an assistant message".into(),
            )),
        }
    }
}
421
/// Wire-format request body for Ollama's `api/chat` endpoint.
#[derive(Debug, Serialize, Deserialize)]
pub(super) struct OllamaCompletionRequest {
    model: String,
    pub messages: Vec<Message>,
    #[serde(skip_serializing_if = "Option::is_none")]
    temperature: Option<f64>,
    #[serde(skip_serializing_if = "Vec::is_empty")]
    tools: Vec<ToolDefinition>,
    // Toggled to `true` by `stream()` after construction.
    pub stream: bool,
    think: Think,
    #[serde(skip_serializing_if = "Option::is_none")]
    max_tokens: Option<u64>,
    #[serde(skip_serializing_if = "Option::is_none")]
    keep_alive: Option<String>,
    // JSON schema for structured output, when requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    format: Option<schemars::Schema>,
    // Remaining `additional_params` merged with the temperature.
    options: serde_json::Value,
}
440
impl TryFrom<(&str, CompletionRequest)> for OllamaCompletionRequest {
    type Error = CompletionError;

    /// Build the Ollama wire request from a generic completion request plus a
    /// fallback model name (used when the request carries no model of its own).
    ///
    /// # Errors
    /// Fails when message conversion fails, when `think` is not a bool or one
    /// of "low"/"medium"/"high", or when `keep_alive` is not a string.
    fn try_from((model, req): (&str, CompletionRequest)) -> Result<Self, Self::Error> {
        let model = req.model.clone().unwrap_or_else(|| model.to_string());
        // Ollama has no tool_choice parameter; warn instead of failing.
        if req.tool_choice.is_some() {
            tracing::warn!("WARNING: `tool_choice` not supported for Ollama");
        }
        // Normalized documents (if any) precede the chat history.
        let mut partial_history = vec![];
        if let Some(docs) = req.normalized_documents() {
            partial_history.push(docs);
        }
        partial_history.extend(req.chat_history);

        // Optional system preamble leads the final message list.
        let mut full_history: Vec<Message> = match &req.preamble {
            Some(preamble) => vec![Message::system(preamble)],
            None => vec![],
        };

        // Each internal message may expand to several wire messages.
        full_history.extend(
            partial_history
                .into_iter()
                .map(message::Message::try_into)
                .collect::<Result<Vec<Vec<Message>>, _>>()?
                .into_iter()
                .flatten()
                .collect::<Vec<_>>(),
        );

        let mut think = Think::Bool(false);
        let mut keep_alive: Option<String> = None;

        // `think` and `keep_alive` are extracted from additional_params; the
        // remainder is merged with the temperature into `options`.
        let options = if let Some(mut extra) = req.additional_params {
            if let Some(obj) = extra.as_object_mut() {
                if let Some(think_val) = obj.remove("think") {
                    think = match think_val {
                        Value::Bool(think) => Think::Bool(think),
                        Value::String(think) => Think::Level(match think.to_lowercase().as_str() {
                            "low" => Level::Low,
                            "medium" => Level::Medium,
                            "high" => Level::High,
                            _ => {
                                return Err(CompletionError::RequestError(
                                    "`think` must be a 'low', 'medium', 'high', or bool".into(),
                                ));
                            }
                        }),
                        _ => {
                            return Err(CompletionError::RequestError(
                                "`think` must be a 'low', 'medium', 'high', or bool".into(),
                            ));
                        }
                    };
                }

                if let Some(keep_alive_val) = obj.remove("keep_alive") {
                    keep_alive = Some(
                        keep_alive_val
                            .as_str()
                            .ok_or_else(|| {
                                CompletionError::RequestError(
                                    "`keep_alive` must be a string".into(),
                                )
                            })?
                            .to_string(),
                    );
                }
            }

            json_utils::merge(json!({ "temperature": req.temperature }), extra)
        } else {
            json!({ "temperature": req.temperature })
        };

        Ok(Self {
            model: model.to_string(),
            messages: full_history,
            temperature: req.temperature,
            max_tokens: req.max_tokens,
            // Non-streaming by default; `stream()` flips this afterwards.
            stream: false,
            think,
            keep_alive,
            format: req.output_schema,
            tools: req
                .tools
                .clone()
                .into_iter()
                .map(ToolDefinition::from)
                .collect::<Vec<_>>(),
            options,
        })
    }
}
540
/// Handle to an Ollama chat model, generic over the HTTP backend.
#[derive(Clone)]
pub struct CompletionModel<T = reqwest::Client> {
    client: Client<T>,
    pub model: String,
}
546
547impl<T> CompletionModel<T> {
548 pub fn new(client: Client<T>, model: &str) -> Self {
549 Self {
550 client,
551 model: model.to_owned(),
552 }
553 }
554}
555
/// Ollama's `think` parameter: either an on/off bool or an effort level.
/// Untagged so it serializes as a bare bool or string.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(untagged)]
enum Think {
    Bool(bool),
    Level(Level),
}
562
/// Reasoning-effort levels accepted by `think` ("low"/"medium"/"high").
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
enum Level {
    Low,
    Medium,
    High,
}
570
/// Final-chunk metadata emitted at the end of a streaming completion
/// (timing and token counts from the `done == true` NDJSON line).
#[derive(Clone, Serialize, Deserialize, Debug)]
pub struct StreamingCompletionResponse {
    pub done_reason: Option<String>,
    pub total_duration: Option<u64>,
    pub load_duration: Option<u64>,
    pub prompt_eval_count: Option<u64>,
    pub prompt_eval_duration: Option<u64>,
    pub eval_count: Option<u64>,
    pub eval_duration: Option<u64>,
}
583
584impl GetTokenUsage for StreamingCompletionResponse {
585 fn token_usage(&self) -> Option<crate::completion::Usage> {
586 let mut usage = crate::completion::Usage::new();
587 let input_tokens = self.prompt_eval_count.unwrap_or_default();
588 let output_tokens = self.eval_count.unwrap_or_default();
589 usage.input_tokens = input_tokens;
590 usage.output_tokens = output_tokens;
591 usage.total_tokens = input_tokens + output_tokens;
592
593 Some(usage)
594 }
595}
596
impl<T> completion::CompletionModel for CompletionModel<T>
where
    T: HttpClientExt + Clone + Default + std::fmt::Debug + Send + 'static,
{
    type Response = CompletionResponse;
    type StreamingResponse = StreamingCompletionResponse;

    type Client = Client<T>;

    fn make(client: &Self::Client, model: impl Into<String>) -> Self {
        Self::new(client.clone(), model.into().as_str())
    }

    /// Send a non-streaming chat request to Ollama's `api/chat` endpoint.
    ///
    /// # Errors
    /// Fails on request-construction errors, transport errors, non-success
    /// status codes (body surfaced as `ProviderError`), or decode failures.
    async fn completion(
        &self,
        completion_request: CompletionRequest,
    ) -> Result<completion::CompletionResponse<Self::Response>, CompletionError> {
        // Only open a fresh tracing span when the caller hasn't set one up.
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat",
                gen_ai.operation.name = "chat",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = tracing::field::Empty,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        span.record("gen_ai.system_instructions", &completion_request.preamble);
        let request = OllamaCompletionRequest::try_from((self.model.as_ref(), completion_request))?;

        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        // The send/decode work runs inside the span via `instrument` below.
        let async_block = async move {
            let response = self.client.send::<_, Bytes>(req).await?;
            let status = response.status();
            let response_body = response.into_body().into_future().await?.to_vec();

            if !status.is_success() {
                return Err(CompletionError::ProviderError(
                    String::from_utf8_lossy(&response_body).to_string(),
                ));
            }

            let response: CompletionResponse = serde_json::from_slice(&response_body)?;
            let span = tracing::Span::current();
            span.record("gen_ai.response.model", &response.model);
            span.record(
                "gen_ai.usage.input_tokens",
                response.prompt_eval_count.unwrap_or_default(),
            );
            span.record(
                "gen_ai.usage.output_tokens",
                response.eval_count.unwrap_or_default(),
            );

            if tracing::enabled!(tracing::Level::TRACE) {
                tracing::trace!(target: "rig::completions",
                    "Ollama completion response: {}",
                    serde_json::to_string_pretty(&response)?
                );
            }

            let response: completion::CompletionResponse<CompletionResponse> =
                response.try_into()?;

            Ok(response)
        };

        tracing::Instrument::instrument(async_block, span).await
    }

    /// Send a streaming chat request. Ollama replies with NDJSON chunks,
    /// which are re-emitted as `RawStreamingChoice` items; the `done` chunk
    /// yields the final usage/timing response and ends the stream.
    async fn stream(
        &self,
        request: CompletionRequest,
    ) -> Result<streaming::StreamingCompletionResponse<Self::StreamingResponse>, CompletionError>
    {
        let span = if tracing::Span::current().is_disabled() {
            info_span!(
                target: "rig::completions",
                "chat_streaming",
                gen_ai.operation.name = "chat_streaming",
                gen_ai.provider.name = "ollama",
                gen_ai.request.model = self.model,
                gen_ai.system_instructions = tracing::field::Empty,
                gen_ai.response.id = tracing::field::Empty,
                gen_ai.response.model = self.model,
                gen_ai.usage.output_tokens = tracing::field::Empty,
                gen_ai.usage.input_tokens = tracing::field::Empty,
                gen_ai.usage.cache_read.input_tokens = tracing::field::Empty,
            )
        } else {
            tracing::Span::current()
        };

        span.record("gen_ai.system_instructions", &request.preamble);

        let mut request = OllamaCompletionRequest::try_from((self.model.as_ref(), request))?;
        request.stream = true;

        if tracing::enabled!(tracing::Level::TRACE) {
            tracing::trace!(target: "rig::completions",
                "Ollama streaming completion request: {}",
                serde_json::to_string_pretty(&request)?
            );
        }

        let body = serde_json::to_vec(&request)?;

        let req = self
            .client
            .post("api/chat")?
            .body(body)
            .map_err(http_client::Error::from)?;

        let response = self.client.send_streaming(req).await?;
        let status = response.status();
        let mut byte_stream = response.into_body();

        if !status.is_success() {
            return Err(CompletionError::ProviderError(format!(
                "Got error status code trying to send a request to Ollama: {status}"
            )));
        }

        let stream = try_stream! {
            let span = tracing::Span::current();
            // Accumulators for reconstructing the full assistant message,
            // recorded on the span when the final chunk arrives.
            let mut tool_calls_final = Vec::new();
            let mut text_response = String::new();
            let mut thinking_response = String::new();

            while let Some(chunk) = byte_stream.next().await {
                let bytes = chunk.map_err(|e| http_client::Error::Instance(e.into()))?;

                // One chunk may contain several NDJSON lines.
                for line in bytes.split(|&b| b == b'\n') {
                    if line.is_empty() {
                        continue;
                    }

                    tracing::debug!(target: "rig", "Received NDJSON line from Ollama: {}", String::from_utf8_lossy(line));

                    let response: CompletionResponse = serde_json::from_slice(line)?;

                    if let Message::Assistant { content, thinking, tool_calls, .. } = response.message {
                        if let Some(thinking_content) = thinking && !thinking_content.is_empty() {
                            thinking_response += &thinking_content;
                            yield RawStreamingChoice::ReasoningDelta {
                                id: None,
                                reasoning: thinking_content,
                            };
                        }

                        if !content.is_empty() {
                            text_response += &content;
                            yield RawStreamingChoice::Message(content);
                        }

                        for tool_call in tool_calls {
                            tool_calls_final.push(tool_call.clone());
                            yield RawStreamingChoice::ToolCall(
                                crate::streaming::RawStreamingToolCall::new(String::new(), tool_call.function.name, tool_call.function.arguments)
                            );
                        }
                    }

                    // Final chunk: record usage, emit the terminal response.
                    if response.done {
                        span.record("gen_ai.usage.input_tokens", response.prompt_eval_count);
                        span.record("gen_ai.usage.output_tokens", response.eval_count);
                        let message = Message::Assistant {
                            content: text_response.clone(),
                            thinking: if thinking_response.is_empty() { None } else { Some(thinking_response.clone()) },
                            images: None,
                            name: None,
                            tool_calls: tool_calls_final.clone()
                        };
                        if let Ok(serialized_message) = serde_json::to_string(&vec![message]) {
                            span.record("gen_ai.output.messages", serialized_message);
                        }
                        yield RawStreamingChoice::FinalResponse(
                            StreamingCompletionResponse {
                                total_duration: response.total_duration,
                                load_duration: response.load_duration,
                                prompt_eval_count: response.prompt_eval_count,
                                prompt_eval_duration: response.prompt_eval_duration,
                                eval_count: response.eval_count,
                                eval_duration: response.eval_duration,
                                done_reason: response.done_reason,
                            }
                        );
                        break;
                    }
                }
            }
        }.instrument(span);

        Ok(streaming::StreamingCompletionResponse::stream(Box::pin(
            stream,
        )))
    }
}
817
/// Response body of Ollama's `api/tags` endpoint.
#[derive(Debug, Deserialize)]
struct ListModelsResponse {
    models: Vec<ListModelEntry>,
}
824
/// One locally installed model as reported by `api/tags`.
#[derive(Debug, Deserialize)]
struct ListModelEntry {
    name: String,
    model: String,
}
830
831impl From<ListModelEntry> for Model {
832 fn from(value: ListModelEntry) -> Self {
833 Model::new(value.model, value.name)
834 }
835}
836
/// Lists the models installed on the Ollama server, generic over the HTTP backend.
#[derive(Clone)]
pub struct OllamaModelLister<H = reqwest::Client> {
    client: Client<H>,
}
842
843impl<H> ModelLister<H> for OllamaModelLister<H>
844where
845 H: HttpClientExt + WasmCompatSend + WasmCompatSync + 'static,
846{
847 type Client = Client<H>;
848
849 fn new(client: Self::Client) -> Self {
850 Self { client }
851 }
852
853 async fn list_all(&self) -> Result<ModelList, ModelListingError> {
854 let path = "/api/tags";
855 let req = self.client.get(path)?.body(http_client::NoBody)?;
856 let response = self.client.send::<_, Vec<u8>>(req).await?;
857
858 if !response.status().is_success() {
859 let status_code = response.status().as_u16();
860 let body = response.into_body().await?;
861 return Err(ModelListingError::api_error_with_context(
862 "Ollama",
863 path,
864 status_code,
865 &body,
866 ));
867 }
868
869 let body = response.into_body().await?;
870 let api_resp: ListModelsResponse = serde_json::from_slice(&body).map_err(|error| {
871 ModelListingError::parse_error_with_context("Ollama", path, &error, &body)
872 })?;
873 let models = api_resp.models.into_iter().map(Model::from).collect();
874
875 Ok(ModelList::new(models))
876 }
877}
878
879#[derive(Clone, Debug, Deserialize, Serialize)]
883pub struct ToolDefinition {
884 #[serde(rename = "type")]
885 pub type_field: String, pub function: completion::ToolDefinition,
887}
888
889impl From<crate::completion::ToolDefinition> for ToolDefinition {
891 fn from(tool: crate::completion::ToolDefinition) -> Self {
892 ToolDefinition {
893 type_field: "function".to_owned(),
894 function: completion::ToolDefinition {
895 name: tool.name,
896 description: tool.description,
897 parameters: tool.parameters,
898 },
899 }
900 }
901}
902
/// A tool invocation emitted by the model.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ToolCall {
    // Defaults to `function` when the field is omitted.
    #[serde(default, rename = "type")]
    pub r#type: ToolType,
    pub function: Function,
}
/// Kind of tool call; Ollama currently only emits `function`.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum ToolType {
    #[default]
    Function,
}
/// Function name plus its JSON arguments, as produced by the model.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct Function {
    pub name: String,
    pub arguments: Value,
}
920
/// Chat message in Ollama's wire format, discriminated by the `role` field.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(tag = "role", rename_all = "lowercase")]
pub enum Message {
    User {
        content: String,
        // Base64-encoded images attached alongside the text.
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    Assistant {
        // May be empty when the message only carries tool calls.
        #[serde(default)]
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        thinking: Option<String>,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
        // Ollama may send `null` here; treated as an empty list.
        #[serde(default, deserialize_with = "json_utils::null_or_vec")]
        tool_calls: Vec<ToolCall>,
    },
    System {
        content: String,
        #[serde(skip_serializing_if = "Option::is_none")]
        images: Option<Vec<String>>,
        #[serde(skip_serializing_if = "Option::is_none")]
        name: Option<String>,
    },
    // Serialized with role "tool"; carries the result of a tool call.
    #[serde(rename = "tool")]
    ToolResult {
        #[serde(rename = "tool_name")]
        name: String,
        content: String,
    },
}
959
impl TryFrom<crate::message::Message> for Vec<Message> {
    type Error = crate::message::MessageError;
    /// Convert one internal message into one or more Ollama wire messages.
    ///
    /// A user message containing tool results fans out into one `tool`-role
    /// message per result (any other parts are discarded in that case);
    /// otherwise text and document parts are joined into a single user
    /// message with base64 images attached alongside.
    fn try_from(internal_msg: crate::message::Message) -> Result<Self, Self::Error> {
        use crate::message::Message as InternalMessage;
        match internal_msg {
            InternalMessage::System { content } => Ok(vec![Message::System {
                content,
                images: None,
                name: None,
            }]),
            InternalMessage::User { content, .. } => {
                let (tool_results, other_content): (Vec<_>, Vec<_>) =
                    content.into_iter().partition(|content| {
                        matches!(content, crate::message::UserContent::ToolResult(_))
                    });

                if !tool_results.is_empty() {
                    tool_results
                        .into_iter()
                        .map(|content| match content {
                            crate::message::UserContent::ToolResult(
                                crate::message::ToolResult { id, content, .. },
                            ) => {
                                // Flatten result parts to text; non-text
                                // parts become a placeholder marker.
                                let content_string = content
                                    .into_iter()
                                    .map(|content| match content {
                                        crate::message::ToolResultContent::Text(text) => text.text,
                                        _ => "[Non-text content]".to_string(),
                                    })
                                    .collect::<Vec<_>>()
                                    .join("\n");

                                // The tool-call id doubles as the tool name
                                // on the wire.
                                Ok::<_, crate::message::MessageError>(Message::ToolResult {
                                    name: id,
                                    content: content_string,
                                })
                            }
                            // Unreachable given the partition above, but kept
                            // explicit for exhaustiveness.
                            _ => Err(crate::message::MessageError::ConversionError(
                                "expected tool result content while converting Ollama input".into(),
                            )),
                        })
                        .collect::<Result<Vec<_>, _>>()
                } else {
                    // Split remaining parts into text (documents included)
                    // and base64 images; other kinds are silently dropped.
                    let (texts, images) = other_content.into_iter().fold(
                        (Vec::new(), Vec::new()),
                        |(mut texts, mut images), content| {
                            match content {
                                crate::message::UserContent::Text(crate::message::Text {
                                    text,
                                }) => texts.push(text),
                                crate::message::UserContent::Image(crate::message::Image {
                                    data: DocumentSourceKind::Base64(data),
                                    ..
                                }) => images.push(data),
                                crate::message::UserContent::Document(
                                    crate::message::Document {
                                        data:
                                            DocumentSourceKind::Base64(data)
                                            | DocumentSourceKind::String(data),
                                        ..
                                    },
                                ) => texts.push(data),
                                _ => {}
                            }
                            (texts, images)
                        },
                    );

                    Ok(vec![Message::User {
                        content: texts.join(" "),
                        images: if images.is_empty() {
                            None
                        } else {
                            Some(
                                images
                                    .into_iter()
                                    .map(|x| x.to_string())
                                    .collect::<Vec<String>>(),
                            )
                        },
                        name: None,
                    }])
                }
            }
            InternalMessage::Assistant { content, .. } => {
                let mut thinking: Option<String> = None;
                let mut text_content = Vec::new();
                let mut tool_calls = Vec::new();

                for content in content.into_iter() {
                    match content {
                        crate::message::AssistantContent::Text(text) => {
                            text_content.push(text.text)
                        }
                        crate::message::AssistantContent::ToolCall(tool_call) => {
                            tool_calls.push(tool_call)
                        }
                        crate::message::AssistantContent::Reasoning(reasoning) => {
                            // Last non-empty reasoning block wins.
                            let display = reasoning.display_text();
                            if !display.is_empty() {
                                thinking = Some(display);
                            }
                        }
                        crate::message::AssistantContent::Image(_) => {
                            return Err(crate::message::MessageError::ConversionError(
                                "Ollama currently doesn't support images.".into(),
                            ));
                        }
                    }
                }

                Ok(vec![Message::Assistant {
                    content: text_content.join(" "),
                    thinking,
                    images: None,
                    name: None,
                    tool_calls: tool_calls
                        .into_iter()
                        .map(|tool_call| tool_call.into())
                        .collect::<Vec<_>>(),
                }])
            }
        }
    }
}
1094
impl From<Message> for crate::completion::Message {
    /// Convert an Ollama wire message back into the internal representation.
    ///
    /// System and tool-result messages are both mapped onto the internal
    /// `User` variant (the internal model has no separate roles for them).
    fn from(msg: Message) -> Self {
        match msg {
            Message::User { content, .. } => crate::completion::Message::User {
                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text {
                    text: content,
                })),
            },
            Message::Assistant {
                content,
                tool_calls,
                ..
            } => {
                // NOTE(review): the text part is pushed even when empty,
                // unlike the TryFrom<CompletionResponse> path which skips
                // empty text — confirm this asymmetry is intended.
                let mut assistant_contents =
                    vec![crate::completion::message::AssistantContent::Text(Text {
                        text: content,
                    })];
                // Tool-call ids are not present on the wire; the function
                // name is reused as the id.
                for tc in tool_calls {
                    assistant_contents.push(
                        crate::completion::message::AssistantContent::tool_call(
                            tc.function.name.clone(),
                            tc.function.name,
                            tc.function.arguments,
                        ),
                    );
                }
                let content =
                    OneOrMany::from_iter_optional(assistant_contents).unwrap_or_else(|| {
                        OneOrMany::one(crate::completion::message::AssistantContent::Text(Text {
                            text: String::new(),
                        }))
                    });

                crate::completion::Message::Assistant { id: None, content }
            }
            Message::System { content, .. } => crate::completion::Message::User {
                content: OneOrMany::one(crate::completion::message::UserContent::Text(Text {
                    text: content,
                })),
            },
            Message::ToolResult { name, content } => crate::completion::Message::User {
                content: OneOrMany::one(message::UserContent::tool_result(
                    name,
                    OneOrMany::one(message::ToolResultContent::text(content)),
                )),
            },
        }
    }
}
1147
1148impl Message {
1149 pub fn system(content: &str) -> Self {
1151 Message::System {
1152 content: content.to_owned(),
1153 images: None,
1154 name: None,
1155 }
1156 }
1157}
1158
1159impl From<crate::message::ToolCall> for ToolCall {
1162 fn from(tool_call: crate::message::ToolCall) -> Self {
1163 Self {
1164 r#type: ToolType::Function,
1165 function: Function {
1166 name: tool_call.function.name,
1167 arguments: tool_call.function.arguments,
1168 },
1169 }
1170 }
1171}
1172
/// Typed text block used for system content.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct SystemContent {
    #[serde(default)]
    r#type: SystemContentType,
    text: String,
}
1179
/// Kind marker for [`SystemContent`]; only `text` exists today.
#[derive(Default, Debug, Serialize, Deserialize, PartialEq, Clone)]
#[serde(rename_all = "lowercase")]
pub enum SystemContentType {
    #[default]
    Text,
}
1186
1187impl From<String> for SystemContent {
1188 fn from(s: String) -> Self {
1189 SystemContent {
1190 r#type: SystemContentType::default(),
1191 text: s,
1192 }
1193 }
1194}
1195
1196impl FromStr for SystemContent {
1197 type Err = std::convert::Infallible;
1198 fn from_str(s: &str) -> Result<Self, Self::Err> {
1199 Ok(SystemContent {
1200 r#type: SystemContentType::default(),
1201 text: s.to_string(),
1202 })
1203 }
1204}
1205
/// Plain-text assistant content block.
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct AssistantContent {
    pub text: String,
}
1210
1211impl FromStr for AssistantContent {
1212 type Err = std::convert::Infallible;
1213 fn from_str(s: &str) -> Result<Self, Self::Err> {
1214 Ok(AssistantContent { text: s.to_owned() })
1215 }
1216}
1217
1218#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
1219#[serde(tag = "type", rename_all = "lowercase")]
1220pub enum UserContent {
1221 Text { text: String },
1222 Image { image_url: ImageUrl },
1223 }
1225
1226impl FromStr for UserContent {
1227 type Err = std::convert::Infallible;
1228 fn from_str(s: &str) -> Result<Self, Self::Err> {
1229 Ok(UserContent::Text { text: s.to_owned() })
1230 }
1231}
1232
/// An image reference used by [`UserContent::Image`].
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
pub struct ImageUrl {
    /// Location of the image; not validated here.
    pub url: String,
    /// Requested detail level; uses `ImageDetail::default()` when absent
    /// in the incoming JSON.
    #[serde(default)]
    pub detail: ImageDetail,
}
1239
1240#[cfg(test)]
1245mod tests {
1246 use super::*;
1247 use serde_json::json;
1248
1249 #[tokio::test]
1251 async fn test_chat_completion() {
1252 let sample_chat_response = json!({
1254 "model": "llama3.2",
1255 "created_at": "2023-08-04T19:22:45.499127Z",
1256 "message": {
1257 "role": "assistant",
1258 "content": "The sky is blue because of Rayleigh scattering.",
1259 "images": null,
1260 "tool_calls": [
1261 {
1262 "type": "function",
1263 "function": {
1264 "name": "get_current_weather",
1265 "arguments": {
1266 "location": "San Francisco, CA",
1267 "format": "celsius"
1268 }
1269 }
1270 }
1271 ]
1272 },
1273 "done": true,
1274 "total_duration": 8000000000u64,
1275 "load_duration": 6000000u64,
1276 "prompt_eval_count": 61u64,
1277 "prompt_eval_duration": 400000000u64,
1278 "eval_count": 468u64,
1279 "eval_duration": 7700000000u64
1280 });
1281 let sample_text = sample_chat_response.to_string();
1282
1283 let chat_resp: CompletionResponse =
1284 serde_json::from_str(&sample_text).expect("Invalid JSON structure");
1285 let conv: completion::CompletionResponse<CompletionResponse> =
1286 chat_resp.try_into().unwrap();
1287 assert!(
1288 !conv.choice.is_empty(),
1289 "Expected non-empty choice in chat response"
1290 );
1291 }
1292
    // A provider `Message::User` converts into the crate-level message type
    // with its text preserved as the first `UserContent::Text` entry.
    #[test]
    fn test_message_conversion() {
        let provider_msg = Message::User {
            content: "Test message".to_owned(),
            images: None,
            name: None,
        };
        let comp_msg: crate::completion::Message = provider_msg.into();
        match comp_msg {
            crate::completion::Message::User { content } => {
                let first_content = content.first();
                match first_content {
                    crate::completion::message::UserContent::Text(text_struct) => {
                        assert_eq!(text_struct.text, "Test message");
                    }
                    _ => panic!("Expected text content in conversion"),
                }
            }
            _ => panic!("Conversion from provider Message to completion Message failed"),
        }
    }
1319
    // A crate-level ToolDefinition converts into the Ollama tool shape with
    // the type tag fixed to "function" and name/description/parameters intact.
    #[test]
    fn test_tool_definition_conversion() {
        let internal_tool = crate::completion::ToolDefinition {
            name: "get_current_weather".to_owned(),
            description: "Get the current weather for a location".to_owned(),
            parameters: json!({
                "type": "object",
                "properties": {
                    "location": {
                        "type": "string",
                        "description": "The location to get the weather for, e.g. San Francisco, CA"
                    },
                    "format": {
                        "type": "string",
                        "description": "The format to return the weather in, e.g. 'celsius' or 'fahrenheit'",
                        "enum": ["celsius", "fahrenheit"]
                    }
                },
                "required": ["location", "format"]
            }),
        };
        let ollama_tool: ToolDefinition = internal_tool.into();
        assert_eq!(ollama_tool.type_field, "function");
        assert_eq!(ollama_tool.function.name, "get_current_weather");
        assert_eq!(
            ollama_tool.function.description,
            "Get the current weather for a location"
        );
        // Spot-check that the JSON schema survived the conversion.
        let params = &ollama_tool.function.parameters;
        assert_eq!(params["properties"]["location"]["type"], "string");
    }
1355
1356 #[tokio::test]
1358 async fn test_chat_completion_with_thinking() {
1359 let sample_response = json!({
1360 "model": "qwen-thinking",
1361 "created_at": "2023-08-04T19:22:45.499127Z",
1362 "message": {
1363 "role": "assistant",
1364 "content": "The answer is 42.",
1365 "thinking": "Let me think about this carefully. The question asks for the meaning of life...",
1366 "images": null,
1367 "tool_calls": []
1368 },
1369 "done": true,
1370 "total_duration": 8000000000u64,
1371 "load_duration": 6000000u64,
1372 "prompt_eval_count": 61u64,
1373 "prompt_eval_duration": 400000000u64,
1374 "eval_count": 468u64,
1375 "eval_duration": 7700000000u64
1376 });
1377
1378 let chat_resp: CompletionResponse =
1379 serde_json::from_value(sample_response).expect("Failed to deserialize");
1380
1381 if let Message::Assistant {
1383 thinking, content, ..
1384 } = &chat_resp.message
1385 {
1386 assert_eq!(
1387 thinking.as_ref().unwrap(),
1388 "Let me think about this carefully. The question asks for the meaning of life..."
1389 );
1390 assert_eq!(content, "The answer is 42.");
1391 } else {
1392 panic!("Expected Assistant message");
1393 }
1394 }
1395
1396 #[tokio::test]
1398 async fn test_chat_completion_without_thinking() {
1399 let sample_response = json!({
1400 "model": "llama3.2",
1401 "created_at": "2023-08-04T19:22:45.499127Z",
1402 "message": {
1403 "role": "assistant",
1404 "content": "Hello!",
1405 "images": null,
1406 "tool_calls": []
1407 },
1408 "done": true,
1409 "total_duration": 8000000000u64,
1410 "load_duration": 6000000u64,
1411 "prompt_eval_count": 10u64,
1412 "prompt_eval_duration": 400000000u64,
1413 "eval_count": 5u64,
1414 "eval_duration": 7700000000u64
1415 });
1416
1417 let chat_resp: CompletionResponse =
1418 serde_json::from_value(sample_response).expect("Failed to deserialize");
1419
1420 if let Message::Assistant {
1422 thinking, content, ..
1423 } = &chat_resp.message
1424 {
1425 assert!(thinking.is_none());
1426 assert_eq!(content, "Hello!");
1427 } else {
1428 panic!("Expected Assistant message");
1429 }
1430 }
1431
    // A streaming chunk (done: false) with thinking text but empty content
    // still deserializes into an Assistant message with both fields readable.
    #[test]
    fn test_streaming_response_with_thinking() {
        let sample_chunk = json!({
            "model": "qwen-thinking",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "",
                "thinking": "Analyzing the problem...",
                "images": null,
                "tool_calls": []
            },
            "done": false
        });

        let chunk: CompletionResponse =
            serde_json::from_value(sample_chunk).expect("Failed to deserialize");

        if let Message::Assistant {
            thinking, content, ..
        } = &chunk.message
        {
            assert_eq!(thinking.as_ref().unwrap(), "Analyzing the problem...");
            assert_eq!(content, "");
        } else {
            panic!("Expected Assistant message");
        }
    }
1461
    // An internal Assistant message holding Reasoning + Text content converts
    // into a single provider message whose `thinking` carries the reasoning
    // and whose `content` carries the text.
    #[test]
    fn test_message_conversion_with_thinking() {
        let reasoning_content = crate::message::Reasoning::new("Step 1: Consider the problem");

        let internal_msg = crate::message::Message::Assistant {
            id: None,
            content: crate::OneOrMany::many(vec![
                crate::message::AssistantContent::Reasoning(reasoning_content),
                crate::message::AssistantContent::Text(crate::message::Text {
                    text: "The answer is X".to_string(),
                }),
            ])
            .unwrap(),
        };

        // Two content parts collapse into one provider message.
        let provider_msgs: Vec<Message> = internal_msg.try_into().unwrap();
        assert_eq!(provider_msgs.len(), 1);

        if let Message::Assistant {
            thinking, content, ..
        } = &provider_msgs[0]
        {
            assert_eq!(thinking.as_ref().unwrap(), "Step 1: Consider the problem");
            assert_eq!(content, "The answer is X");
        } else {
            panic!("Expected Assistant message with thinking");
        }
    }
1493
    // An explicitly empty "thinking" string deserializes as Some(""), not
    // None — presence of the field is preserved even when it is blank.
    #[test]
    fn test_empty_thinking_content() {
        let sample_response = json!({
            "model": "llama3.2",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "Response",
                "thinking": "",
                "images": null,
                "tool_calls": []
            },
            "done": true,
            "total_duration": 8000000000u64,
            "load_duration": 6000000u64,
            "prompt_eval_count": 10u64,
            "prompt_eval_duration": 400000000u64,
            "eval_count": 5u64,
            "eval_duration": 7700000000u64
        });

        let chat_resp: CompletionResponse =
            serde_json::from_value(sample_response).expect("Failed to deserialize");

        if let Message::Assistant {
            thinking, content, ..
        } = &chat_resp.message
        {
            assert_eq!(thinking.as_ref().unwrap(), "");
            assert_eq!(content, "Response");
        } else {
            panic!("Expected Assistant message");
        }
    }
1530
    // Thinking text, regular content, and tool calls can all coexist in one
    // Assistant message and each survives deserialization.
    #[test]
    fn test_thinking_with_tool_calls() {
        let sample_response = json!({
            "model": "qwen-thinking",
            "created_at": "2023-08-04T19:22:45.499127Z",
            "message": {
                "role": "assistant",
                "content": "Let me check the weather.",
                "thinking": "User wants weather info, I should use the weather tool",
                "images": null,
                "tool_calls": [
                    {
                        "type": "function",
                        "function": {
                            "name": "get_weather",
                            "arguments": {
                                "location": "San Francisco"
                            }
                        }
                    }
                ]
            },
            "done": true,
            "total_duration": 8000000000u64,
            "load_duration": 6000000u64,
            "prompt_eval_count": 30u64,
            "prompt_eval_duration": 400000000u64,
            "eval_count": 50u64,
            "eval_duration": 7700000000u64
        });

        let chat_resp: CompletionResponse =
            serde_json::from_value(sample_response).expect("Failed to deserialize");

        if let Message::Assistant {
            thinking,
            content,
            tool_calls,
            ..
        } = &chat_resp.message
        {
            assert_eq!(
                thinking.as_ref().unwrap(),
                "User wants weather info, I should use the weather tool"
            );
            assert_eq!(content, "Let me check the weather.");
            assert_eq!(tool_calls.len(), 1);
            assert_eq!(tool_calls[0].function.name, "get_weather");
        } else {
            panic!("Expected Assistant message with thinking and tool calls");
        }
    }
1584
    // additional_params with a boolean "think": true is lifted to a top-level
    // "think" field on the request; "keep_alive" is lifted too, while
    // "num_ctx" is routed into "options" alongside the temperature.
    #[test]
    fn test_completion_request_with_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": true,
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        // Exact-shape comparison: the preamble becomes a system message and
        // the request defaults to stream: false.
        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": true,
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }
1652
    // Same as the boolean case, but "think" may also be the string level
    // "low", which is passed through verbatim at the top level.
    #[test]
    fn test_completion_request_with_level_low_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "low",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": "low",
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }
1720
    // String think level "medium" is passed through verbatim at the top level.
    #[test]
    fn test_completion_request_with_level_medium_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "medium",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": "medium",
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }
1788
    // String think level "high" is passed through verbatim at the top level.
    #[test]
    fn test_completion_request_with_level_high_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "high",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        let expected = json!({
            "model": "qwen3:8b",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "What is 2 + 2?"
                }
            ],
            "temperature": 0.7,
            "stream": false,
            "think": "high",
            "max_tokens": 1024,
            "keep_alive": "-1m",
            "options": {
                "temperature": 0.7,
                "num_ctx": 4096
            }
        });

        assert_eq!(serialized, expected);
    }
1856
    // A "think" value outside {bool, "low", "medium", "high"} is rejected:
    // the TryFrom conversion returns an error rather than passing it through.
    #[test]
    fn test_completion_request_with_level_invalid_think_param() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "What is 2 + 2?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.7),
            max_tokens: Some(1024),
            tool_choice: None,
            additional_params: Some(json!({
                "think": "invalid",
                "keep_alive": "-1m",
                "num_ctx": 4096
            })),
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("qwen3:8b", completion_request));

        assert!(ollama_request.is_err())
    }
1891
    // With no additional_params at all, the request still serializes a
    // top-level "think": false default, and "options" only carries the
    // temperature.
    #[test]
    fn test_completion_request_with_think_false_default() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: None,
            preamble: Some("You are a helpful assistant.".to_string()),
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "Hello!".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: Some(0.5),
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("llama3.2", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        let expected = json!({
            "model": "llama3.2",
            "messages": [
                {
                    "role": "system",
                    "content": "You are a helpful assistant."
                },
                {
                    "role": "user",
                    "content": "Hello!"
                }
            ],
            "temperature": 0.5,
            "stream": false,
            "think": false,
            "options": {
                "temperature": 0.5
            }
        });

        assert_eq!(serialized, expected);
    }
1948
    // A structured-output schema on the request is serialized as Ollama's
    // top-level "format" field, carrying the JSON schema unchanged.
    #[test]
    fn test_completion_request_with_output_schema() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let schema: schemars::Schema = serde_json::from_value(json!({
            "type": "object",
            "properties": {
                "age": { "type": "integer" },
                "available": { "type": "boolean" }
            },
            "required": ["age", "available"]
        }))
        .expect("Failed to parse schema");

        let completion_request = CompletionRequest {
            model: Some("llama3.1".to_string()),
            preamble: None,
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "How old is Ollama?".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: None,
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: Some(schema),
        };

        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        let format = serialized
            .get("format")
            .expect("format field should be present");
        assert_eq!(
            *format,
            json!({
                "type": "object",
                "properties": {
                    "age": { "type": "integer" },
                    "available": { "type": "boolean" }
                },
                "required": ["age", "available"]
            })
        );
    }
2003
    // Without an output schema, the serialized request must omit the
    // "format" field entirely (not emit it as null).
    #[test]
    fn test_completion_request_without_output_schema() {
        use crate::OneOrMany;
        use crate::completion::Message as CompletionMessage;
        use crate::message::{Text, UserContent};

        let completion_request = CompletionRequest {
            model: Some("llama3.1".to_string()),
            preamble: None,
            chat_history: OneOrMany::one(CompletionMessage::User {
                content: OneOrMany::one(UserContent::Text(Text {
                    text: "Hello!".to_string(),
                })),
            }),
            documents: vec![],
            tools: vec![],
            temperature: None,
            max_tokens: None,
            tool_choice: None,
            additional_params: None,
            output_schema: None,
        };

        let ollama_request = OllamaCompletionRequest::try_from(("llama3.1", completion_request))
            .expect("Failed to create Ollama request");

        let serialized =
            serde_json::to_value(&ollama_request).expect("Failed to serialize request");

        assert!(
            serialized.get("format").is_none(),
            "format field should be absent when output_schema is None"
        );
    }
2038
    // Both construction paths — Client::new(Nothing) and the builder with an
    // explicit Nothing api key — succeed without contacting a server.
    #[test]
    fn test_client_initialization() {
        let _client = crate::providers::ollama::Client::new(Nothing).expect("Client::new() failed");
        let _client_from_builder = crate::providers::ollama::Client::builder()
            .api_key(Nothing)
            .build()
            .expect("Client::builder() failed");
    }
2047}