machi/llms/ollama/chat.rs

//! Ollama `ChatProvider` implementation.
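//!
//! A minimal usage sketch (not compiled here; it assumes `ChatRequest` and
//! `Message` implement `Default`, that `Role` has a `User` variant, and it
//! abbreviates the crate root as `machi`):
//!
//! ```ignore
//! use machi::chat::{ChatProvider, ChatRequest};
//! use machi::llms::ollama::Ollama;
//! use machi::message::{Content, Message, Role};
//!
//! let client = Ollama::with_defaults()?;
//! let request = ChatRequest {
//!     model: client.default_model().to_owned(),
//!     messages: vec![Message {
//!         role: Role::User,
//!         content: Some(Content::Text("Why is the sky blue?".to_owned())),
//!         ..Default::default()
//!     }],
//!     ..Default::default()
//! };
//! let response = client.chat(&request).await?;
//! ```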

use std::pin::Pin;

use async_trait::async_trait;
use futures::{Stream, StreamExt};
use serde::Deserialize;

use tracing::{Instrument, debug, error, info, info_span};

use crate::chat::ChatProvider;
use crate::chat::{ChatRequest, ChatResponse};
use crate::error::{LlmError, Result};
use crate::message::{Content, Message, Role, ToolCall};
use crate::stream::{StopReason, StreamChunk};
use crate::usage::Usage;

use super::client::{Ollama, OllamaToolCall};
use super::stream::parse_stream_line;

/// Ollama chat completion response.
#[derive(Debug, Clone, Deserialize)]
struct OllamaChatResponse {
    /// Name of the model that produced the response.
    pub model: String,
    /// The assistant message returned by the model.
    pub message: OllamaResponseMessage,
    /// Why generation stopped (e.g. "stop" or "length").
    #[serde(default)]
    pub done_reason: Option<String>,
    /// Number of tokens in the prompt.
    #[serde(default)]
    pub prompt_eval_count: Option<u32>,
    /// Number of tokens generated.
    #[serde(default)]
    pub eval_count: Option<u32>,
}

/// Ollama response message.
#[derive(Debug, Clone, Deserialize)]
struct OllamaResponseMessage {
    /// Text content of the message (may be empty for tool-call responses).
    #[serde(default)]
    pub content: String,
    /// Tool calls requested by the model, if any.
    #[serde(default)]
    pub tool_calls: Option<Vec<OllamaToolCall>>,
    /// Thinking output from reasoning models, if any.
    #[serde(default)]
    pub thinking: Option<String>,
}

impl Ollama {
    /// Parse the response into `ChatResponse`.
    fn parse_response(response: OllamaChatResponse) -> ChatResponse {
        let stop_reason = match response.done_reason.as_deref() {
            Some("length") => StopReason::Length,
            // "stop", None, and any other value default to Stop.
            _ => StopReason::Stop,
        };

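        // Ollama returns tool-call arguments as structured JSON and assigns no
        // call IDs, so serialize the arguments to a string and synthesize a
        // unique `call_<uuid>` ID for each call.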
        let tool_calls = response.message.tool_calls.map(|calls| {
            calls
                .into_iter()
                .map(|tc| {
                    let args = serde_json::to_string(&tc.function.arguments).unwrap_or_default();
                    ToolCall::function(
                        format!("call_{}", uuid::Uuid::new_v4()),
                        tc.function.name,
                        args,
                    )
                })
                .collect()
        });

        let content = if response.message.content.is_empty() {
            None
        } else {
            Some(Content::Text(response.message.content))
        };

        // Extract thinking content from reasoning models
        let reasoning_content = response.message.thinking.filter(|t| !t.is_empty());

        let message = Message {
            role: Role::Assistant,
            content,
            refusal: None,
            annotations: Vec::new(),
            tool_calls,
            tool_call_id: None,
            name: None,
            reasoning_content,
            thinking_blocks: None,
        };

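        // Ollama reports token counts as `prompt_eval_count` (input) and
        // `eval_count` (output); usage is reported only when both are present.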
        let usage = match (response.prompt_eval_count, response.eval_count) {
            (Some(input), Some(output)) => Some(Usage::new(input, output)),
            _ => None,
        };

        ChatResponse {
            message,
            stop_reason,
            usage,
            model: Some(response.model),
            id: None,
            service_tier: None,
            raw: None,
        }
    }
}

#[async_trait]
impl ChatProvider for Ollama {
    async fn chat(&self, request: &ChatRequest) -> Result<ChatResponse> {
        // Span attributes follow the OpenTelemetry GenAI semantic conventions
        // (`gen_ai.*`).
        let span = info_span!(
            "gen_ai.chat",
            gen_ai.system = "ollama",
            gen_ai.request.model = %request.model,
            gen_ai.request.temperature = request.temperature.unwrap_or(-1.0),
            gen_ai.request.max_tokens = request.max_completion_tokens.or(request.max_tokens).unwrap_or(0),
            gen_ai.usage.input_tokens = tracing::field::Empty,
            gen_ai.usage.output_tokens = tracing::field::Empty,
            gen_ai.response.model = tracing::field::Empty,
            gen_ai.response.finish_reason = tracing::field::Empty,
            error = tracing::field::Empty,
        );

        async {
            let url = self.chat_url();
            let mut body = self.build_body(request).await?;
            body.stream = false;

            debug!(model = %request.model, messages = request.messages.len(), "Sending Ollama chat request");

            let response = self.client().post(&url).json(&body).send().await?;

            let status = response.status();
            if !status.is_success() {
                let error_text = response.text().await.unwrap_or_default();
                let err = Self::parse_error(status.as_u16(), &error_text);
                error!(error = %err, status = status.as_u16(), "Ollama API error");
                tracing::Span::current().record("error", tracing::field::display(&err));
                return Err(err.into());
            }

            let response_text = response.text().await?;
            let parsed: OllamaChatResponse = serde_json::from_str(&response_text).map_err(|e| {
                let err = LlmError::response_format(
                    "valid Ollama response",
                    format!("parse error: {e}, response: {response_text}"),
                );
                error!(error = %err, "Ollama response parse error");
                tracing::Span::current().record("error", tracing::field::display(&err));
                err
            })?;

            let result = Self::parse_response(parsed);

            // Record usage, model, and finish_reason in the span.
            let current = tracing::Span::current();
            if let Some(ref usage) = result.usage {
                current.record("gen_ai.usage.input_tokens", usage.input_tokens);
                current.record("gen_ai.usage.output_tokens", usage.output_tokens);
            }
            if let Some(ref model) = result.model {
                current.record("gen_ai.response.model", model.as_str());
            }
            current.record("gen_ai.response.finish_reason", result.stop_reason.as_str());

            info!(
                model = result.model.as_deref().unwrap_or(&request.model),
                finish_reason = result.stop_reason.as_str(),
                "Ollama chat completed",
            );

            Ok(result)
        }
        .instrument(span)
        .await
    }

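    /// Streams the chat completion, yielding parsed [`StreamChunk`]s.
    ///
    /// A minimal consumption sketch (not compiled here; `client` and
    /// `request` are placeholder values):
    ///
    /// ```ignore
    /// use futures::StreamExt;
    ///
    /// let mut stream = client.chat_stream(&request).await?;
    /// while let Some(chunk) = stream.next().await {
    ///     // Each item is a `Result<StreamChunk>`; bail out on stream errors.
    ///     let chunk = chunk?;
    ///     println!("{chunk:?}");
    /// }
    /// ```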
    async fn chat_stream(
        &self,
        request: &ChatRequest,
    ) -> Result<Pin<Box<dyn Stream<Item = Result<StreamChunk>> + Send>>> {
        debug!(
            gen_ai.system = "ollama",
            model = %request.model,
            messages = request.messages.len(),
            "Starting Ollama chat stream",
        );

        let url = self.chat_url();
        let mut body = self.build_body(request).await?;
        body.stream = true;

        let response = self.client().post(&url).json(&body).send().await?;

        let status = response.status();
        if !status.is_success() {
            let error_text = response.text().await.unwrap_or_default();
            return Err(Self::parse_error(status.as_u16(), &error_text).into());
        }

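        // Ollama streams newline-delimited JSON: one JSON object per line. A
        // single HTTP chunk may carry several lines, so fan each chunk out
        // into zero or more parsed chunks. (This assumes a line is never
        // split across chunk boundaries.)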
        let stream = response.bytes_stream();
        let parsed_stream = stream.flat_map(move |chunk_result| {
            let chunks: Vec<Result<StreamChunk>> = match chunk_result {
                Ok(bytes) => {
                    let text = String::from_utf8_lossy(&bytes);
                    text.lines().filter_map(parse_stream_line).collect()
                }
                Err(e) => vec![Err(LlmError::stream(e.to_string()).into())],
            };
            futures::stream::iter(chunks)
        });

        Ok(Box::pin(parsed_stream))
    }

    fn provider_name(&self) -> &'static str {
        "ollama"
    }

    fn default_model(&self) -> &str {
        self.model()
    }

    fn supports_streaming(&self) -> bool {
        true
    }

    fn supports_tools(&self) -> bool {
        true
    }

    fn supports_vision(&self) -> bool {
        true
    }

    fn supports_json_mode(&self) -> bool {
        true
    }
}

#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::panic)]
mod tests {
    use super::*;

    mod ollama_chat_response {
        use super::*;

        #[test]
        fn deserializes_basic_response() {
            let json = r#"{
                "model": "llama3",
                "message": {
                    "content": "Hello!"
                },
                "done_reason": "stop"
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert_eq!(response.model, "llama3");
            assert_eq!(response.message.content, "Hello!");
            assert_eq!(response.done_reason, Some("stop".to_owned()));
        }

        #[test]
        fn deserializes_with_usage_info() {
            let json = r#"{
                "model": "llama3",
                "message": {"content": "Test"},
                "done_reason": "stop",
                "prompt_eval_count": 100,
                "eval_count": 50
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert_eq!(response.prompt_eval_count, Some(100));
            assert_eq!(response.eval_count, Some(50));
        }

        #[test]
        fn deserializes_with_tool_calls() {
            let json = r#"{
                "model": "llama3",
                "message": {
                    "content": "",
                    "tool_calls": [{
                        "function": {
                            "name": "get_weather",
                            "arguments": {"city": "Tokyo"}
                        }
                    }]
                },
                "done_reason": "stop"
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert!(response.message.tool_calls.is_some());
            let tool_calls = response.message.tool_calls.unwrap();
            assert_eq!(tool_calls.len(), 1);
            assert_eq!(tool_calls[0].function.name, "get_weather");
        }

        #[test]
        fn deserializes_with_thinking() {
            let json = r#"{
                "model": "qwen3",
                "message": {
                    "content": "The answer is 42.",
                    "thinking": "Let me calculate this..."
                },
                "done_reason": "stop"
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert_eq!(
                response.message.thinking,
                Some("Let me calculate this...".to_owned())
            );
        }

        #[test]
        fn deserializes_with_length_done_reason() {
            let json = r#"{
                "model": "llama3",
                "message": {"content": "Truncated..."},
                "done_reason": "length"
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert_eq!(response.done_reason, Some("length".to_owned()));
        }

        #[test]
        fn deserializes_without_optional_fields() {
            let json = r#"{
                "model": "llama3",
                "message": {"content": "Hello"}
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert!(response.done_reason.is_none());
            assert!(response.prompt_eval_count.is_none());
            assert!(response.eval_count.is_none());
            assert!(response.message.tool_calls.is_none());
            assert!(response.message.thinking.is_none());
        }

        #[test]
        fn deserializes_empty_content() {
            let json = r#"{
                "model": "llama3",
                "message": {"content": ""}
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();

            assert!(response.message.content.is_empty());
        }
    }

    mod parse_response {
        use super::*;
        use crate::llms::ollama::client::OllamaFunctionCall;

        fn make_response(content: &str, done_reason: Option<&str>) -> OllamaChatResponse {
            OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: content.to_owned(),
                    tool_calls: None,
                    thinking: None,
                },
                done_reason: done_reason.map(String::from),
                prompt_eval_count: None,
                eval_count: None,
            }
        }

        #[test]
        fn parses_basic_text_response() {
            let response = make_response("Hello, world!", Some("stop"));
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.message.role, Role::Assistant);
            assert!(parsed.message.content.is_some());
            if let Some(Content::Text(text)) = &parsed.message.content {
                assert_eq!(text, "Hello, world!");
            } else {
                panic!("Expected text content");
            }
        }

        #[test]
        fn parses_stop_reason_stop() {
            let response = make_response("Done", Some("stop"));
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.stop_reason, StopReason::Stop);
        }

        #[test]
        fn parses_stop_reason_length() {
            let response = make_response("Truncated", Some("length"));
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.stop_reason, StopReason::Length);
        }

        #[test]
        fn parses_stop_reason_none_defaults_to_stop() {
            let response = make_response("Done", None);
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.stop_reason, StopReason::Stop);
        }

        #[test]
        fn parses_stop_reason_unknown_defaults_to_stop() {
            let response = make_response("Done", Some("unknown_reason"));
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.stop_reason, StopReason::Stop);
        }

        #[test]
        fn parses_empty_content_as_none() {
            let response = make_response("", Some("stop"));
            let parsed = Ollama::parse_response(response);

            assert!(parsed.message.content.is_none());
        }

        #[test]
        fn parses_usage_info() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: "Test".to_owned(),
                    tool_calls: None,
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: Some(100),
                eval_count: Some(50),
            };

            let parsed = Ollama::parse_response(response);

            assert!(parsed.usage.is_some());
            let usage = parsed.usage.unwrap();
            assert_eq!(usage.input_tokens, 100);
            assert_eq!(usage.output_tokens, 50);
            assert_eq!(usage.total_tokens, 150);
        }

        #[test]
        fn parses_partial_usage_as_none() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: "Test".to_owned(),
                    tool_calls: None,
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: Some(100),
                eval_count: None, // Missing eval_count
            };

            let parsed = Ollama::parse_response(response);

            assert!(parsed.usage.is_none());
        }

        #[test]
        fn parses_tool_calls() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: String::new(),
                    tool_calls: Some(vec![OllamaToolCall {
                        function: OllamaFunctionCall {
                            name: "get_weather".to_owned(),
                            arguments: serde_json::json!({"city": "Tokyo"}),
                        },
                    }]),
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);

            assert!(parsed.message.tool_calls.is_some());
            let tool_calls = parsed.message.tool_calls.unwrap();
            assert_eq!(tool_calls.len(), 1);
            assert_eq!(tool_calls[0].function.name, "get_weather");
            assert!(tool_calls[0].id.starts_with("call_"));
        }

        #[test]
        fn parses_multiple_tool_calls() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: String::new(),
                    tool_calls: Some(vec![
                        OllamaToolCall {
                            function: OllamaFunctionCall {
                                name: "get_weather".to_owned(),
                                arguments: serde_json::json!({"city": "Tokyo"}),
                            },
                        },
                        OllamaToolCall {
                            function: OllamaFunctionCall {
                                name: "get_time".to_owned(),
                                arguments: serde_json::json!({"timezone": "JST"}),
                            },
                        },
                    ]),
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);

            let tool_calls = parsed.message.tool_calls.unwrap();
            assert_eq!(tool_calls.len(), 2);
            assert_eq!(tool_calls[0].function.name, "get_weather");
            assert_eq!(tool_calls[1].function.name, "get_time");
        }

        #[test]
        fn parses_thinking_content() {
            let response = OllamaChatResponse {
                model: "qwen3".to_owned(),
                message: OllamaResponseMessage {
                    content: "The answer is 42.".to_owned(),
                    tool_calls: None,
                    thinking: Some("Let me think about this...".to_owned()),
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);

            assert_eq!(
                parsed.message.reasoning_content,
                Some("Let me think about this...".to_owned())
            );
        }

        #[test]
        fn parses_empty_thinking_as_none() {
            let response = OllamaChatResponse {
                model: "qwen3".to_owned(),
                message: OllamaResponseMessage {
                    content: "Answer".to_owned(),
                    tool_calls: None,
                    thinking: Some(String::new()),
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);

            assert!(parsed.message.reasoning_content.is_none());
        }

        #[test]
        fn includes_model_in_response() {
            let response = make_response("Test", Some("stop"));
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.model, Some("llama3".to_owned()));
        }

        #[test]
        fn id_is_none() {
            let response = make_response("Test", Some("stop"));
            let parsed = Ollama::parse_response(response);

            // Ollama doesn't provide a response ID
            assert!(parsed.id.is_none());
        }

        #[test]
        fn service_tier_is_none() {
            let response = make_response("Test", Some("stop"));
            let parsed = Ollama::parse_response(response);

            assert!(parsed.service_tier.is_none());
        }

        #[test]
        fn raw_is_none() {
            let response = make_response("Test", Some("stop"));
            let parsed = Ollama::parse_response(response);

            assert!(parsed.raw.is_none());
        }
    }

    mod chat_provider_impl {
        use super::*;

        #[test]
        fn provider_name_is_ollama() {
            let client = Ollama::with_defaults().unwrap();
            assert_eq!(client.provider_name(), "ollama");
        }

        #[test]
        fn default_model_returns_config_model() {
            let client = Ollama::with_defaults().unwrap();
            assert_eq!(client.default_model(), client.model());
        }

        #[test]
        fn supports_streaming() {
            let client = Ollama::with_defaults().unwrap();
            assert!(client.supports_streaming());
        }

        #[test]
        fn supports_tools() {
            let client = Ollama::with_defaults().unwrap();
            assert!(client.supports_tools());
        }

        #[test]
        fn supports_vision() {
            let client = Ollama::with_defaults().unwrap();
            assert!(client.supports_vision());
        }

        #[test]
        fn supports_json_mode() {
            let client = Ollama::with_defaults().unwrap();
            assert!(client.supports_json_mode());
        }
    }

    mod tool_call_id_generation {
        use super::*;
        use crate::llms::ollama::client::OllamaFunctionCall;

        #[test]
        fn generates_unique_tool_call_ids() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: String::new(),
                    tool_calls: Some(vec![
                        OllamaToolCall {
                            function: OllamaFunctionCall {
                                name: "tool1".to_owned(),
                                arguments: serde_json::json!({}),
                            },
                        },
                        OllamaToolCall {
                            function: OllamaFunctionCall {
                                name: "tool2".to_owned(),
                                arguments: serde_json::json!({}),
                            },
                        },
                    ]),
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);
            let tool_calls = parsed.message.tool_calls.unwrap();

            // Each tool call should have a unique ID
            assert_ne!(tool_calls[0].id, tool_calls[1].id);
        }

        #[test]
        fn tool_call_ids_have_call_prefix() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: String::new(),
                    tool_calls: Some(vec![OllamaToolCall {
                        function: OllamaFunctionCall {
                            name: "test".to_owned(),
                            arguments: serde_json::json!({}),
                        },
                    }]),
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);
            let tool_calls = parsed.message.tool_calls.unwrap();

            assert!(tool_calls[0].id.starts_with("call_"));
        }
    }

    mod tool_call_arguments {
        use super::*;
        use crate::llms::ollama::client::OllamaFunctionCall;

        #[test]
        fn serializes_object_arguments() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: String::new(),
                    tool_calls: Some(vec![OllamaToolCall {
                        function: OllamaFunctionCall {
                            name: "get_weather".to_owned(),
                            arguments: serde_json::json!({
                                "city": "Tokyo",
                                "units": "celsius"
                            }),
                        },
                    }]),
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);
            let tool_calls = parsed.message.tool_calls.unwrap();
            let args = &tool_calls[0].function.arguments;

            // Arguments should be serialized to a JSON string
            assert!(args.contains("Tokyo"));
            assert!(args.contains("celsius"));
        }

        #[test]
        fn handles_empty_arguments() {
            let response = OllamaChatResponse {
                model: "llama3".to_owned(),
                message: OllamaResponseMessage {
                    content: String::new(),
                    tool_calls: Some(vec![OllamaToolCall {
                        function: OllamaFunctionCall {
                            name: "no_params_tool".to_owned(),
                            arguments: serde_json::json!({}),
                        },
                    }]),
                    thinking: None,
                },
                done_reason: Some("stop".to_owned()),
                prompt_eval_count: None,
                eval_count: None,
            };

            let parsed = Ollama::parse_response(response);
            let tool_calls = parsed.message.tool_calls.unwrap();
            let args = &tool_calls[0].function.arguments;

            assert_eq!(args, "{}");
        }
    }

    mod realistic_responses {
        use super::*;

        #[test]
        fn parses_typical_chat_response() {
            let json = r#"{
                "model": "llama3.2:latest",
                "created_at": "2024-01-15T10:30:00Z",
                "message": {
                    "role": "assistant",
                    "content": "The capital of France is Paris. It is known for the Eiffel Tower."
                },
                "done": true,
                "done_reason": "stop",
                "total_duration": 1234567890,
                "load_duration": 123456789,
                "prompt_eval_count": 15,
                "prompt_eval_duration": 12345678,
                "eval_count": 25,
                "eval_duration": 123456789
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();
            let parsed = Ollama::parse_response(response);

            assert!(parsed.message.content.is_some());
            assert_eq!(parsed.stop_reason, StopReason::Stop);
            assert!(parsed.usage.is_some());
            let usage = parsed.usage.unwrap();
            assert_eq!(usage.input_tokens, 15);
            assert_eq!(usage.output_tokens, 25);
        }

        #[test]
        fn parses_tool_call_response() {
            let json = r#"{
                "model": "llama3.2:latest",
                "message": {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [
                        {
                            "function": {
                                "name": "get_current_weather",
                                "arguments": {
                                    "location": "San Francisco, CA",
                                    "format": "fahrenheit"
                                }
                            }
                        }
                    ]
                },
                "done": true,
                "done_reason": "stop",
                "prompt_eval_count": 100,
                "eval_count": 20
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();
            let parsed = Ollama::parse_response(response);

            assert!(parsed.message.content.is_none());
            assert!(parsed.message.tool_calls.is_some());
            let tool_calls = parsed.message.tool_calls.unwrap();
            assert_eq!(tool_calls[0].function.name, "get_current_weather");
        }

        #[test]
        fn parses_reasoning_model_response() {
            let json = r#"{
                "model": "qwen3:thinking",
                "message": {
                    "role": "assistant",
                    "content": "Based on my analysis, the answer is 42.",
                    "thinking": "Let me break this down step by step:\n1. First, I need to consider...\n2. Then, applying the formula..."
                },
                "done": true,
                "done_reason": "stop",
                "prompt_eval_count": 50,
                "eval_count": 150
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();
            let parsed = Ollama::parse_response(response);

            assert!(parsed.message.content.is_some());
            assert!(parsed.message.reasoning_content.is_some());
            assert!(
                parsed
                    .message
                    .reasoning_content
                    .unwrap()
                    .contains("step by step")
            );
        }

        #[test]
        fn parses_truncated_response() {
            let json = r#"{
                "model": "llama3.2:latest",
                "message": {
                    "role": "assistant",
                    "content": "This is a very long response that was truncated because it reached the maximum token limit. The response continues to explain in great detail about the topic but unfortunately the..."
                },
                "done": true,
                "done_reason": "length",
                "prompt_eval_count": 20,
                "eval_count": 4096
            }"#;

            let response: OllamaChatResponse = serde_json::from_str(json).unwrap();
            let parsed = Ollama::parse_response(response);

            assert_eq!(parsed.stop_reason, StopReason::Length);
            assert!(parsed.usage.is_some());
            assert_eq!(parsed.usage.unwrap().output_tokens, 4096);
        }
    }
}