// walrus_core/model/response.rs

//! Chat response abstractions for the unified LLM interfaces.

3use crate::model::{Message, Role, tool::ToolCall};
4use compact_str::CompactString;
5use serde::{Deserialize, Serialize};
6
/// Common metadata shared between streaming and non-streaming completions.
///
/// Flattened into [`Response`] via `#[serde(flatten)]`, so these fields are
/// read from the top level of the provider's JSON payload.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct CompletionMeta {
    /// A unique identifier for the chat completion.
    pub id: CompactString,

    /// The object type.
    // NOTE(review): exact wire values (e.g. "chat.completion" vs a chunk
    // type) depend on the backend — confirm against callers.
    pub object: CompactString,

    /// Unix timestamp (in seconds) of when the response was created.
    pub created: u64,

    /// The model used for the completion.
    pub model: CompactString,

    /// Backend configuration identifier; `None` when the provider omits it.
    pub system_fingerprint: Option<CompactString>,
}
25
/// Message content in a completion response.
///
/// Used for both streaming deltas and non-streaming response messages; in a
/// streaming delta any subset of the fields may be present, hence every field
/// is optional.
#[derive(Debug, Clone, Deserialize, Default)]
pub struct Delta {
    /// The role of the message author.
    pub role: Option<Role>,

    /// The content of the message.
    pub content: Option<String>,

    /// The reasoning content (for the deepseek-reasoner model).
    pub reasoning_content: Option<String>,

    /// Tool calls made by the model.
    pub tool_calls: Option<Vec<ToolCall>>,
}
43
/// A chat completion response from the LLM.
#[derive(Debug, Clone, Deserialize)]
pub struct Response {
    /// Completion metadata (id, model, timestamp, ...), flattened from the
    /// top level of the JSON payload.
    #[serde(flatten)]
    pub meta: CompletionMeta,

    /// The list of completion choices.
    pub choices: Vec<Choice>,

    /// Token usage statistics.
    // NOTE(review): `usage` is a required field here, but some backends omit
    // usage on intermediate streaming chunks — confirm this type is only
    // deserialized from payloads that always include it.
    pub usage: Usage,
}
57
58impl Response {
59    pub fn message(&self) -> Option<Message> {
60        let choice = self.choices.first()?;
61        Some(Message::assistant(
62            choice.delta.content.clone().unwrap_or_default(),
63            choice.delta.reasoning_content.clone(),
64            choice.delta.tool_calls.as_deref(),
65        ))
66    }
67
68    /// Get the first message from the response
69    pub fn content(&self) -> Option<&String> {
70        self.choices
71            .first()
72            .and_then(|choice| choice.delta.content.as_ref())
73    }
74
75    /// Get the first message from the response
76    pub fn reasoning(&self) -> Option<&String> {
77        self.choices
78            .first()
79            .and_then(|choice| choice.delta.reasoning_content.as_ref())
80    }
81
82    /// Get the tool calls from the response
83    pub fn tool_calls(&self) -> Option<&[ToolCall]> {
84        self.choices
85            .first()
86            .and_then(|choice| choice.delta.tool_calls.as_deref())
87    }
88
89    /// Get the reason the model stopped generating
90    pub fn reason(&self) -> Option<&FinishReason> {
91        self.choices
92            .first()
93            .and_then(|choice| choice.finish_reason.as_ref())
94    }
95}
96
/// A completion choice (used for both streaming and non-streaming responses).
#[derive(Debug, Clone, Deserialize, Default)]
pub struct Choice {
    /// The index of this choice in the list.
    pub index: u32,

    /// The message content. Deserialized from the `delta` key (streaming)
    /// or, via the serde alias, from the `message` key (non-streaming).
    #[serde(alias = "message")]
    pub delta: Delta,

    /// The reason the model stopped generating, when present.
    pub finish_reason: Option<FinishReason>,

    /// Log probability information, when present.
    pub logprobs: Option<LogProbs>,
}
113
/// The reason the model stopped generating.
///
/// Deserialized from snake_case wire values (e.g. `"stop"`, `"tool_calls"`,
/// `"content_filter"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FinishReason {
    /// The model finished naturally.
    Stop,

    /// The model hit the max token limit.
    Length,

    /// Content was filtered.
    ContentFilter,

    /// The model is making tool calls.
    ToolCalls,

    /// Insufficient system resources.
    // NOTE(review): presumably a DeepSeek-specific value, matching the
    // deepseek-reasoner mention elsewhere in this file — confirm.
    InsufficientSystemResource,
}
133
/// Token usage statistics.
#[derive(Debug, Clone, Deserialize)]
pub struct Usage {
    /// Number of tokens in the prompt.
    pub prompt_tokens: u32,

    /// Number of tokens in the completion.
    pub completion_tokens: u32,

    /// Total number of tokens used.
    pub total_tokens: u32,

    /// Number of prompt tokens from cache hits; `None` on backends that do
    /// not report prompt caching.
    pub prompt_cache_hit_tokens: Option<u32>,

    /// Number of prompt tokens not in cache.
    pub prompt_cache_miss_tokens: Option<u32>,

    /// Detailed breakdown of completion tokens, when reported.
    pub completion_tokens_details: Option<CompletionTokensDetails>,
}
155
/// Detailed breakdown of completion tokens.
#[derive(Debug, Clone, Deserialize)]
pub struct CompletionTokensDetails {
    /// Number of tokens used for reasoning, when reported.
    pub reasoning_tokens: Option<u32>,
}
162
/// Log probability information for one choice.
#[derive(Debug, Clone, Deserialize)]
pub struct LogProbs {
    /// Log probabilities for each generated token, when present.
    pub content: Option<Vec<LogProb>>,
}
169
/// Log probability for a single token.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct LogProb {
    /// The token string.
    pub token: String,

    /// The log probability of this token.
    pub logprob: f64,

    /// Byte representation of the token; useful when the token is not valid
    /// UTF-8 on its own.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bytes: Option<Vec<u8>>,

    /// Top log probabilities for this position, when requested.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub top_logprobs: Option<Vec<TopLogProb>>,
}
187
/// One entry in the top-log-probabilities list for a token position.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct TopLogProb {
    /// The token string.
    pub token: String,

    /// The log probability.
    pub logprob: f64,

    /// Byte representation of the token; useful when the token is not valid
    /// UTF-8 on its own.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub bytes: Option<Vec<u8>>,
}