Skip to main content

nemo_flow/codec/
response.rs

1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Normalized LLM response types produced by response codecs.
5//!
6//! This module defines [`AnnotatedLlmResponse`] and its supporting types
7//! for structured, API-agnostic access to LLM response data.
8
9use serde::{Deserialize, Serialize};
10
11use crate::json::Json;
12
13use super::request::MessageContent;
14
15// ---------------------------------------------------------------------------
16// AnnotatedLlmResponse type hierarchy
17// ---------------------------------------------------------------------------
18
/// Structured view of an LLM response, produced by a response codec from
/// raw JSON API output.
///
/// Every modeled field is an `Option` and is left out of the serialized
/// form when it is `None`.
///
/// The `extra` field captures any top-level keys not modeled by the known
/// fields, ensuring lossless round-trip through serde.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct AnnotatedLlmResponse {
    /// Response ID from the API (e.g., "chatcmpl-abc123", "resp_abc123", "msg_abc123").
    #[serde(skip_serializing_if = "Option::is_none")]
    pub id: Option<String>,

    /// The model that actually served the request (may differ from requested model).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model: Option<String>,

    /// The assistant's response content, reusing [`MessageContent`] from request types.
    ///
    /// See [`AnnotatedLlmResponse::response_text`] for borrowing just the text.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub message: Option<MessageContent>,

    /// Tool calls requested by the model, normalized across APIs.
    ///
    /// Uses [`ResponseToolCall`] (arguments as [`Json`]) NOT the request-side
    /// `ToolCall` (arguments as `String`).
    ///
    /// Both `None` and `Some(vec![])` count as "no tool calls" for
    /// [`AnnotatedLlmResponse::has_tool_calls`].
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_calls: Option<Vec<ResponseToolCall>>,

    /// Why generation stopped, normalized across APIs.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub finish_reason: Option<FinishReason>,

    /// Token usage statistics.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub usage: Option<Usage>,

    /// API-specific response data that cannot be normalized across providers.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub api_specific: Option<ApiSpecificResponse>,

    /// Catch-all for unmodeled top-level fields, ensuring lossless round-trip.
    ///
    /// Flattened by serde: its entries sit next to the modeled fields in the
    /// same object rather than under an `extra` key.
    #[serde(flatten)]
    pub extra: serde_json::Map<String, Json>,
}
61
62// ---------------------------------------------------------------------------
63// Usage
64// ---------------------------------------------------------------------------
65
/// Token usage statistics from an LLM API response.
///
/// All fields are `Option<u64>` because not every provider supplies every
/// field. For example, cache token counts are only available from providers
/// that support prompt caching.
///
/// Fields that are `None` are omitted when serializing. The derived
/// `Default` yields a value with every field set to `None`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
pub struct Usage {
    /// Tokens consumed by the prompt/input.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub prompt_tokens: Option<u64>,
    /// Tokens generated in the completion/output.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub completion_tokens: Option<u64>,
    /// Total tokens (prompt + completion).
    ///
    /// NOTE(review): nothing in this type enforces that this equals the sum
    /// of the two fields above — confirm against the codecs that fill it in.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub total_tokens: Option<u64>,
    /// Tokens served from prompt cache (read).
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_read_tokens: Option<u64>,
    /// Tokens written to prompt cache.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub cache_write_tokens: Option<u64>,
}
89
90// ---------------------------------------------------------------------------
91// FinishReason
92// ---------------------------------------------------------------------------
93
/// Normalized reason why the model stopped generating.
///
/// Maps from provider-specific stop reasons:
/// - **Complete**: OpenAI Chat `"stop"`, Anthropic `"end_turn"`, Responses `"completed"`
/// - **Length**: OpenAI Chat `"length"`, Anthropic `"max_tokens"`, Responses incomplete+max_output_tokens
/// - **ToolUse**: OpenAI Chat `"tool_calls"`, Anthropic `"tool_use"`
/// - **ContentFilter**: OpenAI Chat `"content_filter"`, Responses incomplete+content_filter
/// - **Unknown**: Forward-compatible catch-all for unrecognized reasons
///
/// Because of `rename_all = "snake_case"`, the variant names appear in the
/// serialized form as `complete`, `length`, `tool_use`, `content_filter`
/// and `unknown`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FinishReason {
    /// Model naturally completed its response.
    Complete,
    /// Maximum token limit reached.
    Length,
    /// Model requested a tool call.
    ToolUse,
    /// Content was filtered by safety systems.
    ContentFilter,
    /// Unknown or forward-compatible reason.
    ///
    /// Carries a `String` payload — presumably the provider's raw reason
    /// text; the mapping is done by the codecs, not in this file.
    Unknown(String),
}
116
117impl FinishReason {
118    /// Returns `true` if the model naturally completed its response.
119    ///
120    /// Only the [`FinishReason::Complete`] variant returns `true`.
121    #[must_use]
122    pub fn is_complete(&self) -> bool {
123        matches!(self, FinishReason::Complete)
124    }
125}
126
127// ---------------------------------------------------------------------------
128// ResponseToolCall
129// ---------------------------------------------------------------------------
130
/// A tool call requested by the model in its response.
///
/// Unlike the request-side `ToolCall` (which stores arguments as a JSON
/// string per OpenAI convention), response tool calls store arguments as
/// parsed [`Json`]. Codecs parse OpenAI's string arguments during decode;
/// Anthropic's `input` is already parsed JSON.
///
/// All three fields are required: none is an `Option` and none carries a
/// serde default.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct ResponseToolCall {
    /// Unique identifier for this tool call.
    pub id: String,
    /// The function/tool name.
    pub name: String,
    /// The arguments as parsed JSON (not a string).
    pub arguments: Json,
}
146
147// ---------------------------------------------------------------------------
148// ApiSpecificResponse
149// ---------------------------------------------------------------------------
150
/// API-specific response data that cannot be normalized across providers.
///
/// Each variant captures fields unique to a particular LLM API, stored via
/// internal tagging on the `"api"` key.
///
/// The tag values are `"openai_chat"`, `"openai_responses"`,
/// `"anthropic_messages"` and `"custom"`. Every field of the three
/// provider variants is optional and omitted from the serialized form when
/// `None`; the two fields of `Custom` are required.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[serde(tag = "api")]
pub enum ApiSpecificResponse {
    /// OpenAI Chat Completions-specific fields.
    #[serde(rename = "openai_chat")]
    OpenAIChat {
        /// Token-level log probabilities (raw JSON, too complex to normalize).
        #[serde(skip_serializing_if = "Option::is_none")]
        logprobs: Option<Json>,
        /// System fingerprint for reproducibility.
        #[serde(skip_serializing_if = "Option::is_none")]
        system_fingerprint: Option<String>,
        /// Processing tier used (e.g., "default").
        #[serde(skip_serializing_if = "Option::is_none")]
        service_tier: Option<String>,
    },

    /// OpenAI Responses API-specific fields.
    #[serde(rename = "openai_responses")]
    OpenAIResponses {
        /// Full output items array for direct access.
        #[serde(skip_serializing_if = "Option::is_none")]
        output_items: Option<Vec<Json>>,
        /// Response status (e.g., "completed", "incomplete").
        #[serde(skip_serializing_if = "Option::is_none")]
        status: Option<String>,
        /// Details about why the response is incomplete.
        #[serde(skip_serializing_if = "Option::is_none")]
        incomplete_details: Option<Json>,
        /// Echoed previous response ID for conversation continuation.
        #[serde(skip_serializing_if = "Option::is_none")]
        previous_response_id: Option<String>,
        /// Whether this response is marked for server-side storage.
        #[serde(skip_serializing_if = "Option::is_none")]
        store: Option<bool>,
        /// Service tier used for the response.
        #[serde(skip_serializing_if = "Option::is_none")]
        service_tier: Option<String>,
        /// Truncation behavior metadata (kept as raw JSON).
        #[serde(skip_serializing_if = "Option::is_none")]
        truncation: Option<Json>,
        /// Reasoning configuration/result metadata (kept as raw JSON).
        #[serde(skip_serializing_if = "Option::is_none")]
        reasoning: Option<Json>,
        /// Raw input token details payload.
        #[serde(skip_serializing_if = "Option::is_none")]
        input_tokens_details: Option<Json>,
        /// Raw output token details payload.
        #[serde(skip_serializing_if = "Option::is_none")]
        output_tokens_details: Option<Json>,
    },

    /// Anthropic Messages API-specific fields.
    #[serde(rename = "anthropic_messages")]
    AnthropicMessages {
        /// Anthropic object type (typically `"message"`).
        #[serde(skip_serializing_if = "Option::is_none")]
        object_type: Option<String>,
        /// Anthropic response role (typically `"assistant"`).
        #[serde(skip_serializing_if = "Option::is_none")]
        role: Option<String>,
        /// Raw Anthropic stop_reason.
        #[serde(skip_serializing_if = "Option::is_none")]
        stop_reason: Option<String>,
        /// Which stop sequence was matched (if any).
        #[serde(skip_serializing_if = "Option::is_none")]
        stop_sequence: Option<String>,
        /// Anthropic response service tier when present.
        #[serde(skip_serializing_if = "Option::is_none")]
        service_tier: Option<String>,
        /// Anthropic container payload when present.
        #[serde(skip_serializing_if = "Option::is_none")]
        container: Option<Json>,
        /// Full content blocks array for direct access.
        #[serde(skip_serializing_if = "Option::is_none")]
        content_blocks: Option<Vec<Json>>,
    },

    /// Custom/unknown API -- catch-all for user-implemented codecs.
    #[serde(rename = "custom")]
    Custom {
        /// API identifier.
        api_name: String,
        /// Opaque API-specific data.
        data: Json,
    },
}
242
243// ---------------------------------------------------------------------------
244// Helper methods
245// ---------------------------------------------------------------------------
246
247impl AnnotatedLlmResponse {
248    /// Extract the text content of the response message.
249    ///
250    /// For [`MessageContent::Text`], returns the string directly.
251    /// For [`MessageContent::Parts`], returns the text of the first
252    /// [`super::request::ContentPart::Text`] part.
253    /// Returns `None` if `message` is `None`.
254    #[must_use]
255    pub fn response_text(&self) -> Option<&str> {
256        match self.message.as_ref()? {
257            MessageContent::Text(s) => Some(s.as_str()),
258            MessageContent::Parts(parts) => parts.iter().find_map(|p| match p {
259                super::request::ContentPart::Text { text } => Some(text.as_str()),
260                super::request::ContentPart::ImageUrl { .. } => None,
261            }),
262        }
263    }
264
265    /// Check if the response contains any tool calls.
266    ///
267    /// Returns `true` if `tool_calls` is `Some` with at least one element.
268    #[must_use]
269    pub fn has_tool_calls(&self) -> bool {
270        self.tool_calls
271            .as_ref()
272            .is_some_and(|calls| !calls.is_empty())
273    }
274}
275
276// ---------------------------------------------------------------------------
277// Tests
278// ---------------------------------------------------------------------------
279
// The unit tests for this module are kept in a separate file: `#[path]`
// points the `tests` module at it, and `#[cfg(test)]` keeps the module out
// of non-test builds.
#[cfg(test)]
#[path = "../../tests/unit/codec/response_tests.rs"]
mod tests;