Skip to main content

nemo_flow/codec/
response.rs

1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Normalized LLM response types produced by response codecs.
5//!
6//! This module defines [`AnnotatedLlmResponse`] and its supporting types
7//! for structured, API-agnostic access to LLM response data.
8
9use serde::{Deserialize, Serialize};
10
11use crate::json::Json;
12
13use super::request::MessageContent;
14
15// ---------------------------------------------------------------------------
16// AnnotatedLlmResponse type hierarchy
17// ---------------------------------------------------------------------------
18
19/// Structured view of an LLM response, produced by a response codec from
20/// raw JSON API output.
21///
22/// The `extra` field captures any top-level keys not modeled by the known
23/// fields, ensuring lossless round-trip through serde.
24#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
25pub struct AnnotatedLlmResponse {
26    /// Response ID from the API (e.g., "chatcmpl-abc123", "resp_abc123", "msg_abc123").
27    #[serde(skip_serializing_if = "Option::is_none")]
28    pub id: Option<String>,
29
30    /// The model that actually served the request (may differ from requested model).
31    #[serde(skip_serializing_if = "Option::is_none")]
32    pub model: Option<String>,
33
34    /// The assistant's response content, reusing [`MessageContent`] from request types.
35    #[serde(skip_serializing_if = "Option::is_none")]
36    pub message: Option<MessageContent>,
37
38    /// Tool calls requested by the model, normalized across APIs.
39    ///
40    /// Uses [`ResponseToolCall`] (arguments as [`Json`]) NOT the request-side
41    /// `ToolCall` (arguments as `String`).
42    #[serde(skip_serializing_if = "Option::is_none")]
43    pub tool_calls: Option<Vec<ResponseToolCall>>,
44
45    /// Why generation stopped, normalized across APIs.
46    #[serde(skip_serializing_if = "Option::is_none")]
47    pub finish_reason: Option<FinishReason>,
48
49    /// Token usage statistics.
50    #[serde(skip_serializing_if = "Option::is_none")]
51    pub usage: Option<Usage>,
52
53    /// API-specific response data that cannot be normalized across providers.
54    #[serde(skip_serializing_if = "Option::is_none")]
55    pub api_specific: Option<ApiSpecificResponse>,
56
57    /// Catch-all for unmodeled top-level fields, ensuring lossless round-trip.
58    #[serde(flatten)]
59    pub extra: serde_json::Map<String, Json>,
60}
61
62// ---------------------------------------------------------------------------
63// Usage
64// ---------------------------------------------------------------------------
65
66/// Token usage statistics from an LLM API response.
67///
68/// All fields are `Option<u64>` because not every provider supplies every
69/// field. For example, cache token counts are only available from providers
70/// that support prompt caching.
71#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
72pub struct Usage {
73    /// Tokens consumed by the prompt/input.
74    #[serde(skip_serializing_if = "Option::is_none")]
75    pub prompt_tokens: Option<u64>,
76    /// Tokens generated in the completion/output.
77    #[serde(skip_serializing_if = "Option::is_none")]
78    pub completion_tokens: Option<u64>,
79    /// Total tokens (prompt + completion).
80    #[serde(skip_serializing_if = "Option::is_none")]
81    pub total_tokens: Option<u64>,
82    /// Tokens served from prompt cache (read).
83    #[serde(skip_serializing_if = "Option::is_none")]
84    pub cache_read_tokens: Option<u64>,
85    /// Tokens written to prompt cache.
86    #[serde(skip_serializing_if = "Option::is_none")]
87    pub cache_write_tokens: Option<u64>,
88}
89
90// ---------------------------------------------------------------------------
91// FinishReason
92// ---------------------------------------------------------------------------
93
94/// Normalized reason why the model stopped generating.
95///
96/// Maps from provider-specific stop reasons:
97/// - **Complete**: OpenAI Chat `"stop"`, Anthropic `"end_turn"`, Responses `"completed"`
98/// - **Length**: OpenAI Chat `"length"`, Anthropic `"max_tokens"`, Responses incomplete+max_output_tokens
99/// - **ToolUse**: OpenAI Chat `"tool_calls"`, Anthropic `"tool_use"`
100/// - **ContentFilter**: OpenAI Chat `"content_filter"`, Responses incomplete+content_filter
101/// - **Unknown**: Forward-compatible catch-all for unrecognized reasons
102#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
103#[serde(rename_all = "snake_case")]
104pub enum FinishReason {
105    /// Model naturally completed its response.
106    Complete,
107    /// Maximum token limit reached.
108    Length,
109    /// Model requested a tool call.
110    ToolUse,
111    /// Content was filtered by safety systems.
112    ContentFilter,
113    /// Unknown or forward-compatible reason.
114    Unknown(String),
115}
116
117impl FinishReason {
118    /// Returns `true` if the model naturally completed its response.
119    ///
120    /// Only the [`FinishReason::Complete`] variant returns `true`.
121    #[must_use]
122    pub fn is_complete(&self) -> bool {
123        matches!(self, FinishReason::Complete)
124    }
125}
126
127// ---------------------------------------------------------------------------
128// ResponseToolCall
129// ---------------------------------------------------------------------------
130
131/// A tool call requested by the model in its response.
132///
133/// Unlike the request-side `ToolCall` (which stores arguments as a JSON
134/// string per OpenAI convention), response tool calls store arguments as
135/// parsed [`Json`]. Codecs parse OpenAI's string arguments during decode;
136/// Anthropic's `input` is already parsed JSON.
137#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
138pub struct ResponseToolCall {
139    /// Unique identifier for this tool call.
140    pub id: String,
141    /// The function/tool name.
142    pub name: String,
143    /// The arguments as parsed JSON (not a string).
144    pub arguments: Json,
145}
146
147// ---------------------------------------------------------------------------
148// ApiSpecificResponse
149// ---------------------------------------------------------------------------
150
151/// API-specific response data that cannot be normalized across providers.
152///
153/// Each variant captures fields unique to a particular LLM API, stored via
154/// internal tagging on the `"api"` key.
155#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
156#[serde(tag = "api")]
157pub enum ApiSpecificResponse {
158    /// OpenAI Chat Completions-specific fields.
159    #[serde(rename = "openai_chat")]
160    OpenAIChat {
161        /// Token-level log probabilities (raw JSON, too complex to normalize).
162        #[serde(skip_serializing_if = "Option::is_none")]
163        logprobs: Option<Json>,
164        /// System fingerprint for reproducibility.
165        #[serde(skip_serializing_if = "Option::is_none")]
166        system_fingerprint: Option<String>,
167        /// Processing tier used (e.g., "default").
168        #[serde(skip_serializing_if = "Option::is_none")]
169        service_tier: Option<String>,
170    },
171
172    /// OpenAI Responses API-specific fields.
173    #[serde(rename = "openai_responses")]
174    OpenAIResponses {
175        /// Full output items array for direct access.
176        #[serde(skip_serializing_if = "Option::is_none")]
177        output_items: Option<Vec<Json>>,
178        /// Response status (e.g., "completed", "incomplete").
179        #[serde(skip_serializing_if = "Option::is_none")]
180        status: Option<String>,
181        /// Details about why the response is incomplete.
182        #[serde(skip_serializing_if = "Option::is_none")]
183        incomplete_details: Option<Json>,
184    },
185
186    /// Anthropic Messages API-specific fields.
187    #[serde(rename = "anthropic_messages")]
188    AnthropicMessages {
189        /// Which stop sequence was matched (if any).
190        #[serde(skip_serializing_if = "Option::is_none")]
191        stop_sequence: Option<String>,
192        /// Full content blocks array for direct access.
193        #[serde(skip_serializing_if = "Option::is_none")]
194        content_blocks: Option<Vec<Json>>,
195    },
196
197    /// Custom/unknown API -- catch-all for user-implemented codecs.
198    #[serde(rename = "custom")]
199    Custom {
200        /// API identifier.
201        api_name: String,
202        /// Opaque API-specific data.
203        data: Json,
204    },
205}
206
207// ---------------------------------------------------------------------------
208// Helper methods
209// ---------------------------------------------------------------------------
210
211impl AnnotatedLlmResponse {
212    /// Extract the text content of the response message.
213    ///
214    /// For [`MessageContent::Text`], returns the string directly.
215    /// For [`MessageContent::Parts`], returns the text of the first
216    /// [`super::request::ContentPart::Text`] part.
217    /// Returns `None` if `message` is `None`.
218    #[must_use]
219    pub fn response_text(&self) -> Option<&str> {
220        match self.message.as_ref()? {
221            MessageContent::Text(s) => Some(s.as_str()),
222            MessageContent::Parts(parts) => parts
223                .iter()
224                .map(|p| {
225                    let super::request::ContentPart::Text { text } = p;
226                    text.as_str()
227                })
228                .next(),
229        }
230    }
231
232    /// Check if the response contains any tool calls.
233    ///
234    /// Returns `true` if `tool_calls` is `Some` with at least one element.
235    #[must_use]
236    pub fn has_tool_calls(&self) -> bool {
237        self.tool_calls
238            .as_ref()
239            .is_some_and(|calls| !calls.is_empty())
240    }
241}
242
243// ---------------------------------------------------------------------------
244// Tests
245// ---------------------------------------------------------------------------
246
247#[cfg(test)]
248#[path = "../../tests/unit/codec/response_tests.rs"]
249mod tests;