nemo_flow/codec/response.rs
1// SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4//! Normalized LLM response types produced by response codecs.
5//!
6//! This module defines [`AnnotatedLlmResponse`] and its supporting types
7//! for structured, API-agnostic access to LLM response data.
8
9use serde::{Deserialize, Serialize};
10
11use crate::json::Json;
12
13use super::request::MessageContent;
14
15// ---------------------------------------------------------------------------
16// AnnotatedLlmResponse type hierarchy
17// ---------------------------------------------------------------------------
18
19/// Structured view of an LLM response, produced by a response codec from
20/// raw JSON API output.
21///
22/// The `extra` field captures any top-level keys not modeled by the known
23/// fields, ensuring lossless round-trip through serde.
24#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
25pub struct AnnotatedLlmResponse {
26 /// Response ID from the API (e.g., "chatcmpl-abc123", "resp_abc123", "msg_abc123").
27 #[serde(skip_serializing_if = "Option::is_none")]
28 pub id: Option<String>,
29
30 /// The model that actually served the request (may differ from requested model).
31 #[serde(skip_serializing_if = "Option::is_none")]
32 pub model: Option<String>,
33
34 /// The assistant's response content, reusing [`MessageContent`] from request types.
35 #[serde(skip_serializing_if = "Option::is_none")]
36 pub message: Option<MessageContent>,
37
38 /// Tool calls requested by the model, normalized across APIs.
39 ///
40 /// Uses [`ResponseToolCall`] (arguments as [`Json`]) NOT the request-side
41 /// `ToolCall` (arguments as `String`).
42 #[serde(skip_serializing_if = "Option::is_none")]
43 pub tool_calls: Option<Vec<ResponseToolCall>>,
44
45 /// Why generation stopped, normalized across APIs.
46 #[serde(skip_serializing_if = "Option::is_none")]
47 pub finish_reason: Option<FinishReason>,
48
49 /// Token usage statistics.
50 #[serde(skip_serializing_if = "Option::is_none")]
51 pub usage: Option<Usage>,
52
53 /// API-specific response data that cannot be normalized across providers.
54 #[serde(skip_serializing_if = "Option::is_none")]
55 pub api_specific: Option<ApiSpecificResponse>,
56
57 /// Catch-all for unmodeled top-level fields, ensuring lossless round-trip.
58 #[serde(flatten)]
59 pub extra: serde_json::Map<String, Json>,
60}
61
62// ---------------------------------------------------------------------------
63// Usage
64// ---------------------------------------------------------------------------
65
66/// Token usage statistics from an LLM API response.
67///
68/// All fields are `Option<u64>` because not every provider supplies every
69/// field. For example, cache token counts are only available from providers
70/// that support prompt caching.
71#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)]
72pub struct Usage {
73 /// Tokens consumed by the prompt/input.
74 #[serde(skip_serializing_if = "Option::is_none")]
75 pub prompt_tokens: Option<u64>,
76 /// Tokens generated in the completion/output.
77 #[serde(skip_serializing_if = "Option::is_none")]
78 pub completion_tokens: Option<u64>,
79 /// Total tokens (prompt + completion).
80 #[serde(skip_serializing_if = "Option::is_none")]
81 pub total_tokens: Option<u64>,
82 /// Tokens served from prompt cache (read).
83 #[serde(skip_serializing_if = "Option::is_none")]
84 pub cache_read_tokens: Option<u64>,
85 /// Tokens written to prompt cache.
86 #[serde(skip_serializing_if = "Option::is_none")]
87 pub cache_write_tokens: Option<u64>,
88}
89
90// ---------------------------------------------------------------------------
91// FinishReason
92// ---------------------------------------------------------------------------
93
94/// Normalized reason why the model stopped generating.
95///
96/// Maps from provider-specific stop reasons:
97/// - **Complete**: OpenAI Chat `"stop"`, Anthropic `"end_turn"`, Responses `"completed"`
98/// - **Length**: OpenAI Chat `"length"`, Anthropic `"max_tokens"`, Responses incomplete+max_output_tokens
99/// - **ToolUse**: OpenAI Chat `"tool_calls"`, Anthropic `"tool_use"`
100/// - **ContentFilter**: OpenAI Chat `"content_filter"`, Responses incomplete+content_filter
101/// - **Unknown**: Forward-compatible catch-all for unrecognized reasons
102#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
103#[serde(rename_all = "snake_case")]
104pub enum FinishReason {
105 /// Model naturally completed its response.
106 Complete,
107 /// Maximum token limit reached.
108 Length,
109 /// Model requested a tool call.
110 ToolUse,
111 /// Content was filtered by safety systems.
112 ContentFilter,
113 /// Unknown or forward-compatible reason.
114 Unknown(String),
115}
116
117impl FinishReason {
118 /// Returns `true` if the model naturally completed its response.
119 ///
120 /// Only the [`FinishReason::Complete`] variant returns `true`.
121 #[must_use]
122 pub fn is_complete(&self) -> bool {
123 matches!(self, FinishReason::Complete)
124 }
125}
126
127// ---------------------------------------------------------------------------
128// ResponseToolCall
129// ---------------------------------------------------------------------------
130
131/// A tool call requested by the model in its response.
132///
133/// Unlike the request-side `ToolCall` (which stores arguments as a JSON
134/// string per OpenAI convention), response tool calls store arguments as
135/// parsed [`Json`]. Codecs parse OpenAI's string arguments during decode;
136/// Anthropic's `input` is already parsed JSON.
137#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
138pub struct ResponseToolCall {
139 /// Unique identifier for this tool call.
140 pub id: String,
141 /// The function/tool name.
142 pub name: String,
143 /// The arguments as parsed JSON (not a string).
144 pub arguments: Json,
145}
146
147// ---------------------------------------------------------------------------
148// ApiSpecificResponse
149// ---------------------------------------------------------------------------
150
151/// API-specific response data that cannot be normalized across providers.
152///
153/// Each variant captures fields unique to a particular LLM API, stored via
154/// internal tagging on the `"api"` key.
155#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
156#[serde(tag = "api")]
157pub enum ApiSpecificResponse {
158 /// OpenAI Chat Completions-specific fields.
159 #[serde(rename = "openai_chat")]
160 OpenAIChat {
161 /// Token-level log probabilities (raw JSON, too complex to normalize).
162 #[serde(skip_serializing_if = "Option::is_none")]
163 logprobs: Option<Json>,
164 /// System fingerprint for reproducibility.
165 #[serde(skip_serializing_if = "Option::is_none")]
166 system_fingerprint: Option<String>,
167 /// Processing tier used (e.g., "default").
168 #[serde(skip_serializing_if = "Option::is_none")]
169 service_tier: Option<String>,
170 },
171
172 /// OpenAI Responses API-specific fields.
173 #[serde(rename = "openai_responses")]
174 OpenAIResponses {
175 /// Full output items array for direct access.
176 #[serde(skip_serializing_if = "Option::is_none")]
177 output_items: Option<Vec<Json>>,
178 /// Response status (e.g., "completed", "incomplete").
179 #[serde(skip_serializing_if = "Option::is_none")]
180 status: Option<String>,
181 /// Details about why the response is incomplete.
182 #[serde(skip_serializing_if = "Option::is_none")]
183 incomplete_details: Option<Json>,
184 },
185
186 /// Anthropic Messages API-specific fields.
187 #[serde(rename = "anthropic_messages")]
188 AnthropicMessages {
189 /// Which stop sequence was matched (if any).
190 #[serde(skip_serializing_if = "Option::is_none")]
191 stop_sequence: Option<String>,
192 /// Full content blocks array for direct access.
193 #[serde(skip_serializing_if = "Option::is_none")]
194 content_blocks: Option<Vec<Json>>,
195 },
196
197 /// Custom/unknown API -- catch-all for user-implemented codecs.
198 #[serde(rename = "custom")]
199 Custom {
200 /// API identifier.
201 api_name: String,
202 /// Opaque API-specific data.
203 data: Json,
204 },
205}
206
207// ---------------------------------------------------------------------------
208// Helper methods
209// ---------------------------------------------------------------------------
210
211impl AnnotatedLlmResponse {
212 /// Extract the text content of the response message.
213 ///
214 /// For [`MessageContent::Text`], returns the string directly.
215 /// For [`MessageContent::Parts`], returns the text of the first
216 /// [`super::request::ContentPart::Text`] part.
217 /// Returns `None` if `message` is `None`.
218 #[must_use]
219 pub fn response_text(&self) -> Option<&str> {
220 match self.message.as_ref()? {
221 MessageContent::Text(s) => Some(s.as_str()),
222 MessageContent::Parts(parts) => parts
223 .iter()
224 .map(|p| {
225 let super::request::ContentPart::Text { text } = p;
226 text.as_str()
227 })
228 .next(),
229 }
230 }
231
232 /// Check if the response contains any tool calls.
233 ///
234 /// Returns `true` if `tool_calls` is `Some` with at least one element.
235 #[must_use]
236 pub fn has_tool_calls(&self) -> bool {
237 self.tool_calls
238 .as_ref()
239 .is_some_and(|calls| !calls.is_empty())
240 }
241}
242
243// ---------------------------------------------------------------------------
244// Tests
245// ---------------------------------------------------------------------------
246
247#[cfg(test)]
248#[path = "../../tests/unit/codec/response_tests.rs"]
249mod tests;