dynamo_llm/protocols/openai/responses.rs

// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

use dynamo_async_openai::types::responses::{
    Content, Input, OutputContent, OutputMessage, OutputStatus, OutputText, Response,
    Role as ResponseRole, Status,
};
use dynamo_async_openai::types::{
    ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
    ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
};
use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use validator::Validate;

use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionResponse};
use super::nvext::{NvExt, NvExtProvider};
use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};

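/// A Responses API request extended with NVIDIA-specific options.
///
/// `#[serde(flatten)]` inlines the upstream `CreateResponse` fields, so on the
/// wire this is the standard OpenAI payload plus an optional top-level `nvext`
/// object (illustrative shape):
/// `{"model": "...", "input": "...", "nvext": {"annotations": ["debug"]}}`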
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateResponse {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::responses::CreateResponse,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
}

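/// A Responses API response, wrapping the upstream `Response` object.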
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::responses::Response,
}

/// Implements `NvExtProvider` for `NvCreateResponse`,
/// providing access to NVIDIA-specific extensions.
impl NvExtProvider for NvCreateResponse {
    /// Returns a reference to the optional `NvExt` extension, if available.
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }

    /// Returns `None`, as raw prompt extraction is not implemented.
    fn raw_prompt(&self) -> Option<String> {
        None
    }
}

/// Implements `AnnotationsProvider` for `NvCreateResponse`,
/// enabling retrieval and management of request annotations.
impl AnnotationsProvider for NvCreateResponse {
    /// Retrieves the list of annotations from `NvExt`, if present.
    fn annotations(&self) -> Option<Vec<String>> {
        self.nvext
            .as_ref()
            .and_then(|nvext| nvext.annotations.clone())
    }

    /// Checks whether a specific annotation exists in the request.
    ///
    /// # Arguments
    /// * `annotation` - A string slice representing the annotation to check.
    ///
    /// # Returns
    /// `true` if the annotation exists, `false` otherwise.
    fn has_annotation(&self, annotation: &str) -> bool {
        self.nvext
            .as_ref()
            .and_then(|nvext| nvext.annotations.as_ref())
            .map(|annotations| annotations.iter().any(|a| a == annotation))
            .unwrap_or(false)
    }
}

/// Implements `OpenAISamplingOptionsProvider` for `NvCreateResponse`,
/// exposing OpenAI's sampling parameters for chat completion.
impl OpenAISamplingOptionsProvider for NvCreateResponse {
    /// Retrieves the temperature parameter for sampling, if set.
    fn get_temperature(&self) -> Option<f32> {
        self.inner.temperature
    }

    /// Retrieves the top-p (nucleus sampling) parameter, if set.
    fn get_top_p(&self) -> Option<f32> {
        self.inner.top_p
    }

    /// Always returns `None`; the frequency penalty is not yet mapped from the
    /// Responses API.
    fn get_frequency_penalty(&self) -> Option<f32> {
        None // TODO: map once supported
    }

    /// Always returns `None`; the presence penalty is not yet mapped from the
    /// Responses API.
    fn get_presence_penalty(&self) -> Option<f32> {
        None // TODO: map once supported
    }

    /// Returns a reference to the optional `NvExt` extension, if available.
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }

    /// Always returns `None`; seeding is not yet mapped from the Responses API.
    fn get_seed(&self) -> Option<i64> {
        None // TODO: map once supported
    }

    /// Always returns `None`; `n` is not yet mapped from the Responses API.
    fn get_n(&self) -> Option<u8> {
        None // TODO: map once supported
    }

    /// Always returns `None`; `best_of` is not yet mapped from the Responses API.
    fn get_best_of(&self) -> Option<u8> {
        None // TODO: map once supported
    }
}

/// Implements `OpenAIStopConditionsProvider` for `NvCreateResponse`,
/// providing access to stop conditions that control chat completion behavior.
impl OpenAIStopConditionsProvider for NvCreateResponse {
    /// Retrieves the maximum number of tokens allowed in the response.
    #[allow(deprecated)]
    fn get_max_tokens(&self) -> Option<u32> {
        self.inner.max_output_tokens
    }

    /// Retrieves the minimum number of tokens required in the response.
    ///
    /// # Note
    /// This method is currently a placeholder and always returns `None`,
    /// since `min_tokens` is not an OpenAI-supported parameter.
    fn get_min_tokens(&self) -> Option<u32> {
        None
    }

    /// Retrieves the stop conditions that terminate the response.
    ///
    /// # Note
    /// This method is currently a placeholder and always returns `None`;
    /// stop sequences are not yet mapped from the Responses API.
    fn get_stop(&self) -> Option<Vec<String>> {
        None // TODO: map once supported
    }

    /// Returns a reference to the optional `NvExt` extension, if available.
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }
}

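/// Lowers a Responses API request onto the internal chat completions request.
/// Only plain-text input (`Input::Text`) is supported; structured
/// `Input::Items` input is rejected with an error. Usage sketch (the request
/// variable is illustrative):
///
/// ```ignore
/// let chat_req = NvCreateChatCompletionRequest::try_from(nv_create_response)?;
/// ```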
impl TryFrom<NvCreateResponse> for NvCreateChatCompletionRequest {
    type Error = anyhow::Error;

    fn try_from(resp: NvCreateResponse) -> Result<Self, Self::Error> {
        // Build the message list from the request input
        let input_text = match resp.inner.input {
            Input::Text(text) => text,
            Input::Items(_) => {
                return Err(anyhow::anyhow!(
                    "Input::Items not supported in conversion to NvCreateChatCompletionRequest"
                ));
            }
        };

        let messages = vec![ChatCompletionRequestMessage::User(
            ChatCompletionRequestUserMessage {
                content: ChatCompletionRequestUserMessageContent::Text(input_text),
                name: None,
            },
        )];

        // TODO: See this PR for details: https://github.com/64bit/async-openai/pull/398
        let top_logprobs = convert_top_logprobs(resp.inner.top_logprobs);

        // Map all currently supported configurable parameters onto the chat request
        Ok(NvCreateChatCompletionRequest {
            inner: CreateChatCompletionRequest {
                messages,
                model: resp.inner.model,
                temperature: resp.inner.temperature,
                top_p: resp.inner.top_p,
                max_completion_tokens: resp.inner.max_output_tokens,
                top_logprobs,
                stream: Some(true), // Default to streaming so the response can be aggregated
                ..Default::default()
            },
            common: Default::default(),
            nvext: resp.nvext,
            chat_template_args: None,
        })
    }
}

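/// Clamps the Responses API `top_logprobs` (a `u32`) to the 0..=20 range
/// accepted by the chat completions API and narrows it to `u8`.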
fn convert_top_logprobs(input: Option<u32>) -> Option<u8> {
    input.map(|x| x.min(20) as u8)
}

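/// Synthesizes a Responses API `Response` from an aggregated chat completion:
/// the first choice's message content becomes a single completed output
/// message, and fresh `msg_`/`resp_`-prefixed identifiers are generated.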
impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
    type Error = anyhow::Error;

    fn try_from(chat_resp: NvCreateChatCompletionResponse) -> Result<Self, Self::Error> {
        let content_text = chat_resp
            .choices
            .into_iter()
            .next()
            .and_then(|choice| choice.message.content)
            .unwrap_or_else(|| {
                tracing::warn!("No choices in chat completion response, using empty content");
                String::new()
            });
        let message_id = format!("msg_{}", Uuid::new_v4().simple());
        let response_id = format!("resp_{}", Uuid::new_v4().simple());

        let output = vec![OutputContent::Message(OutputMessage {
            id: message_id,
            role: ResponseRole::Assistant,
            status: OutputStatus::Completed,
            content: vec![Content::OutputText(OutputText {
                text: content_text,
                annotations: vec![],
            })],
        })];

        let response = Response {
            id: response_id,
            object: "response".to_string(),
            created_at: chat_resp.created as u64,
            model: chat_resp.model,
            status: Status::Completed,
            output,
            output_text: None,
            parallel_tool_calls: None,
            reasoning: None,
            service_tier: None,
            store: None,
            truncation: None,
            temperature: None,
            top_p: None,
            tools: None,
            metadata: None,
            previous_response_id: None,
            error: None,
            incomplete_details: None,
            instructions: None,
            max_output_tokens: None,
            text: None,
            tool_choice: None,
            usage: None,
            user: None,
        };

        Ok(NvResponse { inner: response })
    }
}

#[cfg(test)]
mod tests {
    use dynamo_async_openai::types::responses::{CreateResponse, Input};
    use dynamo_async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessageContent,
    };

    use super::*;
    use crate::types::openai::chat_completions::NvCreateChatCompletionResponse;

    fn make_response_with_input(text: &str) -> NvCreateResponse {
        NvCreateResponse {
            inner: CreateResponse {
                input: Input::Text(text.into()),
                model: "test-model".into(),
                max_output_tokens: Some(1024),
                temperature: Some(0.5),
                top_p: Some(0.9),
                top_logprobs: Some(15),
                ..Default::default()
            },
            nvext: Some(NvExt {
                annotations: Some(vec!["debug".into(), "trace".into()]),
                ..Default::default()
            }),
        }
    }

    #[test]
    fn test_annotations_trait_behavior() {
        let req = make_response_with_input("hello");
        assert_eq!(
            req.annotations(),
            Some(vec!["debug".to_string(), "trace".to_string()])
        );
        assert!(req.has_annotation("debug"));
        assert!(req.has_annotation("trace"));
        assert!(!req.has_annotation("missing"));
    }

    #[test]
    fn test_openai_sampling_trait_behavior() {
        let req = make_response_with_input("hello");
        assert_eq!(req.get_temperature(), Some(0.5));
        assert_eq!(req.get_top_p(), Some(0.9));
        assert_eq!(req.get_frequency_penalty(), None);
        assert_eq!(req.get_presence_penalty(), None);
    }

    #[test]
    fn test_openai_stop_conditions_trait_behavior() {
        let req = make_response_with_input("hello");
        assert_eq!(req.get_max_tokens(), Some(1024));
        assert_eq!(req.get_min_tokens(), None);
        assert_eq!(req.get_stop(), None);
    }

    #[test]
    fn test_into_nvcreate_chat_completion_request() {
        let nv_req: NvCreateChatCompletionRequest =
            make_response_with_input("hi there").try_into().unwrap();

        assert_eq!(nv_req.inner.model, "test-model");
        assert_eq!(nv_req.inner.temperature, Some(0.5));
        assert_eq!(nv_req.inner.top_p, Some(0.9));
        assert_eq!(nv_req.inner.max_completion_tokens, Some(1024));
        assert_eq!(nv_req.inner.top_logprobs, Some(15));
        assert_eq!(nv_req.inner.stream, Some(true));

        let messages = &nv_req.inner.messages;
        assert_eq!(messages.len(), 1);
        match &messages[0] {
            ChatCompletionRequestMessage::User(user_msg) => match &user_msg.content {
                ChatCompletionRequestUserMessageContent::Text(t) => {
                    assert_eq!(t, "hi there");
                }
                _ => panic!("unexpected user content type"),
            },
            _ => panic!("expected user message"),
        }
    }

    #[allow(deprecated)]
    #[test]
    fn test_into_nvresponse_from_chat_response() {
        let now = 1_726_000_000;
        let chat_resp = NvCreateChatCompletionResponse {
            id: "chatcmpl-xyz".into(),
            choices: vec![dynamo_async_openai::types::ChatChoice {
                index: 0,
                message: dynamo_async_openai::types::ChatCompletionResponseMessage {
                    content: Some("This is a reply".into()),
                    refusal: None,
                    tool_calls: None,
                    role: dynamo_async_openai::types::Role::Assistant,
                    function_call: None,
                    audio: None,
                    reasoning_content: None,
                },
                finish_reason: None,
                logprobs: None,
            }],
            created: now,
            model: "llama-3.1-8b-instruct".into(),
            service_tier: None,
            system_fingerprint: None,
            object: "chat.completion".to_string(),
            usage: None,
        };

        let wrapped: NvResponse = chat_resp.try_into().unwrap();

        assert_eq!(wrapped.inner.model, "llama-3.1-8b-instruct");
        assert_eq!(wrapped.inner.status, Status::Completed);
        assert_eq!(wrapped.inner.object, "response");
        assert!(wrapped.inner.id.starts_with("resp_"));

        let msg = match &wrapped.inner.output[0] {
            OutputContent::Message(m) => m,
            _ => panic!("Expected Message variant"),
        };
        assert_eq!(msg.role, ResponseRole::Assistant);

        match &msg.content[0] {
            Content::OutputText(txt) => {
                assert_eq!(txt.text, "This is a reply");
            }
            _ => panic!("Expected OutputText content"),
        }
    }

    #[test]
    fn test_convert_top_logprobs_clamped() {
        assert_eq!(convert_top_logprobs(Some(5)), Some(5));
        assert_eq!(convert_top_logprobs(Some(21)), Some(20));
        assert_eq!(convert_top_logprobs(Some(1000)), Some(20));
        assert_eq!(convert_top_logprobs(None), None);
    }
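
    // Minimal sketch of the flattened wire format; assumes `serde_json` is
    // available as a dev-dependency.
    #[test]
    fn test_nvext_flattens_alongside_inner_fields() {
        let req = make_response_with_input("hello");
        let json = serde_json::to_value(&req).unwrap();
        // `CreateResponse` fields and `nvext` share the top level
        assert_eq!(json["model"], "test-model");
        assert_eq!(json["nvext"]["annotations"][0], "debug");
    }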
}