dynamo_llm/protocols/openai/responses.rs

use dynamo_async_openai::types::responses::{
    Content, Input, OutputContent, OutputMessage, OutputStatus, OutputText, Response,
    Role as ResponseRole, Status,
};
use dynamo_async_openai::types::{
    ChatCompletionRequestMessage, ChatCompletionRequestUserMessage,
    ChatCompletionRequestUserMessageContent, CreateChatCompletionRequest,
};
use dynamo_runtime::protocols::annotated::AnnotationsProvider;
use serde::{Deserialize, Serialize};
use uuid::Uuid;
use validator::Validate;

use super::chat_completions::{NvCreateChatCompletionRequest, NvCreateChatCompletionResponse};
use super::nvext::{NvExt, NvExtProvider};
use super::{OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider};

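/// Request body for the OpenAI Responses API ("create response"), with the upstream
/// `CreateResponse` flattened in and an optional NVIDIA-specific `nvext` extension.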
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvCreateResponse {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::responses::CreateResponse,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub nvext: Option<NvExt>,
}

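/// Response body for the OpenAI Responses API, wrapping the upstream `Response` object.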
#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
pub struct NvResponse {
    #[serde(flatten)]
    pub inner: dynamo_async_openai::types::responses::Response,
}

impl NvExtProvider for NvCreateResponse {
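    /// Returns the NVIDIA extension block, if one was supplied on the request.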
    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }

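    /// Responses API requests carry no separate raw prompt, so this always returns `None`.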
    fn raw_prompt(&self) -> Option<String> {
        None
    }
}

impl AnnotationsProvider for NvCreateResponse {
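    /// Returns the annotations requested via `nvext`, if any.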
    fn annotations(&self) -> Option<Vec<String>> {
        self.nvext
            .as_ref()
            .and_then(|nvext| nvext.annotations.clone())
    }

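    /// Returns true if the given annotation was requested via `nvext`.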
    fn has_annotation(&self, annotation: &str) -> bool {
        self.nvext
            .as_ref()
            .and_then(|nvext| nvext.annotations.as_ref())
            .map(|annotations| annotations.contains(&annotation.to_string()))
            .unwrap_or(false)
    }
}

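// Sampling options are read straight from the flattened `CreateResponse`; options the
// Responses request does not carry (penalties, seed, n, best_of) resolve to `None`.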
impl OpenAISamplingOptionsProvider for NvCreateResponse {
    fn get_temperature(&self) -> Option<f32> {
        self.inner.temperature
    }

    fn get_top_p(&self) -> Option<f32> {
        self.inner.top_p
    }

    fn get_frequency_penalty(&self) -> Option<f32> {
        None
    }

    fn get_presence_penalty(&self) -> Option<f32> {
        None
    }

    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }

    fn get_seed(&self) -> Option<i64> {
        None
    }

    fn get_n(&self) -> Option<u8> {
        None
    }

    fn get_best_of(&self) -> Option<u8> {
        None
    }
}

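// Stop conditions: `max_output_tokens` maps onto the chat notion of max tokens; the
// Responses request exposes no min-token or stop-sequence fields, so those return `None`.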
impl OpenAIStopConditionsProvider for NvCreateResponse {
    #[allow(deprecated)]
    fn get_max_tokens(&self) -> Option<u32> {
        self.inner.max_output_tokens
    }

    fn get_min_tokens(&self) -> Option<u32> {
        None
    }

    fn get_stop(&self) -> Option<Vec<String>> {
        None
    }

    fn nvext(&self) -> Option<&NvExt> {
        self.nvext.as_ref()
    }
}

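// Converts a Responses API request into a chat completion request. Only plain-text
// input is supported; structured `Input::Items` payloads are rejected with an error.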
impl TryFrom<NvCreateResponse> for NvCreateChatCompletionRequest {
    type Error = anyhow::Error;

    fn try_from(resp: NvCreateResponse) -> Result<Self, Self::Error> {
        let input_text = match resp.inner.input {
            Input::Text(text) => text,
            Input::Items(_) => {
                return Err(anyhow::anyhow!(
                    "Input::Items not supported in conversion to NvCreateChatCompletionRequest"
                ));
            }
        };

        let messages = vec![ChatCompletionRequestMessage::User(
            ChatCompletionRequestUserMessage {
                content: ChatCompletionRequestUserMessageContent::Text(input_text),
                name: None,
            },
        )];

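        // Clamp `top_logprobs` into the range accepted by the chat completions API.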
        let top_logprobs = convert_top_logprobs(resp.inner.top_logprobs);

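        // Build the chat request; streaming is always enabled on the converted request.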
        Ok(NvCreateChatCompletionRequest {
            inner: CreateChatCompletionRequest {
                messages,
                model: resp.inner.model,
                temperature: resp.inner.temperature,
                top_p: resp.inner.top_p,
                max_completion_tokens: resp.inner.max_output_tokens,
                top_logprobs,
                stream: Some(true),
                ..Default::default()
            },
            common: Default::default(),
            nvext: resp.nvext,
            chat_template_args: None,
        })
    }
}

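/// Clamps `top_logprobs` to at most 20 (the upper bound accepted by the chat
/// completions API) and narrows it to `u8`.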
fn convert_top_logprobs(input: Option<u32>) -> Option<u8> {
    input.map(|x| x.min(20) as u8)
}

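// Converts a chat completion response into a Responses API response, wrapping the
// first choice's content in a single completed assistant message.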
impl TryFrom<NvCreateChatCompletionResponse> for NvResponse {
    type Error = anyhow::Error;

    fn try_from(nv_resp: NvCreateChatCompletionResponse) -> Result<Self, Self::Error> {
        let chat_resp = nv_resp;
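        // Use the first choice's content; warn and fall back to empty text if absent.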
        let content_text = chat_resp
            .choices
            .into_iter()
            .next()
            .and_then(|choice| choice.message.content)
            .unwrap_or_else(|| {
                tracing::warn!("No choices in chat completion response, using empty content");
                String::new()
            });
        let message_id = format!("msg_{}", Uuid::new_v4().simple());
        let response_id = format!("resp_{}", Uuid::new_v4().simple());

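        // Wrap the generated text in a single completed assistant message.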
        let output = vec![OutputContent::Message(OutputMessage {
            id: message_id,
            role: ResponseRole::Assistant,
            status: OutputStatus::Completed,
            content: vec![Content::OutputText(OutputText {
                text: content_text,
                annotations: vec![],
            })],
        })];

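        // Fields that cannot be derived from the chat response are left unset.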
        let response = Response {
            id: response_id,
            object: "response".to_string(),
            created_at: chat_resp.created as u64,
            model: chat_resp.model,
            status: Status::Completed,
            output,
            output_text: None,
            parallel_tool_calls: None,
            reasoning: None,
            service_tier: None,
            store: None,
            truncation: None,
            temperature: None,
            top_p: None,
            tools: None,
            metadata: None,
            previous_response_id: None,
            error: None,
            incomplete_details: None,
            instructions: None,
            max_output_tokens: None,
            text: None,
            tool_choice: None,
            usage: None,
            user: None,
        };

        Ok(NvResponse { inner: response })
    }
}

#[cfg(test)]
mod tests {
    use dynamo_async_openai::types::responses::{CreateResponse, Input};
    use dynamo_async_openai::types::{
        ChatCompletionRequestMessage, ChatCompletionRequestUserMessageContent,
    };

    use super::*;
    use crate::types::openai::chat_completions::NvCreateChatCompletionResponse;

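    /// Builds a test `NvCreateResponse` with a text input, fixed sampling options, and
    /// two annotations.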
    fn make_response_with_input(text: &str) -> NvCreateResponse {
        NvCreateResponse {
            inner: CreateResponse {
                input: Input::Text(text.into()),
                model: "test-model".into(),
                max_output_tokens: Some(1024),
                temperature: Some(0.5),
                top_p: Some(0.9),
                top_logprobs: Some(15),
                ..Default::default()
            },
            nvext: Some(NvExt {
                annotations: Some(vec!["debug".into(), "trace".into()]),
                ..Default::default()
            }),
        }
    }

    #[test]
    fn test_annotations_trait_behavior() {
        let req = make_response_with_input("hello");
        assert_eq!(
            req.annotations(),
            Some(vec!["debug".to_string(), "trace".to_string()])
        );
        assert!(req.has_annotation("debug"));
        assert!(req.has_annotation("trace"));
        assert!(!req.has_annotation("missing"));
    }

    #[test]
    fn test_openai_sampling_trait_behavior() {
        let req = make_response_with_input("hello");
        assert_eq!(req.get_temperature(), Some(0.5));
        assert_eq!(req.get_top_p(), Some(0.9));
        assert_eq!(req.get_frequency_penalty(), None);
        assert_eq!(req.get_presence_penalty(), None);
    }

    #[test]
    fn test_openai_stop_conditions_trait_behavior() {
        let req = make_response_with_input("hello");
        assert_eq!(req.get_max_tokens(), Some(1024));
        assert_eq!(req.get_min_tokens(), None);
        assert_eq!(req.get_stop(), None);
    }

    #[test]
    fn test_into_nvcreate_chat_completion_request() {
        let nv_req: NvCreateChatCompletionRequest =
            make_response_with_input("hi there").try_into().unwrap();

        assert_eq!(nv_req.inner.model, "test-model");
        assert_eq!(nv_req.inner.temperature, Some(0.5));
        assert_eq!(nv_req.inner.top_p, Some(0.9));
        assert_eq!(nv_req.inner.max_completion_tokens, Some(1024));
        assert_eq!(nv_req.inner.top_logprobs, Some(15));
        assert_eq!(nv_req.inner.stream, Some(true));

        let messages = &nv_req.inner.messages;
        assert_eq!(messages.len(), 1);
        match &messages[0] {
            ChatCompletionRequestMessage::User(user_msg) => match &user_msg.content {
                ChatCompletionRequestUserMessageContent::Text(t) => {
                    assert_eq!(t, "hi there");
                }
                _ => panic!("unexpected user content type"),
            },
            _ => panic!("expected user message"),
        }
    }

    #[allow(deprecated)]
    #[test]
    fn test_into_nvresponse_from_chat_response() {
        let now = 1_726_000_000;
        let chat_resp = NvCreateChatCompletionResponse {
            id: "chatcmpl-xyz".into(),
            choices: vec![dynamo_async_openai::types::ChatChoice {
                index: 0,
                message: dynamo_async_openai::types::ChatCompletionResponseMessage {
                    content: Some("This is a reply".into()),
                    refusal: None,
                    tool_calls: None,
                    role: dynamo_async_openai::types::Role::Assistant,
                    function_call: None,
                    audio: None,
                    reasoning_content: None,
                },
                finish_reason: None,
                logprobs: None,
            }],
            created: now,
            model: "llama-3.1-8b-instruct".into(),
            service_tier: None,
            system_fingerprint: None,
            object: "chat.completion".to_string(),
            usage: None,
        };

        let wrapped: NvResponse = chat_resp.try_into().unwrap();

        assert_eq!(wrapped.inner.model, "llama-3.1-8b-instruct");
        assert_eq!(wrapped.inner.status, Status::Completed);
        assert_eq!(wrapped.inner.object, "response");
        assert!(wrapped.inner.id.starts_with("resp_"));

        let msg = match &wrapped.inner.output[0] {
            OutputContent::Message(m) => m,
            _ => panic!("Expected Message variant"),
        };
        assert_eq!(msg.role, ResponseRole::Assistant);

        match &msg.content[0] {
            Content::OutputText(txt) => {
                assert_eq!(txt.text, "This is a reply");
            }
            _ => panic!("Expected OutputText content"),
        }
    }

    #[test]
    fn test_convert_top_logprobs_clamped() {
        assert_eq!(convert_top_logprobs(Some(5)), Some(5));
        assert_eq!(convert_top_logprobs(Some(21)), Some(20));
        assert_eq!(convert_top_logprobs(Some(1000)), Some(20));
        assert_eq!(convert_top_logprobs(None), None);
    }
}