dynamo_llm/protocols/openai/
chat_completions.rs1use dynamo_runtime::protocols::annotated::AnnotationsProvider;
5use serde::{Deserialize, Serialize};
6use validator::Validate;
7
8use crate::engines::ValidateRequest;
9
10use super::{
11 OpenAIOutputOptionsProvider, OpenAISamplingOptionsProvider, OpenAIStopConditionsProvider,
12 common_ext::{
13 CommonExt, CommonExtProvider, choose_with_deprecation, emit_nvext_deprecation_warning,
14 },
15 nvext::NvExt,
16 nvext::NvExtProvider,
17 validate,
18};
19
20pub mod aggregator;
21mod delta;
22pub mod jail;
23
24pub use aggregator::DeltaAggregator;
25pub use delta::DeltaGenerator;
26
27#[derive(Serialize, Deserialize, Validate, Debug, Clone)]
36pub struct NvCreateChatCompletionRequest {
37 #[serde(flatten)]
38 pub inner: dynamo_async_openai::types::CreateChatCompletionRequest,
39
40 #[serde(flatten, default)]
41 pub common: CommonExt,
42
43 #[serde(skip_serializing_if = "Option::is_none")]
44 pub nvext: Option<NvExt>,
45
46 #[serde(default, skip_serializing_if = "Option::is_none")]
48 pub chat_template_args: Option<std::collections::HashMap<String, serde_json::Value>>,
49}
50
51pub type NvCreateChatCompletionResponse = dynamo_async_openai::types::CreateChatCompletionResponse;
58
59pub type NvCreateChatCompletionStreamResponse =
66 dynamo_async_openai::types::CreateChatCompletionStreamResponse;
67
68impl NvExtProvider for NvCreateChatCompletionRequest {
71 fn nvext(&self) -> Option<&NvExt> {
73 self.nvext.as_ref()
74 }
75
76 fn raw_prompt(&self) -> Option<String> {
78 None
79 }
80}
81
82impl AnnotationsProvider for NvCreateChatCompletionRequest {
85 fn annotations(&self) -> Option<Vec<String>> {
87 self.nvext
88 .as_ref()
89 .and_then(|nvext| nvext.annotations.clone())
90 }
91
92 fn has_annotation(&self, annotation: &str) -> bool {
100 self.nvext
101 .as_ref()
102 .and_then(|nvext| nvext.annotations.as_ref())
103 .map(|annotations| annotations.contains(&annotation.to_string()))
104 .unwrap_or(false)
105 }
106}
107
108impl OpenAISamplingOptionsProvider for NvCreateChatCompletionRequest {
111 fn get_temperature(&self) -> Option<f32> {
113 self.inner.temperature
114 }
115
116 fn get_top_p(&self) -> Option<f32> {
118 self.inner.top_p
119 }
120
121 fn get_frequency_penalty(&self) -> Option<f32> {
123 self.inner.frequency_penalty
124 }
125
126 fn get_presence_penalty(&self) -> Option<f32> {
128 self.inner.presence_penalty
129 }
130
131 fn nvext(&self) -> Option<&NvExt> {
133 self.nvext.as_ref()
134 }
135 fn get_seed(&self) -> Option<i64> {
137 self.inner.seed
138 }
139
140 fn get_n(&self) -> Option<u8> {
142 self.inner.n
143 }
144
145 fn get_best_of(&self) -> Option<u8> {
147 None }
149}
150
151impl CommonExtProvider for NvCreateChatCompletionRequest {
154 fn common_ext(&self) -> Option<&CommonExt> {
156 Some(&self.common)
157 }
158
159 fn get_guided_json(&self) -> Option<&serde_json::Value> {
161 if let Some(nvext) = &self.nvext
163 && nvext.guided_json.is_some()
164 {
165 emit_nvext_deprecation_warning("guided_json", true, self.common.guided_json.is_some());
166 }
167 self.common
168 .guided_json
169 .as_ref()
170 .or_else(|| self.nvext.as_ref().and_then(|nv| nv.guided_json.as_ref()))
171 }
172
173 fn get_guided_regex(&self) -> Option<String> {
174 choose_with_deprecation(
175 "guided_regex",
176 self.common.guided_regex.as_ref(),
177 self.nvext.as_ref().and_then(|nv| nv.guided_regex.as_ref()),
178 )
179 }
180
181 fn get_guided_grammar(&self) -> Option<String> {
182 choose_with_deprecation(
183 "guided_grammar",
184 self.common.guided_grammar.as_ref(),
185 self.nvext
186 .as_ref()
187 .and_then(|nv| nv.guided_grammar.as_ref()),
188 )
189 }
190
191 fn get_guided_choice(&self) -> Option<Vec<String>> {
192 choose_with_deprecation(
193 "guided_choice",
194 self.common.guided_choice.as_ref(),
195 self.nvext.as_ref().and_then(|nv| nv.guided_choice.as_ref()),
196 )
197 }
198
199 fn get_guided_decoding_backend(&self) -> Option<String> {
200 choose_with_deprecation(
201 "guided_decoding_backend",
202 self.common.guided_decoding_backend.as_ref(),
203 self.nvext
204 .as_ref()
205 .and_then(|nv| nv.guided_decoding_backend.as_ref()),
206 )
207 }
208
209 fn get_top_k(&self) -> Option<i32> {
210 choose_with_deprecation(
211 "top_k",
212 self.common.top_k.as_ref(),
213 self.nvext.as_ref().and_then(|nv| nv.top_k.as_ref()),
214 )
215 }
216
217 fn get_min_p(&self) -> Option<f32> {
218 choose_with_deprecation(
219 "min_p",
220 self.common.min_p.as_ref(),
221 self.nvext.as_ref().and_then(|nv| nv.min_p.as_ref()),
222 )
223 }
224
225 fn get_repetition_penalty(&self) -> Option<f32> {
226 choose_with_deprecation(
227 "repetition_penalty",
228 self.common.repetition_penalty.as_ref(),
229 self.nvext
230 .as_ref()
231 .and_then(|nv| nv.repetition_penalty.as_ref()),
232 )
233 }
234
235 fn get_include_stop_str_in_output(&self) -> Option<bool> {
236 self.common.include_stop_str_in_output
237 }
238}
239
240impl OpenAIStopConditionsProvider for NvCreateChatCompletionRequest {
243 #[allow(deprecated)]
245 fn get_max_tokens(&self) -> Option<u32> {
246 self.inner.max_completion_tokens.or(self.inner.max_tokens)
247 }
248
249 fn get_min_tokens(&self) -> Option<u32> {
253 self.common.min_tokens
254 }
255
256 fn get_stop(&self) -> Option<Vec<String>> {
264 self.inner.stop.as_ref().map(|stop| match stop {
265 dynamo_async_openai::types::Stop::String(s) => vec![s.clone()],
266 dynamo_async_openai::types::Stop::StringArray(arr) => arr.clone(),
267 })
268 }
269
270 fn nvext(&self) -> Option<&NvExt> {
272 self.nvext.as_ref()
273 }
274
275 fn get_common_ignore_eos(&self) -> Option<bool> {
277 self.common.ignore_eos
278 }
279
280 fn get_ignore_eos(&self) -> Option<bool> {
283 choose_with_deprecation(
284 "ignore_eos",
285 self.get_common_ignore_eos().as_ref(),
286 NvExtProvider::nvext(self).and_then(|nv| nv.ignore_eos.as_ref()),
287 )
288 }
289}
290
291impl OpenAIOutputOptionsProvider for NvCreateChatCompletionRequest {
292 fn get_logprobs(&self) -> Option<u32> {
293 match self.inner.logprobs {
294 Some(true) => match self.inner.top_logprobs {
295 Some(top_logprobs) => Some(top_logprobs as u32),
296 None => Some(1_u32),
297 },
298 Some(false) => None,
299 None => None,
300 }
301 }
302
303 fn get_prompt_logprobs(&self) -> Option<u32> {
304 None
305 }
306
307 fn get_skip_special_tokens(&self) -> Option<bool> {
308 None
309 }
310
311 fn get_formatted_prompt(&self) -> Option<bool> {
312 None
313 }
314}
315
316impl ValidateRequest for NvCreateChatCompletionRequest {
319 fn validate(&self) -> Result<(), anyhow::Error> {
320 validate::validate_messages(&self.inner.messages)?;
321 validate::validate_model(&self.inner.model)?;
322 validate::validate_reasoning_effort(&self.inner.reasoning_effort)?;
324 validate::validate_metadata(&self.inner.metadata)?;
325 validate::validate_frequency_penalty(self.inner.frequency_penalty)?;
326 validate::validate_logit_bias(&self.inner.logit_bias)?;
327 validate::validate_top_logprobs(self.inner.top_logprobs)?;
329 validate::validate_max_completion_tokens(self.inner.max_completion_tokens)?;
331 validate::validate_n(self.inner.n)?;
332 validate::validate_presence_penalty(self.inner.presence_penalty)?;
336 validate::validate_service_tier(&self.inner.service_tier)?;
339 validate::validate_stop(&self.inner.stop)?;
340 validate::validate_temperature(self.inner.temperature)?;
343 validate::validate_top_p(self.inner.top_p)?;
344 validate::validate_tools(&self.inner.tools.as_deref())?;
345 validate::validate_user(self.inner.user.as_deref())?;
348 validate::validate_repetition_penalty(self.get_repetition_penalty())?;
352
353 Ok(())
354 }
355}