dynamo_llm/protocols/openai/completions/
delta.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use super::{CompletionChoice, CompletionResponse, NvCreateCompletionRequest};
17use crate::protocols::common;
18use crate::protocols::openai::CompletionUsage;
19
20impl NvCreateCompletionRequest {
21    // put this method on the request
22    // inspect the request to extract options
23    pub fn response_generator(&self) -> DeltaGenerator {
24        let options = DeltaGeneratorOptions {
25            enable_usage: true,
26            enable_logprobs: false,
27        };
28
29        DeltaGenerator::new(self.inner.model.clone(), options)
30    }
31}
32
/// Switches controlling what a [`DeltaGenerator`] includes in its responses.
///
/// Both flags default to `false`.
#[derive(Clone, Debug, Default)]
pub struct DeltaGeneratorOptions {
    /// When `true`, completion tokens are accumulated and the usage block is
    /// attached to every generated response; when `false`, `usage` is `None`.
    pub enable_usage: bool,
    /// Requests logprob output. The generator in this file does not emit
    /// logprobs yet (choices are built with `logprobs: None`).
    pub enable_logprobs: bool,
}
38
39#[derive(Debug, Clone)]
40pub struct DeltaGenerator {
41    id: String,
42    object: String,
43    created: u64,
44    model: String,
45    system_fingerprint: Option<String>,
46    usage: CompletionUsage,
47
48    options: DeltaGeneratorOptions,
49}
50
51impl DeltaGenerator {
52    pub fn new(model: String, options: DeltaGeneratorOptions) -> Self {
53        let now = std::time::SystemTime::now()
54            .duration_since(std::time::UNIX_EPOCH)
55            .unwrap()
56            .as_secs();
57
58        Self {
59            id: format!("cmpl-{}", uuid::Uuid::new_v4()),
60            object: "text_completion".to_string(),
61            created: now,
62            model,
63            system_fingerprint: None,
64            usage: CompletionUsage::default(),
65            options,
66        }
67    }
68
69    pub fn update_isl(&mut self, isl: i32) {
70        self.usage.prompt_tokens = isl;
71    }
72
73    pub fn create_choice(
74        &self,
75        index: u64,
76        text: Option<String>,
77        finish_reason: Option<String>,
78    ) -> CompletionResponse {
79        // todo - update for tool calling
80
81        CompletionResponse {
82            id: self.id.clone(),
83            object: self.object.clone(),
84            created: self.created,
85            model: self.model.clone(),
86            system_fingerprint: self.system_fingerprint.clone(),
87            choices: vec![CompletionChoice {
88                text: text.unwrap_or_default(),
89                index,
90                finish_reason,
91                logprobs: None,
92            }],
93            usage: if self.options.enable_usage {
94                Some(self.usage.clone())
95            } else {
96                None
97            },
98        }
99    }
100}
101
102impl crate::protocols::openai::DeltaGeneratorExt<CompletionResponse> for DeltaGenerator {
103    fn choice_from_postprocessor(
104        &mut self,
105        delta: common::llm_backend::BackendOutput,
106    ) -> anyhow::Result<CompletionResponse> {
107        // aggregate usage
108        if self.options.enable_usage {
109            self.usage.completion_tokens += delta.token_ids.len() as i32;
110        }
111
112        // todo logprobs
113
114        let finish_reason = match delta.finish_reason {
115            Some(common::FinishReason::EoS) => Some("stop".to_string()),
116            Some(common::FinishReason::Stop) => Some("stop".to_string()),
117            Some(common::FinishReason::Length) => Some("length".to_string()),
118            Some(common::FinishReason::Cancelled) => Some("cancelled".to_string()),
119            Some(common::FinishReason::Error(err_msg)) => {
120                return Err(anyhow::anyhow!(err_msg));
121            }
122            None => None,
123        };
124
125        // create choice
126        let index = 0;
127        Ok(self.create_choice(index, delta.text, finish_reason))
128    }
129
130    // TODO: This is a hack. Change `prompt_tokens` to u32
131    fn get_isl(&self) -> Option<u32> {
132        Some(self.usage.prompt_tokens as u32)
133    }
134}