dynamo_llm/protocols/common/llm_backend.rs

// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// SPDX-License-Identifier: Apache-2.0
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use serde::{Deserialize, Serialize};

use crate::protocols::TokenIdType;

pub type TokenType = Option<String>;
pub type LogProbs = Vec<f64>;

pub use super::preprocessor::PreprocessedRequest;
pub use super::FinishReason;

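/// Post-processed output produced by the Backend from the raw
/// [`LLMEngineOutput`] (see below).
///
/// A minimal construction sketch with illustrative values (marked `ignore`
/// so it is not compiled as a doctest):
///
/// ```ignore
/// let out = BackendOutput {
///     token_ids: vec![42],
///     tokens: vec![Some("hello".to_string())],
///     text: Some("hello".to_string()),
///     cum_log_probs: Some(-0.25),
///     log_probs: Some(vec![-0.25]),
///     finish_reason: None,
/// };
/// // `tokens` must be the same length as `token_ids`.
/// assert_eq!(out.tokens.len(), out.token_ids.len());
/// ```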
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct BackendOutput {
    /// New token_ids generated by the LLM Engine
    pub token_ids: Vec<TokenIdType>,

    /// Unlike [`LLMEngineOutput::tokens`], this is always a vector of tokens rather
    /// than an `Option`. The size of this vector should be the same as the size of
    /// `token_ids`.
    pub tokens: Vec<TokenType>,

    /// Decoded text from the list of tokens.
    pub text: Option<String>,

    /// Optional cumulative log probability
    pub cum_log_probs: Option<f64>,

    /// Optional log probabilities
    pub log_probs: Option<LogProbs>,

    // TODO: Enrich this with more information once we can apply our first-level
    // postprocessing logic and return more detailed information
    pub finish_reason: Option<FinishReason>,
    // Model Deployment Card checksum
    // pub mdcsum: String,
}

/// The LLM engine and backend manage their own state, specifically translating how a
/// given request/slot is managed on that particular backend.
///
/// For nvLLM's purposes, it has a single traceable request_id as part of its context
/// that has propagated through the service pipeline to the backend.
///
/// This is the minimal raw output from the LLM engine. The Backend may then apply
/// multiple levels of post-processing before the [`BackendOutput`] is returned.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct LLMEngineOutput {
    /// New token_ids generated by the LLM Engine
    pub token_ids: Vec<TokenIdType>,

    /// If the LLM Engine performs the detokenization, then this will be `Some`,
    /// containing the detokenized text/tokens. If this value is None, then the
    /// Backend is responsible for detokenization.
    pub tokens: Option<Vec<TokenType>>,

    /// Decoded text, if the engine produced it
    pub text: Option<String>,

    /// Optional cumulative log probability
    pub cum_log_probs: Option<f64>,

    /// Optional log probabilities
    pub log_probs: Option<LogProbs>,

    // TODO: Enrich this with more information once we can apply our first-level
    // postprocessing logic and return more detailed information
    pub finish_reason: Option<FinishReason>,
}
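
// A minimal sketch, not part of the original API: one plausible first-level
// postprocessing step that turns an `LLMEngineOutput` into a `BackendOutput`,
// padding `tokens` with `None` placeholders when the engine did not
// detokenize. The name `to_backend_output` is hypothetical.
#[allow(dead_code)]
fn to_backend_output(out: LLMEngineOutput) -> BackendOutput {
    // Engines that skip detokenization leave `tokens` as `None`; preserve the
    // `tokens.len() == token_ids.len()` invariant by padding with `None`.
    let tokens = out
        .tokens
        .unwrap_or_else(|| vec![None; out.token_ids.len()]);
    BackendOutput {
        token_ids: out.token_ids,
        tokens,
        text: out.text,
        cum_log_probs: out.cum_log_probs,
        log_probs: out.log_probs,
        finish_reason: out.finish_reason,
    }
}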

impl LLMEngineOutput {
    /// Builds an otherwise-empty output that carries only a finish reason.
    fn with_finish_reason(finish_reason: FinishReason) -> Self {
        LLMEngineOutput {
            token_ids: vec![],
            tokens: None,
            text: None,
            cum_log_probs: None,
            log_probs: None,
            finish_reason: Some(finish_reason),
        }
    }

    /// Output signalling that the request was cancelled.
    pub fn cancelled() -> Self {
        Self::with_finish_reason(FinishReason::Cancelled)
    }

    /// Output signalling that the engine hit a stop condition.
    pub fn stop() -> Self {
        Self::with_finish_reason(FinishReason::Stop)
    }

    /// Output signalling that the maximum generation length was reached.
    pub fn length() -> Self {
        Self::with_finish_reason(FinishReason::Length)
    }

    /// Output signalling that the engine failed with `err_msg`.
    pub fn error(err_msg: String) -> Self {
        Self::with_finish_reason(FinishReason::Error(err_msg))
    }
}
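
// A minimal test sketch for the constructors above, assuming `serde_json` is
// available as a dev-dependency; adjust to the crate's actual test setup.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn constructors_set_only_the_finish_reason() {
        let out = LLMEngineOutput::stop();
        assert!(out.token_ids.is_empty());
        assert!(out.tokens.is_none());
        assert!(out.text.is_none());
        assert_eq!(out.finish_reason, Some(FinishReason::Stop));

        let out = LLMEngineOutput::error("engine failure".to_string());
        assert_eq!(
            out.finish_reason,
            Some(FinishReason::Error("engine failure".to_string()))
        );
    }

    #[test]
    fn llm_engine_output_round_trips_through_json() {
        let out = LLMEngineOutput::cancelled();
        let json = serde_json::to_string(&out).expect("serialize");
        let back: LLMEngineOutput = serde_json::from_str(&json).expect("deserialize");
        assert_eq!(out, back);
    }
}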