dynamo_llm/protocols/common/
preprocessor.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use derive_builder::Builder;
17use serde::{Deserialize, Serialize};
18
19use super::{SamplingOptions, StopConditions};
20use crate::protocols::TokenIdType;
21
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`dynamo.llm-preprocessor`]
/// crate is responsible for converting requests from the public APIs to this internal representation.
///
/// A `PreprocessedRequestBuilder` is generated for this type by the `Builder` derive; fields
/// annotated with `#[builder(default)]` are optional when building.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
    /// The prompt as a sequence of token IDs.
    pub token_ids: Vec<TokenIdType>,

    /// StopConditions are conditions that the inference engine will use to stop generation.
    pub stop_conditions: StopConditions,

    /// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// TODO: document how, and in which order, the individual sampling options are applied.
    pub sampling_options: SamplingOptions,

    /// The EOS token ID(s) for the Model.
    /// Not every backend needs this, but those that do can find it here.
    /// TODO - refactor this to a better location
    #[builder(default)]
    pub eos_token_ids: Vec<TokenIdType>,

    /// The computed checksum of the Model Deployment Card (MDC).
    #[builder(default)]
    pub mdc_sum: Option<String>,

    /// User requested annotations for the request.
    #[builder(default)]
    pub annotations: Vec<String>,

    /// Estimated number of prefix-hit blocks (only used in KV-aware routing).
    /// NOTE(review): this comment previously said "tokens" while the field name says
    /// "blocks" - confirm the unit against the router that populates it.
    #[builder(default)]
    pub estimated_prefix_hit_num_blocks: Option<u32>,
}
55
56impl PreprocessedRequest {
57    pub fn has_annotation(&self, annotation: &str) -> bool {
58        self.annotations.contains(&annotation.to_string())
59    }
60}
61
62impl PreprocessedRequest {
63    pub fn builder() -> PreprocessedRequestBuilder {
64        PreprocessedRequestBuilder::default()
65    }
66}