dynamo_llm/protocols/common/
preprocessor.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use derive_builder::Builder;
5use serde::{Deserialize, Serialize};
6
7use super::{OutputOptions, SamplingOptions, StopConditions};
8use crate::kv_router::RouterConfigOverride;
9use crate::protocols::TokenIdType;
10
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The
/// `dynamo.llm-preprocessor` crate is responsible for converting requests from the public APIs
/// to this internal representation.
///
/// The struct derives `Builder`; fields annotated with `#[builder(default)]` fall back to their
/// `Default` value when they are not set on the builder.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
    /// ID of the model to use
    pub model: String,

    /// Prompt expressed as a sequence of token IDs
    pub token_ids: Vec<TokenIdType>,

    /// Batch token IDs, for batch completion requests (i.e., requests using the
    /// ArrayOfIntegerArray type from OpenAI /completions). One inner `Vec` per prompt.
    #[builder(default)]
    pub batch_token_ids: Option<Vec<Vec<TokenIdType>>>,

    /// StopConditions are conditions that the inference engine will use to stop generation.
    pub stop_conditions: StopConditions,

    /// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// TODO: more documentation is needed on how, and in which order, the sampling options
    /// are applied.
    pub sampling_options: SamplingOptions,

    /// OutputOptions are options that control the output of the inference engine such as whether
    /// to return log probabilities, or whether to skip special tokens in output.
    pub output_options: OutputOptions,

    /// The EOS token ID(s) for the Model
    /// Not every backend needs this, but those that do can find it here.
    /// TODO - refactor this to a better location
    #[builder(default)]
    pub eos_token_ids: Vec<TokenIdType>,

    /// The computed checksum of the Model Deployment Card (MDC).
    #[builder(default)]
    pub mdc_sum: Option<String>,

    /// User requested annotations for the request
    #[builder(default)]
    pub annotations: Vec<String>,

    /// Estimated size of the prefix hit (only used in kv aware routing).
    /// NOTE(review): the field name counts blocks, while this comment previously said
    /// "tokens" - confirm the unit against the router.
    #[builder(default)]
    pub estimated_prefix_hit_num_blocks: Option<u32>,

    /// Targeted backend instance ID for the request
    #[builder(default)]
    pub backend_instance_id: Option<i64>,

    /// Router configuration overrides for this specific request
    #[builder(default)]
    pub router_config_override: Option<RouterConfigOverride>,
}
63
64impl PreprocessedRequest {
65    pub fn has_annotation(&self, annotation: &str) -> bool {
66        self.annotations.contains(&annotation.to_string())
67    }
68}
69
70impl PreprocessedRequest {
71    pub fn builder() -> PreprocessedRequestBuilder {
72        PreprocessedRequestBuilder::default()
73    }
74}
75
76/// [`PreprocessedEmbeddingRequest`] is the internal representation of an embedding request
77/// after preprocessing. Contains tokenized input ready for embedding engines.
78#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
79pub struct PreprocessedEmbeddingRequest {
80    /// Tokenized input text as token IDs (one Vec per input text)
81    pub token_ids: Vec<Vec<TokenIdType>>,
82
83    /// Model to use for embedding
84    pub model: String,
85
86    /// Encoding format preference
87    pub encoding_format: Option<String>,
88
89    /// Number of dimensions for output embeddings (if supported)
90    pub dimensions: Option<u32>,
91
92    /// The computed checksum of the Model Deployment Card (MDC)
93    #[builder(default)]
94    pub mdc_sum: Option<String>,
95
96    /// User requested annotations for the request
97    #[builder(default)]
98    pub annotations: Vec<String>,
99}
100
101impl PreprocessedEmbeddingRequest {
102    pub fn has_annotation(&self, annotation: &str) -> bool {
103        self.annotations.contains(&annotation.to_string())
104    }
105}
106
107impl PreprocessedEmbeddingRequest {
108    pub fn builder() -> PreprocessedEmbeddingRequestBuilder {
109        PreprocessedEmbeddingRequestBuilder::default()
110    }
111}