// dynamo_llm/protocols/common/preprocessor.rs
1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3
4use derive_builder::Builder;
5use serde::{Deserialize, Serialize};
6
7use super::{OutputOptions, SamplingOptions, StopConditions};
8use crate::kv_router::RouterConfigOverride;
9use crate::protocols::TokenIdType;
10
/// [`PreprocessedRequest`] is the internal representation of an LLM request. The [`dynamo.llm-preprocessor`]
/// crate is responsible for converting requests from the public APIs to this internal representation.
#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
pub struct PreprocessedRequest {
    /// ID of the model to use
    pub model: String,

    /// The tokenized prompt as a flat sequence of token IDs
    pub token_ids: Vec<TokenIdType>,

    /// Batch Token Ids - for batch completion requests (i.e. using ArrayOfIntegerArray type from OpenAI /completions);
    /// `None` for single-prompt requests
    #[builder(default)]
    pub batch_token_ids: Option<Vec<Vec<TokenIdType>>>,

    /// StopConditions are conditions that the inference engine will use to stop generation.
    pub stop_conditions: StopConditions,

    /// SamplingOptions directs the inference engine to use sampling instead of greedy decoding.
    /// More documentation on how and on the order in which sampling options are applied
    /// are needed.
    pub sampling_options: SamplingOptions,

    /// OutputOptions are options that control the output of the inference engine such as whether
    /// to return log probabilities, or whether to skip special tokens in output.
    pub output_options: OutputOptions,

    /// The EOS token ID(s) for the Model
    /// Not every backend needs this, but those that do can find it here.
    /// Defaults to empty when not supplied via the builder.
    /// TODO - refactor this to a better location
    #[builder(default)]
    pub eos_token_ids: Vec<TokenIdType>,

    /// The computed checksum of the Model Deployment Card (MDC).
    #[builder(default)]
    pub mdc_sum: Option<String>,

    /// User requested annotations for the request; queried via [`PreprocessedRequest::has_annotation`]
    #[builder(default)]
    pub annotations: Vec<String>,

    /// Estimated number of prefix hit tokens (only used in kv aware routing)
    #[builder(default)]
    pub estimated_prefix_hit_num_blocks: Option<u32>,

    /// Targeted backend instance ID for the request; `None` lets the router pick an instance
    #[builder(default)]
    pub backend_instance_id: Option<i64>,

    /// Router configuration overrides for this specific request
    #[builder(default)]
    pub router_config_override: Option<RouterConfigOverride>,
}
63
64impl PreprocessedRequest {
65 pub fn has_annotation(&self, annotation: &str) -> bool {
66 self.annotations.contains(&annotation.to_string())
67 }
68}
69
70impl PreprocessedRequest {
71 pub fn builder() -> PreprocessedRequestBuilder {
72 PreprocessedRequestBuilder::default()
73 }
74}
75
76/// [`PreprocessedEmbeddingRequest`] is the internal representation of an embedding request
77/// after preprocessing. Contains tokenized input ready for embedding engines.
78#[derive(Serialize, Deserialize, Debug, Clone, Builder)]
79pub struct PreprocessedEmbeddingRequest {
80 /// Tokenized input text as token IDs (one Vec per input text)
81 pub token_ids: Vec<Vec<TokenIdType>>,
82
83 /// Model to use for embedding
84 pub model: String,
85
86 /// Encoding format preference
87 pub encoding_format: Option<String>,
88
89 /// Number of dimensions for output embeddings (if supported)
90 pub dimensions: Option<u32>,
91
92 /// The computed checksum of the Model Deployment Card (MDC)
93 #[builder(default)]
94 pub mdc_sum: Option<String>,
95
96 /// User requested annotations for the request
97 #[builder(default)]
98 pub annotations: Vec<String>,
99}
100
101impl PreprocessedEmbeddingRequest {
102 pub fn has_annotation(&self, annotation: &str) -> bool {
103 self.annotations.contains(&annotation.to_string())
104 }
105}
106
107impl PreprocessedEmbeddingRequest {
108 pub fn builder() -> PreprocessedEmbeddingRequestBuilder {
109 PreprocessedEmbeddingRequestBuilder::default()
110 }
111}