dynamo_llm/kv_router/
protocols.rs

1// SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2// SPDX-License-Identifier: Apache-2.0
3//
4// Licensed under the Apache License, Version 2.0 (the "License");
5// you may not use this file except in compliance with the License.
6// You may obtain a copy of the License at
7//
8// http://www.apache.org/licenses/LICENSE-2.0
9//
10// Unless required by applicable law or agreed to in writing, software
11// distributed under the License is distributed on an "AS IS" BASIS,
12// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13// See the License for the specific language governing permissions and
14// limitations under the License.
15
16use crate::tokens::Token;
17use serde::{Deserialize, Serialize};
18
19#[derive(Debug, Clone, Serialize, Deserialize, Default)]
20pub struct RouterRequest {
21    pub tokens: Vec<Token>,
22}
23
24#[derive(Debug, Clone, Serialize, Deserialize, Default)]
25pub struct RouterResponse {
26    pub worker_id: i64,
27}
28
/// Outcome of selecting a worker for a request.
#[derive(Debug)]
pub struct WorkerSelectionResult {
    /// Identifier of the worker that was selected.
    pub worker_id: i64,

    /// Total number of blocks needed to prefill the request.
    pub required_blocks: u64,

    /// Estimated number of blocks the selected worker may already hold in its
    /// cache. This is a best-effort estimate, not a guarantee.
    pub overlap_blocks: usize,
}
41
42#[derive(Debug, Clone, Serialize, Deserialize, Default)]
43pub struct ForwardPassMetrics {
44    pub request_active_slots: u64,
45    pub request_total_slots: u64,
46    pub kv_active_blocks: u64,
47    pub kv_total_blocks: u64,
48    // integer from 0 to large number
49    pub num_requests_waiting: u64,
50    // percentage represented as a float from 0 to 1
51    pub gpu_cache_usage_perc: f32,
52    // percentage represented as a float from 0 to 1
53    pub gpu_prefix_cache_hit_rate: f32,
54}
55
/// Hash of a single block's own contents: its token ids, extra token ids and,
/// when present, its LoRA id.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct LocalBlockHash(pub u64);
60
/// Sequence-aware hash of a block: it covers the block's token ids, extra
/// token ids and optional LoRA id, and additionally the hash of the parent
/// block.
///
/// The hashing function that produces this value is external and unknown to
/// this module.
#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
pub struct ExternalSequenceBlockHash(pub u64);
67
68/// Represents a collection of cache events and a shutdown flag.
69#[derive(Serialize, Deserialize, Debug, Clone)]
70pub struct KvCacheEvents {
71    /// A list of cache events.
72    pub events: Vec<KvCacheEvent>,
73    /// A flag indicating whether the cache is shutting down.
74    pub shutdown: bool,
75}
76
77/// Represents a single cache event with an ID and associated data.
78#[derive(Serialize, Deserialize, Debug, Clone)]
79pub struct KvCacheEvent {
80    /// The unique identifier of the event.
81    pub event_id: u64,
82    /// The data associated with the event.
83    pub data: KvCacheEventData,
84}
85
86/// Represents the data associated with a cache event.
87///
88/// Data is either stored or removed.
89#[derive(Serialize, Deserialize, Debug, Clone)]
90#[serde(rename_all = "snake_case")]
91pub enum KvCacheEventData {
92    /// Data for a stored cache event.
93    Stored(KvCacheStoreData),
94    /// Data for a removed cache event.
95    Removed(KvCacheRemoveData),
96}
97
98/// Represents the data associated with a stored cache event.
99#[derive(Serialize, Deserialize, Debug, Clone)]
100pub struct KvCacheStoreData {
101    /// The optional hash of the parent block.
102    pub parent_hash: Option<ExternalSequenceBlockHash>,
103    /// A list of stored blocked data.
104    pub blocks: Vec<KvCacheStoredBlockData>,
105}
106
107/// Represents data for a stored block.
108#[derive(Serialize, Deserialize, Debug, Clone)]
109pub struct KvCacheStoredBlockData {
110    /// The hash of the block.
111    pub block_hash: ExternalSequenceBlockHash,
112    /// The hash of the tokens in the block.
113    pub tokens_hash: LocalBlockHash,
114}
115
116/// Represents the data associated with a removed cache event.
117#[derive(Serialize, Deserialize, Debug, Clone)]
118pub struct KvCacheRemoveData {
119    /// A list of block hashes to remove.
120    pub block_hashes: Vec<ExternalSequenceBlockHash>,
121}
122
123impl Serialize for LocalBlockHash {
124    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
125    where
126        S: serde::Serializer,
127    {
128        serializer.serialize_u64(self.0)
129    }
130}
131
132impl<'de> Deserialize<'de> for LocalBlockHash {
133    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
134    where
135        D: serde::Deserializer<'de>,
136    {
137        let value = u64::deserialize(deserializer)?;
138        Ok(LocalBlockHash(value))
139    }
140}
141
142impl Serialize for ExternalSequenceBlockHash {
143    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
144    where
145        S: serde::Serializer,
146    {
147        serializer.serialize_u64(self.0)
148    }
149}
150
151impl<'de> Deserialize<'de> for ExternalSequenceBlockHash {
152    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
153    where
154        D: serde::Deserializer<'de>,
155    {
156        let value = u64::deserialize(deserializer)?;
157        Ok(ExternalSequenceBlockHash(value))
158    }
159}
160
#[cfg(test)]
mod tests {
    // `serde_json` is referenced by path below; a bare `use serde_json;` is
    // redundant (Clippy: `single_component_path_imports`).
    use super::*;

    /// A `LocalBlockHash` serializes to a bare JSON integer and round-trips.
    #[test]
    fn test_local_block_hash_serialization() {
        let original = LocalBlockHash(12345);
        let json = serde_json::to_string(&original).unwrap();
        assert_eq!(json, "12345");

        let restored: LocalBlockHash = serde_json::from_str(&json).unwrap();
        assert_eq!(restored, original);
    }

    /// An `ExternalSequenceBlockHash` serializes to a bare JSON integer and
    /// round-trips.
    #[test]
    fn test_external_sequence_block_hash_serialization() {
        let original = ExternalSequenceBlockHash(67890);
        let json = serde_json::to_string(&original).unwrap();
        assert_eq!(json, "67890");

        let restored: ExternalSequenceBlockHash = serde_json::from_str(&json).unwrap();
        assert_eq!(restored, original);
    }

    /// A full `KvCacheEvents` batch with one stored event survives a JSON
    /// round trip with every field intact.
    #[test]
    fn test_kv_cache_events_serialization() {
        let batch = KvCacheEvents {
            events: vec![KvCacheEvent {
                event_id: 1,
                data: KvCacheEventData::Stored(KvCacheStoreData {
                    parent_hash: Some(ExternalSequenceBlockHash(1)),
                    blocks: vec![KvCacheStoredBlockData {
                        block_hash: ExternalSequenceBlockHash(2),
                        tokens_hash: LocalBlockHash(3),
                    }],
                }),
            }],
            shutdown: false,
        };

        let json = serde_json::to_string(&batch).unwrap();
        let restored: KvCacheEvents = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.events.len(), 1);
        assert_eq!(restored.events[0].event_id, 1);
        // Exhaustive match: adding a variant later forces this test to be revisited.
        match &restored.events[0].data {
            KvCacheEventData::Stored(store) => {
                assert_eq!(store.parent_hash, Some(ExternalSequenceBlockHash(1)));
                assert_eq!(store.blocks.len(), 1);
                assert_eq!(store.blocks[0].block_hash, ExternalSequenceBlockHash(2));
                assert_eq!(store.blocks[0].tokens_hash, LocalBlockHash(3));
            }
            KvCacheEventData::Removed(_) => {
                panic!("Expected KvCacheEventData::Stored variant")
            }
        }
        assert!(!restored.shutdown);
    }

    /// A `KvCacheRemoveData` keeps its hashes, in order, across a JSON round trip.
    #[test]
    fn test_kv_cache_remove_data_serialization() {
        let removal = KvCacheRemoveData {
            block_hashes: vec![ExternalSequenceBlockHash(4), ExternalSequenceBlockHash(5)],
        };

        let json = serde_json::to_string(&removal).unwrap();
        let restored: KvCacheRemoveData = serde_json::from_str(&json).unwrap();

        assert_eq!(
            restored.block_hashes,
            vec![ExternalSequenceBlockHash(4), ExternalSequenceBlockHash(5)]
        );
    }

    /// Pins the wire format of `KvCacheEventData`: variants are externally
    /// tagged with `snake_case` names, and both variants can be read back.
    #[test]
    fn test_kv_cache_event_data_variant_tags() {
        let stored = KvCacheEventData::Stored(KvCacheStoreData {
            parent_hash: None,
            blocks: vec![],
        });
        assert_eq!(
            serde_json::to_string(&stored).unwrap(),
            r#"{"stored":{"parent_hash":null,"blocks":[]}}"#
        );

        let removed = KvCacheEventData::Removed(KvCacheRemoveData {
            block_hashes: vec![ExternalSequenceBlockHash(4)],
        });
        let removed_json = serde_json::to_string(&removed).unwrap();
        assert_eq!(removed_json, r#"{"removed":{"block_hashes":[4]}}"#);

        match serde_json::from_str::<KvCacheEventData>(&removed_json).unwrap() {
            KvCacheEventData::Removed(data) => {
                assert_eq!(data.block_hashes, vec![ExternalSequenceBlockHash(4)]);
            }
            KvCacheEventData::Stored(_) => {
                panic!("Expected KvCacheEventData::Removed variant")
            }
        }
    }
}