Skip to main content

cbtop/paged_kv/
types.rs

1//! Core types for paged KV cache: identifiers, blocks, sequences, errors, and strategies.
2
3use std::fmt;
4use std::sync::atomic::{AtomicU32, Ordering};
5use std::time::Instant;
6
7/// Block identifier (index into physical_blocks).
8#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
9pub struct BlockId(pub u32);
10
11impl fmt::Display for BlockId {
12    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13        write!(f, "B{}", self.0)
14    }
15}
16
17/// Sequence identifier (request ID).
18#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19pub struct SeqId(pub u64);
20
21impl fmt::Display for SeqId {
22    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23        write!(f, "S{}", self.0)
24    }
25}
26
27/// KV cache eviction strategies.
28#[derive(Debug, Clone, PartialEq)]
29pub enum EvictionStrategy {
30    /// Least Recently Used
31    LRU,
32    /// Least Frequently Used
33    LFU,
34    /// Evict longest sequences first
35    LongestFirst,
36    /// Evict by priority (preempt low-priority requests)
37    Priority { levels: usize },
38    /// StreamingLLM: keep sink tokens + recent window
39    StreamingLLM {
40        sink_tokens: usize,
41        window_tokens: usize,
42    },
43}
44
45impl Default for EvictionStrategy {
46    fn default() -> Self {
47        EvictionStrategy::LRU
48    }
49}
50
51impl fmt::Display for EvictionStrategy {
52    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53        match self {
54            EvictionStrategy::LRU => write!(f, "LRU"),
55            EvictionStrategy::LFU => write!(f, "LFU"),
56            EvictionStrategy::LongestFirst => write!(f, "LongestFirst"),
57            EvictionStrategy::Priority { levels } => write!(f, "Priority({})", levels),
58            EvictionStrategy::StreamingLLM {
59                sink_tokens,
60                window_tokens,
61            } => {
62                write!(
63                    f,
64                    "StreamingLLM(sink={}, window={})",
65                    sink_tokens, window_tokens
66                )
67            }
68        }
69    }
70}
71
72/// Single KV cache block (logical representation).
73///
74/// In a real implementation, this would contain GPU buffers:
75/// ```ignore
76/// pub keys: DeviceBuffer<f16>,   // [block_size, num_heads, head_dim]
77/// pub values: DeviceBuffer<f16>, // [block_size, num_heads, head_dim]
78/// ```
79#[derive(Debug)]
80pub struct KvBlock {
81    /// Block ID
82    pub id: BlockId,
83    /// Number of tokens stored in this block (0 to block_size)
84    pub num_tokens: usize,
85    /// Reference count (for copy-on-write)
86    pub ref_count: AtomicU32,
87    /// Block size (max tokens)
88    pub capacity: usize,
89}
90
91impl KvBlock {
92    /// Create a new empty block.
93    pub fn new(id: BlockId, capacity: usize) -> Self {
94        Self {
95            id,
96            num_tokens: 0,
97            ref_count: AtomicU32::new(1),
98            capacity,
99        }
100    }
101
102    /// Check if block is full.
103    pub fn is_full(&self) -> bool {
104        self.num_tokens >= self.capacity
105    }
106
107    /// Remaining capacity.
108    pub fn remaining(&self) -> usize {
109        self.capacity.saturating_sub(self.num_tokens)
110    }
111
112    /// Get reference count.
113    pub fn refs(&self) -> u32 {
114        self.ref_count.load(Ordering::Acquire)
115    }
116
117    /// Increment reference count.
118    pub fn inc_ref(&self) {
119        self.ref_count.fetch_add(1, Ordering::AcqRel);
120    }
121
122    /// Decrement reference count, returns true if count reached zero.
123    pub fn dec_ref(&self) -> bool {
124        self.ref_count.fetch_sub(1, Ordering::AcqRel) == 1
125    }
126}
127
128/// Sequence metadata for tracking access patterns.
129#[derive(Debug, Clone)]
130pub struct SequenceInfo {
131    /// Sequence ID
132    pub seq_id: SeqId,
133    /// Total tokens in sequence
134    pub num_tokens: usize,
135    /// Blocks allocated to this sequence
136    pub block_ids: Vec<BlockId>,
137    /// Last access timestamp
138    pub last_access: Instant,
139    /// Access count (for LFU)
140    pub access_count: u64,
141    /// Priority level (for priority-based eviction)
142    pub priority: u32,
143}
144
145impl SequenceInfo {
146    /// Create new sequence info.
147    pub fn new(seq_id: SeqId) -> Self {
148        Self {
149            seq_id,
150            num_tokens: 0,
151            block_ids: Vec::new(),
152            last_access: Instant::now(),
153            access_count: 0,
154            priority: 0,
155        }
156    }
157
158    /// Update access timestamp.
159    pub fn touch(&mut self) {
160        self.last_access = Instant::now();
161        self.access_count += 1;
162    }
163
164    /// Memory usage in blocks.
165    pub fn num_blocks(&self) -> usize {
166        self.block_ids.len()
167    }
168}
169
170/// PagedKvCache error types.
171#[derive(Debug, Clone)]
172pub enum PagedKvError {
173    /// Out of memory (no free blocks)
174    OutOfMemory { requested: usize, available: usize },
175    /// Sequence not found
176    SequenceNotFound(SeqId),
177    /// Block not found
178    BlockNotFound(BlockId),
179    /// Invalid operation
180    InvalidOperation(String),
181}
182
183impl fmt::Display for PagedKvError {
184    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
185        match self {
186            PagedKvError::OutOfMemory {
187                requested,
188                available,
189            } => {
190                write!(
191                    f,
192                    "Out of memory: requested {} blocks, {} available",
193                    requested, available
194                )
195            }
196            PagedKvError::SequenceNotFound(seq_id) => {
197                write!(f, "Sequence not found: {}", seq_id)
198            }
199            PagedKvError::BlockNotFound(block_id) => {
200                write!(f, "Block not found: {}", block_id)
201            }
202            PagedKvError::InvalidOperation(msg) => {
203                write!(f, "Invalid operation: {}", msg)
204            }
205        }
206    }
207}
208
209impl std::error::Error for PagedKvError {}
210
211/// Result type for PagedKvCache operations.
212pub type PagedKvResult<T> = Result<T, PagedKvError>;
213
214/// Cache statistics.
215#[derive(Debug, Clone, Default)]
216pub struct CacheStats {
217    /// Total allocations
218    pub total_allocations: u64,
219    /// Total frees
220    pub total_frees: u64,
221    /// Total evictions
222    pub total_evictions: u64,
223    /// Total forks (copy-on-write)
224    pub total_forks: u64,
225    /// Peak blocks used
226    pub peak_blocks_used: usize,
227}