1use std::fmt;
4use std::sync::atomic::{AtomicU32, Ordering};
5use std::time::Instant;
6
7#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
9pub struct BlockId(pub u32);
10
11impl fmt::Display for BlockId {
12 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13 write!(f, "B{}", self.0)
14 }
15}
16
17#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
19pub struct SeqId(pub u64);
20
21impl fmt::Display for SeqId {
22 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
23 write!(f, "S{}", self.0)
24 }
25}
26
27#[derive(Debug, Clone, PartialEq)]
29pub enum EvictionStrategy {
30 LRU,
32 LFU,
34 LongestFirst,
36 Priority { levels: usize },
38 StreamingLLM {
40 sink_tokens: usize,
41 window_tokens: usize,
42 },
43}
44
45impl Default for EvictionStrategy {
46 fn default() -> Self {
47 EvictionStrategy::LRU
48 }
49}
50
51impl fmt::Display for EvictionStrategy {
52 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
53 match self {
54 EvictionStrategy::LRU => write!(f, "LRU"),
55 EvictionStrategy::LFU => write!(f, "LFU"),
56 EvictionStrategy::LongestFirst => write!(f, "LongestFirst"),
57 EvictionStrategy::Priority { levels } => write!(f, "Priority({})", levels),
58 EvictionStrategy::StreamingLLM {
59 sink_tokens,
60 window_tokens,
61 } => {
62 write!(
63 f,
64 "StreamingLLM(sink={}, window={})",
65 sink_tokens, window_tokens
66 )
67 }
68 }
69 }
70}
71
72#[derive(Debug)]
80pub struct KvBlock {
81 pub id: BlockId,
83 pub num_tokens: usize,
85 pub ref_count: AtomicU32,
87 pub capacity: usize,
89}
90
91impl KvBlock {
92 pub fn new(id: BlockId, capacity: usize) -> Self {
94 Self {
95 id,
96 num_tokens: 0,
97 ref_count: AtomicU32::new(1),
98 capacity,
99 }
100 }
101
102 pub fn is_full(&self) -> bool {
104 self.num_tokens >= self.capacity
105 }
106
107 pub fn remaining(&self) -> usize {
109 self.capacity.saturating_sub(self.num_tokens)
110 }
111
112 pub fn refs(&self) -> u32 {
114 self.ref_count.load(Ordering::Acquire)
115 }
116
117 pub fn inc_ref(&self) {
119 self.ref_count.fetch_add(1, Ordering::AcqRel);
120 }
121
122 pub fn dec_ref(&self) -> bool {
124 self.ref_count.fetch_sub(1, Ordering::AcqRel) == 1
125 }
126}
127
128#[derive(Debug, Clone)]
130pub struct SequenceInfo {
131 pub seq_id: SeqId,
133 pub num_tokens: usize,
135 pub block_ids: Vec<BlockId>,
137 pub last_access: Instant,
139 pub access_count: u64,
141 pub priority: u32,
143}
144
145impl SequenceInfo {
146 pub fn new(seq_id: SeqId) -> Self {
148 Self {
149 seq_id,
150 num_tokens: 0,
151 block_ids: Vec::new(),
152 last_access: Instant::now(),
153 access_count: 0,
154 priority: 0,
155 }
156 }
157
158 pub fn touch(&mut self) {
160 self.last_access = Instant::now();
161 self.access_count += 1;
162 }
163
164 pub fn num_blocks(&self) -> usize {
166 self.block_ids.len()
167 }
168}
169
170#[derive(Debug, Clone)]
172pub enum PagedKvError {
173 OutOfMemory { requested: usize, available: usize },
175 SequenceNotFound(SeqId),
177 BlockNotFound(BlockId),
179 InvalidOperation(String),
181}
182
183impl fmt::Display for PagedKvError {
184 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
185 match self {
186 PagedKvError::OutOfMemory {
187 requested,
188 available,
189 } => {
190 write!(
191 f,
192 "Out of memory: requested {} blocks, {} available",
193 requested, available
194 )
195 }
196 PagedKvError::SequenceNotFound(seq_id) => {
197 write!(f, "Sequence not found: {}", seq_id)
198 }
199 PagedKvError::BlockNotFound(block_id) => {
200 write!(f, "Block not found: {}", block_id)
201 }
202 PagedKvError::InvalidOperation(msg) => {
203 write!(f, "Invalid operation: {}", msg)
204 }
205 }
206 }
207}
208
209impl std::error::Error for PagedKvError {}
210
211pub type PagedKvResult<T> = Result<T, PagedKvError>;
213
214#[derive(Debug, Clone, Default)]
216pub struct CacheStats {
217 pub total_allocations: u64,
219 pub total_frees: u64,
221 pub total_evictions: u64,
223 pub total_forks: u64,
225 pub peak_blocks_used: usize,
227}