Skip to main content

oxillama_runtime/
kv_pool.rs

1//! Pooled KV-cache page allocator.
2//!
3//! Provides a free-list based page pool so that KV cache memory can be
4//! recycled across requests without returning it to the allocator.
5//! Pages are fixed-size slabs of `f32` values.  The pool never frees pages
6//! until it is itself dropped.
7
/// A pool of KV-cache pages.
///
/// Pages are fixed-size slabs of `f32` data, each backed by its own boxed
/// slice.  The pool uses a simple LIFO free-list: [`alloc`] pops a page index
/// off the list and [`free`] pushes it back.
///
/// The pool tracks which pages are currently handed out, so freeing a page
/// that is not allocated (a double free) or an out-of-range index can never
/// corrupt the free list.
///
/// The pool never shrinks — once a page is allocated it lives until the pool
/// is dropped.
///
/// [`alloc`]: KvCachePool::alloc
/// [`free`]: KvCachePool::free
pub struct KvCachePool {
    /// All allocated pages, indexed by page index.
    pages: Vec<Box<[f32]>>,
    /// Indices of pages currently not in use.  Never contains duplicates.
    free_list: Vec<usize>,
    /// `in_use[i]` is `true` while page `i` is handed out to a caller.
    /// Always the same length as `pages`.
    in_use: Vec<bool>,
    /// Number of `f32` elements per page.
    page_size: usize,
}

impl KvCachePool {
    /// Create a new pool with `initial_pages` pre-allocated pages of
    /// `page_size` `f32` elements each.
    ///
    /// Every page is zero-filled and starts on the free list, ready for
    /// immediate allocation.
    pub fn new(page_size: usize, initial_pages: usize) -> Self {
        let pages = (0..initial_pages)
            .map(|_| vec![0.0f32; page_size].into_boxed_slice())
            .collect();

        Self {
            pages,
            // Ascending order, so the highest index is handed out first.
            free_list: (0..initial_pages).collect(),
            in_use: vec![false; initial_pages],
            page_size,
        }
    }

    /// Allocate a page from the free list.
    ///
    /// Returns `Some(page_idx)` on success, or `None` if the pool is
    /// exhausted.  The caller must pass the returned index to [`free`] when
    /// the page is no longer needed.
    ///
    /// [`free`]: KvCachePool::free
    pub fn alloc(&mut self) -> Option<usize> {
        let page_idx = self.free_list.pop()?;
        // Record ownership so `free` can reject double frees.
        self.in_use[page_idx] = true;
        Some(page_idx)
    }

    /// Return page `page_idx` to the free list.
    ///
    /// The page data is **not** zeroed; callers should zero or overwrite the
    /// slice before treating it as a fresh allocation.
    ///
    /// # Panics
    ///
    /// Panics in debug builds if `page_idx >= total_pages()` or if the page
    /// is not currently allocated (e.g. a double free).  In release builds
    /// such calls are ignored and leave the pool unchanged.
    pub fn free(&mut self, page_idx: usize) {
        debug_assert!(
            page_idx < self.pages.len(),
            "KvCachePool::free: page_idx {page_idx} out of range (total {})",
            self.pages.len()
        );

        // Out-of-range indices count as "not allocated".
        let is_allocated = self.in_use.get(page_idx).copied().unwrap_or(false);
        debug_assert!(
            is_allocated,
            "KvCachePool::free: page_idx {page_idx} is not currently allocated (double free?)"
        );

        // Only a page that is actually handed out may go back on the list;
        // otherwise the same page could later be allocated to two owners.
        if is_allocated {
            self.in_use[page_idx] = false;
            self.free_list.push(page_idx);
        }
    }

    /// Get an immutable slice to page `page_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `page_idx >= total_pages()`.
    pub fn page(&self, page_idx: usize) -> &[f32] {
        &self.pages[page_idx]
    }

    /// Get a mutable slice to page `page_idx`.
    ///
    /// # Panics
    ///
    /// Panics if `page_idx >= total_pages()`.
    pub fn page_mut(&mut self, page_idx: usize) -> &mut [f32] {
        &mut self.pages[page_idx]
    }

    /// Total number of pages ever allocated (including those on the free list).
    pub fn total_pages(&self) -> usize {
        self.pages.len()
    }

    /// Number of pages currently on the free list (available for allocation).
    pub fn free_pages(&self) -> usize {
        self.free_list.len()
    }

    /// Number of pages currently in use (allocated but not yet freed).
    pub fn used_pages(&self) -> usize {
        // Cannot underflow: the free list only ever holds distinct, valid
        // page indices, so it is never longer than `pages`.
        self.pages.len() - self.free_list.len()
    }

    /// The number of `f32` elements in each page.
    pub fn page_size(&self) -> usize {
        self.page_size
    }
}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Allocation and release keep the used/free counters in sync.
    #[test]
    fn test_alloc_and_free() {
        let mut pool = KvCachePool::new(64, 4);

        // A fresh pool has every page sitting on the free list.
        assert_eq!(pool.total_pages(), 4);
        assert_eq!((pool.free_pages(), pool.used_pages()), (4, 0));

        let first = pool.alloc().expect("should allocate");
        assert_eq!((pool.used_pages(), pool.free_pages()), (1, 3));

        // A second allocation must hand out a different page.
        let second = pool.alloc().expect("should allocate");
        assert_ne!(first, second);

        // Releasing one of the two leaves exactly one page in use.
        pool.free(first);
        assert_eq!((pool.used_pages(), pool.free_pages()), (1, 3));
    }

    /// Once every page is handed out, `alloc` reports exhaustion.
    #[test]
    fn test_pool_exhaustion_returns_none() {
        let mut pool = KvCachePool::new(16, 2);
        let _first = pool.alloc().expect("first alloc");
        let _second = pool.alloc().expect("second alloc");

        // Nothing left on the free list.
        assert_eq!(pool.alloc(), None);
    }

    /// A freed page is the next one returned by `alloc`.
    #[test]
    fn test_free_then_realloc() {
        let mut pool = KvCachePool::new(8, 1);

        let original = pool.alloc().expect("alloc");
        pool.free(original);

        let recycled = pool.alloc().expect("re-alloc after free");
        assert_eq!(original, recycled);
    }

    /// Values written through `page_mut` are visible through `page`.
    #[test]
    fn test_page_read_write() {
        let mut pool = KvCachePool::new(4, 2);
        let idx = pool.alloc().expect("alloc");

        // Fill the first two slots of the page.
        pool.page_mut(idx)[..2].copy_from_slice(&[1.0, 2.5]);

        let stored = pool.page(idx);
        assert!((stored[0] - 1.0).abs() < 1e-9);
        assert!((stored[1] - 2.5).abs() < 1e-9);
    }

    /// An empty pool still reports its configured page size.
    #[test]
    fn test_page_size_accessor() {
        let empty = KvCachePool::new(128, 0);
        assert_eq!(empty.page_size(), 128);
        assert_eq!((empty.total_pages(), empty.free_pages()), (0, 0));
    }
}