// cjc_runtime/scratchpad.rs
1//! KV-cache scratchpad -- zero-allocation state persistence for transformer inference.
2//!
3//! Provides [`Scratchpad`], a pre-allocated linear buffer for appending
4//! key/value token vectors without per-token heap allocation. The entire
5//! `[max_seq_len, dim]` storage is allocated once at construction; subsequent
6//! [`append`](Scratchpad::append) calls copy data into existing storage.
7//!
8//! # NoGC guarantee
9//!
10//! After construction, `append` performs no heap allocation -- it writes
11//! directly into the pre-allocated [`Buffer`]. The [`as_tensor`](Scratchpad::as_tensor)
12//! method returns a zero-copy view via `Rc` clone of the underlying buffer.
13//!
14//! # Relationship to [`PagedKvCache`](crate::paged_kv::PagedKvCache)
15//!
16//! `Scratchpad` uses a single contiguous buffer (simpler, better for small
17//! sequences). [`PagedKvCache`](crate::paged_kv::PagedKvCache) uses block
18//! paging (better for large sequences where contiguous allocation may
19//! fragment).
20
21use std::fmt;
22
23use crate::buffer::Buffer;
24use crate::error::RuntimeError;
25use crate::tensor::Tensor;
26
27// ---------------------------------------------------------------------------
28// 2b. KV-Cache Scratchpad (Zero-Allocation State Persistence)
29// ---------------------------------------------------------------------------
30
/// A pre-allocated scratch buffer for KV-cache. Allows appending new
/// key/value vectors without re-allocation, up to a fixed `max_seq_len`.
///
/// Layout: `[max_seq_len, dim]` with a `current_len` cursor.
/// All memory is allocated once at construction; `append` only copies
/// new data into existing storage (zero GC pressure per token).
#[derive(Debug, Clone)]
pub struct Scratchpad {
    /// Flat backing storage of length `max_seq_len * dim`, interpreted
    /// row-major as `[max_seq_len, dim]`. Allocated once in `new`.
    buffer: Buffer<f64>,
    /// Maximum sequence length (pre-allocated row capacity).
    max_seq_len: usize,
    /// Hidden dimension per token (row width).
    dim: usize,
    /// Current number of tokens stored (rows `0..current_len` are valid).
    current_len: usize,
}
48
49impl Scratchpad {
50 /// Create a new scratchpad pre-allocated for `max_seq_len` tokens of
51 /// dimension `dim`. Zero-fills all storage upfront.
52 pub fn new(max_seq_len: usize, dim: usize) -> Self {
53 Scratchpad {
54 buffer: Buffer::alloc(max_seq_len * dim, 0.0),
55 max_seq_len,
56 dim,
57 current_len: 0,
58 }
59 }
60
61 /// Number of tokens currently stored.
62 pub fn len(&self) -> usize {
63 self.current_len
64 }
65
66 /// Whether no tokens are stored.
67 pub fn is_empty(&self) -> bool {
68 self.current_len == 0
69 }
70
71 /// Maximum sequence length this scratchpad can hold.
72 pub fn capacity(&self) -> usize {
73 self.max_seq_len
74 }
75
76 /// Hidden dimension per token.
77 pub fn dim(&self) -> usize {
78 self.dim
79 }
80
81 /// Append a single token vector `[dim]` to the cache.
82 /// Returns an error if the cache is full. **Zero allocation.**
83 pub fn append(&mut self, token_vec: &[f64]) -> Result<(), RuntimeError> {
84 if token_vec.len() != self.dim {
85 return Err(RuntimeError::ShapeMismatch {
86 expected: self.dim,
87 got: token_vec.len(),
88 });
89 }
90 if self.current_len >= self.max_seq_len {
91 return Err(RuntimeError::InvalidOperation(
92 format!(
93 "Scratchpad full: {} / {} tokens",
94 self.current_len, self.max_seq_len
95 ),
96 ));
97 }
98 let base = self.current_len * self.dim;
99 self.buffer.make_unique();
100 for (i, &val) in token_vec.iter().enumerate() {
101 self.buffer.set(base + i, val)?;
102 }
103 self.current_len += 1;
104 Ok(())
105 }
106
107 /// Append a batch of token vectors from a tensor of shape `[n, dim]`.
108 /// **Zero allocation** — writes directly into pre-allocated storage.
109 pub fn append_tensor(&mut self, t: &Tensor) -> Result<(), RuntimeError> {
110 if t.ndim() != 2 || t.shape()[1] != self.dim {
111 return Err(RuntimeError::InvalidOperation(
112 format!(
113 "append_tensor: expected shape [n, {}], got {:?}",
114 self.dim,
115 t.shape()
116 ),
117 ));
118 }
119 let n = t.shape()[0];
120 if self.current_len + n > self.max_seq_len {
121 return Err(RuntimeError::InvalidOperation(
122 format!(
123 "Scratchpad overflow: {} + {} > {} max",
124 self.current_len, n, self.max_seq_len
125 ),
126 ));
127 }
128 let data = t.to_vec();
129 self.buffer.make_unique();
130 let base = self.current_len * self.dim;
131 for (i, &val) in data.iter().enumerate() {
132 self.buffer.set(base + i, val)?;
133 }
134 self.current_len += n;
135 Ok(())
136 }
137
138 /// Get a Tensor view `[current_len, dim]` of the stored data.
139 /// Shares the underlying buffer (zero-copy).
140 pub fn as_tensor(&self) -> Tensor {
141 let shape = vec![self.current_len, self.dim];
142 Tensor {
143 buffer: self.buffer.clone(), // Rc clone, not data copy
144 shape: shape.clone(),
145 strides: Tensor::compute_strides(&shape),
146 offset: 0,
147 }
148 }
149
150 /// Reset the cache to empty without deallocating.
151 /// The underlying buffer is retained for reuse.
152 pub fn clear(&mut self) {
153 self.current_len = 0;
154 }
155}
156
157impl fmt::Display for Scratchpad {
158 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
159 write!(
160 f,
161 "Scratchpad(len={}, capacity={}, dim={})",
162 self.current_len, self.max_seq_len, self.dim
163 )
164 }
165}
166