Skip to main content

ferrum_testkit/op_diff/
kv_cache_append.rs

//! `kv_cache_append_head_major` op-diff harness — see `crate::op_diff`.
//!
//! Appends `new_tokens` head-major K/V vectors into a pre-filled cache,
//! starting at position `cache_len`. The op is pure data movement (exact);
//! the compared output is `[cache_k, cache_v]`, concatenated in that order,
//! after the append.
6
7use super::{random_vec, OpUnderTest, Output};
8
/// Problem size for one `kv_cache_append_head_major` op-diff case.
///
/// Each cache buffer (K and V) holds `nkv * capacity * hd` elements and each
/// new-token buffer holds `nkv * new_tokens * hd` elements.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct KvCacheAppendOp {
    /// Forwarded to the backend as `nkv` — presumably the number of K/V heads
    /// (TODO confirm against the `Backend` trait).
    pub nkv: usize,
    /// Forwarded to the backend as `hd` — presumably the per-head dimension
    /// (TODO confirm against the `Backend` trait).
    pub hd: usize,
    /// Capacity the cache buffers are allocated with; forwarded to the backend.
    pub capacity: usize,
    /// Position in the cache at which the new K/V vectors are appended.
    pub cache_len: usize,
    /// Number of new tokens whose K/V vectors are appended.
    pub new_tokens: usize,
}
16
17impl KvCacheAppendOp {
18    fn cache_elems(&self) -> usize {
19        self.nkv * self.capacity * self.hd
20    }
21    fn new_elems(&self) -> usize {
22        self.nkv * self.new_tokens * self.hd
23    }
24}
25
/// Runs `kv_cache_append_head_major` on backend `$B` and returns the result.
///
/// * `$B`    — backend type; its `Backend` associated functions are called.
/// * `$self` — the `KvCacheAppendOp` (or a reference to it) describing the case.
/// * `$seed` — base seed; the four input buffers use `seed`, `seed + 1`,
///   `seed + 2` and `seed + 3` (wrapping), so every backend sees the same data.
///
/// Evaluates to the contents of `cache_k` followed by the contents of
/// `cache_v`, both read back after the append.
macro_rules! run_backend {
    ($B:ty, $self:expr, $seed:expr) => {{
        use ferrum_kernels::backend::Backend;

        // Bind each macro argument once so the argument expressions are not
        // re-evaluated at every use site below.
        let op = &$self;
        let base_seed = $seed;
        let cache_elems = op.cache_elems();
        let new_elems = op.new_elems();

        // Host-side inputs. The two numeric arguments are presumably the value
        // range handed to `random_vec` (TODO confirm against its definition).
        let ck = random_vec(cache_elems, -1.0, 1.0, base_seed);
        let cv = random_vec(cache_elems, -1.0, 1.0, base_seed.wrapping_add(1));
        let nk = random_vec(new_elems, -2.0, 2.0, base_seed.wrapping_add(2));
        let nv = random_vec(new_elems, -2.0, 2.0, base_seed.wrapping_add(3));

        // Backend-side buffers built from the host data.
        let mut ctx = <$B>::new_context();
        let mut cache_k = <$B>::from_slice(&ck);
        let mut cache_v = <$B>::from_slice(&cv);
        let new_k = <$B>::from_slice(&nk);
        let new_v = <$B>::from_slice(&nv);

        <$B>::kv_cache_append_head_major(
            &mut ctx,
            &mut cache_k,
            &mut cache_v,
            op.cache_len,
            op.capacity,
            &new_k,
            &new_v,
            op.new_tokens,
            op.nkv,
            op.hd,
        );
        // Presumably waits for any queued backend work before reading back
        // (TODO confirm against the `Backend::sync` contract).
        <$B>::sync(&mut ctx);

        // Output layout: all of cache_k, then all of cache_v.
        let mut out = <$B>::to_vec(&cache_k, cache_elems);
        out.extend(<$B>::to_vec(&cache_v, cache_elems));
        out
    }};
}
56
57impl OpUnderTest for KvCacheAppendOp {
58    fn name(&self) -> &str {
59        "kv_cache_append"
60    }
61
62    fn run_cpu(&self, seed: u64) -> Output {
63        run_backend!(ferrum_kernels::backend::cpu::CpuBackend, self, seed)
64    }
65
66    #[cfg(all(target_os = "macos", feature = "metal"))]
67    fn run_metal(&self, seed: u64) -> Output {
68        run_backend!(ferrum_kernels::backend::metal::MetalBackend, self, seed)
69    }
70
71    #[cfg(feature = "cuda")]
72    fn run_cuda(&self, seed: u64) -> Output {
73        run_backend!(ferrum_kernels::backend::cuda::CudaBackend, self, seed)
74    }
75}