#[cfg(test)]
mod tests {
use crate::paged_kv::*;
/// A 16-token sequence consumes exactly one page of a ten-page cache, and
/// freeing the sequence hands that page back to the pool.
#[test]
fn test_alloc_exact_page_boundary() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let seq = cache.allocate_sequence(16).expect("alloc");

    // One of the ten pages is now in use.
    assert_eq!(cache.free_page_count(), 9);
    assert_eq!(cache.stats().pages_allocated, 1);

    // Verify the free actually returns the page instead of only calling it;
    // this mirrors the post-free check in `test_alloc_single_token`.
    cache.free_sequence(seq);
    assert_eq!(cache.free_page_count(), 10);
}
/// A 17-token sequence spills one token past the first page and therefore
/// consumes two pages; freeing the sequence returns both of them.
#[test]
fn test_alloc_one_over_boundary() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let seq = cache.allocate_sequence(17).expect("alloc");

    // Two of the ten pages are now in use.
    assert_eq!(cache.free_page_count(), 8);
    assert_eq!(cache.stats().pages_allocated, 2);

    // Verify that both pages come back to the pool after the free, as the
    // sibling allocation tests do for their own page counts.
    cache.free_sequence(seq);
    assert_eq!(cache.free_page_count(), 10);
}
/// Exhausts a five-page cache, checks that one more allocation is rejected
/// with `OutOfMemory`, then frees everything and re-allocates the full
/// capacity in a single sequence.
#[test]
fn test_alloc_all_pages_then_free() {
    let mut cache = PagedKvCache::new(5, 16, 8, 64);

    // Five 16-token sequences, one page each, use up the whole pool.
    let mut seqs: Vec<SeqId> = Vec::with_capacity(5);
    for _ in 0..5 {
        seqs.push(cache.allocate_sequence(16).expect("alloc"));
    }
    assert_eq!(cache.free_page_count(), 0);
    assert_eq!(cache.stats().pages_allocated, 5);

    // With no free page left, even a single token cannot be placed.
    assert!(matches!(
        cache.allocate_sequence(1),
        Err(PagedCacheError::OutOfMemory { .. })
    ));

    // Releasing every sequence restores the complete pool.
    seqs.into_iter().for_each(|seq| {
        cache.free_sequence(seq);
    });
    assert_eq!(cache.free_page_count(), 5);

    // 80 tokens == 5 pages of 16 tokens: the whole cache in one sequence.
    let _new_seq = cache.allocate_sequence(80).expect("reallocate all 5");
    assert_eq!(cache.free_page_count(), 0);
}
/// A one-token sequence still occupies a whole page, and that page is
/// released again when the sequence is freed.
#[test]
fn test_alloc_single_token() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);

    let single = cache.allocate_sequence(1).expect("alloc 1 token");
    assert_eq!(cache.free_page_count(), 9);

    cache.free_sequence(single);
    assert_eq!(cache.free_page_count(), 10);
}
/// A 250-token sequence needs 16 pages (15 full pages cover only 240 tokens)
/// and all of them are returned once the sequence is freed.
#[test]
fn test_alloc_large_sequence_spanning_many_pages() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);

    let big = cache.allocate_sequence(250).expect("alloc 250 tokens");
    // 100 pages in the pool minus the 16 pages taken by the sequence.
    assert_eq!(cache.free_page_count(), 84);
    assert_eq!(cache.stats().pages_allocated, 16);

    cache.free_sequence(big);
    assert_eq!(cache.free_page_count(), 100);
}
/// Reported memory usage starts at zero and scales with the number of pages
/// in use: three pages report three times the usage of one page.
#[test]
fn test_memory_usage_increases_with_allocation() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    assert_eq!(cache.memory_usage(), 0);

    // First sequence: 16 tokens, i.e. a single page.
    let _seq1 = cache.allocate_sequence(16).expect("s1");
    let one_page_usage = cache.memory_usage();
    assert!(one_page_usage > 0);

    // Second sequence: 32 tokens, two more pages, three pages in total.
    let _seq2 = cache.allocate_sequence(32).expect("s2");
    assert_eq!(cache.memory_usage(), one_page_usage * 3);
}
/// `utilization()` is checked as a percentage of a ten-page cache at 0, 1,
/// 3 and 10 pages in use.
#[test]
fn test_utilization_percentage() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    assert_eq!(cache.utilization(), 0.0);

    // 16 tokens -> 1 of 10 pages.
    let _seq1 = cache.allocate_sequence(16).expect("s1");
    let after_first = cache.utilization();
    assert!((after_first - 10.0).abs() < 0.01);

    // 32 more tokens -> 3 of 10 pages.
    let _seq2 = cache.allocate_sequence(32).expect("s2");
    let after_second = cache.utilization();
    assert!((after_second - 30.0).abs() < 0.01);

    // 112 more tokens -> the remaining 7 pages, 10 of 10 in use.
    let _seq3 = cache.allocate_sequence(112).expect("s3");
    let after_third = cache.utilization();
    assert!((after_third - 100.0).abs() < 0.01);
}
/// A Q8 page reports fewer bytes than an FP32 page of the same dimensions,
/// and each quantized cache reports the quantization type it was built with.
#[test]
fn test_quantized_memory_reduction_q8() {
    let fp32_cache = QuantizedPagedKvCache::new(10, 16, 8, 64, KvQuantType::FP32);
    let q8_cache = QuantizedPagedKvCache::new(10, 16, 8, 64, KvQuantType::Q8);

    // Only the quantization type differs between the two pages.
    let fp32_bytes =
        QuantizedKvPage::new(PageId::new(0), KvQuantType::FP32, 16, 8, 64).memory_bytes();
    let q8_bytes = QuantizedKvPage::new(PageId::new(0), KvQuantType::Q8, 16, 8, 64).memory_bytes();
    assert!(q8_bytes < fp32_bytes);

    assert_eq!(fp32_cache.quant_type(), KvQuantType::FP32);
    assert_eq!(q8_cache.quant_type(), KvQuantType::Q8);
}
/// A Q4 page reports fewer bytes than a Q8 page of the same dimensions.
#[test]
fn test_quantized_memory_reduction_q4() {
    let q8_bytes = QuantizedKvPage::new(PageId::new(0), KvQuantType::Q8, 16, 8, 64).memory_bytes();
    let q4_bytes = QuantizedKvPage::new(PageId::new(0), KvQuantType::Q4, 16, 8, 64).memory_bytes();

    assert!(q4_bytes < q8_bytes);
}
/// With a sequence allocated in a Q8 cache, the actual memory usage is below
/// the value reported by `fp32_equivalent_memory()`.
#[test]
fn test_fp32_equivalent_memory_calculation() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);
    let _seq = cache.allocate_sequence(32).expect("alloc");

    let fp32_baseline = cache.fp32_equivalent_memory();
    let quantized_usage = cache.memory_usage();

    assert!(quantized_usage < fp32_baseline);
}
/// An empty quantized cache reports a memory-savings value of exactly 1.0.
#[test]
fn test_memory_savings_empty_cache() {
    let cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);

    // Nothing has been allocated yet, so this pins the empty-cache value.
    let savings = cache.memory_savings();
    assert_eq!(savings, 1.0);
}
/// The `OutOfMemory` error carries both the number of pages requested and the
/// number still available.
#[test]
fn test_oom_error_details() {
    let mut cache = PagedKvCache::new(2, 16, 8, 64);
    // One of the two pages is taken up front.
    let _seq = cache.allocate_sequence(16).expect("first alloc");

    // 32 tokens need two pages, but only one is left.
    let outcome = cache.allocate_sequence(32);
    if let Err(PagedCacheError::OutOfMemory { needed, available }) = outcome {
        assert_eq!(needed, 2);
        assert_eq!(available, 1);
    } else {
        panic!("Expected OutOfMemory error");
    }
}
/// Querying the token count of a sequence id the cache never issued yields
/// `SequenceNotFound` carrying that id's raw value.
#[test]
fn test_sequence_not_found_in_get_tokens() {
    let cache = PagedKvCache::new(10, 16, 8, 64);
    // Created directly rather than via `allocate_sequence`, so the cache does not know it.
    let fake = SeqId::new();

    let lookup = cache.get_sequence_tokens(fake);
    if let Err(PagedCacheError::SequenceNotFound(id)) = lookup {
        assert_eq!(id, fake.value());
    } else {
        panic!("Expected SequenceNotFound error");
    }
}
/// Reading a position far beyond a one-page sequence fails with
/// `InvalidPageAccess`, reporting the page id and offset involved.
#[test]
fn test_invalid_page_access_deep_position() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let seq = cache.allocate_sequence(16).expect("alloc");

    let lookup = cache.get_page(seq, 1000);
    if let Err(PagedCacheError::InvalidPageAccess { page_id, offset }) = lookup {
        // 62 == 1000 / 16 (integer division) — presumably the index of the
        // page that position 1000 would fall into with 16-token pages.
        assert_eq!(page_id, 62);
        // The offset is reported as the requested position itself.
        assert_eq!(offset, 1000);
    } else {
        panic!("Expected InvalidPageAccess error");
    }
}
/// Mutable access to a forked sequence's page fails with `OutOfMemory` when
/// the cache has no free page left.
#[test]
fn test_cow_oom_during_get_page_mut() {
    let mut cache = PagedKvCache::new(2, 16, 8, 64);

    let parent = cache.allocate_sequence(16).expect("parent");
    cache.update_tokens(parent, 16).expect("update");
    // The fork does not take a page of its own: the allocation below still
    // succeeds in this two-page cache.
    let child = cache.fork_sequence(parent).expect("fork");
    let _other = cache.allocate_sequence(16).expect("other");

    // Presumably the child needs a private copy of the shared page before it
    // can be mutated, and there is no page left to hold that copy.
    assert!(matches!(
        cache.get_page_mut(child, 0),
        Err(PagedCacheError::OutOfMemory { .. })
    ));
}
/// Extending a sequence when every page is already taken fails with
/// `OutOfMemory`, reporting a non-zero need and zero available pages.
#[test]
fn test_extend_oom() {
    let mut cache = PagedKvCache::new(2, 16, 8, 64);

    let seq = cache.allocate_sequence(16).expect("alloc");
    cache.update_tokens(seq, 16).expect("update");
    // The second sequence takes the last free page.
    let _other = cache.allocate_sequence(16).expect("other");

    let outcome = cache.extend(seq, 32);
    if let Err(PagedCacheError::OutOfMemory { needed, available }) = outcome {
        assert!(needed > 0);
        assert_eq!(available, 0);
    } else {
        panic!("Expected OutOfMemory error");
    }
}
/// A quantized cache answers a page lookup for an unknown sequence id with
/// `SequenceNotFound`.
#[test]
fn test_quantized_cache_sequence_not_found() {
    let cache = QuantizedPagedKvCache::new(10, 16, 8, 64, KvQuantType::Q8);
    // Never allocated through the cache, so it cannot be resolved.
    let fake = SeqId::new();

    assert!(matches!(
        cache.get_page(fake, 0),
        Err(PagedCacheError::SequenceNotFound(_))
    ));
}
/// A quantized cache rejects a lookup at position 100 of a 16-token sequence
/// with `InvalidPageAccess`.
#[test]
fn test_quantized_cache_invalid_page_access() {
    let mut cache = QuantizedPagedKvCache::new(10, 16, 8, 64, KvQuantType::Q8);
    let seq = cache.allocate_sequence(16).expect("alloc");

    assert!(matches!(
        cache.get_page(seq, 100),
        Err(PagedCacheError::InvalidPageAccess { .. })
    ));
}
/// Forking a parent twice and one child once yields four distinct active
/// sequences; each fork, and the later mutable page access on a fork, bumps
/// the `cow_operations` counter by one.
#[test]
fn test_cow_multiple_forks() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let parent = cache.allocate_sequence(16).expect("parent");
    cache.update_tokens(parent, 8).expect("update");

    // Two forks of the parent plus one fork of the first child.
    let child1 = cache.fork_sequence(parent).expect("fork1");
    let child2 = cache.fork_sequence(parent).expect("fork2");
    let child3 = cache.fork_sequence(child1).expect("fork3");

    let after_forks = cache.stats();
    assert_eq!(after_forks.active_sequences, 4);
    assert_eq!(after_forks.cow_operations, 3);

    // Every fork received its own sequence id.
    assert_ne!(parent, child1);
    assert_ne!(parent, child2);
    assert_ne!(child1, child3);

    // Mutable access through a fork counts as one more operation.
    let _private_page = cache.get_page_mut(child2, 0).expect("get mut");
    assert_eq!(cache.stats().cow_operations, 4);
}
/// After a forked child is freed, only the parent remains active and the
/// parent's page reports a reference count of one.
#[test]
fn test_cow_ref_count_after_free() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let parent = cache.allocate_sequence(16).expect("parent");
    cache.update_tokens(parent, 16).expect("update");

    // Fork and immediately drop the child again.
    let child = cache.fork_sequence(parent).expect("fork");
    cache.free_sequence(child);
    assert_eq!(cache.stats().active_sequences, 1);

    let remaining_refs = cache.get_page(parent, 0).expect("get").ref_count;
    assert_eq!(remaining_refs, 1);
}
/// Writing through a forked child must not alter the parent's page contents:
/// the parent keeps its original key/value while the child sees its own write.
#[test]
fn test_cow_preserves_data() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let parent = cache.allocate_sequence(16).expect("parent");
    cache.update_tokens(parent, 16).expect("update");

    // Seed the parent's first page with recognisable values before forking.
    let seeded = cache.get_page_mut(parent, 0).expect("mut");
    seeded.keys[0] = 123.0;
    seeded.values[0] = 456.0;

    // Overwrite the first key through the fork only.
    let child = cache.fork_sequence(parent).expect("fork");
    cache.get_page_mut(child, 0).expect("mut child").keys[0] = 789.0;

    // The parent still holds the values written before the fork.
    let parent_page = cache.get_page(parent, 0).expect("get parent");
    assert_eq!(parent_page.keys[0], 123.0);
    assert_eq!(parent_page.values[0], 456.0);

    // The child observes its own write.
    let child_page = cache.get_page(child, 0).expect("get child");
    assert_eq!(child_page.keys[0], 789.0);
}
/// A cache constructed with zero pages reports no holes and a fragmentation
/// ratio of 0.0.
#[test]
fn test_fragmentation_with_zero_pages() {
    let cache = PagedKvCache::new(0, 16, 8, 64);

    let frag = cache.fragmentation_stats();
    assert_eq!(frag.holes, 0);
    assert_eq!(frag.fragmentation_ratio, 0.0);
}
/// With a single sequence occupying nine of ten pages, the cache does not
/// ask for defragmentation.
#[test]
fn test_should_defrag_low_free_ratio() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);

    // 144 tokens == 9 pages of 16 tokens, leaving one page free.
    let seq = cache.allocate_sequence(144).expect("alloc 144");
    cache.update_tokens(seq, 100).expect("update");

    assert!(!cache.should_defragment());
}
/// Compacting a sequence id that was never allocated in the cache returns 0.
#[test]
fn test_compact_empty_page_list() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    // Created directly, not through `allocate_sequence`.
    let fake = SeqId::new();

    assert_eq!(cache.compact_sequence(fake), 0);
}
/// A defragmentation pass that moves nothing returns 0 and leaves both
/// defrag-related counters in the stats at zero.
#[test]
fn test_defrag_increments_stats_only_when_pages_moved() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    let _seq = cache.allocate_sequence(32).expect("alloc");

    // Only one freshly allocated sequence exists in the cache.
    let pages_moved = cache.defragment();
    assert_eq!(pages_moved, 0);

    let after = cache.stats();
    assert_eq!(after.defrag_operations, 0);
    assert_eq!(after.pages_moved, 0);
}
}