/// A Q8 store reports `KvQuantType::Q8` and is expected to occupy fewer bytes
/// than an FP32 store constructed with the same arguments.
#[test]
fn test_quantized_kv_data_q8() {
    let q8_store = QuantizedKvData::new(KvQuantType::Q8, 16, 8, 64);
    assert_eq!(q8_store.quant_type(), KvQuantType::Q8);

    let fp32_store = QuantizedKvData::new(KvQuantType::FP32, 16, 8, 64);
    let q8_bytes = q8_store.memory_bytes();
    let fp32_bytes = fp32_store.memory_bytes();
    assert!(q8_bytes < fp32_bytes);
}
/// A Q4 store reports `KvQuantType::Q4` and is expected to occupy fewer bytes
/// than a Q8 store constructed with the same arguments.
#[test]
fn test_quantized_kv_data_q4() {
    let q4_store = QuantizedKvData::new(KvQuantType::Q4, 16, 8, 64);
    assert_eq!(q4_store.quant_type(), KvQuantType::Q4);

    let q8_store = QuantizedKvData::new(KvQuantType::Q8, 16, 8, 64);
    let q4_bytes = q4_store.memory_bytes();
    let q8_bytes = q8_store.memory_bytes();
    assert!(q4_bytes < q8_bytes);
}
/// Keys written to an FP32 store at offset 0 must be read back exactly.
#[test]
fn test_quantized_kv_data_write_read_fp32() {
    let mut store = QuantizedKvData::new(KvQuantType::FP32, 16, 8, 64);

    // 64 evenly spaced samples: 0.0, 0.1, 0.2, ...
    let expected: Vec<f32> = (0..64).map(|idx| idx as f32 * 0.1).collect();
    store.write_keys(0, &expected);

    let roundtrip = store.read_keys(0, 64);
    assert_eq!(roundtrip, expected);
}
/// Values written to a Q8 store must read back within an absolute error of
/// 0.05 per element.
#[test]
fn test_quantized_kv_data_write_read_q8() {
    let mut store = QuantizedKvData::new(KvQuantType::Q8, 16, 8, 64);

    // 64 samples centred on zero, spanning -3.2 ..= 3.1.
    let inputs: Vec<f32> = (0..64).map(|idx| (idx as f32 - 32.0) * 0.1).collect();
    store.write_values(0, &inputs);

    let roundtrip = store.read_values(0, 64);
    inputs
        .iter()
        .zip(roundtrip.iter())
        .for_each(|(expected, actual)| {
            assert!((expected - actual).abs() < 0.05);
        });
}
/// A freshly constructed page carries its quantization type, holds no tokens,
/// has a zero reference count, and is neither full nor shared.
#[test]
fn test_quantized_kv_page_new() {
    let fresh = QuantizedKvPage::new(PageId::new(0), KvQuantType::Q8, 16, 8, 64);

    assert_eq!(fresh.quant_type(), KvQuantType::Q8);
    assert_eq!(fresh.num_tokens, 0);
    assert_eq!(fresh.ref_count, 0);
    assert!(!fresh.is_full());
    assert!(!fresh.is_shared());
}
/// Keys and values written to position 0 of an FP32 page are read back
/// unchanged, each as a 512-element vector.
#[test]
fn test_quantized_kv_page_read_write() {
    let mut page = QuantizedKvPage::new(PageId::new(0), KvQuantType::FP32, 16, 8, 64);

    // Ascending keys and their negated counterparts as values.
    let key_input: Vec<f32> = (0..512).map(|idx| idx as f32 * 0.01).collect();
    let value_input: Vec<f32> = (0..512).map(|idx| (-idx) as f32 * 0.01).collect();

    page.write_keys(0, &key_input);
    page.write_values(0, &value_input);

    let key_output = page.read_keys(0);
    let value_output = page.read_values(0);

    assert_eq!(key_output.len(), 512);
    assert_eq!(value_output.len(), 512);
    assert_eq!(key_output, key_input);
    assert_eq!(value_output, value_input);
}
/// `is_full` and `remaining_capacity` follow `num_tokens`: an empty page has
/// 16 slots left, and setting `num_tokens` to 16 makes it full.
#[test]
fn test_quantized_kv_page_is_full() {
    let mut page = QuantizedKvPage::new(PageId::new(0), KvQuantType::Q8, 16, 8, 64);

    // Empty page.
    assert!(!page.is_full());
    assert_eq!(page.remaining_capacity(), 16);

    // Set the token counter directly to the page's capacity.
    page.num_tokens = 16;

    assert!(page.is_full());
    assert_eq!(page.remaining_capacity(), 0);
}
/// A new quantized cache reports its quantization type, has all 100 pages
/// free, and tracks no active sequences.
#[test]
fn test_quantized_paged_kv_cache_new() {
    let fresh_cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);

    assert_eq!(fresh_cache.quant_type(), KvQuantType::Q8);
    assert_eq!(fresh_cache.free_page_count(), 100);
    assert_eq!(fresh_cache.stats().active_sequences, 0);
}
/// Allocating a 32-token sequence is expected to take two of the 100 pages
/// and register one active sequence.
#[test]
fn test_quantized_paged_kv_cache_allocate() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);

    let allocated = cache.allocate_sequence(32).expect("test");

    assert_eq!(cache.free_page_count(), 98);
    assert_eq!(cache.stats().active_sequences, 1);
    assert!(allocated.value() < u64::MAX);
}
/// Freeing a sequence returns its page to the free pool and clears the
/// active-sequence count.
#[test]
fn test_quantized_paged_kv_cache_free() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q4);

    // A 16-token sequence is expected to take a single page.
    let allocated = cache.allocate_sequence(16).expect("test");
    assert_eq!(cache.free_page_count(), 99);

    cache.free_sequence(allocated);

    assert_eq!(cache.free_page_count(), 100);
    assert_eq!(cache.stats().active_sequences, 0);
}
/// With one sequence allocated, a Q8 cache's `memory_savings()` must be below
/// 0.6.
#[test]
fn test_quantized_paged_kv_cache_memory_savings() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);
    let _allocated = cache.allocate_sequence(16).expect("test");

    let ratio = cache.memory_savings();
    assert!(
        ratio < 0.6,
        "Q8 should use less than 60% of FP32 memory, got {}",
        ratio
    );
}
/// With one sequence allocated, a Q4 cache's `memory_savings()` must be below
/// 0.4.
#[test]
fn test_quantized_paged_kv_cache_q4_savings() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q4);
    let _allocated = cache.allocate_sequence(16).expect("test");

    let ratio = cache.memory_savings();
    assert!(
        ratio < 0.4,
        "Q4 should use less than 40% of FP32 memory, got {}",
        ratio
    );
}
/// Pages looked up at token positions 0 and 16 of a 32-token sequence both
/// report the cache's quantization type.
#[test]
fn test_quantized_paged_kv_cache_get_page() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);
    let sequence = cache.allocate_sequence(32).expect("test");

    let first_page = cache.get_page(sequence, 0).expect("test");
    assert_eq!(first_page.quant_type(), KvQuantType::Q8);

    let second_page = cache.get_page(sequence, 16).expect("test");
    assert_eq!(second_page.quant_type(), KvQuantType::Q8);
}
/// A change made through `get_page_mut` is visible on a later `get_page` of
/// the same sequence and position.
#[test]
fn test_quantized_paged_kv_cache_get_page_mut() {
    let mut cache = QuantizedPagedKvCache::new(100, 16, 8, 64, KvQuantType::Q8);
    let sequence = cache.allocate_sequence(16).expect("test");

    // Mutate through the mutable accessor; the borrow ends with the statement.
    cache.get_page_mut(sequence, 0).expect("test").num_tokens = 8;

    let observed = cache.get_page(sequence, 0).expect("test");
    assert_eq!(observed.num_tokens, 8);
}
/// In a one-page cache, a second 16-token allocation must fail with
/// `PagedCacheError::OutOfMemory`.
#[test]
fn test_quantized_paged_kv_cache_oom() {
    let mut cache = QuantizedPagedKvCache::new(1, 16, 8, 64, KvQuantType::Q8);

    // The first allocation takes the cache's only page.
    let _first = cache.allocate_sequence(16).expect("test");

    let second_attempt = cache.allocate_sequence(16);
    assert!(matches!(
        second_attempt,
        Err(PagedCacheError::OutOfMemory { .. })
    ));
}
/// A default-constructed `Q8KvBlock` has a scale of exactly zero.
#[test]
fn test_q8_block_default() {
    let default_block = Q8KvBlock::default();
    let scale = default_block.scale;
    assert_eq!(scale, 0.0);
}
/// A default-constructed `Q4KvBlock` has a scale of exactly zero.
#[test]
fn test_q4_block_default() {
    let default_block = Q4KvBlock::default();
    let scale = default_block.scale;
    assert_eq!(scale, 0.0);
}
/// Extending a sequence id the cache never allocated yields
/// `PagedCacheError::SequenceNotFound`.
#[test]
fn test_deep_pkcov_extend_sequence_not_found() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let unknown = SeqId::new();

    let outcome = cache.extend(unknown, 32);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::SequenceNotFound(_))
    ));
}
/// In a two-page cache, extending a 16-token sequence by 48 tokens must fail
/// with `PagedCacheError::OutOfMemory`.
#[test]
fn test_deep_pkcov_extend_sequence_out_of_memory() {
    let mut cache = PagedKvCache::new(2, 16, 8, 64);
    let sequence = cache.allocate_sequence(16).expect("alloc");

    let outcome = cache.extend(sequence, 48);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::OutOfMemory { .. })
    ));
}
/// Extending by 5 tokens after recording 5 tokens in a 16-token allocation
/// succeeds and leaves the free-page count at 99.
#[test]
fn test_deep_pkcov_extend_no_new_pages_needed() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let sequence = cache.allocate_sequence(16).expect("alloc");
    cache.update_tokens(sequence, 5).expect("update");

    let outcome = cache.extend(sequence, 5);

    assert!(outcome.is_ok());
    assert_eq!(cache.free_page_count(), 99);
}
/// Updating tokens for a sequence id the cache never allocated yields
/// `PagedCacheError::SequenceNotFound`.
#[test]
fn test_deep_pkcov_update_tokens_not_found() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let unknown = SeqId::new();

    let outcome = cache.update_tokens(unknown, 10);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::SequenceNotFound(_))
    ));
}
/// Forking a sequence id the cache never allocated yields
/// `PagedCacheError::SequenceNotFound`.
#[test]
fn test_deep_pkcov_fork_sequence_not_found() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let unknown = SeqId::new();

    let outcome = cache.fork_sequence(unknown);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::SequenceNotFound(_))
    ));
}
/// A read-only page lookup for a sequence id the cache never allocated yields
/// `PagedCacheError::SequenceNotFound`.
#[test]
fn test_deep_pkcov_get_page_sequence_not_found() {
    let cache = PagedKvCache::new(100, 16, 8, 64);
    let unknown = SeqId::new();

    let outcome = cache.get_page(unknown, 0);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::SequenceNotFound(_))
    ));
}
/// A mutable page lookup for a sequence id the cache never allocated yields
/// `PagedCacheError::SequenceNotFound`.
#[test]
fn test_deep_pkcov_get_page_mut_sequence_not_found() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let unknown = SeqId::new();

    let outcome = cache.get_page_mut(unknown, 0);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::SequenceNotFound(_))
    ));
}
/// Requesting a mutable page at token position 100 of a 16-token sequence
/// yields `PagedCacheError::InvalidPageAccess`.
#[test]
fn test_deep_pkcov_get_page_mut_invalid_page_access() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let sequence = cache.allocate_sequence(16).expect("alloc");

    let outcome = cache.get_page_mut(sequence, 100);

    assert!(matches!(
        outcome,
        Err(PagedCacheError::InvalidPageAccess { .. })
    ));
}
/// A cache created with zero pages reports a utilization of 0.0 and no free
/// pages.
#[test]
fn test_deep_pkcov_utilization_zero_pages() {
    let empty_cache = PagedKvCache::new(0, 16, 8, 64);

    assert_eq!(empty_cache.utilization(), 0.0);
    assert_eq!(empty_cache.free_page_count(), 0);
}
/// Freeing a sequence id the cache never allocated must not bump the
/// `sequences_freed` statistic.
#[test]
fn test_deep_pkcov_free_nonexistent_sequence() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let unknown = SeqId::new();

    cache.free_sequence(unknown);

    let freed_count = cache.stats().sequences_freed;
    assert_eq!(freed_count, 0);
}
/// Allocating a zero-token sequence succeeds and leaves an entry for it in the
/// cache's `page_tables`.
#[test]
fn test_deep_pkcov_allocate_zero_tokens() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);

    let empty_sequence = cache.allocate_sequence(0).expect("alloc");

    let is_registered = cache.page_tables.contains_key(&empty_sequence);
    assert!(is_registered);
}
/// Recording 48 tokens on a 48-token allocation is reflected by
/// `get_sequence_tokens`.
#[test]
fn test_deep_pkcov_update_tokens_spans_multiple_pages() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let sequence = cache.allocate_sequence(48).expect("alloc");

    cache.update_tokens(sequence, 48).expect("update");

    let recorded = cache.get_sequence_tokens(sequence).expect("get");
    assert_eq!(recorded, 48);
}
/// Recording 5 tokens on a 32-token allocation is reflected by
/// `get_sequence_tokens`.
#[test]
fn test_deep_pkcov_update_tokens_partial_fill() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let sequence = cache.allocate_sequence(32).expect("alloc");

    cache.update_tokens(sequence, 5).expect("update");

    let recorded = cache.get_sequence_tokens(sequence).expect("get");
    assert_eq!(recorded, 5);
}
/// In a two-page cache, mutable access to a forked child's page must fail with
/// `PagedCacheError::OutOfMemory` once a second allocation has been made.
///
/// NOTE(review): presumably the failure comes from the copy needing a fresh
/// page when none is left; confirm against `get_page_mut`.
#[test]
fn test_deep_pkcov_cow_out_of_memory() {
    let mut cache = PagedKvCache::new(2, 16, 8, 64);

    let parent = cache.allocate_sequence(16).expect("alloc");
    let child = cache.fork_sequence(parent).expect("fork");

    // Second allocation, made before the child asks for a writable page.
    let _ = cache.allocate_sequence(16).expect("alloc2");

    let outcome = cache.get_page_mut(child, 0);
    assert!(matches!(
        outcome,
        Err(PagedCacheError::OutOfMemory { .. })
    ));
}
/// Two forks of one parent count as two `cow_operations`; mutable access
/// through the first child raises the counter to three, and the second
/// child's page still has a reference count of at least one.
#[test]
fn test_deep_pkcov_cow_multiple_forks() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);

    let parent = cache.allocate_sequence(16).expect("alloc");
    cache.update_tokens(parent, 16).expect("update");

    let first_child = cache.fork_sequence(parent).expect("fork1");
    let second_child = cache.fork_sequence(parent).expect("fork2");
    assert_eq!(cache.stats().cow_operations, 2);

    // Mutable access through the first child.
    let _writable = cache.get_page_mut(first_child, 0).expect("get");
    assert_eq!(cache.stats().cow_operations, 3);

    let sibling_page = cache.get_page(second_child, 0).expect("get2");
    assert!(sibling_page.ref_count >= 1);
}
/// A page shared by a parent and its fork returns to the free pool only after
/// both sequences have been freed.
#[test]
fn test_deep_pkcov_free_shared_sequence() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let parent = cache.allocate_sequence(16).expect("alloc");
    let child = cache.fork_sequence(parent).expect("fork");

    // The child still holds the page after the parent is freed.
    cache.free_sequence(parent);
    assert_eq!(cache.free_page_count(), 99);

    // Freeing the child as well releases the page.
    cache.free_sequence(child);
    assert_eq!(cache.free_page_count(), 100);
}
/// Fills a 10-page cache with nine 16-token sequences, then tries a tenth
/// allocation and frees it again, checking the free-page count at each step.
///
/// The original test made no assertions, so it could only fail by panicking
/// inside the cache. The checks below pin the free-page count, which holds
/// whether or not the tolerated tenth allocation succeeds.
///
/// NOTE(review): the name suggests this was meant to cover a "should
/// defragment" decision, but no such call is made here; confirm whether an
/// explicit defragmentation check belongs in this test.
#[test]
fn test_deep_pkcov_should_defragment_low_free_ratio() {
    let mut cache = PagedKvCache::new(10, 16, 8, 64);
    for _ in 0..9 {
        let _ = cache.allocate_sequence(16).expect("alloc");
    }
    // Each 16-token sequence takes one page, so nine of them leave one free.
    assert_eq!(cache.free_page_count(), 1);

    // As in the original, a failure of the tenth allocation is tolerated.
    if let Ok(seq) = cache.allocate_sequence(16) {
        assert_eq!(cache.free_page_count(), 0);
        cache.free_sequence(seq);
    }

    // Either the tenth allocation failed and left the count untouched, or it
    // succeeded and its page was returned; one page must be free again.
    assert_eq!(cache.free_page_count(), 1);
}
/// After freeing the middle of three 32-token sequences and recording one
/// token on each of the others, `fragmentation_stats` must report holes or
/// wasted capacity.
#[test]
fn test_deep_pkcov_should_defragment_high_waste() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);

    let first = cache.allocate_sequence(32).expect("alloc1");
    let middle = cache.allocate_sequence(32).expect("alloc2");
    let last = cache.allocate_sequence(32).expect("alloc3");

    // Free the middle sequence; the other two record a single token each.
    cache.free_sequence(middle);
    cache.update_tokens(first, 1).expect("update1");
    cache.update_tokens(last, 1).expect("update3");

    let report = cache.fragmentation_stats();
    assert!(report.holes > 0 || report.wasted_capacity > 0);
}
/// `compact_sequence` on a zero-token sequence returns 0.
#[test]
fn test_deep_pkcov_compact_empty_page_list() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let empty_sequence = cache.allocate_sequence(0).expect("alloc");

    let pages_moved = cache.compact_sequence(empty_sequence);

    assert_eq!(pages_moved, 0);
}
/// With a single 16-token sequence allocated from 100 pages, the largest free
/// region reported by `fragmentation_stats` must be at least 99.
#[test]
fn test_deep_pkcov_fragmentation_stats_large_free_region() {
    let mut cache = PagedKvCache::new(100, 16, 8, 64);
    let _ = cache.allocate_sequence(16).expect("alloc");

    let report = cache.fragmentation_stats();
    let largest = report.largest_free_region;

    assert!(largest >= 99);
}