Skip to main content

ferrum_kv/
lib.rs

1//! # Ferrum KV Cache
2//!
3//! MVP KV-Cache management implementation for Ferrum inference stack.
4//!
5//! This crate provides block-based KV cache management, implementing the
6//! interfaces defined in `ferrum-interfaces::kv_cache`.
7
8pub mod attention;
9pub mod blocks;
10pub mod cache;
11pub mod managers;
12
13// Re-export interface types
14pub use ferrum_interfaces::{
15    kv_cache::{
16        AllocationRequest, BlockTable, CacheConfig, CacheEvictionPolicy, CacheHandleStats,
17        CacheManagerStats, LruEvictionPolicy, PrefixCacheConfig,
18    },
19    KvCacheHandle as KvCacheHandleInterface, KvCacheManager as KvCacheManagerInterface,
20};
21
22pub use ferrum_types::{CacheStats, DataType, Device, FerrumError, RequestId, Result};
23// Note: ferrum-types::KvCacheConfig exists but has different fields for engine-level config
24// This crate uses a simplified internal config
25
26// Re-export implementations
27pub use blocks::*;
28pub use cache::*;
29pub use managers::*;
30
31/// Default KV cache manager factory
32pub fn default_manager(
33    device: Device,
34    block_size: usize,
35    max_blocks: usize,
36) -> Result<Box<dyn KvCacheManagerInterface + Send + Sync>> {
37    let manager = DefaultKvCacheManager::new(device, block_size, max_blocks)?;
38    Ok(Box::new(manager))
39}
40
/// Internal KV cache manager configuration.
///
/// Note: This is distinct from `ferrum_types::KvCacheConfig`, which is the
/// engine-level configuration. This type is used internally by the KV cache
/// manager implementation.
///
/// Derives `PartialEq`/`Eq` so two configurations can be compared directly
/// (e.g. in tests or to detect a configuration change).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct KvManagerConfig {
    /// Size of a single cache block.
    // NOTE(review): presumably measured in tokens per block — confirm against the manager.
    pub block_size: usize,
    /// Upper bound on the number of blocks on the GPU side.
    // NOTE(review): assumed to be a hard cap enforced by the manager — confirm.
    pub max_blocks_gpu: usize,
    /// Upper bound on the number of blocks on the CPU side.
    // NOTE(review): assumed to be a hard cap enforced by the manager — confirm.
    pub max_blocks_cpu: usize,
    /// Whether prefix caching is turned on.
    pub enable_prefix_cache: bool,
    /// Whether metrics collection is turned on.
    pub enable_metrics: bool,
}