pub mod attention;
pub mod blocks;
pub mod cache;
pub mod managers;
pub use ferrum_interfaces::{
kv_cache::{
AllocationRequest, BlockTable, CacheConfig, CacheEvictionPolicy, CacheHandleStats,
CacheManagerStats, LruEvictionPolicy, PrefixCacheConfig,
},
KvCacheHandle as KvCacheHandleInterface, KvCacheManager as KvCacheManagerInterface,
};
pub use ferrum_types::{CacheStats, DataType, Device, FerrumError, RequestId, Result};
pub use blocks::*;
pub use cache::*;
pub use managers::*;
/// Builds a [`DefaultKvCacheManager`] and returns it as a boxed, thread-safe
/// [`KvCacheManagerInterface`] trait object.
///
/// The three arguments are forwarded unchanged to `DefaultKvCacheManager::new`.
///
/// # Errors
///
/// Propagates any error returned by `DefaultKvCacheManager::new`.
pub fn default_manager(
    device: Device,
    block_size: usize,
    max_blocks: usize,
) -> Result<Box<dyn KvCacheManagerInterface + Send + Sync>> {
    // Name the trait-object type up front so the unsizing coercion happens
    // at the binding rather than implicitly inside `Ok(..)`.
    let boxed: Box<dyn KvCacheManagerInterface + Send + Sync> =
        Box::new(DefaultKvCacheManager::new(device, block_size, max_blocks)?);
    Ok(boxed)
}
/// Configuration values for a KV-cache manager.
///
/// This is a plain data holder: it carries no validation or behavior of its
/// own in this file.
// NOTE(review): nothing visible in this file consumes this struct — confirm
// which manager(s) read it and how each field is interpreted.
#[derive(Debug, Clone)]
pub struct KvManagerConfig {
/// Size of a single cache block (presumably in tokens — TODO confirm unit).
pub block_size: usize,
/// Block limit for the GPU side (presumably an upper bound on allocated
/// blocks — verify against the consuming manager).
pub max_blocks_gpu: usize,
/// Block limit for the CPU side (presumably an upper bound on allocated
/// blocks — verify against the consuming manager).
pub max_blocks_cpu: usize,
/// Flag for turning prefix caching on or off.
pub enable_prefix_cache: bool,
/// Flag for turning metrics collection on or off.
pub enable_metrics: bool,
}