kapsl-llm 0.1.0

Large language model inference with GGUF and ONNX backend support for Kapsl
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
/// Metrics for LLM inference and the KV cache.
///
/// Every field is a plain numeric value, so [`Default`] yields an all-zero
/// record. `Debug` and `PartialEq` are derived so the metrics can be logged
/// with `{:?}` and compared directly in tests (`Eq` is not available because
/// of the `f64` field).
#[derive(Debug, Default, Clone, PartialEq)]
pub struct LLMMetrics {
    /// Total inference time.
    ///
    /// NOTE(review): the unit (seconds vs. milliseconds) is not visible from
    /// this definition; confirm against the code that updates it.
    pub total_inference_time: f64,
    /// Bytes of KV cache in use.
    pub kv_cache_bytes_used: usize,
    /// KV cache capacity in bytes.
    pub kv_cache_bytes_capacity: usize,
    /// Total number of KV cache blocks.
    pub kv_cache_blocks_total: usize,
    /// Number of KV cache blocks that are free.
    pub kv_cache_blocks_free: usize,
    /// Number of sequences tracked by the KV cache.
    pub kv_cache_sequences: usize,
    /// Count of evicted KV cache blocks.
    ///
    /// NOTE(review): presumably a cumulative counter; confirm with the updater.
    pub kv_cache_evicted_blocks: u64,
    /// Count of evicted KV cache sequences.
    ///
    /// NOTE(review): presumably a cumulative counter; confirm with the updater.
    pub kv_cache_evicted_sequences: u64,
    /// Number of packed KV cache layers.
    ///
    /// NOTE(review): the meaning of "packed" is defined by the cache
    /// implementation, which is not visible here.
    pub kv_cache_packed_layers: usize,
    /// Blocks currently sitting in the CPU offload store (paged mode only).
    pub kv_cache_cpu_offloaded_blocks: u64,
}