pub struct HardwareConfig {
pub name: String,
pub compute_flops: f64,
pub memory_bandwidth: f64,
pub memory_capacity: u64,
pub kv_cache_capacity: u64,
pub gpu_memory_utilization: f64,
pub bytes_per_param: u32,
pub compute_bound_threshold: u32,
}

Fields§
name: String
Accelerator name (e.g., “H100”, “A100”)
compute_flops: f64
Compute capacity in FLOPS (for a specific precision, e.g., bf16)
memory_bandwidth: f64
Memory bandwidth in bytes/sec
memory_capacity: u64
Total memory capacity in bytes
kv_cache_capacity: u64
KV cache capacity in bytes (subset of memory_capacity). If not specified, it is calculated from gpu_memory_utilization.
gpu_memory_utilization: f64
Fraction of GPU memory to use (vLLM default: 0.9). Used to calculate kv_cache_capacity if it is not explicitly set.
bytes_per_param: u32
Number of bytes per parameter (1 for fp8, 2 for bf16)
compute_bound_threshold: u32
Compute-bound threshold (derived from the flops/bandwidth ratio). It is calculated as: bytes_per_param * compute_flops / memory_bandwidth
Implementations§
impl HardwareConfig
pub fn compute_threshold(&mut self)
Calculate and set the compute-bound threshold
pub fn compute_kv_cache_capacity(&mut self, model_size_bytes: u64)
Calculate KV cache capacity if not explicitly set. Formula: (memory_capacity * gpu_memory_utilization) - model_size_bytes. This matches vLLM’s behavior: requested_memory - non_kv_cache_memory.
pub fn with_threshold(self) -> Self
Initialize with threshold pre-computed
Trait Implementations§
impl Clone for HardwareConfig
fn clone(&self) -> HardwareConfig
fn clone_from(&mut self, source: &Self) (since 1.0.0)
Performs copy-assignment from source. Read more