pub struct CostModel {
pub prefill_us_per_token: u64,
pub decode_us_per_token: u64,
pub queue_us_per_prefill_token: u64,
pub running_decode_penalty_us: u64,
pub hbm_hit_us: u64,
pub remote_hbm_us_per_mb: u64,
pub cpu_dram_us_per_mb: u64,
pub local_ssd_us_per_mb: u64,
pub object_store_us_per_mb: u64,
pub cross_domain_penalty_us: u64,
}
Fields§
§prefill_us_per_token: u64
§decode_us_per_token: u64
§queue_us_per_prefill_token: u64
§running_decode_penalty_us: u64
§hbm_hit_us: u64
§remote_hbm_us_per_mb: u64
§cpu_dram_us_per_mb: u64
§local_ssd_us_per_mb: u64
§object_store_us_per_mb: u64
§cross_domain_penalty_us: u64
Implementations§
impl CostModel
pub fn prefill_cost_us(&self, tokens: u32) -> u64
pub fn decode_cost_us(&self, tokens: u32, running_decodes: u32) -> u64
pub fn queue_cost_us(&self, worker: &WorkerState) -> u64
pub fn transfer_cost_us(&self, tier: CacheTier, bytes: u64, same_worker: bool, same_locality_domain: bool) -> u64
Trait Implementations§
impl Copy for CostModel
impl<'de> Deserialize<'de> for CostModel
fn deserialize<__D>(__deserializer: __D) -> Result<Self, __D::Error>
where
    __D: Deserializer<'de>,
Deserialize this value from the given Serde deserializer. Read more
impl Eq for CostModel
impl StructuralPartialEq for CostModel
Auto Trait Implementations§
impl Freeze for CostModel
impl RefUnwindSafe for CostModel
impl Send for CostModel
impl Sync for CostModel
impl Unpin for CostModel
impl UnsafeUnpin for CostModel
impl UnwindSafe for CostModel
Blanket Implementations§
impl<T> BorrowMut<T> for T
where
    T: ?Sized,
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more