pub struct ComputeEngine { /* private fields */ }
Implementations§
Source§impl ComputeEngine
impl ComputeEngine
pub fn new(hardware: HardwareConfig, model: ModelConfig) -> Self
Source pub fn calculate_iteration_time(
&self,
batch_requests: &[&Request],
tokens_per_request: &[u32],
) -> f64
pub fn calculate_iteration_time( &self, batch_requests: &[&Request], tokens_per_request: &[u32], ) -> f64
Calculate the time to process one iteration, in seconds. Takes a batch of requests and the number of tokens to process for each. Returns max(compute_time, memory_time), since the two happen in parallel.
Source pub fn calculate_flops_utilization(
&self,
batch_requests: &[&Request],
tokens_per_request: &[u32],
actual_time: f64,
) -> f64
pub fn calculate_flops_utilization( &self, batch_requests: &[&Request], tokens_per_request: &[u32], actual_time: f64, ) -> f64
Calculate FLOPS utilization for this iteration (0.0 to 1.0)
Source pub fn calculate_bandwidth_utilization(
&self,
bytes_transferred: f64,
actual_time: f64,
) -> f64
pub fn calculate_bandwidth_utilization( &self, bytes_transferred: f64, actual_time: f64, ) -> f64
Calculate memory bandwidth utilization for this iteration (0.0 to 1.0)
Source pub fn calculate_bytes_transferred(
&self,
batch_requests: &[&Request],
tokens_per_request: &[u32],
) -> f64
pub fn calculate_bytes_transferred( &self, batch_requests: &[&Request], tokens_per_request: &[u32], ) -> f64
Calculate total bytes transferred for a batch of requests
Auto Trait Implementations§
impl Freeze for ComputeEngine
impl RefUnwindSafe for ComputeEngine
impl Send for ComputeEngine
impl Sync for ComputeEngine
impl Unpin for ComputeEngine
impl UnsafeUnpin for ComputeEngine
impl UnwindSafe for ComputeEngine
Blanket Implementations§
Source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
Source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
Mutably borrows from an owned value. Read more