use std::time::{Duration, Instant};
use crate::common::protocols::MockEngineArgs;
/// Computes the simulated KV transfer delay for a prefill of `num_input_tokens` tokens.
///
/// The transfer size is `num_input_tokens * args.kv_bytes_per_token` bytes, and the
/// bandwidth `args.kv_transfer_bandwidth` is scaled by `1e9` to obtain bytes per
/// second (it is logged as `bandwidth_gb_s`).
///
/// Returns `None` when:
/// - either `kv_transfer_bandwidth` or `kv_bytes_per_token` is unset,
/// - the bandwidth is not strictly positive (this also rejects NaN), or
/// - the resulting delay cannot be represented as a [`Duration`] (negative,
///   non-finite, or too large). This case is logged at `warn` level instead of
///   panicking.
pub fn compute_kv_transfer_delay(
    args: &MockEngineArgs,
    num_input_tokens: usize,
) -> Option<Duration> {
    match (args.kv_transfer_bandwidth, args.kv_bytes_per_token) {
        (Some(bw), Some(bpt)) if bw > 0.0 => {
            let kv_bytes = num_input_tokens as f64 * bpt as f64;
            let delay_secs = kv_bytes / (bw * 1e9);

            // `Duration::from_secs_f64` panics on negative, non-finite, or
            // overflowing input (e.g. a vanishingly small bandwidth); the fallible
            // variant lets a bad configuration degrade to "no delay" instead.
            let delay = match Duration::try_from_secs_f64(delay_secs) {
                Ok(delay) => delay,
                Err(err) => {
                    tracing::warn!(
                        num_input_tokens,
                        kv_bytes,
                        bandwidth_gb_s = bw,
                        error = %err,
                        "KV transfer delay is not representable; skipping delay"
                    );
                    return None;
                }
            };

            tracing::debug!(
                num_input_tokens,
                kv_bytes,
                bandwidth_gb_s = bw,
                delay_ms = format!("{:.2}", delay.as_secs_f64() * 1000.0),
                "KV transfer delay for prefill"
            );
            Some(delay)
        }
        _ => None,
    }
}
/// Sleeps for `duration` by delegating to [`sleep_until_precise`].
///
/// The deadline is `Instant::now() + duration`, evaluated when this future
/// first runs (the body of an `async fn` does not execute until it is polled).
pub async fn sleep_precise(duration: Duration) {
    let deadline = Instant::now() + duration;
    sleep_until_precise(deadline).await
}
/// Suspends the current task until `deadline`.
///
/// On Linux this first tries to build a `tokio_timerfd::Delay` for the deadline
/// and awaits it, discarding the result of the wait. If the delay cannot be
/// constructed, or on any other target OS, it awaits `tokio::time::sleep_until`
/// with the same deadline instead.
///
/// NOTE(review): the timerfd path is presumably preferred for finer wake-up
/// precision than Tokio's timer; confirm against the `tokio_timerfd` docs.
pub async fn sleep_until_precise(deadline: Instant) {
    #[cfg(target_os = "linux")]
    {
        // Preferred path: timerfd-backed delay. Any error from the wait itself
        // is deliberately ignored.
        if let Ok(timer) = tokio_timerfd::Delay::new(deadline) {
            let _ = timer.await;
            return;
        }
    }

    // Shared fallback: non-Linux targets, or timerfd creation failed.
    let tokio_deadline = tokio::time::Instant::from_std(deadline);
    tokio::time::sleep_until(tokio_deadline).await;
}