use std::env;
use std::sync::LazyLock;
use std::time::{SystemTime, UNIX_EPOCH};
/// Name of the environment variable that switches KV cache trace logging on.
const DYN_MOCKER_KV_CACHE_TRACE: &str = "DYN_MOCKER_KV_CACHE_TRACE";

/// Whether KV cache tracing is enabled for this process.
///
/// The environment variable is read the first time this flag is dereferenced
/// and the answer is cached for the rest of the process lifetime. Tracing is
/// on only when the variable is exactly `1` or equals `true` ignoring ASCII
/// case; an unset or non-Unicode value counts as disabled.
pub static KV_CACHE_TRACE_ENABLED: LazyLock<bool> = LazyLock::new(|| {
    env::var(DYN_MOCKER_KV_CACHE_TRACE)
        .is_ok_and(|value| value.eq_ignore_ascii_case("true") || value == "1")
});
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// If the system clock reports a time before the epoch, 0 is returned instead
/// of an error.
fn timestamp_ms() -> u64 {
    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis() as u64,
        // Clock is set before 1970: fall back to zero rather than failing.
        Err(_) => 0,
    }
}
/// Emits a `tracing` info event describing block-level KV cache occupancy,
/// tagged with `engine_type = "vllm"`.
///
/// Nothing is logged unless [`KV_CACHE_TRACE_ENABLED`] is true.
///
/// Besides the arguments, which are logged under their own names, two derived
/// fields are emitted:
/// - `free_blocks`: `total_blocks` minus `active_blocks` minus
///   `inactive_blocks`, clamped at zero.
/// - `utilization`: `(active_blocks + inactive_blocks) / total_blocks`, or
///   `0.0` when `total_blocks` is zero.
pub fn log_vllm_trace(
    event: &str,
    dp_rank: u32,
    block_size: usize,
    active_blocks: usize,
    inactive_blocks: usize,
    total_blocks: usize,
) {
    if *KV_CACHE_TRACE_ENABLED {
        // Each subtraction saturates, so inconsistent counters yield 0 free blocks.
        let remaining = total_blocks
            .saturating_sub(active_blocks)
            .saturating_sub(inactive_blocks);

        // A zero-capacity cache is reported as 0% utilized instead of dividing by zero.
        let occupancy = match total_blocks {
            0 => 0.0,
            capacity => (active_blocks + inactive_blocks) as f64 / capacity as f64,
        };

        tracing::info!(
            engine_type = "vllm",
            event,
            timestamp_ms = timestamp_ms(),
            dp_rank,
            block_size,
            free_blocks = remaining,
            active_blocks,
            inactive_blocks,
            total_blocks,
            utilization = occupancy,
            "KV cache trace"
        );
    }
}
/// Snapshot of token-level cache counters handed to `log_sglang_trace`.
///
/// Every field is emitted in the trace event under its own name. The
/// `utilization` value in that event is derived from `total_tokens` and
/// `available_tokens`.
///
/// All fields are plain integers or a borrowed string, so the struct is cheap
/// to copy and can be printed with `{:?}` for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct SglangCacheState<'a> {
    /// Label identifying what triggered this trace record.
    pub event: &'a str,
    /// Rank value logged as `dp_rank` (presumably the data-parallel rank — confirm with callers).
    pub dp_rank: u32,
    /// Token count logged as `num_tokens` (presumably the tokens involved in `event` — confirm with callers).
    pub num_tokens: usize,
    /// Page size logged as `page_size` (presumably in tokens — confirm with callers).
    pub page_size: usize,
    /// Tokens currently available; used together with `total_tokens` to compute utilization.
    pub available_tokens: usize,
    /// Token count logged as `evictable_tokens`.
    pub evictable_tokens: usize,
    /// Token count logged as `protected_tokens`.
    pub protected_tokens: usize,
    /// Total token capacity; the denominator of the logged utilization.
    pub total_tokens: usize,
}
/// Emits a `tracing` info event describing token-level KV cache occupancy,
/// tagged with `engine_type = "sglang"`.
///
/// Nothing is logged unless [`KV_CACHE_TRACE_ENABLED`] is true.
///
/// Every field of `state` is logged under its own name, plus a derived
/// `utilization` equal to `(total_tokens - available_tokens) / total_tokens`.
/// `utilization` is `0.0` when `total_tokens` is zero, and also when
/// `available_tokens` exceeds `total_tokens`.
pub fn log_sglang_trace(state: &SglangCacheState) {
    if !*KV_CACHE_TRACE_ENABLED {
        return;
    }
    // Saturate instead of using `-`: if the counters are momentarily
    // inconsistent (available > total), a plain subtraction would panic in
    // debug builds and wrap to a huge value in release builds.
    let used_tokens = state.total_tokens.saturating_sub(state.available_tokens);
    // A zero-capacity cache is reported as 0% utilized instead of dividing by zero.
    let utilization = if state.total_tokens > 0 {
        used_tokens as f64 / state.total_tokens as f64
    } else {
        0.0
    };
    tracing::info!(
        engine_type = "sglang",
        event = state.event,
        timestamp_ms = timestamp_ms(),
        dp_rank = state.dp_rank,
        num_tokens = state.num_tokens,
        page_size = state.page_size,
        available_tokens = state.available_tokens,
        evictable_tokens = state.evictable_tokens,
        protected_tokens = state.protected_tokens,
        total_tokens = state.total_tokens,
        utilization,
        "KV cache trace"
    );
}