mod checksum;
mod tile_stats;
#[cfg(test)]
mod tests;
mod divergence;
mod exec_graph_ext;
mod recording;
mod reporting;
mod tiling;
pub use checksum::{fnv1a_f32_checksum, DivergenceInfo, KernelChecksum};
pub use tile_stats::{TileLevel, TileStats, TileTimer};
use std::time::Instant;
use super::exec_graph::{
BrickCategory, BrickId, BrickStats, CategoryStats, ExecutionGraph, SyncMode,
};
/// A measurement queued by `record_deferred` / `record_deferred_dynamic`
/// whose duration is only computed later, when `finalize` receives the end
/// timestamp.
#[derive(Debug, Clone)]
struct PendingMeasurement {
/// Known brick this sample belongs to. When `Some`, `finalize` records the
/// sample in the fixed per-brick table and ignores `name`.
brick_id: Option<BrickId>,
/// Owned name used as the key into the dynamic stats map when `brick_id`
/// is `None`.
name: Option<String>,
/// Start timestamp in nanoseconds; `finalize` subtracts it (saturating)
/// from its `end_ns` argument.
/// NOTE(review): presumably on the same timebase as `elapsed_ns()` — confirm with callers.
start_ns: u64,
/// Element count passed to `add_sample` and added to the running total.
elements: u64,
}
/// Collects per-brick timing samples plus aggregate totals.
///
/// Samples are recorded either immediately (`start_brick` / `stop_brick`) or
/// queued and resolved later (`record_deferred*` followed by `finalize`).
/// Recording is gated by the `enabled` flag.
#[derive(Debug)]
pub struct BrickProfiler {
/// Fixed table of statistics, indexed by `BrickId as usize`.
brick_stats: [BrickStats; BrickId::COUNT],
/// Statistics for measurements that carry a name but no resolved
/// `BrickId`; entries are created on demand in `finalize`.
dynamic_stats: std::collections::HashMap<String, BrickStats>,
/// Deferred measurements waiting for `finalize`.
pending: Vec<PendingMeasurement>,
/// Current sync mode; initialised to `SyncMode::Deferred` in `new`.
sync_mode: SyncMode,
/// Reference instant for `elapsed_ns`; reset by `reset_epoch`.
epoch: Instant,
/// When `false`, `stop_brick` and the `record_deferred*` methods do nothing.
enabled: bool,
/// Running sum of the `elements` argument of every recorded sample.
total_tokens: u64,
/// Running sum of the elapsed nanoseconds of every recorded sample.
total_ns: u64,
/// Optional L2 cache hit rate; the setter clamps it to `0.0..=1.0`.
l2_cache_hit_rate: Option<f32>,
/// Flag stored by `set_zero_copy`; nothing in this part of the file reads
/// it apart from the getter.
is_zero_copy: bool,
/// Starts empty in `new`; not touched in this part of the file.
/// NOTE(review): presumably filled by the `checksum` / `divergence` submodules — confirm.
kernel_checksums: Vec<KernelChecksum>,
/// Starts `false` in `new`; not read in this part of the file.
/// NOTE(review): presumably gates `execution_graph` recording — confirm.
graph_enabled: bool,
/// Starts as `ExecutionGraph::new()`; not touched in this part of the file.
execution_graph: ExecutionGraph,
/// One `TileStats` per level, initialised in the order Macro, Midi, Micro.
tile_stats: [TileStats; 3],
/// Starts `false` in `new`; not read in this part of the file.
/// NOTE(review): presumably gates updates to `tile_stats` — confirm.
tile_profiling_enabled: bool,
}
/// Timer handle pairing an owned name with a start instant.
///
/// NOTE(review): nothing in this part of the file constructs or consumes it;
/// presumably the name-keyed counterpart of `BrickIdTimer` used by a
/// submodule — confirm.
#[derive(Debug)]
pub struct BrickTimer {
/// Name the measurement will be attributed to.
name: String,
/// Instant at which timing started.
start: Instant,
}
/// Timer handle returned by `BrickProfiler::start_brick` and consumed by
/// `BrickProfiler::stop_brick`.
#[derive(Debug)]
pub struct BrickIdTimer {
/// Brick whose statistics slot receives the sample.
brick_id: BrickId,
/// Instant captured when the timer was started.
start: Instant,
}
impl Default for BrickProfiler {
fn default() -> Self {
Self::new()
}
}
impl BrickProfiler {
    /// Creates a profiler with recording disabled.
    ///
    /// Each per-brick slot is seeded with the name of the matching entry in
    /// `BrickId::ALL`, the sync mode starts as `SyncMode::Deferred`, all
    /// totals are zero and the epoch is the moment of construction.
    pub fn new() -> Self {
        Self {
            brick_stats: std::array::from_fn(|i| BrickStats::new(BrickId::ALL[i].name())),
            dynamic_stats: std::collections::HashMap::new(),
            pending: Vec::new(),
            sync_mode: SyncMode::Deferred,
            epoch: Instant::now(),
            enabled: false,
            total_tokens: 0,
            total_ns: 0,
            l2_cache_hit_rate: None,
            is_zero_copy: false,
            kernel_checksums: Vec::new(),
            graph_enabled: false,
            execution_graph: ExecutionGraph::new(),
            tile_stats: [
                TileStats::new(TileLevel::Macro),
                TileStats::new(TileLevel::Midi),
                TileStats::new(TileLevel::Micro),
            ],
            tile_profiling_enabled: false,
        }
    }

    /// Creates a profiler identical to [`BrickProfiler::new`] but with
    /// recording already switched on.
    pub fn enabled() -> Self {
        let mut profiler = Self::new();
        profiler.enabled = true;
        profiler
    }

    /// Replaces the current sync mode.
    ///
    /// The `contract_pre_sync_verification!` hook (defined outside this file)
    /// runs before the assignment.
    pub fn set_sync_mode(&mut self, mode: SyncMode) {
        contract_pre_sync_verification!();
        self.sync_mode = mode;
    }

    /// Returns the current sync mode.
    #[must_use]
    pub fn sync_mode(&self) -> SyncMode {
        self.sync_mode
    }

    /// Restarts the epoch used by [`BrickProfiler::elapsed_ns`] at the
    /// current instant.
    pub fn reset_epoch(&mut self) {
        self.epoch = Instant::now();
    }

    /// Nanoseconds elapsed since the epoch, saturating at `u64::MAX`.
    #[inline]
    pub fn elapsed_ns(&self) -> u64 {
        // `as_nanos` yields a u128; clamp first so the narrowing cast can
        // never wrap around.
        self.epoch.elapsed().as_nanos().min(u128::from(u64::MAX)) as u64
    }

    /// Starts timing `brick_id` and returns the handle to pass to
    /// [`BrickProfiler::stop_brick`].
    ///
    /// The start instant is captured regardless of whether the profiler is
    /// enabled.
    #[inline]
    #[must_use]
    pub fn start_brick(&self, brick_id: BrickId) -> BrickIdTimer {
        BrickIdTimer { brick_id, start: Instant::now() }
    }

    /// Stops `timer` and records the elapsed time together with `elements`.
    ///
    /// Does nothing while the profiler is disabled. Otherwise the sample is
    /// added to the brick's slot and to the running totals.
    ///
    /// # Panics
    ///
    /// Panics if the brick id does not index into the per-brick table.
    #[inline]
    pub fn stop_brick(&mut self, timer: BrickIdTimer, elements: u64) {
        if !self.enabled {
            return;
        }
        // Clamp before narrowing so an implausibly long interval saturates
        // instead of wrapping.
        let elapsed_ns = timer.start.elapsed().as_nanos().min(u128::from(u64::MAX)) as u64;
        let slot = timer.brick_id as usize;
        debug_assert!(
            slot < self.brick_stats.len(),
            "CB-BUDGET: brick_id {} out of bounds (max {})",
            slot,
            self.brick_stats.len()
        );
        self.brick_stats[slot].add_sample(elapsed_ns, elements);
        self.total_tokens += elements;
        self.total_ns += elapsed_ns;
    }

    /// Shared access to the statistics slot of `brick_id`.
    ///
    /// The `contract_pre_brick_ordering!` hook (defined outside this file)
    /// runs first.
    #[inline]
    #[must_use]
    pub fn brick_stats(&self, brick_id: BrickId) -> &BrickStats {
        contract_pre_brick_ordering!();
        &self.brick_stats[brick_id as usize]
    }

    /// Mutable access to the statistics slot of `brick_id`.
    #[inline]
    pub fn brick_stats_mut(&mut self, brick_id: BrickId) -> &mut BrickStats {
        &mut self.brick_stats[brick_id as usize]
    }

    /// Queues a measurement for `brick_id` that started at `start_ns`.
    ///
    /// The duration is computed later by [`BrickProfiler::finalize`]. Does
    /// nothing while the profiler is disabled.
    #[inline]
    pub fn record_deferred(&mut self, brick_id: BrickId, start_ns: u64, elements: u64) {
        if !self.enabled {
            return;
        }
        self.pending.push(PendingMeasurement {
            brick_id: Some(brick_id),
            name: None,
            start_ns,
            elements,
        });
    }

    /// Queues a measurement identified by `name` that started at `start_ns`.
    ///
    /// The name is looked up with `BrickId::from_str`; the lookup result is
    /// stored next to an owned copy of the name, and `finalize` prefers the
    /// resolved id when there is one. Does nothing while the profiler is
    /// disabled.
    #[inline]
    pub fn record_deferred_dynamic(&mut self, name: &str, start_ns: u64, elements: u64) {
        if !self.enabled {
            return;
        }
        self.pending.push(PendingMeasurement {
            brick_id: BrickId::from_str(name),
            name: Some(name.to_string()),
            start_ns,
            elements,
        });
    }

    /// Resolves every queued measurement against the shared end timestamp
    /// `end_ns` and empties the queue.
    ///
    /// Each duration is `end_ns - start_ns`, saturating at zero. Measurements
    /// with a resolved brick id go to the fixed table; the rest go to the
    /// dynamic map under their name, creating the entry on first use. Every
    /// measurement also feeds the running totals.
    pub fn finalize(&mut self, end_ns: u64) {
        for m in self.pending.drain(..) {
            let elapsed_ns = end_ns.saturating_sub(m.start_ns);
            match (m.brick_id, m.name) {
                (Some(brick_id), _) => {
                    self.brick_stats[brick_id as usize].add_sample(elapsed_ns, m.elements);
                }
                (None, Some(name)) => {
                    // Move the owned name into the map; the key is borrowed
                    // back only when a fresh entry has to be built, so no
                    // clone is needed.
                    self.dynamic_stats
                        .entry(name)
                        .or_insert_with_key(|key| BrickStats::new(key))
                        .add_sample(elapsed_ns, m.elements);
                }
                (None, None) => {}
            }
            self.total_tokens += m.elements;
            self.total_ns += elapsed_ns;
        }
    }

    /// Returns `true` while deferred measurements are waiting for `finalize`.
    #[inline]
    #[must_use]
    pub fn has_pending(&self) -> bool {
        !self.pending.is_empty()
    }

    /// Number of deferred measurements waiting for `finalize`.
    #[inline]
    #[must_use]
    pub fn pending_count(&self) -> usize {
        self.pending.len()
    }

    /// Aggregates all statistics by category.
    ///
    /// Fixed bricks contribute to the category reported by
    /// `BrickId::category`; every dynamic entry is counted under
    /// `BrickCategory::Other`.
    #[must_use]
    pub fn category_stats(&self) -> [CategoryStats; BrickCategory::COUNT] {
        let mut result = [CategoryStats::default(); BrickCategory::COUNT];
        let mut accumulate = |cat: usize, stats: &BrickStats| {
            let slot = &mut result[cat];
            slot.total_ns += stats.total_ns;
            slot.total_elements += stats.total_elements;
            slot.count += stats.count;
        };
        // `brick_stats[i]` belongs to `BrickId::ALL[i]` (see `new`), so the
        // two sequences can be walked in lockstep.
        for (brick_id, stats) in BrickId::ALL.iter().zip(&self.brick_stats) {
            accumulate(brick_id.category() as usize, stats);
        }
        for stats in self.dynamic_stats.values() {
            accumulate(BrickCategory::Other as usize, stats);
        }
        result
    }

    /// Stores the L2 cache hit rate, clamped to `0.0..=1.0`.
    ///
    /// `f32::clamp` leaves NaN untouched, so a NaN input is stored as-is.
    pub fn set_l2_cache_hit_rate(&mut self, rate: f32) {
        self.l2_cache_hit_rate = Some(rate.clamp(0.0, 1.0));
    }

    /// The stored L2 cache hit rate, or `None` if it was never set.
    pub fn l2_cache_hit_rate(&self) -> Option<f32> {
        self.l2_cache_hit_rate
    }

    /// Sets the zero-copy flag.
    pub fn set_zero_copy(&mut self, enabled: bool) {
        self.is_zero_copy = enabled;
    }

    /// Current value of the zero-copy flag.
    pub fn is_zero_copy(&self) -> bool {
        self.is_zero_copy
    }

    /// Switches recording on.
    pub fn enable(&mut self) {
        self.enabled = true;
    }

    /// Switches recording off; already-recorded data is kept.
    pub fn disable(&mut self) {
        self.enabled = false;
    }

    /// Returns `true` when recording is switched on.
    #[must_use]
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }

    /// Overall throughput in elements per second.
    ///
    /// Computed as `total_tokens / (total_ns / 1e9)`; returns `0.0` when no
    /// time has been recorded, avoiding a division by zero.
    #[must_use]
    pub fn total_throughput(&self) -> f64 {
        if self.total_ns == 0 {
            return 0.0;
        }
        let seconds = self.total_ns as f64 / 1_000_000_000.0;
        self.total_tokens as f64 / seconds
    }

    /// Running sum of the element counts of all recorded samples.
    ///
    /// The `contract_pre_token_accounting!` hook (defined outside this file)
    /// runs first.
    #[must_use]
    pub fn total_tokens(&self) -> u64 {
        contract_pre_token_accounting!();
        self.total_tokens
    }

    /// Running sum of the elapsed nanoseconds of all recorded samples.
    ///
    /// The `contract_pre_wall_coverage!` hook (defined outside this file)
    /// runs first.
    #[must_use]
    pub fn total_ns(&self) -> u64 {
        contract_pre_wall_coverage!();
        self.total_ns
    }
}