use std::time::Instant;
#[derive(Debug, Clone, Default)]
pub struct TileStats {
pub level: TileLevel,
pub count: u64,
pub total_ns: u64,
pub min_ns: u64,
pub max_ns: u64,
pub total_elements: u64,
pub cache_misses: u64,
pub total_flops: u64,
}
/// Level of a tile in the tiling hierarchy.
///
/// Three levels exist: `Macro`, `Midi` and `Micro`. `Macro` is the value
/// produced by `TileLevel::default()`.
// NOTE(review): presumably ordered from coarsest to finest tile size — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TileLevel {
/// Macro-level tile; the default variant.
#[default]
Macro,
/// Midi-level tile.
Midi,
/// Micro-level tile.
Micro,
}
impl TileLevel {
#[must_use]
pub const fn name(&self) -> &'static str {
match self {
TileLevel::Macro => "macro",
TileLevel::Midi => "midi",
TileLevel::Micro => "micro",
}
}
}
impl TileStats {
    /// Creates empty statistics for `level`.
    ///
    /// Every counter starts at zero except `min_ns`, which starts at
    /// `u64::MAX` so that the first recorded sample always becomes the minimum.
    #[must_use]
    pub fn new(level: TileLevel) -> Self {
        Self {
            level,
            count: 0,
            total_ns: 0,
            min_ns: u64::MAX,
            max_ns: 0,
            total_elements: 0,
            cache_misses: 0,
            total_flops: 0,
        }
    }

    /// Records one sample.
    ///
    /// * `elapsed_ns` - duration of the sample in nanoseconds.
    /// * `elements` - number of elements processed; expected to be non-zero
    ///   (checked with `debug_assert!` only).
    /// * `flops` - number of floating-point operations performed.
    ///
    /// The accumulators saturate at `u64::MAX` instead of overflowing, so a
    /// very long-running profile can neither panic (debug builds) nor wrap
    /// around to small, misleading totals (release builds).
    ///
    /// `cache_misses` is not touched by this method.
    pub fn add_sample(&mut self, elapsed_ns: u64, elements: u64, flops: u64) {
        debug_assert!(elements > 0, "CB-BUDGET: tile sample elements must be > 0");
        self.count = self.count.saturating_add(1);
        self.total_ns = self.total_ns.saturating_add(elapsed_ns);
        self.min_ns = self.min_ns.min(elapsed_ns);
        self.max_ns = self.max_ns.max(elapsed_ns);
        self.total_elements = self.total_elements.saturating_add(elements);
        self.total_flops = self.total_flops.saturating_add(flops);
    }

    /// Returns the mean sample duration in microseconds, or `0.0` when no
    /// sample has been recorded.
    #[must_use]
    pub fn avg_us(&self) -> f64 {
        if self.count == 0 {
            0.0
        } else {
            self.total_ns as f64 / self.count as f64 / 1000.0
        }
    }

    /// Returns the number of elements processed per second, or `0.0` when the
    /// accumulated time is zero.
    #[must_use]
    pub fn throughput(&self) -> f64 {
        if self.total_ns == 0 {
            0.0
        } else {
            self.total_elements as f64 / (self.total_ns as f64 / 1_000_000_000.0)
        }
    }

    /// Returns the achieved rate in GFLOP/s (flops per second divided by
    /// `1e9`), or `0.0` when the accumulated time is zero.
    #[must_use]
    pub fn gflops(&self) -> f64 {
        if self.total_ns == 0 {
            0.0
        } else {
            self.total_flops as f64 / (self.total_ns as f64 / 1_000_000_000.0) / 1e9
        }
    }

    /// Returns `total_flops / (total_elements * 4.0)`, or `0.0` when no
    /// elements have been recorded.
    // NOTE(review): the factor 4.0 presumably is the byte size of one element
    // (e.g. `f32`), which would make this flops per byte — confirm.
    #[must_use]
    pub fn arithmetic_intensity(&self) -> f64 {
        if self.total_elements == 0 {
            0.0
        } else {
            self.total_flops as f64 / (self.total_elements as f64 * 4.0)
        }
    }

    /// Returns the achieved GFLOP/s as a fraction of `peak_gflops`, capped at
    /// `1.0`.
    ///
    /// Returns `0.0` when `peak_gflops` is zero, negative or NaN.
    #[must_use]
    pub fn cache_efficiency(&self, peak_gflops: f64) -> f64 {
        // NaN must be rejected explicitly: `NaN <= 0.0` is false, and
        // `f64::min` ignores a NaN operand, so a NaN peak would otherwise be
        // reported as a perfect efficiency of 1.0.
        if peak_gflops.is_nan() || peak_gflops <= 0.0 {
            0.0
        } else {
            (self.gflops() / peak_gflops).min(1.0)
        }
    }
}
/// Timing handle for a single tile: stores the tile's level, two coordinates
/// and an [`Instant`].
// NOTE(review): presumably created when work on a tile begins and consumed to
// produce a sample for the matching `TileStats` — the code that does so is not
// visible here; confirm.
#[derive(Debug)]
pub struct TileTimer {
/// Level of the tile being timed.
pub(crate) level: TileLevel,
/// Row coordinate of the tile. The leading underscore suggests the field is
/// currently stored but not read — confirm.
pub(crate) _row: u32,
/// Column coordinate of the tile. The leading underscore suggests the field is
/// currently stored but not read — confirm.
pub(crate) _col: u32,
/// Instant captured for this timer; presumably the moment timing started — confirm.
pub(crate) start: Instant,
}