// trueno/brick/profiler/tile_stats.rs
use std::time::Instant;

/// Aggregated timing and work counters for every sample recorded at one
/// [`TileLevel`].
///
/// An empty accumulator holds `min_ns == u64::MAX` so that the first recorded
/// sample always becomes the minimum. Both [`TileStats::new`] and
/// [`Default::default`] establish that sentinel.
#[derive(Debug, Clone)]
pub struct TileStats {
    /// Tiling level these statistics belong to.
    pub level: TileLevel,
    /// Number of samples recorded so far.
    pub count: u64,
    /// Sum of all sample durations, in nanoseconds.
    pub total_ns: u64,
    /// Shortest sample duration in nanoseconds (`u64::MAX` while empty).
    pub min_ns: u64,
    /// Longest sample duration in nanoseconds (`0` while empty).
    pub max_ns: u64,
    /// Sum of the element counts of all samples.
    pub total_elements: u64,
    /// Cache-miss counter. No method in this type updates it; callers write
    /// the field directly.
    pub cache_misses: u64,
    /// Sum of the floating-point operation counts of all samples.
    pub total_flops: u64,
}

impl Default for TileStats {
    /// Equivalent to `TileStats::new(TileLevel::default())`.
    ///
    /// Implemented by hand rather than derived: a derived `Default` would set
    /// `min_ns` to `0`, after which `add_sample` could never record a real
    /// minimum.
    fn default() -> Self {
        Self::new(TileLevel::default())
    }
}

/// Level of the tiling hierarchy a measurement belongs to.
///
/// `Macro` is the default level.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
pub enum TileLevel {
    /// Reported as `"macro"`.
    #[default]
    Macro,
    /// Reported as `"midi"`.
    Midi,
    /// Reported as `"micro"`.
    Micro,
}

impl TileLevel {
    /// Returns the lowercase name of this level.
    #[must_use]
    pub const fn name(&self) -> &'static str {
        match self {
            Self::Macro => "macro",
            Self::Midi => "midi",
            Self::Micro => "micro",
        }
    }
}

impl TileStats {
    /// Creates an empty accumulator for `level`.
    ///
    /// All counters start at zero except `min_ns`, which starts at `u64::MAX`
    /// so the first sample replaces it.
    #[must_use]
    pub const fn new(level: TileLevel) -> Self {
        Self {
            level,
            count: 0,
            total_ns: 0,
            min_ns: u64::MAX,
            max_ns: 0,
            total_elements: 0,
            cache_misses: 0,
            total_flops: 0,
        }
    }

    /// Records one sample.
    ///
    /// * `elapsed_ns` - duration of the sample in nanoseconds.
    /// * `elements` - number of elements processed; must be non-zero
    ///   (checked in debug builds only).
    /// * `flops` - number of floating-point operations performed.
    ///
    /// Running totals saturate at `u64::MAX` instead of overflowing, so a
    /// long-lived profiler neither panics (debug) nor wraps (release).
    pub fn add_sample(&mut self, elapsed_ns: u64, elements: u64, flops: u64) {
        debug_assert!(elements > 0, "CB-BUDGET: tile sample elements must be > 0");
        self.count = self.count.saturating_add(1);
        self.total_ns = self.total_ns.saturating_add(elapsed_ns);
        self.min_ns = self.min_ns.min(elapsed_ns);
        self.max_ns = self.max_ns.max(elapsed_ns);
        self.total_elements = self.total_elements.saturating_add(elements);
        self.total_flops = self.total_flops.saturating_add(flops);
    }

    /// Mean sample duration in microseconds, or `0.0` when no samples exist.
    #[must_use]
    pub fn avg_us(&self) -> f64 {
        if self.count == 0 {
            0.0
        } else {
            self.total_ns as f64 / self.count as f64 / 1000.0
        }
    }

    /// Elements processed per second, or `0.0` when no time has been recorded.
    #[must_use]
    pub fn throughput(&self) -> f64 {
        if self.total_ns == 0 {
            0.0
        } else {
            self.total_elements as f64 / (self.total_ns as f64 / 1_000_000_000.0)
        }
    }

    /// Achieved rate in GFLOP/s, or `0.0` when no time has been recorded.
    #[must_use]
    pub fn gflops(&self) -> f64 {
        if self.total_ns == 0 {
            0.0
        } else {
            self.total_flops as f64 / (self.total_ns as f64 / 1_000_000_000.0) / 1e9
        }
    }

    /// Floating-point operations per byte, or `0.0` when no elements have
    /// been recorded.
    ///
    /// The byte count is taken as `total_elements * 4`.
    // NOTE(review): the factor 4.0 presumably means 4-byte (f32) elements; confirm.
    #[must_use]
    pub fn arithmetic_intensity(&self) -> f64 {
        if self.total_elements == 0 {
            0.0
        } else {
            self.total_flops as f64 / (self.total_elements as f64 * 4.0)
        }
    }

    /// Ratio of the achieved GFLOP/s to `peak_gflops`, capped at `1.0`.
    ///
    /// Returns `0.0` when `peak_gflops` is zero, negative, or NaN.
    #[must_use]
    pub fn cache_efficiency(&self, peak_gflops: f64) -> f64 {
        // NaN must be rejected explicitly: `NaN <= 0.0` is false and
        // `f64::min` ignores a NaN operand, which would otherwise yield 1.0.
        if peak_gflops.is_nan() || peak_gflops <= 0.0 {
            0.0
        } else {
            (self.gflops() / peak_gflops).min(1.0)
        }
    }
}
145#[derive(Debug)]
147pub struct TileTimer {
148 pub(crate) level: TileLevel,
150 pub(crate) _row: u32,
152 pub(crate) _col: u32,
154 pub(crate) start: Instant,
156}