// cbtop/tracing_escalation/types.rs
use std::collections::HashMap;
use std::time::{Duration, Instant};
5
/// Reason an escalation was triggered.
///
/// Each variant has a human-readable [`description`](Self::description) and a
/// stable machine-readable [`otlp_value`](Self::otlp_value).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EscalationReason {
    /// CV exceeded its threshold (unstable performance).
    CvExceeded,
    /// Efficiency fell below its threshold.
    EfficiencyLow,
    /// Both the CV and the efficiency conditions were hit.
    Both,
    /// A memory cliff (sudden drop) was detected.
    MemoryCliff,
    /// GPU transfer overhead exceeded its threshold.
    GpuTransferOverhead,
    /// Escalation was requested manually.
    Manual,
}

impl EscalationReason {
    /// Returns a one-line, human-readable explanation of this reason.
    #[must_use]
    pub fn description(&self) -> &'static str {
        match self {
            Self::CvExceeded => "CV exceeded threshold (unstable performance)",
            Self::EfficiencyLow => "Efficiency below threshold",
            Self::Both => "Both CV exceeded and efficiency low",
            Self::MemoryCliff => "Memory cliff detected (sudden drop)",
            Self::GpuTransferOverhead => "GPU transfer overhead exceeded threshold",
            Self::Manual => "Manual escalation requested",
        }
    }

    /// Returns the short `snake_case` identifier for this reason, suitable
    /// for use as an attribute value.
    #[must_use]
    pub fn otlp_value(&self) -> &'static str {
        match self {
            Self::CvExceeded => "cv_exceeded",
            Self::EfficiencyLow => "efficiency_low",
            Self::Both => "both",
            Self::MemoryCliff => "memory_cliff",
            Self::GpuTransferOverhead => "gpu_transfer_overhead",
            Self::Manual => "manual",
        }
    }
}
48
/// Tunable limits that decide when an escalation fires and how often it may.
///
/// Build one with [`EscalationThresholds::new`] (or `Default`) and adjust
/// individual fields through the chainable `with_*` methods.
// NOTE(review): the units of the f64 thresholds are not visible in this file;
// they look like percentages — confirm against the code that consumes them.
#[derive(Debug, Clone)]
pub struct EscalationThresholds {
    /// Limit applied to the CV measurement (default `15.0`).
    pub cv_threshold: f64,
    /// Limit applied to the efficiency measurement (default `25.0`).
    pub efficiency_threshold: f64,
    /// Limit applied to GPU transfer overhead (default `50.0`).
    pub gpu_transfer_threshold: f64,
    /// Limit applied to memory-cliff detection (default `30.0`).
    pub memory_cliff_threshold: f64,
    /// Rate-limit count paired with `rate_interval` (default `100`).
    pub rate_limit: u32,
    /// Window paired with `rate_limit` (default 60 seconds).
    pub rate_interval: Duration,
}

impl Default for EscalationThresholds {
    fn default() -> Self {
        Self {
            cv_threshold: 15.0,
            efficiency_threshold: 25.0,
            gpu_transfer_threshold: 50.0,
            memory_cliff_threshold: 30.0,
            rate_limit: 100,
            rate_interval: Duration::from_secs(60),
        }
    }
}

impl EscalationThresholds {
    /// Creates a threshold set populated with the default values.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns `self` with `cv_threshold` replaced by `threshold`.
    #[must_use]
    pub fn with_cv(mut self, threshold: f64) -> Self {
        self.cv_threshold = threshold;
        self
    }

    /// Returns `self` with `efficiency_threshold` replaced by `threshold`.
    #[must_use]
    pub fn with_efficiency(mut self, threshold: f64) -> Self {
        self.efficiency_threshold = threshold;
        self
    }

    /// Returns `self` with `gpu_transfer_threshold` replaced by `threshold`.
    #[must_use]
    pub fn with_gpu_transfer(mut self, threshold: f64) -> Self {
        self.gpu_transfer_threshold = threshold;
        self
    }

    /// Returns `self` with `memory_cliff_threshold` replaced by `threshold`.
    ///
    /// Completes the builder set: every other field already had a `with_*`
    /// method, so this one could previously only be set by direct assignment.
    #[must_use]
    pub fn with_memory_cliff(mut self, threshold: f64) -> Self {
        self.memory_cliff_threshold = threshold;
        self
    }

    /// Returns `self` with `rate_limit` replaced by `limit`.
    #[must_use]
    pub fn with_rate_limit(mut self, limit: u32) -> Self {
        self.rate_limit = limit;
        self
    }

    /// Returns `self` with `rate_interval` replaced by `interval`.
    #[must_use]
    pub fn with_rate_interval(mut self, interval: Duration) -> Self {
        self.rate_interval = interval;
        self
    }
}
115
/// Time spent per syscall category, alongside a total for the traced span.
///
/// All fields carry the `_us` suffix; they are accumulated with saturating
/// arithmetic so a counter pins at `u64::MAX` instead of overflowing.
#[derive(Debug, Clone, Default)]
pub struct SyscallBreakdown {
    /// Time attributed to `mmap`, `munmap`, `mprotect` and `brk`.
    pub mmap_us: u64,
    /// Time attributed to `futex`.
    pub futex_us: u64,
    /// Time attributed to `ioctl`.
    pub ioctl_us: u64,
    /// Time attributed to `read`, `pread64` and `readv`.
    pub read_us: u64,
    /// Time attributed to `write`, `pwrite64` and `writev`.
    pub write_us: u64,
    /// Time attributed to every syscall not listed above.
    pub other_us: u64,
    /// Total time of the span; set by the caller, never by `add_syscall`.
    pub total_us: u64,
}

impl SyscallBreakdown {
    /// Creates a breakdown with every counter at zero.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Sum of all per-category counters, saturating at `u64::MAX`.
    ///
    /// `add_syscall` lets each counter saturate, so a plain `+` here could
    /// overflow (panic in debug builds, wrap in release builds).
    fn syscall_total_us(&self) -> u64 {
        [
            self.mmap_us,
            self.futex_us,
            self.ioctl_us,
            self.read_us,
            self.write_us,
            self.other_us,
        ]
        .iter()
        .fold(0u64, |acc, &us| acc.saturating_add(us))
    }

    /// Returns the part of `total_us` not attributed to any syscall category.
    ///
    /// The result is clamped to zero when the categories add up to more than
    /// `total_us`.
    #[must_use]
    pub fn compute_us(&self) -> u64 {
        self.total_us.saturating_sub(self.syscall_total_us())
    }

    /// Returns syscall time as a percentage of `total_us`.
    ///
    /// Yields `0.0` when `total_us` is zero. The value can exceed `100.0` if
    /// the categories add up to more than `total_us`.
    #[must_use]
    pub fn syscall_overhead_percent(&self) -> f64 {
        if self.total_us == 0 {
            return 0.0;
        }
        (self.syscall_total_us() as f64 / self.total_us as f64) * 100.0
    }

    /// Returns the name of the category with the largest counter.
    ///
    /// Returns `"none"` when every counter is zero. On a tie the category
    /// listed last (in the order mmap, futex, ioctl, read, write, other) wins,
    /// because `max_by_key` keeps the last maximum.
    #[must_use]
    pub fn dominant_syscall(&self) -> &'static str {
        let categories = [
            (self.mmap_us, "mmap"),
            (self.futex_us, "futex"),
            (self.ioctl_us, "ioctl"),
            (self.read_us, "read"),
            (self.write_us, "write"),
            (self.other_us, "other"),
        ];

        // Dropping zero entries first makes the all-zero case fall through to
        // "none" without a separate pre-check.
        categories
            .iter()
            .filter(|(us, _)| *us > 0)
            .max_by_key(|(us, _)| *us)
            .map(|(_, name)| *name)
            .unwrap_or("none")
    }

    /// Adds `duration_us` to the counter for the category `syscall` belongs to.
    ///
    /// Related calls share a counter (e.g. `munmap` counts as mmap, `readv`
    /// as read); unrecognised names go to `other_us`. `total_us` is not
    /// modified. Additions saturate at `u64::MAX`.
    pub fn add_syscall(&mut self, syscall: &str, duration_us: u64) {
        let counter = match syscall {
            "mmap" | "munmap" | "mprotect" | "brk" => &mut self.mmap_us,
            "futex" => &mut self.futex_us,
            "ioctl" => &mut self.ioctl_us,
            "read" | "pread64" | "readv" => &mut self.read_us,
            "write" | "pwrite64" | "writev" => &mut self.write_us,
            _ => &mut self.other_us,
        };
        *counter = counter.saturating_add(duration_us);
    }

    /// Returns every counter, plus the derived `syscall.compute_us`, keyed by
    /// its `syscall.*` attribute name.
    #[must_use]
    pub fn as_otlp_attributes(&self) -> HashMap<String, u64> {
        HashMap::from([
            ("syscall.mmap_us".to_string(), self.mmap_us),
            ("syscall.futex_us".to_string(), self.futex_us),
            ("syscall.ioctl_us".to_string(), self.ioctl_us),
            ("syscall.read_us".to_string(), self.read_us),
            ("syscall.write_us".to_string(), self.write_us),
            ("syscall.other_us".to_string(), self.other_us),
            ("syscall.compute_us".to_string(), self.compute_us()),
            ("syscall.total_us".to_string(), self.total_us),
        ])
    }
}
221
222#[derive(Debug, Clone)]
224pub struct TraceResult {
225 pub brick_name: String,
227 pub budget_us: u64,
229 pub actual_us: u64,
231 pub reason: EscalationReason,
233 pub syscall_breakdown: SyscallBreakdown,
235 pub timestamp: Instant,
237}
238
239impl TraceResult {
240 pub fn over_budget(&self) -> bool {
242 self.actual_us > self.budget_us
243 }
244
245 pub fn efficiency(&self) -> f64 {
247 if self.actual_us == 0 {
248 return 100.0;
249 }
250 (self.budget_us as f64 / self.actual_us as f64) * 100.0
251 }
252
253 pub fn as_otlp_attributes(&self) -> HashMap<String, String> {
255 let mut attrs = HashMap::new();
256 attrs.insert("brick.name".to_string(), self.brick_name.clone());
257 attrs.insert("brick.budget_us".to_string(), self.budget_us.to_string());
258 attrs.insert("brick.actual_us".to_string(), self.actual_us.to_string());
259 attrs.insert(
260 "brick.efficiency".to_string(),
261 format!("{:.1}", self.efficiency()),
262 );
263 attrs.insert(
264 "brick.over_budget".to_string(),
265 self.over_budget().to_string(),
266 );
267 attrs.insert(
268 "escalation.reason".to_string(),
269 self.reason.otlp_value().to_string(),
270 );
271 attrs.insert(
272 "syscall.overhead_percent".to_string(),
273 format!("{:.1}", self.syscall_breakdown.syscall_overhead_percent()),
274 );
275 attrs.insert(
276 "syscall.dominant".to_string(),
277 self.syscall_breakdown.dominant_syscall().to_string(),
278 );
279 attrs
280 }
281}