1use crate::kernel::KernelMetadata;
7use serde::{Deserialize, Serialize};
8use std::collections::HashMap;
9use std::sync::{Arc, RwLock};
10use std::time::{Duration, Instant};
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub enum SLOResult {
15 Met {
17 actual: f64,
19 target: f64,
21 headroom_pct: f64,
23 },
24 AtRisk {
26 actual: f64,
28 target: f64,
30 usage_pct: f64,
32 },
33 Violated {
35 actual: f64,
37 target: f64,
39 overage_pct: f64,
41 },
42}
43
44impl SLOResult {
45 #[must_use]
47 pub fn is_met(&self) -> bool {
48 matches!(self, SLOResult::Met { .. })
49 }
50
51 #[must_use]
53 pub fn is_at_risk(&self) -> bool {
54 matches!(self, SLOResult::AtRisk { .. })
55 }
56
57 #[must_use]
59 pub fn is_violated(&self) -> bool {
60 matches!(self, SLOResult::Violated { .. })
61 }
62}
63
64#[derive(Debug, Default)]
66pub struct SLOValidator {
67 overrides: HashMap<String, SLOOverride>,
69 strict_mode: bool,
71}
72
/// Per-kernel replacement for the SLO targets declared in kernel metadata.
#[derive(Debug, Clone)]
pub struct SLOOverride {
    /// Throughput target in operations per second; `None` keeps the metadata value.
    pub throughput: Option<u64>,
    /// Latency target in microseconds; `None` keeps the metadata value.
    pub latency_us: Option<f64>,
    /// Width, in percent of the target, of the band reported as "at risk".
    pub tolerance_pct: f64,
}
83
84impl Default for SLOOverride {
85 fn default() -> Self {
86 Self {
87 throughput: None,
88 latency_us: None,
89 tolerance_pct: 10.0,
90 }
91 }
92}
93
94impl SLOValidator {
95 #[must_use]
97 pub fn new() -> Self {
98 Self::default()
99 }
100
101 #[must_use]
103 pub fn with_strict_mode(mut self) -> Self {
104 self.strict_mode = true;
105 self
106 }
107
108 pub fn with_override(mut self, kernel_id: impl Into<String>, override_: SLOOverride) -> Self {
110 self.overrides.insert(kernel_id.into(), override_);
111 self
112 }
113
114 #[must_use]
116 pub fn validate_throughput(
117 &self,
118 metadata: &KernelMetadata,
119 actual_ops_per_sec: u64,
120 ) -> SLOResult {
121 let target = self
122 .overrides
123 .get(&metadata.id)
124 .and_then(|o| o.throughput)
125 .unwrap_or(metadata.expected_throughput);
126
127 let tolerance_pct = self
128 .overrides
129 .get(&metadata.id)
130 .map(|o| o.tolerance_pct)
131 .unwrap_or(10.0);
132
133 let actual = actual_ops_per_sec as f64;
134 let target_f64 = target as f64;
135
136 if actual >= target_f64 {
138 let headroom = ((actual - target_f64) / target_f64) * 100.0;
139 SLOResult::Met {
140 actual,
141 target: target_f64,
142 headroom_pct: headroom,
143 }
144 } else {
145 let usage = (actual / target_f64) * 100.0;
146 if usage >= (100.0 - tolerance_pct) {
147 SLOResult::AtRisk {
148 actual,
149 target: target_f64,
150 usage_pct: usage,
151 }
152 } else {
153 let overage = ((target_f64 - actual) / target_f64) * 100.0;
154 SLOResult::Violated {
155 actual,
156 target: target_f64,
157 overage_pct: overage,
158 }
159 }
160 }
161 }
162
163 #[must_use]
165 pub fn validate_latency(&self, metadata: &KernelMetadata, actual_latency_us: f64) -> SLOResult {
166 let target = self
167 .overrides
168 .get(&metadata.id)
169 .and_then(|o| o.latency_us)
170 .unwrap_or(metadata.target_latency_us);
171
172 let tolerance_pct = self
173 .overrides
174 .get(&metadata.id)
175 .map(|o| o.tolerance_pct)
176 .unwrap_or(10.0);
177
178 if actual_latency_us <= target {
180 let headroom = ((target - actual_latency_us) / target) * 100.0;
181 SLOResult::Met {
182 actual: actual_latency_us,
183 target,
184 headroom_pct: headroom,
185 }
186 } else {
187 let usage = (actual_latency_us / target) * 100.0;
188 if usage <= (100.0 + tolerance_pct) {
189 SLOResult::AtRisk {
190 actual: actual_latency_us,
191 target,
192 usage_pct: usage,
193 }
194 } else {
195 let overage = ((actual_latency_us - target) / target) * 100.0;
196 SLOResult::Violated {
197 actual: actual_latency_us,
198 target,
199 overage_pct: overage,
200 }
201 }
202 }
203 }
204
205 #[must_use]
207 pub fn is_strict(&self) -> bool {
208 self.strict_mode
209 }
210}
211
/// Running operation and latency counters for a single kernel.
#[derive(Debug, Clone, Default)]
pub struct KernelMetrics {
    /// Number of operations recorded.
    pub operations: u64,
    /// Time span used as the denominator by `throughput`; `record` does not update it.
    pub total_time: Duration,
    /// Smallest latency recorded so far; `None` before the first sample.
    pub min_latency: Option<Duration>,
    /// Largest latency recorded so far; `None` before the first sample.
    pub max_latency: Option<Duration>,
    /// Sum of all recorded latencies.
    pub latency_sum: Duration,
    /// Number of samples that contributed to `latency_sum`.
    pub latency_count: u64,
}
228
229impl KernelMetrics {
230 #[must_use]
232 pub fn new() -> Self {
233 Self::default()
234 }
235
236 pub fn record(&mut self, latency: Duration) {
238 self.operations += 1;
239 self.latency_count += 1;
240 self.latency_sum += latency;
241
242 match self.min_latency {
243 Some(min) if latency < min => self.min_latency = Some(latency),
244 None => self.min_latency = Some(latency),
245 _ => {}
246 }
247
248 match self.max_latency {
249 Some(max) if latency > max => self.max_latency = Some(latency),
250 None => self.max_latency = Some(latency),
251 _ => {}
252 }
253 }
254
255 #[must_use]
257 pub fn avg_latency(&self) -> Option<Duration> {
258 if self.latency_count > 0 {
259 Some(self.latency_sum / self.latency_count as u32)
260 } else {
261 None
262 }
263 }
264
265 #[must_use]
267 pub fn throughput(&self) -> f64 {
268 if self.total_time.is_zero() {
269 0.0
270 } else {
271 self.operations as f64 / self.total_time.as_secs_f64()
272 }
273 }
274
275 pub fn reset(&mut self) {
277 *self = Self::default();
278 }
279}
280
281#[derive(Debug, Clone)]
283pub struct MetricsCollector {
284 metrics: Arc<RwLock<HashMap<String, KernelMetrics>>>,
285}
286
287impl MetricsCollector {
288 #[must_use]
290 pub fn new() -> Self {
291 Self {
292 metrics: Arc::new(RwLock::new(HashMap::new())),
293 }
294 }
295
296 pub fn record(&self, kernel_id: &str, latency: Duration) {
298 let mut metrics = self.metrics.write().unwrap();
299 metrics
300 .entry(kernel_id.to_string())
301 .or_default()
302 .record(latency);
303 }
304
305 #[must_use]
307 pub fn get(&self, kernel_id: &str) -> Option<KernelMetrics> {
308 let metrics = self.metrics.read().unwrap();
309 metrics.get(kernel_id).cloned()
310 }
311
312 #[must_use]
314 pub fn all(&self) -> HashMap<String, KernelMetrics> {
315 self.metrics.read().unwrap().clone()
316 }
317
318 pub fn reset(&self, kernel_id: &str) {
320 let mut metrics = self.metrics.write().unwrap();
321 if let Some(m) = metrics.get_mut(kernel_id) {
322 m.reset();
323 }
324 }
325
326 pub fn reset_all(&self) {
328 let mut metrics = self.metrics.write().unwrap();
329 metrics.clear();
330 }
331}
332
333impl Default for MetricsCollector {
334 fn default() -> Self {
335 Self::new()
336 }
337}
338
339pub struct TimingGuard<'a> {
341 collector: &'a MetricsCollector,
342 kernel_id: String,
343 start: Instant,
344}
345
346impl<'a> TimingGuard<'a> {
347 #[must_use]
349 pub fn new(collector: &'a MetricsCollector, kernel_id: impl Into<String>) -> Self {
350 Self {
351 collector,
352 kernel_id: kernel_id.into(),
353 start: Instant::now(),
354 }
355 }
356}
357
358impl<'a> Drop for TimingGuard<'a> {
359 fn drop(&mut self) {
360 let latency = self.start.elapsed();
361 self.collector.record(&self.kernel_id, latency);
362 }
363}
364
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::Domain;
    use crate::kernel::KernelMetadata;

    /// Shared fixture: a ring kernel named "test-kernel" built with
    /// `with_throughput(100_000)` and `with_latency_us(1.0)`.
    fn test_metadata() -> KernelMetadata {
        KernelMetadata::ring("test-kernel", Domain::Core)
            .with_throughput(100_000)
            .with_latency_us(1.0)
    }

    #[test]
    fn test_throughput_met() {
        let result = SLOValidator::new().validate_throughput(&test_metadata(), 120_000);
        assert!(result.is_met());

        // 120k against the 100k fixture value should leave about 20% headroom.
        if let SLOResult::Met { headroom_pct, .. } = result {
            assert!((headroom_pct - 20.0).abs() < 0.1);
        }
    }

    #[test]
    fn test_throughput_at_risk() {
        let result = SLOValidator::new().validate_throughput(&test_metadata(), 95_000);
        assert!(result.is_at_risk());
    }

    #[test]
    fn test_throughput_violated() {
        let result = SLOValidator::new().validate_throughput(&test_metadata(), 50_000);
        assert!(result.is_violated());
    }

    #[test]
    fn test_latency_met() {
        let result = SLOValidator::new().validate_latency(&test_metadata(), 0.5);
        assert!(result.is_met());
    }

    #[test]
    fn test_latency_at_risk() {
        let result = SLOValidator::new().validate_latency(&test_metadata(), 1.05);
        assert!(result.is_at_risk());
    }

    #[test]
    fn test_latency_violated() {
        let result = SLOValidator::new().validate_latency(&test_metadata(), 2.0);
        assert!(result.is_violated());
    }

    #[test]
    fn test_metrics_recording() {
        let collector = MetricsCollector::new();
        for micros in [100, 200, 150] {
            collector.record("test", Duration::from_micros(micros));
        }

        let metrics = collector.get("test").unwrap();
        assert_eq!(metrics.operations, 3);
        assert_eq!(metrics.min_latency, Some(Duration::from_micros(100)));
        assert_eq!(metrics.max_latency, Some(Duration::from_micros(200)));
        assert_eq!(metrics.avg_latency(), Some(Duration::from_micros(150)));
    }

    #[test]
    fn test_slo_override() {
        // Only throughput and tolerance are overridden; latency keeps its default of `None`.
        let override_ = SLOOverride {
            throughput: Some(50_000),
            tolerance_pct: 5.0,
            ..SLOOverride::default()
        };
        let validator = SLOValidator::new().with_override("test-kernel", override_);

        // 60k beats the overridden 50k target.
        let result = validator.validate_throughput(&test_metadata(), 60_000);
        assert!(result.is_met());
    }
}