use crate::kernel::KernelMetadata;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::sync::{Arc, RwLock};
use std::time::{Duration, Instant};
/// Outcome of comparing a measured value against its SLO target.
///
/// Values of this type are produced by `SLOValidator::validate_throughput`
/// and `SLOValidator::validate_latency` in this file. `actual` and `target`
/// are in the unit of the validated quantity (the measurement passed to the
/// validator); every `*_pct` field is a percentage relative to `target`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum SLOResult {
/// The measurement satisfies the target.
Met {
/// Measured value.
actual: f64,
/// Target the measurement was compared against.
target: f64,
/// Margin between `actual` and `target`, as a percentage of `target`.
headroom_pct: f64,
},
/// The measurement misses the target but stays inside the tolerance band.
AtRisk {
/// Measured value.
actual: f64,
/// Target the measurement was compared against.
target: f64,
/// `actual` expressed as a percentage of `target`.
usage_pct: f64,
},
/// The measurement misses the target by more than the tolerance allows.
Violated {
/// Measured value.
actual: f64,
/// Target the measurement was compared against.
target: f64,
/// Distance between `actual` and `target`, as a percentage of `target`.
overage_pct: f64,
},
}
impl SLOResult {
#[must_use]
pub fn is_met(&self) -> bool {
matches!(self, SLOResult::Met { .. })
}
#[must_use]
pub fn is_at_risk(&self) -> bool {
matches!(self, SLOResult::AtRisk { .. })
}
#[must_use]
pub fn is_violated(&self) -> bool {
matches!(self, SLOResult::Violated { .. })
}
}
/// Checks measured kernel performance against SLO targets.
///
/// Targets are read from the kernel's metadata unless an `SLOOverride` is
/// registered for that kernel id.
#[derive(Debug, Default)]
pub struct SLOValidator {
/// Per-kernel overrides, keyed by kernel id.
overrides: HashMap<String, SLOOverride>,
/// Flag set by `with_strict_mode` and reported by `is_strict`; the
/// validation methods in this file do not read it.
strict_mode: bool,
}
/// Per-kernel replacement for the SLO targets declared in kernel metadata.
#[derive(Debug, Clone)]
pub struct SLOOverride {
    /// Throughput target; `None` keeps the metadata's expected throughput.
    pub throughput: Option<u64>,
    /// Latency target in microseconds; `None` keeps the metadata's target latency.
    pub latency_us: Option<f64>,
    /// Width of the "at risk" band, as a percentage of the target.
    pub tolerance_pct: f64,
}

impl Default for SLOOverride {
    /// Overrides neither target and uses a tolerance of 10 percent.
    fn default() -> Self {
        const DEFAULT_TOLERANCE_PCT: f64 = 10.0;

        SLOOverride {
            tolerance_pct: DEFAULT_TOLERANCE_PCT,
            latency_us: None,
            throughput: None,
        }
    }
}
impl SLOValidator {
    /// Tolerance, in percent of the target, used for kernels without an override.
    const DEFAULT_TOLERANCE_PCT: f64 = 10.0;

    /// Creates a validator with no overrides and strict mode disabled.
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Enables strict mode (builder style).
    ///
    /// The flag is only stored and exposed through [`SLOValidator::is_strict`];
    /// the validation methods below do not consult it.
    #[must_use]
    pub fn with_strict_mode(mut self) -> Self {
        self.strict_mode = true;
        self
    }

    /// Registers the override for `kernel_id`, replacing any previous one
    /// (builder style).
    #[must_use]
    pub fn with_override(mut self, kernel_id: impl Into<String>, override_: SLOOverride) -> Self {
        self.overrides.insert(kernel_id.into(), override_);
        self
    }

    /// Classifies a measured throughput (higher is better).
    ///
    /// The target is the override's `throughput` when one is registered for
    /// `metadata.id` and set, otherwise `metadata.expected_throughput`.
    ///
    /// * `Met` — `actual >= target`; `headroom_pct` is the surplus relative to
    ///   the target.
    /// * `AtRisk` — below target but at least `100 - tolerance_pct` percent of it.
    /// * `Violated` — anything lower; `overage_pct` is the shortfall relative
    ///   to the target.
    ///
    /// A zero target met by a zero measurement reports `headroom_pct == 0.0`
    /// instead of the NaN that `0.0 / 0.0` would produce.
    #[must_use]
    pub fn validate_throughput(
        &self,
        metadata: &KernelMetadata,
        actual_ops_per_sec: u64,
    ) -> SLOResult {
        // One map lookup serves both the target and the tolerance.
        let override_ = self.overrides.get(&metadata.id);
        let target_ops = override_
            .and_then(|o| o.throughput)
            .unwrap_or(metadata.expected_throughput);
        let tolerance_pct = Self::tolerance_for(override_);

        // u64 -> f64 is exact up to 2^53; beyond that the comparison is approximate.
        let actual = actual_ops_per_sec as f64;
        let target = target_ops as f64;

        if actual >= target {
            return SLOResult::Met {
                actual,
                target,
                headroom_pct: Self::pct_of_target(actual - target, target),
            };
        }

        // Here `target > actual >= 0`, so `target` is strictly positive and the
        // divisions below cannot hit zero.
        let usage_pct = (actual / target) * 100.0;
        if usage_pct >= 100.0 - tolerance_pct {
            SLOResult::AtRisk {
                actual,
                target,
                usage_pct,
            }
        } else {
            SLOResult::Violated {
                actual,
                target,
                overage_pct: ((target - actual) / target) * 100.0,
            }
        }
    }

    /// Classifies a measured latency in microseconds (lower is better).
    ///
    /// The target is the override's `latency_us` when one is registered for
    /// `metadata.id` and set, otherwise `metadata.target_latency_us`.
    ///
    /// * `Met` — `actual <= target`; `headroom_pct` is the unused budget
    ///   relative to the target.
    /// * `AtRisk` — above target but at most `100 + tolerance_pct` percent of it.
    /// * `Violated` — anything higher; `overage_pct` is the excess relative to
    ///   the target.
    ///
    /// A zero target met by a zero measurement reports `headroom_pct == 0.0`
    /// instead of NaN. A positive latency against a zero target is `Violated`
    /// with an infinite `overage_pct`.
    #[must_use]
    pub fn validate_latency(&self, metadata: &KernelMetadata, actual_latency_us: f64) -> SLOResult {
        let override_ = self.overrides.get(&metadata.id);
        let target = override_
            .and_then(|o| o.latency_us)
            .unwrap_or(metadata.target_latency_us);
        let tolerance_pct = Self::tolerance_for(override_);

        if actual_latency_us <= target {
            return SLOResult::Met {
                actual: actual_latency_us,
                target,
                headroom_pct: Self::pct_of_target(target - actual_latency_us, target),
            };
        }

        let usage_pct = (actual_latency_us / target) * 100.0;
        if usage_pct <= 100.0 + tolerance_pct {
            SLOResult::AtRisk {
                actual: actual_latency_us,
                target,
                usage_pct,
            }
        } else {
            SLOResult::Violated {
                actual: actual_latency_us,
                target,
                overage_pct: ((actual_latency_us - target) / target) * 100.0,
            }
        }
    }

    /// Reports whether strict mode was enabled via [`SLOValidator::with_strict_mode`].
    #[must_use]
    pub fn is_strict(&self) -> bool {
        self.strict_mode
    }

    /// Tolerance of the given override, or the default when there is none.
    fn tolerance_for(override_: Option<&SLOOverride>) -> f64 {
        override_.map_or(Self::DEFAULT_TOLERANCE_PCT, |o| o.tolerance_pct)
    }

    /// Expresses `delta` as a percentage of `target`, mapping a zero `delta`
    /// to `0.0` so that a zero target cannot turn it into NaN.
    fn pct_of_target(delta: f64, target: f64) -> f64 {
        if delta == 0.0 {
            0.0
        } else {
            (delta / target) * 100.0
        }
    }
}
/// Latency and operation counters accumulated for a single kernel.
#[derive(Debug, Clone, Default)]
pub struct KernelMetrics {
    /// Number of samples passed to [`KernelMetrics::record`].
    pub operations: u64,
    /// Elapsed time used as the denominator of [`KernelMetrics::throughput`].
    /// `record` never updates it; whoever owns the metrics sets it.
    pub total_time: Duration,
    /// Smallest latency recorded so far, `None` before the first sample.
    pub min_latency: Option<Duration>,
    /// Largest latency recorded so far, `None` before the first sample.
    pub max_latency: Option<Duration>,
    /// Sum of all recorded latencies.
    pub latency_sum: Duration,
    /// Number of latencies contributing to `latency_sum`.
    pub latency_count: u64,
}

impl KernelMetrics {
    /// Creates an empty set of metrics (all counters zero, no min/max).
    #[must_use]
    pub fn new() -> Self {
        Self::default()
    }

    /// Adds one latency sample, updating the counters, the sum and the
    /// running minimum and maximum.
    pub fn record(&mut self, latency: Duration) {
        self.operations += 1;
        self.latency_count += 1;
        self.latency_sum += latency;
        self.min_latency = Some(
            self.min_latency
                .map_or(latency, |current| current.min(latency)),
        );
        self.max_latency = Some(
            self.max_latency
                .map_or(latency, |current| current.max(latency)),
        );
    }

    /// Mean of the recorded latencies, or `None` when nothing was recorded.
    ///
    /// The division is carried out on the full 64-bit sample count. Narrowing
    /// the count to `u32` would yield a wrong mean above `u32::MAX` samples and
    /// a divide-by-zero panic whenever the count is a multiple of 2^32.
    #[must_use]
    pub fn avg_latency(&self) -> Option<Duration> {
        const NANOS_PER_SEC: u128 = 1_000_000_000;

        if self.latency_count == 0 {
            return None;
        }

        let avg_nanos = self.latency_sum.as_nanos() / u128::from(self.latency_count);
        // The mean never exceeds the sum, whose whole seconds fit in u64, and
        // the remainder is below 10^9, so neither cast can truncate.
        let secs = (avg_nanos / NANOS_PER_SEC) as u64;
        let subsec_nanos = (avg_nanos % NANOS_PER_SEC) as u32;
        Some(Duration::new(secs, subsec_nanos))
    }

    /// Operations per second over `total_time`; `0.0` while `total_time` is zero.
    #[must_use]
    pub fn throughput(&self) -> f64 {
        if self.total_time.is_zero() {
            0.0
        } else {
            self.operations as f64 / self.total_time.as_secs_f64()
        }
    }

    /// Restores every field to its default value.
    pub fn reset(&mut self) {
        *self = Self::default();
    }
}
/// Shared registry of [`KernelMetrics`], keyed by kernel id.
///
/// Cloning the collector clones the `Arc`, so all clones read and write the
/// same underlying map.
#[derive(Debug, Clone)]
pub struct MetricsCollector {
    metrics: Arc<RwLock<HashMap<String, KernelMetrics>>>,
}

impl MetricsCollector {
    /// Creates a collector with no recorded kernels.
    #[must_use]
    pub fn new() -> Self {
        Self {
            metrics: Arc::new(RwLock::new(HashMap::new())),
        }
    }

    /// Records one latency sample for `kernel_id`, creating its entry on first use.
    pub fn record(&self, kernel_id: &str, latency: Duration) {
        let mut metrics = self.write_map();
        // Common case: the kernel is already known, so no key allocation is needed.
        if let Some(existing) = metrics.get_mut(kernel_id) {
            existing.record(latency);
            return;
        }
        metrics
            .entry(kernel_id.to_string())
            .or_default()
            .record(latency);
    }

    /// Returns a snapshot (clone) of the metrics for `kernel_id`, if any exist.
    #[must_use]
    pub fn get(&self, kernel_id: &str) -> Option<KernelMetrics> {
        self.read_map().get(kernel_id).cloned()
    }

    /// Returns a snapshot (clone) of the metrics of every kernel.
    #[must_use]
    pub fn all(&self) -> HashMap<String, KernelMetrics> {
        self.read_map().clone()
    }

    /// Resets the metrics of `kernel_id` in place; unknown ids are ignored.
    pub fn reset(&self, kernel_id: &str) {
        if let Some(entry) = self.write_map().get_mut(kernel_id) {
            entry.reset();
        }
    }

    /// Removes the metrics of every kernel.
    pub fn reset_all(&self) {
        self.write_map().clear();
    }

    /// Acquires the read lock, recovering the map if the lock was poisoned.
    ///
    /// A poisoned lock only means some thread panicked while holding the write
    /// guard. The counters remain usable, and panicking here would also panic
    /// inside `TimingGuard::drop`, which aborts the process when it happens
    /// during unwinding.
    fn read_map(&self) -> std::sync::RwLockReadGuard<'_, HashMap<String, KernelMetrics>> {
        self.metrics
            .read()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    /// Acquires the write lock, recovering the map if the lock was poisoned.
    fn write_map(&self) -> std::sync::RwLockWriteGuard<'_, HashMap<String, KernelMetrics>> {
        self.metrics
            .write()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }
}

impl Default for MetricsCollector {
    /// Equivalent to [`MetricsCollector::new`].
    fn default() -> Self {
        Self::new()
    }
}
/// Scope guard that reports the time between its creation and its drop as one
/// latency sample for a kernel.
pub struct TimingGuard<'a> {
    /// Collector that receives the sample when the guard is dropped.
    collector: &'a MetricsCollector,
    /// Kernel the sample is attributed to.
    kernel_id: String,
    /// Instant captured when the guard was created.
    start: Instant,
}

impl<'a> TimingGuard<'a> {
    /// Starts timing `kernel_id`; the measurement ends when the guard is dropped.
    #[must_use]
    pub fn new(collector: &'a MetricsCollector, kernel_id: impl Into<String>) -> Self {
        let kernel_id = kernel_id.into();
        // The clock starts only after the id has been converted.
        let start = Instant::now();
        TimingGuard {
            collector,
            kernel_id,
            start,
        }
    }
}

impl Drop for TimingGuard<'_> {
    /// Records the elapsed time since construction with the collector.
    fn drop(&mut self) {
        let elapsed = self.start.elapsed();
        self.collector.record(self.kernel_id.as_str(), elapsed);
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::domain::Domain;
    use crate::kernel::KernelMetadata;

    /// Fixture: kernel "test-kernel" with a throughput of 100_000 and a
    /// latency of 1.0 configured through the metadata builder.
    fn test_metadata() -> KernelMetadata {
        let base = KernelMetadata::ring("test-kernel", Domain::Core);
        base.with_throughput(100_000).with_latency_us(1.0)
    }

    // 20% above the throughput target: met, with 20% headroom.
    #[test]
    fn test_throughput_met() {
        let outcome = SLOValidator::new().validate_throughput(&test_metadata(), 120_000);
        assert!(outcome.is_met());
        if let SLOResult::Met { headroom_pct, .. } = outcome {
            let deviation = (headroom_pct - 20.0).abs();
            assert!(deviation < 0.1);
        }
    }

    // 5% below the target is inside the default 10% tolerance.
    #[test]
    fn test_throughput_at_risk() {
        let outcome = SLOValidator::new().validate_throughput(&test_metadata(), 95_000);
        assert!(outcome.is_at_risk());
    }

    // Half the target is far outside the tolerance.
    #[test]
    fn test_throughput_violated() {
        let outcome = SLOValidator::new().validate_throughput(&test_metadata(), 50_000);
        assert!(outcome.is_violated());
    }

    // Half the latency budget: met.
    #[test]
    fn test_latency_met() {
        let outcome = SLOValidator::new().validate_latency(&test_metadata(), 0.5);
        assert!(outcome.is_met());
    }

    // 5% over the latency budget is inside the default 10% tolerance.
    #[test]
    fn test_latency_at_risk() {
        let outcome = SLOValidator::new().validate_latency(&test_metadata(), 1.05);
        assert!(outcome.is_at_risk());
    }

    // Twice the latency budget: violated.
    #[test]
    fn test_latency_violated() {
        let outcome = SLOValidator::new().validate_latency(&test_metadata(), 2.0);
        assert!(outcome.is_violated());
    }

    // Three samples: the count, min, max and mean must all be tracked.
    #[test]
    fn test_metrics_recording() {
        let collector = MetricsCollector::new();
        for &micros in &[100u64, 200, 150] {
            collector.record("test", Duration::from_micros(micros));
        }

        let snapshot = collector.get("test").unwrap();
        assert_eq!(snapshot.operations, 3);
        assert_eq!(snapshot.min_latency, Some(Duration::from_micros(100)));
        assert_eq!(snapshot.max_latency, Some(Duration::from_micros(200)));
        assert_eq!(snapshot.avg_latency(), Some(Duration::from_micros(150)));
    }

    // An override lowers the throughput target, so 60_000 is enough.
    #[test]
    fn test_slo_override() {
        let relaxed = SLOOverride {
            throughput: Some(50_000),
            latency_us: None,
            tolerance_pct: 5.0,
        };
        let validator = SLOValidator::new().with_override("test-kernel", relaxed);

        let outcome = validator.validate_throughput(&test_metadata(), 60_000);
        assert!(outcome.is_met());
    }
}