1use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::PathBuf;
9use std::sync::{Mutex, OnceLock};
10use std::time::{Duration, Instant};
11
12use crate::core::budget_tracker::BudgetTracker;
13use crate::core::events;
14
/// Top-level shape of an `slos.toml` file, as deserialized by `load_slos_from_disk`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SloConfig {
    /// The configured objectives (one per `[[slo]]` table in TOML).
    /// Defaults to an empty list when the key is absent.
    #[serde(default)]
    pub slo: Vec<SloDefinition>,
}
24
/// One service-level objective: a named bound on a single metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SloDefinition {
    /// Identifier shown in reports and violation history; `evaluate` also uses it
    /// as the key for its per-SLO debounce state.
    pub name: String,
    /// Metric sampled when this objective is evaluated.
    pub metric: SloMetric,
    /// Bound the sampled value is compared against.
    pub threshold: f64,
    /// Whether `threshold` is an upper (`Max`) or lower (`Min`) bound.
    /// Defaults to `Max` when omitted from the config.
    #[serde(default)]
    pub direction: SloDirection,
    /// Action attached to a violation of this objective.
    /// Defaults to `Warn` when omitted from the config.
    #[serde(default)]
    pub action: SloAction,
}
35
/// Metrics an SLO can be evaluated against.
///
/// Serialized in `snake_case` (for example `session_context_tokens`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SloMetric {
    /// Tokens used, as reported by the global `BudgetTracker`.
    SessionContextTokens,
    /// Cost in USD, as reported by the global `BudgetTracker`.
    SessionCostUsd,
    /// Compression ratio from the context ledger. `read_metric` reports `0.0`
    /// until the ledger holds at least 5000 original tokens.
    CompressionRatio,
    /// Shell usage count, as reported by the global `BudgetTracker`.
    ShellInvocations,
    /// Tool call count from the global `BudgetTracker`; `read_metric` treats this
    /// identically to `ToolCallCount`.
    ToolCallsTotal,
    /// Tool call count from the global `BudgetTracker`; `read_metric` treats this
    /// identically to `ToolCallsTotal`.
    ToolCallCount,
}
46
/// Which side of the threshold counts as a violation (see `is_violated`).
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SloDirection {
    /// Threshold is an upper bound: violated when the value is strictly greater.
    /// This is the default.
    #[default]
    Max,
    /// Threshold is a lower bound: violated when the value is strictly smaller.
    Min,
}
54
/// Action attached to an SLO violation.
///
/// `evaluate` and `evaluate_quiet` rank these by severity as
/// `Warn` < `Throttle` < `Block` when computing `SloSnapshot::worst_action`.
/// How callers react to each action is decided outside this file.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum SloAction {
    /// Least severe action; the default.
    #[default]
    Warn,
    /// Intermediate severity.
    Throttle,
    /// Most severe action.
    Block,
}
63
/// Result of evaluating one `SloDefinition` against the current metric value.
#[derive(Debug, Clone, Serialize)]
pub struct SloStatus {
    /// Name copied from the definition.
    pub name: String,
    /// Metric that was sampled.
    pub metric: SloMetric,
    /// Threshold copied from the definition.
    pub threshold: f64,
    /// Metric value observed at evaluation time.
    pub actual: f64,
    /// Direction copied from the definition.
    pub direction: SloDirection,
    /// Action copied from the definition.
    pub action: SloAction,
    /// Whether `actual` is on the wrong side of `threshold` for `direction`.
    pub violated: bool,
}
78
/// Outcome of evaluating all active SLOs at one point in time.
#[derive(Debug, Clone, Serialize)]
pub struct SloSnapshot {
    /// Status of every evaluated SLO, in definition order.
    pub slos: Vec<SloStatus>,
    /// Copies of the entries in `slos` that are violated.
    pub violations: Vec<SloStatus>,
    /// Most severe action among `violations`, or `None` when nothing is violated.
    pub worst_action: Option<SloAction>,
}
85
/// In-memory log of recorded violations.
#[derive(Debug, Default)]
struct ViolationHistory {
    /// Entries in insertion order (oldest first). `record_violation` drops the
    /// oldest entries once the log grows past 500.
    entries: Vec<ViolationEntry>,
}
90
/// One recorded SLO violation.
#[derive(Debug, Clone, Serialize)]
pub struct ViolationEntry {
    /// Local wall-clock time of recording, formatted as `%Y-%m-%dT%H:%M:%S%.3f`
    /// (no UTC offset is included).
    pub timestamp: String,
    /// Name of the violated SLO.
    pub slo_name: String,
    /// Metric the SLO was evaluated against.
    pub metric: SloMetric,
    /// Threshold in effect when the violation was recorded.
    pub threshold: f64,
    /// Observed value that violated the threshold.
    pub actual: f64,
    /// Action attached to the violated SLO.
    pub action: SloAction,
}
100
// Active SLO definitions; lazily initialized through `config_store`.
static SLO_CONFIG: OnceLock<Mutex<Vec<SloDefinition>>> = OnceLock::new();
// In-memory violation log; lazily initialized through `violation_store`.
static VIOLATION_LOG: OnceLock<Mutex<ViolationHistory>> = OnceLock::new();
// Per-SLO debounce state keyed by SLO name; lazily initialized through `emit_state_store`.
static EMIT_STATE: OnceLock<Mutex<HashMap<String, EmitState>>> = OnceLock::new();

// Minimum time between repeated record/emit of an SLO that stays violated (see `evaluate`).
const VIOLATION_DEBOUNCE: Duration = Duration::from_secs(30);
106
/// Debounce bookkeeping that `evaluate` keeps for each SLO name.
#[derive(Debug, Default, Clone)]
struct EmitState {
    /// Whether the SLO was violated the last time `evaluate` looked at it.
    last_violated: bool,
    /// When a violation of this SLO was last recorded and emitted, if ever.
    last_emit: Option<Instant>,
}
112
113fn config_store() -> &'static Mutex<Vec<SloDefinition>> {
114 SLO_CONFIG.get_or_init(|| Mutex::new(load_slos_from_disk()))
115}
116
117fn violation_store() -> &'static Mutex<ViolationHistory> {
118 VIOLATION_LOG.get_or_init(|| Mutex::new(ViolationHistory::default()))
119}
120
121fn emit_state_store() -> &'static Mutex<HashMap<String, EmitState>> {
122 EMIT_STATE.get_or_init(|| Mutex::new(HashMap::new()))
123}
124
125fn slo_toml_paths() -> Vec<PathBuf> {
130 let mut paths = Vec::new();
131
132 if let Ok(dir) = crate::core::data_dir::lean_ctx_data_dir() {
133 paths.push(dir.join("slos.toml"));
134 }
135
136 if let Ok(home) = std::env::var("HOME").or_else(|_| std::env::var("USERPROFILE")) {
137 paths.push(PathBuf::from(home).join(".lean-ctx").join("slos.toml"));
138 }
139
140 if let Ok(cwd) = std::env::current_dir() {
141 paths.push(cwd.join(".lean-ctx").join("slos.toml"));
142 }
143
144 paths
145}
146
147fn load_slos_from_disk() -> Vec<SloDefinition> {
148 for path in slo_toml_paths() {
149 if let Ok(content) = std::fs::read_to_string(&path) {
150 match toml::from_str::<SloConfig>(&content) {
151 Ok(cfg) => return cfg.slo,
152 Err(e) => {
153 eprintln!("[lean-ctx] slo: parse error in {}: {e}", path.display());
154 }
155 }
156 }
157 }
158 default_slos()
159}
160
161fn default_slos() -> Vec<SloDefinition> {
162 vec![
163 SloDefinition {
164 name: "context_budget".into(),
165 metric: SloMetric::SessionContextTokens,
166 threshold: 200_000.0,
167 direction: SloDirection::Max,
168 action: SloAction::Warn,
169 },
170 SloDefinition {
171 name: "cost_per_session".into(),
172 metric: SloMetric::SessionCostUsd,
173 threshold: 5.0,
174 direction: SloDirection::Max,
175 action: SloAction::Throttle,
176 },
177 SloDefinition {
178 name: "compression_efficiency".into(),
179 metric: SloMetric::CompressionRatio,
180 threshold: 0.90,
184 direction: SloDirection::Max,
185 action: SloAction::Warn,
186 },
187 ]
188}
189
190pub fn reload() {
191 let fresh = load_slos_from_disk();
192 if let Ok(mut store) = config_store().lock() {
193 *store = fresh;
194 }
195}
196
197pub fn active_slos() -> Vec<SloDefinition> {
198 config_store().lock().map(|s| s.clone()).unwrap_or_default()
199}
200
201fn read_metric(metric: SloMetric) -> f64 {
206 let tracker = BudgetTracker::global();
207 match metric {
208 SloMetric::SessionContextTokens => tracker.tokens_used() as f64,
209 SloMetric::SessionCostUsd => tracker.cost_usd(),
210 SloMetric::ShellInvocations => tracker.shell_used() as f64,
211 SloMetric::CompressionRatio => {
212 let ledger = crate::core::context_ledger::ContextLedger::load();
213 let total_original: usize = ledger.entries.iter().map(|e| e.original_tokens).sum();
214 if total_original < 5000 {
215 0.0
216 } else {
217 ledger.compression_ratio()
218 }
219 }
220 SloMetric::ToolCallsTotal | SloMetric::ToolCallCount => tracker.tool_calls_count() as f64,
221 }
222}
223
224fn is_violated(actual: f64, threshold: f64, direction: SloDirection) -> bool {
225 match direction {
226 SloDirection::Max => actual > threshold,
227 SloDirection::Min => actual < threshold,
228 }
229}
230
231pub fn evaluate() -> SloSnapshot {
232 let defs = active_slos();
233 let mut slos = Vec::with_capacity(defs.len());
234 let mut violations = Vec::new();
235 let now = Instant::now();
236 let mut emit_state = emit_state_store()
237 .lock()
238 .unwrap_or_else(std::sync::PoisonError::into_inner);
239
240 for def in &defs {
241 let actual = read_metric(def.metric);
242 let violated = is_violated(actual, def.threshold, def.direction);
243
244 let status = SloStatus {
245 name: def.name.clone(),
246 metric: def.metric,
247 threshold: def.threshold,
248 actual,
249 direction: def.direction,
250 action: def.action,
251 violated,
252 };
253
254 if violated {
255 let st = emit_state.entry(def.name.clone()).or_default();
256 let is_first = !st.last_violated;
257 let is_due = st
258 .last_emit
259 .is_none_or(|t| t.elapsed() >= VIOLATION_DEBOUNCE);
260 if is_first || is_due {
261 st.last_emit = Some(now);
262 record_violation(&status);
263 emit_slo_event(&status);
264 }
265 st.last_violated = true;
266 violations.push(status.clone());
267 } else if let Some(st) = emit_state.get_mut(&def.name) {
268 st.last_violated = false;
269 }
270
271 slos.push(status);
272 }
273
274 let worst_action = violations.iter().map(|v| v.action).max_by_key(|a| match a {
275 SloAction::Warn => 0,
276 SloAction::Throttle => 1,
277 SloAction::Block => 2,
278 });
279
280 SloSnapshot {
281 slos,
282 violations,
283 worst_action,
284 }
285}
286
287pub fn evaluate_quiet() -> SloSnapshot {
288 crate::core::verification_observability::record_slo_eval();
290 let defs = active_slos();
291 let mut slos = Vec::with_capacity(defs.len());
292 let mut violations = Vec::new();
293
294 for def in &defs {
295 let actual = read_metric(def.metric);
296 let violated = is_violated(actual, def.threshold, def.direction);
297
298 let status = SloStatus {
299 name: def.name.clone(),
300 metric: def.metric,
301 threshold: def.threshold,
302 actual,
303 direction: def.direction,
304 action: def.action,
305 violated,
306 };
307
308 if violated {
309 violations.push(status.clone());
310 }
311 slos.push(status);
312 }
313
314 let worst_action = violations.iter().map(|v| v.action).max_by_key(|a| match a {
315 SloAction::Warn => 0,
316 SloAction::Throttle => 1,
317 SloAction::Block => 2,
318 });
319
320 SloSnapshot {
321 slos,
322 violations,
323 worst_action,
324 }
325}
326
327fn record_violation(status: &SloStatus) {
328 if let Ok(mut hist) = violation_store().lock() {
329 let entry = ViolationEntry {
330 timestamp: chrono::Local::now()
331 .format("%Y-%m-%dT%H:%M:%S%.3f")
332 .to_string(),
333 slo_name: status.name.clone(),
334 metric: status.metric,
335 threshold: status.threshold,
336 actual: status.actual,
337 action: status.action,
338 };
339 hist.entries.push(entry);
340 if hist.entries.len() > 500 {
341 let excess = hist.entries.len() - 500;
342 hist.entries.drain(..excess);
343 }
344 }
345}
346
347fn emit_slo_event(status: &SloStatus) {
348 events::emit(events::EventKind::SloViolation {
349 slo_name: status.name.clone(),
350 metric: format!("{:?}", status.metric),
351 threshold: status.threshold,
352 actual: status.actual,
353 action: format!("{:?}", status.action),
354 });
355}
356
357pub fn violation_history(limit: usize) -> Vec<ViolationEntry> {
358 violation_store()
359 .lock()
360 .map(|h| {
361 let start = h.entries.len().saturating_sub(limit);
362 h.entries[start..].to_vec()
363 })
364 .unwrap_or_default()
365}
366
367pub fn clear_violations() {
368 if let Ok(mut hist) = violation_store().lock() {
369 hist.entries.clear();
370 }
371}
372
373impl SloSnapshot {
378 pub fn format_compact(&self) -> String {
379 let total = self.slos.len();
380 let violated = self.violations.len();
381 let mut out = format!("SLOs: {}/{} passing", total - violated, total);
382
383 for v in &self.violations {
384 out.push_str(&format!(
385 "\n !! {} ({:?}): {:.2} vs threshold {:.2} → {:?}",
386 v.name, v.metric, v.actual, v.threshold, v.action
387 ));
388 }
389
390 out
391 }
392
393 pub fn should_block(&self) -> bool {
394 self.worst_action == Some(SloAction::Block)
395 }
396
397 pub fn should_throttle(&self) -> bool {
398 matches!(
399 self.worst_action,
400 Some(SloAction::Throttle | SloAction::Block)
401 )
402 }
403}
404
405impl std::fmt::Display for SloMetric {
406 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
407 match self {
408 Self::SessionContextTokens => write!(f, "session_context_tokens"),
409 Self::SessionCostUsd => write!(f, "session_cost_usd"),
410 Self::CompressionRatio => write!(f, "compression_ratio"),
411 Self::ShellInvocations => write!(f, "shell_invocations"),
412 Self::ToolCallsTotal => write!(f, "tool_calls_total"),
413 Self::ToolCallCount => write!(f, "tool_call_count"),
414 }
415 }
416}
417
418impl std::fmt::Display for SloAction {
419 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
420 match self {
421 Self::Warn => write!(f, "warn"),
422 Self::Throttle => write!(f, "throttle"),
423 Self::Block => write!(f, "block"),
424 }
425 }
426}
427
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an upper-bound (`Max`) status; every fixture in this module uses
    /// that direction.
    fn max_status(
        name: &str,
        metric: SloMetric,
        threshold: f64,
        actual: f64,
        action: SloAction,
        violated: bool,
    ) -> SloStatus {
        SloStatus {
            name: name.into(),
            metric,
            threshold,
            actual,
            direction: SloDirection::Max,
            action,
            violated,
        }
    }

    // The built-in defaults keep their expected count, order and key fields.
    #[test]
    fn default_slos_are_valid() {
        let defaults = default_slos();
        assert_eq!(defaults.len(), 3);
        assert_eq!(defaults[0].name, "context_budget");
        assert_eq!(defaults[1].action, SloAction::Throttle);
        assert_eq!(defaults[2].direction, SloDirection::Max);
    }

    // `Max` bounds are violated only above the threshold.
    #[test]
    fn violation_detection_max() {
        let threshold = 50_000.0;
        assert!(is_violated(60_000.0, threshold, SloDirection::Max));
        assert!(!is_violated(40_000.0, threshold, SloDirection::Max));
    }

    // `Min` bounds are violated only below the threshold.
    #[test]
    fn violation_detection_min() {
        let threshold = 0.3;
        assert!(is_violated(0.2, threshold, SloDirection::Min));
        assert!(!is_violated(0.5, threshold, SloDirection::Min));
    }

    // A two-entry TOML document round-trips into `SloConfig`, with and without
    // the optional `direction` key.
    #[test]
    fn slo_config_parses_from_toml() {
        let document = r#"
[[slo]]
name = "test_budget"
metric = "session_context_tokens"
threshold = 100000
action = "warn"

[[slo]]
name = "test_cost"
metric = "session_cost_usd"
threshold = 2.0
action = "block"
direction = "max"
"#;
        let parsed: SloConfig = toml::from_str(document).unwrap();
        let definitions = &parsed.slo;
        assert_eq!(definitions.len(), 2);
        assert_eq!(definitions[0].name, "test_budget");
        assert_eq!(definitions[0].metric, SloMetric::SessionContextTokens);
        assert_eq!(definitions[1].action, SloAction::Block);
    }

    // One passing and one blocking SLO: the summary counts and flags both show it.
    #[test]
    fn snapshot_format_compact() {
        let within_budget = max_status(
            "budget",
            SloMetric::SessionContextTokens,
            50000.0,
            30000.0,
            SloAction::Warn,
            false,
        );
        let over_cost = max_status(
            "cost",
            SloMetric::SessionCostUsd,
            1.0,
            2.5,
            SloAction::Block,
            true,
        );
        let snapshot = SloSnapshot {
            slos: vec![within_budget, over_cost.clone()],
            violations: vec![over_cost],
            worst_action: Some(SloAction::Block),
        };

        let rendered = snapshot.format_compact();
        assert!(rendered.contains("1/2 passing"));
        assert!(rendered.contains("cost"));
        assert!(snapshot.should_block());
    }

    // With nothing violated, neither blocking nor throttling is requested.
    #[test]
    fn snapshot_no_violations() {
        let healthy = max_status(
            "ok",
            SloMetric::SessionContextTokens,
            100_000.0,
            5000.0,
            SloAction::Warn,
            false,
        );
        let snapshot = SloSnapshot {
            slos: vec![healthy],
            violations: vec![],
            worst_action: None,
        };

        assert!(!snapshot.should_block());
        assert!(!snapshot.should_throttle());
        assert!(snapshot.format_compact().contains("1/1 passing"));
    }

    // `violation_history(limit)` returns the newest `limit` entries, oldest first.
    #[test]
    fn violation_history_capped() {
        clear_violations();
        (0..10)
            .map(|i| {
                max_status(
                    &format!("slo_{i}"),
                    SloMetric::SessionContextTokens,
                    100.0,
                    200.0,
                    SloAction::Warn,
                    true,
                )
            })
            .for_each(|violation| record_violation(&violation));

        let recent = violation_history(5);
        assert_eq!(recent.len(), 5);
        assert_eq!(recent[0].slo_name, "slo_5");
    }
}