1use anyhow::Result;
2use chrono::{DateTime, Utc};
3use prometheus::{Counter, Gauge, Histogram, Registry};
4use serde::{Deserialize, Serialize};
5use sqlx::PgPool;
6use std::collections::HashMap;
7use std::sync::Arc;
8use tokio::sync::RwLock;
9
/// A single threshold violation captured by the dashboard.
///
/// Instances are kept in the in-memory alert list and inserted into the
/// `performance_alerts` database table.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceAlert {
    /// Unique alert id (UUID v4 string).
    pub id: String,
    /// Metric key the alert was raised for (e.g. "p95_latency_ms").
    pub metric_name: String,
    /// Kind of comparison that was violated.
    pub threshold_type: ThresholdType,
    /// The configured boundary that was crossed.
    pub threshold_value: f64,
    /// The observed value that triggered the alert.
    pub current_value: f64,
    /// Severity assigned when the alert was raised.
    pub severity: AlertSeverity,
    /// Human-readable description; logged and persisted with the alert.
    pub message: String,
    /// UTC creation time; also used for retention pruning.
    pub timestamp: DateTime<Utc>,
    /// Whether the alert has been resolved; alerts are created with `false`.
    pub resolved: bool,
}
22
/// How an observed metric value is compared against its threshold.
// NOTE(review): only GreaterThan/LessThan are evaluated by the checks in this
// file; the Percentage* variants appear only in config (presumably compared
// against a baseline elsewhere — verify).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum ThresholdType {
    /// Alert when the value rises above the threshold (e.g. latency).
    GreaterThan,
    /// Alert when the value falls below the threshold (e.g. headroom).
    LessThan,
    /// Alert on a relative increase.
    PercentageIncrease,
    /// Alert on a relative decrease (e.g. throughput regression).
    PercentageDecrease,
}
30
/// Severity attached to a [`PerformanceAlert`]; also selects the tracing
/// level (info/warn/error) used when the alert is logged.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum AlertSeverity {
    Info,
    Warning,
    Critical,
}
37
/// Runtime configuration for the performance dashboard.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DashboardConfig {
    /// Per-metric alert thresholds, keyed by metric name.
    pub alert_thresholds: HashMap<String, AlertThreshold>,
    /// Monitoring cadence in seconds.
    // NOTE(review): not consumed anywhere in this file — presumably read by a
    // monitoring loop elsewhere; verify.
    pub monitoring_interval_seconds: u64,
    /// Age limit (days) for entries kept in the in-memory alert list.
    pub retention_days: u32,
    /// Whether auto-scaling may be driven from dashboard signals.
    // NOTE(review): not consumed in this file.
    pub enable_auto_scaling: bool,
    /// Targets used by the Story 10 compliance check.
    pub performance_targets: PerformanceTargets,
}
46
/// Warning/critical boundaries for a single metric.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlertThreshold {
    /// Boundary at which a Warning-severity alert fires.
    pub warning_threshold: f64,
    /// Boundary at which a Critical-severity alert fires.
    pub critical_threshold: f64,
    /// Direction of the comparison for this metric.
    pub threshold_type: ThresholdType,
    /// When false, threshold checks for this metric are skipped entirely.
    pub enabled: bool,
}
54
/// Story 10 performance targets consumed by the compliance check.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PerformanceTargets {
    /// Maximum acceptable p95 latency in milliseconds (compliant when below).
    pub p95_latency_ms: f64,
    /// Minimum acceptable token reduction percentage (compliant at/above).
    pub token_reduction_percent: f64,
    /// Minimum acceptable memory headroom percentage (compliant at/above).
    pub memory_headroom_percent: f64,
    /// Target batch throughput in operations per second.
    // NOTE(review): the three fields below are not read in this file.
    pub batch_throughput_ops_sec: f64,
    /// Target cache hit ratio (0.0–1.0).
    pub cache_hit_ratio: f64,
    /// Target connection-pool usage fraction (0.0–1.0).
    pub connection_pool_usage: f64,
}
64
/// Performance-monitoring facade: records metrics into Prometheus
/// collectors, raises threshold alerts, and persists them via sqlx.
#[derive(Debug)]
pub struct PerformanceDashboard {
    config: DashboardConfig,
    pool: PgPool,
    /// Registry holding all of this dashboard's collectors.
    registry: Arc<Registry>,
    /// In-memory alert history, pruned to `config.retention_days`.
    alerts: Arc<RwLock<Vec<PerformanceAlert>>>,

    // Prometheus collectors; clones of these handles are registered on
    // `registry`, so observations recorded here show up in `gather()`.
    latency_histogram: Histogram,
    throughput_gauge: Gauge,
    memory_usage_gauge: Gauge,
    token_reduction_gauge: Gauge,
    alert_counter: Counter,
}
79
80impl Default for DashboardConfig {
81 fn default() -> Self {
82 let mut alert_thresholds = HashMap::new();
83
84 alert_thresholds.insert(
86 "p95_latency_ms".to_string(),
87 AlertThreshold {
88 warning_threshold: 1500.0, critical_threshold: 2000.0, threshold_type: ThresholdType::GreaterThan,
91 enabled: true,
92 },
93 );
94
95 alert_thresholds.insert(
96 "memory_headroom_percent".to_string(),
97 AlertThreshold {
98 warning_threshold: 25.0, critical_threshold: 20.0, threshold_type: ThresholdType::LessThan,
101 enabled: true,
102 },
103 );
104
105 alert_thresholds.insert(
106 "token_reduction_percent".to_string(),
107 AlertThreshold {
108 warning_threshold: 85.0, critical_threshold: 90.0, threshold_type: ThresholdType::LessThan,
111 enabled: true,
112 },
113 );
114
115 alert_thresholds.insert(
116 "connection_pool_usage".to_string(),
117 AlertThreshold {
118 warning_threshold: 70.0, critical_threshold: 85.0, threshold_type: ThresholdType::GreaterThan,
121 enabled: true,
122 },
123 );
124
125 alert_thresholds.insert(
126 "batch_throughput_regression".to_string(),
127 AlertThreshold {
128 warning_threshold: 15.0, critical_threshold: 25.0, threshold_type: ThresholdType::PercentageDecrease,
131 enabled: true,
132 },
133 );
134
135 Self {
136 alert_thresholds,
137 monitoring_interval_seconds: 60,
138 retention_days: 30,
139 enable_auto_scaling: true,
140 performance_targets: PerformanceTargets {
141 p95_latency_ms: 2000.0,
142 token_reduction_percent: 90.0,
143 memory_headroom_percent: 20.0,
144 batch_throughput_ops_sec: 1000.0,
145 cache_hit_ratio: 0.9,
146 connection_pool_usage: 0.7,
147 },
148 }
149 }
150}
151
152impl PerformanceDashboard {
153 pub fn new(config: DashboardConfig, pool: PgPool) -> Result<Self> {
154 let registry = Arc::new(Registry::new());
155
156 let latency_histogram = Histogram::with_opts(
157 prometheus::HistogramOpts::new(
158 "memory_operation_duration_seconds",
159 "Duration of memory operations in seconds",
160 )
161 .buckets(vec![0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0, 2.0, 5.0]),
162 )?;
163
164 let throughput_gauge = Gauge::with_opts(prometheus::Opts::new(
165 "memory_operations_per_second",
166 "Number of memory operations per second",
167 ))?;
168
169 let memory_usage_gauge = Gauge::with_opts(prometheus::Opts::new(
170 "memory_headroom_percentage",
171 "Available memory headroom as percentage",
172 ))?;
173
174 let token_reduction_gauge = Gauge::with_opts(prometheus::Opts::new(
175 "token_reduction_percentage",
176 "Token reduction percentage vs full context",
177 ))?;
178
179 let alert_counter = Counter::with_opts(prometheus::Opts::new(
180 "performance_alerts_total",
181 "Total number of performance alerts triggered",
182 ))?;
183
184 registry.register(Box::new(latency_histogram.clone()))?;
185 registry.register(Box::new(throughput_gauge.clone()))?;
186 registry.register(Box::new(memory_usage_gauge.clone()))?;
187 registry.register(Box::new(token_reduction_gauge.clone()))?;
188 registry.register(Box::new(alert_counter.clone()))?;
189
190 Ok(Self {
191 config,
192 pool,
193 registry,
194 alerts: Arc::new(RwLock::new(Vec::new())),
195 latency_histogram,
196 throughput_gauge,
197 memory_usage_gauge,
198 token_reduction_gauge,
199 alert_counter,
200 })
201 }
202
203 pub fn record_latency(&self, duration_seconds: f64) {
205 self.latency_histogram.observe(duration_seconds);
206
207 let p95_latency_ms = duration_seconds * 1000.0;
209 tokio::spawn({
210 let dashboard = self.clone();
211 async move {
212 dashboard.check_latency_threshold(p95_latency_ms).await;
213 }
214 });
215 }
216
    /// Records the current operation throughput (ops/sec) on the gauge.
    /// No threshold check is performed for throughput in this file.
    pub fn record_throughput(&self, ops_per_second: f64) {
        self.throughput_gauge.set(ops_per_second);
    }
221
222 pub fn record_memory_headroom(&self, headroom_percent: f64) {
224 self.memory_usage_gauge.set(headroom_percent);
225
226 tokio::spawn({
228 let dashboard = self.clone();
229 async move {
230 dashboard
231 .check_memory_headroom_threshold(headroom_percent)
232 .await;
233 }
234 });
235 }
236
237 pub fn record_token_reduction(&self, reduction_percent: f64) {
239 self.token_reduction_gauge.set(reduction_percent);
240
241 tokio::spawn({
243 let dashboard = self.clone();
244 async move {
245 dashboard
246 .check_token_reduction_threshold(reduction_percent)
247 .await;
248 }
249 });
250 }
251
252 async fn check_latency_threshold(&self, latency_ms: f64) {
254 if let Some(threshold) = self.config.alert_thresholds.get("p95_latency_ms") {
255 if !threshold.enabled {
256 return;
257 }
258
259 let severity = if latency_ms > threshold.critical_threshold {
260 AlertSeverity::Critical
261 } else if latency_ms > threshold.warning_threshold {
262 AlertSeverity::Warning
263 } else {
264 return;
265 };
266
267 let alert = PerformanceAlert {
268 id: uuid::Uuid::new_v4().to_string(),
269 metric_name: "p95_latency_ms".to_string(),
270 threshold_type: ThresholdType::GreaterThan,
271 threshold_value: match severity {
272 AlertSeverity::Critical => threshold.critical_threshold,
273 _ => threshold.warning_threshold,
274 },
275 current_value: latency_ms,
276 severity,
277 message: format!(
278 "P95 latency {latency_ms:.1}ms exceeds threshold. Story 10 requirement: <2000ms"
279 ),
280 timestamp: Utc::now(),
281 resolved: false,
282 };
283
284 self.trigger_alert(alert).await;
285 }
286 }
287
288 async fn check_memory_headroom_threshold(&self, headroom_percent: f64) {
290 if let Some(threshold) = self.config.alert_thresholds.get("memory_headroom_percent") {
291 if !threshold.enabled || headroom_percent >= threshold.warning_threshold {
292 return;
293 }
294
295 let severity = if headroom_percent < threshold.critical_threshold {
296 AlertSeverity::Critical
297 } else {
298 AlertSeverity::Warning
299 };
300
301 let alert = PerformanceAlert {
302 id: uuid::Uuid::new_v4().to_string(),
303 metric_name: "memory_headroom_percent".to_string(),
304 threshold_type: ThresholdType::LessThan,
305 threshold_value: match severity {
306 AlertSeverity::Critical => threshold.critical_threshold,
307 _ => threshold.warning_threshold,
308 },
309 current_value: headroom_percent,
310 severity,
311 message: format!(
312 "Memory headroom {headroom_percent:.1}% below threshold. Story 10 requirement: ≥20%"
313 ),
314 timestamp: Utc::now(),
315 resolved: false,
316 };
317
318 self.trigger_alert(alert).await;
319 }
320 }
321
322 async fn check_token_reduction_threshold(&self, reduction_percent: f64) {
324 if let Some(threshold) = self.config.alert_thresholds.get("token_reduction_percent") {
325 if !threshold.enabled || reduction_percent >= threshold.critical_threshold {
326 return;
327 }
328
329 let severity = if reduction_percent < threshold.critical_threshold {
330 AlertSeverity::Critical
331 } else if reduction_percent < threshold.warning_threshold {
332 AlertSeverity::Warning
333 } else {
334 return;
335 };
336
337 let alert = PerformanceAlert {
338 id: uuid::Uuid::new_v4().to_string(),
339 metric_name: "token_reduction_percent".to_string(),
340 threshold_type: ThresholdType::LessThan,
341 threshold_value: match severity {
342 AlertSeverity::Critical => threshold.critical_threshold,
343 _ => threshold.warning_threshold,
344 },
345 current_value: reduction_percent,
346 severity,
347 message: format!(
348 "Token reduction {reduction_percent:.1}% below target. Story 10 requirement: ≥90%"
349 ),
350 timestamp: Utc::now(),
351 resolved: false,
352 };
353
354 self.trigger_alert(alert).await;
355 }
356 }
357
358 async fn trigger_alert(&self, alert: PerformanceAlert) {
360 self.alert_counter.inc();
361
362 {
364 let mut alerts = self.alerts.write().await;
365 alerts.push(alert.clone());
366
367 alerts.retain(|a| {
369 let age = Utc::now().signed_duration_since(a.timestamp);
370 age.num_days() <= self.config.retention_days as i64
371 });
372 }
373
374 match alert.severity {
376 AlertSeverity::Critical => {
377 tracing::error!(
378 "🚨 CRITICAL Performance Alert: {} - {}",
379 alert.metric_name,
380 alert.message
381 );
382 }
383 AlertSeverity::Warning => {
384 tracing::warn!(
385 "⚠️ WARNING Performance Alert: {} - {}",
386 alert.metric_name,
387 alert.message
388 );
389 }
390 AlertSeverity::Info => {
391 tracing::info!(
392 "ℹ️ INFO Performance Alert: {} - {}",
393 alert.metric_name,
394 alert.message
395 );
396 }
397 }
398
399 if let Err(e) = self.store_alert_in_db(&alert).await {
401 tracing::error!("Failed to store alert in database: {}", e);
402 }
403 }
404
405 async fn store_alert_in_db(&self, alert: &PerformanceAlert) -> Result<()> {
407 let _ = sqlx::query(
409 r#"
410 INSERT INTO performance_alerts (
411 id, metric_name, threshold_type, threshold_value, current_value,
412 severity, message, timestamp, resolved
413 ) VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
414 ON CONFLICT (id) DO NOTHING
415 "#,
416 )
417 .bind(&alert.id)
418 .bind(&alert.metric_name)
419 .bind(serde_json::to_string(&alert.threshold_type)?)
420 .bind(alert.threshold_value)
421 .bind(alert.current_value)
422 .bind(serde_json::to_string(&alert.severity)?)
423 .bind(&alert.message)
424 .bind(alert.timestamp)
425 .bind(alert.resolved)
426 .execute(&self.pool)
427 .await;
428
429 Ok(())
430 }
431
432 pub async fn get_performance_summary(&self) -> Result<PerformanceSummary> {
434 let current_metrics = self.get_current_metrics().await?;
435 let recent_alerts = {
436 let alerts = self.alerts.read().await;
437 alerts
438 .iter()
439 .filter(|a| !a.resolved)
440 .cloned()
441 .collect::<Vec<_>>()
442 };
443
444 let story10_compliance = self.check_story10_compliance(¤t_metrics);
445
446 Ok(PerformanceSummary {
447 current_metrics,
448 recent_alerts,
449 story10_compliance,
450 last_updated: Utc::now(),
451 })
452 }
453
454 async fn get_current_metrics(&self) -> Result<HashMap<String, f64>> {
456 let mut metrics = HashMap::new();
457
458 metrics.insert(
460 "memory_headroom_percent".to_string(),
461 self.memory_usage_gauge.get(),
462 );
463 metrics.insert(
464 "token_reduction_percent".to_string(),
465 self.token_reduction_gauge.get(),
466 );
467 metrics.insert(
468 "throughput_ops_sec".to_string(),
469 self.throughput_gauge.get(),
470 );
471
472 let sample_count = self.latency_histogram.get_sample_count();
474 let p95_latency = if sample_count > 0 {
475 self.latency_histogram.get_sample_sum() / sample_count as f64 * 1000.0
476 } else {
477 0.0
478 };
479 metrics.insert("p95_latency_ms".to_string(), p95_latency);
480
481 let pool_stats = self.pool.size() as f64;
483 let pool_usage = (pool_stats - self.pool.num_idle() as f64) / pool_stats;
484 metrics.insert(
485 "connection_pool_usage_percent".to_string(),
486 pool_usage * 100.0,
487 );
488
489 Ok(metrics)
490 }
491
492 fn check_story10_compliance(&self, metrics: &HashMap<String, f64>) -> Story10Compliance {
494 let p95_latency_compliant = metrics
495 .get("p95_latency_ms")
496 .map(|&v| v < self.config.performance_targets.p95_latency_ms)
497 .unwrap_or(false);
498
499 let token_reduction_compliant = metrics
500 .get("token_reduction_percent")
501 .map(|&v| v >= self.config.performance_targets.token_reduction_percent)
502 .unwrap_or(false);
503
504 let memory_headroom_compliant = metrics
505 .get("memory_headroom_percent")
506 .map(|&v| v >= self.config.performance_targets.memory_headroom_percent)
507 .unwrap_or(false);
508
509 Story10Compliance {
510 p95_latency_compliant,
511 token_reduction_compliant,
512 memory_headroom_compliant,
513 overall_compliant: p95_latency_compliant
514 && token_reduction_compliant
515 && memory_headroom_compliant,
516 }
517 }
518
519 pub fn export_prometheus_metrics(&self) -> String {
521 let encoder = prometheus::TextEncoder::new();
522 let metric_families = self.registry.gather();
523 encoder
524 .encode_to_string(&metric_families)
525 .unwrap_or_default()
526 }
527}
528
529impl Clone for PerformanceDashboard {
530 fn clone(&self) -> Self {
531 Self {
532 config: self.config.clone(),
533 pool: self.pool.clone(),
534 registry: self.registry.clone(),
535 alerts: self.alerts.clone(),
536 latency_histogram: self.latency_histogram.clone(),
537 throughput_gauge: self.throughput_gauge.clone(),
538 memory_usage_gauge: self.memory_usage_gauge.clone(),
539 token_reduction_gauge: self.token_reduction_gauge.clone(),
540 alert_counter: self.alert_counter.clone(),
541 }
542 }
543}
544
/// Snapshot returned by [`PerformanceDashboard::get_performance_summary`].
#[derive(Debug, Serialize, Deserialize)]
pub struct PerformanceSummary {
    /// Latest metric readings keyed by metric name.
    pub current_metrics: HashMap<String, f64>,
    /// Alerts that have not been marked resolved.
    pub recent_alerts: Vec<PerformanceAlert>,
    /// Story 10 compliance evaluation of `current_metrics`.
    pub story10_compliance: Story10Compliance,
    /// When this summary was assembled (UTC).
    pub last_updated: DateTime<Utc>,
}
552
/// Pass/fail flags for the Story 10 performance requirements.
#[derive(Debug, Serialize, Deserialize)]
pub struct Story10Compliance {
    /// True when p95 latency is below the configured target.
    pub p95_latency_compliant: bool,
    /// True when token reduction meets or exceeds the target.
    pub token_reduction_compliant: bool,
    /// True when memory headroom meets or exceeds the target.
    pub memory_headroom_compliant: bool,
    /// Conjunction of the three individual checks.
    pub overall_compliant: bool,
}